Skip to content

Commit 51c9ba7

Browse files
authored
Merge pull request #21895 from compnerd/suga-suga-you-so-fly
test: port Syntax test to Windows
2 parents f744af1 + 93e6109 commit 51c9ba7

File tree

4 files changed

+55
-60
lines changed

4 files changed

+55
-60
lines changed

test/Syntax/Inputs/invalid.sed

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
2+
# [0xC2] is utf8 2 byte character start byte.
3+
# 0xC2 without second byte is invalid UTF-8 sequence.
4+
# It becomes garbage text trivia.
5+
# Marker(1) is replaced to this sequence.
6+
s/Z1/Â/g
7+
8+
# [0xCC, 0x82] in UTF-8 is U+0302.
9+
# This character is invalid for identifier start, but valid for identifier body.
10+
# It becomes unknown token.
11+
# If characters of this type are contiguous, they are concatenated into one long unknown token.
12+
# Marker(2) is replaced to this sequence.
13+
s/Z2/̂/g
14+
15+
# [0xE2, 0x80, 0x9C] in UTF-8 is U+201C, left quote.
16+
# It becomes single character unknown token.
17+
# If a left quote and a right quote enclose text,
18+
# they become one long unknown token.
19+
# Marker(3) is replaced to this sequence.
20+
s/Z3/“/g
21+
22+
# [0xE2, 0x80, 0x9D] in UTF-8 is U+201D, right quote.
23+
# It becomes single character unknown token.
24+
# Marker(4) is replaced to this sequence.
25+
s/Z4/”/g
26+
27+
# [0xE1, 0x9A, 0x80] in UTF-8 is U+1680.
28+
# This character is invalid for swift source.
29+
# It becomes garbage trivia.
30+
# Marker(5) is replaced to this sequence.
31+
s/Z5/ /g
32+

test/Syntax/Inputs/nbsp.sed

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
s/Z/ /g

test/Syntax/tokens_nonbreaking_space.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: cat %s | sed -e 's/'$(echo -ne "\x5a")'/'$(echo -ne "\xc2\xa0")'/g' > %t.tmp
1+
// RUN: cat %s | sed -f %S/Inputs/nbsp.sed > %t.tmp
22
// RUN: cp -f %t.tmp %t
33
// RUN: %swift-syntax-test -input-source-filename %t -dump-full-tokens 2>&1 | %FileCheck %t
44
let a =Z3Z // nbsp(Z)

test/Syntax/tokens_unknown_and_invalid.swift

Lines changed: 21 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -5,46 +5,8 @@
55
// To keep sed from replacing the markers inside the sed command itself,
66
// each marker is also written as an escape sequence.
77

8-
// RUN: cat %s | sed \
9-
10-
// [0xC2] is utf8 2 byte character start byte.
11-
// 0xC2 without second byte is invalid UTF-8 sequence.
12-
// It becomes garbage text trivia.
13-
// Marker(1) is replaced to this sequence.
14-
15-
// RUN: -e 's/'$(echo -ne "\x5a1")'/'$(echo -ne "\xc2")'/g' \
16-
17-
// [0xCC, 0x82] in UTF-8 is U+0302.
18-
// This character is invalid for identifier start, but valid for identifier body.
19-
// It becomes unknown token.
20-
// If characters of this type are contiguous, they are concatenated into one long unknown token.
21-
// Marker(2) is replaced to this sequence.
22-
23-
// RUN: -e 's/'$(echo -ne "\x5a2")'/'$(echo -ne "\xcc\x82")'/g' \
24-
25-
// [0xE2, 0x80, 0x9C] in UTF-8 is U+201C, left quote.
26-
// It becomes single character unknown token.
27-
// If a left quote and a right quote enclose text,
28-
// they become one long unknown token.
29-
// Marker(3) is replaced to this sequence.
30-
31-
// RUN: -e 's/'$(echo -ne "\x5a3")'/'$(echo -ne "\xe2\x80\x9c")'/g' \
32-
33-
// [0xE2, 0x80, 0x9D] in UTF-8 is U+201D, right quote.
34-
// It becomes single character unknown token.
35-
// Marker(4) is replaced to this sequence.
36-
37-
// RUN: -e 's/'$(echo -ne "\x5a4")'/'$(echo -ne "\xe2\x80\x9d")'/g' \
38-
39-
// [0xE1, 0x9A, 0x80] in UTF-8 is U+1680.
40-
// This character is invalid for swift source.
41-
// It becomes garbage trivia.
42-
// Marker(5) is replaced to this sequence.
43-
44-
// RUN: -e 's/'$(echo -ne "\x5a5")'/'$(echo -ne "\xe1\x9a\x80")'/g' \
45-
46-
// RUN: > %t
47-
8+
// RUN: cat %s | sed -f %S/Inputs/invalid.sed > %t
9+
// RUN: %{python} -c "import sys; t = open(sys.argv[1], 'rb').read().replace('\r\n', '\n'); open(sys.argv[1], 'wb').write(t)" %t
4810
// RUN: %swift-syntax-test -input-source-filename %t -dump-full-tokens 2>&1 | %FileCheck %t
4911
// RUN: %round-trip-syntax-test --swift-syntax-test %swift-syntax-test --file %t
5012

@@ -66,20 +28,20 @@ Z5 iii Z5
6628
jjj
6729

6830
// Diagnostics
69-
// CHECK: 52:1: error: invalid UTF-8 found in source file
70-
// CHECK: 52:7: error: invalid UTF-8 found in source file
71-
// CHECK: 54:5: error: an identifier cannot begin with this character
72-
// CHECK: 56:5: error: an identifier cannot begin with this character
73-
// CHECK: 58:5: error: unicode curly quote found
74-
// CHECK: 58:8: error: unicode curly quote found
75-
// CHECK: 60:19: error: unicode curly quote found
76-
// CHECK: 60:5: error: unicode curly quote found
77-
// CHECK: 62:5: error: unicode curly quote found
78-
// CHECK: 65:1: error: invalid character in source file
79-
// CHECK: 65:9: error: invalid character in source file
31+
// CHECK: 14:1: error: invalid UTF-8 found in source file
32+
// CHECK: 14:7: error: invalid UTF-8 found in source file
33+
// CHECK: 16:5: error: an identifier cannot begin with this character
34+
// CHECK: 18:5: error: an identifier cannot begin with this character
35+
// CHECK: 20:5: error: unicode curly quote found
36+
// CHECK: 20:8: error: unicode curly quote found
37+
// CHECK: 22:19: error: unicode curly quote found
38+
// CHECK: 22:5: error: unicode curly quote found
39+
// CHECK: 24:5: error: unicode curly quote found
40+
// CHECK: 27:1: error: invalid character in source file
41+
// CHECK: 27:9: error: invalid character in source file
8042

8143
// Checks around bbb
82-
// CHECK-LABEL: 52:3
44+
// CHECK-LABEL: 14:3
8345
// CHECK-NEXT: (Token identifier
8446
// CHECK-NEXT: (trivia newline 1)
8547
// CHECK-NEXT: (trivia garbageText \302)
@@ -89,35 +51,35 @@ jjj
8951
// CHECK-NEXT: (trivia garbageText \302))
9052

9153
// Checks around ccc
92-
// CHECK-LABEL: 54:5
54+
// CHECK-LABEL: 16:5
9355
// CHECK-NEXT: (Token unknown
9456
// CHECK-NEXT: (text="\xCC\x82"))
9557

9658
// Checks around ddd
97-
// CHECK-LABEL: 56:5
59+
// CHECK-LABEL: 18:5
9860
// CHECK-NEXT: (Token unknown
9961
// CHECK-NEXT: (text="\xCC\x82\xCC\x82\xCC\x82\xCC\x82"))
10062

10163
// Checks around eee
102-
// CHECK-LABEL: 58:5
64+
// CHECK-LABEL: 20:5
10365
// CHECK-NEXT: (Token unknown
10466
// CHECK-NEXT: (text="\xE2\x80\x9C"))
105-
// CHECK-LABEL: 58:8
67+
// CHECK-LABEL: 20:8
10668
// CHECK-NEXT: (Token unknown
10769
// CHECK-NEXT: (text="\xE2\x80\x9C"))
10870

10971
// Checks around fff
110-
// CHECK-LABEL: 60:5
72+
// CHECK-LABEL: 22:5
11173
// CHECK-NEXT: (Token unknown
11274
// CHECK-NEXT: (text="\xE2\x80\x9Chello world\xE2\x80\x9D"))
11375

11476
// Checks around ggg
115-
// CHECK-LABEL: 62:5
77+
// CHECK-LABEL: 24:5
11678
// CHECK-NEXT: (Token unknown
11779
// CHECK-NEXT: (text="\xE2\x80\x9D"))
11880

11981
// Checks around iii
120-
// CHECK-LABEL: 65:5
82+
// CHECK-LABEL: 27:5
12183
// CHECK-NEXT: (Token identifier
12284
// CHECK-NEXT: (trivia newline 1)
12385
// CHECK-NEXT: (trivia garbageText \341\232\200)

0 commit comments

Comments
 (0)