Skip to content

Commit 93e6109

Browse files
committed
test: port Syntax test to Windows
Replace the inline sed commands with sed scripts to avoid the subshells on Windows. Additionally, Unicode handling on Windows causes problems, and using the scripts circumvents them. Implement an inline dos2unix because BSD sed does not support `-b`, and on Windows sed converts the line endings.
1 parent 167f94f commit 93e6109

File tree

4 files changed

+55
-60
lines changed

4 files changed

+55
-60
lines changed

test/Syntax/Inputs/invalid.sed

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
2+
# [0xC2] is the start byte of a 2-byte UTF-8 character.
3+
# 0xC2 without a second byte is an invalid UTF-8 sequence.
4+
# It becomes garbage text trivia.
5+
# Marker(1) is replaced with this sequence.
6+
s/Z1/Â/g
7+
8+
# [0xCC, 0x82] in UTF-8 is U+0302.
9+
# This character is invalid for identifier start, but valid for identifier body.
10+
# It becomes unknown token.
11+
# If characters of this type are contiguous, they are concatenated into one long unknown token.
12+
# Marker(2) is replaced with this sequence.
13+
s/Z2/Ì‚/g
14+
15+
# [0xE2, 0x80, 0x9C] in UTF-8 is U+201C, left quote.
16+
# It becomes single character unknown token.
17+
# If a left quote and a right quote enclose text,
18+
# they become one long unknown token.
19+
# Marker(3) is replaced with this sequence.
20+
s/Z3/“/g
21+
22+
# [0xE2, 0x80, 0x9D] in UTF-8 is U+201D, right quote.
23+
# It becomes single character unknown token.
24+
# Marker(4) is replaced with this sequence.
25+
s/Z4/”/g
26+
27+
# [0xE1, 0x9A, 0x80] in UTF-8 is U+1680.
28+
# This character is invalid in Swift source.
29+
# It becomes garbage trivia.
30+
# Marker(5) is replaced with this sequence.
31+
s/Z5/ /g
32+

test/Syntax/Inputs/nbsp.sed

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
s/Z/ /g

test/Syntax/tokens_nonbreaking_space.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: cat %s | sed -e 's/'$(echo -ne "\x5a")'/'$(echo -ne "\xc2\xa0")'/g' > %t.tmp
1+
// RUN: cat %s | sed -f %S/Inputs/nbsp.sed > %t.tmp
22
// RUN: cp -f %t.tmp %t
33
// RUN: %swift-syntax-test -input-source-filename %t -dump-full-tokens 2>&1 | %FileCheck %t
44
let a =Z3Z // nbsp(Z)

test/Syntax/tokens_unknown_and_invalid.swift

Lines changed: 21 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -5,46 +5,8 @@
55
// To keep sed from replacing the marker inside the sed command itself,
66
// the marker is also written as an escape sequence.
77

8-
// RUN: cat %s | sed \
9-
10-
// [0xC2] is utf8 2 byte character start byte.
11-
// 0xC2 without second byte is invalid UTF-8 sequence.
12-
// It becomes garbage text trivia.
13-
// Marker(1) is replaced to this sequence.
14-
15-
// RUN: -e 's/'$(echo -ne "\x5a1")'/'$(echo -ne "\xc2")'/g' \
16-
17-
// [0xCC, 0x82] in UTF-8 is U+0302.
18-
// This character is invalid for identifier start, but valid for identifier body.
19-
// It becomes unknown token.
20-
// If this type characters are conitguous, they are concatenated to one long unknown token.
21-
// Marker(2) is replaced to this sequence.
22-
23-
// RUN: -e 's/'$(echo -ne "\x5a2")'/'$(echo -ne "\xcc\x82")'/g' \
24-
25-
// [0xE2, 0x80, 0x9C] in UTF-8 is U+201C, left quote.
26-
// It becomes single character unknown token.
27-
// If this left quote and right quote enclosure text,
28-
// they become one long unknown token.
29-
// Marker(3) is replaced to this sequence.
30-
31-
// RUN: -e 's/'$(echo -ne "\x5a3")'/'$(echo -ne "\xe2\x80\x9c")'/g' \
32-
33-
// [0xE2, 0x80, 0x9D] in UTF-8 is U+201D, right quote.
34-
// It becomes single character unknown token.
35-
// Marker(4) is replaced to this sequence.
36-
37-
// RUN: -e 's/'$(echo -ne "\x5a4")'/'$(echo -ne "\xe2\x80\x9d")'/g' \
38-
39-
// [0xE1, 0x9A, 0x80] in UTF-8 is U+1680.
40-
// This character is invalid for swift source.
41-
// It becomes garbage trivia.
42-
// Marker(5) is replaced to this sequence.
43-
44-
// RUN: -e 's/'$(echo -ne "\x5a5")'/'$(echo -ne "\xe1\x9a\x80")'/g' \
45-
46-
// RUN: > %t
47-
8+
// RUN: cat %s | sed -f %S/Inputs/invalid.sed > %t
9+
// RUN: %{python} -c "import sys; t = open(sys.argv[1], 'rb').read().replace(b'\r\n', b'\n'); open(sys.argv[1], 'wb').write(t)" %t
4810
// RUN: %swift-syntax-test -input-source-filename %t -dump-full-tokens 2>&1 | %FileCheck %t
4911
// RUN: %round-trip-syntax-test --swift-syntax-test %swift-syntax-test --file %t
5012

@@ -66,20 +28,20 @@ Z5 iii Z5
6628
jjj
6729

6830
// Diagnostics
69-
// CHECK: 52:1: error: invalid UTF-8 found in source file
70-
// CHECK: 52:7: error: invalid UTF-8 found in source file
71-
// CHECK: 54:5: error: an identifier cannot begin with this character
72-
// CHECK: 56:5: error: an identifier cannot begin with this character
73-
// CHECK: 58:5: error: unicode curly quote found
74-
// CHECK: 58:8: error: unicode curly quote found
75-
// CHECK: 60:19: error: unicode curly quote found
76-
// CHECK: 60:5: error: unicode curly quote found
77-
// CHECK: 62:5: error: unicode curly quote found
78-
// CHECK: 65:1: error: invalid character in source file
79-
// CHECK: 65:9: error: invalid character in source file
31+
// CHECK: 14:1: error: invalid UTF-8 found in source file
32+
// CHECK: 14:7: error: invalid UTF-8 found in source file
33+
// CHECK: 16:5: error: an identifier cannot begin with this character
34+
// CHECK: 18:5: error: an identifier cannot begin with this character
35+
// CHECK: 20:5: error: unicode curly quote found
36+
// CHECK: 20:8: error: unicode curly quote found
37+
// CHECK: 22:19: error: unicode curly quote found
38+
// CHECK: 22:5: error: unicode curly quote found
39+
// CHECK: 24:5: error: unicode curly quote found
40+
// CHECK: 27:1: error: invalid character in source file
41+
// CHECK: 27:9: error: invalid character in source file
8042

8143
// Checks around bbb
82-
// CHECK-LABEL: 52:3
44+
// CHECK-LABEL: 14:3
8345
// CHECK-NEXT: (Token identifier
8446
// CHECK-NEXT: (trivia newline 1)
8547
// CHECK-NEXT: (trivia garbageText \302)
@@ -89,35 +51,35 @@ jjj
8951
// CHECK-NEXT: (trivia garbageText \302))
9052

9153
// Checks around ccc
92-
// CHECK-LABEL: 54:5
54+
// CHECK-LABEL: 16:5
9355
// CHECK-NEXT: (Token unknown
9456
// CHECK-NEXT: (text="\xCC\x82"))
9557

9658
// Checks around ddd
97-
// CHECK-LABEL: 56:5
59+
// CHECK-LABEL: 18:5
9860
// CHECK-NEXT: (Token unknown
9961
// CHECK-NEXT: (text="\xCC\x82\xCC\x82\xCC\x82\xCC\x82"))
10062

10163
// Checks around eee
102-
// CHECK-LABEL: 58:5
64+
// CHECK-LABEL: 20:5
10365
// CHECK-NEXT: (Token unknown
10466
// CHECK-NEXT: (text="\xE2\x80\x9C"))
105-
// CHECK-LABEL: 58:8
67+
// CHECK-LABEL: 20:8
10668
// CHECK-NEXT: (Token unknown
10769
// CHECK-NEXT: (text="\xE2\x80\x9C"))
10870

10971
// Checks around fff
110-
// CHECK-LABEL: 60:5
72+
// CHECK-LABEL: 22:5
11173
// CHECK-NEXT: (Token unknown
11274
// CHECK-NEXT: (text="\xE2\x80\x9Chello world\xE2\x80\x9D"))
11375

11476
// Checks around ggg
115-
// CHECK-LABEL: 62:5
77+
// CHECK-LABEL: 24:5
11678
// CHECK-NEXT: (Token unknown
11779
// CHECK-NEXT: (text="\xE2\x80\x9D"))
11880

11981
// Checks around iii
120-
// CHECK-LABEL: 65:5
82+
// CHECK-LABEL: 27:5
12183
// CHECK-NEXT: (Token identifier
12284
// CHECK-NEXT: (trivia newline 1)
12385
// CHECK-NEXT: (trivia garbageText \341\232\200)

0 commit comments

Comments
 (0)