5
5
// To keep sed from replacing the marker inside the sed command itself,
6
6
// the marker is also written as an escape sequence.
7
7
8
- // RUN: cat %s | sed \
9
-
10
- // [0xC2] is the start byte of a 2-byte UTF-8 character.
11
- // 0xC2 without a second byte is an invalid UTF-8 sequence.
12
- // It becomes garbage text trivia.
13
- // Marker(1) is replaced with this sequence.
14
-
15
- // RUN: -e 's/'$(echo -ne "\x5a1")'/'$(echo -ne "\xc2")'/g' \
16
-
17
- // [0xCC, 0x82] in UTF-8 is U+0302.
18
- // This character is invalid for identifier start, but valid for identifier body.
19
- // It becomes unknown token.
20
- // If characters of this type are contiguous, they are concatenated into one long unknown token.
21
- // Marker(2) is replaced with this sequence.
22
-
23
- // RUN: -e 's/'$(echo -ne "\x5a2")'/'$(echo -ne "\xcc\x82")'/g' \
24
-
25
- // [0xE2, 0x80, 0x9C] in UTF-8 is U+201C, left quote.
26
- // It becomes single character unknown token.
27
- // If a left quote and a right quote enclose text,
28
- // they become one long unknown token.
29
- // Marker(3) is replaced with this sequence.
30
-
31
- // RUN: -e 's/'$(echo -ne "\x5a3")'/'$(echo -ne "\xe2\x80\x9c")'/g' \
32
-
33
- // [0xE2, 0x80, 0x9D] in UTF-8 is U+201D, right quote.
34
- // It becomes single character unknown token.
35
- // Marker(4) is replaced with this sequence.
36
-
37
- // RUN: -e 's/'$(echo -ne "\x5a4")'/'$(echo -ne "\xe2\x80\x9d")'/g' \
38
-
39
- // [0xE1, 0x9A, 0x80] in UTF-8 is U+1680.
40
- // This character is invalid in Swift source.
41
- // It becomes garbage trivia.
42
- // Marker(5) is replaced with this sequence.
43
-
44
- // RUN: -e 's/'$(echo -ne "\x5a5")'/'$(echo -ne "\xe1\x9a\x80")'/g' \
45
-
46
- // RUN: > %t
47
-
8
+ // RUN: cat %s | sed -f %S/Inputs/invalid.sed > %t
9
+ // RUN: %{python} -c "import sys; t = open(sys.argv[1], 'rb').read().replace('\r\n', '\n'); open(sys.argv[1], 'wb').write(t)" %t
48
10
// RUN: %swift-syntax-test -input-source-filename %t -dump-full-tokens 2>&1 | %FileCheck %t
49
11
// RUN: %round-trip-syntax-test --swift-syntax-test %swift-syntax-test --file %t
50
12
@@ -66,20 +28,20 @@ Z5 iii Z5
66
28
jjj
67
29
68
30
// Diagnostics
69
- // CHECK: 52 :1: error: invalid UTF-8 found in source file
70
- // CHECK: 52 :7: error: invalid UTF-8 found in source file
71
- // CHECK: 54 :5: error: an identifier cannot begin with this character
72
- // CHECK: 56 :5: error: an identifier cannot begin with this character
73
- // CHECK: 58 :5: error: unicode curly quote found
74
- // CHECK: 58 :8: error: unicode curly quote found
75
- // CHECK: 60 :19: error: unicode curly quote found
76
- // CHECK: 60 :5: error: unicode curly quote found
77
- // CHECK: 62 :5: error: unicode curly quote found
78
- // CHECK: 65 :1: error: invalid character in source file
79
- // CHECK: 65 :9: error: invalid character in source file
31
+ // CHECK: 14 :1: error: invalid UTF-8 found in source file
32
+ // CHECK: 14 :7: error: invalid UTF-8 found in source file
33
+ // CHECK: 16 :5: error: an identifier cannot begin with this character
34
+ // CHECK: 18 :5: error: an identifier cannot begin with this character
35
+ // CHECK: 20 :5: error: unicode curly quote found
36
+ // CHECK: 20 :8: error: unicode curly quote found
37
+ // CHECK: 22 :19: error: unicode curly quote found
38
+ // CHECK: 22 :5: error: unicode curly quote found
39
+ // CHECK: 24 :5: error: unicode curly quote found
40
+ // CHECK: 27 :1: error: invalid character in source file
41
+ // CHECK: 27 :9: error: invalid character in source file
80
42
81
43
// Checks around bbb
82
- // CHECK-LABEL: 52 :3
44
+ // CHECK-LABEL: 14 :3
83
45
// CHECK-NEXT: (Token identifier
84
46
// CHECK-NEXT: (trivia newline 1)
85
47
// CHECK-NEXT: (trivia garbageText \302)
89
51
// CHECK-NEXT: (trivia garbageText \302))
90
52
91
53
// Checks around ccc
92
- // CHECK-LABEL: 54 :5
54
+ // CHECK-LABEL: 16 :5
93
55
// CHECK-NEXT: (Token unknown
94
56
// CHECK-NEXT: (text="\xCC\x82"))
95
57
96
58
// Checks around ddd
97
- // CHECK-LABEL: 56 :5
59
+ // CHECK-LABEL: 18 :5
98
60
// CHECK-NEXT: (Token unknown
99
61
// CHECK-NEXT: (text="\xCC\x82\xCC\x82\xCC\x82\xCC\x82"))
100
62
101
63
// Checks around eee
102
- // CHECK-LABEL: 58 :5
64
+ // CHECK-LABEL: 20 :5
103
65
// CHECK-NEXT: (Token unknown
104
66
// CHECK-NEXT: (text="\xE2\x80\x9C"))
105
- // CHECK-LABEL: 58 :8
67
+ // CHECK-LABEL: 20 :8
106
68
// CHECK-NEXT: (Token unknown
107
69
// CHECK-NEXT: (text="\xE2\x80\x9C"))
108
70
109
71
// Checks around fff
110
- // CHECK-LABEL: 60 :5
72
+ // CHECK-LABEL: 22 :5
111
73
// CHECK-NEXT: (Token unknown
112
74
// CHECK-NEXT: (text="\xE2\x80\x9Chello world\xE2\x80\x9D"))
113
75
114
76
// Checks around ggg
115
- // CHECK-LABEL: 62 :5
77
+ // CHECK-LABEL: 24 :5
116
78
// CHECK-NEXT: (Token unknown
117
79
// CHECK-NEXT: (text="\xE2\x80\x9D"))
118
80
119
81
// Checks around iii
120
- // CHECK-LABEL: 65 :5
82
+ // CHECK-LABEL: 27 :5
121
83
// CHECK-NEXT: (Token identifier
122
84
// CHECK-NEXT: (trivia newline 1)
123
85
// CHECK-NEXT: (trivia garbageText \341\232\200)
0 commit comments