|
| 1 | +// To embed the test byte sequences, the markers in this source |
| 2 | +// are first replaced with the byte sequences at run time. |
| 3 | +// Marker(N) has the form `ZN`: a literal Z followed by the number N. |
| 4 | +// The byte sequences are written as escape sequences. |
| 5 | +// So that sed does not replace the markers inside its own commands, |
| 6 | +// the markers there are also written as escape sequences. |
| 7 | + |
| 8 | +// RUN: cat %s | sed \ |
| 9 | + |
| 10 | +// [0xC2] is the lead byte of a 2-byte UTF-8 character. |
| 11 | +// 0xC2 without a second byte is an invalid UTF-8 sequence. |
| 12 | +// It becomes garbage text trivia. |
| 13 | +// Marker(1) is replaced with this sequence. |
| 14 | + |
| 15 | +// RUN: -e 's/'$(echo -ne "\x5a1")'/'$(echo -ne "\xc2")'/g' \ |
| 16 | + |
| 17 | +// [0xCC, 0x82] in UTF-8 is U+0302. |
| 18 | +// This character is invalid at the start of an identifier, but valid in an identifier body. |
| 19 | +// It becomes an unknown token. |
| 20 | +// If characters of this kind are contiguous, they are concatenated into one long unknown token. |
| 21 | +// Marker(2) is replaced with this sequence. |
| 22 | + |
| 23 | +// RUN: -e 's/'$(echo -ne "\x5a2")'/'$(echo -ne "\xcc\x82")'/g' \ |
| 24 | + |
| 25 | +// [0xE2, 0x80, 0x9C] in UTF-8 is U+201C, the left curly quote. |
| 26 | +// It becomes a single-character unknown token. |
| 27 | +// If this left quote and a right quote enclose text, |
| 28 | +// the quotes and the enclosed text become one long unknown token. |
| 29 | +// Marker(3) is replaced with this sequence. |
| 30 | + |
| 31 | +// RUN: -e 's/'$(echo -ne "\x5a3")'/'$(echo -ne "\xe2\x80\x9c")'/g' \ |
| 32 | + |
| 33 | +// [0xE2, 0x80, 0x9D] in UTF-8 is U+201D, the right curly quote. |
| 34 | +// It becomes a single-character unknown token. |
| 35 | +// Marker(4) is replaced with this sequence. |
| 36 | + |
| 37 | +// RUN: -e 's/'$(echo -ne "\x5a4")'/'$(echo -ne "\xe2\x80\x9d")'/g' \ |
| 38 | + |
| 39 | +// [0xE1, 0x9A, 0x80] in UTF-8 is U+1680. |
| 40 | +// This character is invalid in Swift source. |
| 41 | +// It becomes garbage text trivia. |
| 42 | +// Marker(5) is replaced with this sequence. |
| 43 | + |
| 44 | +// RUN: -e 's/'$(echo -ne "\x5a5")'/'$(echo -ne "\xe1\x9a\x80")'/g' \ |
| 45 | + |
| 46 | +// RUN: > %t |
| 47 | + |
| 48 | +// RUN: %swift-syntax-test -input-source-filename %t -dump-full-tokens 2>&1 | %FileCheck %t |
| 49 | +// RUN: %round-trip-syntax-test --swift-syntax-test %swift-syntax-test --file %t |
| 50 | + |
| 51 | +aaa |
| 52 | +Z1 bbb Z1 |
| 53 | + |
| 54 | +ccc Z2 |
| 55 | + |
| 56 | +ddd Z2Z2Z2Z2 |
| 57 | + |
| 58 | +eee Z3Z3 |
| 59 | + |
| 60 | +fff Z3hello worldZ4 |
| 61 | + |
| 62 | +ggg Z4 |
| 63 | + |
| 64 | +hhh |
| 65 | +Z5 iii Z5 |
| 66 | +jjj |
| 67 | + |
| 68 | +// Diagnostics |
| 69 | +// CHECK: 52:1: error: invalid UTF-8 found in source file |
| 70 | +// CHECK: 52:7: error: invalid UTF-8 found in source file |
| 71 | +// CHECK: 54:5: error: an identifier cannot begin with this character |
| 72 | +// CHECK: 56:5: error: an identifier cannot begin with this character |
| 73 | +// CHECK: 58:5: error: unicode curly quote found |
| 74 | +// CHECK: 58:8: error: unicode curly quote found |
| 75 | +// CHECK: 60:19: error: unicode curly quote found |
| 76 | +// CHECK: 60:5: error: unicode curly quote found |
| 77 | +// CHECK: 62:5: error: unicode curly quote found |
| 78 | +// CHECK: 65:1: error: invalid character in source file |
| 79 | +// CHECK: 65:9: error: invalid character in source file |
| 80 | + |
| 81 | +// Checks around bbb |
| 82 | +// CHECK-LABEL: 52:3 |
| 83 | +// CHECK-NEXT: (Token identifier |
| 84 | +// CHECK-NEXT: (trivia newline 1) |
| 85 | +// CHECK-NEXT: (trivia garbage_text \302) |
| 86 | +// CHECK-NEXT: (trivia space 1) |
| 87 | +// CHECK-NEXT: (text="bbb") |
| 88 | +// CHECK-NEXT: (trivia space 1) |
| 89 | +// CHECK-NEXT: (trivia garbage_text \302)) |
| 90 | + |
| 91 | +// Checks around ccc |
| 92 | +// CHECK-LABEL: 54:5 |
| 93 | +// CHECK-NEXT: (Token unknown |
| 94 | +// CHECK-NEXT: (text="\xCC\x82")) |
| 95 | + |
| 96 | +// Checks around ddd |
| 97 | +// CHECK-LABEL: 56:5 |
| 98 | +// CHECK-NEXT: (Token unknown |
| 99 | +// CHECK-NEXT: (text="\xCC\x82\xCC\x82\xCC\x82\xCC\x82")) |
| 100 | + |
| 101 | +// Checks around eee |
| 102 | +// CHECK-LABEL: 58:5 |
| 103 | +// CHECK-NEXT: (Token unknown |
| 104 | +// CHECK-NEXT: (text="\xE2\x80\x9C")) |
| 105 | +// CHECK-LABEL: 58:8 |
| 106 | +// CHECK-NEXT: (Token unknown |
| 107 | +// CHECK-NEXT: (text="\xE2\x80\x9C")) |
| 108 | + |
| 109 | +// Checks around fff |
| 110 | +// CHECK-LABEL: 60:5 |
| 111 | +// CHECK-NEXT: (Token unknown |
| 112 | +// CHECK-NEXT: (text="\xE2\x80\x9Chello world\xE2\x80\x9D")) |
| 113 | + |
| 114 | +// Checks around ggg |
| 115 | +// CHECK-LABEL: 62:5 |
| 116 | +// CHECK-NEXT: (Token unknown |
| 117 | +// CHECK-NEXT: (text="\xE2\x80\x9D")) |
| 118 | + |
| 119 | +// Checks around iii |
| 120 | +// CHECK-LABEL: 65:5 |
| 121 | +// CHECK-NEXT: (Token identifier |
| 122 | +// CHECK-NEXT: (trivia newline 1) |
| 123 | +// CHECK-NEXT: (trivia garbage_text \341\232\200) |
| 124 | +// CHECK-NEXT: (trivia space 1) |
| 125 | +// CHECK-NEXT: (text="iii") |
| 126 | +// CHECK-NEXT: (trivia space 1) |
| 127 | +// CHECK-NEXT: (trivia garbage_text \341\232\200)) |
0 commit comments