test: port Syntax test to Windows

compnerd · compnerd · commit 93e610920f89 · 2019-01-15T15:37:55.000-08:00
Replace the inline sed commands with sed scripts to avoid the subshells
on Windows.  Additionally, the unicode handling on Windows causes
problems and using the scripts circumvents that problem.

Implement an inline dos2unix as the BSD sed does not support `-b` and on
Windows, sed will convert the line endings.
diff --git a/test/Syntax/Inputs/invalid.sed b/test/Syntax/Inputs/invalid.sed
@@ -0,0 +1,32 @@
+
+# [0xC2] is utf8 2 byte character start byte.
+# 0xC2 without second byte is invalid UTF-8 sequence.
+# It becomes garbage text trivia.
+# Marker(1) is replaced to this sequence.
+s/Z1/�/g
+
+# [0xCC, 0x82] in UTF-8 is U+0302.
+# This character is invalid for identifier start, but valid for identifier body.
+# It becomes unknown token.
+# If characters of this type are contiguous, they are concatenated into one long unknown token.
+# Marker(2) is replaced to this sequence.
+s/Z2/̂/g
+
+# [0xE2, 0x80, 0x9C] in UTF-8 is U+201C, left quote.
+# It becomes single character unknown token.
+# If this left quote and a right quote enclose text,
+# they become one long unknown token.
+# Marker(3) is replaced to this sequence.
+s/Z3/“/g
+
+# [0xE2, 0x80, 0x9D] in UTF-8 is U+201D, right quote.
+# It becomes single character unknown token.
+# Marker(4) is replaced to this sequence.
+s/Z4/”/g
+
+# [0xE1, 0x9A, 0x80] in UTF-8 is U+1680.
+# This character is invalid for swift source.
+# It becomes garbage trivia.
+# Marker(5) is replaced to this sequence.
+s/Z5/ /g
+
diff --git a/test/Syntax/Inputs/nbsp.sed b/test/Syntax/Inputs/nbsp.sed
@@ -0,0 +1 @@
+s/Z/ /g
diff --git a/test/Syntax/tokens_nonbreaking_space.swift b/test/Syntax/tokens_nonbreaking_space.swift
@@ -1,4 +1,4 @@
-// RUN: cat %s | sed -e 's/'$(echo -ne "\x5a")'/'$(echo -ne "\xc2\xa0")'/g' > %t.tmp
+// RUN: cat %s | sed -f %S/Inputs/nbsp.sed > %t.tmp
 // RUN: cp -f %t.tmp %t
 // RUN: %swift-syntax-test -input-source-filename %t -dump-full-tokens 2>&1 | %FileCheck %t
 let a =Z3Z // nbsp(Z)
diff --git a/test/Syntax/tokens_unknown_and_invalid.swift b/test/Syntax/tokens_unknown_and_invalid.swift
@@ -5,46 +5,8 @@
 // To avoid replace marker in sed command by sed itself,
 // marker is also represented in escape sequence.
 
-// RUN: cat %s | sed \
-
-// [0xC2] is utf8 2 byte character start byte.
-// 0xC2 without second byte is invalid UTF-8 sequence.
-// It becomes garbage text trivia.
-// Marker(1) is replaced to this sequence.
-
-// RUN: -e 's/'$(echo -ne "\x5a1")'/'$(echo -ne "\xc2")'/g' \
-
-// [0xCC, 0x82] in UTF-8 is U+0302.
-// This character is invalid for identifier start, but valid for identifier body.
-// It becomes unknown token.
-// If this type characters are conitguous, they are concatenated to one long unknown token.
-// Marker(2) is replaced to this sequence.
-
-// RUN: -e 's/'$(echo -ne "\x5a2")'/'$(echo -ne "\xcc\x82")'/g' \
-
-// [0xE2, 0x80, 0x9C] in UTF-8 is U+201C, left quote.
-// It becomes single character unknown token.
-// If this left quote and right quote enclosure text,
-// they become one long unknown token.
-// Marker(3) is replaced to this sequence.
-
-// RUN: -e 's/'$(echo -ne "\x5a3")'/'$(echo -ne "\xe2\x80\x9c")'/g' \
-
-// [0xE2, 0x80, 0x9D] in UTF-8 is U+201D, right quote.
-// It becomes single character unknown token.
-// Marker(4) is replaced to this sequence.
-
-// RUN: -e 's/'$(echo -ne "\x5a4")'/'$(echo -ne "\xe2\x80\x9d")'/g' \
-
-// [0xE1, 0x9A, 0x80] in UTF-8 is U+1680.
-// This character is invalid for swift source.
-// It becomes garbage trivia.
-// Marker(5) is replaced to this sequence.
-
-// RUN: -e 's/'$(echo -ne "\x5a5")'/'$(echo -ne "\xe1\x9a\x80")'/g' \
-
-// RUN: > %t
-
+// RUN: cat %s | sed -f %S/Inputs/invalid.sed > %t
+// RUN: %{python} -c "import sys; t = open(sys.argv[1], 'rb').read().replace(b'\r\n', b'\n'); open(sys.argv[1], 'wb').write(t)" %t
 // RUN: %swift-syntax-test -input-source-filename %t -dump-full-tokens 2>&1 | %FileCheck %t
 // RUN: %round-trip-syntax-test --swift-syntax-test %swift-syntax-test --file %t
 
@@ -66,20 +28,20 @@ Z5 iii Z5
 jjj
 
 // Diagnostics
-// CHECK: 52:1: error: invalid UTF-8 found in source file
-// CHECK: 52:7: error: invalid UTF-8 found in source file
-// CHECK: 54:5: error: an identifier cannot begin with this character
-// CHECK: 56:5: error: an identifier cannot begin with this character
-// CHECK: 58:5: error: unicode curly quote found
-// CHECK: 58:8: error: unicode curly quote found
-// CHECK: 60:19: error: unicode curly quote found
-// CHECK: 60:5: error: unicode curly quote found
-// CHECK: 62:5: error: unicode curly quote found
-// CHECK: 65:1: error: invalid character in source file
-// CHECK: 65:9: error: invalid character in source file
+// CHECK: 14:1: error: invalid UTF-8 found in source file
+// CHECK: 14:7: error: invalid UTF-8 found in source file
+// CHECK: 16:5: error: an identifier cannot begin with this character
+// CHECK: 18:5: error: an identifier cannot begin with this character
+// CHECK: 20:5: error: unicode curly quote found
+// CHECK: 20:8: error: unicode curly quote found
+// CHECK: 22:19: error: unicode curly quote found
+// CHECK: 22:5: error: unicode curly quote found
+// CHECK: 24:5: error: unicode curly quote found
+// CHECK: 27:1: error: invalid character in source file
+// CHECK: 27:9: error: invalid character in source file
 
 // Checks around bbb
-// CHECK-LABEL: 52:3
+// CHECK-LABEL: 14:3
 // CHECK-NEXT:  (Token identifier
 // CHECK-NEXT:   (trivia newline 1)
 // CHECK-NEXT:   (trivia garbageText \302)
@@ -89,35 +51,35 @@ jjj
 // CHECK-NEXT:   (trivia garbageText \302))
 
 // Checks around ccc
-// CHECK-LABEL: 54:5
+// CHECK-LABEL: 16:5
 // CHECK-NEXT:  (Token unknown
 // CHECK-NEXT:   (text="\xCC\x82"))
 
 // Checks around ddd
-// CHECK-LABEL: 56:5
+// CHECK-LABEL: 18:5
 // CHECK-NEXT:  (Token unknown
 // CHECK-NEXT:   (text="\xCC\x82\xCC\x82\xCC\x82\xCC\x82"))
 
 // Checks around eee
-// CHECK-LABEL: 58:5
+// CHECK-LABEL: 20:5
 // CHECK-NEXT:  (Token unknown
 // CHECK-NEXT:   (text="\xE2\x80\x9C"))
-// CHECK-LABEL: 58:8
+// CHECK-LABEL: 20:8
 // CHECK-NEXT:  (Token unknown
 // CHECK-NEXT:   (text="\xE2\x80\x9C"))
 
 // Checks around fff
-// CHECK-LABEL: 60:5
+// CHECK-LABEL: 22:5
 // CHECK-NEXT:  (Token unknown
 // CHECK-NEXT:   (text="\xE2\x80\x9Chello world\xE2\x80\x9D"))
 
 // Checks around ggg
-// CHECK-LABEL: 62:5
+// CHECK-LABEL: 24:5
 // CHECK-NEXT:  (Token unknown
 // CHECK-NEXT:   (text="\xE2\x80\x9D"))
 
 // Checks around iii
-// CHECK-LABEL: 65:5
+// CHECK-LABEL: 27:5
 // CHECK-NEXT:  (Token identifier
 // CHECK-NEXT:   (trivia newline 1)
 // CHECK-NEXT:   (trivia garbageText \341\232\200)

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-// RUN: cat %s \| sed -e 's/'$(echo -ne "\x5a")'/'$(echo -ne "\xc2\xa0")'/g' > %t.tmp`
	`1`	`+// RUN: cat %s \| sed -f %S/Inputs/nbsp.sed > %t.tmp`
`2`	`2`	`// RUN: cp -f %t.tmp %t`
`3`	`3`	`// RUN: %swift-syntax-test -input-source-filename %t -dump-full-tokens 2>&1 \| %FileCheck %t`
`4`	`4`	`let a =Z3Z // nbsp(Z)`