@@ -121,11 +121,13 @@ fn validate_char(node: ast::Char, errors: &mut Vec<SyntaxError>) {
121
121
errors. push ( SyntaxError :: new ( MalformedUnicodeEscape , range) ) ;
122
122
}
123
123
}
124
-
125
- // FIXME: we really need tests for this
126
124
}
127
- // Code points are always valid
128
- CodePoint => ( ) ,
125
+ CodePoint => {
126
+ // These code points must always be escaped
127
+ if text == "\t " || text == "\r " {
128
+ errors. push ( SyntaxError :: new ( UnescapedCodepoint , range) ) ;
129
+ }
130
+ } ,
129
131
}
130
132
}
131
133
@@ -148,3 +150,115 @@ fn is_ascii_escape(code: char) -> bool {
148
150
_ => false ,
149
151
}
150
152
}
153
+
154
#[cfg(test)]
mod test {
    use crate::File;

    /// Parses a one-item source file whose `char` constant is written as `'<literal>'`.
    ///
    /// `literal` is spliced verbatim between the single quotes, so escape
    /// sequences must be passed in their source form (e.g. `r"\n"`, not `"\n"`).
    fn build_file(literal: &str) -> File {
        let src = format!("const C: char = '{}';", literal);
        File::parse(&src)
    }

    /// Asserts that the file built around `literal` reports no errors at all.
    fn assert_valid_char(literal: &str) {
        let file = build_file(literal);
        let errors = file.errors();
        assert!(errors.is_empty(), "Errors for literal '{}': {:?}", literal, errors);
    }

    /// Asserts that the file built around `literal` reports at least one error.
    ///
    /// Only the presence of an error is checked, not its kind.
    fn assert_invalid_char(literal: &str) {
        let file = build_file(literal);
        assert!(
            !file.errors().is_empty(),
            "Expected errors for literal '{}', but found none",
            literal
        );
    }

    /// Every byte value, cast to a `char` and written unescaped, must be accepted,
    /// except newline, carriage return and tab, which must produce an error.
    #[test]
    fn test_ansi_codepoints() {
        for byte in 0..=255u8 {
            match byte {
                b'\n' | b'\r' | b'\t' => assert_invalid_char(&(byte as char).to_string()),
                // A bare quote would close the literal and a bare backslash would
                // start an escape, so neither can be tested as a plain code point.
                b'\'' | b'\\' => {}
                _ => assert_valid_char(&(byte as char).to_string()),
            }
        }
    }

    /// Single non-ASCII code points are valid char literals.
    #[test]
    fn test_unicode_codepoints() {
        for &literal in &["Ƒ", "バ", "メ", "﷽"] {
            assert_valid_char(literal);
        }
    }

    /// Text made of more than one code point is rejected.
    #[test]
    fn test_unicode_multiple_codepoints() {
        for &literal in &["नी", "👨👨"] {
            assert_invalid_char(literal);
        }
    }

    /// Recognised single-character escapes, plus a few plain characters, are valid.
    #[test]
    fn test_valid_ascii_escape() {
        let valid = [
            r"\'",
            // A bare double quote needs no escaping inside a char literal...
            "\"",
            // ...but the escaped form `\"` is accepted as well.
            "\\\"",
            r"\n",
            r"\r",
            r"\t",
            r"\0",
            "a",
            "b",
        ];
        for &literal in &valid {
            assert_valid_char(literal);
        }
    }

    /// Unknown escapes and a lone backslash are rejected.
    #[test]
    fn test_invalid_ascii_escape() {
        for &literal in &[r"\a", r"\?", r"\"] {
            assert_invalid_char(literal);
        }
    }

    /// `\xNN` escapes with two hex digits in the range 0x00..=0x7F are valid.
    #[test]
    fn test_valid_ascii_code_escape() {
        for &literal in &[r"\x00", r"\x7F", r"\x55"] {
            assert_valid_char(literal);
        }
    }

    /// `\x` escapes that are truncated or above 0x7F are rejected.
    #[test]
    fn test_invalid_ascii_code_escape() {
        for &literal in &[r"\x", r"\x7", r"\xF0"] {
            assert_invalid_char(literal);
        }
    }

    /// `\u{...}` escapes up to U+10FFFF are valid, including ones that use
    /// underscores between or after the hex digits.
    #[test]
    fn test_valid_unicode_escape() {
        let valid = [
            r"\u{FF}",
            r"\u{0}",
            r"\u{F}",
            r"\u{10FFFF}",
            r"\u{1_0__FF___FF_____}",
        ];
        for &literal in &valid {
            assert_valid_char(literal);
        }
    }

    /// `\u` escapes that are unterminated, empty, start with an underscore,
    /// have too many digits, or exceed U+10FFFF are rejected.
    #[test]
    fn test_invalid_unicode_escape() {
        let invalid = [
            r"\u",
            r"\u{}",
            r"\u{",
            r"\u{FF",
            r"\u{FFFFFF}",
            r"\u{_F}",
            r"\u{00FFFFF}",
            r"\u{110000}",
        ];
        for &literal in &invalid {
            assert_invalid_char(literal);
        }
    }
}
0 commit comments