@@ -44,6 +44,12 @@ pub struct UnmatchedBrace {
44
44
pub candidate_span : Option < Span > ,
45
45
}
46
46
47
/// Selects which flavour of raw string literal `scan_raw_string` is lexing.
///
/// The variant decides both the per-character validation applied to the
/// literal's content and the `token::LitKind` that is produced.
#[derive(Clone, Copy, Debug)]
enum RawStringType {
    /// A raw string literal; yields `token::StrRaw` and rejects a bare CR.
    Unicode,
    /// A raw byte string literal; yields `token::ByteStrRaw` and reports
    /// characters above `\x7F`.
    Byte,
}
52
+
47
53
pub struct StringReader < ' a > {
48
54
crate sess : & ' a ParseSess ,
49
55
/// The absolute offset within the source_map of the next character to read
@@ -1122,11 +1128,10 @@ impl<'a> StringReader<'a> {
1122
1128
self . validate_byte_str_escape ( start_with_quote) ;
1123
1129
( token:: ByteStr , symbol)
1124
1130
} ,
1125
- Some ( 'r' ) => self . scan_raw_byte_string ( ) ,
1131
+ Some ( 'r' ) => self . scan_raw_string ( RawStringType :: Byte ) ,
1126
1132
_ => unreachable ! ( ) , // Should have been a token::Ident above.
1127
1133
} ;
1128
1134
let suffix = self . scan_optional_raw_name ( ) ;
1129
-
1130
1135
Ok ( Token :: lit ( kind, symbol, suffix) )
1131
1136
}
1132
1137
'"' => {
@@ -1138,100 +1143,9 @@ impl<'a> StringReader<'a> {
1138
1143
Ok ( Token :: lit ( token:: Str , symbol, suffix) )
1139
1144
}
1140
1145
'r' => {
1141
- let start_bpos = self . pos ;
1142
- self . bump ( ) ;
1143
- let mut hash_count: u16 = 0 ;
1144
- while self . ch_is ( '#' ) {
1145
- if hash_count == 65535 {
1146
- let bpos = self . next_pos ;
1147
- self . fatal_span_ ( start_bpos,
1148
- bpos,
1149
- "too many `#` symbols: raw strings may be \
1150
- delimited by up to 65535 `#` symbols") . raise ( ) ;
1151
- }
1152
- self . bump ( ) ;
1153
- hash_count += 1 ;
1154
- }
1155
-
1156
- if self . is_eof ( ) {
1157
- self . fail_unterminated_raw_string ( start_bpos, hash_count) ;
1158
- } else if !self . ch_is ( '"' ) {
1159
- let last_bpos = self . pos ;
1160
- let curr_char = self . ch . unwrap ( ) ;
1161
- self . fatal_span_char ( start_bpos,
1162
- last_bpos,
1163
- "found invalid character; only `#` is allowed \
1164
- in raw string delimitation",
1165
- curr_char) . raise ( ) ;
1166
- }
1167
- self . bump ( ) ;
1168
- let content_start_bpos = self . pos ;
1169
- let mut content_end_bpos;
1170
- let mut valid = true ;
1171
- ' outer: loop {
1172
- if self . is_eof ( ) {
1173
- self . fail_unterminated_raw_string ( start_bpos, hash_count) ;
1174
- }
1175
- let c = self . ch . unwrap ( ) ;
1176
- match c {
1177
- '"' => {
1178
- content_end_bpos = self . pos ;
1179
- for _ in 0 ..hash_count {
1180
- self . bump ( ) ;
1181
- if !self . ch_is ( '#' ) {
1182
- continue ' outer;
1183
- }
1184
- }
1185
- break ;
1186
- }
1187
- '\r' => {
1188
- if !self . nextch_is ( '\n' ) {
1189
- let last_bpos = self . pos ;
1190
- self . err_span_ ( start_bpos,
1191
- last_bpos,
1192
- "bare CR not allowed in raw string, use \\ r \
1193
- instead") ;
1194
- valid = false ;
1195
- }
1196
- }
1197
- _ => ( ) ,
1198
- }
1199
- self . bump ( ) ;
1200
- }
1201
-
1202
- self . bump ( ) ;
1203
- if self . ch_is ( '#' ) {
1204
- let lo = self . pos ;
1205
- while self . ch_is ( '#' ) {
1206
- self . bump ( ) ;
1207
- }
1208
-
1209
- let sp = self . mk_sp ( start_bpos, self . pos ) ;
1210
- let sp_beg = self . mk_sp ( BytePos ( start_bpos. 0 + 1 ) , BytePos ( start_bpos. 0 + 1 + hash_count as u32 ) ) ;
1211
- let sp_end = self . mk_sp ( BytePos ( lo. 0 - hash_count as u32 ) , self . pos ) ;
1212
-
1213
- let mut err = self . sess . span_diagnostic . struct_span_err ( sp, "too many `#` when terminating raw string" ) ;
1214
- err. span_label ( sp_beg, format ! ( "The raw string has {} leading `#`..." , hash_count) ) ;
1215
- err. span_label ( sp_end, format ! ( "...but is closed with {}." , self . pos. 0 - lo. 0 + hash_count as u32 ) ) ;
1216
- err. span_suggestion_hidden (
1217
- self . mk_sp ( lo, self . pos ) ,
1218
- "remove the unneeded `#`" ,
1219
- String :: new ( ) ,
1220
- Applicability :: MachineApplicable ,
1221
- ) ;
1222
-
1223
- err. emit ( ) ;
1224
- valid = false ;
1225
- }
1226
-
1227
- let symbol = if valid {
1228
- self . name_from_to ( content_start_bpos, content_end_bpos)
1229
- } else {
1230
- Symbol :: intern ( "??" )
1231
- } ;
1146
+ let ( lit, symbol) = self . scan_raw_string ( RawStringType :: Unicode ) ;
1232
1147
let suffix = self . scan_optional_raw_name ( ) ;
1233
-
1234
- Ok ( Token :: lit ( token:: StrRaw ( hash_count) , symbol, suffix) )
1148
+ Ok ( Token :: lit ( lit, symbol, suffix) )
1235
1149
}
1236
1150
'-' => {
1237
1151
if self . nextch_is ( '>' ) {
@@ -1385,42 +1299,44 @@ impl<'a> StringReader<'a> {
1385
1299
id
1386
1300
}
1387
1301
1388
- fn scan_raw_byte_string ( & mut self ) -> ( token:: LitKind , Symbol ) {
1302
+ fn scan_raw_string ( & mut self , raw_type : RawStringType ) -> ( token:: LitKind , Symbol ) {
1389
1303
let start_bpos = self . pos ;
1390
1304
self . bump ( ) ;
1391
- let mut hash_count = 0 ;
1305
+ let mut hash_count: u16 = 0 ;
1392
1306
while self . ch_is ( '#' ) {
1393
1307
if hash_count == 65535 {
1394
1308
let bpos = self . next_pos ;
1395
1309
self . fatal_span_ ( start_bpos,
1396
1310
bpos,
1397
- "too many `#` symbols: raw byte strings may be \
1311
+ "too many `#` symbols: raw strings may be \
1398
1312
delimited by up to 65535 `#` symbols") . raise ( ) ;
1399
1313
}
1400
1314
self . bump ( ) ;
1401
1315
hash_count += 1 ;
1402
1316
}
1403
1317
1404
- if self . is_eof ( ) {
1405
- self . fail_unterminated_raw_string ( start_bpos, hash_count) ;
1406
- } else if !self . ch_is ( '"' ) {
1407
- let pos = self . pos ;
1408
- let ch = self . ch . unwrap ( ) ;
1409
- self . fatal_span_char ( start_bpos,
1410
- pos,
1411
- "found invalid character; only `#` is allowed in raw \
1412
- string delimitation",
1413
- ch) . raise ( ) ;
1318
+ match self . ch {
1319
+ None => self . fail_unterminated_raw_string ( start_bpos, hash_count, vec ! [ ] ) ,
1320
+ Some ( '"' ) => { } ,
1321
+ Some ( c) => {
1322
+ let last_bpos = self . pos ;
1323
+ self . fatal_span_char ( start_bpos,
1324
+ last_bpos,
1325
+ "found invalid character; only `#` is allowed \
1326
+ in raw string delimitation",
1327
+ c) . raise ( ) ;
1328
+ }
1414
1329
}
1330
+
1415
1331
self . bump ( ) ;
1416
1332
let content_start_bpos = self . pos ;
1417
1333
let mut content_end_bpos;
1334
+ let mut valid = true ;
1335
+
1418
1336
' outer: loop {
1419
- match self . ch {
1420
- None => {
1421
- self . fail_unterminated_raw_string ( start_bpos, hash_count) ;
1422
- }
1423
- Some ( '"' ) => {
1337
+ match ( self . ch , raw_type) {
1338
+ ( None , _) => self . fail_unterminated_raw_string ( start_bpos, hash_count) ,
1339
+ ( Some ( '"' ) , _) => {
1424
1340
content_end_bpos = self . pos ;
1425
1341
for _ in 0 ..hash_count {
1426
1342
self . bump ( ) ;
@@ -1430,19 +1346,66 @@ impl<'a> StringReader<'a> {
1430
1346
}
1431
1347
break ;
1432
1348
}
1433
- Some ( c) => {
1349
+ ( Some ( '\r' ) , RawStringType :: Unicode ) => {
1350
+ if !self . nextch_is ( '\n' ) {
1351
+ let last_bpos = self . pos ;
1352
+ self . err_span_ ( start_bpos,
1353
+ last_bpos,
1354
+ "bare CR not allowed in raw string, use \\ r \
1355
+ instead") ;
1356
+ valid = false ;
1357
+ }
1358
+ }
1359
+ ( Some ( c) , RawStringType :: Byte ) => {
1434
1360
if c > '\x7F' {
1435
1361
let pos = self . pos ;
1436
1362
self . err_span_char ( pos, pos, "raw byte string must be ASCII" , c) ;
1437
1363
}
1438
1364
}
1365
+ _ => ( ) ,
1439
1366
}
1440
1367
self . bump ( ) ;
1441
1368
}
1442
1369
1443
1370
self . bump ( ) ;
1371
+ if self . ch_is ( '#' ) {
1372
+ let lo = self . pos ;
1373
+ while self . ch_is ( '#' ) {
1374
+ self . bump ( ) ;
1375
+ }
1376
+
1377
+ let sp = self . mk_sp ( start_bpos, self . pos ) ;
1378
+ let sp_beg = self . mk_sp ( BytePos ( start_bpos. 0 + 1 ) ,
1379
+ BytePos ( start_bpos. 0 + 1 + hash_count as u32 ) ) ;
1380
+ let sp_end = self . mk_sp ( BytePos ( lo. 0 - hash_count as u32 ) , self . pos ) ;
1381
+
1382
+ let mut err = self . sess
1383
+ . span_diagnostic . struct_span_err ( sp, "too many `#` when terminating raw string" ) ;
1384
+ err. span_label ( sp_beg, format ! ( "The raw string has {} leading `#`..." , hash_count) ) ;
1385
+ err. span_label ( sp_end,
1386
+ format ! ( "...but is closed with {}." ,
1387
+ self . pos. 0 - lo. 0 + hash_count as u32 ) ) ;
1388
+ err. span_suggestion_hidden (
1389
+ self . mk_sp ( lo, self . pos ) ,
1390
+ "remove the unneeded `#`" ,
1391
+ String :: new ( ) ,
1392
+ Applicability :: MachineApplicable ,
1393
+ ) ;
1444
1394
1445
- ( token:: ByteStrRaw ( hash_count) , self . name_from_to ( content_start_bpos, content_end_bpos) )
1395
+ err. emit ( ) ;
1396
+ valid = false ;
1397
+ }
1398
+
1399
+ let symbol = if valid {
1400
+ self . name_from_to ( content_start_bpos, content_end_bpos)
1401
+ } else {
1402
+ Symbol :: intern ( "??" )
1403
+ } ;
1404
+
1405
+ match raw_type {
1406
+ RawStringType :: Unicode => ( token:: StrRaw ( hash_count) , symbol) ,
1407
+ RawStringType :: Byte => ( token:: ByteStrRaw ( hash_count) , symbol) ,
1408
+ }
1446
1409
}
1447
1410
1448
1411
fn validate_char_escape ( & self , start_with_quote : BytePos ) {
0 commit comments