Skip to content

Commit b6f2931

Browse files
committed
merge raw byte string and raw string parsing
1 parent f7f196c commit b6f2931

File tree

1 file changed

+78
-115
lines changed
  • src/libsyntax/parse/lexer

1 file changed

+78
-115
lines changed

src/libsyntax/parse/lexer/mod.rs

Lines changed: 78 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,12 @@ pub struct UnmatchedBrace {
4444
pub candidate_span: Option<Span>,
4545
}
4646

47+
#[derive(Clone, Copy, Debug)]
48+
enum RawStringType {
49+
Unicode,
50+
Byte,
51+
}
52+
4753
pub struct StringReader<'a> {
4854
crate sess: &'a ParseSess,
4955
/// The absolute offset within the source_map of the next character to read
@@ -1122,11 +1128,10 @@ impl<'a> StringReader<'a> {
11221128
self.validate_byte_str_escape(start_with_quote);
11231129
(token::ByteStr, symbol)
11241130
},
1125-
Some('r') => self.scan_raw_byte_string(),
1131+
Some('r') => self.scan_raw_string(RawStringType::Byte),
11261132
_ => unreachable!(), // Should have been a token::Ident above.
11271133
};
11281134
let suffix = self.scan_optional_raw_name();
1129-
11301135
Ok(Token::lit(kind, symbol, suffix))
11311136
}
11321137
'"' => {
@@ -1138,100 +1143,9 @@ impl<'a> StringReader<'a> {
11381143
Ok(Token::lit(token::Str, symbol, suffix))
11391144
}
11401145
'r' => {
1141-
let start_bpos = self.pos;
1142-
self.bump();
1143-
let mut hash_count: u16 = 0;
1144-
while self.ch_is('#') {
1145-
if hash_count == 65535 {
1146-
let bpos = self.next_pos;
1147-
self.fatal_span_(start_bpos,
1148-
bpos,
1149-
"too many `#` symbols: raw strings may be \
1150-
delimited by up to 65535 `#` symbols").raise();
1151-
}
1152-
self.bump();
1153-
hash_count += 1;
1154-
}
1155-
1156-
if self.is_eof() {
1157-
self.fail_unterminated_raw_string(start_bpos, hash_count);
1158-
} else if !self.ch_is('"') {
1159-
let last_bpos = self.pos;
1160-
let curr_char = self.ch.unwrap();
1161-
self.fatal_span_char(start_bpos,
1162-
last_bpos,
1163-
"found invalid character; only `#` is allowed \
1164-
in raw string delimitation",
1165-
curr_char).raise();
1166-
}
1167-
self.bump();
1168-
let content_start_bpos = self.pos;
1169-
let mut content_end_bpos;
1170-
let mut valid = true;
1171-
'outer: loop {
1172-
if self.is_eof() {
1173-
self.fail_unterminated_raw_string(start_bpos, hash_count);
1174-
}
1175-
let c = self.ch.unwrap();
1176-
match c {
1177-
'"' => {
1178-
content_end_bpos = self.pos;
1179-
for _ in 0..hash_count {
1180-
self.bump();
1181-
if !self.ch_is('#') {
1182-
continue 'outer;
1183-
}
1184-
}
1185-
break;
1186-
}
1187-
'\r' => {
1188-
if !self.nextch_is('\n') {
1189-
let last_bpos = self.pos;
1190-
self.err_span_(start_bpos,
1191-
last_bpos,
1192-
"bare CR not allowed in raw string, use \\r \
1193-
instead");
1194-
valid = false;
1195-
}
1196-
}
1197-
_ => (),
1198-
}
1199-
self.bump();
1200-
}
1201-
1202-
self.bump();
1203-
if self.ch_is('#') {
1204-
let lo = self.pos;
1205-
while self.ch_is('#') {
1206-
self.bump();
1207-
}
1208-
1209-
let sp = self.mk_sp(start_bpos, self.pos);
1210-
let sp_beg = self.mk_sp(BytePos(start_bpos.0 + 1), BytePos(start_bpos.0 + 1 + hash_count as u32));
1211-
let sp_end = self.mk_sp(BytePos(lo.0 - hash_count as u32), self.pos);
1212-
1213-
let mut err = self.sess.span_diagnostic.struct_span_err(sp, "too many `#` when terminating raw string");
1214-
err.span_label(sp_beg, format!("The raw string has {} leading `#`...", hash_count));
1215-
err.span_label(sp_end, format!("...but is closed with {}.", self.pos.0 - lo.0 + hash_count as u32));
1216-
err.span_suggestion_hidden(
1217-
self.mk_sp(lo, self.pos),
1218-
"remove the unneeded `#`",
1219-
String::new(),
1220-
Applicability::MachineApplicable,
1221-
);
1222-
1223-
err.emit();
1224-
valid = false;
1225-
}
1226-
1227-
let symbol = if valid {
1228-
self.name_from_to(content_start_bpos, content_end_bpos)
1229-
} else {
1230-
Symbol::intern("??")
1231-
};
1146+
let (lit, symbol) = self.scan_raw_string(RawStringType::Unicode);
12321147
let suffix = self.scan_optional_raw_name();
1233-
1234-
Ok(Token::lit(token::StrRaw(hash_count), symbol, suffix))
1148+
Ok(Token::lit(lit, symbol, suffix))
12351149
}
12361150
'-' => {
12371151
if self.nextch_is('>') {
@@ -1385,42 +1299,44 @@ impl<'a> StringReader<'a> {
13851299
id
13861300
}
13871301

1388-
fn scan_raw_byte_string(&mut self) -> (token::LitKind, Symbol) {
1302+
fn scan_raw_string(&mut self, raw_type: RawStringType) -> (token::LitKind, Symbol) {
13891303
let start_bpos = self.pos;
13901304
self.bump();
1391-
let mut hash_count = 0;
1305+
let mut hash_count: u16 = 0;
13921306
while self.ch_is('#') {
13931307
if hash_count == 65535 {
13941308
let bpos = self.next_pos;
13951309
self.fatal_span_(start_bpos,
13961310
bpos,
1397-
"too many `#` symbols: raw byte strings may be \
1311+
"too many `#` symbols: raw strings may be \
13981312
delimited by up to 65535 `#` symbols").raise();
13991313
}
14001314
self.bump();
14011315
hash_count += 1;
14021316
}
14031317

1404-
if self.is_eof() {
1405-
self.fail_unterminated_raw_string(start_bpos, hash_count);
1406-
} else if !self.ch_is('"') {
1407-
let pos = self.pos;
1408-
let ch = self.ch.unwrap();
1409-
self.fatal_span_char(start_bpos,
1410-
pos,
1411-
"found invalid character; only `#` is allowed in raw \
1412-
string delimitation",
1413-
ch).raise();
1318+
match self.ch {
1319+
None => self.fail_unterminated_raw_string(start_bpos, hash_count, vec![]),
1320+
Some('"') => {},
1321+
Some(c) => {
1322+
let last_bpos = self.pos;
1323+
self.fatal_span_char(start_bpos,
1324+
last_bpos,
1325+
"found invalid character; only `#` is allowed \
1326+
in raw string delimitation",
1327+
c).raise();
1328+
}
14141329
}
1330+
14151331
self.bump();
14161332
let content_start_bpos = self.pos;
14171333
let mut content_end_bpos;
1334+
let mut valid = true;
1335+
14181336
'outer: loop {
1419-
match self.ch {
1420-
None => {
1421-
self.fail_unterminated_raw_string(start_bpos, hash_count);
1422-
}
1423-
Some('"') => {
1337+
match (self.ch, raw_type) {
1338+
(None, _) => self.fail_unterminated_raw_string(start_bpos, hash_count),
1339+
(Some('"'), _) => {
14241340
content_end_bpos = self.pos;
14251341
for _ in 0..hash_count {
14261342
self.bump();
@@ -1430,19 +1346,66 @@ impl<'a> StringReader<'a> {
14301346
}
14311347
break;
14321348
}
1433-
Some(c) => {
1349+
(Some('\r'), RawStringType::Unicode) => {
1350+
if !self.nextch_is('\n') {
1351+
let last_bpos = self.pos;
1352+
self.err_span_(start_bpos,
1353+
last_bpos,
1354+
"bare CR not allowed in raw string, use \\r \
1355+
instead");
1356+
valid = false;
1357+
}
1358+
}
1359+
(Some(c), RawStringType::Byte) => {
14341360
if c > '\x7F' {
14351361
let pos = self.pos;
14361362
self.err_span_char(pos, pos, "raw byte string must be ASCII", c);
14371363
}
14381364
}
1365+
_ => (),
14391366
}
14401367
self.bump();
14411368
}
14421369

14431370
self.bump();
1371+
if self.ch_is('#') {
1372+
let lo = self.pos;
1373+
while self.ch_is('#') {
1374+
self.bump();
1375+
}
1376+
1377+
let sp = self.mk_sp(start_bpos, self.pos);
1378+
let sp_beg = self.mk_sp(BytePos(start_bpos.0 + 1),
1379+
BytePos(start_bpos.0 + 1 + hash_count as u32));
1380+
let sp_end = self.mk_sp(BytePos(lo.0 - hash_count as u32), self.pos);
1381+
1382+
let mut err = self.sess
1383+
.span_diagnostic.struct_span_err(sp, "too many `#` when terminating raw string");
1384+
err.span_label(sp_beg, format!("The raw string has {} leading `#`...", hash_count));
1385+
err.span_label(sp_end,
1386+
format!("...but is closed with {}.",
1387+
self.pos.0 - lo.0 + hash_count as u32));
1388+
err.span_suggestion_hidden(
1389+
self.mk_sp(lo, self.pos),
1390+
"remove the unneeded `#`",
1391+
String::new(),
1392+
Applicability::MachineApplicable,
1393+
);
14441394

1445-
(token::ByteStrRaw(hash_count), self.name_from_to(content_start_bpos, content_end_bpos))
1395+
err.emit();
1396+
valid = false;
1397+
}
1398+
1399+
let symbol = if valid {
1400+
self.name_from_to(content_start_bpos, content_end_bpos)
1401+
} else {
1402+
Symbol::intern("??")
1403+
};
1404+
1405+
match raw_type {
1406+
RawStringType::Unicode => (token::StrRaw(hash_count), symbol),
1407+
RawStringType::Byte => (token::ByteStrRaw(hash_count), symbol),
1408+
}
14461409
}
14471410

14481411
fn validate_char_escape(&self, start_with_quote: BytePos) {

0 commit comments

Comments
 (0)