Skip to content

Commit 47fe8aa

Browse files
committed
lexer: shuffle around some functions
1 parent 5f970e6 commit 47fe8aa

File tree

1 file changed

+100
-99
lines changed
  • src/libsyntax/parse/lexer

1 file changed

+100
-99
lines changed

src/libsyntax/parse/lexer/mod.rs

Lines changed: 100 additions & 99 deletions
Original file line number | Diff line number | Diff line change
@@ -969,108 +969,12 @@ impl<'a> StringReader<'a> {
969969
'b' => {
970970
self.bump();
971971
return match self.curr {
972-
Some('\'') => parse_byte(self),
973-
Some('"') => parse_byte_string(self),
974-
Some('r') => parse_raw_byte_string(self),
972+
Some('\'') => self.scan_byte(),
973+
Some('"') => self.scan_byte_string(),
974+
Some('r') => self.scan_raw_byte_string(),
975975
_ => unreachable!() // Should have been a token::IDENT above.
976976
};
977977

978-
fn parse_byte(self_: &mut StringReader) -> token::Token {
979-
self_.bump();
980-
let start = self_.last_pos;
981-
982-
// the eof will be picked up by the final `'` check below
983-
let mut c2 = self_.curr.unwrap_or('\x00');
984-
self_.bump();
985-
986-
c2 = self_.scan_char_or_byte(start, c2, /* ascii_only = */ true, '\'').unwrap();
987-
if !self_.curr_is('\'') {
988-
// Byte offsetting here is okay because the
989-
// character before position `start` are an
990-
// ascii single quote and ascii 'b'.
991-
let last_pos = self_.last_pos;
992-
self_.fatal_span_verbose(
993-
start - BytePos(2), last_pos,
994-
"unterminated byte constant".to_string());
995-
}
996-
self_.bump(); // advance curr past token
997-
return token::LIT_BYTE(c2 as u8);
998-
}
999-
1000-
fn parse_byte_string(self_: &mut StringReader) -> token::Token {
1001-
self_.bump();
1002-
let start = self_.last_pos;
1003-
let mut value = Vec::new();
1004-
while !self_.curr_is('"') {
1005-
if self_.is_eof() {
1006-
let last_pos = self_.last_pos;
1007-
self_.fatal_span_(start, last_pos,
1008-
"unterminated double quote byte string");
1009-
}
1010-
1011-
let ch_start = self_.last_pos;
1012-
let ch = self_.curr.unwrap();
1013-
self_.bump();
1014-
self_.scan_char_or_byte(ch_start, ch, /* ascii_only = */ true, '"')
1015-
.map(|ch| value.push(ch as u8));
1016-
}
1017-
self_.bump();
1018-
return token::LIT_BINARY(Rc::new(value));
1019-
}
1020-
1021-
fn parse_raw_byte_string(self_: &mut StringReader) -> token::Token {
1022-
let start_bpos = self_.last_pos;
1023-
self_.bump();
1024-
let mut hash_count = 0u;
1025-
while self_.curr_is('#') {
1026-
self_.bump();
1027-
hash_count += 1;
1028-
}
1029-
1030-
if self_.is_eof() {
1031-
let last_pos = self_.last_pos;
1032-
self_.fatal_span_(start_bpos, last_pos, "unterminated raw string");
1033-
} else if !self_.curr_is('"') {
1034-
let last_pos = self_.last_pos;
1035-
let ch = self_.curr.unwrap();
1036-
self_.fatal_span_char(start_bpos, last_pos,
1037-
"only `#` is allowed in raw string delimitation; \
1038-
found illegal character",
1039-
ch);
1040-
}
1041-
self_.bump();
1042-
let content_start_bpos = self_.last_pos;
1043-
let mut content_end_bpos;
1044-
'outer: loop {
1045-
match self_.curr {
1046-
None => {
1047-
let last_pos = self_.last_pos;
1048-
self_.fatal_span_(start_bpos, last_pos, "unterminated raw string")
1049-
},
1050-
Some('"') => {
1051-
content_end_bpos = self_.last_pos;
1052-
for _ in range(0, hash_count) {
1053-
self_.bump();
1054-
if !self_.curr_is('#') {
1055-
continue 'outer;
1056-
}
1057-
}
1058-
break;
1059-
},
1060-
Some(c) => if c > '\x7F' {
1061-
let last_pos = self_.last_pos;
1062-
self_.err_span_char(
1063-
last_pos, last_pos, "raw byte string must be ASCII", c);
1064-
}
1065-
}
1066-
self_.bump();
1067-
}
1068-
self_.bump();
1069-
let bytes = self_.with_str_from_to(content_start_bpos,
1070-
content_end_bpos,
1071-
|s| s.as_bytes().to_owned());
1072-
return token::LIT_BINARY_RAW(Rc::new(bytes), hash_count);
1073-
}
1074978
}
1075979
'"' => {
1076980
let mut accum_str = String::new();
@@ -1221,6 +1125,103 @@ impl<'a> StringReader<'a> {
12211125
// consider shebangs comments, but not inner attributes
12221126
|| (self.curr_is('#') && self.nextch_is('!') && !self.nextnextch_is('['))
12231127
}
1128+
1129+
fn scan_byte(&mut self) -> token::Token {
1130+
self.bump();
1131+
let start = self.last_pos;
1132+
1133+
// the eof will be picked up by the final `'` check below
1134+
let mut c2 = self.curr.unwrap_or('\x00');
1135+
self.bump();
1136+
1137+
c2 = self.scan_char_or_byte(start, c2, /* ascii_only = */ true, '\'').unwrap();
1138+
if !self.curr_is('\'') {
1139+
// Byte offsetting here is okay because the
1140+
// character before position `start` are an
1141+
// ascii single quote and ascii 'b'.
1142+
let last_pos = self.last_pos;
1143+
self.fatal_span_verbose(
1144+
start - BytePos(2), last_pos,
1145+
"unterminated byte constant".to_string());
1146+
}
1147+
self.bump(); // advance curr past token
1148+
return token::LIT_BYTE(c2 as u8);
1149+
}
1150+
1151+
fn scan_byte_string(&mut self) -> token::Token {
1152+
self.bump();
1153+
let start = self.last_pos;
1154+
let mut value = Vec::new();
1155+
while !self.curr_is('"') {
1156+
if self.is_eof() {
1157+
let last_pos = self.last_pos;
1158+
self.fatal_span_(start, last_pos,
1159+
"unterminated double quote byte string");
1160+
}
1161+
1162+
let ch_start = self.last_pos;
1163+
let ch = self.curr.unwrap();
1164+
self.bump();
1165+
self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ true, '"')
1166+
.map(|ch| value.push(ch as u8));
1167+
}
1168+
self.bump();
1169+
return token::LIT_BINARY(Rc::new(value));
1170+
}
1171+
1172+
fn scan_raw_byte_string(&mut self) -> token::Token {
1173+
let start_bpos = self.last_pos;
1174+
self.bump();
1175+
let mut hash_count = 0u;
1176+
while self.curr_is('#') {
1177+
self.bump();
1178+
hash_count += 1;
1179+
}
1180+
1181+
if self.is_eof() {
1182+
let last_pos = self.last_pos;
1183+
self.fatal_span_(start_bpos, last_pos, "unterminated raw string");
1184+
} else if !self.curr_is('"') {
1185+
let last_pos = self.last_pos;
1186+
let ch = self.curr.unwrap();
1187+
self.fatal_span_char(start_bpos, last_pos,
1188+
"only `#` is allowed in raw string delimitation; \
1189+
found illegal character",
1190+
ch);
1191+
}
1192+
self.bump();
1193+
let content_start_bpos = self.last_pos;
1194+
let mut content_end_bpos;
1195+
'outer: loop {
1196+
match self.curr {
1197+
None => {
1198+
let last_pos = self.last_pos;
1199+
self.fatal_span_(start_bpos, last_pos, "unterminated raw string")
1200+
},
1201+
Some('"') => {
1202+
content_end_bpos = self.last_pos;
1203+
for _ in range(0, hash_count) {
1204+
self.bump();
1205+
if !self.curr_is('#') {
1206+
continue 'outer;
1207+
}
1208+
}
1209+
break;
1210+
},
1211+
Some(c) => if c > '\x7F' {
1212+
let last_pos = self.last_pos;
1213+
self.err_span_char(
1214+
last_pos, last_pos, "raw byte string must be ASCII", c);
1215+
}
1216+
}
1217+
self.bump();
1218+
}
1219+
self.bump();
1220+
let bytes = self.with_str_from_to(content_start_bpos,
1221+
content_end_bpos,
1222+
|s| s.as_bytes().to_owned());
1223+
return token::LIT_BINARY_RAW(Rc::new(bytes), hash_count);
1224+
}
12241225
}
12251226

12261227
pub fn is_whitespace(c: Option<char>) -> bool {

0 commit comments

Comments
 (0)