Skip to content

Commit 2bc9d8b

Browse files
committed
---
yaml --- r: 152614 b: refs/heads/try2 c: 612bbaf h: refs/heads/master v: v3
1 parent c6a99cd commit 2bc9d8b

File tree

2 files changed

+70
-152
lines changed

2 files changed

+70
-152
lines changed

[refs]

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ refs/heads/snap-stage3: 78a7676898d9f80ab540c6df5d4c9ce35bb50463
55
refs/heads/try: 519addf6277dbafccbb4159db4b710c37eaa2ec5
66
refs/tags/release-0.1: 1f5c5126e96c79d22cb7862f75304136e204f105
77
refs/heads/ndm: f3868061cd7988080c30d6d5bf352a5a5fe2460b
8-
refs/heads/try2: 3a52a8a8b8079de795dabdd35985f9f663aa0b5d
8+
refs/heads/try2: 612bbaf7a07fe247e5e2d057cc4f10742918ead0
99
refs/heads/dist-snap: ba4081a5a8573875fed17545846f6f6902c8ba8d
1010
refs/tags/release-0.2: c870d2dffb391e14efb05aa27898f1f6333a9596
1111
refs/tags/release-0.3: b5f0d0f648d9a6153664837026ba1be43d3e2503

branches/try2/src/libsyntax/parse/lexer/mod.rs

Lines changed: 69 additions & 151 deletions
Original file line numberDiff line numberDiff line change
@@ -636,6 +636,67 @@ impl<'a> StringReader<'a> {
636636
}
637637
}
638638

639+
/// Scan for a single (possibly escaped) byte or char
640+
/// in a byte, (non-raw) byte string, char, or (non-raw) string literal.
641+
/// `start` is the position of `first_source_char`, which is already consumed.
///
/// `ascii_only` selects the byte flavour: `\u`/`\U` escapes are not accepted,
/// a raw character above `'\x7F'` is reported as an error, and the error
/// messages say "byte" instead of "character".
///
/// `delim` is the literal's delimiter (`'\''` or `'"'`). It is forwarded to
/// `scan_numeric_escape`, it decides whether a raw tab/newline/CR/quote must
/// be escaped (only for `'\''`), and it enables the backslash-newline
/// escape (only for `'"'`).
///
/// Returns `Some(ch)` with the scanned character, or `None` for a
/// backslash-newline inside a `'"'`-delimited literal, which contributes no
/// character. Errors are reported through `err_span_char`, and a character is
/// still returned afterwards.
642+
fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
643+
ascii_only: bool, delim: char) -> Option<char> {
644+
match first_source_char {
645+
'\\' => {
646+
// '\X' for some X must be a character constant:
647+
// `self.curr` is the character following the backslash; remember it and
// its position before consuming it, so an error can point at it.
let escaped = self.curr;
648+
let escaped_pos = self.last_pos;
649+
self.bump();
650+
match escaped {
651+
// Falls through to the final `Some(first_source_char)`, i.e. yields '\\'.
None => {}, // EOF here is an error that will be checked later.
652+
Some(e) => {
653+
return Some(match e {
654+
'n' => '\n',
655+
'r' => '\r',
656+
't' => '\t',
657+
'\\' => '\\',
658+
'\'' => '\'',
659+
'"' => '"',
660+
'0' => '\x00',
661+
// NOTE(review): the first argument is presumably the number of hex
// digits to read (2/4/8) -- confirm against `scan_numeric_escape`.
'x' => self.scan_numeric_escape(2u, delim),
662+
// With `ascii_only` set these guards fail, so `\u`/`\U` reach the
// catch-all arm below and are reported as unknown byte escapes.
'u' if !ascii_only => self.scan_numeric_escape(4u, delim),
663+
'U' if !ascii_only => self.scan_numeric_escape(8u, delim),
664+
// Backslash-newline in a `"`-delimited literal: consume the following
// whitespace and produce no character at all.
'\n' if delim == '"' => {
665+
self.consume_whitespace();
666+
return None
667+
},
668+
// Any other escape is reported, then the escaped character itself is
// returned as the value.
c => {
669+
let last_pos = self.last_pos;
670+
self.err_span_char(
671+
escaped_pos, last_pos,
672+
if ascii_only { "unknown byte escape" }
673+
else { "unknown character escape" },
674+
c);
675+
c
676+
}
677+
})
678+
}
679+
}
680+
}
681+
// These raw characters are only rejected when the delimiter is a single
// quote; with any other delimiter they fall to the arm below.
'\t' | '\n' | '\r' | '\'' if delim == '\'' => {
682+
let last_pos = self.last_pos;
683+
self.err_span_char(
684+
start, last_pos,
685+
if ascii_only { "byte constant must be escaped" }
686+
else { "character constant must be escaped" },
687+
first_source_char);
688+
}
689+
// Everything else is taken literally; in byte mode a character above
// '\x7F' is reported. The message says "byte constant" for both
// delimiters.
_ => if ascii_only && first_source_char > '\x7F' {
690+
let last_pos = self.last_pos;
691+
self.err_span_char(
692+
start, last_pos,
693+
"byte constant must be ASCII. \
694+
Use a \\xHH escape for a non-ASCII byte", first_source_char);
695+
}
696+
}
697+
// Reached for unescaped characters (even after an error was reported) and
// for a backslash followed by EOF.
Some(first_source_char)
698+
}
699+
639700
fn binop(&mut self, op: token::BinOp) -> token::Token {
640701
self.bump();
641702
if self.curr_is('=') {
@@ -810,43 +871,7 @@ impl<'a> StringReader<'a> {
810871
}
811872

812873
// Otherwise it is a character constant:
813-
match c2 {
814-
'\\' => {
815-
// '\X' for some X must be a character constant:
816-
let escaped = self.curr;
817-
let escaped_pos = self.last_pos;
818-
self.bump();
819-
match escaped {
820-
None => {}
821-
Some(e) => {
822-
c2 = match e {
823-
'n' => '\n',
824-
'r' => '\r',
825-
't' => '\t',
826-
'\\' => '\\',
827-
'\'' => '\'',
828-
'"' => '"',
829-
'0' => '\x00',
830-
'x' => self.scan_numeric_escape(2u, '\''),
831-
'u' => self.scan_numeric_escape(4u, '\''),
832-
'U' => self.scan_numeric_escape(8u, '\''),
833-
c2 => {
834-
let last_bpos = self.last_pos;
835-
self.err_span_char(escaped_pos, last_bpos,
836-
"unknown character escape", c2);
837-
c2
838-
}
839-
}
840-
}
841-
}
842-
}
843-
'\t' | '\n' | '\r' | '\'' => {
844-
let last_bpos = self.last_pos;
845-
self.err_span_char( start, last_bpos,
846-
"character constant must be escaped", c2);
847-
}
848-
_ => {}
849-
}
874+
c2 = self.scan_char_or_byte(start, c2, /* ascii_only = */ false, '\'').unwrap();
850875
if !self.curr_is('\'') {
851876
let last_bpos = self.last_pos;
852877
self.fatal_span_verbose(
@@ -876,44 +901,7 @@ impl<'a> StringReader<'a> {
876901
let mut c2 = self_.curr.unwrap_or('\x00');
877902
self_.bump();
878903

879-
match c2 {
880-
'\\' => {
881-
// '\X' for some X must be a character constant:
882-
let escaped = self_.curr;
883-
let escaped_pos = self_.last_pos;
884-
self_.bump();
885-
match escaped {
886-
None => {}
887-
Some(e) => {
888-
c2 = match e {
889-
'n' => '\n',
890-
'r' => '\r',
891-
't' => '\t',
892-
'\\' => '\\',
893-
'\'' => '\'',
894-
'"' => '"',
895-
'0' => '\x00',
896-
'x' => self_.scan_numeric_escape(2u, '\''),
897-
c2 => {
898-
self_.err_span_char(
899-
escaped_pos, self_.last_pos,
900-
"unknown byte escape", c2);
901-
c2
902-
}
903-
}
904-
}
905-
}
906-
}
907-
'\t' | '\n' | '\r' | '\'' => {
908-
self_.err_span_char( start, self_.last_pos,
909-
"byte constant must be escaped", c2);
910-
}
911-
_ => if c2 > '\x7F' {
912-
self_.err_span_char( start, self_.last_pos,
913-
"byte constant must be ASCII. \
914-
Use a \\xHH escape for a non-ASCII byte", c2);
915-
}
916-
}
904+
c2 = self_.scan_char_or_byte(start, c2, /* ascii_only = */ true, '\'').unwrap();
917905
if !self_.curr_is('\'') {
918906
// Byte offsetting here is okay because the
919907
// characters before position `start` are an
@@ -936,46 +924,11 @@ impl<'a> StringReader<'a> {
936924
"unterminated double quote byte string");
937925
}
938926

927+
let ch_start = self_.last_pos;
939928
let ch = self_.curr.unwrap();
940929
self_.bump();
941-
match ch {
942-
'\\' => {
943-
if self_.is_eof() {
944-
self_.fatal_span(start, self_.last_pos,
945-
"unterminated double quote byte string");
946-
}
947-
948-
let escaped = self_.curr.unwrap();
949-
let escaped_pos = self_.last_pos;
950-
self_.bump();
951-
match escaped {
952-
'n' => value.push('\n' as u8),
953-
'r' => value.push('\r' as u8),
954-
't' => value.push('\t' as u8),
955-
'\\' => value.push('\\' as u8),
956-
'\'' => value.push('\'' as u8),
957-
'"' => value.push('"' as u8),
958-
'\n' => self_.consume_whitespace(),
959-
'0' => value.push(0),
960-
'x' => {
961-
value.push(self_.scan_numeric_escape(2u, '"') as u8);
962-
}
963-
c2 => {
964-
self_.err_span_char(escaped_pos, self_.last_pos,
965-
"unknown byte string escape", c2);
966-
}
967-
}
968-
}
969-
_ => {
970-
if ch <= '\x7F' {
971-
value.push(ch as u8)
972-
} else {
973-
self_.err_span_char(self_.last_pos, self_.last_pos,
974-
"byte string must be ASCII. \
975-
Use a \\xHH escape for a non-ASCII byte", ch);
976-
}
977-
}
978-
}
930+
self_.scan_char_or_byte(ch_start, ch, /* ascii_only = */ true, '"')
931+
.map(|ch| value.push(ch as u8));
979932
}
980933
self_.bump();
981934
return token::LIT_BINARY(Rc::new(value));
@@ -1039,46 +992,11 @@ impl<'a> StringReader<'a> {
1039992
self.fatal_span(start_bpos, last_bpos, "unterminated double quote string");
1040993
}
1041994

995+
let ch_start = self.last_pos;
1042996
let ch = self.curr.unwrap();
1043997
self.bump();
1044-
match ch {
1045-
'\\' => {
1046-
if self.is_eof() {
1047-
let last_bpos = self.last_pos;
1048-
self.fatal_span(start_bpos, last_bpos,
1049-
"unterminated double quote string");
1050-
}
1051-
1052-
let escaped = self.curr.unwrap();
1053-
let escaped_pos = self.last_pos;
1054-
self.bump();
1055-
match escaped {
1056-
'n' => accum_str.push_char('\n'),
1057-
'r' => accum_str.push_char('\r'),
1058-
't' => accum_str.push_char('\t'),
1059-
'\\' => accum_str.push_char('\\'),
1060-
'\'' => accum_str.push_char('\''),
1061-
'"' => accum_str.push_char('"'),
1062-
'\n' => self.consume_whitespace(),
1063-
'0' => accum_str.push_char('\x00'),
1064-
'x' => {
1065-
accum_str.push_char(self.scan_numeric_escape(2u, '"'));
1066-
}
1067-
'u' => {
1068-
accum_str.push_char(self.scan_numeric_escape(4u, '"'));
1069-
}
1070-
'U' => {
1071-
accum_str.push_char(self.scan_numeric_escape(8u, '"'));
1072-
}
1073-
c2 => {
1074-
let last_bpos = self.last_pos;
1075-
self.err_span_char(escaped_pos, last_bpos,
1076-
"unknown string escape", c2);
1077-
}
1078-
}
1079-
}
1080-
_ => accum_str.push_char(ch)
1081-
}
998+
self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ false, '"')
999+
.map(|ch| accum_str.push_char(ch));
10821000
}
10831001
self.bump();
10841002
return token::LIT_STR(str_to_ident(accum_str.as_slice()));

0 commit comments

Comments
 (0)