@@ -685,7 +685,7 @@ impl<'a> StringReader<'a> {
685
685
}
686
686
687
687
688
- fn scan_numeric_escape ( & mut self , n_hex_digits : uint , delim : char ) -> char {
688
+ fn scan_numeric_escape ( & mut self , n_hex_digits : uint , delim : char ) -> bool {
689
689
let mut accum_int = 0u32 ;
690
690
let start_bpos = self . last_pos ;
691
691
for _ in range ( 0 , n_hex_digits) {
@@ -709,20 +709,22 @@ impl<'a> StringReader<'a> {
709
709
}
710
710
711
711
match char:: from_u32 ( accum_int) {
712
- Some ( x ) => x ,
712
+ Some ( _ ) => true ,
713
713
None => {
714
714
let last_bpos = self . last_pos ;
715
715
self . err_span_ ( start_bpos, last_bpos, "illegal numeric character escape" ) ;
716
- '?'
716
+ false
717
717
}
718
718
}
719
719
}
720
720
721
721
/// Scan for a single (possibly escaped) byte or char
722
722
/// in a byte, (non-raw) byte string, char, or (non-raw) string literal.
723
723
/// `start` is the position of `first_source_char`, which is already consumed.
724
+ ///
725
+ /// Returns true if there was a valid char/byte, false otherwise.
724
726
fn scan_char_or_byte ( & mut self , start : BytePos , first_source_char : char ,
725
- ascii_only : bool , delim : char ) -> Option < char > {
727
+ ascii_only : bool , delim : char ) -> bool {
726
728
match first_source_char {
727
729
'\\' => {
728
730
// '\X' for some X must be a character constant:
@@ -732,24 +734,18 @@ impl<'a> StringReader<'a> {
732
734
match escaped {
733
735
None => { } , // EOF here is an error that will be checked later.
734
736
Some ( e) => {
735
- return Some ( match e {
736
- 'n' => '\n' ,
737
- 'r' => '\r' ,
738
- 't' => '\t' ,
739
- '\\' => '\\' ,
740
- '\'' => '\'' ,
741
- '"' => '"' ,
742
- '0' => '\x00' ,
737
+ return match e {
738
+ 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true ,
743
739
'x' => self . scan_numeric_escape ( 2 u, delim) ,
744
740
'u' if !ascii_only => self . scan_numeric_escape ( 4 u, delim) ,
745
741
'U' if !ascii_only => self . scan_numeric_escape ( 8 u, delim) ,
746
742
'\n' if delim == '"' => {
747
743
self . consume_whitespace ( ) ;
748
- return None
744
+ true
749
745
} ,
750
746
'\r' if delim == '"' && self . curr_is ( '\n' ) => {
751
747
self . consume_whitespace ( ) ;
752
- return None
748
+ true
753
749
}
754
750
c => {
755
751
let last_pos = self . last_pos ;
@@ -758,9 +754,9 @@ impl<'a> StringReader<'a> {
758
754
if ascii_only { "unknown byte escape" }
759
755
else { "unknown character escape" } ,
760
756
c) ;
761
- c
757
+ false
762
758
}
763
- } )
759
+ }
764
760
}
765
761
}
766
762
}
@@ -771,14 +767,16 @@ impl<'a> StringReader<'a> {
771
767
if ascii_only { "byte constant must be escaped" }
772
768
else { "character constant must be escaped" } ,
773
769
first_source_char) ;
770
+ return false ;
774
771
}
775
772
'\r' => {
776
773
if self . curr_is ( '\n' ) {
777
774
self . bump ( ) ;
778
- return Some ( '\n' ) ;
775
+ return true ;
779
776
} else {
780
777
self . err_span_ ( start, self . last_pos ,
781
778
"bare CR not allowed in string, use \\ r instead" ) ;
779
+ return false ;
782
780
}
783
781
}
784
782
_ => if ascii_only && first_source_char > '\x7F' {
@@ -787,9 +785,10 @@ impl<'a> StringReader<'a> {
787
785
start, last_pos,
788
786
"byte constant must be ASCII. \
789
787
Use a \\ xHH escape for a non-ASCII byte", first_source_char) ;
788
+ return false ;
790
789
}
791
790
}
792
- Some ( first_source_char )
791
+ true
793
792
}
794
793
795
794
fn binop ( & mut self , op : token:: BinOp ) -> token:: Token {
@@ -924,7 +923,7 @@ impl<'a> StringReader<'a> {
924
923
let start = self . last_pos ;
925
924
926
925
// the eof will be picked up by the final `'` check below
927
- let mut c2 = self . curr . unwrap_or ( '\x00' ) ;
926
+ let c2 = self . curr . unwrap_or ( '\x00' ) ;
928
927
self . bump ( ) ;
929
928
930
929
// If the character is an ident start not followed by another single
@@ -967,7 +966,7 @@ impl<'a> StringReader<'a> {
967
966
}
968
967
969
968
// Otherwise it is a character constant:
970
- c2 = self . scan_char_or_byte ( start, c2, /* ascii_only = */ false , '\'' ) . unwrap ( ) ;
969
+ let valid = self . scan_char_or_byte ( start, c2, /* ascii_only = */ false , '\'' ) ;
971
970
if !self . curr_is ( '\'' ) {
972
971
let last_bpos = self . last_pos ;
973
972
self . fatal_span_verbose (
@@ -977,8 +976,9 @@ impl<'a> StringReader<'a> {
977
976
start - BytePos ( 1 ) , last_bpos,
978
977
"unterminated character constant" . to_string ( ) ) ;
979
978
}
979
+ let id = if valid { self . ident_from ( start) } else { str_to_ident ( "0" ) } ;
980
980
self . bump ( ) ; // advance curr past token
981
- return token:: LIT_CHAR ( c2 ) ;
981
+ return token:: LIT_CHAR ( id ) ;
982
982
}
983
983
'b' => {
984
984
self . bump ( ) ;
@@ -991,8 +991,8 @@ impl<'a> StringReader<'a> {
991
991
992
992
}
993
993
'"' => {
994
- let mut accum_str = String :: new ( ) ;
995
994
let start_bpos = self . last_pos ;
995
+ let mut valid = true ;
996
996
self . bump ( ) ;
997
997
while !self . curr_is ( '"' ) {
998
998
if self . is_eof ( ) {
@@ -1003,11 +1003,13 @@ impl<'a> StringReader<'a> {
1003
1003
let ch_start = self . last_pos ;
1004
1004
let ch = self . curr . unwrap ( ) ;
1005
1005
self . bump ( ) ;
1006
- self . scan_char_or_byte ( ch_start, ch, /* ascii_only = */ false , '"' )
1007
- . map ( |ch| accum_str. push_char ( ch) ) ;
1006
+ valid &= self . scan_char_or_byte ( ch_start, ch, /* ascii_only = */ false , '"' ) ;
1008
1007
}
1008
+ // adjust for the ASCII " at the start of the literal
1009
+ let id = if valid { self . ident_from ( start_bpos + BytePos ( 1 ) ) }
1010
+ else { str_to_ident ( "??" ) } ;
1009
1011
self . bump ( ) ;
1010
- return token:: LIT_STR ( str_to_ident ( accum_str . as_slice ( ) ) ) ;
1012
+ return token:: LIT_STR ( id ) ;
1011
1013
}
1012
1014
'r' => {
1013
1015
let start_bpos = self . last_pos ;
@@ -1032,7 +1034,7 @@ impl<'a> StringReader<'a> {
1032
1034
self . bump ( ) ;
1033
1035
let content_start_bpos = self . last_pos ;
1034
1036
let mut content_end_bpos;
1035
- let mut has_cr = false ;
1037
+ let mut valid = true ;
1036
1038
' outer: loop {
1037
1039
if self . is_eof ( ) {
1038
1040
let last_bpos = self . last_pos ;
@@ -1055,23 +1057,26 @@ impl<'a> StringReader<'a> {
1055
1057
}
1056
1058
}
1057
1059
break ;
1058
- }
1060
+ } ,
1059
1061
'\r' => {
1060
- has_cr = true ;
1062
+ if !self . nextch_is ( '\n' ) {
1063
+ let last_bpos = self . last_pos ;
1064
+ self . err_span_ ( start_bpos, last_bpos, "bare CR not allowed in raw \
1065
+ string, use \\ r instead") ;
1066
+ valid = false ;
1067
+ }
1061
1068
}
1062
1069
_ => ( )
1063
1070
}
1064
1071
self . bump ( ) ;
1065
1072
}
1066
1073
self . bump ( ) ;
1067
- let str_content = self . with_str_from_to ( content_start_bpos, content_end_bpos, |string| {
1068
- let string = if has_cr {
1069
- self . translate_crlf ( content_start_bpos, string,
1070
- "bare CR not allowed in raw string" )
1071
- } else { string. into_maybe_owned ( ) } ;
1072
- str_to_ident ( string. as_slice ( ) )
1073
- } ) ;
1074
- return token:: LIT_STR_RAW ( str_content, hash_count) ;
1074
+ let id = if valid {
1075
+ self . ident_from_to ( content_start_bpos, content_end_bpos)
1076
+ } else {
1077
+ str_to_ident ( "??" )
1078
+ } ;
1079
+ return token:: LIT_STR_RAW ( id, hash_count) ;
1075
1080
}
1076
1081
'-' => {
1077
1082
if self . nextch_is ( '>' ) {
@@ -1145,10 +1150,10 @@ impl<'a> StringReader<'a> {
1145
1150
let start = self . last_pos ;
1146
1151
1147
1152
// the eof will be picked up by the final `'` check below
1148
- let mut c2 = self . curr . unwrap_or ( '\x00' ) ;
1153
+ let c2 = self . curr . unwrap_or ( '\x00' ) ;
1149
1154
self . bump ( ) ;
1150
1155
1151
- c2 = self . scan_char_or_byte ( start, c2, /* ascii_only = */ true , '\'' ) . unwrap ( ) ;
1156
+ let valid = self . scan_char_or_byte ( start, c2, /* ascii_only = */ true , '\'' ) ;
1152
1157
if !self . curr_is ( '\'' ) {
1153
1158
// Byte offsetting here is okay because the
1154
1159
// characters before position `start` are an
@@ -1158,14 +1163,17 @@ impl<'a> StringReader<'a> {
1158
1163
start - BytePos ( 2 ) , last_pos,
1159
1164
"unterminated byte constant" . to_string ( ) ) ;
1160
1165
}
1166
+
1167
+ let id = if valid { self . ident_from ( start) } else { str_to_ident ( "??" ) } ;
1161
1168
self . bump ( ) ; // advance curr past token
1162
- return token:: LIT_BYTE ( c2 as u8 ) ;
1169
+ return token:: LIT_BYTE ( id ) ;
1163
1170
}
1164
1171
1165
1172
fn scan_byte_string ( & mut self ) -> token:: Token {
1166
1173
self . bump ( ) ;
1167
1174
let start = self . last_pos ;
1168
- let mut value = Vec :: new ( ) ;
1175
+ let mut valid = true ;
1176
+
1169
1177
while !self . curr_is ( '"' ) {
1170
1178
if self . is_eof ( ) {
1171
1179
let last_pos = self . last_pos ;
@@ -1176,11 +1184,11 @@ impl<'a> StringReader<'a> {
1176
1184
let ch_start = self . last_pos ;
1177
1185
let ch = self . curr . unwrap ( ) ;
1178
1186
self . bump ( ) ;
1179
- self . scan_char_or_byte ( ch_start, ch, /* ascii_only = */ true , '"' )
1180
- . map ( |ch| value. push ( ch as u8 ) ) ;
1187
+ valid &= self . scan_char_or_byte ( ch_start, ch, /* ascii_only = */ true , '"' ) ;
1181
1188
}
1189
+ let id = if valid { self . ident_from ( start) } else { str_to_ident ( "??" ) } ;
1182
1190
self . bump ( ) ;
1183
- return token:: LIT_BINARY ( Rc :: new ( value ) ) ;
1191
+ return token:: LIT_BINARY ( id ) ;
1184
1192
}
1185
1193
1186
1194
fn scan_raw_byte_string ( & mut self ) -> token:: Token {
@@ -1231,10 +1239,8 @@ impl<'a> StringReader<'a> {
1231
1239
self . bump ( ) ;
1232
1240
}
1233
1241
self . bump ( ) ;
1234
- let bytes = self . with_str_from_to ( content_start_bpos,
1235
- content_end_bpos,
1236
- |s| s. as_bytes ( ) . to_owned ( ) ) ;
1237
- return token:: LIT_BINARY_RAW ( Rc :: new ( bytes) , hash_count) ;
1242
+ return token:: LIT_BINARY_RAW ( self . ident_from_to ( content_start_bpos, content_end_bpos) ,
1243
+ hash_count) ;
1238
1244
}
1239
1245
}
1240
1246
0 commit comments