@@ -969,108 +969,12 @@ impl<'a> StringReader<'a> {
969
969
'b' => {
970
970
self . bump ( ) ;
971
971
return match self . curr {
972
- Some ( '\'' ) => parse_byte ( self ) ,
973
- Some ( '"' ) => parse_byte_string ( self ) ,
974
- Some ( 'r' ) => parse_raw_byte_string ( self ) ,
972
+ Some ( '\'' ) => self . scan_byte ( ) ,
973
+ Some ( '"' ) => self . scan_byte_string ( ) ,
974
+ Some ( 'r' ) => self . scan_raw_byte_string ( ) ,
975
975
_ => unreachable ! ( ) // Should have been a token::IDENT above.
976
976
} ;
977
977
978
- fn parse_byte ( self_ : & mut StringReader ) -> token:: Token {
979
- self_. bump ( ) ;
980
- let start = self_. last_pos ;
981
-
982
- // the eof will be picked up by the final `'` check below
983
- let mut c2 = self_. curr . unwrap_or ( '\x00' ) ;
984
- self_. bump ( ) ;
985
-
986
- c2 = self_. scan_char_or_byte ( start, c2, /* ascii_only = */ true , '\'' ) . unwrap ( ) ;
987
- if !self_. curr_is ( '\'' ) {
988
- // Byte offsetting here is okay because the
989
- // character before position `start` are an
990
- // ascii single quote and ascii 'b'.
991
- let last_pos = self_. last_pos ;
992
- self_. fatal_span_verbose (
993
- start - BytePos ( 2 ) , last_pos,
994
- "unterminated byte constant" . to_string ( ) ) ;
995
- }
996
- self_. bump ( ) ; // advance curr past token
997
- return token:: LIT_BYTE ( c2 as u8 ) ;
998
- }
999
-
1000
- fn parse_byte_string ( self_ : & mut StringReader ) -> token:: Token {
1001
- self_. bump ( ) ;
1002
- let start = self_. last_pos ;
1003
- let mut value = Vec :: new ( ) ;
1004
- while !self_. curr_is ( '"' ) {
1005
- if self_. is_eof ( ) {
1006
- let last_pos = self_. last_pos ;
1007
- self_. fatal_span_ ( start, last_pos,
1008
- "unterminated double quote byte string" ) ;
1009
- }
1010
-
1011
- let ch_start = self_. last_pos ;
1012
- let ch = self_. curr . unwrap ( ) ;
1013
- self_. bump ( ) ;
1014
- self_. scan_char_or_byte ( ch_start, ch, /* ascii_only = */ true , '"' )
1015
- . map ( |ch| value. push ( ch as u8 ) ) ;
1016
- }
1017
- self_. bump ( ) ;
1018
- return token:: LIT_BINARY ( Rc :: new ( value) ) ;
1019
- }
1020
-
1021
- fn parse_raw_byte_string ( self_ : & mut StringReader ) -> token:: Token {
1022
- let start_bpos = self_. last_pos ;
1023
- self_. bump ( ) ;
1024
- let mut hash_count = 0 u;
1025
- while self_. curr_is ( '#' ) {
1026
- self_. bump ( ) ;
1027
- hash_count += 1 ;
1028
- }
1029
-
1030
- if self_. is_eof ( ) {
1031
- let last_pos = self_. last_pos ;
1032
- self_. fatal_span_ ( start_bpos, last_pos, "unterminated raw string" ) ;
1033
- } else if !self_. curr_is ( '"' ) {
1034
- let last_pos = self_. last_pos ;
1035
- let ch = self_. curr . unwrap ( ) ;
1036
- self_. fatal_span_char ( start_bpos, last_pos,
1037
- "only `#` is allowed in raw string delimitation; \
1038
- found illegal character",
1039
- ch) ;
1040
- }
1041
- self_. bump ( ) ;
1042
- let content_start_bpos = self_. last_pos ;
1043
- let mut content_end_bpos;
1044
- ' outer: loop {
1045
- match self_. curr {
1046
- None => {
1047
- let last_pos = self_. last_pos ;
1048
- self_. fatal_span_ ( start_bpos, last_pos, "unterminated raw string" )
1049
- } ,
1050
- Some ( '"' ) => {
1051
- content_end_bpos = self_. last_pos ;
1052
- for _ in range ( 0 , hash_count) {
1053
- self_. bump ( ) ;
1054
- if !self_. curr_is ( '#' ) {
1055
- continue ' outer;
1056
- }
1057
- }
1058
- break ;
1059
- } ,
1060
- Some ( c) => if c > '\x7F' {
1061
- let last_pos = self_. last_pos ;
1062
- self_. err_span_char (
1063
- last_pos, last_pos, "raw byte string must be ASCII" , c) ;
1064
- }
1065
- }
1066
- self_. bump ( ) ;
1067
- }
1068
- self_. bump ( ) ;
1069
- let bytes = self_. with_str_from_to ( content_start_bpos,
1070
- content_end_bpos,
1071
- |s| s. as_bytes ( ) . to_owned ( ) ) ;
1072
- return token:: LIT_BINARY_RAW ( Rc :: new ( bytes) , hash_count) ;
1073
- }
1074
978
}
1075
979
'"' => {
1076
980
let mut accum_str = String :: new ( ) ;
@@ -1221,6 +1125,103 @@ impl<'a> StringReader<'a> {
1221
1125
// consider shebangs comments, but not inner attributes
1222
1126
|| ( self . curr_is ( '#' ) && self . nextch_is ( '!' ) && !self . nextnextch_is ( '[' ) )
1223
1127
}
1128
+
1129
+ fn scan_byte ( & mut self ) -> token:: Token {
1130
+ self . bump ( ) ;
1131
+ let start = self . last_pos ;
1132
+
1133
+ // the eof will be picked up by the final `'` check below
1134
+ let mut c2 = self . curr . unwrap_or ( '\x00' ) ;
1135
+ self . bump ( ) ;
1136
+
1137
+ c2 = self . scan_char_or_byte ( start, c2, /* ascii_only = */ true , '\'' ) . unwrap ( ) ;
1138
+ if !self . curr_is ( '\'' ) {
1139
+ // Byte offsetting here is okay because the
1140
+ // character before position `start` are an
1141
+ // ascii single quote and ascii 'b'.
1142
+ let last_pos = self . last_pos ;
1143
+ self . fatal_span_verbose (
1144
+ start - BytePos ( 2 ) , last_pos,
1145
+ "unterminated byte constant" . to_string ( ) ) ;
1146
+ }
1147
+ self . bump ( ) ; // advance curr past token
1148
+ return token:: LIT_BYTE ( c2 as u8 ) ;
1149
+ }
1150
+
1151
+ fn scan_byte_string ( & mut self ) -> token:: Token {
1152
+ self . bump ( ) ;
1153
+ let start = self . last_pos ;
1154
+ let mut value = Vec :: new ( ) ;
1155
+ while !self . curr_is ( '"' ) {
1156
+ if self . is_eof ( ) {
1157
+ let last_pos = self . last_pos ;
1158
+ self . fatal_span_ ( start, last_pos,
1159
+ "unterminated double quote byte string" ) ;
1160
+ }
1161
+
1162
+ let ch_start = self . last_pos ;
1163
+ let ch = self . curr . unwrap ( ) ;
1164
+ self . bump ( ) ;
1165
+ self . scan_char_or_byte ( ch_start, ch, /* ascii_only = */ true , '"' )
1166
+ . map ( |ch| value. push ( ch as u8 ) ) ;
1167
+ }
1168
+ self . bump ( ) ;
1169
+ return token:: LIT_BINARY ( Rc :: new ( value) ) ;
1170
+ }
1171
+
1172
+ fn scan_raw_byte_string ( & mut self ) -> token:: Token {
1173
+ let start_bpos = self . last_pos ;
1174
+ self . bump ( ) ;
1175
+ let mut hash_count = 0 u;
1176
+ while self . curr_is ( '#' ) {
1177
+ self . bump ( ) ;
1178
+ hash_count += 1 ;
1179
+ }
1180
+
1181
+ if self . is_eof ( ) {
1182
+ let last_pos = self . last_pos ;
1183
+ self . fatal_span_ ( start_bpos, last_pos, "unterminated raw string" ) ;
1184
+ } else if !self . curr_is ( '"' ) {
1185
+ let last_pos = self . last_pos ;
1186
+ let ch = self . curr . unwrap ( ) ;
1187
+ self . fatal_span_char ( start_bpos, last_pos,
1188
+ "only `#` is allowed in raw string delimitation; \
1189
+ found illegal character",
1190
+ ch) ;
1191
+ }
1192
+ self . bump ( ) ;
1193
+ let content_start_bpos = self . last_pos ;
1194
+ let mut content_end_bpos;
1195
+ ' outer: loop {
1196
+ match self . curr {
1197
+ None => {
1198
+ let last_pos = self . last_pos ;
1199
+ self . fatal_span_ ( start_bpos, last_pos, "unterminated raw string" )
1200
+ } ,
1201
+ Some ( '"' ) => {
1202
+ content_end_bpos = self . last_pos ;
1203
+ for _ in range ( 0 , hash_count) {
1204
+ self . bump ( ) ;
1205
+ if !self . curr_is ( '#' ) {
1206
+ continue ' outer;
1207
+ }
1208
+ }
1209
+ break ;
1210
+ } ,
1211
+ Some ( c) => if c > '\x7F' {
1212
+ let last_pos = self . last_pos ;
1213
+ self . err_span_char (
1214
+ last_pos, last_pos, "raw byte string must be ASCII" , c) ;
1215
+ }
1216
+ }
1217
+ self . bump ( ) ;
1218
+ }
1219
+ self . bump ( ) ;
1220
+ let bytes = self . with_str_from_to ( content_start_bpos,
1221
+ content_end_bpos,
1222
+ |s| s. as_bytes ( ) . to_owned ( ) ) ;
1223
+ return token:: LIT_BINARY_RAW ( Rc :: new ( bytes) , hash_count) ;
1224
+ }
1224
1225
}
1225
1226
1226
1227
pub fn is_whitespace ( c : Option < char > ) -> bool {
0 commit comments