@@ -636,6 +636,67 @@ impl<'a> StringReader<'a> {
636
636
}
637
637
}
638
638
639
+ /// Scan for a single (possibly escaped) byte or char in a byte, (non-raw) byte string, char, or (non-raw) string literal.
640
+ /// Returns `None` only for a backslash-newline escape inside a double-quote-delimited literal (it yields no char); every other path returns `Some`, even after reporting an error.
641
+ /// `start` is the position of `first_source_char`, which is already consumed. `ascii_only` selects the byte rules (no `\u`/`\U` escapes, non-ASCII source chars rejected) and the byte-flavoured error messages. `delim` is the quote char delimiting the literal (the visible call sites pass a single or a double quote).
642
+ fn scan_char_or_byte ( & mut self , start : BytePos , first_source_char : char ,
643
+ ascii_only : bool , delim : char ) -> Option < char > {
644
+ match first_source_char {
645
+ '\\' => {
646
+ // Backslash escape: peek at the char after the backslash, remember its position for error spans, then consume it.
647
+ let escaped = self . curr ;
648
+ let escaped_pos = self . last_pos ;
649
+ self . bump ( ) ;
650
+ match escaped {
651
+ None => { } , // EOF here is an error that will be checked later; control falls through to the final `Some(first_source_char)`, i.e. the backslash itself is returned.
652
+ Some ( e) => {
653
+ return Some ( match e { // every arm below yields the decoded char, except backslash-newline which returns `None` directly
654
+ 'n' => '\n' ,
655
+ 'r' => '\r' ,
656
+ 't' => '\t' ,
657
+ '\\' => '\\' ,
658
+ '\'' => '\'' ,
659
+ '"' => '"' ,
660
+ '0' => '\x00' ,
661
+ 'x' => self . scan_numeric_escape ( 2 u, delim) , // numeric escape; the first argument is presumably the number of hex digits expected -- confirm against scan_numeric_escape
662
+ 'u' if !ascii_only => self . scan_numeric_escape ( 4 u, delim) , // only accepted outside byte literals; with ascii_only set this falls to the unknown-escape arm
663
+ 'U' if !ascii_only => self . scan_numeric_escape ( 8 u, delim) , // same restriction as the lowercase form
664
+ '\n' if delim == '"' => { // backslash followed by a newline, honoured only in double-quoted literals
665
+ self . consume_whitespace ( ) ; // presumably skips the whitespace that follows -- confirm in consume_whitespace
666
+ return None // this escape contributes no char to the literal
667
+ } ,
668
+ c => { // anything else: report an unknown escape, then still hand the raw char back to the caller
669
+ let last_pos = self . last_pos ;
670
+ self . err_span_char (
671
+ escaped_pos, last_pos,
672
+ if ascii_only { "unknown byte escape" }
673
+ else { "unknown character escape" } ,
674
+ c) ;
675
+ c
676
+ }
677
+ } )
678
+ }
679
+ }
680
+ }
681
+ '\t' | '\n' | '\r' | '\'' if delim == '\'' => { // raw tab, newline, CR or single quote inside a single-quoted literal: reported here, yet still returned by the final expression
682
+ let last_pos = self . last_pos ;
683
+ self . err_span_char (
684
+ start, last_pos,
685
+ if ascii_only { "byte constant must be escaped" }
686
+ else { "character constant must be escaped" } ,
687
+ first_source_char) ;
688
+ }
689
+ _ => if ascii_only && first_source_char > '\x7F' { // byte literals may not contain a non-ASCII source char; other chars pass through silently
690
+ let last_pos = self . last_pos ;
691
+ self . err_span_char (
692
+ start, last_pos,
693
+ "byte constant must be ASCII. \
694
+ Use a \\ xHH escape for a non-ASCII byte", first_source_char) ;
695
+ }
696
+ }
697
+ Some ( first_source_char) // reached for unescaped chars and for the error/EOF fallthroughs above: the source char is returned unchanged
698
+ }
699
+
639
700
fn binop ( & mut self , op : token:: BinOp ) -> token:: Token {
640
701
self . bump ( ) ;
641
702
if self . curr_is ( '=' ) {
@@ -810,43 +871,7 @@ impl<'a> StringReader<'a> {
810
871
}
811
872
812
873
// Otherwise it is a character constant:
813
- match c2 {
814
- '\\' => {
815
- // '\X' for some X must be a character constant:
816
- let escaped = self . curr ;
817
- let escaped_pos = self . last_pos ;
818
- self . bump ( ) ;
819
- match escaped {
820
- None => { }
821
- Some ( e) => {
822
- c2 = match e {
823
- 'n' => '\n' ,
824
- 'r' => '\r' ,
825
- 't' => '\t' ,
826
- '\\' => '\\' ,
827
- '\'' => '\'' ,
828
- '"' => '"' ,
829
- '0' => '\x00' ,
830
- 'x' => self . scan_numeric_escape ( 2 u, '\'' ) ,
831
- 'u' => self . scan_numeric_escape ( 4 u, '\'' ) ,
832
- 'U' => self . scan_numeric_escape ( 8 u, '\'' ) ,
833
- c2 => {
834
- let last_bpos = self . last_pos ;
835
- self . err_span_char ( escaped_pos, last_bpos,
836
- "unknown character escape" , c2) ;
837
- c2
838
- }
839
- }
840
- }
841
- }
842
- }
843
- '\t' | '\n' | '\r' | '\'' => {
844
- let last_bpos = self . last_pos ;
845
- self . err_span_char ( start, last_bpos,
846
- "character constant must be escaped" , c2) ;
847
- }
848
- _ => { }
849
- }
874
+ c2 = self . scan_char_or_byte ( start, c2, /* ascii_only = */ false , '\'' ) . unwrap ( ) ;
850
875
if !self . curr_is ( '\'' ) {
851
876
let last_bpos = self . last_pos ;
852
877
self . fatal_span_verbose (
@@ -876,44 +901,7 @@ impl<'a> StringReader<'a> {
876
901
let mut c2 = self_. curr . unwrap_or ( '\x00' ) ;
877
902
self_. bump ( ) ;
878
903
879
- match c2 {
880
- '\\' => {
881
- // '\X' for some X must be a character constant:
882
- let escaped = self_. curr ;
883
- let escaped_pos = self_. last_pos ;
884
- self_. bump ( ) ;
885
- match escaped {
886
- None => { }
887
- Some ( e) => {
888
- c2 = match e {
889
- 'n' => '\n' ,
890
- 'r' => '\r' ,
891
- 't' => '\t' ,
892
- '\\' => '\\' ,
893
- '\'' => '\'' ,
894
- '"' => '"' ,
895
- '0' => '\x00' ,
896
- 'x' => self_. scan_numeric_escape ( 2 u, '\'' ) ,
897
- c2 => {
898
- self_. err_span_char (
899
- escaped_pos, self_. last_pos ,
900
- "unknown byte escape" , c2) ;
901
- c2
902
- }
903
- }
904
- }
905
- }
906
- }
907
- '\t' | '\n' | '\r' | '\'' => {
908
- self_. err_span_char ( start, self_. last_pos ,
909
- "byte constant must be escaped" , c2) ;
910
- }
911
- _ => if c2 > '\x7F' {
912
- self_. err_span_char ( start, self_. last_pos ,
913
- "byte constant must be ASCII. \
914
- Use a \\ xHH escape for a non-ASCII byte", c2) ;
915
- }
916
- }
904
+ c2 = self_. scan_char_or_byte ( start, c2, /* ascii_only = */ true , '\'' ) . unwrap ( ) ;
917
905
if !self_. curr_is ( '\'' ) {
918
906
// Byte offsetting here is okay because the
919
907
// characters before position `start` are an
@@ -936,46 +924,11 @@ impl<'a> StringReader<'a> {
936
924
"unterminated double quote byte string" ) ;
937
925
}
938
926
927
+ let ch_start = self_. last_pos ;
939
928
let ch = self_. curr . unwrap ( ) ;
940
929
self_. bump ( ) ;
941
- match ch {
942
- '\\' => {
943
- if self_. is_eof ( ) {
944
- self_. fatal_span ( start, self_. last_pos ,
945
- "unterminated double quote byte string" ) ;
946
- }
947
-
948
- let escaped = self_. curr . unwrap ( ) ;
949
- let escaped_pos = self_. last_pos ;
950
- self_. bump ( ) ;
951
- match escaped {
952
- 'n' => value. push ( '\n' as u8 ) ,
953
- 'r' => value. push ( '\r' as u8 ) ,
954
- 't' => value. push ( '\t' as u8 ) ,
955
- '\\' => value. push ( '\\' as u8 ) ,
956
- '\'' => value. push ( '\'' as u8 ) ,
957
- '"' => value. push ( '"' as u8 ) ,
958
- '\n' => self_. consume_whitespace ( ) ,
959
- '0' => value. push ( 0 ) ,
960
- 'x' => {
961
- value. push ( self_. scan_numeric_escape ( 2 u, '"' ) as u8 ) ;
962
- }
963
- c2 => {
964
- self_. err_span_char ( escaped_pos, self_. last_pos ,
965
- "unknown byte string escape" , c2) ;
966
- }
967
- }
968
- }
969
- _ => {
970
- if ch <= '\x7F' {
971
- value. push ( ch as u8 )
972
- } else {
973
- self_. err_span_char ( self_. last_pos , self_. last_pos ,
974
- "byte string must be ASCII. \
975
- Use a \\ xHH escape for a non-ASCII byte", ch) ;
976
- }
977
- }
978
- }
930
+ self_. scan_char_or_byte ( ch_start, ch, /* ascii_only = */ true , '"' )
931
+ . map ( |ch| value. push ( ch as u8 ) ) ;
979
932
}
980
933
self_. bump ( ) ;
981
934
return token:: LIT_BINARY ( Rc :: new ( value) ) ;
@@ -1039,46 +992,11 @@ impl<'a> StringReader<'a> {
1039
992
self . fatal_span ( start_bpos, last_bpos, "unterminated double quote string" ) ;
1040
993
}
1041
994
995
+ let ch_start = self . last_pos ;
1042
996
let ch = self . curr . unwrap ( ) ;
1043
997
self . bump ( ) ;
1044
- match ch {
1045
- '\\' => {
1046
- if self . is_eof ( ) {
1047
- let last_bpos = self . last_pos ;
1048
- self . fatal_span ( start_bpos, last_bpos,
1049
- "unterminated double quote string" ) ;
1050
- }
1051
-
1052
- let escaped = self . curr . unwrap ( ) ;
1053
- let escaped_pos = self . last_pos ;
1054
- self . bump ( ) ;
1055
- match escaped {
1056
- 'n' => accum_str. push_char ( '\n' ) ,
1057
- 'r' => accum_str. push_char ( '\r' ) ,
1058
- 't' => accum_str. push_char ( '\t' ) ,
1059
- '\\' => accum_str. push_char ( '\\' ) ,
1060
- '\'' => accum_str. push_char ( '\'' ) ,
1061
- '"' => accum_str. push_char ( '"' ) ,
1062
- '\n' => self . consume_whitespace ( ) ,
1063
- '0' => accum_str. push_char ( '\x00' ) ,
1064
- 'x' => {
1065
- accum_str. push_char ( self . scan_numeric_escape ( 2 u, '"' ) ) ;
1066
- }
1067
- 'u' => {
1068
- accum_str. push_char ( self . scan_numeric_escape ( 4 u, '"' ) ) ;
1069
- }
1070
- 'U' => {
1071
- accum_str. push_char ( self . scan_numeric_escape ( 8 u, '"' ) ) ;
1072
- }
1073
- c2 => {
1074
- let last_bpos = self . last_pos ;
1075
- self . err_span_char ( escaped_pos, last_bpos,
1076
- "unknown string escape" , c2) ;
1077
- }
1078
- }
1079
- }
1080
- _ => accum_str. push_char ( ch)
1081
- }
998
+ self . scan_char_or_byte ( ch_start, ch, /* ascii_only = */ false , '"' )
999
+ . map ( |ch| accum_str. push_char ( ch) ) ;
1082
1000
}
1083
1001
self . bump ( ) ;
1084
1002
return token:: LIT_STR ( str_to_ident ( accum_str. as_slice ( ) ) ) ;
0 commit comments