@@ -161,22 +161,20 @@ fn string_advance_token(r: @mut StringReader) {
161
161
}
162
162
}
163
163
164
- fn byte_offset ( rdr : & StringReader ) -> BytePos {
165
- ( rdr . pos - rdr. filemap . start_pos )
164
+ fn byte_offset ( rdr : & StringReader , pos : BytePos ) -> BytePos {
165
+ ( pos - rdr. filemap . start_pos )
166
166
}
167
167
168
168
pub fn get_str_from ( rdr : @mut StringReader , start : BytePos ) -> ~str {
169
- // I'm pretty skeptical about this subtraction. What if there's a
170
- // multi-byte character before the mark?
171
- return str:: slice ( * rdr. src , start. to_uint ( ) - 1 u,
172
- byte_offset ( rdr) . to_uint ( ) - 1 u) . to_owned ( ) ;
169
+ return str:: slice ( * rdr. src , start. to_uint ( ) ,
170
+ byte_offset ( rdr, rdr. last_pos ) . to_uint ( ) ) . to_owned ( ) ;
173
171
}
174
172
175
173
// EFFECT: advance the StringReader by one character. If a newline is
176
174
// discovered, add it to the FileMap's list of line start offsets.
177
175
pub fn bump ( rdr : & mut StringReader ) {
178
176
rdr. last_pos = rdr. pos ;
179
- let current_byte_offset = byte_offset ( rdr) . to_uint ( ) ; ;
177
+ let current_byte_offset = byte_offset ( rdr, rdr . pos ) . to_uint ( ) ;
180
178
if current_byte_offset < ( * rdr. src ) . len ( ) {
181
179
assert ! ( rdr. curr != -1 as char ) ;
182
180
let last_char = rdr. curr ;
@@ -202,7 +200,7 @@ pub fn is_eof(rdr: @mut StringReader) -> bool {
202
200
rdr. curr == -1 as char
203
201
}
204
202
pub fn nextch ( rdr : @mut StringReader ) -> char {
205
- let offset = byte_offset ( rdr) . to_uint ( ) ;
203
+ let offset = byte_offset ( rdr, rdr . pos ) . to_uint ( ) ;
206
204
if offset < ( * rdr. src ) . len ( ) {
207
205
return str:: char_at ( * rdr. src , offset) ;
208
206
} else { return -1 as char ; }
@@ -540,19 +538,19 @@ fn ident_continue(c: char) -> bool {
540
538
// EFFECT: advances the input past that token
541
539
// EFFECT: updates the interner
542
540
fn next_token_inner ( rdr : @mut StringReader ) -> token:: Token {
543
- let mut accum_str = ~"";
544
541
let mut c = rdr. curr ;
545
542
if ident_start ( c) {
546
- while ident_continue ( c ) {
547
- str :: push_char ( & mut accum_str , c ) ;
543
+ let start = byte_offset ( rdr , rdr . last_pos ) ;
544
+ while ident_continue ( rdr . curr ) {
548
545
bump ( rdr) ;
549
- c = rdr. curr ;
550
546
}
551
- if accum_str == ~"_" { return token:: UNDERSCORE ; }
552
- let is_mod_name = c == ':' && nextch ( rdr) == ':' ;
547
+ let string = get_str_from ( rdr, start) ;
548
+
549
+ if "_" == string { return token:: UNDERSCORE ; }
550
+ let is_mod_name = rdr. curr == ':' && nextch ( rdr) == ':' ;
553
551
554
552
// FIXME: perform NFKC normalization here. (Issue #2253)
555
- return token:: IDENT ( str_to_ident ( accum_str ) , is_mod_name) ;
553
+ return token:: IDENT ( str_to_ident ( string ) , is_mod_name) ;
556
554
}
557
555
if is_dec_digit ( c) {
558
556
return scan_number ( c, rdr) ;
@@ -692,7 +690,8 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
692
690
return token:: LIT_INT ( c2 as i64 , ast:: ty_char) ;
693
691
}
694
692
'"' => {
695
- let n = byte_offset ( rdr) ;
693
+ let mut accum_str = ~"";
694
+ let n = byte_offset ( rdr, rdr. last_pos ) ;
696
695
bump ( rdr) ;
697
696
while rdr. curr != '"' {
698
697
if is_eof ( rdr) {
0 commit comments