@@ -23,7 +23,7 @@ use clone::Clone;
23
23
use container:: { Container , Mutable } ;
24
24
use iter:: Times ;
25
25
use iterator:: { Iterator , FromIterator , Extendable } ;
26
- use iterator:: { Filter , AdditiveIterator , Map } ;
26
+ use iterator:: { Filter , AdditiveIterator , Map , Enumerate } ;
27
27
use iterator:: { Invert , DoubleEndedIterator } ;
28
28
use libc;
29
29
use num:: { Saturating , Zero } ;
@@ -359,9 +359,18 @@ pub type ByteIterator<'self> =
359
359
/// Use with the `std::iterator` module.
360
360
pub type ByteRevIterator < ' self > = Invert < ByteIterator < ' self > > ;
361
361
362
+ /// An iterator yielding `(byte index, item)` pairs, where the item is a `&u8` (`ByteOffset`) or a `char` (`CharOffset`)
363
+ #[ deriving( Clone ) ]
364
+ enum OffsetIterator < ' self > {
365
+ // TODO: use ByteIterator here once it can be cloned (this enum derives Clone)
366
+ ByteOffset ( Enumerate < vec:: VecIterator < ' self , u8 > > ) ,
367
+ CharOffset ( CharOffsetIterator < ' self > ) ,
368
+ }
369
+
362
370
/// An iterator over the substrings of a string, separated by `sep`.
363
371
#[ deriving( Clone ) ]
364
372
pub struct CharSplitIterator < ' self , Sep > {
373
+ priv iter : OffsetIterator < ' self > ,
365
374
priv string : & ' self str ,
366
375
priv position : uint ,
367
376
priv sep: Sep ,
@@ -370,7 +379,6 @@ pub struct CharSplitIterator<'self,Sep> {
370
379
/// Whether an empty string at the end is allowed
371
380
priv allow_trailing_empty : bool ,
372
381
priv finished : bool ,
373
- priv only_ascii : bool
374
382
}
375
383
376
384
/// An iterator over the words of a string, separated by a sequence of whitespace
@@ -386,39 +394,39 @@ impl<'self, Sep: CharEq> Iterator<&'self str> for CharSplitIterator<'self, Sep>
386
394
fn next ( & mut self ) -> Option < & ' self str > {
387
395
if self . finished { return None }
388
396
389
- let l = self . string . len ( ) ;
390
397
let start = self . position ;
391
-
392
- if self . only_ascii {
393
- // this gives a *huge* speed up for splitting on ASCII
394
- // characters (e.g. '\n' or ' ')
395
- while self . position < l && self . count > 0 {
396
- let byte = self . string [ self . position ] ;
397
-
398
- if self . sep . matches ( byte as char ) {
399
- let slice = unsafe { raw:: slice_bytes ( self . string , start, self . position ) } ;
400
- self . position += 1 ;
401
- self . count -= 1 ;
402
- return Some ( slice) ;
403
- }
404
- self . position += 1 ;
405
- }
406
- } else {
407
- while self . position < l && self . count > 0 {
408
- let CharRange { ch, next} = self . string . char_range_at ( self . position ) ;
409
-
410
- if self . sep . matches ( ch) {
411
- let slice = unsafe { raw:: slice_bytes ( self . string , start, self . position ) } ;
412
- self . position = next;
413
- self . count -= 1 ;
414
- return Some ( slice) ;
415
- }
416
- self . position = next;
398
+ let len = self . string . len ( ) ;
399
+
400
+ if self . count > 0 {
401
+ match self . iter {
402
+ // this gives a *huge* speed up for splitting on ASCII
403
+ // characters (e.g. '\n' or ' ')
404
+ ByteOffset ( ref mut iter) =>
405
+ for ( idx, & byte) in * iter {
406
+ if self . sep . matches ( byte as char ) {
407
+ self . position = idx + 1 ;
408
+ self . count -= 1 ;
409
+ return Some ( unsafe {
410
+ raw:: slice_bytes ( self . string , start, idx)
411
+ } )
412
+ }
413
+ } ,
414
+ CharOffset ( ref mut iter) =>
415
+ for ( idx, ch) in * iter {
416
+ if self . sep . matches ( ch) {
417
+ // skip over the separator
418
+ self . position = self . string . char_range_at ( idx) . next ;
419
+ self . count -= 1 ;
420
+ return Some ( unsafe {
421
+ raw:: slice_bytes ( self . string , start, idx)
422
+ } )
423
+ }
424
+ } ,
417
425
}
418
426
}
419
427
self . finished = true ;
420
- if self . allow_trailing_empty || start < l {
421
- Some ( unsafe { raw:: slice_bytes ( self . string , start, l ) } )
428
+ if self . allow_trailing_empty || start < len {
429
+ Some ( unsafe { raw:: slice_bytes ( self . string , start, len ) } )
422
430
} else {
423
431
None
424
432
}
@@ -1327,15 +1335,19 @@ impl<'self> StrSlice<'self> for &'self str {
1327
1335
#[inline]
1328
1336
fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
1329
1337
-> CharSplitIterator<'self, Sep> {
1330
- let only_ascii = sep.only_ascii();
1338
+ let iter = if sep.only_ascii() {
1339
+ ByteOffset(self.as_bytes().iter().enumerate())
1340
+ } else {
1341
+ CharOffset(self.char_offset_iter())
1342
+ };
1331
1343
CharSplitIterator {
1344
+ iter: iter,
1332
1345
string: *self,
1333
1346
position: 0,
1334
1347
sep: sep,
1335
1348
count: count,
1336
1349
allow_trailing_empty: allow_trailing_empty,
1337
1350
finished: false,
1338
- only_ascii: only_ascii
1339
1351
}
1340
1352
}
1341
1353
0 commit comments