Skip to content

Commit 8fe8302

Browse files
author
blake2-ppc
committed
std::str: Use iterators instead of while loops for CharSplitIterator
Embed an iterator in the CharSplitIterator struct and fold the former `only_ascii` bool into it by using an enum: one variant iterates over bytes (the ASCII-only fast path) and the other iterates over chars.
1 parent 30ab96b commit 8fe8302

File tree

1 file changed

+45
-33
lines changed

1 file changed

+45
-33
lines changed

src/libstd/str.rs

Lines changed: 45 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use clone::Clone;
2323
use container::{Container, Mutable};
2424
use iter::Times;
2525
use iterator::{Iterator, FromIterator, Extendable};
26-
use iterator::{Filter, AdditiveIterator, Map};
26+
use iterator::{Filter, AdditiveIterator, Map, Enumerate};
2727
use iterator::{Invert, DoubleEndedIterator};
2828
use libc;
2929
use num::{Saturating, Zero};
@@ -359,9 +359,18 @@ pub type ByteIterator<'self> =
359359
/// Use with the `std::iterator` module.
360360
pub type ByteRevIterator<'self> = Invert<ByteIterator<'self>>;
361361

362+
/// An iterator over pairs of a byte index and either a `&u8` or a `char`
363+
#[deriving(Clone)]
364+
enum OffsetIterator<'self> {
365+
// use ByteIterator here when it can be cloned
366+
ByteOffset(Enumerate<vec::VecIterator<'self, u8>>),
367+
CharOffset(CharOffsetIterator<'self>),
368+
}
369+
362370
/// An iterator over the substrings of a string, separated by `sep`.
363371
#[deriving(Clone)]
364372
pub struct CharSplitIterator<'self,Sep> {
373+
priv iter: OffsetIterator<'self>,
365374
priv string: &'self str,
366375
priv position: uint,
367376
priv sep: Sep,
@@ -370,7 +379,6 @@ pub struct CharSplitIterator<'self,Sep> {
370379
/// Whether an empty string at the end is allowed
371380
priv allow_trailing_empty: bool,
372381
priv finished: bool,
373-
priv only_ascii: bool
374382
}
375383

376384
/// An iterator over the words of a string, separated by a sequence of whitespace
@@ -386,39 +394,39 @@ impl<'self, Sep: CharEq> Iterator<&'self str> for CharSplitIterator<'self, Sep>
386394
fn next(&mut self) -> Option<&'self str> {
387395
if self.finished { return None }
388396

389-
let l = self.string.len();
390397
let start = self.position;
391-
392-
if self.only_ascii {
393-
// this gives a *huge* speed up for splitting on ASCII
394-
// characters (e.g. '\n' or ' ')
395-
while self.position < l && self.count > 0 {
396-
let byte = self.string[self.position];
397-
398-
if self.sep.matches(byte as char) {
399-
let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
400-
self.position += 1;
401-
self.count -= 1;
402-
return Some(slice);
403-
}
404-
self.position += 1;
405-
}
406-
} else {
407-
while self.position < l && self.count > 0 {
408-
let CharRange {ch, next} = self.string.char_range_at(self.position);
409-
410-
if self.sep.matches(ch) {
411-
let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
412-
self.position = next;
413-
self.count -= 1;
414-
return Some(slice);
415-
}
416-
self.position = next;
398+
let len = self.string.len();
399+
400+
if self.count > 0 {
401+
match self.iter {
402+
// this gives a *huge* speed up for splitting on ASCII
403+
// characters (e.g. '\n' or ' ')
404+
ByteOffset(ref mut iter) =>
405+
for (idx, &byte) in *iter {
406+
if self.sep.matches(byte as char) {
407+
self.position = idx + 1;
408+
self.count -= 1;
409+
return Some(unsafe {
410+
raw::slice_bytes(self.string, start, idx)
411+
})
412+
}
413+
},
414+
CharOffset(ref mut iter) =>
415+
for (idx, ch) in *iter {
416+
if self.sep.matches(ch) {
417+
// skip over the separator
418+
self.position = self.string.char_range_at(idx).next;
419+
self.count -= 1;
420+
return Some(unsafe {
421+
raw::slice_bytes(self.string, start, idx)
422+
})
423+
}
424+
},
417425
}
418426
}
419427
self.finished = true;
420-
if self.allow_trailing_empty || start < l {
421-
Some(unsafe { raw::slice_bytes(self.string, start, l) })
428+
if self.allow_trailing_empty || start < len {
429+
Some(unsafe { raw::slice_bytes(self.string, start, len) })
422430
} else {
423431
None
424432
}
@@ -1327,15 +1335,19 @@ impl<'self> StrSlice<'self> for &'self str {
13271335
#[inline]
13281336
fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
13291337
-> CharSplitIterator<'self, Sep> {
1330-
let only_ascii = sep.only_ascii();
1338+
let iter = if sep.only_ascii() {
1339+
ByteOffset(self.as_bytes().iter().enumerate())
1340+
} else {
1341+
CharOffset(self.char_offset_iter())
1342+
};
13311343
CharSplitIterator {
1344+
iter: iter,
13321345
string: *self,
13331346
position: 0,
13341347
sep: sep,
13351348
count: count,
13361349
allow_trailing_empty: allow_trailing_empty,
13371350
finished: false,
1338-
only_ascii: only_ascii
13391351
}
13401352
}
13411353

0 commit comments

Comments
 (0)