@@ -667,6 +667,7 @@ impl<'self> StrCharSplitSeparator for extern "Rust" fn(char) -> bool {
667
667
}
668
668
669
669
impl < ' self , Sep : StrCharSplitSeparator > Iterator < & ' self str > for StrCharSplitIterator < ' self , Sep > {
670
+ #[ inline]
670
671
fn next ( & mut self ) -> Option < & ' self str > {
671
672
if self . finished { return None }
672
673
@@ -709,88 +710,69 @@ impl<'self, Sep: StrCharSplitSeparator> Iterator<&'self str> for StrCharSplitIte
709
710
}
710
711
}
711
712
712
- // See Issue #1932 for why this is a naive search
713
- fn iter_matches < ' a , ' b > ( s : & ' a str , sep : & ' b str ,
714
- f : & fn ( uint , uint ) -> bool ) -> bool {
715
- let ( sep_len, l) = ( sep. len ( ) , s. len ( ) ) ;
716
- assert ! ( sep_len > 0 u) ;
717
- let mut ( i, match_start, match_i) = ( 0 u, 0 u, 0 u) ;
713
+ /// An iterator over the start and end indices of the matches of a
714
+ /// substring within a larger string
715
+ pub struct StrMatchesIndexIterator < ' self > {
716
+ priv haystack : & ' self str ,
717
+ priv needle : & ' self str ,
718
+ priv position : uint ,
719
+ }
718
720
719
- while i < l {
720
- if s[ i] == sep[ match_i] {
721
- if match_i == 0 u { match_start = i; }
722
- match_i += 1 u;
723
- // Found a match
724
- if match_i == sep_len {
725
- if !f ( match_start, i + 1 u) { return false ; }
726
- match_i = 0 u;
727
- }
728
- i += 1 u;
729
- } else {
730
- // Failed match, backtrack
731
- if match_i > 0 u {
732
- match_i = 0 u;
733
- i = match_start + 1 u;
721
+ /// An iterator over the substrings of a string separated by a given
722
+ /// search string
723
+ pub struct StrStrSplitIterator < ' self > {
724
+ priv it: StrMatchesIndexIterator < ' self > ,
725
+ priv last_end : uint ,
726
+ priv finished : bool
727
+ }
728
+
729
+ impl < ' self > Iterator < ( uint , uint ) > for StrMatchesIndexIterator < ' self > {
730
+ #[ inline]
731
+ fn next ( & mut self ) -> Option < ( uint , uint ) > {
732
+ // See Issue #1932 for why this is a naive search
733
+ let ( h_len, n_len) = ( self . haystack . len ( ) , self . needle . len ( ) ) ;
734
+ let mut ( match_start, match_i) = ( 0 , 0 ) ;
735
+
736
+ while self . position < h_len {
737
+ if self . haystack [ self . position ] == self . needle [ match_i] {
738
+ if match_i == 0 { match_start = self . position ; }
739
+ match_i += 1 ;
740
+ self . position += 1 ;
741
+
742
+ if match_i == n_len {
743
+ // found a match!
744
+ return Some ( ( match_start, self . position ) ) ;
745
+ }
734
746
} else {
735
- i += 1 u;
747
+ // failed match, backtrack
748
+ if match_i > 0 {
749
+ match_i = 0 ;
750
+ self . position = match_start;
751
+ }
752
+ self . position += 1 ;
736
753
}
737
754
}
755
+ None
738
756
}
739
- return true ;
740
- }
741
-
742
- fn iter_between_matches < ' a , ' b > ( s : & ' a str ,
743
- sep : & ' b str ,
744
- f : & fn ( uint , uint ) -> bool ) -> bool {
745
- let mut last_end = 0 u;
746
- for iter_matches( s, sep) |from, to| {
747
- if !f ( last_end, from) { return false ; }
748
- last_end = to;
749
- }
750
- return f ( last_end, s. len ( ) ) ;
751
757
}
752
758
753
- /**
754
- * Splits a string into a vector of the substrings separated by a given string
755
- *
756
- * # Example
757
- *
758
- * ~~~ {.rust}
759
- * let mut v = ~[];
760
- * for each_split_str(".XXX.YYY.", ".") |subs| { v.push(subs); }
761
- * assert!(v == ["", "XXX", "YYY", ""]);
762
- * ~~~
763
- */
764
- pub fn each_split_str < ' a , ' b > ( s : & ' a str ,
765
- sep : & ' b str ,
766
- it : & fn ( & ' a str ) -> bool ) -> bool {
767
- for iter_between_matches( s, sep) |from, to| {
768
- if !it ( unsafe { raw:: slice_bytes ( s, from, to) } ) { return false ; }
769
- }
770
- return true ;
771
- }
759
+ impl < ' self > Iterator < & ' self str > for StrStrSplitIterator < ' self > {
760
+ #[ inline]
761
+ fn next ( & mut self ) -> Option < & ' self str > {
762
+ if self . finished { return None ; }
772
763
773
- /**
774
- * Splits the string `s` based on `sep`, yielding all splits to the iterator
775
- * function provide
776
- *
777
- * # Example
778
- *
779
- * ~~~ {.rust}
780
- * let mut v = ~[];
781
- * for each_split_str(".XXX.YYY.", ".") |subs| { v.push(subs); }
782
- * assert!(v == ["XXX", "YYY"]);
783
- * ~~~
784
- */
785
- pub fn each_split_str_nonempty < ' a , ' b > ( s : & ' a str ,
786
- sep : & ' b str ,
787
- it : & fn ( & ' a str ) -> bool ) -> bool {
788
- for iter_between_matches( s, sep) |from, to| {
789
- if to > from {
790
- if !it ( unsafe { raw:: slice_bytes ( s, from, to) } ) { return false ; }
764
+ match self . it . next ( ) {
765
+ Some ( ( from, to) ) => {
766
+ let ret = Some ( self . it . haystack . slice ( self . last_end , from) ) ;
767
+ self . last_end = to;
768
+ ret
769
+ }
770
+ None => {
771
+ self . finished = true ;
772
+ Some ( self . it . haystack . slice ( self . last_end , self . it . haystack . len ( ) ) )
773
+ }
791
774
}
792
775
}
793
- return true ;
794
776
}
795
777
796
778
/// Levenshtein Distance between two strings
@@ -929,15 +911,13 @@ pub fn each_split_within<'a>(ss: &'a str,
929
911
* The original string with all occurrences of `from` replaced with `to`
930
912
*/
931
913
pub fn replace ( s : & str , from : & str , to : & str ) -> ~str {
932
- let mut ( result, first) = ( ~"", true ) ;
933
- for iter_between_matches( s, from) |start, end| {
934
- if first {
935
- first = false ;
936
- } else {
937
- push_str ( & mut result, to) ;
938
- }
939
- push_str ( & mut result, unsafe { raw:: slice_bytes ( s, start, end) } ) ;
914
+ let mut ( result, last_end) = ( ~"", 0 ) ;
915
+ for s. matches_index_iter( from) . advance |( start, end) | {
916
+ result. push_str ( unsafe { raw:: slice_bytes ( s, last_end, start) } ) ;
917
+ result. push_str ( to) ;
918
+ last_end = end;
940
919
}
920
+ result. push_str ( unsafe { raw:: slice_bytes ( s, last_end, s. len ( ) ) } ) ;
941
921
result
942
922
}
943
923
@@ -2441,6 +2421,20 @@ pub trait StrSlice<'self> {
2441
2421
fn split_options_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep,
2442
2422
count: uint, allow_trailing_empty: bool)
2443
2423
-> StrCharSplitIterator<'self, Sep>;
2424
+ /// An iterator over the start and end indices of each match of
2425
+ /// `sep` within `self`.
2426
+ fn matches_index_iter(&self, sep: &'self str) -> StrMatchesIndexIterator<'self>;
2427
+ /**
2428
+ * An iterator over the substrings of `self` separated by `sep`.
2429
+ *
2430
+ * # Example
2431
+ *
2432
+ * ~~~ {.rust}
2433
+ * let v: ~[&str] = ".XXX.YYY.".split_str_iter(".").collect();
2434
+ * assert_eq!(v, ["", "XXX", "YYY", ""]);
2435
+ * ~~~
2436
+ */
2437
+ fn split_str_iter(&self, &'self str) -> StrStrSplitIterator<'self>;
2444
2438
/// An iterator over the lines of a string (subsequences separated
2445
2439
/// by `\n`).
2446
2440
fn line_iter(&self) -> StrCharSplitIterator<'self, char>;
@@ -2454,7 +2448,6 @@ pub trait StrSlice<'self> {
2454
2448
fn len(&self) -> uint;
2455
2449
fn char_len(&self) -> uint;
2456
2450
fn slice(&self, begin: uint, end: uint) -> &'self str;
2457
- fn each_split_str<'a>(&self, sep: &'a str, it: &fn(&'self str) -> bool) -> bool;
2458
2451
fn starts_with<'a>(&self, needle: &'a str) -> bool;
2459
2452
fn substr(&self, begin: uint, n: uint) -> &'self str;
2460
2453
fn escape_default(&self) -> ~str;
@@ -2529,6 +2522,21 @@ impl<'self> StrSlice<'self> for &'self str {
2529
2522
only_ascii: only_ascii
2530
2523
}
2531
2524
}
2525
+ fn matches_index_iter(&self, sep: &'self str) -> StrMatchesIndexIterator<'self> {
2526
+ assert!(!sep.is_empty())
2527
+ StrMatchesIndexIterator {
2528
+ haystack: *self,
2529
+ needle: sep,
2530
+ position: 0
2531
+ }
2532
+ }
2533
+ fn split_str_iter(&self, sep: &'self str) -> StrStrSplitIterator<'self> {
2534
+ StrStrSplitIterator {
2535
+ it: self.matches_index_iter(sep),
2536
+ last_end: 0,
2537
+ finished: false
2538
+ }
2539
+ }
2532
2540
2533
2541
fn line_iter(&self) -> StrCharSplitIterator<'self, char> {
2534
2542
self.split_options_iter('\n ', self.len(), false)
@@ -2581,15 +2589,6 @@ impl<'self> StrSlice<'self> for &'self str {
2581
2589
assert!(is_char_boundary(*self, end));
2582
2590
unsafe { raw::slice_bytes(*self, begin, end) }
2583
2591
}
2584
- /**
2585
- * Splits a string into a vector of the substrings separated by a given
2586
- * string
2587
- */
2588
- #[inline]
2589
- fn each_split_str<'a>(&self, sep: &'a str, it: &fn(&'self str) -> bool) -> bool {
2590
- each_split_str(*self, sep, it)
2591
- }
2592
- /// Returns true if one string starts with another
2593
2592
#[inline]
2594
2593
fn starts_with<'a>(&self, needle: &'a str) -> bool {
2595
2594
starts_with(*self, needle)
@@ -2836,30 +2835,6 @@ mod tests {
2836
2835
let _cc3 = pop_char(&mut data);
2837
2836
}
2838
2837
2839
- #[test]
2840
- fn test_split_str() {
2841
- fn t<'a>(s: &str, sep: &'a str, u: &[~str]) {
2842
- let mut v = ~[];
2843
- for each_split_str(s, sep) |s| { v.push(s.to_owned()) }
2844
- assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
2845
- }
2846
- t(" --1233345 --", " 12345 ", [~" --1233345 --"]);
2847
- t(" abc:: hello:: there", " :: ", [~" abc", ~" hello", ~" there"]);
2848
- t(" :: hello:: there", " :: ", [~" ", ~" hello", ~" there"]);
2849
- t(" hello:: there:: ", " :: ", [~" hello", ~" there", ~" "]);
2850
- t(" :: hello:: there:: ", " :: ", [~" ", ~" hello", ~" there", ~" "]);
2851
- t(" ประเทศไทย中华Việt Nam ", " 中华", [~" ประเทศไทย", ~" Việt Nam "]);
2852
- t(" zzXXXzzYYYzz", " zz", [~" ", ~" XXX ", ~" YYY ", ~" "]);
2853
- t(" zzXXXzYYYz", " XXX ", [~" zz", ~" zYYYz"]);
2854
- t(" . XXX . YYY . ", " . ", [~" ", ~" XXX ", ~" YYY ", ~" "]);
2855
- t(" ", " . ", [~" "]);
2856
- t(" zz", " zz", [~" ",~" "]);
2857
- t(" ok", " z", [~" ok"]);
2858
- t(" zzz", " zz", [~" ",~" z"]);
2859
- t(" zzzzz", " zz", [~" ",~" ",~" z"]);
2860
- }
2861
-
2862
-
2863
2838
#[test]
2864
2839
fn test_split_within() {
2865
2840
fn t(s: &str, i: uint, u: &[~str]) {
@@ -3727,4 +3702,27 @@ mod tests {
3727
3702
let lines: ~[ & str ] = data. line_iter( ) . collect( ) ;
3728
3703
assert_eq!( lines, ~[ "" , "Märy häd ä little lämb" , "" , "Little lämb" ] ) ;
3729
3704
}
3705
+
3706
+
3707
+ #[ test]
3708
+ fn test_split_str_iterator( ) {
3709
+ fn t<' a>( s: & str , sep: & ' a str , u: ~[ & str ] ) {
3710
+ let v: ~[ & str ] = s. split_str_iter( sep) . collect( ) ;
3711
+ assert_eq!( v, u) ;
3712
+ }
3713
+ t( "--1233345--" , "12345" , ~[ "--1233345--" ] ) ;
3714
+ t( "abc::hello::there" , "::" , ~[ "abc" , "hello" , "there" ] ) ;
3715
+ t( "::hello::there" , "::" , ~[ "" , "hello" , "there" ] ) ;
3716
+ t( "hello::there::" , "::" , ~[ "hello" , "there" , "" ] ) ;
3717
+ t( "::hello::there::" , "::" , ~[ "" , "hello" , "there" , "" ] ) ;
3718
+ t( "ประเทศไทย中华Việt Nam" , "中华" , ~[ "ประเทศไทย" , "Việt Nam" ] ) ;
3719
+ t( "zzXXXzzYYYzz" , "zz" , ~[ "" , "XXX" , "YYY" , "" ] ) ;
3720
+ t( "zzXXXzYYYz" , "XXX" , ~[ "zz" , "zYYYz" ] ) ;
3721
+ t( ".XXX.YYY." , "." , ~[ "" , "XXX" , "YYY" , "" ] ) ;
3722
+ t( "" , "." , ~[ "" ] ) ;
3723
+ t( "zz" , "zz" , ~[ "" , "" ] ) ;
3724
+ t( "ok" , "z" , ~[ "ok" ] ) ;
3725
+ t( "zzz" , "zz" , ~[ "" , "z" ] ) ;
3726
+ t( "zzzzz" , "zz" , ~[ "" , "" , "z" ] ) ;
3727
+ }
3730
3728
}
0 commit comments