@@ -500,7 +500,7 @@ impl str {
500
500
///
501
501
/// # Unsafety
502
502
///
503
- /// Caller must check both UTF-8 sequence boundaries and the boundaries
503
+ /// Caller must check both UTF-8 character boundaries and the boundaries
504
504
/// of the entire slice as
505
505
/// well.
506
506
///
@@ -526,16 +526,15 @@ impl str {
526
526
core_str:: StrExt :: slice_mut_unchecked ( self , begin, end)
527
527
}
528
528
529
- /// Returns a slice of the string from the range [`begin`..`end`) where indices
530
- /// are counted in code points.
529
+ /// Returns a slice of the string from the character range [`begin`..`end`).
531
530
///
532
531
/// That is, start at the `begin`-th code point of the string and continue
533
532
/// to the `end`-th code point. This does not detect or handle edge cases
534
- /// such as leaving a combining character as the first `char` of the
533
+ /// such as leaving a combining character as the first code point of the
535
534
/// string.
536
535
///
537
536
/// Due to the design of UTF-8, this operation is `O(end)`. Use slicing
538
- /// syntax if you want to use `O(1)` byte indices instead.
537
+ /// syntax if you want to use byte indices rather than codepoint indices.
539
538
///
540
539
/// # Panics
541
540
///
@@ -557,26 +556,26 @@ impl str {
557
556
core_str:: StrExt :: slice_chars ( self , begin, end)
558
557
}
559
558
560
- /// Given a byte position, return the next code point and its index.
559
+ /// Given a byte position, return the next char and its index.
561
560
///
562
- /// This can be used to iterate over the Unicode code points of a string.
561
+ /// This can be used to iterate over the Unicode characters of a string.
563
562
///
564
563
/// # Panics
565
564
///
566
565
/// If `i` is greater than or equal to the length of the string.
567
- /// If `i` is not the index of the beginning of a valid UTF-8 sequence.
566
+ /// If `i` is not the index of the beginning of a valid UTF-8 character.
568
567
///
569
568
/// # Examples
570
569
///
571
- /// This example manually iterates through the code points of a string;
570
+ /// This example manually iterates through the characters of a string;
572
571
/// this should normally be
573
572
/// done by `.chars()` or `.char_indices()`.
574
573
///
575
574
/// ```
576
575
/// # #![feature(str_char, core)]
577
576
/// use std::str::CharRange;
578
577
///
579
- /// let s = "中华Việt Nam";
578
+ /// let s = "中华Việt Nam";
580
579
/// let mut i = 0;
581
580
/// while i < s.len() {
582
581
/// let CharRange {ch, next} = s.char_range_at(i);
@@ -592,14 +591,12 @@ impl str {
592
591
/// 3: 华
593
592
/// 6: V
594
593
/// 7: i
595
- /// 8: e
596
- /// 9: ̣
597
- /// 11: ̂
598
- /// 13: t
599
- /// 14:
600
- /// 15: N
601
- /// 16: a
602
- /// 17: m
594
+ /// 8: ệ
595
+ /// 11: t
596
+ /// 12:
597
+ /// 13: N
598
+ /// 14: a
599
+ /// 15: m
603
600
/// ```
604
601
#[ unstable( feature = "str_char" ,
605
602
reason = "often replaced by char_indices, this method may \
@@ -611,29 +608,26 @@ impl str {
611
608
612
609
/// Given a byte position, return the previous `char` and its position.
613
610
///
614
- /// This function can be used to iterate over a Unicode code points in reverse.
615
- ///
616
- /// Note that Unicode has many features, such as combining marks, ligatures,
617
- /// and direction marks, that need to be taken into account to correctly reverse a string.
611
+ /// This function can be used to iterate over a Unicode string in reverse.
618
612
///
619
613
/// Returns 0 for next index if called on start index 0.
620
614
///
621
615
/// # Panics
622
616
///
623
617
/// If `i` is greater than the length of the string.
624
- /// If `i` is not an index following a valid UTF-8 sequence.
618
+ /// If `i` is not an index following a valid UTF-8 character.
625
619
///
626
620
/// # Examples
627
621
///
628
- /// This example manually iterates through the code points of a string;
622
+ /// This example manually iterates through the characters of a string;
629
623
/// this should normally be
630
624
/// done by `.chars().rev()` or `.char_indices()`.
631
625
///
632
626
/// ```
633
627
/// # #![feature(str_char, core)]
634
628
/// use std::str::CharRange;
635
629
///
636
- /// let s = "中华Việt Nam";
630
+ /// let s = "中华Việt Nam";
637
631
/// let mut i = s.len();
638
632
/// while i > 0 {
639
633
/// let CharRange {ch, next} = s.char_range_at_reverse(i);
@@ -645,14 +639,12 @@ impl str {
645
639
/// This outputs:
646
640
///
647
641
/// ```text
648
- /// 18: m
649
- /// 17: a
650
- /// 16: N
651
- /// 15:
652
- /// 14: t
653
- /// 13: ̂
654
- /// 11: ̣
655
- /// 9: e
642
+ /// 16: m
643
+ /// 15: a
644
+ /// 14: N
645
+ /// 13:
646
+ /// 12: t
647
+ /// 11: ệ
656
648
/// 8: i
657
649
/// 7: V
658
650
/// 6: 华
@@ -671,7 +663,7 @@ impl str {
671
663
/// # Panics
672
664
///
673
665
/// If `i` is greater than or equal to the length of the string.
674
- /// If `i` is not the index of the beginning of a valid UTF-8 sequence.
666
+ /// If `i` is not the index of the beginning of a valid UTF-8 character.
675
667
///
676
668
/// # Examples
677
669
///
@@ -680,7 +672,6 @@ impl str {
680
672
/// let s = "abπc";
681
673
/// assert_eq!(s.char_at(1), 'b');
682
674
/// assert_eq!(s.char_at(2), 'π');
683
- /// assert_eq!(s.char_at(4), 'c');
684
675
/// ```
685
676
#[ unstable( feature = "str_char" ,
686
677
reason = "frequently replaced by the chars() iterator, this \
@@ -698,7 +689,7 @@ impl str {
698
689
/// # Panics
699
690
///
700
691
/// If `i` is greater than the length of the string.
701
- /// If `i` is not an index following a valid UTF-8 sequence.
692
+ /// If `i` is not an index following a valid UTF-8 character.
702
693
///
703
694
/// # Examples
704
695
///
@@ -707,7 +698,6 @@ impl str {
707
698
/// let s = "abπc";
708
699
/// assert_eq!(s.char_at_reverse(1), 'a');
709
700
/// assert_eq!(s.char_at_reverse(2), 'b');
710
- /// assert_eq!(s.char_at_reverse(3), 'π');
711
701
/// ```
712
702
#[ unstable( feature = "str_char" ,
713
703
reason = "see char_at for more details, but reverse semantics \
@@ -717,30 +707,28 @@ impl str {
717
707
core_str:: StrExt :: char_at_reverse ( self , i)
718
708
}
719
709
720
- /// Retrieves the first code point from a `&str` and returns it.
721
- ///
722
- /// Note that a single Unicode character (grapheme cluster)
723
- /// can be composed of multiple `char`s.
710
+ /// Retrieves the first character from a `&str` and returns it.
724
711
///
725
712
/// This does not allocate a new string; instead, it returns a slice that
726
- /// points one code point beyond the code point that was shifted.
713
+ /// points one character
714
+ /// beyond the character that was shifted.
727
715
///
728
- /// `None` is returned if the slice is empty.
716
+ /// If the slice does not contain any characters, None is returned instead.
729
717
///
730
718
/// # Examples
731
719
///
732
720
/// ```
733
721
/// # #![feature(str_char)]
734
- /// let s = "Łódź"; // \u{141}o\u{301}dz\u{301}
722
+ /// let s = "Löwe 老虎 Léopard";
735
723
/// let (c, s1) = s.slice_shift_char().unwrap();
736
724
///
737
- /// assert_eq!(c, 'Ł');
738
- /// assert_eq!(s1, "ódź ");
725
+ /// assert_eq!(c, 'L');
726
+ /// assert_eq!(s1, "öwe 老虎 Léopard");
739
727
///
740
728
/// let (c, s2) = s1.slice_shift_char().unwrap();
741
729
///
742
- /// assert_eq!(c, 'o');
743
- /// assert_eq!(s2, "\u{301}dz\u{301}");
730
+ /// assert_eq!(c, 'ö');
731
+ /// assert_eq!(s2, "we 老虎 Léopard");
744
732
/// ```
745
733
#[ unstable( feature = "str_char" ,
746
734
reason = "awaiting conventions about shifting and slices and \
@@ -753,14 +741,14 @@ impl str {
753
741
/// Divide one string slice into two at an index.
754
742
///
755
743
/// The index `mid` is a byte offset from the start of the string
756
- /// that must be on a `char` boundary.
744
+ /// that must be on a character boundary.
757
745
///
758
746
/// Return slices `&self[..mid]` and `&self[mid..]`.
759
747
///
760
748
/// # Panics
761
749
///
762
- /// Panics if `mid` is beyond the last code point of the string,
763
- /// or if it is not on a `char` boundary.
750
+ /// Panics if `mid` is beyond the last character of the string,
751
+ /// or if it is not on a character boundary.
764
752
///
765
753
/// # Examples
766
754
/// ```
@@ -785,39 +773,27 @@ impl str {
785
773
core_str:: StrExt :: split_at_mut ( self , mid)
786
774
}
787
775
788
- /// An iterator over the code points of `self`.
789
- ///
790
- /// In Unicode relationship between code points and characters is complex.
791
- /// A single character may be composed of multiple code points
792
- /// (e.g. diacritical marks added to a letter), and a single code point
793
- /// (e.g. Hangul syllable) may contain multiple characters.
794
- ///
795
- /// For iteration over human-readable characters a grapheme cluster iterator
796
- /// may be more appropriate. See the [unicode-segmentation crate][1].
797
- ///
798
- /// [1]: https://crates.io/crates/unicode-segmentation
776
+ /// An iterator over the codepoints of `self`.
799
777
///
800
778
/// # Examples
801
779
///
802
780
/// ```
803
- /// let v: Vec<char> = "ASCII żółć 🇨🇭 한 ".chars().collect();
781
+ /// let v: Vec<char> = "abc åäö".chars().collect();
804
782
///
805
- /// assert_eq!(v, ['A', 'S', 'C', 'I', 'I', ' ',
806
- /// 'z', '\u{307}', 'o', '\u{301}', 'ł', 'c', '\u{301}', ' ',
807
- /// '\u{1f1e8}', '\u{1f1ed}', ' ', '한']);
783
+ /// assert_eq!(v, ['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
808
784
/// ```
809
785
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
810
786
pub fn chars ( & self ) -> Chars {
811
787
core_str:: StrExt :: chars ( self )
812
788
}
813
789
814
- /// An iterator over the `char`s of `self` and their byte offsets.
790
+ /// An iterator over the characters of `self` and their byte offsets.
815
791
///
816
792
/// # Examples
817
793
///
818
794
/// ```
819
- /// let v: Vec<(usize, char)> = "A🇨🇭".char_indices().collect();
820
- /// let b = vec![(0, 'A'), (1, '\u{1f1e8}'), (5, '\u{1f1ed}')];
795
+ /// let v: Vec<(usize, char)> = "abc".char_indices().collect();
796
+ /// let b = vec![(0, 'a'), (1, 'b'), (2, 'c')];
821
797
///
822
798
/// assert_eq!(v, b);
823
799
/// ```
@@ -846,7 +822,7 @@ impl str {
846
822
/// # Examples
847
823
///
848
824
/// ```
849
- /// let some_words = " Mary had\ta\u{2009} little \n\t lamb";
825
+ /// let some_words = " Mary had\ta little \n\t lamb";
850
826
/// let v: Vec<&str> = some_words.split_whitespace().collect();
851
827
///
852
828
/// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
@@ -864,7 +840,7 @@ impl str {
864
840
/// ```
865
841
/// # #![feature(str_words)]
866
842
/// # #![allow(deprecated)]
867
- /// let some_words = " Mary had\ta\u{2009} little \n\t lamb";
843
+ /// let some_words = " Mary had\ta little \n\t lamb";
868
844
/// let v: Vec<&str> = some_words.words().collect();
869
845
///
870
846
/// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
0 commit comments