@@ -779,3 +779,142 @@ public let utf16Tests = [
779
779
] ,
780
780
]
781
781
782
extension String {
  /// Prints every index of every view of this string (characters, Unicode
  /// scalars, UTF-8 and UTF-16 code units) together with the element stored
  /// at that index.
  ///
  /// This is useful while debugging string indexing issues.
  public func dumpIndices() {
    /// Renders `value` as uppercase hexadecimal, left-padded with zeros to at
    /// least `width` digits. Values wider than `width` are returned unpadded.
    func paddedHex<T: BinaryInteger>(_ value: T, width: Int) -> String {
      let digits = String(value, radix: 16, uppercase: true)
      // `max` guards against a negative repeat count for scalars above U+FFFF.
      let zeros = String(repeating: "0", count: max(0, width - digits.count))
      return zeros + digits
    }

    print(" ------------------------------------------------------------------- ")
    print(" String: \(String(reflecting: self)) ")

    print(" Characters: ")
    for i in indices {
      let char = self[i]
      print(" \(i) -> \(String(reflecting: char)) ")
    }

    print(" Scalars: ")
    for i in unicodeScalars.indices {
      let scalar = unicodeScalars[i]
      let hex = paddedHex(scalar.value, width: 4)
      // Fall back to the debug description for scalars without a Unicode name.
      let name = scalar.properties.name ?? " \(scalar.debugDescription) "
      print(" \(i) -> U+\(hex) \(name) ")
    }

    print(" UTF-8: ")
    for i in utf8.indices {
      print(" \(i) -> \(paddedHex(utf8[i], width: 2)) ")
    }

    print(" UTF-16: ")
    for i in utf16.indices {
      print(" \(i) -> \(paddedHex(utf16[i], width: 4)) ")
    }
  }

  /// Returns a list of every valid index in every string view, optionally
  /// including end indices.
  ///
  /// Views are visited in the order characters, Unicode scalars, UTF-8,
  /// UTF-16. Equal indices originating from different views are all kept,
  /// because they may have different grapheme size caches or flags etc.
  ///
  /// - Parameter includingEnd: Whether each view's `endIndex` is appended
  ///   after that view's indices. Defaults to `true`.
  /// - Returns: The concatenated indices of all four views.
  public func allIndices(includingEnd: Bool = true) -> [String.Index] {
    var result: [String.Index] = []

    // Appends the indices of one view, followed by its end index on request.
    func add<V: Collection>(_ view: V) where V.Index == String.Index {
      result.append(contentsOf: view.indices)
      if includingEnd { result.append(view.endIndex) }
    }

    add(self)
    add(unicodeScalars)
    add(utf8)
    add(utf16)
    return result
  }
}
832
+
833
extension Substring {
  /// Returns a list of every valid index in every substring view, optionally
  /// including end indices.
  ///
  /// Views are visited in the order characters, Unicode scalars, UTF-8,
  /// UTF-16. Equal indices originating from different views are all kept,
  /// because they may have different grapheme size caches or flags etc.
  ///
  /// - Parameter includingEnd: Whether each view's `endIndex` is appended
  ///   after that view's indices. Defaults to `true`.
  /// - Returns: The concatenated indices of all four views.
  public func allIndices(includingEnd: Bool = true) -> [String.Index] {
    var collected: [String.Index] = []

    // Appends the indices of one view, followed by its end index on request.
    func gather<V: Collection>(from view: V) where V.Index == String.Index {
      collected.append(contentsOf: view.indices)
      if includingEnd { collected.append(view.endIndex) }
    }

    gather(from: self)
    gather(from: unicodeScalars)
    gather(from: utf8)
    gather(from: utf16)
    return collected
  }
}
849
+
850
extension Collection {
  /// Calls `body` once for each index `i` in `other`, passing `i`, the slice
  /// of `self` that begins where the previous group ended and runs up to the
  /// index following `i` in `other`, and the zero-based position of `i`
  /// within `other`.
  ///
  /// Both `self` and `other` are assumed to use the same index space.
  /// `other` must start with an index that is less than or equal to the
  /// first index in `self`. If `other` is empty, `self` is asserted to be
  /// empty as well and `body` is never called.
  ///
  /// - Parameters:
  ///   - other: The collection whose indices delimit the groups.
  ///   - body: Invoked with the group's index in `other`, the matching slice
  ///     of `self`, and the group's offset.
  /// - Throws: Rethrows any error thrown by `body`.
  func forEachIndexGroup<G: Collection>(
    by other: G,
    body: (G.Index, Self.SubSequence, Int) throws -> Void
  ) rethrows
  where G.Index == Self.Index
  {
    guard !other.isEmpty else {
      assert(self.isEmpty)
      return
    }

    var cursor = startIndex          // Position reached so far in `self`.
    var ordinal = 0                  // Offset of the current group in `other`.
    var boundary = other.startIndex  // Index of the current group in `other`.

    while boundary != other.endIndex {
      let nextBoundary = other.index(after: boundary)
      let groupStart = cursor
      // Consume elements of `self` up to the next boundary, never stepping
      // past the end of `self`.
      while cursor < nextBoundary && cursor < endIndex {
        cursor = index(after: cursor)
      }
      try body(boundary, self[groupStart..<cursor], ordinal)
      ordinal += 1
      boundary = nextBoundary
    }
  }
}
883
+
884
extension String {
  /// Returns a dictionary mapping each valid index to the index that addresses
  /// the nearest scalar boundary, rounding down.
  ///
  /// Each value also carries the offset of that scalar within
  /// `unicodeScalars`. `endIndex` maps to itself, with the scalar count as
  /// its offset.
  public func scalarMap() -> [Index: (index: Index, offset: Int)] {
    var result: [Index: (index: Index, offset: Int)] = [:]

    // Records, for every index of `view`, the scalar group it falls into.
    func record<V: Collection>(_ view: V) where V.Index == String.Index {
      view.forEachIndexGroup(by: unicodeScalars) { boundary, group, ordinal in
        for position in group.indices {
          result[position] = (index: boundary, offset: ordinal)
        }
      }
    }

    record(utf8)
    record(utf16)
    record(self)
    result[endIndex] = (index: endIndex, offset: unicodeScalars.count)
    return result
  }

  /// Returns a dictionary mapping each valid index to the index that addresses
  /// the nearest character boundary, rounding down.
  ///
  /// Each value also carries the offset of that character within the string.
  /// `endIndex` maps to itself, with the character count as its offset.
  public func characterMap() -> [Index: (index: Index, offset: Int)] {
    var result: [Index: (index: Index, offset: Int)] = [:]

    // Records, for every index of `view`, the character group it falls into.
    func record<V: Collection>(_ view: V) where V.Index == String.Index {
      view.forEachIndexGroup(by: self) { boundary, group, ordinal in
        for position in group.indices {
          result[position] = (index: boundary, offset: ordinal)
        }
      }
    }

    record(utf8)
    record(utf16)
    record(unicodeScalars)
    result[endIndex] = (index: endIndex, offset: count)
    return result
  }
}
920
+
0 commit comments