@@ -201,6 +201,17 @@ extension String.UTF16View: BidirectionalCollection {
201
201
return _foreignIndex ( i, offsetBy: n)
202
202
}
203
203
204
+ if n. magnitude <= _StringBreadcrumbs. breadcrumbStride {
205
+ // Do not use breadcrumbs if directly computing the result is expected to
206
+ // be cheaper.
207
+ if _guts. isASCII {
208
+ return Index (
209
+ _encodedOffset: i. _encodedOffset + n
210
+ ) . _scalarAligned. _encodingIndependent
211
+ }
212
+ return _index ( i, offsetBy: n) . _knownUTF8
213
+ }
214
+
204
215
let lowerOffset = _nativeGetOffset ( for: i)
205
216
let result = _nativeGetIndex ( for: lowerOffset + n)
206
217
return result
@@ -219,6 +230,17 @@ extension String.UTF16View: BidirectionalCollection {
219
230
return _foreignIndex ( i, offsetBy: n, limitedBy: limit)
220
231
}
221
232
233
+ if n. magnitude <= _StringBreadcrumbs. breadcrumbStride {
234
+ // Do not use breadcrumbs if directly computing the result is expected to
235
+ // be cheaper.
236
+ if _guts. isASCII {
237
+ return ( 0 ..< _guts. count) . index (
238
+ i. _encodedOffset, offsetBy: n, limitedBy: limit. _encodedOffset
239
+ ) . map { Index ( _encodedOffset: $0) . _scalarAligned. _encodingIndependent }
240
+ }
241
+ return _index ( i, offsetBy: n, limitedBy: limit) ? . _knownUTF8
242
+ }
243
+
222
244
let iOffset = _nativeGetOffset ( for: i)
223
245
let limitOffset = _nativeGetOffset ( for: limit)
224
246
@@ -255,6 +277,18 @@ extension String.UTF16View: BidirectionalCollection {
255
277
return _foreignDistance ( from: start, to: end)
256
278
}
257
279
280
+ let utf8Distance = end. _encodedOffset - start. _encodedOffset
281
+ if utf8Distance. magnitude <= _StringBreadcrumbs. breadcrumbStride {
282
+ // Do not use breadcrumbs if directly computing the result is expected to
283
+ // be cheaper. The conservative threshold above assumes that each UTF-16
284
+ // code unit will map to a single UTF-8 code unit, i.e., the worst
285
+ // possible (a.k.a. most compact) case with all ASCII scalars.
286
+ // FIXME: Figure out if a more optimistic threshold would work better.
287
+ if _guts. isASCII {
288
+ return end. _encodedOffset - start. _encodedOffset
289
+ }
290
+ return _utf16Distance ( from: start, to: end)
291
+ }
258
292
let lower = _nativeGetOffset ( for: start)
259
293
let upper = _nativeGetOffset ( for: end)
260
294
return upper &- lower
@@ -691,6 +725,14 @@ extension String.UTF16View {
691
725
}
692
726
}
693
727
728
+ /// Return the UTF-16 offset corresponding to `idx`, measured from the
729
+ /// start of this string, which must be a native UTF-8 string.
730
+ ///
731
+ /// - Complexity: This measures the UTF-16 distance of `idx` from its nearest
732
+ /// breadcrumb index (rounding down), so on average it needs to look at
733
+ /// `breadcrumbStride / 2` UTF-8 code units. (In addition to the O(log(n))
734
+ /// cost of looking up the nearest breadcrumb, and the amortizable O(n)
735
+ /// cost of generating the breadcrumbs in the first place.)
694
736
@usableFromInline
695
737
@_effects ( releasenone)
696
738
internal func _nativeGetOffset( for idx: Index ) -> Int {
@@ -714,11 +756,22 @@ extension String.UTF16View {
714
756
if idx == endIndex { return breadcrumbsPtr. pointee. utf16Length }
715
757
716
758
// Otherwise, find the nearest lower-bound breadcrumb and count from there
759
+ // FIXME: Starting from the upper-bound crumb when that is closer would cut
760
+ // the average cost of the subsequent iteration by 50%.
717
761
let ( crumb, crumbOffset) = breadcrumbsPtr. pointee. getBreadcrumb (
718
762
forIndex: idx)
719
763
return crumbOffset + _utf16Distance( from: crumb, to: idx)
720
764
}
721
765
766
+ /// Return the index at the given UTF-16 offset, measured from the
767
+ /// start of this string, which must be a native UTF-8 string.
768
+ ///
769
+ /// - Complexity: This iterates UTF-16 code units starting from the
770
+ /// nearest breadcrumb to `offset` (rounding down), so on
771
+ /// average it needs to look at `breadcrumbStride / 2` UTF-8 code
772
+ /// units. (In addition to the O(1) cost of looking up the nearest
773
+ /// breadcrumb, and the amortizable O(n) cost of generating the
774
+ /// breadcrumbs in the first place.)
722
775
@usableFromInline
723
776
@_effects ( releasenone)
724
777
internal func _nativeGetIndex( for offset: Int ) -> Index {
@@ -742,6 +795,8 @@ extension String.UTF16View {
742
795
if offset == breadcrumbsPtr. pointee. utf16Length { return endIndex }
743
796
744
797
// Otherwise, find the nearest lower-bound breadcrumb and advance that
798
+ // FIXME: Starting from the upper-bound crumb when that is closer would cut
799
+ // the average cost of the subsequent iteration by 50%.
745
800
let ( crumb, remaining) = breadcrumbsPtr. pointee. getBreadcrumb (
746
801
forOffset: offset)
747
802
if remaining == 0 { return crumb }
0 commit comments