@@ -138,6 +138,9 @@ extension String.UTF16View: BidirectionalCollection {
138
138
@inlinable @inline(__always)
public var endIndex: Index {
  // Forwarded straight from the underlying string storage.
  _guts.endIndex
}
140
140
141
// Shorthand for `_StringBreadcrumbs.breadcrumbStride`, so that code in this
// view can refer to the stride without spelling out the type name.
@inline(__always)
internal var _breadcrumbStride: Int {
  return _StringBreadcrumbs.breadcrumbStride
}
143
+
141
144
@inlinable @inline ( __always)
142
145
public func index( after idx: Index ) -> Index {
143
146
var idx = _guts. ensureMatchingEncoding ( idx)
@@ -194,31 +197,61 @@ extension String.UTF16View: BidirectionalCollection {
194
197
return idx. encoded ( offsetBy: - len) . _scalarAligned. _knownUTF8
195
198
}
196
199
200
// Returns the index that lies `n` UTF-16 code units away from `i`.
//
// Strategy, in order:
//   1. Foreign strings are delegated to `_foreignIndex`.
//   2. ASCII strings are answered by plain offset arithmetic.
//   3. Short hops are walked directly via `_index(_:offsetBy:)`.
//   4. Everything else goes through the offset <-> index conversions
//      (`_nativeGetOffset` / `_nativeGetIndex`).
@_effects(releasenone)
public func index(_ i: Index, offsetBy n: Int) -> Index {
  var origin = _guts.ensureMatchingEncoding(i)
  _precondition(origin <= endIndex, " String index is out of bounds ")

  if _slowPath(_guts.isForeign) {
    return _foreignIndex(origin, offsetBy: n)
  }

  guard !_guts.isASCII else {
    // All-ASCII storage: the encoded offset can be adjusted by `n` directly.
    let target = origin._encodedOffset + n
    return Index(_encodedOffset: target)._scalarAligned._encodingIndependent
  }

  origin = _utf16AlignNativeIndex(origin)

  // The cutoff below which a direct walk is preferred. It is halved when
  // starting at `startIndex` -- presumably because the offset of the start
  // is trivially known, which makes the offset-based route cheaper there.
  let directWalkLimit: Int
  if origin == startIndex {
    directWalkLimit = _breadcrumbStride / 2
  } else {
    directWalkLimit = _breadcrumbStride
  }

  guard n.magnitude >= directWalkLimit else {
    // Do not use breadcrumbs if directly computing the result is expected
    // to be cheaper.
    return _index(origin, offsetBy: n)._knownUTF8
  }

  let originOffset = _nativeGetOffset(for: origin)
  return _nativeGetIndex(for: originOffset + n)
}
208
228
229
+ @_effects ( releasenone)
209
230
public func index(
210
231
_ i: Index , offsetBy n: Int , limitedBy limit: Index
211
232
) -> Index ? {
212
- let limit = _guts. ensureMatchingEncoding ( limit)
233
+ var limit = _guts. ensureMatchingEncoding ( limit)
213
234
guard _fastPath ( limit <= endIndex) else { return index ( i, offsetBy: n) }
214
235
215
- let i = _guts. ensureMatchingEncoding ( i)
236
+ var i = _guts. ensureMatchingEncoding ( i)
216
237
_precondition ( i <= endIndex, " String index is out of bounds " )
217
238
218
239
if _slowPath ( _guts. isForeign) {
219
240
return _foreignIndex ( i, offsetBy: n, limitedBy: limit)
220
241
}
221
242
243
+ if !_guts. isASCII { // We have ASCII fast paths below
244
+ limit = _utf16AlignNativeIndex ( limit)
245
+ i = _utf16AlignNativeIndex ( i)
246
+ let threshold = (
247
+ _breadcrumbStride + ( i == startIndex ? 0 : _breadcrumbStride / 2 ) )
248
+ if n. magnitude < threshold {
249
+ // Do not use breadcrumbs if directly computing the result is expected
250
+ // to be cheaper.
251
+ return _index ( i, offsetBy: n, limitedBy: limit) ? . _knownUTF8
252
+ }
253
+ }
254
+
222
255
let iOffset = _nativeGetOffset ( for: i)
223
256
let limitOffset = _nativeGetOffset ( for: limit)
224
257
@@ -235,9 +268,10 @@ extension String.UTF16View: BidirectionalCollection {
235
268
return result
236
269
}
237
270
271
+ @_effects ( releasenone)
238
272
public func distance( from start: Index , to end: Index ) -> Int {
239
- let start = _guts. ensureMatchingEncoding ( start)
240
- let end = _guts. ensureMatchingEncoding ( end)
273
+ var start = _guts. ensureMatchingEncoding ( start)
274
+ var end = _guts. ensureMatchingEncoding ( end)
241
275
242
276
// FIXME: This method used to not properly validate indices before 5.7;
243
277
// temporarily allow older binaries to keep invoking undefined behavior as
@@ -255,6 +289,29 @@ extension String.UTF16View: BidirectionalCollection {
255
289
return _foreignDistance ( from: start, to: end)
256
290
}
257
291
292
+ let utf8Distance = end. _encodedOffset - start. _encodedOffset
293
+
294
+ if _guts. isASCII {
295
+ return utf8Distance
296
+ }
297
+
298
+ let threshold = ( start == startIndex || end == startIndex
299
+ ? _breadcrumbStride / 2
300
+ : _breadcrumbStride)
301
+ if utf8Distance. magnitude < threshold {
302
+ // Do not use breadcrumbs if directly computing the result is expected to
303
+ // be cheaper. The conservative threshold above assumes that each UTF-16
304
+ // code unit will map to a single UTF-8 code unit, i.e., the worst
305
+ // possible (a.k.a. most compact) case with all ASCII scalars.
306
+ // FIXME: Figure out if a more optimistic threshold would work better.
307
+ start = _utf16AlignNativeIndex ( start)
308
+ end = _utf16AlignNativeIndex ( end)
309
+ guard start <= end else {
310
+ return - _utf16Distance( from: end, to: start)
311
+ }
312
+ return _utf16Distance ( from: start, to: end)
313
+ }
314
+
258
315
let lower = _nativeGetOffset ( for: start)
259
316
let upper = _nativeGetOffset ( for: end)
260
317
return upper &- lower
@@ -268,6 +325,86 @@ extension String.UTF16View: BidirectionalCollection {
268
325
return _nativeGetOffset ( for: endIndex)
269
326
}
270
327
328
/// Converts a range of UTF-16 offsets, measured relative to `start`, into
/// the corresponding range of indices in this view.
///
/// - Parameters:
///   - offsets: UTF-16 code unit offsets, relative to `start`.
///   - start: The index the offsets are measured from. Its encoding must
///     already match the string's storage (checked by an internal
///     invariant only).
/// - Returns: The range of indices addressed by `offsets`. The range is
///   formed with `Range(uncheckedBounds:)`.
internal func _indexRange(
  for offsets: Range<Int>,
  from start: Index
) -> Range<Index> {
  _internalInvariant(_guts.hasMatchingEncoding(start))
  if _slowPath(_guts.isForeign) {
    let lower = self.index(start, offsetBy: offsets.lowerBound)
    let upper = _foreignIndex(lower, offsetBy: offsets.count)
    return Range(uncheckedBounds: (lower, upper))
  }

  if _guts.isASCII {
    let lower = self.index(start, offsetBy: offsets.lowerBound)
    let upper = self.index(lower, offsetBy: offsets.count)
    return Range(uncheckedBounds: (lower, upper))
  }

  if offsets.count < _breadcrumbStride / 2 {
    // Short range: once the lower bound is known, walk directly to the upper
    // bound instead of performing a second offset-to-index conversion.
    let lower = self.index(start, offsetBy: offsets.lowerBound)
    let upper = _index(lower, offsetBy: offsets.count)._knownUTF8
    return Range(uncheckedBounds: (lower, upper))
  }

  let bias = _nativeGetOffset(for: start)
  // `offsets` is already relative to `start`, so the length of a direct walk
  // from `start` to the lower bound is `offsets.lowerBound` on its own.
  // Subtracting the absolute `bias` here would make the comparison succeed
  // for every `start` that lies deep inside the string, forcing an
  // arbitrarily long linear walk. Both strategies yield the same index; this
  // only selects the cheaper one.
  let lower = (
    offsets.lowerBound.magnitude <= _breadcrumbStride / 2
    ? _index(start, offsetBy: offsets.lowerBound)
    : _nativeGetIndex(for: bias + offsets.lowerBound))
  let upper = _nativeGetIndex(for: bias + offsets.upperBound)
  return Range(uncheckedBounds: (lower, upper))
}
359
+
360
/// Converts a range of indices into the corresponding range of UTF-16
/// offsets, measured relative to `start`.
///
/// - Parameters:
///   - range: The index range to convert. Both bounds are first adjusted to
///     the string's encoding and (for binaries linked on or after 5.7)
///     checked against the storage's code unit count.
///   - start: The index the resulting offsets are measured from. Its
///     encoding must already match the string's storage.
/// - Returns: The UTF-16 offsets of `range`'s bounds relative to `start`.
internal func _offsetRange(
  for range: Range<Index>,
  from start: Index
) -> Range<Int> {
  var lowerIndex = _guts.ensureMatchingEncoding(range.lowerBound)
  var upperIndex = _guts.ensureMatchingEncoding(range.upperBound)
  _internalInvariant(_guts.hasMatchingEncoding(start))

  _precondition(
    ifLinkedOnOrAfter: .v5_7_0,
    lowerIndex._encodedOffset <= _guts.count,
    " String index is out of bounds ")
  _precondition(
    ifLinkedOnOrAfter: .v5_7_0,
    upperIndex._encodedOffset <= _guts.count,
    " String index is out of bounds ")

  if _slowPath(_guts.isForeign) {
    let first = _foreignDistance(from: start, to: lowerIndex)
    let length = _foreignDistance(from: lowerIndex, to: upperIndex)
    return Range(uncheckedBounds: (first, first + length))
  }

  let utf8Length = upperIndex._encodedOffset - lowerIndex._encodedOffset

  if _guts.isASCII {
    // All-ASCII storage: differences of encoded offsets are used as the
    // UTF-16 offsets directly.
    let first = lowerIndex._encodedOffset - start._encodedOffset
    return Range(uncheckedBounds: (first, first + utf8Length))
  }

  if utf8Length.magnitude <= _breadcrumbStride / 2 {
    // Short range: measure its length directly rather than resolving the
    // offsets of both bounds separately.
    lowerIndex = _utf16AlignNativeIndex(lowerIndex)
    upperIndex = _utf16AlignNativeIndex(upperIndex)
    let first = self.distance(from: start, to: lowerIndex)
    let length = _utf16Distance(from: lowerIndex, to: upperIndex)
    return Range(uncheckedBounds: (first, first + length))
  }

  let bias = _nativeGetOffset(for: start)
  let utf8Lead = lowerIndex._encodedOffset - start._encodedOffset

  // If the lower bound sits close to `start`, count the code units between
  // them directly; otherwise resolve its offset and rebase it onto `start`.
  let first: Int
  if utf8Lead <= _breadcrumbStride / 2 {
    first = _utf16Distance(from: start, to: lowerIndex)
  } else {
    first = _nativeGetOffset(for: lowerIndex) - bias
  }
  let last = _nativeGetOffset(for: upperIndex) - bias
  return Range(uncheckedBounds: (first, last))
}
407
+
271
408
/// Accesses the code unit at the given position.
272
409
///
273
410
/// The following example uses the subscript to print the value of a
@@ -618,8 +755,7 @@ extension String.UTF16View {
618
755
return utf16Count
619
756
}
620
757
#endif
621
-
622
- @inline ( __always)
758
+
623
759
internal func _utf16Distance( from start: Index , to end: Index ) -> Int {
624
760
_internalInvariant ( end. transcodedOffset == 0 || end. transcodedOffset == 1 )
625
761
@@ -691,17 +827,24 @@ extension String.UTF16View {
691
827
}
692
828
}
693
829
830
+ /// Return the UTF-16 offset corresponding to `idx`, measured from the
831
+ /// start of this string, which must be a native UTF-8 string.
832
+ ///
833
+ /// - Complexity: This measures the UTF-16 distance of `idx` from its nearest
834
+ /// breadcrumb index (rounding down), so on average it needs to look at
835
+ /// `breadcrumbStride / 2` UTF-16 code units. (In addition to the O(log(n))
836
+ /// cost of looking up the nearest breadcrumb, and the amortizable O(n)
837
+ /// cost of generating the breadcrumbs in the first place.)
694
838
@usableFromInline
695
839
@_effects ( releasenone)
696
840
internal func _nativeGetOffset( for idx: Index ) -> Int {
697
841
_internalInvariant ( idx. _encodedOffset <= _guts. count)
698
- // Trivial and common: start
699
- if idx == startIndex { return 0 }
700
-
701
842
if _guts. isASCII {
702
843
_internalInvariant ( idx. transcodedOffset == 0 )
703
844
return idx. _encodedOffset
704
845
}
846
+ // Trivial and common: start
847
+ if idx == startIndex { return 0 }
705
848
706
849
let idx = _utf16AlignNativeIndex ( idx)
707
850
@@ -714,11 +857,22 @@ extension String.UTF16View {
714
857
if idx == endIndex { return breadcrumbsPtr. pointee. utf16Length }
715
858
716
859
// Otherwise, find the nearest lower-bound breadcrumb and count from there
860
+ // FIXME: Starting from the upper-bound crumb when that is closer would cut
861
+ // the average cost of the subsequent iteration by 50%.
717
862
let ( crumb, crumbOffset) = breadcrumbsPtr. pointee. getBreadcrumb (
718
863
forIndex: idx)
719
864
return crumbOffset + _utf16Distance( from: crumb, to: idx)
720
865
}
721
866
867
+ /// Return the index at the given UTF-16 offset, measured from the
868
+ /// start of this string, which must be a native UTF-8 string.
869
+ ///
870
+ /// - Complexity: This iterates UTF-16 code units starting from the
871
+ /// nearest breadcrumb to `offset` (rounding down), so on
872
+ /// average it needs to look at `breadcrumbStride / 2` UTF-16 code
873
+ /// units. (In addition to the O(1) cost of looking up the nearest
874
+ /// breadcrumb, and the amortizable O(n) cost of generating the
875
+ /// breadcrumbs in the first place.)
722
876
@usableFromInline
723
877
@_effects ( releasenone)
724
878
internal func _nativeGetIndex( for offset: Int ) -> Index {
@@ -742,6 +896,8 @@ extension String.UTF16View {
742
896
if offset == breadcrumbsPtr. pointee. utf16Length { return endIndex }
743
897
744
898
// Otherwise, find the nearest lower-bound breadcrumb and advance that
899
+ // FIXME: Starting from the upper-bound crumb when that is closer would cut
900
+ // the average cost of the subsequent iteration by 50%.
745
901
let ( crumb, remaining) = breadcrumbsPtr. pointee. getBreadcrumb (
746
902
forOffset: offset)
747
903
if remaining == 0 { return crumb }
0 commit comments