Skip to content

Commit d00f8ed

Browse files
committed
[stdlib] Optimize StringProtocol._toUTF16Indices/_toUTF16Offsets
Speed up conversion between UTF-16 offset ranges and string index ranges, by carefully switching between absolute and relative index calculations, depending on the distance we need to go. It is a surprisingly tricky puzzle to do this correctly while avoiding redundant calculations. Offset ranges within substrings add the additional complication of having to bias offset values with the absolute offset of the substring’s start index.
1 parent ec35728 commit d00f8ed

File tree

2 files changed

+84
-6
lines changed

2 files changed

+84
-6
lines changed

stdlib/public/core/StringBridge.swift

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -795,19 +795,33 @@ extension StringProtocol {
795795
@_specialize(where Self == Substring)
796796
public // SPI(Foundation)
797797
func _toUTF16Offsets(_ indices: Range<Index>) -> Range<Int> {
// Converts a range of string indices into the corresponding range of UTF-16
// offsets. In this diff view, lines following an `NNN-` marker are the
// previous implementation (lower bound offset + UTF-16 distance); lines
// following an `NNN+` marker are the replacement.
798-
let lowerbound = _toUTF16Offset(indices.lowerBound)
799-
let length = self.utf16.distance(
800-
from: indices.lowerBound, to: indices.upperBound)
801-
return Range(
802-
uncheckedBounds: (lower: lowerbound, upper: lowerbound + length))
// String fast path: the bit cast is only reached after the metatype check
// above it confirms that Self is exactly String. Offsets are measured from
// the string's own start index.
798+
if Self.self == String.self {
799+
let s = unsafeBitCast(self, to: String.self)
800+
return s.utf16._offsetRange(for: indices, from: s.startIndex)
801+
}
// Substring fast path: the conversion runs on the base string's UTF-16 view
// (`s._slice._base.utf16`), but offsets are still measured from the
// substring's start index, which is passed as `from:`.
802+
if Self.self == Substring.self {
803+
let s = unsafeBitCast(self, to: Substring.self)
804+
return s._slice._base.utf16._offsetRange(for: indices, from: s.startIndex)
805+
}
// Fallback for any other conforming type: convert each bound independently.
// The range is built with `uncheckedBounds`, so no lower <= upper check is
// performed here.
806+
let startOffset = _toUTF16Offset(indices.lowerBound)
807+
let endOffset = _toUTF16Offset(indices.upperBound)
808+
return Range(uncheckedBounds: (lower: startOffset, upper: endOffset))
803809
}
804810

805811
@_specialize(where Self == String)
806812
@_specialize(where Self == Substring)
807813
public // SPI(Foundation)
808814
func _toUTF16Indices(_ range: Range<Int>) -> Range<Index> {
// Converts a range of UTF-16 offsets into the corresponding range of string
// indices. This is the inverse direction of `_toUTF16Offsets`.
//
// String fast path: the bit cast is only reached after the metatype check
// confirms that Self is exactly String.
815+
if Self.self == String.self {
816+
let s = unsafeBitCast(self, to: String.self)
817+
return s.utf16._indexRange(for: range, from: s.startIndex)
818+
}
// Substring fast path: the lookup runs on the base string's UTF-16 view,
// with the substring's start index passed as the origin that the offsets in
// `range` are counted from.
819+
if Self.self == Substring.self {
820+
let s = unsafeBitCast(self, to: Substring.self)
821+
return s._slice._base.utf16._indexRange(for: range, from: s.startIndex)
822+
}
// Fallback for any other conforming type. The removed (`810-`) line derived
// the upper bound by advancing from the lower bound by `range.count`; the
// added (`824+`) line converts the upper offset directly instead.
809823
let lowerbound = _toUTF16Index(range.lowerBound)
810-
let upperbound = self.utf16.index(lowerbound, offsetBy: range.count)
824+
let upperbound = _toUTF16Index(range.upperBound)
811825
return Range(uncheckedBounds: (lower: lowerbound, upper: upperbound))
812826
}
813827
}

stdlib/public/core/StringUTF16View.swift

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,70 @@ extension String.UTF16View: BidirectionalCollection {
299299
return _nativeGetOffset(for: endIndex)
300300
}
301301

302+
/// Converts a range of UTF-16 offsets, counted from `start`, into a range of
/// indices in this view.
///
/// Three strategies are used, chosen by storage kind and by the length of the
/// requested range:
///
/// 1. Foreign storage: find the lower bound with `index(_:offsetBy:)` from
///    `start`, then advance by `offsets.count` with `_foreignIndex`.
/// 2. Non-ASCII storage and a range shorter than half of `_breadcrumbStride`:
///    find the lower bound from `start`, then step `offsets.count` units
///    forward from it.
/// 3. Otherwise: add `bias` (the value `_nativeGetOffset` returns for `start`)
///    to the offsets and resolve them with `_nativeGetIndex`.
///
/// - Parameters:
///   - offsets: The UTF-16 offset range to convert, relative to `start`.
///   - start: The index that offset zero corresponds to. An internal
///     invariant asserts that its encoding matches the string's storage.
/// - Returns: The index range for `offsets`, built with `uncheckedBounds`.
internal func _indexRange(
303+
for offsets: Range<Int>,
304+
from start: Index
305+
) -> Range<Index> {
306+
_internalInvariant(_guts.hasMatchingEncoding(start))
307+
if _slowPath(_guts.isForeign) {
308+
let lower = self.index(start, offsetBy: offsets.lowerBound)
309+
let upper = _foreignIndex(lower, offsetBy: offsets.count)
310+
return Range(uncheckedBounds: (lower, upper))
311+
}
// Short range in a non-ASCII string: compute the upper bound relative to the
// lower bound rather than resolving it separately. ASCII strings skip this
// branch and always take the path below.
312+
if offsets.count < _breadcrumbStride / 2, !_guts.isASCII {
313+
let lower = self.index(start, offsetBy: offsets.lowerBound)
314+
let upper = _index(lower, offsetBy: offsets.count)._knownUTF8
315+
return Range(uncheckedBounds: (lower, upper))
316+
}
317+
// `bias` is added to the start-relative offsets before they are handed to
// `_nativeGetIndex` -- presumably it is the absolute UTF-16 offset of
// `start`; confirm against `_nativeGetOffset`.
318+
let bias = _nativeGetOffset(for: start)
// NOTE(review): `offsets.lowerBound` is relative to `start`, yet `bias` is
// subtracted from it before the threshold comparison. `_offsetRange` makes
// the equivalent decision with a purely relative distance. Both arms should
// yield the same position, so this appears to affect only which strategy is
// picked -- confirm whether `offsets.lowerBound <= _breadcrumbStride / 2`
// was intended.
319+
let lower = (
320+
offsets.lowerBound - bias <= _breadcrumbStride / 2
321+
? _index(start, offsetBy: offsets.lowerBound)
322+
: _nativeGetIndex(for: bias + offsets.lowerBound))
323+
let upper = _nativeGetIndex(for: bias + offsets.upperBound)
324+
return Range(uncheckedBounds: (lower, upper))
325+
}
326+
327+
/// Converts a range of indices into a range of UTF-16 offsets counted from
/// `start`.
///
/// Both bounds are first passed through `_guts.ensureMatchingEncoding`, then
/// checked against `_guts.count`. The offsets are then computed with one of
/// three strategies:
///
/// 1. Foreign storage: `_foreignDistance` from `start` to the lower bound,
///    plus `_foreignDistance` from the lower to the upper bound.
/// 2. Non-ASCII storage where the bounds are at most half of
///    `_breadcrumbStride` apart in encoded offset: the distance from `start`
///    to the lower bound, plus `_utf16Distance` between the bounds.
/// 3. Otherwise: `_nativeGetOffset` of each bound minus `bias` (the value
///    `_nativeGetOffset` returns for `start`), except that a lower bound close
///    to `start` is measured directly with `_utf16Distance`.
///
/// - Parameters:
///   - range: The index range to convert.
///   - start: The index that offset zero corresponds to. An internal
///     invariant asserts that its encoding matches the string's storage.
/// - Returns: The offset range for `range`, built with `uncheckedBounds`.
/// - Precondition: The encoded offset of each bound is at most `_guts.count`.
///   The check is conditional on `ifLinkedOnOrAfter: .v5_7_0`.
internal func _offsetRange(
328+
for range: Range<Index>,
329+
from start: Index
330+
) -> Range<Int> {
331+
let lower = _guts.ensureMatchingEncoding(range.lowerBound)
332+
let upper = _guts.ensureMatchingEncoding(range.upperBound)
333+
_internalInvariant(_guts.hasMatchingEncoding(start))
334+
335+
_precondition(
336+
ifLinkedOnOrAfter: .v5_7_0,
337+
lower._encodedOffset <= _guts.count,
338+
"String index is out of bounds")
339+
_precondition(
340+
ifLinkedOnOrAfter: .v5_7_0,
341+
upper._encodedOffset <= _guts.count,
342+
"String index is out of bounds")
343+
344+
if _slowPath(_guts.isForeign) {
345+
let lowerOffset = _foreignDistance(from: start, to: lower)
346+
let distance = _foreignDistance(from: lower, to: upper)
347+
return Range(uncheckedBounds: (lowerOffset, lowerOffset + distance))
348+
}
349+
// Difference between the bounds' encoded offsets; used only to choose a
// strategy. `.magnitude` is taken below, so the sign does not matter.
350+
let utf8Distance = upper._encodedOffset - lower._encodedOffset
351+
if utf8Distance.magnitude <= _breadcrumbStride / 2, !_guts.isASCII {
// NOTE(review): the call on the next code line must resolve to the view's
// `distance(from:to:)` method, because the local constant `distance` is
// only declared one line later. Renaming the local would make this
// clearer -- confirm.
352+
let lowerOffset = distance(from: start, to: lower)
353+
let distance = _utf16Distance(from: lower, to: upper)
354+
return Range(uncheckedBounds: (lowerOffset, lowerOffset + distance))
355+
}
// `bias` is subtracted from each `_nativeGetOffset` result to make it
// relative to `start` -- presumably it is the absolute UTF-16 offset of
// `start`; confirm against `_nativeGetOffset`.
356+
let bias = _nativeGetOffset(for: start)
357+
let utf8StartOffset = lower._encodedOffset - start._encodedOffset
// A lower bound close to `start` (by encoded offset) is measured directly;
// otherwise it goes through `_nativeGetOffset` like the upper bound.
358+
let lowerOffset = (
359+
utf8StartOffset <= _breadcrumbStride / 2
360+
? _utf16Distance(from: start, to: lower)
361+
: _nativeGetOffset(for: lower) - bias)
362+
let upperOffset = _nativeGetOffset(for: upper) - bias
363+
return Range(uncheckedBounds: (lowerOffset, upperOffset))
364+
}
365+
302366
/// Accesses the code unit at the given position.
303367
///
304368
/// The following example uses the subscript to print the value of a

0 commit comments

Comments
 (0)