Skip to content

Commit c3c6fdc

Browse files
authored
[String] ASCII fast-path for UTF16View (#20848)
Add an `isASCII` fast path to many `UTF16View` operations. These operations are heavily used in random-access scenarios, so the fast path both makes them more efficient and lets us skip generating breadcrumbs for ASCII strings.
1 parent 70bbeaa commit c3c6fdc

File tree

1 file changed

+33
-20
lines changed

1 file changed

+33
-20
lines changed

stdlib/public/core/StringUTF16View.swift

Lines changed: 33 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,8 @@ extension String.UTF16View {
117117
#else
118118
@usableFromInline @inline(never) @_effects(releasenone)
119119
internal func _invariantCheck() {
120-
// TODO: Ensure start/end are not sub-scalar UTF-8 transcoded indices
120+
_internalInvariant(
121+
startIndex.transcodedOffset == 0 && endIndex.transcodedOffset == 0)
121122
}
122123
#endif // INTERNAL_CHECKS_ENABLED
123124
}
@@ -143,9 +144,8 @@ extension String.UTF16View: BidirectionalCollection {
143144

144145
@inlinable @inline(__always)
145146
public func index(after i: Index) -> Index {
146-
// TODO(String performance) known-ASCII fast path
147-
148147
if _slowPath(_guts.isForeign) { return _foreignIndex(after: i) }
148+
if _guts.isASCII { return i.nextEncoded }
149149

150150
// For a BMP scalar (1-3 UTF-8 code units), advance past it. For a non-BMP
151151
// scalar, use a transcoded offset first.
@@ -159,9 +159,8 @@ extension String.UTF16View: BidirectionalCollection {
159159
@inlinable @inline(__always)
160160
public func index(before i: Index) -> Index {
161161
precondition(!i.isZeroPosition)
162-
// TODO(String performance) known-ASCII fast path
163-
164162
if _slowPath(_guts.isForeign) { return _foreignIndex(before: i) }
163+
if _guts.isASCII { return i.priorEncoded }
165164

166165
if i.transcodedOffset != 0 {
167166
_internalInvariant(i.transcodedOffset == 1)
@@ -181,8 +180,6 @@ extension String.UTF16View: BidirectionalCollection {
181180
}
182181

183182
public func index(_ i: Index, offsetBy n: Int) -> Index {
184-
// TODO(String performance) known-ASCII fast path
185-
186183
if _slowPath(_guts.isForeign) {
187184
return _foreignIndex(i, offsetBy: n)
188185
}
@@ -195,7 +192,6 @@ extension String.UTF16View: BidirectionalCollection {
195192
public func index(
196193
_ i: Index, offsetBy n: Int, limitedBy limit: Index
197194
) -> Index? {
198-
// TODO(String performance) known-ASCII fast path
199195
if _slowPath(_guts.isForeign) {
200196
return _foreignIndex(i, offsetBy: n, limitedBy: limit)
201197
}
@@ -217,7 +213,6 @@ extension String.UTF16View: BidirectionalCollection {
217213
}
218214

219215
public func distance(from start: Index, to end: Index) -> Int {
220-
// TODO(String performance) known-ASCII fast path
221216
if _slowPath(_guts.isForeign) {
222217
return _foreignDistance(from: start, to: end)
223218
}
@@ -250,7 +245,6 @@ extension String.UTF16View: BidirectionalCollection {
250245
@inlinable
251246
public subscript(i: Index) -> UTF16.CodeUnit {
252247
@inline(__always) get {
253-
// TODO(String performance) known-ASCII fast path
254248
String(_guts)._boundsCheck(i)
255249

256250
if _fastPath(_guts.isFastUTF8) {
@@ -267,16 +261,16 @@ extension String.UTF16View: BidirectionalCollection {
267261
}
268262
}
269263
extension String.UTF16View: CustomStringConvertible {
270-
@inlinable
271-
public var description: String {
272-
@inline(__always) get { return String(_guts) }
273-
}
264+
@inlinable
265+
public var description: String {
266+
@inline(__always) get { return String(_guts) }
267+
}
274268
}
275269

276270
extension String.UTF16View: CustomDebugStringConvertible {
277-
public var debugDescription: String {
278-
return "StringUTF16(\(self.description.debugDescription))"
279-
}
271+
public var debugDescription: String {
272+
return "StringUTF16(\(self.description.debugDescription))"
273+
}
280274
}
281275

282276
extension String {
@@ -462,8 +456,11 @@ extension String.UTF16View {
462456
// Trivial and common: start
463457
if idx == startIndex { return 0 }
464458

465-
if _guts.isASCII { return idx.encodedOffset }
466-
459+
if _guts.isASCII {
460+
_internalInvariant(idx.transcodedOffset == 0)
461+
return idx.encodedOffset
462+
}
463+
467464
if idx.encodedOffset < _shortHeuristic || !_guts.hasBreadcrumbs {
468465
return _distance(from: startIndex, to: idx)
469466
}
@@ -483,7 +480,9 @@ extension String.UTF16View {
483480
internal func _nativeGetIndex(for offset: Int) -> Index {
484481
// Trivial and common: start
485482
if offset == 0 { return startIndex }
486-
483+
484+
if _guts.isASCII { return Index(encodedOffset: offset) }
485+
487486
if offset < _shortHeuristic || !_guts.hasBreadcrumbs {
488487
return _index(startIndex, offsetBy: offset)
489488
}
@@ -542,12 +541,26 @@ extension String {
542541

543542
if _slowPath(range.isEmpty) { return }
544543

544+
let isASCII = _guts.isASCII
545545
return _guts.withFastUTF8 { utf8 in
546546
var writeIdx = 0
547547
let writeEnd = buffer.count
548548
var readIdx = range.lowerBound.encodedOffset
549549
let readEnd = range.upperBound.encodedOffset
550550

551+
if isASCII {
552+
_internalInvariant(range.lowerBound.transcodedOffset == 0)
553+
_internalInvariant(range.upperBound.transcodedOffset == 0)
554+
while readIdx < readEnd {
555+
_internalInvariant(utf8[readIdx] < 0x80)
556+
buffer[_unchecked: writeIdx] = UInt16(
557+
truncatingIfNeeded: utf8[_unchecked: readIdx])
558+
readIdx &+= 1
559+
writeIdx &+= 1
560+
}
561+
return
562+
}
563+
551564
// Handle mid-transcoded-scalar initial index
552565
if _slowPath(range.lowerBound.transcodedOffset != 0) {
553566
_internalInvariant(range.lowerBound.transcodedOffset == 1)

0 commit comments

Comments
 (0)