Skip to content

Commit cb082e1

Browse files
authored
Merge pull request #41866 from Catfish-Man/what-a-crumb-y-optimization
Vectorize UTF16 offset calculations
2 parents 32cdb77 + eaf3f31 commit cb082e1

File tree

2 files changed

+104
-7
lines changed

2 files changed

+104
-7
lines changed

stdlib/public/core/StringUTF16View.swift

Lines changed: 104 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -137,7 +137,7 @@ extension String.UTF16View: BidirectionalCollection {
137137
/// In an empty UTF-16 view, `endIndex` is equal to `startIndex`.
138138
@inlinable @inline(__always)
139139
public var endIndex: Index { return _guts.endIndex }
140-
140+
141141
@inlinable @inline(__always)
142142
public func index(after idx: Index) -> Index {
143143
if _slowPath(_guts.isForeign) { return _foreignIndex(after: idx) }
@@ -149,6 +149,7 @@ extension String.UTF16View: BidirectionalCollection {
149149
// TODO: If transcoded is 1, can we just skip ahead 4?
150150

151151
let idx = _utf16AlignNativeIndex(idx)
152+
152153
let len = _guts.fastUTF8ScalarLength(startingAt: idx._encodedOffset)
153154
if len == 4 && idx.transcodedOffset == 0 {
154155
return idx.nextTranscoded
@@ -518,6 +519,105 @@ extension _StringGuts {
518519
}
519520

520521
extension String.UTF16View {
522+
523+
/// Counts UTF-16 code units for the UTF-8 bytes starting at `readPtr`,
/// consuming one SIMD vector of bytes per iteration.
///
/// Each non-continuation byte contributes one code unit, and each byte
/// `>= 0b11110000` (the lead of a four-byte scalar) contributes one more.
///
/// The loop only runs while `readPtr + MemoryLayout<U>.stride` is strictly
/// below `endPtr`, so whenever at least one vector was consumed, `readPtr`
/// still points at least one byte before `endPtr` on return. Any remaining
/// bytes are left for the caller to handle.
///
/// - Parameters:
///   - readPtr: Current read position; advanced past every vector consumed.
///   - endPtr: One past the last readable byte.
///   - unsignedSIMDType: Vector type used for the unsigned (`UInt8`) view.
///   - signedSIMDType: Vector type used for the signed (`Int8`) view.
/// - Returns: The number of UTF-16 code units attributed to consumed bytes.
@inline(__always)
internal func _utf16Length<U: SIMD, S: SIMD>(
  readPtr: inout UnsafeRawPointer,
  endPtr: UnsafeRawPointer,
  unsignedSIMDType: U.Type,
  signedSIMDType: S.Type
) -> Int where U.Scalar == UInt8, S.Scalar == Int8 {
  let vectorStride = MemoryLayout<U>.stride
  var total = 0

  while readPtr + vectorStride < endPtr {
    // Continuation bytes (0b10xxxxxx) are 0x80...0xBF, which is -128 ... -65
    // when viewed as Int8, i.e. everything strictly below -64.
    let signedLanes: S = Builtin.loadRaw(readPtr._rawValue)
    let continuationMask = signedLanes .< -64
    let continuationCount = Int(
      S.zero.replacing(with: S.one, where: continuationMask).wrappedSum())

    // Leads of four-byte scalars (0b11110xxx) need a surrogate pair, so each
    // one is worth an extra code unit.
    let unsignedLanes: U = Builtin.loadRaw(readPtr._rawValue)
    let fourByteLeadMask = unsignedLanes .>= 0b11110000
    let fourByteLeadCount = Int(
      U.zero.replacing(with: U.one, where: fourByteLeadMask).wrappedSum())

    total &+= (U.scalarCount - continuationCount) + fourByteLeadCount
    readPtr += vectorStride
  }

  return total
}
550+
551+
/// Computes the UTF-16 distance from `start` to `end` by scanning the UTF-8
/// bytes between their encoded offsets.
///
/// The scan has four phases:
/// 1. skip any continuation bytes at the very start of the range,
/// 2. count whole SIMD8 vectors with `_utf16Length`,
/// 3. if the vectorized pass stopped in the middle of a scalar, back up to
///    that scalar's lead byte and un-count it,
/// 4. walk the remaining bytes one scalar at a time, counting only scalars
///    that fit entirely before the end of the range.
///
/// The difference of the two indices' `transcodedOffset`s is added to the
/// scanned count.
///
/// - Parameters:
///   - start: Lower index.
///   - end: Upper index; its `transcodedOffset` must be 0 or 1.
/// - Returns: The number of UTF-16 code units between `start` and `end`.
@inline(__always)
internal func _utf16Distance(from start: Index, to end: Index) -> Int {
  _internalInvariant(end.transcodedOffset == 0 || end.transcodedOffset == 1)

  let byteRange = start._encodedOffset ..< end._encodedOffset
  let scannedCount: Int = _guts.withFastUTF8(range: byteRange) { utf8 -> Int in
    let bytes = UnsafeRawBufferPointer(utf8)
    guard !bytes.isEmpty else { return 0 }

    let firstByte = bytes.baseAddress.unsafelyUnwrapped
    let endPtr = firstByte + bytes.count
    var cursor = firstByte

    // Skip continuation bytes at the front of the range.
    while cursor < endPtr, UTF8.isContinuation(cursor.load(as: UInt8.self)) {
      cursor += 1
    }

    // TODO: Currently, using SIMD sizes above SIMD8 is slower.
    // Once that's fixed we should go up to SIMD64 here.
    var utf16Count = _utf16Length(
      readPtr: &cursor,
      endPtr: endPtr,
      unsignedSIMDType: SIMD8<UInt8>.self,
      signedSIMDType: SIMD8<Int8>.self
    )

    // TO CONSIDER: SIMD widths <8 here

    // If the vectorized pass stopped inside a scalar, rewind to that scalar's
    // lead byte. A non-zero count means at least one vector was consumed, in
    // which case `cursor` is still strictly before `endPtr` and safe to load.
    if utf16Count > 0 && UTF8.isContinuation(cursor.load(as: UInt8.self)) {
      while cursor > firstByte
        && UTF8.isContinuation(cursor.load(as: UInt8.self)) {
        cursor -= 1
      }

      // The trailing scalar may be incomplete: remove its contribution here
      // and let the tail loop below decide whether it counts.
      let leadByte = cursor.load(as: UInt8.self)
      let scalarLength = _utf8ScalarLength(leadByte)
      utf16Count &-= scalarLength == 4 ? 2 : 1

      if cursor == firstByte {
        // We rewound all the way to the start of the range, so there are no
        // complete scalars to report.
        return 0
      }
    }

    // Tail: step scalar by scalar through whatever is left.
    while cursor < endPtr {
      let scalarLength = _utf8ScalarLength(cursor.load(as: UInt8.self))
      // A scalar that runs past the end of the range is incomplete and is
      // not counted.
      if cursor + scalarLength <= endPtr {
        utf16Count &+= scalarLength == 4 ? 2 : 1
      }
      cursor += scalarLength
    }

    return utf16Count
  }

  return (end.transcodedOffset - start.transcodedOffset) + scannedCount
}
620+
521621
@usableFromInline
522622
@_effects(releasenone)
523623
internal func _nativeGetOffset(for idx: Index) -> Int {
@@ -532,9 +632,7 @@ extension String.UTF16View {
532632
let idx = _utf16AlignNativeIndex(idx)
533633

534634
guard _guts._useBreadcrumbs(forEncodedOffset: idx._encodedOffset) else {
535-
// TODO: Generic _distance is still very slow. We should be able to
536-
// skip over ASCII substrings quickly
537-
return _distance(from: startIndex, to: idx)
635+
return _utf16Distance(from: startIndex, to: idx)
538636
}
539637

540638
// Simple and common: endIndex aka `length`.
@@ -544,7 +642,8 @@ extension String.UTF16View {
544642
// Otherwise, find the nearest lower-bound breadcrumb and count from there
545643
let (crumb, crumbOffset) = breadcrumbsPtr.pointee.getBreadcrumb(
546644
forIndex: idx)
547-
return crumbOffset + _distance(from: crumb, to: idx)
645+
646+
return crumbOffset + _utf16Distance(from: crumb, to: idx)
548647
}
549648

550649
@usableFromInline

validation-test/stdlib/StringBreadcrumbs.swift

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,3 @@
1-
// rdar://84233775
2-
// REQUIRES: rdar84233775
31

42
// RUN: %target-run-stdlib-swift
53
// REQUIRES: executable_test,optimized_stdlib

0 commit comments

Comments
 (0)