Skip to content

Commit 3df9291

Browse files
committed
[String] Speed up ASCII checking.
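Perform ASCII checking in pointer-width strides: check leading bytes one at a time until the address is word-aligned, then test a whole word per iteration, and finish any trailing bytes individually.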
1 parent a088e13 commit 3df9291

File tree

2 files changed

+28
-4
lines changed

2 files changed

+28
-4
lines changed

stdlib/public/core/StringCreate.swift

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,33 @@
1515
internal func _allASCII(_ input: UnsafeBufferPointer<UInt8>) -> Bool {
1616
// NOTE: Avoiding for-in syntax to avoid bounds checks
1717
//
18-
// TODO(String performance): Vectorize and/or incorporate into validity
19-
// checking, perhaps both.
18+
// TODO(String performance): SIMD-ize
2019
//
2120
let ptr = input.baseAddress._unsafelyUnwrappedUnchecked
2221
var i = 0
23-
while i < input.count {
24-
guard ptr[i] <= 0x7F else { return false }
22+
23+
let count = input.count
24+
let stride = MemoryLayout<UInt>.stride
25+
let address = Int(bitPattern: ptr)
26+
27+
let wordASCIIMask = UInt(truncatingIfNeeded: 0x8080_8080_8080_8080 as UInt64)
28+
let byteASCIIMask = UInt8(truncatingIfNeeded: wordASCIIMask)
29+
30+
while (address &+ i) % stride != 0 && i < count {
31+
guard ptr[i] & byteASCIIMask == 0 else { return false }
32+
i &+= 1
33+
}
34+
35+
while (i &+ stride) <= count {
36+
let word: UInt = UnsafePointer(
37+
bitPattern: address &+ i
38+
)._unsafelyUnwrappedUnchecked.pointee
39+
guard word & wordASCIIMask == 0 else { return false }
40+
i &+= stride
41+
}
42+
43+
while i < count {
44+
guard ptr[i] & byteASCIIMask == 0 else { return false }
2545
i &+= 1
2646
}
2747
return true

stdlib/public/core/StringUTF8Validation.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ extension UTF8ValidationResult: Equatable {}
3636
private struct UTF8ValidationError: Error {}
3737

3838
internal func validateUTF8(_ buf: UnsafeBufferPointer<UInt8>) -> UTF8ValidationResult {
39+
if _allASCII(buf) {
40+
return .success(UTF8ExtraInfo(isASCII: true))
41+
}
42+
3943
var iter = buf.makeIterator()
4044
var lastValidIndex = buf.startIndex
4145

0 commit comments

Comments
 (0)