Skip to content

Commit 81e87ac

Browse files
authored
Merge pull request #21959 from milseman/fast_foundation
[String] Add UTF-8 fast-paths for Foundation initializers
2 parents 753eb53 + 3df9291 commit 81e87ac

File tree

3 files changed

+56
-12
lines changed

3 files changed

+56
-12
lines changed

stdlib/public/Darwin/Foundation/NSStringAPI.swift

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,10 @@ extension String {
178178
/// Creates a string by copying the data from a given
179179
/// C array of UTF8-encoded bytes.
180180
public init?(utf8String bytes: UnsafePointer<CChar>) {
181+
if let str = String(validatingUTF8: bytes) {
182+
self = str
183+
return
184+
}
181185
if let ns = NSString(utf8String: bytes) {
182186
self = String._unconditionallyBridgeFromObjectiveC(ns)
183187
} else {
@@ -202,12 +206,18 @@ extension String {
202206
/// - Parameters:
203207
/// - bytes: A sequence of bytes to interpret using `encoding`.
204208
/// - encoding: The encoding to use to interpret `bytes`.
205-
public init? <S: Sequence>(bytes: __shared S, encoding: Encoding)
206-
where S.Iterator.Element == UInt8 {
209+
public init?<S: Sequence>(bytes: __shared S, encoding: Encoding)
210+
where S.Iterator.Element == UInt8 {
207211
let byteArray = Array(bytes)
212+
if encoding == .utf8,
213+
let str = byteArray.withUnsafeBufferPointer({ String._tryFromUTF8($0) })
214+
{
215+
self = str
216+
return
217+
}
218+
208219
if let ns = NSString(
209220
bytes: byteArray, length: byteArray.count, encoding: encoding.rawValue) {
210-
211221
self = String._unconditionallyBridgeFromObjectiveC(ns)
212222
} else {
213223
return nil
@@ -365,6 +375,10 @@ extension String {
365375
cString: UnsafePointer<CChar>,
366376
encoding enc: Encoding
367377
) {
378+
if enc == .utf8, let str = String(validatingUTF8: cString) {
379+
self = str
380+
return
381+
}
368382
if let ns = NSString(cString: cString, encoding: enc.rawValue) {
369383
self = String._unconditionallyBridgeFromObjectiveC(ns)
370384
} else {
@@ -381,6 +395,14 @@ extension String {
381395
/// Returns a `String` initialized by converting given `data` into
382396
/// Unicode characters using a given `encoding`.
383397
public init?(data: __shared Data, encoding: Encoding) {
398+
if encoding == .utf8,
399+
let str = data.withUnsafeBytes({
400+
String._tryFromUTF8($0.bindMemory(to: UInt8.self))
401+
}) {
402+
self = str
403+
return
404+
}
405+
384406
guard let s = NSString(data: data, encoding: encoding.rawValue) else { return nil }
385407
self = String._unconditionallyBridgeFromObjectiveC(s)
386408
}

stdlib/public/core/StringCreate.swift

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,33 @@
1515
internal func _allASCII(_ input: UnsafeBufferPointer<UInt8>) -> Bool {
1616
// NOTE: Avoiding for-in syntax to avoid bounds checks
1717
//
18-
// TODO(String performance): Vectorize and/or incorporate into validity
19-
// checking, perhaps both.
18+
// TODO(String performance): SIMD-ize
2019
//
2120
let ptr = input.baseAddress._unsafelyUnwrappedUnchecked
2221
var i = 0
23-
while i < input.count {
24-
guard ptr[i] <= 0x7F else { return false }
22+
23+
let count = input.count
24+
let stride = MemoryLayout<UInt>.stride
25+
let address = Int(bitPattern: ptr)
26+
27+
let wordASCIIMask = UInt(truncatingIfNeeded: 0x8080_8080_8080_8080 as UInt64)
28+
let byteASCIIMask = UInt8(truncatingIfNeeded: wordASCIIMask)
29+
30+
while (address &+ i) % stride != 0 && i < count {
31+
guard ptr[i] & byteASCIIMask == 0 else { return false }
32+
i &+= 1
33+
}
34+
35+
while (i &+ stride) <= count {
36+
let word: UInt = UnsafePointer(
37+
bitPattern: address &+ i
38+
)._unsafelyUnwrappedUnchecked.pointee
39+
guard word & wordASCIIMask == 0 else { return false }
40+
i &+= stride
41+
}
42+
43+
while i < count {
44+
guard ptr[i] & byteASCIIMask == 0 else { return false }
2545
i &+= 1
2646
}
2747
return true
@@ -42,12 +62,10 @@ extension String {
4262
return storage.asString
4363
}
4464

45-
@usableFromInline
46-
internal static func _tryFromUTF8(
47-
_ input: UnsafeBufferPointer<UInt8>
48-
) -> String? {
65+
public // SPI(Foundation)
66+
static func _tryFromUTF8(_ input: UnsafeBufferPointer<UInt8>) -> String? {
4967
guard case .success(let extraInfo) = validateUTF8(input) else {
50-
return nil
68+
return nil
5169
}
5270

5371
return String._uncheckedFromUTF8(input, isASCII: extraInfo.isASCII)

stdlib/public/core/StringUTF8Validation.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ extension UTF8ValidationResult: Equatable {}
3636
private struct UTF8ValidationError: Error {}
3737

3838
internal func validateUTF8(_ buf: UnsafeBufferPointer<UInt8>) -> UTF8ValidationResult {
39+
if _allASCII(buf) {
40+
return .success(UTF8ExtraInfo(isASCII: true))
41+
}
42+
3943
var iter = buf.makeIterator()
4044
var lastValidIndex = buf.startIndex
4145

0 commit comments

Comments
 (0)