Skip to content

Commit c1ab69e

Browse files
authored
Merge pull request #30729 from milseman/merge_me_if_available
[string] String(decoding:as:) fast path for withContiguousStorageIfAvailable
2 parents 503da8f + 38fce16 commit c1ab69e

File tree

5 files changed

+238
-94
lines changed

5 files changed

+238
-94
lines changed

stdlib/public/core/ContiguouslyStored.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
// NOTE: The below is necessary for fast String initialization from untyped
14+
// memory. When we add Collection.withContiguousRawStorageIfAvailable(), we can
15+
// deprecate this functionality.
16+
1317
@usableFromInline
1418
internal protocol _HasContiguousBytes {
1519
func withUnsafeBytes<R>(

stdlib/public/core/String.swift

Lines changed: 47 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,22 @@ extension String {
388388
}
389389

390390
extension String {
391+
// This force type-casts element to UInt8, since we cannot currently
392+
// communicate to the type checker that we proved this with our dynamic
393+
// check in String(decoding:as:).
394+
@_alwaysEmitIntoClient
395+
@inline(never) // slow-path
396+
private static func _fromNonContiguousUnsafeBitcastUTF8Repairing<
397+
C: Collection
398+
>(_ input: C) -> (result: String, repairsMade: Bool) {
399+
_internalInvariant(C.Element.self == UInt8.self)
400+
return Array(input).withUnsafeBufferPointer {
401+
let raw = UnsafeRawBufferPointer($0)
402+
return String._fromUTF8Repairing(raw.bindMemory(to: UInt8.self))
403+
}
404+
}
405+
406+
391407
/// Creates a string from the given Unicode code units in the specified
392408
/// encoding.
393409
///
@@ -407,8 +423,27 @@ extension String {
407423
return
408424
}
409425

426+
// Fast path for user-defined Collections and typed contiguous collections.
427+
//
428+
// Note: this comes first, as the optimizer nearly always has insight into
429+
// wCSIA, but cannot prove that a type does not have conformance to
430+
// _HasContiguousBytes.
431+
if let str = codeUnits.withContiguousStorageIfAvailable({
432+
(buffer: UnsafeBufferPointer<C.Element>) -> String in
433+
Builtin.onFastPath() // encourage SIL Optimizer to inline this closure :-(
434+
let rawBufPtr = UnsafeRawBufferPointer(buffer)
435+
return String._fromUTF8Repairing(
436+
UnsafeBufferPointer(
437+
start: rawBufPtr.baseAddress?.assumingMemoryBound(to: UInt8.self),
438+
count: rawBufPtr.count)).0
439+
}) {
440+
self = str
441+
return
442+
}
443+
444+
// Fast path for untyped raw storage and known stdlib types
410445
if let contigBytes = codeUnits as? _HasContiguousBytes,
411-
contigBytes._providesContiguousBytesNoCopy
446+
contigBytes._providesContiguousBytesNoCopy
412447
{
413448
self = contigBytes.withUnsafeBytes { rawBufPtr in
414449
return String._fromUTF8Repairing(
@@ -419,15 +454,9 @@ extension String {
419454
return
420455
}
421456

422-
// Just copying to an Array is significantly faster than performing
423-
// generic operations
424-
self = Array(codeUnits).withUnsafeBufferPointer {
425-
let raw = UnsafeRawBufferPointer($0)
426-
return String._fromUTF8Repairing(raw.bindMemory(to: UInt8.self)).0
427-
}
428-
return
457+
self = String._fromNonContiguousUnsafeBitcastUTF8Repairing(codeUnits).0
429458
}
430-
459+
431460
/// Creates a new string with the specified capacity in UTF-8 code units, and
432461
/// then calls the given closure with a buffer covering the string's
433462
/// uninitialized memory.
@@ -484,7 +513,7 @@ extension String {
484513
initializingUTF8With: initializer
485514
)
486515
}
487-
516+
488517
@inline(__always)
489518
internal init(
490519
_uninitializedCapacity capacity: Int,
@@ -503,7 +532,7 @@ extension String {
503532
}
504533
return
505534
}
506-
535+
507536
self = try String._fromLargeUTF8Repairing(
508537
uninitializedCapacity: capacity,
509538
initializingWith: initializer)
@@ -968,20 +997,20 @@ extension _StringGutsSlice {
968997
var outputBuffer = outputBuffer
969998
var icuInputBuffer = icuInputBuffer
970999
var icuOutputBuffer = icuOutputBuffer
971-
1000+
9721001
var index = range.lowerBound
9731002
let cachedEndIndex = range.upperBound
974-
1003+
9751004
var hasBufferOwnership = false
976-
1005+
9771006
defer {
9781007
if hasBufferOwnership {
9791008
outputBuffer.deallocate()
9801009
icuInputBuffer.deallocate()
9811010
icuOutputBuffer.deallocate()
9821011
}
9831012
}
984-
1013+
9851014
while index < cachedEndIndex {
9861015
let result = _foreignNormalize(
9871016
readIndex: index,
@@ -1017,17 +1046,17 @@ internal func _fastWithNormalizedCodeUnitsImpl(
10171046

10181047
var index = String.Index(_encodedOffset: 0)
10191048
let cachedEndIndex = String.Index(_encodedOffset: sourceBuffer.count)
1020-
1049+
10211050
var hasBufferOwnership = false
1022-
1051+
10231052
defer {
10241053
if hasBufferOwnership {
10251054
outputBuffer.deallocate()
10261055
icuInputBuffer.deallocate()
10271056
icuOutputBuffer.deallocate()
10281057
}
10291058
}
1030-
1059+
10311060
while index < cachedEndIndex {
10321061
let result = _fastNormalize(
10331062
readIndex: index,

stdlib/public/core/StringCreate.swift

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ internal func _allASCII(_ input: UnsafeBufferPointer<UInt8>) -> Bool {
5050
}
5151

5252
extension String {
53-
53+
5454
internal static func _uncheckedFromASCII(
5555
_ input: UnsafeBufferPointer<UInt8>
5656
) -> String {
@@ -61,15 +61,15 @@ extension String {
6161
let storage = __StringStorage.create(initializingFrom: input, isASCII: true)
6262
return storage.asString
6363
}
64-
64+
6565
@usableFromInline
6666
internal static func _fromASCII(
6767
_ input: UnsafeBufferPointer<UInt8>
6868
) -> String {
6969
_internalInvariant(_allASCII(input), "not actually ASCII")
7070
return _uncheckedFromASCII(input)
7171
}
72-
72+
7373
internal static func _fromASCIIValidating(
7474
_ input: UnsafeBufferPointer<UInt8>
7575
) -> String? {
@@ -101,7 +101,7 @@ extension String {
101101
return (repairUTF8(input, firstKnownBrokenRange: initialRange), true)
102102
}
103103
}
104-
104+
105105
internal static func _fromLargeUTF8Repairing(
106106
uninitializedCapacity capacity: Int,
107107
initializingWith initializer: (
@@ -111,7 +111,7 @@ extension String {
111111
let result = try __StringStorage.create(
112112
uninitializedCodeUnitCapacity: capacity,
113113
initializingUncheckedUTF8With: initializer)
114-
114+
115115
switch validateUTF8(result.codeUnits) {
116116
case .success(let info):
117117
result._updateCountAndFlags(
@@ -181,7 +181,7 @@ extension String {
181181

182182
return contents.withUnsafeBufferPointer { String._uncheckedFromUTF8($0) }
183183
}
184-
184+
185185
@inline(never) // slow path
186186
private static func _slowFromCodeUnits<
187187
Input: Collection,
@@ -209,7 +209,7 @@ extension String {
209209
let str = contents.withUnsafeBufferPointer { String._uncheckedFromUTF8($0) }
210210
return (str, repaired)
211211
}
212-
212+
213213
@usableFromInline @inline(never) // can't be inlined w/out breaking ABI
214214
@_specialize(
215215
where Input == UnsafeBufferPointer<UInt8>, Encoding == Unicode.ASCII)
@@ -227,27 +227,39 @@ extension String {
227227
guard _fastPath(encoding == Unicode.ASCII.self) else {
228228
return _slowFromCodeUnits(input, encoding: encoding, repair: repair)
229229
}
230-
231-
var result:String? = nil
232-
230+
231+
// Helper to simplify early returns
232+
func resultOrSlow(_ resultOpt: String?) -> (String, repairsMade: Bool)? {
233+
guard let result = resultOpt else {
234+
return _slowFromCodeUnits(input, encoding: encoding, repair: repair)
235+
}
236+
return (result, repairsMade: false)
237+
}
238+
239+
// Fast path for untyped raw storage and known stdlib types
233240
if let contigBytes = input as? _HasContiguousBytes,
234241
contigBytes._providesContiguousBytesNoCopy {
235-
result = contigBytes.withUnsafeBytes { rawBufPtr in
242+
return resultOrSlow(contigBytes.withUnsafeBytes { rawBufPtr in
236243
let buffer = UnsafeBufferPointer(
237244
start: rawBufPtr.baseAddress?.assumingMemoryBound(to: UInt8.self),
238245
count: rawBufPtr.count)
239246
return String._fromASCIIValidating(buffer)
240-
}
241-
} else {
242-
result = Array(input).withUnsafeBufferPointer {
247+
})
248+
}
249+
250+
// Fast path for user-defined Collections
251+
if let strOpt = input.withContiguousStorageIfAvailable({
252+
(buffer: UnsafeBufferPointer<Input.Element>) -> String? in
253+
return String._fromASCIIValidating(
254+
UnsafeRawBufferPointer(buffer).bindMemory(to: UInt8.self))
255+
}) {
256+
return resultOrSlow(strOpt)
257+
}
258+
259+
return resultOrSlow(Array(input).withUnsafeBufferPointer {
243260
let buffer = UnsafeRawBufferPointer($0).bindMemory(to: UInt8.self)
244261
return String._fromASCIIValidating(buffer)
245-
}
246-
}
247-
248-
return result != nil ?
249-
(result!, repairsMade: false) :
250-
_slowFromCodeUnits(input, encoding: encoding, repair: repair)
262+
})
251263
}
252264

253265
public // @testable

stdlib/public/core/Substring.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,7 @@ extension Substring.UTF8View: BidirectionalCollection {
391391
}
392392

393393
@_alwaysEmitIntoClient
394+
@inlinable
394395
public func withContiguousStorageIfAvailable<R>(
395396
_ body: (UnsafeBufferPointer<Element>) throws -> R
396397
) rethrows -> R? {

0 commit comments

Comments
 (0)