Skip to content

[string] String(decoding:as:) fast path for withContiguousStorageIfAvailable #30729

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions stdlib/public/core/ContiguouslyStored.swift
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
//
//===----------------------------------------------------------------------===//

// NOTE: The below is necessary for fast String initialization from untyped
// memory. When we add Collection.withContiguousRawStorageIfAvailable(), we can
// deprecate this functionality.

@usableFromInline
internal protocol _HasContiguousBytes {
func withUnsafeBytes<R>(
Expand Down
65 changes: 47 additions & 18 deletions stdlib/public/core/String.swift
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,22 @@ extension String {
}

extension String {
// Slow path for `String(decoding:as:)` when the input offers no contiguous
// storage.
//
// The element type is reinterpreted as UInt8 by force: the caller has already
// established `C.Element == UInt8` with a dynamic check, but that proof cannot
// currently be expressed to the type checker, so the generic element type is
// erased through a raw buffer and rebound here.
//
// - Parameter input: A collection whose elements are dynamically known to be
//   `UInt8`; this is asserted via `_internalInvariant`.
// - Returns: Whatever `String._fromUTF8Repairing` produces for the copied
//   bytes: the resulting string and a flag reporting whether repairs were made.
@_alwaysEmitIntoClient
@inline(never) // slow-path
private static func _fromNonContiguousUnsafeBitcastUTF8Repairing<
  C: Collection
>(_ input: C) -> (result: String, repairsMade: Bool) {
  _internalInvariant(C.Element.self == UInt8.self)
  // Materialize the elements into an Array so there is a contiguous buffer
  // to hand to the UTF-8 entry point.
  let copiedElements = Array(input)
  return copiedElements.withUnsafeBufferPointer { typedBuffer in
    // View the typed buffer as raw bytes, then rebind those bytes to UInt8.
    let utf8 = UnsafeRawBufferPointer(typedBuffer).bindMemory(to: UInt8.self)
    return String._fromUTF8Repairing(utf8)
  }
}


/// Creates a string from the given Unicode code units in the specified
/// encoding.
///
Expand All @@ -407,8 +423,27 @@ extension String {
return
}

// Fast path for user-defined Collections and typed contiguous collections.
//
// Note: this comes first, as the optimizer nearly always has insight into
// withContiguousStorageIfAvailable (wCSIA), but cannot prove that a type does
// not conform to _HasContiguousBytes.
if let str = codeUnits.withContiguousStorageIfAvailable({
(buffer: UnsafeBufferPointer<C.Element>) -> String in
Builtin.onFastPath() // encourage SIL Optimizer to inline this closure :-(
let rawBufPtr = UnsafeRawBufferPointer(buffer)
return String._fromUTF8Repairing(
UnsafeBufferPointer(
start: rawBufPtr.baseAddress?.assumingMemoryBound(to: UInt8.self),
count: rawBufPtr.count)).0
}) {
self = str
return
}

// Fast path for untyped raw storage and known stdlib types
if let contigBytes = codeUnits as? _HasContiguousBytes,
contigBytes._providesContiguousBytesNoCopy
contigBytes._providesContiguousBytesNoCopy
{
self = contigBytes.withUnsafeBytes { rawBufPtr in
return String._fromUTF8Repairing(
Expand All @@ -419,15 +454,9 @@ extension String {
return
}

// Just copying to an Array is significantly faster than performing
// generic operations
self = Array(codeUnits).withUnsafeBufferPointer {
let raw = UnsafeRawBufferPointer($0)
return String._fromUTF8Repairing(raw.bindMemory(to: UInt8.self)).0
}
return
self = String._fromNonContiguousUnsafeBitcastUTF8Repairing(codeUnits).0
}

/// Creates a new string with the specified capacity in UTF-8 code units, and
/// then calls the given closure with a buffer covering the string's
/// uninitialized memory.
Expand Down Expand Up @@ -484,7 +513,7 @@ extension String {
initializingUTF8With: initializer
)
}

@inline(__always)
internal init(
_uninitializedCapacity capacity: Int,
Expand All @@ -503,7 +532,7 @@ extension String {
}
return
}

self = try String._fromLargeUTF8Repairing(
uninitializedCapacity: capacity,
initializingWith: initializer)
Expand Down Expand Up @@ -968,20 +997,20 @@ extension _StringGutsSlice {
var outputBuffer = outputBuffer
var icuInputBuffer = icuInputBuffer
var icuOutputBuffer = icuOutputBuffer

var index = range.lowerBound
let cachedEndIndex = range.upperBound

var hasBufferOwnership = false

defer {
if hasBufferOwnership {
outputBuffer.deallocate()
icuInputBuffer.deallocate()
icuOutputBuffer.deallocate()
}
}

while index < cachedEndIndex {
let result = _foreignNormalize(
readIndex: index,
Expand Down Expand Up @@ -1017,17 +1046,17 @@ internal func _fastWithNormalizedCodeUnitsImpl(

var index = String.Index(_encodedOffset: 0)
let cachedEndIndex = String.Index(_encodedOffset: sourceBuffer.count)

var hasBufferOwnership = false

defer {
if hasBufferOwnership {
outputBuffer.deallocate()
icuInputBuffer.deallocate()
icuOutputBuffer.deallocate()
}
}

while index < cachedEndIndex {
let result = _fastNormalize(
readIndex: index,
Expand Down
52 changes: 32 additions & 20 deletions stdlib/public/core/StringCreate.swift
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ internal func _allASCII(_ input: UnsafeBufferPointer<UInt8>) -> Bool {
}

extension String {

internal static func _uncheckedFromASCII(
_ input: UnsafeBufferPointer<UInt8>
) -> String {
Expand All @@ -61,15 +61,15 @@ extension String {
let storage = __StringStorage.create(initializingFrom: input, isASCII: true)
return storage.asString
}

/// Creates a string from a buffer that the caller asserts is entirely ASCII.
///
/// The ASCII claim is checked through `_internalInvariant` (using
/// `_allASCII`); construction itself is delegated to `_uncheckedFromASCII`.
///
/// - Parameter input: The code units to build the string from.
/// - Returns: The string produced by `_uncheckedFromASCII(input)`.
@usableFromInline
internal static func _fromASCII(
  _ input: UnsafeBufferPointer<UInt8>
) -> String {
  // Check the caller's claim before handing off to the unchecked path.
  _internalInvariant(_allASCII(input), "not actually ASCII")
  let asciiString = _uncheckedFromASCII(input)
  return asciiString
}

internal static func _fromASCIIValidating(
_ input: UnsafeBufferPointer<UInt8>
) -> String? {
Expand Down Expand Up @@ -101,7 +101,7 @@ extension String {
return (repairUTF8(input, firstKnownBrokenRange: initialRange), true)
}
}

internal static func _fromLargeUTF8Repairing(
uninitializedCapacity capacity: Int,
initializingWith initializer: (
Expand All @@ -111,7 +111,7 @@ extension String {
let result = try __StringStorage.create(
uninitializedCodeUnitCapacity: capacity,
initializingUncheckedUTF8With: initializer)

switch validateUTF8(result.codeUnits) {
case .success(let info):
result._updateCountAndFlags(
Expand Down Expand Up @@ -181,7 +181,7 @@ extension String {

return contents.withUnsafeBufferPointer { String._uncheckedFromUTF8($0) }
}

@inline(never) // slow path
private static func _slowFromCodeUnits<
Input: Collection,
Expand Down Expand Up @@ -209,7 +209,7 @@ extension String {
let str = contents.withUnsafeBufferPointer { String._uncheckedFromUTF8($0) }
return (str, repaired)
}

@usableFromInline @inline(never) // can't be inlined w/out breaking ABI
@_specialize(
where Input == UnsafeBufferPointer<UInt8>, Encoding == Unicode.ASCII)
Expand All @@ -227,27 +227,39 @@ extension String {
guard _fastPath(encoding == Unicode.ASCII.self) else {
return _slowFromCodeUnits(input, encoding: encoding, repair: repair)
}

var result:String? = nil


// Helper to simplify early returns
func resultOrSlow(_ resultOpt: String?) -> (String, repairsMade: Bool)? {
guard let result = resultOpt else {
return _slowFromCodeUnits(input, encoding: encoding, repair: repair)
}
return (result, repairsMade: false)
}

// Fast path for untyped raw storage and known stdlib types
if let contigBytes = input as? _HasContiguousBytes,
contigBytes._providesContiguousBytesNoCopy {
result = contigBytes.withUnsafeBytes { rawBufPtr in
return resultOrSlow(contigBytes.withUnsafeBytes { rawBufPtr in
let buffer = UnsafeBufferPointer(
start: rawBufPtr.baseAddress?.assumingMemoryBound(to: UInt8.self),
count: rawBufPtr.count)
return String._fromASCIIValidating(buffer)
}
} else {
result = Array(input).withUnsafeBufferPointer {
})
}

// Fast path for user-defined Collections
if let strOpt = input.withContiguousStorageIfAvailable({
(buffer: UnsafeBufferPointer<Input.Element>) -> String? in
return String._fromASCIIValidating(
UnsafeRawBufferPointer(buffer).bindMemory(to: UInt8.self))
}) {
return resultOrSlow(strOpt)
}

return resultOrSlow(Array(input).withUnsafeBufferPointer {
let buffer = UnsafeRawBufferPointer($0).bindMemory(to: UInt8.self)
return String._fromASCIIValidating(buffer)
}
}

return result != nil ?
(result!, repairsMade: false) :
_slowFromCodeUnits(input, encoding: encoding, repair: repair)
})
}

public // @testable
Expand Down
1 change: 1 addition & 0 deletions stdlib/public/core/Substring.swift
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,7 @@ extension Substring.UTF8View: BidirectionalCollection {
}

@_alwaysEmitIntoClient
@inlinable
public func withContiguousStorageIfAvailable<R>(
_ body: (UnsafeBufferPointer<Element>) throws -> R
) rethrows -> R? {
Expand Down
Loading