Skip to content

[SE-0247] Add contiguous string APIs #23788

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions stdlib/public/core/AssertCommon.swift
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@ internal func _assertionFailure(
) -> Never {
prefix.withUTF8Buffer {
(prefix) -> Void in
message._withUnsafeBufferPointerToUTF8 {
var message = message
message.withUTF8 {
(messageUTF8) -> Void in
file.withUTF8Buffer {
(file) -> Void in
Expand Down Expand Up @@ -145,7 +146,8 @@ internal func _assertionFailure(
) -> Never {
prefix.withUTF8Buffer {
(prefix) -> Void in
message._withUnsafeBufferPointerToUTF8 {
var message = message
message.withUTF8 {
(messageUTF8) -> Void in
_swift_stdlib_reportFatalError(
prefix.baseAddress!, CInt(prefix.count),
Expand Down
30 changes: 4 additions & 26 deletions stdlib/public/core/ContiguouslyStored.swift
Original file line number Diff line number Diff line change
Expand Up @@ -79,23 +79,12 @@ extension String: _HasContiguousBytes {
@inline(__always) get { return self._guts.isFastUTF8 }
}

/// Calls `body` with a buffer over this string's UTF-8 code units.
///
/// When the string's guts report fast UTF-8, `body` is run directly over
/// them. Otherwise `body` is run over the guts of a copy produced by
/// `String._copying(self)`; `self` is left untouched.
///
/// - Parameter body: A closure receiving the UTF-8 buffer.
/// - Returns: Whatever `body` returns.
/// - Throws: Rethrows any error thrown by `body`.
@inlinable @inline(__always)
internal func _withUTF8<R>(
  _ body: (UnsafeBufferPointer<UInt8>) throws -> R
) rethrows -> R {
  guard _fastPath(self._guts.isFastUTF8) else {
    // Not fast UTF-8: operate on a freshly made copy instead.
    return try String._copying(self)._guts.withFastUTF8 { utf8 in
      try body(utf8)
    }
  }
  return try self._guts.withFastUTF8 { utf8 in try body(utf8) }
}

@inlinable @inline(__always)
internal func withUnsafeBytes<R>(
_ body: (UnsafeRawBufferPointer) throws -> R
) rethrows -> R {
return try self._withUTF8 { return try body(UnsafeRawBufferPointer($0)) }
var copy = self
return try copy.withUTF8 { return try body(UnsafeRawBufferPointer($0)) }
}
}
extension Substring: _HasContiguousBytes {
Expand All @@ -104,22 +93,11 @@ extension Substring: _HasContiguousBytes {
@inline(__always) get { return self._wholeGuts.isFastUTF8 }
}

/// Calls `body` with a buffer over this substring's UTF-8 code units.
///
/// When the whole guts report fast UTF-8, `body` is run over the slice of
/// them given by `_offsetRange`. Otherwise `body` is run over the guts of a
/// copy produced by `String._copying(self)`; `self` is left untouched.
///
/// - Parameter body: A closure receiving the UTF-8 buffer.
/// - Returns: Whatever `body` returns.
/// - Throws: Rethrows any error thrown by `body`.
@inlinable @inline(__always)
internal func _withUTF8<R>(
  _ body: (UnsafeBufferPointer<UInt8>) throws -> R
) rethrows -> R {
  guard _fastPath(_wholeGuts.isFastUTF8) else {
    // Not fast UTF-8: operate on a freshly made copy instead.
    return try String._copying(self)._guts.withFastUTF8 { utf8 in
      try body(utf8)
    }
  }
  return try _wholeGuts.withFastUTF8(range: self._offsetRange) { utf8 in
    return try body(utf8)
  }
}

@inlinable @inline(__always)
internal func withUnsafeBytes<R>(
_ body: (UnsafeRawBufferPointer) throws -> R
) rethrows -> R {
return try self._withUTF8 { return try body(UnsafeRawBufferPointer($0)) }
var copy = self
return try copy.withUTF8 { return try body(UnsafeRawBufferPointer($0)) }
}
}
21 changes: 21 additions & 0 deletions stdlib/public/core/LegacyABI.swift
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,24 @@ extension Substring {
internal var _wholeString: String { return base }
}

extension String {
  /// Legacy entry point kept under its old name; it is marked unavailable
  /// and renamed to `String.withUTF8`.
  ///
  /// Forwards `body` to `withUTF8` on a local mutable copy of `self`, so the
  /// receiver itself is never mutated.
  ///
  /// - Parameter body: A closure receiving the UTF-8 buffer.
  /// - Returns: Whatever `body` returns.
  /// - Throws: Rethrows any error thrown by `body`.
  @available(*, unavailable, renamed: "String.withUTF8")
  @inlinable
  internal func _withUTF8<R>(
    _ body: (UnsafeBufferPointer<UInt8>) throws -> R
  ) rethrows -> R {
    var mutableSelf = self
    return try mutableSelf.withUTF8 { utf8 in try body(utf8) }
  }
}

extension Substring {
  /// Legacy entry point kept under its old name; it is marked unavailable
  /// and renamed to `Substring.withUTF8`.
  ///
  /// Forwards `body` to `withUTF8` on a local mutable copy of `self`, so the
  /// receiver itself is never mutated.
  ///
  /// - Parameter body: A closure receiving the UTF-8 buffer.
  /// - Returns: Whatever `body` returns.
  /// - Throws: Rethrows any error thrown by `body`.
  @available(*, unavailable, renamed: "Substring.withUTF8")
  @inlinable
  internal func _withUTF8<R>(
    _ body: (UnsafeBufferPointer<UInt8>) throws -> R
  ) rethrows -> R {
    var mutableSelf = self
    return try mutableSelf.withUTF8 { utf8 in try body(utf8) }
  }
}
3 changes: 2 additions & 1 deletion stdlib/public/core/OutputStream.swift
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,8 @@ internal struct _Stdout : TextOutputStream {
internal mutating func write(_ string: String) {
if string.isEmpty { return }

_ = string._withUTF8 { utf8 in
var string = string
_ = string.withUTF8 { utf8 in
_swift_stdlib_fwrite_stdout(utf8.baseAddress!, 1, utf8.count)
}
}
Expand Down
3 changes: 2 additions & 1 deletion stdlib/public/core/String.swift
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,8 @@ extension String {
encodedAs targetEncoding: TargetEncoding.Type,
_ body: (UnsafePointer<TargetEncoding.CodeUnit>) throws -> Result
) rethrows -> Result {
return try self._withUTF8 { utf8 in
var copy = self
return try copy.withUTF8 { utf8 in
var arg = Array<TargetEncoding.CodeUnit>()
arg.reserveCapacity(1 &+ self._guts.count / 4)
let repaired = transcode(
Expand Down
11 changes: 0 additions & 11 deletions stdlib/public/core/StringCreate.swift
Original file line number Diff line number Diff line change
Expand Up @@ -144,16 +144,6 @@ extension String {
return contents.withUnsafeBufferPointer { String._uncheckedFromUTF8($0) }
}

/// Calls `body` with this string's bytes, as provided by `withUnsafeBytes`,
/// viewed as a typed buffer of UTF-8 code units.
///
/// - Parameter body: A closure receiving the typed buffer.
/// - Returns: Whatever `body` returns.
/// - Throws: Rethrows any error thrown by `body`.
internal func _withUnsafeBufferPointerToUTF8<R>(
  _ body: (UnsafeBufferPointer<UTF8.CodeUnit>) throws -> R
) rethrows -> R {
  return try self.withUnsafeBytes { (rawBytes: UnsafeRawBufferPointer) -> R in
    // Reinterpret the raw bytes as `UInt8` without rebinding the memory;
    // a nil base address is passed through unchanged.
    let start = rawBytes.baseAddress?.assumingMemoryBound(to: UInt8.self)
    let utf8 = UnsafeBufferPointer(start: start, count: rawBytes.count)
    return try body(utf8)
  }
}

@usableFromInline @inline(never) // slow-path
internal static func _fromCodeUnits<
Input: Collection,
Expand Down Expand Up @@ -218,4 +208,3 @@ extension String {
}
}
}

94 changes: 94 additions & 0 deletions stdlib/public/core/StringProtocol.swift
Original file line number Diff line number Diff line change
Expand Up @@ -173,4 +173,98 @@ extension StringProtocol {
}
}

// Contiguous UTF-8 strings
extension String {
  /// A Boolean value indicating whether this string can provide access to
  /// its validly-encoded UTF-8 contents in contiguous memory in O(1) time.
  ///
  /// Contiguous strings always run `withUTF8` in O(1) time and always give a
  /// result for `String.UTF8View.withContiguousStorageIfAvailable`. They also
  /// benefit from fast paths and better optimizations.
  @_alwaysEmitIntoClient
  public var isContiguousUTF8: Bool {
    return _guts.isFastUTF8
  }

  /// Makes this string contiguous if it is not already.
  ///
  /// If this call mutates the string, any pre-existing indices are
  /// invalidated.
  ///
  /// - Complexity: O(*n*) if non-contiguous, O(1) if already contiguous.
  @_alwaysEmitIntoClient
  public mutating func makeContiguousUTF8() {
    let alreadyContiguous = _fastPath(isContiguousUTF8)
    if !alreadyContiguous {
      self = String._copying(self)
    }
  }

  /// Runs `body` over the content of this string in contiguous memory.
  ///
  /// A non-contiguous string is first made contiguous, which also speeds up
  /// subsequent access. If that mutates the string, any pre-existing indices
  /// are invalidated.
  ///
  /// It is unsafe to escape the pointer handed to `body`. For example,
  /// strings of up to 15 UTF-8 code units may use a small-string
  /// representation and are then spilled into temporary stack space that is
  /// invalid once `withUTF8` finishes executing.
  ///
  /// - Parameter body: A closure receiving the contiguous UTF-8 buffer.
  /// - Returns: Whatever `body` returns.
  /// - Throws: Rethrows any error thrown by `body`.
  /// - Complexity: O(*n*) if non-contiguous, O(1) if already contiguous.
  @_alwaysEmitIntoClient
  public mutating func withUTF8<R>(
    _ body: (UnsafeBufferPointer<UInt8>) throws -> R
  ) rethrows -> R {
    makeContiguousUTF8()
    return try _guts.withFastUTF8 { utf8 in try body(utf8) }
  }
}

// Contiguous UTF-8 strings
extension Substring {
  /// A Boolean value indicating whether this substring can provide access to
  /// its validly-encoded UTF-8 contents in contiguous memory in O(1) time.
  ///
  /// The answer is taken from the base string. Contiguous strings always run
  /// `withUTF8` in O(1) time and always give a result for
  /// `String.UTF8View.withContiguousStorageIfAvailable`. They also benefit
  /// from fast paths and better optimizations.
  @_alwaysEmitIntoClient
  public var isContiguousUTF8: Bool {
    return self.base.isContiguousUTF8
  }

  /// Makes this substring contiguous if it is not already.
  ///
  /// If this call mutates the substring, any pre-existing indices are
  /// invalidated.
  ///
  /// - Complexity: O(*n*) if non-contiguous, O(1) if already contiguous.
  @_alwaysEmitIntoClient
  public mutating func makeContiguousUTF8() {
    let alreadyContiguous = _fastPath(isContiguousUTF8)
    if !alreadyContiguous {
      // Replace self with a full-range slice of a fresh copy.
      self = String._copying(self)[...]
    }
  }

  /// Runs `body` over the content of this substring in contiguous memory.
  ///
  /// A non-contiguous substring is first made contiguous, which also speeds
  /// up subsequent access. If that mutates the substring, any pre-existing
  /// indices are invalidated.
  ///
  /// It is unsafe to escape the pointer handed to `body`. For example,
  /// strings of up to 15 UTF-8 code units may use a small-string
  /// representation and are then spilled into temporary stack space that is
  /// invalid once `withUTF8` finishes executing.
  ///
  /// - Parameter body: A closure receiving the contiguous UTF-8 buffer.
  /// - Returns: Whatever `body` returns.
  /// - Throws: Rethrows any error thrown by `body`.
  /// - Complexity: O(*n*) if non-contiguous, O(1) if already contiguous.
  @_alwaysEmitIntoClient
  public mutating func withUTF8<R>(
    _ body: (UnsafeBufferPointer<UInt8>) throws -> R
  ) rethrows -> R {
    guard _fastPath(isContiguousUTF8) else {
      // After this call self is a full-range slice of a fresh copy, so the
      // whole guts are passed to `body` without a range.
      makeContiguousUTF8()
      return try _wholeGuts.withFastUTF8(body)
    }
    // Already contiguous: expose only this substring's range of the guts.
    return try _wholeGuts.withFastUTF8(range: self._offsetRange) { utf8 in
      return try body(utf8)
    }
  }
}