Skip to content

Commit aab8063

Browse files
committed
[SE-0247] Add contiguous string APIs
Adds API for querying, enforcing, and using contiguous strings.
1 parent 52e8340 commit aab8063

File tree

7 files changed

+127
-41
lines changed

7 files changed

+127
-41
lines changed

stdlib/public/core/AssertCommon.swift

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,8 @@ internal func _assertionFailure(
115115
) -> Never {
116116
prefix.withUTF8Buffer {
117117
(prefix) -> Void in
118-
message._withUnsafeBufferPointerToUTF8 {
118+
var message = message
119+
message.withUTF8 {
119120
(messageUTF8) -> Void in
120121
file.withUTF8Buffer {
121122
(file) -> Void in
@@ -145,7 +146,8 @@ internal func _assertionFailure(
145146
) -> Never {
146147
prefix.withUTF8Buffer {
147148
(prefix) -> Void in
148-
message._withUnsafeBufferPointerToUTF8 {
149+
var message = message
150+
message.withUTF8 {
149151
(messageUTF8) -> Void in
150152
_swift_stdlib_reportFatalError(
151153
prefix.baseAddress!, CInt(prefix.count),

stdlib/public/core/ContiguouslyStored.swift

Lines changed: 4 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -79,23 +79,12 @@ extension String: _HasContiguousBytes {
7979
@inline(__always) get { return self._guts.isFastUTF8 }
8080
}
8181

82-
@inlinable @inline(__always)
83-
internal func _withUTF8<R>(
84-
_ body: (UnsafeBufferPointer<UInt8>) throws -> R
85-
) rethrows -> R {
86-
if _fastPath(self._guts.isFastUTF8) {
87-
return try self._guts.withFastUTF8 {
88-
try body($0)
89-
}
90-
}
91-
return try String._copying(self)._guts.withFastUTF8 { try body($0) }
92-
}
93-
9482
@inlinable @inline(__always)
9583
internal func withUnsafeBytes<R>(
9684
_ body: (UnsafeRawBufferPointer) throws -> R
9785
) rethrows -> R {
98-
return try self._withUTF8 { return try body(UnsafeRawBufferPointer($0)) }
86+
var copy = self
87+
return try copy.withUTF8 { return try body(UnsafeRawBufferPointer($0)) }
9988
}
10089
}
10190
extension Substring: _HasContiguousBytes {
@@ -104,22 +93,11 @@ extension Substring: _HasContiguousBytes {
10493
@inline(__always) get { return self._wholeGuts.isFastUTF8 }
10594
}
10695

107-
@inlinable @inline(__always)
108-
internal func _withUTF8<R>(
109-
_ body: (UnsafeBufferPointer<UInt8>) throws -> R
110-
) rethrows -> R {
111-
if _fastPath(_wholeGuts.isFastUTF8) {
112-
return try _wholeGuts.withFastUTF8(range: self._offsetRange) {
113-
return try body($0)
114-
}
115-
}
116-
return try String._copying(self)._guts.withFastUTF8 { try body($0) }
117-
}
118-
11996
@inlinable @inline(__always)
12097
internal func withUnsafeBytes<R>(
12198
_ body: (UnsafeRawBufferPointer) throws -> R
12299
) rethrows -> R {
123-
return try self._withUTF8 { return try body(UnsafeRawBufferPointer($0)) }
100+
var copy = self
101+
return try copy.withUTF8 { return try body(UnsafeRawBufferPointer($0)) }
124102
}
125103
}

stdlib/public/core/LegacyABI.swift

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,24 @@ extension Substring {
3939
internal var _wholeString: String { return base }
4040
}
4141

42+
extension String {
43+
@available(*, unavailable, renamed: "String.withUTF8")
44+
@inlinable
45+
internal func _withUTF8<R>(
46+
_ body: (UnsafeBufferPointer<UInt8>) throws -> R
47+
) rethrows -> R {
48+
var copy = self
49+
return try copy.withUTF8(body)
50+
}
51+
}
52+
53+
extension Substring {
54+
@available(*, unavailable, renamed: "Substring.withUTF8")
55+
@inlinable
56+
internal func _withUTF8<R>(
57+
_ body: (UnsafeBufferPointer<UInt8>) throws -> R
58+
) rethrows -> R {
59+
var copy = self
60+
return try copy.withUTF8(body)
61+
}
62+
}

stdlib/public/core/OutputStream.swift

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -521,7 +521,8 @@ internal struct _Stdout : TextOutputStream {
521521
internal mutating func write(_ string: String) {
522522
if string.isEmpty { return }
523523

524-
_ = string._withUTF8 { utf8 in
524+
var string = string
525+
_ = string.withUTF8 { utf8 in
525526
_swift_stdlib_fwrite_stdout(utf8.baseAddress!, 1, utf8.count)
526527
}
527528
}

stdlib/public/core/String.swift

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,8 @@ extension String {
468468
encodedAs targetEncoding: TargetEncoding.Type,
469469
_ body: (UnsafePointer<TargetEncoding.CodeUnit>) throws -> Result
470470
) rethrows -> Result {
471-
return try self._withUTF8 { utf8 in
471+
var copy = self
472+
return try copy.withUTF8 { utf8 in
472473
var arg = Array<TargetEncoding.CodeUnit>()
473474
arg.reserveCapacity(1 &+ self._guts.count / 4)
474475
let repaired = transcode(

stdlib/public/core/StringCreate.swift

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -144,16 +144,6 @@ extension String {
144144
return contents.withUnsafeBufferPointer { String._uncheckedFromUTF8($0) }
145145
}
146146

147-
internal func _withUnsafeBufferPointerToUTF8<R>(
148-
_ body: (UnsafeBufferPointer<UTF8.CodeUnit>) throws -> R
149-
) rethrows -> R {
150-
return try self.withUnsafeBytes { rawBufPtr in
151-
return try body(UnsafeBufferPointer(
152-
start: rawBufPtr.baseAddress?.assumingMemoryBound(to: UInt8.self),
153-
count: rawBufPtr.count))
154-
}
155-
}
156-
157147
@usableFromInline @inline(never) // slow-path
158148
internal static func _fromCodeUnits<
159149
Input: Collection,
@@ -218,4 +208,3 @@ extension String {
218208
}
219209
}
220210
}
221-

stdlib/public/core/StringProtocol.swift

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,4 +173,98 @@ extension StringProtocol {
173173
}
174174
}
175175

176+
// Contiguous UTF-8 strings
177+
extension String {
178+
/// Returns whether this string is capable of providing access to
179+
/// validly-encoded UTF-8 contents in contiguous memory in O(1) time.
180+
///
181+
/// Contiguous strings always operate in O(1) time for withUTF8 and always
182+
/// give a result for String.UTF8View.withContiguousStorageIfAvailable.
183+
/// Contiguous strings also benefit from fast-paths and better optimizations.
184+
///
185+
@_alwaysEmitIntoClient
186+
public var isContiguousUTF8: Bool { return _guts.isFastUTF8 }
187+
188+
/// If this string is not contiguous, make it so. If this mutates the string,
189+
/// it will invalidate any pre-existing indices.
190+
///
191+
/// - Complexity: O(*n*) if non-contiguous, O(1) if already contiguous
192+
///
193+
@_alwaysEmitIntoClient
194+
public mutating func makeContiguousUTF8() {
195+
if _fastPath(isContiguousUTF8) { return }
196+
self = String._copying(self)
197+
}
198+
199+
/// Runs `body` over the content of this string in contiguous memory. If this
200+
/// string is not contiguous, this will first make it contiguous, which will
201+
/// also speed up subsequent access. If this mutates the string,
202+
/// it will invalidate any pre-existing indices.
203+
///
204+
/// Note that it is unsafe to escape the pointer provided to `body`. For
205+
/// example, strings of up to 15 UTF-8 code units in length may be represented
206+
/// in a small-string representation, and thus will be spilled into
207+
/// temporary stack space which is invalid after `withUTF8` finishes
208+
/// execution.
209+
///
210+
/// - Complexity: O(*n*) if non-contiguous, O(1) if already contiguous
211+
///
212+
@_alwaysEmitIntoClient
213+
public mutating func withUTF8<R>(
214+
_ body: (UnsafeBufferPointer<UInt8>) throws -> R
215+
) rethrows -> R {
216+
makeContiguousUTF8()
217+
return try _guts.withFastUTF8(body)
218+
}
219+
}
176220

221+
// Contiguous UTF-8 strings
222+
extension Substring {
223+
/// Returns whether this substring is capable of providing access to
224+
/// validly-encoded UTF-8 contents in contiguous memory in O(1) time.
225+
///
226+
/// Contiguous strings always operate in O(1) time for withUTF8 and always
227+
/// give a result for String.UTF8View.withContiguousStorageIfAvailable.
228+
/// Contiguous strings also benefit from fast-paths and better optimizations.
229+
///
230+
@_alwaysEmitIntoClient
231+
public var isContiguousUTF8: Bool { return self.base.isContiguousUTF8 }
232+
233+
/// If this substring is not contiguous, make it so. If this mutates the
234+
/// substring, it will invalidate any pre-existing indices.
235+
///
236+
/// - Complexity: O(*n*) if non-contiguous, O(1) if already contiguous
237+
///
238+
@_alwaysEmitIntoClient
239+
public mutating func makeContiguousUTF8() {
240+
if _fastPath(isContiguousUTF8) { return }
241+
self = String._copying(self)[...]
242+
}
243+
244+
/// Runs `body` over the content of this substring in contiguous memory. If
245+
/// this substring is not contiguous, this will first make it contiguous,
246+
/// which will also speed up subsequent access. If this mutates the substring,
247+
/// it will invalidate any pre-existing indices.
248+
///
249+
/// Note that it is unsafe to escape the pointer provided to `body`. For
250+
/// example, strings of up to 15 UTF-8 code units in length may be represented
251+
/// in a small-string representation, and thus will be spilled into
252+
/// temporary stack space which is invalid after `withUTF8` finishes
253+
/// execution.
254+
///
255+
/// - Complexity: O(*n*) if non-contiguous, O(1) if already contiguous
256+
///
257+
@_alwaysEmitIntoClient
258+
public mutating func withUTF8<R>(
259+
_ body: (UnsafeBufferPointer<UInt8>) throws -> R
260+
) rethrows -> R {
261+
if _fastPath(isContiguousUTF8) {
262+
return try _wholeGuts.withFastUTF8(range: self._offsetRange) {
263+
return try body($0)
264+
}
265+
}
266+
267+
makeContiguousUTF8()
268+
return try _wholeGuts.withFastUTF8(body)
269+
}
270+
}

0 commit comments

Comments
 (0)