5
5
@frozen
6
6
@available ( SwiftStdlib 6 . 1 , * )
7
7
public struct UTF8Span : Copyable , ~ Escapable, BitwiseCopyable {
8
- public var unsafeBaseAddress : UnsafeRawPointer ?
8
+ @usableFromInline
9
+ internal var _unsafeBaseAddress : UnsafeRawPointer ?
9
10
10
11
/*
11
12
A bit-packed count and flags (such as isASCII)
12
13
13
- ╔═══════╦═════╦═════╦═════╦══════════╦═══════╗
14
- ║ b63 ║ b62 ║ b61 ║ b60 ║ b59:56 ║ b56:0 ║
15
- ╠═══════╬═════╬═════╬═════╬══════════╬═══════╣
16
- ║ ASCII ║ NFC ║ SSC ║ NUL ║ reserved ║ count ║
17
- ╚═══════╩═════╩═════╩═════╩══════════╩═══════╝
18
-
19
- ASCII means the contents are all-ASCII (<0x7F).
20
- NFC means contents are in normal form C for fast comparisons.
21
- SSC means single-scalar Characters (i.e. grapheme clusters): every
22
- `Character` holds only a single `Unicode.Scalar`.
23
- NUL means the contents are a null-terminated C string (that is,
24
- there is a guranteed, borrowed NULL byte after the end of `count`).
25
-
26
- TODO: NUL means both no-interior and null-terminator, so does this
27
- mean that String doesn't ever set it because we don't want to scan
28
- for interior nulls? I think this is the only viable option...
29
-
30
- TODO: Contains-newline would be useful for Regex `.`
31
-
32
- Question: Should we have null-termination support?
33
- A null-terminated UTF8Span has a NUL byte after its contents
34
- and contains no interior NULs. How would we ensure the
35
- NUL byte is exclusively borrowed by us?
14
+ ╔═══════╦═════╦══════════╦═══════╗
15
+ ║ b63 ║ b62 ║ b61:56 ║ b55:0 ║
16
+ ╠═══════╬═════╬══════════╬═══════╣
17
+ ║ ASCII ║ NFC ║ reserved ║ count ║
18
+ ╚═══════╩═════╩══════════╩═══════╝
36
19
20
+ ASCII means the contents are known to be all-ASCII (<= 0x7F).
21
+ NFC means contents are known to be in normal form C for fast comparisons.
37
22
*/
38
23
@usableFromInline
39
24
internal var _countAndFlags : UInt64
@@ -45,7 +30,7 @@ public struct UTF8Span: Copyable, ~Escapable, BitwiseCopyable {
45
30
_unsafeAssumingValidUTF8 start: borrowing UnsafeRawPointer ,
46
31
_countAndFlags: UInt64
47
32
) {
48
- self . unsafeBaseAddress = copy start
33
+ self . _unsafeBaseAddress = copy start
49
34
self . _countAndFlags = _countAndFlags
50
35
51
36
_invariantCheck ( )
@@ -55,8 +40,11 @@ public struct UTF8Span: Copyable, ~Escapable, BitwiseCopyable {
55
40
// at least check the count first
56
41
@_alwaysEmitIntoClient
57
42
internal func _start( ) -> UnsafeRawPointer {
58
- unsafeBaseAddress . _unsafelyUnwrappedUnchecked
43
+ _unsafeBaseAddress . _unsafelyUnwrappedUnchecked
59
44
}
45
+
46
+ // HACK: working around lack of internals
47
+ internal var _str : String { _start ( ) . _str ( 0 ..< count) }
60
48
}
61
49
62
50
// TODO: init strategy: underscored public that use lifetime annotations
@@ -65,13 +53,23 @@ public struct UTF8Span: Copyable, ~Escapable, BitwiseCopyable {
65
53
66
54
@available ( SwiftStdlib 6 . 1 , * )
67
55
extension UTF8Span {
56
+ /// Creates a UTF8Span containing `codeUnits`. Validates that the input is
57
+ /// valid UTF-8, otherwise throws an error.
58
+ ///
59
+ /// The resulting UTF8Span has the same lifetime constraints as `codeUnits`.
60
+ public init (
61
+ validating codeUnits: consuming Span < UInt8 >
62
+ ) throws ( UTF8. EncodingError) {
63
+ try self . init ( _validating: codeUnits)
64
+ }
65
+
68
66
// TODO: this doesn't need to be underscored, I don't think
69
67
@lifetime ( codeUnits)
70
- public init (
68
+ internal init (
71
69
_validating codeUnits: consuming Span < UInt8 >
72
70
) throws ( UTF8. EncodingError) {
73
71
guard let ptr = codeUnits. _pointer else {
74
- self . unsafeBaseAddress = nil
72
+ self . _unsafeBaseAddress = nil
75
73
self . _countAndFlags = 0
76
74
return
77
75
}
@@ -81,14 +79,37 @@ extension UTF8Span {
81
79
let count = codeUnits. _count
82
80
let isASCII = try basePtr. _validateUTF8 ( limitedBy: count)
83
81
84
- self . unsafeBaseAddress = . init( basePtr)
82
+ self . _unsafeBaseAddress = . init( basePtr)
85
83
self . _countAndFlags = UInt64 ( truncatingIfNeeded: count)
86
84
if isASCII {
87
85
_setIsASCII ( )
88
86
}
89
87
_internalInvariant ( self . count == codeUnits. count)
90
88
}
91
89
90
+ // TODO: SPI?
91
+ internal init (
92
+ _uncheckedAssumingValidUTF8 codeUnits: consuming Span < UInt8 > ,
93
+ isKnownASCII: Bool ,
94
+ isKnownNFC: Bool
95
+ ) {
96
+ guard let ptr = codeUnits. _pointer else {
97
+ self . _unsafeBaseAddress = nil
98
+ self . _countAndFlags = 0
99
+ return
100
+ }
101
+
102
+ self . _unsafeBaseAddress = codeUnits. _start ( )
103
+ self . _countAndFlags = UInt64 ( truncatingIfNeeded: codeUnits. count)
104
+ if isKnownASCII {
105
+ _setIsASCII ( )
106
+ }
107
+ if isKnownNFC {
108
+ _setIsNFC ( )
109
+ }
110
+ _internalInvariant ( self . count == codeUnits. count)
111
+ }
112
+
92
113
// @_alwaysEmitIntoClient
93
114
// public init<Owner: ~Copyable & ~Escapable>(
94
115
// validatingUnsafe codeUnits: UnsafeBufferPointer<UInt8>,
@@ -161,30 +182,6 @@ extension UTF8Span {
161
182
}
162
183
163
184
164
- // MARK: Canonical comparison
165
-
166
- @_unavailableInEmbedded
167
- @available ( SwiftStdlib 6 . 1 , * )
168
- extension UTF8Span {
169
- // HACK: working around lack of internals
170
- internal var _str : String { _start ( ) . _str ( 0 ..< count) }
171
-
172
- /// Whether `self` is equivalent to `other` under Unicode Canonical
173
- /// Equivalence.
174
- public func isCanonicallyEquivalent(
175
- to other: UTF8Span
176
- ) -> Bool {
177
- self . _str == other. _str
178
- }
179
-
180
- /// Whether `self` orders less than `other` under Unicode Canonical
181
- /// Equivalence using normalized code-unit order (in NFC).
182
- public func isCanonicallyLessThan(
183
- _ other: UTF8Span
184
- ) -> Bool {
185
- self . _str < other. _str
186
- }
187
- }
188
185
189
186
190
187
@@ -206,7 +203,7 @@ extension UTF8Span {
206
203
/// parameter is valid only for the duration of its execution.
207
204
/// - Returns: The return value of the `body` closure parameter.
208
205
@_alwaysEmitIntoClient
209
- borrowing public func withUnsafeBufferPointer <
206
+ borrowing public func _withUnsafeBufferPointer <
210
207
E: Error , Result: ~ Copyable //& ~Escapable
211
208
> (
212
209
_ body: ( _ buffer: /*borrowing*/ UnsafeBufferPointer < UInt8 > ) throws ( E ) -> Result
@@ -233,45 +230,38 @@ extension UTF8Span {
233
230
}
234
231
}
235
232
236
- #if false
237
- extension RawSpan {
238
- public func parseUTF8( from start: Int , length: Int ) throws -> UTF8Span {
239
- let span = self [
240
- uncheckedOffsets: start ..< start &+ length
241
- ] . view ( as: UInt8 . self)
242
- return try UTF8Span ( validating: span)
233
+ @available ( SwiftStdlib 6 . 1 , * )
234
+ extension UTF8Span {
235
+ public var isEmpty : Bool {
236
+ self . count == 0
243
237
}
244
238
245
- // TODO: Below are contingent on how we want to handle NUL-termination
246
- public func parseNullTerminatedUTF8( ) throws -> UTF8Span {
247
- fatalError ( )
239
+ public var span : Span < UInt8 > {
240
+ Span ( _unchecked: _unsafeBaseAddress, count: self . count)
248
241
}
242
+
243
+
249
244
}
250
245
251
- // TODO: Below is contingent on a Cursor or Iterator type
252
- extension RawSpan . Cursor {
253
- public mutating func parseUTF8( length: Int ) throws -> UTF8Span {
254
- fatalError ( )
255
- }
256
- public mutating func parseNullTerminatedUTF8( ) throws -> UTF8Span {
257
- fatalError ( )
258
- }
246
+ // Placeholder for not-yet-implemented functionality: traps via `fatalError`
+ // with `message` prefixed by "TODO:". Never returns.
+ func TODO( _ message: String ) -> Never {
247
+ // Interpolate the parameter; the bare word would print the literal text "message".
+ fatalError ( " TODO: \(message) " )
259
248
}
260
- #endif
261
249
250
+ // TODO(toolchain): decide if we rebase on top of Guillaume's work
262
251
@available ( SwiftStdlib 6 . 1 , * )
263
- extension UTF8Span {
264
- public static func ~= ( _ lhs: StaticString , _ rhs: UTF8Span ) -> Bool {
265
- return lhs. withUTF8Buffer { str in
266
- rhs. withUnsafeBufferPointer { span in
267
- str. elementsEqual ( span)
268
- }
269
- }
252
+ extension String {
253
+ public var utf8Span : UTF8Span {
254
+ TODO ( " Decide when to rebase on top of Guillaume's PR " )
270
255
}
256
+ }
271
257
272
- // Not doing == between two UTFSpan, as pointerness
273
- // Note: avove might not be possible
258
+ @available ( SwiftStdlib 6 . 1 , * )
259
+ extension Substring {
260
+ public var utf8Span : UTF8Span {
261
+ TODO ( " Decide when to rebase on top of Guillaume's PR " )
262
+ }
274
263
}
275
264
276
265
277
- // TODO: cString var, or something like that
266
+
267
+
0 commit comments