@@ -42,6 +42,45 @@ public protocol StringProtocol
42
42
43
43
func lowercased( ) -> String
44
44
func uppercased( ) -> String
45
+
46
+ /// Creates a string whose contents are the text encoded by `codeUnits`.
47
+ ///
48
+ /// - Parameter codeUnits: A collection of code units to decode; its element
49
+ /// type must equal `Encoding.CodeUnit`.
50
+ /// - Parameter encoding: The encoding in which `codeUnits` should be
51
+ /// interpreted.
52
+ init < C: Collection , Encoding: UnicodeEncoding > (
53
+ codeUnits: C , encoding: Encoding . Type
54
+ )
55
+ where C. Iterator. Element == Encoding . CodeUnit
56
+
57
+ /// Creates a string with the same contents as the NUL-terminated UTF-8 data at `nulTerminatedUTF8`.
58
+ ///
59
+ /// - Parameter nulTerminatedUTF8: A pointer to contiguous UTF-8 encoded
60
+ /// bytes; the contents end just before the first zero byte (NUL character).
61
+ init ( cString nulTerminatedUTF8: UnsafePointer < CChar > )
62
+
63
+ /// Creates a string from zero-terminated code units in the given encoding.
64
+ ///
65
+ /// - Parameter nulTerminatedCodeUnits: A pointer to contiguous code units in
66
+ /// `encoding`; the contents end just before the first zero code unit.
67
+ /// - Parameter encoding: The encoding in which the code units should be
68
+ /// interpreted.
69
+ init < Encoding: UnicodeEncoding > (
70
+ cString nulTerminatedCodeUnits: UnsafePointer < Encoding . CodeUnit > ,
71
+ encoding: Encoding . Type )
72
+
73
+ /// Calls `body` with a pointer to the string's contents as a NUL-terminated
74
+ /// sequence of UTF-8 code units; errors thrown by `body` are rethrown.
75
+ func withCString< Result> (
76
+ _ body: ( UnsafePointer < CChar > ) throws -> Result ) rethrows -> Result
77
+
78
+ /// Calls `body` with a pointer to the string's contents as a zero-terminated
79
+ /// sequence of `Encoding.CodeUnit`s; errors thrown by `body` are rethrown.
80
+ func withCString< Result, Encoding: UnicodeEncoding > (
81
+ encoding: Encoding . Type ,
82
+ _ body: ( UnsafePointer < Encoding . CodeUnit > ) throws -> Result
83
+ ) rethrows -> Result
45
84
}
46
85
47
86
extension StringProtocol {
@@ -52,7 +91,141 @@ extension StringProtocol {
52
91
}
53
92
}
54
93
55
- // FIXME: complexity documentation for most of methods on String is ought to be
94
/// Invokes `body` with a pointer to a zero-terminated sequence of
/// `TargetEncoding.CodeUnit`s that represents the same string as `source`,
/// where `source` is interpreted as being encoded with `SourceEncoding`.
///
/// This is a convenience over `_withCStringAndLength` for callers that do not
/// need the code-unit count.
///
/// - Parameter targetEncoding: The encoding of the code units handed to `body`.
/// - Parameter source: The code units to transcode.
/// - Parameter sourceEncoding: The encoding in which `source` is interpreted.
/// - Parameter body: The closure to call with the transcoded, zero-terminated
///   code units.
/// - Returns: Whatever `body` returns; errors thrown by `body` are rethrown.
internal func _withCString<
  Source : Collection,
  SourceEncoding : UnicodeEncoding,
  TargetEncoding : UnicodeEncoding,
  Result
>(
  encodedAs targetEncoding: TargetEncoding.Type,
  from source: Source,
  encodedAs sourceEncoding: SourceEncoding.Type,
  execute body : (UnsafePointer<TargetEncoding.CodeUnit>) throws -> Result
) rethrows -> Result
where Source.Iterator.Element == SourceEncoding.CodeUnit {
  // Forward to the length-reporting variant and discard the length.
  return try _withCStringAndLength(
    encodedAs: targetEncoding,
    from: source,
    encodedAs: sourceEncoding,
    execute: { cString, _ in
      try body(cString)
    })
}
114
+
115
/// Invokes `body` with a pointer to a zero-terminated sequence of
/// `TargetEncoding.CodeUnit`s representing the same string as `source`
/// (interpreted as `SourceEncoding`), together with the number of code units
/// that precede the terminator.
///
/// The code units live in a local array, so the pointer passed to `body` must
/// not be used after `body` returns.
///
/// - Parameter targetEncoding: The encoding of the code units handed to `body`.
/// - Parameter source: The code units to transcode.
/// - Parameter sourceEncoding: The encoding in which `source` is interpreted.
/// - Parameter body: The closure to call with the pointer and the code-unit
///   count (terminator excluded).
/// - Returns: Whatever `body` returns; errors thrown by `body` are rethrown.
internal func _withCStringAndLength<
  Source : Collection,
  SourceEncoding : UnicodeEncoding,
  TargetEncoding : UnicodeEncoding,
  Result
>(
  encodedAs targetEncoding: TargetEncoding.Type,
  from source: Source,
  encodedAs sourceEncoding: SourceEncoding.Type,
  execute body : (UnsafePointer<TargetEncoding.CodeUnit>, Int) throws -> Result
) rethrows -> Result
where Source.Iterator.Element == SourceEncoding.CodeUnit {
  // Pass 1: measure how many target code units the transcoded text occupies.
  // The zero terminator is NOT included in this count.
  var transcodedCount = 0
  var measuringIterator = source.makeIterator()
  SourceEncoding.ForwardParser.parse(&measuringIterator) { parsed in
    transcodedCount += numericCast(
      targetEncoding.transcode(parsed, from: SourceEncoding.self).count)
  }

  // Pass 2: transcode into a buffer sized for the text plus its terminator.
  var buffer = [TargetEncoding.CodeUnit]()
  buffer.reserveCapacity(transcodedCount + 1)
  var copyingIterator = source.makeIterator()
  SourceEncoding.ForwardParser.parse(&copyingIterator) { parsed in
    buffer.append(
      contentsOf: targetEncoding.transcode(parsed, from: SourceEncoding.self))
  }
  buffer.append(0)

  return try body(buffer, transcodedCount)
}
143
+
144
extension _StringCore {
  /// Calls `body` with a pointer to a zero-terminated sequence of code units
  /// in `targetEncoding` corresponding to the substring in `bounds`.
  ///
  /// - Parameter bounds: The range of `self` to expose to `body`.
  /// - Parameter targetEncoding: The encoding of the code units handed to
  ///   `body`.
  /// - Parameter body: The closure to call with the zero-terminated code units.
  /// - Returns: Whatever `body` returns; errors thrown by `body` are rethrown.
  internal func _withCSubstring<Result, TargetEncoding: UnicodeEncoding>(
    in bounds: Range<Index>,
    encoding targetEncoding: TargetEncoding.Type,
    _ body: (UnsafePointer<TargetEncoding.CodeUnit>) throws -> Result
  ) rethrows -> Result {
    // Forward to the length-reporting variant and discard the length.
    return try _withCSubstringAndLength(
      in: bounds, encoding: targetEncoding
    ) { cString, _ in
      try body(cString)
    }
  }

  /// Calls `body` with a pointer to a zero-terminated sequence of code units
  /// in `targetEncoding` corresponding to the substring in `bounds`, plus the
  /// code-unit count reported by `_withCStringAndLength`.
  ///
  /// - Parameter bounds: The range of `self` to expose to `body`.
  /// - Parameter targetEncoding: The encoding of the code units handed to
  ///   `body`.
  /// - Parameter body: The closure to call with the pointer and the count.
  /// - Returns: Whatever `body` returns; errors thrown by `body` are rethrown.
  internal func _withCSubstringAndLength<
    Result, TargetEncoding: UnicodeEncoding
  >(
    in bounds: Range<Index>,
    encoding targetEncoding: TargetEncoding.Type,
    _ body: (UnsafePointer<TargetEncoding.CodeUnit>, Int) throws -> Result
  ) rethrows -> Result {
    // Uncommon case: no contiguous storage, so transcode from the sliced core
    // itself, treating its elements as UTF-16.
    guard _fastPath(hasContiguousStorage) else {
      return try Swift._withCStringAndLength(
        encodedAs: targetEncoding,
        from: self[bounds],
        encodedAs: _Unicode.UTF16.self,
        execute: body
      )
    }

    // The branches below read the storage through raw pointers.
    // NOTE(review): `_fixLifetime` presumably keeps `self` alive until this
    // scope exits — confirm against its definition.
    defer { _fixLifetime(self) }

    if isASCII {
      return try Swift._withCStringAndLength(
        encodedAs: targetEncoding,
        from: UnsafeBufferPointer(start: startASCII, count: count)[bounds],
        encodedAs: _Unicode.ASCII.self,
        execute: body
      )
    }

    return try Swift._withCStringAndLength(
      encodedAs: targetEncoding,
      from: UnsafeBufferPointer(start: startUTF16, count: count)[bounds],
      encodedAs: _Unicode.UTF16.self,
      execute: body
    )
  }
}
191
+
192
extension String {
  /// Creates a string whose contents are the text encoded by `codeUnits`.
  ///
  /// Decoding is requested with `repairIllFormedSequences: true`.
  ///
  /// - Parameter codeUnits: A collection of code units in the given
  ///   `encoding`.
  /// - Parameter encoding: The encoding in which the code units should be
  ///   interpreted.
  public init<C: Collection, Encoding: UnicodeEncoding>(
    codeUnits: C, encoding: Encoding.Type
  ) where C.Iterator.Element == Encoding.CodeUnit {
    let decoded = _StringBuffer.fromCodeUnits(
      codeUnits, encoding: encoding, repairIllFormedSequences: true)
    // NOTE(review): the buffer is force-unwrapped; presumably it is never nil
    // when repair is requested — confirm against `fromCodeUnits`.
    self = String(_StringCore(decoded.0!))
  }

  /// Creates a string from zero-terminated code units in the given encoding.
  ///
  /// - Parameter nulTerminatedCodeUnits: A pointer to contiguous code units in
  ///   the given `encoding`, ending just before the first zero code unit.
  /// - Parameter encoding: The encoding in which the code units should be
  ///   interpreted.
  public init<Encoding: UnicodeEncoding>(
    cString nulTerminatedCodeUnits: UnsafePointer<Encoding.CodeUnit>,
    encoding: Encoding.Type
  ) {
    // View the memory as a collection that stops at the first zero code unit.
    let unitsBeforeTerminator = _SentinelCollection(
      UnsafeBufferPointer(_unboundedStartingAt: nulTerminatedCodeUnits),
      until: _IsZero()
    )
    self.init(codeUnits: unitsBeforeTerminator, encoding: encoding)
  }

  /// Calls `body` with a pointer to the contents of the string as a
  /// zero-terminated sequence of code units in the given encoding.
  ///
  /// - Parameter targetEncoding: The encoding of the code units handed to
  ///   `body`.
  /// - Parameter body: The closure to call with the zero-terminated code units.
  /// - Returns: Whatever `body` returns; errors thrown by `body` are rethrown.
  public func withCString<Result, TargetEncoding: UnicodeEncoding>(
    encoding targetEncoding: TargetEncoding.Type,
    _ body: (UnsafePointer<TargetEncoding.CodeUnit>) throws -> Result
  ) rethrows -> Result {
    // Expose the whole core, from its start index to its end index.
    return try _core._withCSubstring(
      in: _core.startIndex..<_core.endIndex,
      encoding: targetEncoding,
      body)
  }
}
228
+ // FIXME: complexity documentation for most methods on String ought to be
56
229
// qualified with "amortized" at least, as Characters are variable-length.
57
230
58
231
/// A Unicode string value.
0 commit comments