@@ -138,8 +138,7 @@ extension String {
138
138
> (
139
139
_ input: Input ,
140
140
encoding: Encoding . Type ,
141
- repairIllFormedSequences: Bool ,
142
- minimumCapacity: Int = 0
141
+ repairIllFormedSequences: Bool
143
142
) -> String ?
144
143
where Input. Element == Encoding . CodeUnit {
145
144
@@ -154,7 +153,7 @@ extension String {
154
153
return nil
155
154
}
156
155
157
- let capacity = Swift . max ( utf16Count, minimumCapacity )
156
+ let capacity = utf16Count
158
157
if isASCII {
159
158
if let small = _SmallUTF8String (
160
159
_fromCodeUnits: input,
@@ -201,6 +200,43 @@ extension String {
201
200
}
202
201
}
203
202
203
/// Creates a string from UTF-8 code units that the caller has already
/// determined are not all ASCII.
///
/// - Parameters:
///   - input: The UTF-8 code units to decode.
///   - repair: Forwarded as `repairingIllFormedSequences` to the length
///     check, and (negated) as `stoppingOnError` to the transcoder.
/// - Returns: The decoded string, or `nil` when `UTF16.transcodedLength`
///   returns `nil` for `input`.
internal static func _fromNonASCIIUTF8(
  _ input: UnsafeBufferPointer<UInt8>, repair: Bool
) -> String? {
  // Use the small-string form whenever its initializer accepts the input.
  if let smallForm = _SmallUTF8String(input) {
    return String(_StringGuts(smallForm))
  }

  // First pass: determine how many UTF-16 code units we'll need.
  // TODO: Replace with much, much faster length check
  let measured = UTF16.transcodedLength(
    of: input.makeIterator(),
    decodedAs: UTF8.self,
    repairingIllFormedSequences: repair)
  guard let (utf16Count, isASCII) = measured else { return nil }
  _sanityCheck(!isASCII, " was given ASCII UTF-8 ")

  // Allocate storage sized exactly to the transcoded length.
  let storage = _SwiftStringStorage<UTF16.CodeUnit>.create(
    capacity: utf16Count,
    count: utf16Count)

  // Second pass: write each transcoded code unit through a moving cursor.
  // TODO: Replace with much, much faster transcoding
  var cursor = storage.start
  _ = transcode(
    input.makeIterator(),
    from: UTF8.self, to: UTF16.self,
    stoppingOnError: !repair
  ) { codeUnit in
    cursor.pointee = codeUnit
    cursor += 1
  }
  return String(_largeStorage: storage)
}
239
+
204
240
/// Creates a string from the given Unicode code units in the specified
205
241
/// encoding.
206
242
///
@@ -210,9 +246,24 @@ extension String {
210
246
/// - sourceEncoding: The encoding in which `codeUnits` should be
211
247
/// interpreted.
212
248
@inlinable // FIXME(sil-serialize-all)
@inline(__always) // Eliminate dynamic type check when possible
public init<C: Collection, Encoding: Unicode.Encoding>(
  decoding codeUnits: C, as sourceEncoding: Encoding.Type
) where C.Iterator.Element == Encoding.CodeUnit {
  // Fast path: UTF-8 input that exposes its bytes contiguously is decoded
  // straight from the underlying buffer.
  if let contigBytes = codeUnits as? _HasContiguousBytes,
    sourceEncoding == UTF8.self
  {
    self = contigBytes.withUnsafeBytes { rawBufPtr in
      // An empty buffer may report a nil base address, so keep the pointer
      // optional instead of unwrapping it unchecked; a nil start with a
      // zero count is a valid `UnsafeBufferPointer`.
      let start = rawBufPtr.baseAddress?.assumingMemoryBound(to: UInt8.self)
      return String._fromUTF8(
        UnsafeBufferPointer(start: start, count: rawBufPtr.count),
        repair: true).unsafelyUnwrapped
    }
    return
  }

  // General path: decode through the generic code-unit initializer,
  // repairing ill-formed sequences.
  self = String._fromCodeUnits(
    codeUnits, encoding: sourceEncoding, repairIllFormedSequences: true)!
}
@@ -629,20 +680,47 @@ internal func _isAllASCII(_ input: UnsafeBufferPointer<UInt8>) -> Bool {
629
680
return true
630
681
}
631
682
683
+ // TODO: re-organize a bit before merging...
684
+
685
/// A type that can hand a closure a raw, read-only view of its bytes.
///
/// `String.init(decoding:as:)` casts its input to this protocol to reach
/// the input's bytes through `withUnsafeBytes`.
@usableFromInline
internal protocol _HasContiguousBytes {
  /// Calls `body` with a raw buffer over this value's bytes and returns
  /// whatever `body` returns, rethrowing any error it throws.
  func withUnsafeBytes<R>(
    _ body: (UnsafeRawBufferPointer) throws -> R
  ) rethrows -> R
}

// No members are added here, so the requirement is met by a
// `withUnsafeBytes` that `Array` already declares.
extension Array: _HasContiguousBytes {}
692
extension UnsafeBufferPointer: _HasContiguousBytes {
  /// Calls `body` with a raw view of the memory this buffer covers.
  ///
  /// - Parameter body: A closure receiving an `UnsafeRawBufferPointer`
  ///   whose byte count is `count` times the element stride.
  /// - Returns: The value returned by `body`; errors from `body` are
  ///   rethrown.
  @inlinable
  @inline(__always)
  func withUnsafeBytes<R>(
    _ body: (UnsafeRawBufferPointer) throws -> R
  ) rethrows -> R {
    // `baseAddress` can be nil for an empty buffer. The failable pointer
    // conversion carries that nil through rather than unwrapping it
    // unchecked, and the raw buffer initializer accepts a nil start.
    let ptr = UnsafeRawPointer(self.baseAddress)
    let len = self.count &* MemoryLayout<Element>.stride
    return try body(UnsafeRawBufferPointer(start: ptr, count: len))
  }
}
703
extension UnsafeMutableBufferPointer: _HasContiguousBytes {
  /// Calls `body` with a read-only raw view of the memory this buffer
  /// covers.
  ///
  /// - Parameter body: A closure receiving an `UnsafeRawBufferPointer`
  ///   whose byte count is `count` times the element stride.
  /// - Returns: The value returned by `body`; errors from `body` are
  ///   rethrown.
  @inlinable
  @inline(__always)
  func withUnsafeBytes<R>(
    _ body: (UnsafeRawBufferPointer) throws -> R
  ) rethrows -> R {
    // `baseAddress` can be nil for an empty buffer. The failable pointer
    // conversion carries that nil through rather than unwrapping it
    // unchecked, and the raw buffer initializer accepts a nil start.
    let ptr = UnsafeRawPointer(self.baseAddress)
    let len = self.count &* MemoryLayout<Element>.stride
    return try body(UnsafeRawBufferPointer(start: ptr, count: len))
  }
}
714
+
632
715
extension String {
633
/// Creates a string from UTF-8 code units, dispatching on whether the
/// input is entirely ASCII.
///
/// - Parameters:
///   - input: The UTF-8 code units to decode.
///   - repair: Forwarded to `_fromNonASCIIUTF8` when the input is not
///     all ASCII.
/// - Returns: The result of `_fromASCII` for all-ASCII input, otherwise
///   whatever `_fromNonASCIIUTF8` returns (which may be `nil`).
@usableFromInline
static func _fromUTF8(
  _ input: UnsafeBufferPointer<UInt8>, repair: Bool
) -> String? {
  guard _isAllASCII(input) else {
    return _fromNonASCIIUTF8(input, repair: repair)
  }
  return _fromASCII(input)
}
647
725
648
726
@usableFromInline
@@ -659,10 +737,10 @@ extension String {
659
737
}
660
738
661
739
/// Creates a string from UTF-8 code units, trapping if decoding fails.
///
/// - Parameters:
///   - input: The UTF-8 code units to decode.
///   - repair: Forwarded to `_fromUTF8`; defaults to `false`.
/// - Returns: The decoded string. The optional result of `_fromUTF8` is
///   force-unwrapped, so a `nil` result traps.
@usableFromInline
static func _fromWellFormedUTF8(
  _ input: UnsafeBufferPointer<UInt8>, repair: Bool = false
) -> String {
  let decoded: String? = String._fromUTF8(input, repair: repair)
  return decoded!
}
667
745
}
668
746
@@ -759,7 +837,7 @@ extension String : _ExpressibleByBuiltinStringLiteral {
759
837
self = String ( _StringGuts ( _large: _UnmanagedString ( bufPtr) ) )
760
838
return
761
839
}
762
- self = String . _fromWellFormedUTF8CodeUnitSequence ( bufPtr)
840
+ self = String . _fromWellFormedUTF8 ( bufPtr)
763
841
}
764
842
}
765
843
@@ -951,7 +1029,7 @@ extension String {
951
1029
utf8CodeUnitCount: Int
952
1030
) {
953
1031
resultStorage. initialize ( to:
954
- String . _fromWellFormedUTF8CodeUnitSequence (
1032
+ String . _fromWellFormedUTF8 (
955
1033
UnsafeBufferPointer ( start: start, count: utf8CodeUnitCount) ) )
956
1034
}
957
1035
}
0 commit comments