Skip to content

Commit 4ab45df

Browse files
committed
[String] Drop in initial UTF-8 String prototype
This is a giant squashing of a lot of individual changes prototyping a switch of String in Swift 5 to be natively encoded as UTF-8. It includes what's necessary for a functional prototype, dropping some history, but still leaves plenty of history available for future commits. My apologies to anyone trying to do code archeology between this commit and the one prior. This was the lesser of evils.
1 parent b2f60bf commit 4ab45df

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+3908
-9903
lines changed

stdlib/private/StdlibUnittest/StdlibCoreExtras.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ import Foundation
2828
//
2929

3030
func findSubstring(_ haystack: Substring, _ needle: String) -> String.Index? {
31-
return findSubstring(String(haystack._ephemeralContent), needle)
31+
return findSubstring(haystack._ephemeralString, needle)
3232
}
3333

3434
func findSubstring(_ string: String, _ substring: String) -> String.Index? {

stdlib/public/SDK/Foundation/NSStringAPI.swift

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,21 @@ extension Optional {
6666
}
6767
#endif
6868

69+
/// From a non-`nil` `UnsafePointer` to a null-terminated string
70+
/// with possibly-transient lifetime, create a null-terminated array of 'C' char.
71+
/// Returns `nil` if passed a null pointer.
72+
internal func _persistCString(_ p: UnsafePointer<CChar>?) -> [CChar]? {
73+
guard let cString = p else {
74+
return nil
75+
}
76+
let len = UTF8._nullCodeUnitOffset(in: cString)
77+
var result = [CChar](repeating: 0, count: len + 1)
78+
for i in 0..<len {
79+
result[i] = cString[i]
80+
}
81+
return result
82+
}
83+
6984
extension String {
7085
//===--- Class Methods --------------------------------------------------===//
7186
//===--------------------------------------------------------------------===//

stdlib/public/SDK/Foundation/String.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ extension Substring : _ObjectiveCBridgeable {
7272
result: inout Substring?
7373
) {
7474
let s = String(x)
75-
result = Substring(_base: s, s.startIndex ..< s.endIndex)
75+
result = s[...]
7676
}
7777

7878
public static func _conditionallyBridgeFromObjectiveC(
@@ -91,7 +91,7 @@ extension Substring : _ObjectiveCBridgeable {
9191
// string; map it to an empty substring.
9292
if _slowPath(source == nil) { return Substring() }
9393
let s = String(source!)
94-
return Substring(_base: s, s.startIndex ..< s.endIndex)
94+
return s[...]
9595
}
9696
}
9797

stdlib/public/core/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,14 @@ set(SWIFTLIB_ESSENTIAL
3939
Builtin.swift
4040
BuiltinMath.swift.gyb
4141
Character.swift
42-
CharacterUnicodeScalars.swift
4342
CocoaArray.swift
4443
Codable.swift.gyb
4544
Collection.swift
4645
CollectionAlgorithms.swift
4746
Comparable.swift
4847
CompilerProtocols.swift
4948
ContiguousArray.swift
49+
ContiguouslyStored.swift
5050
ClosedRange.swift
5151
ContiguousArrayBuffer.swift
5252
CString.swift

stdlib/public/core/CString.swift

Lines changed: 49 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@ extension String {
4444
///
4545
/// - Parameter cString: A pointer to a null-terminated UTF-8 code sequence.
4646
public init(cString: UnsafePointer<CChar>) {
47-
self = _decodeValidCString(cString, repair: true)
47+
let len = UTF8._nullCodeUnitOffset(in: cString)
48+
self = String._fromUTF8Repairing(
49+
UnsafeBufferPointer(start: cString._asUInt8, count: len)).0
4850
}
4951

5052
/// Creates a new string by copying the null-terminated UTF-8 data referenced
@@ -53,7 +55,9 @@ extension String {
5355
/// This is identical to `init(cString: UnsafePointer<CChar>)` but operates on an
5456
/// unsigned sequence of bytes.
5557
public init(cString: UnsafePointer<UInt8>) {
56-
self = _decodeValidCString(cString, repair: true)
58+
let len = UTF8._nullCodeUnitOffset(in: cString)
59+
self = String._fromUTF8Repairing(
60+
UnsafeBufferPointer(start: cString, count: len)).0
5761
}
5862

5963
/// Creates a new string by copying and validating the null-terminated UTF-8
@@ -83,9 +87,11 @@ extension String {
8387
///
8488
/// - Parameter cString: A pointer to a null-terminated UTF-8 code sequence.
8589
public init?(validatingUTF8 cString: UnsafePointer<CChar>) {
86-
guard let str = _decodeCString(cString, repair: false) else {
87-
return nil
88-
}
90+
let len = UTF8._nullCodeUnitOffset(in: cString)
91+
guard let str = String._tryFromUTF8(
92+
UnsafeBufferPointer(start: cString._asUInt8, count: len))
93+
else { return nil }
94+
8995
self = str
9096
}
9197

@@ -133,92 +139,50 @@ extension String {
133139
/// ill-formed sequence is detected, this method returns `nil`.
134140
@_specialize(where Encoding == Unicode.UTF8)
135141
@_specialize(where Encoding == Unicode.UTF16)
142+
@inlinable // Fold away specializations
136143
public static func decodeCString<Encoding : _UnicodeEncoding>(
137144
_ cString: UnsafePointer<Encoding.CodeUnit>?,
138145
as encoding: Encoding.Type,
139-
repairingInvalidCodeUnits isRepairing: Bool = true)
140-
-> (result: String, repairsMade: Bool)? {
141-
142-
guard let cString = cString else {
143-
return nil
146+
repairingInvalidCodeUnits isRepairing: Bool = true
147+
) -> (result: String, repairsMade: Bool)? {
148+
guard let cPtr = cString else { return nil }
149+
150+
if _fastPath(encoding == Unicode.UTF8.self) {
151+
let ptr = UnsafeRawPointer(cPtr).assumingMemoryBound(to: UInt8.self)
152+
let len = UTF8._nullCodeUnitOffset(in: ptr)
153+
let codeUnits = UnsafeBufferPointer(start: ptr, count: len)
154+
if isRepairing {
155+
return String._fromUTF8Repairing(codeUnits)
156+
} else {
157+
guard let str = String._tryFromUTF8(codeUnits) else { return nil }
158+
return (str, false)
159+
}
144160
}
145-
var end = cString
146-
while end.pointee != 0 { end += 1 }
147-
let len = end - cString
148-
return _decodeCString(
149-
cString, as: encoding, length: len,
150-
repairingInvalidCodeUnits: isRepairing)
151-
}
152-
153-
}
154161

155-
/// From a non-`nil` `UnsafePointer` to a null-terminated string
156-
/// with possibly-transient lifetime, create a null-terminated array of 'C' char.
157-
/// Returns `nil` if passed a null pointer.
158-
public func _persistCString(_ p: UnsafePointer<CChar>?) -> [CChar]? {
159-
guard let s = p else {
160-
return nil
161-
}
162-
let count = Int(_swift_stdlib_strlen(s))
163-
var result = [CChar](repeating: 0, count: count + 1)
164-
for i in 0..<count {
165-
result[i] = s[i]
166-
}
167-
return result
168-
}
169-
170-
internal func _decodeValidCString(
171-
_ cString: UnsafePointer<Int8>, repair: Bool
172-
) -> String {
173-
let len = UTF8._nullCodeUnitOffset(in: cString)
174-
return cString.withMemoryRebound(to: UInt8.self, capacity: len) {
175-
(ptr: UnsafePointer<UInt8>) -> String in
176-
let bufPtr = UnsafeBufferPointer(start: ptr, count: len)
177-
return String._fromWellFormedUTF8(bufPtr, repair: repair)
162+
var end = cPtr
163+
while end.pointee != 0 { end += 1 }
164+
let len = end - cPtr
165+
let codeUnits = UnsafeBufferPointer(start: cPtr, count: len)
166+
return String._fromCodeUnits(
167+
codeUnits, encoding: encoding, repair: isRepairing)
178168
}
179-
}
180-
181-
internal func _decodeValidCString(
182-
_ cString: UnsafePointer<UInt8>, repair: Bool
183-
) -> String {
184-
let len = UTF8._nullCodeUnitOffset(in: cString)
185-
let bufPtr = UnsafeBufferPointer(start: cString, count: len)
186-
return String._fromWellFormedUTF8(bufPtr, repair: repair)
187-
}
188-
189-
internal func _decodeCString(
190-
_ cString: UnsafePointer<Int8>, repair: Bool
191-
) -> String? {
192-
let len = UTF8._nullCodeUnitOffset(in: cString)
193-
return cString.withMemoryRebound(to: UInt8.self, capacity: len) {
194-
(ptr: UnsafePointer<UInt8>) -> String? in
195-
let bufPtr = UnsafeBufferPointer(start: ptr, count: len)
196-
return String._fromUTF8(bufPtr, repair: repair)
169+
/// Creates a string from the null-terminated sequence of code units at the given
170+
/// pointer.
171+
///
172+
/// - Parameters:
173+
/// - ptr: A pointer to a sequence of contiguous code
174+
/// units in the encoding specified in `sourceEncoding`, ending just
175+
/// before the first zero code unit.
176+
/// - sourceEncoding: The encoding in which the code units should be
177+
/// interpreted.
178+
@_specialize(where Encoding == Unicode.UTF8)
179+
@_specialize(where Encoding == Unicode.UTF16)
180+
@inlinable // Fold away specializations
181+
public init<Encoding: Unicode.Encoding>(
182+
decodingCString ptr: UnsafePointer<Encoding.CodeUnit>,
183+
as sourceEncoding: Encoding.Type
184+
) {
185+
self = String.decodeCString(ptr, as: sourceEncoding)!.0
197186
}
198187
}
199188

200-
internal func _decodeCString(
201-
_ cString: UnsafePointer<UInt8>, repair: Bool
202-
) -> String? {
203-
let len = UTF8._nullCodeUnitOffset(in: cString)
204-
let bufPtr = UnsafeBufferPointer(start: cString, count: len)
205-
return String._fromUTF8(bufPtr, repair: repair)
206-
}
207-
208-
/// Creates a new string by copying the null-terminated data referenced by
209-
/// the given pointer using the specified encoding.
210-
///
211-
/// This internal helper takes the string length as an argument.
212-
internal func _decodeCString<Encoding : _UnicodeEncoding>(
213-
_ cString: UnsafePointer<Encoding.CodeUnit>,
214-
as encoding: Encoding.Type, length: Int,
215-
repairingInvalidCodeUnits isRepairing: Bool = true)
216-
-> (result: String, repairsMade: Bool)? {
217-
218-
let buffer = UnsafeBufferPointer<Encoding.CodeUnit>(
219-
start: cString, count: length)
220-
221-
let (guts, hadError) = _StringGuts.fromCodeUnits(
222-
buffer, encoding: encoding, repairIllFormedSequences: isRepairing)
223-
return guts.map { (result: String($0), repairsMade: hadError) }
224-
}

0 commit comments

Comments
 (0)