Skip to content

Commit 9d9f900

Browse files
committed
[String] Define performance flags and plumb them throughout
1 parent 752423b commit 9d9f900

12 files changed

+341
-208
lines changed

stdlib/public/core/SmallString.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ extension _SmallString {
112112
internal func _invariantCheck() {
113113
// Avoid `asStringObject`, which triggers more invariant checks (runtime)
114114
var _object = _StringObject(zero:())
115-
_object._otherBits = _storage.0
115+
_object._countAndFlags = _StringObject.CountAndFlags(raw: _storage.0)
116116
_object._object = Builtin.reinterpretCast(_storage.1)
117117
_sanityCheck(_object.smallCount <= _SmallString.capacity)
118118
_sanityCheck(_object.smallIsASCII == computeIsASCII())

stdlib/public/core/String.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -859,7 +859,7 @@ extension String: _ExpressibleByBuiltinStringLiteral {
859859
self = String(_StringGuts(smol))
860860
return
861861
}
862-
self.init(_StringGuts(bufPtr, isKnownASCII: Bool(isASCII)))
862+
self.init(_StringGuts(bufPtr, isASCII: Bool(isASCII)))
863863
}
864864
}
865865

stdlib/public/core/StringBreadcrumbs.swift

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ internal final class _StringBreadcrumbs {
5555

5656
self.utf16Length = i
5757
_sanityCheck(self.crumbs.count == 1 + (self.utf16Length / stride))
58+
59+
_invariantCheck()
5860
}
5961
}
6062

@@ -92,6 +94,14 @@ extension _StringBreadcrumbs {
9294

9395
return (crumb, lowerBound &* stride)
9496
}
97+
98+
// Two build-dependent definitions of `_invariantCheck()`, selected by the
// `INTERNAL_CHECKS_ENABLED` compilation condition.
#if !INTERNAL_CHECKS_ENABLED
99+
// Checks disabled: an always-inlined no-op, so call sites cost nothing.
@nonobjc @inline(__always) internal func _invariantCheck() {}
100+
#else
101+
// Checks enabled: a never-inlined function. Its body is currently empty, so
// no invariants are actually verified yet; it is a placeholder for checks.
@nonobjc @inline(never) @_effects(releasenone)
102+
internal func _invariantCheck() {
103+
}
104+
#endif // INTERNAL_CHECKS_ENABLED
95105
}
96106

97107
extension _StringGuts {

stdlib/public/core/StringBridge.swift

Lines changed: 33 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -60,25 +60,6 @@ internal func _cocoaStringCopyCharacters(
6060
destination)
6161
}
6262

63-
64-
@_effects(releasenone)
65-
internal func _cocoaStringSlice(
66-
_ target: _CocoaString, _ bounds: Range<Int>
67-
) -> _CocoaString {
68-
let cfSelf: _swift_shims_CFStringRef = target
69-
70-
_sanityCheck(
71-
_swift_stdlib_CFStringGetCharactersPtr(cfSelf) == nil,
72-
"Known contiguously stored strings should already be converted to Swift")
73-
74-
let cfResult = _swift_stdlib_CFStringCreateWithSubstring(
75-
nil, cfSelf, _swift_shims_CFRange(
76-
location: bounds.lowerBound, length: bounds.count)) as AnyObject
77-
78-
return cfResult
79-
}
80-
81-
8263
@_effects(readonly)
8364
internal func _cocoaStringSubscript(
8465
_ target: _CocoaString, _ position: Int
@@ -91,10 +72,10 @@ internal func _cocoaStringSubscript(
9172
// Conversion from NSString to Swift's native representation
9273
//
9374

94-
internal var kCFStringEncodingASCII : _swift_shims_CFStringEncoding {
75+
private var kCFStringEncodingASCII : _swift_shims_CFStringEncoding {
9576
@inline(__always) get { return 0x0600 }
9677
}
97-
internal var kCFStringEncodingUTF8 : _swift_shims_CFStringEncoding {
78+
private var kCFStringEncodingUTF8 : _swift_shims_CFStringEncoding {
9879
@inline(__always) get { return 0x8000100 }
9980
}
10081

@@ -115,6 +96,7 @@ internal func _bridgeTagged(
11596
return length == numCharWritten ? count : nil
11697
}
11798

99+
@_effects(releasenone) // @opaque
118100
internal func _cocoaUTF8Pointer(_ str: _CocoaString) -> UnsafePointer<UInt8>? {
119101
// TODO(UTF8): Is there a better interface here? This requires nul
120102
// termination and may assume ASCII.
@@ -125,23 +107,25 @@ internal func _cocoaUTF8Pointer(_ str: _CocoaString) -> UnsafePointer<UInt8>? {
125107
return ptr._asUInt8
126108
}
127109

110+
/// The result of asking a Cocoa string for a pointer to its contents, as
/// returned by `_getCocoaStringPointer`. Each pointer-carrying case records
/// which kind of code units the pointer refers to.
private enum CocoaStringPointer {
111+
// `UInt8` pointer; `_bridgeCocoaString` treats this case as both fast-UTF8
// and ASCII. NOTE(review): the visible `_getCocoaStringPointer` never
// returns this case (see its "Remember Cocoa ASCII-ness" TODO) — confirm.
case ascii(UnsafePointer<UInt8>)
112+
// `UInt8` pointer obtained from `_cocoaUTF8Pointer`; bridged as fast-UTF8
// but not marked ASCII.
case utf8(UnsafePointer<UInt8>)
113+
// `UInt16` pointer obtained from `_swift_stdlib_CFStringGetCharactersPtr`;
// bridged as neither fast-UTF8 nor ASCII.
case utf16(UnsafePointer<UInt16>)
114+
// Neither pointer query produced a result.
case none
115+
}
116+
128117
@_effects(readonly)
129-
internal func _getCocoaStringPointer(
118+
private func _getCocoaStringPointer(
130119
_ cfImmutableValue: _CocoaString
131-
) -> (UnsafeRawPointer?, isUTF16: Bool) {
132-
let nulTerminatedASCII = _cocoaUTF8Pointer(cfImmutableValue)
133-
134-
// start will hold the base pointer of contiguous storage, if it
135-
// is found.
136-
var start: UnsafeRawPointer?
137-
let isUTF16 = (nulTerminatedASCII == nil)
138-
if isUTF16 {
139-
let utf16Buf = _swift_stdlib_CFStringGetCharactersPtr(cfImmutableValue)
140-
start = UnsafeRawPointer(utf16Buf)
141-
} else {
142-
start = UnsafeRawPointer(nulTerminatedASCII)
120+
) -> CocoaStringPointer {
121+
if let utf8Ptr = _cocoaUTF8Pointer(cfImmutableValue) {
122+
// TODO(UTF8 perf): Remember Cocoa ASCII-ness
123+
return .utf8(utf8Ptr)
124+
}
125+
if let utf16Ptr = _swift_stdlib_CFStringGetCharactersPtr(cfImmutableValue) {
126+
return .utf16(utf16Ptr)
143127
}
144-
return (start, isUTF16: isUTF16)
128+
return .none
145129
}
146130

147131
@usableFromInline
@@ -170,13 +154,19 @@ internal func _bridgeCocoaString(_ cocoaString: _CocoaString) -> _StringGuts {
170154
return _StringGuts(_SmallString(taggedCocoa: immutableCopy))
171155
}
172156

173-
let (start, isUTF16) = _getCocoaStringPointer(immutableCopy)
157+
let (fastUTF8, isASCII): (Bool, Bool)
158+
switch _getCocoaStringPointer(immutableCopy) {
159+
case .ascii(_): (fastUTF8, isASCII) = (true, true)
160+
case .utf8(_): (fastUTF8, isASCII) = (true, false)
161+
default: (fastUTF8, isASCII) = (false, false)
162+
}
174163
let length = _stdlib_binary_CFStringGetLength(immutableCopy)
175164

176-
// Detect fast-UTF8 Cocoa
177-
let fastUTF8 = !isUTF16 && start != nil
178165
return _StringGuts(
179-
cocoa: immutableCopy, providesFastUTF8: fastUTF8, length: length)
166+
cocoa: immutableCopy,
167+
providesFastUTF8: fastUTF8,
168+
isASCII: isASCII,
169+
length: length)
180170
}
181171

182172
extension String {
@@ -195,7 +185,7 @@ extension String {
195185
// other such visitors.
196186
if _guts._object.isSmall {
197187
return _guts._object.asSmallString.withUTF8 { bufPtr in
198-
// TODO(UTF8 perf): worth isKnownASCII check for different encoding?
188+
// TODO(UTF8 perf): worth isASCII check for different encoding?
199189
return _swift_stdlib_CFStringCreateWithBytes(
200190
nil, bufPtr.baseAddress._unsafelyUnwrappedUnchecked,
201191
bufPtr.count,
@@ -204,7 +194,9 @@ extension String {
204194
}
205195
}
206196
if _guts._object.isImmortal {
207-
return _SharedStringStorage(immortal: _guts._object.fastUTF8)
197+
return _SharedStringStorage(
198+
immortal: _guts._object.fastUTF8.baseAddress!,
199+
countAndFlags: _guts._object._countAndFlags)
208200
}
209201

210202
_sanityCheck(_guts._object.hasObjCBridgeableObject,

stdlib/public/core/StringComparison.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ internal func _compareStringsCanonical(
8888
return .equal
8989
}
9090

91-
if left.isKnownASCII && right.isKnownASCII {
91+
if left.isASCII && right.isASCII {
9292
return left.withFastUTF8 { l in
9393
return right.withFastUTF8 { r in
9494
return _StringComparison(signedNotation: l.compareASCII(to: r))

stdlib/public/core/StringCreate.swift

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,33 @@
1212
// String Creation Helpers
1313
//===----------------------------------------------------------------------===//
1414

15+
/// Returns whether every byte in `input` is an ASCII code unit (`<= 0x7F`).
///
/// An empty buffer contains no non-ASCII bytes, so the result is `true` for
/// it, including when its `baseAddress` is `nil`.
///
/// - Parameter input: The bytes to scan.
/// - Returns: `false` as soon as a byte above `0x7F` is found, `true`
///   otherwise.
internal func _allASCII(_ input: UnsafeBufferPointer<UInt8>) -> Bool {
  // An empty buffer is allowed to have a nil `baseAddress`. There is nothing
  // to scan in that case, and unwrapping the nil pointer would be invalid.
  guard let ptr = input.baseAddress else { return true }

  // NOTE: Avoiding for-in syntax to avoid bounds checks
  //
  // TODO(UTF8 perf): Vectorize and/or incorporate into validity checking,
  // perhaps both.
  //
  var i = 0
  while i < input.count {
    guard ptr[i] <= 0x7F else { return false }
    i &+= 1
  }
  return true
}
29+
1530
extension String {
1631
@usableFromInline
1732
internal static func _fromASCII(
1833
_ input: UnsafeBufferPointer<UInt8>
1934
) -> String {
35+
_sanityCheck(_allASCII(input), "not actually ASCII")
36+
2037
if let smol = _SmallString(input) {
2138
return String(_StringGuts(smol))
2239
}
2340

24-
// TODO(UTF8): Do we want to do remember ASCII-ness?
25-
let storage = _StringStorage.create(initializingFrom: input)
41+
let storage = _StringStorage.create(initializingFrom: input, isASCII: true)
2642
return storage.asString
2743
}
2844

@@ -73,8 +89,9 @@ extension String {
7389
return String(_StringGuts(smol))
7490
}
7591

76-
// TODO(UTF8): Do we want to do an ascii scan?
77-
let storage = _StringStorage.create(initializingFrom: input)
92+
let isASCII = false // TODO: _allASCII(input)
93+
let storage = _StringStorage.create(
94+
initializingFrom: input, isASCII: isASCII)
7895
return storage.asString
7996
}
8097

stdlib/public/core/StringGuts.swift

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,14 +59,14 @@ extension _StringGuts {
5959
}
6060

6161
@inlinable @inline(__always)
62-
internal init(_ bufPtr: UnsafeBufferPointer<UInt8>, isKnownASCII: Bool) {
63-
self.init(_StringObject(immortal: bufPtr, isASCII: isKnownASCII))
62+
internal init(_ bufPtr: UnsafeBufferPointer<UInt8>, isASCII: Bool) {
63+
self.init(_StringObject(immortal: bufPtr, isASCII: isASCII))
6464
}
6565

6666
@inlinable @inline(__always)
6767
internal init(_ storage: _StringStorage) {
6868
// TODO(UTF8): We should probably store perf flags on the storage's capacity
69-
self.init(_StringObject(storage, isASCII: false))
69+
self.init(_StringObject(storage))
7070
}
7171

7272
internal init(_ storage: _SharedStringStorage) {
@@ -83,9 +83,14 @@ extension _StringGuts {
8383
self.init(_StringObject(storage, isASCII: false))
8484
}
8585

86-
internal init(cocoa: AnyObject, providesFastUTF8: Bool, length: Int) {
86+
internal init(
87+
cocoa: AnyObject, providesFastUTF8: Bool, isASCII: Bool, length: Int
88+
) {
8789
self.init(_StringObject(
88-
cocoa: cocoa, providesFastUTF8: providesFastUTF8, length: length))
90+
cocoa: cocoa,
91+
providesFastUTF8: providesFastUTF8,
92+
isASCII: isASCII,
93+
length: length))
8994
}
9095
}
9196

@@ -99,7 +104,7 @@ extension _StringGuts {
99104
internal var isEmpty: Bool { @inline(__always) get { return count == 0 } }
100105

101106
@inlinable
102-
internal var isKnownASCII: Bool {
107+
internal var isASCII: Bool {
103108
@inline(__always) get { return _object.isASCII }
104109
}
105110

stdlib/public/core/StringGutsRangeReplaceable.swift

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,10 @@ extension _StringGuts {
8080
self.uniqueNativeCapacity == nil || self.uniqueNativeCapacity! < n)
8181

8282
if _fastPath(isFastUTF8) {
83+
let isASCII = self.isASCII
8384
let storage = self.withFastUTF8 {
84-
_StringStorage.create(initializingFrom: $0, capacity: n)
85+
_StringStorage.create(
86+
initializingFrom: $0, capacity: n, isASCII: isASCII)
8587
}
8688

8789
// TODO(UTF8): Track known ascii
@@ -98,7 +100,7 @@ extension _StringGuts {
98100
let selfUTF8 = Array(String(self).utf8)
99101
selfUTF8.withUnsafeBufferPointer {
100102
self = _StringGuts(_StringStorage.create(
101-
initializingFrom: $0, capacity: n))
103+
initializingFrom: $0, capacity: n, isASCII: self.isASCII))
102104
}
103105
}
104106

@@ -142,14 +144,17 @@ extension _StringGuts {
142144
"growth should produce uniqueness")
143145

144146
if other.isFastUTF8 {
145-
other.withFastUTF8 { self.appendInPlace($0) }
147+
let otherIsASCII = other.isASCII
148+
other.withFastUTF8 { self.appendInPlace($0, isASCII: otherIsASCII) }
146149
return
147150
}
148151
_foreignAppendInPlace(other)
149152
}
150153

151-
internal mutating func appendInPlace(_ other: UnsafeBufferPointer<UInt8>) {
152-
self._object.nativeStorage.appendInPlace(other)
154+
internal mutating func appendInPlace(
155+
_ other: UnsafeBufferPointer<UInt8>, isASCII: Bool
156+
) {
157+
self._object.nativeStorage.appendInPlace(other, isASCII: isASCII)
153158

154159
// We re-initialize from the modified storage to pick up new count, flags,
155160
// etc.
@@ -162,7 +167,7 @@ extension _StringGuts {
162167
_sanityCheck(self.uniqueNativeUnusedCapacity != nil)
163168

164169
var iter = String(other).utf8.makeIterator()
165-
self._object.nativeStorage.appendInPlace(&iter)
170+
self._object.nativeStorage.appendInPlace(&iter, isASCII: other.isASCII)
166171

167172
// We re-initialize from the modified storage to pick up new count, flags,
168173
// etc.
@@ -207,7 +212,8 @@ extension _StringGuts {
207212
if isUniqueNative {
208213
if let replStr = newElements as? String, replStr._guts.isFastUTF8 {
209214
replStr._guts.withFastUTF8 {
210-
uniqueNativeReplaceSubrange(bounds, with: $0)
215+
uniqueNativeReplaceSubrange(
216+
bounds, with: $0, isASCII: replStr._guts.isASCII)
211217
}
212218
return
213219
}
@@ -224,7 +230,8 @@ extension _StringGuts {
224230

225231
internal mutating func uniqueNativeReplaceSubrange(
226232
_ bounds: Range<Index>,
227-
with codeUnits: UnsafeBufferPointer<UInt8>
233+
with codeUnits: UnsafeBufferPointer<UInt8>,
234+
isASCII: Bool
228235
) {
229236
let neededCapacity =
230237
bounds.lowerBound.encodedOffset

0 commit comments

Comments
 (0)