Skip to content

Commit 8851bac

Browse files
committed
[String] Inlining, NFC fast paths, and more.
Add inlinability annotations to restore performance parity with 4.2 String. Take advantage of known NFC as a fast-path for comparison, and overhaul comparison dispatch. RRC improvements and optimizations.
1 parent 9d9f900 commit 8851bac

19 files changed

+636
-363
lines changed

stdlib/public/core/Character.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,13 +205,15 @@ extension String {
205205

206206
extension Character : Equatable {
207207
@inlinable @inline(__always)
208+
@_effects(readonly)
208209
public static func == (lhs: Character, rhs: Character) -> Bool {
209210
return lhs._str == rhs._str
210211
}
211212
}
212213

213214
extension Character : Comparable {
214215
@inlinable @inline(__always)
216+
@_effects(readonly)
215217
public static func < (lhs: Character, rhs: Character) -> Bool {
216218
return lhs._str < rhs._str
217219
}

stdlib/public/core/ContiguouslyStored.swift

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -74,19 +74,17 @@ extension String: _HasContiguousBytes {
7474
extension Substring: _HasContiguousBytes {
7575
@inlinable
7676
var _providesContiguousBytesNoCopy: Bool {
77-
@inline(__always) get { return self.wholeGuts.isFastUTF8 }
77+
@inline(__always) get { return self._wholeGuts.isFastUTF8 }
7878
}
7979

8080
@inlinable @inline(__always)
8181
func withUnsafeBytes<R>(
8282
_ body: (UnsafeRawBufferPointer) throws -> R
8383
) rethrows -> R {
84-
// TODO(UTF8): less error prone to have Substring and/or slice provide a
85-
// sliced fastUTF8
86-
if _fastPath(self.wholeGuts.isFastUTF8) {
87-
return try self.wholeGuts.withFastUTF8() {
88-
try body(UnsafeRawBufferPointer(UnsafeBufferPointer(rebasing:
89-
$0[self.startIndex.encodedOffset..<self.endIndex.encodedOffset])))
84+
let sliced = self._slicedGuts
85+
if _fastPath(sliced.isFastUTF8) {
86+
return try sliced.withFastUTF8 {
87+
return try body(UnsafeRawBufferPointer($0))
9088
}
9189
}
9290

stdlib/public/core/NormalizedCodeUnitIterator.swift

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,8 @@ internal struct _NormalizedUTF8CodeUnitIterator: IteratorProtocol {
142142
var bufferIndex = 0
143143
var bufferCount = 0
144144

145-
internal init(_ guts: _StringGuts, range: Range<String.Index>) {
145+
internal init(foreign guts: _StringGuts, range: Range<String.Index>) {
146+
_sanityCheck(guts.isForeign)
146147
utf16Iterator = _NormalizedCodeUnitIterator(guts, range)
147148
}
148149

@@ -188,7 +189,7 @@ internal struct _NormalizedUTF8CodeUnitIterator: IteratorProtocol {
188189

189190
internal mutating func compare(
190191
with other: _NormalizedUTF8CodeUnitIterator
191-
) -> _StringComparison {
192+
) -> _StringComparisonResult {
192193
var mutableOther = other
193194

194195
for cu in self {
@@ -240,7 +241,7 @@ struct _NormalizedCodeUnitIterator: IteratorProtocol {
240241

241242
mutating func compare(
242243
with other: _NormalizedCodeUnitIterator
243-
) -> _StringComparison {
244+
) -> _StringComparisonResult {
244245
var mutableOther = other
245246
for cu in IteratorSequence(self) {
246247
if let otherCU = mutableOther.next() {

stdlib/public/core/SmallString.swift

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ extension _SmallString {
112112
internal func _invariantCheck() {
113113
// Avoid `asStringObject`, which triggers more invariant checks (runtime)
114114
var _object = _StringObject(zero:())
115-
_object._countAndFlags = _StringObject.CountAndFlags(raw: _storage.0)
115+
_object._countAndFlags = Builtin.reinterpretCast(_storage.0)
116116
_object._object = Builtin.reinterpretCast(_storage.1)
117117
_sanityCheck(_object.smallCount <= _SmallString.capacity)
118118
_sanityCheck(_object.smallIsASCII == computeIsASCII())
@@ -242,7 +242,19 @@ extension _SmallString {
242242

243243
// Appending
244244
@usableFromInline // testable
245-
internal init?(base: _StringGuts, appending other: _StringGuts) {
245+
@_effects(releasenone)
246+
internal init?(
247+
base: __shared _StringGuts, appending other: __shared _StringGuts
248+
) {
249+
self.init(
250+
base: _SlicedStringGuts(base), appending: _SlicedStringGuts(other))
251+
}
252+
253+
// Appending
254+
@_effects(releasenone)
255+
internal init?(
256+
base: __shared _SlicedStringGuts, appending other: __shared _SlicedStringGuts
257+
) {
246258
guard (base.utf8Count + other.utf8Count) <= _SmallString.capacity else {
247259
return nil
248260
}

stdlib/public/core/String.swift

Lines changed: 109 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -898,6 +898,7 @@ extension String: CustomDebugStringConvertible {
898898
}
899899

900900
extension String {
901+
@inlinable // Forward inlinability to append
901902
@_effects(readonly) @_semantics("string.concat")
902903
public static func + (lhs: String, rhs: String) -> String {
903904
var result = lhs
@@ -906,6 +907,7 @@ extension String {
906907
}
907908

908909
// String append
910+
@inlinable // Forward inlinability to append
909911
public static func += (lhs: inout String, rhs: String) {
910912
lhs.append(rhs)
911913
}
@@ -1014,18 +1016,80 @@ internal func _stdlib_NSStringUppercaseString(_ str: AnyObject) -> _CocoaString
10141016

10151017
// Unicode algorithms
10161018
extension String {
1017-
/// Returns a lowercase version of the string.
1018-
///
1019-
/// Here's an example of transforming a string to all lowercase letters.
1020-
///
1021-
/// let cafe = "BBQ Café 🍵"
1022-
/// print(cafe.lowercased())
1023-
/// // Prints "bbq café 🍵"
1024-
///
1025-
/// - Returns: A lowercase copy of the string.
1026-
///
1027-
/// - Complexity: O(*n*)
1028-
public func lowercased() -> String {
1019+
@inline(__always)
1020+
internal func _uppercaseASCII(_ x: UInt8) -> UInt8 {
1021+
/// A "table" for which ASCII characters need to be upper cased.
1022+
/// To determine which bit corresponds to which ASCII character, subtract 1
1023+
/// from the ASCII value of that character and divide by 2. The bit is set iff
1024+
/// that character is a lower case character.
1025+
let _lowercaseTable: UInt64 =
1026+
0b0001_1111_1111_1111_0000_0000_0000_0000 &<< 32
1027+
1028+
// Lookup if it should be shifted in our ascii table, then we subtract 0x20 if
1029+
// it should, 0x0 if not.
1030+
// This code is equivalent to:
1031+
// This code is equivalent to:
1032+
// switch sourcex {
1033+
// case let x where (x >= 0x61 && x <= 0x7a):
1034+
// return x &- 0x20
1035+
// case let x:
1036+
// return x
1037+
// }
1038+
let isLower = _lowercaseTable &>> UInt64(((x &- 1) & 0b0111_1111) &>> 1)
1039+
let toSubtract = (isLower & 0x1) &<< 5
1040+
return x &- UInt8(truncatingIfNeeded: toSubtract)
1041+
}
1042+
1043+
@inline(__always)
1044+
internal func _lowercaseASCII(_ x: UInt8) -> UInt8 {
1045+
/// A "table" for which ASCII characters need to be lower cased.
1046+
/// To determine which bit corresponds to which ASCII character, subtract 1
1047+
/// from the ASCII value of that character and divide by 2. The bit is set iff
1048+
/// that character is an upper case character.
1049+
let _uppercaseTable: UInt64 =
1050+
0b0000_0000_0000_0000_0001_1111_1111_1111 &<< 32
1051+
1052+
// Lookup if it should be shifted in our ascii table, then we add 0x20 if
1053+
// it should, 0x0 if not.
1054+
// This code is equivalent to:
1055+
// This code is equivalent to:
1056+
// switch sourcex {
1057+
// case let x where (x >= 0x41 && x <= 0x5a):
1058+
// return x &+ 0x20
1059+
// case let x:
1060+
// return x
1061+
// }
1062+
let isUpper = _uppercaseTable &>> UInt64(((x &- 1) & 0b0111_1111) &>> 1)
1063+
let toAdd = (isUpper & 0x1) &<< 5
1064+
return x &+ UInt8(truncatingIfNeeded: toAdd)
1065+
}
1066+
1067+
1068+
/// Returns a lowercase version of the string.
1069+
///
1070+
/// Here's an example of transforming a string to all lowercase letters.
1071+
///
1072+
/// let cafe = "BBQ Café 🍵"
1073+
/// print(cafe.lowercased())
1074+
/// // Prints "bbq café 🍵"
1075+
///
1076+
/// - Returns: A lowercase copy of the string.
1077+
///
1078+
/// - Complexity: O(*n*)
1079+
@_effects(releasenone)
1080+
public func lowercased() -> String {
1081+
if _fastPath(_guts.isFastASCII) {
1082+
return _guts.withFastUTF8 { utf8 in
1083+
// TODO(UTF8 perf): code-unit appendInPlace on guts
1084+
var result = String()
1085+
result.reserveCapacity(utf8.count)
1086+
for u8 in utf8 {
1087+
result._guts.append(String(Unicode.Scalar(_lowercaseASCII(u8)))._guts)
1088+
}
1089+
return result
1090+
}
1091+
}
1092+
10291093
// TODO(UTF8 perf): This is a horribly slow means...
10301094
let codeUnits = Array(self.utf16).withUnsafeBufferPointer {
10311095
(uChars: UnsafeBufferPointer<UInt16>) -> Array<UInt16> in
@@ -1059,20 +1123,33 @@ extension String {
10591123
return result
10601124
}
10611125
return codeUnits.withUnsafeBufferPointer { String._uncheckedFromUTF16($0) }
1062-
}
1126+
}
1127+
1128+
/// Returns an uppercase version of the string.
1129+
///
1130+
/// The following example transforms a string to uppercase letters:
1131+
///
1132+
/// let cafe = "Café 🍵"
1133+
/// print(cafe.uppercased())
1134+
/// // Prints "CAFÉ 🍵"
1135+
///
1136+
/// - Returns: An uppercase copy of the string.
1137+
///
1138+
/// - Complexity: O(*n*)
1139+
@_effects(releasenone)
1140+
public func uppercased() -> String {
1141+
if _fastPath(_guts.isFastASCII) {
1142+
return _guts.withFastUTF8 { utf8 in
1143+
// TODO(UTF8 perf): code-unit appendInPlace on guts
1144+
var result = String()
1145+
result.reserveCapacity(utf8.count)
1146+
for u8 in utf8 {
1147+
result._guts.append(String(Unicode.Scalar(_uppercaseASCII(u8)))._guts)
1148+
}
1149+
return result
1150+
}
1151+
}
10631152

1064-
/// Returns an uppercase version of the string.
1065-
///
1066-
/// The following example transforms a string to uppercase letters:
1067-
///
1068-
/// let cafe = "Café 🍵"
1069-
/// print(cafe.uppercased())
1070-
/// // Prints "CAFÉ 🍵"
1071-
///
1072-
/// - Returns: An uppercase copy of the string.
1073-
///
1074-
/// - Complexity: O(*n*)
1075-
public func uppercased() -> String {
10761153
// TODO(UTF8 perf): This is a horribly slow means...
10771154
let codeUnits = Array(self.utf16).withUnsafeBufferPointer {
10781155
(uChars: UnsafeBufferPointer<UInt16>) -> Array<UInt16> in
@@ -1106,14 +1183,14 @@ extension String {
11061183
return result
11071184
}
11081185
return codeUnits.withUnsafeBufferPointer { String._uncheckedFromUTF16($0) }
1109-
}
1186+
}
11101187

1111-
/// Creates an instance from the description of a given
1112-
/// `LosslessStringConvertible` instance.
1113-
@inlinable @inline(__always)
1114-
public init<T : LosslessStringConvertible>(_ value: T) {
1115-
self = value.description
1116-
}
1188+
/// Creates an instance from the description of a given
1189+
/// `LosslessStringConvertible` instance.
1190+
@inlinable @inline(__always)
1191+
public init<T : LosslessStringConvertible>(_ value: T) {
1192+
self = value.description
1193+
}
11171194
}
11181195

11191196
extension String: CustomStringConvertible {

stdlib/public/core/StringBridge.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,8 @@ private func _getCocoaStringPointer(
119119
_ cfImmutableValue: _CocoaString
120120
) -> CocoaStringPointer {
121121
if let utf8Ptr = _cocoaUTF8Pointer(cfImmutableValue) {
122-
// TODO(UTF8 perf): Remember Cocoa ASCII-ness
123-
return .utf8(utf8Ptr)
122+
// NOTE: CFStringGetCStringPointer means ASCII
123+
return .ascii(utf8Ptr)
124124
}
125125
if let utf16Ptr = _swift_stdlib_CFStringGetCharactersPtr(cfImmutableValue) {
126126
return .utf16(utf16Ptr)

stdlib/public/core/StringComparable.swift

Lines changed: 49 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -13,62 +13,82 @@
1313
import SwiftShims
1414

1515
extension StringProtocol {
16-
@inlinable
17-
@inline(__always) // de-virtualize
18-
public static func ==<S: StringProtocol>(lhs: Self, rhs: S) -> Bool {
19-
// TODO(UTF8 perf): This is a horribly slow means...
20-
return String(lhs) == String(rhs)
16+
@_specialize(where Self == String, R == String)
17+
@_specialize(where Self == String, R == Substring)
18+
@_specialize(where Self == Substring, R == String)
19+
@_specialize(where Self == Substring, R == Substring)
20+
@_effects(readonly)
21+
public static func == <R: StringProtocol>(lhs: Self, rhs: R) -> Bool {
22+
return lhs._slicedGuts.compare(with: rhs._slicedGuts) == .equal
2123
}
2224

23-
@inlinable
24-
@inline(__always) // de-virtualize
25-
public static func !=<S: StringProtocol>(lhs: Self, rhs: S) -> Bool {
26-
// TODO(UTF8 perf): This is a horribly slow means...
27-
return String(lhs) != String(rhs)
25+
@inlinable @inline(__always) // forward to other operator
26+
@_effects(readonly)
27+
public static func != <R: StringProtocol>(lhs: Self, rhs: R) -> Bool {
28+
return !(lhs == rhs)
2829
}
2930

30-
@inlinable
31-
@inline(__always) // de-virtualize
31+
@_specialize(where Self == String, R == String)
32+
@_specialize(where Self == String, R == Substring)
33+
@_specialize(where Self == Substring, R == String)
34+
@_specialize(where Self == Substring, R == Substring)
35+
@_effects(readonly)
3236
public static func < <R: StringProtocol>(lhs: Self, rhs: R) -> Bool {
33-
// TODO(UTF8 perf): This is a horribly slow means...
34-
return String(lhs) < String(rhs)
37+
return lhs._slicedGuts.compare(with: rhs._slicedGuts) == .less
3538
}
3639

37-
@inlinable
38-
@inline(__always) // de-virtualize
40+
@inlinable @inline(__always) // forward to other operator
41+
@_effects(readonly)
3942
public static func > <R: StringProtocol>(lhs: Self, rhs: R) -> Bool {
40-
// TODO(UTF8 perf): This is a horribly slow means...
41-
return String(lhs) > String(rhs)
43+
return rhs < lhs
4244
}
4345

44-
@inlinable
45-
@inline(__always) // de-virtualize
46+
@inlinable @inline(__always) // forward to other operator
47+
@_effects(readonly)
4648
public static func <= <R: StringProtocol>(lhs: Self, rhs: R) -> Bool {
47-
// TODO(UTF8 perf): This is a horribly slow means...
48-
return String(lhs) <= String(rhs)
49+
return !(rhs < lhs)
4950
}
5051

51-
@inlinable
52-
@inline(__always) // de-virtualize
52+
@inlinable @inline(__always) // forward to other operator
53+
@_effects(readonly)
5354
public static func >= <R: StringProtocol>(lhs: Self, rhs: R) -> Bool {
54-
// TODO(UTF8 perf): This is a horribly slow means...
55-
return String(lhs) >= String(rhs)
55+
return !(lhs < rhs)
5656
}
5757
}
5858

5959
extension String : Equatable {
6060
@inlinable @inline(__always) // For the bitwise comparision
61-
public static func ==(lhs: String, rhs: String) -> Bool {
61+
@_effects(readonly)
62+
public static func == (lhs: String, rhs: String) -> Bool {
6263
if lhs._guts.rawBits == rhs._guts.rawBits { return true }
63-
return _compareStringsEqual(lhs, rhs)
64+
if _fastPath(lhs._guts.isNFCFastUTF8 && rhs._guts.isNFCFastUTF8) {
65+
Builtin.onFastPath() // aggressively inline / optimize
66+
return lhs._guts.withFastUTF8 { nfcSelf in
67+
return rhs._guts.withFastUTF8 { nfcOther in
68+
return _binaryCompare(nfcSelf, nfcOther) == 0
69+
}
70+
}
71+
}
72+
73+
return lhs._slicedGuts.compare(with: rhs._slicedGuts) == .equal
6474
}
6575
}
6676

6777
extension String : Comparable {
6878
@inlinable @inline(__always) // For the bitwise comparision
79+
@_effects(readonly)
6980
public static func < (lhs: String, rhs: String) -> Bool {
7081
if lhs._guts.rawBits == rhs._guts.rawBits { return false }
71-
return _compareStringsLess(lhs, rhs)
82+
if _fastPath(lhs._guts.isNFCFastUTF8 && rhs._guts.isNFCFastUTF8) {
83+
Builtin.onFastPath() // aggressively inline / optimize
84+
return lhs._guts.withFastUTF8 { nfcSelf in
85+
return rhs._guts.withFastUTF8 { nfcOther in
86+
return _binaryCompare(nfcSelf, nfcOther) < 0
87+
}
88+
}
89+
}
90+
91+
return lhs._slicedGuts.compare(with: rhs._slicedGuts) == .less
7292
}
7393
}
7494

0 commit comments

Comments
 (0)