Skip to content

Commit ea09f2d

Browse files
authored
Merge pull request #16733 from milseman/the_incredible_shrinking_index
[String] Shrink and simplify String.Index
2 parents 43fa4e9 + 3ee1710 commit ea09f2d

12 files changed

+304
-190
lines changed

stdlib/public/SDK/Foundation/ExtraStringAPIs.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ extension String.UTF16View.Index {
1616
@available(swift, obsoleted: 4.0)
1717
public init(_ offset: Int) {
1818
_precondition(offset >= 0, "Negative UTF16 index offset not allowed")
19-
self.init(_offset: offset)
19+
self.init(encodedOffset: offset)
2020
}
2121

2222
@available(swift, deprecated: 3.2)

stdlib/public/core/StringCharacterView.swift

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -221,18 +221,14 @@ extension String._CharacterView : BidirectionalCollection {
221221
/// this view's `_baseOffset`.
222222
@inlinable // FIXME(sil-serialize-all)
223223
internal func _toBaseIndex(_ index: Index) -> Index {
224-
return Index(
225-
encodedOffset: index.encodedOffset - _baseOffset,
226-
index._cache)
224+
return String.Index(from: index, adjustingEncodedOffsetBy: -_baseOffset)
227225
}
228226

229227
/// Translates an index in the underlying base string into a view index using
230228
/// this view's `_baseOffset`.
231229
@inlinable // FIXME(sil-serialize-all)
232230
internal func _toViewIndex(_ index: Index) -> Index {
233-
return Index(
234-
encodedOffset: index.encodedOffset + _baseOffset,
235-
index._cache)
231+
return String.Index(from: index, adjustingEncodedOffsetBy: _baseOffset)
236232
}
237233

238234
/// The position of the first character in a nonempty character view.

stdlib/public/core/StringComparable.swift

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,14 @@ extension _StringGuts {
2727
if left._bitwiseEqualTo(right) {
2828
return true
2929
}
30+
if left._isSmall && right._isSmall {
31+
// TODO: Ensure normality when adding UTF-8 support
32+
_sanityCheck(left._isASCIIOrSmallASCII && right._isASCIIOrSmallASCII,
33+
"Need to ensure normality")
34+
35+
// Equal small strings should be bitwise equal if ASCII
36+
return false
37+
}
3038
return compare(left, to: right) == 0
3139
}
3240

@@ -50,6 +58,10 @@ extension _StringGuts {
5058
if left._bitwiseEqualTo(right) {
5159
return false
5260
}
61+
if left._isSmall && right._isSmall {
62+
// Small strings compare lexicographically if ASCII
63+
return left._smallUTF8String._compare(right._smallUTF8String) == .less
64+
}
5365
return compare(left, to: right) == -1
5466
}
5567

@@ -72,14 +84,14 @@ extension _StringGuts {
7284
) -> Int {
7385
defer { _fixLifetime(left) }
7486
defer { _fixLifetime(right) }
75-
87+
7688
if left.isASCII && right.isASCII {
7789
let leftASCII = left._unmanagedASCIIView[leftRange]
7890
let rightASCII = right._unmanagedASCIIView[rightRange]
7991
let result = leftASCII.compareASCII(to: rightASCII)
8092
return result
8193
}
82-
94+
8395
let leftBits = left.rawBits
8496
let rightBits = right.rawBits
8597

@@ -92,14 +104,14 @@ extension _StringGuts {
92104
) -> Int {
93105
defer { _fixLifetime(left) }
94106
defer { _fixLifetime(right) }
95-
107+
96108
if left.isASCII && right.isASCII {
97109
let leftASCII = left._unmanagedASCIIView
98110
let rightASCII = right._unmanagedASCIIView
99111
let result = leftASCII.compareASCII(to: rightASCII)
100112
return result
101113
}
102-
114+
103115
let leftBits = left.rawBits
104116
let rightBits = right.rawBits
105117

stdlib/public/core/StringComparison.swift

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -120,25 +120,19 @@ func _compareUnicode(
120120
// TODO: coalesce many of these into a protocol to simplify the code
121121

122122
extension _SmallUTF8String {
123+
/// Orders two small strings by comparing their code units position by
/// position.
///
/// Both operands are expected to be ASCII; this is checked with
/// `_sanityCheck`. Strings with identical storage compare `.equal`. Otherwise
/// the first differing code unit in the common prefix decides the result, and
/// if there is none the result is `.less` when `self` is shorter and
/// `.greater` otherwise.
@inlinable
func _compare(_ other: _SmallUTF8String) -> _Ordering {
#if arch(i386) || arch(arm)
  _conditionallyUnreachable()
#else
  // TODO: Ensure normality when adding UTF-8 support
  _sanityCheck(self.isASCII && other.isASCII, "Need to ensure normality")

  // Identical storage: nothing further to inspect.
  if self._storage == other._storage { return .equal }

  // Walk the positions both strings have in common.
  let sharedCount = Swift.min(self.count, other.count)
  for position in 0..<sharedCount {
    let mine = self[position]
    let theirs = other[position]
    if mine < theirs { return .less }
    if mine > theirs { return .greater }
  }

  // No difference within the common prefix; length decides.
  if self.count < other.count { return .less }
  return .greater
#endif // 64-bit
}
144138
func _compare(_contiguous other: _StringGuts) -> _Ordering {

stdlib/public/core/StringGraphemeBreaking.swift

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -18,29 +18,10 @@ internal var _CR: UInt8 { return 0x0d }
1818
@inlinable // FIXME(sil-serialize-all)
1919
internal var _LF: UInt8 { return 0x0a }
2020

21-
extension String.Index {
22-
@inlinable // FIXME(sil-serialize-all)
23-
internal init(encodedOffset: Int, characterStride stride: Int) {
24-
if _slowPath(stride == 0 || stride > UInt16.max) {
25-
// Don't store a 0 stride for the endIndex
26-
// or a truncated stride for an overlong grapheme cluster.
27-
self.init(encodedOffset: encodedOffset)
28-
return
29-
}
30-
self.init(
31-
encodedOffset: encodedOffset,
32-
.character(stride: UInt16(truncatingIfNeeded: stride)))
33-
}
34-
}
35-
3621
extension _StringVariant {
3722
/// Returns the character stride at `i`.
///
/// Uses the stride cached in the index when one is present; otherwise asks
/// `characterStride(atOffset:)` for the stride at the index's encoded offset.
@inlinable
internal func _stride(at i: String.Index) -> Int {
  // The right-hand side is only evaluated when no stride is cached.
  return i.characterStride ?? characterStride(atOffset: i.encodedOffset)
}
4627

stdlib/public/core/StringIndex.swift

Lines changed: 75 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -13,54 +13,57 @@ extension String {
1313
/// A position of a character or code unit in a string.
@_fixed_layout // FIXME(sil-serialize-all)
public struct Index {
  internal typealias _UTF8Buffer = UTF8.EncodedScalar

  // Packed position: the encoded (code unit) offset shifted left by two bits,
  // with the transcoded offset stored in the low two bits.
  @usableFromInline // FIXME(sil-serialize-all)
  internal var _compoundOffset: UInt64

  // Cached UTF-8 code units. An empty buffer means nothing is cached.
  @usableFromInline
  internal var _utf8Buffer: _UTF8Buffer = _UTF8Buffer()

  // Cached grapheme stride. Zero means nothing is cached.
  @usableFromInline
  internal var _graphemeStrideCache: UInt16 = 0
}
3127
}
3228

3329
/// Convenience accessors
extension String.Index {
  /// The cached UTF-8 buffer, or `nil` when the stored buffer is empty.
  @inlinable // FIXME(sil-serialize-all)
  internal var utf8Buffer: String.Index._UTF8Buffer? {
    return _utf8Buffer.isEmpty ? nil : _utf8Buffer
  }

  /// The cached grapheme stride, or `nil` when the stored stride is zero.
  @inlinable // FIXME(sil-serialize-all)
  internal var characterStride: Int? {
    let cached = _graphemeStrideCache
    return cached > 0 ? Int(truncatingIfNeeded: cached) : nil
  }

  // TODO: Probably worth carving a bit for, or maybe a isSubScalar bit...
  /// `true` when the index carries a UTF-8 buffer or a nonzero transcoded
  /// offset.
  @inlinable // FIXME(sil-serialize-all)
  internal var isUTF8: Bool {
    if self.utf8Buffer != nil { return true }
    return self.transcodedOffset > 0
  }
}
5249

5350
extension String.Index : Equatable {
  // A combined code unit and transcoded offset, for comparison purposes
  @inlinable // FIXME(sil-serialize-all)
  internal var _orderingValue: UInt64 {
    return _compoundOffset
  }

  /// Two indices are equal when their ordering values are equal; the cached
  /// UTF-8 buffer and grapheme stride do not take part in the comparison.
  @inlinable // FIXME(sil-serialize-all)
  public static func == (lhs: String.Index, rhs: String.Index) -> Bool {
    let left = lhs._orderingValue
    let right = rhs._orderingValue
    return left == right
  }
}
5962

6063
extension String.Index : Comparable {
  /// Orders indices by their ordering values; the cached UTF-8 buffer and
  /// grapheme stride do not take part in the comparison.
  @inlinable // FIXME(sil-serialize-all)
  public static func < (lhs: String.Index, rhs: String.Index) -> Bool {
    let left = lhs._orderingValue
    let right = rhs._orderingValue
    return left < right
  }
}
6669

@@ -72,56 +75,67 @@ extension String.Index : Hashable {
7275
/// of this instance.
7376
@inlinable // FIXME(sil-serialize-all)
7477
public func hash(into hasher: inout Hasher) {
75-
hasher.combine(_compoundOffset)
78+
hasher.combine(_orderingValue)
7679
}
7780
}
7881

7982
extension String.Index {
80-
internal typealias _Self = String.Index
81-
83+
/// Creates an index from a code unit offset and an offset into the
/// transcoded scalar at that position.
///
/// The two values are packed into `_compoundOffset`: the encoded offset is
/// shifted left by two bits and the transcoded offset occupies the low two
/// bits.
///
/// - Parameters:
///   - encodedOffset: The code unit offset; must fit in 48 bits.
///   - transcodedOffset: The offset within the transcoded scalar; must fit in
///     the two bits reserved for it (0 through 3).
@inline(__always)
@inlinable
internal init(encodedOffset: Int, transcodedOffset: Int) {
  let cuOffset = UInt64(truncatingIfNeeded: encodedOffset)
  _sanityCheck(
    cuOffset & 0xFFFF_0000_0000_0000 == 0, "String length capped at 48bits")
  let transOffset = UInt64(truncatingIfNeeded: transcodedOffset)
  // Only two bits are reserved for the transcoded offset, so the largest
  // representable value is 3 (the last unit of a 4-unit UTF-8 sequence). A
  // value of 4 would spill into the encoded offset's bits below.
  _sanityCheck(transOffset <= 3, "UTF-8 max transcoding is 4 code units")

  self._compoundOffset = cuOffset &<< 2 | transOffset
}
94+
95+
/// Creates a copy of `other` whose encoded offset is shifted by `adj`.
///
/// The shift uses a wrapping add. The transcoded offset, the UTF-8 buffer and
/// the cached grapheme stride are carried over from `other` unchanged.
@inline(__always)
@inlinable
internal init(from other: String.Index, adjustingEncodedOffsetBy adj: Int) {
  let shiftedOffset = other.encodedOffset &+ adj
  self.init(
    encodedOffset: shiftedOffset,
    transcodedOffset: other.transcodedOffset)

  // Carry the caches across as they are.
  self._graphemeStrideCache = other._graphemeStrideCache
  self._utf8Buffer = other._utf8Buffer
}
104+
82105
/// Creates a new index at the specified UTF-16 offset.
///
/// The resulting index has a transcoded offset of zero.
///
/// - Parameter offset: An offset in UTF-16 code units.
@inlinable // FIXME(sil-serialize-all)
public init(encodedOffset offset: Int) {
  // A plain code unit position: no offset into a transcoded scalar.
  self.init(encodedOffset: offset, transcodedOffset: 0)
}
90112

91113
/// Creates an index at the given encoded and transcoded offsets that also
/// carries `buffer` as its UTF-8 buffer.
@inlinable // FIXME(sil-serialize-all)
internal init(
  encodedOffset offset: Int,
  transcodedOffset: Int,
  buffer: _UTF8Buffer
) {
  // Establish the position first, then attach the buffer.
  self.init(encodedOffset: offset, transcodedOffset: transcodedOffset)
  _utf8Buffer = buffer
}
96-
97-
@inlinable // FIXME(sil-serialize-all)
98-
internal static var _strideBits : Int { return 2 }
99-
@inlinable // FIXME(sil-serialize-all)
100-
internal static var _mask : UInt64 { return (1 &<< _Self._strideBits) &- 1 }
101-
102-
@inlinable // FIXME(sil-serialize-all)
103-
internal mutating func _setEncodedOffset(_ x: Int) {
104-
_compoundOffset = UInt64(x << _Self._strideBits)
120+
121+
/// Creates an index at `encodedOffset` that caches `characterStride` when the
/// stride is representable.
///
/// The stride is cached only if it fits in a `UInt16`. A stride that does not
/// fit (negative, or larger than `UInt16.max`) leaves the cache at zero, which
/// reads back as "no cached stride". A stride of zero is likewise
/// indistinguishable from no cache.
///
/// - Parameters:
///   - encodedOffset: The code unit offset of the index.
///   - characterStride: The grapheme stride to cache.
@inlinable
internal init(encodedOffset: Int, characterStride: Int) {
  self.init(encodedOffset: encodedOffset, transcodedOffset: 0)
  // `UInt16(exactly:)` accepts the full 0...UInt16.max range (the previous
  // `< UInt16.max` test dropped a stride of exactly UInt16.max) and rejects
  // negative values instead of storing their truncated bit pattern.
  if let stride = UInt16(exactly: characterStride) {
    self._graphemeStrideCache = stride
  }
}
106-
128+
107129
/// The offset into a string's UTF-16 encoding for this index.
@inlinable // FIXME(sil-serialize-all)
public var encodedOffset : Int {
  // Drop the two low bits, which hold the transcoded offset.
  let codeUnitBits = _compoundOffset &>> 2
  return Int(truncatingIfNeeded: codeUnitBits)
}
112134

113135
/// The offset of this index within whatever encoding this is being viewed as
@inlinable // FIXME(sil-serialize-all)
internal var transcodedOffset: Int {
  // The transcoded offset lives in the two low bits of the compound offset.
  let lowTwoBits = _compoundOffset & 0x3
  return Int(truncatingIfNeeded: lowTwoBits)
}
126140
}
127141

@@ -130,27 +144,27 @@ extension String.Index {
130144
@inlinable // FIXME(sil-serialize-all)
131145
@available(swift, deprecated: 3.2)
132146
@available(swift, obsoleted: 4.0)
133-
public // SPI(Foundation)
147+
public // SPI(Foundation)
134148
init(_position: Int) {
135149
self.init(encodedOffset: _position)
136150
}
137-
151+
138152
@inlinable // FIXME(sil-serialize-all)
139153
@available(swift, deprecated: 3.2)
140154
@available(swift, obsoleted: 4.0)
141-
public // SPI(Foundation)
142-
init(_offset: Int) {
143-
self.init(encodedOffset: _offset)
155+
public // SPI(Foundation)
156+
init(_codeUnitOffset: Int) {
157+
self.init(encodedOffset: _codeUnitOffset)
144158
}
145-
159+
146160
@inlinable // FIXME(sil-serialize-all)
147161
@available(swift, deprecated: 3.2)
148162
@available(swift, obsoleted: 4.0)
149-
public // SPI(Foundation)
163+
public // SPI(Foundation)
150164
init(_base: String.Index, in c: String.CharacterView) {
151165
self = _base
152166
}
153-
167+
154168
/// The integer offset of this index in UTF-16 code units.
155169
@inlinable // FIXME(sil-serialize-all)
156170
@available(swift, deprecated: 3.2)
@@ -171,7 +185,7 @@ extension String.Index {
171185
}
172186

173187

174-
// backward compatibility for index interchange.
188+
// backward compatibility for index interchange.
175189
extension Optional where Wrapped == String.Index {
176190
@inlinable // FIXME(sil-serialize-all)
177191
@available(

stdlib/public/core/StringRangeReplaceableCollection.swift

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -455,11 +455,8 @@ extension String {
455455

456456
@inlinable // FIXME(sil-serialize-all)
457457
internal func _stride(of i: Index) -> Int {
458-
if case .character(let stride) = i._cache {
459-
// TODO: should _fastPath the case somehow
460-
_sanityCheck(stride > 0)
461-
return Int(stride)
462-
}
458+
if let stride = i.characterStride { return stride }
459+
463460
let offset = i.encodedOffset
464461
return _visitGuts(_guts, args: offset,
465462
ascii: { ascii, offset in

0 commit comments

Comments
 (0)