Skip to content

Commit c2bc72d

Browse files
natecook1000moiseev
authored andcommitted
[stdlib] Fix string index sharing (#4896)
* [stdlib] Fix String.UTF16View index sharing * [stdlib] Fix String.UnicodeScalarView index sharing * [stdlib] Fix String.CharacterView index sharing * [stdlib] Test advancing string indices past their ends * [stdlib] Simplify CharacterView ranged subscript
1 parent 91bba4d commit c2bc72d

File tree

4 files changed

+93
-55
lines changed

4 files changed

+93
-55
lines changed

stdlib/public/core/StringCharacterView.swift

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -54,14 +54,22 @@ extension String {
5454
public struct CharacterView {
5555
internal var _core: _StringCore
5656

57+
/// The offset of this view's `_core` from an original core. This works
58+
/// around the fact that `_StringCore` is always zero-indexed.
59+
/// `_coreOffset` should be subtracted from `UnicodeScalarIndex._position`
60+
/// before that value is used as a `_core` index.
61+
internal var _coreOffset: Int
62+
5763
/// Creates a view of the given string.
5864
public init(_ text: String) {
5965
self._core = text._core
66+
self._coreOffset = 0
6067
}
6168

6269
public // @testable
63-
init(_ _core: _StringCore) {
70+
init(_ _core: _StringCore, coreOffset: Int = 0) {
6471
self._core = _core
72+
self._coreOffset = coreOffset
6573
}
6674
}
6775

@@ -139,7 +147,7 @@ extension String {
139147
extension String.CharacterView : BidirectionalCollection {
140148
internal typealias UnicodeScalarView = String.UnicodeScalarView
141149
internal var unicodeScalars: UnicodeScalarView {
142-
return UnicodeScalarView(_core)
150+
return UnicodeScalarView(_core, coreOffset: _coreOffset)
143151
}
144152

145153
/// A position in a string's `CharacterView` instance.
@@ -246,7 +254,7 @@ extension String.CharacterView : BidirectionalCollection {
246254
from start: UnicodeScalarView.Index
247255
) -> Int {
248256
var start = start
249-
let end = UnicodeScalarView.Index(_position: _core.count)
257+
let end = unicodeScalars.endIndex
250258
if start == end {
251259
return 0
252260
}
@@ -288,7 +296,7 @@ extension String.CharacterView : BidirectionalCollection {
288296
internal func _measureExtendedGraphemeClusterBackward(
289297
from end: UnicodeScalarView.Index
290298
) -> Int {
291-
let start = UnicodeScalarView.Index(_position: 0)
299+
let start = unicodeScalars.startIndex
292300
if start == end {
293301
return 0
294302
}
@@ -363,8 +371,8 @@ extension String.CharacterView : RangeReplaceableCollection {
363371
with newElements: C
364372
) where C : Collection, C.Iterator.Element == Character {
365373
let rawSubRange: Range<Int> =
366-
bounds.lowerBound._base._position
367-
..< bounds.upperBound._base._position
374+
bounds.lowerBound._base._position - _coreOffset
375+
..< bounds.upperBound._base._position - _coreOffset
368376
let lazyUTF16 = newElements.lazy.flatMap { $0.utf16 }
369377
_core.replaceSubrange(rawSubRange, with: lazyUTF16)
370378
}
@@ -436,10 +444,9 @@ extension String.CharacterView {
436444
/// - Complexity: O(*n*) if the underlying string is bridged from
437445
/// Objective-C, where *n* is the length of the string; otherwise, O(1).
438446
public subscript(bounds: Range<Index>) -> String.CharacterView {
439-
let unicodeScalarRange =
440-
bounds.lowerBound._base..<bounds.upperBound._base
441-
return String.CharacterView(
442-
String(_core).unicodeScalars[unicodeScalarRange]._core)
447+
let unicodeScalarRange = bounds.lowerBound._base..<bounds.upperBound._base
448+
return String.CharacterView(unicodeScalars[unicodeScalarRange]._core,
449+
coreOffset: unicodeScalarRange.lowerBound._position)
443450
}
444451
}
445452

stdlib/public/core/StringUTF16.swift

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -145,15 +145,15 @@ extension String {
145145
/// The position of the first code unit if the `String` is
146146
/// nonempty; identical to `endIndex` otherwise.
147147
public var startIndex: Index {
148-
return Index(_offset: 0)
148+
return Index(_offset: _offset)
149149
}
150150

151151
/// The "past the end" position---that is, the position one greater than
152152
/// the last valid subscript argument.
153153
///
154154
/// In an empty UTF-16 view, `endIndex` is equal to `startIndex`.
155155
public var endIndex: Index {
156-
return Index(_offset: _length)
156+
return Index(_offset: _offset + _length)
157157
}
158158

159159
public struct Indices {
@@ -204,7 +204,7 @@ extension String {
204204
}
205205

206206
func _internalIndex(at i: Int) -> Int {
207-
return _core.startIndex + _offset + i
207+
return _core.startIndex + i
208208
}
209209

210210
/// Accesses the code unit at the given position.
@@ -220,11 +220,10 @@ extension String {
220220
/// - Parameter position: A valid index of the view. `position` must be
221221
/// less than the view's end index.
222222
public subscript(i: Index) -> UTF16.CodeUnit {
223-
let position = i._offset
224-
_precondition(position >= 0 && position < _length,
223+
_precondition(i >= startIndex && i < endIndex,
225224
"out-of-range access on a UTF16View")
226225

227-
let index = _internalIndex(at: position)
226+
let index = _internalIndex(at: i._offset)
228227
let u = _core[index]
229228
if _fastPath((u >> 11) != 0b1101_1) {
230229
// Neither high-surrogate, nor low-surrogate -- well-formed sequence
@@ -292,8 +291,8 @@ extension String {
292291
}
293292

294293
public var description: String {
295-
let start = _internalIndex(at: 0)
296-
let end = _internalIndex(at: _length)
294+
let start = _internalIndex(at: _offset)
295+
let end = _internalIndex(at: _offset + _length)
297296
return String(_core[start..<end])
298297
}
299298

@@ -337,17 +336,16 @@ extension String {
337336
public init?(_ utf16: UTF16View) {
338337
let wholeString = String(utf16._core)
339338

340-
if let start = UTF16Index(
341-
_offset: utf16._offset
342-
).samePosition(in: wholeString) {
343-
if let end = UTF16Index(
344-
_offset: utf16._offset + utf16._length
345-
).samePosition(in: wholeString) {
346-
self = wholeString[start..<end]
347-
return
348-
}
339+
guard
340+
let start = UTF16Index(_offset: utf16._offset)
341+
.samePosition(in: wholeString),
342+
let end = UTF16Index(_offset: utf16._offset + utf16._length)
343+
.samePosition(in: wholeString)
344+
else
345+
{
346+
return nil
349347
}
350-
return nil
348+
self = wholeString[start..<end]
351349
}
352350

353351
/// The index type for subscripting a string's `utf16` view.

stdlib/public/core/StringUnicodeScalarView.swift

Lines changed: 34 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,9 @@ extension String {
6262
CustomStringConvertible,
6363
CustomDebugStringConvertible
6464
{
65-
internal init(_ _core: _StringCore) {
65+
internal init(_ _core: _StringCore, coreOffset: Int = 0) {
6666
self._core = _core
67+
self._coreOffset = coreOffset
6768
}
6869

6970
internal struct _ScratchIterator : IteratorProtocol {
@@ -109,44 +110,57 @@ extension String {
109110
@_versioned internal var _position: Int
110111
}
111112

113+
/// Translates a `_core` index into a `UnicodeScalarIndex` using this view's
114+
/// `_coreOffset`.
115+
internal func _fromCoreIndex(_ i: Int) -> Index {
116+
return Index(_position: i + _coreOffset)
117+
}
118+
119+
/// Translates a `UnicodeScalarIndex` into a `_core` index using this view's
120+
/// `_coreOffset`.
121+
internal func _toCoreIndex(_ i: Index) -> Int {
122+
return i._position - _coreOffset
123+
}
124+
112125
/// The position of the first Unicode scalar value if the string is
113126
/// nonempty.
114127
///
115128
/// If the string is empty, `startIndex` is equal to `endIndex`.
116129
public var startIndex: Index {
117-
return Index(_position: _core.startIndex)
130+
return _fromCoreIndex(_core.startIndex)
118131
}
119132

120133
/// The "past the end" position---that is, the position one greater than
121134
/// the last valid subscript argument.
122135
///
123136
/// In an empty Unicode scalars view, `endIndex` is equal to `startIndex`.
124137
public var endIndex: Index {
125-
return Index(_position: _core.endIndex)
138+
return _fromCoreIndex(_core.endIndex)
126139
}
127140

128141
/// Returns the next consecutive location after `i`.
129142
///
130143
/// - Precondition: The next location exists.
131144
public func index(after i: Index) -> Index {
132-
var scratch = _ScratchIterator(_core, i._position)
145+
let i = _toCoreIndex(i)
146+
var scratch = _ScratchIterator(_core, i)
133147
var decoder = UTF16()
134148
let (_, length) = decoder._decodeOne(&scratch)
135-
return Index(_position: i._position + length)
149+
return _fromCoreIndex(i + length)
136150
}
137151

138152
/// Returns the previous consecutive location before `i`.
139153
///
140154
/// - Precondition: The previous location exists.
141155
public func index(before i: Index) -> Index {
142-
var i = i._position - 1
156+
var i = _toCoreIndex(i) - 1
143157
let codeUnit = _core[i]
144158
if _slowPath((codeUnit >> 10) == 0b1101_11) {
145159
if i != 0 && (_core[i - 1] >> 10) == 0b1101_10 {
146160
i -= 1
147161
}
148162
}
149-
return Index(_position: i)
163+
return _fromCoreIndex(i)
150164
}
151165

152166
/// Accesses the Unicode scalar value at the given position.
@@ -166,7 +180,7 @@ extension String {
166180
/// - Parameter position: A valid index of the character view. `position`
167181
/// must be less than the view's end index.
168182
public subscript(position: Index) -> UnicodeScalar {
169-
var scratch = _ScratchIterator(_core, position._position)
183+
var scratch = _ScratchIterator(_core, _toCoreIndex(position))
170184
var decoder = UTF16()
171185
switch decoder.decode(&scratch) {
172186
case .scalarValue(let us):
@@ -192,8 +206,9 @@ extension String {
192206
/// - Complexity: O(*n*) if the underlying string is bridged from
193207
/// Objective-C, where *n* is the length of the string; otherwise, O(1).
194208
public subscript(r: Range<Index>) -> UnicodeScalarView {
195-
return UnicodeScalarView(
196-
_core[r.lowerBound._position..<r.upperBound._position])
209+
let rawSubRange = _toCoreIndex(r.lowerBound)..<_toCoreIndex(r.upperBound)
210+
return UnicodeScalarView(_core[rawSubRange],
211+
coreOffset: r.lowerBound._position)
197212
}
198213

199214
/// An iterator over the Unicode scalars that make up a `UnicodeScalarView`
@@ -270,14 +285,20 @@ extension String {
270285
}
271286

272287
public var description: String {
273-
return String(_core[startIndex._position..<endIndex._position])
288+
return String(_core)
274289
}
275290

276291
public var debugDescription: String {
277292
return "StringUnicodeScalarView(\(self.description.debugDescription))"
278293
}
279294

280295
internal var _core: _StringCore
296+
297+
/// The offset of this view's `_core` from an original core. This works
298+
/// around the fact that `_StringCore` is always zero-indexed.
299+
/// `_coreOffset` should be subtracted from `UnicodeScalarIndex._position`
300+
/// before that value is used as a `_core` index.
301+
internal var _coreOffset: Int
281302
}
282303

283304
/// Creates a string corresponding to the given collection of Unicode
@@ -391,9 +412,8 @@ extension String.UnicodeScalarView : RangeReplaceableCollection {
391412
_ bounds: Range<Index>,
392413
with newElements: C
393414
) where C : Collection, C.Iterator.Element == UnicodeScalar {
394-
let rawSubRange: Range<Int> =
395-
bounds.lowerBound._position
396-
..< bounds.upperBound._position
415+
let rawSubRange: Range<Int> = _toCoreIndex(bounds.lowerBound) ..<
416+
_toCoreIndex(bounds.upperBound)
397417
let lazyUTF16 = newElements.lazy.flatMap { $0.utf16 }
398418
_core.replaceSubrange(rawSubRange, with: lazyUTF16)
399419
}

test/stdlib/subString.swift

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,10 @@ import StdlibUnittest
55

66
var SubstringTests = TestSuite("SubstringTests")
77

8-
func checkMatch<S: Collection, T: Collection
8+
func checkMatch<S: Collection, T: Collection>(_ x: S, _ y: T, _ i: S.Index)
99
where S.Index == T.Index, S.Iterator.Element == T.Iterator.Element,
10-
S.Iterator.Element: Equatable>(
11-
_ x: S, _ y: T, _ i: S.Index) {
12-
10+
S.Iterator.Element: Equatable
11+
{
1312
expectEqual(x[i], y[i])
1413
}
1514

@@ -25,9 +24,7 @@ SubstringTests.test("String") {
2524
expectEqual(s3, "cd")
2625
}
2726

28-
SubstringTests.test("CharacterView")
29-
.xfail(.always("CharacterView slices don't share indices"))
30-
.code {
27+
SubstringTests.test("CharacterView") {
3128
let s = "abcdefg"
3229
var t = s.characters.dropFirst(2)
3330
var u = t.dropFirst(2)
@@ -41,16 +38,19 @@ SubstringTests.test("CharacterView")
4138
checkMatch(t, u, u.index(after: u.startIndex))
4239
checkMatch(t, u, u.index(before: u.endIndex))
4340

41+
expectEqual("", String(t.dropFirst(10)))
42+
expectEqual("", String(t.dropLast(10)))
43+
expectEqual("", String(u.dropFirst(10)))
44+
expectEqual("", String(u.dropLast(10)))
45+
4446
t.replaceSubrange(t.startIndex...t.startIndex, with: ["C"])
4547
u.replaceSubrange(u.startIndex...u.startIndex, with: ["E"])
4648
expectEqual(String(u), "Efg")
4749
expectEqual(String(t), "Cdefg")
4850
expectEqual(s, "abcdefg")
4951
}
5052

51-
SubstringTests.test("UnicodeScalars")
52-
.xfail(.always("UnicodeScalarsView slices don't share indices"))
53-
.code {
53+
SubstringTests.test("UnicodeScalars") {
5454
let s = "abcdefg"
5555
var t = s.unicodeScalars.dropFirst(2)
5656
var u = t.dropFirst(2)
@@ -64,16 +64,19 @@ SubstringTests.test("UnicodeScalars")
6464
checkMatch(t, u, u.index(after: u.startIndex))
6565
checkMatch(t, u, u.index(before: u.endIndex))
6666

67+
expectEqual("", String(t.dropFirst(10)))
68+
expectEqual("", String(t.dropLast(10)))
69+
expectEqual("", String(u.dropFirst(10)))
70+
expectEqual("", String(u.dropLast(10)))
71+
6772
t.replaceSubrange(t.startIndex...t.startIndex, with: ["C"])
6873
u.replaceSubrange(u.startIndex...u.startIndex, with: ["E"])
6974
expectEqual(String(u), "Efg")
7075
expectEqual(String(t), "Cdefg")
7176
expectEqual(s, "abcdefg")
7277
}
7378

74-
SubstringTests.test("UTF16View")
75-
.xfail(.always("UTF16View slices don't share indices"))
76-
.code {
79+
SubstringTests.test("UTF16View") {
7780
let s = "abcdefg"
7881
let t = s.utf16.dropFirst(2)
7982
let u = t.dropFirst(2)
@@ -86,6 +89,11 @@ SubstringTests.test("UTF16View")
8689
checkMatch(t, u, u.startIndex)
8790
checkMatch(t, u, u.index(after: u.startIndex))
8891
checkMatch(t, u, u.index(before: u.endIndex))
92+
93+
expectEqual("", String(t.dropFirst(10))!)
94+
expectEqual("", String(t.dropLast(10))!)
95+
expectEqual("", String(u.dropFirst(10))!)
96+
expectEqual("", String(u.dropLast(10))!)
8997
}
9098

9199
SubstringTests.test("UTF8View") {
@@ -99,6 +107,11 @@ SubstringTests.test("UTF8View") {
99107
checkMatch(s.utf8, t, u.startIndex)
100108
checkMatch(t, u, u.startIndex)
101109
checkMatch(t, u, u.index(after: u.startIndex))
110+
111+
expectEqual("", String(t.dropFirst(10))!)
112+
expectEqual("", String(t.dropLast(10))!)
113+
expectEqual("", String(u.dropFirst(10))!)
114+
expectEqual("", String(u.dropLast(10))!)
102115
}
103116

104117
runAllTests()

0 commit comments

Comments
 (0)