Skip to content

Commit 0cff8fb

Browse files
committed
[String.Index] Deprecate encodedOffset var/init
String.Index has an encodedOffset-based initializer and computed property that exist for serialization purposes. It was documented as UTF-16 in the SE proposal introducing it, which was String's underlying encoding at the time, but the dream of String even then was to abstract away whatever encoding happened to be used. Serialization needs an explicit encoding for serialized indices to make sense: the offsets need to align with the view. With String utilizing UTF-8 encoding for native contents in Swift 5, serialization isn't necessarily the most efficient in UTF-16. Furthermore, the majority of usage of encodedOffset in the wild is buggy and operates under the assumption that a UTF-16 code unit is a Swift Character, which isn't even valid if the String is known to be all-ASCII (because CR-LF is two code units but a single Character). This change introduces a set of new initializers and methods with an explicit encoding (through the type of the provided view) for serialization. It also adds an init and method for String's default view, which most code in the wild should migrate to if doing so eliminates its underlying bugs.
1 parent dc2ea3e commit 0cff8fb

22 files changed

+248
-111
lines changed

stdlib/public/Darwin/Foundation/URLComponents.swift

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -192,16 +192,8 @@ public struct URLComponents : ReferenceConvertible, Hashable, Equatable, _Mutabl
192192

193193
@available(macOS 10.11, iOS 9.0, *)
194194
private func _toStringRange(_ r : NSRange) -> Range<String.Index>? {
195-
guard r.location != NSNotFound else { return nil }
196-
197-
let utf16Start = String.UTF16View.Index(encodedOffset: r.location)
198-
let utf16End = String.UTF16View.Index(encodedOffset: r.location + r.length)
199-
200195
guard let s = self.string else { return nil }
201-
guard let start = String.Index(utf16Start, within: s) else { return nil }
202-
guard let end = String.Index(utf16End, within: s) else { return nil }
203-
204-
return start..<end
196+
return Range(r, in: s)
205197
}
206198

207199
/// Returns the character range of the scheme in the string returned by `var string`.

stdlib/public/Darwin/NaturalLanguage/NLTagger.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,15 @@ extension NLTagger {
1818
@nonobjc
1919
public func tokenRange(at index: String.Index, unit: NLTokenUnit) -> Range<String.Index> {
2020
let str = self.string ?? ""
21-
let characterIndex = index.encodedOffset
21+
let characterIndex = index.offset(within: str.utf16)
2222
let nsrange = self.__tokenRange(at: characterIndex, unit: unit)
2323
return Range(nsrange, in: str)!
2424
}
2525

2626
@nonobjc
2727
public func tag(at index: String.Index, unit: NLTokenUnit, scheme: NLTagScheme) -> (NLTag?, Range<String.Index>) {
2828
let str = self.string ?? ""
29-
let characterIndex = index.encodedOffset
29+
let characterIndex = index.offset(within: str.utf16)
3030
let rangePointer = NSRangePointer.allocate(capacity: 1)
3131
rangePointer.initialize(to: NSMakeRange(0, 0))
3232
let tag = self.__tag(at: characterIndex, unit: unit, scheme: scheme, tokenRange: rangePointer)

stdlib/public/Darwin/NaturalLanguage/NLTokenizer.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ extension NLTokenizer {
1818
@nonobjc
1919
public func tokenRange(at index: String.Index) -> Range<String.Index> {
2020
let str = self.string ?? ""
21-
let characterIndex = index.encodedOffset
21+
let characterIndex = index.offset(within: str.utf16)
2222
let nsrange = self.__tokenRange(at:characterIndex)
2323
return Range(nsrange, in: str)!
2424
}

stdlib/public/core/String.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -925,8 +925,8 @@ internal func _fastWithNormalizedCodeUnitsImpl(
925925
var icuInputBuffer = icuInputBuffer
926926
var icuOutputBuffer = icuOutputBuffer
927927

928-
var index = String.Index(encodedOffset: 0)
929-
let cachedEndIndex = String.Index(encodedOffset: sourceBuffer.count)
928+
var index = String.Index(_encodedOffset: 0)
929+
let cachedEndIndex = String.Index(_encodedOffset: sourceBuffer.count)
930930

931931
var hasBufferOwnership = false
932932

stdlib/public/core/StringBreadcrumbs.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,8 @@ extension _StringBreadcrumbs {
7979
internal func getBreadcrumb(
8080
forIndex idx: String.Index
8181
) -> (lowerBound: String.Index, offset: Int) {
82-
var lowerBound = idx.encodedOffset / 3 / stride
83-
var upperBound = Swift.min(1 + (idx.encodedOffset / stride), crumbs.count)
82+
var lowerBound = idx._encodedOffset / 3 / stride
83+
var upperBound = Swift.min(1 + (idx._encodedOffset / stride), crumbs.count)
8484
_internalInvariant(crumbs[lowerBound] <= idx)
8585
_internalInvariant(upperBound == crumbs.count || crumbs[upperBound] >= idx)
8686

stdlib/public/core/StringCharacterView.swift

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,9 @@ extension String: BidirectionalCollection {
6666

6767
// TODO: known-ASCII fast path, single-scalar-grapheme fast path, etc.
6868
let stride = _characterStride(startingAt: i)
69-
let nextOffset = i.encodedOffset &+ stride
69+
let nextOffset = i._encodedOffset &+ stride
7070
let nextStride = _characterStride(
71-
startingAt: Index(encodedOffset: nextOffset))
71+
startingAt: Index(_encodedOffset: nextOffset))
7272

7373
return Index(
7474
encodedOffset: nextOffset, characterStride: nextStride)
@@ -84,7 +84,7 @@ extension String: BidirectionalCollection {
8484

8585
// TODO: known-ASCII fast path, single-scalar-grapheme fast path, etc.
8686
let stride = _characterStride(endingAt: i)
87-
let priorOffset = i.encodedOffset &- stride
87+
let priorOffset = i._encodedOffset &- stride
8888
return Index(encodedOffset: priorOffset, characterStride: stride)
8989
}
9090
/// Returns an index that is the specified distance from the given index.
@@ -198,7 +198,7 @@ extension String: BidirectionalCollection {
198198
let i = _guts.scalarAlign(i)
199199
let distance = _characterStride(startingAt: i)
200200
return _guts.errorCorrectedCharacter(
201-
startingAt: i.encodedOffset, endingAt: i.encodedOffset &+ distance)
201+
startingAt: i._encodedOffset, endingAt: i._encodedOffset &+ distance)
202202
}
203203
}
204204

@@ -209,14 +209,14 @@ extension String: BidirectionalCollection {
209209

210210
if i == endIndex { return 0 }
211211

212-
return _guts._opaqueCharacterStride(startingAt: i.encodedOffset)
212+
return _guts._opaqueCharacterStride(startingAt: i._encodedOffset)
213213
}
214214

215215
@inlinable @inline(__always)
216216
internal func _characterStride(endingAt i: Index) -> Int {
217217
if i == startIndex { return 0 }
218218

219-
return _guts._opaqueCharacterStride(endingAt: i.encodedOffset)
219+
return _guts._opaqueCharacterStride(endingAt: i._encodedOffset)
220220
}
221221
}
222222

stdlib/public/core/StringComparison.swift

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -328,10 +328,10 @@ extension _StringGutsSlice {
328328
if _fastPath(self.isFastUTF8 && other.isFastUTF8) {
329329
return self.withFastUTF8 { leftUTF8 in
330330
other.withFastUTF8 { rightUTF8 in
331-
let leftStartIndex = String.Index(encodedOffset: 0)
332-
let rightStartIndex = String.Index(encodedOffset: 0)
333-
let leftEndIndex = String.Index(encodedOffset: leftUTF8.count)
334-
let rightEndIndex = String.Index(encodedOffset: rightUTF8.count)
331+
let leftStartIndex = String.Index(_encodedOffset: 0)
332+
let rightStartIndex = String.Index(_encodedOffset: 0)
333+
let leftEndIndex = String.Index(_encodedOffset: leftUTF8.count)
334+
let rightEndIndex = String.Index(_encodedOffset: rightUTF8.count)
335335
return _normalizedCompareImpl(
336336
left_outputBuffer: _castOutputBuffer(&left_output),
337337
left_icuInputBuffer: _castOutputBuffer(&left_icuInput),

stdlib/public/core/StringGraphemeBreaking.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ extension _StringGuts {
156156
internal func isOnGraphemeClusterBoundary(_ i: String.Index) -> Bool {
157157
guard i.transcodedOffset == 0 else { return false }
158158

159-
let offset = i.encodedOffset
159+
let offset = i._encodedOffset
160160
if offset == 0 || offset == self.count { return true }
161161

162162
guard isOnUnicodeScalarBoundary(i) else { return false }
@@ -197,7 +197,7 @@ extension _StringGuts {
197197
let count = _object.largeCount
198198
let cocoa = _object.cocoaObject
199199

200-
let startIdx = String.Index(encodedOffset: i)
200+
let startIdx = String.Index(_encodedOffset: i)
201201
let (sc1, len) = foreignErrorCorrectedScalar(startingAt: startIdx)
202202
if i &+ len == count {
203203
// Last scalar is last grapheme
@@ -263,7 +263,7 @@ extension _StringGuts {
263263
let count = _object.largeCount
264264
let cocoa = _object.cocoaObject
265265

266-
let endIdx = String.Index(encodedOffset: i)
266+
let endIdx = String.Index(_encodedOffset: i)
267267
let (sc2, len) = foreignErrorCorrectedScalar(endingAt: endIdx)
268268
if i &- len == 0 {
269269
// First scalar is first grapheme

stdlib/public/core/StringGuts.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -274,11 +274,11 @@ extension _StringGuts {
274274

275275
@inlinable
276276
internal var startIndex: String.Index {
277-
@inline(__always) get { return Index(encodedOffset: 0) }
277+
@inline(__always) get { return Index(_encodedOffset: 0) }
278278
}
279279
@inlinable
280280
internal var endIndex: String.Index {
281-
@inline(__always) get { return Index(encodedOffset: self.count) }
281+
@inline(__always) get { return Index(_encodedOffset: self.count) }
282282
}
283283
}
284284

stdlib/public/core/StringGutsRangeReplaceable.swift

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -218,8 +218,8 @@ extension _StringGuts {
218218
}
219219

220220
internal mutating func remove(from lower: Index, to upper: Index) {
221-
let lowerOffset = lower.encodedOffset
222-
let upperOffset = upper.encodedOffset
221+
let lowerOffset = lower._encodedOffset
222+
let upperOffset = upper._encodedOffset
223223
_internalInvariant(lower.transcodedOffset == 0 && upper.transcodedOffset == 0)
224224
_internalInvariant(lowerOffset <= upperOffset && upperOffset <= self.count)
225225

@@ -279,16 +279,16 @@ extension _StringGuts {
279279
isASCII: Bool
280280
) {
281281
let neededCapacity =
282-
bounds.lowerBound.encodedOffset
283-
+ codeUnits.count + (self.count - bounds.upperBound.encodedOffset)
282+
bounds.lowerBound._encodedOffset
283+
+ codeUnits.count + (self.count - bounds.upperBound._encodedOffset)
284284
reserveCapacity(neededCapacity)
285285

286286
_internalInvariant(bounds.lowerBound.transcodedOffset == 0)
287287
_internalInvariant(bounds.upperBound.transcodedOffset == 0)
288288

289289
_object.nativeStorage.replace(
290-
from: bounds.lowerBound.encodedOffset,
291-
to: bounds.upperBound.encodedOffset,
290+
from: bounds.lowerBound._encodedOffset,
291+
to: bounds.upperBound._encodedOffset,
292292
with: codeUnits)
293293
self = _StringGuts(_object.nativeStorage)
294294
}
@@ -300,16 +300,16 @@ extension _StringGuts {
300300
let replCount = codeUnits.count
301301

302302
let neededCapacity =
303-
bounds.lowerBound.encodedOffset
304-
+ replCount + (self.count - bounds.upperBound.encodedOffset)
303+
bounds.lowerBound._encodedOffset
304+
+ replCount + (self.count - bounds.upperBound._encodedOffset)
305305
reserveCapacity(neededCapacity)
306306

307307
_internalInvariant(bounds.lowerBound.transcodedOffset == 0)
308308
_internalInvariant(bounds.upperBound.transcodedOffset == 0)
309309

310310
_object.nativeStorage.replace(
311-
from: bounds.lowerBound.encodedOffset,
312-
to: bounds.upperBound.encodedOffset,
311+
from: bounds.lowerBound._encodedOffset,
312+
to: bounds.upperBound._encodedOffset,
313313
with: codeUnits,
314314
replacementCount: replCount)
315315
self = _StringGuts(_object.nativeStorage)

stdlib/public/core/StringGutsSlice.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,8 @@ internal struct _StringGutsSlice {
7474
@inlinable
7575
internal var range: Range<String.Index> {
7676
@inline(__always) get {
77-
return String.Index(encodedOffset: _offsetRange.lowerBound)
78-
..< String.Index(encodedOffset: _offsetRange.upperBound)
77+
return String.Index(_encodedOffset: _offsetRange.lowerBound)
78+
..< String.Index(_encodedOffset: _offsetRange.upperBound)
7979
}
8080
}
8181

0 commit comments

Comments
 (0)