Skip to content

Commit 14f8ba7

Browse files
committed
[stdlib] Make String.Index(_:within:) initializers more permissive
In Swift 5.6 and below, (broken) code that acquired indices from a UTF-16-encoded string bridged from Cocoa and kept using them after a `makeContiguousUTF8` call (or other mutation) may have appeared to be working correctly as long as the string was ASCII. Since swiftlang#41417, the `String(_:within:)` initializers recognize miscoded indices and reject them by returning nil. This is technically correct, but it unfortunately may be a binary compatibility issue, as these used to return non-nil in previous versions. Mitigate this issue by accepting UTF-16 indices on a UTF-8 string, transcoding their offset as needed. (Attempting to use an UTF-8 index on a UTF-16 string is still rejected — we do not implicitly convert strings in that direction.) rdar://89369680 (cherry picked from commit 4d557b0)
1 parent 30e15ab commit 14f8ba7

File tree

6 files changed

+101
-20
lines changed

6 files changed

+101
-20
lines changed

stdlib/public/core/StringGuts.swift

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -334,23 +334,56 @@ extension _StringGuts {
334334
i._hasMatchingEncoding(isUTF8: isUTF8)
335335
}
336336

337-
/// Return an index whose encoding can be assumed to match that of `self`.
337+
/// Return an index whose encoding can be assumed to match that of `self`,
338+
/// trapping if `i` has an incompatible encoding.
339+
///
340+
/// If `i` is UTF-8 encoded, but `self` is an UTF-16 string, then trap.
341+
///
342+
/// If `i` is UTF-16 encoded, but `self` is an UTF-8 string, then transcode
343+
/// `i`'s offset to UTF-8 and return the resulting index. This allows the use
344+
/// of indices from a bridged Cocoa string after the string has been converted
345+
/// to a native Swift string. (Such indices are technically still considered
346+
/// invalid, but we allow this specific case to keep compatibility with
347+
/// existing code that assumes otherwise.)
338348
///
339349
/// Detecting an encoding mismatch isn't always possible -- older binaries did
340350
/// not set the flags that this method relies on. However, false positives
341351
/// cannot happen: if this method detects a mismatch, then it is guaranteed to
342352
/// be a real one.
343353
@_alwaysEmitIntoClient
344354
@inline(__always)
345-
internal func ensureMatchingEncoding(_ i: String.Index) -> String.Index {
355+
internal func ensureMatchingEncoding(_ i: Index) -> Index {
346356
if _fastPath(hasMatchingEncoding(i)) { return i }
357+
if let i = _slowEnsureMatchingEncoding(i) { return i }
358+
_preconditionFailure("Invalid string index")
359+
}
360+
361+
/// Return an index that corresponds to the same position as `i`, but whose
362+
/// encoding can be assumed to match that of `self`, returning `nil` if `i`
363+
/// has incompatible encoding.
364+
///
365+
/// If `i` is UTF-8 encoded, but `self` is an UTF-16 string, then return nil.
366+
///
367+
/// If `i` is UTF-16 encoded, but `self` is an UTF-8 string, then transcode
368+
/// `i`'s offset to UTF-8 and return the resulting index. This allows the use
369+
/// of indices from a bridged Cocoa string after the string has been converted
370+
/// to a native Swift string. (Such indices are technically still considered
371+
/// invalid, but we allow this specific case to keep compatibility with
372+
/// existing code that assumes otherwise.)
373+
///
374+
/// Detecting an encoding mismatch isn't always possible -- older binaries did
375+
/// not set the flags that this method relies on. However, false positives
376+
/// cannot happen: if this method detects a mismatch, then it is guaranteed to
377+
/// be a real one.
378+
internal func ensureMatchingEncodingNoTrap(_ i: Index) -> Index? {
379+
if hasMatchingEncoding(i) { return i }
347380
return _slowEnsureMatchingEncoding(i)
348381
}
349382

350383
@_alwaysEmitIntoClient
351384
@inline(never)
352385
@_effects(releasenone)
353-
internal func _slowEnsureMatchingEncoding(_ i: String.Index) -> String.Index {
386+
internal func _slowEnsureMatchingEncoding(_ i: Index) -> Index? {
354387
guard isUTF8 else {
355388
// Attempt to use an UTF-8 index on a UTF-16 string. Strings don't usually
356389
// get converted to UTF-16 storage, so it seems okay to trap in this case
@@ -368,7 +401,7 @@ extension _StringGuts {
368401
//
369402
// This trap can never trigger on OSes that have stdlibs <= 5.6, because
370403
// those versions never set the `isKnownUTF16` flag in `_StringObject`.
371-
_preconditionFailure("Invalid string index")
404+
return nil
372405
}
373406
// Attempt to use an UTF-16 index on a UTF-8 string.
374407
//
@@ -384,10 +417,14 @@ extension _StringGuts {
384417
// FIXME: Consider emitting a runtime warning here.
385418
// FIXME: Consider performing a linked-on-or-after check & trapping if the
386419
// client executable was built on some particular future Swift release.
387-
let utf16 = String(self).utf16
388-
let base = utf16.index(utf16.startIndex, offsetBy: i._encodedOffset)
389-
if i.transcodedOffset == 0 { return base }
390-
return base.encoded(offsetBy: i.transcodedOffset)._knownUTF8
420+
let utf16 = String.UTF16View(self)
421+
var r = utf16.index(utf16.startIndex, offsetBy: i._encodedOffset)
422+
if i.transcodedOffset != 0 {
423+
r = r.encoded(offsetBy: i.transcodedOffset)
424+
} else {
425+
r = r._copyingAlignment(from: i)
426+
}
427+
return r._knownUTF8
391428
}
392429
}
393430

stdlib/public/core/StringIndex.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,14 @@ extension String.Index {
365365
}
366366
}
367367

368+
extension String.Index {
369+
@_alwaysEmitIntoClient // Swift 5.7
370+
internal func _copyingAlignment(from index: Self) -> Self {
371+
let mask = Self.__scalarAlignmentBit | Self.__characterAlignmentBit
372+
return Self((_rawBits & ~mask) | (index._rawBits & mask))
373+
}
374+
}
375+
368376
// ### Index Encoding
369377
//
370378
// Swift 5.7 introduced bookkeeping to keep track of the Unicode encoding

stdlib/public/core/StringIndexConversions.swift

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,17 @@ extension String.Index {
5050
/// of `target`.
5151
/// - target: The string referenced by the resulting index.
5252
public init?(_ sourcePosition: String.Index, within target: String) {
53-
guard target._isValidIndex(sourcePosition) else { return nil }
54-
self = sourcePosition._characterAligned
53+
// As a special exception, we allow `sourcePosition` to be an UTF-16 index
54+
// when `self` is a UTF-8 string, to preserve compatibility with (broken)
55+
// code that keeps using indices from a bridged string after converting the
56+
// string to a native representation. Such indices are invalid, but
57+
// returning nil here can break code that appeared to work fine for ASCII
58+
// strings in Swift releases prior to 5.7.
59+
guard
60+
let i = target._guts.ensureMatchingEncodingNoTrap(sourcePosition),
61+
target._isValidIndex(i)
62+
else { return nil }
63+
self = i._characterAligned
5564
}
5665

5766
/// Creates an index in the given string that corresponds exactly to the
@@ -101,8 +110,17 @@ extension String.Index {
101110
return
102111
}
103112
if let str = target as? Substring {
104-
guard str._isValidIndex(sourcePosition) else { return nil }
105-
self = sourcePosition
113+
// As a special exception, we allow `sourcePosition` to be an UTF-16 index
114+
// when `self` is a UTF-8 string, to preserve compatibility with (broken)
115+
// code that keeps using indices from a bridged string after converting
116+
// the string to a native representation. Such indices are invalid, but
117+
// returning nil here can break code that appeared to work fine for ASCII
118+
// strings in Swift releases prior to 5.7.
119+
guard
120+
let i = str._wholeGuts.ensureMatchingEncodingNoTrap(sourcePosition),
121+
str._isValidIndex(i)
122+
else { return nil }
123+
self = i
106124
return
107125
}
108126
self.init(sourcePosition, within: String(target))

stdlib/public/core/StringUTF16View.swift

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -402,8 +402,17 @@ extension String.UTF16View.Index {
402402
public init?(
403403
_ idx: String.Index, within target: String.UTF16View
404404
) {
405-
guard target._guts.hasMatchingEncoding(idx) else { return nil }
406-
guard idx._encodedOffset <= target._guts.count else { return nil }
405+
// As a special exception, we allow `idx` to be an UTF-16 index when `self`
406+
// is a UTF-8 string, to preserve compatibility with (broken) code that
407+
// keeps using indices from a bridged string after converting the string to
408+
// a native representation. Such indices are invalid, but returning nil here
409+
// can break code that appeared to work fine for ASCII strings in Swift
410+
// releases prior to 5.7.
411+
guard
412+
let idx = target._guts.ensureMatchingEncodingNoTrap(idx),
413+
idx._encodedOffset <= target._guts.count
414+
else { return nil }
415+
407416
if _slowPath(target._guts.isForeign) {
408417
guard idx._foreignIsWithin(target) else { return nil }
409418
} else {

stdlib/public/core/StringUTF8View.swift

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -359,8 +359,17 @@ extension String.UTF8View.Index {
359359
public init?(_ idx: String.Index, within target: String.UTF8View) {
360360
// Note: This method used to be inlinable until Swift 5.7.
361361

362-
guard target._guts.hasMatchingEncoding(idx) else { return nil }
363-
guard idx._encodedOffset <= target._guts.count else { return nil }
362+
// As a special exception, we allow `idx` to be an UTF-16 index when `self`
363+
// is a UTF-8 string, to preserve compatibility with (broken) code that
364+
// keeps using indices from a bridged string after converting the string to
365+
// a native representation. Such indices are invalid, but returning nil here
366+
// can break code that appeared to work fine for ASCII strings in Swift
367+
// releases prior to 5.7.
368+
guard
369+
let idx = target._guts.ensureMatchingEncodingNoTrap(idx),
370+
idx._encodedOffset <= target._guts.count
371+
else { return nil }
372+
364373
if _slowPath(target._guts.isForeign) {
365374
guard idx._foreignIsWithin(target) else { return nil }
366375
} else {

stdlib/public/core/StringUnicodeScalarView.swift

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -429,13 +429,13 @@ extension String.UnicodeScalarIndex {
429429
within unicodeScalars: String.UnicodeScalarView
430430
) {
431431
guard
432-
unicodeScalars._guts.hasMatchingEncoding(sourcePosition),
433-
sourcePosition._encodedOffset <= unicodeScalars._guts.count,
434-
unicodeScalars._guts.isOnUnicodeScalarBoundary(sourcePosition)
432+
let i = unicodeScalars._guts.ensureMatchingEncodingNoTrap(sourcePosition),
433+
i._encodedOffset <= unicodeScalars._guts.count,
434+
unicodeScalars._guts.isOnUnicodeScalarBoundary(i)
435435
else {
436436
return nil
437437
}
438-
self = sourcePosition
438+
self = i
439439
}
440440

441441
/// Returns the position in the given string that corresponds exactly to this

0 commit comments

Comments
 (0)