Skip to content

Commit 4d557b0

Browse files
committed
[stdlib] Make String.Index(_:within:) initializers more permissive
In Swift 5.6 and below, (broken) code that acquired indices from a UTF-16-encoded string bridged from Cocoa and kept using them after a `makeContiguousUTF8` call (or other mutation) may have appeared to be working correctly as long as the string was ASCII. Since swiftlang#41417, the `String(_:within:)` initializers recognize miscoded indices and reject them by returning nil. This is technically correct, but it unfortunately may be a binary compatibility issue, as these used to return non-nil in previous versions. Mitigate this issue by accepting UTF-16 indices on a UTF-8 string, transcoding their offset as needed. (Attempting to use an UTF-8 index on a UTF-16 string is still rejected — we do not implicitly convert strings in that direction.) rdar://89369680
1 parent e21b846 commit 4d557b0

File tree

6 files changed

+101
-20
lines changed

6 files changed

+101
-20
lines changed

stdlib/public/core/StringGuts.swift

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -333,23 +333,56 @@ extension _StringGuts {
333333
i._hasMatchingEncoding(isUTF8: isUTF8)
334334
}
335335

336-
/// Return an index whose encoding can be assumed to match that of `self`.
336+
/// Return an index whose encoding can be assumed to match that of `self`,
337+
/// trapping if `i` has an incompatible encoding.
338+
///
339+
/// If `i` is UTF-8 encoded, but `self` is an UTF-16 string, then trap.
340+
///
341+
/// If `i` is UTF-16 encoded, but `self` is an UTF-8 string, then transcode
342+
/// `i`'s offset to UTF-8 and return the resulting index. This allows the use
343+
/// of indices from a bridged Cocoa string after the string has been converted
344+
/// to a native Swift string. (Such indices are technically still considered
345+
/// invalid, but we allow this specific case to keep compatibility with
346+
/// existing code that assumes otherwise.)
337347
///
338348
/// Detecting an encoding mismatch isn't always possible -- older binaries did
339349
/// not set the flags that this method relies on. However, false positives
340350
/// cannot happen: if this method detects a mismatch, then it is guaranteed to
341351
/// be a real one.
342352
@_alwaysEmitIntoClient
343353
@inline(__always)
344-
internal func ensureMatchingEncoding(_ i: String.Index) -> String.Index {
354+
internal func ensureMatchingEncoding(_ i: Index) -> Index {
345355
if _fastPath(hasMatchingEncoding(i)) { return i }
356+
if let i = _slowEnsureMatchingEncoding(i) { return i }
357+
_preconditionFailure("Invalid string index")
358+
}
359+
360+
/// Return an index that corresponds to the same position as `i`, but whose
361+
/// encoding can be assumed to match that of `self`, returning `nil` if `i`
362+
/// has incompatible encoding.
363+
///
364+
/// If `i` is UTF-8 encoded, but `self` is an UTF-16 string, then return nil.
365+
///
366+
/// If `i` is UTF-16 encoded, but `self` is an UTF-8 string, then transcode
367+
/// `i`'s offset to UTF-8 and return the resulting index. This allows the use
368+
/// of indices from a bridged Cocoa string after the string has been converted
369+
/// to a native Swift string. (Such indices are technically still considered
370+
/// invalid, but we allow this specific case to keep compatibility with
371+
/// existing code that assumes otherwise.)
372+
///
373+
/// Detecting an encoding mismatch isn't always possible -- older binaries did
374+
/// not set the flags that this method relies on. However, false positives
375+
/// cannot happen: if this method detects a mismatch, then it is guaranteed to
376+
/// be a real one.
377+
internal func ensureMatchingEncodingNoTrap(_ i: Index) -> Index? {
378+
if hasMatchingEncoding(i) { return i }
346379
return _slowEnsureMatchingEncoding(i)
347380
}
348381

349382
@_alwaysEmitIntoClient
350383
@inline(never)
351384
@_effects(releasenone)
352-
internal func _slowEnsureMatchingEncoding(_ i: String.Index) -> String.Index {
385+
internal func _slowEnsureMatchingEncoding(_ i: Index) -> Index? {
353386
guard isUTF8 else {
354387
// Attempt to use an UTF-8 index on a UTF-16 string. Strings don't usually
355388
// get converted to UTF-16 storage, so it seems okay to trap in this case
@@ -367,7 +400,7 @@ extension _StringGuts {
367400
//
368401
// This trap can never trigger on OSes that have stdlibs <= 5.6, because
369402
// those versions never set the `isKnownUTF16` flag in `_StringObject`.
370-
_preconditionFailure("Invalid string index")
403+
return nil
371404
}
372405
// Attempt to use an UTF-16 index on a UTF-8 string.
373406
//
@@ -383,10 +416,14 @@ extension _StringGuts {
383416
// FIXME: Consider emitting a runtime warning here.
384417
// FIXME: Consider performing a linked-on-or-after check & trapping if the
385418
// client executable was built on some particular future Swift release.
386-
let utf16 = String(self).utf16
387-
let base = utf16.index(utf16.startIndex, offsetBy: i._encodedOffset)
388-
if i.transcodedOffset == 0 { return base }
389-
return base.encoded(offsetBy: i.transcodedOffset)._knownUTF8
419+
let utf16 = String.UTF16View(self)
420+
var r = utf16.index(utf16.startIndex, offsetBy: i._encodedOffset)
421+
if i.transcodedOffset != 0 {
422+
r = r.encoded(offsetBy: i.transcodedOffset)
423+
} else {
424+
r = r._copyingAlignment(from: i)
425+
}
426+
return r._knownUTF8
390427
}
391428
}
392429

stdlib/public/core/StringIndex.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,14 @@ extension String.Index {
365365
}
366366
}
367367

368+
extension String.Index {
369+
@_alwaysEmitIntoClient // Swift 5.7
370+
internal func _copyingAlignment(from index: Self) -> Self {
371+
let mask = Self.__scalarAlignmentBit | Self.__characterAlignmentBit
372+
return Self((_rawBits & ~mask) | (index._rawBits & mask))
373+
}
374+
}
375+
368376
// ### Index Encoding
369377
//
370378
// Swift 5.7 introduced bookkeeping to keep track of the Unicode encoding

stdlib/public/core/StringIndexConversions.swift

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,17 @@ extension String.Index {
5050
/// of `target`.
5151
/// - target: The string referenced by the resulting index.
5252
public init?(_ sourcePosition: String.Index, within target: String) {
53-
guard target._isValidIndex(sourcePosition) else { return nil }
54-
self = sourcePosition._characterAligned
53+
// As a special exception, we allow `sourcePosition` to be an UTF-16 index
54+
// when `self` is a UTF-8 string, to preserve compatibility with (broken)
55+
// code that keeps using indices from a bridged string after converting the
56+
// string to a native representation. Such indices are invalid, but
57+
// returning nil here can break code that appeared to work fine for ASCII
58+
// strings in Swift releases prior to 5.7.
59+
guard
60+
let i = target._guts.ensureMatchingEncodingNoTrap(sourcePosition),
61+
target._isValidIndex(i)
62+
else { return nil }
63+
self = i._characterAligned
5564
}
5665

5766
/// Creates an index in the given string that corresponds exactly to the
@@ -101,8 +110,17 @@ extension String.Index {
101110
return
102111
}
103112
if let str = target as? Substring {
104-
guard str._isValidIndex(sourcePosition) else { return nil }
105-
self = sourcePosition
113+
// As a special exception, we allow `sourcePosition` to be an UTF-16 index
114+
// when `self` is a UTF-8 string, to preserve compatibility with (broken)
115+
// code that keeps using indices from a bridged string after converting
116+
// the string to a native representation. Such indices are invalid, but
117+
// returning nil here can break code that appeared to work fine for ASCII
118+
// strings in Swift releases prior to 5.7.
119+
guard
120+
let i = str._wholeGuts.ensureMatchingEncodingNoTrap(sourcePosition),
121+
str._isValidIndex(i)
122+
else { return nil }
123+
self = i
106124
return
107125
}
108126
self.init(sourcePosition, within: String(target))

stdlib/public/core/StringUTF16View.swift

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -402,8 +402,17 @@ extension String.UTF16View.Index {
402402
public init?(
403403
_ idx: String.Index, within target: String.UTF16View
404404
) {
405-
guard target._guts.hasMatchingEncoding(idx) else { return nil }
406-
guard idx._encodedOffset <= target._guts.count else { return nil }
405+
// As a special exception, we allow `idx` to be an UTF-16 index when `self`
406+
// is a UTF-8 string, to preserve compatibility with (broken) code that
407+
// keeps using indices from a bridged string after converting the string to
408+
// a native representation. Such indices are invalid, but returning nil here
409+
// can break code that appeared to work fine for ASCII strings in Swift
410+
// releases prior to 5.7.
411+
guard
412+
let idx = target._guts.ensureMatchingEncodingNoTrap(idx),
413+
idx._encodedOffset <= target._guts.count
414+
else { return nil }
415+
407416
if _slowPath(target._guts.isForeign) {
408417
guard idx._foreignIsWithin(target) else { return nil }
409418
} else {

stdlib/public/core/StringUTF8View.swift

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -359,8 +359,17 @@ extension String.UTF8View.Index {
359359
public init?(_ idx: String.Index, within target: String.UTF8View) {
360360
// Note: This method used to be inlinable until Swift 5.7.
361361

362-
guard target._guts.hasMatchingEncoding(idx) else { return nil }
363-
guard idx._encodedOffset <= target._guts.count else { return nil }
362+
// As a special exception, we allow `idx` to be an UTF-16 index when `self`
363+
// is a UTF-8 string, to preserve compatibility with (broken) code that
364+
// keeps using indices from a bridged string after converting the string to
365+
// a native representation. Such indices are invalid, but returning nil here
366+
// can break code that appeared to work fine for ASCII strings in Swift
367+
// releases prior to 5.7.
368+
guard
369+
let idx = target._guts.ensureMatchingEncodingNoTrap(idx),
370+
idx._encodedOffset <= target._guts.count
371+
else { return nil }
372+
364373
if _slowPath(target._guts.isForeign) {
365374
guard idx._foreignIsWithin(target) else { return nil }
366375
} else {

stdlib/public/core/StringUnicodeScalarView.swift

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -429,13 +429,13 @@ extension String.UnicodeScalarIndex {
429429
within unicodeScalars: String.UnicodeScalarView
430430
) {
431431
guard
432-
unicodeScalars._guts.hasMatchingEncoding(sourcePosition),
433-
sourcePosition._encodedOffset <= unicodeScalars._guts.count,
434-
unicodeScalars._guts.isOnUnicodeScalarBoundary(sourcePosition)
432+
let i = unicodeScalars._guts.ensureMatchingEncodingNoTrap(sourcePosition),
433+
i._encodedOffset <= unicodeScalars._guts.count,
434+
unicodeScalars._guts.isOnUnicodeScalarBoundary(i)
435435
else {
436436
return nil
437437
}
438-
self = sourcePosition
438+
self = i
439439
}
440440

441441
/// Returns the position in the given string that corresponds exactly to this

0 commit comments

Comments
 (0)