Skip to content

Commit 50c2399

Browse files
committed
[stdlib] Work around binary compatibility issues with String index validation fixes in 5.7
Swift 5.7 added stronger index validation for `String`, so some illegal cases that previously triggered inconsistently diagnosed out-of-bounds accesses now result in reliable runtime errors. Similarly, attempts to apply an index originally vended by a UTF-8 string to a UTF-16 string now result in a reliable runtime error. As is usually the case, new traps in the stdlib expose code that contains previously undiagnosed / unreliably diagnosed coding issues. Allow invalid code in binaries built with earlier versions of the stdlib to continue running with the 5.7 library by disabling some of the new traps based on the version of Swift the binary was built with. In the case of an index encoding mismatch, allow transcoding of string storage regardless of the direction of the mismatch. (Previously we only allowed transcoding a UTF-8 string to UTF-16.) rdar://93379333
1 parent 8fb604c commit 50c2399

16 files changed

+543
-229
lines changed

stdlib/public/SwiftShims/RuntimeShims.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,20 @@ SWIFT_RUNTIME_STDLIB_SPI
116116
__swift_bool _swift_stdlib_getCurrentStackBounds(__swift_uintptr_t *outBegin,
117117
__swift_uintptr_t *outEnd);
118118

119+
/// A value representing a version number for the Standard Library.
120+
typedef struct {
121+
__swift_uint32_t _value;
122+
} _SwiftStdlibVersion;
123+
124+
/// Checks if the currently running executable was built using a Swift release
125+
/// matching or exceeding the specified Standard Library version number. This
126+
/// can be used to stage behavioral changes in the Standard Library, preventing
127+
/// them from causing compatibility issues with existing binaries.
128+
SWIFT_RUNTIME_STDLIB_INTERNAL
129+
__swift_bool _swift_stdlib_isExecutableLinkedOnOrAfter(
130+
_SwiftStdlibVersion version
131+
) __attribute__((const));
132+
119133
#ifdef __cplusplus
120134
} // extern "C"
121135
#endif

stdlib/public/core/Assert.swift

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,40 @@ internal func _internalInvariant_5_1(
325325
#endif
326326
}
327327

328+
/// Library precondition checks with a linked-on-or-after check, allowing the
329+
/// addition of new preconditions while maintaining compatibility with older
330+
/// binaries.
331+
///
332+
/// This version of `_precondition` only traps if the condition returns false
333+
/// **and** the current executable was built with a Swift Standard Library
334+
/// version equal to or greater than the supplied version.
335+
@_transparent
336+
internal func _precondition(
337+
ifLinkedOnOrAfter version: _SwiftStdlibVersion,
338+
_ condition: @autoclosure () -> Bool,
339+
_ message: StaticString = StaticString(),
340+
file: StaticString = #file, line: UInt = #line
341+
) {
342+
// Delay the linked-on-or-after check until after we know we have a failed
343+
// condition, so that we don't slow down the usual case too much.
344+
345+
// Note: this is an internal function, so `_isDebugAssertConfiguration` is
346+
// expected to evaluate (at compile time) to true in production builds of the
347+
// stdlib. The other branches are kept in case the stdlib is built with an
348+
// unusual configuration.
349+
if _isDebugAssertConfiguration() {
350+
if _slowPath(!condition()) {
351+
guard _isExecutableLinkedOnOrAfter(version) else { return }
352+
_assertionFailure("Fatal error", message, file: file, line: line,
353+
flags: _fatalErrorFlags())
354+
}
355+
} else if _isReleaseAssertConfiguration() {
356+
let error = (!condition() && _isExecutableLinkedOnOrAfter(version))
357+
Builtin.condfail_message(error._value, message.unsafeRawPointer)
358+
}
359+
}
360+
361+
328362
@usableFromInline @_transparent
329363
internal func _internalInvariantFailure(
330364
_ message: StaticString = StaticString(),

stdlib/public/core/Availability.swift

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,48 @@ public func _stdlib_isOSVersionAtLeastOrVariantVersionAtLeast(
5959
return _stdlib_isOSVersionAtLeast(major, minor, patch)
6060
}
6161
#endif
62+
63+
public typealias _SwiftStdlibVersion = SwiftShims._SwiftStdlibVersion
64+
65+
/// Return true if the main executable was linked with an SDK version
66+
/// corresponding to the given Swift Stdlib release, or later. Otherwise, return
67+
/// false.
68+
///
69+
/// This is useful to maintain compatibility with older binaries after a
70+
/// behavioral change in the stdlib.
71+
///
72+
/// This function must not be called from inlinable code.
73+
@inline(__always)
74+
internal func _isExecutableLinkedOnOrAfter(
75+
_ stdlibVersion: _SwiftStdlibVersion
76+
) -> Bool {
77+
#if SWIFT_RUNTIME_OS_VERSIONING
78+
return _swift_stdlib_isExecutableLinkedOnOrAfter(stdlibVersion)
79+
#else
80+
return true
81+
#endif
82+
}
83+
84+
extension _SwiftStdlibVersion {
85+
@_alwaysEmitIntoClient
86+
public static var v5_6_0: Self { Self(_value: 0x050600) }
87+
88+
@_alwaysEmitIntoClient
89+
public static var v5_7_0: Self { Self(_value: 0x050700) }
90+
91+
@available(SwiftStdlib 5.7, *)
92+
public static var current: Self { .v5_7_0 }
93+
}
94+
95+
@available(SwiftStdlib 5.7, *)
96+
extension _SwiftStdlibVersion: CustomStringConvertible {
97+
@available(SwiftStdlib 5.7, *)
98+
public var description: String {
99+
let major = _value >> 16
100+
let minor = (_value >> 8) & 0xFF
101+
let patch = _value & 0xFF
102+
return "\(major).\(minor).\(patch)"
103+
}
104+
}
105+
106+

stdlib/public/core/StringCharacterView.swift

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,16 @@ extension String: BidirectionalCollection {
8383
/// `startIndex`.
8484
/// - Returns: The index value immediately before `i`.
8585
public func index(before i: Index) -> Index {
86-
let i = _guts.validateInclusiveCharacterIndex(i)
86+
// FIXME: This method used to not properly validate indices before 5.7;
87+
// temporarily allow older binaries to keep invoking undefined behavior as
88+
// before.
89+
let i = _guts.validateInclusiveCharacterIndex_5_7(i)
90+
8791
// Note: Aligning an index may move it closer towards the `startIndex`, so
8892
// the `i > startIndex` check needs to come after rounding.
89-
_precondition(i > startIndex, "String index is out of bounds")
93+
_precondition(
94+
ifLinkedOnOrAfter: .v5_7_0,
95+
i > startIndex, "String index is out of bounds")
9096

9197
return _uncheckedIndex(before: i)
9298
}
@@ -137,7 +143,10 @@ extension String: BidirectionalCollection {
137143

138144
// TODO: known-ASCII and single-scalar-grapheme fast path, etc.
139145

140-
var i = _guts.validateInclusiveCharacterIndex(i)
146+
// FIXME: This method used to not properly validate indices before 5.7;
147+
// temporarily allow older binaries to keep invoking undefined behavior as
148+
// before.
149+
var i = _guts.validateInclusiveCharacterIndex_5_7(i)
141150

142151
if distance >= 0 {
143152
for _ in stride(from: 0, to: distance, by: 1) {
@@ -209,10 +218,14 @@ extension String: BidirectionalCollection {
209218
// ensure our behavior exactly matches the documentation above. We do need
210219
// to ensure it has a matching encoding, though. The same goes for `start`,
211220
// which is used to determine whether the limit applies at all.
221+
212222
let limit = _guts.ensureMatchingEncoding(limit)
213223
let start = _guts.ensureMatchingEncoding(i)
214224

215-
var i = _guts.validateInclusiveCharacterIndex(i)
225+
// FIXME: This method used to not properly validate indices before 5.7;
226+
// temporarily allow older binaries to keep invoking undefined behavior as
227+
// before.
228+
var i = _guts.validateInclusiveCharacterIndex_5_7(i)
216229

217230
if distance >= 0 {
218231
for _ in stride(from: 0, to: distance, by: 1) {
@@ -245,8 +258,11 @@ extension String: BidirectionalCollection {
245258
// Note: Prior to Swift 5.7, this function used to be inlinable, forwarding
246259
// to `BidirectionalCollection._distance(from:to:)`.
247260

248-
let start = _guts.validateInclusiveCharacterIndex(start)
249-
let end = _guts.validateInclusiveCharacterIndex(end)
261+
// FIXME: This method used to not properly validate indices before 5.7;
262+
// temporarily allow older binaries to keep invoking undefined behavior as
263+
// before.
264+
let start = _guts.validateInclusiveCharacterIndex_5_7(start)
265+
let end = _guts.validateInclusiveCharacterIndex_5_7(end)
250266

251267
// TODO: known-ASCII and single-scalar-grapheme fast path, etc.
252268

stdlib/public/core/StringGuts.swift

Lines changed: 36 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -347,79 +347,55 @@ extension _StringGuts {
347347
@inline(__always)
348348
internal func ensureMatchingEncoding(_ i: Index) -> Index {
349349
if _fastPath(hasMatchingEncoding(i)) { return i }
350-
if let i = _slowEnsureMatchingEncoding(i) { return i }
351-
// Note that this trap is not guaranteed to trigger when the process
352-
// includes client binaries compiled with a previous Swift release.
353-
// (`i._canBeUTF16` can sometimes return true in that case even if the index
354-
// actually came from an UTF-8 string.) However, the trap will still often
355-
// trigger in this case, as long as the index was initialized by code that
356-
// was compiled with 5.7+.
357-
//
358-
// This trap will rarely if ever trigger on OSes that have stdlibs <= 5.6,
359-
// because those versions never set the `isKnownUTF16` flag in
360-
// `_StringObject`. (The flag may still be set within inlinable code,
361-
// though.)
362-
_preconditionFailure("Invalid string index")
363-
}
364-
365-
/// Return an index that corresponds to the same position as `i`, but whose
366-
/// encoding can be assumed to match that of `self`, returning `nil` if `i`
367-
/// has incompatible encoding.
368-
///
369-
/// If `i` is UTF-8 encoded, but `self` is an UTF-16 string, then return nil.
370-
///
371-
/// If `i` is UTF-16 encoded, but `self` is an UTF-8 string, then transcode
372-
/// `i`'s offset to UTF-8 and return the resulting index. This allows the use
373-
/// of indices from a bridged Cocoa string after the string has been converted
374-
/// to a native Swift string. (Such indices are technically still considered
375-
/// invalid, but we allow this specific case to keep compatibility with
376-
/// existing code that assumes otherwise.)
377-
///
378-
/// Detecting an encoding mismatch isn't always possible -- older binaries did
379-
/// not set the flags that this method relies on. However, false positives
380-
/// cannot happen: if this method detects a mismatch, then it is guaranteed to
381-
/// be a real one.
382-
internal func ensureMatchingEncodingNoTrap(_ i: Index) -> Index? {
383-
if hasMatchingEncoding(i) { return i }
384350
return _slowEnsureMatchingEncoding(i)
385351
}
386352

387353
@_alwaysEmitIntoClient
388354
@inline(never)
389355
@_effects(releasenone)
390-
internal func _slowEnsureMatchingEncoding(_ i: Index) -> Index? {
391-
guard isUTF8 else {
392-
// Attempt to use an UTF-8 index on a UTF-16 string. Strings don't usually
393-
// get converted to UTF-16 storage, so it seems okay to reject this case
394-
// -- the index most likely comes from an unrelated string. (This may
395-
// still turn out to affect binary compatibility with broken code in
396-
// existing binaries running with new stdlibs. If so, we can replace this
397-
// with the same transcoding hack as in the UTF-16->8 case below.)
398-
return nil
356+
internal func _slowEnsureMatchingEncoding(_ i: Index) -> Index {
357+
// Attempt to recover from mismatched encodings between a string and its
358+
// index.
359+
360+
if isUTF8 {
361+
// Attempt to use a UTF-16 index on a UTF-8 string.
362+
//
363+
// This can happen if `self` was originally verbatim-bridged, and someone
364+
// mistakenly attempts to keep using an old index after a mutation. This
365+
// is technically an error, but trapping here would trigger a lot of
366+
// broken code that previously happened to work "fine" on e.g. ASCII
367+
// strings. Instead, attempt to convert the offset to UTF-8 code units by
368+
// transcoding the string. This can be slow, but it often results in a
369+
// usable index, even if non-ASCII characters are present. (UTF-16
370+
// breadcrumbs help reduce the severity of the slowdown.)
371+
372+
// FIXME: Consider emitting a runtime warning here.
373+
// FIXME: Consider performing a linked-on-or-after check & trapping if the
374+
// client executable was built on some particular future Swift release.
375+
let utf16 = String.UTF16View(self)
376+
var r = utf16.index(utf16.startIndex, offsetBy: i._encodedOffset)
377+
if i.transcodedOffset != 0 {
378+
r = r.encoded(offsetBy: i.transcodedOffset)
379+
} else {
380+
// Preserve alignment bits if possible.
381+
r = r._copyingAlignment(from: i)
382+
}
383+
return r._knownUTF8
399384
}
400-
// Attempt to use an UTF-16 index on a UTF-8 string.
401-
//
402-
// This can happen if `self` was originally verbatim-bridged, and someone
403-
// mistakenly attempts to keep using an old index after a mutation. This is
404-
// technically an error, but trapping here would trigger a lot of broken
405-
// code that previously happened to work "fine" on e.g. ASCII strings.
406-
// Instead, attempt to convert the offset to UTF-8 code units by transcoding
407-
// the string. This can be slow, but it often results in a usable index,
408-
// even if non-ASCII characters are present. (UTF-16 breadcrumbs help reduce
409-
// the severity of the slowdown.)
410-
411-
// FIXME: Consider emitting a runtime warning here.
412-
// FIXME: Consider performing a linked-on-or-after check & trapping if the
413-
// client executable was built on some particular future Swift release.
414-
let utf16 = String.UTF16View(self)
415-
var r = utf16.index(utf16.startIndex, offsetBy: i._encodedOffset)
385+
386+
// Attempt to use a UTF-8 index on a UTF-16 string. This is rarer, but it
387+
// can still happen when e.g. people apply an index they got from
388+
// `AttributedString` on the original (bridged) string that they constructed
389+
// it from.
390+
let utf8 = String.UTF8View(self)
391+
var r = utf8.index(utf8.startIndex, offsetBy: i._encodedOffset)
416392
if i.transcodedOffset != 0 {
417393
r = r.encoded(offsetBy: i.transcodedOffset)
418394
} else {
419395
// Preserve alignment bits if possible.
420396
r = r._copyingAlignment(from: i)
421397
}
422-
return r._knownUTF8
398+
return r._knownUTF16
423399
}
424400
}
425401

stdlib/public/core/StringIndexConversions.swift

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -51,15 +51,13 @@ extension String.Index {
5151
/// - target: The string referenced by the resulting index.
5252
public init?(_ sourcePosition: String.Index, within target: String) {
5353
// As a special exception, we allow `sourcePosition` to be a UTF-16 index
54-
// when `self` is a UTF-8 string, to preserve compatibility with (broken)
55-
// code that keeps using indices from a bridged string after converting the
56-
// string to a native representation. Such indices are invalid, but
57-
// returning nil here can break code that appeared to work fine for ASCII
58-
// strings in Swift releases prior to 5.7.
59-
guard
60-
let i = target._guts.ensureMatchingEncodingNoTrap(sourcePosition),
61-
target._isValidIndex(i)
62-
else { return nil }
54+
// when `target` is a UTF-8 string (or vice versa), to preserve compatibility
55+
// with (broken) code that keeps using indices from a bridged string after
56+
// converting the string to a native representation. Such indices are
57+
// invalid, but returning nil here can break code that appeared to work fine
58+
// for ASCII strings in Swift releases prior to 5.7.
59+
let i = target._guts.ensureMatchingEncoding(sourcePosition)
60+
guard target._isValidIndex(i) else { return nil }
6361
self = i._characterAligned
6462
}
6563

@@ -111,15 +109,13 @@ extension String.Index {
111109
}
112110
if let str = target as? Substring {
113111
// As a special exception, we allow `sourcePosition` to be a UTF-16 index
114-
// when `self` is a UTF-8 string, to preserve compatibility with (broken)
115-
// code that keeps using indices from a bridged string after converting
116-
// the string to a native representation. Such indices are invalid, but
117-
// returning nil here can break code that appeared to work fine for ASCII
118-
// strings in Swift releases prior to 5.7.
119-
guard
120-
let i = str._wholeGuts.ensureMatchingEncodingNoTrap(sourcePosition),
121-
str._isValidIndex(i)
122-
else { return nil }
112+
// when `target` is a UTF-8 string (or vice versa), to preserve
113+
// compatibility with (broken) code that keeps using indices from a
114+
// bridged string after converting the string to a native representation.
115+
// Such indices are invalid, but returning nil here can break code that
116+
// appeared to work fine for ASCII strings in Swift releases prior to 5.7.
117+
let i = str._wholeGuts.ensureMatchingEncoding(sourcePosition)
118+
guard str._isValidIndex(i) else { return nil }
123119
self = i
124120
return
125121
}

0 commit comments

Comments
 (0)