Skip to content

[5.7][stdlib] Work around binary compatibility issues with String index validation fixes in 5.7 #58993

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions stdlib/public/SwiftShims/RuntimeShims.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,20 @@ SWIFT_RUNTIME_STDLIB_SPI
__swift_bool _swift_stdlib_getCurrentStackBounds(__swift_uintptr_t *outBegin,
__swift_uintptr_t *outEnd);

/// A value representing a version number for the Standard Library.
///
/// The version is carried as a single packed 32-bit integer so that it can be
/// passed by value between the Swift standard library and the runtime.
typedef struct {
/// The packed version number: the patch component occupies bits 0-7, the
/// minor component bits 8-15, and the major component the bits from 16
/// upwards (so Swift 5.6.0 is spelled `0x050600`).
__swift_uint32_t _value;
} _SwiftStdlibVersion;

/// Checks if the currently running executable was built using a Swift release
/// matching or exceeding the specified Standard Library version number. This
/// can be used to stage behavioral changes in the Standard Library, preventing
/// them from causing compatibility issues with existing binaries.
///
/// `version` is the Standard Library version to compare against; the result is
/// true when the executable's build matches or exceeds that version.
///
/// The declaration carries `__attribute__((const))`, which tells the compiler
/// that the result depends only on `version`, so repeated calls with the same
/// argument may be merged into one.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_bool _swift_stdlib_isExecutableLinkedOnOrAfter(
_SwiftStdlibVersion version
) __attribute__((const));

#ifdef __cplusplus
} // extern "C"
#endif
Expand Down
34 changes: 34 additions & 0 deletions stdlib/public/core/Assert.swift
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,40 @@ internal func _internalInvariant_5_1(
#endif
}

/// A library precondition that is gated on the Standard Library version the
/// running executable was built with.
///
/// This makes it possible to introduce a new precondition without breaking
/// binaries that predate it: the trap fires only when `condition` evaluates to
/// false **and** the current executable was built with a Swift Standard Library
/// version equal to or greater than `version`.
///
/// - Parameters:
///   - version: The earliest Standard Library version whose clients should
///     observe the trap.
///   - condition: The requirement to verify. It is evaluated before the
///     linked-on-or-after check.
///   - message: The text reported when the precondition fails.
///   - file: The source file reported by the debug-configuration failure path.
///   - line: The source line reported by the debug-configuration failure path.
@_transparent
internal func _precondition(
  ifLinkedOnOrAfter version: _SwiftStdlibVersion,
  _ condition: @autoclosure () -> Bool,
  _ message: StaticString = StaticString(),
  file: StaticString = #file, line: UInt = #line
) {
  // The condition is always tested first; the linked-on-or-after query only
  // runs once the condition is known to have failed, which keeps the usual
  // (passing) case cheap.
  //
  // The branch taken depends on the assert configuration this code is
  // compiled under. When neither the debug nor the release configuration is
  // active, no check is performed at all.
  if _isDebugAssertConfiguration() {
    // Passing condition: nothing more to do.
    guard _slowPath(!condition()) else { return }
    // Failing condition in an older binary: tolerate it, as before.
    guard _isExecutableLinkedOnOrAfter(version) else { return }
    _assertionFailure(
      "Fatal error", message, file: file, line: line,
      flags: _fatalErrorFlags())
  } else if _isReleaseAssertConfiguration() {
    // `&&` short-circuits, so the version query is skipped when the condition
    // holds.
    let shouldTrap = !condition() && _isExecutableLinkedOnOrAfter(version)
    Builtin.condfail_message(shouldTrap._value, message.unsafeRawPointer)
  }
}


@usableFromInline @_transparent
internal func _internalInvariantFailure(
_ message: StaticString = StaticString(),
Expand Down
45 changes: 45 additions & 0 deletions stdlib/public/core/Availability.swift
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,48 @@ public func _stdlib_isOSVersionAtLeastOrVariantVersionAtLeast(
return _stdlib_isOSVersionAtLeast(major, minor, patch)
}
#endif

/// The Swift-side name for the `_SwiftStdlibVersion` type declared in the
/// `SwiftShims` module.
public typealias _SwiftStdlibVersion = SwiftShims._SwiftStdlibVersion

/// Reports whether the main executable was linked with an SDK version that
/// corresponds to the given Swift Stdlib release or a later one.
///
/// Use this to keep older binaries working after a behavioral change in the
/// stdlib.
///
/// When the stdlib is compiled without `SWIFT_RUNTIME_OS_VERSIONING`, no query
/// is made and the answer is always `true`.
///
/// This function must not be called from inlinable code.
///
/// - Parameter version: The Swift Stdlib release to compare against.
/// - Returns: `true` if the executable was linked on or after `version`;
///   otherwise, `false`.
@inline(__always)
internal func _isExecutableLinkedOnOrAfter(
  _ version: _SwiftStdlibVersion
) -> Bool {
#if SWIFT_RUNTIME_OS_VERSIONING
  // Defer to the check exposed through the runtime shims.
  return _swift_stdlib_isExecutableLinkedOnOrAfter(version)
#else
  // Without OS versioning every executable counts as up to date.
  return true
#endif
}

extension _SwiftStdlibVersion {
  /// The version value for Swift 5.6.0 (`0x050600`).
  @_alwaysEmitIntoClient
  public static var v5_6_0: Self {
    return Self(_value: 0x05_06_00)
  }

  /// The version value for Swift 5.7.0 (`0x050700`).
  @_alwaysEmitIntoClient
  public static var v5_7_0: Self {
    return Self(_value: 0x05_07_00)
  }

  /// The version value reported by this build of the Standard Library;
  /// currently the same as `v5_7_0`.
  @available(SwiftStdlib 5.7, *)
  public static var current: Self {
    return .v5_7_0
  }
}

@available(SwiftStdlib 5.7, *)
extension _SwiftStdlibVersion: CustomStringConvertible {
  /// The version rendered as `major.minor.patch`, with each component printed
  /// in decimal.
  ///
  /// The components are unpacked from `_value`: patch from the lowest eight
  /// bits, minor from the next eight, and major from everything above those.
  @available(SwiftStdlib 5.7, *)
  public var description: String {
    let patchPart = _value & 0xFF
    let minorPart = (_value & 0xFF00) >> 8
    let majorPart = _value >> 16
    return "\(majorPart).\(minorPart).\(patchPart)"
  }
}


28 changes: 22 additions & 6 deletions stdlib/public/core/StringCharacterView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,16 @@ extension String: BidirectionalCollection {
/// `startIndex`.
/// - Returns: The index value immediately before `i`.
public func index(before i: Index) -> Index {
let i = _guts.validateInclusiveCharacterIndex(i)
// FIXME: This method used to not properly validate indices before 5.7;
// temporarily allow older binaries to keep invoking undefined behavior as
// before.
let i = _guts.validateInclusiveCharacterIndex_5_7(i)

// Note: Aligning an index may move it closer towards the `startIndex`, so
// the `i > startIndex` check needs to come after rounding.
_precondition(i > startIndex, "String index is out of bounds")
_precondition(
ifLinkedOnOrAfter: .v5_7_0,
i > startIndex, "String index is out of bounds")

return _uncheckedIndex(before: i)
}
Expand Down Expand Up @@ -137,7 +143,10 @@ extension String: BidirectionalCollection {

// TODO: known-ASCII and single-scalar-grapheme fast path, etc.

var i = _guts.validateInclusiveCharacterIndex(i)
// FIXME: This method used to not properly validate indices before 5.7;
// temporarily allow older binaries to keep invoking undefined behavior as
// before.
var i = _guts.validateInclusiveCharacterIndex_5_7(i)

if distance >= 0 {
for _ in stride(from: 0, to: distance, by: 1) {
Expand Down Expand Up @@ -209,10 +218,14 @@ extension String: BidirectionalCollection {
// ensure our behavior exactly matches the documentation above. We do need
// to ensure it has a matching encoding, though. The same goes for `start`,
// which is used to determine whether the limit applies at all.

let limit = _guts.ensureMatchingEncoding(limit)
let start = _guts.ensureMatchingEncoding(i)

var i = _guts.validateInclusiveCharacterIndex(i)
// FIXME: This method used to not properly validate indices before 5.7;
// temporarily allow older binaries to keep invoking undefined behavior as
// before.
var i = _guts.validateInclusiveCharacterIndex_5_7(i)

if distance >= 0 {
for _ in stride(from: 0, to: distance, by: 1) {
Expand Down Expand Up @@ -245,8 +258,11 @@ extension String: BidirectionalCollection {
// Note: Prior to Swift 5.7, this function used to be inlinable, forwarding
// to `BidirectionalCollection._distance(from:to:)`.

let start = _guts.validateInclusiveCharacterIndex(start)
let end = _guts.validateInclusiveCharacterIndex(end)
// FIXME: This method used to not properly validate indices before 5.7;
// temporarily allow older binaries to keep invoking undefined behavior as
// before.
let start = _guts.validateInclusiveCharacterIndex_5_7(start)
let end = _guts.validateInclusiveCharacterIndex_5_7(end)

// TODO: known-ASCII and single-scalar-grapheme fast path, etc.

Expand Down
96 changes: 36 additions & 60 deletions stdlib/public/core/StringGuts.swift
Original file line number Diff line number Diff line change
Expand Up @@ -347,79 +347,55 @@ extension _StringGuts {
@inline(__always)
internal func ensureMatchingEncoding(_ i: Index) -> Index {
if _fastPath(hasMatchingEncoding(i)) { return i }
if let i = _slowEnsureMatchingEncoding(i) { return i }
// Note that this trap is not guaranteed to trigger when the process
// includes client binaries compiled with a previous Swift release.
// (`i._canBeUTF16` can sometimes return true in that case even if the index
// actually came from an UTF-8 string.) However, the trap will still often
// trigger in this case, as long as the index was initialized by code that
// was compiled with 5.7+.
//
// This trap will rarely if ever trigger on OSes that have stdlibs <= 5.6,
// because those versions never set the `isKnownUTF16` flag in
// `_StringObject`. (The flag may still be set within inlinable code,
// though.)
_preconditionFailure("Invalid string index")
}

/// Return an index that corresponds to the same position as `i`, but whose
/// encoding can be assumed to match that of `self`, returning `nil` if `i`
/// has incompatible encoding.
///
/// If `i` is UTF-8 encoded, but `self` is an UTF-16 string, then return nil.
///
/// If `i` is UTF-16 encoded, but `self` is an UTF-8 string, then transcode
/// `i`'s offset to UTF-8 and return the resulting index. This allows the use
/// of indices from a bridged Cocoa string after the string has been converted
/// to a native Swift string. (Such indices are technically still considered
/// invalid, but we allow this specific case to keep compatibility with
/// existing code that assumes otherwise.)
///
/// Detecting an encoding mismatch isn't always possible -- older binaries did
/// not set the flags that this method relies on. However, false positives
/// cannot happen: if this method detects a mismatch, then it is guaranteed to
/// be a real one.
internal func ensureMatchingEncodingNoTrap(_ i: Index) -> Index? {
if hasMatchingEncoding(i) { return i }
return _slowEnsureMatchingEncoding(i)
}

@_alwaysEmitIntoClient
@inline(never)
@_effects(releasenone)
internal func _slowEnsureMatchingEncoding(_ i: Index) -> Index? {
guard isUTF8 else {
// Attempt to use an UTF-8 index on a UTF-16 string. Strings don't usually
// get converted to UTF-16 storage, so it seems okay to reject this case
// -- the index most likely comes from an unrelated string. (This may
// still turn out to affect binary compatibility with broken code in
// existing binaries running with new stdlibs. If so, we can replace this
// with the same transcoding hack as in the UTF-16->8 case below.)
return nil
internal func _slowEnsureMatchingEncoding(_ i: Index) -> Index {
// Attempt to recover from mismatched encodings between a string and its
// index.

if isUTF8 {
// Attempt to use an UTF-16 index on a UTF-8 string.
//
// This can happen if `self` was originally verbatim-bridged, and someone
// mistakenly attempts to keep using an old index after a mutation. This
// is technically an error, but trapping here would trigger a lot of
// broken code that previously happened to work "fine" on e.g. ASCII
// strings. Instead, attempt to convert the offset to UTF-8 code units by
// transcoding the string. This can be slow, but it often results in a
// usable index, even if non-ASCII characters are present. (UTF-16
// breadcrumbs help reduce the severity of the slowdown.)

// FIXME: Consider emitting a runtime warning here.
// FIXME: Consider performing a linked-on-or-after check & trapping if the
// client executable was built on some particular future Swift release.
let utf16 = String.UTF16View(self)
var r = utf16.index(utf16.startIndex, offsetBy: i._encodedOffset)
if i.transcodedOffset != 0 {
r = r.encoded(offsetBy: i.transcodedOffset)
} else {
// Preserve alignment bits if possible.
r = r._copyingAlignment(from: i)
}
return r._knownUTF8
}
// Attempt to use an UTF-16 index on a UTF-8 string.
//
// This can happen if `self` was originally verbatim-bridged, and someone
// mistakenly attempts to keep using an old index after a mutation. This is
// technically an error, but trapping here would trigger a lot of broken
// code that previously happened to work "fine" on e.g. ASCII strings.
// Instead, attempt to convert the offset to UTF-8 code units by transcoding
// the string. This can be slow, but it often results in a usable index,
// even if non-ASCII characters are present. (UTF-16 breadcrumbs help reduce
// the severity of the slowdown.)

// FIXME: Consider emitting a runtime warning here.
// FIXME: Consider performing a linked-on-or-after check & trapping if the
// client executable was built on some particular future Swift release.
let utf16 = String.UTF16View(self)
var r = utf16.index(utf16.startIndex, offsetBy: i._encodedOffset)

// Attempt to use an UTF-8 index on a UTF-16 string. This is rarer, but it
// can still happen when e.g. people apply an index they got from
// `AttributedString` on the original (bridged) string that they constructed
// it from.
let utf8 = String.UTF8View(self)
var r = utf8.index(utf8.startIndex, offsetBy: i._encodedOffset)
if i.transcodedOffset != 0 {
r = r.encoded(offsetBy: i.transcodedOffset)
} else {
// Preserve alignment bits if possible.
r = r._copyingAlignment(from: i)
}
return r._knownUTF8
return r._knownUTF16
}
}

Expand Down
32 changes: 14 additions & 18 deletions stdlib/public/core/StringIndexConversions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,13 @@ extension String.Index {
/// - target: The string referenced by the resulting index.
public init?(_ sourcePosition: String.Index, within target: String) {
// As a special exception, we allow `sourcePosition` to be an UTF-16 index
// when `self` is a UTF-8 string, to preserve compatibility with (broken)
// code that keeps using indices from a bridged string after converting the
// string to a native representation. Such indices are invalid, but
// returning nil here can break code that appeared to work fine for ASCII
// strings in Swift releases prior to 5.7.
guard
let i = target._guts.ensureMatchingEncodingNoTrap(sourcePosition),
target._isValidIndex(i)
else { return nil }
// when `target` is a UTF-8 string (or vice versa), to preserve compatibility
// with (broken) code that keeps using indices from a bridged string after
// converting the string to a native representation. Such indices are
// invalid, but returning nil here can break code that appeared to work fine
// for ASCII strings in Swift releases prior to 5.7.
let i = target._guts.ensureMatchingEncoding(sourcePosition)
guard target._isValidIndex(i) else { return nil }
self = i._characterAligned
}

Expand Down Expand Up @@ -111,15 +109,13 @@ extension String.Index {
}
if let str = target as? Substring {
// As a special exception, we allow `sourcePosition` to be an UTF-16 index
// when `self` is a UTF-8 string, to preserve compatibility with (broken)
// code that keeps using indices from a bridged string after converting
// the string to a native representation. Such indices are invalid, but
// returning nil here can break code that appeared to work fine for ASCII
// strings in Swift releases prior to 5.7.
guard
let i = str._wholeGuts.ensureMatchingEncodingNoTrap(sourcePosition),
str._isValidIndex(i)
else { return nil }
// when `target` is a UTF-8 string (or vice versa), to preserve
// compatibility with (broken) code that keeps using indices from a
// bridged string after converting the string to a native representation.
// Such indices are invalid, but returning nil here can break code that
// appeared to work fine for ASCII strings in Swift releases prior to 5.7.
let i = str._wholeGuts.ensureMatchingEncoding(sourcePosition)
guard str._isValidIndex(i) else { return nil }
self = i
return
}
Expand Down
Loading