Skip to content

Commit 07ffdde

Browse files
authored
Merge pull request #58954 from lorentey/string-bincompat-workarounds
[stdlib] Work around binary compatibility issues with String index validation fixes in 5.7
2 parents 42655e1 + 50c2399 commit 07ffdde

16 files changed

+543
-229
lines changed

stdlib/public/SwiftShims/RuntimeShims.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,20 @@ SWIFT_RUNTIME_STDLIB_SPI
116116
__swift_bool _swift_stdlib_getCurrentStackBounds(__swift_uintptr_t *outBegin,
117117
__swift_uintptr_t *outEnd);
118118

119+
/// A value representing a version number for the Standard Library.
120+
typedef struct {
121+
__swift_uint32_t _value;
122+
} _SwiftStdlibVersion;
123+
124+
/// Checks if the currently running executable was built using a Swift release
125+
/// matching or exceeding the specified Standard Library version number. This
126+
/// can be used to stage behavioral changes in the Standard Library, preventing
127+
/// them from causing compatibility issues with existing binaries.
128+
SWIFT_RUNTIME_STDLIB_INTERNAL
129+
__swift_bool _swift_stdlib_isExecutableLinkedOnOrAfter(
130+
_SwiftStdlibVersion version
131+
) __attribute__((const));
132+
119133
#ifdef __cplusplus
120134
} // extern "C"
121135
#endif

stdlib/public/core/Assert.swift

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,40 @@ internal func _internalInvariant_5_1(
325325
#endif
326326
}
327327

328+
/// Library precondition checks with a linked-on-or-after check, allowing the
329+
/// addition of new preconditions while maintaining compatibility with older
330+
/// binaries.
331+
///
332+
/// This version of `_precondition` only traps if the condition returns false
333+
/// **and** the current executable was built with a Swift Standard Library
334+
/// version equal to or greater than the supplied version.
335+
@_transparent
336+
internal func _precondition(
337+
ifLinkedOnOrAfter version: _SwiftStdlibVersion,
338+
_ condition: @autoclosure () -> Bool,
339+
_ message: StaticString = StaticString(),
340+
file: StaticString = #file, line: UInt = #line
341+
) {
342+
// Delay the linked-on-or-after check until after we know we have a failed
343+
// condition, so that we don't slow down the usual case too much.
344+
345+
// Note: this is an internal function, so `_isDebugAssertConfiguration` is
346+
// expected to evaluate (at compile time) to false in production builds of the
347+
// stdlib. The other branches are kept in case the stdlib is built with an
348+
// unusual configuration.
349+
if _isDebugAssertConfiguration() {
350+
if _slowPath(!condition()) {
351+
guard _isExecutableLinkedOnOrAfter(version) else { return }
352+
_assertionFailure("Fatal error", message, file: file, line: line,
353+
flags: _fatalErrorFlags())
354+
}
355+
} else if _isReleaseAssertConfiguration() {
356+
let error = (!condition() && _isExecutableLinkedOnOrAfter(version))
357+
Builtin.condfail_message(error._value, message.unsafeRawPointer)
358+
}
359+
}
360+
361+
328362
@usableFromInline @_transparent
329363
internal func _internalInvariantFailure(
330364
_ message: StaticString = StaticString(),

stdlib/public/core/Availability.swift

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,48 @@ public func _stdlib_isOSVersionAtLeastOrVariantVersionAtLeast(
5959
return _stdlib_isOSVersionAtLeast(major, minor, patch)
6060
}
6161
#endif
62+
63+
public typealias _SwiftStdlibVersion = SwiftShims._SwiftStdlibVersion
64+
65+
/// Return true if the main executable was linked with an SDK version
66+
/// corresponding to the given Swift Stdlib release, or later. Otherwise, return
67+
/// false.
68+
///
69+
/// This is useful to maintain compatibility with older binaries after a
70+
/// behavioral change in the stdlib.
71+
///
72+
/// This function must not be called from inlinable code.
73+
@inline(__always)
74+
internal func _isExecutableLinkedOnOrAfter(
75+
_ stdlibVersion: _SwiftStdlibVersion
76+
) -> Bool {
77+
#if SWIFT_RUNTIME_OS_VERSIONING
78+
return _swift_stdlib_isExecutableLinkedOnOrAfter(stdlibVersion)
79+
#else
80+
return true
81+
#endif
82+
}
83+
84+
extension _SwiftStdlibVersion {
85+
@_alwaysEmitIntoClient
86+
public static var v5_6_0: Self { Self(_value: 0x050600) }
87+
88+
@_alwaysEmitIntoClient
89+
public static var v5_7_0: Self { Self(_value: 0x050700) }
90+
91+
@available(SwiftStdlib 5.7, *)
92+
public static var current: Self { .v5_7_0 }
93+
}
94+
95+
@available(SwiftStdlib 5.7, *)
96+
extension _SwiftStdlibVersion: CustomStringConvertible {
97+
@available(SwiftStdlib 5.7, *)
98+
public var description: String {
99+
let major = _value >> 16
100+
let minor = (_value >> 8) & 0xFF
101+
let patch = _value & 0xFF
102+
return "\(major).\(minor).\(patch)"
103+
}
104+
}
105+
106+

stdlib/public/core/StringCharacterView.swift

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,16 @@ extension String: BidirectionalCollection {
8383
/// `startIndex`.
8484
/// - Returns: The index value immediately before `i`.
8585
public func index(before i: Index) -> Index {
86-
let i = _guts.validateInclusiveCharacterIndex(i)
86+
// FIXME: This method used to not properly validate indices before 5.7;
87+
// temporarily allow older binaries to keep invoking undefined behavior as
88+
// before.
89+
let i = _guts.validateInclusiveCharacterIndex_5_7(i)
90+
8791
// Note: Aligning an index may move it closer towards the `startIndex`, so
8892
// the `i > startIndex` check needs to come after rounding.
89-
_precondition(i > startIndex, "String index is out of bounds")
93+
_precondition(
94+
ifLinkedOnOrAfter: .v5_7_0,
95+
i > startIndex, "String index is out of bounds")
9096

9197
return _uncheckedIndex(before: i)
9298
}
@@ -137,7 +143,10 @@ extension String: BidirectionalCollection {
137143

138144
// TODO: known-ASCII and single-scalar-grapheme fast path, etc.
139145

140-
var i = _guts.validateInclusiveCharacterIndex(i)
146+
// FIXME: This method used to not properly validate indices before 5.7;
147+
// temporarily allow older binaries to keep invoking undefined behavior as
148+
// before.
149+
var i = _guts.validateInclusiveCharacterIndex_5_7(i)
141150

142151
if distance >= 0 {
143152
for _ in stride(from: 0, to: distance, by: 1) {
@@ -209,10 +218,14 @@ extension String: BidirectionalCollection {
209218
// ensure our behavior exactly matches the documentation above. We do need
210219
// to ensure it has a matching encoding, though. The same goes for `start`,
211220
// which is used to determine whether the limit applies at all.
221+
212222
let limit = _guts.ensureMatchingEncoding(limit)
213223
let start = _guts.ensureMatchingEncoding(i)
214224

215-
var i = _guts.validateInclusiveCharacterIndex(i)
225+
// FIXME: This method used to not properly validate indices before 5.7;
226+
// temporarily allow older binaries to keep invoking undefined behavior as
227+
// before.
228+
var i = _guts.validateInclusiveCharacterIndex_5_7(i)
216229

217230
if distance >= 0 {
218231
for _ in stride(from: 0, to: distance, by: 1) {
@@ -245,8 +258,11 @@ extension String: BidirectionalCollection {
245258
// Note: Prior to Swift 5.7, this function used to be inlinable, forwarding
246259
// to `BidirectionalCollection._distance(from:to:)`.
247260

248-
let start = _guts.validateInclusiveCharacterIndex(start)
249-
let end = _guts.validateInclusiveCharacterIndex(end)
261+
// FIXME: This method used to not properly validate indices before 5.7;
262+
// temporarily allow older binaries to keep invoking undefined behavior as
263+
// before.
264+
let start = _guts.validateInclusiveCharacterIndex_5_7(start)
265+
let end = _guts.validateInclusiveCharacterIndex_5_7(end)
250266

251267
// TODO: known-ASCII and single-scalar-grapheme fast path, etc.
252268

stdlib/public/core/StringGuts.swift

Lines changed: 36 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -347,79 +347,55 @@ extension _StringGuts {
347347
@inline(__always)
348348
internal func ensureMatchingEncoding(_ i: Index) -> Index {
349349
if _fastPath(hasMatchingEncoding(i)) { return i }
350-
if let i = _slowEnsureMatchingEncoding(i) { return i }
351-
// Note that this trap is not guaranteed to trigger when the process
352-
// includes client binaries compiled with a previous Swift release.
353-
// (`i._canBeUTF16` can sometimes return true in that case even if the index
354-
// actually came from an UTF-8 string.) However, the trap will still often
355-
// trigger in this case, as long as the index was initialized by code that
356-
// was compiled with 5.7+.
357-
//
358-
// This trap will rarely if ever trigger on OSes that have stdlibs <= 5.6,
359-
// because those versions never set the `isKnownUTF16` flag in
360-
// `_StringObject`. (The flag may still be set within inlinable code,
361-
// though.)
362-
_preconditionFailure("Invalid string index")
363-
}
364-
365-
/// Return an index that corresponds to the same position as `i`, but whose
366-
/// encoding can be assumed to match that of `self`, returning `nil` if `i`
367-
/// has incompatible encoding.
368-
///
369-
/// If `i` is UTF-8 encoded, but `self` is an UTF-16 string, then return nil.
370-
///
371-
/// If `i` is UTF-16 encoded, but `self` is an UTF-8 string, then transcode
372-
/// `i`'s offset to UTF-8 and return the resulting index. This allows the use
373-
/// of indices from a bridged Cocoa string after the string has been converted
374-
/// to a native Swift string. (Such indices are technically still considered
375-
/// invalid, but we allow this specific case to keep compatibility with
376-
/// existing code that assumes otherwise.)
377-
///
378-
/// Detecting an encoding mismatch isn't always possible -- older binaries did
379-
/// not set the flags that this method relies on. However, false positives
380-
/// cannot happen: if this method detects a mismatch, then it is guaranteed to
381-
/// be a real one.
382-
internal func ensureMatchingEncodingNoTrap(_ i: Index) -> Index? {
383-
if hasMatchingEncoding(i) { return i }
384350
return _slowEnsureMatchingEncoding(i)
385351
}
386352

387353
@_alwaysEmitIntoClient
388354
@inline(never)
389355
@_effects(releasenone)
390-
internal func _slowEnsureMatchingEncoding(_ i: Index) -> Index? {
391-
guard isUTF8 else {
392-
// Attempt to use an UTF-8 index on a UTF-16 string. Strings don't usually
393-
// get converted to UTF-16 storage, so it seems okay to reject this case
394-
// -- the index most likely comes from an unrelated string. (This may
395-
// still turn out to affect binary compatibility with broken code in
396-
// existing binaries running with new stdlibs. If so, we can replace this
397-
// with the same transcoding hack as in the UTF-16->8 case below.)
398-
return nil
356+
internal func _slowEnsureMatchingEncoding(_ i: Index) -> Index {
357+
// Attempt to recover from mismatched encodings between a string and its
358+
// index.
359+
360+
if isUTF8 {
361+
// Attempt to use a UTF-16 index on a UTF-8 string.
362+
//
363+
// This can happen if `self` was originally verbatim-bridged, and someone
364+
// mistakenly attempts to keep using an old index after a mutation. This
365+
// is technically an error, but trapping here would trigger a lot of
366+
// broken code that previously happened to work "fine" on e.g. ASCII
367+
// strings. Instead, attempt to convert the offset to UTF-8 code units by
368+
// transcoding the string. This can be slow, but it often results in a
369+
// usable index, even if non-ASCII characters are present. (UTF-16
370+
// breadcrumbs help reduce the severity of the slowdown.)
371+
372+
// FIXME: Consider emitting a runtime warning here.
373+
// FIXME: Consider performing a linked-on-or-after check & trapping if the
374+
// client executable was built on some particular future Swift release.
375+
let utf16 = String.UTF16View(self)
376+
var r = utf16.index(utf16.startIndex, offsetBy: i._encodedOffset)
377+
if i.transcodedOffset != 0 {
378+
r = r.encoded(offsetBy: i.transcodedOffset)
379+
} else {
380+
// Preserve alignment bits if possible.
381+
r = r._copyingAlignment(from: i)
382+
}
383+
return r._knownUTF8
399384
}
400-
// Attempt to use an UTF-16 index on a UTF-8 string.
401-
//
402-
// This can happen if `self` was originally verbatim-bridged, and someone
403-
// mistakenly attempts to keep using an old index after a mutation. This is
404-
// technically an error, but trapping here would trigger a lot of broken
405-
// code that previously happened to work "fine" on e.g. ASCII strings.
406-
// Instead, attempt to convert the offset to UTF-8 code units by transcoding
407-
// the string. This can be slow, but it often results in a usable index,
408-
// even if non-ASCII characters are present. (UTF-16 breadcrumbs help reduce
409-
// the severity of the slowdown.)
410-
411-
// FIXME: Consider emitting a runtime warning here.
412-
// FIXME: Consider performing a linked-on-or-after check & trapping if the
413-
// client executable was built on some particular future Swift release.
414-
let utf16 = String.UTF16View(self)
415-
var r = utf16.index(utf16.startIndex, offsetBy: i._encodedOffset)
385+
386+
// Attempt to use a UTF-8 index on a UTF-16 string. This is rarer, but it
387+
// can still happen when e.g. people apply an index they got from
388+
// `AttributedString` on the original (bridged) string that they constructed
389+
// it from.
390+
let utf8 = String.UTF8View(self)
391+
var r = utf8.index(utf8.startIndex, offsetBy: i._encodedOffset)
416392
if i.transcodedOffset != 0 {
417393
r = r.encoded(offsetBy: i.transcodedOffset)
418394
} else {
419395
// Preserve alignment bits if possible.
420396
r = r._copyingAlignment(from: i)
421397
}
422-
return r._knownUTF8
398+
return r._knownUTF16
423399
}
424400
}
425401

stdlib/public/core/StringIndexConversions.swift

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -51,15 +51,13 @@ extension String.Index {
5151
/// - target: The string referenced by the resulting index.
5252
public init?(_ sourcePosition: String.Index, within target: String) {
5353
// As a special exception, we allow `sourcePosition` to be a UTF-16 index
54-
// when `self` is a UTF-8 string, to preserve compatibility with (broken)
55-
// code that keeps using indices from a bridged string after converting the
56-
// string to a native representation. Such indices are invalid, but
57-
// returning nil here can break code that appeared to work fine for ASCII
58-
// strings in Swift releases prior to 5.7.
59-
guard
60-
let i = target._guts.ensureMatchingEncodingNoTrap(sourcePosition),
61-
target._isValidIndex(i)
62-
else { return nil }
54+
// when `target` is a UTF-8 string (or vice versa), to preserve compatibility
55+
// with (broken) code that keeps using indices from a bridged string after
56+
// converting the string to a native representation. Such indices are
57+
// invalid, but returning nil here can break code that appeared to work fine
58+
// for ASCII strings in Swift releases prior to 5.7.
59+
let i = target._guts.ensureMatchingEncoding(sourcePosition)
60+
guard target._isValidIndex(i) else { return nil }
6361
self = i._characterAligned
6462
}
6563

@@ -111,15 +109,13 @@ extension String.Index {
111109
}
112110
if let str = target as? Substring {
113111
// As a special exception, we allow `sourcePosition` to be a UTF-16 index
114-
// when `self` is a UTF-8 string, to preserve compatibility with (broken)
115-
// code that keeps using indices from a bridged string after converting
116-
// the string to a native representation. Such indices are invalid, but
117-
// returning nil here can break code that appeared to work fine for ASCII
118-
// strings in Swift releases prior to 5.7.
119-
guard
120-
let i = str._wholeGuts.ensureMatchingEncodingNoTrap(sourcePosition),
121-
str._isValidIndex(i)
122-
else { return nil }
112+
// when `target` is a UTF-8 string (or vice versa), to preserve
113+
// compatibility with (broken) code that keeps using indices from a
114+
// bridged string after converting the string to a native representation.
115+
// Such indices are invalid, but returning nil here can break code that
116+
// appeared to work fine for ASCII strings in Swift releases prior to 5.7.
117+
let i = str._wholeGuts.ensureMatchingEncoding(sourcePosition)
118+
guard str._isValidIndex(i) else { return nil }
123119
self = i
124120
return
125121
}

0 commit comments

Comments
 (0)