@@ -347,79 +347,55 @@ extension _StringGuts {
347
347
@inline ( __always)
348
348
internal func ensureMatchingEncoding( _ i: Index ) -> Index {
349
349
if _fastPath ( hasMatchingEncoding ( i) ) { return i }
350
- if let i = _slowEnsureMatchingEncoding ( i) { return i }
351
- // Note that this trap is not guaranteed to trigger when the process
352
- // includes client binaries compiled with a previous Swift release.
353
- // (`i._canBeUTF16` can sometimes return true in that case even if the index
354
- // actually came from an UTF-8 string.) However, the trap will still often
355
- // trigger in this case, as long as the index was initialized by code that
356
- // was compiled with 5.7+.
357
- //
358
- // This trap will rarely if ever trigger on OSes that have stdlibs <= 5.6,
359
- // because those versions never set the `isKnownUTF16` flag in
360
- // `_StringObject`. (The flag may still be set within inlinable code,
361
- // though.)
362
- _preconditionFailure ( " Invalid string index " )
363
- }
364
-
365
- /// Return an index that corresponds to the same position as `i`, but whose
366
- /// encoding can be assumed to match that of `self`, returning `nil` if `i`
367
- /// has incompatible encoding.
368
- ///
369
- /// If `i` is UTF-8 encoded, but `self` is an UTF-16 string, then return nil.
370
- ///
371
- /// If `i` is UTF-16 encoded, but `self` is an UTF-8 string, then transcode
372
- /// `i`'s offset to UTF-8 and return the resulting index. This allows the use
373
- /// of indices from a bridged Cocoa string after the string has been converted
374
- /// to a native Swift string. (Such indices are technically still considered
375
- /// invalid, but we allow this specific case to keep compatibility with
376
- /// existing code that assumes otherwise.)
377
- ///
378
- /// Detecting an encoding mismatch isn't always possible -- older binaries did
379
- /// not set the flags that this method relies on. However, false positives
380
- /// cannot happen: if this method detects a mismatch, then it is guaranteed to
381
- /// be a real one.
382
- internal func ensureMatchingEncodingNoTrap( _ i: Index ) -> Index ? {
383
- if hasMatchingEncoding ( i) { return i }
384
350
return _slowEnsureMatchingEncoding ( i)
385
351
}
386
352
387
353
@_alwaysEmitIntoClient
388
354
@inline ( never)
389
355
@_effects ( releasenone)
390
- internal func _slowEnsureMatchingEncoding( _ i: Index ) -> Index ? {
391
- guard isUTF8 else {
392
- // Attempt to use an UTF-8 index on a UTF-16 string. Strings don't usually
393
- // get converted to UTF-16 storage, so it seems okay to reject this case
394
- // -- the index most likely comes from an unrelated string. (This may
395
- // still turn out to affect binary compatibility with broken code in
396
- // existing binaries running with new stdlibs. If so, we can replace this
397
- // with the same transcoding hack as in the UTF-16->8 case below.)
398
- return nil
356
+ internal func _slowEnsureMatchingEncoding( _ i: Index ) -> Index {
357
+ // Attempt to recover from mismatched encodings between a string and its
358
+ // index.
359
+
360
+ if isUTF8 {
361
+ // Attempt to use a UTF-16 index on a UTF-8 string.
362
+ //
363
+ // This can happen if `self` was originally verbatim-bridged, and someone
364
+ // mistakenly attempts to keep using an old index after a mutation. This
365
+ // is technically an error, but trapping here would trigger a lot of
366
+ // broken code that previously happened to work "fine" on e.g. ASCII
367
+ // strings. Instead, attempt to convert the offset to UTF-8 code units by
368
+ // transcoding the string. This can be slow, but it often results in a
369
+ // usable index, even if non-ASCII characters are present. (UTF-16
370
+ // breadcrumbs help reduce the severity of the slowdown.)
371
+
372
+ // FIXME: Consider emitting a runtime warning here.
373
+ // FIXME: Consider performing a linked-on-or-after check & trapping if the
374
+ // client executable was built on some particular future Swift release.
375
+ let utf16 = String . UTF16View ( self )
376
+ var r = utf16. index ( utf16. startIndex, offsetBy: i. _encodedOffset)
377
+ if i. transcodedOffset != 0 {
378
+ r = r. encoded ( offsetBy: i. transcodedOffset)
379
+ } else {
380
+ // Preserve alignment bits if possible.
381
+ r = r. _copyingAlignment ( from: i)
382
+ }
383
+ return r. _knownUTF8
399
384
}
400
- // Attempt to use an UTF-16 index on a UTF-8 string.
401
- //
402
- // This can happen if `self` was originally verbatim-bridged, and someone
403
- // mistakenly attempts to keep using an old index after a mutation. This is
404
- // technically an error, but trapping here would trigger a lot of broken
405
- // code that previously happened to work "fine" on e.g. ASCII strings.
406
- // Instead, attempt to convert the offset to UTF-8 code units by transcoding
407
- // the string. This can be slow, but it often results in a usable index,
408
- // even if non-ASCII characters are present. (UTF-16 breadcrumbs help reduce
409
- // the severity of the slowdown.)
410
-
411
- // FIXME: Consider emitting a runtime warning here.
412
- // FIXME: Consider performing a linked-on-or-after check & trapping if the
413
- // client executable was built on some particular future Swift release.
414
- let utf16 = String . UTF16View ( self )
415
- var r = utf16. index ( utf16. startIndex, offsetBy: i. _encodedOffset)
385
+
386
+ // Attempt to use a UTF-8 index on a UTF-16 string. This is rarer, but it
387
+ // can still happen when e.g. people apply an index they got from
388
+ // `AttributedString` on the original (bridged) string that they constructed
389
+ // it from.
390
+ let utf8 = String . UTF8View ( self )
391
+ var r = utf8. index ( utf8. startIndex, offsetBy: i. _encodedOffset)
416
392
if i. transcodedOffset != 0 {
417
393
r = r. encoded ( offsetBy: i. transcodedOffset)
418
394
} else {
419
395
// Preserve alignment bits if possible.
420
396
r = r. _copyingAlignment ( from: i)
421
397
}
422
- return r. _knownUTF8
398
+ return r. _knownUTF16
423
399
}
424
400
}
425
401
0 commit comments