@@ -301,19 +301,13 @@ extension _StringGuts {
301
301
// Encoding
302
302
extension _StringGuts {
303
303
/// A Boolean value indicating whether this string has a UTF-8 storage representation.
304
+ /// If this returns false, then the string is encoded in UTF-16.
304
305
///
305
306
/// This always returns a value corresponding to the string's actual encoding, as reported by `_object`.
306
307
@_alwaysEmitIntoClient
307
308
@inline ( __always)
308
309
internal var isUTF8 : Bool { _object. isUTF8 }
309
310
310
- /// Returns whether this string has a UTF-16 storage representation.
311
- ///
312
- /// This always returns a value corresponding to the string's actual encoding.
313
- @_alwaysEmitIntoClient
314
- @inline ( __always)
315
- internal var isUTF16 : Bool { _object. isUTF16 }
316
-
317
311
@_alwaysEmitIntoClient // Swift 5.7
318
312
@inline ( __always)
319
313
internal func markEncoding( _ i: String . Index ) -> String . Index {
@@ -333,41 +327,75 @@ extension _StringGuts {
333
327
i. _hasMatchingEncoding ( isUTF8: isUTF8)
334
328
}
335
329
336
- /// Return an index whose encoding can be assumed to match that of `self`.
330
+ /// Return an index whose encoding can be assumed to match that of `self`,
331
+ /// trapping if `i` has an incompatible encoding.
332
+ ///
333
+ /// If `i` is UTF-8 encoded, but `self` is a UTF-16 string, then trap.
334
+ ///
335
+ /// If `i` is UTF-16 encoded, but `self` is a UTF-8 string, then transcode
336
+ /// `i`'s offset to UTF-8 and return the resulting index. This allows the use
337
+ /// of indices from a bridged Cocoa string after the string has been converted
338
+ /// to a native Swift string. (Such indices are technically still considered
339
+ /// invalid, but we allow this specific case to keep compatibility with
340
+ /// existing code that assumes otherwise.)
337
341
///
338
342
/// Detecting an encoding mismatch isn't always possible -- older binaries did
339
343
/// not set the flags that this method relies on. However, false positives
340
344
/// cannot happen: if this method detects a mismatch, then it is guaranteed to
341
345
/// be a real one.
342
346
@_alwaysEmitIntoClient
343
347
@inline ( __always)
344
- internal func ensureMatchingEncoding( _ i: String . Index ) -> String . Index {
348
+ internal func ensureMatchingEncoding( _ i: Index ) -> Index {
345
349
if _fastPath ( hasMatchingEncoding ( i) ) { return i }
350
+ if let i = _slowEnsureMatchingEncoding ( i) { return i }
351
+ // Note that this trap is not guaranteed to trigger when the process
352
+ // includes client binaries compiled with a previous Swift release.
353
+ // (`i._canBeUTF16` can sometimes return true in that case even if the index
354
+ // actually came from a UTF-8 string.) However, the trap will still often
355
+ // trigger in this case, as long as the index was initialized by code that
356
+ // was compiled with 5.7+.
357
+ //
358
+ // This trap will rarely if ever trigger on OSes that have stdlibs <= 5.6,
359
+ // because those versions never set the `isKnownUTF16` flag in
360
+ // `_StringObject`. (The flag may still be set within inlinable code,
361
+ // though.)
362
+ _preconditionFailure ( " Invalid string index " )
363
+ }
364
+
365
+ /// Return an index that corresponds to the same position as `i`, but whose
366
+ /// encoding can be assumed to match that of `self`, returning `nil` if `i`
367
+ /// has an incompatible encoding.
368
+ ///
369
+ /// If `i` is UTF-8 encoded, but `self` is a UTF-16 string, then return `nil`.
370
+ ///
371
+ /// If `i` is UTF-16 encoded, but `self` is a UTF-8 string, then transcode
372
+ /// `i`'s offset to UTF-8 and return the resulting index. This allows the use
373
+ /// of indices from a bridged Cocoa string after the string has been converted
374
+ /// to a native Swift string. (Such indices are technically still considered
375
+ /// invalid, but we allow this specific case to keep compatibility with
376
+ /// existing code that assumes otherwise.)
377
+ ///
378
+ /// Detecting an encoding mismatch isn't always possible -- older binaries did
379
+ /// not set the flags that this method relies on. However, false positives
380
+ /// cannot happen: if this method detects a mismatch, then it is guaranteed to
381
+ /// be a real one.
382
+ internal func ensureMatchingEncodingNoTrap( _ i: Index ) -> Index ? {
383
+ if hasMatchingEncoding ( i) { return i }
346
384
return _slowEnsureMatchingEncoding ( i)
347
385
}
348
386
349
387
/// Slow path for `ensureMatchingEncoding` and `ensureMatchingEncodingNoTrap`,
/// taken when `hasMatchingEncoding(i)` is false.
///
/// Returns `nil` if `self` is not UTF-8 encoded. Otherwise, converts the
/// UTF-16 offset stored in `i` into an index into `self`'s UTF-8 storage and
/// returns it marked as known UTF-8.
///
/// NOTE(review): part of the UTF-16 -> UTF-8 branch is elided in this view;
/// confirm there are no additional early exits before relying on the above.
@_alwaysEmitIntoClient
350
388
@inline ( never)
351
389
@_effects ( releasenone)
352
- internal func _slowEnsureMatchingEncoding( _ i: String . Index ) -> String . Index {
390
+ internal func _slowEnsureMatchingEncoding( _ i: Index ) -> Index ? {
353
391
guard isUTF8 else {
354
392
// Attempt to use a UTF-8 index on a UTF-16 string. Strings don't usually
355
- // get converted to UTF-16 storage, so it seems okay to trap in this case
356
- // -- the index most likely comes from an unrelated string. (Trapping here
357
- // may still turn out to affect binary compatibility with broken code in
393
+ // get converted to UTF-16 storage, so it seems okay to reject this case
394
+ // -- the index most likely comes from an unrelated string. (This may
395
+ // still turn out to affect binary compatibility with broken code in
358
396
// existing binaries running with new stdlibs. If so, we can replace this
359
397
// with the same transcoding hack as in the UTF-16->8 case below.)
360
- //
361
- // Note that this trap is not guaranteed to trigger when the process
362
- // includes client binaries compiled with a previous Swift release.
363
- // (`i._canBeUTF16` can sometimes return true in that case even if the
364
- // index actually came from an UTF-8 string.) However, the trap will still
365
- // often trigger in this case, as long as the index was initialized by
366
- // code that was compiled with 5.7+.
367
- //
368
- // This trap can never trigger on OSes that have stdlibs <= 5.6, because
369
- // those versions never set the `isKnownUTF16` flag in `_StringObject`.
370
- _preconditionFailure ( " Invalid string index " )
398
+ return nil
371
399
}
372
400
// Attempt to use a UTF-16 index on a UTF-8 string.
373
401
//
@@ -383,10 +411,15 @@ extension _StringGuts {
383
411
// FIXME: Consider emitting a runtime warning here.
384
412
// FIXME: Consider performing a linked-on-or-after check & trapping if the
385
413
// client executable was built on some particular future Swift release.
386
- let utf16 = String ( self ) . utf16
387
- let base = utf16. index ( utf16. startIndex, offsetBy: i. _encodedOffset)
388
- if i. transcodedOffset == 0 { return base }
389
- return base. encoded ( offsetBy: i. transcodedOffset) . _knownUTF8
414
+ let utf16 = String . UTF16View ( self )
415
+ var r = utf16. index ( utf16. startIndex, offsetBy: i. _encodedOffset)
416
+ if i. transcodedOffset != 0 {
417
+ r = r. encoded ( offsetBy: i. transcodedOffset)
418
+ } else {
419
+ // Preserve alignment bits if possible.
420
+ r = r. _copyingAlignment ( from: i)
421
+ }
422
+ return r. _knownUTF8
390
423
}
391
424
}
392
425
0 commit comments