@@ -302,19 +302,13 @@ extension _StringGuts {
302
302
// Encoding
303
303
extension _StringGuts {
304
304
/// Returns whether this string has a UTF-8 storage representation.
305
+ /// If this returns false, then the string is encoded in UTF-16.
305
306
///
306
307
/// This always returns a value corresponding to the string's actual encoding.
307
308
@_alwaysEmitIntoClient
308
309
@inline ( __always)
309
310
internal var isUTF8 : Bool { _object. isUTF8 }
310
311
311
- /// Returns whether this string has a UTF-16 storage representation.
312
- ///
313
- /// This always returns a value corresponding to the string's actual encoding.
314
- @_alwaysEmitIntoClient
315
- @inline ( __always)
316
- internal var isUTF16 : Bool { _object. isUTF16 }
317
-
318
312
@_alwaysEmitIntoClient // Swift 5.7
319
313
@inline ( __always)
320
314
internal func markEncoding( _ i: String . Index ) -> String . Index {
@@ -334,41 +328,75 @@ extension _StringGuts {
334
328
i. _hasMatchingEncoding ( isUTF8: isUTF8)
335
329
}
336
330
337
- /// Return an index whose encoding can be assumed to match that of `self`.
331
+ /// Return an index whose encoding can be assumed to match that of `self`,
332
+ /// trapping if `i` has an incompatible encoding.
333
+ ///
334
+ /// If `i` is UTF-8 encoded, but `self` is a UTF-16 string, then trap.
335
+ ///
336
+ /// If `i` is UTF-16 encoded, but `self` is a UTF-8 string, then transcode
337
+ /// `i`'s offset to UTF-8 and return the resulting index. This allows the use
338
+ /// of indices from a bridged Cocoa string after the string has been converted
339
+ /// to a native Swift string. (Such indices are technically still considered
340
+ /// invalid, but we allow this specific case to keep compatibility with
341
+ /// existing code that assumes otherwise.)
338
342
///
339
343
/// Detecting an encoding mismatch isn't always possible -- older binaries did
340
344
/// not set the flags that this method relies on. However, false positives
341
345
/// cannot happen: if this method detects a mismatch, then it is guaranteed to
342
346
/// be a real one.
343
347
@_alwaysEmitIntoClient
344
348
@inline ( __always)
345
- internal func ensureMatchingEncoding( _ i: String . Index ) -> String . Index {
349
+ internal func ensureMatchingEncoding( _ i: Index ) -> Index {
346
350
if _fastPath ( hasMatchingEncoding ( i) ) { return i }
351
+ if let i = _slowEnsureMatchingEncoding ( i) { return i }
352
+ // Note that this trap is not guaranteed to trigger when the process
353
+ // includes client binaries compiled with a previous Swift release.
354
+ // (`i._canBeUTF16` can sometimes return true in that case even if the index
355
+ // actually came from a UTF-8 string.) However, the trap will still often
356
+ // trigger in this case, as long as the index was initialized by code that
357
+ // was compiled with 5.7+.
358
+ //
359
+ // This trap will rarely if ever trigger on OSes that have stdlibs <= 5.6,
360
+ // because those versions never set the `isKnownUTF16` flag in
361
+ // `_StringObject`. (The flag may still be set within inlinable code,
362
+ // though.)
363
+ _preconditionFailure ( " Invalid string index " )
364
+ }
365
+
366
+ /// Return an index that corresponds to the same position as `i`, but whose
367
+ /// encoding can be assumed to match that of `self`, returning `nil` if `i`
368
+ /// has an incompatible encoding.
369
+ ///
370
+ /// If `i` is UTF-8 encoded, but `self` is a UTF-16 string, then return `nil`.
371
+ ///
372
+ /// If `i` is UTF-16 encoded, but `self` is a UTF-8 string, then transcode
373
+ /// `i`'s offset to UTF-8 and return the resulting index. This allows the use
374
+ /// of indices from a bridged Cocoa string after the string has been converted
375
+ /// to a native Swift string. (Such indices are technically still considered
376
+ /// invalid, but we allow this specific case to keep compatibility with
377
+ /// existing code that assumes otherwise.)
378
+ ///
379
+ /// Detecting an encoding mismatch isn't always possible -- older binaries did
380
+ /// not set the flags that this method relies on. However, false positives
381
+ /// cannot happen: if this method detects a mismatch, then it is guaranteed to
382
+ /// be a real one.
383
+ internal func ensureMatchingEncodingNoTrap( _ i: Index ) -> Index ? {
384
+ if hasMatchingEncoding ( i) { return i }
347
385
return _slowEnsureMatchingEncoding ( i)
348
386
}
349
387
350
388
@_alwaysEmitIntoClient
351
389
@inline ( never)
352
390
@_effects ( releasenone)
353
- internal func _slowEnsureMatchingEncoding( _ i: String . Index ) -> String . Index {
391
+ internal func _slowEnsureMatchingEncoding( _ i: Index ) -> Index ? {
354
392
guard isUTF8 else {
355
393
// Attempt to use a UTF-8 index on a UTF-16 string. Strings don't usually
356
- // get converted to UTF-16 storage, so it seems okay to trap in this case
357
- // -- the index most likely comes from an unrelated string. (Trapping here
358
- // may still turn out to affect binary compatibility with broken code in
394
+ // get converted to UTF-16 storage, so it seems okay to reject this case
395
+ // -- the index most likely comes from an unrelated string. (This may
396
+ // still turn out to affect binary compatibility with broken code in
359
397
// existing binaries running with new stdlibs. If so, we can replace this
360
398
// with the same transcoding hack as in the UTF-16->8 case below.)
361
- //
362
- // Note that this trap is not guaranteed to trigger when the process
363
- // includes client binaries compiled with a previous Swift release.
364
- // (`i._canBeUTF16` can sometimes return true in that case even if the
365
- // index actually came from an UTF-8 string.) However, the trap will still
366
- // often trigger in this case, as long as the index was initialized by
367
- // code that was compiled with 5.7+.
368
- //
369
- // This trap can never trigger on OSes that have stdlibs <= 5.6, because
370
- // those versions never set the `isKnownUTF16` flag in `_StringObject`.
371
- _preconditionFailure ( " Invalid string index " )
399
+ return nil
372
400
}
373
401
// Attempt to use a UTF-16 index on a UTF-8 string.
374
402
//
@@ -384,10 +412,15 @@ extension _StringGuts {
384
412
// FIXME: Consider emitting a runtime warning here.
385
413
// FIXME: Consider performing a linked-on-or-after check & trapping if the
386
414
// client executable was built on some particular future Swift release.
387
- let utf16 = String ( self ) . utf16
388
- let base = utf16. index ( utf16. startIndex, offsetBy: i. _encodedOffset)
389
- if i. transcodedOffset == 0 { return base }
390
- return base. encoded ( offsetBy: i. transcodedOffset) . _knownUTF8
415
+ let utf16 = String . UTF16View ( self )
416
+ var r = utf16. index ( utf16. startIndex, offsetBy: i. _encodedOffset)
417
+ if i. transcodedOffset != 0 {
418
+ r = r. encoded ( offsetBy: i. transcodedOffset)
419
+ } else {
420
+ // Preserve alignment bits if possible.
421
+ r = r. _copyingAlignment ( from: i)
422
+ }
423
+ return r. _knownUTF8
391
424
}
392
425
}
393
426
0 commit comments