Skip to content

Commit 1b240d5

Browse files
committed
wip: more aligning and testing
1 parent dc23e91 commit 1b240d5

File tree

4 files changed

+375
-328
lines changed

4 files changed

+375
-328
lines changed

stdlib/public/core/StringGraphemeBreaking.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -219,10 +219,10 @@ extension _StringGuts {
219219
return len
220220
}
221221

222-
print("Calculating foreign grapheme stride")
222+
// print("Calculating foreign grapheme stride")
223223

224224
if let utf16Ptr = _stdlib_binary_CFStringGetCharactersPtr(cocoa) {
225-
print("has pointer")
225+
// print("has pointer")
226226
let utf16 = UnsafeBufferPointer(start: utf16Ptr, count: count)
227227
return _measureCharacterStrideICU(of: utf16, startingAt: i)
228228
}

stdlib/public/core/StringUTF8View.swift

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -398,49 +398,61 @@ extension String.UTF8View {
398398

399399
// Foreign string support
400400
extension String.UTF8View {
401+
// Align a foreign UTF-16 index to a valid UTF-8 position. If the index
402+
// already has a nonzero transcoded offset, it is already a valid UTF-8
403+
// position (referring to a continuation byte) and is returned unchanged.
404+
// Otherwise, the index is scalar-aligned. This is needed because we may be
405+
// passed a non-scalar-aligned foreign index from the UTF16View.
406+
@inline(__always)
407+
internal func _utf8AlignForeignIndex(_ idx: String.Index) -> String.Index {
408+
_internalInvariant(_guts.isForeign)
409+
guard idx.transcodedOffset == 0 else { return idx }
410+
return _guts.scalarAlign(idx)
411+
}
412+
401413
@usableFromInline @inline(never)
402414
@_effects(releasenone)
403-
internal func _foreignIndex(after i: Index) -> Index {
415+
internal func _foreignIndex(after idx: Index) -> Index {
404416
_internalInvariant(_guts.isForeign)
405417

406-
// FIXME: We should need some kind of alignment if given an index into a
407-
// surrogate pair
418+
let idx = _utf8AlignForeignIndex(idx)
408419

409420
let (scalar, scalarLen) = _guts.foreignErrorCorrectedScalar(
410-
startingAt: i.strippingTranscoding)
421+
startingAt: idx.strippingTranscoding)
411422
let utf8Len = UTF8.width(scalar)
412423

413424
if utf8Len == 1 {
414-
_internalInvariant(i.transcodedOffset == 0)
415-
return i.nextEncoded
425+
_internalInvariant(idx.transcodedOffset == 0)
426+
return idx.nextEncoded
416427
}
417428

418429
// Check if we're still transcoding sub-scalar
419-
if i.transcodedOffset < utf8Len - 1 {
420-
return i.nextTranscoded
430+
if idx.transcodedOffset < utf8Len - 1 {
431+
return idx.nextTranscoded
421432
}
422433

423434
// Skip to the next scalar
424-
return i.encoded(offsetBy: scalarLen)
435+
return idx.encoded(offsetBy: scalarLen)
425436
}
426437

427438
@usableFromInline @inline(never)
428439
@_effects(releasenone)
429-
internal func _foreignIndex(before i: Index) -> Index {
440+
internal func _foreignIndex(before idx: Index) -> Index {
430441
_internalInvariant(_guts.isForeign)
431442

432-
// FIXME: We should need some kind of alignment if given an index into a
433-
// surrogate pair
443+
let idx = _utf8AlignForeignIndex(idx)
434444

435-
if i.transcodedOffset != 0 {
436-
_internalInvariant((1...3) ~= i.transcodedOffset)
437-
return i.priorTranscoded
445+
if idx.transcodedOffset != 0 {
446+
_internalInvariant((1...3) ~= idx.transcodedOffset)
447+
return idx.priorTranscoded
438448
}
439449

440450
let (scalar, scalarLen) = _guts.foreignErrorCorrectedScalar(
441-
endingAt: i)
451+
endingAt: idx)
442452
let utf8Len = UTF8.width(scalar)
443-
return i.encoded(offsetBy: -scalarLen).transcoded(withOffset: utf8Len &- 1)
453+
return idx.encoded(
454+
offsetBy: -scalarLen
455+
).transcoded(withOffset: utf8Len &- 1)
444456
}
445457

446458
@usableFromInline @inline(never)

stdlib/public/core/UnicodeHelpers.swift

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,9 @@ extension _StringGuts {
189189
return String.Index(_encodedOffset: idx._encodedOffset).aligned
190190
}
191191
if _slowPath(self.isForeign) {
192-
return foreignScalarAlign(idx)
192+
let foreignIdx = foreignScalarAlign(idx)
193+
_internalInvariant(foreignIdx.isAligned)
194+
return foreignIdx
193195
}
194196

195197
return String.Index(_encodedOffset:
@@ -360,7 +362,7 @@ extension _StringGuts {
360362
@usableFromInline @inline(never) // slow-path
361363
@_effects(releasenone)
362364
internal func foreignScalarAlign(_ idx: Index) -> Index {
363-
guard idx._encodedOffset != self.count else { return idx }
365+
guard idx._encodedOffset != self.count else { return idx.aligned }
364366

365367
_internalInvariant(idx._encodedOffset < self.count)
366368

0 commit comments

Comments
 (0)