Skip to content

Commit f4eaf2b

Browse files
committed
Further untangling
1 parent 45dffdf commit f4eaf2b

File tree

1 file changed

+53
-36
lines changed

1 file changed

+53
-36
lines changed

stdlib/public/core/StringGraphemeBreaking.swift

Lines changed: 53 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -640,59 +640,76 @@ extension _StringGuts {
640640
nextScalar: (Int) -> (scalar: Unicode.Scalar, end: Int)?
641641
) -> Int {
642642
_internalInvariant(index < endIndex._encodedOffset)
643+
return _nextBoundary(startingAt: index, nextScalar: nextScalar)
644+
}
645+
}
643646

644-
// Note: If `index` isn't already on a boundary, then starting with an empty
645-
// state here sometimes leads to this method returning results that diverge
646-
// from the true breaks in the string.
647-
var state = _GraphemeBreakingState()
648-
var (scalar, index) = nextScalar(index)!
647+
fileprivate func _nextBoundary(
648+
startingAt index: Int,
649+
nextScalar: (Int) -> (scalar: Unicode.Scalar, end: Int)?
650+
) -> Int {
649651

650-
while true {
651-
guard let (scalar2, nextIndex) = nextScalar(index) else { break }
652-
if state.shouldBreak(between: scalar, and: scalar2) {
653-
break
654-
}
655-
index = nextIndex
656-
scalar = scalar2
657-
}
652+
// Note: If `index` isn't already on a boundary, then starting with an empty
653+
// state here sometimes leads to this method returning results that diverge
654+
// from the true breaks in the string.
655+
var state = _GraphemeBreakingState()
656+
var (scalar, index) = nextScalar(index)!
658657

659-
return index
658+
while true {
659+
guard let (scalar2, nextIndex) = nextScalar(index) else { break }
660+
if state.shouldBreak(between: scalar, and: scalar2) {
661+
break
662+
}
663+
index = nextIndex
664+
scalar = scalar2
660665
}
661666

667+
return index
668+
}
669+
670+
extension _StringGuts {
662671
// Returns the stride of the grapheme cluster ending at offset `index`.
663672
//
664673
// This method uses `previousScalar` to look back in the string as far as
665674
// necessary to find a correct grapheme cluster boundary, whether or not
666675
// `index` happens to be on a boundary itself.
667-
internal func previousBoundary(
676+
fileprivate func previousBoundary(
668677
endingAt index: Int,
669678
previousScalar: (Int) -> (scalar: Unicode.Scalar, start: Int)?
670679
) -> Int {
671-
// FIXME: This requires potentially arbitrary lookback in each iteration,
672-
// leading to quadratic behavior in some edge cases. Ideally lookback should
673-
// only be done once per cluster (or in the case of RI sequences, once per
674-
// flag sequence). One way to avoid most quadratic behavior is to replace
675-
// this implementation with a scheme that first searches backwards for a
676-
// safe point then iterates forward using the regular `shouldBreak` until we
677-
// reach `index`, as recommended in section 6.4 of TR#29.
678-
//
679-
// https://www.unicode.org/reports/tr29/#Random_Access
680+
_previousBoundary(endingAt: index, previousScalar: previousScalar)
681+
}
680682

681-
var (scalar2, index) = previousScalar(index)!
683+
}
682684

683-
while true {
684-
guard let (scalar1, previousIndex) = previousScalar(index) else { break }
685-
if _shouldBreakWithLookback(
686-
between: scalar1, and: scalar2, at: index, with: previousScalar
687-
) {
688-
break
689-
}
690-
index = previousIndex
691-
scalar2 = scalar1
692-
}
685+
fileprivate func _previousBoundary(
686+
endingAt index: Int,
687+
previousScalar: (Int) -> (scalar: Unicode.Scalar, start: Int)?
688+
) -> Int {
689+
// FIXME: This requires potentially arbitrary lookback in each iteration,
690+
// leading to quadratic behavior in some edge cases. Ideally lookback should
691+
// only be done once per cluster (or in the case of RI sequences, once per
692+
// flag sequence). One way to avoid most quadratic behavior is to replace
693+
// this implementation with a scheme that first searches backwards for a
694+
// safe point then iterates forward using the regular `shouldBreak` until we
695+
// reach `index`, as recommended in section 6.4 of TR#29.
696+
//
697+
// https://www.unicode.org/reports/tr29/#Random_Access
693698

694-
return index
699+
var (scalar2, index) = previousScalar(index)!
700+
701+
while true {
702+
guard let (scalar1, previousIndex) = previousScalar(index) else { break }
703+
if _shouldBreakWithLookback(
704+
between: scalar1, and: scalar2, at: index, with: previousScalar
705+
) {
706+
break
707+
}
708+
index = previousIndex
709+
scalar2 = scalar1
695710
}
711+
712+
return index
696713
}
697714

698715
extension _GraphemeBreakingState {

0 commit comments

Comments
 (0)