Skip to content

Commit 95a222f

Browse files
authored
Improve performance for comparing AttributedStrings with differing character counts (#1224)
* Improve performance for comparing AttributedStrings with differing character counts
* Address feedback
* Fix typo
1 parent aeb4256 commit 95a222f

File tree

3 files changed

+40
-0
lines changed

3 files changed

+40
-0
lines changed

Benchmarks/Benchmarks/AttributedString/BenchmarkAttributedString.swift

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,11 @@ let benchmarks = {
414414
#endif
415415

416416
let manyAttributesString2 = createManyAttributesString()
417+
// Fixture: the result of createManyAttributesString() with a single "a" appended to its
// characters view, produced by an immediately-invoked closure.
let manyAttributesString3 = {
418+
var str = createManyAttributesString()
419+
str.characters.append("a")
420+
return str
421+
}()
417422
let manyAttributesStringRange = manyAttributesString.characters.index(manyAttributesString.startIndex, offsetBy: manyAttributesString.characters.count / 2)...
418423
let manyAttributesSubstring = manyAttributesString[manyAttributesStringRange]
419424
let manyAttributes2Substring = manyAttributesString2[manyAttributesStringRange]
@@ -422,6 +427,10 @@ let benchmarks = {
422427
blackHole(manyAttributesString == manyAttributesString2)
423428
}
424429

430+
// Benchmarks `==` between manyAttributesString and manyAttributesString3, the fixture built
// by appending "a" to a createManyAttributesString() result. The comparison result is handed
// to blackHole, presumably so the work is not optimized away (TODO confirm).
Benchmark("equalityDifferingCharacters") { benchmark in
431+
blackHole(manyAttributesString == manyAttributesString3)
432+
}
433+
425434
// Benchmarks `==` between the slices of manyAttributesString and manyAttributesString2 taken
// over the same range (manyAttributesStringRange). The comparison result is handed to
// blackHole, presumably so the work is not optimized away (TODO confirm).
Benchmark("substringEquality") { benchmark in
426435
blackHole(manyAttributesSubstring == manyAttributes2Substring)
427436
}

Sources/FoundationEssentials/AttributedString/AttributedString+Guts.swift

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,27 @@ extension AttributedString.Guts {
110110

111111
guard left.count == right.count else { return false }
112112
guard !left.isEmpty else { return true }
113+
114+
if !left._isPartial && !right._isPartial {
115+
// For a full BigString, we can get the grapheme cluster count in constant time since
116+
// the grapheme cluster count is cached at the node level in the tree. It is not
117+
// possible for two AttributedStrings with differing character counts to be equal,
118+
// so bail early if we detect that.
119+
//
120+
// Note: we should not perform this check for cases where we are not knowingly working
121+
// with the full string. Since character counts are only cached at the node level,
122+
// to get the character count of a substring you would need to run the grapheme
123+
// breaking algorithm over the partial first and last chunks. While this is
124+
// technically done in constant time as chunks have a max of 255 UTF-8 code units, grapheme
125+
// breaking up to 510 UTF-8 code units (two chunks) would not be cheap. In the future we can
126+
// investigate best-effort shortcuts by comparing the counts of just the "middle"
127+
// chunks that we can determine cheaply along with the knowledge that the partial
128+
// first and last chunks have a character count no more than their UTF-8 counts.
129+
guard left._guts.string.count == right._guts.string.count else {
130+
return false
131+
}
132+
}
133+
113134

114135
guard var leftIndex = left._strBounds.ranges.first?.lowerBound, var rightIndex = right._strBounds.ranges.first?.lowerBound else { return false }
115136

Sources/FoundationEssentials/AttributedString/AttributedString+Runs.swift

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,16 @@ extension AttributedString {
3131
internal let _strBounds: RangeSet<BigString.Index>
3232
internal let _isDiscontiguous: Bool
3333

34+
/// Whether this view covers less than the entire underlying string.
///
/// Returns `true` when the view is discontiguous, or when its bounds do not run exactly
/// from `_guts.string.startIndex` to `_guts.string.endIndex`; returns `false` otherwise.
///
/// - Precondition: Both ends of `_bounds` must carry a string index; otherwise this traps.
internal var _isPartial: Bool {
35+
// A discontiguous view is always reported as partial, without inspecting its ranges.
guard !_isDiscontiguous else {
36+
return true
37+
}
38+
guard let lower = _bounds.lowerBound._stringIndex, let upper = _bounds.upperBound._stringIndex else {
39+
preconditionFailure("AttributedString.Runs created with bounds that have un-set string indices")
40+
}
41+
// Full coverage requires both endpoints to coincide with the string's own start and end.
return _guts.string.startIndex != lower || _guts.string.endIndex != upper
42+
}
43+
3444
/// Creates a view over a single contiguous range by wrapping `bounds` in a `RangeSet`
/// and delegating to the `RangeSet`-based initializer.
internal init(_ guts: Guts, in bounds: Range<BigString.Index>) {
3545
self.init(guts, in: RangeSet(bounds))
3646
}

0 commit comments

Comments
 (0)