Skip to content

[6.0] Improve RangeSet union performance #74996

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions stdlib/public/core/RangeSet.swift
Original file line number Diff line number Diff line change
Expand Up @@ -251,10 +251,11 @@ extension RangeSet {
/// Adds the contents of the given range set to this range set.
///
/// - Parameter other: A range set to merge with this one.
///
/// - Complexity: O(*m* + *n*), where *m* and *n* are the number of ranges in
/// this and the other range set.
public mutating func formUnion(_ other: __owned RangeSet<Bound>) {
for range in other._ranges {
insert(contentsOf: range)
}
self = self.union(other)
}

/// Removes the contents of this range set that aren't also in the given
Expand Down Expand Up @@ -293,9 +294,7 @@ extension RangeSet {
public __consuming func union(
_ other: __owned RangeSet<Bound>
) -> RangeSet<Bound> {
var result = self
result.formUnion(other)
return result
return RangeSet(_ranges: _ranges._union(other._ranges))
}

/// Returns a new range set containing the contents of both this set and the
Expand Down
73 changes: 72 additions & 1 deletion stdlib/public/core/RangeSetRanges.swift
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,77 @@ extension RangeSet.Ranges {

return Self(_ranges: result)
}

@usableFromInline
internal func _union(_ other: Self) -> Self {
// Empty cases
if other.isEmpty {
return self
} else if self.isEmpty {
return other
}

// Instead of naively inserting the ranges of `other` into `self`,
// which can cause reshuffling with every insertion, this approach
// uses the guarantees that each array of ranges is non-overlapping and in
// increasing order to directly derive the union.
//
// Each range in the resulting range set is found by:
//
// 1. Finding the current lowest bound of the two range sets.
// 2. Searching for the first upper bound that is outside the merged
// boundaries of the two range sets.

// Use temporaries so that we can swap a/b, to simplify the logic below
var a = self._storage
var b = other._storage
var aIndex = a.startIndex
var bIndex = b.startIndex

var result: [Range<Bound>] = []
while aIndex < a.endIndex, bIndex < b.endIndex {
// Make sure that `a` is the source of the lower bound and `b` is the
// potential source for the upper bound.
if b[bIndex].lowerBound < a[aIndex].lowerBound {
swap(&a, &b)
swap(&aIndex, &bIndex)
}

var candidateRange = a[aIndex]
aIndex += 1

// Look for the correct upper bound, which is the first upper bound that
// isn't contained in the next range of the "other" ranges array.
while bIndex < b.endIndex, candidateRange.upperBound >= b[bIndex].lowerBound {
if candidateRange.upperBound >= b[bIndex].upperBound {
// The range `b[bIndex]` is entirely contained by `candidateRange`,
// so we need to advance and look at the next range in `b`.
bIndex += 1
} else {
// The range `b[bIndex]` extends past `candidateRange`, so:
//
// 1. We grow `candidateRange` to the upper bound of `b[bIndex]`
// 2. We swap the two range arrays, so that we're looking for the
// new upper bound in the other array.
candidateRange = candidateRange.lowerBound ..< b[bIndex].upperBound
bIndex += 1
swap(&a, &b)
swap(&aIndex, &bIndex)
}
}

result.append(candidateRange)
}

// Collect any remaining ranges without needing to merge.
if aIndex < a.endIndex {
result.append(contentsOf: a[aIndex...])
} else if bIndex < b.endIndex {
result.append(contentsOf: b[bIndex...])
}

return Self(_ranges: result)
}
}

@available(SwiftStdlib 6.0, *)
Expand Down Expand Up @@ -344,4 +415,4 @@ internal struct _Pair<Element>: RandomAccessCollection {
}
}
}
}
}
1 change: 1 addition & 0 deletions test/abi/macOS/x86_64/stdlib.swift
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ Added: _$ss8RangeSetV6RangesV10startIndexSivpMV
Added: _$ss8RangeSetV6RangesV11descriptionSSvg
Added: _$ss8RangeSetV6RangesV11descriptionSSvpMV
Added: _$ss8RangeSetV6RangesV13_intersectionyADyx_GAFF
Added: _$ss8RangeSetV6RangesV6_unionyADyx_GAFF
Added: _$ss8RangeSetV6RangesV2eeoiySbADyx_G_AFtFZ
Added: _$ss8RangeSetV6RangesV5_gaps9boundedByADyx_GSnyxG_tF
Added: _$ss8RangeSetV6RangesV5countSivg
Expand Down
49 changes: 49 additions & 0 deletions validation-test/stdlib/RangeSet.swift
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,55 @@ if #available(SwiftStdlib 6.0, *) {
}
}

RangeSetTests.test("union") {
func unionViaSet(
_ s1: RangeSet<Int>,
_ s2: RangeSet<Int>
) -> RangeSet<Int> {
let set1 = Set(parent.indices[s1])
let set2 = Set(parent.indices[s2])
return RangeSet(set1.union(set2), within: parent)
}

func testUnion(
_ set1: RangeSet<Int>,
_ set2: RangeSet<Int>,
expect union: RangeSet<Int>
) {
expectEqual(set1.union(set2), union)
expectEqual(set2.union(set1), union)

var set3 = set1
set3.formUnion(set2)
expectEqual(set3, union)

set3 = set2
set3.formUnion(set1)
expectEqual(set3, union)
}

// Simple tests
testUnion([0..<5, 9..<14],
[1..<3, 4..<6, 8..<12],
expect: [0..<6, 8..<14])

testUnion([10..<20, 50..<60],
[15..<55, 58..<65],
expect: [10..<65])

// Test with upper bound / lower bound equality
testUnion([10..<20, 30..<40],
[15..<30, 40..<50],
expect: [10..<50])

for _ in 0..<100 {
let set1 = buildRandomRangeSet()
let set2 = buildRandomRangeSet()
testUnion(set1, set2,
expect: unionViaSet(set1, set2))
}
}

RangeSetTests.test("intersection") {
func intersectionViaSet(
_ s1: RangeSet<Int>,
Expand Down