Skip to content

Commit 1c0623a

Browse files
authored
[6.0] Improve RangeSet union performance (#74996)
* Improve RangeSet union performance

  The existing implementation of `RangeSet.formUnion` naively inserts each range of one range set into the other. Each insertion may require shifting the rest of the ranges array down, so the operation as a whole can take quadratic time. This change instead uses the known invariants of the ranges array (ranges are non-overlapping and in increasing order) to perform the merge in linear time. Each range in the resulting array is determined by finding the next lowest lower bound across the two range sets, and then searching for the first upper bound that isn't covered by the merged contents of the two range sets.

  rdar://129296438

* Update ABI checker
1 parent f8774c3 commit 1c0623a

File tree

4 files changed

+127
-7
lines changed

4 files changed

+127
-7
lines changed

stdlib/public/core/RangeSet.swift

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -251,10 +251,11 @@ extension RangeSet {
251251
/// Adds the contents of the given range set to this range set.
252252
///
253253
/// - Parameter other: A range set to merge with this one.
254+
///
255+
/// - Complexity: O(*m* + *n*), where *m* and *n* are the number of ranges in
256+
/// this and the other range set.
254257
public mutating func formUnion(_ other: __owned RangeSet<Bound>) {
255-
for range in other._ranges {
256-
insert(contentsOf: range)
257-
}
258+
self = self.union(other)
258259
}
259260

260261
/// Removes the contents of this range set that aren't also in the given
@@ -293,9 +294,7 @@ extension RangeSet {
293294
public __consuming func union(
294295
_ other: __owned RangeSet<Bound>
295296
) -> RangeSet<Bound> {
296-
var result = self
297-
result.formUnion(other)
298-
return result
297+
return RangeSet(_ranges: _ranges._union(other._ranges))
299298
}
300299

301300
/// Returns a new range set containing the contents of both this set and the

stdlib/public/core/RangeSetRanges.swift

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,77 @@ extension RangeSet.Ranges {
265265

266266
return Self(_ranges: result)
267267
}
268+
269+
@usableFromInline
270+
internal func _union(_ other: Self) -> Self {
271+
// Empty cases
272+
if other.isEmpty {
273+
return self
274+
} else if self.isEmpty {
275+
return other
276+
}
277+
278+
// Instead of naively inserting the ranges of `other` into `self`,
279+
// which can cause reshuffling with every insertion, this approach
280+
// uses the guarantees that each array of ranges is non-overlapping and in
281+
// increasing order to directly derive the union.
282+
//
283+
// Each range in the resulting range set is found by:
284+
//
285+
// 1. Finding the current lowest bound of the two range sets.
286+
// 2. Searching for the first upper bound that is outside the merged
287+
// boundaries of the two range sets.
288+
289+
// Use temporaries so that we can swap a/b, to simplify the logic below
290+
var a = self._storage
291+
var b = other._storage
292+
var aIndex = a.startIndex
293+
var bIndex = b.startIndex
294+
295+
var result: [Range<Bound>] = []
296+
while aIndex < a.endIndex, bIndex < b.endIndex {
297+
// Make sure that `a` is the source of the lower bound and `b` is the
298+
// potential source for the upper bound.
299+
if b[bIndex].lowerBound < a[aIndex].lowerBound {
300+
swap(&a, &b)
301+
swap(&aIndex, &bIndex)
302+
}
303+
304+
var candidateRange = a[aIndex]
305+
aIndex += 1
306+
307+
// Look for the correct upper bound, which is the first upper bound that
308+
// isn't contained in the next range of the "other" ranges array.
309+
while bIndex < b.endIndex, candidateRange.upperBound >= b[bIndex].lowerBound {
310+
if candidateRange.upperBound >= b[bIndex].upperBound {
311+
// The range `b[bIndex]` is entirely contained by `candidateRange`,
312+
// so we need to advance and look at the next range in `b`.
313+
bIndex += 1
314+
} else {
315+
// The range `b[bIndex]` extends past `candidateRange`, so:
316+
//
317+
// 1. We grow `candidateRange` to the upper bound of `b[bIndex]`
318+
// 2. We swap the two range arrays, so that we're looking for the
319+
// new upper bound in the other array.
320+
candidateRange = candidateRange.lowerBound ..< b[bIndex].upperBound
321+
bIndex += 1
322+
swap(&a, &b)
323+
swap(&aIndex, &bIndex)
324+
}
325+
}
326+
327+
result.append(candidateRange)
328+
}
329+
330+
// Collect any remaining ranges without needing to merge.
331+
if aIndex < a.endIndex {
332+
result.append(contentsOf: a[aIndex...])
333+
} else if bIndex < b.endIndex {
334+
result.append(contentsOf: b[bIndex...])
335+
}
336+
337+
return Self(_ranges: result)
338+
}
268339
}
269340

270341
@available(SwiftStdlib 6.0, *)
@@ -344,4 +415,4 @@ internal struct _Pair<Element>: RandomAccessCollection {
344415
}
345416
}
346417
}
347-
}
418+
}

test/abi/macOS/x86_64/stdlib.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ Added: _$ss8RangeSetV6RangesV10startIndexSivpMV
163163
Added: _$ss8RangeSetV6RangesV11descriptionSSvg
164164
Added: _$ss8RangeSetV6RangesV11descriptionSSvpMV
165165
Added: _$ss8RangeSetV6RangesV13_intersectionyADyx_GAFF
166+
Added: _$ss8RangeSetV6RangesV6_unionyADyx_GAFF
166167
Added: _$ss8RangeSetV6RangesV2eeoiySbADyx_G_AFtFZ
167168
Added: _$ss8RangeSetV6RangesV5_gaps9boundedByADyx_GSnyxG_tF
168169
Added: _$ss8RangeSetV6RangesV5countSivg

validation-test/stdlib/RangeSet.swift

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,55 @@ if #available(SwiftStdlib 6.0, *) {
174174
}
175175
}
176176

177+
RangeSetTests.test("union") {
178+
func unionViaSet(
179+
_ s1: RangeSet<Int>,
180+
_ s2: RangeSet<Int>
181+
) -> RangeSet<Int> {
182+
let set1 = Set(parent.indices[s1])
183+
let set2 = Set(parent.indices[s2])
184+
return RangeSet(set1.union(set2), within: parent)
185+
}
186+
187+
func testUnion(
188+
_ set1: RangeSet<Int>,
189+
_ set2: RangeSet<Int>,
190+
expect union: RangeSet<Int>
191+
) {
192+
expectEqual(set1.union(set2), union)
193+
expectEqual(set2.union(set1), union)
194+
195+
var set3 = set1
196+
set3.formUnion(set2)
197+
expectEqual(set3, union)
198+
199+
set3 = set2
200+
set3.formUnion(set1)
201+
expectEqual(set3, union)
202+
}
203+
204+
// Simple tests
205+
testUnion([0..<5, 9..<14],
206+
[1..<3, 4..<6, 8..<12],
207+
expect: [0..<6, 8..<14])
208+
209+
testUnion([10..<20, 50..<60],
210+
[15..<55, 58..<65],
211+
expect: [10..<65])
212+
213+
// Test with upper bound / lower bound equality
214+
testUnion([10..<20, 30..<40],
215+
[15..<30, 40..<50],
216+
expect: [10..<50])
217+
218+
for _ in 0..<100 {
219+
let set1 = buildRandomRangeSet()
220+
let set2 = buildRandomRangeSet()
221+
testUnion(set1, set2,
222+
expect: unionViaSet(set1, set2))
223+
}
224+
}
225+
177226
RangeSetTests.test("intersection") {
178227
func intersectionViaSet(
179228
_ s1: RangeSet<Int>,

0 commit comments

Comments
 (0)