Skip to content

Commit 9d4d0f3

Browse files
committed
Auto merge of #61020 - HeroicKatora:master, r=<try>
librustc_data_structures: Speed up union of sparse and dense hybrid set. This optimization speeds up the union of a hybrid bitset when the union switches it from a sparse representation to a dense bitset. It now clones the dense bitset and integrates only the sparse elements, instead of densifying the sparse bitset (initializing all elements) and then performing a union of two dense bitsets, which touches all words a second time. It is not completely certain whether the added complexity is worth it, but I would like to hear some feedback in any case. Benchmark results from my machine: ``` Now: bit_set::union_hybrid_sparse_to_dense ... bench: 72 ns/iter (+/- 5) Previous: bit_set::union_hybrid_sparse_to_dense ... bench: 90 ns/iter (+/- 6) ``` This is the second iteration of trying to improve the speed, since I missed the return value in the first and forgot to run the relevant tests. Oops.
2 parents 11f01bf + 3f28811 commit 9d4d0f3

File tree

1 file changed

+143
-4
lines changed

1 file changed

+143
-4
lines changed

src/librustc_data_structures/bit_set.rs

Lines changed: 143 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@ use std::iter;
55
use std::marker::PhantomData;
66
use std::mem;
77
use std::slice;
8+
#[cfg(test)]
9+
extern crate test;
10+
#[cfg(test)]
11+
use test::Bencher;
812

913
pub type Word = u64;
1014
pub const WORD_BYTES: usize = mem::size_of::<Word>();
@@ -177,6 +181,45 @@ impl<T: Idx> BitSet<T> {
177181
// Note: we currently don't bother trying to make a Sparse set.
178182
HybridBitSet::Dense(self.to_owned())
179183
}
184+
185+
/// Sets `self = self | sparse`. In contrast to `union`, returns `true` if `self` contained at
186+
/// least one bit that is not in `sparse` (i.e. `sparse` is not a superset of `self`).
187+
///
188+
/// This is an optimization for union of a hybrid bitset.
///
/// # Panics
///
/// Panics if `sparse` and `self` have different domain sizes.
///
/// NOTE(review): the loop only advances when the word index grows, so it assumes that
/// `sparse.iter()` yields elements in ascending order -- confirm against `SparseBitSet`.
/// NOTE(review): `self.words[0]` is indexed even when `sparse` is empty, so this assumes
/// `self.words` holds at least one word -- confirm for a domain size of zero.
189+
fn reverse_union_sparse(&mut self, sparse: &SparseBitSet<T>) -> bool {
190+
assert!(sparse.domain_size == self.domain_size);
191+
self.clear_excess_bits();
192+
193+
let mut not_already = false;
194+
// Index of the word whose sparse bits are currently being collected (not yet merged).
195+
let mut current_index = 0;
196+
// Mask of the bits in the current word that came from the sparse set.
197+
let mut new_bit_mask = 0;
198+
for (word_index, mask) in sparse.iter().map(|x| word_index_and_mask(*x)) {
199+
// The next bit lies in a later word: finish the current word first.
200+
if word_index > current_index {
201+
self.words[current_index] |= new_bit_mask;
202+
// Were there any bits in the finished word that did not occur in the sparse set?
203+
not_already |= (self.words[current_index] ^ new_bit_mask) != 0;
204+
// The skipped words received no sparse bits, so any set bit in them counts.
205+
not_already |= self.words[current_index+1..word_index].iter().any(|&x| x != 0);
206+
// Continue with the word that contains the new bit.
207+
current_index = word_index;
208+
// Reset the bit mask; no bits have been collected for the new word yet.
209+
new_bit_mask = 0;
210+
}
211+
// Record the bit as coming from the sparse set. It is not written to `self.words`
212+
// here; the accumulated mask is merged once the whole word has been collected.
213+
new_bit_mask |= mask;
214+
}
215+
self.words[current_index] |= new_bit_mask;
216+
// Any bits in the last inspected word that were not in the sparse set?
217+
not_already |= (self.words[current_index] ^ new_bit_mask) != 0;
218+
// Any bits in the tail? `clear_excess_bits` was called above, presumably so that bits
// beyond the domain cannot be counted here.
219+
not_already |= self.words[current_index+1..].iter().any(|&x| x != 0);
220+
221+
not_already
222+
}
180223
}
181224

182225
/// This is implemented by all the bitsets so that BitSet::union() can be
@@ -514,10 +557,22 @@ impl<T: Idx> HybridBitSet<T> {
514557
changed
515558
}
516559
HybridBitSet::Dense(other_dense) => {
517-
// `self` is sparse and `other` is dense. Densify
518-
// `self` and then do the bitwise union.
519-
let mut new_dense = self_sparse.to_dense();
520-
let changed = new_dense.union(other_dense);
560+
// `self` is sparse and `other` is dense. To
561+
// merge them, we have two available strategies:
562+
// * Densify `self`, then merge `other` into it.
563+
// * Clone `other`, then integrate bits from `self`.
564+
// The second strategy requires a dedicated method
565+
// since the usual `union` would report a change
566+
// relative to the clone of `other`, not to `self`.
567+
// The dedicated method is slightly faster if the
568+
// bits of the sparse bitset map to only a few words
569+
// of the dense representation, i.e. the indices are
570+
// near each other.
571+
//
572+
// Benchmarking seems to suggest that the second
573+
// option is worth it.
574+
let mut new_dense = other_dense.clone();
575+
let changed = new_dense.reverse_union_sparse(self_sparse);
521576
*self = HybridBitSet::Dense(new_dense);
522577
changed
523578
}
@@ -1132,3 +1187,87 @@ fn sparse_matrix_iter() {
11321187
}
11331188
assert!(iter.next().is_none());
11341189
}
1190+
1191+
/// Benchmark: merge a dense hybrid set into an empty sparse hybrid set.
///
/// Both sets have a domain size of 256. Each iteration clones both sets before the
/// union, so the measured time includes the two clones.
1192+
#[bench]
1193+
fn union_hybrid_sparse_empty_to_dense(b: &mut Bencher) {
1194+
let mut pre_dense: HybridBitSet<usize> = HybridBitSet::new_empty(256);
1195+
// Insert 10 distinct elements; presumably this exceeds `SPARSE_MAX` and makes the set
// dense -- TODO confirm.
for i in 0..10 {
1196+
assert!(pre_dense.insert(i));
1197+
}
1198+
let pre_sparse: HybridBitSet<usize> = HybridBitSet::new_empty(256);
1199+
b.iter(|| {
1200+
let dense = pre_dense.clone();
1201+
let mut sparse = pre_sparse.clone();
1202+
sparse.union(&dense);
1203+
})
1204+
}
1205+
1206+
/// Benchmark: merge a dense hybrid set into a full sparse hybrid set with overlapping indices.
///
/// Both sets have a domain size of 256; the dense set holds `0..10` and the sparse set
/// holds `0..SPARSE_MAX`. Each iteration clones both sets before the union, so the
/// measured time includes the two clones.
1207+
#[bench]
1208+
fn union_hybrid_sparse_full_to_dense(b: &mut Bencher) {
1209+
let mut pre_dense: HybridBitSet<usize> = HybridBitSet::new_empty(256);
1210+
// Insert 10 distinct elements; presumably this exceeds `SPARSE_MAX` and makes the set
// dense -- TODO confirm.
for i in 0..10 {
1211+
assert!(pre_dense.insert(i));
1212+
}
1213+
let mut pre_sparse: HybridBitSet<usize> = HybridBitSet::new_empty(256);
1214+
// Fill the sparse set with `SPARSE_MAX` elements, all located at the start of the domain.
for i in 0..SPARSE_MAX {
1215+
assert!(pre_sparse.insert(i));
1216+
}
1217+
b.iter(|| {
1218+
let dense = pre_dense.clone();
1219+
let mut sparse = pre_sparse.clone();
1220+
sparse.union(&dense);
1221+
})
1222+
}
1223+
1224+
/// Benchmark: merge a dense hybrid set into a full sparse hybrid set whose indices are
/// spread over the whole domain.
///
/// Both sets have a domain size of `SPARSE_MAX*64`. Each iteration clones both sets
/// before the union, so the measured time includes the two clones.
1225+
#[bench]
1226+
fn union_hybrid_sparse_domain_to_dense(b: &mut Bencher) {
1227+
let mut pre_dense: HybridBitSet<usize> = HybridBitSet::new_empty(SPARSE_MAX*64);
1228+
// Insert 10 distinct elements; presumably this exceeds `SPARSE_MAX` and makes the set
// dense -- TODO confirm.
for i in 0..10 {
1229+
assert!(pre_dense.insert(i));
1230+
}
1231+
let mut pre_sparse: HybridBitSet<usize> = HybridBitSet::new_empty(SPARSE_MAX*64);
1232+
// Insert every 64th index, i.e. one element per 64-bit `Word` of the domain.
for i in 0..SPARSE_MAX {
1233+
assert!(pre_sparse.insert(i*64));
1234+
}
1235+
b.iter(|| {
1236+
let dense = pre_dense.clone();
1237+
let mut sparse = pre_sparse.clone();
1238+
sparse.union(&dense);
1239+
})
1240+
}
1241+
1242+
/// Benchmark: merge a completely filled hybrid set into an empty hybrid set where the
/// domain is very small (`SPARSE_MAX` elements).
///
/// Each iteration clones both sets before the union, so the measured time includes the
/// two clones.
///
/// NOTE(review): `pre_dense` receives only `SPARSE_MAX` elements. If a hybrid set stays
/// sparse until it holds more than `SPARSE_MAX` elements, `pre_dense` is never actually
/// dense and this benchmark does not exercise the sparse-to-dense path -- confirm against
/// `HybridBitSet::insert`.
1243+
#[bench]
1244+
fn union_hybrid_sparse_empty_small_domain(b: &mut Bencher) {
1245+
let mut pre_dense: HybridBitSet<usize> = HybridBitSet::new_empty(SPARSE_MAX);
1246+
// Fill the whole domain.
for i in 0..SPARSE_MAX {
1247+
assert!(pre_dense.insert(i));
1248+
}
1249+
let pre_sparse: HybridBitSet<usize> = HybridBitSet::new_empty(SPARSE_MAX);
1250+
b.iter(|| {
1251+
let dense = pre_dense.clone();
1252+
let mut sparse = pre_sparse.clone();
1253+
sparse.union(&dense);
1254+
})
1255+
}
1256+
1257+
/// Benchmark: merge a completely filled hybrid set into another completely filled hybrid
/// set where the domain is very small (`SPARSE_MAX` elements).
///
/// Each iteration clones both sets before the union, so the measured time includes the
/// two clones.
///
/// NOTE(review): `pre_dense` receives only `SPARSE_MAX` elements. If a hybrid set stays
/// sparse until it holds more than `SPARSE_MAX` elements, `pre_dense` is never actually
/// dense and this benchmark does not exercise the sparse-to-dense path -- confirm against
/// `HybridBitSet::insert`.
1258+
#[bench]
1259+
fn union_hybrid_sparse_full_small_domain(b: &mut Bencher) {
1260+
let mut pre_dense: HybridBitSet<usize> = HybridBitSet::new_empty(SPARSE_MAX);
1261+
// Fill the whole domain.
for i in 0..SPARSE_MAX {
1262+
assert!(pre_dense.insert(i));
1263+
}
1264+
let mut pre_sparse: HybridBitSet<usize> = HybridBitSet::new_empty(SPARSE_MAX);
1265+
// Fill the whole domain with the same elements as `pre_dense`.
for i in 0..SPARSE_MAX {
1266+
assert!(pre_sparse.insert(i));
1267+
}
1268+
b.iter(|| {
1269+
let dense = pre_dense.clone();
1270+
let mut sparse = pre_sparse.clone();
1271+
sparse.union(&dense);
1272+
})
1273+
}

0 commit comments

Comments
 (0)