Skip to content

Commit e152189

Browse files
committed
Introduce MixedBitSet.
`MixedBitSet` uses `BitSet` for small and medium domain sizes (<= 2048 bits, i.e. at most one chunk) and `ChunkedBitSet` for larger ones. This helps because `ChunkedBitSet` is slow and memory-hungry at smaller sizes.
1 parent dff5ce6 commit e152189

File tree

4 files changed

+202
-3
lines changed

4 files changed

+202
-3
lines changed

compiler/rustc_index/src/bit_set.rs

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,9 @@ impl<'a, T: Idx> Iterator for BitIter<'a, T> {
410410
/// some stretches with lots of 0s and 1s mixed in a way that causes trouble
411411
/// for `IntervalSet`.
412412
///
413+
/// Best used via `MixedBitSet`, rather than directly, because `MixedBitSet`
414+
/// has better performance for small bitsets.
415+
///
413416
/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also
414417
/// just be `usize`.
415418
///
@@ -1106,6 +1109,160 @@ where
11061109
false
11071110
}
11081111

1112+
/// A bitset with a mixed representation, using `BitSet` for small and medium
1113+
/// bitsets, and `ChunkedBitSet` for large bitsets, i.e. those with enough bits
1114+
/// for at least two chunks. This is a good choice for many bitsets that can
1115+
/// have large domain sizes (e.g. 5000+).
1116+
///
1117+
/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also
1118+
/// just be `usize`.
1119+
///
1120+
/// All operations that involve an element will panic if the element is equal
1121+
/// to or greater than the domain size. All operations that involve two bitsets
1122+
/// will panic if the bitsets have differing domain sizes.
1123+
#[derive(PartialEq, Eq)]
1124+
pub enum MixedBitSet<T> {
1125+
Small(BitSet<T>),
1126+
Large(ChunkedBitSet<T>),
1127+
}
1128+
1129+
impl<T> MixedBitSet<T> {
1130+
/// Gets the domain size.
1131+
pub fn domain_size(&self) -> usize {
1132+
match self {
1133+
MixedBitSet::Small(set) => set.domain_size(),
1134+
MixedBitSet::Large(set) => set.domain_size(),
1135+
}
1136+
}
1137+
}
1138+
1139+
impl<T: Idx> MixedBitSet<T> {
1140+
/// Creates a new, empty bitset with a given `domain_size`.
1141+
#[inline]
1142+
pub fn new_empty(domain_size: usize) -> MixedBitSet<T> {
1143+
if domain_size <= CHUNK_BITS {
1144+
MixedBitSet::Small(BitSet::new_empty(domain_size))
1145+
} else {
1146+
MixedBitSet::Large(ChunkedBitSet::new_empty(domain_size))
1147+
}
1148+
}
1149+
1150+
#[inline]
1151+
pub fn is_empty(&self) -> bool {
1152+
match self {
1153+
MixedBitSet::Small(set) => set.is_empty(),
1154+
MixedBitSet::Large(set) => set.is_empty(),
1155+
}
1156+
}
1157+
1158+
#[inline]
1159+
pub fn contains(&self, elem: T) -> bool {
1160+
match self {
1161+
MixedBitSet::Small(set) => set.contains(elem),
1162+
MixedBitSet::Large(set) => set.contains(elem),
1163+
}
1164+
}
1165+
1166+
#[inline]
1167+
pub fn insert(&mut self, elem: T) -> bool {
1168+
match self {
1169+
MixedBitSet::Small(set) => set.insert(elem),
1170+
MixedBitSet::Large(set) => set.insert(elem),
1171+
}
1172+
}
1173+
1174+
pub fn insert_all(&mut self) {
1175+
match self {
1176+
MixedBitSet::Small(set) => set.insert_all(),
1177+
MixedBitSet::Large(set) => set.insert_all(),
1178+
}
1179+
}
1180+
1181+
#[inline]
1182+
pub fn remove(&mut self, elem: T) -> bool {
1183+
match self {
1184+
MixedBitSet::Small(set) => set.remove(elem),
1185+
MixedBitSet::Large(set) => set.remove(elem),
1186+
}
1187+
}
1188+
1189+
pub fn iter(&self) -> MixedBitIter<'_, T> {
1190+
match self {
1191+
MixedBitSet::Small(set) => MixedBitIter::Small(set.iter()),
1192+
MixedBitSet::Large(set) => MixedBitIter::Large(set.iter()),
1193+
}
1194+
}
1195+
1196+
bit_relations_inherent_impls! {}
1197+
}
1198+
1199+
impl<T> Clone for MixedBitSet<T> {
1200+
fn clone(&self) -> Self {
1201+
match self {
1202+
MixedBitSet::Small(set) => MixedBitSet::Small(set.clone()),
1203+
MixedBitSet::Large(set) => MixedBitSet::Large(set.clone()),
1204+
}
1205+
}
1206+
1207+
/// WARNING: this implementation of clone_from may panic if the two
1208+
/// bitsets have different domain sizes. This constraint is not inherent to
1209+
/// `clone_from`, but it works with the existing call sites and allows a
1210+
/// faster implementation, which is important because this function is hot.
1211+
fn clone_from(&mut self, from: &Self) {
1212+
match (self, from) {
1213+
(MixedBitSet::Small(set), MixedBitSet::Small(from)) => set.clone_from(from),
1214+
(MixedBitSet::Large(set), MixedBitSet::Large(from)) => set.clone_from(from),
1215+
_ => panic!("MixedBitSet size mismatch"),
1216+
}
1217+
}
1218+
}
1219+
1220+
impl<T: Idx> BitRelations<MixedBitSet<T>> for MixedBitSet<T> {
    /// Forwards to `union` on the underlying sets when both operands use the
    /// same representation; panics otherwise.
    fn union(&mut self, other: &MixedBitSet<T>) -> bool {
        match (self, other) {
            (Self::Small(lhs), Self::Small(rhs)) => lhs.union(rhs),
            (Self::Large(lhs), Self::Large(rhs)) => lhs.union(rhs),
            _ => panic!("MixedBitSet size mismatch"),
        }
    }

    /// Forwards to `subtract` on the underlying sets when both operands use
    /// the same representation; panics otherwise.
    fn subtract(&mut self, other: &MixedBitSet<T>) -> bool {
        match (self, other) {
            (Self::Small(lhs), Self::Small(rhs)) => lhs.subtract(rhs),
            (Self::Large(lhs), Self::Large(rhs)) => lhs.subtract(rhs),
            _ => panic!("MixedBitSet size mismatch"),
        }
    }

    /// Not implemented yet: always panics via `unimplemented!`.
    fn intersect(&mut self, _other: &MixedBitSet<T>) -> bool {
        unimplemented!("implement if/when necessary");
    }
}
1241+
1242+
impl<T: Idx> fmt::Debug for MixedBitSet<T> {
    /// Formats exactly as the underlying set does; the wrapper variant itself
    /// is not printed.
    fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Small(small) => small.fmt(w),
            Self::Large(large) => large.fmt(w),
        }
    }
}
1250+
1251+
pub enum MixedBitIter<'a, T: Idx> {
1252+
Small(BitIter<'a, T>),
1253+
Large(ChunkedBitIter<'a, T>),
1254+
}
1255+
1256+
impl<'a, T: Idx> Iterator for MixedBitIter<'a, T> {
    type Item = T;

    /// Yields the next element from the wrapped iterator.
    fn next(&mut self) -> Option<T> {
        match self {
            Self::Small(inner) => inner.next(),
            Self::Large(inner) => inner.next(),
        }
    }
}
1265+
11091266
/// A resizable bitset type with a dense representation.
11101267
///
11111268
/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also

compiler/rustc_mir_dataflow/src/framework/fmt.rs

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
use std::fmt;
55

66
use rustc_index::Idx;
7-
use rustc_index::bit_set::{BitSet, ChunkedBitSet};
7+
use rustc_index::bit_set::{BitSet, ChunkedBitSet, MixedBitSet};
88

99
use super::lattice::MaybeReachable;
1010

@@ -127,6 +127,26 @@ where
127127
}
128128
}
129129

130+
impl<T, C> DebugWithContext<C> for MixedBitSet<T>
131+
where
132+
T: Idx + DebugWithContext<C>,
133+
{
134+
fn fmt_with(&self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
135+
match self {
136+
MixedBitSet::Small(set) => set.fmt_with(ctxt, f),
137+
MixedBitSet::Large(set) => set.fmt_with(ctxt, f),
138+
}
139+
}
140+
141+
fn fmt_diff_with(&self, old: &Self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
142+
match (self, old) {
143+
(MixedBitSet::Small(set), MixedBitSet::Small(old)) => set.fmt_diff_with(old, ctxt, f),
144+
(MixedBitSet::Large(set), MixedBitSet::Large(old)) => set.fmt_diff_with(old, ctxt, f),
145+
_ => panic!("MixedBitSet size mismatch"),
146+
}
147+
}
148+
}
149+
130150
impl<S, C> DebugWithContext<C> for MaybeReachable<S>
131151
where
132152
S: DebugWithContext<C>,

compiler/rustc_mir_dataflow/src/framework/lattice.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
4141
use std::iter;
4242

43-
use rustc_index::bit_set::{BitSet, ChunkedBitSet};
43+
use rustc_index::bit_set::{BitSet, ChunkedBitSet, MixedBitSet};
4444
use rustc_index::{Idx, IndexVec};
4545

4646
use crate::framework::BitSetExt;
@@ -132,6 +132,12 @@ impl<T: Idx> JoinSemiLattice for ChunkedBitSet<T> {
132132
}
133133
}
134134

135+
impl<T: Idx> JoinSemiLattice for MixedBitSet<T> {
    /// The join of two bitsets is their union; returns whatever `union`
    /// returns.
    // NOTE(review): the `bool` presumably reports whether `self` changed —
    // confirm against the `JoinSemiLattice` contract.
    fn join(&mut self, other: &Self) -> bool {
        self.union(other)
    }
}
140+
135141
/// Extends a type `T` with top and bottom elements to make it a partially ordered set in which no
136142
/// value of `T` is comparable with any other.
137143
///

compiler/rustc_mir_dataflow/src/framework/mod.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
use std::cmp::Ordering;
3636

3737
use rustc_data_structures::work_queue::WorkQueue;
38-
use rustc_index::bit_set::{BitSet, ChunkedBitSet};
38+
use rustc_index::bit_set::{BitSet, ChunkedBitSet, MixedBitSet};
3939
use rustc_index::{Idx, IndexVec};
4040
use rustc_middle::bug;
4141
use rustc_middle::mir::{self, BasicBlock, CallReturnPlaces, Location, TerminatorEdges, traversal};
@@ -77,6 +77,12 @@ impl<T: Idx> BitSetExt<T> for ChunkedBitSet<T> {
7777
}
7878
}
7979

80+
impl<T: Idx> BitSetExt<T> for MixedBitSet<T> {
81+
fn contains(&self, elem: T) -> bool {
82+
self.contains(elem)
83+
}
84+
}
85+
8086
/// A dataflow problem with an arbitrarily complex transfer function.
8187
///
8288
/// This trait specifies the lattice on which this analysis operates (the domain), its
@@ -337,6 +343,16 @@ impl<T: Idx> GenKill<T> for ChunkedBitSet<T> {
337343
}
338344
}
339345

346+
impl<T: Idx> GenKill<T> for MixedBitSet<T> {
    /// Generates `elem` by inserting it into the set.
    fn gen_(&mut self, elem: T) {
        // The `bool` returned by `insert` is deliberately discarded.
        self.insert(elem);
    }

    /// Kills `elem` by removing it from the set.
    fn kill(&mut self, elem: T) {
        // The `bool` returned by `remove` is deliberately discarded.
        self.remove(elem);
    }
}
355+
340356
impl<T, S: GenKill<T>> GenKill<T> for MaybeReachable<S> {
341357
fn gen_(&mut self, elem: T) {
342358
match self {

0 commit comments

Comments
 (0)