Skip to content

Commit 6ee1a7a

Browse files
committed
Introduce MixedBitSet.
It just uses `BitSet` for small/medium sizes (<= 2048 bits) and `ChunkedBitSet` for larger sizes. This is good because `ChunkedBitSet` is slow and memory-hungry at smaller sizes.
1 parent dff5ce6 commit 6ee1a7a

File tree

4 files changed

+200
-3
lines changed

4 files changed

+200
-3
lines changed

Diff for: compiler/rustc_index/src/bit_set.rs

+155
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,9 @@ impl<'a, T: Idx> Iterator for BitIter<'a, T> {
410410
/// some stretches with lots of 0s and 1s mixed in a way that causes trouble
411411
/// for `IntervalSet`.
412412
///
413+
/// Best used via `MixedBitSet`, rather than directly, because `MixedBitSet`
414+
/// has better performance for small bitsets.
415+
///
413416
/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also
414417
/// just be `usize`.
415418
///
@@ -1106,6 +1109,158 @@ where
11061109
false
11071110
}
11081111

1112+
/// A bitset with a mixed representation, using `BitSet` for small and medium
1113+
/// bitsets, and `ChunkedBitSet` for large bitsets, i.e. those with enough bits
1114+
/// for at least two chunks. This is a good choice for many bitsets that can
1115+
/// have large domain sizes (e.g. 5000+).
1116+
///
1117+
/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also
1118+
/// just be `usize`.
1119+
///
1120+
/// All operations that involve an element will panic if the element is equal
1121+
/// to or greater than the domain size. All operations that involve two bitsets
1122+
/// will panic if the bitsets have differing domain sizes.
1123+
#[derive(PartialEq, Eq)]
1124+
pub enum MixedBitSet<T> {
1125+
/// `BitSet`-backed representation; `new_empty` picks it when
/// `domain_size <= CHUNK_BITS`.
Small(BitSet<T>),
1126+
/// `ChunkedBitSet`-backed representation; `new_empty` picks it when
/// `domain_size > CHUNK_BITS`.
Large(ChunkedBitSet<T>),
1127+
}
1128+
1129+
impl<T> MixedBitSet<T> {
1130+
/// Returns the domain size reported by whichever underlying bitset is in
/// use. Available for any `T` (this impl has no `Idx` bound).
pub fn domain_size(&self) -> usize {
1131+
match self {
1132+
MixedBitSet::Small(set) => set.domain_size(),
1133+
MixedBitSet::Large(set) => set.domain_size(),
1134+
}
1135+
}
1136+
}
1137+
1138+
impl<T: Idx> MixedBitSet<T> {
1139+
/// Creates a set for `domain_size` bits by calling `new_empty` on the
/// chosen representation: `BitSet` when `domain_size <= CHUNK_BITS`,
/// otherwise `ChunkedBitSet`.
#[inline]
1140+
pub fn new_empty(domain_size: usize) -> MixedBitSet<T> {
1141+
if domain_size <= CHUNK_BITS {
1142+
MixedBitSet::Small(BitSet::new_empty(domain_size))
1143+
} else {
1144+
MixedBitSet::Large(ChunkedBitSet::new_empty(domain_size))
1145+
}
1146+
}
1147+
1148+
/// Forwards to `is_empty` on the underlying bitset and returns its result.
#[inline]
1149+
pub fn is_empty(&self) -> bool {
1150+
match self {
1151+
MixedBitSet::Small(set) => set.is_empty(),
1152+
MixedBitSet::Large(set) => set.is_empty(),
1153+
}
1154+
}
1155+
1156+
/// Forwards to `contains` on the underlying bitset and returns its result.
#[inline]
1157+
pub fn contains(&self, elem: T) -> bool {
1158+
match self {
1159+
MixedBitSet::Small(set) => set.contains(elem),
1160+
MixedBitSet::Large(set) => set.contains(elem),
1161+
}
1162+
}
1163+
1164+
/// Forwards to `insert` on the underlying bitset and returns its `bool`
/// result unchanged.
#[inline]
1165+
pub fn insert(&mut self, elem: T) -> bool {
1166+
match self {
1167+
MixedBitSet::Small(set) => set.insert(elem),
1168+
MixedBitSet::Large(set) => set.insert(elem),
1169+
}
1170+
}
1171+
1172+
/// Forwards to `insert_all` on the underlying bitset.
pub fn insert_all(&mut self) {
1173+
match self {
1174+
MixedBitSet::Small(set) => set.insert_all(),
1175+
MixedBitSet::Large(set) => set.insert_all(),
1176+
}
1177+
}
1178+
1179+
/// Forwards to `remove` on the underlying bitset and returns its `bool`
/// result unchanged.
#[inline]
1180+
pub fn remove(&mut self, elem: T) -> bool {
1181+
match self {
1182+
MixedBitSet::Small(set) => set.remove(elem),
1183+
MixedBitSet::Large(set) => set.remove(elem),
1184+
}
1185+
}
1186+
1187+
/// Returns the underlying bitset's iterator, wrapped in the `MixedBitIter`
/// variant that matches this set's representation.
pub fn iter(&self) -> MixedBitIter<'_, T> {
1188+
match self {
1189+
MixedBitSet::Small(set) => MixedBitIter::Small(set.iter()),
1190+
MixedBitSet::Large(set) => MixedBitIter::Large(set.iter()),
1191+
}
1192+
}
1193+
1194+
// NOTE(review): `bit_relations_inherent_impls!` is defined outside this
// view; presumably it generates inherent `union`/`subtract`/`intersect`
// wrappers over the `BitRelations` impl -- confirm against the macro.
bit_relations_inherent_impls! {}
1195+
}
1196+
1197+
impl<T> Clone for MixedBitSet<T> {
1198+
/// Clones the underlying bitset and wraps it in the same variant.
fn clone(&self) -> Self {
1199+
match self {
1200+
MixedBitSet::Small(set) => MixedBitSet::Small(set.clone()),
1201+
MixedBitSet::Large(set) => MixedBitSet::Large(set.clone()),
1202+
}
1203+
}
1204+
1205+
/// WARNING: this implementation of clone_from may panic if the two
1206+
/// bitsets have different domain sizes. This constraint is not inherent to
1207+
/// `clone_from`, but it works with the existing call sites and allows a
1208+
/// faster implementation, which is important because this function is hot.
1209+
fn clone_from(&mut self, from: &Self) {
1210+
match (self, from) {
1211+
MixedBitSet::Small(set), MixedBitSet::Small(from)) => set.clone_from(from),
1212+
(MixedBitSet::Large(set), MixedBitSet::Large(from)) => set.clone_from(from),
1213+
// The two sets use different representations. `new_empty` picks the
// representation from the domain size, so for sets built that way this
// means the domain sizes differ.
_ => panic!("MixedBitSet size mismatch"),
1214+
}
1215+
}
1216+
}
1217+
1218+
impl<T: Idx> BitRelations<MixedBitSet<T>> for MixedBitSet<T> {
1219+
/// Forwards to `union` on the underlying bitsets when both sets use the
/// same representation, returning its `bool` result.
///
/// # Panics
///
/// Panics with "MixedBitSet size mismatch" if one set is `Small` and the
/// other is `Large`.
fn union(&mut self, other: &MixedBitSet<T>) -> bool {
1220+
match (self, other) {
1221+
(MixedBitSet::Small(set), MixedBitSet::Small(other)) => set.union(other),
1222+
(MixedBitSet::Large(set), MixedBitSet::Large(other)) => set.union(other),
1223+
_ => panic!("MixedBitSet size mismatch"),
1224+
}
1225+
}
1226+
1227+
/// Forwards to `subtract` on the underlying bitsets when both sets use the
/// same representation, returning its `bool` result.
///
/// # Panics
///
/// Panics with "MixedBitSet size mismatch" if one set is `Small` and the
/// other is `Large`.
fn subtract(&mut self, other: &MixedBitSet<T>) -> bool {
1228+
match (self, other) {
1229+
(MixedBitSet::Small(set), MixedBitSet::Small(other)) => set.subtract(other),
1230+
(MixedBitSet::Large(set), MixedBitSet::Large(other)) => set.subtract(other),
1231+
_ => panic!("MixedBitSet size mismatch"),
1232+
}
1233+
}
1234+
1235+
/// Not implemented yet.
///
/// # Panics
///
/// Always panics via `unimplemented!`.
fn intersect(&mut self, _other: &MixedBitSet<T>) -> bool {
1236+
unimplemented!("implement if/when necessary");
1237+
}
1238+
}
1239+
1240+
impl<T: Idx> fmt::Debug for MixedBitSet<T> {
1241+
/// Delegates to `fmt` on the underlying bitset, so the output does not
/// include the `Small`/`Large` variant name.
fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result {
1242+
match self {
1243+
MixedBitSet::Small(set) => set.fmt(w),
1244+
MixedBitSet::Large(set) => set.fmt(w),
1245+
}
1246+
}
1247+
}
1248+
1249+
/// Iterator returned by `MixedBitSet::iter`, with one variant per
/// `MixedBitSet` representation.
pub enum MixedBitIter<'a, T: Idx> {
1250+
/// Wraps the iterator of a `MixedBitSet::Small` set.
Small(BitIter<'a, T>),
1251+
/// Wraps the iterator of a `MixedBitSet::Large` set.
Large(ChunkedBitIter<'a, T>),
1252+
}
1253+
1254+
impl<'a, T: Idx> Iterator for MixedBitIter<'a, T> {
1255+
type Item = T;
1256+
/// Advances whichever wrapped iterator is in use and returns its item.
fn next(&mut self) -> Option<T> {
1257+
match self {
1258+
MixedBitIter::Small(iter) => iter.next(),
1259+
MixedBitIter::Large(iter) => iter.next(),
1260+
}
1261+
}
1262+
}
1263+
11091264
/// A resizable bitset type with a dense representation.
11101265
///
11111266
/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also

Diff for: compiler/rustc_mir_dataflow/src/framework/fmt.rs

+21-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
use std::fmt;
55

66
use rustc_index::Idx;
7-
use rustc_index::bit_set::{BitSet, ChunkedBitSet};
7+
use rustc_index::bit_set::{BitSet, ChunkedBitSet, MixedBitSet};
88

99
use super::lattice::MaybeReachable;
1010

@@ -127,6 +127,26 @@ where
127127
}
128128
}
129129

130+
/// `DebugWithContext` for `MixedBitSet`, implemented by delegating to the
/// impl of the underlying bitset.
impl<T, C> DebugWithContext<C> for MixedBitSet<T>
131+
where
132+
T: Idx + DebugWithContext<C>,
133+
{
134+
/// Delegates to `fmt_with` on the underlying bitset.
fn fmt_with(&self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
135+
match self {
136+
MixedBitSet::Small(set) => set.fmt_with(ctxt, f),
137+
MixedBitSet::Large(set) => set.fmt_with(ctxt, f),
138+
}
139+
}
140+
141+
/// Delegates to `fmt_diff_with` on the underlying bitsets when `self` and
/// `old` use the same representation.
///
/// # Panics
///
/// Panics with "MixedBitSet size mismatch" if one set is `Small` and the
/// other is `Large`.
fn fmt_diff_with(&self, old: &Self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
142+
match (self, old) {
143+
(MixedBitSet::Small(set), MixedBitSet::Small(old)) => set.fmt_diff_with(old, ctxt, f),
144+
(MixedBitSet::Large(set), MixedBitSet::Large(old)) => set.fmt_diff_with(old, ctxt, f),
145+
_ => panic!("MixedBitSet size mismatch"),
146+
}
147+
}
148+
}
149+
130150
impl<S, C> DebugWithContext<C> for MaybeReachable<S>
131151
where
132152
S: DebugWithContext<C>,

Diff for: compiler/rustc_mir_dataflow/src/framework/lattice.rs

+7-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
4141
use std::iter;
4242

43-
use rustc_index::bit_set::{BitSet, ChunkedBitSet};
43+
use rustc_index::bit_set::{BitSet, ChunkedBitSet, MixedBitSet};
4444
use rustc_index::{Idx, IndexVec};
4545

4646
use crate::framework::BitSetExt;
@@ -132,6 +132,12 @@ impl<T: Idx> JoinSemiLattice for ChunkedBitSet<T> {
132132
}
133133
}
134134

135+
impl<T: Idx> JoinSemiLattice for MixedBitSet<T> {
136+
/// Joins by calling `union` with `other` and returning its `bool` result.
fn join(&mut self, other: &Self) -> bool {
137+
self.union(other)
138+
}
139+
}
140+
135141
/// Extends a type `T` with top and bottom elements to make it a partially ordered set in which no
136142
/// value of `T` is comparable with any other.
137143
///

Diff for: compiler/rustc_mir_dataflow/src/framework/mod.rs

+17-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
use std::cmp::Ordering;
3636

3737
use rustc_data_structures::work_queue::WorkQueue;
38-
use rustc_index::bit_set::{BitSet, ChunkedBitSet};
38+
use rustc_index::bit_set::{BitSet, ChunkedBitSet, MixedBitSet};
3939
use rustc_index::{Idx, IndexVec};
4040
use rustc_middle::bug;
4141
use rustc_middle::mir::{self, BasicBlock, CallReturnPlaces, Location, TerminatorEdges, traversal};
@@ -77,6 +77,12 @@ impl<T: Idx> BitSetExt<T> for ChunkedBitSet<T> {
7777
}
7878
}
7979

80+
impl<T: Idx> BitSetExt<T> for MixedBitSet<T> {
81+
/// Reports membership of `elem` by delegating to `MixedBitSet::contains`.
fn contains(&self, elem: T) -> bool {
82+
// Not self-recursion: inherent methods take precedence over trait
// methods, so this resolves to the inherent `MixedBitSet::contains`.
self.contains(elem)
83+
}
84+
}
85+
8086
/// A dataflow problem with an arbitrarily complex transfer function.
8187
///
8288
/// This trait specifies the lattice on which this analysis operates (the domain), its
@@ -337,6 +343,16 @@ impl<T: Idx> GenKill<T> for ChunkedBitSet<T> {
337343
}
338344
}
339345

346+
impl<T: Idx> GenKill<T> for MixedBitSet<T> {
347+
/// Calls `insert(elem)`; the `bool` it returns is discarded.
fn gen_(&mut self, elem: T) {
348+
self.insert(elem);
349+
}
350+
351+
/// Calls `remove(elem)`; the `bool` it returns is discarded.
fn kill(&mut self, elem: T) {
352+
self.remove(elem);
353+
}
354+
}
355+
340356
impl<T, S: GenKill<T>> GenKill<T> for MaybeReachable<S> {
341357
fn gen_(&mut self, elem: T) {
342358
match self {

0 commit comments

Comments
 (0)