Skip to content

Commit 154be2c

Browse files
committed
Use HybridBitSet for rows within SparseBitMatrix.
This requires adding a few extra methods to `HybridBitSet`. (These are tested in a new unit test.) This commit reduces the `max-rss` for `nll-check` builds of `html5ever` by 46%, `ucd` by 45%, `clap-rs` by 23%, `inflate` by 14%. And the results for the `unic-ucd-name` crate are even more impressive: a 21% reduction in instructions, a 60% reduction in wall-time, a 96% reduction in `max-rss`, and a 97% reduction in faults! Fixes #52028.
1 parent 687cc29 commit 154be2c

File tree

3 files changed

+179
-36
lines changed

3 files changed

+179
-36
lines changed

src/librustc_data_structures/bit_set.rs

+167-24
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,10 @@ impl<T: Idx> SparseBitSet<T> {
337337
self.0.len()
338338
}
339339

340+
fn is_empty(&self) -> bool {
341+
self.0.len() == 0
342+
}
343+
340344
fn contains(&self, elem: T) -> bool {
341345
self.0.contains(&elem)
342346
}
@@ -417,15 +421,28 @@ pub enum HybridBitSet<T: Idx> {
417421
}
418422

419423
impl<T: Idx> HybridBitSet<T> {
424+
// FIXME: This function is used in conjunction with `mem::replace()` in
425+
// several pieces of awful code below. I can't work out how else to appease
426+
// the borrow checker.
427+
fn dummy() -> Self {
428+
// The cheapest HybridBitSet to construct, which is only used to get
429+
// around the borrow checker.
430+
HybridBitSet::Sparse(SparseBitSet::new_empty(), 0)
431+
}
432+
420433
pub fn new_empty(domain_size: usize) -> Self {
421434
HybridBitSet::Sparse(SparseBitSet::new_empty(), domain_size)
422435
}
423436

424-
pub fn clear(&mut self) {
425-
let domain_size = match *self {
437+
pub fn domain_size(&self) -> usize {
438+
match *self {
426439
HybridBitSet::Sparse(_, size) => size,
427440
HybridBitSet::Dense(_, size) => size,
428-
};
441+
}
442+
}
443+
444+
pub fn clear(&mut self) {
445+
let domain_size = self.domain_size();
429446
*self = HybridBitSet::new_empty(domain_size);
430447
}
431448

@@ -436,6 +453,22 @@ impl<T: Idx> HybridBitSet<T> {
436453
}
437454
}
438455

456+
pub fn superset(&self, other: &HybridBitSet<T>) -> bool {
457+
match (self, other) {
458+
(HybridBitSet::Dense(self_dense, _), HybridBitSet::Dense(other_dense, _)) => {
459+
self_dense.superset(other_dense)
460+
}
461+
_ => other.iter().all(|elem| self.contains(elem)),
462+
}
463+
}
464+
465+
pub fn is_empty(&self) -> bool {
466+
match self {
467+
HybridBitSet::Sparse(sparse, _) => sparse.is_empty(),
468+
HybridBitSet::Dense(dense, _) => dense.is_empty(),
469+
}
470+
}
471+
439472
pub fn insert(&mut self, elem: T) -> bool {
440473
match self {
441474
HybridBitSet::Sparse(sparse, _) if sparse.len() < SPARSE_MAX => {
@@ -449,26 +482,33 @@ impl<T: Idx> HybridBitSet<T> {
449482
}
450483
HybridBitSet::Sparse(_, _) => {
451484
// The set is sparse and full. Convert to a dense set.
452-
//
453-
// FIXME: This code is awful, but I can't work out how else to
454-
// appease the borrow checker.
455-
let dummy = HybridBitSet::Sparse(SparseBitSet::new_empty(), 0);
456-
match mem::replace(self, dummy) {
485+
match mem::replace(self, HybridBitSet::dummy()) {
457486
HybridBitSet::Sparse(sparse, domain_size) => {
458487
let mut dense = sparse.to_dense(domain_size);
459488
let changed = dense.insert(elem);
460489
assert!(changed);
461-
mem::replace(self, HybridBitSet::Dense(dense, domain_size));
490+
*self = HybridBitSet::Dense(dense, domain_size);
462491
changed
463492
}
464-
_ => panic!("impossible"),
493+
_ => unreachable!()
465494
}
466495
}
467496

468497
HybridBitSet::Dense(dense, _) => dense.insert(elem),
469498
}
470499
}
471500

501+
pub fn insert_all(&mut self) {
502+
let domain_size = self.domain_size();
503+
match self {
504+
HybridBitSet::Sparse(_, _) => {
505+
let dense = BitSet::new_filled(domain_size);
506+
*self = HybridBitSet::Dense(dense, domain_size);
507+
}
508+
HybridBitSet::Dense(dense, _) => dense.insert_all(),
509+
}
510+
}
511+
472512
pub fn remove(&mut self, elem: T) -> bool {
473513
// Note: we currently don't bother going from Dense back to Sparse.
474514
match self {
@@ -477,6 +517,42 @@ impl<T: Idx> HybridBitSet<T> {
477517
}
478518
}
479519

520+
pub fn union(&mut self, other: &HybridBitSet<T>) -> bool {
521+
match self {
522+
HybridBitSet::Sparse(_, _) => {
523+
match other {
524+
HybridBitSet::Sparse(other_sparse, _) => {
525+
// Both sets are sparse. Add the elements in
526+
// `other_sparse` to `self_hybrid` one at a time. This
527+
// may or may not cause `self_hybrid` to be densified.
528+
let mut self_hybrid = mem::replace(self, HybridBitSet::dummy());
529+
let mut changed = false;
530+
for elem in other_sparse.iter() {
531+
changed |= self_hybrid.insert(*elem);
532+
}
533+
*self = self_hybrid;
534+
changed
535+
}
536+
HybridBitSet::Dense(other_dense, _) => {
537+
// `self` is sparse and `other` is dense. Densify
538+
// `self` and then do the bitwise union.
539+
match mem::replace(self, HybridBitSet::dummy()) {
540+
HybridBitSet::Sparse(self_sparse, self_domain_size) => {
541+
let mut new_dense = self_sparse.to_dense(self_domain_size);
542+
let changed = new_dense.union(other_dense);
543+
*self = HybridBitSet::Dense(new_dense, self_domain_size);
544+
changed
545+
}
546+
_ => unreachable!()
547+
}
548+
}
549+
}
550+
}
551+
552+
HybridBitSet::Dense(self_dense, _) => self_dense.union(other),
553+
}
554+
}
555+
480556
/// Converts to a dense set, consuming itself in the process.
481557
pub fn to_dense(self) -> BitSet<T> {
482558
match self {
@@ -687,11 +763,10 @@ impl<R: Idx, C: Idx> BitMatrix<R, C> {
687763
/// sparse representation.
688764
///
689765
/// Initially, every row has no explicit representation. If any bit within a
690-
/// row is set, the entire row is instantiated as
691-
/// `Some(<full-column-width-BitSet>)`. Furthermore, any previously
692-
/// uninstantiated rows prior to it will be instantiated as `None`. Those prior
693-
/// rows may themselves become fully instantiated later on if any of their bits
694-
/// are set.
766+
/// row is set, the entire row is instantiated as `Some(<HybridBitSet>)`.
767+
/// Furthermore, any previously uninstantiated rows prior to it will be
768+
/// instantiated as `None`. Those prior rows may themselves become fully
769+
/// instantiated later on if any of their bits are set.
695770
///
696771
/// `R` and `C` are index types used to identify rows and columns respectively;
697772
/// typically newtyped `usize` wrappers, but they can also just be `usize`.
@@ -702,7 +777,7 @@ where
702777
C: Idx,
703778
{
704779
num_columns: usize,
705-
rows: IndexVec<R, Option<BitSet<C>>>,
780+
rows: IndexVec<R, Option<HybridBitSet<C>>>,
706781
}
707782

708783
impl<R: Idx, C: Idx> SparseBitMatrix<R, C> {
@@ -714,14 +789,14 @@ impl<R: Idx, C: Idx> SparseBitMatrix<R, C> {
714789
}
715790
}
716791

717-
fn ensure_row(&mut self, row: R) -> &mut BitSet<C> {
792+
fn ensure_row(&mut self, row: R) -> &mut HybridBitSet<C> {
718793
// Instantiate any missing rows up to and including row `row` with an
719-
// empty BitSet.
794+
// empty HybridBitSet.
720795
self.rows.ensure_contains_elem(row, || None);
721796

722-
// Then replace row `row` with a full BitSet if necessary.
797+
// Then replace row `row` with a full HybridBitSet if necessary.
723798
let num_columns = self.num_columns;
724-
self.rows[row].get_or_insert_with(|| BitSet::new_empty(num_columns))
799+
self.rows[row].get_or_insert_with(|| HybridBitSet::new_empty(num_columns))
725800
}
726801

727802
/// Sets the cell at `(row, column)` to true. Put another way, insert
@@ -760,8 +835,8 @@ impl<R: Idx, C: Idx> SparseBitMatrix<R, C> {
760835
}
761836
}
762837

763-
/// Merge a row, `from`, into the `into` row.
764-
pub fn union_into_row(&mut self, into: R, from: &BitSet<C>) -> bool {
838+
/// Union a row, `from`, into the `into` row.
839+
pub fn union_into_row(&mut self, into: R, from: &HybridBitSet<C>) -> bool {
765840
self.ensure_row(into).union(from)
766841
}
767842

@@ -780,7 +855,7 @@ impl<R: Idx, C: Idx> SparseBitMatrix<R, C> {
780855
self.row(row).into_iter().flat_map(|r| r.iter())
781856
}
782857

783-
pub fn row(&self, row: R) -> Option<&BitSet<C>> {
858+
pub fn row(&self, row: R) -> Option<&HybridBitSet<C>> {
784859
if let Some(Some(row)) = self.rows.get(row) {
785860
Some(row)
786861
} else {
@@ -871,7 +946,7 @@ fn bitset_iter_works_2() {
871946
}
872947

873948
#[test]
874-
fn union_two_vecs() {
949+
fn union_two_sets() {
875950
let mut set1: BitSet<usize> = BitSet::new_empty(65);
876951
let mut set2: BitSet<usize> = BitSet::new_empty(65);
877952
assert!(set1.insert(3));
@@ -887,6 +962,74 @@ fn union_two_vecs() {
887962
assert!(set1.contains(64));
888963
}
889964

965+
#[test]
966+
fn hybrid_bitset() {
967+
let mut sparse038: HybridBitSet<usize> = HybridBitSet::new_empty(256);
968+
assert!(sparse038.is_empty());
969+
assert!(sparse038.insert(0));
970+
assert!(sparse038.insert(1));
971+
assert!(sparse038.insert(8));
972+
assert!(sparse038.insert(3));
973+
assert!(!sparse038.insert(3));
974+
assert!(sparse038.remove(1));
975+
assert!(!sparse038.is_empty());
976+
assert_eq!(sparse038.iter().collect::<Vec<_>>(), [0, 3, 8]);
977+
978+
for i in 0..256 {
979+
if i == 0 || i == 3 || i == 8 {
980+
assert!(sparse038.contains(i));
981+
} else {
982+
assert!(!sparse038.contains(i));
983+
}
984+
}
985+
986+
let mut sparse01358 = sparse038.clone();
987+
assert!(sparse01358.insert(1));
988+
assert!(sparse01358.insert(5));
989+
assert_eq!(sparse01358.iter().collect::<Vec<_>>(), [0, 1, 3, 5, 8]);
990+
991+
let mut dense10 = HybridBitSet::new_empty(256);
992+
for i in 0..10 {
993+
assert!(dense10.insert(i));
994+
}
995+
assert!(!dense10.is_empty());
996+
assert_eq!(dense10.iter().collect::<Vec<_>>(), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
997+
998+
let mut dense256 = HybridBitSet::new_empty(256);
999+
assert!(dense256.is_empty());
1000+
dense256.insert_all();
1001+
assert!(!dense256.is_empty());
1002+
for i in 0..256 {
1003+
assert!(dense256.contains(i));
1004+
}
1005+
1006+
assert!(sparse038.superset(&sparse038)); // sparse + sparse (self)
1007+
assert!(sparse01358.superset(&sparse038)); // sparse + sparse
1008+
assert!(dense10.superset(&sparse038)); // dense + sparse
1009+
assert!(dense10.superset(&dense10)); // dense + dense (self)
1010+
assert!(dense256.superset(&dense10)); // dense + dense
1011+
1012+
let mut hybrid = sparse038;
1013+
assert!(!sparse01358.union(&hybrid)); // no change
1014+
assert!(hybrid.union(&sparse01358));
1015+
assert!(hybrid.superset(&sparse01358) && sparse01358.superset(&hybrid));
1016+
assert!(!dense10.union(&sparse01358));
1017+
assert!(!dense256.union(&dense10));
1018+
let mut dense = dense10;
1019+
assert!(dense.union(&dense256));
1020+
assert!(dense.superset(&dense256) && dense256.superset(&dense));
1021+
assert!(hybrid.union(&dense256));
1022+
assert!(hybrid.superset(&dense256) && dense256.superset(&hybrid));
1023+
1024+
assert_eq!(dense256.iter().count(), 256);
1025+
let mut dense0 = dense256;
1026+
for i in 0..256 {
1027+
assert!(dense0.remove(i));
1028+
}
1029+
assert!(!dense0.remove(0));
1030+
assert!(dense0.is_empty());
1031+
}
1032+
8901033
#[test]
8911034
fn grow() {
8921035
let mut set: GrowableBitSet<usize> = GrowableBitSet::with_capacity(65);

src/librustc_mir/borrow_check/nll/region_infer/values.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
use rustc::mir::{BasicBlock, Location, Mir};
1212
use rustc::ty::{self, RegionVid};
13-
use rustc_data_structures::bit_set::{BitSet, SparseBitMatrix};
13+
use rustc_data_structures::bit_set::{HybridBitSet, SparseBitMatrix};
1414
use rustc_data_structures::indexed_vec::Idx;
1515
use rustc_data_structures::indexed_vec::IndexVec;
1616
use std::fmt::Debug;
@@ -184,7 +184,7 @@ impl<N: Idx> LivenessValues<N> {
184184

185185
/// Adds all the elements in the given bit array into the given
186186
/// region. Returns true if any of them are newly added.
187-
crate fn add_elements(&mut self, row: N, locations: &BitSet<PointIndex>) -> bool {
187+
crate fn add_elements(&mut self, row: N, locations: &HybridBitSet<PointIndex>) -> bool {
188188
debug!("LivenessValues::add_elements(row={:?}, locations={:?})", row, locations);
189189
self.points.union_into_row(row, locations)
190190
}

src/librustc_mir/borrow_check/nll/type_check/liveness/trace.rs

+10-10
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use rustc::traits::query::dropck_outlives::DropckOutlivesResult;
2222
use rustc::traits::query::type_op::outlives::DropckOutlives;
2323
use rustc::traits::query::type_op::TypeOp;
2424
use rustc::ty::{Ty, TypeFoldable};
25-
use rustc_data_structures::bit_set::BitSet;
25+
use rustc_data_structures::bit_set::HybridBitSet;
2626
use rustc_data_structures::fx::FxHashMap;
2727
use std::rc::Rc;
2828
use util::liveness::LiveVariableMap;
@@ -121,16 +121,16 @@ where
121121
cx: LivenessContext<'me, 'typeck, 'flow, 'gcx, 'tcx>,
122122

123123
/// Set of points that define the current local.
124-
defs: BitSet<PointIndex>,
124+
defs: HybridBitSet<PointIndex>,
125125

126126
/// Points where the current variable is "use live" -- meaning
127127
/// that there is a future "full use" that may use its value.
128-
use_live_at: BitSet<PointIndex>,
128+
use_live_at: HybridBitSet<PointIndex>,
129129

130130
/// Points where the current variable is "drop live" -- meaning
131131
/// that there is no future "full use" that may use its value, but
132132
/// there is a future drop.
133-
drop_live_at: BitSet<PointIndex>,
133+
drop_live_at: HybridBitSet<PointIndex>,
134134

135135
/// Locations where drops may occur.
136136
drop_locations: Vec<Location>,
@@ -144,9 +144,9 @@ impl LivenessResults<'me, 'typeck, 'flow, 'gcx, 'tcx> {
144144
let num_points = cx.elements.num_points();
145145
LivenessResults {
146146
cx,
147-
defs: BitSet::new_empty(num_points),
148-
use_live_at: BitSet::new_empty(num_points),
149-
drop_live_at: BitSet::new_empty(num_points),
147+
defs: HybridBitSet::new_empty(num_points),
148+
use_live_at: HybridBitSet::new_empty(num_points),
149+
drop_live_at: HybridBitSet::new_empty(num_points),
150150
drop_locations: vec![],
151151
stack: vec![],
152152
}
@@ -448,7 +448,7 @@ impl LivenessContext<'_, '_, '_, '_, 'tcx> {
448448
fn add_use_live_facts_for(
449449
&mut self,
450450
value: impl TypeFoldable<'tcx>,
451-
live_at: &BitSet<PointIndex>,
451+
live_at: &HybridBitSet<PointIndex>,
452452
) {
453453
debug!("add_use_live_facts_for(value={:?})", value);
454454

@@ -465,7 +465,7 @@ impl LivenessContext<'_, '_, '_, '_, 'tcx> {
465465
dropped_local: Local,
466466
dropped_ty: Ty<'tcx>,
467467
drop_locations: &[Location],
468-
live_at: &BitSet<PointIndex>,
468+
live_at: &HybridBitSet<PointIndex>,
469469
) {
470470
debug!(
471471
"add_drop_live_constraint(\
@@ -508,7 +508,7 @@ impl LivenessContext<'_, '_, '_, '_, 'tcx> {
508508
elements: &RegionValueElements,
509509
typeck: &mut TypeChecker<'_, '_, 'tcx>,
510510
value: impl TypeFoldable<'tcx>,
511-
live_at: &BitSet<PointIndex>,
511+
live_at: &HybridBitSet<PointIndex>,
512512
) {
513513
debug!("make_all_regions_live(value={:?})", value);
514514
debug!(

0 commit comments

Comments
 (0)