Commit 7f1231c

Shard AllocMap Lock
This improves performance on many-seed parallel (-Zthreads=32) Miri executions from only managing to use ~8 cores to using 27-28 cores. That's pretty reasonable scaling for the simplicity of this solution.
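For context, here is a minimal sketch of what lock sharding means in general, assuming nothing beyond `std` (this is not rustc's actual `ShardedHashMap`; `ShardedMap`, `SHARDS`, and `shard_for` are illustrative names): each key hashes to one of a fixed number of shards, each guarded by its own mutex, so parallel threads working on different keys rarely block one another.

```rust
use std::collections::HashMap;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use std::sync::Mutex;

const SHARDS: usize = 16;

/// A map split into independently locked shards: threads touching
/// different keys usually hit different shards and don't contend.
struct ShardedMap<K, V> {
    shards: [Mutex<HashMap<K, V>>; SHARDS],
}

impl<K: Hash + Eq, V: Clone> ShardedMap<K, V> {
    fn new() -> Self {
        Self { shards: std::array::from_fn(|_| Mutex::new(HashMap::new())) }
    }

    // Hash the key to pick a shard; only that shard's lock is taken.
    fn shard_for(&self, key: &K) -> &Mutex<HashMap<K, V>> {
        let mut hasher = DefaultHasher::new();
        key.hash(&mut hasher);
        &self.shards[hasher.finish() as usize % SHARDS]
    }

    fn insert(&self, key: K, value: V) -> Option<V> {
        self.shard_for(&key).lock().unwrap().insert(key, value)
    }

    fn get(&self, key: &K) -> Option<V> {
        self.shard_for(key).lock().unwrap().get(key).cloned()
    }
}

fn main() {
    let map: ShardedMap<u64, &str> = ShardedMap::new();
    map.insert(1, "first allocation");
    assert_eq!(map.get(&1), Some("first allocation"));
}
```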
1 parent 7f36543 commit 7f1231c

2 files changed, +44 −26 lines changed

Diff for: compiler/rustc_middle/src/mir/interpret/mod.rs

+42 −24
@@ -15,7 +15,8 @@ use std::{fmt, io};
 use rustc_abi::{AddressSpace, Align, Endian, HasDataLayout, Size};
 use rustc_ast::{LitKind, Mutability};
 use rustc_data_structures::fx::FxHashMap;
-use rustc_data_structures::sync::Lock;
+use rustc_data_structures::sharded::ShardedHashMap;
+use rustc_data_structures::sync::{AtomicU64, Lock};
 use rustc_hir::def::DefKind;
 use rustc_hir::def_id::{DefId, LocalDefId};
 use rustc_macros::{HashStable, TyDecodable, TyEncodable, TypeFoldable, TypeVisitable};
@@ -389,35 +390,39 @@ pub const CTFE_ALLOC_SALT: usize = 0;
 
 pub(crate) struct AllocMap<'tcx> {
     /// Maps `AllocId`s to their corresponding allocations.
-    alloc_map: FxHashMap<AllocId, GlobalAlloc<'tcx>>,
+    // Note that this map on rustc workloads seems to be rather dense, but in miri workloads should
+    // be pretty sparse. In #136105 we considered replacing it with a (dense) Vec-based map, but
+    // since there are workloads where it can be sparse we decided to go with sharding for now. At
+    // least up to 32 cores the one workload tested didn't exhibit much difference between the two.
+    //
+    // Should be locked *after* locking dedup if locking both to avoid deadlocks.
+    to_alloc: ShardedHashMap<AllocId, GlobalAlloc<'tcx>>,
 
     /// Used to deduplicate global allocations: functions, vtables, string literals, ...
     ///
     /// The `usize` is a "salt" used by Miri to make deduplication imperfect, thus better emulating
    /// the actual guarantees.
-    dedup: FxHashMap<(GlobalAlloc<'tcx>, usize), AllocId>,
+    dedup: Lock<FxHashMap<(GlobalAlloc<'tcx>, usize), AllocId>>,
 
     /// The `AllocId` to assign to the next requested ID.
     /// Always incremented; never gets smaller.
-    next_id: AllocId,
+    next_id: AtomicU64,
 }
 
 impl<'tcx> AllocMap<'tcx> {
     pub(crate) fn new() -> Self {
         AllocMap {
-            alloc_map: Default::default(),
+            to_alloc: Default::default(),
             dedup: Default::default(),
-            next_id: AllocId(NonZero::new(1).unwrap()),
+            next_id: AtomicU64::new(1),
         }
     }
-    fn reserve(&mut self) -> AllocId {
-        let next = self.next_id;
-        self.next_id.0 = self.next_id.0.checked_add(1).expect(
-            "You overflowed a u64 by incrementing by 1... \
-             You've just earned yourself a free drink if we ever meet. \
-             Seriously, how did you do that?!",
-        );
-        next
+    fn reserve(&self) -> AllocId {
+        // Technically there is a window here where we overflow and then another thread
+        // increments `next_id` *again* and uses it before we panic and tear down the entire session.
+        // We consider this fine since such overflows cannot realistically occur.
+        let next_id = self.next_id.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
+        AllocId(NonZero::new(next_id).unwrap())
     }
 }
 
@@ -428,26 +433,34 @@ impl<'tcx> TyCtxt<'tcx> {
     /// Make sure to call `set_alloc_id_memory` or `set_alloc_id_same_memory` before returning such
     /// an `AllocId` from a query.
     pub fn reserve_alloc_id(self) -> AllocId {
-        self.alloc_map.lock().reserve()
+        self.alloc_map.reserve()
     }
 
     /// Reserves a new ID *if* this allocation has not been dedup-reserved before.
     /// Should not be used for mutable memory.
     fn reserve_and_set_dedup(self, alloc: GlobalAlloc<'tcx>, salt: usize) -> AllocId {
-        let mut alloc_map = self.alloc_map.lock();
         if let GlobalAlloc::Memory(mem) = alloc {
             if mem.inner().mutability.is_mut() {
                 bug!("trying to dedup-reserve mutable memory");
             }
         }
         let alloc_salt = (alloc, salt);
-        if let Some(&alloc_id) = alloc_map.dedup.get(&alloc_salt) {
+        // Locking this *before* `to_alloc` also to ensure correct lock order.
+        let mut dedup = self.alloc_map.dedup.lock();
+        if let Some(&alloc_id) = dedup.get(&alloc_salt) {
            return alloc_id;
        }
-        let id = alloc_map.reserve();
+        let id = self.alloc_map.reserve();
         debug!("creating alloc {:?} with id {id:?}", alloc_salt.0);
-        alloc_map.alloc_map.insert(id, alloc_salt.0.clone());
-        alloc_map.dedup.insert(alloc_salt, id);
+        let had_previous = self
+            .alloc_map
+            .to_alloc
+            .lock_shard_by_value(&id)
+            .insert(id, alloc_salt.0.clone())
+            .is_some();
+        // We just reserved, so should always be unique.
+        assert!(!had_previous);
+        dedup.insert(alloc_salt, id);
         id
     }
 
@@ -497,7 +510,7 @@ impl<'tcx> TyCtxt<'tcx> {
     /// local dangling pointers and allocations in constants/statics.
     #[inline]
     pub fn try_get_global_alloc(self, id: AllocId) -> Option<GlobalAlloc<'tcx>> {
-        self.alloc_map.lock().alloc_map.get(&id).cloned()
+        self.alloc_map.to_alloc.lock_shard_by_value(&id).get(&id).cloned()
     }
 
     #[inline]
@@ -516,16 +529,21 @@ impl<'tcx> TyCtxt<'tcx> {
     /// Freezes an `AllocId` created with `reserve` by pointing it at an `Allocation`. Trying to
     /// call this function twice, even with the same `Allocation` will ICE the compiler.
     pub fn set_alloc_id_memory(self, id: AllocId, mem: ConstAllocation<'tcx>) {
-        if let Some(old) = self.alloc_map.lock().alloc_map.insert(id, GlobalAlloc::Memory(mem)) {
+        if let Some(old) =
+            self.alloc_map.to_alloc.lock_shard_by_value(&id).insert(id, GlobalAlloc::Memory(mem))
+        {
             bug!("tried to set allocation ID {id:?}, but it was already existing as {old:#?}");
         }
     }
 
     /// Freezes an `AllocId` created with `reserve` by pointing it at a static item. Trying to
     /// call this function twice, even with the same `DefId` will ICE the compiler.
     pub fn set_nested_alloc_id_static(self, id: AllocId, def_id: LocalDefId) {
-        if let Some(old) =
-            self.alloc_map.lock().alloc_map.insert(id, GlobalAlloc::Static(def_id.to_def_id()))
+        if let Some(old) = self
+            .alloc_map
+            .to_alloc
+            .lock_shard_by_value(&id)
+            .insert(id, GlobalAlloc::Static(def_id.to_def_id()))
         {
             bug!("tried to set allocation ID {id:?}, but it was already existing as {old:#?}");
         }
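
One subtle point in the hunks above is the comment that `dedup` must be locked before `to_alloc`. Below is a hedged, standalone sketch of that discipline using plain `std` types and made-up names (`TwoLockMap` is not a rustc type): deadlock is avoided as long as every code path that holds both locks acquires them in the same order.

```rust
use std::collections::HashMap;
use std::sync::Mutex;

struct TwoLockMap {
    dedup: Mutex<HashMap<&'static str, u64>>,
    to_alloc: Mutex<HashMap<u64, &'static str>>,
}

impl TwoLockMap {
    fn insert(&self, name: &'static str, id: u64) {
        // Correct order: `dedup` first, then `to_alloc`. If another path
        // locked `to_alloc` first, two threads could each hold one lock
        // while waiting forever for the other.
        let mut dedup = self.dedup.lock().unwrap();
        let mut to_alloc = self.to_alloc.lock().unwrap();
        dedup.insert(name, id);
        to_alloc.insert(id, name);
    }
}

fn main() {
    let map = TwoLockMap {
        dedup: Mutex::new(HashMap::new()),
        to_alloc: Mutex::new(HashMap::new()),
    };
    map.insert("vtable", 1);
    assert_eq!(map.to_alloc.lock().unwrap().get(&1), Some(&"vtable"));
}
```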

Diff for: compiler/rustc_middle/src/ty/context.rs

+2 −2
@@ -1366,7 +1366,7 @@ pub struct GlobalCtxt<'tcx> {
     pub data_layout: TargetDataLayout,
 
     /// Stores memory for globals (statics/consts).
-    pub(crate) alloc_map: Lock<interpret::AllocMap<'tcx>>,
+    pub(crate) alloc_map: interpret::AllocMap<'tcx>,
 
     current_gcx: CurrentGcx,
 }
@@ -1583,7 +1583,7 @@ impl<'tcx> TyCtxt<'tcx> {
             new_solver_evaluation_cache: Default::default(),
             canonical_param_env_cache: Default::default(),
             data_layout,
-            alloc_map: Lock::new(interpret::AllocMap::new()),
+            alloc_map: interpret::AllocMap::new(),
             current_gcx,
         });
 

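Taken together with the first file's changes, the design choice here is that `GlobalCtxt::alloc_map` is no longer wrapped in an outer `Lock`: synchronization now lives inside `AllocMap` itself via the sharded `to_alloc` map, the `dedup` lock, and the atomic `next_id` counter, so callers such as `reserve_alloc_id` and `try_get_global_alloc` no longer serialize on a single whole-map lock.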