Skip to content

Commit 840ed5d

Browse files
committed
Auto merge of rust-lang#114860 - Zoxc:sharded-layout, r=SparrowLii
Make `Sharded` an enum and specialize it for the single thread case. This changes `Sharded` into an enum so that only a single shard is stored when the compiler uses one thread, reducing the size of `Sharded` for greater cache efficiency. Performance improvement with 1 thread and `cfg(parallel_compiler)`: <table><tr><td rowspan="2">Benchmark</td><td colspan="1"><b>Before</b></td><td colspan="2"><b>After</b></td></tr><tr><td align="right">Time</td><td align="right">Time</td><td align="right">%</td></tr><tr><td>🟣 <b>clap</b>:check</td><td align="right">1.7009s</td><td align="right">1.6748s</td><td align="right">💚 -1.53%</td></tr><tr><td>🟣 <b>hyper</b>:check</td><td align="right">0.2525s</td><td align="right">0.2451s</td><td align="right">💚 -2.90%</td></tr><tr><td>🟣 <b>regex</b>:check</td><td align="right">0.9519s</td><td align="right">0.9353s</td><td align="right">💚 -1.74%</td></tr><tr><td>🟣 <b>syn</b>:check</td><td align="right">1.5504s</td><td align="right">1.5280s</td><td align="right">💚 -1.45%</td></tr><tr><td>🟣 <b>syntex_syntax</b>:check</td><td align="right">5.9536s</td><td align="right">5.8873s</td><td align="right">💚 -1.11%</td></tr><tr><td>Total</td><td align="right">10.4092s</td><td align="right">10.2706s</td><td align="right">💚 -1.33%</td></tr><tr><td>Summary</td><td align="right">1.0000s</td><td align="right">0.9825s</td><td align="right">💚 -1.75%</td></tr></table> I did see an unexpected 0.23% change for the serial compiler, so this could use a perf run to see if that reproduces. cc `@SparrowLii`
2 parents c9db1f8 + 0823f0c commit 840ed5d

File tree

2 files changed

+49
-41
lines changed

2 files changed

+49
-41
lines changed

compiler/rustc_data_structures/src/sharded.rs

+48-40
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,26 @@
11
use crate::fx::{FxHashMap, FxHasher};
22
#[cfg(parallel_compiler)]
3-
use crate::sync::is_dyn_thread_safe;
4-
use crate::sync::{CacheAligned, Lock, LockGuard};
3+
use crate::sync::{is_dyn_thread_safe, CacheAligned};
4+
use crate::sync::{Lock, LockGuard};
55
use std::borrow::Borrow;
66
use std::collections::hash_map::RawEntryMut;
77
use std::hash::{Hash, Hasher};
88
use std::mem;
99

10-
#[cfg(parallel_compiler)]
1110
// 32 shards is sufficient to reduce contention on an 8-core Ryzen 7 1700,
1211
// but this should be tested on higher core count CPUs. How the `Sharded` type gets used
1312
// may also affect the ideal number of shards.
1413
const SHARD_BITS: usize = 5;
1514

16-
#[cfg(not(parallel_compiler))]
17-
const SHARD_BITS: usize = 0;
18-
19-
pub const SHARDS: usize = 1 << SHARD_BITS;
15+
#[cfg(parallel_compiler)]
16+
const SHARDS: usize = 1 << SHARD_BITS;
2017

2118
/// An array of cache-line aligned inner locked structures with convenience methods.
22-
pub struct Sharded<T> {
23-
/// This mask is used to ensure that accesses are inbounds of `shards`.
24-
/// When dynamic thread safety is off, this field is set to 0 causing only
25-
/// a single shard to be used for greater cache efficiency.
19+
/// A single field is used when the compiler uses only one thread.
20+
pub enum Sharded<T> {
21+
Single(Lock<T>),
2622
#[cfg(parallel_compiler)]
27-
mask: usize,
28-
shards: [CacheAligned<Lock<T>>; SHARDS],
23+
Shards(Box<[CacheAligned<Lock<T>>; SHARDS]>),
2924
}
3025

3126
impl<T: Default> Default for Sharded<T> {
@@ -38,35 +33,24 @@ impl<T: Default> Default for Sharded<T> {
3833
impl<T> Sharded<T> {
3934
#[inline]
4035
pub fn new(mut value: impl FnMut() -> T) -> Self {
41-
Sharded {
42-
#[cfg(parallel_compiler)]
43-
mask: if is_dyn_thread_safe() { SHARDS - 1 } else { 0 },
44-
shards: [(); SHARDS].map(|()| CacheAligned(Lock::new(value()))),
45-
}
46-
}
47-
48-
#[inline(always)]
49-
fn mask(&self) -> usize {
5036
#[cfg(parallel_compiler)]
51-
{
52-
if SHARDS == 1 { 0 } else { self.mask }
53-
}
54-
#[cfg(not(parallel_compiler))]
55-
{
56-
0
37+
if is_dyn_thread_safe() {
38+
return Sharded::Shards(Box::new(
39+
[(); SHARDS].map(|()| CacheAligned(Lock::new(value()))),
40+
));
5741
}
58-
}
5942

60-
#[inline(always)]
61-
fn count(&self) -> usize {
62-
// `self.mask` is always one below the used shard count
63-
self.mask() + 1
43+
Sharded::Single(Lock::new(value()))
6444
}
6545

6646
/// The shard is selected by hashing `val` with `FxHasher`.
6747
#[inline]
68-
pub fn get_shard_by_value<K: Hash + ?Sized>(&self, val: &K) -> &Lock<T> {
69-
self.get_shard_by_hash(if SHARDS == 1 { 0 } else { make_hash(val) })
48+
pub fn get_shard_by_value<K: Hash + ?Sized>(&self, _val: &K) -> &Lock<T> {
49+
match self {
50+
Self::Single(single) => &single,
51+
#[cfg(parallel_compiler)]
52+
Self::Shards(..) => self.get_shard_by_hash(make_hash(_val)),
53+
}
7054
}
7155

7256
#[inline]
@@ -75,20 +59,44 @@ impl<T> Sharded<T> {
7559
}
7660

7761
#[inline]
78-
pub fn get_shard_by_index(&self, i: usize) -> &Lock<T> {
79-
// SAFETY: The index get ANDed with the mask, ensuring it is always inbounds.
80-
unsafe { &self.shards.get_unchecked(i & self.mask()).0 }
62+
pub fn get_shard_by_index(&self, _i: usize) -> &Lock<T> {
63+
match self {
64+
Self::Single(single) => &single,
65+
#[cfg(parallel_compiler)]
66+
Self::Shards(shards) => {
67+
// SAFETY: The index gets ANDed with the shard mask, ensuring it is always inbounds.
68+
unsafe { &shards.get_unchecked(_i & (SHARDS - 1)).0 }
69+
}
70+
}
8171
}
8272

8373
pub fn lock_shards(&self) -> Vec<LockGuard<'_, T>> {
84-
(0..self.count()).map(|i| self.get_shard_by_index(i).lock()).collect()
74+
match self {
75+
Self::Single(single) => vec![single.lock()],
76+
#[cfg(parallel_compiler)]
77+
Self::Shards(shards) => shards.iter().map(|shard| shard.0.lock()).collect(),
78+
}
8579
}
8680

8781
pub fn try_lock_shards(&self) -> Option<Vec<LockGuard<'_, T>>> {
88-
(0..self.count()).map(|i| self.get_shard_by_index(i).try_lock()).collect()
82+
match self {
83+
Self::Single(single) => Some(vec![single.try_lock()?]),
84+
#[cfg(parallel_compiler)]
85+
Self::Shards(shards) => shards.iter().map(|shard| shard.0.try_lock()).collect(),
86+
}
8987
}
9088
}
9189

90+
#[inline]
91+
pub fn shards() -> usize {
92+
#[cfg(parallel_compiler)]
93+
if is_dyn_thread_safe() {
94+
return SHARDS;
95+
}
96+
97+
1
98+
}
99+
92100
pub type ShardedHashMap<K, V> = Sharded<FxHashMap<K, V>>;
93101

94102
impl<K: Eq, V> ShardedHashMap<K, V> {

compiler/rustc_query_system/src/dep_graph/graph.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1166,7 +1166,7 @@ impl<K: DepKind> CurrentDepGraph<K> {
11661166
)),
11671167
new_node_to_index: Sharded::new(|| {
11681168
FxHashMap::with_capacity_and_hasher(
1169-
new_node_count_estimate / sharded::SHARDS,
1169+
new_node_count_estimate / sharded::shards(),
11701170
Default::default(),
11711171
)
11721172
}),

0 commit comments

Comments
 (0)