Skip to content

Commit b3f1379

Browse files
committed
Auto merge of rust-lang#110083 - saethlin:encode-hashes-as-bytes, r=cjgillot
Encode hashes as bytes, not varint

In a few places, we store hashes as `u64` or `u128` and then apply `derive(Decodable, Encodable)` to the enclosing struct/enum. It is more efficient to encode hashes directly than to try to apply some varint encoding. This PR adds two new types, `Hash64` and `Hash128`, which are produced by `StableHasher` and replace every use of storing a `u64` or `u128` that represents a hash.

Distribution of the byte lengths of leb128 encodings, from `x build --stage 2` with `incremental = true`:

Before:
```
(  1) 373418203 (53.7%, 53.7%): 1
(  2) 196240113 (28.2%, 81.9%): 3
(  3) 108157958 (15.6%, 97.5%): 2
(  4)  17213120 ( 2.5%, 99.9%): 4
(  5)    223614 ( 0.0%,100.0%): 9
(  6)    216262 ( 0.0%,100.0%): 10
(  7)     15447 ( 0.0%,100.0%): 5
(  8)      3633 ( 0.0%,100.0%): 19
(  9)      3030 ( 0.0%,100.0%): 8
( 10)      1167 ( 0.0%,100.0%): 18
( 11)      1032 ( 0.0%,100.0%): 7
( 12)      1003 ( 0.0%,100.0%): 6
( 13)        10 ( 0.0%,100.0%): 16
( 14)        10 ( 0.0%,100.0%): 17
( 15)         5 ( 0.0%,100.0%): 12
( 16)         4 ( 0.0%,100.0%): 14
```

After:
```
(  1) 372939136 (53.7%, 53.7%): 1
(  2) 196240140 (28.3%, 82.0%): 3
(  3) 108014969 (15.6%, 97.5%): 2
(  4)  17192375 ( 2.5%,100.0%): 4
(  5)       435 ( 0.0%,100.0%): 5
(  6)        83 ( 0.0%,100.0%): 18
(  7)        79 ( 0.0%,100.0%): 10
(  8)        50 ( 0.0%,100.0%): 9
(  9)         6 ( 0.0%,100.0%): 19
```

The remaining 9- or 10-byte and 18- or 19-byte encodings are `u64` and `u128` values, respectively, that have the high bits set. As far as I can tell, these are coming primarily from `SwitchTargets`.
2 parents c609da5 + 073d99b commit b3f1379

File tree

38 files changed

+288
-137
lines changed

38 files changed

+288
-137
lines changed

compiler/rustc_abi/src/layout.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,8 @@ pub trait LayoutCalculator {
7979
{
8080
// `ReprOptions.field_shuffle_seed` is a deterministic seed that we can use to
8181
// randomize field ordering with
82-
let mut rng = Xoshiro128StarStar::seed_from_u64(repr.field_shuffle_seed);
82+
let mut rng =
83+
Xoshiro128StarStar::seed_from_u64(repr.field_shuffle_seed.as_u64());
8384

8485
// Shuffle the ordering of the fields
8586
optimizing.shuffle(&mut rng);

compiler/rustc_abi/src/lib.rs

+4-3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use std::str::FromStr;
99

1010
use bitflags::bitflags;
1111
use rustc_data_structures::intern::Interned;
12+
use rustc_data_structures::stable_hasher::Hash64;
1213
#[cfg(feature = "nightly")]
1314
use rustc_data_structures::stable_hasher::StableOrd;
1415
use rustc_index::vec::{IndexSlice, IndexVec};
@@ -77,12 +78,12 @@ pub struct ReprOptions {
7778
pub flags: ReprFlags,
7879
/// The seed to be used for randomizing a type's layout
7980
///
80-
/// Note: This could technically be a `[u8; 16]` (a `u128`) which would
81+
/// Note: This could technically be a `Hash128` which would
8182
/// be the "most accurate" hash as it'd encompass the item and crate
8283
/// hash without loss, but it does pay the price of being larger.
83-
/// Everything's a tradeoff, a `u64` seed should be sufficient for our
84+
/// Everything's a tradeoff, a 64-bit seed should be sufficient for our
8485
/// purposes (primarily `-Z randomize-layout`)
85-
pub field_shuffle_seed: u64,
86+
pub field_shuffle_seed: Hash64,
8687
}
8788

8889
impl ReprOptions {

compiler/rustc_codegen_llvm/src/common.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use crate::value::Value;
1010
use rustc_ast::Mutability;
1111
use rustc_codegen_ssa::mir::place::PlaceRef;
1212
use rustc_codegen_ssa::traits::*;
13-
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
13+
use rustc_data_structures::stable_hasher::{Hash128, HashStable, StableHasher};
1414
use rustc_hir::def_id::DefId;
1515
use rustc_middle::bug;
1616
use rustc_middle::mir::interpret::{ConstAllocation, GlobalAlloc, Scalar};
@@ -261,7 +261,7 @@ impl<'ll, 'tcx> ConstMethods<'tcx> for CodegenCx<'ll, 'tcx> {
261261
let hash = self.tcx.with_stable_hashing_context(|mut hcx| {
262262
let mut hasher = StableHasher::new();
263263
alloc.hash_stable(&mut hcx, &mut hasher);
264-
hasher.finish::<u128>()
264+
hasher.finish::<Hash128>()
265265
});
266266
llvm::set_value_name(value, format!("alloc_{hash:032x}").as_bytes());
267267
}

compiler/rustc_codegen_llvm/src/debuginfo/mod.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use rustc_codegen_ssa::debuginfo::type_names;
2121
use rustc_codegen_ssa::mir::debuginfo::{DebugScope, FunctionDebugContext, VariableKind};
2222
use rustc_codegen_ssa::traits::*;
2323
use rustc_data_structures::fx::FxHashMap;
24+
use rustc_data_structures::stable_hasher::Hash128;
2425
use rustc_data_structures::sync::Lrc;
2526
use rustc_hir::def_id::{DefId, DefIdMap};
2627
use rustc_index::vec::IndexVec;
@@ -61,7 +62,7 @@ pub struct CodegenUnitDebugContext<'ll, 'tcx> {
6162
llcontext: &'ll llvm::Context,
6263
llmod: &'ll llvm::Module,
6364
builder: &'ll mut DIBuilder<'ll>,
64-
created_files: RefCell<FxHashMap<Option<(u128, SourceFileHash)>, &'ll DIFile>>,
65+
created_files: RefCell<FxHashMap<Option<(Hash128, SourceFileHash)>, &'ll DIFile>>,
6566

6667
type_map: metadata::TypeMap<'ll, 'tcx>,
6768
namespace_map: RefCell<DefIdMap<&'ll DIScope>>,

compiler/rustc_codegen_ssa/src/debuginfo/type_names.rs

+2-3
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
// * `"` is treated as the start of a string.
1313

1414
use rustc_data_structures::fx::FxHashSet;
15-
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
15+
use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher};
1616
use rustc_hir::def_id::DefId;
1717
use rustc_hir::definitions::{DefPathData, DefPathDataName, DisambiguatedDefPathData};
1818
use rustc_hir::{AsyncGeneratorKind, GeneratorKind, Mutability};
@@ -675,8 +675,7 @@ fn push_const_param<'tcx>(tcx: TyCtxt<'tcx>, ct: ty::Const<'tcx>, output: &mut S
675675
hcx.while_hashing_spans(false, |hcx| {
676676
ct.to_valtree().hash_stable(hcx, &mut hasher)
677677
});
678-
let hash: u64 = hasher.finish();
679-
hash
678+
hasher.finish::<Hash64>()
680679
});
681680

682681
if cpp_like_debuginfo(tcx) {

compiler/rustc_const_eval/src/interpret/intrinsics.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ pub(crate) fn eval_nullary_intrinsic<'tcx>(
7575
}
7676
sym::type_id => {
7777
ensure_monomorphic_enough(tcx, tp_ty)?;
78-
ConstValue::from_u64(tcx.type_id_hash(tp_ty))
78+
ConstValue::from_u64(tcx.type_id_hash(tp_ty).as_u64())
7979
}
8080
sym::variant_count => match tp_ty.kind() {
8181
// Correctly handles non-monomorphic calls, so there is no need for ensure_monomorphic_enough.

compiler/rustc_data_structures/src/fingerprint.rs

+30-13
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::stable_hasher;
1+
use crate::stable_hasher::{Hash64, StableHasher, StableHasherResult};
22
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
33
use std::hash::{Hash, Hasher};
44

@@ -9,32 +9,49 @@ mod tests;
99
#[repr(C)]
1010
pub struct Fingerprint(u64, u64);
1111

12-
impl Fingerprint {
13-
pub const ZERO: Fingerprint = Fingerprint(0, 0);
12+
pub trait FingerprintComponent {
13+
fn as_u64(&self) -> u64;
14+
}
1415

16+
impl FingerprintComponent for Hash64 {
1517
#[inline]
16-
pub fn new(_0: u64, _1: u64) -> Fingerprint {
17-
Fingerprint(_0, _1)
18+
fn as_u64(&self) -> u64 {
19+
Hash64::as_u64(*self)
20+
}
21+
}
22+
23+
impl FingerprintComponent for u64 {
24+
#[inline]
25+
fn as_u64(&self) -> u64 {
26+
*self
1827
}
28+
}
29+
30+
impl Fingerprint {
31+
pub const ZERO: Fingerprint = Fingerprint(0, 0);
1932

2033
#[inline]
21-
pub fn from_smaller_hash(hash: u64) -> Fingerprint {
22-
Fingerprint(hash, hash)
34+
pub fn new<A, B>(_0: A, _1: B) -> Fingerprint
35+
where
36+
A: FingerprintComponent,
37+
B: FingerprintComponent,
38+
{
39+
Fingerprint(_0.as_u64(), _1.as_u64())
2340
}
2441

2542
#[inline]
26-
pub fn to_smaller_hash(&self) -> u64 {
43+
pub fn to_smaller_hash(&self) -> Hash64 {
2744
// Even though both halves of the fingerprint are expected to be good
2845
// quality hash values, let's still combine the two values because the
2946
// Fingerprints in DefPathHash have the StableCrateId portion which is
3047
// the same for all DefPathHashes from the same crate. Combining the
3148
// two halves makes sure we get a good quality hash in such cases too.
32-
self.0.wrapping_mul(3).wrapping_add(self.1)
49+
Hash64::new(self.0.wrapping_mul(3).wrapping_add(self.1))
3350
}
3451

3552
#[inline]
36-
pub fn as_value(&self) -> (u64, u64) {
37-
(self.0, self.1)
53+
pub fn split(&self) -> (Hash64, Hash64) {
54+
(Hash64::new(self.0), Hash64::new(self.1))
3855
}
3956

4057
#[inline]
@@ -131,9 +148,9 @@ impl FingerprintHasher for crate::unhash::Unhasher {
131148
}
132149
}
133150

134-
impl stable_hasher::StableHasherResult for Fingerprint {
151+
impl StableHasherResult for Fingerprint {
135152
#[inline]
136-
fn finish(hasher: stable_hasher::StableHasher) -> Self {
153+
fn finish(hasher: StableHasher) -> Self {
137154
let (_0, _1) = hasher.finalize();
138155
Fingerprint(_0, _1)
139156
}

compiler/rustc_data_structures/src/fingerprint/tests.rs

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
use super::*;
2+
use crate::stable_hasher::Hash64;
23

34
// Check that `combine_commutative` is order independent.
45
#[test]
56
fn combine_commutative_is_order_independent() {
6-
let a = Fingerprint::new(0xf6622fb349898b06, 0x70be9377b2f9c610);
7-
let b = Fingerprint::new(0xa9562bf5a2a5303c, 0x67d9b6c82034f13d);
8-
let c = Fingerprint::new(0x0d013a27811dbbc3, 0x9a3f7b3d9142ec43);
7+
let a = Fingerprint::new(Hash64::new(0xf6622fb349898b06), Hash64::new(0x70be9377b2f9c610));
8+
let b = Fingerprint::new(Hash64::new(0xa9562bf5a2a5303c), Hash64::new(0x67d9b6c82034f13d));
9+
let c = Fingerprint::new(Hash64::new(0x0d013a27811dbbc3), Hash64::new(0x9a3f7b3d9142ec43));
910
let permutations = [(a, b, c), (a, c, b), (b, a, c), (b, c, a), (c, a, b), (c, b, a)];
1011
let f = a.combine_commutative(b).combine_commutative(c);
1112
for p in &permutations {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
//! rustc encodes a lot of hashes. If hashes are stored as `u64` or `u128`, a `derive(Encodable)`
2+
//! will apply varint encoding to the hashes, which is less efficient than directly encoding the 8
3+
//! or 16 bytes of the hash.
4+
//!
5+
//! The types in this module represent 64-bit or 128-bit hashes produced by a `StableHasher`.
6+
//! `Hash64` and `Hash128` expose some utility functions to encourage users to not extract the inner
7+
//! hash value as an integer type and accidentally apply varint encoding to it.
8+
//!
9+
//! In contrast with `Fingerprint`, users of these types cannot and should not attempt to construct
10+
//! and decompose these types into constituent pieces. The point of these types is only to
11+
//! connect the fact that they can only be produced by a `StableHasher` to their
12+
//! `Encodable`/`Decodable` impls.
13+
14+
use crate::stable_hasher::{StableHasher, StableHasherResult};
15+
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
16+
use std::fmt;
17+
use std::ops::BitXorAssign;
18+
19+
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
20+
pub struct Hash64 {
21+
inner: u64,
22+
}
23+
24+
impl Hash64 {
25+
pub const ZERO: Hash64 = Hash64 { inner: 0 };
26+
27+
#[inline]
28+
pub(crate) fn new(n: u64) -> Self {
29+
Self { inner: n }
30+
}
31+
32+
#[inline]
33+
pub fn as_u64(self) -> u64 {
34+
self.inner
35+
}
36+
}
37+
38+
impl BitXorAssign<u64> for Hash64 {
39+
#[inline]
40+
fn bitxor_assign(&mut self, rhs: u64) {
41+
self.inner ^= rhs;
42+
}
43+
}
44+
45+
impl<S: Encoder> Encodable<S> for Hash64 {
46+
#[inline]
47+
fn encode(&self, s: &mut S) {
48+
s.emit_raw_bytes(&self.inner.to_le_bytes());
49+
}
50+
}
51+
52+
impl<D: Decoder> Decodable<D> for Hash64 {
53+
#[inline]
54+
fn decode(d: &mut D) -> Self {
55+
Self { inner: u64::from_le_bytes(d.read_raw_bytes(8).try_into().unwrap()) }
56+
}
57+
}
58+
59+
impl StableHasherResult for Hash64 {
60+
#[inline]
61+
fn finish(hasher: StableHasher) -> Self {
62+
Self { inner: hasher.finalize().0 }
63+
}
64+
}
65+
66+
impl fmt::Debug for Hash64 {
67+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
68+
self.inner.fmt(f)
69+
}
70+
}
71+
72+
impl fmt::LowerHex for Hash64 {
73+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
74+
fmt::LowerHex::fmt(&self.inner, f)
75+
}
76+
}
77+
78+
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
79+
pub struct Hash128 {
80+
inner: u128,
81+
}
82+
83+
impl Hash128 {
84+
#[inline]
85+
pub fn truncate(self) -> Hash64 {
86+
Hash64 { inner: self.inner as u64 }
87+
}
88+
89+
#[inline]
90+
pub fn wrapping_add(self, other: Self) -> Self {
91+
Self { inner: self.inner.wrapping_add(other.inner) }
92+
}
93+
94+
#[inline]
95+
pub fn as_u128(self) -> u128 {
96+
self.inner
97+
}
98+
}
99+
100+
impl<S: Encoder> Encodable<S> for Hash128 {
101+
#[inline]
102+
fn encode(&self, s: &mut S) {
103+
s.emit_raw_bytes(&self.inner.to_le_bytes());
104+
}
105+
}
106+
107+
impl<D: Decoder> Decodable<D> for Hash128 {
108+
#[inline]
109+
fn decode(d: &mut D) -> Self {
110+
Self { inner: u128::from_le_bytes(d.read_raw_bytes(16).try_into().unwrap()) }
111+
}
112+
}
113+
114+
impl StableHasherResult for Hash128 {
115+
#[inline]
116+
fn finish(hasher: StableHasher) -> Self {
117+
let (_0, _1) = hasher.finalize();
118+
Self { inner: u128::from(_0) | (u128::from(_1) << 64) }
119+
}
120+
}
121+
122+
impl fmt::Debug for Hash128 {
123+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
124+
self.inner.fmt(f)
125+
}
126+
}
127+
128+
impl fmt::LowerHex for Hash128 {
129+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
130+
fmt::LowerHex::fmt(&self.inner, f)
131+
}
132+
}

compiler/rustc_data_structures/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ pub mod work_queue;
8686
pub use atomic_ref::AtomicRef;
8787
pub mod aligned;
8888
pub mod frozen;
89+
mod hashes;
8990
pub mod owned_slice;
9091
pub mod sso;
9192
pub mod steal;

0 commit comments

Comments
 (0)