Skip to content

Commit 960893c

Browse files
Store DefPathHash->DefIndex map in on-disk-hash-table format in crate metadata.
This encoding allows for random access without an expensive upfront decoding step, which in turn allows simplifying the DefPathHash->DefIndex lookup logic without regressing performance.
1 parent d0be27c commit 960893c

File tree

9 files changed

+131
-144
lines changed

9 files changed

+131
-144
lines changed

Diff for: compiler/rustc_metadata/src/creader.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ pub struct CStore {
4545

4646
/// This map is used to verify we get no hash conflicts between
4747
/// `StableCrateId` values.
48-
stable_crate_ids: FxHashMap<StableCrateId, CrateNum>,
48+
pub(crate) stable_crate_ids: FxHashMap<StableCrateId, CrateNum>,
4949

5050
/// Unused externs of the crate
5151
unused_externs: Vec<Symbol>,

Diff for: compiler/rustc_metadata/src/rmeta/decoder.rs

+18-57
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,8 @@ crate struct CrateMetadata {
9595
raw_proc_macros: Option<&'static [ProcMacro]>,
9696
/// Source maps for code from the crate.
9797
source_map_import_info: OnceCell<Vec<ImportedSourceFile>>,
98-
/// For every definition in this crate, maps its `DefPathHash` to its
99-
/// `DefIndex`. See `raw_def_id_to_def_id` for more details about how
100-
/// this is used.
101-
def_path_hash_map: OnceCell<UnhashMap<DefPathHash, DefIndex>>,
98+
/// For every definition in this crate, maps its `DefPathHash` to its `DefIndex`.
99+
def_path_hash_map: DefPathHashMap<'static>,
102100
/// Likewise for ExpnHash.
103101
expn_hash_map: OnceCell<UnhashMap<ExpnHash, ExpnIndex>>,
104102
/// Used for decoding interpret::AllocIds in a cached & thread-safe manner.
@@ -320,6 +318,11 @@ impl<'a, 'tcx> DecodeContext<'a, 'tcx> {
320318
self.lazy_state = LazyState::Previous(NonZeroUsize::new(position + min_size).unwrap());
321319
Ok(Lazy::from_position_and_meta(NonZeroUsize::new(position).unwrap(), meta))
322320
}
321+
322+
#[inline]
323+
pub fn read_raw_bytes(&mut self, len: usize) -> &'a [u8] {
324+
self.opaque.read_raw_bytes(len)
325+
}
323326
}
324327

325328
impl<'a, 'tcx> TyDecoder<'tcx> for DecodeContext<'a, 'tcx> {
@@ -1596,58 +1599,6 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
15961599
.or_insert_with(|| self.root.tables.def_keys.get(self, index).unwrap().decode(self))
15971600
}
15981601

1599-
/// Finds the corresponding `DefId` for the provided `DefPathHash`, if it exists.
1600-
/// This is used by incremental compilation to map a serialized `DefPathHash` to
1601-
/// its `DefId` in the current session.
1602-
/// Normally, only one 'main' crate will change between incremental compilation sessions:
1603-
/// all dependencies will be completely unchanged. In this case, we can avoid
1604-
/// decoding every `DefPathHash` in the crate, since the `DefIndex` from the previous
1605-
/// session will still be valid. If our 'guess' is wrong (the `DefIndex` no longer exists,
1606-
/// or has a different `DefPathHash`, then we need to decode all `DefPathHashes` to determine
1607-
/// the correct mapping).
1608-
fn def_path_hash_to_def_id(
1609-
&self,
1610-
krate: CrateNum,
1611-
index_guess: u32,
1612-
hash: DefPathHash,
1613-
) -> Option<DefId> {
1614-
let def_index_guess = DefIndex::from_u32(index_guess);
1615-
let old_hash = self
1616-
.root
1617-
.tables
1618-
.def_path_hashes
1619-
.get(self, def_index_guess)
1620-
.map(|lazy| lazy.decode(self));
1621-
1622-
// Fast path: the definition and its index is unchanged from the
1623-
// previous compilation session. There is no need to decode anything
1624-
// else
1625-
if old_hash == Some(hash) {
1626-
return Some(DefId { krate, index: def_index_guess });
1627-
}
1628-
1629-
let is_proc_macro = self.is_proc_macro_crate();
1630-
1631-
// Slow path: We need to find out the new `DefIndex` of the provided
1632-
// `DefPathHash`, if its still exists. This requires decoding every `DefPathHash`
1633-
// stored in this crate.
1634-
let map = self.cdata.def_path_hash_map.get_or_init(|| {
1635-
let end_id = self.root.tables.def_path_hashes.size() as u32;
1636-
let mut map = UnhashMap::with_capacity_and_hasher(end_id as usize, Default::default());
1637-
for i in 0..end_id {
1638-
let def_index = DefIndex::from_u32(i);
1639-
// There may be gaps in the encoded table if we're decoding a proc-macro crate
1640-
if let Some(hash) = self.root.tables.def_path_hashes.get(self, def_index) {
1641-
map.insert(hash.decode(self), def_index);
1642-
} else if !is_proc_macro {
1643-
panic!("Missing def_path_hashes entry for {:?}", def_index);
1644-
}
1645-
}
1646-
map
1647-
});
1648-
map.get(&hash).map(|index| DefId { krate, index: *index })
1649-
}
1650-
16511602
// Returns the path leading to the thing with this `id`.
16521603
fn def_path(&self, id: DefIndex) -> DefPath {
16531604
debug!("def_path(cnum={:?}, id={:?})", self.cnum, id);
@@ -1670,6 +1621,11 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
16701621
self.def_path_hash_unlocked(index, &mut def_path_hashes)
16711622
}
16721623

1624+
#[inline]
1625+
fn def_path_hash_to_def_index(&self, hash: DefPathHash) -> Option<DefIndex> {
1626+
self.def_path_hash_map.def_path_hash_to_def_index(&hash)
1627+
}
1628+
16731629
fn expn_hash_to_expn_id(&self, index_guess: u32, hash: ExpnHash) -> ExpnId {
16741630
debug_assert_eq!(ExpnId::from_hash(hash), None);
16751631
let index_guess = ExpnIndex::from_u32(index_guess);
@@ -1936,13 +1892,18 @@ impl CrateMetadata {
19361892
let alloc_decoding_state =
19371893
AllocDecodingState::new(root.interpret_alloc_index.decode(&blob).collect());
19381894
let dependencies = Lock::new(cnum_map.iter().cloned().collect());
1895+
1896+
// Pre-decode the DefPathHash->DefIndex table. This is a cheap operation
1897+
// that does not copy any data. It just does some data verification.
1898+
let def_path_hash_map = root.def_path_hash_map.decode(&blob);
1899+
19391900
CrateMetadata {
19401901
blob,
19411902
root,
19421903
trait_impls,
19431904
raw_proc_macros,
19441905
source_map_import_info: OnceCell::new(),
1945-
def_path_hash_map: Default::default(),
1906+
def_path_hash_map,
19461907
expn_hash_map: Default::default(),
19471908
alloc_decoding_state,
19481909
cnum,

Diff for: compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs

+8-8
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,10 @@ impl CrateStore for CStore {
498498
self.get_crate_data(cnum).root.stable_crate_id
499499
}
500500

501+
fn stable_crate_id_to_crate_num(&self, stable_crate_id: StableCrateId) -> CrateNum {
502+
self.stable_crate_ids[&stable_crate_id]
503+
}
504+
501505
/// Returns the `DefKey` for a given `DefId`. This indicates the
502506
/// parent `DefId` as well as some idea of what kind of data the
503507
/// `DefId` refers to.
@@ -513,14 +517,10 @@ impl CrateStore for CStore {
513517
self.get_crate_data(def.krate).def_path_hash(def.index)
514518
}
515519

516-
// See `CrateMetadataRef::def_path_hash_to_def_id` for more details
517-
fn def_path_hash_to_def_id(
518-
&self,
519-
cnum: CrateNum,
520-
index_guess: u32,
521-
hash: DefPathHash,
522-
) -> Option<DefId> {
523-
self.get_crate_data(cnum).def_path_hash_to_def_id(cnum, index_guess, hash)
520+
fn def_path_hash_to_def_id(&self, cnum: CrateNum, hash: DefPathHash) -> Option<DefId> {
521+
self.get_crate_data(cnum)
522+
.def_path_hash_to_def_index(hash)
523+
.map(|index| DefId { krate: cnum, index })
524524
}
525525

526526
fn expn_hash_to_expn_id(&self, cnum: CrateNum, index_guess: u32, hash: ExpnHash) -> ExpnId {
+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
use crate::rmeta::DecodeContext;
2+
use crate::rmeta::EncodeContext;
3+
use crate::rmeta::MetadataBlob;
4+
use rustc_data_structures::owning_ref::OwningRef;
5+
use rustc_hir::def_path_hash_map::{
6+
Config as HashMapConfig, DefPathHashMap as DefPathHashMapInner,
7+
};
8+
use rustc_serialize::{opaque, Decodable, Decoder, Encodable, Encoder};
9+
use rustc_span::def_id::{DefIndex, DefPathHash};
10+
11+
crate enum DefPathHashMap<'tcx> {
12+
OwnedFromMetadata(odht::HashTable<HashMapConfig, OwningRef<MetadataBlob, [u8]>>),
13+
BorrowedFromTcx(&'tcx DefPathHashMapInner),
14+
}
15+
16+
impl DefPathHashMap<'tcx> {
17+
#[inline]
18+
pub fn def_path_hash_to_def_index(&self, def_path_hash: &DefPathHash) -> Option<DefIndex> {
19+
match *self {
20+
DefPathHashMap::OwnedFromMetadata(ref map) => map.get(def_path_hash),
21+
DefPathHashMap::BorrowedFromTcx(_) => {
22+
panic!("DefPathHashMap::BorrowedFromTcx variant only exists for serialization")
23+
}
24+
}
25+
}
26+
}
27+
28+
impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for DefPathHashMap<'tcx> {
29+
fn encode(&self, e: &mut EncodeContext<'a, 'tcx>) -> opaque::EncodeResult {
30+
match *self {
31+
DefPathHashMap::BorrowedFromTcx(def_path_hash_map) => {
32+
let bytes = def_path_hash_map.raw_bytes();
33+
e.emit_usize(bytes.len())?;
34+
e.emit_raw_bytes(bytes)
35+
}
36+
DefPathHashMap::OwnedFromMetadata(_) => {
37+
panic!("DefPathHashMap::OwnedFromMetadata variant only exists for deserialization")
38+
}
39+
}
40+
}
41+
}
42+
43+
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for DefPathHashMap<'static> {
44+
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> Result<DefPathHashMap<'static>, String> {
45+
// Import TyDecoder so we can access the DecodeContext::position() method
46+
use crate::rustc_middle::ty::codec::TyDecoder;
47+
48+
let len = d.read_usize()?;
49+
let pos = d.position();
50+
let o = OwningRef::new(d.blob().clone()).map(|x| &x[pos..pos + len]);
51+
52+
// Although we already have the data we need via the OwningRef, we still need
53+
// to advance the DecodeContext's position so it's in a valid state after
54+
// the method. We use read_raw_bytes() for that.
55+
let _ = d.read_raw_bytes(len);
56+
57+
let inner = odht::HashTable::from_raw_bytes(o).map_err(|e| format!("{}", e))?;
58+
Ok(DefPathHashMap::OwnedFromMetadata(inner))
59+
}
60+
}

Diff for: compiler/rustc_metadata/src/rmeta/encoder.rs

+13
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use crate::rmeta::def_path_hash_map::DefPathHashMap;
12
use crate::rmeta::table::{FixedSizeEncoding, TableBuilder};
23
use crate::rmeta::*;
34

@@ -472,6 +473,12 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
472473
}
473474
}
474475

476+
fn encode_def_path_hash_map(&mut self) -> Lazy<DefPathHashMap<'tcx>> {
477+
self.lazy(DefPathHashMap::BorrowedFromTcx(
478+
self.tcx.resolutions(()).definitions.def_path_hash_to_def_index_map(),
479+
))
480+
}
481+
475482
fn encode_source_map(&mut self) -> Lazy<[rustc_span::SourceFile]> {
476483
let source_map = self.tcx.sess.source_map();
477484
let all_source_files = source_map.files();
@@ -675,6 +682,10 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
675682
let (syntax_contexts, expn_data, expn_hashes) = self.encode_hygiene();
676683
let hygiene_bytes = self.position() - i;
677684

685+
i = self.position();
686+
let def_path_hash_map = self.encode_def_path_hash_map();
687+
let def_path_hash_map_bytes = self.position() - i;
688+
678689
// Encode source_map. This needs to be done last,
679690
// since encoding `Span`s tells us which `SourceFiles` we actually
680691
// need to encode.
@@ -722,6 +733,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
722733
syntax_contexts,
723734
expn_data,
724735
expn_hashes,
736+
def_path_hash_map,
725737
});
726738

727739
let total_bytes = self.position();
@@ -744,6 +756,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
744756
eprintln!(" impl bytes: {}", impl_bytes);
745757
eprintln!(" exp. symbols bytes: {}", exported_symbols_bytes);
746758
eprintln!(" def-path table bytes: {}", def_path_table_bytes);
759+
eprintln!(" def-path hashes bytes: {}", def_path_hash_map_bytes);
747760
eprintln!(" proc-macro-data-bytes: {}", proc_macro_data_bytes);
748761
eprintln!(" mir bytes: {}", mir_bytes);
749762
eprintln!(" item bytes: {}", item_bytes);

Diff for: compiler/rustc_metadata/src/rmeta/mod.rs

+4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use decoder::Metadata;
2+
use def_path_hash_map::DefPathHashMap;
23
use table::{Table, TableBuilder};
34

45
use rustc_ast::{self as ast, MacroDef};
@@ -35,6 +36,7 @@ use encoder::EncodeContext;
3536
use rustc_span::hygiene::SyntaxContextData;
3637

3738
mod decoder;
39+
mod def_path_hash_map;
3840
mod encoder;
3941
mod table;
4042

@@ -231,6 +233,8 @@ crate struct CrateRoot<'tcx> {
231233
expn_data: ExpnDataTable,
232234
expn_hashes: ExpnHashTable,
233235

236+
def_path_hash_map: Lazy<DefPathHashMap<'tcx>>,
237+
234238
source_map: Lazy<[rustc_span::SourceFile]>,
235239

236240
compiler_builtins: bool,

Diff for: compiler/rustc_middle/src/middle/cstore.rs

+2-6
Original file line numberDiff line numberDiff line change
@@ -199,14 +199,10 @@ pub trait CrateStore: std::fmt::Debug {
199199
// incr. comp. uses to identify a CrateNum.
200200
fn crate_name(&self, cnum: CrateNum) -> Symbol;
201201
fn stable_crate_id(&self, cnum: CrateNum) -> StableCrateId;
202+
fn stable_crate_id_to_crate_num(&self, stable_crate_id: StableCrateId) -> CrateNum;
202203

203204
/// Fetch a DefId from a DefPathHash for a foreign crate.
204-
fn def_path_hash_to_def_id(
205-
&self,
206-
cnum: CrateNum,
207-
index_guess: u32,
208-
hash: DefPathHash,
209-
) -> Option<DefId>;
205+
fn def_path_hash_to_def_id(&self, cnum: CrateNum, hash: DefPathHash) -> Option<DefId>;
210206
fn expn_hash_to_expn_id(&self, cnum: CrateNum, index_guess: u32, hash: ExpnHash) -> ExpnId;
211207

212208
// utility functions

Diff for: compiler/rustc_middle/src/ty/context.rs

+11
Original file line numberDiff line numberDiff line change
@@ -1316,6 +1316,17 @@ impl<'tcx> TyCtxt<'tcx> {
13161316
}
13171317
}
13181318

1319+
/// Maps a StableCrateId to the corresponding CrateNum. This method assumes
1320+
/// that the crate in question has already been loaded by the CrateStore.
1321+
#[inline]
1322+
pub fn stable_crate_id_to_crate_num(self, stable_crate_id: StableCrateId) -> CrateNum {
1323+
if stable_crate_id == self.sess.local_stable_crate_id() {
1324+
LOCAL_CRATE
1325+
} else {
1326+
self.untracked_resolutions.cstore.stable_crate_id_to_crate_num(stable_crate_id)
1327+
}
1328+
}
1329+
13191330
pub fn def_path_debug_str(self, def_id: DefId) -> String {
13201331
// We are explicitly not going through queries here in order to get
13211332
// crate name and stable crate id since this code is called from debug!()

0 commit comments

Comments
 (0)