Skip to content

Commit 2c7bc5e

Browse files
committed
Auto merge of rust-lang#87867 - bjorn3:unique_type_id_interner, r=wesleywiser
Use a separate interner type for UniqueTypeId Using symbol::Interner makes it very easy to mixup UniqueTypeId symbols with the global interner. In fact the Debug implementation of UniqueTypeId did exactly this. Using a separate interner type also avoids prefilling the interner with unused symbols and allow for optimizing the symbol interner for parallel access without negatively affecting the single threaded module codegen.
2 parents e846f9c + 8c7840e commit 2c7bc5e

File tree

5 files changed

+61
-11
lines changed

5 files changed

+61
-11
lines changed

Cargo.lock

+1
Original file line numberDiff line numberDiff line change
@@ -3654,6 +3654,7 @@ dependencies = [
36543654
"libc",
36553655
"measureme",
36563656
"rustc-demangle",
3657+
"rustc_arena",
36573658
"rustc_ast",
36583659
"rustc_attr",
36593660
"rustc_codegen_ssa",

compiler/rustc_codegen_llvm/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ snap = "1"
1616
tracing = "0.1"
1717
rustc_middle = { path = "../rustc_middle" }
1818
rustc-demangle = "0.1.21"
19+
rustc_arena = { path = "../rustc_arena" }
1920
rustc_attr = { path = "../rustc_attr" }
2021
rustc_codegen_ssa = { path = "../rustc_codegen_ssa" }
2122
rustc_data_structures = { path = "../rustc_data_structures" }

compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs

+54-9
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ use rustc_middle::ty::Instance;
3434
use rustc_middle::ty::{self, AdtKind, GeneratorSubsts, ParamEnv, Ty, TyCtxt};
3535
use rustc_middle::{bug, span_bug};
3636
use rustc_session::config::{self, DebugInfo};
37-
use rustc_span::symbol::{Interner, Symbol};
37+
use rustc_span::symbol::Symbol;
3838
use rustc_span::FileNameDisplayPreference;
3939
use rustc_span::{self, SourceFile, SourceFileHash, Span};
4040
use rustc_target::abi::{Abi, Align, HasDataLayout, Integer, TagEncoding};
@@ -89,8 +89,54 @@ pub const UNKNOWN_COLUMN_NUMBER: c_uint = 0;
8989

9090
pub const NO_SCOPE_METADATA: Option<&DIScope> = None;
9191

92-
#[derive(Copy, Debug, Hash, Eq, PartialEq, Clone)]
93-
pub struct UniqueTypeId(Symbol);
92+
mod unique_type_id {
93+
use super::*;
94+
use rustc_arena::DroplessArena;
95+
96+
#[derive(Copy, Hash, Eq, PartialEq, Clone)]
97+
pub(super) struct UniqueTypeId(u32);
98+
99+
// The `&'static str`s in this type actually point into the arena.
100+
//
101+
// The `FxHashMap`+`Vec` pair could be replaced by `FxIndexSet`, but #75278
102+
// found that to regress performance up to 2% in some cases. This might be
103+
// revisited after further improvements to `indexmap`.
104+
#[derive(Default)]
105+
pub(super) struct TypeIdInterner {
106+
arena: DroplessArena,
107+
names: FxHashMap<&'static str, UniqueTypeId>,
108+
strings: Vec<&'static str>,
109+
}
110+
111+
impl TypeIdInterner {
112+
#[inline]
113+
pub(super) fn intern(&mut self, string: &str) -> UniqueTypeId {
114+
if let Some(&name) = self.names.get(string) {
115+
return name;
116+
}
117+
118+
let name = UniqueTypeId(self.strings.len() as u32);
119+
120+
// `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be
121+
// UTF-8.
122+
let string: &str =
123+
unsafe { std::str::from_utf8_unchecked(self.arena.alloc_slice(string.as_bytes())) };
124+
// It is safe to extend the arena allocation to `'static` because we only access
125+
// these while the arena is still alive.
126+
let string: &'static str = unsafe { &*(string as *const str) };
127+
self.strings.push(string);
128+
self.names.insert(string, name);
129+
name
130+
}
131+
132+
// Get the symbol as a string. `Symbol::as_str()` should be used in
133+
// preference to this function.
134+
pub(super) fn get(&self, symbol: UniqueTypeId) -> &str {
135+
self.strings[symbol.0 as usize]
136+
}
137+
}
138+
}
139+
use unique_type_id::*;
94140

95141
/// The `TypeMap` is where the `CrateDebugContext` holds the type metadata nodes
96142
/// created so far. The metadata nodes are indexed by `UniqueTypeId`, and, for
@@ -99,7 +145,7 @@ pub struct UniqueTypeId(Symbol);
99145
#[derive(Default)]
100146
pub struct TypeMap<'ll, 'tcx> {
101147
/// The `UniqueTypeId`s created so far.
102-
unique_id_interner: Interner,
148+
unique_id_interner: TypeIdInterner,
103149
/// A map from `UniqueTypeId` to debuginfo metadata for that type. This is a 1:1 mapping.
104150
unique_id_to_metadata: FxHashMap<UniqueTypeId, &'ll DIType>,
105151
/// A map from types to debuginfo metadata. This is an N:1 mapping.
@@ -166,8 +212,7 @@ impl TypeMap<'ll, 'tcx> {
166212
/// Gets the string representation of a `UniqueTypeId`. This method will fail if
167213
/// the ID is unknown.
168214
fn get_unique_type_id_as_string(&self, unique_type_id: UniqueTypeId) -> &str {
169-
let UniqueTypeId(interner_key) = unique_type_id;
170-
self.unique_id_interner.get(interner_key)
215+
self.unique_id_interner.get(unique_type_id)
171216
}
172217

173218
/// Gets the `UniqueTypeId` for the given type. If the `UniqueTypeId` for the given
@@ -197,9 +242,9 @@ impl TypeMap<'ll, 'tcx> {
197242
let unique_type_id = hasher.finish::<Fingerprint>().to_hex();
198243

199244
let key = self.unique_id_interner.intern(&unique_type_id);
200-
self.type_to_unique_id.insert(type_, UniqueTypeId(key));
245+
self.type_to_unique_id.insert(type_, key);
201246

202-
UniqueTypeId(key)
247+
key
203248
}
204249

205250
/// Gets the `UniqueTypeId` for an enum variant. Enum variants are not really
@@ -215,7 +260,7 @@ impl TypeMap<'ll, 'tcx> {
215260
let enum_variant_type_id =
216261
format!("{}::{}", self.get_unique_type_id_as_string(enum_type_id), variant_name);
217262
let interner_key = self.unique_id_interner.intern(&enum_variant_type_id);
218-
UniqueTypeId(interner_key)
263+
interner_key
219264
}
220265

221266
/// Gets the unique type ID string for an enum variant part.

compiler/rustc_macros/src/symbols.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
215215
}
216216

217217
impl Interner {
218-
pub fn fresh() -> Self {
218+
pub(crate) fn fresh() -> Self {
219219
Interner::prefill(&[
220220
#prefill_stream
221221
])

compiler/rustc_span/src/symbol.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -1701,8 +1701,11 @@ impl<CTX> ToStableHashKey<CTX> for Symbol {
17011701
// The `FxHashMap`+`Vec` pair could be replaced by `FxIndexSet`, but #75278
17021702
// found that to regress performance up to 2% in some cases. This might be
17031703
// revisited after further improvements to `indexmap`.
1704+
//
1705+
// This type is private to prevent accidentally constructing more than one `Interner` on the same
1706+
// thread, which makes it easy to mixup `Symbol`s between `Interner`s.
17041707
#[derive(Default)]
1705-
pub struct Interner {
1708+
pub(crate) struct Interner {
17061709
arena: DroplessArena,
17071710
names: FxHashMap<&'static str, Symbol>,
17081711
strings: Vec<&'static str>,

0 commit comments

Comments
 (0)