Skip to content

Improve the CGU merging algorithm #111712

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 16 additions & 19 deletions compiler/rustc_monomorphize/src/partitioning/merging.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ pub fn merge_codegen_units<'tcx>(
// We want this merging to produce a deterministic ordering of codegen units
// from the input.
//
// Due to basically how we've implemented the merging below (merge the two
// smallest into each other) we're sure to start off with a deterministic
// Due to basically how we've implemented the merging below (repeatedly
// merging adjacent pairs of CGUs) we're sure to start off with a deterministic
// order (sorted by name). This'll mean that if two cgus have the same size
// the stable sort below will keep everything nice and deterministic.
codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
Expand All @@ -30,29 +30,26 @@ pub fn merge_codegen_units<'tcx>(
let mut cgu_contents: FxHashMap<Symbol, Vec<Symbol>> =
codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect();

// Merge the two smallest codegen units until the target size is reached.
// Repeatedly merge cgu[n] into cgu[n-1].
while codegen_units.len() > cx.target_cgu_count {
// Sort small cgus to the back
// njn: more comments about this.
// Sort small cgus to the back, i.e. order the CGUs by decreasing size estimate.
codegen_units.sort_by_cached_key(|cgu| cmp::Reverse(cgu.size_estimate()));
let mut smallest = codegen_units.pop().unwrap();
let second_smallest = codegen_units.last_mut().unwrap();
let mut cgu_n = codegen_units.swap_remove(cx.target_cgu_count);
let cgu_n_minus_1 = &mut codegen_units[cx.target_cgu_count - 1];

// Move the mono-items from `smallest` to `second_smallest`
second_smallest.modify_size_estimate(smallest.size_estimate());
for (k, v) in smallest.items_mut().drain() {
second_smallest.items_mut().insert(k, v);
// Move the mono-items from `cgu_n` to `cgu_n_minus_1`
cgu_n_minus_1.modify_size_estimate(cgu_n.size_estimate());
for (k, v) in cgu_n.items_mut().drain() {
cgu_n_minus_1.items_mut().insert(k, v);
}

// Record that `second_smallest` now contains all the stuff that was in
// `smallest` before.
let mut consumed_cgu_names = cgu_contents.remove(&smallest.name()).unwrap();
cgu_contents.get_mut(&second_smallest.name()).unwrap().append(&mut consumed_cgu_names);
// Record that `cgu_n_minus_1` now contains all the stuff that was in
// `cgu_n` before.
let mut consumed_cgu_names = cgu_contents.remove(&cgu_n.name()).unwrap();
cgu_contents.get_mut(&cgu_n_minus_1.name()).unwrap().append(&mut consumed_cgu_names);

debug!(
"CodegenUnit {} merged into CodegenUnit {}",
smallest.name(),
second_smallest.name()
);
debug!("CodegenUnit {} merged into CodegenUnit {}", cgu_n.name(), cgu_n_minus_1.name());
}

let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);
Expand Down
30 changes: 18 additions & 12 deletions compiler/rustc_monomorphize/src/partitioning/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,13 +250,13 @@ where
cgu.create_size_estimate(tcx);
}

debug_dump(tcx, "INITIAL PARTITIONING:", initial_partitioning.codegen_units.iter());
debug_dump(tcx, "INITIAL PARTITIONING", &initial_partitioning.codegen_units);

// Merge until we have at most `max_cgu_count` codegen units.
{
let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_merge_cgus");
partitioner.merge_codegen_units(cx, &mut initial_partitioning);
debug_dump(tcx, "POST MERGING:", initial_partitioning.codegen_units.iter());
debug_dump(tcx, "POST MERGING", &initial_partitioning.codegen_units);
}

// In the next step, we use the inlining map to determine which additional
Expand All @@ -272,7 +272,7 @@ where
cgu.create_size_estimate(tcx);
}

debug_dump(tcx, "POST INLINING:", post_inlining.codegen_units.iter());
debug_dump(tcx, "POST INLINING", &post_inlining.codegen_units);

// Next we try to make as many symbols "internal" as possible, so LLVM has
// more freedom to optimize.
Expand Down Expand Up @@ -322,6 +322,8 @@ where

result.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));

debug_dump(tcx, "FINAL", &result);

result
}

Expand All @@ -346,33 +348,37 @@ struct PostInliningPartitioning<'tcx> {
internalization_candidates: FxHashSet<MonoItem<'tcx>>,
}

fn debug_dump<'a, 'tcx, I>(tcx: TyCtxt<'tcx>, label: &str, cgus: I)
where
I: Iterator<Item = &'a CodegenUnit<'tcx>>,
'tcx: 'a,
{
fn debug_dump<'a, 'tcx: 'a>(tcx: TyCtxt<'tcx>, label: &str, cgus: &[CodegenUnit<'tcx>]) {
let dump = move || {
use std::fmt::Write;

let num_cgus = cgus.len();
let max = cgus.iter().map(|cgu| cgu.size_estimate()).max().unwrap();
let min = cgus.iter().map(|cgu| cgu.size_estimate()).min().unwrap();
let ratio = max as f64 / min as f64;

let s = &mut String::new();
let _ = writeln!(s, "{label}");
let _ = writeln!(
s,
"{label} ({num_cgus} CodegenUnits, max={max}, min={min}, max/min={ratio:.1}):"
);
for cgu in cgus {
let _ =
writeln!(s, "CodegenUnit {} estimated size {} :", cgu.name(), cgu.size_estimate());
writeln!(s, "CodegenUnit {} estimated size {}:", cgu.name(), cgu.size_estimate());

for (mono_item, linkage) in cgu.items() {
let symbol_name = mono_item.symbol_name(tcx).name;
let symbol_hash_start = symbol_name.rfind('h');
let symbol_hash = symbol_hash_start.map_or("<no hash>", |i| &symbol_name[i..]);

let _ = writeln!(
let _ = with_no_trimmed_paths!(writeln!(
s,
" - {} [{:?}] [{}] estimated size {}",
mono_item,
linkage,
symbol_hash,
mono_item.size_estimate(tcx)
);
));
}

let _ = writeln!(s);
Expand Down