diff --git a/compiler/rustc_monomorphize/src/partitioning/merging.rs b/compiler/rustc_monomorphize/src/partitioning/merging.rs index 5c524a18454ec..d84427a27b0ed 100644 --- a/compiler/rustc_monomorphize/src/partitioning/merging.rs +++ b/compiler/rustc_monomorphize/src/partitioning/merging.rs @@ -20,8 +20,8 @@ pub fn merge_codegen_units<'tcx>( // We want this merging to produce a deterministic ordering of codegen units // from the input. // - // Due to basically how we've implemented the merging below (merge the two - // smallest into each other) we're sure to start off with a deterministic + // Due to basically how we've implemented the merging below (repeatedly + // merging adjacent pairs of CGUs) we're sure to start off with a deterministic // order (sorted by name). This'll mean that if two cgus have the same size // the stable sort below will keep everything nice and deterministic. codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str())); @@ -30,29 +30,26 @@ pub fn merge_codegen_units<'tcx>( let mut cgu_contents: FxHashMap> = codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect(); - // Merge the two smallest codegen units until the target size is reached. + // Repeatedly merge cgu[n] into cgu[n-1]. while codegen_units.len() > cx.target_cgu_count { - // Sort small cgus to the back + // njn: more comments about this. + // Sort small cgus to the back. At this point codegen_units.sort_by_cached_key(|cgu| cmp::Reverse(cgu.size_estimate())); - let mut smallest = codegen_units.pop().unwrap(); - let second_smallest = codegen_units.last_mut().unwrap(); + let mut cgu_n = codegen_units.swap_remove(cx.target_cgu_count); + let cgu_n_minus_1 = &mut codegen_units[cx.target_cgu_count - 1]; - // Move the mono-items from `smallest` to `second_smallest` - second_smallest.modify_size_estimate(smallest.size_estimate()); - for (k, v) in smallest.items_mut().drain() { - second_smallest.items_mut().insert(k, v); + // Move the mono-items from `cgu_n` to `cgu_n_minus_1` + cgu_n_minus_1.modify_size_estimate(cgu_n.size_estimate()); + for (k, v) in cgu_n.items_mut().drain() { + cgu_n_minus_1.items_mut().insert(k, v); } - // Record that `second_smallest` now contains all the stuff that was in - // `smallest` before. - let mut consumed_cgu_names = cgu_contents.remove(&smallest.name()).unwrap(); - cgu_contents.get_mut(&second_smallest.name()).unwrap().append(&mut consumed_cgu_names); + // Record that `cgu_n_minus_1` now contains all the stuff that was in + // `cgu_n` before. + let mut consumed_cgu_names = cgu_contents.remove(&cgu_n.name()).unwrap(); + cgu_contents.get_mut(&cgu_n_minus_1.name()).unwrap().append(&mut consumed_cgu_names); - debug!( - "CodegenUnit {} merged into CodegenUnit {}", - smallest.name(), - second_smallest.name() - ); + debug!("CodegenUnit {} merged into CodegenUnit {}", cgu_n.name(), cgu_n_minus_1.name()); } let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx); diff --git a/compiler/rustc_monomorphize/src/partitioning/mod.rs b/compiler/rustc_monomorphize/src/partitioning/mod.rs index c10180ee3f489..eafe57a0c0207 100644 --- a/compiler/rustc_monomorphize/src/partitioning/mod.rs +++ b/compiler/rustc_monomorphize/src/partitioning/mod.rs @@ -250,13 +250,13 @@ where cgu.create_size_estimate(tcx); } - debug_dump(tcx, "INITIAL PARTITIONING:", initial_partitioning.codegen_units.iter()); + debug_dump(tcx, "INITIAL PARTITIONING", &initial_partitioning.codegen_units); // Merge until we have at most `max_cgu_count` codegen units. { let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_merge_cgus"); partitioner.merge_codegen_units(cx, &mut initial_partitioning); - debug_dump(tcx, "POST MERGING:", initial_partitioning.codegen_units.iter()); + debug_dump(tcx, "POST MERGING", &initial_partitioning.codegen_units); } // In the next step, we use the inlining map to determine which additional @@ -272,7 +272,7 @@ where cgu.create_size_estimate(tcx); } - debug_dump(tcx, "POST INLINING:", post_inlining.codegen_units.iter()); + debug_dump(tcx, "POST INLINING", &post_inlining.codegen_units); // Next we try to make as many symbols "internal" as possible, so LLVM has // more freedom to optimize. @@ -322,6 +322,8 @@ where result.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str())); + debug_dump(tcx, "FINAL", &result); + result } @@ -346,33 +348,37 @@ struct PostInliningPartitioning<'tcx> { internalization_candidates: FxHashSet>, } -fn debug_dump<'a, 'tcx, I>(tcx: TyCtxt<'tcx>, label: &str, cgus: I) -where - I: Iterator>, - 'tcx: 'a, -{ +fn debug_dump<'a, 'tcx: 'a>(tcx: TyCtxt<'tcx>, label: &str, cgus: &[CodegenUnit<'tcx>]) { let dump = move || { use std::fmt::Write; + let num_cgus = cgus.len(); + let max = cgus.iter().map(|cgu| cgu.size_estimate()).max().unwrap(); + let min = cgus.iter().map(|cgu| cgu.size_estimate()).min().unwrap(); + let ratio = max as f64 / min as f64; + let s = &mut String::new(); - let _ = writeln!(s, "{label}"); + let _ = writeln!( + s, + "{label} ({num_cgus} CodegenUnits, max={max}, min={min}, max/min={ratio:.1}):" + ); for cgu in cgus { let _ = - writeln!(s, "CodegenUnit {} estimated size {} :", cgu.name(), cgu.size_estimate()); + writeln!(s, "CodegenUnit {} estimated size {}:", cgu.name(), cgu.size_estimate()); for (mono_item, linkage) in cgu.items() { let symbol_name = mono_item.symbol_name(tcx).name; let symbol_hash_start = symbol_name.rfind('h'); let symbol_hash = symbol_hash_start.map_or("", |i| &symbol_name[i..]); - let _ = writeln!( + let _ = with_no_trimmed_paths!(writeln!( s, " - {} [{:?}] [{}] estimated size {}", mono_item, linkage, symbol_hash, mono_item.size_estimate(tcx) - ); + )); } let _ = writeln!(s);