diff --git a/compiler/rustc_monomorphize/src/partitioning/merging.rs b/compiler/rustc_monomorphize/src/partitioning/merging.rs
index 5c524a18454ec..d84427a27b0ed 100644
--- a/compiler/rustc_monomorphize/src/partitioning/merging.rs
+++ b/compiler/rustc_monomorphize/src/partitioning/merging.rs
@@ -20,8 +20,8 @@ pub fn merge_codegen_units<'tcx>(
     // We want this merging to produce a deterministic ordering of codegen units
     // from the input.
     //
-    // Due to basically how we've implemented the merging below (merge the two
-    // smallest into each other) we're sure to start off with a deterministic
+    // Due to basically how we've implemented the merging below (repeatedly
+    // merging adjacent pairs of CGUs) we're sure to start off with a deterministic
     // order (sorted by name). This'll mean that if two cgus have the same size
     // the stable sort below will keep everything nice and deterministic.
     codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
@@ -30,29 +30,26 @@ pub fn merge_codegen_units<'tcx>(
     let mut cgu_contents: FxHashMap<Symbol, Vec<Symbol>> =
         codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect();
 
-    // Merge the two smallest codegen units until the target size is reached.
+    // Repeatedly merge cgu[n] into cgu[n-1].
     while codegen_units.len() > cx.target_cgu_count {
-        // Sort small cgus to the back
+        // njn: more comments about this.
+        // Sort small cgus to the back. At this point
         codegen_units.sort_by_cached_key(|cgu| cmp::Reverse(cgu.size_estimate()));
-        let mut smallest = codegen_units.pop().unwrap();
-        let second_smallest = codegen_units.last_mut().unwrap();
+        let mut cgu_n = codegen_units.swap_remove(cx.target_cgu_count);
+        let cgu_n_minus_1 = &mut codegen_units[cx.target_cgu_count - 1];
 
-        // Move the mono-items from `smallest` to `second_smallest`
-        second_smallest.modify_size_estimate(smallest.size_estimate());
-        for (k, v) in smallest.items_mut().drain() {
-            second_smallest.items_mut().insert(k, v);
+        // Move the mono-items from `cgu_n` to `cgu_n_minus_1`
+        cgu_n_minus_1.modify_size_estimate(cgu_n.size_estimate());
+        for (k, v) in cgu_n.items_mut().drain() {
+            cgu_n_minus_1.items_mut().insert(k, v);
         }
 
-        // Record that `second_smallest` now contains all the stuff that was in
-        // `smallest` before.
-        let mut consumed_cgu_names = cgu_contents.remove(&smallest.name()).unwrap();
-        cgu_contents.get_mut(&second_smallest.name()).unwrap().append(&mut consumed_cgu_names);
+        // Record that `cgu_n_minus_1` now contains all the stuff that was in
+        // `cgu_n` before.
+        let mut consumed_cgu_names = cgu_contents.remove(&cgu_n.name()).unwrap();
+        cgu_contents.get_mut(&cgu_n_minus_1.name()).unwrap().append(&mut consumed_cgu_names);
 
-        debug!(
-            "CodegenUnit {} merged into CodegenUnit {}",
-            smallest.name(),
-            second_smallest.name()
-        );
+        debug!("CodegenUnit {} merged into CodegenUnit {}", cgu_n.name(), cgu_n_minus_1.name());
     }
 
     let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);
diff --git a/compiler/rustc_monomorphize/src/partitioning/mod.rs b/compiler/rustc_monomorphize/src/partitioning/mod.rs
index c10180ee3f489..eafe57a0c0207 100644
--- a/compiler/rustc_monomorphize/src/partitioning/mod.rs
+++ b/compiler/rustc_monomorphize/src/partitioning/mod.rs
@@ -250,13 +250,13 @@ where
         cgu.create_size_estimate(tcx);
     }
 
-    debug_dump(tcx, "INITIAL PARTITIONING:", initial_partitioning.codegen_units.iter());
+    debug_dump(tcx, "INITIAL PARTITIONING", &initial_partitioning.codegen_units);
 
     // Merge until we have at most `max_cgu_count` codegen units.
     {
         let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_merge_cgus");
         partitioner.merge_codegen_units(cx, &mut initial_partitioning);
-        debug_dump(tcx, "POST MERGING:", initial_partitioning.codegen_units.iter());
+        debug_dump(tcx, "POST MERGING", &initial_partitioning.codegen_units);
     }
 
     // In the next step, we use the inlining map to determine which additional
@@ -272,7 +272,7 @@ where
         cgu.create_size_estimate(tcx);
     }
 
-    debug_dump(tcx, "POST INLINING:", post_inlining.codegen_units.iter());
+    debug_dump(tcx, "POST INLINING", &post_inlining.codegen_units);
 
     // Next we try to make as many symbols "internal" as possible, so LLVM has
     // more freedom to optimize.
@@ -322,6 +322,8 @@ where
 
     result.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
 
+    debug_dump(tcx, "FINAL", &result);
+
     result
 }
 
@@ -346,33 +348,37 @@ struct PostInliningPartitioning<'tcx> {
     internalization_candidates: FxHashSet<MonoItem<'tcx>>,
 }
 
-fn debug_dump<'a, 'tcx, I>(tcx: TyCtxt<'tcx>, label: &str, cgus: I)
-where
-    I: Iterator<Item = &'a CodegenUnit<'tcx>>,
-    'tcx: 'a,
-{
+fn debug_dump<'a, 'tcx: 'a>(tcx: TyCtxt<'tcx>, label: &str, cgus: &[CodegenUnit<'tcx>]) {
     let dump = move || {
         use std::fmt::Write;
 
+        let num_cgus = cgus.len();
+        let max = cgus.iter().map(|cgu| cgu.size_estimate()).max().unwrap();
+        let min = cgus.iter().map(|cgu| cgu.size_estimate()).min().unwrap();
+        let ratio = max as f64 / min as f64;
+
         let s = &mut String::new();
-        let _ = writeln!(s, "{label}");
+        let _ = writeln!(
+            s,
+            "{label} ({num_cgus} CodegenUnits, max={max}, min={min}, max/min={ratio:.1}):"
+        );
         for cgu in cgus {
             let _ =
-                writeln!(s, "CodegenUnit {} estimated size {} :", cgu.name(), cgu.size_estimate());
+                writeln!(s, "CodegenUnit {} estimated size {}:", cgu.name(), cgu.size_estimate());
 
             for (mono_item, linkage) in cgu.items() {
                 let symbol_name = mono_item.symbol_name(tcx).name;
                 let symbol_hash_start = symbol_name.rfind('h');
                 let symbol_hash = symbol_hash_start.map_or("<no hash>", |i| &symbol_name[i..]);
 
-                let _ = writeln!(
+                let _ = with_no_trimmed_paths!(writeln!(
                     s,
                     " - {} [{:?}] [{}] estimated size {}",
                     mono_item,
                     linkage,
                     symbol_hash,
                     mono_item.size_estimate(tcx)
-                );
+                ));
             }
 
             let _ = writeln!(s);