diff --git a/compiler/rustc_middle/src/mir/mono.rs b/compiler/rustc_middle/src/mir/mono.rs
index ff54ec56a29ac..e7aeb877873db 100644
--- a/compiler/rustc_middle/src/mir/mono.rs
+++ b/compiler/rustc_middle/src/mir/mono.rs
@@ -3,7 +3,7 @@ use crate::ty::{subst::InternalSubsts, Instance, InstanceDef, SymbolName, TyCtxt
 use rustc_attr::InlineAttr;
 use rustc_data_structures::base_n;
 use rustc_data_structures::fingerprint::Fingerprint;
-use rustc_data_structures::fx::FxHashMap;
+use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
 use rustc_data_structures::stable_hasher::{Hash128, HashStable, StableHasher};
 use rustc_hir::def_id::{CrateNum, DefId, LOCAL_CRATE};
 use rustc_hir::ItemId;
@@ -230,7 +230,7 @@ pub struct CodegenUnit<'tcx> {
     /// contain something unique to this crate (e.g., a module path)
     /// as well as the crate name and disambiguator.
     name: Symbol,
-    items: FxHashMap<MonoItem<'tcx>, (Linkage, Visibility)>,
+    items: FxIndexMap<MonoItem<'tcx>, (Linkage, Visibility)>,
     size_estimate: Option<usize>,
     primary: bool,
     /// True if this is CGU is used to hold code coverage information for dead code,
@@ -291,11 +291,11 @@ impl<'tcx> CodegenUnit<'tcx> {
         self.primary = true;
     }
 
-    pub fn items(&self) -> &FxHashMap<MonoItem<'tcx>, (Linkage, Visibility)> {
+    pub fn items(&self) -> &FxIndexMap<MonoItem<'tcx>, (Linkage, Visibility)> {
         &self.items
     }
 
-    pub fn items_mut(&mut self) -> &mut FxHashMap<MonoItem<'tcx>, (Linkage, Visibility)> {
+    pub fn items_mut(&mut self) -> &mut FxIndexMap<MonoItem<'tcx>, (Linkage, Visibility)> {
         &mut self.items
     }
 
@@ -333,13 +333,22 @@ impl<'tcx> CodegenUnit<'tcx> {
             .expect("create_size_estimate must be called before getting a size_estimate")
     }
 
-    pub fn modify_size_estimate(&mut self, delta: usize) {
+    pub fn increase_size_estimate(&mut self, delta: usize) {
+        // njn: make this nicer, with as_mut().expect()
         assert!(self.size_estimate.is_some());
         if let Some(size_estimate) = self.size_estimate {
             self.size_estimate = Some(size_estimate + delta);
         }
     }
 
+    /// Subtracts `delta` from the size estimate. Panics if no estimate has been created yet or
+    /// if `delta` is larger than the current estimate.
+    pub fn decrease_size_estimate(&mut self, delta: usize) {
+        let size_estimate = self.size_estimate.as_mut().expect("size estimate must exist");
+        *size_estimate =
+            size_estimate.checked_sub(delta).expect("delta exceeds the CGU size estimate");
+    }
+
     pub fn contains_item(&self, item: &MonoItem<'tcx>) -> bool {
         self.items().contains_key(item)
     }
@@ -355,6 +364,47 @@ impl<'tcx> CodegenUnit<'tcx> {
             .unwrap_or_else(|| panic!("Could not find work-product for CGU `{}`", self.name()))
     }
 
+    /// Sorts this CGU's items in place. njn: dups code in `items_in_deterministic_order`.
+    pub fn sort_items(&mut self, tcx: TyCtxt<'tcx>) {
+        // The codegen tests rely on items being processed in the same order as
+        // they appear in the file, so for local items, we sort by node_id first.
+        #[derive(PartialEq, Eq, PartialOrd, Ord)]
+        pub struct ItemSortKey<'tcx>(Option<usize>, SymbolName<'tcx>);
+
+        fn item_sort_key<'tcx>(tcx: TyCtxt<'tcx>, item: MonoItem<'tcx>) -> ItemSortKey<'tcx> {
+            ItemSortKey(
+                match item {
+                    MonoItem::Fn(ref instance) => {
+                        match instance.def {
+                            // We only want to take HirIds of user-defined
+                            // instances into account. The others don't matter for
+                            // the codegen tests and can even make item order
+                            // unstable.
+                            InstanceDef::Item(def) => def.as_local().map(Idx::index),
+                            InstanceDef::VTableShim(..)
+                            | InstanceDef::ReifyShim(..)
+                            | InstanceDef::Intrinsic(..)
+                            | InstanceDef::FnPtrShim(..)
+                            | InstanceDef::Virtual(..)
+                            | InstanceDef::ClosureOnceShim { .. }
+                            | InstanceDef::DropGlue(..)
+                            | InstanceDef::CloneShim(..)
+                            | InstanceDef::ThreadLocalShim(..)
+                            | InstanceDef::FnPtrAddrShim(..) => None,
+                        }
+                    }
+                    MonoItem::Static(def_id) => def_id.as_local().map(Idx::index),
+                    MonoItem::GlobalAsm(item_id) => Some(item_id.owner_id.def_id.index()),
+                },
+                item.symbol_name(tcx),
+            )
+        }
+
+        self.items_mut().sort_by(|&i1, _, &i2, _| {
+            std::cmp::Ord::cmp(&item_sort_key(tcx, i1), &item_sort_key(tcx, i2))
+        });
+    }
+
     pub fn items_in_deterministic_order(
         &self,
         tcx: TyCtxt<'tcx>,
diff --git a/compiler/rustc_monomorphize/src/lib.rs b/compiler/rustc_monomorphize/src/lib.rs
index ecc50c3f664fd..8221a8a970f28 100644
--- a/compiler/rustc_monomorphize/src/lib.rs
+++ b/compiler/rustc_monomorphize/src/lib.rs
@@ -1,4 +1,5 @@
 #![feature(array_windows)]
+#![feature(hash_drain_filter)]
 #![recursion_limit = "256"]
 #![allow(rustc::potential_query_instability)]
 #![deny(rustc::untranslatable_diagnostic)]
diff --git a/compiler/rustc_monomorphize/src/partitioning/default.rs b/compiler/rustc_monomorphize/src/partitioning/default.rs
index 37b7f6bf8a8fc..23a68614d46d1 100644
--- a/compiler/rustc_monomorphize/src/partitioning/default.rs
+++ b/compiler/rustc_monomorphize/src/partitioning/default.rs
@@ -1,3 +1,4 @@
+use std::cmp;
 use std::collections::hash_map::Entry;
 
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
@@ -14,10 +15,7 @@ use rustc_span::symbol::Symbol;
 
 use super::PartitioningCx;
 use crate::collector::InliningMap;
-use crate::partitioning::merging;
-use crate::partitioning::{
-    MonoItemPlacement, Partition, PostInliningPartitioning, PreInliningPartitioning,
-};
+use crate::partitioning::{MonoItemPlacement, Partition};
 
 pub struct DefaultPartitioning;
 
@@ -26,7 +24,7 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
         &mut self,
         cx: &PartitioningCx<'_, 'tcx>,
         mono_items: &mut I,
-    ) -> PreInliningPartitioning<'tcx>
+    ) -> (Vec<CodegenUnit<'tcx>>, FxHashSet<MonoItem<'tcx>>, FxHashSet<MonoItem<'tcx>>)
     where
         I: Iterator<Item = MonoItem<'tcx>>,
     {
@@ -91,38 +89,190 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
             codegen_units.insert(codegen_unit_name, CodegenUnit::new(codegen_unit_name));
         }
 
-        PreInliningPartitioning {
-            codegen_units: codegen_units.into_values().collect(),
-            roots,
-            internalization_candidates,
-        }
+        (codegen_units.into_values().collect(), roots, internalization_candidates)
     }
 
     fn merge_codegen_units(
         &mut self,
         cx: &PartitioningCx<'_, 'tcx>,
-        initial_partitioning: &mut PreInliningPartitioning<'tcx>,
+        codegen_units: &mut Vec<CodegenUnit<'tcx>>,
     ) {
-        merging::merge_codegen_units(cx, initial_partitioning);
+        assert!(cx.target_cgu_count >= 1);
+
+        // Note that at this point in time the `codegen_units` here may not be
+        // in a deterministic order (but we know they're deterministically the
+        // same set). We want this merging to produce a deterministic ordering
+        // of codegen units from the input.
+        //
+        // Due to basically how we've implemented the merging below (repeatedly
+        // merging adjacent pairs of CGUs) we're sure to start off with a
+        // deterministic order (sorted by name). This'll mean that if two cgus
+        // have the same size the stable sort below will keep everything nice
+        // and deterministic.
+        codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
+
+        //---------------------------------------------------------------------------
+        // Split CGUs whose size estimate exceeds the per-CGU target.
+        if codegen_units.len() > cx.target_cgu_count {
+            // Per-CGU size target, clamped to >= 1 so each split moves at least one item.
+            let total_size: usize = codegen_units.iter().map(|cgu| cgu.size_estimate()).sum();
+            let target_size = cmp::max(total_size / cx.target_cgu_count, 1);
+            //eprintln!("----");
+            //eprintln!("SPLIT0: total:{} target:{}", total_size, target_size);
+            // Index-based `while` loop, because new CGUs are pushed onto `codegen_units`
+            // as we go. Only the first `n` (original) CGUs are examined; the pushed
+            // split-off CGUs are never re-split.
+            let mut i = 0;
+            let mut j = 0; // number of splits made so far from `codegen_units[i]`
+            let n = codegen_units.len();
+            while i < n {
+                let old_cgu = &mut codegen_units[i];
+                if old_cgu.size_estimate() > target_size && old_cgu.items().len() > 1 {
+                    //eprintln!("SPLIT1: old:{} old:{}", old_cgu.size_estimate(), old_cgu.name());
+
+                    // This CGU is too big, so split part of it off into a new CGU.
+                    // A very big CGU gets split multiple times, because `i` is not
+                    // advanced until it is small enough or down to a single item.
+
+                    let mut new_name = old_cgu.name().to_string();
+                    new_name += &format!("-split{}", j);
+                    let mut new_cgu = CodegenUnit::new(Symbol::intern(&new_name));
+                    new_cgu.create_size_estimate(cx.tcx); // initially zero
+
+                    // njn: size stuff is a bit clumsy
+                    let mut moved_size = 0;
+
+                    // Always leave at least one item behind so `old_cgu` is never emptied.
+
+                    // Sort the items first: non-deterministic iteration results in
+                    // non-deterministic splitting, which messes up incremental
+                    // compilation.
+
+                    old_cgu.sort_items(cx.tcx);
+                    while moved_size < target_size && old_cgu.items().len() > 1 {
+                        let (item, rest) = old_cgu.items_mut().pop().unwrap();
+                        moved_size += item.size_estimate(cx.tcx);
+                        new_cgu.items_mut().insert(item, rest);
+                    }
+
+                    // njn: nicer way to do this?
+                    // (The loop above already refuses to move the last item.)
+                    //old_cgu.items_mut().drain_filter(|item, rest| {
+                    //    // njn: true->remove
+                    //    if moved_size < target_size {
+                    //        let item_size = item.size_estimate(cx.tcx);
+                    //        //eprintln!("MOVE: {}", item_size);
+                    //        moved_size += item_size;
+                    //        new_cgu.items_mut().insert(*item, *rest);
+                    //        true
+                    //    } else {
+                    //        false
+                    //    }
+                    //});
+                    new_cgu.increase_size_estimate(moved_size);
+                    old_cgu.decrease_size_estimate(moved_size);
+
+                    //eprintln!("SPLIT2: old:{} -> new:{} new:{}", old_cgu.size_estimate(), new_cgu.size_estimate(), new_cgu.name());
+
+                    codegen_units.push(new_cgu);
+                    // No `i += 1` here: re-examine the same CGU, which may still be too big.
+                    j += 1;
+                } else {
+                    // Done with this CGU: move to the next one and restart the split numbering.
+                    i += 1;
+                    j = 0;
+                }
+            }
+        }
+        //---------------------------------------------------------------------------
+
+        // This map keeps track of what got merged into what.
+        let mut cgu_contents: FxHashMap<Symbol, Vec<Symbol>> =
+            codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect();
+
+        // Repeatedly merge cgu[n] into cgu[n-1].
+        while codegen_units.len() > cx.target_cgu_count {
+            // Sort small cgus to the back, so that indices `0..target_cgu_count`
+            // hold the largest CGUs and everything after them is a merge candidate.
+            codegen_units.sort_by_cached_key(|cgu| cmp::Reverse(cgu.size_estimate()));
+            let mut cgu_n = codegen_units.swap_remove(cx.target_cgu_count);
+            let cgu_n_minus_1 = &mut codegen_units[cx.target_cgu_count - 1];
+
+            // Move the mono-items from `cgu_n` to `cgu_n_minus_1`
+            cgu_n_minus_1.increase_size_estimate(cgu_n.size_estimate());
+            for (k, v) in cgu_n.items_mut().drain(..) {
+                cgu_n_minus_1.items_mut().insert(k, v);
+            }
+
+            // Record that `cgu_n_minus_1` now contains all the stuff that was in
+            // `cgu_n` before.
+            let mut consumed_cgu_names = cgu_contents.remove(&cgu_n.name()).unwrap();
+            cgu_contents.get_mut(&cgu_n_minus_1.name()).unwrap().append(&mut consumed_cgu_names);
+
+            debug!("CodegenUnit {} merged into CodegenUnit {}", cgu_n.name(), cgu_n_minus_1.name());
+        }
+
+        let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);
+
+        if cx.tcx.sess.opts.incremental.is_some() {
+            // If we are doing incremental compilation, we want CGU names to
+            // reflect the path of the source level module they correspond to.
+            // For CGUs that contain the code of multiple modules because of the
+            // merging done above, we use a concatenation of the names of all
+            // contained CGUs.
+            let new_cgu_names: FxHashMap<Symbol, String> = cgu_contents
+                .into_iter()
+                // This `filter` makes sure we only update the name of CGUs that
+                // were actually modified by merging.
+                .filter(|(_, cgu_contents)| cgu_contents.len() > 1)
+                .map(|(current_cgu_name, cgu_contents)| {
+                    let mut cgu_contents: Vec<&str> =
+                        cgu_contents.iter().map(|s| s.as_str()).collect();
+
+                    // Sort the names, so things are deterministic and easy to
+                    // predict. We are sorting primitive `&str`s here so we can
+                    // use unstable sort.
+                    cgu_contents.sort_unstable();
+
+                    (current_cgu_name, cgu_contents.join("--"))
+                })
+                .collect();
+
+            for cgu in codegen_units.iter_mut() {
+                if let Some(new_cgu_name) = new_cgu_names.get(&cgu.name()) {
+                    if cx.tcx.sess.opts.unstable_opts.human_readable_cgu_names {
+                        cgu.set_name(Symbol::intern(&new_cgu_name));
+                    } else {
+                        // If we don't require CGU names to be human-readable,
+                        // we use a fixed length hash of the composite CGU name
+                        // instead.
+                        let new_cgu_name = CodegenUnit::mangle_name(&new_cgu_name);
+                        cgu.set_name(Symbol::intern(&new_cgu_name));
+                    }
+                }
+            }
+        } else {
+            // If we are compiling non-incrementally we just generate simple CGU
+            // names containing an index.
+            for (index, cgu) in codegen_units.iter_mut().enumerate() {
+                let numbered_codegen_unit_name =
+                    cgu_name_builder.build_cgu_name_no_mangle(LOCAL_CRATE, &["cgu"], Some(index));
+                cgu.set_name(numbered_codegen_unit_name);
+            }
+        }
     }
 
     fn place_inlined_mono_items(
         &mut self,
         cx: &PartitioningCx<'_, 'tcx>,
-        initial_partitioning: PreInliningPartitioning<'tcx>,
-    ) -> PostInliningPartitioning<'tcx> {
-        let mut new_partitioning = Vec::new();
+        codegen_units: &mut [CodegenUnit<'tcx>],
+        roots: FxHashSet<MonoItem<'tcx>>,
+    ) -> FxHashMap<MonoItem<'tcx>, MonoItemPlacement> {
         let mut mono_item_placements = FxHashMap::default();
 
-        let PreInliningPartitioning {
-            codegen_units: initial_cgus,
-            roots,
-            internalization_candidates,
-        } = initial_partitioning;
+        let single_codegen_unit = codegen_units.len() == 1;
 
-        let single_codegen_unit = initial_cgus.len() == 1;
-
-        for old_codegen_unit in initial_cgus {
+        for old_codegen_unit in codegen_units.iter_mut() {
             // Collect all items that need to be available in this codegen unit.
             let mut reachable = FxHashSet::default();
             for root in old_codegen_unit.items().keys() {
@@ -174,14 +324,10 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
                 }
             }
 
-            new_partitioning.push(new_codegen_unit);
+            *old_codegen_unit = new_codegen_unit;
         }
 
-        return PostInliningPartitioning {
-            codegen_units: new_partitioning,
-            mono_item_placements,
-            internalization_candidates,
-        };
+        return mono_item_placements;
 
         fn follow_inlining<'tcx>(
             mono_item: MonoItem<'tcx>,
@@ -201,14 +347,16 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
     fn internalize_symbols(
         &mut self,
         cx: &PartitioningCx<'_, 'tcx>,
-        partitioning: &mut PostInliningPartitioning<'tcx>,
+        codegen_units: &mut [CodegenUnit<'tcx>],
+        mono_item_placements: FxHashMap<MonoItem<'tcx>, MonoItemPlacement>,
+        internalization_candidates: FxHashSet<MonoItem<'tcx>>,
     ) {
-        if partitioning.codegen_units.len() == 1 {
+        if codegen_units.len() == 1 {
             // Fast path for when there is only one codegen unit. In this case we
             // can internalize all candidates, since there is nowhere else they
             // could be accessed from.
-            for cgu in &mut partitioning.codegen_units {
-                for candidate in &partitioning.internalization_candidates {
+            for cgu in codegen_units {
+                for candidate in &internalization_candidates {
                     cgu.items_mut().insert(*candidate, (Linkage::Internal, Visibility::Default));
                 }
             }
@@ -225,15 +373,13 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
             }
         });
 
-        let mono_item_placements = &partitioning.mono_item_placements;
-
         // For each internalization candidates in each codegen unit, check if it is
         // accessed from outside its defining codegen unit.
-        for cgu in &mut partitioning.codegen_units {
+        for cgu in codegen_units {
             let home_cgu = MonoItemPlacement::SingleCgu { cgu_name: cgu.name() };
 
             for (accessee, linkage_and_visibility) in cgu.items_mut() {
-                if !partitioning.internalization_candidates.contains(accessee) {
+                if !internalization_candidates.contains(accessee) {
                     // This item is no candidate for internalizing, so skip it.
                     continue;
                 }
diff --git a/compiler/rustc_monomorphize/src/partitioning/merging.rs b/compiler/rustc_monomorphize/src/partitioning/merging.rs
deleted file mode 100644
index 5c524a18454ec..0000000000000
--- a/compiler/rustc_monomorphize/src/partitioning/merging.rs
+++ /dev/null
@@ -1,111 +0,0 @@
-use std::cmp;
-
-use rustc_data_structures::fx::FxHashMap;
-use rustc_hir::def_id::LOCAL_CRATE;
-use rustc_middle::mir::mono::{CodegenUnit, CodegenUnitNameBuilder};
-use rustc_span::symbol::Symbol;
-
-use super::PartitioningCx;
-use crate::partitioning::PreInliningPartitioning;
-
-pub fn merge_codegen_units<'tcx>(
-    cx: &PartitioningCx<'_, 'tcx>,
-    initial_partitioning: &mut PreInliningPartitioning<'tcx>,
-) {
-    assert!(cx.target_cgu_count >= 1);
-    let codegen_units = &mut initial_partitioning.codegen_units;
-
-    // Note that at this point in time the `codegen_units` here may not be in a
-    // deterministic order (but we know they're deterministically the same set).
-    // We want this merging to produce a deterministic ordering of codegen units
-    // from the input.
-    //
-    // Due to basically how we've implemented the merging below (merge the two
-    // smallest into each other) we're sure to start off with a deterministic
-    // order (sorted by name). This'll mean that if two cgus have the same size
-    // the stable sort below will keep everything nice and deterministic.
-    codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
-
-    // This map keeps track of what got merged into what.
-    let mut cgu_contents: FxHashMap<Symbol, Vec<Symbol>> =
-        codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect();
-
-    // Merge the two smallest codegen units until the target size is reached.
-    while codegen_units.len() > cx.target_cgu_count {
-        // Sort small cgus to the back
-        codegen_units.sort_by_cached_key(|cgu| cmp::Reverse(cgu.size_estimate()));
-        let mut smallest = codegen_units.pop().unwrap();
-        let second_smallest = codegen_units.last_mut().unwrap();
-
-        // Move the mono-items from `smallest` to `second_smallest`
-        second_smallest.modify_size_estimate(smallest.size_estimate());
-        for (k, v) in smallest.items_mut().drain() {
-            second_smallest.items_mut().insert(k, v);
-        }
-
-        // Record that `second_smallest` now contains all the stuff that was in
-        // `smallest` before.
-        let mut consumed_cgu_names = cgu_contents.remove(&smallest.name()).unwrap();
-        cgu_contents.get_mut(&second_smallest.name()).unwrap().append(&mut consumed_cgu_names);
-
-        debug!(
-            "CodegenUnit {} merged into CodegenUnit {}",
-            smallest.name(),
-            second_smallest.name()
-        );
-    }
-
-    let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);
-
-    if cx.tcx.sess.opts.incremental.is_some() {
-        // If we are doing incremental compilation, we want CGU names to
-        // reflect the path of the source level module they correspond to.
-        // For CGUs that contain the code of multiple modules because of the
-        // merging done above, we use a concatenation of the names of
-        // all contained CGUs.
-        let new_cgu_names: FxHashMap<Symbol, String> = cgu_contents
-            .into_iter()
-            // This `filter` makes sure we only update the name of CGUs that
-            // were actually modified by merging.
-            .filter(|(_, cgu_contents)| cgu_contents.len() > 1)
-            .map(|(current_cgu_name, cgu_contents)| {
-                let mut cgu_contents: Vec<&str> = cgu_contents.iter().map(|s| s.as_str()).collect();
-
-                // Sort the names, so things are deterministic and easy to
-                // predict.
-
-                // We are sorting primitive &strs here so we can use unstable sort
-                cgu_contents.sort_unstable();
-
-                (current_cgu_name, cgu_contents.join("--"))
-            })
-            .collect();
-
-        for cgu in codegen_units.iter_mut() {
-            if let Some(new_cgu_name) = new_cgu_names.get(&cgu.name()) {
-                if cx.tcx.sess.opts.unstable_opts.human_readable_cgu_names {
-                    cgu.set_name(Symbol::intern(&new_cgu_name));
-                } else {
-                    // If we don't require CGU names to be human-readable, we
-                    // use a fixed length hash of the composite CGU name
-                    // instead.
-                    let new_cgu_name = CodegenUnit::mangle_name(&new_cgu_name);
-                    cgu.set_name(Symbol::intern(&new_cgu_name));
-                }
-            }
-        }
-    } else {
-        // If we are compiling non-incrementally we just generate simple CGU
-        // names containing an index.
-        for (index, cgu) in codegen_units.iter_mut().enumerate() {
-            cgu.set_name(numbered_codegen_unit_name(cgu_name_builder, index));
-        }
-    }
-}
-
-fn numbered_codegen_unit_name(
-    name_builder: &mut CodegenUnitNameBuilder<'_>,
-    index: usize,
-) -> Symbol {
-    name_builder.build_cgu_name_no_mangle(LOCAL_CRATE, &["cgu"], Some(index))
-}
diff --git a/compiler/rustc_monomorphize/src/partitioning/mod.rs b/compiler/rustc_monomorphize/src/partitioning/mod.rs
index eafe57a0c0207..25c10df18a18c 100644
--- a/compiler/rustc_monomorphize/src/partitioning/mod.rs
+++ b/compiler/rustc_monomorphize/src/partitioning/mod.rs
@@ -93,7 +93,6 @@
 //! inlining, even when they are not marked `#[inline]`.
 
 mod default;
-mod merging;
 
 use std::cmp;
 use std::fs::{self, File};
@@ -129,7 +128,7 @@ impl<'tcx> Partition<'tcx> for Partitioner {
         &mut self,
         cx: &PartitioningCx<'_, 'tcx>,
         mono_items: &mut I,
-    ) -> PreInliningPartitioning<'tcx>
+    ) -> (Vec<CodegenUnit<'tcx>>, FxHashSet<MonoItem<'tcx>>, FxHashSet<MonoItem<'tcx>>)
     where
         I: Iterator<Item = MonoItem<'tcx>>,
     {
@@ -142,12 +141,10 @@ impl<'tcx> Partition<'tcx> for Partitioner {
     fn merge_codegen_units(
         &mut self,
         cx: &PartitioningCx<'_, 'tcx>,
-        initial_partitioning: &mut PreInliningPartitioning<'tcx>,
+        codegen_units: &mut Vec<CodegenUnit<'tcx>>,
     ) {
         match self {
-            Partitioner::Default(partitioner) => {
-                partitioner.merge_codegen_units(cx, initial_partitioning)
-            }
+            Partitioner::Default(partitioner) => partitioner.merge_codegen_units(cx, codegen_units),
             Partitioner::Unknown => cx.tcx.sess.emit_fatal(UnknownPartitionStrategy),
         }
     }
@@ -155,11 +152,12 @@ impl<'tcx> Partition<'tcx> for Partitioner {
     fn place_inlined_mono_items(
         &mut self,
         cx: &PartitioningCx<'_, 'tcx>,
-        initial_partitioning: PreInliningPartitioning<'tcx>,
-    ) -> PostInliningPartitioning<'tcx> {
+        codegen_units: &mut [CodegenUnit<'tcx>],
+        roots: FxHashSet<MonoItem<'tcx>>,
+    ) -> FxHashMap<MonoItem<'tcx>, MonoItemPlacement> {
         match self {
             Partitioner::Default(partitioner) => {
-                partitioner.place_inlined_mono_items(cx, initial_partitioning)
+                partitioner.place_inlined_mono_items(cx, codegen_units, roots)
             }
             Partitioner::Unknown => cx.tcx.sess.emit_fatal(UnknownPartitionStrategy),
         }
@@ -168,12 +166,17 @@ impl<'tcx> Partition<'tcx> for Partitioner {
     fn internalize_symbols(
         &mut self,
         cx: &PartitioningCx<'_, 'tcx>,
-        post_inlining_partitioning: &mut PostInliningPartitioning<'tcx>,
+        codegen_units: &mut [CodegenUnit<'tcx>],
+        mono_item_placements: FxHashMap<MonoItem<'tcx>, MonoItemPlacement>,
+        internalization_candidates: FxHashSet<MonoItem<'tcx>>,
     ) {
         match self {
-            Partitioner::Default(partitioner) => {
-                partitioner.internalize_symbols(cx, post_inlining_partitioning)
-            }
+            Partitioner::Default(partitioner) => partitioner.internalize_symbols(
+                cx,
+                codegen_units,
+                mono_item_placements,
+                internalization_candidates,
+            ),
             Partitioner::Unknown => cx.tcx.sess.emit_fatal(UnknownPartitionStrategy),
         }
     }
@@ -190,26 +193,29 @@ trait Partition<'tcx> {
         &mut self,
         cx: &PartitioningCx<'_, 'tcx>,
         mono_items: &mut I,
-    ) -> PreInliningPartitioning<'tcx>
+    ) -> (Vec<CodegenUnit<'tcx>>, FxHashSet<MonoItem<'tcx>>, FxHashSet<MonoItem<'tcx>>)
     where
         I: Iterator<Item = MonoItem<'tcx>>;
 
     fn merge_codegen_units(
         &mut self,
         cx: &PartitioningCx<'_, 'tcx>,
-        initial_partitioning: &mut PreInliningPartitioning<'tcx>,
+        codegen_units: &mut Vec<CodegenUnit<'tcx>>,
     );
 
     fn place_inlined_mono_items(
         &mut self,
         cx: &PartitioningCx<'_, 'tcx>,
-        initial_partitioning: PreInliningPartitioning<'tcx>,
-    ) -> PostInliningPartitioning<'tcx>;
+        codegen_units: &mut [CodegenUnit<'tcx>],
+        roots: FxHashSet<MonoItem<'tcx>>,
+    ) -> FxHashMap<MonoItem<'tcx>, MonoItemPlacement>;
 
     fn internalize_symbols(
         &mut self,
         cx: &PartitioningCx<'_, 'tcx>,
-        partitioning: &mut PostInliningPartitioning<'tcx>,
+        codegen_units: &mut [CodegenUnit<'tcx>],
+        mono_item_placements: FxHashMap<MonoItem<'tcx>, MonoItemPlacement>,
+        internalization_candidates: FxHashSet<MonoItem<'tcx>>,
     );
 }
 
@@ -241,44 +247,51 @@ where
     // In the first step, we place all regular monomorphizations into their
     // respective 'home' codegen unit. Regular monomorphizations are all
     // functions and statics defined in the local crate.
-    let mut initial_partitioning = {
+    let (mut codegen_units, roots, internalization_candidates) = {
         let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_place_roots");
         partitioner.place_root_mono_items(cx, mono_items)
     };
 
-    for cgu in &mut initial_partitioning.codegen_units {
+    for cgu in &mut codegen_units {
         cgu.create_size_estimate(tcx);
     }
 
-    debug_dump(tcx, "INITIAL PARTITIONING", &initial_partitioning.codegen_units);
+    debug_dump(tcx, "INITIAL PARTITIONING", &codegen_units);
 
     // Merge until we have at most `max_cgu_count` codegen units.
+    // `merge_codegen_units` is responsible for updating the CGU size
+    // estimates.
     {
         let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_merge_cgus");
-        partitioner.merge_codegen_units(cx, &mut initial_partitioning);
-        debug_dump(tcx, "POST MERGING", &initial_partitioning.codegen_units);
+        partitioner.merge_codegen_units(cx, &mut codegen_units);
+        debug_dump(tcx, "POST MERGING", &codegen_units);
     }
 
     // In the next step, we use the inlining map to determine which additional
     // monomorphizations have to go into each codegen unit. These additional
     // monomorphizations can be drop-glue, functions from external crates, and
     // local functions the definition of which is marked with `#[inline]`.
-    let mut post_inlining = {
+    let mono_item_placements = {
         let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_place_inline_items");
-        partitioner.place_inlined_mono_items(cx, initial_partitioning)
+        partitioner.place_inlined_mono_items(cx, &mut codegen_units, roots)
     };
 
-    for cgu in &mut post_inlining.codegen_units {
+    for cgu in &mut codegen_units {
         cgu.create_size_estimate(tcx);
     }
 
-    debug_dump(tcx, "POST INLINING", &post_inlining.codegen_units);
+    debug_dump(tcx, "POST INLINING", &codegen_units);
 
     // Next we try to make as many symbols "internal" as possible, so LLVM has
     // more freedom to optimize.
     if !tcx.sess.link_dead_code() {
         let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_internalize_symbols");
-        partitioner.internalize_symbols(cx, &mut post_inlining);
+        partitioner.internalize_symbols(
+            cx,
+            &mut codegen_units,
+            mono_item_placements,
+            internalization_candidates,
+        );
     }
 
     let instrument_dead_code =
@@ -286,7 +299,7 @@ where
 
     if instrument_dead_code {
         assert!(
-            post_inlining.codegen_units.len() > 0,
+            codegen_units.len() > 0,
             "There must be at least one CGU that code coverage data can be generated in."
         );
 
@@ -297,7 +310,7 @@ where
         // the object file (CGU) containing the dead function stubs is included
         // in the final binary. This will probably require forcing these
         // function symbols to be included via `-u` or `/include` linker args.
-        let mut cgus: Vec<_> = post_inlining.codegen_units.iter_mut().collect();
+        let mut cgus: Vec<_> = codegen_units.iter_mut().collect();
         cgus.sort_by_key(|cgu| cgu.size_estimate());
 
         let dead_code_cgu =
@@ -308,29 +321,17 @@ where
             } else {
                 // If there are no CGUs that have externally linked items,
                 // then we just pick the first CGU as a fallback.
-                &mut post_inlining.codegen_units[0]
+                &mut codegen_units[0]
             };
         dead_code_cgu.make_code_coverage_dead_code_cgu();
     }
 
     // Finally, sort by codegen unit name, so that we get deterministic results.
-    let PostInliningPartitioning {
-        codegen_units: mut result,
-        mono_item_placements: _,
-        internalization_candidates: _,
-    } = post_inlining;
+    codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
 
-    result.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
+    debug_dump(tcx, "FINAL", &codegen_units);
 
-    debug_dump(tcx, "FINAL", &result);
-
-    result
-}
-
-pub struct PreInliningPartitioning<'tcx> {
-    codegen_units: Vec<CodegenUnit<'tcx>>,
-    roots: FxHashSet<MonoItem<'tcx>>,
-    internalization_candidates: FxHashSet<MonoItem<'tcx>>,
+    codegen_units
 }
 
 /// For symbol internalization, we need to know whether a symbol/mono-item is
@@ -342,12 +343,6 @@ enum MonoItemPlacement {
     MultipleCgus,
 }
 
-struct PostInliningPartitioning<'tcx> {
-    codegen_units: Vec<CodegenUnit<'tcx>>,
-    mono_item_placements: FxHashMap<MonoItem<'tcx>, MonoItemPlacement>,
-    internalization_candidates: FxHashSet<MonoItem<'tcx>>,
-}
-
 fn debug_dump<'a, 'tcx: 'a>(tcx: TyCtxt<'tcx>, label: &str, cgus: &[CodegenUnit<'tcx>]) {
     let dump = move || {
         use std::fmt::Write;