From e26c0c92bda6a66a5ea2f308b28e53a0a9bfba7f Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Wed, 24 May 2023 10:05:15 +1000
Subject: [PATCH 1/8] Inline and remove `numbered_codegen_unit_name`.

It is small and has a single call site, and this change will facilitate
the next commit.
---
 .../rustc_monomorphize/src/partitioning/merging.rs    | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/compiler/rustc_monomorphize/src/partitioning/merging.rs b/compiler/rustc_monomorphize/src/partitioning/merging.rs
index 5c524a18454ec..9ab8da1858eff 100644
--- a/compiler/rustc_monomorphize/src/partitioning/merging.rs
+++ b/compiler/rustc_monomorphize/src/partitioning/merging.rs
@@ -98,14 +98,9 @@ pub fn merge_codegen_units<'tcx>(
         // If we are compiling non-incrementally we just generate simple CGU
         // names containing an index.
         for (index, cgu) in codegen_units.iter_mut().enumerate() {
-            cgu.set_name(numbered_codegen_unit_name(cgu_name_builder, index));
+            let numbered_codegen_unit_name =
+                cgu_name_builder.build_cgu_name_no_mangle(LOCAL_CRATE, &["cgu"], Some(index));
+            cgu.set_name(numbered_codegen_unit_name);
         }
     }
 }
-
-fn numbered_codegen_unit_name(
-    name_builder: &mut CodegenUnitNameBuilder<'_>,
-    index: usize,
-) -> Symbol {
-    name_builder.build_cgu_name_no_mangle(LOCAL_CRATE, &["cgu"], Some(index))
-}

From b39b7098eaaf5eb99e2f210ad98307ab310c1eee Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Wed, 24 May 2023 10:08:40 +1000
Subject: [PATCH 2/8] Remove the `merging` module.

Three of the four methods in `DefaultPartitioning` are defined in
`default.rs`. But `merge_codegen_units` is defined in a separate module,
`merging`, even though it's less than 100 lines of code and roughly the
same size as the other three methods. (Also, the `merging` module
currently sits alongside `default`, when it should be a submodule of
`default`, adding to the confusion.)

In #74275 this explanation was given:

> I pulled this out into a separate module since it seemed like we might
> want a few different merge algorithms to choose from.

But in the three years since there have been no additional merging
algorithms, and there is no mechanism for choosing between different
merging algorithms. (There is a mechanism,
`-Zcgu-partitioning-strategy`, for choosing between different
partitioning strategies, but the merging algorithm is just one piece of
a partitioning strategy.)

This commit merges `merging` into `default`, making the code easier to
navigate and read.
---
 .../src/partitioning/default.rs               |  96 +++++++++++++++-
 .../src/partitioning/merging.rs               | 106 ------------------
 .../src/partitioning/mod.rs                   |   1 -
 3 files changed, 94 insertions(+), 109 deletions(-)
 delete mode 100644 compiler/rustc_monomorphize/src/partitioning/merging.rs

diff --git a/compiler/rustc_monomorphize/src/partitioning/default.rs b/compiler/rustc_monomorphize/src/partitioning/default.rs
index 37b7f6bf8a8fc..60f71f5dd81c4 100644
--- a/compiler/rustc_monomorphize/src/partitioning/default.rs
+++ b/compiler/rustc_monomorphize/src/partitioning/default.rs
@@ -1,3 +1,4 @@
+use std::cmp;
 use std::collections::hash_map::Entry;
 
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
@@ -14,7 +15,6 @@ use rustc_span::symbol::Symbol;
 
 use super::PartitioningCx;
 use crate::collector::InliningMap;
-use crate::partitioning::merging;
 use crate::partitioning::{
     MonoItemPlacement, Partition, PostInliningPartitioning, PreInliningPartitioning,
 };
@@ -103,7 +103,99 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
         cx: &PartitioningCx<'_, 'tcx>,
         initial_partitioning: &mut PreInliningPartitioning<'tcx>,
     ) {
-        merging::merge_codegen_units(cx, initial_partitioning);
+        assert!(cx.target_cgu_count >= 1);
+        let codegen_units = &mut initial_partitioning.codegen_units;
+
+        // Note that at this point in time the `codegen_units` here may not be
+        // in a deterministic order (but we know they're deterministically the
+        // same set). We want this merging to produce a deterministic ordering
+        // of codegen units from the input.
+        //
+        // Due to basically how we've implemented the merging below (merge the
+        // two smallest into each other) we're sure to start off with a
+        // deterministic order (sorted by name). This'll mean that if two cgus
+        // have the same size the stable sort below will keep everything nice
+        // and deterministic.
+        codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
+
+        // This map keeps track of what got merged into what.
+        let mut cgu_contents: FxHashMap<Symbol, Vec<Symbol>> =
+            codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect();
+
+        // Merge the two smallest codegen units until the target size is
+        // reached.
+        while codegen_units.len() > cx.target_cgu_count {
+            // Sort small cgus to the back
+            codegen_units.sort_by_cached_key(|cgu| cmp::Reverse(cgu.size_estimate()));
+            let mut smallest = codegen_units.pop().unwrap();
+            let second_smallest = codegen_units.last_mut().unwrap();
+
+            // Move the mono-items from `smallest` to `second_smallest`
+            second_smallest.modify_size_estimate(smallest.size_estimate());
+            for (k, v) in smallest.items_mut().drain() {
+                second_smallest.items_mut().insert(k, v);
+            }
+
+            // Record that `second_smallest` now contains all the stuff that was
+            // in `smallest` before.
+            let mut consumed_cgu_names = cgu_contents.remove(&smallest.name()).unwrap();
+            cgu_contents.get_mut(&second_smallest.name()).unwrap().append(&mut consumed_cgu_names);
+
+            debug!(
+                "CodegenUnit {} merged into CodegenUnit {}",
+                smallest.name(),
+                second_smallest.name()
+            );
+        }
+
+        let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);
+
+        if cx.tcx.sess.opts.incremental.is_some() {
+            // If we are doing incremental compilation, we want CGU names to
+            // reflect the path of the source level module they correspond to.
+            // For CGUs that contain the code of multiple modules because of the
+            // merging done above, we use a concatenation of the names of all
+            // contained CGUs.
+            let new_cgu_names: FxHashMap<Symbol, String> = cgu_contents
+                .into_iter()
+                // This `filter` makes sure we only update the name of CGUs that
+                // were actually modified by merging.
+                .filter(|(_, cgu_contents)| cgu_contents.len() > 1)
+                .map(|(current_cgu_name, cgu_contents)| {
+                    let mut cgu_contents: Vec<&str> =
+                        cgu_contents.iter().map(|s| s.as_str()).collect();
+
+                    // Sort the names, so things are deterministic and easy to
+                    // predict. We are sorting primitive `&str`s here so we can
+                    // use unstable sort.
+                    cgu_contents.sort_unstable();
+
+                    (current_cgu_name, cgu_contents.join("--"))
+                })
+                .collect();
+
+            for cgu in codegen_units.iter_mut() {
+                if let Some(new_cgu_name) = new_cgu_names.get(&cgu.name()) {
+                    if cx.tcx.sess.opts.unstable_opts.human_readable_cgu_names {
+                        cgu.set_name(Symbol::intern(&new_cgu_name));
+                    } else {
+                        // If we don't require CGU names to be human-readable,
+                        // we use a fixed length hash of the composite CGU name
+                        // instead.
+                        let new_cgu_name = CodegenUnit::mangle_name(&new_cgu_name);
+                        cgu.set_name(Symbol::intern(&new_cgu_name));
+                    }
+                }
+            }
+        } else {
+            // If we are compiling non-incrementally we just generate simple CGU
+            // names containing an index.
+            for (index, cgu) in codegen_units.iter_mut().enumerate() {
+                let numbered_codegen_unit_name =
+                    cgu_name_builder.build_cgu_name_no_mangle(LOCAL_CRATE, &["cgu"], Some(index));
+                cgu.set_name(numbered_codegen_unit_name);
+            }
+        }
     }
 
     fn place_inlined_mono_items(
diff --git a/compiler/rustc_monomorphize/src/partitioning/merging.rs b/compiler/rustc_monomorphize/src/partitioning/merging.rs
deleted file mode 100644
index 9ab8da1858eff..0000000000000
--- a/compiler/rustc_monomorphize/src/partitioning/merging.rs
+++ /dev/null
@@ -1,106 +0,0 @@
-use std::cmp;
-
-use rustc_data_structures::fx::FxHashMap;
-use rustc_hir::def_id::LOCAL_CRATE;
-use rustc_middle::mir::mono::{CodegenUnit, CodegenUnitNameBuilder};
-use rustc_span::symbol::Symbol;
-
-use super::PartitioningCx;
-use crate::partitioning::PreInliningPartitioning;
-
-pub fn merge_codegen_units<'tcx>(
-    cx: &PartitioningCx<'_, 'tcx>,
-    initial_partitioning: &mut PreInliningPartitioning<'tcx>,
-) {
-    assert!(cx.target_cgu_count >= 1);
-    let codegen_units = &mut initial_partitioning.codegen_units;
-
-    // Note that at this point in time the `codegen_units` here may not be in a
-    // deterministic order (but we know they're deterministically the same set).
-    // We want this merging to produce a deterministic ordering of codegen units
-    // from the input.
-    //
-    // Due to basically how we've implemented the merging below (merge the two
-    // smallest into each other) we're sure to start off with a deterministic
-    // order (sorted by name). This'll mean that if two cgus have the same size
-    // the stable sort below will keep everything nice and deterministic.
-    codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
-
-    // This map keeps track of what got merged into what.
-    let mut cgu_contents: FxHashMap<Symbol, Vec<Symbol>> =
-        codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect();
-
-    // Merge the two smallest codegen units until the target size is reached.
-    while codegen_units.len() > cx.target_cgu_count {
-        // Sort small cgus to the back
-        codegen_units.sort_by_cached_key(|cgu| cmp::Reverse(cgu.size_estimate()));
-        let mut smallest = codegen_units.pop().unwrap();
-        let second_smallest = codegen_units.last_mut().unwrap();
-
-        // Move the mono-items from `smallest` to `second_smallest`
-        second_smallest.modify_size_estimate(smallest.size_estimate());
-        for (k, v) in smallest.items_mut().drain() {
-            second_smallest.items_mut().insert(k, v);
-        }
-
-        // Record that `second_smallest` now contains all the stuff that was in
-        // `smallest` before.
-        let mut consumed_cgu_names = cgu_contents.remove(&smallest.name()).unwrap();
-        cgu_contents.get_mut(&second_smallest.name()).unwrap().append(&mut consumed_cgu_names);
-
-        debug!(
-            "CodegenUnit {} merged into CodegenUnit {}",
-            smallest.name(),
-            second_smallest.name()
-        );
-    }
-
-    let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);
-
-    if cx.tcx.sess.opts.incremental.is_some() {
-        // If we are doing incremental compilation, we want CGU names to
-        // reflect the path of the source level module they correspond to.
-        // For CGUs that contain the code of multiple modules because of the
-        // merging done above, we use a concatenation of the names of
-        // all contained CGUs.
-        let new_cgu_names: FxHashMap<Symbol, String> = cgu_contents
-            .into_iter()
-            // This `filter` makes sure we only update the name of CGUs that
-            // were actually modified by merging.
-            .filter(|(_, cgu_contents)| cgu_contents.len() > 1)
-            .map(|(current_cgu_name, cgu_contents)| {
-                let mut cgu_contents: Vec<&str> = cgu_contents.iter().map(|s| s.as_str()).collect();
-
-                // Sort the names, so things are deterministic and easy to
-                // predict.
-
-                // We are sorting primitive &strs here so we can use unstable sort
-                cgu_contents.sort_unstable();
-
-                (current_cgu_name, cgu_contents.join("--"))
-            })
-            .collect();
-
-        for cgu in codegen_units.iter_mut() {
-            if let Some(new_cgu_name) = new_cgu_names.get(&cgu.name()) {
-                if cx.tcx.sess.opts.unstable_opts.human_readable_cgu_names {
-                    cgu.set_name(Symbol::intern(&new_cgu_name));
-                } else {
-                    // If we don't require CGU names to be human-readable, we
-                    // use a fixed length hash of the composite CGU name
-                    // instead.
-                    let new_cgu_name = CodegenUnit::mangle_name(&new_cgu_name);
-                    cgu.set_name(Symbol::intern(&new_cgu_name));
-                }
-            }
-        }
-    } else {
-        // If we are compiling non-incrementally we just generate simple CGU
-        // names containing an index.
-        for (index, cgu) in codegen_units.iter_mut().enumerate() {
-            let numbered_codegen_unit_name =
-                cgu_name_builder.build_cgu_name_no_mangle(LOCAL_CRATE, &["cgu"], Some(index));
-            cgu.set_name(numbered_codegen_unit_name);
-        }
-    }
-}
diff --git a/compiler/rustc_monomorphize/src/partitioning/mod.rs b/compiler/rustc_monomorphize/src/partitioning/mod.rs
index eafe57a0c0207..8ea82b39534d7 100644
--- a/compiler/rustc_monomorphize/src/partitioning/mod.rs
+++ b/compiler/rustc_monomorphize/src/partitioning/mod.rs
@@ -93,7 +93,6 @@
 //! inlining, even when they are not marked `#[inline]`.
 
 mod default;
-mod merging;
 
 use std::cmp;
 use std::fs::{self, File};

From 20de2ba7596322026501e1b5134c8724224b844e Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Wed, 24 May 2023 10:49:48 +1000
Subject: [PATCH 3/8] Remove `{Pre,Post}InliningPartitioning`.

I find that these structs obfuscate the code. Removing them and just
passing the individual fields around makes the `Partition` method
signatures a little longer, but makes the data flow much clearer. E.g.

- `codegen_units` is mutable all the way through.
- `codegen_units`'s length is changed by `merge_codegen_units`, but only
  the individual elements are changed by `place_inlined_mono_items` and
  `internalize_symbols`.
- `roots`, `internalization_candidates`, and `mono_item_placements` are
  all immutable after creation, and all used by just one of the four
  methods.
---
 .../src/partitioning/default.rs               | 55 ++++-------
 .../src/partitioning/mod.rs                   | 96 +++++++++----------
 2 files changed, 64 insertions(+), 87 deletions(-)

diff --git a/compiler/rustc_monomorphize/src/partitioning/default.rs b/compiler/rustc_monomorphize/src/partitioning/default.rs
index 60f71f5dd81c4..362e21eaecd0a 100644
--- a/compiler/rustc_monomorphize/src/partitioning/default.rs
+++ b/compiler/rustc_monomorphize/src/partitioning/default.rs
@@ -15,9 +15,7 @@ use rustc_span::symbol::Symbol;
 
 use super::PartitioningCx;
 use crate::collector::InliningMap;
-use crate::partitioning::{
-    MonoItemPlacement, Partition, PostInliningPartitioning, PreInliningPartitioning,
-};
+use crate::partitioning::{MonoItemPlacement, Partition};
 
 pub struct DefaultPartitioning;
 
@@ -26,7 +24,7 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
         &mut self,
         cx: &PartitioningCx<'_, 'tcx>,
         mono_items: &mut I,
-    ) -> PreInliningPartitioning<'tcx>
+    ) -> (Vec<CodegenUnit<'tcx>>, FxHashSet<MonoItem<'tcx>>, FxHashSet<MonoItem<'tcx>>)
     where
         I: Iterator<Item = MonoItem<'tcx>>,
     {
@@ -91,20 +89,15 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
             codegen_units.insert(codegen_unit_name, CodegenUnit::new(codegen_unit_name));
         }
 
-        PreInliningPartitioning {
-            codegen_units: codegen_units.into_values().collect(),
-            roots,
-            internalization_candidates,
-        }
+        (codegen_units.into_values().collect(), roots, internalization_candidates)
     }
 
     fn merge_codegen_units(
         &mut self,
         cx: &PartitioningCx<'_, 'tcx>,
-        initial_partitioning: &mut PreInliningPartitioning<'tcx>,
+        codegen_units: &mut Vec<CodegenUnit<'tcx>>,
     ) {
         assert!(cx.target_cgu_count >= 1);
-        let codegen_units = &mut initial_partitioning.codegen_units;
 
         // Note that at this point in time the `codegen_units` here may not be
         // in a deterministic order (but we know they're deterministically the
@@ -201,20 +194,14 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
     fn place_inlined_mono_items(
         &mut self,
         cx: &PartitioningCx<'_, 'tcx>,
-        initial_partitioning: PreInliningPartitioning<'tcx>,
-    ) -> PostInliningPartitioning<'tcx> {
-        let mut new_partitioning = Vec::new();
+        codegen_units: &mut [CodegenUnit<'tcx>],
+        roots: FxHashSet<MonoItem<'tcx>>,
+    ) -> FxHashMap<MonoItem<'tcx>, MonoItemPlacement> {
         let mut mono_item_placements = FxHashMap::default();
 
-        let PreInliningPartitioning {
-            codegen_units: initial_cgus,
-            roots,
-            internalization_candidates,
-        } = initial_partitioning;
-
-        let single_codegen_unit = initial_cgus.len() == 1;
+        let single_codegen_unit = codegen_units.len() == 1;
 
-        for old_codegen_unit in initial_cgus {
+        for old_codegen_unit in codegen_units.iter_mut() {
             // Collect all items that need to be available in this codegen unit.
             let mut reachable = FxHashSet::default();
             for root in old_codegen_unit.items().keys() {
@@ -266,14 +253,10 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
                 }
             }
 
-            new_partitioning.push(new_codegen_unit);
+            *old_codegen_unit = new_codegen_unit;
         }
 
-        return PostInliningPartitioning {
-            codegen_units: new_partitioning,
-            mono_item_placements,
-            internalization_candidates,
-        };
+        return mono_item_placements;
 
         fn follow_inlining<'tcx>(
             mono_item: MonoItem<'tcx>,
@@ -293,14 +276,16 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
     fn internalize_symbols(
         &mut self,
         cx: &PartitioningCx<'_, 'tcx>,
-        partitioning: &mut PostInliningPartitioning<'tcx>,
+        codegen_units: &mut [CodegenUnit<'tcx>],
+        mono_item_placements: FxHashMap<MonoItem<'tcx>, MonoItemPlacement>,
+        internalization_candidates: FxHashSet<MonoItem<'tcx>>,
     ) {
-        if partitioning.codegen_units.len() == 1 {
+        if codegen_units.len() == 1 {
             // Fast path for when there is only one codegen unit. In this case we
             // can internalize all candidates, since there is nowhere else they
             // could be accessed from.
-            for cgu in &mut partitioning.codegen_units {
-                for candidate in &partitioning.internalization_candidates {
+            for cgu in codegen_units {
+                for candidate in &internalization_candidates {
                     cgu.items_mut().insert(*candidate, (Linkage::Internal, Visibility::Default));
                 }
             }
@@ -317,15 +302,13 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
             }
         });
 
-        let mono_item_placements = &partitioning.mono_item_placements;
-
         // For each internalization candidates in each codegen unit, check if it is
         // accessed from outside its defining codegen unit.
-        for cgu in &mut partitioning.codegen_units {
+        for cgu in codegen_units {
             let home_cgu = MonoItemPlacement::SingleCgu { cgu_name: cgu.name() };
 
             for (accessee, linkage_and_visibility) in cgu.items_mut() {
-                if !partitioning.internalization_candidates.contains(accessee) {
+                if !internalization_candidates.contains(accessee) {
                     // This item is no candidate for internalizing, so skip it.
                     continue;
                 }
diff --git a/compiler/rustc_monomorphize/src/partitioning/mod.rs b/compiler/rustc_monomorphize/src/partitioning/mod.rs
index 8ea82b39534d7..dd0a35c4402d3 100644
--- a/compiler/rustc_monomorphize/src/partitioning/mod.rs
+++ b/compiler/rustc_monomorphize/src/partitioning/mod.rs
@@ -128,7 +128,7 @@ impl<'tcx> Partition<'tcx> for Partitioner {
         &mut self,
         cx: &PartitioningCx<'_, 'tcx>,
         mono_items: &mut I,
-    ) -> PreInliningPartitioning<'tcx>
+    ) -> (Vec<CodegenUnit<'tcx>>, FxHashSet<MonoItem<'tcx>>, FxHashSet<MonoItem<'tcx>>)
     where
         I: Iterator<Item = MonoItem<'tcx>>,
     {
@@ -141,12 +141,10 @@ impl<'tcx> Partition<'tcx> for Partitioner {
     fn merge_codegen_units(
         &mut self,
         cx: &PartitioningCx<'_, 'tcx>,
-        initial_partitioning: &mut PreInliningPartitioning<'tcx>,
+        codegen_units: &mut Vec<CodegenUnit<'tcx>>,
     ) {
         match self {
-            Partitioner::Default(partitioner) => {
-                partitioner.merge_codegen_units(cx, initial_partitioning)
-            }
+            Partitioner::Default(partitioner) => partitioner.merge_codegen_units(cx, codegen_units),
             Partitioner::Unknown => cx.tcx.sess.emit_fatal(UnknownPartitionStrategy),
         }
     }
@@ -154,11 +152,12 @@ impl<'tcx> Partition<'tcx> for Partitioner {
     fn place_inlined_mono_items(
         &mut self,
         cx: &PartitioningCx<'_, 'tcx>,
-        initial_partitioning: PreInliningPartitioning<'tcx>,
-    ) -> PostInliningPartitioning<'tcx> {
+        codegen_units: &mut [CodegenUnit<'tcx>],
+        roots: FxHashSet<MonoItem<'tcx>>,
+    ) -> FxHashMap<MonoItem<'tcx>, MonoItemPlacement> {
         match self {
             Partitioner::Default(partitioner) => {
-                partitioner.place_inlined_mono_items(cx, initial_partitioning)
+                partitioner.place_inlined_mono_items(cx, codegen_units, roots)
             }
             Partitioner::Unknown => cx.tcx.sess.emit_fatal(UnknownPartitionStrategy),
         }
@@ -167,12 +166,17 @@ impl<'tcx> Partition<'tcx> for Partitioner {
     fn internalize_symbols(
         &mut self,
         cx: &PartitioningCx<'_, 'tcx>,
-        post_inlining_partitioning: &mut PostInliningPartitioning<'tcx>,
+        codegen_units: &mut [CodegenUnit<'tcx>],
+        mono_item_placements: FxHashMap<MonoItem<'tcx>, MonoItemPlacement>,
+        internalization_candidates: FxHashSet<MonoItem<'tcx>>,
     ) {
         match self {
-            Partitioner::Default(partitioner) => {
-                partitioner.internalize_symbols(cx, post_inlining_partitioning)
-            }
+            Partitioner::Default(partitioner) => partitioner.internalize_symbols(
+                cx,
+                codegen_units,
+                mono_item_placements,
+                internalization_candidates,
+            ),
             Partitioner::Unknown => cx.tcx.sess.emit_fatal(UnknownPartitionStrategy),
         }
     }
@@ -189,26 +193,29 @@ trait Partition<'tcx> {
         &mut self,
         cx: &PartitioningCx<'_, 'tcx>,
         mono_items: &mut I,
-    ) -> PreInliningPartitioning<'tcx>
+    ) -> (Vec<CodegenUnit<'tcx>>, FxHashSet<MonoItem<'tcx>>, FxHashSet<MonoItem<'tcx>>)
     where
         I: Iterator<Item = MonoItem<'tcx>>;
 
     fn merge_codegen_units(
         &mut self,
         cx: &PartitioningCx<'_, 'tcx>,
-        initial_partitioning: &mut PreInliningPartitioning<'tcx>,
+        codegen_units: &mut Vec<CodegenUnit<'tcx>>,
     );
 
     fn place_inlined_mono_items(
         &mut self,
         cx: &PartitioningCx<'_, 'tcx>,
-        initial_partitioning: PreInliningPartitioning<'tcx>,
-    ) -> PostInliningPartitioning<'tcx>;
+        codegen_units: &mut [CodegenUnit<'tcx>],
+        roots: FxHashSet<MonoItem<'tcx>>,
+    ) -> FxHashMap<MonoItem<'tcx>, MonoItemPlacement>;
 
     fn internalize_symbols(
         &mut self,
         cx: &PartitioningCx<'_, 'tcx>,
-        partitioning: &mut PostInliningPartitioning<'tcx>,
+        codegen_units: &mut [CodegenUnit<'tcx>],
+        mono_item_placements: FxHashMap<MonoItem<'tcx>, MonoItemPlacement>,
+        internalization_candidates: FxHashSet<MonoItem<'tcx>>,
     );
 }
 
@@ -240,44 +247,49 @@ where
     // In the first step, we place all regular monomorphizations into their
     // respective 'home' codegen unit. Regular monomorphizations are all
     // functions and statics defined in the local crate.
-    let mut initial_partitioning = {
+    let (mut codegen_units, roots, internalization_candidates) = {
         let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_place_roots");
         partitioner.place_root_mono_items(cx, mono_items)
     };
 
-    for cgu in &mut initial_partitioning.codegen_units {
+    for cgu in &mut codegen_units {
         cgu.create_size_estimate(tcx);
     }
 
-    debug_dump(tcx, "INITIAL PARTITIONING", &initial_partitioning.codegen_units);
+    debug_dump(tcx, "INITIAL PARTITIONING", &codegen_units);
 
     // Merge until we have at most `max_cgu_count` codegen units.
     {
         let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_merge_cgus");
-        partitioner.merge_codegen_units(cx, &mut initial_partitioning);
-        debug_dump(tcx, "POST MERGING", &initial_partitioning.codegen_units);
+        partitioner.merge_codegen_units(cx, &mut codegen_units);
+        debug_dump(tcx, "POST MERGING", &codegen_units);
     }
 
     // In the next step, we use the inlining map to determine which additional
     // monomorphizations have to go into each codegen unit. These additional
     // monomorphizations can be drop-glue, functions from external crates, and
     // local functions the definition of which is marked with `#[inline]`.
-    let mut post_inlining = {
+    let mono_item_placements = {
         let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_place_inline_items");
-        partitioner.place_inlined_mono_items(cx, initial_partitioning)
+        partitioner.place_inlined_mono_items(cx, &mut codegen_units, roots)
     };
 
-    for cgu in &mut post_inlining.codegen_units {
+    for cgu in &mut codegen_units {
         cgu.create_size_estimate(tcx);
     }
 
-    debug_dump(tcx, "POST INLINING", &post_inlining.codegen_units);
+    debug_dump(tcx, "POST INLINING", &codegen_units);
 
     // Next we try to make as many symbols "internal" as possible, so LLVM has
     // more freedom to optimize.
     if !tcx.sess.link_dead_code() {
         let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_internalize_symbols");
-        partitioner.internalize_symbols(cx, &mut post_inlining);
+        partitioner.internalize_symbols(
+            cx,
+            &mut codegen_units,
+            mono_item_placements,
+            internalization_candidates,
+        );
     }
 
     let instrument_dead_code =
@@ -285,7 +297,7 @@ where
 
     if instrument_dead_code {
         assert!(
-            post_inlining.codegen_units.len() > 0,
+            codegen_units.len() > 0,
             "There must be at least one CGU that code coverage data can be generated in."
         );
 
@@ -296,7 +308,7 @@ where
         // the object file (CGU) containing the dead function stubs is included
         // in the final binary. This will probably require forcing these
         // function symbols to be included via `-u` or `/include` linker args.
-        let mut cgus: Vec<_> = post_inlining.codegen_units.iter_mut().collect();
+        let mut cgus: Vec<_> = codegen_units.iter_mut().collect();
         cgus.sort_by_key(|cgu| cgu.size_estimate());
 
         let dead_code_cgu =
@@ -307,29 +319,17 @@ where
             } else {
                 // If there are no CGUs that have externally linked items,
                 // then we just pick the first CGU as a fallback.
-                &mut post_inlining.codegen_units[0]
+                &mut codegen_units[0]
             };
         dead_code_cgu.make_code_coverage_dead_code_cgu();
     }
 
     // Finally, sort by codegen unit name, so that we get deterministic results.
-    let PostInliningPartitioning {
-        codegen_units: mut result,
-        mono_item_placements: _,
-        internalization_candidates: _,
-    } = post_inlining;
+    codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
 
-    result.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
+    debug_dump(tcx, "FINAL", &codegen_units);
 
-    debug_dump(tcx, "FINAL", &result);
-
-    result
-}
-
-pub struct PreInliningPartitioning<'tcx> {
-    codegen_units: Vec<CodegenUnit<'tcx>>,
-    roots: FxHashSet<MonoItem<'tcx>>,
-    internalization_candidates: FxHashSet<MonoItem<'tcx>>,
+    codegen_units
 }
 
 /// For symbol internalization, we need to know whether a symbol/mono-item is
@@ -341,12 +341,6 @@ enum MonoItemPlacement {
     MultipleCgus,
 }
 
-struct PostInliningPartitioning<'tcx> {
-    codegen_units: Vec<CodegenUnit<'tcx>>,
-    mono_item_placements: FxHashMap<MonoItem<'tcx>, MonoItemPlacement>,
-    internalization_candidates: FxHashSet<MonoItem<'tcx>>,
-}
-
 fn debug_dump<'a, 'tcx: 'a>(tcx: TyCtxt<'tcx>, label: &str, cgus: &[CodegenUnit<'tcx>]) {
     let dump = move || {
         use std::fmt::Write;

From 59c5259bc92542f76db04cfc96bdb92c4b15401a Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Wed, 24 May 2023 11:50:32 +1000
Subject: [PATCH 4/8] Add a clarifying comment.

This is something that took me some time to figure out.
---
 compiler/rustc_monomorphize/src/partitioning/mod.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/compiler/rustc_monomorphize/src/partitioning/mod.rs b/compiler/rustc_monomorphize/src/partitioning/mod.rs
index dd0a35c4402d3..25c10df18a18c 100644
--- a/compiler/rustc_monomorphize/src/partitioning/mod.rs
+++ b/compiler/rustc_monomorphize/src/partitioning/mod.rs
@@ -259,6 +259,8 @@ where
     debug_dump(tcx, "INITIAL PARTITIONING", &codegen_units);
 
     // Merge until we have at most `max_cgu_count` codegen units.
+    // `merge_codegen_units` is responsible for updating the CGU size
+    // estimates.
     {
         let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_merge_cgus");
         partitioner.merge_codegen_units(cx, &mut codegen_units);

From dc8ab7ede9c185758dabc7662de3b3d226ebfc47 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Wed, 24 May 2023 15:47:39 +1000
Subject: [PATCH 5/8] XXX: split big CGUs

---
 compiler/rustc_middle/src/mir/mono.rs         | 11 +++-
 compiler/rustc_monomorphize/src/lib.rs        |  1 +
 .../src/partitioning/default.rs               | 63 ++++++++++++++++++-
 .../src/partitioning/mod.rs                   |  2 +-
 4 files changed, 74 insertions(+), 3 deletions(-)

diff --git a/compiler/rustc_middle/src/mir/mono.rs b/compiler/rustc_middle/src/mir/mono.rs
index ff54ec56a29ac..653e8c3c58bec 100644
--- a/compiler/rustc_middle/src/mir/mono.rs
+++ b/compiler/rustc_middle/src/mir/mono.rs
@@ -333,13 +333,22 @@ impl<'tcx> CodegenUnit<'tcx> {
             .expect("create_size_estimate must be called before getting a size_estimate")
     }
 
-    pub fn modify_size_estimate(&mut self, delta: usize) {
+    pub fn increase_size_estimate(&mut self, delta: usize) {
+        // njn: make this nicer, with as_mut().expect()
         assert!(self.size_estimate.is_some());
         if let Some(size_estimate) = self.size_estimate {
             self.size_estimate = Some(size_estimate + delta);
         }
     }
 
+    pub fn decrease_size_estimate(&mut self, delta: usize) {
+        // njn: make this nicer, with as_mut().expect()
+        assert!(self.size_estimate.is_some());
+        if let Some(size_estimate) = self.size_estimate {
+            self.size_estimate = Some(size_estimate - delta);
+        }
+    }
+
     pub fn contains_item(&self, item: &MonoItem<'tcx>) -> bool {
         self.items().contains_key(item)
     }
diff --git a/compiler/rustc_monomorphize/src/lib.rs b/compiler/rustc_monomorphize/src/lib.rs
index ecc50c3f664fd..8221a8a970f28 100644
--- a/compiler/rustc_monomorphize/src/lib.rs
+++ b/compiler/rustc_monomorphize/src/lib.rs
@@ -1,4 +1,5 @@
 #![feature(array_windows)]
+#![feature(hash_drain_filter)]
 #![recursion_limit = "256"]
 #![allow(rustc::potential_query_instability)]
 #![deny(rustc::untranslatable_diagnostic)]
diff --git a/compiler/rustc_monomorphize/src/partitioning/default.rs b/compiler/rustc_monomorphize/src/partitioning/default.rs
index 362e21eaecd0a..541707e58008a 100644
--- a/compiler/rustc_monomorphize/src/partitioning/default.rs
+++ b/compiler/rustc_monomorphize/src/partitioning/default.rs
@@ -111,6 +111,67 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
         // and deterministic.
         codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
 
+        //---------------------------------------------------------------------------
+        // njn: split big CGUs if necessary
+        if false && codegen_units.len() > cx.target_cgu_count {
+            // njn: type ann?
+            let total_size: usize = codegen_units.iter().map(|cgu| cgu.size_estimate()).sum();
+            let target_size = total_size / cx.target_cgu_count;
+            eprintln!("----");
+            eprintln!("SPLIT0: total:{} target:{}", total_size, target_size);
+            // njn: need a while loop because we're modifying codegen_units as we go
+            // njn: make it a for loop?
+            // njn: explain all this
+            let mut i = 0;
+            let n = codegen_units.len();
+            while i < n {
+                let old_cgu = &mut codegen_units[i]; 
+                if old_cgu.size_estimate() > target_size && old_cgu.items().len() > 1 {
+                    eprintln!("SPLIT1: old:{} old:{}", old_cgu.size_estimate(), old_cgu.name());
+
+                    // njn: too big; split
+                    // njn: explain how a very big CGU will be split multiple
+                    // times
+                    
+                    let mut new_name = old_cgu.name().to_string();
+                    new_name += "-split";
+                    let mut new_cgu = CodegenUnit::new(Symbol::intern(&new_name));
+                    new_cgu.create_size_estimate(cx.tcx); // initially zero
+
+                    // njn: size stuff is a bit clumsy
+                    let mut moved_size = 0;
+
+                    // njn: what if this empties old_cgu?
+
+                    // njn: nicer way to do this?
+                    // njn: don't move if it's the last item
+                    old_cgu.items_mut().drain_filter(|item, rest| {
+                        // njn: true->remove
+                        if moved_size < target_size {
+                            let item_size = item.size_estimate(cx.tcx);
+                            eprintln!("MOVE: {}", item_size);
+                            moved_size += item_size;
+                            new_cgu.items_mut().insert(*item, *rest);
+                            true
+                        } else {
+                            false
+                        }
+                    });
+                    new_cgu.increase_size_estimate(moved_size);
+                    old_cgu.decrease_size_estimate(moved_size);
+
+                    eprintln!("SPLIT2: old:{} -> new:{} new:{}", old_cgu.size_estimate(), new_cgu.size_estimate(), new_cgu.name());
+
+                    codegen_units.push(new_cgu);
+                    // njn: explain lack of `i += 1`;
+                } else {
+                    // njn: explain this
+                    i += 1;
+                }
+            }
+        }
+        //---------------------------------------------------------------------------
+
         // This map keeps track of what got merged into what.
         let mut cgu_contents: FxHashMap<Symbol, Vec<Symbol>> =
             codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect();
@@ -124,7 +185,7 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
             let second_smallest = codegen_units.last_mut().unwrap();
 
             // Move the mono-items from `smallest` to `second_smallest`
-            second_smallest.modify_size_estimate(smallest.size_estimate());
+            second_smallest.increase_size_estimate(smallest.size_estimate());
             for (k, v) in smallest.items_mut().drain() {
                 second_smallest.items_mut().insert(k, v);
             }
diff --git a/compiler/rustc_monomorphize/src/partitioning/mod.rs b/compiler/rustc_monomorphize/src/partitioning/mod.rs
index 25c10df18a18c..8c2a5113b1699 100644
--- a/compiler/rustc_monomorphize/src/partitioning/mod.rs
+++ b/compiler/rustc_monomorphize/src/partitioning/mod.rs
@@ -382,7 +382,7 @@ fn debug_dump<'a, 'tcx: 'a>(tcx: TyCtxt<'tcx>, label: &str, cgus: &[CodegenUnit<
         std::mem::take(s)
     };
 
-    debug!("{}", dump());
+    eprintln!("{}", dump());
 }
 
 #[inline(never)] // give this a place in the profiler

From efbe2889598099aac424d4d8797bbae41a8f3396 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Wed, 24 May 2023 16:35:56 +1000
Subject: [PATCH 6/8] XXX: better merging, plus splitting fixes

---
 .../src/partitioning/default.rs               | 49 +++++++++----------
 1 file changed, 24 insertions(+), 25 deletions(-)

diff --git a/compiler/rustc_monomorphize/src/partitioning/default.rs b/compiler/rustc_monomorphize/src/partitioning/default.rs
index 541707e58008a..4884df76ca088 100644
--- a/compiler/rustc_monomorphize/src/partitioning/default.rs
+++ b/compiler/rustc_monomorphize/src/partitioning/default.rs
@@ -104,16 +104,16 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
         // same set). We want this merging to produce a deterministic ordering
         // of codegen units from the input.
         //
-        // Due to basically how we've implemented the merging below (merge the
-        // two smallest into each other) we're sure to start off with a
-        // deterministic order (sorted by name). This'll mean that if two cgus
-        // have the same size the stable sort below will keep everything nice
-        // and deterministic.
+        // Due to basically how we've implemented the merging below (repeatedly
+        // merging adjacent pairs of CGUs) we're sure to start off with a
+        // deterministic deterministic order (sorted by name). This'll mean that
+        // if two cgus have the same size the stable sort below will keep
+        // everything nice and deterministic.
         codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
 
         //---------------------------------------------------------------------------
         // njn: split big CGUs if necessary
-        if false && codegen_units.len() > cx.target_cgu_count {
+        if codegen_units.len() > cx.target_cgu_count {
             // njn: type ann?
             let total_size: usize = codegen_units.iter().map(|cgu| cgu.size_estimate()).sum();
             let target_size = total_size / cx.target_cgu_count;
@@ -123,6 +123,7 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
             // njn: make it a for loop?
             // njn: explain all this
             let mut i = 0;
+            let mut j = 0; // njn: explain
             let n = codegen_units.len();
             while i < n {
                 let old_cgu = &mut codegen_units[i]; 
@@ -134,7 +135,7 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
                     // times
                     
                     let mut new_name = old_cgu.name().to_string();
-                    new_name += "-split";
+                    new_name += &format!("-split{}", j);
                     let mut new_cgu = CodegenUnit::new(Symbol::intern(&new_name));
                     new_cgu.create_size_estimate(cx.tcx); // initially zero
 
@@ -164,9 +165,11 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
 
                     codegen_units.push(new_cgu);
                     // njn: explain lack of `i += 1`;
+                    j += 1;
                 } else {
                     // njn: explain this
                     i += 1;
+                    j = 0;
                 }
             }
         }
@@ -176,30 +179,26 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
         let mut cgu_contents: FxHashMap<Symbol, Vec<Symbol>> =
             codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect();
 
-        // Merge the two smallest codegen units until the target size is
-        // reached.
+        // Repeatedly merge cgu[n] into cgu[n-1].
         while codegen_units.len() > cx.target_cgu_count {
-            // Sort small cgus to the back
+            // njn: more comments about this.
+            // Sort small cgus to the back. At this point... njn: more
             codegen_units.sort_by_cached_key(|cgu| cmp::Reverse(cgu.size_estimate()));
-            let mut smallest = codegen_units.pop().unwrap();
-            let second_smallest = codegen_units.last_mut().unwrap();
+            let mut cgu_n = codegen_units.swap_remove(cx.target_cgu_count);
+            let cgu_n_minus_1 = &mut codegen_units[cx.target_cgu_count - 1];
 
-            // Move the mono-items from `smallest` to `second_smallest`
-            second_smallest.increase_size_estimate(smallest.size_estimate());
-            for (k, v) in smallest.items_mut().drain() {
-                second_smallest.items_mut().insert(k, v);
+            // Move the mono-items from `cgu_n` to `cgu_n_minus_1`
+            cgu_n_minus_1.increase_size_estimate(cgu_n.size_estimate());
+            for (k, v) in cgu_n.items_mut().drain() {
+                cgu_n_minus_1.items_mut().insert(k, v);
             }
 
-            // Record that `second_smallest` now contains all the stuff that was
-            // in `smallest` before.
-            let mut consumed_cgu_names = cgu_contents.remove(&smallest.name()).unwrap();
-            cgu_contents.get_mut(&second_smallest.name()).unwrap().append(&mut consumed_cgu_names);
+            // Record that `cgu_n_minus_1` now contains all the stuff that was in
+            // `cgu_n` before.
+            let mut consumed_cgu_names = cgu_contents.remove(&cgu_n.name()).unwrap();
+            cgu_contents.get_mut(&cgu_n_minus_1.name()).unwrap().append(&mut consumed_cgu_names);
 
-            debug!(
-                "CodegenUnit {} merged into CodegenUnit {}",
-                smallest.name(),
-                second_smallest.name()
-            );
+            debug!("CodegenUnit {} merged into CodegenUnit {}", cgu_n.name(), cgu_n_minus_1.name());
         }
 
         let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);

From 1076b01b856268b8392d0d16a1c38872ead48874 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Wed, 24 May 2023 16:36:30 +1000
Subject: [PATCH 7/8] XXX: disable debug printing

---
 .../src/partitioning/default.rs                | 18 +++++++++++-------
 .../rustc_monomorphize/src/partitioning/mod.rs |  2 +-
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/compiler/rustc_monomorphize/src/partitioning/default.rs b/compiler/rustc_monomorphize/src/partitioning/default.rs
index 4884df76ca088..c6df0956a6036 100644
--- a/compiler/rustc_monomorphize/src/partitioning/default.rs
+++ b/compiler/rustc_monomorphize/src/partitioning/default.rs
@@ -117,8 +117,8 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
             // njn: type ann?
             let total_size: usize = codegen_units.iter().map(|cgu| cgu.size_estimate()).sum();
             let target_size = total_size / cx.target_cgu_count;
-            eprintln!("----");
-            eprintln!("SPLIT0: total:{} target:{}", total_size, target_size);
+            //eprintln!("----");
+            //eprintln!("SPLIT0: total:{} target:{}", total_size, target_size);
             // njn: need a while loop because we're modifying codegen_units as we go
             // njn: make it a for loop?
             // njn: explain all this
@@ -126,14 +126,14 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
             let mut j = 0; // njn: explain
             let n = codegen_units.len();
             while i < n {
-                let old_cgu = &mut codegen_units[i]; 
+                let old_cgu = &mut codegen_units[i];
                 if old_cgu.size_estimate() > target_size && old_cgu.items().len() > 1 {
-                    eprintln!("SPLIT1: old:{} old:{}", old_cgu.size_estimate(), old_cgu.name());
+                    //eprintln!("SPLIT1: old:{} old:{}", old_cgu.size_estimate(), old_cgu.name());
 
                     // njn: too big; split
                     // njn: explain how a very big CGU will be split multiple
                     // times
-                    
+
                     let mut new_name = old_cgu.name().to_string();
                     new_name += &format!("-split{}", j);
                     let mut new_cgu = CodegenUnit::new(Symbol::intern(&new_name));
@@ -144,13 +144,17 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
 
                     // njn: what if this empties old_cgu?
 
+                    // njn: non-deterministic iteration results in
+                    // non-deterministic splitting, which messes up incremental
+                    // compilation
+
                     // njn: nicer way to do this?
                     // njn: don't move if it's the last item
                     old_cgu.items_mut().drain_filter(|item, rest| {
                         // njn: true->remove
                         if moved_size < target_size {
                             let item_size = item.size_estimate(cx.tcx);
-                            eprintln!("MOVE: {}", item_size);
+                            //eprintln!("MOVE: {}", item_size);
                             moved_size += item_size;
                             new_cgu.items_mut().insert(*item, *rest);
                             true
@@ -161,7 +165,7 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
                     new_cgu.increase_size_estimate(moved_size);
                     old_cgu.decrease_size_estimate(moved_size);
 
-                    eprintln!("SPLIT2: old:{} -> new:{} new:{}", old_cgu.size_estimate(), new_cgu.size_estimate(), new_cgu.name());
+                    //eprintln!("SPLIT2: old:{} -> new:{} new:{}", old_cgu.size_estimate(), new_cgu.size_estimate(), new_cgu.name());
 
                     codegen_units.push(new_cgu);
                     // njn: explain lack of `i += 1`;
diff --git a/compiler/rustc_monomorphize/src/partitioning/mod.rs b/compiler/rustc_monomorphize/src/partitioning/mod.rs
index 8c2a5113b1699..25c10df18a18c 100644
--- a/compiler/rustc_monomorphize/src/partitioning/mod.rs
+++ b/compiler/rustc_monomorphize/src/partitioning/mod.rs
@@ -382,7 +382,7 @@ fn debug_dump<'a, 'tcx: 'a>(tcx: TyCtxt<'tcx>, label: &str, cgus: &[CodegenUnit<
         std::mem::take(s)
     };
 
-    eprintln!("{}", dump());
+    debug!("{}", dump());
 }
 
 #[inline(never)] // give this a place in the profiler

From 5005ebb4b42086102ebf19bbda6f8e96fa5ae9b4 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Thu, 25 May 2023 13:18:13 +1000
Subject: [PATCH 8/8] XXX: fix non-determinism

---
 compiler/rustc_middle/src/mir/mono.rs         | 49 +++++++++++++++++--
 .../src/partitioning/default.rs               | 33 ++++++++-----
 2 files changed, 65 insertions(+), 17 deletions(-)

diff --git a/compiler/rustc_middle/src/mir/mono.rs b/compiler/rustc_middle/src/mir/mono.rs
index 653e8c3c58bec..e7aeb877873db 100644
--- a/compiler/rustc_middle/src/mir/mono.rs
+++ b/compiler/rustc_middle/src/mir/mono.rs
@@ -3,7 +3,7 @@ use crate::ty::{subst::InternalSubsts, Instance, InstanceDef, SymbolName, TyCtxt
 use rustc_attr::InlineAttr;
 use rustc_data_structures::base_n;
 use rustc_data_structures::fingerprint::Fingerprint;
-use rustc_data_structures::fx::FxHashMap;
+use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
 use rustc_data_structures::stable_hasher::{Hash128, HashStable, StableHasher};
 use rustc_hir::def_id::{CrateNum, DefId, LOCAL_CRATE};
 use rustc_hir::ItemId;
@@ -230,7 +230,7 @@ pub struct CodegenUnit<'tcx> {
     /// contain something unique to this crate (e.g., a module path)
     /// as well as the crate name and disambiguator.
     name: Symbol,
-    items: FxHashMap<MonoItem<'tcx>, (Linkage, Visibility)>,
+    items: FxIndexMap<MonoItem<'tcx>, (Linkage, Visibility)>,
     size_estimate: Option<usize>,
     primary: bool,
     /// True if this is CGU is used to hold code coverage information for dead code,
@@ -291,11 +291,11 @@ impl<'tcx> CodegenUnit<'tcx> {
         self.primary = true;
     }
 
-    pub fn items(&self) -> &FxHashMap<MonoItem<'tcx>, (Linkage, Visibility)> {
+    pub fn items(&self) -> &FxIndexMap<MonoItem<'tcx>, (Linkage, Visibility)> {
         &self.items
     }
 
-    pub fn items_mut(&mut self) -> &mut FxHashMap<MonoItem<'tcx>, (Linkage, Visibility)> {
+    pub fn items_mut(&mut self) -> &mut FxIndexMap<MonoItem<'tcx>, (Linkage, Visibility)> {
         &mut self.items
     }
 
@@ -364,6 +364,47 @@ impl<'tcx> CodegenUnit<'tcx> {
             .unwrap_or_else(|| panic!("Could not find work-product for CGU `{}`", self.name()))
     }
 
+    // njn: dups code in items_in_deterministic_order
+    pub fn sort_items(&mut self, tcx: TyCtxt<'tcx>) {
+        // The codegen tests rely on items being process in the same order as
+        // they appear in the file, so for local items, we sort by node_id first
+        #[derive(PartialEq, Eq, PartialOrd, Ord)]
+        pub struct ItemSortKey<'tcx>(Option<usize>, SymbolName<'tcx>);
+
+        fn item_sort_key<'tcx>(tcx: TyCtxt<'tcx>, item: MonoItem<'tcx>) -> ItemSortKey<'tcx> {
+            ItemSortKey(
+                match item {
+                    MonoItem::Fn(ref instance) => {
+                        match instance.def {
+                            // We only want to take HirIds of user-defined
+                            // instances into account. The others don't matter for
+                            // the codegen tests and can even make item order
+                            // unstable.
+                            InstanceDef::Item(def) => def.as_local().map(Idx::index),
+                            InstanceDef::VTableShim(..)
+                            | InstanceDef::ReifyShim(..)
+                            | InstanceDef::Intrinsic(..)
+                            | InstanceDef::FnPtrShim(..)
+                            | InstanceDef::Virtual(..)
+                            | InstanceDef::ClosureOnceShim { .. }
+                            | InstanceDef::DropGlue(..)
+                            | InstanceDef::CloneShim(..)
+                            | InstanceDef::ThreadLocalShim(..)
+                            | InstanceDef::FnPtrAddrShim(..) => None,
+                        }
+                    }
+                    MonoItem::Static(def_id) => def_id.as_local().map(Idx::index),
+                    MonoItem::GlobalAsm(item_id) => Some(item_id.owner_id.def_id.index()),
+                },
+                item.symbol_name(tcx),
+            )
+        }
+
+        self.items_mut().sort_by(|&i1, _, &i2, _| {
+            std::cmp::Ord::cmp(&item_sort_key(tcx, i1), &item_sort_key(tcx, i2))
+        });
+    }
+
     pub fn items_in_deterministic_order(
         &self,
         tcx: TyCtxt<'tcx>,
diff --git a/compiler/rustc_monomorphize/src/partitioning/default.rs b/compiler/rustc_monomorphize/src/partitioning/default.rs
index c6df0956a6036..23a68614d46d1 100644
--- a/compiler/rustc_monomorphize/src/partitioning/default.rs
+++ b/compiler/rustc_monomorphize/src/partitioning/default.rs
@@ -148,20 +148,27 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
                     // non-deterministic splitting, which messes up incremental
                     // compilation
 
+                    old_cgu.sort_items(cx.tcx);
+                    while moved_size < target_size {
+                        let (item, rest) = old_cgu.items_mut().pop().unwrap();
+                        moved_size += item.size_estimate(cx.tcx);
+                        new_cgu.items_mut().insert(item, rest);
+                    }
+
                     // njn: nicer way to do this?
                     // njn: don't move if it's the last item
-                    old_cgu.items_mut().drain_filter(|item, rest| {
-                        // njn: true->remove
-                        if moved_size < target_size {
-                            let item_size = item.size_estimate(cx.tcx);
-                            //eprintln!("MOVE: {}", item_size);
-                            moved_size += item_size;
-                            new_cgu.items_mut().insert(*item, *rest);
-                            true
-                        } else {
-                            false
-                        }
-                    });
+                    //old_cgu.items_mut().drain_filter(|item, rest| {
+                    //    // njn: true->remove
+                    //    if moved_size < target_size {
+                    //        let item_size = item.size_estimate(cx.tcx);
+                    //        //eprintln!("MOVE: {}", item_size);
+                    //        moved_size += item_size;
+                    //        new_cgu.items_mut().insert(*item, *rest);
+                    //        true
+                    //    } else {
+                    //        false
+                    //    }
+                    //});
                     new_cgu.increase_size_estimate(moved_size);
                     old_cgu.decrease_size_estimate(moved_size);
 
@@ -193,7 +200,7 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
 
             // Move the mono-items from `cgu_n` to `cgu_n_minus_1`
             cgu_n_minus_1.increase_size_estimate(cgu_n.size_estimate());
-            for (k, v) in cgu_n.items_mut().drain() {
+            for (k, v) in cgu_n.items_mut().drain(..) {
                 cgu_n_minus_1.items_mut().insert(k, v);
             }