Skip to content

Commit fa06a37

Browse files
committed
Auto merge of #112695 - nnethercote:inline-before-merging-cgus, r=wesleywiser
Inline before merging cgus: CGU merging relies on CGU sizes, but the CGU sizes before inlining aren't accurate. This change doesn't have much effect on compile perf, but it makes follow-on changes that involve more sophisticated reasoning about CGU sizes much easier. r? `@wesleywiser`
2 parents 0928a1f + abde9ba commit fa06a37

File tree

2 files changed

+65
-92
lines changed

2 files changed

+65
-92
lines changed

Diff for: compiler/rustc_middle/src/mir/mono.rs

+9-11
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ pub struct CodegenUnit<'tcx> {
231231
/// as well as the crate name and disambiguator.
232232
name: Symbol,
233233
items: FxHashMap<MonoItem<'tcx>, (Linkage, Visibility)>,
234-
size_estimate: Option<usize>,
234+
size_estimate: usize,
235235
primary: bool,
236236
/// True if this CGU is used to hold code coverage information for dead code,
237237
/// false otherwise.
@@ -269,7 +269,7 @@ impl<'tcx> CodegenUnit<'tcx> {
269269
CodegenUnit {
270270
name,
271271
items: Default::default(),
272-
size_estimate: None,
272+
size_estimate: 0,
273273
primary: false,
274274
is_code_coverage_dead_code_cgu: false,
275275
}
@@ -320,23 +320,21 @@ impl<'tcx> CodegenUnit<'tcx> {
320320
base_n::encode(hash, base_n::CASE_INSENSITIVE)
321321
}
322322

323-
pub fn create_size_estimate(&mut self, tcx: TyCtxt<'tcx>) {
323+
pub fn compute_size_estimate(&mut self, tcx: TyCtxt<'tcx>) {
324324
// Estimate the size of a codegen unit as (approximately) the number of MIR
325325
// statements it corresponds to.
326-
self.size_estimate = Some(self.items.keys().map(|mi| mi.size_estimate(tcx)).sum());
326+
self.size_estimate = self.items.keys().map(|mi| mi.size_estimate(tcx)).sum();
327327
}
328328

329329
#[inline]
330-
/// Should only be called if [`create_size_estimate`] has previously been called.
330+
/// Should only be called if [`compute_size_estimate`] has previously been called.
331331
///
332-
/// [`create_size_estimate`]: Self::create_size_estimate
332+
/// [`compute_size_estimate`]: Self::compute_size_estimate
333333
pub fn size_estimate(&self) -> usize {
334+
// Items are never zero-sized, so if we have items the estimate must be
335+
// non-zero, unless we forgot to call `compute_size_estimate` first.
336+
assert!(self.items.is_empty() || self.size_estimate != 0);
334337
self.size_estimate
335-
.expect("create_size_estimate must be called before getting a size_estimate")
336-
}
337-
338-
pub fn modify_size_estimate(&mut self, delta: usize) {
339-
*self.size_estimate.as_mut().unwrap() += delta;
340338
}
341339

342340
pub fn contains_item(&self, item: &MonoItem<'tcx>) -> bool {

Diff for: compiler/rustc_monomorphize/src/partitioning.rs

+56-81
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ struct PartitioningCx<'a, 'tcx> {
125125
usage_map: &'a UsageMap<'tcx>,
126126
}
127127

128-
struct PlacedRootMonoItems<'tcx> {
128+
struct PlacedMonoItems<'tcx> {
129129
/// The codegen units, sorted by name to make things deterministic.
130130
codegen_units: Vec<CodegenUnit<'tcx>>,
131131

@@ -150,18 +150,13 @@ where
150150

151151
let cx = &PartitioningCx { tcx, usage_map };
152152

153-
// In the first step, we place all regular monomorphizations into their
154-
// respective 'home' codegen unit. Regular monomorphizations are all
155-
// functions and statics defined in the local crate.
156-
let PlacedRootMonoItems { mut codegen_units, internalization_candidates, unique_inlined_stats } = {
157-
let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_place_roots");
158-
let mut placed = place_root_mono_items(cx, mono_items);
153+
// Place all mono items into a codegen unit. `place_mono_items` is
154+
// responsible for initializing the CGU size estimates.
155+
let PlacedMonoItems { mut codegen_units, internalization_candidates, unique_inlined_stats } = {
156+
let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_place_items");
157+
let placed = place_mono_items(cx, mono_items);
159158

160-
for cgu in &mut placed.codegen_units {
161-
cgu.create_size_estimate(tcx);
162-
}
163-
164-
debug_dump(tcx, "ROOTS", &placed.codegen_units, placed.unique_inlined_stats);
159+
debug_dump(tcx, "PLACE", &placed.codegen_units, placed.unique_inlined_stats);
165160

166161
placed
167162
};
@@ -175,23 +170,8 @@ where
175170
debug_dump(tcx, "MERGE", &codegen_units, unique_inlined_stats);
176171
}
177172

178-
// In the next step, we use the inlining map to determine which additional
179-
// monomorphizations have to go into each codegen unit. These additional
180-
// monomorphizations can be drop-glue, functions from external crates, and
181-
// local functions the definition of which is marked with `#[inline]`.
182-
{
183-
let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_place_inline_items");
184-
place_inlined_mono_items(cx, &mut codegen_units);
185-
186-
for cgu in &mut codegen_units {
187-
cgu.create_size_estimate(tcx);
188-
}
189-
190-
debug_dump(tcx, "INLINE", &codegen_units, unique_inlined_stats);
191-
}
192-
193-
// Next we try to make as many symbols "internal" as possible, so LLVM has
194-
// more freedom to optimize.
173+
// Make as many symbols "internal" as possible, so LLVM has more freedom to
174+
// optimize.
195175
if !tcx.sess.link_dead_code() {
196176
let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_internalize_symbols");
197177
internalize_symbols(cx, &mut codegen_units, internalization_candidates);
@@ -212,10 +192,7 @@ where
212192
codegen_units
213193
}
214194

215-
fn place_root_mono_items<'tcx, I>(
216-
cx: &PartitioningCx<'_, 'tcx>,
217-
mono_items: I,
218-
) -> PlacedRootMonoItems<'tcx>
195+
fn place_mono_items<'tcx, I>(cx: &PartitioningCx<'_, 'tcx>, mono_items: I) -> PlacedMonoItems<'tcx>
219196
where
220197
I: Iterator<Item = MonoItem<'tcx>>,
221198
{
@@ -236,6 +213,8 @@ where
236213
let mut num_unique_inlined_items = 0;
237214
let mut unique_inlined_items_size = 0;
238215
for mono_item in mono_items {
216+
// Handle only root items directly here. Inlined items are handled at
217+
// the bottom of the loop based on reachability.
239218
match mono_item.instantiation_mode(cx.tcx) {
240219
InstantiationMode::GloballyShared { .. } => {}
241220
InstantiationMode::LocalCopy => {
@@ -248,7 +227,7 @@ where
248227
let characteristic_def_id = characteristic_def_id_of_mono_item(cx.tcx, mono_item);
249228
let is_volatile = is_incremental_build && mono_item.is_generic_fn();
250229

251-
let codegen_unit_name = match characteristic_def_id {
230+
let cgu_name = match characteristic_def_id {
252231
Some(def_id) => compute_codegen_unit_name(
253232
cx.tcx,
254233
cgu_name_builder,
@@ -259,9 +238,7 @@ where
259238
None => fallback_cgu_name(cgu_name_builder),
260239
};
261240

262-
let codegen_unit = codegen_units
263-
.entry(codegen_unit_name)
264-
.or_insert_with(|| CodegenUnit::new(codegen_unit_name));
241+
let cgu = codegen_units.entry(cgu_name).or_insert_with(|| CodegenUnit::new(cgu_name));
265242

266243
let mut can_be_internalized = true;
267244
let (linkage, visibility) = mono_item_linkage_and_visibility(
@@ -274,23 +251,56 @@ where
274251
internalization_candidates.insert(mono_item);
275252
}
276253

277-
codegen_unit.items_mut().insert(mono_item, (linkage, visibility));
254+
cgu.items_mut().insert(mono_item, (linkage, visibility));
255+
256+
// Get all inlined items that are reachable from `mono_item` without
257+
// going via another root item. This includes drop-glue, functions from
258+
// external crates, and local functions the definition of which is
259+
// marked with `#[inline]`.
260+
let mut reachable_inlined_items = FxHashSet::default();
261+
get_reachable_inlined_items(cx.tcx, mono_item, cx.usage_map, &mut reachable_inlined_items);
262+
263+
// Add those inlined items. It's possible an inlined item is reachable
264+
// from multiple root items within a CGU, which is fine; it just means
265+
// the `insert` will be a no-op.
266+
for inlined_item in reachable_inlined_items {
267+
// This is a CGU-private copy.
268+
cgu.items_mut().insert(inlined_item, (Linkage::Internal, Visibility::Default));
269+
}
278270
}
279271

280272
// Always ensure we have at least one CGU; otherwise, if we have a
281273
// crate with just types (for example), we could wind up with no CGU.
282274
if codegen_units.is_empty() {
283-
let codegen_unit_name = fallback_cgu_name(cgu_name_builder);
284-
codegen_units.insert(codegen_unit_name, CodegenUnit::new(codegen_unit_name));
275+
let cgu_name = fallback_cgu_name(cgu_name_builder);
276+
codegen_units.insert(cgu_name, CodegenUnit::new(cgu_name));
285277
}
286278

287279
let mut codegen_units: Vec<_> = codegen_units.into_values().collect();
288280
codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
289281

290-
PlacedRootMonoItems {
282+
for cgu in codegen_units.iter_mut() {
283+
cgu.compute_size_estimate(cx.tcx);
284+
}
285+
286+
return PlacedMonoItems {
291287
codegen_units,
292288
internalization_candidates,
293289
unique_inlined_stats: (num_unique_inlined_items, unique_inlined_items_size),
290+
};
291+
292+
fn get_reachable_inlined_items<'tcx>(
293+
tcx: TyCtxt<'tcx>,
294+
item: MonoItem<'tcx>,
295+
usage_map: &UsageMap<'tcx>,
296+
visited: &mut FxHashSet<MonoItem<'tcx>>,
297+
) {
298+
usage_map.for_each_inlined_used_item(tcx, item, |inlined_item| {
299+
let is_new = visited.insert(inlined_item);
300+
if is_new {
301+
get_reachable_inlined_items(tcx, inlined_item, usage_map, visited);
302+
}
303+
});
294304
}
295305
}
296306

@@ -314,7 +324,7 @@ fn merge_codegen_units<'tcx>(
314324
// worse generated code. So we don't allow CGUs smaller than this (unless
315325
// there is just one CGU, of course). Note that CGU sizes of 100,000+ are
316326
// common in larger programs, so this isn't all that large.
317-
const NON_INCR_MIN_CGU_SIZE: usize = 1000;
327+
const NON_INCR_MIN_CGU_SIZE: usize = 1800;
318328

319329
// Repeatedly merge the two smallest codegen units as long as:
320330
// - we have more CGUs than the upper limit, or
@@ -338,9 +348,11 @@ fn merge_codegen_units<'tcx>(
338348
let mut smallest = codegen_units.pop().unwrap();
339349
let second_smallest = codegen_units.last_mut().unwrap();
340350

341-
// Move the mono-items from `smallest` to `second_smallest`
342-
second_smallest.modify_size_estimate(smallest.size_estimate());
351+
// Move the items from `smallest` to `second_smallest`. Some of them
352+
// may be duplicate inlined items, in which case the destination CGU is
353+
// unaffected. Recalculate size estimates afterwards.
343354
second_smallest.items_mut().extend(smallest.items_mut().drain());
355+
second_smallest.compute_size_estimate(cx.tcx);
344356

345357
// Record that `second_smallest` now contains all the stuff that was
346358
// in `smallest` before.
@@ -406,43 +418,6 @@ fn merge_codegen_units<'tcx>(
406418
codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
407419
}
408420

409-
fn place_inlined_mono_items<'tcx>(
410-
cx: &PartitioningCx<'_, 'tcx>,
411-
codegen_units: &mut [CodegenUnit<'tcx>],
412-
) {
413-
for cgu in codegen_units.iter_mut() {
414-
// Collect all inlined items that need to be available in this codegen unit.
415-
let mut reachable_inlined_items = FxHashSet::default();
416-
for root in cgu.items().keys() {
417-
// Get all inlined items that are reachable from it without going
418-
// via another root item.
419-
get_reachable_inlined_items(cx.tcx, *root, cx.usage_map, &mut reachable_inlined_items);
420-
}
421-
422-
// Add all monomorphizations that are not already there.
423-
for inlined_item in reachable_inlined_items {
424-
assert!(!cgu.items().contains_key(&inlined_item));
425-
426-
// This is a CGU-private copy.
427-
cgu.items_mut().insert(inlined_item, (Linkage::Internal, Visibility::Default));
428-
}
429-
}
430-
431-
fn get_reachable_inlined_items<'tcx>(
432-
tcx: TyCtxt<'tcx>,
433-
item: MonoItem<'tcx>,
434-
usage_map: &UsageMap<'tcx>,
435-
visited: &mut FxHashSet<MonoItem<'tcx>>,
436-
) {
437-
usage_map.for_each_inlined_used_item(tcx, item, |inlined_item| {
438-
let is_new = visited.insert(inlined_item);
439-
if is_new {
440-
get_reachable_inlined_items(tcx, inlined_item, usage_map, visited);
441-
}
442-
});
443-
}
444-
}
445-
446421
fn internalize_symbols<'tcx>(
447422
cx: &PartitioningCx<'_, 'tcx>,
448423
codegen_units: &mut [CodegenUnit<'tcx>],

0 commit comments

Comments
 (0)