@@ -248,7 +248,8 @@ where
248
248
}
249
249
let size_estimate = mono_item. size_estimate ( cx. tcx ) ;
250
250
251
- cgu. items_mut ( ) . insert ( mono_item, MonoItemData { linkage, visibility, size_estimate } ) ;
251
+ cgu. items_mut ( )
252
+ . insert ( mono_item, MonoItemData { inlined : false , linkage, visibility, size_estimate } ) ;
252
253
253
254
// Get all inlined items that are reachable from `mono_item` without
254
255
// going via another root item. This includes drop-glue, functions from
@@ -263,6 +264,7 @@ where
263
264
for inlined_item in reachable_inlined_items {
264
265
// This is a CGU-private copy.
265
266
cgu. items_mut ( ) . entry ( inlined_item) . or_insert_with ( || MonoItemData {
267
+ inlined : true ,
266
268
linkage : Linkage :: Internal ,
267
269
visibility : Visibility :: Default ,
268
270
size_estimate : inlined_item. size_estimate ( cx. tcx ) ,
@@ -316,31 +318,83 @@ fn merge_codegen_units<'tcx>(
316
318
let mut cgu_contents: FxHashMap < Symbol , Vec < Symbol > > =
317
319
codegen_units. iter ( ) . map ( |cgu| ( cgu. name ( ) , vec ! [ cgu. name( ) ] ) ) . collect ( ) ;
318
320
321
+ // If N is the maximum number of CGUs, and the CGUs are sorted from largest
322
+ // to smallest, we repeatedly find which CGU in codegen_units[N..] has the
323
+ // greatest overlap of inlined items with codegen_units[N-1], merge that
324
+ // CGU into codegen_units[N-1], then re-sort by size and repeat.
325
+ //
326
+ // We use inlined item overlap to guide this merging because it minimizes
327
+ // duplication of inlined items, which makes LLVM be faster and generate
328
+ // better and smaller machine code.
329
+ //
330
+ // Why merge into codegen_units[N-1]? We want CGUs to have similar sizes,
331
+ // which means we don't want codegen_units[0..N] (the already big ones)
332
+ // getting any bigger, if we can avoid it. When we have more than N CGUs
333
+ // then at least one of the biggest N will have to grow. codegen_units[N-1]
334
+ // is the smallest of those, and so has the most room to grow.
335
+ let max_codegen_units = cx. tcx . sess . codegen_units ( ) . as_usize ( ) ;
336
+ while codegen_units. len ( ) > max_codegen_units {
337
+ // Sort small CGUs to the back.
338
+ codegen_units. sort_by_key ( |cgu| cmp:: Reverse ( cgu. size_estimate ( ) ) ) ;
339
+
340
+ let cgu_dst = & codegen_units[ max_codegen_units - 1 ] ;
341
+
342
+ // Find the CGU that overlaps the most with `cgu_dst`. In the case of a
343
+ // tie, favour the earlier (bigger) CGU.
344
+ let mut max_overlap = 0 ;
345
+ let mut max_overlap_i = max_codegen_units;
346
+ for ( i, cgu_src) in codegen_units. iter ( ) . enumerate ( ) . skip ( max_codegen_units) {
347
+ if cgu_src. size_estimate ( ) <= max_overlap {
348
+ // None of the remaining overlaps can exceed `max_overlap`, so
349
+ // stop looking.
350
+ break ;
351
+ }
352
+
353
+ let overlap = compute_inlined_overlap ( cgu_dst, cgu_src) ;
354
+ if overlap > max_overlap {
355
+ max_overlap = overlap;
356
+ max_overlap_i = i;
357
+ }
358
+ }
359
+
360
+ let mut cgu_src = codegen_units. swap_remove ( max_overlap_i) ;
361
+ let cgu_dst = & mut codegen_units[ max_codegen_units - 1 ] ;
362
+
363
+ // Move the items from `cgu_src` to `cgu_dst`. Some of them may be
364
+ // duplicate inlined items, in which case the destination CGU is
365
+ // unaffected. Recalculate size estimates afterwards.
366
+ cgu_dst. items_mut ( ) . extend ( cgu_src. items_mut ( ) . drain ( ) ) ;
367
+ cgu_dst. compute_size_estimate ( ) ;
368
+
369
+ // Record that `cgu_dst` now contains all the stuff that was in
370
+ // `cgu_src` before.
371
+ let mut consumed_cgu_names = cgu_contents. remove ( & cgu_src. name ( ) ) . unwrap ( ) ;
372
+ cgu_contents. get_mut ( & cgu_dst. name ( ) ) . unwrap ( ) . append ( & mut consumed_cgu_names) ;
373
+ }
374
+
319
375
// Having multiple CGUs can drastically speed up compilation. But for
320
376
// non-incremental builds, tiny CGUs slow down compilation *and* result in
321
377
// worse generated code. So we don't allow CGUs smaller than this (unless
322
378
// there is just one CGU, of course). Note that CGU sizes of 100,000+ are
323
379
// common in larger programs, so this isn't all that large.
324
380
const NON_INCR_MIN_CGU_SIZE : usize = 1800 ;
325
381
326
- // Repeatedly merge the two smallest codegen units as long as:
327
- // - we have more CGUs than the upper limit, or
328
- // - (Non-incremental builds only) the user didn't specify a CGU count, and
329
- // there are multiple CGUs, and some are below the minimum size.
382
+ // Repeatedly merge the two smallest codegen units as long as: it's a
383
+ // non-incremental build, and the user didn't specify a CGU count, and
384
+ // there are multiple CGUs, and some are below the minimum size.
330
385
//
331
386
// The "didn't specify a CGU count" condition is because when an explicit
332
387
// count is requested we observe it as closely as possible. For example,
333
388
// the `compiler_builtins` crate sets `codegen-units = 10000` and it's
334
389
// critical they aren't merged. Also, some tests use explicit small values
335
390
// and likewise won't work if small CGUs are merged.
336
- while codegen_units. len ( ) > cx. tcx . sess . codegen_units ( ) . as_usize ( )
337
- || ( cx. tcx . sess . opts . incremental . is_none ( )
338
- && matches ! ( cx. tcx. sess. codegen_units( ) , CodegenUnits :: Default ( _) )
339
- && codegen_units. len ( ) > 1
340
- && codegen_units. iter ( ) . any ( |cgu| cgu. size_estimate ( ) < NON_INCR_MIN_CGU_SIZE ) )
391
+ while cx. tcx . sess . opts . incremental . is_none ( )
392
+ && matches ! ( cx. tcx. sess. codegen_units( ) , CodegenUnits :: Default ( _) )
393
+ && codegen_units. len ( ) > 1
394
+ && codegen_units. iter ( ) . any ( |cgu| cgu. size_estimate ( ) < NON_INCR_MIN_CGU_SIZE )
341
395
{
342
396
// Sort small cgus to the back.
343
- codegen_units. sort_by_key ( |cgu| cmp:: Reverse ( cgu. size_estimate ( ) ) ) ;
397
+ codegen_units. sort_by_cached_key ( |cgu| cmp:: Reverse ( cgu. size_estimate ( ) ) ) ;
344
398
345
399
let mut smallest = codegen_units. pop ( ) . unwrap ( ) ;
346
400
let second_smallest = codegen_units. last_mut ( ) . unwrap ( ) ;
@@ -351,16 +405,7 @@ fn merge_codegen_units<'tcx>(
351
405
second_smallest. items_mut ( ) . extend ( smallest. items_mut ( ) . drain ( ) ) ;
352
406
second_smallest. compute_size_estimate ( ) ;
353
407
354
- // Record that `second_smallest` now contains all the stuff that was
355
- // in `smallest` before.
356
- let mut consumed_cgu_names = cgu_contents. remove ( & smallest. name ( ) ) . unwrap ( ) ;
357
- cgu_contents. get_mut ( & second_smallest. name ( ) ) . unwrap ( ) . append ( & mut consumed_cgu_names) ;
358
-
359
- debug ! (
360
- "CodegenUnit {} merged into CodegenUnit {}" ,
361
- smallest. name( ) ,
362
- second_smallest. name( )
363
- ) ;
408
+ // Don't update `cgu_contents`, that's only for incremental builds.
364
409
}
365
410
366
411
let cgu_name_builder = & mut CodegenUnitNameBuilder :: new ( cx. tcx ) ;
@@ -439,6 +484,25 @@ fn merge_codegen_units<'tcx>(
439
484
}
440
485
}
441
486
487
+ /// Compute the combined size of all inlined items that appear in both `cgu1`
488
+ /// and `cgu2`.
489
+ fn compute_inlined_overlap < ' tcx > ( cgu1 : & CodegenUnit < ' tcx > , cgu2 : & CodegenUnit < ' tcx > ) -> usize {
490
+ // Either order works. We pick the one that involves iterating over fewer
491
+ // items.
492
+ let ( src_cgu, dst_cgu) =
493
+ if cgu1. items ( ) . len ( ) <= cgu2. items ( ) . len ( ) { ( cgu1, cgu2) } else { ( cgu2, cgu1) } ;
494
+
495
+ let mut overlap = 0 ;
496
+ for ( item, data) in src_cgu. items ( ) . iter ( ) {
497
+ if data. inlined {
498
+ if dst_cgu. items ( ) . contains_key ( item) {
499
+ overlap += data. size_estimate ;
500
+ }
501
+ }
502
+ }
503
+ overlap
504
+ }
505
+
442
506
fn internalize_symbols < ' tcx > (
443
507
cx : & PartitioningCx < ' _ , ' tcx > ,
444
508
codegen_units : & mut [ CodegenUnit < ' tcx > ] ,
@@ -870,19 +934,16 @@ fn debug_dump<'a, 'tcx: 'a>(tcx: TyCtxt<'tcx>, label: &str, cgus: &[CodegenUnit<
870
934
all_cgu_sizes. push ( cgu. size_estimate ( ) ) ;
871
935
872
936
for ( item, data) in cgu. items ( ) {
873
- match item. instantiation_mode ( tcx) {
874
- InstantiationMode :: GloballyShared { .. } => {
875
- root_items += 1 ;
876
- root_size += data. size_estimate ;
877
- }
878
- InstantiationMode :: LocalCopy => {
879
- if inlined_items. insert ( item) {
880
- unique_inlined_items += 1 ;
881
- unique_inlined_size += data. size_estimate ;
882
- }
883
- placed_inlined_items += 1 ;
884
- placed_inlined_size += data. size_estimate ;
937
+ if !data. inlined {
938
+ root_items += 1 ;
939
+ root_size += data. size_estimate ;
940
+ } else {
941
+ if inlined_items. insert ( item) {
942
+ unique_inlined_items += 1 ;
943
+ unique_inlined_size += data. size_estimate ;
885
944
}
945
+ placed_inlined_items += 1 ;
946
+ placed_inlined_size += data. size_estimate ;
886
947
}
887
948
}
888
949
}
@@ -937,10 +998,7 @@ fn debug_dump<'a, 'tcx: 'a>(tcx: TyCtxt<'tcx>, label: &str, cgus: &[CodegenUnit<
937
998
let symbol_name = item. symbol_name ( tcx) . name ;
938
999
let symbol_hash_start = symbol_name. rfind ( 'h' ) ;
939
1000
let symbol_hash = symbol_hash_start. map_or ( "<no hash>" , |i| & symbol_name[ i..] ) ;
940
- let kind = match item. instantiation_mode ( tcx) {
941
- InstantiationMode :: GloballyShared { .. } => "root" ,
942
- InstantiationMode :: LocalCopy => "inlined" ,
943
- } ;
1001
+ let kind = if !data. inlined { "root" } else { "inlined" } ;
944
1002
let size = data. size_estimate ;
945
1003
let _ = with_no_trimmed_paths ! ( writeln!(
946
1004
s,
0 commit comments