Skip to content

Commit c74b306

Browse files
committed
Move all cold code to the end of the function
Fixes #836

Benchmark #1: simple-raytracer/raytracer_cg_clif
  Time (mean ± σ):     9.250 s ±  0.056 s    [User: 9.213 s, System: 0.015 s]
  Range (min … max):   9.151 s …  9.348 s    20 runs

Benchmark #2: simple-raytracer/raytracer_cg_clif_cold_separated
  Time (mean ± σ):     9.179 s ±  0.101 s    [User: 9.141 s, System: 0.016 s]
  Range (min … max):   9.070 s …  9.473 s    20 runs

Summary
  'simple-raytracer/raytracer_cg_clif_cold_separated' ran
    1.01 ± 0.01 times faster than 'simple-raytracer/raytracer_cg_clif'
1 parent 38797f8 commit c74b306

File tree

7 files changed

+56
-4
lines changed

7 files changed

+56
-4
lines changed

src/abi/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,7 @@ fn codegen_call_inner<'tcx>(
506506
args: Vec<CValue<'tcx>>,
507507
ret_place: Option<CPlace<'tcx>>,
508508
) {
509+
// FIXME mark the current ebb as cold when calling a `#[cold]` function.
509510
let fn_sig = fx
510511
.tcx
511512
.normalize_erasing_late_bound_regions(ParamEnv::reveal_all(), &fn_ty.fn_sig(fx.tcx));

src/base.rs

+12-3
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>(
3232
// Predefine ebb's
3333
let start_ebb = bcx.create_ebb();
3434
let ebb_map: IndexVec<BasicBlock, Ebb> = (0..mir.basic_blocks().len()).map(|_| bcx.create_ebb()).collect();
35+
let mut cold_ebbs = EntitySet::new();
36+
for (bb, &ebb) in ebb_map.iter_enumerated() {
37+
if mir.basic_blocks()[bb].is_cleanup {
38+
cold_ebbs.insert(ebb);
39+
}
40+
}
3541

3642
// Make FunctionCx
3743
let pointer_type = cx.module.target_config().pointer_type();
@@ -49,6 +55,7 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>(
4955
ebb_map,
5056
local_map: HashMap::new(),
5157
caller_location: None, // set by `codegen_fn_prelude`
58+
cold_ebbs,
5259

5360
clif_comments,
5461
constants_cx: &mut cx.constants_cx,
@@ -73,6 +80,7 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>(
7380
let mut clif_comments = fx.clif_comments;
7481
let source_info_set = fx.source_info_set;
7582
let local_map = fx.local_map;
83+
let cold_ebbs = fx.cold_ebbs;
7684

7785
#[cfg(debug_assertions)]
7886
crate::pretty_clif::write_clif_file(cx.tcx, "unopt", instance, &context.func, &clif_comments, None);
@@ -82,7 +90,7 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>(
8290

8391
// Perform rust specific optimizations
8492
tcx.sess.time("optimize clif ir", || {
85-
crate::optimize::optimize_function(tcx, instance, context, &mut clif_comments);
93+
crate::optimize::optimize_function(tcx, instance, context, &cold_ebbs, &mut clif_comments);
8694
});
8795

8896
// Define function
@@ -191,17 +199,18 @@ fn codegen_fn_content(fx: &mut FunctionCx<'_, '_, impl Backend>) {
191199
}
192200
}
193201
let cond = trans_operand(fx, cond).load_scalar(fx);
202+
194203
let target = fx.get_ebb(*target);
195204
let failure = fx.bcx.create_ebb();
205+
fx.cold_ebbs.insert(failure);
206+
196207
if *expected {
197208
fx.bcx.ins().brz(cond, failure, &[]);
198209
} else {
199210
fx.bcx.ins().brnz(cond, failure, &[]);
200211
};
201212
fx.bcx.ins().jump(target, &[]);
202213

203-
// FIXME insert bb after all other bb's to reduce the amount of jumps in the common
204-
// case and improve code locality.
205214
fx.bcx.switch_to_block(failure);
206215
trap_panic(
207216
fx,

src/common.rs

+3
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,9 @@ pub struct FunctionCx<'clif, 'tcx, B: Backend + 'static> {
270270
/// When `#[track_caller]` is used, the implicit caller location is stored in this variable.
271271
pub caller_location: Option<CValue<'tcx>>,
272272

273+
/// See [crate::optimize::code_layout] for more information.
274+
pub cold_ebbs: EntitySet<Ebb>,
275+
273276
pub clif_comments: crate::pretty_clif::CommentWriter,
274277
pub constants_cx: &'clif mut crate::constant::ConstantCx,
275278
pub vtables: &'clif mut HashMap<(Ty<'tcx>, Option<ty::PolyExistentialTraitRef<'tcx>>), DataId>,

src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ mod prelude {
9595
pub use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleKind};
9696

9797
pub use cranelift_codegen::Context;
98+
pub use cranelift_codegen::entity::EntitySet;
9899
pub use cranelift_codegen::ir::{AbiParam, Ebb, ExternalName, FuncRef, Inst, InstBuilder, MemFlags, Signature, SourceLoc, StackSlot, StackSlotData, StackSlotKind, TrapCode, Type, Value};
99100
pub use cranelift_codegen::ir::condcodes::{FloatCC, IntCC};
100101
pub use cranelift_codegen::ir::function::Function;

src/optimize/code_layout.rs

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
//! This optimization moves cold code to the end of the function.
2+
//!
3+
//! Some code is executed much less often than other code, for example panic paths and the
4+
//! landing pads for unwinding. By moving this cold code to the end of the function the average
5+
//! number of jumps is reduced and the code locality is improved.
6+
//!
7+
//! # Undefined behaviour
8+
//!
9+
//! This optimization doesn't assume anything that isn't already assumed by Cranelift itself.
10+
11+
use crate::prelude::*;
12+
13+
pub fn optimize_function(ctx: &mut Context, cold_ebbs: &EntitySet<Ebb>) {
14+
// FIXME Move the ebb in place instead of remove and append once
15+
// bytecodealliance/cranelift#1339 is implemented.
16+
17+
let mut ebb_insts = HashMap::new();
18+
for ebb in cold_ebbs.keys().filter(|&ebb| cold_ebbs.contains(ebb)) {
19+
let insts = ctx.func.layout.ebb_insts(ebb).collect::<Vec<_>>();
20+
for &inst in &insts {
21+
ctx.func.layout.remove_inst(inst);
22+
}
23+
ebb_insts.insert(ebb, insts);
24+
ctx.func.layout.remove_ebb(ebb);
25+
}
26+
27+
// And then append them at the back again.
28+
for ebb in cold_ebbs.keys().filter(|&ebb| cold_ebbs.contains(ebb)) {
29+
ctx.func.layout.append_ebb(ebb);
30+
for inst in ebb_insts.remove(&ebb).unwrap() {
31+
ctx.func.layout.append_inst(inst, ebb);
32+
}
33+
}
34+
}

src/optimize/mod.rs

+5
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11
use crate::prelude::*;
22

3+
mod code_layout;
34
mod stack2reg;
45

56
pub fn optimize_function<'tcx>(
67
tcx: TyCtxt<'tcx>,
78
instance: Instance<'tcx>,
89
ctx: &mut Context,
10+
cold_ebbs: &EntitySet<Ebb>,
911
clif_comments: &mut crate::pretty_clif::CommentWriter,
1012
) {
13+
// The code_layout optimization is very cheap.
14+
self::code_layout::optimize_function(ctx, cold_ebbs);
15+
1116
if tcx.sess.opts.optimize == rustc_session::config::OptLevel::No {
1217
return; // FIXME classify optimizations over opt levels
1318
}

src/optimize/stack2reg.rs

-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ use std::collections::{BTreeMap, HashSet};
1313
use std::ops::Not;
1414

1515
use cranelift_codegen::cursor::{Cursor, FuncCursor};
16-
use cranelift_codegen::entity::EntitySet;
1716
use cranelift_codegen::ir::{InstructionData, Opcode, ValueDef};
1817
use cranelift_codegen::ir::immediates::Offset32;
1918

0 commit comments

Comments
 (0)