Commit f504d3f

Auto merge of rust-lang#56487 - nikic:discard-modules-earlier, r=alexcrichton
Discard LLVM modules earlier when performing ThinLTO

Currently ThinLTO is performed by first compiling all modules (and keeping them in memory), and then serializing them into ThinLTO buffers in a separate, synchronized step. Modules are later read back from the ThinLTO buffers when running the ThinLTO optimization pipeline. We can also find the following comment in `lto.rs`:

    // FIXME: right now, like with fat LTO, we serialize all in-memory
    //        modules before working with them and ThinLTO. We really
    //        shouldn't do this, however, and instead figure out how to
    //        extract a summary from an in-memory module and then merge that
    //        into the global index. It turns out that this loop is by far
    //        the most expensive portion of this small bit of global
    //        analysis!

I don't think that what is suggested here is the right approach: one of the primary benefits of using ThinLTO over ordinary LTO is that it is not necessary to keep all the modules (merged or not) in memory for the duration of the linking step. However, we currently don't really make use of this (at least for crate-local ThinLTO), because we keep all modules in memory until the start of the LTO step.

This PR changes the implementation to instead perform the serialization into ThinLTO buffers directly after the initial optimization step.

Most of the changes here are plumbing to separate out fat and thin LTO handling in `write.rs`, as the two now use different intermediate artifacts: in-memory modules for fat LTO, ThinLTO buffers for thin LTO.

r? @alexcrichton
2 parents fc84f5f + 8128d0d commit f504d3f
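To make the timing change concrete, here is a minimal, self-contained sketch of the new scheduling. `FakeModule` and the plain `Vec<u8>` buffer are hypothetical stand-ins for rustc's `ModuleCodegen<ModuleLlvm>` and `ThinBuffer`; the sketch only illustrates that serialization now happens right after a module is optimized, so the in-memory module can be dropped long before the ThinLTO phase runs.

    // Stand-in for an in-memory LLVM module produced by the initial optimization step.
    struct FakeModule {
        name: String,
        ir: Vec<u8>, // stands in for the LLVM IR kept in memory
    }

    // Stand-in for `prepare_thin`: serialize the module into a ThinLTO-style buffer
    // immediately, consuming (and thereby freeing) the in-memory module.
    fn prepare_thin(module: FakeModule) -> (String, Vec<u8>) {
        let buffer = module.ir; // the real code calls `ThinBuffer::new(module.module_llvm.llmod())`
        (module.name, buffer)
        // The rest of `module` is dropped here, not at the start of the LTO step.
    }

    fn main() {
        let optimized = vec![
            FakeModule { name: "cgu.0".into(), ir: vec![0; 16] },
            FakeModule { name: "cgu.1".into(), ir: vec![0; 32] },
        ];

        // Only (name, buffer) pairs survive until the ThinLTO phase; this mirrors
        // the `Vec<(String, ThinBuffer)>` that `run_thin` now consumes.
        let thin_inputs: Vec<(String, Vec<u8>)> =
            optimized.into_iter().map(prepare_thin).collect();

        for (name, buf) in &thin_inputs {
            println!("{}: {} byte thin buffer", name, buf.len());
        }
    }

In the actual change, `prepare_thin` lives in `back/lto.rs` and is reached through the new `WriteBackendMethods::prepare_thin` hook, while `run_fat` keeps taking in-memory modules, since fat LTO genuinely needs them merged.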

4 files changed: +231 −166 lines

src/librustc_codegen_llvm/back/lto.rs

+77 −70
@@ -48,18 +48,11 @@ pub fn crate_type_allows_lto(crate_type: config::CrateType) -> bool {
     }
 }
 
-/// Performs LTO, which in the case of full LTO means merging all modules into
-/// a single one and returning it for further optimizing. For ThinLTO, it will
-/// do the global analysis necessary and return two lists, one of the modules
-/// the need optimization and another for modules that can simply be copied over
-/// from the incr. comp. cache.
-pub(crate) fn run(cgcx: &CodegenContext<LlvmCodegenBackend>,
-                  modules: Vec<ModuleCodegen<ModuleLlvm>>,
-                  cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
-                  timeline: &mut Timeline)
-    -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError>
+fn prepare_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
+               timeline: &mut Timeline,
+               diag_handler: &Handler)
+    -> Result<(Vec<CString>, Vec<(SerializedModule<ModuleBuffer>, CString)>), FatalError>
 {
-    let diag_handler = cgcx.create_diag_handler();
     let export_threshold = match cgcx.lto {
         // We're just doing LTO for our one crate
         Lto::ThinLocal => SymbolExportLevel::Rust,
@@ -144,36 +137,74 @@ pub(crate) fn run(cgcx: &CodegenContext<LlvmCodegenBackend>,
         }
     }
 
+    Ok((symbol_white_list, upstream_modules))
+}
+
+/// Performs fat LTO by merging all modules into a single one and returning it
+/// for further optimization.
+pub(crate) fn run_fat(cgcx: &CodegenContext<LlvmCodegenBackend>,
+                      modules: Vec<ModuleCodegen<ModuleLlvm>>,
+                      timeline: &mut Timeline)
+    -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError>
+{
+    let diag_handler = cgcx.create_diag_handler();
+    let (symbol_white_list, upstream_modules) = prepare_lto(cgcx, timeline, &diag_handler)?;
     let symbol_white_list = symbol_white_list.iter()
         .map(|c| c.as_ptr())
         .collect::<Vec<_>>();
-    match cgcx.lto {
-        Lto::Fat => {
-            assert!(cached_modules.is_empty());
-            let opt_jobs = fat_lto(cgcx,
-                                   &diag_handler,
-                                   modules,
-                                   upstream_modules,
-                                   &symbol_white_list,
-                                   timeline);
-            opt_jobs.map(|opt_jobs| (opt_jobs, vec![]))
-        }
-        Lto::Thin |
-        Lto::ThinLocal => {
-            if cgcx.opts.debugging_opts.cross_lang_lto.enabled() {
-                unreachable!("We should never reach this case if the LTO step \
-                              is deferred to the linker");
-            }
-            thin_lto(cgcx,
-                     &diag_handler,
-                     modules,
-                     upstream_modules,
-                     cached_modules,
-                     &symbol_white_list,
-                     timeline)
+    fat_lto(cgcx, &diag_handler, modules, upstream_modules, &symbol_white_list, timeline)
+}
+
+/// Performs thin LTO by performing necessary global analysis and returning two
+/// lists, one of the modules that need optimization and another for modules that
+/// can simply be copied over from the incr. comp. cache.
+pub(crate) fn run_thin(cgcx: &CodegenContext<LlvmCodegenBackend>,
+                       modules: Vec<(String, ThinBuffer)>,
+                       cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
+                       timeline: &mut Timeline)
+    -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError>
+{
+    let diag_handler = cgcx.create_diag_handler();
+    let (symbol_white_list, upstream_modules) = prepare_lto(cgcx, timeline, &diag_handler)?;
+    let symbol_white_list = symbol_white_list.iter()
+        .map(|c| c.as_ptr())
+        .collect::<Vec<_>>();
+    if cgcx.opts.debugging_opts.cross_lang_lto.enabled() {
+        unreachable!("We should never reach this case if the LTO step \
+                      is deferred to the linker");
+    }
+    thin_lto(cgcx,
+             &diag_handler,
+             modules,
+             upstream_modules,
+             cached_modules,
+             &symbol_white_list,
+             timeline)
+}
+
+pub(crate) fn prepare_thin(
+    cgcx: &CodegenContext<LlvmCodegenBackend>,
+    module: ModuleCodegen<ModuleLlvm>
+) -> (String, ThinBuffer) {
+    let name = module.name.clone();
+    let buffer = ThinBuffer::new(module.module_llvm.llmod());
+
+    // We emit the module after having serialized it into a ThinBuffer
+    // because only then it will contain the ThinLTO module summary.
+    if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir {
+        if cgcx.config(module.kind).emit_pre_thin_lto_bc {
+            let path = incr_comp_session_dir
+                .join(pre_lto_bitcode_filename(&name));
+
+            fs::write(&path, buffer.data()).unwrap_or_else(|e| {
+                panic!("Error writing pre-lto-bitcode file `{}`: {}",
+                       path.display(),
+                       e);
+            });
         }
-        Lto::No => unreachable!(),
     }
+
+    (name, buffer)
 }
 
 fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
@@ -182,7 +213,7 @@ fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
            mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
            symbol_white_list: &[*const libc::c_char],
            timeline: &mut Timeline)
-    -> Result<Vec<LtoModuleCodegen<LlvmCodegenBackend>>, FatalError>
+    -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError>
 {
     info!("going for a fat lto");
 
@@ -271,10 +302,10 @@ fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
         timeline.record("passes");
     }
 
-    Ok(vec![LtoModuleCodegen::Fat {
+    Ok(LtoModuleCodegen::Fat {
         module: Some(module),
         _serialized_bitcode: serialized_bitcode,
-    }])
+    })
 }
 
 struct Linker<'a>(&'a mut llvm::Linker<'a>);
@@ -335,7 +366,7 @@ impl Drop for Linker<'a> {
 /// they all go out of scope.
 fn thin_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
             diag_handler: &Handler,
-            modules: Vec<ModuleCodegen<ModuleLlvm>>,
+            modules: Vec<(String, ThinBuffer)>,
             serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
             cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
             symbol_white_list: &[*const libc::c_char],
@@ -355,41 +386,17 @@ fn thin_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
     let mut module_names = Vec::with_capacity(full_scope_len);
     let mut thin_modules = Vec::with_capacity(full_scope_len);
 
-    // FIXME: right now, like with fat LTO, we serialize all in-memory
-    //        modules before working with them and ThinLTO. We really
-    //        shouldn't do this, however, and instead figure out how to
-    //        extract a summary from an in-memory module and then merge that
-    //        into the global index. It turns out that this loop is by far
-    //        the most expensive portion of this small bit of global
-    //        analysis!
-    for (i, module) in modules.into_iter().enumerate() {
-        info!("local module: {} - {}", i, module.name);
-        let name = CString::new(module.name.clone()).unwrap();
-        let buffer = ThinBuffer::new(module.module_llvm.llmod());
-
-        // We emit the module after having serialized it into a ThinBuffer
-        // because only then it will contain the ThinLTO module summary.
-        if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir {
-            if cgcx.config(module.kind).emit_pre_thin_lto_bc {
-                let path = incr_comp_session_dir
-                    .join(pre_lto_bitcode_filename(&module.name));
-
-                fs::write(&path, buffer.data()).unwrap_or_else(|e| {
-                    panic!("Error writing pre-lto-bitcode file `{}`: {}",
-                           path.display(),
-                           e);
-                });
-            }
-        }
-
+    for (i, (name, buffer)) in modules.into_iter().enumerate() {
+        info!("local module: {} - {}", i, name);
+        let cname = CString::new(name.clone()).unwrap();
         thin_modules.push(llvm::ThinLTOModule {
-            identifier: name.as_ptr(),
+            identifier: cname.as_ptr(),
            data: buffer.data().as_ptr(),
            len: buffer.data().len(),
        });
        thin_buffers.push(buffer);
-        module_names.push(name);
-        timeline.record(&module.name);
+        module_names.push(cname);
+        timeline.record(&name);
     }
 
     // FIXME: All upstream crates are deserialized internally in the

src/librustc_codegen_llvm/lib.rs

+15 −2
@@ -176,13 +176,20 @@ impl WriteBackendMethods for LlvmCodegenBackend {
     fn print_pass_timings(&self) {
         unsafe { llvm::LLVMRustPrintPassTimings(); }
     }
-    fn run_lto(
+    fn run_fat_lto(
         cgcx: &CodegenContext<Self>,
         modules: Vec<ModuleCodegen<Self::Module>>,
+        timeline: &mut Timeline
+    ) -> Result<LtoModuleCodegen<Self>, FatalError> {
+        back::lto::run_fat(cgcx, modules, timeline)
+    }
+    fn run_thin_lto(
+        cgcx: &CodegenContext<Self>,
+        modules: Vec<(String, Self::ThinBuffer)>,
         cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
         timeline: &mut Timeline
     ) -> Result<(Vec<LtoModuleCodegen<Self>>, Vec<WorkProduct>), FatalError> {
-        back::lto::run(cgcx, modules, cached_modules, timeline)
+        back::lto::run_thin(cgcx, modules, cached_modules, timeline)
     }
     unsafe fn optimize(
         cgcx: &CodegenContext<Self>,
@@ -209,6 +216,12 @@ impl WriteBackendMethods for LlvmCodegenBackend {
     ) -> Result<CompiledModule, FatalError> {
         back::write::codegen(cgcx, diag_handler, module, config, timeline)
     }
+    fn prepare_thin(
+        cgcx: &CodegenContext<Self>,
+        module: ModuleCodegen<Self::Module>
+    ) -> (String, Self::ThinBuffer) {
+        back::lto::prepare_thin(cgcx, module)
+    }
     fn run_lto_pass_manager(
         cgcx: &CodegenContext<Self>,
         module: &ModuleCodegen<Self::Module>,
