Skip to content

Commit 6b58f52

Browse files
committed
The embedded bitcode should always be prepared for LTO/ThinLTO
1 parent 672e3aa commit 6b58f52

File tree

13 files changed

+265
-67
lines changed

13 files changed

+265
-67
lines changed

compiler/rustc_codegen_cranelift/src/driver/aot.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ fn produce_final_output_artifacts(
210210
// to get rid of it.
211211
for output_type in crate_output.outputs.keys() {
212212
match *output_type {
213-
OutputType::Bitcode | OutputType::ThinLinkBitcode => {
213+
OutputType::Bitcode | OutputType::ThinLinkBitcode | OutputType::ThinBitcode => {
214214
// Cranelift doesn't have bitcode
215215
// user_wants_bitcode = true;
216216
// // Copy to .bc, but always keep the .0.bc. There is a later

compiler/rustc_codegen_llvm/src/back/lto.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use std::collections::BTreeMap;
22
use std::ffi::{CStr, CString};
33
use std::fs::File;
44
use std::path::Path;
5+
use std::ptr::NonNull;
56
use std::sync::Arc;
67
use std::{io, iter, slice};
78

@@ -612,6 +613,7 @@ pub(crate) fn run_pass_manager(
612613
cgcx,
613614
dcx,
614615
module,
616+
None,
615617
config,
616618
opt_level,
617619
opt_stage,
@@ -625,6 +627,7 @@ pub(crate) fn run_pass_manager(
625627
cgcx,
626628
dcx,
627629
module,
630+
None,
628631
config,
629632
opt_level,
630633
llvm::OptStage::FatLTO,
@@ -690,6 +693,11 @@ impl ThinBuffer {
690693
ThinBuffer(buffer)
691694
}
692695
}
696+
697+
/// Wraps a raw `llvm::ThinLTOBuffer` pointer in a `ThinBuffer`.
///
/// # Panics
///
/// Panics if `ptr` is null.
///
/// # Safety
///
/// `ptr` is dereferenced through `NonNull::as_mut`, so it must point to a valid,
/// properly aligned `llvm::ThinLTOBuffer` that is not accessed through any other
/// pointer while the returned `ThinBuffer` is in use.
/// NOTE(review): presumably the returned `ThinBuffer` takes ownership of the buffer
/// and releases it on drop; confirm against `ThinBuffer`'s `Drop` impl, which is
/// not visible in this hunk.
pub unsafe fn from_raw_ptr(ptr: *mut llvm::ThinLTOBuffer) -> ThinBuffer {
698+
let mut ptr = NonNull::new(ptr).unwrap();
699+
ThinBuffer(unsafe { ptr.as_mut() })
700+
}
693701
}
694702

695703
impl ThinBufferMethods for ThinBuffer {

compiler/rustc_codegen_llvm/src/back/write.rs

Lines changed: 91 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use std::ffi::{CStr, CString};
22
use std::io::{self, Write};
33
use std::path::{Path, PathBuf};
4+
use std::ptr::null_mut;
45
use std::sync::Arc;
56
use std::{fs, slice, str};
67

@@ -15,7 +16,7 @@ use rustc_codegen_ssa::back::write::{
1516
TargetMachineFactoryFn,
1617
};
1718
use rustc_codegen_ssa::traits::*;
18-
use rustc_codegen_ssa::{CompiledModule, ModuleCodegen};
19+
use rustc_codegen_ssa::{CompiledModule, ModuleCodegen, ModuleKind};
1920
use rustc_data_structures::profiling::SelfProfilerRef;
2021
use rustc_data_structures::small_c_str::SmallCStr;
2122
use rustc_errors::{DiagCtxtHandle, FatalError, Level};
@@ -544,6 +545,7 @@ pub(crate) unsafe fn llvm_optimize(
544545
cgcx: &CodegenContext<LlvmCodegenBackend>,
545546
dcx: DiagCtxtHandle<'_>,
546547
module: &ModuleCodegen<ModuleLlvm>,
548+
thin_lto_buffer: Option<&mut *mut llvm::ThinLTOBuffer>,
547549
config: &ModuleConfig,
548550
opt_level: config::OptLevel,
549551
opt_stage: llvm::OptStage,
@@ -580,7 +582,17 @@ pub(crate) unsafe fn llvm_optimize(
580582
vectorize_loop = config.vectorize_loop;
581583
}
582584
trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop, ?run_enzyme);
583-
let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
585+
if thin_lto_buffer.is_some() {
586+
assert!(
587+
matches!(
588+
opt_stage,
589+
llvm::OptStage::PreLinkNoLTO
590+
| llvm::OptStage::PreLinkFatLTO
591+
| llvm::OptStage::PreLinkThinLTO
592+
),
593+
"the bitcode for LTO can only be obtained at the pre-link stage"
594+
);
595+
}
584596
let pgo_gen_path = get_pgo_gen_path(config);
585597
let pgo_use_path = get_pgo_use_path(config);
586598
let pgo_sample_use_path = get_pgo_sample_use_path(config);
@@ -640,7 +652,9 @@ pub(crate) unsafe fn llvm_optimize(
640652
config.no_prepopulate_passes,
641653
config.verify_llvm_ir,
642654
config.lint_llvm_ir,
643-
using_thin_buffers,
655+
thin_lto_buffer,
656+
config.emit_thin_lto,
657+
config.emit_thin_lto_summary,
644658
config.merge_functions,
645659
unroll_loops,
646660
vectorize_slp,
@@ -705,9 +719,56 @@ pub(crate) unsafe fn optimize(
705719
// usages, not just if we build rustc with autodiff support.
706720
let autodiff_stage =
707721
if cfg!(llvm_enzyme) { AutodiffStage::PreAD } else { AutodiffStage::PostAD };
708-
return unsafe {
709-
llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, autodiff_stage)
722+
// The embedded bitcode is used to run LTO/ThinLTO.
723+
// The bitcode obtained during the `codegen` phase is no longer suitable for performing LTO.
724+
// It may have undergone LTO due to ThinLocal, so we need to obtain the embedded bitcode at
725+
// this point.
726+
let mut thin_lto_buffer = if (module.kind == ModuleKind::Regular
727+
&& config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full))
728+
|| config.emit_thin_lto_summary
729+
{
730+
Some(null_mut())
731+
} else {
732+
None
710733
};
734+
unsafe {
735+
llvm_optimize(
736+
cgcx,
737+
dcx,
738+
module,
739+
thin_lto_buffer.as_mut(),
740+
config,
741+
opt_level,
742+
opt_stage,
743+
autodiff_stage,
744+
)
745+
}?;
746+
if let Some(thin_lto_buffer) = thin_lto_buffer {
747+
let thin_lto_buffer = unsafe { ThinBuffer::from_raw_ptr(thin_lto_buffer) };
748+
let thin_bc_out = cgcx.output_filenames.temp_path(OutputType::ThinBitcode, module_name);
749+
if let Err(err) = fs::write(&thin_bc_out, thin_lto_buffer.data()) {
750+
dcx.emit_err(WriteBytecode { path: &thin_bc_out, err });
751+
}
752+
let bc_summary_out =
753+
cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
754+
if config.emit_thin_lto_summary
755+
&& let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
756+
{
757+
let summary_data = thin_lto_buffer.thin_link_data();
758+
cgcx.prof.artifact_size(
759+
"llvm_bitcode_summary",
760+
thin_link_bitcode_filename.to_string_lossy(),
761+
summary_data.len() as u64,
762+
);
763+
let _timer = cgcx.prof.generic_activity_with_arg(
764+
"LLVM_module_codegen_emit_bitcode_summary",
765+
&*module.name,
766+
);
767+
if let Err(err) = fs::write(&bc_summary_out, summary_data) {
768+
dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
769+
}
770+
}
771+
}
711772
}
712773
Ok(())
713774
}
@@ -785,59 +846,47 @@ pub(crate) unsafe fn codegen(
785846
// otherwise requested.
786847

787848
let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
788-
let bc_summary_out =
789-
cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
790849
let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name);
791850

792851
if config.bitcode_needed() {
793-
let _timer = cgcx
794-
.prof
795-
.generic_activity_with_arg("LLVM_module_codegen_make_bitcode", &*module.name);
796-
let thin = ThinBuffer::new(llmod, config.emit_thin_lto, config.emit_thin_lto_summary);
797-
let data = thin.data();
798-
799-
if let Some(bitcode_filename) = bc_out.file_name() {
800-
cgcx.prof.artifact_size(
801-
"llvm_bitcode",
802-
bitcode_filename.to_string_lossy(),
803-
data.len() as u64,
804-
);
805-
}
806-
807-
if config.emit_thin_lto_summary
808-
&& let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
809-
{
810-
let summary_data = thin.thin_link_data();
811-
cgcx.prof.artifact_size(
812-
"llvm_bitcode_summary",
813-
thin_link_bitcode_filename.to_string_lossy(),
814-
summary_data.len() as u64,
815-
);
816-
817-
let _timer = cgcx.prof.generic_activity_with_arg(
818-
"LLVM_module_codegen_emit_bitcode_summary",
819-
&*module.name,
820-
);
821-
if let Err(err) = fs::write(&bc_summary_out, summary_data) {
822-
dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
823-
}
824-
}
825-
826852
if config.emit_bc || config.emit_obj == EmitObj::Bitcode {
853+
let thin = {
854+
let _timer = cgcx.prof.generic_activity_with_arg(
855+
"LLVM_module_codegen_make_bitcode",
856+
&*module.name,
857+
);
858+
ThinBuffer::new(llmod, config.emit_thin_lto, false)
859+
};
860+
let data = thin.data();
827861
let _timer = cgcx
828862
.prof
829863
.generic_activity_with_arg("LLVM_module_codegen_emit_bitcode", &*module.name);
864+
if let Some(bitcode_filename) = bc_out.file_name() {
865+
cgcx.prof.artifact_size(
866+
"llvm_bitcode",
867+
bitcode_filename.to_string_lossy(),
868+
data.len() as u64,
869+
);
870+
}
830871
if let Err(err) = fs::write(&bc_out, data) {
831872
dcx.emit_err(WriteBytecode { path: &bc_out, err });
832873
}
833874
}
834875

835-
if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full) {
876+
if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full)
877+
&& module.kind == ModuleKind::Regular
878+
{
836879
let _timer = cgcx
837880
.prof
838881
.generic_activity_with_arg("LLVM_module_codegen_embed_bitcode", &*module.name);
882+
let thin_bc_out =
883+
cgcx.output_filenames.temp_path(OutputType::ThinBitcode, module_name);
884+
assert!(thin_bc_out.exists(), "cannot find {:?} as embedded bitcode", thin_bc_out);
885+
let data = fs::read(&thin_bc_out).unwrap();
886+
debug!("removing embed bitcode file {:?}", thin_bc_out);
887+
ensure_removed(dcx, &thin_bc_out);
839888
unsafe {
840-
embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, data);
889+
embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, &data);
841890
}
842891
}
843892
}

compiler/rustc_codegen_llvm/src/llvm/ffi.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2375,7 +2375,9 @@ unsafe extern "C" {
23752375
NoPrepopulatePasses: bool,
23762376
VerifyIR: bool,
23772377
LintIR: bool,
2378-
UseThinLTOBuffers: bool,
2378+
ThinLTOBuffer: Option<&mut *mut ThinLTOBuffer>,
2379+
EmitThinLTO: bool,
2380+
EmitThinLTOSummary: bool,
23792381
MergeFunctions: bool,
23802382
UnrollLoops: bool,
23812383
SLPVectorize: bool,

compiler/rustc_codegen_ssa/src/back/write.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,9 @@ fn produce_final_output_artifacts(
625625
// them for making an rlib.
626626
copy_if_one_unit(OutputType::Bitcode, true);
627627
}
628+
OutputType::ThinBitcode => {
629+
copy_if_one_unit(OutputType::ThinBitcode, true);
630+
}
628631
OutputType::ThinLinkBitcode => {
629632
copy_if_one_unit(OutputType::ThinLinkBitcode, false);
630633
}

compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp

Lines changed: 50 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "llvm/Analysis/Lint.h"
88
#include "llvm/Analysis/TargetLibraryInfo.h"
99
#include "llvm/Bitcode/BitcodeWriter.h"
10+
#include "llvm/Bitcode/BitcodeWriterPass.h"
1011
#include "llvm/CodeGen/CommandFlags.h"
1112
#include "llvm/IR/AssemblyAnnotationWriter.h"
1213
#include "llvm/IR/AutoUpgrade.h"
@@ -36,6 +37,7 @@
3637
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
3738
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
3839
#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
40+
#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
3941
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
4042
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
4143
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
@@ -194,6 +196,19 @@ extern "C" void LLVMRustTimeTraceProfilerFinish(const char *FileName) {
194196
GEN_SUBTARGETS
195197
#undef SUBTARGET
196198

199+
// This struct and various functions are sort of a hack right now, but the
200+
// problem is that we've got in-memory LLVM modules after we generate and
201+
// optimize all codegen-units for one compilation in rustc. To be compatible
202+
// with the LTO support above we need to serialize the modules plus their
203+
// ThinLTO summary into memory.
204+
//
205+
// This structure is basically an owned version of a serialized module, with
206+
// a ThinLTO summary attached.
207+
struct LLVMRustThinLTOBuffer {
208+
std::string data;
209+
std::string thin_link_data;
210+
};
211+
197212
extern "C" bool LLVMRustHasFeature(LLVMTargetMachineRef TM,
198213
const char *Feature) {
199214
TargetMachine *Target = unwrap(TM);
@@ -697,7 +712,8 @@ extern "C" LLVMRustResult LLVMRustOptimize(
697712
LLVMModuleRef ModuleRef, LLVMTargetMachineRef TMRef,
698713
LLVMRustPassBuilderOptLevel OptLevelRust, LLVMRustOptStage OptStage,
699714
bool IsLinkerPluginLTO, bool NoPrepopulatePasses, bool VerifyIR,
700-
bool LintIR, bool UseThinLTOBuffers, bool MergeFunctions, bool UnrollLoops,
715+
bool LintIR, LLVMRustThinLTOBuffer **ThinLTOBufferRef, bool EmitThinLTO,
716+
bool EmitThinLTOSummary, bool MergeFunctions, bool UnrollLoops,
701717
bool SLPVectorize, bool LoopVectorize, bool DisableSimplifyLibCalls,
702718
bool EmitLifetimeMarkers, bool RunEnzyme,
703719
LLVMRustSanitizerOptions *SanitizerOptions, const char *PGOGenPath,
@@ -945,7 +961,10 @@ extern "C" LLVMRustResult LLVMRustOptimize(
945961
}
946962

947963
ModulePassManager MPM;
948-
bool NeedThinLTOBufferPasses = UseThinLTOBuffers;
964+
bool NeedThinLTOBufferPasses = EmitThinLTO;
965+
auto ThinLTOBuffer = std::make_unique<LLVMRustThinLTOBuffer>();
966+
raw_string_ostream ThinLTODataOS(ThinLTOBuffer->data);
967+
raw_string_ostream ThinLinkDataOS(ThinLTOBuffer->thin_link_data);
949968
if (!NoPrepopulatePasses) {
950969
// The pre-link pipelines don't support O0 and require using
951970
// buildO0DefaultPipeline() instead. At the same time, the LTO pipelines do
@@ -969,7 +988,25 @@ extern "C" LLVMRustResult LLVMRustOptimize(
969988

970989
switch (OptStage) {
971990
case LLVMRustOptStage::PreLinkNoLTO:
972-
MPM = PB.buildPerModuleDefaultPipeline(OptLevel);
991+
if (ThinLTOBufferRef) {
992+
// This is similar to LLVM's `buildFatLTODefaultPipeline`, where the
993+
// bitcode for embedding is obtained after performing
994+
// `ThinLTOPreLinkDefaultPipeline`.
995+
MPM.addPass(PB.buildThinLTOPreLinkDefaultPipeline(OptLevel));
996+
if (EmitThinLTO) {
997+
MPM.addPass(ThinLTOBitcodeWriterPass(
998+
ThinLTODataOS, EmitThinLTOSummary ? &ThinLinkDataOS : nullptr));
999+
} else {
1000+
MPM.addPass(BitcodeWriterPass(ThinLTODataOS));
1001+
}
1002+
*ThinLTOBufferRef = ThinLTOBuffer.release();
1003+
MPM.addPass(PB.buildModuleOptimizationPipeline(
1004+
OptLevel, ThinOrFullLTOPhase::None));
1005+
MPM.addPass(
1006+
createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
1007+
} else {
1008+
MPM = PB.buildPerModuleDefaultPipeline(OptLevel);
1009+
}
9731010
break;
9741011
case LLVMRustOptStage::PreLinkThinLTO:
9751012
MPM = PB.buildThinLTOPreLinkDefaultPipeline(OptLevel);
@@ -1015,6 +1052,16 @@ extern "C" LLVMRustResult LLVMRustOptimize(
10151052
MPM.addPass(CanonicalizeAliasesPass());
10161053
MPM.addPass(NameAnonGlobalPass());
10171054
}
1055+
// For `-Copt-level=0`, ThinLTO, or LTO.
1056+
if (ThinLTOBufferRef && *ThinLTOBufferRef == nullptr) {
1057+
if (EmitThinLTO) {
1058+
MPM.addPass(ThinLTOBitcodeWriterPass(
1059+
ThinLTODataOS, EmitThinLTOSummary ? &ThinLinkDataOS : nullptr));
1060+
} else {
1061+
MPM.addPass(BitcodeWriterPass(ThinLTODataOS));
1062+
}
1063+
*ThinLTOBufferRef = ThinLTOBuffer.release();
1064+
}
10181065

10191066
// now load "-enzyme" pass:
10201067
#ifdef ENZYME
@@ -1493,19 +1540,6 @@ extern "C" bool LLVMRustPrepareThinLTOImport(const LLVMRustThinLTOData *Data,
14931540
return true;
14941541
}
14951542

1496-
// This struct and various functions are sort of a hack right now, but the
1497-
// problem is that we've got in-memory LLVM modules after we generate and
1498-
// optimize all codegen-units for one compilation in rustc. To be compatible
1499-
// with the LTO support above we need to serialize the modules plus their
1500-
// ThinLTO summary into memory.
1501-
//
1502-
// This structure is basically an owned version of a serialized module, with
1503-
// a ThinLTO summary attached.
1504-
struct LLVMRustThinLTOBuffer {
1505-
std::string data;
1506-
std::string thin_link_data;
1507-
};
1508-
15091543
extern "C" LLVMRustThinLTOBuffer *
15101544
LLVMRustThinLTOBufferCreate(LLVMModuleRef M, bool is_thin, bool emit_summary) {
15111545
auto Ret = std::make_unique<LLVMRustThinLTOBuffer>();

0 commit comments

Comments
 (0)