Skip to content

Commit e1b06f7

Browse files
committed
Auto merge of #139453 - compiler-errors:incr, r=jieyouxu
Prepend temp files with per-invocation random string to avoid temp filename conflicts

#139407 uncovered a very subtle unsoundness with incremental codegen, failing compilation sessions (due to assembler errors), and the "prefer hard linking over copying files" strategy we use in the compiler for file management.

Specifically, imagine we're building a single file 3 times, all with `-Csave-temps -Cincremental=...`. Let's call the object file we're building for the codegen unit for `main` "`XXX.o`" just for clarity since it's probably some gigantic hash name:

```rust
#[inline(never)]
#[cfg(any(rpass1, rpass3))]
fn a() -> i32 {
    0
}

#[cfg(any(cfail2))]
fn a() -> i32 {
    1
}

fn main() {
    evil::evil();
    assert_eq!(a(), 0);
}

mod evil {
    #[cfg(any(rpass1, rpass3))]
    pub fn evil() {
        unsafe {
            std::arch::asm!("/* */");
        }
    }

    #[cfg(any(cfail2))]
    pub fn evil() {
        unsafe {
            std::arch::asm!("missing");
        }
    }
}
```

Session 1 (`rpass1`):
* Type-check, borrow-check, etc.
* Serialize the dep graph to the incremental working directory `.../s-...-working/`.
* Codegen object file to a temp file `XXX.rcgu.o` which is spit out in the cwd.
* Hard-link[^1] `XXX.rcgu.o` to the incremental working directory `.../s-...-working/XXX.o`.
* Save-temps option means we don't delete `XXX.rcgu.o`.
* Link the binary and stuff.
* Finalize[^2] the working incremental session by renaming `.../s-...-working` to `s-...-asjkdhsjakd` (some other finalized incr comp session dir name).

Session 2 (`cfail2`):
* Load artifacts from the previous *finalized* incremental session, namely the dep graph.
* Type-check, borrow-check, etc. since the file has changed, so most dep graph nodes are red.
* Serialize the dep graph to the incremental working directory `.../s-...-working/`.
* Codegen object file to a temp file `XXX.rcgu.o`. **HERE IS THE PROBLEM**: The hard-link is still set up to point to the inode from `XXX.o` from the first session, so this also modifies the `XXX.o` in the previous finalized session directory.
* Codegen emits an error b/c `missing` is not an instruction, so we abort before finalizing the incremental session. Specifically, this means that the *previous* session is the last finalized session.

Session 3 (`rpass3`):
* Load artifacts from the previous *finalized* incremental session, namely the dep graph. NOTE that this is from session 1.
* All the dep graph nodes are green since we are basically replaying session 1.
* Codegen object file `XXX.o`, which is detected as *reused* from session 1 since dep nodes were green. That means we **reuse** `XXX.o` which had been dirtied from session 2.
* Link the binary and stuff.

This results in a binary which reuses some of the build artifacts from session 2, but thinks it's from session 1.

At this point, I hope it's clear to see that the incremental results from session 1 were dirtied from session 2, but we reuse them as if session 1 was the previous (finalized) incremental session we ran. This is at best really buggy, and at worst **unsound**.

This isn't limited to `-C save-temps`, since there are other combinations of flags that may keep around temporary files (hard linked) in the working directory (like `-C debuginfo=1 -C split-debuginfo=unpacked` on darwin, for example).

---

This PR implements a fix which is to prepend temp filenames with a random string that is generated per invocation of rustc. This string is not *deterministic*, but temporary files are transient anyways, so I don't believe this is a problem.

That means that temp files are now something like... `{crate-name}.{cgu}.{invocation_temp}.rcgu.o`, where `{invocation_temp}` is the new temporary string we generate per invocation of rustc.

Fixes #139407

[^1]: https://github.com/rust-lang/rust/blob/175dcc7773d65c1b1542c351392080f48c05799f/compiler/rustc_fs_util/src/lib.rs#L60
[^2]: https://github.com/rust-lang/rust/blob/175dcc7773d65c1b1542c351392080f48c05799f/compiler/rustc_incremental/src/persist/fs.rs#L1-L40
2 parents 71b68da + 9c372d8 commit e1b06f7

File tree

20 files changed

+285
-97
lines changed

20 files changed

+285
-97
lines changed

Cargo.lock

+1
Original file line numberDiff line numberDiff line change
@@ -4444,6 +4444,7 @@ dependencies = [
44444444
"bitflags",
44454445
"getopts",
44464446
"libc",
4447+
"rand 0.9.0",
44474448
"rustc_abi",
44484449
"rustc_ast",
44494450
"rustc_data_structures",

compiler/rustc_codegen_cranelift/src/driver/aot.rs

+31-18
Original file line numberDiff line numberDiff line change
@@ -169,8 +169,11 @@ fn produce_final_output_artifacts(
169169
if codegen_results.modules.len() == 1 {
170170
// 1) Only one codegen unit. In this case it's no difficulty
171171
// to copy `foo.0.x` to `foo.x`.
172-
let module_name = Some(&codegen_results.modules[0].name[..]);
173-
let path = crate_output.temp_path(output_type, module_name);
172+
let path = crate_output.temp_path_for_cgu(
173+
output_type,
174+
&codegen_results.modules[0].name,
175+
sess.invocation_temp.as_deref(),
176+
);
174177
let output = crate_output.path(output_type);
175178
if !output_type.is_text_output() && output.is_tty() {
176179
sess.dcx()
@@ -183,22 +186,16 @@ fn produce_final_output_artifacts(
183186
ensure_removed(sess.dcx(), &path);
184187
}
185188
} else {
186-
let extension = crate_output
187-
.temp_path(output_type, None)
188-
.extension()
189-
.unwrap()
190-
.to_str()
191-
.unwrap()
192-
.to_owned();
193-
194189
if crate_output.outputs.contains_explicit_name(&output_type) {
195190
// 2) Multiple codegen units, with `--emit foo=some_name`. We have
196191
// no good solution for this case, so warn the user.
197-
sess.dcx().emit_warn(ssa_errors::IgnoringEmitPath { extension });
192+
sess.dcx()
193+
.emit_warn(ssa_errors::IgnoringEmitPath { extension: output_type.extension() });
198194
} else if crate_output.single_output_file.is_some() {
199195
// 3) Multiple codegen units, with `-o some_name`. We have
200196
// no good solution for this case, so warn the user.
201-
sess.dcx().emit_warn(ssa_errors::IgnoringOutput { extension });
197+
sess.dcx()
198+
.emit_warn(ssa_errors::IgnoringOutput { extension: output_type.extension() });
202199
} else {
203200
// 4) Multiple codegen units, but no explicit name. We
204201
// just leave the `foo.0.x` files in place.
@@ -351,6 +348,7 @@ fn make_module(sess: &Session, name: String) -> UnwindModule<ObjectModule> {
351348

352349
fn emit_cgu(
353350
output_filenames: &OutputFilenames,
351+
invocation_temp: Option<&str>,
354352
prof: &SelfProfilerRef,
355353
name: String,
356354
module: UnwindModule<ObjectModule>,
@@ -366,6 +364,7 @@ fn emit_cgu(
366364

367365
let module_regular = emit_module(
368366
output_filenames,
367+
invocation_temp,
369368
prof,
370369
product.object,
371370
ModuleKind::Regular,
@@ -391,6 +390,7 @@ fn emit_cgu(
391390

392391
fn emit_module(
393392
output_filenames: &OutputFilenames,
393+
invocation_temp: Option<&str>,
394394
prof: &SelfProfilerRef,
395395
mut object: cranelift_object::object::write::Object<'_>,
396396
kind: ModuleKind,
@@ -409,7 +409,7 @@ fn emit_module(
409409
object.set_section_data(comment_section, producer, 1);
410410
}
411411

412-
let tmp_file = output_filenames.temp_path(OutputType::Object, Some(&name));
412+
let tmp_file = output_filenames.temp_path_for_cgu(OutputType::Object, &name, invocation_temp);
413413
let file = match File::create(&tmp_file) {
414414
Ok(file) => file,
415415
Err(err) => return Err(format!("error creating object file: {}", err)),
@@ -449,8 +449,11 @@ fn reuse_workproduct_for_cgu(
449449
cgu: &CodegenUnit<'_>,
450450
) -> Result<ModuleCodegenResult, String> {
451451
let work_product = cgu.previous_work_product(tcx);
452-
let obj_out_regular =
453-
tcx.output_filenames(()).temp_path(OutputType::Object, Some(cgu.name().as_str()));
452+
let obj_out_regular = tcx.output_filenames(()).temp_path_for_cgu(
453+
OutputType::Object,
454+
cgu.name().as_str(),
455+
tcx.sess.invocation_temp.as_deref(),
456+
);
454457
let source_file_regular = rustc_incremental::in_incr_comp_dir_sess(
455458
&tcx.sess,
456459
&work_product.saved_files.get("o").expect("no saved object file in work product"),
@@ -595,13 +598,19 @@ fn module_codegen(
595598

596599
let global_asm_object_file =
597600
profiler.generic_activity_with_arg("compile assembly", &*cgu_name).run(|| {
598-
crate::global_asm::compile_global_asm(&global_asm_config, &cgu_name, &cx.global_asm)
601+
crate::global_asm::compile_global_asm(
602+
&global_asm_config,
603+
&cgu_name,
604+
&cx.global_asm,
605+
cx.invocation_temp.as_deref(),
606+
)
599607
})?;
600608

601609
let codegen_result =
602610
profiler.generic_activity_with_arg("write object file", &*cgu_name).run(|| {
603611
emit_cgu(
604612
&global_asm_config.output_filenames,
613+
cx.invocation_temp.as_deref(),
605614
&profiler,
606615
cgu_name,
607616
module,
@@ -626,8 +635,11 @@ fn emit_metadata_module(tcx: TyCtxt<'_>, metadata: &EncodedMetadata) -> Compiled
626635
.as_str()
627636
.to_string();
628637

629-
let tmp_file =
630-
tcx.output_filenames(()).temp_path(OutputType::Metadata, Some(&metadata_cgu_name));
638+
let tmp_file = tcx.output_filenames(()).temp_path_for_cgu(
639+
OutputType::Metadata,
640+
&metadata_cgu_name,
641+
tcx.sess.invocation_temp.as_deref(),
642+
);
631643

632644
let symbol_name = rustc_middle::middle::exported_symbols::metadata_symbol_name(tcx);
633645
let obj = create_compressed_metadata_file(tcx.sess, metadata, &symbol_name);
@@ -657,6 +669,7 @@ fn emit_allocator_module(tcx: TyCtxt<'_>) -> Option<CompiledModule> {
657669

658670
match emit_module(
659671
tcx.output_filenames(()),
672+
tcx.sess.invocation_temp.as_deref(),
660673
&tcx.sess.prof,
661674
product.object,
662675
ModuleKind::Allocator,

compiler/rustc_codegen_cranelift/src/global_asm.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ pub(crate) fn compile_global_asm(
132132
config: &GlobalAsmConfig,
133133
cgu_name: &str,
134134
global_asm: &str,
135+
invocation_temp: Option<&str>,
135136
) -> Result<Option<PathBuf>, String> {
136137
if global_asm.is_empty() {
137138
return Ok(None);
@@ -146,7 +147,7 @@ pub(crate) fn compile_global_asm(
146147
global_asm.push('\n');
147148

148149
let global_asm_object_file = add_file_stem_postfix(
149-
config.output_filenames.temp_path(OutputType::Object, Some(cgu_name)),
150+
config.output_filenames.temp_path_for_cgu(OutputType::Object, cgu_name, invocation_temp),
150151
".asm",
151152
);
152153

compiler/rustc_codegen_cranelift/src/lib.rs

+2
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ impl<F: Fn() -> String> Drop for PrintOnPanic<F> {
124124
/// inside a single codegen unit with the exception of the Cranelift [`Module`](cranelift_module::Module).
125125
struct CodegenCx {
126126
output_filenames: Arc<OutputFilenames>,
127+
invocation_temp: Option<String>,
127128
should_write_ir: bool,
128129
global_asm: String,
129130
inline_asm_index: usize,
@@ -142,6 +143,7 @@ impl CodegenCx {
142143
};
143144
CodegenCx {
144145
output_filenames: tcx.output_filenames(()).clone(),
146+
invocation_temp: tcx.sess.invocation_temp.clone(),
145147
should_write_ir: crate::pretty_clif::should_write_ir(tcx),
146148
global_asm: String::new(),
147149
inline_asm_index: 0,

compiler/rustc_codegen_gcc/src/back/write.rs

+21-8
Original file line numberDiff line numberDiff line change
@@ -24,19 +24,23 @@ pub(crate) unsafe fn codegen(
2424
{
2525
let context = &module.module_llvm.context;
2626

27-
let module_name = module.name.clone();
28-
2927
let should_combine_object_files = module.module_llvm.should_combine_object_files;
3028

31-
let module_name = Some(&module_name[..]);
32-
3329
// NOTE: Only generate object files with GIMPLE when this environment variable is set for
3430
// now because this requires a particular setup (same gcc/lto1/lto-wrapper commit as libgccjit).
3531
// TODO(antoyo): remove this environment variable.
3632
let fat_lto = env::var("EMBED_LTO_BITCODE").as_deref() == Ok("1");
3733

38-
let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
39-
let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name);
34+
let bc_out = cgcx.output_filenames.temp_path_for_cgu(
35+
OutputType::Bitcode,
36+
&module.name,
37+
cgcx.invocation_temp.as_deref(),
38+
);
39+
let obj_out = cgcx.output_filenames.temp_path_for_cgu(
40+
OutputType::Object,
41+
&module.name,
42+
cgcx.invocation_temp.as_deref(),
43+
);
4044

4145
if config.bitcode_needed() {
4246
if fat_lto {
@@ -117,14 +121,22 @@ pub(crate) unsafe fn codegen(
117121
}
118122

119123
if config.emit_ir {
120-
let out = cgcx.output_filenames.temp_path(OutputType::LlvmAssembly, module_name);
124+
let out = cgcx.output_filenames.temp_path_for_cgu(
125+
OutputType::LlvmAssembly,
126+
&module.name,
127+
cgcx.invocation_temp.as_deref(),
128+
);
121129
std::fs::write(out, "").expect("write file");
122130
}
123131

124132
if config.emit_asm {
125133
let _timer =
126134
cgcx.prof.generic_activity_with_arg("GCC_module_codegen_emit_asm", &*module.name);
127-
let path = cgcx.output_filenames.temp_path(OutputType::Assembly, module_name);
135+
let path = cgcx.output_filenames.temp_path_for_cgu(
136+
OutputType::Assembly,
137+
&module.name,
138+
cgcx.invocation_temp.as_deref(),
139+
);
128140
context.compile_to_file(OutputKind::Assembler, path.to_str().expect("path to str"));
129141
}
130142

@@ -238,6 +250,7 @@ pub(crate) unsafe fn codegen(
238250
config.emit_asm,
239251
config.emit_ir,
240252
&cgcx.output_filenames,
253+
cgcx.invocation_temp.as_deref(),
241254
))
242255
}
243256

compiler/rustc_codegen_llvm/src/back/write.rs

+46-18
Original file line numberDiff line numberDiff line change
@@ -119,14 +119,18 @@ pub(crate) fn create_target_machine(tcx: TyCtxt<'_>, mod_name: &str) -> OwnedTar
119119
tcx.output_filenames(()).split_dwarf_path(
120120
tcx.sess.split_debuginfo(),
121121
tcx.sess.opts.unstable_opts.split_dwarf_kind,
122-
Some(mod_name),
122+
mod_name,
123+
tcx.sess.invocation_temp.as_deref(),
123124
)
124125
} else {
125126
None
126127
};
127128

128-
let output_obj_file =
129-
Some(tcx.output_filenames(()).temp_path(OutputType::Object, Some(mod_name)));
129+
let output_obj_file = Some(tcx.output_filenames(()).temp_path_for_cgu(
130+
OutputType::Object,
131+
mod_name,
132+
tcx.sess.invocation_temp.as_deref(),
133+
));
130134
let config = TargetMachineFactoryConfig { split_dwarf_file, output_obj_file };
131135

132136
target_machine_factory(
@@ -330,8 +334,11 @@ pub(crate) fn save_temp_bitcode(
330334
return;
331335
}
332336
let ext = format!("{name}.bc");
333-
let cgu = Some(&module.name[..]);
334-
let path = cgcx.output_filenames.temp_path_ext(&ext, cgu);
337+
let path = cgcx.output_filenames.temp_path_ext_for_cgu(
338+
&ext,
339+
&module.name,
340+
cgcx.invocation_temp.as_deref(),
341+
);
335342
write_bitcode_to_file(module, &path)
336343
}
337344

@@ -694,11 +701,12 @@ pub(crate) unsafe fn optimize(
694701
let llcx = &*module.module_llvm.llcx;
695702
let _handlers = DiagnosticHandlers::new(cgcx, dcx, llcx, module, CodegenDiagnosticsStage::Opt);
696703

697-
let module_name = module.name.clone();
698-
let module_name = Some(&module_name[..]);
699-
700704
if config.emit_no_opt_bc {
701-
let out = cgcx.output_filenames.temp_path_ext("no-opt.bc", module_name);
705+
let out = cgcx.output_filenames.temp_path_ext_for_cgu(
706+
"no-opt.bc",
707+
&module.name,
708+
cgcx.invocation_temp.as_deref(),
709+
);
702710
write_bitcode_to_file(module, &out)
703711
}
704712

@@ -743,8 +751,11 @@ pub(crate) unsafe fn optimize(
743751
if let Some(thin_lto_buffer) = thin_lto_buffer {
744752
let thin_lto_buffer = unsafe { ThinBuffer::from_raw_ptr(thin_lto_buffer) };
745753
module.thin_lto_buffer = Some(thin_lto_buffer.data().to_vec());
746-
let bc_summary_out =
747-
cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
754+
let bc_summary_out = cgcx.output_filenames.temp_path_for_cgu(
755+
OutputType::ThinLinkBitcode,
756+
&module.name,
757+
cgcx.invocation_temp.as_deref(),
758+
);
748759
if config.emit_thin_lto_summary
749760
&& let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
750761
{
@@ -801,8 +812,6 @@ pub(crate) unsafe fn codegen(
801812
let llmod = module.module_llvm.llmod();
802813
let llcx = &*module.module_llvm.llcx;
803814
let tm = &*module.module_llvm.tm;
804-
let module_name = module.name.clone();
805-
let module_name = Some(&module_name[..]);
806815
let _handlers =
807816
DiagnosticHandlers::new(cgcx, dcx, llcx, &module, CodegenDiagnosticsStage::Codegen);
808817

@@ -814,8 +823,16 @@ pub(crate) unsafe fn codegen(
814823
// copy it to the .o file, and delete the bitcode if it wasn't
815824
// otherwise requested.
816825

817-
let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
818-
let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name);
826+
let bc_out = cgcx.output_filenames.temp_path_for_cgu(
827+
OutputType::Bitcode,
828+
&module.name,
829+
cgcx.invocation_temp.as_deref(),
830+
);
831+
let obj_out = cgcx.output_filenames.temp_path_for_cgu(
832+
OutputType::Object,
833+
&module.name,
834+
cgcx.invocation_temp.as_deref(),
835+
);
819836

820837
if config.bitcode_needed() {
821838
if config.emit_bc || config.emit_obj == EmitObj::Bitcode {
@@ -857,7 +874,11 @@ pub(crate) unsafe fn codegen(
857874
if config.emit_ir {
858875
let _timer =
859876
cgcx.prof.generic_activity_with_arg("LLVM_module_codegen_emit_ir", &*module.name);
860-
let out = cgcx.output_filenames.temp_path(OutputType::LlvmAssembly, module_name);
877+
let out = cgcx.output_filenames.temp_path_for_cgu(
878+
OutputType::LlvmAssembly,
879+
&module.name,
880+
cgcx.invocation_temp.as_deref(),
881+
);
861882
let out_c = path_to_c_string(&out);
862883

863884
extern "C" fn demangle_callback(
@@ -899,7 +920,11 @@ pub(crate) unsafe fn codegen(
899920
if config.emit_asm {
900921
let _timer =
901922
cgcx.prof.generic_activity_with_arg("LLVM_module_codegen_emit_asm", &*module.name);
902-
let path = cgcx.output_filenames.temp_path(OutputType::Assembly, module_name);
923+
let path = cgcx.output_filenames.temp_path_for_cgu(
924+
OutputType::Assembly,
925+
&module.name,
926+
cgcx.invocation_temp.as_deref(),
927+
);
903928

904929
// We can't use the same module for asm and object code output,
905930
// because that triggers various errors like invalid IR or broken
@@ -929,7 +954,9 @@ pub(crate) unsafe fn codegen(
929954
.prof
930955
.generic_activity_with_arg("LLVM_module_codegen_emit_obj", &*module.name);
931956

932-
let dwo_out = cgcx.output_filenames.temp_path_dwo(module_name);
957+
let dwo_out = cgcx
958+
.output_filenames
959+
.temp_path_dwo_for_cgu(&module.name, cgcx.invocation_temp.as_deref());
933960
let dwo_out = match (cgcx.split_debuginfo, cgcx.split_dwarf_kind) {
934961
// Don't change how DWARF is emitted when disabled.
935962
(SplitDebuginfo::Off, _) => None,
@@ -994,6 +1021,7 @@ pub(crate) unsafe fn codegen(
9941021
config.emit_asm,
9951022
config.emit_ir,
9961023
&cgcx.output_filenames,
1024+
cgcx.invocation_temp.as_deref(),
9971025
))
9981026
}
9991027

compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -910,7 +910,8 @@ pub(crate) fn build_compile_unit_di_node<'ll, 'tcx>(
910910
&& let Some(f) = output_filenames.split_dwarf_path(
911911
tcx.sess.split_debuginfo(),
912912
tcx.sess.opts.unstable_opts.split_dwarf_kind,
913-
Some(codegen_unit_name),
913+
codegen_unit_name,
914+
tcx.sess.invocation_temp.as_deref(),
914915
) {
915916
// We get a path relative to the working directory from split_dwarf_path
916917
Some(tcx.sess.source_map().path_mapping().to_real_filename(f))

compiler/rustc_codegen_ssa/src/back/link.rs

+6-2
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,12 @@ pub fn link_binary(
112112
codegen_results.crate_info.local_crate_name,
113113
);
114114
let crate_name = format!("{}", codegen_results.crate_info.local_crate_name);
115-
let out_filename =
116-
output.file_for_writing(outputs, OutputType::Exe, Some(crate_name.as_str()));
115+
let out_filename = output.file_for_writing(
116+
outputs,
117+
OutputType::Exe,
118+
&crate_name,
119+
sess.invocation_temp.as_deref(),
120+
);
117121
match crate_type {
118122
CrateType::Rlib => {
119123
let _timer = sess.timer("link_rlib");

0 commit comments

Comments
 (0)