Skip to content

Commit f04fc4d

Browse files
committed
Auto merge of #129778 - RalfJung:interp-lossy-typed-copy, r=saethlin
interpret: make typed copies lossy wrt provenance and padding A "typed copy" in Rust can be a lossy process: when copying at type `usize` (or any other non-pointer type), if the original memory had any provenance, that provenance is lost. When copying at pointer type, if the original memory had partial provenance (i.e., not the same provenance for all bytes), that provenance is lost. When copying any type with padding, the contents of padding are lost. This PR equips our validity-checking pass with the ability to reset provenance and padding according to those rules. Can be reviewed commit-by-commit. The first three commits are just preparation without any functional change. Fixes #845 Fixes #2182
2 parents 9a46223 + ed3e1c4 commit f04fc4d

33 files changed

+503
-12
lines changed

src/concurrency/data_race.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -637,7 +637,7 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
637637
// The program didn't actually do a read, so suppress the memory access hooks.
638638
// This is also a very special exception where we just ignore an error -- if this read
639639
// was UB e.g. because the memory is uninitialized, we don't want to know!
640-
let old_val = this.run_for_validation(|| this.read_scalar(dest)).ok();
640+
let old_val = this.run_for_validation(|this| this.read_scalar(dest)).ok();
641641
this.allow_data_races_mut(move |this| this.write_scalar(val, dest))?;
642642
this.validate_atomic_store(dest, atomic)?;
643643
this.buffered_atomic_write(val, dest, atomic, old_val)

src/helpers.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -869,7 +869,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
869869
/// Dereference a pointer operand to a place using `layout` instead of the pointer's declared type
870870
fn deref_pointer_as(
871871
&self,
872-
op: &impl Readable<'tcx, Provenance>,
872+
op: &impl Projectable<'tcx, Provenance>,
873873
layout: TyAndLayout<'tcx>,
874874
) -> InterpResult<'tcx, MPlaceTy<'tcx>> {
875875
let this = self.eval_context_ref();
@@ -880,7 +880,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
880880
/// Calculates the MPlaceTy given the offset and layout of an access on an operand
881881
fn deref_pointer_and_offset(
882882
&self,
883-
op: &impl Readable<'tcx, Provenance>,
883+
op: &impl Projectable<'tcx, Provenance>,
884884
offset: u64,
885885
base_layout: TyAndLayout<'tcx>,
886886
value_layout: TyAndLayout<'tcx>,
@@ -897,7 +897,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
897897

898898
fn deref_pointer_and_read(
899899
&self,
900-
op: &impl Readable<'tcx, Provenance>,
900+
op: &impl Projectable<'tcx, Provenance>,
901901
offset: u64,
902902
base_layout: TyAndLayout<'tcx>,
903903
value_layout: TyAndLayout<'tcx>,
@@ -909,7 +909,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
909909

910910
fn deref_pointer_and_write(
911911
&mut self,
912-
op: &impl Readable<'tcx, Provenance>,
912+
op: &impl Projectable<'tcx, Provenance>,
913913
offset: u64,
914914
value: impl Into<Scalar>,
915915
base_layout: TyAndLayout<'tcx>,

src/intrinsics/mod.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -152,8 +152,10 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
152152
// ```
153153
// Would not be considered UB, or the other way around (`is_val_statically_known(0)`).
154154
"is_val_statically_known" => {
155-
let [arg] = check_arg_count(args)?;
156-
this.validate_operand(arg, /*recursive*/ false)?;
155+
let [_arg] = check_arg_count(args)?;
156+
// FIXME: should we check for validity here? It's tricky because we do not have a
157+
// place. Codegen does not seem to set any attributes like `noundef` for intrinsic
158+
// calls, so we don't *have* to do anything.
157159
let branch: bool = this.machine.rng.get_mut().gen();
158160
this.write_scalar(Scalar::from_bool(branch), dest)?;
159161
}

src/machine.rs

+13
Original file line numberDiff line numberDiff line change
@@ -572,6 +572,9 @@ pub struct MiriMachine<'tcx> {
572572
/// Invariant: the promised alignment will never be less than the native alignment of the
573573
/// allocation.
574574
pub(crate) symbolic_alignment: RefCell<FxHashMap<AllocId, (Size, Align)>>,
575+
576+
/// A cache of "data range" computations for unions (i.e., the offsets of non-padding bytes).
577+
union_data_ranges: FxHashMap<Ty<'tcx>, RangeSet>,
575578
}
576579

577580
impl<'tcx> MiriMachine<'tcx> {
@@ -714,6 +717,7 @@ impl<'tcx> MiriMachine<'tcx> {
714717
allocation_spans: RefCell::new(FxHashMap::default()),
715718
const_cache: RefCell::new(FxHashMap::default()),
716719
symbolic_alignment: RefCell::new(FxHashMap::default()),
720+
union_data_ranges: FxHashMap::default(),
717721
}
718722
}
719723

@@ -826,6 +830,7 @@ impl VisitProvenance for MiriMachine<'_> {
826830
allocation_spans: _,
827831
const_cache: _,
828832
symbolic_alignment: _,
833+
union_data_ranges: _,
829834
} = self;
830835

831836
threads.visit_provenance(visit);
@@ -1627,4 +1632,12 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
16271632
ecx.machine.rng.borrow_mut().gen::<usize>() % ADDRS_PER_ANON_GLOBAL
16281633
}
16291634
}
1635+
1636+
fn cached_union_data_range<'e>(
1637+
ecx: &'e mut InterpCx<'tcx, Self>,
1638+
ty: Ty<'tcx>,
1639+
compute_range: impl FnOnce() -> RangeSet,
1640+
) -> Cow<'e, RangeSet> {
1641+
Cow::Borrowed(ecx.machine.union_data_ranges.entry(ty).or_insert_with(compute_range))
1642+
}
16301643
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
use std::mem;
2+
3+
// Doing a copy at integer type should lose provenance.
4+
// This tests the unoptimized base case.
5+
fn main() {
6+
let ptrs = [(&42, true)];
7+
let ints: [(usize, bool); 1] = unsafe { mem::transmute(ptrs) };
8+
let ptr = (&raw const ints[0].0).cast::<&i32>();
9+
let _val = unsafe { *ptr.read() }; //~ERROR: dangling
10+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
error: Undefined Behavior: constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
2+
--> $DIR/int_copy_looses_provenance0.rs:LL:CC
3+
|
4+
LL | let _val = unsafe { *ptr.read() };
5+
| ^^^^^^^^^^ constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
6+
|
7+
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
8+
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
9+
= note: BACKTRACE:
10+
= note: inside `main` at $DIR/int_copy_looses_provenance0.rs:LL:CC
11+
12+
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
13+
14+
error: aborting due to 1 previous error
15+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
use std::mem;
2+
3+
// Doing a copy at integer type should lose provenance.
4+
// This tests the optimized-array case of integer copies.
5+
fn main() {
6+
let ptrs = [&42];
7+
let ints: [usize; 1] = unsafe { mem::transmute(ptrs) };
8+
let ptr = (&raw const ints[0]).cast::<&i32>();
9+
let _val = unsafe { *ptr.read() }; //~ERROR: dangling
10+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
error: Undefined Behavior: constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
2+
--> $DIR/int_copy_looses_provenance1.rs:LL:CC
3+
|
4+
LL | let _val = unsafe { *ptr.read() };
5+
| ^^^^^^^^^^ constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
6+
|
7+
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
8+
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
9+
= note: BACKTRACE:
10+
= note: inside `main` at $DIR/int_copy_looses_provenance1.rs:LL:CC
11+
12+
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
13+
14+
error: aborting due to 1 previous error
15+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
use std::mem;
2+
3+
// Doing a copy at integer type should lose provenance.
4+
// This tests the case where provenacne is hiding in the metadata of a pointer.
5+
fn main() {
6+
let ptrs = [(&42, &42)];
7+
// Typed copy at wide pointer type (with integer-typed metadata).
8+
let ints: [*const [usize]; 1] = unsafe { mem::transmute(ptrs) };
9+
// Get a pointer to the metadata field.
10+
let ptr = (&raw const ints[0]).wrapping_byte_add(mem::size_of::<*const ()>()).cast::<&i32>();
11+
let _val = unsafe { *ptr.read() }; //~ERROR: dangling
12+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
error: Undefined Behavior: constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
2+
--> $DIR/int_copy_looses_provenance2.rs:LL:CC
3+
|
4+
LL | let _val = unsafe { *ptr.read() };
5+
| ^^^^^^^^^^ constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
6+
|
7+
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
8+
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
9+
= note: BACKTRACE:
10+
= note: inside `main` at $DIR/int_copy_looses_provenance2.rs:LL:CC
11+
12+
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
13+
14+
error: aborting due to 1 previous error
15+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#![feature(strict_provenance)]
2+
use std::mem;
3+
4+
#[repr(C, usize)]
5+
#[allow(unused)]
6+
enum E {
7+
Var1(usize),
8+
Var2(usize),
9+
}
10+
11+
// Doing a copy at integer type should lose provenance.
12+
// This tests the case where provenacne is hiding in the discriminant of an enum.
13+
fn main() {
14+
assert_eq!(mem::size_of::<E>(), 2*mem::size_of::<usize>());
15+
16+
// We want to store provenance in the enum discriminant, but the value still needs to
17+
// be valid atfor the type. So we split provenance and data.
18+
let ptr = &42;
19+
let ptr = ptr as *const i32;
20+
let ptrs = [(ptr.with_addr(0), ptr)];
21+
// Typed copy at the enum type.
22+
let ints: [E; 1] = unsafe { mem::transmute(ptrs) };
23+
// Read the discriminant.
24+
let discr = unsafe { (&raw const ints[0]).cast::<*const i32>().read() };
25+
// Take the provenance from there, together with the original address.
26+
let ptr = discr.with_addr(ptr.addr());
27+
// There should be no provenance is `discr`, so this should be UB.
28+
let _val = unsafe { *ptr }; //~ERROR: dangling
29+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
error: Undefined Behavior: memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
2+
--> $DIR/int_copy_looses_provenance3.rs:LL:CC
3+
|
4+
LL | let _val = unsafe { *ptr };
5+
| ^^^^ memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
6+
|
7+
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
8+
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
9+
= note: BACKTRACE:
10+
= note: inside `main` at $DIR/int_copy_looses_provenance3.rs:LL:CC
11+
12+
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
13+
14+
error: aborting due to 1 previous error
15+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
fn main() {
2+
let half_ptr = std::mem::size_of::<*const ()>() / 2;
3+
let mut bytes = [1u8; 16];
4+
let bytes = bytes.as_mut_ptr();
5+
6+
unsafe {
7+
// Put a pointer in the middle.
8+
bytes.add(half_ptr).cast::<&i32>().write_unaligned(&42);
9+
// Typed copy of the entire thing as two pointers, but not perfectly
10+
// overlapping with the pointer we have in there.
11+
let copy = bytes.cast::<[*const (); 2]>().read_unaligned();
12+
let copy_bytes = copy.as_ptr().cast::<u8>();
13+
// Now go to the middle of the copy and get the pointer back out.
14+
let ptr = copy_bytes.add(half_ptr).cast::<*const i32>().read_unaligned();
15+
// Dereferencing this should fail as the copy has removed the provenance.
16+
let _val = *ptr; //~ERROR: dangling
17+
}
18+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
error: Undefined Behavior: memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
2+
--> $DIR/ptr_copy_loses_partial_provenance0.rs:LL:CC
3+
|
4+
LL | let _val = *ptr;
5+
| ^^^^ memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
6+
|
7+
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
8+
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
9+
= note: BACKTRACE:
10+
= note: inside `main` at $DIR/ptr_copy_loses_partial_provenance0.rs:LL:CC
11+
12+
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
13+
14+
error: aborting due to 1 previous error
15+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
fn main() {
2+
let half_ptr = std::mem::size_of::<*const ()>() / 2;
3+
let mut bytes = [1u8; 16];
4+
let bytes = bytes.as_mut_ptr();
5+
6+
unsafe {
7+
// Put a pointer in the middle.
8+
bytes.add(half_ptr).cast::<&i32>().write_unaligned(&42);
9+
// Typed copy of the entire thing as two *function* pointers, but not perfectly
10+
// overlapping with the pointer we have in there.
11+
let copy = bytes.cast::<[fn(); 2]>().read_unaligned();
12+
let copy_bytes = copy.as_ptr().cast::<u8>();
13+
// Now go to the middle of the copy and get the pointer back out.
14+
let ptr = copy_bytes.add(half_ptr).cast::<*const i32>().read_unaligned();
15+
// Dereferencing this should fail as the copy has removed the provenance.
16+
let _val = *ptr; //~ERROR: dangling
17+
}
18+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
error: Undefined Behavior: memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
2+
--> $DIR/ptr_copy_loses_partial_provenance1.rs:LL:CC
3+
|
4+
LL | let _val = *ptr;
5+
| ^^^^ memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
6+
|
7+
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
8+
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
9+
= note: BACKTRACE:
10+
= note: inside `main` at $DIR/ptr_copy_loses_partial_provenance1.rs:LL:CC
11+
12+
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
13+
14+
error: aborting due to 1 previous error
15+

tests/fail/uninit/padding-enum.rs

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
use std::mem;
2+
3+
// We have three fields to avoid the ScalarPair optimization.
4+
#[allow(unused)]
5+
enum E {
6+
None,
7+
Some(&'static (), &'static (), usize),
8+
}
9+
10+
fn main() { unsafe {
11+
let mut p: mem::MaybeUninit<E> = mem::MaybeUninit::zeroed();
12+
// The copy when `E` is returned from `transmute` should destroy padding
13+
// (even when we use `write_unaligned`, which under the hood uses an untyped copy).
14+
p.as_mut_ptr().write_unaligned(mem::transmute((0usize, 0usize, 0usize)));
15+
// This is a `None`, so everything but the discriminant is padding.
16+
assert!(matches!(*p.as_ptr(), E::None));
17+
18+
// Turns out the discriminant is (currently) stored
19+
// in the 2nd pointer, so the first half is padding.
20+
let c = &p as *const _ as *const u8;
21+
let _val = *c.add(0); // Get a padding byte.
22+
//~^ERROR: uninitialized
23+
} }

tests/fail/uninit/padding-enum.stderr

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
2+
--> $DIR/padding-enum.rs:LL:CC
3+
|
4+
LL | let _val = *c.add(0); // Get a padding byte.
5+
| ^^^^^^^^^ using uninitialized data, but this operation requires initialized memory
6+
|
7+
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
8+
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
9+
= note: BACKTRACE:
10+
= note: inside `main` at $DIR/padding-enum.rs:LL:CC
11+
12+
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
13+
14+
error: aborting due to 1 previous error
15+

tests/fail/uninit/padding-pair.rs

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#![feature(core_intrinsics)]
2+
3+
use std::mem::{self, MaybeUninit};
4+
5+
fn main() {
6+
// This constructs a `(usize, bool)` pair: 9 bytes initialized, the rest not.
7+
// Ensure that these 9 bytes are indeed initialized, and the rest is indeed not.
8+
// This should be the case even if we write into previously initialized storage.
9+
let mut x: MaybeUninit<Box<[u8]>> = MaybeUninit::zeroed();
10+
let z = std::intrinsics::add_with_overflow(0usize, 0usize);
11+
unsafe { x.as_mut_ptr().cast::<(usize, bool)>().write(z) };
12+
// Now read this bytewise. There should be (`ptr_size + 1`) def bytes followed by
13+
// (`ptr_size - 1`) undef bytes (the padding after the bool) in there.
14+
let z: *const u8 = &x as *const _ as *const _;
15+
let first_undef = mem::size_of::<usize>() as isize + 1;
16+
for i in 0..first_undef {
17+
let byte = unsafe { *z.offset(i) };
18+
assert_eq!(byte, 0);
19+
}
20+
let v = unsafe { *z.offset(first_undef) };
21+
//~^ ERROR: uninitialized
22+
if v == 0 {
23+
println!("it is zero");
24+
}
25+
}

tests/fail/uninit/padding-pair.stderr

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
2+
--> $DIR/padding-pair.rs:LL:CC
3+
|
4+
LL | let v = unsafe { *z.offset(first_undef) };
5+
| ^^^^^^^^^^^^^^^^^^^^^^ using uninitialized data, but this operation requires initialized memory
6+
|
7+
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
8+
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
9+
= note: BACKTRACE:
10+
= note: inside `main` at $DIR/padding-pair.rs:LL:CC
11+
12+
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
13+
14+
error: aborting due to 1 previous error
15+

0 commit comments

Comments
 (0)