Skip to content

Commit c78ebb7

Browse files
committed
Auto merge of #87123 - RalfJung:miri-provenance-overhaul, r=oli-obk
CTFE/Miri engine Pointer type overhaul

This fixes the long-standing problem that we are using `Scalar` as a type to represent pointers that might be integer values (since they point to a ZST). The main problem is that with int-to-ptr casts, there are multiple ways to represent the same pointer as a `Scalar` and it is unclear if "normalization" (i.e., the cast) already happened or not. This leads to ugly methods like `force_mplace_ptr` and `force_op_ptr`.

Another problem this solves is that in Miri, it would make a lot more sense to have the `Pointer::offset` field represent the full absolute address (instead of being relative to the `AllocId`). This means we can do ptr-to-int casts without access to any machine state, and it means that the overflow checks on pointer arithmetic are (finally!) accurate.

To solve this, the `Pointer` type is made entirely parametric over the provenance, so that we can use `Pointer<AllocId>` inside `Scalar` but use `Pointer<Option<AllocId>>` when accessing memory (where `None` represents the case that we could not figure out an `AllocId`; in that case the `offset` is an absolute address). Moreover, the `Provenance` trait determines if a pointer with a given provenance can be cast to an integer by simply dropping the provenance.

I hope this can be read commit-by-commit, but the first commit does the bulk of the work. It introduces some FIXMEs that are resolved later.

Fixes rust-lang/miri#841
Miri PR: rust-lang/miri#1851
r? `@oli-obk`
2 parents f502bd3 + efbee50 commit c78ebb7

File tree

106 files changed

+1311
-1401
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

106 files changed

+1311
-1401
lines changed

Diff for: compiler/rustc_codegen_cranelift/src/constant.rs

+11-10
Original file line numberDiff line numberDiff line change
@@ -193,20 +193,21 @@ pub(crate) fn codegen_const_value<'tcx>(
193193
place.to_cvalue(fx)
194194
}
195195
}
196-
Scalar::Ptr(ptr) => {
197-
let alloc_kind = fx.tcx.get_global_alloc(ptr.alloc_id);
196+
Scalar::Ptr(ptr, _size) => {
197+
let (alloc_id, offset) = ptr.into_parts(); // we know the `offset` is relative
198+
let alloc_kind = fx.tcx.get_global_alloc(alloc_id);
198199
let base_addr = match alloc_kind {
199200
Some(GlobalAlloc::Memory(alloc)) => {
200201
let data_id = data_id_for_alloc_id(
201202
&mut fx.constants_cx,
202203
fx.module,
203-
ptr.alloc_id,
204+
alloc_id,
204205
alloc.mutability,
205206
);
206207
let local_data_id =
207208
fx.module.declare_data_in_func(data_id, &mut fx.bcx.func);
208209
if fx.clif_comments.enabled() {
209-
fx.add_comment(local_data_id, format!("{:?}", ptr.alloc_id));
210+
fx.add_comment(local_data_id, format!("{:?}", alloc_id));
210211
}
211212
fx.bcx.ins().global_value(fx.pointer_type, local_data_id)
212213
}
@@ -226,10 +227,10 @@ pub(crate) fn codegen_const_value<'tcx>(
226227
}
227228
fx.bcx.ins().global_value(fx.pointer_type, local_data_id)
228229
}
229-
None => bug!("missing allocation {:?}", ptr.alloc_id),
230+
None => bug!("missing allocation {:?}", alloc_id),
230231
};
231-
let val = if ptr.offset.bytes() != 0 {
232-
fx.bcx.ins().iadd_imm(base_addr, i64::try_from(ptr.offset.bytes()).unwrap())
232+
let val = if offset.bytes() != 0 {
233+
fx.bcx.ins().iadd_imm(base_addr, i64::try_from(offset.bytes()).unwrap())
233234
} else {
234235
base_addr
235236
};
@@ -406,7 +407,7 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
406407
let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(0..alloc.len()).to_vec();
407408
data_ctx.define(bytes.into_boxed_slice());
408409

409-
for &(offset, (_tag, reloc)) in alloc.relocations().iter() {
410+
for &(offset, alloc_id) in alloc.relocations().iter() {
410411
let addend = {
411412
let endianness = tcx.data_layout.endian;
412413
let offset = offset.bytes() as usize;
@@ -417,7 +418,7 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
417418
read_target_uint(endianness, bytes).unwrap()
418419
};
419420

420-
let reloc_target_alloc = tcx.get_global_alloc(reloc).unwrap();
421+
let reloc_target_alloc = tcx.get_global_alloc(alloc_id).unwrap();
421422
let data_id = match reloc_target_alloc {
422423
GlobalAlloc::Function(instance) => {
423424
assert_eq!(addend, 0);
@@ -427,7 +428,7 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
427428
continue;
428429
}
429430
GlobalAlloc::Memory(target_alloc) => {
430-
data_id_for_alloc_id(cx, module, reloc, target_alloc.mutability)
431+
data_id_for_alloc_id(cx, module, alloc_id, target_alloc.mutability)
431432
}
432433
GlobalAlloc::Static(def_id) => {
433434
if tcx.codegen_fn_attrs(def_id).flags.contains(CodegenFnAttrFlags::THREAD_LOCAL)

Diff for: compiler/rustc_codegen_llvm/src/common.rs

+5-4
Original file line numberDiff line numberDiff line change
@@ -243,16 +243,17 @@ impl ConstMethods<'tcx> for CodegenCx<'ll, 'tcx> {
243243
self.const_bitcast(llval, llty)
244244
}
245245
}
246-
Scalar::Ptr(ptr) => {
247-
let (base_addr, base_addr_space) = match self.tcx.global_alloc(ptr.alloc_id) {
246+
Scalar::Ptr(ptr, _size) => {
247+
let (alloc_id, offset) = ptr.into_parts();
248+
let (base_addr, base_addr_space) = match self.tcx.global_alloc(alloc_id) {
248249
GlobalAlloc::Memory(alloc) => {
249250
let init = const_alloc_to_llvm(self, alloc);
250251
let value = match alloc.mutability {
251252
Mutability::Mut => self.static_addr_of_mut(init, alloc.align, None),
252253
_ => self.static_addr_of(init, alloc.align, None),
253254
};
254255
if !self.sess().fewer_names() {
255-
llvm::set_value_name(value, format!("{:?}", ptr.alloc_id).as_bytes());
256+
llvm::set_value_name(value, format!("{:?}", alloc_id).as_bytes());
256257
}
257258
(value, AddressSpace::DATA)
258259
}
@@ -269,7 +270,7 @@ impl ConstMethods<'tcx> for CodegenCx<'ll, 'tcx> {
269270
let llval = unsafe {
270271
llvm::LLVMConstInBoundsGEP(
271272
self.const_bitcast(base_addr, self.type_i8p_ext(base_addr_space)),
272-
&self.const_usize(ptr.offset.bytes()),
273+
&self.const_usize(offset.bytes()),
273274
1,
274275
)
275276
};

Diff for: compiler/rustc_codegen_llvm/src/consts.rs

+6-3
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use rustc_codegen_ssa::traits::*;
1111
use rustc_hir::def_id::DefId;
1212
use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrFlags, CodegenFnAttrs};
1313
use rustc_middle::mir::interpret::{
14-
read_target_uint, Allocation, ErrorHandled, GlobalAlloc, Pointer,
14+
read_target_uint, Allocation, ErrorHandled, GlobalAlloc, Pointer, Scalar as InterpScalar,
1515
};
1616
use rustc_middle::mir::mono::MonoItem;
1717
use rustc_middle::ty::{self, Instance, Ty};
@@ -25,7 +25,7 @@ pub fn const_alloc_to_llvm(cx: &CodegenCx<'ll, '_>, alloc: &Allocation) -> &'ll
2525
let pointer_size = dl.pointer_size.bytes() as usize;
2626

2727
let mut next_offset = 0;
28-
for &(offset, ((), alloc_id)) in alloc.relocations().iter() {
28+
for &(offset, alloc_id) in alloc.relocations().iter() {
2929
let offset = offset.bytes();
3030
assert_eq!(offset as usize as u64, offset);
3131
let offset = offset as usize;
@@ -55,7 +55,10 @@ pub fn const_alloc_to_llvm(cx: &CodegenCx<'ll, '_>, alloc: &Allocation) -> &'ll
5555
};
5656

5757
llvals.push(cx.scalar_to_backend(
58-
Pointer::new(alloc_id, Size::from_bytes(ptr_offset)).into(),
58+
InterpScalar::from_pointer(
59+
Pointer::new(alloc_id, Size::from_bytes(ptr_offset)),
60+
&cx.tcx,
61+
),
5962
&Scalar { value: Primitive::Pointer, valid_range: 0..=!0 },
6063
cx.type_i8p_ext(address_space),
6164
));

Diff for: compiler/rustc_codegen_ssa/src/mir/operand.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,10 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
9090
Abi::ScalarPair(ref a, _) => a,
9191
_ => bug!("from_const: invalid ScalarPair layout: {:#?}", layout),
9292
};
93-
let a = Scalar::from(Pointer::new(
94-
bx.tcx().create_memory_alloc(data),
95-
Size::from_bytes(start),
96-
));
93+
let a = Scalar::from_pointer(
94+
Pointer::new(bx.tcx().create_memory_alloc(data), Size::from_bytes(start)),
95+
&bx.tcx(),
96+
);
9797
let a_llval = bx.scalar_to_backend(
9898
a,
9999
a_scalar,

Diff for: compiler/rustc_middle/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
#![feature(iter_zip)]
5050
#![feature(thread_local_const_init)]
5151
#![feature(try_reserve)]
52+
#![feature(nonzero_ops)]
5253
#![recursion_limit = "512"]
5354

5455
#[macro_use]

Diff for: compiler/rustc_middle/src/mir/interpret/allocation.rs

+56-48
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
use std::borrow::Cow;
44
use std::convert::TryFrom;
55
use std::iter;
6-
use std::ops::{Deref, DerefMut, Range};
6+
use std::ops::{Deref, Range};
77
use std::ptr;
88

99
use rustc_ast::Mutability;
@@ -25,7 +25,7 @@ use crate::ty;
2525
/// module provides higher-level access.
2626
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, TyEncodable, TyDecodable)]
2727
#[derive(HashStable)]
28-
pub struct Allocation<Tag = (), Extra = ()> {
28+
pub struct Allocation<Tag = AllocId, Extra = ()> {
2929
/// The actual bytes of the allocation.
3030
/// Note that the bytes of a pointer represent the offset of the pointer.
3131
bytes: Vec<u8>,
@@ -154,26 +154,32 @@ impl<Tag> Allocation<Tag> {
154154
}
155155
}
156156

157-
impl Allocation<()> {
158-
/// Add Tag and Extra fields
159-
pub fn with_tags_and_extra<T, E>(
157+
impl Allocation {
158+
/// Convert Tag and add Extra fields
159+
pub fn convert_tag_add_extra<Tag, Extra>(
160160
self,
161-
mut tagger: impl FnMut(AllocId) -> T,
162-
extra: E,
163-
) -> Allocation<T, E> {
161+
cx: &impl HasDataLayout,
162+
extra: Extra,
163+
mut tagger: impl FnMut(Pointer<AllocId>) -> Pointer<Tag>,
164+
) -> Allocation<Tag, Extra> {
165+
// Compute new pointer tags, which also adjusts the bytes.
166+
let mut bytes = self.bytes;
167+
let mut new_relocations = Vec::with_capacity(self.relocations.0.len());
168+
let ptr_size = cx.data_layout().pointer_size.bytes_usize();
169+
let endian = cx.data_layout().endian;
170+
for &(offset, alloc_id) in self.relocations.iter() {
171+
let idx = offset.bytes_usize();
172+
let ptr_bytes = &mut bytes[idx..idx + ptr_size];
173+
let bits = read_target_uint(endian, ptr_bytes).unwrap();
174+
let (ptr_tag, ptr_offset) =
175+
tagger(Pointer::new(alloc_id, Size::from_bytes(bits))).into_parts();
176+
write_target_uint(endian, ptr_bytes, ptr_offset.bytes().into()).unwrap();
177+
new_relocations.push((offset, ptr_tag));
178+
}
179+
// Create allocation.
164180
Allocation {
165-
bytes: self.bytes,
166-
relocations: Relocations::from_presorted(
167-
self.relocations
168-
.iter()
169-
// The allocations in the relocations (pointers stored *inside* this allocation)
170-
// all get the base pointer tag.
171-
.map(|&(offset, ((), alloc))| {
172-
let tag = tagger(alloc);
173-
(offset, (tag, alloc))
174-
})
175-
.collect(),
176-
),
181+
bytes,
182+
relocations: Relocations::from_presorted(new_relocations),
177183
init_mask: self.init_mask,
178184
align: self.align,
179185
mutability: self.mutability,
@@ -279,6 +285,9 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
279285
/// A raw pointer variant of `get_bytes_mut` that avoids invalidating existing aliases into this memory.
280286
pub fn get_bytes_mut_ptr(&mut self, cx: &impl HasDataLayout, range: AllocRange) -> *mut [u8] {
281287
self.mark_init(range, true);
288+
// This also clears relocations that just overlap with the written range. So writing to some
289+
// byte can de-initialize its neighbors! See
290+
// <https://github.com/rust-lang/rust/issues/87184> for details.
282291
self.clear_relocations(cx, range);
283292

284293
assert!(range.end().bytes_usize() <= self.bytes.len()); // need to do our own bounds-check
@@ -321,7 +330,11 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
321330
cx: &impl HasDataLayout,
322331
range: AllocRange,
323332
) -> AllocResult<ScalarMaybeUninit<Tag>> {
324-
// `get_bytes_unchecked` tests relocation edges.
333+
// `get_bytes_with_uninit_and_ptr` tests relocation edges.
334+
// We deliberately error when loading data that partially has provenance, or partially
335+
// initialized data (that's the check below), into a scalar. The LLVM semantics of this are
336+
// unclear so we are conservative. See <https://github.com/rust-lang/rust/issues/69488> for
337+
// further discussion.
325338
let bytes = self.get_bytes_with_uninit_and_ptr(cx, range)?;
326339
// Uninit check happens *after* we established that the alignment is correct.
327340
// We must not return `Ok()` for unaligned pointers!
@@ -339,9 +352,9 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
339352
self.check_relocations(cx, range)?;
340353
} else {
341354
// Maybe a pointer.
342-
if let Some(&(tag, alloc_id)) = self.relocations.get(&range.start) {
343-
let ptr = Pointer::new_with_tag(alloc_id, Size::from_bytes(bits), tag);
344-
return Ok(ScalarMaybeUninit::Scalar(ptr.into()));
355+
if let Some(&prov) = self.relocations.get(&range.start) {
356+
let ptr = Pointer::new(prov, Size::from_bytes(bits));
357+
return Ok(ScalarMaybeUninit::from_pointer(ptr, cx));
345358
}
346359
}
347360
// We don't. Just return the bits.
@@ -371,18 +384,23 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
371384
}
372385
};
373386

374-
let bytes = match val.to_bits_or_ptr(range.size, cx) {
375-
Err(val) => u128::from(val.offset.bytes()),
376-
Ok(data) => data,
387+
// `to_bits_or_ptr_internal` is the right method because we just want to store this data
388+
// as-is into memory.
389+
let (bytes, provenance) = match val.to_bits_or_ptr_internal(range.size) {
390+
Err(val) => {
391+
let (provenance, offset) = val.into_parts();
392+
(u128::from(offset.bytes()), Some(provenance))
393+
}
394+
Ok(data) => (data, None),
377395
};
378396

379397
let endian = cx.data_layout().endian;
380398
let dst = self.get_bytes_mut(cx, range);
381399
write_target_uint(endian, dst, bytes).unwrap();
382400

383401
// See if we have to also write a relocation.
384-
if let Scalar::Ptr(val) = val {
385-
self.relocations.insert(range.start, (val.tag, val.alloc_id));
402+
if let Some(provenance) = provenance {
403+
self.relocations.0.insert(range.start, provenance);
386404
}
387405

388406
Ok(())
@@ -392,11 +410,7 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
392410
/// Relocations.
393411
impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
394412
/// Returns all relocations overlapping with the given pointer-offset pair.
395-
pub fn get_relocations(
396-
&self,
397-
cx: &impl HasDataLayout,
398-
range: AllocRange,
399-
) -> &[(Size, (Tag, AllocId))] {
413+
pub fn get_relocations(&self, cx: &impl HasDataLayout, range: AllocRange) -> &[(Size, Tag)] {
400414
// We have to go back `pointer_size - 1` bytes, as that one would still overlap with
401415
// the beginning of this range.
402416
let start = range.start.bytes().saturating_sub(cx.data_layout().pointer_size.bytes() - 1);
@@ -446,7 +460,7 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
446460
}
447461

448462
// Forget all the relocations.
449-
self.relocations.remove_range(first..last);
463+
self.relocations.0.remove_range(first..last);
450464
}
451465

452466
/// Errors if there are relocations overlapping with the edges of the
@@ -582,39 +596,33 @@ impl<Tag, Extra> Allocation<Tag, Extra> {
582596
}
583597
}
584598

585-
/// Relocations.
599+
/// "Relocations" stores the provenance information of pointers stored in memory.
586600
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, TyEncodable, TyDecodable)]
587-
pub struct Relocations<Tag = (), Id = AllocId>(SortedMap<Size, (Tag, Id)>);
601+
pub struct Relocations<Tag = AllocId>(SortedMap<Size, Tag>);
588602

589-
impl<Tag, Id> Relocations<Tag, Id> {
603+
impl<Tag> Relocations<Tag> {
590604
pub fn new() -> Self {
591605
Relocations(SortedMap::new())
592606
}
593607

594608
// The caller must guarantee that the given relocations are already sorted
595609
// by address and contain no duplicates.
596-
pub fn from_presorted(r: Vec<(Size, (Tag, Id))>) -> Self {
610+
pub fn from_presorted(r: Vec<(Size, Tag)>) -> Self {
597611
Relocations(SortedMap::from_presorted_elements(r))
598612
}
599613
}
600614

601615
impl<Tag> Deref for Relocations<Tag> {
602-
type Target = SortedMap<Size, (Tag, AllocId)>;
616+
type Target = SortedMap<Size, Tag>;
603617

604618
fn deref(&self) -> &Self::Target {
605619
&self.0
606620
}
607621
}
608622

609-
impl<Tag> DerefMut for Relocations<Tag> {
610-
fn deref_mut(&mut self) -> &mut Self::Target {
611-
&mut self.0
612-
}
613-
}
614-
615623
/// A partial, owned list of relocations to transfer into another allocation.
616624
pub struct AllocationRelocations<Tag> {
617-
relative_relocations: Vec<(Size, (Tag, AllocId))>,
625+
relative_relocations: Vec<(Size, Tag)>,
618626
}
619627

620628
impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
@@ -652,7 +660,7 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
652660
/// The affected range, as defined in the parameters to `prepare_relocation_copy` is expected
653661
/// to be clear of relocations.
654662
pub fn mark_relocation_range(&mut self, relocations: AllocationRelocations<Tag>) {
655-
self.relocations.insert_presorted(relocations.relative_relocations);
663+
self.relocations.0.insert_presorted(relocations.relative_relocations);
656664
}
657665
}
658666

0 commit comments

Comments
 (0)