Skip to content

Commit

Permalink
Rollup merge of #133211 - Strophox:miri-correct-state-update-ffi, r=R…
Browse files Browse the repository at this point in the history
…alfJung

Extend Miri to correctly pass mutable pointers through FFI

Based off of #129684, this PR further extends Miri to execute native calls that make use of pointers to *mutable* memory.
We adapt Miri's bookkeeping of internal state upon any FFI call that gives external code permission to mutate memory.

Native code may now possibly write and therefore initialize and change the pointer provenance of bytes it has access to: Such memory is assumed to be *initialized* afterwards and bytes are given *arbitrary (wildcard) provenance*. This enables programs that correctly use mutating FFI calls to run Miri without errors, at the cost of possibly missing Undefined Behaviour caused by incorrect usage of mutating FFI.

> <details>
>
> <summary> Simple example </summary>
>
> ```rust
> extern "C" {
>   fn init_int(ptr: *mut i32);
> }
>
> fn main() {
>   let mut x = std::mem::MaybeUninit::<i32>::uninit();
>   let x = unsafe {
>     init_int(x.as_mut_ptr());
>     x.assume_init()
>   };
>
>   println!("C initialized my memory to: {x}");
> }
> ```
> ```c
> void init_int(int *ptr) {
>   *ptr = 42;
> }
> ```
> should now show `C initialized my memory to: 42`.
>
> </details>

r? ``@RalfJung``
  • Loading branch information
matthiaskrgr authored Dec 6, 2024
2 parents 820ddaf + 712ceab commit 576176d
Show file tree
Hide file tree
Showing 19 changed files with 476 additions and 59 deletions.
6 changes: 3 additions & 3 deletions compiler/rustc_const_eval/src/const_eval/dummy_machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,9 +168,9 @@ impl<'tcx> interpret::Machine<'tcx> for DummyMachine {
})
}

fn expose_ptr(
_ecx: &mut InterpCx<'tcx, Self>,
_ptr: interpret::Pointer<Self::Provenance>,
fn expose_provenance(
_ecx: &InterpCx<'tcx, Self>,
_provenance: Self::Provenance,
) -> interpret::InterpResult<'tcx> {
unimplemented!()
}
Expand Down
10 changes: 6 additions & 4 deletions compiler/rustc_const_eval/src/const_eval/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,8 @@ use crate::errors::{LongRunning, LongRunningWarn};
use crate::fluent_generated as fluent;
use crate::interpret::{
self, AllocId, AllocRange, ConstAllocation, CtfeProvenance, FnArg, Frame, GlobalAlloc, ImmTy,
InterpCx, InterpResult, MPlaceTy, OpTy, Pointer, RangeSet, Scalar, compile_time_machine,
interp_ok, throw_exhaust, throw_inval, throw_ub, throw_ub_custom, throw_unsup,
throw_unsup_format,
InterpCx, InterpResult, MPlaceTy, OpTy, RangeSet, Scalar, compile_time_machine, interp_ok,
throw_exhaust, throw_inval, throw_ub, throw_ub_custom, throw_unsup, throw_unsup_format,
};

/// When hitting this many interpreted terminators we emit a deny by default lint
Expand Down Expand Up @@ -586,7 +585,10 @@ impl<'tcx> interpret::Machine<'tcx> for CompileTimeMachine<'tcx> {
}

#[inline(always)]
fn expose_ptr(_ecx: &mut InterpCx<'tcx, Self>, _ptr: Pointer) -> InterpResult<'tcx> {
fn expose_provenance(
_ecx: &InterpCx<'tcx, Self>,
_provenance: Self::Provenance,
) -> InterpResult<'tcx> {
// This is only reachable with -Zunleash-the-miri-inside-of-you.
throw_unsup_format!("exposing pointers is not possible at compile-time")
}
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_const_eval/src/interpret/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
let scalar = src.to_scalar();
let ptr = scalar.to_pointer(self)?;
match ptr.into_pointer_or_addr() {
Ok(ptr) => M::expose_ptr(self, ptr)?,
Ok(ptr) => M::expose_provenance(self, ptr.provenance)?,
Err(_) => {} // Do nothing, exposing an invalid pointer (`None` provenance) is a NOP.
};
interp_ok(ImmTy::from_scalar(
Expand Down
8 changes: 4 additions & 4 deletions compiler/rustc_const_eval/src/interpret/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -327,11 +327,11 @@ pub trait Machine<'tcx>: Sized {
addr: u64,
) -> InterpResult<'tcx, Pointer<Option<Self::Provenance>>>;

/// Marks a pointer as exposed, allowing it's provenance
/// Marks a pointer as exposed, allowing its provenance
/// to be recovered. "Pointer-to-int cast"
fn expose_ptr(
ecx: &mut InterpCx<'tcx, Self>,
ptr: Pointer<Self::Provenance>,
fn expose_provenance(
ecx: &InterpCx<'tcx, Self>,
provenance: Self::Provenance,
) -> InterpResult<'tcx>;

/// Convert a pointer with provenance into an allocation-offset pair and extra provenance info.
Expand Down
46 changes: 46 additions & 0 deletions compiler/rustc_const_eval/src/interpret/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -944,6 +944,52 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
interp_ok(())
}

/// Handle the effect an FFI call might have on the state of allocations.
/// This overapproximates the modifications which external code might make to memory:
/// We set all reachable allocations as initialized, mark all provenances as exposed
/// and overwrite them with `Provenance::WILDCARD`.
pub fn prepare_for_native_call(
&mut self,
id: AllocId,
initial_prov: M::Provenance,
) -> InterpResult<'tcx> {
// Expose provenance of the root allocation.
M::expose_provenance(self, initial_prov)?;

let mut done = FxHashSet::default();
let mut todo = vec![id];
while let Some(id) = todo.pop() {
if !done.insert(id) {
// We already saw this allocation before, don't process it again.
continue;
}
let info = self.get_alloc_info(id);

// If there is no data behind this pointer, skip this.
if !matches!(info.kind, AllocKind::LiveData) {
continue;
}

// Expose all provenances in this allocation, and add them to `todo`.
let alloc = self.get_alloc_raw(id)?;
for prov in alloc.provenance().provenances() {
M::expose_provenance(self, prov)?;
if let Some(id) = prov.get_alloc_id() {
todo.push(id);
}
}

// Prepare for possible write from native code if mutable.
if info.mutbl.is_mut() {
self.get_alloc_raw_mut(id)?
.0
.prepare_for_native_write()
.map_err(|e| e.to_interp_error(id))?;
}
}
interp_ok(())
}

/// Create a lazy debug printer that prints the given allocation and all allocations it points
/// to, recursively.
#[must_use]
Expand Down
22 changes: 22 additions & 0 deletions compiler/rustc_middle/src/mir/interpret/allocation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,28 @@ impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes>
Ok(())
}

/// Initialize all previously uninitialized bytes in the entire allocation, and set
/// provenance of everything to `Wildcard`. Before calling this, make sure all
/// provenance in this allocation is exposed!
pub fn prepare_for_native_write(&mut self) -> AllocResult {
let full_range = AllocRange { start: Size::ZERO, size: Size::from_bytes(self.len()) };
// Overwrite uninitialized bytes with 0, to ensure we don't leak whatever their value happens to be.
for chunk in self.init_mask.range_as_init_chunks(full_range) {
if !chunk.is_init() {
let uninit_bytes = &mut self.bytes
[chunk.range().start.bytes_usize()..chunk.range().end.bytes_usize()];
uninit_bytes.fill(0);
}
}
// Mark everything as initialized now.
self.mark_init(full_range, true);

// Set provenance of all bytes to wildcard.
self.provenance.write_wildcards(self.len());

Ok(())
}

/// Remove all provenance in the given memory range.
pub fn clear_provenance(&mut self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult {
self.provenance.clear(range, cx)?;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,25 @@ impl<Prov: Provenance> ProvenanceMap<Prov> {

Ok(())
}

/// Overwrites all provenance in the allocation with wildcard provenance.
///
/// Provided for usage in Miri and panics otherwise.
pub fn write_wildcards(&mut self, alloc_size: usize) {
assert!(
Prov::OFFSET_IS_ADDR,
"writing wildcard provenance is not supported when `OFFSET_IS_ADDR` is false"
);
let wildcard = Prov::WILDCARD.unwrap();

// Remove all pointer provenances, then write wildcards into the whole byte range.
self.ptrs.clear();
let last = Size::from_bytes(alloc_size);
let bytes = self.bytes.get_or_insert_with(Box::default);
for offset in Size::ZERO..last {
bytes.insert(offset, wildcard);
}
}
}

/// A partial, owned list of provenance to transfer into another allocation.
Expand Down
9 changes: 9 additions & 0 deletions compiler/rustc_middle/src/mir/interpret/pointer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ pub trait Provenance: Copy + fmt::Debug + 'static {
/// pointer, and implement ptr-to-int transmutation by stripping provenance.
const OFFSET_IS_ADDR: bool;

/// If wildcard provenance is implemented, contains the unique, general wildcard provenance variant.
const WILDCARD: Option<Self>;

/// Determines how a pointer should be printed.
fn fmt(ptr: &Pointer<Self>, f: &mut fmt::Formatter<'_>) -> fmt::Result;

Expand Down Expand Up @@ -168,6 +171,9 @@ impl Provenance for CtfeProvenance {
// so ptr-to-int casts are not possible (since we do not know the global physical offset).
const OFFSET_IS_ADDR: bool = false;

// `CtfeProvenance` does not implement wildcard provenance.
const WILDCARD: Option<Self> = None;

fn fmt(ptr: &Pointer<Self>, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// Print AllocId.
fmt::Debug::fmt(&ptr.provenance.alloc_id(), f)?; // propagates `alternate` flag
Expand Down Expand Up @@ -197,6 +203,9 @@ impl Provenance for AllocId {
// so ptr-to-int casts are not possible (since we do not know the global physical offset).
const OFFSET_IS_ADDR: bool = false;

// `AllocId` does not implement wildcard provenance.
const WILDCARD: Option<Self> = None;

fn fmt(ptr: &Pointer<Self>, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// Forward `alternate` flag to `alloc_id` printing.
if f.alternate() {
Expand Down
10 changes: 6 additions & 4 deletions src/tools/miri/src/alloc_addresses/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,9 +286,9 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {

impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
fn expose_ptr(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
let this = self.eval_context_mut();
let global_state = this.machine.alloc_addresses.get_mut();
fn expose_ptr(&self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
let this = self.eval_context_ref();
let mut global_state = this.machine.alloc_addresses.borrow_mut();
// In strict mode, we don't need this, so we can save some cycles by not tracking it.
if global_state.provenance_mode == ProvenanceMode::Strict {
return interp_ok(());
Expand All @@ -299,8 +299,10 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
return interp_ok(());
}
trace!("Exposing allocation id {alloc_id:?}");
let global_state = this.machine.alloc_addresses.get_mut();
global_state.exposed.insert(alloc_id);
// Release the global state before we call `expose_tag`, which may call `get_alloc_info_extra`,
// which may need access to the global state.
drop(global_state);
if this.machine.borrow_tracker.is_some() {
this.expose_tag(alloc_id, tag)?;
}
Expand Down
4 changes: 2 additions & 2 deletions src/tools/miri/src/borrow_tracker/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -302,8 +302,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
}
}

fn expose_tag(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
let this = self.eval_context_mut();
fn expose_tag(&self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
let this = self.eval_context_ref();
let method = this.machine.borrow_tracker.as_ref().unwrap().borrow().borrow_tracker_method;
match method {
BorrowTrackerMethod::StackedBorrows => this.sb_expose_tag(alloc_id, tag),
Expand Down
4 changes: 2 additions & 2 deletions src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1011,8 +1011,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
}

/// Mark the given tag as exposed. It was found on a pointer with the given AllocId.
fn sb_expose_tag(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
let this = self.eval_context_mut();
fn sb_expose_tag(&self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
let this = self.eval_context_ref();

// Function pointers and dead objects don't have an alloc_extra so we ignore them.
// This is okay because accessing them is UB anyway, no need for any Stacked Borrows checks.
Expand Down
4 changes: 2 additions & 2 deletions src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -532,8 +532,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
}

/// Mark the given tag as exposed. It was found on a pointer with the given AllocId.
fn tb_expose_tag(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
let this = self.eval_context_mut();
fn tb_expose_tag(&self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
let this = self.eval_context_ref();

// Function pointers and dead objects don't have an alloc_extra so we ignore them.
// This is okay because accessing them is UB anyway, no need for any Tree Borrows checks.
Expand Down
7 changes: 5 additions & 2 deletions src/tools/miri/src/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,9 @@ impl interpret::Provenance for Provenance {
/// We use absolute addresses in the `offset` of a `StrictPointer`.
const OFFSET_IS_ADDR: bool = true;

/// Miri implements wildcard provenance.
const WILDCARD: Option<Self> = Some(Provenance::Wildcard);

fn get_alloc_id(self) -> Option<AllocId> {
match self {
Provenance::Concrete { alloc_id, .. } => Some(alloc_id),
Expand Down Expand Up @@ -1242,8 +1245,8 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
/// Called on `ptr as usize` casts.
/// (Actually computing the resulting `usize` doesn't need machine help,
/// that's just `Scalar::try_to_int`.)
fn expose_ptr(ecx: &mut InterpCx<'tcx, Self>, ptr: StrictPointer) -> InterpResult<'tcx> {
match ptr.provenance {
fn expose_provenance(ecx: &InterpCx<'tcx, Self>, provenance: Self::Provenance) -> InterpResult<'tcx> {
match provenance {
Provenance::Concrete { alloc_id, tag } => ecx.expose_ptr(alloc_id, tag),
Provenance::Wildcard => {
// No need to do anything for wildcard pointers as
Expand Down
50 changes: 34 additions & 16 deletions src/tools/miri/src/shims/native_lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@ use std::ops::Deref;

use libffi::high::call as ffi;
use libffi::low::CodePtr;
use rustc_abi::{BackendRepr, HasDataLayout};
use rustc_middle::ty::{self as ty, IntTy, UintTy};
use rustc_abi::{BackendRepr, HasDataLayout, Size};
use rustc_middle::{
mir::interpret::Pointer,
ty::{self as ty, IntTy, UintTy},
};
use rustc_span::Symbol;

use crate::*;
Expand Down Expand Up @@ -75,6 +78,11 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
unsafe { ffi::call::<()>(ptr, libffi_args.as_slice()) };
return interp_ok(ImmTy::uninit(dest.layout));
}
ty::RawPtr(..) => {
let x = unsafe { ffi::call::<*const ()>(ptr, libffi_args.as_slice()) };
let ptr = Pointer::new(Provenance::Wildcard, Size::from_bytes(x.addr()));
Scalar::from_pointer(ptr, this)
}
_ => throw_unsup_format!("unsupported return type for native call: {:?}", link_name),
};
interp_ok(ImmTy::from_scalar(scalar, dest.layout))
Expand Down Expand Up @@ -152,8 +160,26 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
if !matches!(arg.layout.backend_repr, BackendRepr::Scalar(_)) {
throw_unsup_format!("only scalar argument types are support for native calls")
}
libffi_args.push(imm_to_carg(this.read_immediate(arg)?, this)?);
let imm = this.read_immediate(arg)?;
libffi_args.push(imm_to_carg(&imm, this)?);
// If we are passing a pointer, prepare the memory it points to.
if matches!(arg.layout.ty.kind(), ty::RawPtr(..)) {
let ptr = imm.to_scalar().to_pointer(this)?;
let Some(prov) = ptr.provenance else {
// Pointer without provenance may not access any memory.
continue;
};
// We use `get_alloc_id` for its best-effort behaviour with Wildcard provenance.
let Some(alloc_id) = prov.get_alloc_id() else {
// Wildcard pointer, whatever it points to must be already exposed.
continue;
};
this.prepare_for_native_call(alloc_id, prov)?;
}
}

// FIXME: In the future, we should also call `prepare_for_native_call` on all previously
// exposed allocations, since C may access any of them.

// Convert them to `libffi::high::Arg` type.
let libffi_args = libffi_args
Expand Down Expand Up @@ -220,7 +246,7 @@ impl<'a> CArg {

/// Extract the scalar value from the result of reading a scalar from the machine,
/// and convert it to a `CArg`.
fn imm_to_carg<'tcx>(v: ImmTy<'tcx>, cx: &impl HasDataLayout) -> InterpResult<'tcx, CArg> {
fn imm_to_carg<'tcx>(v: &ImmTy<'tcx>, cx: &impl HasDataLayout) -> InterpResult<'tcx, CArg> {
interp_ok(match v.layout.ty.kind() {
// If the primitive provided can be converted to a type matching the type pattern
// then create a `CArg` of this primitive value with the corresponding `CArg` constructor.
Expand All @@ -238,18 +264,10 @@ fn imm_to_carg<'tcx>(v: ImmTy<'tcx>, cx: &impl HasDataLayout) -> InterpResult<'t
ty::Uint(UintTy::U64) => CArg::UInt64(v.to_scalar().to_u64()?),
ty::Uint(UintTy::Usize) =>
CArg::USize(v.to_scalar().to_target_usize(cx)?.try_into().unwrap()),
ty::RawPtr(_, mutability) => {
// Arbitrary mutable pointer accesses are not currently supported in Miri.
if mutability.is_mut() {
throw_unsup_format!(
"unsupported mutable pointer type for native call: {}",
v.layout.ty
);
} else {
let s = v.to_scalar().to_pointer(cx)?.addr();
// This relies on the `expose_provenance` in `addr_from_alloc_id`.
CArg::RawPtr(std::ptr::with_exposed_provenance_mut(s.bytes_usize()))
}
ty::RawPtr(..) => {
let s = v.to_scalar().to_pointer(cx)?.addr();
// This relies on the `expose_provenance` in `addr_from_alloc_id`.
CArg::RawPtr(std::ptr::with_exposed_provenance_mut(s.bytes_usize()))
}
_ => throw_unsup_format!("unsupported argument type for native call: {}", v.layout.ty),
})
Expand Down
Loading

0 comments on commit 576176d

Please sign in to comment.