Skip to main content

miri/alloc_addresses/
mod.rs

1//! This module is responsible for managing the absolute addresses that allocations are located at,
2//! and for casting between pointers and integers based on those addresses.
3
4mod address_generator;
5mod reuse_pool;
6
7use std::cell::RefCell;
8
9use rustc_abi::{Align, Size};
10use rustc_data_structures::fx::{FxHashMap, FxHashSet};
11use rustc_middle::ty::TyCtxt;
12
13pub use self::address_generator::AddressGenerator;
14use self::reuse_pool::ReusePool;
15use crate::alloc::MiriAllocParams;
16use crate::concurrency::VClock;
17use crate::diagnostics::SpanDedupDiagnostic;
18use crate::*;
19
/// Selects how integer-to-pointer casts (`with_exposed_provenance`) are treated.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProvenanceMode {
    /// `expose_provenance`/`with_exposed_provenance` are supported through "wildcard"
    /// provenance. A warning is emitted on `with_exposed_provenance` so the user learns about
    /// the loss of precision.
    Default,
    /// Same as `Default`, except that the warning is suppressed.
    Permissive,
    /// `with_exposed_provenance` is an error, which guarantees that no precision is lost.
    Strict,
}
30
31pub type GlobalState = RefCell<GlobalStateInner>;
32
33#[derive(Debug)]
34pub struct GlobalStateInner {
35    /// This is used as a map between the address of each allocation and its `AllocId`. It is always
36    /// sorted by address. We cannot use a `HashMap` since we can be given an address that is offset
37    /// from the base address, and we need to find the `AllocId` it belongs to. This is not the
38    /// *full* inverse of `base_addr`; dead allocations have been removed.
39    /// Note that in GenMC mode, dead allocations are *not* removed -- and also, addresses are never
40    /// reused. This lets us use the address as a cross-execution-stable identifier for an allocation.
41    int_to_ptr_map: Vec<(u64, AllocId)>,
42    /// The base address for each allocation.  We cannot put that into
43    /// `AllocExtra` because function pointers also have a base address, and
44    /// they do not have an `AllocExtra`.
45    /// This is the inverse of `int_to_ptr_map`.
46    base_addr: FxHashMap<AllocId, u64>,
47    /// The set of exposed allocations. This cannot be put
48    /// into `AllocExtra` for the same reason as `base_addr`.
49    exposed: FxHashSet<AllocId>,
50    /// The provenance to use for int2ptr casts
51    provenance_mode: ProvenanceMode,
52    /// The generator for new addresses in a given range, and a pool for address reuse. This is
53    /// `None` if addresses are generated elsewhere (in native-lib mode or with GenMC).
54    address_generation: Option<(AddressGenerator, ReusePool)>,
55    /// Native-lib mode only: Temporarily store prepared memory space for global allocations the
56    /// first time their memory address is required. This is used to ensure that the memory is
57    /// allocated before Miri assigns it an internal address, which is important for matching the
58    /// internal address to the machine address so FFI can read from pointers.
59    prepared_alloc_bytes: Option<FxHashMap<AllocId, MiriAllocBytes>>,
60}
61
62impl VisitProvenance for GlobalStateInner {
63    fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
64        let GlobalStateInner {
65            int_to_ptr_map: _,
66            base_addr: _,
67            prepared_alloc_bytes: _,
68            exposed: _,
69            address_generation: _,
70            provenance_mode: _,
71        } = self;
72        // Though base_addr, int_to_ptr_map, and exposed contain AllocIds, we do not want to visit them.
73        // int_to_ptr_map and exposed must contain only live allocations, and those
74        // are never garbage collected.
75        // base_addr is only relevant if we have a pointer to an AllocId and need to look up its
76        // base address; so if an AllocId is not reachable from somewhere else we can remove it
77        // here.
78    }
79}
80
81impl GlobalStateInner {
82    pub fn new<'tcx>(config: &MiriConfig, stack_addr: u64, tcx: TyCtxt<'tcx>) -> Self {
83        GlobalStateInner {
84            int_to_ptr_map: Vec::default(),
85            base_addr: FxHashMap::default(),
86            exposed: FxHashSet::default(),
87            provenance_mode: config.provenance_mode,
88            address_generation: (config.native_lib.is_empty() && config.genmc_config.is_none())
89                .then(|| {
90                    (
91                        AddressGenerator::new(stack_addr..tcx.target_usize_max()),
92                        ReusePool::new(config),
93                    )
94                }),
95            prepared_alloc_bytes: (!config.native_lib.is_empty()).then(FxHashMap::default),
96        }
97    }
98
99    pub fn remove_unreachable_allocs(&mut self, allocs: &LiveAllocs<'_, '_>) {
100        // `exposed` and `int_to_ptr_map` are cleared immediately when an allocation
101        // is freed, so `base_addr` is the only one we have to clean up based on the GC.
102        self.base_addr.retain(|id, _| allocs.is_live(*id));
103    }
104}
105
106impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {}
107trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
108    fn addr_from_alloc_id_uncached(
109        &self,
110        global_state: &mut GlobalStateInner,
111        alloc_id: AllocId,
112        memory_kind: MemoryKind,
113    ) -> InterpResult<'tcx, u64> {
114        let this = self.eval_context_ref();
115        let info = this.get_alloc_info(alloc_id);
116
117        // This is either called immediately after allocation (and then cached), or when
118        // adjusting `tcx` pointers (which never get freed). So assert that we are looking
119        // at a live allocation. This also ensures that we never re-assign an address to an
120        // allocation that previously had an address, but then was freed and the address
121        // information was removed.
122        assert!(!matches!(info.kind, AllocKind::Dead));
123
124        // TypeId allocations always have a "base address" of 0 (i.e., the relative offset is the
125        // hash fragment and therefore equal to the actual integer value).
126        if matches!(info.kind, AllocKind::TypeId) {
127            return interp_ok(0);
128        }
129
130        // Miri's address assignment leaks state across thread boundaries, which is incompatible
131        // with GenMC execution. So we instead let GenMC assign addresses to allocations.
132        if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() {
133            let addr =
134                genmc_ctx.handle_alloc(this, alloc_id, info.size, info.align, memory_kind)?;
135            return interp_ok(addr);
136        }
137
138        // This allocation does not have a base address yet, pick or reuse one.
139        if !this.machine.native_lib.is_empty() {
140            // In native lib mode, we use the "real" address of the bytes for this allocation.
141            // This ensures the interpreted program and native code have the same view of memory.
142            let params = this.machine.get_default_alloc_params();
143            let base_ptr = match info.kind {
144                AllocKind::LiveData => {
145                    if memory_kind == MiriMemoryKind::Global.into() {
146                        // For new global allocations, we always pre-allocate the memory to be able use the machine address directly.
147                        let prepared_bytes = MiriAllocBytes::zeroed(info.size, info.align, params)
148                            .unwrap_or_else(|| {
149                                panic!("Miri ran out of memory: cannot create allocation of {size:?} bytes", size = info.size)
150                            });
151                        let ptr = prepared_bytes.as_ptr();
152                        // Store prepared allocation to be picked up for use later.
153                        global_state
154                            .prepared_alloc_bytes
155                            .as_mut()
156                            .unwrap()
157                            .try_insert(alloc_id, prepared_bytes)
158                            .unwrap();
159                        ptr
160                    } else {
161                        // Non-global allocations are already in memory at this point so
162                        // we can just get a pointer to where their data is stored.
163                        this.get_alloc_bytes_unchecked_raw(alloc_id)?
164                    }
165                }
166                #[cfg(all(feature = "native-lib", unix))]
167                AllocKind::Function => {
168                    if let Some(GlobalAlloc::Function { instance, .. }) =
169                        this.tcx.try_get_global_alloc(alloc_id)
170                    {
171                        let fn_sig = this.tcx.instantiate_bound_regions_with_erased(
172                            this.tcx
173                                .fn_sig(instance.def_id())
174                                .instantiate(*this.tcx, instance.args)
175                                .skip_norm_wip(),
176                        );
177                        let fn_ptr = crate::shims::native_lib::build_libffi_closure(this, fn_sig)?;
178
179                        #[expect(
180                            clippy::as_conversions,
181                            reason = "No better way to cast a function ptr to a ptr"
182                        )]
183                        {
184                            fn_ptr as *const _
185                        }
186                    } else {
187                        dummy_alloc(params)
188                    }
189                }
190                #[cfg(not(all(feature = "native-lib", unix)))]
191                AllocKind::Function => dummy_alloc(params),
192                AllocKind::VTable | AllocKind::VaList => dummy_alloc(params),
193                AllocKind::TypeId | AllocKind::Dead => unreachable!(),
194            };
195            // We don't have to expose this pointer yet, we do that in `prepare_for_native_call`.
196            return interp_ok(base_ptr.addr().to_u64());
197        }
198        // We are not in native lib or genmc mode, so we control the addresses ourselves.
199        let (addr_gen, reuse) = global_state.address_generation.as_mut().unwrap();
200        let mut rng = this.machine.rng.borrow_mut();
201        if let Some((reuse_addr, clock)) =
202            reuse.take_addr(&mut *rng, info.size, info.align, memory_kind, this.active_thread())
203        {
204            if let Some(clock) = clock {
205                this.acquire_clock(&clock)?;
206            }
207            interp_ok(reuse_addr)
208        } else {
209            // We have to pick a fresh address.
210            let new_addr = addr_gen.generate(info.size, info.align, &mut rng)?;
211
212            // If we filled up more than half the address space, start aggressively reusing
213            // addresses to avoid running out.
214            let remaining_range = addr_gen.get_remaining();
215            if remaining_range.start > remaining_range.end / 2 {
216                reuse.address_space_shortage();
217            }
218
219            interp_ok(new_addr)
220        }
221    }
222}
223
224fn dummy_alloc(params: MiriAllocParams) -> *const u8 {
225    // Allocate some dummy memory to get a unique address for this function/vtable.
226    let alloc_bytes = MiriAllocBytes::from_bytes(&[0u8; 1], Align::from_bytes(1).unwrap(), params);
227    let ptr = alloc_bytes.as_ptr();
228    // Leak the underlying memory to ensure it remains unique.
229    std::mem::forget(alloc_bytes);
230    ptr
231}
232
233impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
234pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
235    // Returns the `AllocId` that corresponds to the specified addr,
236    // or `None` if the addr is out of bounds.
237    fn alloc_id_from_addr(&self, addr: u64, size: i64) -> Option<AllocId> {
238        let this = self.eval_context_ref();
239        let global_state = this.machine.alloc_addresses.borrow();
240        assert!(global_state.provenance_mode != ProvenanceMode::Strict);
241
242        // We always search the allocation to the right of this address. So if the size is strictly
243        // negative, we have to search for `addr-1` instead.
244        let addr = if size >= 0 { addr } else { addr.saturating_sub(1) };
245        let pos = global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr);
246
247        // Determine the in-bounds provenance for this pointer.
248        let alloc_id = match pos {
249            Ok(pos) => Some(global_state.int_to_ptr_map[pos].1),
250            Err(0) => None,
251            Err(pos) => {
252                // This is the largest of the addresses smaller than `int`,
253                // i.e. the greatest lower bound (glb)
254                let (glb, alloc_id) = global_state.int_to_ptr_map[pos - 1];
255                // This never overflows because `addr >= glb`
256                let offset = addr - glb;
257                // We require this to be strict in-bounds of the allocation. This arm is only
258                // entered for addresses that are not the base address, so even zero-sized
259                // allocations will get recognized at their base address -- but all other
260                // allocations will *not* be recognized at their "end" address.
261                let size = this.get_alloc_info(alloc_id).size;
262                if offset < size.bytes() { Some(alloc_id) } else { None }
263            }
264        }?;
265
266        // We only use this provenance if it has been exposed.
267        if global_state.exposed.contains(&alloc_id) {
268            // This must still be live, since we remove allocations from `int_to_ptr_map` when they get freed.
269            debug_assert!(this.is_alloc_live(alloc_id));
270            Some(alloc_id)
271        } else {
272            None
273        }
274    }
275
276    /// Returns the base address of an allocation, or an error if no base address could be found
277    ///
278    /// # Panics
279    /// If `memory_kind = None` and the `alloc_id` is not cached, meaning that the first call to this function per `alloc_id` must get the `memory_kind`.
280    fn addr_from_alloc_id(
281        &self,
282        alloc_id: AllocId,
283        memory_kind: Option<MemoryKind>,
284    ) -> InterpResult<'tcx, u64> {
285        let this = self.eval_context_ref();
286        let mut global_state = this.machine.alloc_addresses.borrow_mut();
287        let global_state = &mut *global_state;
288
289        match global_state.base_addr.get(&alloc_id) {
290            Some(&addr) => interp_ok(addr),
291            None => {
292                // First time we're looking for the absolute address of this allocation.
293                let memory_kind =
294                    memory_kind.expect("memory_kind is required since alloc_id is not cached");
295                let base_addr =
296                    this.addr_from_alloc_id_uncached(global_state, alloc_id, memory_kind)?;
297                trace!("Assigning base address {:#x} to allocation {:?}", base_addr, alloc_id);
298
299                // Store address in cache.
300                global_state.base_addr.try_insert(alloc_id, base_addr).unwrap();
301
302                // Also maintain the opposite mapping in `int_to_ptr_map`, ensuring we keep it
303                // sorted. We have a fast-path for the common case that this address is bigger than
304                // all previous ones. We skip this for allocations at address 0; those can't be
305                // real, they must be TypeId "fake allocations".
306                if base_addr != 0 {
307                    let pos = if global_state
308                        .int_to_ptr_map
309                        .last()
310                        .is_some_and(|(last_addr, _)| *last_addr < base_addr)
311                    {
312                        global_state.int_to_ptr_map.len()
313                    } else {
314                        global_state
315                            .int_to_ptr_map
316                            .binary_search_by_key(&base_addr, |(addr, _)| *addr)
317                            .unwrap_err()
318                    };
319                    global_state.int_to_ptr_map.insert(pos, (base_addr, alloc_id));
320                }
321
322                interp_ok(base_addr)
323            }
324        }
325    }
326
327    fn expose_provenance(&self, provenance: Provenance) -> InterpResult<'tcx> {
328        let this = self.eval_context_ref();
329        let mut global_state = this.machine.alloc_addresses.borrow_mut();
330
331        let (alloc_id, tag) = match provenance {
332            Provenance::Concrete { alloc_id, tag } => (alloc_id, tag),
333            Provenance::Wildcard => {
334                // No need to do anything for wildcard pointers as
335                // their provenances have already been previously exposed.
336                return interp_ok(());
337            }
338        };
339
340        // In strict mode, we don't need this, so we can save some cycles by not tracking it.
341        if global_state.provenance_mode == ProvenanceMode::Strict {
342            return interp_ok(());
343        }
344        // Exposing a dead alloc is a no-op, because it's not possible to get a dead allocation
345        // via int2ptr.
346        if !this.is_alloc_live(alloc_id) {
347            return interp_ok(());
348        }
349        trace!("Exposing allocation id {alloc_id:?}");
350        global_state.exposed.insert(alloc_id);
351        // Release the global state before we call `expose_tag`, which may call `get_alloc_info_extra`,
352        // which may need access to the global state.
353        drop(global_state);
354        if this.machine.borrow_tracker.is_some() {
355            this.expose_tag(alloc_id, tag)?;
356        }
357        interp_ok(())
358    }
359
360    fn ptr_from_addr_cast(&self, addr: u64) -> InterpResult<'tcx, Pointer> {
361        trace!("Casting {:#x} to a pointer", addr);
362
363        let this = self.eval_context_ref();
364        let global_state = this.machine.alloc_addresses.borrow();
365
366        // Potentially emit a warning.
367        match global_state.provenance_mode {
368            ProvenanceMode::Default => {
369                // The first time this happens at a particular location, print a warning.
370                static DEDUP: SpanDedupDiagnostic = SpanDedupDiagnostic::new();
371                this.dedup_diagnostic(&DEDUP, |first| {
372                    NonHaltingDiagnostic::Int2Ptr { details: first }
373                });
374            }
375            ProvenanceMode::Strict => {
376                throw_machine_stop!(TerminationInfo::Int2PtrWithStrictProvenance);
377            }
378            ProvenanceMode::Permissive => {}
379        }
380
381        // We do *not* look up the `AllocId` here! This is a `ptr as usize` cast, and it is
382        // completely legal to do a cast and then `wrapping_offset` to another allocation and only
383        // *then* do a memory access. So the allocation that the pointer happens to point to on a
384        // cast is fairly irrelevant. Instead we generate this as a "wildcard" pointer, such that
385        // *every time the pointer is used*, we do an `AllocId` lookup to find the (exposed)
386        // allocation it might be referencing.
387        interp_ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr)))
388    }
389
390    /// Convert a relative (tcx) pointer to a Miri pointer.
391    fn adjust_alloc_root_pointer(
392        &self,
393        ptr: interpret::Pointer<CtfeProvenance>,
394        tag: BorTag,
395        kind: MemoryKind,
396    ) -> InterpResult<'tcx, interpret::Pointer<Provenance>> {
397        let this = self.eval_context_ref();
398
399        let (prov, offset) = ptr.prov_and_relative_offset();
400        let alloc_id = prov.alloc_id();
401
402        // Get a pointer to the beginning of this allocation.
403        let base_addr = this.addr_from_alloc_id(alloc_id, Some(kind))?;
404        let base_ptr = interpret::Pointer::new(
405            Provenance::Concrete { alloc_id, tag },
406            Size::from_bytes(base_addr),
407        );
408        // Add offset with the right kind of pointer-overflowing arithmetic.
409        interp_ok(base_ptr.wrapping_offset(offset, this))
410    }
411
412    // This returns some prepared `MiriAllocBytes`, either because `addr_from_alloc_id` reserved
413    // memory space in the past, or by doing the pre-allocation right upon being called.
414    fn get_global_alloc_bytes(
415        &self,
416        id: AllocId,
417        bytes: &[u8],
418        align: Align,
419    ) -> InterpResult<'tcx, MiriAllocBytes> {
420        let this = self.eval_context_ref();
421        assert!(this.tcx.try_get_global_alloc(id).is_some());
422        if !this.machine.native_lib.is_empty() {
423            // In native lib mode, MiriAllocBytes for global allocations are handled via `prepared_alloc_bytes`.
424            // This additional call ensures that some `MiriAllocBytes` are always prepared, just in case
425            // this function gets called before the first time `addr_from_alloc_id` gets called.
426            this.addr_from_alloc_id(id, Some(MiriMemoryKind::Global.into()))?;
427            // The memory we need here will have already been allocated during an earlier call to
428            // `addr_from_alloc_id` for this allocation. So don't create a new `MiriAllocBytes` here, instead
429            // fetch the previously prepared bytes from `prepared_alloc_bytes`.
430            let mut global_state = this.machine.alloc_addresses.borrow_mut();
431            let mut prepared_alloc_bytes = global_state
432                .prepared_alloc_bytes
433                .as_mut()
434                .unwrap()
435                .remove(&id)
436                .unwrap_or_else(|| panic!("alloc bytes for {id:?} have not been prepared"));
437            // Sanity-check that the prepared allocation has the right size and alignment.
438            assert!(prepared_alloc_bytes.as_ptr().is_aligned_to(align.bytes_usize()));
439            assert_eq!(prepared_alloc_bytes.len(), bytes.len());
440            // Copy allocation contents into prepared memory.
441            prepared_alloc_bytes.copy_from_slice(bytes);
442            interp_ok(prepared_alloc_bytes)
443        } else {
444            let params = this.machine.get_default_alloc_params();
445            interp_ok(MiriAllocBytes::from_bytes(std::borrow::Cow::Borrowed(bytes), align, params))
446        }
447    }
448
449    /// When a pointer is used for a memory access, this computes where in which allocation the
450    /// access is going.
451    fn ptr_get_alloc(
452        &self,
453        ptr: interpret::Pointer<Provenance>,
454        size: i64,
455    ) -> Option<(AllocId, Size)> {
456        let this = self.eval_context_ref();
457
458        let (tag, addr) = ptr.into_raw_parts(); // addr is absolute (Miri provenance)
459
460        let alloc_id = if let Provenance::Concrete { alloc_id, .. } = tag {
461            alloc_id
462        } else {
463            // A wildcard pointer.
464            this.alloc_id_from_addr(addr.bytes(), size)?
465        };
466
467        // This cannot fail: since we already have a pointer with that provenance, adjust_alloc_root_pointer
468        // must have been called in the past, so we can just look up the address in the map.
469        let base_addr = *this.machine.alloc_addresses.borrow().base_addr.get(&alloc_id).unwrap();
470
471        // Wrapping "addr - base_addr"
472        let rel_offset = this.truncate_to_target_usize(addr.bytes().wrapping_sub(base_addr));
473        Some((alloc_id, Size::from_bytes(rel_offset)))
474    }
475
476    /// Return a list of all exposed allocations.
477    fn exposed_allocs(&self) -> Vec<AllocId> {
478        let this = self.eval_context_ref();
479        this.machine.alloc_addresses.borrow().exposed.iter().copied().collect()
480    }
481}
482
483impl<'tcx> MiriMachine<'tcx> {
484    pub fn free_alloc_id(&mut self, dead_id: AllocId, size: Size, align: Align, kind: MemoryKind) {
485        let global_state = self.alloc_addresses.get_mut();
486        let rng = self.rng.get_mut();
487
488        // We can *not* remove this from `base_addr`, since the interpreter design requires that we
489        // be able to retrieve an AllocId + offset for any memory access *before* we check if the
490        // access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory
491        // access to determine the allocation ID and offset -- and there can still be pointers with
492        // `dead_id` that one can attempt to use for a memory access. `ptr_get_alloc` may return
493        // `None` only if the pointer truly has no provenance (this ensures consistent error
494        // messages).
495        // However, we *can* remove it from `int_to_ptr_map`, since any wildcard pointers that exist
496        // can no longer actually be accessing that address. This ensures `alloc_id_from_addr` never
497        // returns a dead allocation.
498        // To avoid a linear scan we first look up the address in `base_addr`, and then find it in
499        // `int_to_ptr_map`.
500        let addr = *global_state.base_addr.get(&dead_id).unwrap();
501        let pos =
502            global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr).unwrap();
503        let removed = global_state.int_to_ptr_map.remove(pos);
504        assert_eq!(removed, (addr, dead_id)); // double-check that we removed the right thing
505        // We can also remove it from `exposed`, since this allocation can anyway not be returned by
506        // `alloc_id_from_addr` any more.
507        global_state.exposed.remove(&dead_id);
508        // Also remember this address for future reuse.
509        if let Some((_addr_gen, reuse)) = global_state.address_generation.as_mut() {
510            let thread = self.threads.active_thread();
511            reuse.add_addr(rng, addr, size, align, kind, thread, || {
512                // We cannot be in GenMC mode as then `address_generation` is `None`. We cannot use
513                // `self.release_clock` as `self.alloc_addresses` is borrowed.
514                if let Some(data_race) = self.data_race.as_vclocks_ref() {
515                    data_race.release_clock(&self.threads, |clock| clock.clone())
516                } else {
517                    VClock::default()
518                }
519            })
520        }
521    }
522}