miri/alloc_addresses/
mod.rs

//! This module is responsible for managing the absolute addresses that allocations are located at,
//! and for casting between pointers and integers based on those addresses.

mod address_generator;
mod reuse_pool;

use std::cell::RefCell;

use rustc_abi::{Align, Size};
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_middle::ty::TyCtxt;

pub use self::address_generator::AddressGenerator;
use self::reuse_pool::ReusePool;
use crate::concurrency::VClock;
use crate::*;

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ProvenanceMode {
    /// We support `expose_provenance`/`with_exposed_provenance` via "wildcard" provenance.
    /// However, we warn on `with_exposed_provenance` to alert the user of the precision loss.
    Default,
    /// Like `Default`, but without the warning.
    Permissive,
    /// We error on `with_exposed_provenance`, ensuring no precision loss.
    Strict,
}
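
// For illustration, a hedged sketch (not part of this module) of how an interpreted
// program exercises these modes, using the standard exposed-provenance APIs:
//
//     let x = 42u8;
//     let addr = (&raw const x).expose_provenance(); // `x`'s allocation becomes exposed
//     let p = std::ptr::with_exposed_provenance::<u8>(addr); // wildcard provenance in Miri
//     let val = unsafe { p.read() };
//
// Under `Default`, the `with_exposed_provenance` cast works but emits a warning; under
// `Permissive` it works silently; under `Strict` it is a hard error.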

pub type GlobalState = RefCell<GlobalStateInner>;

#[derive(Debug)]
pub struct GlobalStateInner {
    /// This is used as a map between the address of each allocation and its `AllocId`. It is always
    /// sorted by address. We cannot use a `HashMap` since we can be given an address that is offset
    /// from the base address, and we need to find the `AllocId` it belongs to. This is not the
    /// *full* inverse of `base_addr`; dead allocations have been removed.
    /// Note that in GenMC mode, dead allocations are *not* removed -- and also, addresses are never
    /// reused. This lets us use the address as a cross-execution-stable identifier for an allocation.
    int_to_ptr_map: Vec<(u64, AllocId)>,
    /// The base address for each allocation. We cannot put that into
    /// `AllocExtra` because function pointers also have a base address, and
    /// they do not have an `AllocExtra`.
    /// This is the inverse of `int_to_ptr_map`.
    base_addr: FxHashMap<AllocId, u64>,
    /// Temporarily store prepared memory space for global allocations the first time their memory
    /// address is required. This is used to ensure that the memory is allocated before Miri assigns
    /// it an internal address, which is important for matching the internal address to the machine
    /// address so FFI can read from pointers.
    prepared_alloc_bytes: FxHashMap<AllocId, MiriAllocBytes>,
    /// A pool of addresses we can reuse for future allocations.
    reuse: ReusePool,
    /// Whether an allocation has been exposed or not. This cannot be put
    /// into `AllocExtra` for the same reason as `base_addr`.
    exposed: FxHashSet<AllocId>,
    /// The generator for new addresses in a given range.
    address_generator: AddressGenerator,
    /// The provenance to use for int2ptr casts.
    provenance_mode: ProvenanceMode,
}

impl VisitProvenance for GlobalStateInner {
    fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
        let GlobalStateInner {
            int_to_ptr_map: _,
            base_addr: _,
            prepared_alloc_bytes: _,
            reuse: _,
            exposed: _,
            address_generator: _,
            provenance_mode: _,
        } = self;
        // Though base_addr, int_to_ptr_map, and exposed contain AllocIds, we do not want to visit them.
        // int_to_ptr_map and exposed must contain only live allocations, and those
        // are never garbage collected.
        // base_addr is only relevant if we have a pointer to an AllocId and need to look up its
        // base address; so if an AllocId is not reachable from somewhere else we can remove it
        // here.
    }
}

impl GlobalStateInner {
    pub fn new<'tcx>(config: &MiriConfig, stack_addr: u64, tcx: TyCtxt<'tcx>) -> Self {
        GlobalStateInner {
            int_to_ptr_map: Vec::default(),
            base_addr: FxHashMap::default(),
            prepared_alloc_bytes: FxHashMap::default(),
            reuse: ReusePool::new(config),
            exposed: FxHashSet::default(),
            address_generator: AddressGenerator::new(stack_addr..tcx.target_usize_max()),
            provenance_mode: config.provenance_mode,
        }
    }

    pub fn remove_unreachable_allocs(&mut self, allocs: &LiveAllocs<'_, '_>) {
        // `exposed` and `int_to_ptr_map` are cleared immediately when an allocation
        // is freed, so `base_addr` is the only one we have to clean up based on the GC.
        self.base_addr.retain(|id, _| allocs.is_live(*id));
    }
}

impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {}
trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
    fn addr_from_alloc_id_uncached(
        &self,
        global_state: &mut GlobalStateInner,
        alloc_id: AllocId,
        memory_kind: MemoryKind,
    ) -> InterpResult<'tcx, u64> {
        let this = self.eval_context_ref();
        let info = this.get_alloc_info(alloc_id);

        // This is either called immediately after allocation (and then cached), or when
        // adjusting `tcx` pointers (which never get freed). So assert that we are looking
        // at a live allocation. This also ensures that we never re-assign an address to an
        // allocation that previously had an address, but then was freed and the address
        // information was removed.
        assert!(!matches!(info.kind, AllocKind::Dead));

        // TypeId allocations always have a "base address" of 0 (i.e., the relative offset is the
        // hash fragment and therefore equal to the actual integer value).
        if matches!(info.kind, AllocKind::TypeId) {
            return interp_ok(0);
        }

        // Miri's address assignment leaks state across thread boundaries, which is incompatible
        // with GenMC execution. So we instead let GenMC assign addresses to allocations.
        if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() {
            let addr =
                genmc_ctx.handle_alloc(this, alloc_id, info.size, info.align, memory_kind)?;
            return interp_ok(addr);
        }

        // This allocation does not have a base address yet, pick or reuse one.
        if !this.machine.native_lib.is_empty() {
            // In native lib mode, we use the "real" address of the bytes for this allocation.
            // This ensures the interpreted program and native code have the same view of memory.
            let params = this.machine.get_default_alloc_params();
            let base_ptr = match info.kind {
                AllocKind::LiveData => {
                    if memory_kind == MiriMemoryKind::Global.into() {
                        // For new global allocations, we always pre-allocate the memory to be
                        // able to use the machine address directly.
                        let prepared_bytes = MiriAllocBytes::zeroed(info.size, info.align, params)
                            .unwrap_or_else(|| {
                                panic!("Miri ran out of memory: cannot create allocation of {size:?} bytes", size = info.size)
                            });
                        let ptr = prepared_bytes.as_ptr();
                        // Store prepared allocation to be picked up for use later.
                        global_state
                            .prepared_alloc_bytes
                            .try_insert(alloc_id, prepared_bytes)
                            .unwrap();
                        ptr
                    } else {
                        // Non-global allocations are already in memory at this point so
                        // we can just get a pointer to where their data is stored.
                        this.get_alloc_bytes_unchecked_raw(alloc_id)?
                    }
                }
                AllocKind::Function | AllocKind::VTable => {
                    // Allocate some dummy memory to get a unique address for this function/vtable.
                    let alloc_bytes = MiriAllocBytes::from_bytes(
                        &[0u8; 1],
                        Align::from_bytes(1).unwrap(),
                        params,
                    );
                    let ptr = alloc_bytes.as_ptr();
                    // Leak the underlying memory to ensure it remains unique.
                    std::mem::forget(alloc_bytes);
                    ptr
                }
                AllocKind::TypeId | AllocKind::Dead => unreachable!(),
            };
            // We don't have to expose this pointer yet, we do that in `prepare_for_native_call`.
            return interp_ok(base_ptr.addr().to_u64());
        }
        // We are not in native lib mode, so we control the addresses ourselves.
        let mut rng = this.machine.rng.borrow_mut();
        if let Some((reuse_addr, clock)) = global_state.reuse.take_addr(
            &mut *rng,
            info.size,
            info.align,
            memory_kind,
            this.active_thread(),
        ) {
            if let Some(clock) = clock {
                this.acquire_clock(&clock)?;
            }
            interp_ok(reuse_addr)
        } else {
            // We have to pick a fresh address.
            let new_addr =
                global_state.address_generator.generate(info.size, info.align, &mut rng)?;

            // If we filled up more than half the address space, start aggressively reusing
            // addresses to avoid running out.
            let remaining_range = global_state.address_generator.get_remaining();
            if remaining_range.start > remaining_range.end / 2 {
                global_state.reuse.address_space_shortage();
            }
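            // Worked example (hypothetical numbers): on a 32-bit target the range ends at
            // 0xFFFF_FFFF, so this check fires once the next fresh base address exceeds
            // 0x7FFF_FFFF, i.e. once more than half the address space has been handed out.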

            interp_ok(new_addr)
        }
    }
}

impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
    /// Returns the `AllocId` that corresponds to the specified address,
    /// or `None` if the address is out of bounds.
    /// If `only_exposed_allocations` is set, only exposed allocations are considered.
    fn alloc_id_from_addr(
        &self,
        addr: u64,
        size: i64,
        only_exposed_allocations: bool,
    ) -> Option<AllocId> {
        let this = self.eval_context_ref();
        let global_state = this.machine.alloc_addresses.borrow();
        assert!(global_state.provenance_mode != ProvenanceMode::Strict);

        // We always search the allocation to the right of this address. So if the size is strictly
        // negative, we have to search for `addr-1` instead.
        let addr = if size >= 0 { addr } else { addr.saturating_sub(1) };
        let pos = global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr);

        // Determine the in-bounds provenance for this pointer.
        let alloc_id = match pos {
            Ok(pos) => Some(global_state.int_to_ptr_map[pos].1),
            Err(0) => None,
            Err(pos) => {
                // This is the largest of the addresses smaller than `addr`,
                // i.e., the greatest lower bound (glb).
                let (glb, alloc_id) = global_state.int_to_ptr_map[pos - 1];
                // This never overflows because `addr >= glb`.
                let offset = addr - glb;
                // We require this to be strict in-bounds of the allocation. This arm is only
                // entered for addresses that are not the base address, so even zero-sized
                // allocations will get recognized at their base address -- but all other
                // allocations will *not* be recognized at their "end" address.
                let size = this.get_alloc_info(alloc_id).size;
                if offset < size.bytes() { Some(alloc_id) } else { None }
            }
        }?;

        // We only use this provenance if it has been exposed, or if the caller also requested
        // non-exposed allocations.
        if !only_exposed_allocations || global_state.exposed.contains(&alloc_id) {
            // This must still be live, since we remove allocations from `int_to_ptr_map` when they get freed.
            // In GenMC mode, we keep all allocations, so this check doesn't apply there.
            if this.machine.data_race.as_genmc_ref().is_none() {
                debug_assert!(this.is_alloc_live(alloc_id));
            }
            Some(alloc_id)
        } else {
            None
        }
    }
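
    // Worked example (hypothetical addresses): suppose `int_to_ptr_map` is
    // `[(0x100, A), (0x200, B)]` and `A` has size 0x20. Looking up 0x100 hits the `Ok` arm
    // and yields `A`; looking up 0x110 hits `Err(1)`, the glb is `(0x100, A)` and the offset
    // 0x10 is in-bounds, so we also get `A`; looking up 0x150 gives offset 0x50 >= 0x20,
    // so the result is `None` -- that address lies in the gap between `A` and `B`.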

    /// Returns the base address of an allocation, or an error if no base address could be found.
    ///
    /// # Panics
    /// Panics if `memory_kind` is `None` and the `alloc_id` is not cached yet; i.e., the first
    /// call to this function for a given `alloc_id` must provide the `memory_kind`.
    fn addr_from_alloc_id(
        &self,
        alloc_id: AllocId,
        memory_kind: Option<MemoryKind>,
    ) -> InterpResult<'tcx, u64> {
        let this = self.eval_context_ref();
        let mut global_state = this.machine.alloc_addresses.borrow_mut();
        let global_state = &mut *global_state;

        match global_state.base_addr.get(&alloc_id) {
            Some(&addr) => interp_ok(addr),
            None => {
                // First time we're looking for the absolute address of this allocation.
                let memory_kind =
                    memory_kind.expect("memory_kind is required since alloc_id is not cached");
                let base_addr =
                    this.addr_from_alloc_id_uncached(global_state, alloc_id, memory_kind)?;
                trace!("Assigning base address {:#x} to allocation {:?}", base_addr, alloc_id);

                // Store address in cache.
                global_state.base_addr.try_insert(alloc_id, base_addr).unwrap();

                // Also maintain the opposite mapping in `int_to_ptr_map`, ensuring we keep it
                // sorted. We have a fast-path for the common case that this address is bigger than
                // all previous ones. We skip this for allocations at address 0; those can't be
                // real, they must be TypeId "fake allocations".
                if base_addr != 0 {
                    let pos = if global_state
                        .int_to_ptr_map
                        .last()
                        .is_some_and(|(last_addr, _)| *last_addr < base_addr)
                    {
                        global_state.int_to_ptr_map.len()
                    } else {
                        global_state
                            .int_to_ptr_map
                            .binary_search_by_key(&base_addr, |(addr, _)| *addr)
                            .unwrap_err()
                    };
                    global_state.int_to_ptr_map.insert(pos, (base_addr, alloc_id));
                }

                interp_ok(base_addr)
            }
        }
    }
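
    // Worked example (hypothetical addresses): if `int_to_ptr_map` is `[(0x100, A), (0x200, B)]`,
    // a new base address 0x300 takes the fast path and is appended at the end, while a new base
    // address 0x180 binary-searches to `Err(1)` and is inserted at position 1, keeping the
    // vector sorted by address.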

    fn expose_provenance(&self, provenance: Provenance) -> InterpResult<'tcx> {
        let this = self.eval_context_ref();
        let mut global_state = this.machine.alloc_addresses.borrow_mut();

        let (alloc_id, tag) = match provenance {
            Provenance::Concrete { alloc_id, tag } => (alloc_id, tag),
            Provenance::Wildcard => {
                // No need to do anything for wildcard pointers, as
                // their provenance has already been exposed.
                return interp_ok(());
            }
        };

        // In strict mode, we don't need this, so we can save some cycles by not tracking it.
        if global_state.provenance_mode == ProvenanceMode::Strict {
            return interp_ok(());
        }
        // Exposing a dead alloc is a no-op, because it's not possible to get a dead allocation
        // via int2ptr.
        if !this.is_alloc_live(alloc_id) {
            return interp_ok(());
        }
        trace!("Exposing allocation id {alloc_id:?}");
        global_state.exposed.insert(alloc_id);
        // Release the global state before we call `expose_tag`, which may call `get_alloc_info_extra`,
        // which may need access to the global state.
        drop(global_state);
        if this.machine.borrow_tracker.is_some() {
            this.expose_tag(alloc_id, tag)?;
        }
        interp_ok(())
    }

    fn ptr_from_addr_cast(&self, addr: u64) -> InterpResult<'tcx, Pointer> {
        trace!("Casting {:#x} to a pointer", addr);

        let this = self.eval_context_ref();
        let global_state = this.machine.alloc_addresses.borrow();

        // Potentially emit a warning.
        match global_state.provenance_mode {
            ProvenanceMode::Default => {
                // The first time this happens at a particular location, print a warning.
                let mut int2ptr_warned = this.machine.int2ptr_warned.borrow_mut();
                let first = int2ptr_warned.is_empty();
                if int2ptr_warned.insert(this.cur_span()) {
                    // Newly inserted, so first time we see this span.
                    this.emit_diagnostic(NonHaltingDiagnostic::Int2Ptr { details: first });
                }
            }
            ProvenanceMode::Strict => {
                throw_machine_stop!(TerminationInfo::Int2PtrWithStrictProvenance);
            }
            ProvenanceMode::Permissive => {}
        }

        // We do *not* look up the `AllocId` here! This is a `ptr as usize` cast, and it is
        // completely legal to do a cast and then `wrapping_offset` to another allocation and only
        // *then* do a memory access. So the allocation that the pointer happens to point to on a
        // cast is fairly irrelevant. Instead we generate this as a "wildcard" pointer, such that
        // *every time the pointer is used*, we do an `AllocId` lookup to find the (exposed)
        // allocation it might be referencing.
        interp_ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr)))
    }
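
    // For illustration, a hedged sketch (not part of this module) of the interpreted-program
    // pattern described above, using only standard pointer APIs:
    //
    //     let (a, b) = (1u8, 2u8);
    //     let addr_a = (&raw const a).expose_provenance();
    //     let addr_b = (&raw const b).expose_provenance();
    //     let p = std::ptr::with_exposed_provenance::<u8>(addr_a); // wildcard pointer
    //     // Legal: `wrapping_offset` makes no in-bounds promises. (This assumes the address
    //     // difference fits in an `isize`.)
    //     let q = p.wrapping_offset(addr_b as isize - addr_a as isize);
    //     let val = unsafe { q.read() }; // only *here* is the AllocId lookup performed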

    /// Convert a relative (tcx) pointer to a Miri pointer.
    fn adjust_alloc_root_pointer(
        &self,
        ptr: interpret::Pointer<CtfeProvenance>,
        tag: BorTag,
        kind: MemoryKind,
    ) -> InterpResult<'tcx, interpret::Pointer<Provenance>> {
        let this = self.eval_context_ref();

        let (prov, offset) = ptr.prov_and_relative_offset();
        let alloc_id = prov.alloc_id();

        // Get a pointer to the beginning of this allocation.
        let base_addr = this.addr_from_alloc_id(alloc_id, Some(kind))?;
        let base_ptr = interpret::Pointer::new(
            Provenance::Concrete { alloc_id, tag },
            Size::from_bytes(base_addr),
        );
        // Add offset with the right kind of pointer-overflowing arithmetic.
        interp_ok(base_ptr.wrapping_offset(offset, this))
    }

    /// Returns some prepared `MiriAllocBytes`, either because `addr_from_alloc_id` reserved
    /// memory space in the past, or by doing the pre-allocation right upon being called.
    fn get_global_alloc_bytes(
        &self,
        id: AllocId,
        bytes: &[u8],
        align: Align,
    ) -> InterpResult<'tcx, MiriAllocBytes> {
        let this = self.eval_context_ref();
        assert!(this.tcx.try_get_global_alloc(id).is_some());
        if !this.machine.native_lib.is_empty() {
            // In native lib mode, MiriAllocBytes for global allocations are handled via `prepared_alloc_bytes`.
            // This additional call ensures that some `MiriAllocBytes` are always prepared, just in case
            // this function gets called before the first time `addr_from_alloc_id` gets called.
            this.addr_from_alloc_id(id, Some(MiriMemoryKind::Global.into()))?;
            // The memory we need here will have already been allocated during an earlier call to
            // `addr_from_alloc_id` for this allocation. So don't create a new `MiriAllocBytes` here, instead
            // fetch the previously prepared bytes from `prepared_alloc_bytes`.
            let mut global_state = this.machine.alloc_addresses.borrow_mut();
            let mut prepared_alloc_bytes = global_state
                .prepared_alloc_bytes
                .remove(&id)
                .unwrap_or_else(|| panic!("alloc bytes for {id:?} have not been prepared"));
            // Sanity-check that the prepared allocation has the right size and alignment.
            assert!(prepared_alloc_bytes.as_ptr().is_aligned_to(align.bytes_usize()));
            assert_eq!(prepared_alloc_bytes.len(), bytes.len());
            // Copy allocation contents into prepared memory.
            prepared_alloc_bytes.copy_from_slice(bytes);
            interp_ok(prepared_alloc_bytes)
        } else {
            let params = this.machine.get_default_alloc_params();
            interp_ok(MiriAllocBytes::from_bytes(std::borrow::Cow::Borrowed(bytes), align, params))
        }
    }

    /// When a pointer is used for a memory access, this computes the allocation the access
    /// is going to, and the offset within that allocation.
    fn ptr_get_alloc(
        &self,
        ptr: interpret::Pointer<Provenance>,
        size: i64,
    ) -> Option<(AllocId, Size)> {
        let this = self.eval_context_ref();

        let (tag, addr) = ptr.into_raw_parts(); // addr is absolute (Miri provenance)

        let alloc_id = if let Provenance::Concrete { alloc_id, .. } = tag {
            alloc_id
        } else {
            // A wildcard pointer.
            let only_exposed_allocations = true;
            this.alloc_id_from_addr(addr.bytes(), size, only_exposed_allocations)?
        };

        // This cannot fail: since we already have a pointer with that provenance, `adjust_alloc_root_pointer`
        // must have been called in the past, so we can just look up the address in the map.
        let base_addr = *this.machine.alloc_addresses.borrow().base_addr.get(&alloc_id).unwrap();

        // Wrapping "addr - base_addr".
        let rel_offset = this.truncate_to_target_usize(addr.bytes().wrapping_sub(base_addr));
        Some((alloc_id, Size::from_bytes(rel_offset)))
    }

    /// Return a list of all exposed allocations.
    fn exposed_allocs(&self) -> Vec<AllocId> {
        let this = self.eval_context_ref();
        this.machine.alloc_addresses.borrow().exposed.iter().copied().collect()
    }
}

impl<'tcx> MiriMachine<'tcx> {
    pub fn free_alloc_id(&mut self, dead_id: AllocId, size: Size, align: Align, kind: MemoryKind) {
        // In GenMC mode, we can't remove dead allocation info: pointers to dead allocations can
        // still be stored in atomics, and we need this info to convert GenMC pointers to Miri pointers.
        // `global_state.reuse` is also unused there, so we can just skip this entire function.
        if self.data_race.as_genmc_ref().is_some() {
            return;
        }

        let global_state = self.alloc_addresses.get_mut();
        let rng = self.rng.get_mut();

        // We can *not* remove this from `base_addr`, since the interpreter design requires that we
        // be able to retrieve an AllocId + offset for any memory access *before* we check if the
        // access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory
        // access to determine the allocation ID and offset -- and there can still be pointers with
        // `dead_id` that one can attempt to use for a memory access. `ptr_get_alloc` may return
        // `None` only if the pointer truly has no provenance (this ensures consistent error
        // messages).
        // However, we *can* remove it from `int_to_ptr_map`, since any wildcard pointers that exist
        // can no longer actually be accessing that address. This ensures `alloc_id_from_addr` never
        // returns a dead allocation.
        // To avoid a linear scan we first look up the address in `base_addr`, and then find it in
        // `int_to_ptr_map`.
        let addr = *global_state.base_addr.get(&dead_id).unwrap();
        let pos =
            global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr).unwrap();
        let removed = global_state.int_to_ptr_map.remove(pos);
        assert_eq!(removed, (addr, dead_id)); // double-check that we removed the right thing
        // We can also remove it from `exposed`, since this allocation can anyway not be returned by
        // `alloc_id_from_addr` any more.
        global_state.exposed.remove(&dead_id);
        // Also remember this address for future reuse.
        let thread = self.threads.active_thread();
        global_state.reuse.add_addr(rng, addr, size, align, kind, thread, || {
            // We already excluded GenMC above. We cannot use `self.release_clock` as
            // `self.alloc_addresses` is borrowed.
            if let Some(data_race) = self.data_race.as_vclocks_ref() {
                data_race.release_clock(&self.threads, |clock| clock.clone())
            } else {
                VClock::default()
            }
        })
    }
}