miri/alloc_addresses/mod.rs
//! This module is responsible for managing the absolute addresses that allocations are located at,
//! and for casting between pointers and integers based on those addresses.

mod address_generator;
mod reuse_pool;

use std::cell::RefCell;

use rustc_abi::{Align, Size};
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_middle::ty::TyCtxt;

pub use self::address_generator::AddressGenerator;
use self::reuse_pool::ReusePool;
use crate::concurrency::VClock;
use crate::diagnostics::SpanDedupDiagnostic;
use crate::*;

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ProvenanceMode {
    /// We support `expose_provenance`/`with_exposed_provenance` via "wildcard" provenance.
    /// However, we warn on `with_exposed_provenance` to alert the user of the precision loss.
    Default,
    /// Like `Default`, but without the warning.
    Permissive,
    /// We error on `with_exposed_provenance`, ensuring no precision loss.
    Strict,
}

pub type GlobalState = RefCell<GlobalStateInner>;

#[derive(Debug)]
pub struct GlobalStateInner {
    /// This is used as a map between the address of each allocation and its `AllocId`. It is always
    /// sorted by address. We cannot use a `HashMap` since we can be given an address that is offset
    /// from the base address, and we need to find the `AllocId` it belongs to. This is not the
    /// *full* inverse of `base_addr`; dead allocations have been removed.
    /// Note that in GenMC mode, dead allocations are *not* removed -- and also, addresses are never
    /// reused. This lets us use the address as a cross-execution-stable identifier for an allocation.
    int_to_ptr_map: Vec<(u64, AllocId)>,
    /// The base address for each allocation. We cannot put that into
    /// `AllocExtra` because function pointers also have a base address, and
    /// they do not have an `AllocExtra`.
    /// This is the inverse of `int_to_ptr_map`.
    base_addr: FxHashMap<AllocId, u64>,
    /// The set of exposed allocations. This cannot be put
    /// into `AllocExtra` for the same reason as `base_addr`.
    exposed: FxHashSet<AllocId>,
    /// The provenance to use for int2ptr casts
    provenance_mode: ProvenanceMode,
    /// The generator for new addresses in a given range, and a pool for address reuse. This is
    /// `None` if addresses are generated elsewhere (in native-lib mode or with GenMC).
    address_generation: Option<(AddressGenerator, ReusePool)>,
    /// Native-lib mode only: Temporarily store prepared memory space for global allocations the
    /// first time their memory address is required. This is used to ensure that the memory is
    /// allocated before Miri assigns it an internal address, which is important for matching the
    /// internal address to the machine address so FFI can read from pointers.
    prepared_alloc_bytes: Option<FxHashMap<AllocId, MiriAllocBytes>>,
}

impl VisitProvenance for GlobalStateInner {
    fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
        let GlobalStateInner {
            int_to_ptr_map: _,
            base_addr: _,
            prepared_alloc_bytes: _,
            exposed: _,
            address_generation: _,
            provenance_mode: _,
        } = self;
        // Though base_addr, int_to_ptr_map, and exposed contain AllocIds, we do not want to visit them.
        // int_to_ptr_map and exposed must contain only live allocations, and those
        // are never garbage collected.
        // base_addr is only relevant if we have a pointer to an AllocId and need to look up its
        // base address; so if an AllocId is not reachable from somewhere else we can remove it
        // here.
    }
}

impl GlobalStateInner {
    pub fn new<'tcx>(config: &MiriConfig, stack_addr: u64, tcx: TyCtxt<'tcx>) -> Self {
        GlobalStateInner {
            int_to_ptr_map: Vec::default(),
            base_addr: FxHashMap::default(),
            exposed: FxHashSet::default(),
            provenance_mode: config.provenance_mode,
            address_generation: (config.native_lib.is_empty() && config.genmc_config.is_none())
                .then(|| {
                    (
                        AddressGenerator::new(stack_addr..tcx.target_usize_max()),
                        ReusePool::new(config),
                    )
                }),
            prepared_alloc_bytes: (!config.native_lib.is_empty()).then(FxHashMap::default),
        }
    }

    pub fn remove_unreachable_allocs(&mut self, allocs: &LiveAllocs<'_, '_>) {
        // `exposed` and `int_to_ptr_map` are cleared immediately when an allocation
        // is freed, so `base_addr` is the only one we have to clean up based on the GC.
        self.base_addr.retain(|id, _| allocs.is_live(*id));
    }
}

impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {}
trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
    fn addr_from_alloc_id_uncached(
        &self,
        global_state: &mut GlobalStateInner,
        alloc_id: AllocId,
        memory_kind: MemoryKind,
    ) -> InterpResult<'tcx, u64> {
        let this = self.eval_context_ref();
        let info = this.get_alloc_info(alloc_id);

        // This is either called immediately after allocation (and then cached), or when
        // adjusting `tcx` pointers (which never get freed). So assert that we are looking
        // at a live allocation. This also ensures that we never re-assign an address to an
        // allocation that previously had an address, but then was freed and the address
        // information was removed.
        assert!(!matches!(info.kind, AllocKind::Dead));

        // TypeId allocations always have a "base address" of 0 (i.e., the relative offset is the
        // hash fragment and therefore equal to the actual integer value).
        if matches!(info.kind, AllocKind::TypeId) {
            return interp_ok(0);
        }

        // Miri's address assignment leaks state across thread boundaries, which is incompatible
        // with GenMC execution. So we instead let GenMC assign addresses to allocations.
        if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() {
            let addr =
                genmc_ctx.handle_alloc(this, alloc_id, info.size, info.align, memory_kind)?;
            return interp_ok(addr);
        }

        // This allocation does not have a base address yet, pick or reuse one.
        if !this.machine.native_lib.is_empty() {
            // In native lib mode, we use the "real" address of the bytes for this allocation.
            // This ensures the interpreted program and native code have the same view of memory.
            let params = this.machine.get_default_alloc_params();
            let base_ptr = match info.kind {
                AllocKind::LiveData => {
                    if memory_kind == MiriMemoryKind::Global.into() {
                        // For new global allocations, we always pre-allocate the memory to be able to use the machine address directly.
                        let prepared_bytes = MiriAllocBytes::zeroed(info.size, info.align, params)
                            .unwrap_or_else(|| {
                                panic!("Miri ran out of memory: cannot create allocation of {size:?} bytes", size = info.size)
                            });
                        let ptr = prepared_bytes.as_ptr();
                        // Store prepared allocation to be picked up for use later.
                        global_state
                            .prepared_alloc_bytes
                            .as_mut()
                            .unwrap()
                            .try_insert(alloc_id, prepared_bytes)
                            .unwrap();
                        ptr
                    } else {
                        // Non-global allocations are already in memory at this point so
                        // we can just get a pointer to where their data is stored.
                        this.get_alloc_bytes_unchecked_raw(alloc_id)?
                    }
                }
                AllocKind::Function | AllocKind::VTable => {
                    // Allocate some dummy memory to get a unique address for this function/vtable.
                    let alloc_bytes = MiriAllocBytes::from_bytes(
                        &[0u8; 1],
                        Align::from_bytes(1).unwrap(),
                        params,
                    );
                    let ptr = alloc_bytes.as_ptr();
                    // Leak the underlying memory to ensure it remains unique.
                    std::mem::forget(alloc_bytes);
                    ptr
                }
                AllocKind::TypeId | AllocKind::Dead => unreachable!(),
            };
            // We don't have to expose this pointer yet, we do that in `prepare_for_native_call`.
            return interp_ok(base_ptr.addr().to_u64());
        }
        // We are not in native lib or genmc mode, so we control the addresses ourselves.
        let (addr_gen, reuse) = global_state.address_generation.as_mut().unwrap();
        let mut rng = this.machine.rng.borrow_mut();
        if let Some((reuse_addr, clock)) =
            reuse.take_addr(&mut *rng, info.size, info.align, memory_kind, this.active_thread())
        {
            if let Some(clock) = clock {
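                // The reuse pool hands back the clock released by the thread that freed this
                // address (see `free_alloc_id`), so acquiring it ensures that whatever happened
                // before the deallocation also happens-before our reuse of the address.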
                this.acquire_clock(&clock)?;
            }
            interp_ok(reuse_addr)
        } else {
            // We have to pick a fresh address.
            let new_addr = addr_gen.generate(info.size, info.align, &mut rng)?;

            // If we filled up more than half the address space, start aggressively reusing
            // addresses to avoid running out.
            let remaining_range = addr_gen.get_remaining();
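            // `get_remaining()` yields the range of addresses that are still available, so once
            // its start has moved past half of its end, more than half of the usable address
            // space has been handed out.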
            if remaining_range.start > remaining_range.end / 2 {
                reuse.address_space_shortage();
            }

            interp_ok(new_addr)
        }
    }
}

impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
    // Returns the `AllocId` that corresponds to the specified addr,
    // or `None` if the addr is out of bounds.
    fn alloc_id_from_addr(&self, addr: u64, size: i64) -> Option<AllocId> {
        let this = self.eval_context_ref();
        let global_state = this.machine.alloc_addresses.borrow();
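        // In `Strict` mode, int2ptr casts are rejected and no allocation is ever marked as
        // exposed, so there are no wildcard pointers whose address we would have to resolve here.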
        assert!(global_state.provenance_mode != ProvenanceMode::Strict);

        // We always search the allocation to the right of this address. So if the size is strictly
        // negative, we have to search for `addr-1` instead.
        let addr = if size >= 0 { addr } else { addr.saturating_sub(1) };
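        // `binary_search_by_key` returns `Ok(pos)` if `addr` is exactly some allocation's base
        // address, and `Err(pos)` with the would-be insertion position otherwise, so the
        // allocation starting below `addr` (if any) sits at `pos - 1`. For example, with base
        // addresses 0x100 and 0x200, an `addr` of 0x150 yields `Err(1)` and we then check the
        // allocation based at 0x100.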
        let pos = global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr);

        // Determine the in-bounds provenance for this pointer.
        let alloc_id = match pos {
            Ok(pos) => Some(global_state.int_to_ptr_map[pos].1),
            Err(0) => None,
            Err(pos) => {
                // This is the largest of the addresses smaller than `int`,
                // i.e. the greatest lower bound (glb)
                let (glb, alloc_id) = global_state.int_to_ptr_map[pos - 1];
                // This never overflows because `addr >= glb`
                let offset = addr - glb;
                // We require this to be strict in-bounds of the allocation. This arm is only
                // entered for addresses that are not the base address, so even zero-sized
                // allocations will get recognized at their base address -- but all other
                // allocations will *not* be recognized at their "end" address.
                let size = this.get_alloc_info(alloc_id).size;
                if offset < size.bytes() { Some(alloc_id) } else { None }
            }
        }?;

        // We only use this provenance if it has been exposed.
        if global_state.exposed.contains(&alloc_id) {
            // This must still be live, since we remove allocations from `int_to_ptr_map` when they get freed.
            debug_assert!(this.is_alloc_live(alloc_id));
            Some(alloc_id)
        } else {
            None
        }
    }

    /// Returns the base address of an allocation, or an error if no base address could be found.
    ///
    /// # Panics
    /// Panics if `memory_kind` is `None` and `alloc_id` has not been cached yet; the first call to
    /// this function for a given `alloc_id` must therefore provide the `memory_kind`.
    fn addr_from_alloc_id(
        &self,
        alloc_id: AllocId,
        memory_kind: Option<MemoryKind>,
    ) -> InterpResult<'tcx, u64> {
        let this = self.eval_context_ref();
        let mut global_state = this.machine.alloc_addresses.borrow_mut();
        let global_state = &mut *global_state;

        match global_state.base_addr.get(&alloc_id) {
            Some(&addr) => interp_ok(addr),
            None => {
                // First time we're looking for the absolute address of this allocation.
                let memory_kind =
                    memory_kind.expect("memory_kind is required since alloc_id is not cached");
                let base_addr =
                    this.addr_from_alloc_id_uncached(global_state, alloc_id, memory_kind)?;
                trace!("Assigning base address {:#x} to allocation {:?}", base_addr, alloc_id);

                // Store address in cache.
                global_state.base_addr.try_insert(alloc_id, base_addr).unwrap();

                // Also maintain the opposite mapping in `int_to_ptr_map`, ensuring we keep it
                // sorted. We have a fast-path for the common case that this address is bigger than
                // all previous ones. We skip this for allocations at address 0; those can't be
                // real, they must be TypeId "fake allocations".
                if base_addr != 0 {
                    let pos = if global_state
                        .int_to_ptr_map
                        .last()
                        .is_some_and(|(last_addr, _)| *last_addr < base_addr)
                    {
                        global_state.int_to_ptr_map.len()
                    } else {
                        global_state
                            .int_to_ptr_map
                            .binary_search_by_key(&base_addr, |(addr, _)| *addr)
                            .unwrap_err()
                    };
                    global_state.int_to_ptr_map.insert(pos, (base_addr, alloc_id));
                }

                interp_ok(base_addr)
            }
        }
    }

    fn expose_provenance(&self, provenance: Provenance) -> InterpResult<'tcx> {
        let this = self.eval_context_ref();
        let mut global_state = this.machine.alloc_addresses.borrow_mut();

        let (alloc_id, tag) = match provenance {
            Provenance::Concrete { alloc_id, tag } => (alloc_id, tag),
            Provenance::Wildcard => {
                // No need to do anything for wildcard pointers, since
                // their provenance has already been exposed.
                return interp_ok(());
            }
        };

        // In strict mode, we don't need this, so we can save some cycles by not tracking it.
        if global_state.provenance_mode == ProvenanceMode::Strict {
            return interp_ok(());
        }
        // Exposing a dead alloc is a no-op, because it's not possible to get a dead allocation
        // via int2ptr.
        if !this.is_alloc_live(alloc_id) {
            return interp_ok(());
        }
        trace!("Exposing allocation id {alloc_id:?}");
        global_state.exposed.insert(alloc_id);
        // Release the global state before we call `expose_tag`, which may call `get_alloc_info_extra`,
        // which may need access to the global state.
        drop(global_state);
        if this.machine.borrow_tracker.is_some() {
            this.expose_tag(alloc_id, tag)?;
        }
        interp_ok(())
    }

    fn ptr_from_addr_cast(&self, addr: u64) -> InterpResult<'tcx, Pointer> {
        trace!("Casting {:#x} to a pointer", addr);

        let this = self.eval_context_ref();
        let global_state = this.machine.alloc_addresses.borrow();

        // Potentially emit a warning.
        match global_state.provenance_mode {
            ProvenanceMode::Default => {
                // The first time this happens at a particular location, print a warning.
                static DEDUP: SpanDedupDiagnostic = SpanDedupDiagnostic::new();
                this.dedup_diagnostic(&DEDUP, |first| {
                    NonHaltingDiagnostic::Int2Ptr { details: first }
                });
            }
            ProvenanceMode::Strict => {
                throw_machine_stop!(TerminationInfo::Int2PtrWithStrictProvenance);
            }
            ProvenanceMode::Permissive => {}
        }

        // We do *not* look up the `AllocId` here! This is a `ptr as usize` cast, and it is
        // completely legal to do a cast and then `wrapping_offset` to another allocation and only
        // *then* do a memory access. So the allocation that the pointer happens to point to on a
        // cast is fairly irrelevant. Instead we generate this as a "wildcard" pointer, such that
        // *every time the pointer is used*, we do an `AllocId` lookup to find the (exposed)
        // allocation it might be referencing.
        interp_ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr)))
    }

    /// Convert a relative (tcx) pointer to a Miri pointer.
    fn adjust_alloc_root_pointer(
        &self,
        ptr: interpret::Pointer<CtfeProvenance>,
        tag: BorTag,
        kind: MemoryKind,
    ) -> InterpResult<'tcx, interpret::Pointer<Provenance>> {
        let this = self.eval_context_ref();

        let (prov, offset) = ptr.prov_and_relative_offset();
        let alloc_id = prov.alloc_id();

        // Get a pointer to the beginning of this allocation.
        let base_addr = this.addr_from_alloc_id(alloc_id, Some(kind))?;
        let base_ptr = interpret::Pointer::new(
            Provenance::Concrete { alloc_id, tag },
            Size::from_bytes(base_addr),
        );
        // Add offset with the right kind of pointer-overflowing arithmetic.
        interp_ok(base_ptr.wrapping_offset(offset, this))
    }

    // This returns some prepared `MiriAllocBytes`: either `addr_from_alloc_id` already reserved
    // the memory space in the past, or we perform the pre-allocation right here.
    fn get_global_alloc_bytes(
        &self,
        id: AllocId,
        bytes: &[u8],
        align: Align,
    ) -> InterpResult<'tcx, MiriAllocBytes> {
        let this = self.eval_context_ref();
        assert!(this.tcx.try_get_global_alloc(id).is_some());
        if !this.machine.native_lib.is_empty() {
            // In native lib mode, MiriAllocBytes for global allocations are handled via `prepared_alloc_bytes`.
            // This additional call ensures that some `MiriAllocBytes` are always prepared, just in case
            // this function gets called before the first time `addr_from_alloc_id` gets called.
            this.addr_from_alloc_id(id, Some(MiriMemoryKind::Global.into()))?;
            // The memory we need here will have already been allocated during an earlier call to
            // `addr_from_alloc_id` for this allocation. So don't create a new `MiriAllocBytes` here, instead
            // fetch the previously prepared bytes from `prepared_alloc_bytes`.
            let mut global_state = this.machine.alloc_addresses.borrow_mut();
            let mut prepared_alloc_bytes = global_state
                .prepared_alloc_bytes
                .as_mut()
                .unwrap()
                .remove(&id)
                .unwrap_or_else(|| panic!("alloc bytes for {id:?} have not been prepared"));
            // Sanity-check that the prepared allocation has the right size and alignment.
            assert!(prepared_alloc_bytes.as_ptr().is_aligned_to(align.bytes_usize()));
            assert_eq!(prepared_alloc_bytes.len(), bytes.len());
            // Copy allocation contents into prepared memory.
            prepared_alloc_bytes.copy_from_slice(bytes);
            interp_ok(prepared_alloc_bytes)
        } else {
            let params = this.machine.get_default_alloc_params();
            interp_ok(MiriAllocBytes::from_bytes(std::borrow::Cow::Borrowed(bytes), align, params))
        }
    }

    /// When a pointer is used for a memory access, this computes which allocation the access is
    /// going to, and at which offset within it.
    fn ptr_get_alloc(
        &self,
        ptr: interpret::Pointer<Provenance>,
        size: i64,
    ) -> Option<(AllocId, Size)> {
        let this = self.eval_context_ref();

        let (tag, addr) = ptr.into_raw_parts(); // addr is absolute (Miri provenance)

        let alloc_id = if let Provenance::Concrete { alloc_id, .. } = tag {
            alloc_id
        } else {
            // A wildcard pointer.
            this.alloc_id_from_addr(addr.bytes(), size)?
        };

        // This cannot fail: since we already have a pointer with that provenance, adjust_alloc_root_pointer
        // must have been called in the past, so we can just look up the address in the map.
        let base_addr = *this.machine.alloc_addresses.borrow().base_addr.get(&alloc_id).unwrap();

        // Wrapping "addr - base_addr"
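        // (The subtraction may wrap because a pointer can be `wrapping_offset`ed to before its
        // allocation's base address; the result is then truncated to the target's usize width.)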
        let rel_offset = this.truncate_to_target_usize(addr.bytes().wrapping_sub(base_addr));
        Some((alloc_id, Size::from_bytes(rel_offset)))
    }

    /// Return a list of all exposed allocations.
    fn exposed_allocs(&self) -> Vec<AllocId> {
        let this = self.eval_context_ref();
        this.machine.alloc_addresses.borrow().exposed.iter().copied().collect()
    }
}

impl<'tcx> MiriMachine<'tcx> {
    pub fn free_alloc_id(&mut self, dead_id: AllocId, size: Size, align: Align, kind: MemoryKind) {
        let global_state = self.alloc_addresses.get_mut();
        let rng = self.rng.get_mut();

        // We can *not* remove this from `base_addr`, since the interpreter design requires that we
        // be able to retrieve an AllocId + offset for any memory access *before* we check if the
        // access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory
        // access to determine the allocation ID and offset -- and there can still be pointers with
        // `dead_id` that one can attempt to use for a memory access. `ptr_get_alloc` may return
        // `None` only if the pointer truly has no provenance (this ensures consistent error
        // messages).
        // However, we *can* remove it from `int_to_ptr_map`, since any wildcard pointers that exist
        // can no longer actually be accessing that address. This ensures `alloc_id_from_addr` never
        // returns a dead allocation.
        // To avoid a linear scan we first look up the address in `base_addr`, and then find it in
        // `int_to_ptr_map`.
        let addr = *global_state.base_addr.get(&dead_id).unwrap();
        let pos =
            global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr).unwrap();
        let removed = global_state.int_to_ptr_map.remove(pos);
        assert_eq!(removed, (addr, dead_id)); // double-check that we removed the right thing
        // We can also remove it from `exposed`, since this allocation cannot be returned by
        // `alloc_id_from_addr` any more anyway.
        global_state.exposed.remove(&dead_id);
        // Also remember this address for future reuse.
        if let Some((_addr_gen, reuse)) = global_state.address_generation.as_mut() {
            let thread = self.threads.active_thread();
            reuse.add_addr(rng, addr, size, align, kind, thread, || {
                // We cannot be in GenMC mode as then `address_generation` is `None`. We cannot use
                // `self.release_clock` as `self.alloc_addresses` is borrowed.
                if let Some(data_race) = self.data_race.as_vclocks_ref() {
                    data_race.release_clock(&self.threads, |clock| clock.clone())
                } else {
                    VClock::default()
                }
            })
        }
    }
}