miri/alloc_addresses/mod.rs
//! This module is responsible for managing the absolute addresses that allocations are located at,
//! and for casting between pointers and integers based on those addresses.
3
4mod reuse_pool;
5
6use std::cell::RefCell;
7use std::cmp::max;
8
9use rand::Rng;
10use rustc_abi::{Align, Size};
11use rustc_data_structures::fx::{FxHashMap, FxHashSet};
12
13use self::reuse_pool::ReusePool;
14use crate::concurrency::VClock;
15use crate::*;
16
/// Selects how integer-to-pointer casts (`with_exposed_provenance`) are treated.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ProvenanceMode {
    /// `expose_provenance`/`with_exposed_provenance` are supported through "wildcard"
    /// provenance, and a warning on `with_exposed_provenance` alerts the user to the
    /// resulting loss of precision.
    Default,
    /// Same as `Default`, except that no warning is emitted.
    Permissive,
    /// `with_exposed_provenance` is an error, which guarantees that no precision is lost.
    Strict,
}
27
28pub type GlobalState = RefCell<GlobalStateInner>;
29
30#[derive(Debug)]
31pub struct GlobalStateInner {
32 /// This is used as a map between the address of each allocation and its `AllocId`. It is always
33 /// sorted by address. We cannot use a `HashMap` since we can be given an address that is offset
34 /// from the base address, and we need to find the `AllocId` it belongs to. This is not the
35 /// *full* inverse of `base_addr`; dead allocations have been removed.
36 int_to_ptr_map: Vec<(u64, AllocId)>,
37 /// The base address for each allocation. We cannot put that into
38 /// `AllocExtra` because function pointers also have a base address, and
39 /// they do not have an `AllocExtra`.
40 /// This is the inverse of `int_to_ptr_map`.
41 base_addr: FxHashMap<AllocId, u64>,
42 /// Temporarily store prepared memory space for global allocations the first time their memory
43 /// address is required. This is used to ensure that the memory is allocated before Miri assigns
44 /// it an internal address, which is important for matching the internal address to the machine
45 /// address so FFI can read from pointers.
46 prepared_alloc_bytes: FxHashMap<AllocId, MiriAllocBytes>,
47 /// A pool of addresses we can reuse for future allocations.
48 reuse: ReusePool,
49 /// Whether an allocation has been exposed or not. This cannot be put
50 /// into `AllocExtra` for the same reason as `base_addr`.
51 exposed: FxHashSet<AllocId>,
52 /// This is used as a memory address when a new pointer is casted to an integer. It
53 /// is always larger than any address that was previously made part of a block.
54 next_base_addr: u64,
55 /// The provenance to use for int2ptr casts
56 provenance_mode: ProvenanceMode,
57}
58
59impl VisitProvenance for GlobalStateInner {
60 fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
61 let GlobalStateInner {
62 int_to_ptr_map: _,
63 base_addr: _,
64 prepared_alloc_bytes: _,
65 reuse: _,
66 exposed: _,
67 next_base_addr: _,
68 provenance_mode: _,
69 } = self;
70 // Though base_addr, int_to_ptr_map, and exposed contain AllocIds, we do not want to visit them.
71 // int_to_ptr_map and exposed must contain only live allocations, and those
72 // are never garbage collected.
73 // base_addr is only relevant if we have a pointer to an AllocId and need to look up its
74 // base address; so if an AllocId is not reachable from somewhere else we can remove it
75 // here.
76 }
77}
78
79impl GlobalStateInner {
80 pub fn new(config: &MiriConfig, stack_addr: u64) -> Self {
81 GlobalStateInner {
82 int_to_ptr_map: Vec::default(),
83 base_addr: FxHashMap::default(),
84 prepared_alloc_bytes: FxHashMap::default(),
85 reuse: ReusePool::new(config),
86 exposed: FxHashSet::default(),
87 next_base_addr: stack_addr,
88 provenance_mode: config.provenance_mode,
89 }
90 }
91
92 pub fn remove_unreachable_allocs(&mut self, allocs: &LiveAllocs<'_, '_>) {
93 // `exposed` and `int_to_ptr_map` are cleared immediately when an allocation
94 // is freed, so `base_addr` is the only one we have to clean up based on the GC.
95 self.base_addr.retain(|id, _| allocs.is_live(*id));
96 }
97}
98
/// Rounds `addr` up to the smallest multiple of `align` that is greater than or equal to
/// `addr`.
///
/// # Panics
/// Panics if `align` is zero, or if the rounded-up address does not fit into a `u64`.
fn align_addr(addr: u64, align: u64) -> u64 {
    match addr % align {
        0 => addr,
        // `rem < align`, so `align - rem` cannot underflow. Adding only the missing distance
        // (rather than adding `align` first and subtracting `rem` afterwards) means we only
        // overflow when the aligned address itself is not representable.
        rem => addr.checked_add(align - rem).expect("aligned address overflows u64"),
    }
}
107
108impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {}
109trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
110 fn addr_from_alloc_id_uncached(
111 &self,
112 global_state: &mut GlobalStateInner,
113 alloc_id: AllocId,
114 memory_kind: MemoryKind,
115 ) -> InterpResult<'tcx, u64> {
116 let this = self.eval_context_ref();
117 let info = this.get_alloc_info(alloc_id);
118
119 // Miri's address assignment leaks state across thread boundaries, which is incompatible
120 // with GenMC execution. So we instead let GenMC assign addresses to allocations.
121 if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() {
122 let addr = genmc_ctx.handle_alloc(&this.machine, info.size, info.align, memory_kind)?;
123 return interp_ok(addr);
124 }
125
126 let mut rng = this.machine.rng.borrow_mut();
127 // This is either called immediately after allocation (and then cached), or when
128 // adjusting `tcx` pointers (which never get freed). So assert that we are looking
129 // at a live allocation. This also ensures that we never re-assign an address to an
130 // allocation that previously had an address, but then was freed and the address
131 // information was removed.
132 assert!(!matches!(info.kind, AllocKind::Dead));
133
134 // This allocation does not have a base address yet, pick or reuse one.
135 if !this.machine.native_lib.is_empty() {
136 // In native lib mode, we use the "real" address of the bytes for this allocation.
137 // This ensures the interpreted program and native code have the same view of memory.
138 let params = this.machine.get_default_alloc_params();
139 let base_ptr = match info.kind {
140 AllocKind::LiveData => {
141 if memory_kind == MiriMemoryKind::Global.into() {
142 // For new global allocations, we always pre-allocate the memory to be able use the machine address directly.
143 let prepared_bytes = MiriAllocBytes::zeroed(info.size, info.align, params)
144 .unwrap_or_else(|| {
145 panic!("Miri ran out of memory: cannot create allocation of {size:?} bytes", size = info.size)
146 });
147 let ptr = prepared_bytes.as_ptr();
148 // Store prepared allocation to be picked up for use later.
149 global_state
150 .prepared_alloc_bytes
151 .try_insert(alloc_id, prepared_bytes)
152 .unwrap();
153 ptr
154 } else {
155 // Non-global allocations are already in memory at this point so
156 // we can just get a pointer to where their data is stored.
157 this.get_alloc_bytes_unchecked_raw(alloc_id)?
158 }
159 }
160 AllocKind::Function | AllocKind::VTable => {
161 // Allocate some dummy memory to get a unique address for this function/vtable.
162 let alloc_bytes = MiriAllocBytes::from_bytes(
163 &[0u8; 1],
164 Align::from_bytes(1).unwrap(),
165 params,
166 );
167 let ptr = alloc_bytes.as_ptr();
168 // Leak the underlying memory to ensure it remains unique.
169 std::mem::forget(alloc_bytes);
170 ptr
171 }
172 AllocKind::Dead => unreachable!(),
173 };
174 // We don't have to expose this pointer yet, we do that in `prepare_for_native_call`.
175 return interp_ok(base_ptr.addr().to_u64());
176 }
177 // We are not in native lib mode, so we control the addresses ourselves.
178 if let Some((reuse_addr, clock)) = global_state.reuse.take_addr(
179 &mut *rng,
180 info.size,
181 info.align,
182 memory_kind,
183 this.active_thread(),
184 ) {
185 if let Some(clock) = clock {
186 this.acquire_clock(&clock);
187 }
188 interp_ok(reuse_addr)
189 } else {
190 // We have to pick a fresh address.
191 // Leave some space to the previous allocation, to give it some chance to be less aligned.
192 // We ensure that `(global_state.next_base_addr + slack) % 16` is uniformly distributed.
193 let slack = rng.random_range(0..16);
194 // From next_base_addr + slack, round up to adjust for alignment.
195 let base_addr = global_state
196 .next_base_addr
197 .checked_add(slack)
198 .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
199 let base_addr = align_addr(base_addr, info.align.bytes());
200
201 // Remember next base address. If this allocation is zero-sized, leave a gap of at
202 // least 1 to avoid two allocations having the same base address. (The logic in
203 // `alloc_id_from_addr` assumes unique addresses, and different function/vtable pointers
204 // need to be distinguishable!)
205 global_state.next_base_addr = base_addr
206 .checked_add(max(info.size.bytes(), 1))
207 .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
208 // Even if `Size` didn't overflow, we might still have filled up the address space.
209 if global_state.next_base_addr > this.target_usize_max() {
210 throw_exhaust!(AddressSpaceFull);
211 }
212 // If we filled up more than half the address space, start aggressively reusing
213 // addresses to avoid running out.
214 if global_state.next_base_addr > u64::try_from(this.target_isize_max()).unwrap() {
215 global_state.reuse.address_space_shortage();
216 }
217
218 interp_ok(base_addr)
219 }
220 }
221}
222
223impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
224pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
225 // Returns the `AllocId` that corresponds to the specified addr,
226 // or `None` if the addr is out of bounds.
227 // Setting `only_exposed_allocations` selects whether only exposed allocations are considered.
228 fn alloc_id_from_addr(
229 &self,
230 addr: u64,
231 size: i64,
232 only_exposed_allocations: bool,
233 ) -> Option<AllocId> {
234 let this = self.eval_context_ref();
235 let global_state = this.machine.alloc_addresses.borrow();
236 assert!(global_state.provenance_mode != ProvenanceMode::Strict);
237
238 // We always search the allocation to the right of this address. So if the size is strictly
239 // negative, we have to search for `addr-1` instead.
240 let addr = if size >= 0 { addr } else { addr.saturating_sub(1) };
241 let pos = global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr);
242
243 // Determine the in-bounds provenance for this pointer.
244 let alloc_id = match pos {
245 Ok(pos) => Some(global_state.int_to_ptr_map[pos].1),
246 Err(0) => None,
247 Err(pos) => {
248 // This is the largest of the addresses smaller than `int`,
249 // i.e. the greatest lower bound (glb)
250 let (glb, alloc_id) = global_state.int_to_ptr_map[pos - 1];
251 // This never overflows because `addr >= glb`
252 let offset = addr - glb;
253 // We require this to be strict in-bounds of the allocation. This arm is only
254 // entered for addresses that are not the base address, so even zero-sized
255 // allocations will get recognized at their base address -- but all other
256 // allocations will *not* be recognized at their "end" address.
257 let size = this.get_alloc_info(alloc_id).size;
258 if offset < size.bytes() { Some(alloc_id) } else { None }
259 }
260 }?;
261
262 // We only use this provenance if it has been exposed, or if the caller requested also non-exposed allocations
263 if !only_exposed_allocations || global_state.exposed.contains(&alloc_id) {
264 // This must still be live, since we remove allocations from `int_to_ptr_map` when they get freed.
265 debug_assert!(this.is_alloc_live(alloc_id));
266 Some(alloc_id)
267 } else {
268 None
269 }
270 }
271
272 /// Returns the base address of an allocation, or an error if no base address could be found
273 ///
274 /// # Panics
275 /// If `memory_kind = None` and the `alloc_id` is not cached, meaning that the first call to this function per `alloc_id` must get the `memory_kind`.
276 fn addr_from_alloc_id(
277 &self,
278 alloc_id: AllocId,
279 memory_kind: Option<MemoryKind>,
280 ) -> InterpResult<'tcx, u64> {
281 let this = self.eval_context_ref();
282 let mut global_state = this.machine.alloc_addresses.borrow_mut();
283 let global_state = &mut *global_state;
284
285 match global_state.base_addr.get(&alloc_id) {
286 Some(&addr) => interp_ok(addr),
287 None => {
288 // First time we're looking for the absolute address of this allocation.
289 let memory_kind =
290 memory_kind.expect("memory_kind is required since alloc_id is not cached");
291 let base_addr =
292 this.addr_from_alloc_id_uncached(global_state, alloc_id, memory_kind)?;
293 trace!("Assigning base address {:#x} to allocation {:?}", base_addr, alloc_id);
294
295 // Store address in cache.
296 global_state.base_addr.try_insert(alloc_id, base_addr).unwrap();
297
298 // Also maintain the opposite mapping in `int_to_ptr_map`, ensuring we keep it sorted.
299 // We have a fast-path for the common case that this address is bigger than all previous ones.
300 let pos = if global_state
301 .int_to_ptr_map
302 .last()
303 .is_some_and(|(last_addr, _)| *last_addr < base_addr)
304 {
305 global_state.int_to_ptr_map.len()
306 } else {
307 global_state
308 .int_to_ptr_map
309 .binary_search_by_key(&base_addr, |(addr, _)| *addr)
310 .unwrap_err()
311 };
312 global_state.int_to_ptr_map.insert(pos, (base_addr, alloc_id));
313
314 interp_ok(base_addr)
315 }
316 }
317 }
318
319 fn expose_provenance(&self, provenance: Provenance) -> InterpResult<'tcx> {
320 let this = self.eval_context_ref();
321 let mut global_state = this.machine.alloc_addresses.borrow_mut();
322
323 let (alloc_id, tag) = match provenance {
324 Provenance::Concrete { alloc_id, tag } => (alloc_id, tag),
325 Provenance::Wildcard => {
326 // No need to do anything for wildcard pointers as
327 // their provenances have already been previously exposed.
328 return interp_ok(());
329 }
330 };
331
332 // In strict mode, we don't need this, so we can save some cycles by not tracking it.
333 if global_state.provenance_mode == ProvenanceMode::Strict {
334 return interp_ok(());
335 }
336 // Exposing a dead alloc is a no-op, because it's not possible to get a dead allocation
337 // via int2ptr.
338 if !this.is_alloc_live(alloc_id) {
339 return interp_ok(());
340 }
341 trace!("Exposing allocation id {alloc_id:?}");
342 global_state.exposed.insert(alloc_id);
343 // Release the global state before we call `expose_tag`, which may call `get_alloc_info_extra`,
344 // which may need access to the global state.
345 drop(global_state);
346 if this.machine.borrow_tracker.is_some() {
347 this.expose_tag(alloc_id, tag)?;
348 }
349 interp_ok(())
350 }
351
352 fn ptr_from_addr_cast(&self, addr: u64) -> InterpResult<'tcx, Pointer> {
353 trace!("Casting {:#x} to a pointer", addr);
354
355 let this = self.eval_context_ref();
356 let global_state = this.machine.alloc_addresses.borrow();
357
358 // Potentially emit a warning.
359 match global_state.provenance_mode {
360 ProvenanceMode::Default => {
361 // The first time this happens at a particular location, print a warning.
362 let mut int2ptr_warned = this.machine.int2ptr_warned.borrow_mut();
363 let first = int2ptr_warned.is_empty();
364 if int2ptr_warned.insert(this.cur_span()) {
365 // Newly inserted, so first time we see this span.
366 this.emit_diagnostic(NonHaltingDiagnostic::Int2Ptr { details: first });
367 }
368 }
369 ProvenanceMode::Strict => {
370 throw_machine_stop!(TerminationInfo::Int2PtrWithStrictProvenance);
371 }
372 ProvenanceMode::Permissive => {}
373 }
374
375 // We do *not* look up the `AllocId` here! This is a `ptr as usize` cast, and it is
376 // completely legal to do a cast and then `wrapping_offset` to another allocation and only
377 // *then* do a memory access. So the allocation that the pointer happens to point to on a
378 // cast is fairly irrelevant. Instead we generate this as a "wildcard" pointer, such that
379 // *every time the pointer is used*, we do an `AllocId` lookup to find the (exposed)
380 // allocation it might be referencing.
381 interp_ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr)))
382 }
383
384 /// Convert a relative (tcx) pointer to a Miri pointer.
385 fn adjust_alloc_root_pointer(
386 &self,
387 ptr: interpret::Pointer<CtfeProvenance>,
388 tag: BorTag,
389 kind: MemoryKind,
390 ) -> InterpResult<'tcx, interpret::Pointer<Provenance>> {
391 let this = self.eval_context_ref();
392
393 let (prov, offset) = ptr.into_parts(); // offset is relative (AllocId provenance)
394 let alloc_id = prov.alloc_id();
395
396 // Get a pointer to the beginning of this allocation.
397 let base_addr = this.addr_from_alloc_id(alloc_id, Some(kind))?;
398 let base_ptr = interpret::Pointer::new(
399 Provenance::Concrete { alloc_id, tag },
400 Size::from_bytes(base_addr),
401 );
402 // Add offset with the right kind of pointer-overflowing arithmetic.
403 interp_ok(base_ptr.wrapping_offset(offset, this))
404 }
405
406 // This returns some prepared `MiriAllocBytes`, either because `addr_from_alloc_id` reserved
407 // memory space in the past, or by doing the pre-allocation right upon being called.
408 fn get_global_alloc_bytes(
409 &self,
410 id: AllocId,
411 bytes: &[u8],
412 align: Align,
413 ) -> InterpResult<'tcx, MiriAllocBytes> {
414 let this = self.eval_context_ref();
415 assert!(this.tcx.try_get_global_alloc(id).is_some());
416 if !this.machine.native_lib.is_empty() {
417 // In native lib mode, MiriAllocBytes for global allocations are handled via `prepared_alloc_bytes`.
418 // This additional call ensures that some `MiriAllocBytes` are always prepared, just in case
419 // this function gets called before the first time `addr_from_alloc_id` gets called.
420 this.addr_from_alloc_id(id, Some(MiriMemoryKind::Global.into()))?;
421 // The memory we need here will have already been allocated during an earlier call to
422 // `addr_from_alloc_id` for this allocation. So don't create a new `MiriAllocBytes` here, instead
423 // fetch the previously prepared bytes from `prepared_alloc_bytes`.
424 let mut global_state = this.machine.alloc_addresses.borrow_mut();
425 let mut prepared_alloc_bytes = global_state
426 .prepared_alloc_bytes
427 .remove(&id)
428 .unwrap_or_else(|| panic!("alloc bytes for {id:?} have not been prepared"));
429 // Sanity-check that the prepared allocation has the right size and alignment.
430 assert!(prepared_alloc_bytes.as_ptr().is_aligned_to(align.bytes_usize()));
431 assert_eq!(prepared_alloc_bytes.len(), bytes.len());
432 // Copy allocation contents into prepared memory.
433 prepared_alloc_bytes.copy_from_slice(bytes);
434 interp_ok(prepared_alloc_bytes)
435 } else {
436 let params = this.machine.get_default_alloc_params();
437 interp_ok(MiriAllocBytes::from_bytes(std::borrow::Cow::Borrowed(bytes), align, params))
438 }
439 }
440
441 /// When a pointer is used for a memory access, this computes where in which allocation the
442 /// access is going.
443 fn ptr_get_alloc(
444 &self,
445 ptr: interpret::Pointer<Provenance>,
446 size: i64,
447 ) -> Option<(AllocId, Size)> {
448 let this = self.eval_context_ref();
449
450 let (tag, addr) = ptr.into_parts(); // addr is absolute (Tag provenance)
451
452 let alloc_id = if let Provenance::Concrete { alloc_id, .. } = tag {
453 alloc_id
454 } else {
455 // A wildcard pointer.
456 let only_exposed_allocations = true;
457 this.alloc_id_from_addr(addr.bytes(), size, only_exposed_allocations)?
458 };
459
460 // This cannot fail: since we already have a pointer with that provenance, adjust_alloc_root_pointer
461 // must have been called in the past, so we can just look up the address in the map.
462 let base_addr = *this.machine.alloc_addresses.borrow().base_addr.get(&alloc_id).unwrap();
463
464 // Wrapping "addr - base_addr"
465 let rel_offset = this.truncate_to_target_usize(addr.bytes().wrapping_sub(base_addr));
466 Some((alloc_id, Size::from_bytes(rel_offset)))
467 }
468
469 /// Prepare all exposed memory for a native call.
470 /// This overapproximates the modifications which external code might make to memory:
471 /// We set all reachable allocations as initialized, mark all reachable provenances as exposed
472 /// and overwrite them with `Provenance::WILDCARD`.
473 fn prepare_exposed_for_native_call(&mut self) -> InterpResult<'tcx> {
474 let this = self.eval_context_mut();
475 // We need to make a deep copy of this list, but it's fine; it also serves as scratch space
476 // for the search within `prepare_for_native_call`.
477 let exposed: Vec<AllocId> =
478 this.machine.alloc_addresses.get_mut().exposed.iter().copied().collect();
479 this.prepare_for_native_call(exposed)
480 }
481}
482
483impl<'tcx> MiriMachine<'tcx> {
484 pub fn free_alloc_id(&mut self, dead_id: AllocId, size: Size, align: Align, kind: MemoryKind) {
485 let global_state = self.alloc_addresses.get_mut();
486 let rng = self.rng.get_mut();
487
488 // We can *not* remove this from `base_addr`, since the interpreter design requires that we
489 // be able to retrieve an AllocId + offset for any memory access *before* we check if the
490 // access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory
491 // access to determine the allocation ID and offset -- and there can still be pointers with
492 // `dead_id` that one can attempt to use for a memory access. `ptr_get_alloc` may return
493 // `None` only if the pointer truly has no provenance (this ensures consistent error
494 // messages).
495 // However, we *can* remove it from `int_to_ptr_map`, since any wildcard pointers that exist
496 // can no longer actually be accessing that address. This ensures `alloc_id_from_addr` never
497 // returns a dead allocation.
498 // To avoid a linear scan we first look up the address in `base_addr`, and then find it in
499 // `int_to_ptr_map`.
500 let addr = *global_state.base_addr.get(&dead_id).unwrap();
501 let pos =
502 global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr).unwrap();
503 let removed = global_state.int_to_ptr_map.remove(pos);
504 assert_eq!(removed, (addr, dead_id)); // double-check that we removed the right thing
505 // We can also remove it from `exposed`, since this allocation can anyway not be returned by
506 // `alloc_id_from_addr` any more.
507 global_state.exposed.remove(&dead_id);
508 // Also remember this address for future reuse.
509 let thread = self.threads.active_thread();
510 global_state.reuse.add_addr(rng, addr, size, align, kind, thread, || {
511 if let Some(data_race) = self.data_race.as_vclocks_ref() {
512 data_race.release_clock(&self.threads, |clock| clock.clone())
513 } else {
514 VClock::default()
515 }
516 })
517 }
518}
519
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `align_addr` on unaligned, already-aligned and boundary inputs.
    #[test]
    fn test_align_addr() {
        // Unaligned addresses are rounded up to the next multiple of the alignment.
        assert_eq!(align_addr(37, 4), 40);
        assert_eq!(align_addr(9, 8), 16);
        // Addresses that are already aligned are returned unchanged.
        assert_eq!(align_addr(44, 4), 44);
        assert_eq!(align_addr(0, 8), 0);
        // An alignment of 1 never moves the address.
        assert_eq!(align_addr(1, 1), 1);
        assert_eq!(align_addr(u64::MAX, 1), u64::MAX);
    }
}