rustc_middle/mir/interpret/allocation.rs

//! The virtual memory representation of the MIR interpreter.

mod init_mask;
mod provenance_map;

use std::borrow::Cow;
use std::hash::Hash;
use std::ops::{Deref, DerefMut, Range};
use std::{fmt, hash, ptr};

use either::{Left, Right};
use init_mask::*;
pub use init_mask::{InitChunk, InitChunkIter};
use provenance_map::*;
use rustc_abi::{Align, HasDataLayout, Size};
use rustc_ast::Mutability;
use rustc_data_structures::intern::Interned;
use rustc_macros::HashStable;
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};

use super::{
    AllocId, BadBytesAccess, CtfeProvenance, InterpErrorKind, InterpResult, Pointer,
    PointerArithmetic, Provenance, ResourceExhaustionInfo, Scalar, ScalarSizeMismatch,
    UndefinedBehaviorInfo, UnsupportedOpInfo, interp_ok, read_target_uint, write_target_uint,
};
use crate::ty;

/// Functionality required for the bytes of an `Allocation`.
pub trait AllocBytes: Clone + fmt::Debug + Deref<Target = [u8]> + DerefMut<Target = [u8]> {
    /// The type of extra parameters passed in when creating an allocation.
    /// Can be used by `interpret::Machine` instances to make runtime-configuration-dependent
    /// decisions about the allocation strategy.
    type AllocParams;

    /// Create an `AllocBytes` from a slice of `u8`.
    fn from_bytes<'a>(
        slice: impl Into<Cow<'a, [u8]>>,
        _align: Align,
        _params: Self::AllocParams,
    ) -> Self;

    /// Create a zeroed `AllocBytes` of the specified size and alignment.
    /// Returns `None` if we ran out of memory on the host.
    fn zeroed(size: Size, _align: Align, _params: Self::AllocParams) -> Option<Self>;

    /// Gives direct access to the raw underlying storage.
    ///
    /// Crucially this pointer is compatible with:
    /// - other pointers returned by this method, and
    /// - references returned from `deref()`, as long as there was no write.
    fn as_mut_ptr(&mut self) -> *mut u8;

    /// Gives direct access to the raw underlying storage.
    ///
    /// Crucially this pointer is compatible with:
    /// - other pointers returned by this method, and
    /// - references returned from `deref()`, as long as there was no write.
    fn as_ptr(&self) -> *const u8;
}

/// The default `Bytes` type for an `Allocation` is `Box<[u8]>`.
impl AllocBytes for Box<[u8]> {
    type AllocParams = ();

    fn from_bytes<'a>(slice: impl Into<Cow<'a, [u8]>>, _align: Align, _params: ()) -> Self {
        Box::<[u8]>::from(slice.into())
    }

    fn zeroed(size: Size, _align: Align, _params: ()) -> Option<Self> {
        let bytes = Box::<[u8]>::try_new_zeroed_slice(size.bytes().try_into().ok()?).ok()?;
        // SAFETY: the box was zero-allocated, which is a valid initial value for `Box<[u8]>`.
        let bytes = unsafe { bytes.assume_init() };
        Some(bytes)
    }

    fn as_mut_ptr(&mut self) -> *mut u8 {
        Box::as_mut_ptr(self).cast()
    }

    fn as_ptr(&self) -> *const u8 {
        Box::as_ptr(self).cast()
    }
}
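
// Machines other than CTFE can substitute their own `Bytes` type: anything that derefs to `[u8]`
// and implements the trait methods above works, since `Allocation` below is generic over its
// `Bytes` parameter.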

/// This type represents an Allocation in the Miri/CTFE core engine.
///
/// Its public API is rather low-level, working directly with allocation offsets and a custom error
/// type to account for the lack of an AllocId on this level. The Miri/CTFE core engine `memory`
/// module provides higher-level access.
// Note: for performance reasons when interning, some of the `Allocation` fields can be partially
// hashed (see the `Hash` impl below for more details), so the impl is not derived.
#[derive(Clone, Eq, PartialEq)]
#[derive(HashStable)]
pub struct Allocation<Prov: Provenance = CtfeProvenance, Extra = (), Bytes = Box<[u8]>> {
    /// The actual bytes of the allocation.
    /// Note that the bytes of a pointer represent the offset of the pointer.
    bytes: Bytes,
    /// Maps from byte addresses to extra provenance data for each pointer.
    /// Only the first byte of a pointer is inserted into the map; i.e.,
    /// every entry in this map applies to `pointer_size` consecutive bytes starting
    /// at the given offset.
    provenance: ProvenanceMap<Prov>,
    /// Denotes which part of this allocation is initialized.
    ///
    /// Invariant: the uninitialized parts have no provenance.
    init_mask: InitMask,
    /// The alignment of the allocation to detect unaligned reads.
    /// (`Align` guarantees that this is a power of two.)
    pub align: Align,
    /// Whether the allocation is mutable.
    /// Also used by codegen to determine if a static should be put into mutable memory,
    /// which happens for `static mut` and `static` with interior mutability.
    pub mutability: Mutability,
    /// Extra state for the machine.
    pub extra: Extra,
}

/// Helper struct that packs an alignment, mutability, and "all bytes are zero" flag together.
///
/// Alignment values always have 2 free high bits, and we check for this in our [`Encodable`] impl.
struct AllocFlags {
    align: Align,
    mutability: Mutability,
    all_zero: bool,
}
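
// Layout of the packed byte produced by the `Encodable` impl below (derived from the code, shown
// here for reference):
//
//   bit 7      bit 6        bits 5..=0
//   all_zero   mutability   log2(align.bytes())
//
// For example, an immutable all-zero allocation with 8-byte alignment encodes as 0b1000_0011
// (all_zero = 1, Mutability::Not = 0, trailing_zeros(8) = 3).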

impl<E: Encoder> Encodable<E> for AllocFlags {
    fn encode(&self, encoder: &mut E) {
        // Make sure `Align::MAX` can be stored with the high 2 bits unset.
        const {
            let max_supported_align_repr = u8::MAX >> 2;
            let max_supported_align = 1 << max_supported_align_repr;
            assert!(Align::MAX.bytes() <= max_supported_align)
        }

        let mut flags = self.align.bytes().trailing_zeros() as u8;
        flags |= match self.mutability {
            Mutability::Not => 0,
            Mutability::Mut => 1 << 6,
        };
        flags |= (self.all_zero as u8) << 7;
        flags.encode(encoder);
    }
}

impl<D: Decoder> Decodable<D> for AllocFlags {
    fn decode(decoder: &mut D) -> Self {
        let flags: u8 = Decodable::decode(decoder);
        let align = flags & 0b0011_1111;
        let mutability = flags & 0b0100_0000;
        let all_zero = flags & 0b1000_0000;

        let align = Align::from_bytes(1 << align).unwrap();
        let mutability = match mutability {
            0 => Mutability::Not,
            _ => Mutability::Mut,
        };
        let all_zero = all_zero > 0;

        AllocFlags { align, mutability, all_zero }
    }
}

/// Efficiently detect whether a slice of `u8` is all zero.
///
/// This is used in encoding of [`Allocation`] to special-case all-zero allocations. It is only
/// optimized a little, because for many allocations the encoding of the actual bytes does not
/// dominate runtime.
#[inline]
fn all_zero(buf: &[u8]) -> bool {
    // In the empty case nothing gets encoded either way, so it does not matter what we return
    // here; `true` keeps the logic simple.
    if buf.is_empty() {
        return true;
    }
    // Just fast-rejecting based on the first element significantly reduces how often we end up
    // walking the whole array.
    if buf[0] != 0 {
        return false;
    }

    // This strategy of combining all slice elements with & or | is unbeatable for the large
    // all-zero case because it is so well-understood by autovectorization.
    buf.iter().fold(true, |acc, b| acc & (*b == 0))
}

/// Custom encoder for [`Allocation`] to more efficiently represent the case where all bytes are 0.
impl<Prov: Provenance, Extra, E: Encoder> Encodable<E> for Allocation<Prov, Extra, Box<[u8]>>
where
    ProvenanceMap<Prov>: Encodable<E>,
    Extra: Encodable<E>,
{
    fn encode(&self, encoder: &mut E) {
        let all_zero = all_zero(&self.bytes);
        AllocFlags { align: self.align, mutability: self.mutability, all_zero }.encode(encoder);

        encoder.emit_usize(self.bytes.len());
        if !all_zero {
            encoder.emit_raw_bytes(&self.bytes);
        }
        self.provenance.encode(encoder);
        self.init_mask.encode(encoder);
        self.extra.encode(encoder);
    }
}

impl<Prov: Provenance, Extra, D: Decoder> Decodable<D> for Allocation<Prov, Extra, Box<[u8]>>
where
    ProvenanceMap<Prov>: Decodable<D>,
    Extra: Decodable<D>,
{
    fn decode(decoder: &mut D) -> Self {
        let AllocFlags { align, mutability, all_zero } = Decodable::decode(decoder);

        let len = decoder.read_usize();
        let bytes = if all_zero { vec![0u8; len] } else { decoder.read_raw_bytes(len).to_vec() };
        let bytes = <Box<[u8]> as AllocBytes>::from_bytes(bytes, align, ());

        let provenance = Decodable::decode(decoder);
        let init_mask = Decodable::decode(decoder);
        let extra = Decodable::decode(decoder);

        Self { bytes, provenance, init_mask, align, mutability, extra }
    }
}

/// This is the maximum size we will hash at a time, when interning an `Allocation` and its
/// `InitMask`. Note that we hash that amount of bytes twice: at the start, and at the end of a
/// buffer. Used when these two structures are large: we only partially hash the larger fields in
/// that situation. See the comment at the top of their respective `Hash` impl for more details.
const MAX_BYTES_TO_HASH: usize = 64;

/// This is the maximum size (in bytes) for which a buffer will be fully hashed, when interning.
/// Otherwise, it will be partially hashed in 2 slices, requiring at least 2 `MAX_BYTES_TO_HASH`
/// bytes.
const MAX_HASHED_BUFFER_LEN: usize = 2 * MAX_BYTES_TO_HASH;
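
// For example, with these limits a 1 MiB constant allocation gets hashed as its length plus its
// first and last 64 bytes, rather than all of its contents.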

// Const allocations are only hashed for interning. However, they can be large, making the hashing
// expensive, especially since it uses `FxHash`, which is better suited to short keys than to
// potentially big buffers like the actual bytes of an allocation. We can partially hash some
// fields when they're large.
impl hash::Hash for Allocation {
    fn hash<H: hash::Hasher>(&self, state: &mut H) {
        let Self {
            bytes,
            provenance,
            init_mask,
            align,
            mutability,
            extra: (), // don't bother hashing ()
        } = self;

        // Partially hash the `bytes` buffer when it is large. To limit collisions with common
        // prefixes and suffixes, we hash the length and some slices of the buffer.
        let byte_count = bytes.len();
        if byte_count > MAX_HASHED_BUFFER_LEN {
            // Hash the buffer's length.
            byte_count.hash(state);

            // And its head and tail.
            bytes[..MAX_BYTES_TO_HASH].hash(state);
            bytes[byte_count - MAX_BYTES_TO_HASH..].hash(state);
        } else {
            bytes.hash(state);
        }

        // Hash the other fields as usual.
        provenance.hash(state);
        init_mask.hash(state);
        align.hash(state);
        mutability.hash(state);
    }
}

/// Interned types generally have an `Outer` type and an `Inner` type, where
/// `Outer` is a newtype around `Interned<Inner>`, and all the operations are
/// done on `Outer`, because all occurrences are interned. E.g. `Ty` is an
/// outer type and `TyKind` is its inner type.
///
/// Here things are different because only const allocations are interned. This
/// means that both the inner type (`Allocation`) and the outer type
/// (`ConstAllocation`) are used quite a bit.
#[derive(Copy, Clone, PartialEq, Eq, Hash, HashStable)]
#[rustc_pass_by_value]
pub struct ConstAllocation<'tcx>(pub Interned<'tcx, Allocation>);

impl<'tcx> fmt::Debug for ConstAllocation<'tcx> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The debug representation of this is very verbose and basically useless,
        // so don't print it.
        write!(f, "ConstAllocation {{ .. }}")
    }
}

impl<'tcx> ConstAllocation<'tcx> {
    pub fn inner(self) -> &'tcx Allocation {
        self.0.0
    }
}
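
// A `ConstAllocation` is obtained by interning an `Allocation` through the `TyCtxt`; the
// `Interned` wrapper is what makes the derived `Copy`/`Eq`/`Hash` above cheap, since they operate
// on the interned reference rather than on the allocation contents.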

/// We have our own error type that does not know about the `AllocId`; that information
/// is added when converting to `InterpError`.
#[derive(Debug)]
pub enum AllocError {
    /// A scalar had the wrong size.
    ScalarSizeMismatch(ScalarSizeMismatch),
    /// Encountered a pointer where we needed raw bytes.
    ReadPointerAsInt(Option<BadBytesAccess>),
    /// Partially copying a pointer.
    ReadPartialPointer(Size),
    /// Using uninitialized data where it is not allowed.
    InvalidUninitBytes(Option<BadBytesAccess>),
}
pub type AllocResult<T = ()> = Result<T, AllocError>;

impl From<ScalarSizeMismatch> for AllocError {
    fn from(s: ScalarSizeMismatch) -> Self {
        AllocError::ScalarSizeMismatch(s)
    }
}

impl AllocError {
    pub fn to_interp_error<'tcx>(self, alloc_id: AllocId) -> InterpErrorKind<'tcx> {
        use AllocError::*;
        match self {
            ScalarSizeMismatch(s) => {
                InterpErrorKind::UndefinedBehavior(UndefinedBehaviorInfo::ScalarSizeMismatch(s))
            }
            ReadPointerAsInt(info) => InterpErrorKind::Unsupported(
                UnsupportedOpInfo::ReadPointerAsInt(info.map(|b| (alloc_id, b))),
            ),
            ReadPartialPointer(offset) => InterpErrorKind::Unsupported(
                UnsupportedOpInfo::ReadPartialPointer(Pointer::new(alloc_id, offset)),
            ),
            InvalidUninitBytes(info) => InterpErrorKind::UndefinedBehavior(
                UndefinedBehaviorInfo::InvalidUninitBytes(info.map(|b| (alloc_id, b))),
            ),
        }
    }
}

/// The information that makes up a memory access: offset and size.
#[derive(Copy, Clone)]
pub struct AllocRange {
    pub start: Size,
    pub size: Size,
}

impl fmt::Debug for AllocRange {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "[{:#x}..{:#x}]", self.start.bytes(), self.end().bytes())
    }
}

/// Free-standing constructor for less syntactic overhead.
#[inline(always)]
pub fn alloc_range(start: Size, size: Size) -> AllocRange {
    AllocRange { start, size }
}

impl From<Range<Size>> for AllocRange {
    #[inline]
    fn from(r: Range<Size>) -> Self {
        alloc_range(r.start, r.end - r.start) // `Size` subtraction (overflow-checked)
    }
}

impl From<Range<usize>> for AllocRange {
    #[inline]
    fn from(r: Range<usize>) -> Self {
        AllocRange::from(Size::from_bytes(r.start)..Size::from_bytes(r.end))
    }
}

impl AllocRange {
    #[inline(always)]
    pub fn end(self) -> Size {
        self.start + self.size // This does overflow checking.
    }

    /// Returns the `subrange` within this range; panics if it is not a subrange.
    #[inline]
    pub fn subrange(self, subrange: AllocRange) -> AllocRange {
        let sub_start = self.start + subrange.start;
        let range = alloc_range(sub_start, subrange.size);
        assert!(range.end() <= self.end(), "access outside the bounds for given AllocRange");
        range
    }
}
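
// For example, calling `subrange` on an access covering absolute offsets 16..24
// (`alloc_range(Size::from_bytes(16), Size::from_bytes(8))`) with the relative range
// `alloc_range(Size::from_bytes(2), Size::from_bytes(4))` yields absolute offsets 18..22;
// anything reaching past offset 24 trips the assertion above.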

/// Whether a new allocation should be initialized with zero-bytes.
pub enum AllocInit {
    Uninit,
    Zero,
}
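
// Note that `AllocInit::Zero` also marks the whole range as initialized in the `InitMask`, while
// `AllocInit::Uninit` leaves the mask all-false; see `new_inner` below.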

// The constructors are all without extra; the extra gets added by a machine hook later.
impl<Prov: Provenance, Bytes: AllocBytes> Allocation<Prov, (), Bytes> {
    /// Creates an allocation initialized by the given bytes.
    pub fn from_bytes<'a>(
        slice: impl Into<Cow<'a, [u8]>>,
        align: Align,
        mutability: Mutability,
        params: <Bytes as AllocBytes>::AllocParams,
    ) -> Self {
        let bytes = Bytes::from_bytes(slice, align, params);
        let size = Size::from_bytes(bytes.len());
        Self {
            bytes,
            provenance: ProvenanceMap::new(),
            init_mask: InitMask::new(size, true),
            align,
            mutability,
            extra: (),
        }
    }

    pub fn from_bytes_byte_aligned_immutable<'a>(
        slice: impl Into<Cow<'a, [u8]>>,
        params: <Bytes as AllocBytes>::AllocParams,
    ) -> Self {
        Allocation::from_bytes(slice, Align::ONE, Mutability::Not, params)
    }

    fn new_inner<R>(
        size: Size,
        align: Align,
        init: AllocInit,
        params: <Bytes as AllocBytes>::AllocParams,
        fail: impl FnOnce() -> R,
    ) -> Result<Self, R> {
        // We raise an error if we cannot create the allocation on the host.
        // This results in an error that can happen non-deterministically, since the memory
        // available to the compiler can change between runs. Normally queries are always
        // deterministic. However, we can be non-deterministic here because all uses of const
        // evaluation (including ConstProp!) will make compilation fail (via hard error
        // or ICE) upon encountering a `MemoryExhausted` error.
        let bytes = Bytes::zeroed(size, align, params).ok_or_else(fail)?;

        Ok(Allocation {
            bytes,
            provenance: ProvenanceMap::new(),
            init_mask: InitMask::new(
                size,
                match init {
                    AllocInit::Uninit => false,
                    AllocInit::Zero => true,
                },
            ),
            align,
            mutability: Mutability::Mut,
            extra: (),
        })
    }

    /// Try to create an Allocation of `size` bytes, failing if there is not enough memory
    /// available to the compiler to do so.
    pub fn try_new<'tcx>(
        size: Size,
        align: Align,
        init: AllocInit,
        params: <Bytes as AllocBytes>::AllocParams,
    ) -> InterpResult<'tcx, Self> {
        Self::new_inner(size, align, init, params, || {
            ty::tls::with(|tcx| tcx.dcx().delayed_bug("exhausted memory during interpretation"));
            InterpErrorKind::ResourceExhaustion(ResourceExhaustionInfo::MemoryExhausted)
        })
        .into()
    }

    /// Creates an Allocation of `size` bytes, panicking if there is not enough memory
    /// available to the compiler to do so.
    ///
    /// Example use case: To obtain an Allocation filled with specific data,
    /// first call this function and then call `write_scalar` to fill in the right data.
    pub fn new(
        size: Size,
        align: Align,
        init: AllocInit,
        params: <Bytes as AllocBytes>::AllocParams,
    ) -> Self {
        match Self::new_inner(size, align, init, params, || {
            panic!(
                "interpreter ran out of memory: cannot create allocation of {} bytes",
                size.bytes()
            );
        }) {
            Ok(x) => x,
            Err(x) => x,
        }
    }

    /// Add the extra.
    pub fn with_extra<Extra>(self, extra: Extra) -> Allocation<Prov, Extra, Bytes> {
        Allocation {
            bytes: self.bytes,
            provenance: self.provenance,
            init_mask: self.init_mask,
            align: self.align,
            mutability: self.mutability,
            extra,
        }
    }
}
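
// Usage sketch: building a zero-initialized 16-byte allocation with the default `Box<[u8]>` bytes
// type and then attaching the machine's extra data. The concrete sizes and types here are
// illustrative only.
//
//     let alloc = Allocation::<CtfeProvenance, (), Box<[u8]>>::new(
//         Size::from_bytes(16),
//         Align::from_bytes(8).unwrap(),
//         AllocInit::Zero,
//         (),
//     );
//     let alloc = alloc.with_extra(());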

impl Allocation {
    /// Adjusts an allocation from `tcx` to one suited to a custom Machine instance
    /// with a different `Provenance` and `Bytes` type.
    pub fn adjust_from_tcx<'tcx, Prov: Provenance, Bytes: AllocBytes>(
        &self,
        cx: &impl HasDataLayout,
        alloc_bytes: impl FnOnce(&[u8], Align) -> InterpResult<'tcx, Bytes>,
        mut adjust_ptr: impl FnMut(Pointer<CtfeProvenance>) -> InterpResult<'tcx, Pointer<Prov>>,
    ) -> InterpResult<'tcx, Allocation<Prov, (), Bytes>> {
        // Copy the data.
        let mut bytes = alloc_bytes(&*self.bytes, self.align)?;
        // Adjust provenance of pointers stored in this allocation.
        let mut new_provenance = Vec::with_capacity(self.provenance.ptrs().len());
        let ptr_size = cx.data_layout().pointer_size().bytes_usize();
        let endian = cx.data_layout().endian;
        for &(offset, alloc_id) in self.provenance.ptrs().iter() {
            let idx = offset.bytes_usize();
            let ptr_bytes = &mut bytes[idx..idx + ptr_size];
            let bits = read_target_uint(endian, ptr_bytes).unwrap();
            let (ptr_prov, ptr_offset) =
                adjust_ptr(Pointer::new(alloc_id, Size::from_bytes(bits)))?.into_raw_parts();
            write_target_uint(endian, ptr_bytes, ptr_offset.bytes().into()).unwrap();
            new_provenance.push((offset, ptr_prov));
        }
        // Create allocation.
        interp_ok(Allocation {
            bytes,
            provenance: ProvenanceMap::from_presorted_ptrs(new_provenance),
            init_mask: self.init_mask.clone(),
            align: self.align,
            mutability: self.mutability,
            extra: self.extra,
        })
    }
}

/// Raw accessors. Provide access to otherwise private bytes.
impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes> {
    pub fn len(&self) -> usize {
        self.bytes.len()
    }

    pub fn size(&self) -> Size {
        Size::from_bytes(self.len())
    }

    /// Looks at a slice which may contain uninitialized bytes or provenance. This differs
    /// from `get_bytes_with_uninit_and_ptr` in that it does no provenance checks (even on the
    /// edges) at all.
    /// This must not be used for reads affecting the interpreter execution.
    pub fn inspect_with_uninit_and_ptr_outside_interpreter(&self, range: Range<usize>) -> &[u8] {
        &self.bytes[range]
    }

    /// Returns the mask indicating which bytes are initialized.
    pub fn init_mask(&self) -> &InitMask {
        &self.init_mask
    }

    /// Returns the provenance map.
    pub fn provenance(&self) -> &ProvenanceMap<Prov> {
        &self.provenance
    }
}

/// Byte accessors.
impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes> {
    /// This is the entirely abstraction-violating way to just grab the raw bytes without
    /// caring about provenance or initialization.
    ///
    /// This function also guarantees that the resulting pointer will remain stable
    /// even when new allocations are pushed to the `HashMap`. `mem_copy_repeatedly` relies
    /// on that.
    #[inline]
    pub fn get_bytes_unchecked(&self, range: AllocRange) -> &[u8] {
        &self.bytes[range.start.bytes_usize()..range.end().bytes_usize()]
    }

    /// Checks that these bytes are initialized, then strips provenance (if possible) and returns
    /// them.
    ///
    /// It is the caller's responsibility to check bounds and alignment beforehand.
    /// Most likely, you want to use the `PlaceTy` and `OperandTy`-based methods
    /// on `InterpCx` instead.
    #[inline]
    pub fn get_bytes_strip_provenance(
        &self,
        cx: &impl HasDataLayout,
        range: AllocRange,
    ) -> AllocResult<&[u8]> {
        self.init_mask.is_range_initialized(range).map_err(|uninit_range| {
            AllocError::InvalidUninitBytes(Some(BadBytesAccess {
                access: range,
                bad: uninit_range,
            }))
        })?;
        if !Prov::OFFSET_IS_ADDR && !self.provenance.range_empty(range, cx) {
            // Find the provenance.
            let (offset, _prov) = self
                .provenance
                .range_ptrs_get(range, cx)
                .first()
                .copied()
                .expect("there must be provenance somewhere here");
            let start = offset.max(range.start); // the pointer might begin before `range`!
            let end = (offset + cx.pointer_size()).min(range.end()); // the pointer might end after `range`!
            return Err(AllocError::ReadPointerAsInt(Some(BadBytesAccess {
                access: range,
                bad: AllocRange::from(start..end),
            })));
        }
        Ok(self.get_bytes_unchecked(range))
    }

    /// This is the entirely abstraction-violating way to just get mutable access to the raw bytes.
    /// Just calling this already marks everything as defined and removes provenance, so be sure to
    /// actually overwrite all the data there!
    ///
    /// It is the caller's responsibility to check bounds and alignment beforehand.
    /// Most likely, you want to use the `PlaceTy` and `OperandTy`-based methods
    /// on `InterpCx` instead.
    pub fn get_bytes_unchecked_for_overwrite(
        &mut self,
        cx: &impl HasDataLayout,
        range: AllocRange,
    ) -> &mut [u8] {
        self.mark_init(range, true);
        self.provenance.clear(range, cx);

        &mut self.bytes[range.start.bytes_usize()..range.end().bytes_usize()]
    }

    /// A raw pointer variant of `get_bytes_unchecked_for_overwrite` that avoids invalidating
    /// existing immutable aliases into this memory.
    pub fn get_bytes_unchecked_for_overwrite_ptr(
        &mut self,
        cx: &impl HasDataLayout,
        range: AllocRange,
    ) -> *mut [u8] {
        self.mark_init(range, true);
        self.provenance.clear(range, cx);

        assert!(range.end().bytes_usize() <= self.bytes.len()); // need to do our own bounds-check
        // Crucially, we go via `AllocBytes::as_mut_ptr`, not `AllocBytes::deref_mut`.
        let begin_ptr = self.bytes.as_mut_ptr().wrapping_add(range.start.bytes_usize());
        let len = range.end().bytes_usize() - range.start.bytes_usize();
        ptr::slice_from_raw_parts_mut(begin_ptr, len)
    }

    /// This gives direct mutable access to the entire buffer, just exposing its internal state
    /// without resetting anything. Directly exposes `AllocBytes::as_mut_ptr`. Only works if
    /// `OFFSET_IS_ADDR` is true.
    pub fn get_bytes_unchecked_raw_mut(&mut self) -> *mut u8 {
        assert!(Prov::OFFSET_IS_ADDR);
        self.bytes.as_mut_ptr()
    }

    /// This gives direct immutable access to the entire buffer, just exposing its internal state
    /// without resetting anything. Directly exposes `AllocBytes::as_ptr`. Only works if
    /// `OFFSET_IS_ADDR` is true.
    pub fn get_bytes_unchecked_raw(&self) -> *const u8 {
        assert!(Prov::OFFSET_IS_ADDR);
        self.bytes.as_ptr()
    }
}

/// Reading and writing.
impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes> {
    /// Sets the init bit for the given range.
    fn mark_init(&mut self, range: AllocRange, is_init: bool) {
        if range.size.bytes() == 0 {
            return;
        }
        assert!(self.mutability == Mutability::Mut);
        self.init_mask.set_range(range, is_init);
    }

    /// Reads a *non-ZST* scalar.
    ///
    /// If `read_provenance` is `true`, this will also read provenance; otherwise (if the machine
    /// supports that) provenance is entirely ignored.
    ///
    /// ZSTs can't be read because in order to obtain a `Pointer`, we need to check
    /// for ZSTness anyway due to integer pointers being valid for ZSTs.
    ///
    /// It is the caller's responsibility to check bounds and alignment beforehand.
    /// Most likely, you want to call `InterpCx::read_scalar` instead of this method.
    pub fn read_scalar(
        &self,
        cx: &impl HasDataLayout,
        range: AllocRange,
        read_provenance: bool,
    ) -> AllocResult<Scalar<Prov>> {
        // First and foremost, if anything is uninit, bail.
        if let Err(bad) = self.init_mask.is_range_initialized(range) {
            return Err(AllocError::InvalidUninitBytes(Some(BadBytesAccess {
                access: range,
                bad,
            })));
        }

        // Get the integer part of the result. We HAVE TO check provenance before returning this!
        let bytes = self.get_bytes_unchecked(range);
        let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();

        if read_provenance {
            assert_eq!(range.size, cx.data_layout().pointer_size());

            // When reading data with provenance, the easy case is finding provenance exactly where
            // we are reading; then we can put data and provenance back together and return that.
            if let Some(prov) = self.provenance.get_ptr(range.start) {
                // Now we can return the bits, with their appropriate provenance.
                let ptr = Pointer::new(prov, Size::from_bytes(bits));
                return Ok(Scalar::from_pointer(ptr, cx));
            }
            // The other easy case is total absence of provenance.
            if self.provenance.range_empty(range, cx) {
                return Ok(Scalar::from_uint(bits, range.size));
            }
            // If we get here, we have to check per-byte provenance, and join them together.
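            // (Per-byte provenance arises e.g. when a pointer was previously copied byte-by-byte:
            // each byte carries the provenance it came from plus its index within the original
            // pointer, and a pointer read only succeeds if those fragments line up again.)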
            let prov = 'prov: {
                // Initialize with first fragment. Must have index 0.
                let Some((mut joint_prov, 0)) = self.provenance.get_byte(range.start, cx) else {
                    break 'prov None;
                };
                // Update with the remaining fragments.
                for offset in Size::from_bytes(1)..range.size {
                    // Ensure there is provenance here and it has the right index.
                    let Some((frag_prov, frag_idx)) =
                        self.provenance.get_byte(range.start + offset, cx)
                    else {
                        break 'prov None;
                    };
                    // Wildcard provenance is allowed to come with any index (this is needed
                    // for Miri's native-lib mode to work).
                    if u64::from(frag_idx) != offset.bytes() && Some(frag_prov) != Prov::WILDCARD {
                        break 'prov None;
                    }
                    // Merge this byte's provenance with the previous ones.
                    joint_prov = match Prov::join(joint_prov, frag_prov) {
                        Some(prov) => prov,
                        None => break 'prov None,
                    };
                }
                break 'prov Some(joint_prov);
            };
            if prov.is_none() && !Prov::OFFSET_IS_ADDR {
                // There are some bytes with provenance here but overall the provenance does not
                // add up. We need `OFFSET_IS_ADDR` to fall back to no-provenance here; without
                // that option, we must error.
                return Err(AllocError::ReadPartialPointer(range.start));
            }
            // We can use this provenance.
            let ptr = Pointer::new(prov, Size::from_bytes(bits));
            return Ok(Scalar::from_maybe_pointer(ptr, cx));
        } else {
            // We are *not* reading a pointer.
            // If we can just ignore provenance or there is none, that's easy.
            if Prov::OFFSET_IS_ADDR || self.provenance.range_empty(range, cx) {
                // We just strip provenance.
                return Ok(Scalar::from_uint(bits, range.size));
            }
            // There is some provenance and we don't have OFFSET_IS_ADDR. This doesn't work.
            return Err(AllocError::ReadPointerAsInt(None));
        }
    }

    /// Writes a *non-ZST* scalar.
    ///
    /// ZSTs can't be read because in order to obtain a `Pointer`, we need to check
    /// for ZSTness anyway due to integer pointers being valid for ZSTs.
    ///
    /// It is the caller's responsibility to check bounds and alignment beforehand.
    /// Most likely, you want to call `InterpCx::write_scalar` instead of this method.
    pub fn write_scalar(
        &mut self,
        cx: &impl HasDataLayout,
        range: AllocRange,
        val: Scalar<Prov>,
    ) -> AllocResult {
        assert!(self.mutability == Mutability::Mut);

        // `to_bits_or_ptr_internal` is the right method because we just want to store this data
        // as-is into memory. This also double-checks that `val.size()` matches `range.size`.
        let (bytes, provenance) = match val.to_bits_or_ptr_internal(range.size)? {
            Right(ptr) => {
                let (provenance, offset) = ptr.into_raw_parts();
                (u128::from(offset.bytes()), Some(provenance))
            }
            Left(data) => (data, None),
        };

        let endian = cx.data_layout().endian;
        // Yes we do overwrite all the bytes in `dst`.
        let dst = self.get_bytes_unchecked_for_overwrite(cx, range);
        write_target_uint(endian, dst, bytes).unwrap();

        // See if we have to also store some provenance.
        if let Some(provenance) = provenance {
            assert_eq!(range.size, cx.data_layout().pointer_size());
            self.provenance.insert_ptr(range.start, provenance, cx);
        }

        Ok(())
    }

    /// Write "uninit" to the given memory range.
    pub fn write_uninit(&mut self, cx: &impl HasDataLayout, range: AllocRange) {
        self.mark_init(range, false);
        self.provenance.clear(range, cx);
    }

    /// Mark all bytes in the given range as initialized and reset the provenance
    /// to wildcards. This entirely breaks the normal mechanisms for tracking
    /// initialization and is only provided for Miri operating in native-lib
    /// mode. UB will be missed if the underlying bytes were not actually written to.
    ///
    /// If `range` is `None`, defaults to performing this on the whole allocation.
    pub fn process_native_write(&mut self, cx: &impl HasDataLayout, range: Option<AllocRange>) {
        let range = range.unwrap_or_else(|| AllocRange {
            start: Size::ZERO,
            size: Size::from_bytes(self.len()),
        });
        self.mark_init(range, true);
        self.provenance.write_wildcards(cx, range);
    }

    /// Remove all provenance in the given memory range.
    pub fn clear_provenance(&mut self, cx: &impl HasDataLayout, range: AllocRange) {
        self.provenance.clear(range, cx);
    }

    pub fn provenance_merge_bytes(&mut self, cx: &impl HasDataLayout) -> bool {
        self.provenance.merge_bytes(cx)
    }

    /// Applies a previously prepared provenance copy.
    /// The affected range, as defined in the parameters to `provenance().prepare_copy`, is
    /// expected to be clear of provenance.
    ///
    /// This is dangerous to use as it can violate internal `Allocation` invariants!
    /// It only exists to support an efficient implementation of `mem_copy_repeatedly`.
    pub fn provenance_apply_copy(&mut self, copy: ProvenanceCopy<Prov>) {
        self.provenance.apply_copy(copy)
    }

    /// Applies a previously prepared copy of the init mask.
    ///
    /// This is dangerous to use as it can violate internal `Allocation` invariants!
    /// It only exists to support an efficient implementation of `mem_copy_repeatedly`.
    pub fn init_mask_apply_copy(&mut self, copy: InitCopy, range: AllocRange, repeat: u64) {
        self.init_mask.apply_copy(copy, range, repeat)
    }
}