rustc_middle/mir/interpret/allocation.rs

//! The virtual memory representation of the MIR interpreter.

mod init_mask;
mod provenance_map;

use std::borrow::Cow;
use std::hash::Hash;
use std::ops::{Deref, DerefMut, Range};
use std::{fmt, hash, ptr};

use either::{Left, Right};
use init_mask::*;
pub use init_mask::{InitChunk, InitChunkIter};
use provenance_map::*;
use rustc_abi::{Align, HasDataLayout, Size};
use rustc_ast::Mutability;
use rustc_data_structures::intern::Interned;
use rustc_macros::HashStable;
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};

use super::{
    AllocId, BadBytesAccess, CtfeProvenance, InterpErrorKind, InterpResult, Pointer,
    PointerArithmetic, Provenance, ResourceExhaustionInfo, Scalar, ScalarSizeMismatch,
    UndefinedBehaviorInfo, UnsupportedOpInfo, interp_ok, read_target_uint, write_target_uint,
};
use crate::ty;

/// Functionality required for the bytes of an `Allocation`.
pub trait AllocBytes: Clone + fmt::Debug + Deref<Target = [u8]> + DerefMut<Target = [u8]> {
    /// The type of extra parameters passed in when creating an allocation.
    /// Can be used by `interpret::Machine` instances to make runtime-configuration-dependent
    /// decisions about the allocation strategy.
    type AllocParams;

    /// Create an `AllocBytes` from a slice of `u8`.
    fn from_bytes<'a>(
        slice: impl Into<Cow<'a, [u8]>>,
        _align: Align,
        _params: Self::AllocParams,
    ) -> Self;

    /// Create a zeroed `AllocBytes` of the specified size and alignment.
    /// Returns `None` if we ran out of memory on the host.
    fn zeroed(size: Size, _align: Align, _params: Self::AllocParams) -> Option<Self>;

    /// Gives direct access to the raw underlying storage.
    ///
    /// Crucially this pointer is compatible with:
    /// - other pointers returned by this method, and
    /// - references returned from `deref()`, as long as there was no write.
    fn as_mut_ptr(&mut self) -> *mut u8;

    /// Gives direct access to the raw underlying storage.
    ///
    /// Crucially this pointer is compatible with:
    /// - other pointers returned by this method, and
    /// - references returned from `deref()`, as long as there was no write.
    fn as_ptr(&self) -> *const u8;
}

/// Default `bytes` for `Allocation` is a `Box<[u8]>`.
impl AllocBytes for Box<[u8]> {
    type AllocParams = ();

    fn from_bytes<'a>(slice: impl Into<Cow<'a, [u8]>>, _align: Align, _params: ()) -> Self {
        Box::<[u8]>::from(slice.into())
    }

    fn zeroed(size: Size, _align: Align, _params: ()) -> Option<Self> {
        let bytes = Box::<[u8]>::try_new_zeroed_slice(size.bytes().try_into().ok()?).ok()?;
        // SAFETY: the box was zero-allocated, which is a valid initial value for Box<[u8]>
        let bytes = unsafe { bytes.assume_init() };
        Some(bytes)
    }

    fn as_mut_ptr(&mut self) -> *mut u8 {
        Box::as_mut_ptr(self).cast()
    }

    fn as_ptr(&self) -> *const u8 {
        Box::as_ptr(self).cast()
    }
}

/// This type represents an Allocation in the Miri/CTFE core engine.
///
/// Its public API is rather low-level, working directly with allocation offsets and a custom error
/// type to account for the lack of an AllocId on this level. The Miri/CTFE core engine `memory`
/// module provides higher-level access.
// Note: for performance reasons when interning, some of the `Allocation` fields can be partially
// hashed (see the `Hash` impl below for more details), so the impl is not derived.
#[derive(Clone, Eq, PartialEq)]
#[derive(HashStable)]
pub struct Allocation<Prov: Provenance = CtfeProvenance, Extra = (), Bytes = Box<[u8]>> {
    /// The actual bytes of the allocation.
    /// Note that the bytes of a pointer represent the offset of the pointer.
    bytes: Bytes,
    /// Maps from byte addresses to extra provenance data for each pointer.
    /// Only the first byte of a pointer is inserted into the map; i.e.,
    /// every entry in this map applies to `pointer_size` consecutive bytes starting
    /// at the given offset.
    provenance: ProvenanceMap<Prov>,
    /// Denotes which part of this allocation is initialized.
    init_mask: InitMask,
    /// The alignment of the allocation to detect unaligned reads.
    /// (`Align` guarantees that this is a power of two.)
    pub align: Align,
    /// `true` if the allocation is mutable.
    /// Also used by codegen to determine if a static should be put into mutable memory,
    /// which happens for `static mut` and `static` with interior mutability.
    pub mutability: Mutability,
    /// Extra state for the machine.
    pub extra: Extra,
}

/// Helper struct that packs an alignment, mutability, and "all bytes are zero" flag together.
///
/// Alignment values always have 2 free high bits, and we check for this in our [`Encodable`] impl.
struct AllocFlags {
    align: Align,
    mutability: Mutability,
    all_zero: bool,
}

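// A sketch of the encoded byte layout, derived from the `Encodable`/`Decodable` impls below:
// bits 0..=5 hold `align.bytes().trailing_zeros()` (i.e. log2 of the alignment), bit 6 is set for
// `Mutability::Mut`, and bit 7 is set if all bytes of the allocation are zero. For example, an
// all-zero, mutable allocation with 8-byte alignment encodes as 0b1100_0011.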
impl<E: Encoder> Encodable<E> for AllocFlags {
    fn encode(&self, encoder: &mut E) {
        // Make sure Align::MAX can be stored with the high 2 bits unset.
        const {
            let max_supported_align_repr = u8::MAX >> 2;
            let max_supported_align = 1 << max_supported_align_repr;
            assert!(Align::MAX.bytes() <= max_supported_align)
        }

        let mut flags = self.align.bytes().trailing_zeros() as u8;
        flags |= match self.mutability {
            Mutability::Not => 0,
            Mutability::Mut => 1 << 6,
        };
        flags |= (self.all_zero as u8) << 7;
        flags.encode(encoder);
    }
}

impl<D: Decoder> Decodable<D> for AllocFlags {
    fn decode(decoder: &mut D) -> Self {
        let flags: u8 = Decodable::decode(decoder);
        let align = flags & 0b0011_1111;
        let mutability = flags & 0b0100_0000;
        let all_zero = flags & 0b1000_0000;

        let align = Align::from_bytes(1 << align).unwrap();
        let mutability = match mutability {
            0 => Mutability::Not,
            _ => Mutability::Mut,
        };
        let all_zero = all_zero > 0;

        AllocFlags { align, mutability, all_zero }
    }
}

/// Efficiently detect whether a slice of `u8` is all zero.
///
/// This is used in encoding of [`Allocation`] to special-case all-zero allocations. It is only
/// optimized a little, because for many allocations the encoding of the actual bytes does not
/// dominate runtime.
#[inline]
fn all_zero(buf: &[u8]) -> bool {
    // In the empty case we wouldn't encode any contents even without this system where we
    // special-case allocations whose contents are all 0. We can return anything in the empty case.
    if buf.is_empty() {
        return true;
    }
    // Fast-rejecting based on the first element alone significantly reduces how often we end up
    // walking the whole array.
    if buf[0] != 0 {
        return false;
    }

    // This strategy of combining all slice elements with & or | is unbeatable for the large
    // all-zero case because it is so well-understood by autovectorization.
    buf.iter().fold(true, |acc, b| acc & (*b == 0))
}

/// Custom encoder for [`Allocation`] to more efficiently represent the case where all bytes are 0.
impl<Prov: Provenance, Extra, E: Encoder> Encodable<E> for Allocation<Prov, Extra, Box<[u8]>>
where
    ProvenanceMap<Prov>: Encodable<E>,
    Extra: Encodable<E>,
{
    fn encode(&self, encoder: &mut E) {
        let all_zero = all_zero(&self.bytes);
        AllocFlags { align: self.align, mutability: self.mutability, all_zero }.encode(encoder);

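        // The length is emitted unconditionally so that the decoder can reconstruct the buffer
        // even when the bytes themselves are skipped in the all-zero case below.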
        encoder.emit_usize(self.bytes.len());
        if !all_zero {
            encoder.emit_raw_bytes(&self.bytes);
        }
        self.provenance.encode(encoder);
        self.init_mask.encode(encoder);
        self.extra.encode(encoder);
    }
}

impl<Prov: Provenance, Extra, D: Decoder> Decodable<D> for Allocation<Prov, Extra, Box<[u8]>>
where
    ProvenanceMap<Prov>: Decodable<D>,
    Extra: Decodable<D>,
{
    fn decode(decoder: &mut D) -> Self {
        let AllocFlags { align, mutability, all_zero } = Decodable::decode(decoder);

        let len = decoder.read_usize();
        let bytes = if all_zero { vec![0u8; len] } else { decoder.read_raw_bytes(len).to_vec() };
        let bytes = <Box<[u8]> as AllocBytes>::from_bytes(bytes, align, ());

        let provenance = Decodable::decode(decoder);
        let init_mask = Decodable::decode(decoder);
        let extra = Decodable::decode(decoder);

        Self { bytes, provenance, init_mask, align, mutability, extra }
    }
}

/// This is the maximum size we will hash at a time, when interning an `Allocation` and its
/// `InitMask`. Note that we hash that amount of bytes twice: at the start, and at the end of a
/// buffer. Used when these two structures are large: we only partially hash the larger fields in
/// that situation. See the comment at the top of their respective `Hash` impl for more details.
const MAX_BYTES_TO_HASH: usize = 64;

/// This is the maximum size (in bytes) for which a buffer will be fully hashed, when interning.
/// Otherwise, it will be partially hashed in 2 slices, requiring at least 2 `MAX_BYTES_TO_HASH`
/// bytes.
const MAX_HASHED_BUFFER_LEN: usize = 2 * MAX_BYTES_TO_HASH;

// Const allocations are only hashed for interning. However, they can be large, making the hashing
// expensive, especially since it uses `FxHash`, which is better suited to short keys than to
// potentially big buffers like the actual bytes of an allocation. We can partially hash some
// fields when they're large.
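//
// As a concrete sketch of the scheme implemented below: a 1000-byte constant is hashed as its
// length plus its first and last `MAX_BYTES_TO_HASH` (64) bytes, while anything up to
// `MAX_HASHED_BUFFER_LEN` (128) bytes is hashed in full.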
impl hash::Hash for Allocation {
    fn hash<H: hash::Hasher>(&self, state: &mut H) {
        let Self {
            bytes,
            provenance,
            init_mask,
            align,
            mutability,
            extra: (), // don't bother hashing ()
        } = self;

        // Partially hash the `bytes` buffer when it is large. To limit collisions with common
        // prefixes and suffixes, we hash the length and some slices of the buffer.
        let byte_count = bytes.len();
        if byte_count > MAX_HASHED_BUFFER_LEN {
            // Hash the buffer's length.
            byte_count.hash(state);

            // And its head and tail.
            bytes[..MAX_BYTES_TO_HASH].hash(state);
            bytes[byte_count - MAX_BYTES_TO_HASH..].hash(state);
        } else {
            bytes.hash(state);
        }

        // Hash the other fields as usual.
        provenance.hash(state);
        init_mask.hash(state);
        align.hash(state);
        mutability.hash(state);
    }
}

/// Interned types generally have an `Outer` type and an `Inner` type, where
/// `Outer` is a newtype around `Interned<Inner>`, and all the operations are
/// done on `Outer`, because all occurrences are interned. E.g. `Ty` is an
/// outer type and `TyKind` is its inner type.
///
/// Here things are different because only const allocations are interned. This
/// means that both the inner type (`Allocation`) and the outer type
/// (`ConstAllocation`) are used quite a bit.
#[derive(Copy, Clone, PartialEq, Eq, Hash, HashStable)]
#[rustc_pass_by_value]
pub struct ConstAllocation<'tcx>(pub Interned<'tcx, Allocation>);

impl<'tcx> fmt::Debug for ConstAllocation<'tcx> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The debug representation of this is very verbose and basically useless,
        // so don't print it.
        write!(f, "ConstAllocation {{ .. }}")
    }
}

impl<'tcx> ConstAllocation<'tcx> {
    pub fn inner(self) -> &'tcx Allocation {
        self.0.0
    }
}

/// We have our own error type that does not know about the `AllocId`; that information
/// is added when converting to `InterpError`.
#[derive(Debug)]
pub enum AllocError {
    /// A scalar had the wrong size.
    ScalarSizeMismatch(ScalarSizeMismatch),
    /// Encountered a pointer where we needed raw bytes.
    ReadPointerAsInt(Option<BadBytesAccess>),
    /// Partially overwriting a pointer.
    OverwritePartialPointer(Size),
    /// Partially copying a pointer.
    ReadPartialPointer(Size),
    /// Using uninitialized data where it is not allowed.
    InvalidUninitBytes(Option<BadBytesAccess>),
}
pub type AllocResult<T = ()> = Result<T, AllocError>;

impl From<ScalarSizeMismatch> for AllocError {
    fn from(s: ScalarSizeMismatch) -> Self {
        AllocError::ScalarSizeMismatch(s)
    }
}

impl AllocError {
    pub fn to_interp_error<'tcx>(self, alloc_id: AllocId) -> InterpErrorKind<'tcx> {
        use AllocError::*;
        match self {
            ScalarSizeMismatch(s) => {
                InterpErrorKind::UndefinedBehavior(UndefinedBehaviorInfo::ScalarSizeMismatch(s))
            }
            ReadPointerAsInt(info) => InterpErrorKind::Unsupported(
                UnsupportedOpInfo::ReadPointerAsInt(info.map(|b| (alloc_id, b))),
            ),
            OverwritePartialPointer(offset) => InterpErrorKind::Unsupported(
                UnsupportedOpInfo::OverwritePartialPointer(Pointer::new(alloc_id, offset)),
            ),
            ReadPartialPointer(offset) => InterpErrorKind::Unsupported(
                UnsupportedOpInfo::ReadPartialPointer(Pointer::new(alloc_id, offset)),
            ),
            InvalidUninitBytes(info) => InterpErrorKind::UndefinedBehavior(
                UndefinedBehaviorInfo::InvalidUninitBytes(info.map(|b| (alloc_id, b))),
            ),
        }
    }
}

/// The information that makes up a memory access: offset and size.
#[derive(Copy, Clone)]
pub struct AllocRange {
    pub start: Size,
    pub size: Size,
}

impl fmt::Debug for AllocRange {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "[{:#x}..{:#x}]", self.start.bytes(), self.end().bytes())
    }
}

/// Free-standing constructor for less syntactic overhead.
#[inline(always)]
pub fn alloc_range(start: Size, size: Size) -> AllocRange {
    AllocRange { start, size }
}

impl From<Range<Size>> for AllocRange {
    #[inline]
    fn from(r: Range<Size>) -> Self {
        alloc_range(r.start, r.end - r.start) // `Size` subtraction (overflow-checked)
    }
}

impl From<Range<usize>> for AllocRange {
    #[inline]
    fn from(r: Range<usize>) -> Self {
        AllocRange::from(Size::from_bytes(r.start)..Size::from_bytes(r.end))
    }
}

impl AllocRange {
    #[inline(always)]
    pub fn end(self) -> Size {
        self.start + self.size // This does overflow checking.
    }

    /// Returns the `subrange` within this range; panics if it is not a subrange.
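    ///
    /// A small sketch of the arithmetic, with `Size` values written in bytes: taking the subrange
    /// at offset 2 with size 4 of the range starting at offset 8 with size 8 yields the range
    /// starting at offset 10 with size 4.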
    #[inline]
    pub fn subrange(self, subrange: AllocRange) -> AllocRange {
        let sub_start = self.start + subrange.start;
        let range = alloc_range(sub_start, subrange.size);
        assert!(range.end() <= self.end(), "access outside the bounds for given AllocRange");
        range
    }
}

/// Whether a new allocation should be initialized with zero-bytes.
pub enum AllocInit {
    Uninit,
    Zero,
}

// The constructors are all without extra; the extra gets added by a machine hook later.
impl<Prov: Provenance, Bytes: AllocBytes> Allocation<Prov, (), Bytes> {
    /// Creates an allocation initialized by the given bytes
    pub fn from_bytes<'a>(
        slice: impl Into<Cow<'a, [u8]>>,
        align: Align,
        mutability: Mutability,
        params: <Bytes as AllocBytes>::AllocParams,
    ) -> Self {
        let bytes = Bytes::from_bytes(slice, align, params);
        let size = Size::from_bytes(bytes.len());
        Self {
            bytes,
            provenance: ProvenanceMap::new(),
            init_mask: InitMask::new(size, true),
            align,
            mutability,
            extra: (),
        }
    }

    pub fn from_bytes_byte_aligned_immutable<'a>(
        slice: impl Into<Cow<'a, [u8]>>,
        params: <Bytes as AllocBytes>::AllocParams,
    ) -> Self {
        Allocation::from_bytes(slice, Align::ONE, Mutability::Not, params)
    }

    fn new_inner<R>(
        size: Size,
        align: Align,
        init: AllocInit,
        params: <Bytes as AllocBytes>::AllocParams,
        fail: impl FnOnce() -> R,
    ) -> Result<Self, R> {
        // We raise an error if we cannot create the allocation on the host.
        // This results in an error that can happen non-deterministically, since the memory
        // available to the compiler can change between runs. Normally queries are always
        // deterministic. However, we can be non-deterministic here because all uses of const
        // evaluation (including ConstProp!) will make compilation fail (via hard error
        // or ICE) upon encountering a `MemoryExhausted` error.
        let bytes = Bytes::zeroed(size, align, params).ok_or_else(fail)?;

        Ok(Allocation {
            bytes,
            provenance: ProvenanceMap::new(),
            init_mask: InitMask::new(
                size,
                match init {
                    AllocInit::Uninit => false,
                    AllocInit::Zero => true,
                },
            ),
            align,
            mutability: Mutability::Mut,
            extra: (),
        })
    }

    /// Try to create an Allocation of `size` bytes, failing if there is not enough memory
    /// available to the compiler to do so.
    pub fn try_new<'tcx>(
        size: Size,
        align: Align,
        init: AllocInit,
        params: <Bytes as AllocBytes>::AllocParams,
    ) -> InterpResult<'tcx, Self> {
        Self::new_inner(size, align, init, params, || {
            ty::tls::with(|tcx| tcx.dcx().delayed_bug("exhausted memory during interpretation"));
            InterpErrorKind::ResourceExhaustion(ResourceExhaustionInfo::MemoryExhausted)
        })
        .into()
    }

    /// Try to create an Allocation of `size` bytes, panicking if there is not enough memory
    /// available to the compiler to do so.
    ///
    /// Example use case: To obtain an Allocation filled with specific data,
    /// first call this function and then call write_scalar to fill in the right data.
    pub fn new(
        size: Size,
        align: Align,
        init: AllocInit,
        params: <Bytes as AllocBytes>::AllocParams,
    ) -> Self {
        match Self::new_inner(size, align, init, params, || {
            panic!(
                "interpreter ran out of memory: cannot create allocation of {} bytes",
                size.bytes()
            );
        }) {
            Ok(x) => x,
            Err(x) => x,
        }
    }

    /// Add the extra.
    pub fn with_extra<Extra>(self, extra: Extra) -> Allocation<Prov, Extra, Bytes> {
        Allocation {
            bytes: self.bytes,
            provenance: self.provenance,
            init_mask: self.init_mask,
            align: self.align,
            mutability: self.mutability,
            extra,
        }
    }
}

impl Allocation {
    /// Adjusts an allocation from the ones in `tcx` to a custom Machine instance
    /// with a different `Provenance` and `Bytes` type.
    pub fn adjust_from_tcx<'tcx, Prov: Provenance, Bytes: AllocBytes>(
        &self,
        cx: &impl HasDataLayout,
        alloc_bytes: impl FnOnce(&[u8], Align) -> InterpResult<'tcx, Bytes>,
        mut adjust_ptr: impl FnMut(Pointer<CtfeProvenance>) -> InterpResult<'tcx, Pointer<Prov>>,
    ) -> InterpResult<'tcx, Allocation<Prov, (), Bytes>> {
        // Copy the data.
        let mut bytes = alloc_bytes(&*self.bytes, self.align)?;
        // Adjust provenance of pointers stored in this allocation.
        let mut new_provenance = Vec::with_capacity(self.provenance.ptrs().len());
        let ptr_size = cx.data_layout().pointer_size.bytes_usize();
        let endian = cx.data_layout().endian;
        for &(offset, alloc_id) in self.provenance.ptrs().iter() {
            let idx = offset.bytes_usize();
            let ptr_bytes = &mut bytes[idx..idx + ptr_size];
            let bits = read_target_uint(endian, ptr_bytes).unwrap();
            let (ptr_prov, ptr_offset) =
                adjust_ptr(Pointer::new(alloc_id, Size::from_bytes(bits)))?.into_parts();
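            // Write the (possibly adjusted) offset back into the data bytes; the adjusted
            // provenance itself goes into the new provenance map built up here.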
            write_target_uint(endian, ptr_bytes, ptr_offset.bytes().into()).unwrap();
            new_provenance.push((offset, ptr_prov));
        }
        // Create allocation.
        interp_ok(Allocation {
            bytes,
            provenance: ProvenanceMap::from_presorted_ptrs(new_provenance),
            init_mask: self.init_mask.clone(),
            align: self.align,
            mutability: self.mutability,
            extra: self.extra,
        })
    }
}

/// Raw accessors. Provide access to otherwise private bytes.
impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes> {
    pub fn len(&self) -> usize {
        self.bytes.len()
    }

    pub fn size(&self) -> Size {
        Size::from_bytes(self.len())
    }

    /// Looks at a slice which may contain uninitialized bytes or provenance. This differs
    /// from `get_bytes_with_uninit_and_ptr` in that it does no provenance checks (even on the
    /// edges) at all.
    /// This must not be used for reads affecting the interpreter execution.
    pub fn inspect_with_uninit_and_ptr_outside_interpreter(&self, range: Range<usize>) -> &[u8] {
        &self.bytes[range]
    }

    /// Returns the mask indicating which bytes are initialized.
    pub fn init_mask(&self) -> &InitMask {
        &self.init_mask
    }

    /// Returns the provenance map.
    pub fn provenance(&self) -> &ProvenanceMap<Prov> {
        &self.provenance
    }
}

/// Byte accessors.
impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes> {
    /// This is the entirely abstraction-violating way to just grab the raw bytes without
    /// caring about provenance or initialization.
    ///
    /// This function also guarantees that the resulting pointer will remain stable
    /// even when new allocations are pushed to the `HashMap`. `mem_copy_repeatedly` relies
    /// on that.
    #[inline]
    pub fn get_bytes_unchecked(&self, range: AllocRange) -> &[u8] {
        &self.bytes[range.start.bytes_usize()..range.end().bytes_usize()]
    }

    /// Checks that these bytes are initialized, then strips provenance (if possible) and returns
    /// them.
    ///
    /// It is the caller's responsibility to check bounds and alignment beforehand.
    /// Most likely, you want to use the `PlaceTy` and `OperandTy`-based methods
    /// on `InterpCx` instead.
    #[inline]
    pub fn get_bytes_strip_provenance(
        &self,
        cx: &impl HasDataLayout,
        range: AllocRange,
    ) -> AllocResult<&[u8]> {
        self.init_mask.is_range_initialized(range).map_err(|uninit_range| {
            AllocError::InvalidUninitBytes(Some(BadBytesAccess {
                access: range,
                bad: uninit_range,
            }))
        })?;
        if !Prov::OFFSET_IS_ADDR && !self.provenance.range_empty(range, cx) {
            // Find the provenance.
            let (offset, _prov) = self
                .provenance
                .range_ptrs_get(range, cx)
                .first()
                .copied()
                .expect("there must be provenance somewhere here");
            let start = offset.max(range.start); // the pointer might begin before `range`!
            let end = (offset + cx.pointer_size()).min(range.end()); // the pointer might end after `range`!
            return Err(AllocError::ReadPointerAsInt(Some(BadBytesAccess {
                access: range,
                bad: AllocRange::from(start..end),
            })));
        }
        Ok(self.get_bytes_unchecked(range))
    }

    /// This is the entirely abstraction-violating way to just get mutable access to the raw bytes.
    /// Just calling this already marks everything as defined and removes provenance, so be sure to
    /// actually overwrite all the data there!
    ///
    /// It is the caller's responsibility to check bounds and alignment beforehand.
    /// Most likely, you want to use the `PlaceTy` and `OperandTy`-based methods
    /// on `InterpCx` instead.
    pub fn get_bytes_unchecked_for_overwrite(
        &mut self,
        cx: &impl HasDataLayout,
        range: AllocRange,
    ) -> AllocResult<&mut [u8]> {
        self.mark_init(range, true);
        self.provenance.clear(range, cx)?;

        Ok(&mut self.bytes[range.start.bytes_usize()..range.end().bytes_usize()])
    }

    /// A raw pointer variant of `get_bytes_unchecked_for_overwrite` that avoids invalidating
    /// existing immutable aliases into this memory.
    pub fn get_bytes_unchecked_for_overwrite_ptr(
        &mut self,
        cx: &impl HasDataLayout,
        range: AllocRange,
    ) -> AllocResult<*mut [u8]> {
        self.mark_init(range, true);
        self.provenance.clear(range, cx)?;

        assert!(range.end().bytes_usize() <= self.bytes.len()); // need to do our own bounds-check
        // Crucially, we go via `AllocBytes::as_mut_ptr`, not `AllocBytes::deref_mut`.
        let begin_ptr = self.bytes.as_mut_ptr().wrapping_add(range.start.bytes_usize());
        let len = range.end().bytes_usize() - range.start.bytes_usize();
        Ok(ptr::slice_from_raw_parts_mut(begin_ptr, len))
    }

    /// This gives direct mutable access to the entire buffer, just exposing its internal state
    /// without resetting anything. Directly exposes `AllocBytes::as_mut_ptr`. Only works if
    /// `OFFSET_IS_ADDR` is true.
    pub fn get_bytes_unchecked_raw_mut(&mut self) -> *mut u8 {
        assert!(Prov::OFFSET_IS_ADDR);
        self.bytes.as_mut_ptr()
    }

    /// This gives direct immutable access to the entire buffer, just exposing its internal state
    /// without resetting anything. Directly exposes `AllocBytes::as_ptr`. Only works if
    /// `OFFSET_IS_ADDR` is true.
    pub fn get_bytes_unchecked_raw(&self) -> *const u8 {
        assert!(Prov::OFFSET_IS_ADDR);
        self.bytes.as_ptr()
    }
}

/// Reading and writing.
impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes> {
    /// Sets the init bit for the given range.
    fn mark_init(&mut self, range: AllocRange, is_init: bool) {
        if range.size.bytes() == 0 {
            return;
        }
        assert!(self.mutability == Mutability::Mut);
        self.init_mask.set_range(range, is_init);
    }

    /// Reads a *non-ZST* scalar.
    ///
    /// If `read_provenance` is `true`, this will also read provenance; otherwise (if the machine
    /// supports that) provenance is entirely ignored.
    ///
    /// ZSTs can't be read because in order to obtain a `Pointer`, we need to check
    /// for ZSTness anyway due to integer pointers being valid for ZSTs.
    ///
    /// It is the caller's responsibility to check bounds and alignment beforehand.
    /// Most likely, you want to call `InterpCx::read_scalar` instead of this method.
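    ///
    /// A summary of how provenance is handled below: when reading provenance, we first look for
    /// pointer provenance stored exactly at `range.start`; failing that, if `Prov::OFFSET_IS_ADDR`
    /// we join the byte-wise provenance of the range, and otherwise the read only succeeds if the
    /// range carries no provenance at all.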
    pub fn read_scalar(
        &self,
        cx: &impl HasDataLayout,
        range: AllocRange,
        read_provenance: bool,
    ) -> AllocResult<Scalar<Prov>> {
        // First and foremost, if anything is uninit, bail.
        if self.init_mask.is_range_initialized(range).is_err() {
            return Err(AllocError::InvalidUninitBytes(None));
        }

        // Get the integer part of the result. We HAVE TO check provenance before returning this!
        let bytes = self.get_bytes_unchecked(range);
        let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();

        if read_provenance {
            assert_eq!(range.size, cx.data_layout().pointer_size);

            // When reading data with provenance, the easy case is finding provenance exactly where we
            // are reading, then we can put data and provenance back together and return that.
            if let Some(prov) = self.provenance.get_ptr(range.start) {
                // Now we can return the bits, with their appropriate provenance.
                let ptr = Pointer::new(prov, Size::from_bytes(bits));
                return Ok(Scalar::from_pointer(ptr, cx));
            }

            // If we can work on pointers byte-wise, join the byte-wise provenances.
            if Prov::OFFSET_IS_ADDR {
                let mut prov = self.provenance.get(range.start, cx);
                for offset in Size::from_bytes(1)..range.size {
                    let this_prov = self.provenance.get(range.start + offset, cx);
                    prov = Prov::join(prov, this_prov);
                }
                // Now use this provenance.
                let ptr = Pointer::new(prov, Size::from_bytes(bits));
                return Ok(Scalar::from_maybe_pointer(ptr, cx));
            } else {
                // Without OFFSET_IS_ADDR, the only remaining case we can handle is total absence of
                // provenance.
                if self.provenance.range_empty(range, cx) {
                    return Ok(Scalar::from_uint(bits, range.size));
                }
                // Else we have mixed provenance, that doesn't work.
                return Err(AllocError::ReadPartialPointer(range.start));
            }
        } else {
            // We are *not* reading a pointer.
            // If we can just ignore provenance or there is none, that's easy.
            if Prov::OFFSET_IS_ADDR || self.provenance.range_empty(range, cx) {
                // We just strip provenance.
                return Ok(Scalar::from_uint(bits, range.size));
            }
            // There is some provenance and we don't have OFFSET_IS_ADDR. This doesn't work.
            return Err(AllocError::ReadPointerAsInt(None));
        }
    }

    /// Writes a *non-ZST* scalar.
    ///
    /// ZSTs can't be written because in order to obtain a `Pointer`, we need to check
    /// for ZSTness anyway due to integer pointers being valid for ZSTs.
    ///
    /// It is the caller's responsibility to check bounds and alignment beforehand.
    /// Most likely, you want to call `InterpCx::write_scalar` instead of this method.
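    ///
    /// If `val` is a pointer, its offset is what gets written to the data bytes, and its
    /// provenance is recorded separately in the provenance map (see the body below).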
    pub fn write_scalar(
        &mut self,
        cx: &impl HasDataLayout,
        range: AllocRange,
        val: Scalar<Prov>,
    ) -> AllocResult {
        assert!(self.mutability == Mutability::Mut);

        // `to_bits_or_ptr_internal` is the right method because we just want to store this data
        // as-is into memory. This also double-checks that `val.size()` matches `range.size`.
        let (bytes, provenance) = match val.to_bits_or_ptr_internal(range.size)? {
            Right(ptr) => {
                let (provenance, offset) = ptr.into_parts();
                (u128::from(offset.bytes()), Some(provenance))
            }
            Left(data) => (data, None),
        };

        let endian = cx.data_layout().endian;
        // Yes we do overwrite all the bytes in `dst`.
        let dst = self.get_bytes_unchecked_for_overwrite(cx, range)?;
        write_target_uint(endian, dst, bytes).unwrap();

        // See if we have to also store some provenance.
        if let Some(provenance) = provenance {
            assert_eq!(range.size, cx.data_layout().pointer_size);
            self.provenance.insert_ptr(range.start, provenance, cx);
        }

        Ok(())
    }

    /// Write "uninit" to the given memory range.
    pub fn write_uninit(&mut self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult {
        self.mark_init(range, false);
        self.provenance.clear(range, cx)?;
        Ok(())
    }

    /// Initialize all previously uninitialized bytes in the entire allocation, and set
    /// provenance of everything to `Wildcard`. Before calling this, make sure all
    /// provenance in this allocation is exposed!
    pub fn prepare_for_native_write(&mut self) -> AllocResult {
        let full_range = AllocRange { start: Size::ZERO, size: Size::from_bytes(self.len()) };
        // Overwrite uninitialized bytes with 0, to ensure we don't leak whatever their value happens to be.
        for chunk in self.init_mask.range_as_init_chunks(full_range) {
            if !chunk.is_init() {
                let uninit_bytes = &mut self.bytes
                    [chunk.range().start.bytes_usize()..chunk.range().end.bytes_usize()];
                uninit_bytes.fill(0);
            }
        }
        // Mark everything as initialized now.
        self.mark_init(full_range, true);

        // Set provenance of all bytes to wildcard.
        self.provenance.write_wildcards(self.len());

        // Also expose the provenance of the interpreter-level allocation, so it can
        // be written by FFI. The `black_box` is defensive programming as LLVM likes
        // to (incorrectly) optimize away ptr2int casts whose result is unused.
        std::hint::black_box(self.get_bytes_unchecked_raw_mut().expose_provenance());

        Ok(())
    }

    /// Remove all provenance in the given memory range.
    pub fn clear_provenance(&mut self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult {
        self.provenance.clear(range, cx)?;
        Ok(())
    }

    /// Applies a previously prepared provenance copy.
    /// The affected range, as defined in the parameters to `provenance().prepare_copy` is expected
    /// to be clear of provenance.
    ///
    /// This is dangerous to use as it can violate internal `Allocation` invariants!
    /// It only exists to support an efficient implementation of `mem_copy_repeatedly`.
    pub fn provenance_apply_copy(&mut self, copy: ProvenanceCopy<Prov>) {
        self.provenance.apply_copy(copy)
    }

    /// Applies a previously prepared copy of the init mask.
    ///
    /// This is dangerous to use as it can violate internal `Allocation` invariants!
    /// It only exists to support an efficient implementation of `mem_copy_repeatedly`.
    pub fn init_mask_apply_copy(&mut self, copy: InitCopy, range: AllocRange, repeat: u64) {
        self.init_mask.apply_copy(copy, range, repeat)
    }
}