
rustc_data_structures/vec_cache.rs

//! VecCache maintains a mapping from K to (V, I) pairs. K and I must be roughly u32-sized, and V
//! must be Copy.
//!
//! VecCache supports efficient concurrent put/get across the key space, with write-once semantics
//! (i.e., a given key can only be put once). Puts that race with the initial put for a key will
//! panic; later duplicate puts are ignored.
//!
//! This is currently used for query caching.
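//!
//! A minimal usage sketch (illustrative only; `u32` stands in here for real key and index types
//! that implement `Idx`):
//!
//! ```ignore
//! let cache: VecCache<u32, u64, u32> = VecCache::default();
//! assert!(cache.lookup(&7).is_none());
//!
//! // First put for key 7: stores the value together with its associated index.
//! cache.complete(7, 42u64, 3);
//! assert_eq!(cache.lookup(&7), Some((42, 3)));
//!
//! // Visit everything cached so far (key, value, index).
//! cache.iter(&mut |key, value, index| println!("{key:?} -> ({value:?}, {index:?})"));
//! ```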

use std::fmt::Debug;
use std::marker::PhantomData;
use std::sync::atomic::{AtomicPtr, AtomicU32, AtomicUsize, Ordering};

use rustc_index::Idx;

struct Slot<V> {
    // We never construct &Slot<V> so it's fine for this to not be in an UnsafeCell.
    value: V,
    // This is both an index and a once-lock.
    //
    // 0: not yet initialized.
    // 1: lock held, initializing.
    // 2..u32::MAX - 2: initialized.
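    //    (For example, a slot completed with index 5 stores 5 + 2 = 7 here; loads that see 0 or 1
    //    treat the entry as absent.)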
    index_and_lock: AtomicU32,
}

/// This uniquely identifies a single `Slot<V>` entry in the buckets map, and provides accessors for
/// either getting the value or putting a value.
#[derive(Copy, Clone, Debug)]
struct SlotIndex {
    // the index of the bucket in VecCache (0 to 20)
    bucket_idx: usize,
    // the index of the slot within the bucket
    index_in_bucket: usize,
}

// This makes sure the counts are consistent with what we allocate, precomputing each bucket at
// compile time. Visiting all powers of two is enough to hit all the buckets.
//
// We confirm counts are accurate in the slot_index_exhaustive test.
const ENTRIES_BY_BUCKET: [usize; BUCKETS] = {
    let mut entries = [0; BUCKETS];
    let mut key = 0;
    loop {
        let si = SlotIndex::from_index(key);
        entries[si.bucket_idx] = si.entries();
        if key == 0 {
            key = 1;
        } else if key == (1 << 31) {
            break;
        } else {
            key <<= 1;
        }
    }
    entries
};
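// (Derived from `SlotIndex::entries` below, the resulting array is
// [4096, 4096, 8192, 16384, ..., 1 << 31].)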

const BUCKETS: usize = 21;

impl SlotIndex {
    /// The total possible number of entries in the bucket
    const fn entries(&self) -> usize {
        if self.bucket_idx == 0 { 1 << 12 } else { 1 << (self.bucket_idx + 11) }
    }

    // This unpacks a flat u32 index into identifying which bucket it belongs to and the offset
    // within that bucket. As noted in the VecCache docs, buckets double in size with each index.
    // Typically that would mean 32 buckets (2^0 + 2^1 + ... + 2^31 = 2^32 - 1 entries), but to
    // reduce the size of the VecCache struct and avoid uselessly small allocations, we instead
    // have the first bucket hold 2**12 entries. To simplify the math, the second bucket also has
    // 2**12 entries, and buckets double from there.
    //
    // We assert that [0, 2**32 - 1] uniquely map through this function to individual, consecutive
    // slots (see `slot_index_exhaustive` in tests).
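    //
    // Worked examples (illustrative): idx = 4095 stays in bucket 0 at offset 4095; idx = 4096 has
    // ilog2 = 12, so bucket_idx = 12 - 12 + 1 = 1 and offset 4096 - 2^12 = 0; idx = 70_000 has
    // ilog2 = 16, so bucket_idx = 5 and offset 70_000 - 2^16 = 4_464, within bucket 5's 2^16 slots.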
    #[inline]
    const fn from_index(idx: u32) -> Self {
        const FIRST_BUCKET_SHIFT: usize = 12;
        if idx < (1 << FIRST_BUCKET_SHIFT) {
            return SlotIndex { bucket_idx: 0, index_in_bucket: idx as usize };
        }
        // We already ruled out idx 0, so this `ilog2` never panics (and the check optimizes away)
        let bucket = idx.ilog2() as usize;
        let entries = 1 << bucket;
        SlotIndex {
            bucket_idx: bucket - FIRST_BUCKET_SHIFT + 1,
            index_in_bucket: idx as usize - entries,
        }
    }

    // SAFETY: Buckets must be managed solely by functions here (i.e., get/put on SlotIndex) and
    // `self` comes from SlotIndex::from_index
    #[inline]
    unsafe fn get<V: Copy>(&self, buckets: &[AtomicPtr<Slot<V>>; 21]) -> Option<(V, u32)> {
        // SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 20, i.e.,
        // in-bounds of buckets. See `from_index` for computation.
        let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
        let ptr = bucket.load(Ordering::Acquire);
        // Bucket is not yet initialized: then we obviously won't find this entry in that bucket.
        if ptr.is_null() {
            return None;
        }
        debug_assert!(self.index_in_bucket < self.entries());
        // SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
        // must be inbounds.
        let slot = unsafe { ptr.add(self.index_in_bucket) };

        // SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for
        // AtomicU32 access.
        let index_and_lock = unsafe { &(*slot).index_and_lock };
        let current = index_and_lock.load(Ordering::Acquire);
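        // (This Acquire load pairs with the Release store in `put`/`put_unique`: observing an
        // index >= 2 below also guarantees that the write of `value` is visible here.)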
        let index = match current {
            0 => return None,
            // Treat "initializing" as actually just not initialized at all.
            // The only reason this is a separate state is that `complete` calls could race and
            // we can't allow that, but from the load perspective there's no difference.
            1 => return None,
            _ => current - 2,
        };

        // SAFETY:
        // * slot is a valid pointer (buckets are always valid for the index we get).
        // * value is initialized since we saw a >= 2 index above.
        // * `V: Copy`, so safe to read.
        let value = unsafe { (*slot).value };
        Some((value, index))
    }

    fn bucket_ptr<V>(&self, bucket: &AtomicPtr<Slot<V>>) -> *mut Slot<V> {
        let ptr = bucket.load(Ordering::Acquire);
        if ptr.is_null() { Self::initialize_bucket(bucket, self.bucket_idx) } else { ptr }
    }

    #[cold]
    #[inline(never)]
    fn initialize_bucket<V>(bucket: &AtomicPtr<Slot<V>>, bucket_idx: usize) -> *mut Slot<V> {
        static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

        // If we are initializing the bucket, then acquire a global lock.
        //
        // This path is quite cold, so it's cheap to use a global lock. This ensures that we never
        // have multiple allocations for the same bucket.
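        //
        // (Together with the re-check of `bucket` below, this is the classic double-checked
        // locking pattern: the common case in `bucket_ptr` is a lock-free Acquire load, and only
        // the null case falls back to this mutex.)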
        let _allocator_guard = LOCK.lock().unwrap_or_else(|e| e.into_inner());

        let ptr = bucket.load(Ordering::Acquire);

        // OK, now under the allocator lock, if we're still null then it's definitely us that will
        // initialize this bucket.
        if ptr.is_null() {
            let bucket_len = SlotIndex { bucket_idx, index_in_bucket: 0 }.entries();
            let bucket_layout = std::alloc::Layout::array::<Slot<V>>(bucket_len).unwrap();
            // This is more of a sanity check -- this code is very cold, so it's safe to pay a
            // little extra cost here.
            assert!(bucket_layout.size() > 0);
            // SAFETY: Just checked that size is non-zero.
            let allocated = unsafe { std::alloc::alloc_zeroed(bucket_layout).cast::<Slot<V>>() };
            if allocated.is_null() {
                std::alloc::handle_alloc_error(bucket_layout);
            }
            bucket.store(allocated, Ordering::Release);
            allocated
        } else {
            // Otherwise some other thread initialized this bucket after we took the lock. In that
            // case, just return early.
            ptr
        }
    }

    /// Returns true if this successfully put into the map.
    #[inline]
    fn put<V>(&self, buckets: &[AtomicPtr<Slot<V>>; 21], value: V, extra: u32) -> bool {
        // SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 20, i.e.,
        // in-bounds of buckets.
        let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
        let ptr = self.bucket_ptr(bucket);

        debug_assert!(self.index_in_bucket < self.entries());
        // SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
        // must be inbounds.
        let slot = unsafe { ptr.add(self.index_in_bucket) };

        // SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for
        // AtomicU32 access.
        let index_and_lock = unsafe { &(*slot).index_and_lock };
        match index_and_lock.compare_exchange(0, 1, Ordering::AcqRel, Ordering::Acquire) {
            Ok(_) => {
                // We have acquired the initialization lock. It is our job to write `value` and
                // then set the lock to the real index.

                unsafe {
                    (&raw mut (*slot).value).write(value);
                }

                index_and_lock.store(extra.checked_add(2).unwrap(), Ordering::Release);

                true
            }

            // Treat "initializing" as the caller's fault. Callers are responsible for ensuring that
            // there are no races on initialization. In the compiler's current usage for query
            // caches, that's the "active query map" which ensures each query actually runs once
            // (even if concurrently started).
            Err(1) => panic!("caller raced calls to put()"),

            // This slot was already populated: there is nothing to write, so just report that no
            // new entry was inserted.
            Err(_) => false,
        }
    }

    /// Inserts into the map, given that the caller has exclusive access to this slot, so the write
    /// cannot race with other threads.
    #[inline]
    unsafe fn put_unique<V>(&self, buckets: &[AtomicPtr<Slot<V>>; 21], value: V, extra: u32) {
        // SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 20, i.e.,
        // in-bounds of buckets.
        let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
        let ptr = self.bucket_ptr(bucket);

        debug_assert!(self.index_in_bucket < self.entries());
        // SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
        // must be inbounds.
        let slot = unsafe { ptr.add(self.index_in_bucket) };

        // SAFETY: We know our slot is unique as a precondition of this function, so this can't race.
        unsafe {
            (&raw mut (*slot).value).write(value);
        }

        // SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for
        // AtomicU32 access.
        let index_and_lock = unsafe { &(*slot).index_and_lock };

        index_and_lock.store(extra.checked_add(2).unwrap(), Ordering::Release);
    }
}

/// In-memory cache for queries whose keys are densely-numbered IDs
/// (e.g. `CrateNum`, `LocalDefId`), and can therefore be used as indices
/// into a dense vector of cached values.
///
/// (As of [#124780] the underlying storage is not an actual `Vec`, but rather
/// a series of increasingly-large buckets, for improved performance when the
/// parallel frontend is using multiple threads.)
///
/// Each entry in the cache stores the query's return value (`V`), and also
/// an associated index (`I`), which in practice is a `DepNodeIndex` used for
/// query dependency tracking.
///
/// [#124780]: https://github.com/rust-lang/rust/pull/124780
pub struct VecCache<K: Idx, V, I> {
    // Entries per bucket:
    // Bucket  0:       4096 (2^12)
    // Bucket  1:       4096 (2^12)
    // Bucket  2:       8192 (2^13)
    // Bucket  3:      16384 (2^14)
    // ...
    // Bucket 19: 1073741824 (2^30)
    // Bucket 20: 2147483648 (2^31)
    // The total number of entries if all buckets are initialized is 2^32, one slot for every
    // possible u32 key.
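    //
    // (Illustrative check: 2 * 2^12 + 2^13 + ... + 2^31 = 2^12 + (2^32 - 2^12) = 2^32, matching
    // the exhaustive key mapping asserted in `slot_index_exhaustive`.)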
    buckets: [AtomicPtr<Slot<V>>; BUCKETS],

    // In the compiler's current usage these are only *read* during incremental and self-profiling.
    // They are an optimization over iterating the full buckets array.
    present: [AtomicPtr<Slot<()>>; BUCKETS],
    len: AtomicUsize,

    key: PhantomData<(K, I)>,
}

impl<K: Idx, V, I> Default for VecCache<K, V, I> {
    fn default() -> Self {
        VecCache {
            buckets: Default::default(),
            key: PhantomData,
            len: Default::default(),
            present: Default::default(),
        }
    }
}

// SAFETY: No access to `V` is made.
unsafe impl<K: Idx, #[may_dangle] V, I> Drop for VecCache<K, V, I> {
    fn drop(&mut self) {
        // We have unique ownership, so no locks etc. are needed. Since `K` and `V` are both `Copy`,
        // we are also guaranteed to just need to deallocate any large arrays (not iterate over
        // contents).
        //
        // Confirm no need to deallocate individual entries. Note that `V: Copy` is asserted on
        // insert/lookup but not necessarily construction, primarily to avoid annoyingly propagating
        // the bounds into struct definitions everywhere.
        assert!(!std::mem::needs_drop::<K>());
        assert!(!std::mem::needs_drop::<V>());

        for (idx, bucket) in self.buckets.iter().enumerate() {
            let bucket = bucket.load(Ordering::Acquire);
            if !bucket.is_null() {
                let layout = std::alloc::Layout::array::<Slot<V>>(ENTRIES_BY_BUCKET[idx]).unwrap();
                unsafe {
                    std::alloc::dealloc(bucket.cast(), layout);
                }
            }
        }

        for (idx, bucket) in self.present.iter().enumerate() {
            let bucket = bucket.load(Ordering::Acquire);
            if !bucket.is_null() {
                let layout = std::alloc::Layout::array::<Slot<()>>(ENTRIES_BY_BUCKET[idx]).unwrap();
                unsafe {
                    std::alloc::dealloc(bucket.cast(), layout);
                }
            }
        }
    }
}

impl<K, V, I> VecCache<K, V, I>
where
    K: Eq + Idx + Copy + Debug,
    V: Copy,
    I: Idx + Copy,
{
    #[inline(always)]
    pub fn lookup(&self, key: &K) -> Option<(V, I)> {
        let key = u32::try_from(key.index()).unwrap();
        let slot_idx = SlotIndex::from_index(key);
        match unsafe { slot_idx.get(&self.buckets) } {
            Some((value, idx)) => Some((value, I::new(idx as usize))),
            None => None,
        }
    }

    #[inline]
    pub fn complete(&self, key: K, value: V, index: I) {
        let key = u32::try_from(key.index()).unwrap();
        let slot_idx = SlotIndex::from_index(key);
        if slot_idx.put(&self.buckets, value, index.index() as u32) {
            let present_idx = self.len.fetch_add(1, Ordering::Relaxed);
            let slot = SlotIndex::from_index(u32::try_from(present_idx).unwrap());
            // SAFETY: We should always be uniquely putting due to `len` fetch_add returning unique values.
            // We can't get here if `len` overflows because `put` will not succeed u32::MAX + 1 times
            // as it will run out of slots.
            unsafe { slot.put_unique(&self.present, (), key) };
        }
    }

    pub fn iter(&self, f: &mut dyn FnMut(&K, &V, I)) {
        for idx in 0..self.len.load(Ordering::Acquire) {
            let key = SlotIndex::from_index(idx as u32);
            match unsafe { key.get(&self.present) } {
                // This shouldn't happen in our current usage (iter is really only
                // used long after queries are done running), but if we hit this in practice it's
                // probably fine to just break early.
                None => unreachable!(),
                Some(((), key)) => {
                    let key = K::new(key as usize);
                    // unwrap() is OK: present entries are always written only after we put the real
                    // entry.
                    let value = self.lookup(&key).unwrap();
                    f(&key, &value.0, value.1);
                }
            }
        }
    }
}

#[cfg(test)]
mod tests;