//! VecCache maintains a mapping from K -> (V, I) pairing. K and I must be roughly u32-sized, and V
//! must be Copy.
//!
//! VecCache supports efficient concurrent put/get across the key space, with write-once semantics
//! (i.e., a given key can only be put once). Subsequent puts will panic.
//!
//! This is currently used for query caching.
89use std::fmt::{self, Debug};
10use std::marker::PhantomData;
11use std::ops::{Index, IndexMut};
12use std::sync::atomic::{AtomicPtr, AtomicU32, AtomicUsize, Ordering};
1314use rustc_index::Idx;
1516#[cfg(test)]
17mod tests;
/// A single cache entry: the cached value plus an atomic word that serves as
/// a once-lock guarding initialization of `value`.
struct Slot<V> {
    // We never construct &Slot<V> so it's fine for this to not be in an UnsafeCell.
    value: V,
    // This is both an index and a once-lock.
    //
    // 0: not yet initialized.
    // 1: lock held, initializing.
    // 2..u32::MAX - 2: initialized. (The stored value is the user index plus 2.)
    index_and_lock: AtomicU32,
}
/// This uniquely identifies a single `Slot<V>` entry in the buckets map, and provides accessors for
/// either getting the value or putting a value.
#[derive(Copy, Clone, Debug)]
struct SlotIndex {
    // the index of the bucket in VecCache (0 to 20)
    bucket_idx: BucketIndex,
    // the index of the slot within the bucket
    index_in_bucket: usize,
}
// This makes sure the counts are consistent with what we allocate, precomputing each bucket at
// compile-time. Visiting all powers of two is enough to hit all the buckets.
//
// We confirm counts are accurate in the slot_index_exhaustive test.
const ENTRIES_BY_BUCKET: [usize; BUCKETS] = {
    let mut entries = [0; BUCKETS];
    let mut key = 0;
    loop {
        let si = SlotIndex::from_index(key);
        entries[si.bucket_idx.to_usize()] = si.bucket_idx.capacity();
        if key == 0 {
            key = 1;
        } else if key == (1 << 31) {
            // 2^31 is the highest power of two in the u32 key space; every
            // bucket has been visited by now.
            break;
        } else {
            key <<= 1;
        }
    }
    entries
};

/// Number of buckets in the cache; their capacities sum to 2^32 entries.
const BUCKETS: usize = 21;
impl SlotIndex {
    /// Unpacks a flat 32-bit index into a [`BucketIndex`] and a slot offset within that bucket.
    #[inline]
    const fn from_index(idx: u32) -> Self {
        let (bucket_idx, index_in_bucket) = BucketIndex::from_flat_index(idx as usize);
        SlotIndex { bucket_idx, index_in_bucket }
    }

    // SAFETY: Buckets must be managed solely by functions here (i.e., get/put on SlotIndex) and
    // `self` comes from SlotIndex::from_index
    #[inline]
    unsafe fn get<V: Copy>(&self, buckets: &[AtomicPtr<Slot<V>>; 21]) -> Option<(V, u32)> {
        let bucket = &buckets[self.bucket_idx];
        let ptr = bucket.load(Ordering::Acquire);
        // Bucket is not yet initialized: then we obviously won't find this entry in that bucket.
        if ptr.is_null() {
            return None;
        }
        debug_assert!(self.index_in_bucket < self.bucket_idx.capacity());
        // SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
        // must be inbounds.
        let slot = unsafe { ptr.add(self.index_in_bucket) };

        // SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for
        // AtomicU32 access.
        let index_and_lock = unsafe { &(*slot).index_and_lock };
        let current = index_and_lock.load(Ordering::Acquire);
        let index = match current {
            0 => return None,
            // Treat "initializing" as actually just not initialized at all.
            // The only reason this is a separate state is that `complete` calls could race and
            // we can't allow that, but from load perspective there's no difference.
            1 => return None,
            // Stored indices are offset by 2 to make room for the two lock states.
            _ => current - 2,
        };

        // SAFETY:
        // * slot is a valid pointer (buckets are always valid for the index we get).
        // * value is initialized since we saw a >= 2 index above.
        // * `V: Copy`, so safe to read.
        let value = unsafe { (*slot).value };
        Some((value, index))
    }

    /// Returns the bucket's base pointer, allocating the bucket first if it is
    /// still null.
    fn bucket_ptr<V>(&self, bucket: &AtomicPtr<Slot<V>>) -> *mut Slot<V> {
        let ptr = bucket.load(Ordering::Acquire);
        if ptr.is_null() { Self::initialize_bucket(bucket, self.bucket_idx) } else { ptr }
    }

    /// Allocates (zeroed) storage for `bucket` under a global lock, or returns
    /// the pointer some other thread stored while we waited for the lock.
    #[cold]
    #[inline(never)]
    fn initialize_bucket<V>(bucket: &AtomicPtr<Slot<V>>, bucket_idx: BucketIndex) -> *mut Slot<V> {
        static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

        // If we are initializing the bucket, then acquire a global lock.
        //
        // This path is quite cold, so it's cheap to use a global lock. This ensures that we never
        // have multiple allocations for the same bucket.
        let _allocator_guard = LOCK.lock().unwrap_or_else(|e| e.into_inner());

        let ptr = bucket.load(Ordering::Acquire);

        // OK, now under the allocator lock, if we're still null then it's definitely us that will
        // initialize this bucket.
        if ptr.is_null() {
            let bucket_layout =
                std::alloc::Layout::array::<Slot<V>>(bucket_idx.capacity()).unwrap();
            // This is more of a sanity check -- this code is very cold, so it's safe to pay a
            // little extra cost here.
            assert!(bucket_layout.size() > 0);
            // SAFETY: Just checked that size is non-zero.
            let allocated = unsafe { std::alloc::alloc_zeroed(bucket_layout).cast::<Slot<V>>() };
            if allocated.is_null() {
                std::alloc::handle_alloc_error(bucket_layout);
            }
            bucket.store(allocated, Ordering::Release);
            allocated
        } else {
            // Otherwise some other thread initialized this bucket after we took the lock. In that
            // case, just return early.
            ptr
        }
    }

    /// Returns true if this successfully put into the map.
    #[inline]
    fn put<V>(&self, buckets: &[AtomicPtr<Slot<V>>; 21], value: V, extra: u32) -> bool {
        let bucket = &buckets[self.bucket_idx];
        let ptr = self.bucket_ptr(bucket);

        debug_assert!(self.index_in_bucket < self.bucket_idx.capacity());
        // SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
        // must be inbounds.
        let slot = unsafe { ptr.add(self.index_in_bucket) };

        // SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for
        // AtomicU32 access.
        let index_and_lock = unsafe { &(*slot).index_and_lock };
        match index_and_lock.compare_exchange(0, 1, Ordering::AcqRel, Ordering::Acquire) {
            Ok(_) => {
                // We have acquired the initialization lock. It is our job to write `value` and
                // then set the lock to the real index.

                // SAFETY: We hold the once-lock (0 -> 1 CAS succeeded), so no
                // other thread reads or writes this slot's value until our
                // Release store below publishes it.
                unsafe {
                    (&raw mut (*slot).value).write(value);
                }

                // checked_add guards against an `extra` of u32::MAX - 1 or
                // above, which would wrap into the lock states.
                index_and_lock.store(extra.checked_add(2).unwrap(), Ordering::Release);

                true
            }

            // Treat "initializing" as the caller's fault. Callers are responsible for ensuring that
            // there are no races on initialization. In the compiler's current usage for query
            // caches, that's the "active query map" which ensures each query actually runs once
            // (even if concurrently started).
            Err(1) => panic!("caller raced calls to put()"),

            // This slot was already populated. Also ignore, currently this is the same as
            // "initializing".
            Err(_) => false,
        }
    }

    /// Inserts into the map, given that the slot is unique, so it won't race with other threads.
    #[inline]
    unsafe fn put_unique<V>(&self, buckets: &[AtomicPtr<Slot<V>>; 21], value: V, extra: u32) {
        let bucket = &buckets[self.bucket_idx];
        let ptr = self.bucket_ptr(bucket);

        debug_assert!(self.index_in_bucket < self.bucket_idx.capacity());
        // SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
        // must be inbounds.
        let slot = unsafe { ptr.add(self.index_in_bucket) };

        // SAFETY: We know our slot is unique as a precondition of this function, so this can't race.
        unsafe {
            (&raw mut (*slot).value).write(value);
        }

        // SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for
        // AtomicU32 access.
        let index_and_lock = unsafe { &(*slot).index_and_lock };

        index_and_lock.store(extra.checked_add(2).unwrap(), Ordering::Release);
    }
}
/// In-memory cache for queries whose keys are densely-numbered IDs
/// (e.g `CrateNum`, `LocalDefId`), and can therefore be used as indices
/// into a dense vector of cached values.
///
/// (As of [#124780] the underlying storage is not an actual `Vec`, but rather
/// a series of increasingly-large buckets, for improved performance when the
/// parallel frontend is using multiple threads.)
///
/// Each entry in the cache stores the query's return value (`V`), and also
/// an associated index (`I`), which in practice is a `DepNodeIndex` used for
/// query dependency tracking.
///
/// [#124780]: https://github.com/rust-lang/rust/pull/124780
pub struct VecCache<K: Idx, V, I> {
    // Entries per bucket:
    // Bucket 0: 4096 2^12
    // Bucket 1: 4096 2^12
    // Bucket 2: 8192
    // Bucket 3: 16384
    // ...
    // Bucket 19: 1073741824
    // Bucket 20: 2147483648
    // The total number of entries if all buckets are initialized is 2^32.
    buckets: [AtomicPtr<Slot<V>>; BUCKETS],

    // In the compiler's current usage these are only *read* during incremental and self-profiling.
    // They are an optimization over iterating the full buckets array.
    //
    // `present` records, densely in insertion order, the keys that have been
    // completed; `len` is the count of such entries.
    present: [AtomicPtr<Slot<()>>; BUCKETS],
    len: AtomicUsize,

    // Zero-sized marker tying the cache to its key and index types.
    key: PhantomData<(K, I)>,
}
243244impl<K: Idx, V, I> Defaultfor VecCache<K, V, I> {
245fn default() -> Self {
246VecCache {
247 buckets: Default::default(),
248 key: PhantomData,
249 len: Default::default(),
250 present: Default::default(),
251 }
252 }
253}
// SAFETY: No access to `V` is made.
unsafe impl<K: Idx, #[may_dangle] V, I> Drop for VecCache<K, V, I> {
    fn drop(&mut self) {
        // We have unique ownership, so no locks etc. are needed. Since `K` and `V` are both `Copy`,
        // we are also guaranteed to just need to deallocate any large arrays (not iterate over
        // contents).
        //
        // Confirm no need to deallocate individual entries. Note that `V: Copy` is asserted on
        // insert/lookup but not necessarily construction, primarily to avoid annoyingly propagating
        // the bounds into struct definitions everywhere.
        assert!(!std::mem::needs_drop::<K>());
        assert!(!std::mem::needs_drop::<V>());

        // Free each allocated value bucket; null means it was never initialized.
        for (idx, bucket) in BucketIndex::enumerate_buckets(&self.buckets) {
            let bucket = bucket.load(Ordering::Acquire);
            if !bucket.is_null() {
                let layout = std::alloc::Layout::array::<Slot<V>>(ENTRIES_BY_BUCKET[idx]).unwrap();
                unsafe {
                    std::alloc::dealloc(bucket.cast(), layout);
                }
            }
        }

        // Same again for the `present` side table (element type `()`).
        for (idx, bucket) in BucketIndex::enumerate_buckets(&self.present) {
            let bucket = bucket.load(Ordering::Acquire);
            if !bucket.is_null() {
                let layout = std::alloc::Layout::array::<Slot<()>>(ENTRIES_BY_BUCKET[idx]).unwrap();
                unsafe {
                    std::alloc::dealloc(bucket.cast(), layout);
                }
            }
        }
    }
}
impl<K, V, I> VecCache<K, V, I>
where
    K: Eq + Idx + Copy + Debug,
    V: Copy,
    I: Idx + Copy,
{
    /// Looks up `key`, returning the cached value and its associated index if
    /// the entry has been completed.
    #[inline(always)]
    pub fn lookup(&self, key: &K) -> Option<(V, I)> {
        let key = u32::try_from(key.index()).unwrap();
        let slot_idx = SlotIndex::from_index(key);
        // SAFETY: `slot_idx` comes from `SlotIndex::from_index`, and `self.buckets` is only ever
        // accessed through the SlotIndex get/put functions.
        match unsafe { slot_idx.get(&self.buckets) } {
            Some((value, idx)) => Some((value, I::new(idx as usize))),
            None => None,
        }
    }

    /// Stores `(value, index)` for `key`. Write-once: concurrent racing puts
    /// for the same key panic (in `SlotIndex::put`); a put for an
    /// already-completed key is ignored.
    #[inline]
    pub fn complete(&self, key: K, value: V, index: I) {
        let key = u32::try_from(key.index()).unwrap();
        let slot_idx = SlotIndex::from_index(key);
        if slot_idx.put(&self.buckets, value, index.index() as u32) {
            // Record the key in the `present` side table, at the next dense position.
            let present_idx = self.len.fetch_add(1, Ordering::Relaxed);
            let slot = SlotIndex::from_index(u32::try_from(present_idx).unwrap());
            // SAFETY: We should always be uniquely putting due to `len` fetch_add returning unique values.
            // We can't get here if `len` overflows because `put` will not succeed u32::MAX + 1 times
            // as it will run out of slots.
            unsafe { slot.put_unique(&self.present, (), key) };
        }
    }

    /// Calls `f` with each completed `(key, value, index)` entry, in roughly
    /// insertion order (via the `present` side table).
    pub fn for_each(&self, f: &mut dyn FnMut(&K, &V, I)) {
        for idx in 0..self.len.load(Ordering::Acquire) {
            let key = SlotIndex::from_index(idx as u32);
            // SAFETY: `key` comes from `SlotIndex::from_index`, and `self.present` is only ever
            // accessed through the SlotIndex get/put functions.
            match unsafe { key.get(&self.present) } {
                // This shouldn't happen in our current usage (iter is really only
                // used long after queries are done running), but if we hit this in practice it's
                // probably fine to just break early.
                None => unreachable!(),
                Some(((), key)) => {
                    let key = K::new(key as usize);
                    // unwrap() is OK: present entries are always written only after we put the real
                    // entry.
                    let value = self.lookup(&key).unwrap();
                    f(&key, &value.0, value.1);
                }
            }
        }
    }

    /// Number of completed entries in the cache.
    pub fn len(&self) -> usize {
        self.len.load(Ordering::Acquire)
    }
}
/// Index into an array of buckets.
///
/// Using an enum lets us tell the compiler that values range from 0 to 20,
/// allowing array bounds checks to be optimized away,
/// without having to resort to pattern types or other unstable features.
#[derive(Clone, Copy, PartialEq, Eq)]
#[repr(usize)]
enum BucketIndex {
    // tidy-alphabetical-start
    Bucket00,
    Bucket01,
    Bucket02,
    Bucket03,
    Bucket04,
    Bucket05,
    Bucket06,
    Bucket07,
    Bucket08,
    Bucket09,
    Bucket10,
    Bucket11,
    Bucket12,
    Bucket13,
    Bucket14,
    Bucket15,
    Bucket16,
    Bucket17,
    Bucket18,
    Bucket19,
    Bucket20,
    // tidy-alphabetical-end
}
impl Debug for BucketIndex {
    /// Prints the numeric bucket index, delegating to `usize`'s `Debug` impl
    /// so formatter flags (width, etc.) are respected.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        Debug::fmt(&self.to_usize(), f)
    }
}
impl BucketIndex {
    /// Capacity of bucket 0 (and also of bucket 1).
    const BUCKET_0_CAPACITY: usize = 1 << (Self::NONZERO_BUCKET_SHIFT_ADJUST + 1);
    /// Adjustment factor from the highest-set-bit-position of a flat index,
    /// to its corresponding bucket number.
    ///
    /// For example, the first flat-index in bucket 2 is 8192.
    /// Its highest-set-bit-position is `(8192).ilog2() == 13`, and subtracting
    /// the adjustment factor of 11 gives the bucket number of 2.
    const NONZERO_BUCKET_SHIFT_ADJUST: usize = 11;

    /// Returns the raw bucket number (0..=20).
    #[inline(always)]
    const fn to_usize(self) -> usize {
        self as usize
    }

    /// Converts a raw bucket number back into a variant.
    ///
    /// Panics if `raw >= BUCKETS`.
    #[inline(always)]
    const fn from_raw(raw: usize) -> Self {
        match raw {
            // tidy-alphabetical-start
            00 => Self::Bucket00,
            01 => Self::Bucket01,
            02 => Self::Bucket02,
            03 => Self::Bucket03,
            04 => Self::Bucket04,
            05 => Self::Bucket05,
            06 => Self::Bucket06,
            07 => Self::Bucket07,
            08 => Self::Bucket08,
            09 => Self::Bucket09,
            10 => Self::Bucket10,
            11 => Self::Bucket11,
            12 => Self::Bucket12,
            13 => Self::Bucket13,
            14 => Self::Bucket14,
            15 => Self::Bucket15,
            16 => Self::Bucket16,
            17 => Self::Bucket17,
            18 => Self::Bucket18,
            19 => Self::Bucket19,
            20 => Self::Bucket20,
            // tidy-alphabetical-end
            _ => panic!("bucket index out of range"),
        }
    }

    /// Total number of slots in this bucket.
    #[inline(always)]
    const fn capacity(self) -> usize {
        match self {
            Self::Bucket00 => Self::BUCKET_0_CAPACITY,
            // Bucket 1 has a capacity of `1 << (1 + 11) == pow(2, 12) == 4096`.
            // Bucket 2 has a capacity of `1 << (2 + 11) == pow(2, 13) == 8192`.
            _ => 1 << (self.to_usize() + Self::NONZERO_BUCKET_SHIFT_ADJUST),
        }
    }

    /// Converts a flat index in the range `0..=u32::MAX` into a bucket index,
    /// and a slot offset within that bucket.
    ///
    /// Panics if `flat > u32::MAX`.
    #[inline(always)]
    const fn from_flat_index(flat: usize) -> (Self, usize) {
        if flat > u32::MAX as usize {
            panic!();
        }

        // If the index is in bucket 0, the conversion is trivial.
        // This also avoids calling `ilog2` when `flat == 0`.
        if flat < Self::BUCKET_0_CAPACITY {
            return (Self::Bucket00, flat);
        }

        // General-case conversion for a non-zero bucket index.
        //
        //       |       | bucket | slot
        //  flat | ilog2 | index  | offset
        // ------------------------------
        //  4096 |    12 |      1 | 0
        //  4097 |    12 |      1 | 1
        //  ...
        //  8191 |    12 |      1 | 4095
        //  8192 |    13 |      2 | 0
        let highest_bit_pos = flat.ilog2() as usize;
        let bucket_index =
            BucketIndex::from_raw(highest_bit_pos - Self::NONZERO_BUCKET_SHIFT_ADJUST);

        // Clear the highest-set bit (which selects the bucket) to get the
        // slot offset within this bucket.
        let slot_offset = flat - (1 << highest_bit_pos);

        (bucket_index, slot_offset)
    }

    /// Iterates all bucket indices, 0 through 20 in order.
    #[inline(always)]
    fn iter_all() -> impl ExactSizeIterator<Item = Self> {
        (0usize..BUCKETS).map(BucketIndex::from_raw)
    }

    /// Pairs each element of a buckets array with its `BucketIndex`.
    #[inline(always)]
    fn enumerate_buckets<T>(buckets: &[T; BUCKETS]) -> impl ExactSizeIterator<Item = (Self, &T)> {
        BucketIndex::iter_all().zip(buckets)
    }
}
487488impl<T> Index<BucketIndex> for [T; BUCKETS] {
489type Output = T;
490491#[inline(always)]
492fn index(&self, index: BucketIndex) -> &Self::Output {
493// The optimizer should be able to see that see that a bucket index is
494 // always in-bounds, and omit the runtime bounds check.
495&self[index.to_usize()]
496 }
497}
498499impl<T> IndexMut<BucketIndex> for [T; BUCKETS] {
500#[inline(always)]
501fn index_mut(&mut self, index: BucketIndex) -> &mut Self::Output {
502&mut self[index.to_usize()]
503 }
504}