// rustc_data_structures/vec_cache.rs
//! VecCache maintains a mapping from K -> (V, I) pairing. K and I must be roughly u32-sized, and V
//! must be Copy.
//!
//! VecCache supports efficient concurrent put/get across the key space, with write-once semantics
//! (i.e., a given key can only be put once). Subsequent puts will panic.
//!
//! This is currently used for query caching.
9use std::fmt::Debug;
10use std::marker::PhantomData;
11use std::sync::atomic::{AtomicPtr, AtomicU32, AtomicUsize, Ordering};
12
13use rustc_index::Idx;
14
/// One cache entry: a value plus the atomic word that both guards its one-time
/// initialization and stores the associated index once initialized.
struct Slot<V> {
    // We never construct &Slot<V> so it's fine for this to not be in an UnsafeCell.
    value: V,
    // This is both an index and a once-lock.
    //
    // 0: not yet initialized.
    // 1: lock held, initializing.
    // 2..u32::MAX - 2: initialized.
    index_and_lock: AtomicU32,
}
25
/// This uniquely identifies a single `Slot<V>` entry in the buckets map, and provides accessors for
/// either getting the value or putting a value.
#[derive(Copy, Clone, Debug)]
struct SlotIndex {
    // the index of the bucket in VecCache (0 to 20)
    bucket_idx: usize,
    // number of entries in that bucket
    entries: usize,
    // the index of the slot within the bucket
    index_in_bucket: usize,
}
37
// This makes sure the counts are consistent with what we allocate, precomputing each bucket at
// compile-time. Visiting all powers of two is enough to hit all the buckets.
//
// We confirm counts are accurate in the slot_index_exhaustive test.
const ENTRIES_BY_BUCKET: [usize; 21] = {
    let mut entries = [0; 21];
    let mut key = 0;
    // Walk key = 0, 1, 2, 4, ..., 2^31: each power of two lands in a distinct bucket,
    // so every bucket's entry count gets recorded.
    loop {
        let si = SlotIndex::from_index(key);
        entries[si.bucket_idx] = si.entries;
        if key == 0 {
            key = 1;
        } else if key == (1 << 31) {
            break;
        } else {
            key <<= 1;
        }
    }
    entries
};
58
impl SlotIndex {
    // This unpacks a flat u32 index into identifying which bucket it belongs to and the offset
    // within that bucket. As noted in the VecCache docs, buckets double in size with each index.
    // Typically that would mean 31 buckets (2^0 + 2^1 ... + 2^31 = u32::MAX - 1), but to reduce
    // the size of the VecCache struct and avoid uselessly small allocations, we instead have the
    // first bucket have 2**12 entries. To simplify the math, the second bucket also has 2**12
    // entries, and buckets double from there.
    //
    // We assert that [0, 2**32 - 1] uniquely map through this function to individual, consecutive
    // slots (see `slot_index_exhaustive` in tests).
    #[inline]
    const fn from_index(idx: u32) -> Self {
        // ilog2 selects the "raw" power-of-two bucket; idx == 0 has no ilog2 and maps to raw
        // bucket 0.
        let mut bucket = match idx.checked_ilog2() {
            Some(x) => x as usize,
            None => 0,
        };
        let entries;
        let running_sum;
        if bucket <= 11 {
            // Raw buckets 0..=11 are collapsed into a single 2^12-entry bucket starting at
            // slot 0, so small keys don't trigger tiny allocations.
            entries = 1 << 12;
            running_sum = 0;
            bucket = 0;
        } else {
            // Bucket for raw index k (k >= 12) holds 2^k entries, and exactly 2^k slots
            // precede it (2^12 collapsed + 2^12 + 2^13 + ... + 2^(k-1) = 2^k), so the
            // running sum equals `entries`.
            entries = 1 << bucket;
            running_sum = entries;
            bucket = bucket - 11;
        }
        SlotIndex { bucket_idx: bucket, entries, index_in_bucket: idx as usize - running_sum }
    }

    // SAFETY: Buckets must be managed solely by functions here (i.e., get/put on SlotIndex) and
    // `self` comes from SlotIndex::from_index
    #[inline]
    unsafe fn get<V: Copy>(&self, buckets: &[AtomicPtr<Slot<V>>; 21]) -> Option<(V, u32)> {
        // SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 20, i.e.,
        // in-bounds of buckets. See `from_index` for computation.
        let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
        let ptr = bucket.load(Ordering::Acquire);
        // Bucket is not yet initialized: then we obviously won't find this entry in that bucket.
        if ptr.is_null() {
            return None;
        }
        assert!(self.index_in_bucket < self.entries);
        // SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
        // must be inbounds.
        let slot = unsafe { ptr.add(self.index_in_bucket) };

        // SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for
        // AtomicU32 access.
        let index_and_lock = unsafe { &(*slot).index_and_lock };
        let current = index_and_lock.load(Ordering::Acquire);
        let index = match current {
            0 => return None,
            // Treat "initializing" as actually just not initialized at all.
            // The only reason this is a separate state is that `complete` calls could race and
            // we can't allow that, but from load perspective there's no difference.
            1 => return None,
            // Stored values are offset by 2 to make room for the two lock states above.
            _ => current - 2,
        };

        // SAFETY:
        // * slot is a valid pointer (buckets are always valid for the index we get).
        // * value is initialized since we saw a >= 2 index above.
        // * `V: Copy`, so safe to read.
        let value = unsafe { (*slot).value };
        Some((value, index))
    }

    /// Returns this slot's bucket pointer, lazily allocating the bucket if it is still null.
    fn bucket_ptr<V>(&self, bucket: &AtomicPtr<Slot<V>>) -> *mut Slot<V> {
        let ptr = bucket.load(Ordering::Acquire);
        if ptr.is_null() { self.initialize_bucket(bucket) } else { ptr }
    }

    #[cold]
    fn initialize_bucket<V>(&self, bucket: &AtomicPtr<Slot<V>>) -> *mut Slot<V> {
        static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

        // If we are initializing the bucket, then acquire a global lock.
        //
        // This path is quite cold, so it's cheap to use a global lock. This ensures that we never
        // have multiple allocations for the same bucket.
        let _allocator_guard = LOCK.lock().unwrap_or_else(|e| e.into_inner());

        let ptr = bucket.load(Ordering::Acquire);

        // OK, now under the allocator lock, if we're still null then it's definitely us that will
        // initialize this bucket.
        if ptr.is_null() {
            let bucket_layout =
                std::alloc::Layout::array::<Slot<V>>(self.entries as usize).unwrap();
            // This is more of a sanity check -- this code is very cold, so it's safe to pay a
            // little extra cost here.
            assert!(bucket_layout.size() > 0);
            // SAFETY: Just checked that size is non-zero.
            // Zeroed memory is important: it makes every slot's `index_and_lock` read as 0
            // ("not yet initialized") without touching each slot individually.
            let allocated = unsafe { std::alloc::alloc_zeroed(bucket_layout).cast::<Slot<V>>() };
            if allocated.is_null() {
                std::alloc::handle_alloc_error(bucket_layout);
            }
            bucket.store(allocated, Ordering::Release);
            allocated
        } else {
            // Otherwise some other thread initialized this bucket after we took the lock. In that
            // case, just return early.
            ptr
        }
    }

    /// Returns true if this successfully put into the map.
    #[inline]
    fn put<V>(&self, buckets: &[AtomicPtr<Slot<V>>; 21], value: V, extra: u32) -> bool {
        // SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 20, i.e.,
        // in-bounds of buckets.
        let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
        let ptr = self.bucket_ptr(bucket);

        assert!(self.index_in_bucket < self.entries);
        // SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
        // must be inbounds.
        let slot = unsafe { ptr.add(self.index_in_bucket) };

        // SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for
        // AtomicU32 access.
        let index_and_lock = unsafe { &(*slot).index_and_lock };
        match index_and_lock.compare_exchange(0, 1, Ordering::AcqRel, Ordering::Acquire) {
            Ok(_) => {
                // We have acquired the initialization lock. It is our job to write `value` and
                // then set the lock to the real index.

                unsafe {
                    (&raw mut (*slot).value).write(value);
                }

                // Store `extra + 2` (checked: must stay below the reserved sentinel range) with
                // Release so readers that observe it also observe the value write above.
                index_and_lock.store(extra.checked_add(2).unwrap(), Ordering::Release);

                true
            }

            // Treat "initializing" as the caller's fault. Callers are responsible for ensuring that
            // there are no races on initialization. In the compiler's current usage for query
            // caches, that's the "active query map" which ensures each query actually runs once
            // (even if concurrently started).
            Err(1) => panic!("caller raced calls to put()"),

            // This slot was already populated. Also ignore, currently this is the same as
            // "initializing".
            Err(_) => false,
        }
    }
}
208
/// In-memory cache for queries whose keys are densely-numbered IDs
/// (e.g `CrateNum`, `LocalDefId`), and can therefore be used as indices
/// into a dense vector of cached values.
///
/// (As of [#124780] the underlying storage is not an actual `Vec`, but rather
/// a series of increasingly-large buckets, for improved performance when the
/// parallel frontend is using multiple threads.)
///
/// Each entry in the cache stores the query's return value (`V`), and also
/// an associated index (`I`), which in practice is a `DepNodeIndex` used for
/// query dependency tracking.
///
/// [#124780]: https://github.com/rust-lang/rust/pull/124780
pub struct VecCache<K: Idx, V, I> {
    // Lazily-allocated storage, keyed by `SlotIndex::from_index(key)`.
    //
    // Entries per bucket:
    // Bucket 0: 4096 2^12
    // Bucket 1: 4096 2^12
    // Bucket 2: 8192
    // Bucket 3: 16384
    // ...
    // Bucket 19: 1073741824
    // Bucket 20: 2147483648
    // The total number of entries if all buckets are initialized is u32::MAX-1.
    buckets: [AtomicPtr<Slot<V>>; 21],

    // In the compiler's current usage these are only *read* during incremental and self-profiling.
    // They are an optimization over iterating the full buckets array.
    //
    // `present` records, densely in completion order, the keys that have been put; `len` is the
    // count of completed entries (and hence the next slot in `present`).
    present: [AtomicPtr<Slot<()>>; 21],
    len: AtomicUsize,

    // Zero-sized marker tying the `K` and `I` type parameters to the struct.
    key: PhantomData<(K, I)>,
}
241
242impl<K: Idx, V, I> Default for VecCache<K, V, I> {
243 fn default() -> Self {
244 VecCache {
245 buckets: Default::default(),
246 key: PhantomData,
247 len: Default::default(),
248 present: Default::default(),
249 }
250 }
251}
252
// SAFETY: No access to `V` is made.
unsafe impl<K: Idx, #[may_dangle] V, I> Drop for VecCache<K, V, I> {
    fn drop(&mut self) {
        // We have unique ownership, so no locks etc. are needed. Since `K` and `V` are both `Copy`,
        // we are also guaranteed to just need to deallocate any large arrays (not iterate over
        // contents).
        //
        // Confirm no need to deallocate individual entries. Note that `V: Copy` is asserted on
        // insert/lookup but not necessarily construction, primarily to avoid annoyingly propagating
        // the bounds into struct definitions everywhere.
        assert!(!std::mem::needs_drop::<K>());
        assert!(!std::mem::needs_drop::<V>());

        // Free every allocated value bucket; layouts are recomputed from the precomputed
        // per-bucket entry counts, matching the Layout used in `initialize_bucket`.
        for (idx, bucket) in self.buckets.iter().enumerate() {
            let bucket = bucket.load(Ordering::Acquire);
            if !bucket.is_null() {
                let layout = std::alloc::Layout::array::<Slot<V>>(ENTRIES_BY_BUCKET[idx]).unwrap();
                unsafe {
                    std::alloc::dealloc(bucket.cast(), layout);
                }
            }
        }

        // Same for the `present` index buckets (element type `Slot<()>`).
        for (idx, bucket) in self.present.iter().enumerate() {
            let bucket = bucket.load(Ordering::Acquire);
            if !bucket.is_null() {
                let layout = std::alloc::Layout::array::<Slot<()>>(ENTRIES_BY_BUCKET[idx]).unwrap();
                unsafe {
                    std::alloc::dealloc(bucket.cast(), layout);
                }
            }
        }
    }
}
287
impl<K, V, I> VecCache<K, V, I>
where
    K: Eq + Idx + Copy + Debug,
    V: Copy,
    I: Idx + Copy,
{
    /// Looks up `key`, returning the cached value and its associated index if present.
    ///
    /// Panics if `key.index()` does not fit in a u32.
    #[inline(always)]
    pub fn lookup(&self, key: &K) -> Option<(V, I)> {
        let key = u32::try_from(key.index()).unwrap();
        let slot_idx = SlotIndex::from_index(key);
        // SAFETY: `slot_idx` comes from `SlotIndex::from_index` and `self.buckets` is only
        // mutated through SlotIndex::put, satisfying `get`'s contract.
        match unsafe { slot_idx.get(&self.buckets) } {
            Some((value, idx)) => Some((value, I::new(idx as usize))),
            None => None,
        }
    }

    /// Records `value` and its dep-node `index` for `key`.
    ///
    /// Write-once: a concurrent racing `complete` for the same key panics (see
    /// `SlotIndex::put`); if the key was already fully populated this is a no-op.
    #[inline]
    pub fn complete(&self, key: K, value: V, index: I) {
        let key = u32::try_from(key.index()).unwrap();
        let slot_idx = SlotIndex::from_index(key);
        if slot_idx.put(&self.buckets, value, index.index() as u32) {
            // Record the key in the dense `present` list so `iter` can walk only
            // populated entries.
            let present_idx = self.len.fetch_add(1, Ordering::Relaxed);
            let slot = SlotIndex::from_index(present_idx as u32);
            // We should always be uniquely putting due to `len` fetch_add returning unique values.
            assert!(slot.put(&self.present, (), key));
        }
    }

    /// Calls `f` on every completed `(key, value, index)` entry, in completion order.
    pub fn iter(&self, f: &mut dyn FnMut(&K, &V, I)) {
        for idx in 0..self.len.load(Ordering::Acquire) {
            let key = SlotIndex::from_index(idx as u32);
            // SAFETY: `key` comes from `SlotIndex::from_index` and `self.present` is only
            // mutated through SlotIndex::put.
            match unsafe { key.get(&self.present) } {
                // This shouldn't happen in our current usage (iter is really only
                // used long after queries are done running), but if we hit this in practice it's
                // probably fine to just break early.
                None => unreachable!(),
                Some(((), key)) => {
                    let key = K::new(key as usize);
                    // unwrap() is OK: present entries are always written only after we put the real
                    // entry.
                    let value = self.lookup(&key).unwrap();
                    f(&key, &value.0, value.1);
                }
            }
        }
    }
}
335
// Unit tests (including `slot_index_exhaustive`) live in the sibling `tests` module file.
#[cfg(test)]
mod tests;