rustc_data_structures/vec_cache.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
//! VecCache maintains a mapping from K -> (V, I) pairing. K and I must be roughly u32-sized, and V
//! must be Copy.
//!
//! VecCache supports efficient concurrent put/get across the key space, with write-once semantics
//! (i.e., a given key can only be put once). Subsequent puts will panic.
//!
//! This is currently used for query caching.
use std::fmt::Debug;
use std::marker::PhantomData;
use std::sync::atomic::{AtomicPtr, AtomicU32, AtomicUsize, Ordering};
use rustc_index::Idx;
struct Slot<V> {
// We never construct &Slot<V> so it's fine for this to not be in an UnsafeCell.
value: V,
// This is both an index and a once-lock.
//
// 0: not yet initialized.
// 1: lock held, initializing.
// 2..u32::MAX - 2: initialized.
index_and_lock: AtomicU32,
}
/// This uniquely identifies a single `Slot<V>` entry in the buckets map, and provides accessors for
/// either getting the value or putting a value.
#[derive(Copy, Clone, Debug)]
struct SlotIndex {
// the index of the bucket in VecCache (0 to 20)
bucket_idx: usize,
// number of entries in that bucket
entries: usize,
// the index of the slot within the bucket
index_in_bucket: usize,
}
// This makes sure the counts are consistent with what we allocate, precomputing each bucket a
// compile-time. Visiting all powers of two is enough to hit all the buckets.
//
// We confirm counts are accurate in the slot_index_exhaustive test.
const ENTRIES_BY_BUCKET: [usize; 21] = {
let mut entries = [0; 21];
let mut key = 0;
loop {
let si = SlotIndex::from_index(key);
entries[si.bucket_idx] = si.entries;
if key == 0 {
key = 1;
} else if key == (1 << 31) {
break;
} else {
key <<= 1;
}
}
entries
};
impl SlotIndex {
// This unpacks a flat u32 index into identifying which bucket it belongs to and the offset
// within that bucket. As noted in the VecCache docs, buckets double in size with each index.
// Typically that would mean 31 buckets (2^0 + 2^1 ... + 2^31 = u32::MAX - 1), but to reduce
// the size of the VecCache struct and avoid uselessly small allocations, we instead have the
// first bucket have 2**12 entries. To simplify the math, the second bucket also 2**12 entries,
// and buckets double from there.
//
// We assert that [0, 2**32 - 1] uniquely map through this function to individual, consecutive
// slots (see `slot_index_exhaustive` in tests).
#[inline]
const fn from_index(idx: u32) -> Self {
let mut bucket = match idx.checked_ilog2() {
Some(x) => x as usize,
None => 0,
};
let entries;
let running_sum;
if bucket <= 11 {
entries = 1 << 12;
running_sum = 0;
bucket = 0;
} else {
entries = 1 << bucket;
running_sum = entries;
bucket = bucket - 11;
}
SlotIndex { bucket_idx: bucket, entries, index_in_bucket: idx as usize - running_sum }
}
// SAFETY: Buckets must be managed solely by functions here (i.e., get/put on SlotIndex) and
// `self` comes from SlotIndex::from_index
#[inline]
unsafe fn get<V: Copy>(&self, buckets: &[AtomicPtr<Slot<V>>; 21]) -> Option<(V, u32)> {
// SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 21, i.e.,
// in-bounds of buckets. See `from_index` for computation.
let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
let ptr = bucket.load(Ordering::Acquire);
// Bucket is not yet initialized: then we obviously won't find this entry in that bucket.
if ptr.is_null() {
return None;
}
assert!(self.index_in_bucket < self.entries);
// SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
// must be inbounds.
let slot = unsafe { ptr.add(self.index_in_bucket) };
// SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for
// AtomicU32 access.
let index_and_lock = unsafe { &(*slot).index_and_lock };
let current = index_and_lock.load(Ordering::Acquire);
let index = match current {
0 => return None,
// Treat "initializing" as actually just not initialized at all.
// The only reason this is a separate state is that `complete` calls could race and
// we can't allow that, but from load perspective there's no difference.
1 => return None,
_ => current - 2,
};
// SAFETY:
// * slot is a valid pointer (buckets are always valid for the index we get).
// * value is initialized since we saw a >= 2 index above.
// * `V: Copy`, so safe to read.
let value = unsafe { (*slot).value };
Some((value, index))
}
fn bucket_ptr<V>(&self, bucket: &AtomicPtr<Slot<V>>) -> *mut Slot<V> {
let ptr = bucket.load(Ordering::Acquire);
if ptr.is_null() { self.initialize_bucket(bucket) } else { ptr }
}
#[cold]
fn initialize_bucket<V>(&self, bucket: &AtomicPtr<Slot<V>>) -> *mut Slot<V> {
static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
// If we are initializing the bucket, then acquire a global lock.
//
// This path is quite cold, so it's cheap to use a global lock. This ensures that we never
// have multiple allocations for the same bucket.
let _allocator_guard = LOCK.lock().unwrap_or_else(|e| e.into_inner());
let ptr = bucket.load(Ordering::Acquire);
// OK, now under the allocator lock, if we're still null then it's definitely us that will
// initialize this bucket.
if ptr.is_null() {
let bucket_layout =
std::alloc::Layout::array::<Slot<V>>(self.entries as usize).unwrap();
// This is more of a sanity check -- this code is very cold, so it's safe to pay a
// little extra cost here.
assert!(bucket_layout.size() > 0);
// SAFETY: Just checked that size is non-zero.
let allocated = unsafe { std::alloc::alloc_zeroed(bucket_layout).cast::<Slot<V>>() };
if allocated.is_null() {
std::alloc::handle_alloc_error(bucket_layout);
}
bucket.store(allocated, Ordering::Release);
allocated
} else {
// Otherwise some other thread initialized this bucket after we took the lock. In that
// case, just return early.
ptr
}
}
/// Returns true if this successfully put into the map.
#[inline]
fn put<V>(&self, buckets: &[AtomicPtr<Slot<V>>; 21], value: V, extra: u32) -> bool {
// SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 21, i.e.,
// in-bounds of buckets.
let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
let ptr = self.bucket_ptr(bucket);
assert!(self.index_in_bucket < self.entries);
// SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
// must be inbounds.
let slot = unsafe { ptr.add(self.index_in_bucket) };
// SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for
// AtomicU32 access.
let index_and_lock = unsafe { &(*slot).index_and_lock };
match index_and_lock.compare_exchange(0, 1, Ordering::AcqRel, Ordering::Acquire) {
Ok(_) => {
// We have acquired the initialization lock. It is our job to write `value` and
// then set the lock to the real index.
unsafe {
(&raw mut (*slot).value).write(value);
}
index_and_lock.store(extra.checked_add(2).unwrap(), Ordering::Release);
true
}
// Treat "initializing" as the caller's fault. Callers are responsible for ensuring that
// there are no races on initialization. In the compiler's current usage for query
// caches, that's the "active query map" which ensures each query actually runs once
// (even if concurrently started).
Err(1) => panic!("caller raced calls to put()"),
// This slot was already populated. Also ignore, currently this is the same as
// "initializing".
Err(_) => false,
}
}
}
pub struct VecCache<K: Idx, V, I> {
// Entries per bucket:
// Bucket 0: 4096 2^12
// Bucket 1: 4096 2^12
// Bucket 2: 8192
// Bucket 3: 16384
// ...
// Bucket 19: 1073741824
// Bucket 20: 2147483648
// The total number of entries if all buckets are initialized is u32::MAX-1.
buckets: [AtomicPtr<Slot<V>>; 21],
// In the compiler's current usage these are only *read* during incremental and self-profiling.
// They are an optimization over iterating the full buckets array.
present: [AtomicPtr<Slot<()>>; 21],
len: AtomicUsize,
key: PhantomData<(K, I)>,
}
impl<K: Idx, V, I> Default for VecCache<K, V, I> {
fn default() -> Self {
VecCache {
buckets: Default::default(),
key: PhantomData,
len: Default::default(),
present: Default::default(),
}
}
}
// SAFETY: No access to `V` is made.
unsafe impl<K: Idx, #[may_dangle] V, I> Drop for VecCache<K, V, I> {
fn drop(&mut self) {
// We have unique ownership, so no locks etc. are needed. Since `K` and `V` are both `Copy`,
// we are also guaranteed to just need to deallocate any large arrays (not iterate over
// contents).
//
// Confirm no need to deallocate invidual entries. Note that `V: Copy` is asserted on
// insert/lookup but not necessarily construction, primarily to avoid annoyingly propagating
// the bounds into struct definitions everywhere.
assert!(!std::mem::needs_drop::<K>());
assert!(!std::mem::needs_drop::<V>());
for (idx, bucket) in self.buckets.iter().enumerate() {
let bucket = bucket.load(Ordering::Acquire);
if !bucket.is_null() {
let layout = std::alloc::Layout::array::<Slot<V>>(ENTRIES_BY_BUCKET[idx]).unwrap();
unsafe {
std::alloc::dealloc(bucket.cast(), layout);
}
}
}
for (idx, bucket) in self.present.iter().enumerate() {
let bucket = bucket.load(Ordering::Acquire);
if !bucket.is_null() {
let layout = std::alloc::Layout::array::<Slot<()>>(ENTRIES_BY_BUCKET[idx]).unwrap();
unsafe {
std::alloc::dealloc(bucket.cast(), layout);
}
}
}
}
}
impl<K, V, I> VecCache<K, V, I>
where
K: Eq + Idx + Copy + Debug,
V: Copy,
I: Idx + Copy,
{
#[inline(always)]
pub fn lookup(&self, key: &K) -> Option<(V, I)> {
let key = u32::try_from(key.index()).unwrap();
let slot_idx = SlotIndex::from_index(key);
match unsafe { slot_idx.get(&self.buckets) } {
Some((value, idx)) => Some((value, I::new(idx as usize))),
None => None,
}
}
#[inline]
pub fn complete(&self, key: K, value: V, index: I) {
let key = u32::try_from(key.index()).unwrap();
let slot_idx = SlotIndex::from_index(key);
if slot_idx.put(&self.buckets, value, index.index() as u32) {
let present_idx = self.len.fetch_add(1, Ordering::Relaxed);
let slot = SlotIndex::from_index(present_idx as u32);
// We should always be uniquely putting due to `len` fetch_add returning unique values.
assert!(slot.put(&self.present, (), key));
}
}
pub fn iter(&self, f: &mut dyn FnMut(&K, &V, I)) {
for idx in 0..self.len.load(Ordering::Acquire) {
let key = SlotIndex::from_index(idx as u32);
match unsafe { key.get(&self.present) } {
// This shouldn't happen in our current usage (iter is really only
// used long after queries are done running), but if we hit this in practice it's
// probably fine to just break early.
None => unreachable!(),
Some(((), key)) => {
let key = K::new(key as usize);
// unwrap() is OK: present entries are always written only after we put the real
// entry.
let value = self.lookup(&key).unwrap();
f(&key, &value.0, value.1);
}
}
}
}
}
#[cfg(test)]
mod tests;