rustc_data_structures/
vec_cache.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
//! VecCache maintains a mapping from K -> (V, I) pairing. K and I must be roughly u32-sized, and V
//! must be Copy.
//!
//! VecCache supports efficient concurrent put/get across the key space, with write-once semantics
//! (i.e., a given key can only be put once). Subsequent puts will panic.
//!
//! This is currently used for query caching.

use std::fmt::Debug;
use std::marker::PhantomData;
use std::sync::atomic::{AtomicPtr, AtomicU32, AtomicUsize, Ordering};

use rustc_index::Idx;

struct Slot<V> {
    // We never construct &Slot<V> so it's fine for this to not be in an UnsafeCell.
    value: V,
    // This is both an index and a once-lock.
    //
    // 0: not yet initialized.
    // 1: lock held, initializing.
    // 2..u32::MAX - 2: initialized.
    index_and_lock: AtomicU32,
}

/// This uniquely identifies a single `Slot<V>` entry in the buckets map, and provides accessors for
/// either getting the value or putting a value.
#[derive(Copy, Clone, Debug)]
struct SlotIndex {
    // the index of the bucket in VecCache (0 to 20)
    bucket_idx: usize,
    // number of entries in that bucket
    entries: usize,
    // the index of the slot within the bucket
    index_in_bucket: usize,
}

// This makes sure the counts are consistent with what we allocate, precomputing each bucket a
// compile-time. Visiting all powers of two is enough to hit all the buckets.
//
// We confirm counts are accurate in the slot_index_exhaustive test.
const ENTRIES_BY_BUCKET: [usize; 21] = {
    let mut entries = [0; 21];
    let mut key = 0;
    loop {
        let si = SlotIndex::from_index(key);
        entries[si.bucket_idx] = si.entries;
        if key == 0 {
            key = 1;
        } else if key == (1 << 31) {
            break;
        } else {
            key <<= 1;
        }
    }
    entries
};

impl SlotIndex {
    // This unpacks a flat u32 index into identifying which bucket it belongs to and the offset
    // within that bucket. As noted in the VecCache docs, buckets double in size with each index.
    // Typically that would mean 31 buckets (2^0 + 2^1 ... + 2^31 = u32::MAX - 1), but to reduce
    // the size of the VecCache struct and avoid uselessly small allocations, we instead have the
    // first bucket have 2**12 entries. To simplify the math, the second bucket also 2**12 entries,
    // and buckets double from there.
    //
    // We assert that [0, 2**32 - 1] uniquely map through this function to individual, consecutive
    // slots (see `slot_index_exhaustive` in tests).
    #[inline]
    const fn from_index(idx: u32) -> Self {
        let mut bucket = match idx.checked_ilog2() {
            Some(x) => x as usize,
            None => 0,
        };
        let entries;
        let running_sum;
        if bucket <= 11 {
            entries = 1 << 12;
            running_sum = 0;
            bucket = 0;
        } else {
            entries = 1 << bucket;
            running_sum = entries;
            bucket = bucket - 11;
        }
        SlotIndex { bucket_idx: bucket, entries, index_in_bucket: idx as usize - running_sum }
    }

    // SAFETY: Buckets must be managed solely by functions here (i.e., get/put on SlotIndex) and
    // `self` comes from SlotIndex::from_index
    #[inline]
    unsafe fn get<V: Copy>(&self, buckets: &[AtomicPtr<Slot<V>>; 21]) -> Option<(V, u32)> {
        // SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 21, i.e.,
        // in-bounds of buckets. See `from_index` for computation.
        let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
        let ptr = bucket.load(Ordering::Acquire);
        // Bucket is not yet initialized: then we obviously won't find this entry in that bucket.
        if ptr.is_null() {
            return None;
        }
        assert!(self.index_in_bucket < self.entries);
        // SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
        // must be inbounds.
        let slot = unsafe { ptr.add(self.index_in_bucket) };

        // SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for
        // AtomicU32 access.
        let index_and_lock = unsafe { &(*slot).index_and_lock };
        let current = index_and_lock.load(Ordering::Acquire);
        let index = match current {
            0 => return None,
            // Treat "initializing" as actually just not initialized at all.
            // The only reason this is a separate state is that `complete` calls could race and
            // we can't allow that, but from load perspective there's no difference.
            1 => return None,
            _ => current - 2,
        };

        // SAFETY:
        // * slot is a valid pointer (buckets are always valid for the index we get).
        // * value is initialized since we saw a >= 2 index above.
        // * `V: Copy`, so safe to read.
        let value = unsafe { (*slot).value };
        Some((value, index))
    }

    fn bucket_ptr<V>(&self, bucket: &AtomicPtr<Slot<V>>) -> *mut Slot<V> {
        let ptr = bucket.load(Ordering::Acquire);
        if ptr.is_null() { self.initialize_bucket(bucket) } else { ptr }
    }

    #[cold]
    fn initialize_bucket<V>(&self, bucket: &AtomicPtr<Slot<V>>) -> *mut Slot<V> {
        static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

        // If we are initializing the bucket, then acquire a global lock.
        //
        // This path is quite cold, so it's cheap to use a global lock. This ensures that we never
        // have multiple allocations for the same bucket.
        let _allocator_guard = LOCK.lock().unwrap_or_else(|e| e.into_inner());

        let ptr = bucket.load(Ordering::Acquire);

        // OK, now under the allocator lock, if we're still null then it's definitely us that will
        // initialize this bucket.
        if ptr.is_null() {
            let bucket_layout =
                std::alloc::Layout::array::<Slot<V>>(self.entries as usize).unwrap();
            // This is more of a sanity check -- this code is very cold, so it's safe to pay a
            // little extra cost here.
            assert!(bucket_layout.size() > 0);
            // SAFETY: Just checked that size is non-zero.
            let allocated = unsafe { std::alloc::alloc_zeroed(bucket_layout).cast::<Slot<V>>() };
            if allocated.is_null() {
                std::alloc::handle_alloc_error(bucket_layout);
            }
            bucket.store(allocated, Ordering::Release);
            allocated
        } else {
            // Otherwise some other thread initialized this bucket after we took the lock. In that
            // case, just return early.
            ptr
        }
    }

    /// Returns true if this successfully put into the map.
    #[inline]
    fn put<V>(&self, buckets: &[AtomicPtr<Slot<V>>; 21], value: V, extra: u32) -> bool {
        // SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 21, i.e.,
        // in-bounds of buckets.
        let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
        let ptr = self.bucket_ptr(bucket);

        assert!(self.index_in_bucket < self.entries);
        // SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
        // must be inbounds.
        let slot = unsafe { ptr.add(self.index_in_bucket) };

        // SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for
        // AtomicU32 access.
        let index_and_lock = unsafe { &(*slot).index_and_lock };
        match index_and_lock.compare_exchange(0, 1, Ordering::AcqRel, Ordering::Acquire) {
            Ok(_) => {
                // We have acquired the initialization lock. It is our job to write `value` and
                // then set the lock to the real index.

                unsafe {
                    (&raw mut (*slot).value).write(value);
                }

                index_and_lock.store(extra.checked_add(2).unwrap(), Ordering::Release);

                true
            }

            // Treat "initializing" as the caller's fault. Callers are responsible for ensuring that
            // there are no races on initialization. In the compiler's current usage for query
            // caches, that's the "active query map" which ensures each query actually runs once
            // (even if concurrently started).
            Err(1) => panic!("caller raced calls to put()"),

            // This slot was already populated. Also ignore, currently this is the same as
            // "initializing".
            Err(_) => false,
        }
    }
}

pub struct VecCache<K: Idx, V, I> {
    // Entries per bucket:
    // Bucket  0:       4096 2^12
    // Bucket  1:       4096 2^12
    // Bucket  2:       8192
    // Bucket  3:      16384
    // ...
    // Bucket 19: 1073741824
    // Bucket 20: 2147483648
    // The total number of entries if all buckets are initialized is u32::MAX-1.
    buckets: [AtomicPtr<Slot<V>>; 21],

    // In the compiler's current usage these are only *read* during incremental and self-profiling.
    // They are an optimization over iterating the full buckets array.
    present: [AtomicPtr<Slot<()>>; 21],
    len: AtomicUsize,

    key: PhantomData<(K, I)>,
}

impl<K: Idx, V, I> Default for VecCache<K, V, I> {
    fn default() -> Self {
        VecCache {
            buckets: Default::default(),
            key: PhantomData,
            len: Default::default(),
            present: Default::default(),
        }
    }
}

// SAFETY: No access to `V` is made.
unsafe impl<K: Idx, #[may_dangle] V, I> Drop for VecCache<K, V, I> {
    fn drop(&mut self) {
        // We have unique ownership, so no locks etc. are needed. Since `K` and `V` are both `Copy`,
        // we are also guaranteed to just need to deallocate any large arrays (not iterate over
        // contents).
        //
        // Confirm no need to deallocate invidual entries. Note that `V: Copy` is asserted on
        // insert/lookup but not necessarily construction, primarily to avoid annoyingly propagating
        // the bounds into struct definitions everywhere.
        assert!(!std::mem::needs_drop::<K>());
        assert!(!std::mem::needs_drop::<V>());

        for (idx, bucket) in self.buckets.iter().enumerate() {
            let bucket = bucket.load(Ordering::Acquire);
            if !bucket.is_null() {
                let layout = std::alloc::Layout::array::<Slot<V>>(ENTRIES_BY_BUCKET[idx]).unwrap();
                unsafe {
                    std::alloc::dealloc(bucket.cast(), layout);
                }
            }
        }

        for (idx, bucket) in self.present.iter().enumerate() {
            let bucket = bucket.load(Ordering::Acquire);
            if !bucket.is_null() {
                let layout = std::alloc::Layout::array::<Slot<()>>(ENTRIES_BY_BUCKET[idx]).unwrap();
                unsafe {
                    std::alloc::dealloc(bucket.cast(), layout);
                }
            }
        }
    }
}

impl<K, V, I> VecCache<K, V, I>
where
    K: Eq + Idx + Copy + Debug,
    V: Copy,
    I: Idx + Copy,
{
    #[inline(always)]
    pub fn lookup(&self, key: &K) -> Option<(V, I)> {
        let key = u32::try_from(key.index()).unwrap();
        let slot_idx = SlotIndex::from_index(key);
        match unsafe { slot_idx.get(&self.buckets) } {
            Some((value, idx)) => Some((value, I::new(idx as usize))),
            None => None,
        }
    }

    #[inline]
    pub fn complete(&self, key: K, value: V, index: I) {
        let key = u32::try_from(key.index()).unwrap();
        let slot_idx = SlotIndex::from_index(key);
        if slot_idx.put(&self.buckets, value, index.index() as u32) {
            let present_idx = self.len.fetch_add(1, Ordering::Relaxed);
            let slot = SlotIndex::from_index(present_idx as u32);
            // We should always be uniquely putting due to `len` fetch_add returning unique values.
            assert!(slot.put(&self.present, (), key));
        }
    }

    pub fn iter(&self, f: &mut dyn FnMut(&K, &V, I)) {
        for idx in 0..self.len.load(Ordering::Acquire) {
            let key = SlotIndex::from_index(idx as u32);
            match unsafe { key.get(&self.present) } {
                // This shouldn't happen in our current usage (iter is really only
                // used long after queries are done running), but if we hit this in practice it's
                // probably fine to just break early.
                None => unreachable!(),
                Some(((), key)) => {
                    let key = K::new(key as usize);
                    // unwrap() is OK: present entries are always written only after we put the real
                    // entry.
                    let value = self.lookup(&key).unwrap();
                    f(&key, &value.0, value.1);
                }
            }
        }
    }
}

#[cfg(test)]
mod tests;