bootstrap/utils/
cache.rs

1//! This module helps you efficiently store and retrieve values using interning.
2//!
3//! Interning is a neat trick that keeps only one copy of identical values, saving memory
4//! and making comparisons super fast. Here, we provide the `Interned<T>` struct and the `Internable` trait
5//! to make interning easy for different data types.
6//!
7//! The `Interner` struct handles caching for common types like `String`, `PathBuf`, and `Vec<String>`,
8//! while the `Cache` struct acts as a write-once storage for linking computation steps with their results.
9//!
10//! # Thread Safety
11//!
12//! We use `Mutex` to make sure interning and retrieval are thread-safe. But keep in mind—once a value is
13//! interned, it sticks around for the entire lifetime of the program.
14
15use std::any::{Any, TypeId};
16use std::borrow::Borrow;
17use std::cell::RefCell;
18use std::cmp::Ordering;
19use std::collections::HashMap;
20use std::hash::{Hash, Hasher};
21use std::marker::PhantomData;
22use std::ops::Deref;
23use std::path::PathBuf;
24use std::sync::{LazyLock, Mutex};
25use std::{fmt, mem};
26
27use crate::core::builder::Step;
28
29/// Represents an interned value of type `T`, allowing for efficient comparisons and retrieval.
30///
31/// This struct stores a unique index referencing the interned value within an internal cache.
32pub struct Interned<T>(usize, PhantomData<*const T>);
33
34impl<T: Internable + Default> Default for Interned<T> {
35    fn default() -> Self {
36        T::default().intern()
37    }
38}
39
40impl<T> Copy for Interned<T> {}
41impl<T> Clone for Interned<T> {
42    fn clone(&self) -> Interned<T> {
43        *self
44    }
45}
46
47impl<T> PartialEq for Interned<T> {
48    fn eq(&self, other: &Self) -> bool {
49        self.0 == other.0
50    }
51}
52impl<T> Eq for Interned<T> {}
53
54impl PartialEq<str> for Interned<String> {
55    fn eq(&self, other: &str) -> bool {
56        *self == other
57    }
58}
59impl PartialEq<&str> for Interned<String> {
60    fn eq(&self, other: &&str) -> bool {
61        **self == **other
62    }
63}
64impl<T> PartialEq<&Interned<T>> for Interned<T> {
65    fn eq(&self, other: &&Self) -> bool {
66        self.0 == other.0
67    }
68}
69impl<T> PartialEq<Interned<T>> for &Interned<T> {
70    fn eq(&self, other: &Interned<T>) -> bool {
71        self.0 == other.0
72    }
73}
74
75unsafe impl<T> Send for Interned<T> {}
76unsafe impl<T> Sync for Interned<T> {}
77
78impl fmt::Display for Interned<String> {
79    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
80        let s: &str = self;
81        f.write_str(s)
82    }
83}
84
85impl<T, U: ?Sized + fmt::Debug> fmt::Debug for Interned<T>
86where
87    Self: Deref<Target = U>,
88{
89    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
90        let s: &U = self;
91        f.write_fmt(format_args!("{s:?}"))
92    }
93}
94
95impl<T: Internable + Hash> Hash for Interned<T> {
96    fn hash<H: Hasher>(&self, state: &mut H) {
97        let l = T::intern_cache().lock().unwrap();
98        l.get(*self).hash(state)
99    }
100}
101
102impl<T: Internable + Deref> Deref for Interned<T> {
103    type Target = T::Target;
104    fn deref(&self) -> &Self::Target {
105        let l = T::intern_cache().lock().unwrap();
106        unsafe { mem::transmute::<&Self::Target, &Self::Target>(l.get(*self)) }
107    }
108}
109
110impl<T: Internable + AsRef<U>, U: ?Sized> AsRef<U> for Interned<T> {
111    fn as_ref(&self) -> &U {
112        let l = T::intern_cache().lock().unwrap();
113        unsafe { mem::transmute::<&U, &U>(l.get(*self).as_ref()) }
114    }
115}
116
117impl<T: Internable + PartialOrd> PartialOrd for Interned<T> {
118    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
119        let l = T::intern_cache().lock().unwrap();
120        l.get(*self).partial_cmp(l.get(*other))
121    }
122}
123
124impl<T: Internable + Ord> Ord for Interned<T> {
125    fn cmp(&self, other: &Self) -> Ordering {
126        let l = T::intern_cache().lock().unwrap();
127        l.get(*self).cmp(l.get(*other))
128    }
129}
130
131/// A structure for managing the interning of values of type `T`.
132///
133/// `TyIntern<T>` maintains a mapping between values and their interned representations,
134/// ensuring that duplicate values are not stored multiple times.
135struct TyIntern<T: Clone + Eq> {
136    items: Vec<T>,
137    set: HashMap<T, Interned<T>>,
138}
139
140impl<T: Hash + Clone + Eq> Default for TyIntern<T> {
141    fn default() -> Self {
142        TyIntern { items: Vec::new(), set: Default::default() }
143    }
144}
145
146impl<T: Hash + Clone + Eq> TyIntern<T> {
147    /// Interns a borrowed value, ensuring it is stored uniquely.
148    ///
149    /// If the value has been previously interned, the same `Interned<T>` instance is returned.
150    fn intern_borrow<B>(&mut self, item: &B) -> Interned<T>
151    where
152        B: Eq + Hash + ToOwned<Owned = T> + ?Sized,
153        T: Borrow<B>,
154    {
155        if let Some(i) = self.set.get(item) {
156            return *i;
157        }
158        let item = item.to_owned();
159        let interned = Interned(self.items.len(), PhantomData::<*const T>);
160        self.set.insert(item.clone(), interned);
161        self.items.push(item);
162        interned
163    }
164
165    /// Interns an owned value, storing it uniquely.
166    ///
167    /// If the value has been previously interned, the existing `Interned<T>` is returned.
168    fn intern(&mut self, item: T) -> Interned<T> {
169        if let Some(i) = self.set.get(&item) {
170            return *i;
171        }
172        let interned = Interned(self.items.len(), PhantomData::<*const T>);
173        self.set.insert(item.clone(), interned);
174        self.items.push(item);
175        interned
176    }
177
178    /// Retrieves a reference to the interned value associated with the given `Interned<T>` instance.
179    fn get(&self, i: Interned<T>) -> &T {
180        &self.items[i.0]
181    }
182}
183
184/// A global interner for managing interned values of common types.
185///
186/// This structure maintains caches for `String`, `PathBuf`, and `Vec<String>`, ensuring efficient storage
187/// and retrieval of frequently used values.
188#[derive(Default)]
189pub struct Interner {
190    strs: Mutex<TyIntern<String>>,
191    paths: Mutex<TyIntern<PathBuf>>,
192    lists: Mutex<TyIntern<Vec<String>>>,
193}
194
195/// Defines the behavior required for a type to be internable.
196///
197/// Types implementing this trait must provide access to a static cache and define an `intern` method
198/// that ensures values are stored uniquely.
199trait Internable: Clone + Eq + Hash + 'static {
200    fn intern_cache() -> &'static Mutex<TyIntern<Self>>;
201
202    fn intern(self) -> Interned<Self> {
203        Self::intern_cache().lock().unwrap().intern(self)
204    }
205}
206
207impl Internable for String {
208    fn intern_cache() -> &'static Mutex<TyIntern<Self>> {
209        &INTERNER.strs
210    }
211}
212
213impl Internable for PathBuf {
214    fn intern_cache() -> &'static Mutex<TyIntern<Self>> {
215        &INTERNER.paths
216    }
217}
218
219impl Internable for Vec<String> {
220    fn intern_cache() -> &'static Mutex<TyIntern<Self>> {
221        &INTERNER.lists
222    }
223}
224
225impl Interner {
226    /// Interns a string reference, ensuring it is stored uniquely.
227    ///
228    /// If the string has been previously interned, the same `Interned<String>` instance is returned.
229    pub fn intern_str(&self, s: &str) -> Interned<String> {
230        self.strs.lock().unwrap().intern_borrow(s)
231    }
232}
233
234/// A global instance of `Interner` that caches common interned values.
235pub static INTERNER: LazyLock<Interner> = LazyLock::new(Interner::default);
236
237/// This is essentially a `HashMap` which allows storing any type in its input and
238/// any type in its output. It is a write-once cache; values are never evicted,
239/// which means that references to the value can safely be returned from the
240/// `get()` method.
241#[derive(Debug)]
242pub struct Cache(
243    RefCell<
244        HashMap<
245            TypeId,
246            Box<dyn Any>, // actually a HashMap<Step, Interned<Step::Output>>
247        >,
248    >,
249);
250
251impl Cache {
252    /// Creates a new empty cache.
253    pub fn new() -> Cache {
254        Cache(RefCell::new(HashMap::new()))
255    }
256
257    /// Stores the result of a computation step in the cache.
258    pub fn put<S: Step>(&self, step: S, value: S::Output) {
259        let mut cache = self.0.borrow_mut();
260        let type_id = TypeId::of::<S>();
261        let stepcache = cache
262            .entry(type_id)
263            .or_insert_with(|| Box::<HashMap<S, S::Output>>::default())
264            .downcast_mut::<HashMap<S, S::Output>>()
265            .expect("invalid type mapped");
266        assert!(!stepcache.contains_key(&step), "processing {step:?} a second time");
267        stepcache.insert(step, value);
268    }
269
270    /// Retrieves a cached result for the given step, if available.
271    pub fn get<S: Step>(&self, step: &S) -> Option<S::Output> {
272        let mut cache = self.0.borrow_mut();
273        let type_id = TypeId::of::<S>();
274        let stepcache = cache
275            .entry(type_id)
276            .or_insert_with(|| Box::<HashMap<S, S::Output>>::default())
277            .downcast_mut::<HashMap<S, S::Output>>()
278            .expect("invalid type mapped");
279        stepcache.get(step).cloned()
280    }
281}
282
283#[cfg(test)]
284impl Cache {
285    pub fn all<S: Ord + Clone + Step>(&mut self) -> Vec<(S, S::Output)> {
286        let cache = self.0.get_mut();
287        let type_id = TypeId::of::<S>();
288        let mut v = cache
289            .remove(&type_id)
290            .map(|b| b.downcast::<HashMap<S, S::Output>>().expect("correct type"))
291            .map(|m| m.into_iter().collect::<Vec<_>>())
292            .unwrap_or_default();
293        v.sort_by_key(|(s, _)| s.clone());
294        v
295    }
296
297    pub fn contains<S: Step>(&self) -> bool {
298        self.0.borrow().contains_key(&TypeId::of::<S>())
299    }
300}
301
302#[cfg(test)]
303mod tests;