rustc_data_structures/
sorted_map.rs

1use std::borrow::Borrow;
2use std::cmp::Ordering;
3use std::fmt::Debug;
4use std::iter::TrustedLen;
5use std::mem;
6use std::ops::{Bound, Index, IndexMut, RangeBounds};
7
8use rustc_macros::{Decodable_NoContext, Encodable_NoContext};
9
10use crate::stable_hasher::{HashStable, StableHasher, StableOrd};
11
12mod index_map;
13
14pub use index_map::SortedIndexMultiMap;
15
16/// `SortedMap` is a data structure with similar characteristics as BTreeMap but
17/// slightly different trade-offs: lookup is *O*(log(*n*)), insertion and removal
18/// are *O*(*n*) but elements can be iterated in order cheaply.
19///
20/// `SortedMap` can be faster than a `BTreeMap` for small sizes (<50) since it
21/// stores data in a more compact way. It also supports accessing contiguous
22/// ranges of elements as a slice, and slices of already sorted elements can be
23/// inserted efficiently.
24#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Encodable_NoContext, Decodable_NoContext)]
25pub struct SortedMap<K, V> {
26    data: Vec<(K, V)>,
27}
28
29impl<K, V> Default for SortedMap<K, V> {
30    #[inline]
31    fn default() -> SortedMap<K, V> {
32        SortedMap { data: Vec::new() }
33    }
34}
35
36impl<K, V> SortedMap<K, V> {
37    #[inline]
38    pub const fn new() -> SortedMap<K, V> {
39        SortedMap { data: Vec::new() }
40    }
41}
42
43impl<K: Ord, V> SortedMap<K, V> {
44    /// Construct a `SortedMap` from a presorted set of elements. This is faster
45    /// than creating an empty map and then inserting the elements individually.
46    ///
47    /// It is up to the caller to make sure that the elements are sorted by key
48    /// and that there are no duplicates.
49    #[inline]
50    pub fn from_presorted_elements(elements: Vec<(K, V)>) -> SortedMap<K, V> {
51        debug_assert!(elements.array_windows().all(|[fst, snd]| fst.0 < snd.0));
52
53        SortedMap { data: elements }
54    }
55
56    #[inline]
57    pub fn insert(&mut self, key: K, value: V) -> Option<V> {
58        match self.lookup_index_for(&key) {
59            Ok(index) => {
60                let slot = unsafe { self.data.get_unchecked_mut(index) };
61                Some(mem::replace(&mut slot.1, value))
62            }
63            Err(index) => {
64                self.data.insert(index, (key, value));
65                None
66            }
67        }
68    }
69
70    #[inline]
71    pub fn remove(&mut self, key: &K) -> Option<V> {
72        match self.lookup_index_for(key) {
73            Ok(index) => Some(self.data.remove(index).1),
74            Err(_) => None,
75        }
76    }
77
78    #[inline]
79    pub fn get<Q>(&self, key: &Q) -> Option<&V>
80    where
81        K: Borrow<Q>,
82        Q: Ord + ?Sized,
83    {
84        match self.lookup_index_for(key) {
85            Ok(index) => unsafe { Some(&self.data.get_unchecked(index).1) },
86            Err(_) => None,
87        }
88    }
89
90    #[inline]
91    pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut V>
92    where
93        K: Borrow<Q>,
94        Q: Ord + ?Sized,
95    {
96        match self.lookup_index_for(key) {
97            Ok(index) => unsafe { Some(&mut self.data.get_unchecked_mut(index).1) },
98            Err(_) => None,
99        }
100    }
101
102    /// Gets a mutable reference to the value in the entry, or insert a new one.
103    #[inline]
104    pub fn get_mut_or_insert_default(&mut self, key: K) -> &mut V
105    where
106        K: Eq,
107        V: Default,
108    {
109        let index = match self.lookup_index_for(&key) {
110            Ok(index) => index,
111            Err(index) => {
112                self.data.insert(index, (key, V::default()));
113                index
114            }
115        };
116        unsafe { &mut self.data.get_unchecked_mut(index).1 }
117    }
118
119    #[inline]
120    pub fn clear(&mut self) {
121        self.data.clear();
122    }
123
124    /// Iterate over elements, sorted by key
125    #[inline]
126    pub fn iter(&self) -> std::slice::Iter<'_, (K, V)> {
127        self.data.iter()
128    }
129
130    /// Iterate over the keys, sorted
131    #[inline]
132    pub fn keys(&self) -> impl ExactSizeIterator<Item = &K> + DoubleEndedIterator {
133        self.data.iter().map(|(k, _)| k)
134    }
135
136    /// Iterate over values, sorted by key
137    #[inline]
138    pub fn values(&self) -> impl ExactSizeIterator<Item = &V> + DoubleEndedIterator {
139        self.data.iter().map(|(_, v)| v)
140    }
141
142    #[inline]
143    pub fn len(&self) -> usize {
144        self.data.len()
145    }
146
147    #[inline]
148    pub fn is_empty(&self) -> bool {
149        self.len() == 0
150    }
151
152    #[inline]
153    pub fn range<R>(&self, range: R) -> &[(K, V)]
154    where
155        R: RangeBounds<K>,
156    {
157        let (start, end) = self.range_slice_indices(range);
158        &self.data[start..end]
159    }
160
161    /// `sm.range_is_empty(r)` == `sm.range(r).is_empty()`, but is faster.
162    #[inline]
163    pub fn range_is_empty<R>(&self, range: R) -> bool
164    where
165        R: RangeBounds<K>,
166    {
167        // `range` must (via `range_slice_indices`) search for the start and
168        // end separately. But here we can do a single binary search for the
169        // entire range. If a single `x` matching `range` is found then the
170        // range is *not* empty.
171        self.data
172            .binary_search_by(|(x, _)| {
173                // Is `x` below `range`?
174                match range.start_bound() {
175                    Bound::Included(start) if x < start => return Ordering::Less,
176                    Bound::Excluded(start) if x <= start => return Ordering::Less,
177                    _ => {}
178                };
179
180                // Is `x` above `range`?
181                match range.end_bound() {
182                    Bound::Included(end) if x > end => return Ordering::Greater,
183                    Bound::Excluded(end) if x >= end => return Ordering::Greater,
184                    _ => {}
185                };
186
187                // `x` must be within `range`.
188                Ordering::Equal
189            })
190            .is_err()
191    }
192
193    #[inline]
194    pub fn remove_range<R>(&mut self, range: R)
195    where
196        R: RangeBounds<K>,
197    {
198        let (start, end) = self.range_slice_indices(range);
199        self.data.splice(start..end, std::iter::empty());
200    }
201
202    /// Mutate all keys with the given function `f`. This mutation must not
203    /// change the sort-order of keys.
204    #[inline]
205    pub fn offset_keys<F>(&mut self, f: F)
206    where
207        F: Fn(&mut K),
208    {
209        self.data.iter_mut().map(|(k, _)| k).for_each(f);
210    }
211
212    /// Inserts a presorted range of elements into the map. If the range can be
213    /// inserted as a whole in between to existing elements of the map, this
214    /// will be faster than inserting the elements individually.
215    ///
216    /// It is up to the caller to make sure that the elements are sorted by key
217    /// and that there are no duplicates.
218    #[inline]
219    pub fn insert_presorted(
220        &mut self,
221        // We require `TrustedLen` to ensure that the `splice` below is actually efficient.
222        mut elements: impl Iterator<Item = (K, V)> + DoubleEndedIterator + TrustedLen,
223    ) {
224        let Some(first) = elements.next() else {
225            return;
226        };
227
228        let start_index = self.lookup_index_for(&first.0);
229
230        let elements = match start_index {
231            Ok(index) => {
232                self.data[index] = first; // overwrite first element
233                elements.chain(None) // insert the rest below
234            }
235            Err(index) => {
236                let last = elements.next_back();
237                if index == self.data.len()
238                    || last.as_ref().is_none_or(|l| l.0 < self.data[index].0)
239                {
240                    // We can copy the whole range without having to mix with
241                    // existing elements.
242                    self.data
243                        .splice(index..index, std::iter::once(first).chain(elements).chain(last));
244                    return;
245                }
246
247                self.data.insert(index, first);
248                elements.chain(last) // insert the rest below
249            }
250        };
251
252        // Insert the rest. This is super inefficicent since each insertion copies the entire tail.
253        for (k, v) in elements {
254            self.insert(k, v);
255        }
256    }
257
258    /// Looks up the key in `self.data` via `slice::binary_search()`.
259    #[inline(always)]
260    fn lookup_index_for<Q>(&self, key: &Q) -> Result<usize, usize>
261    where
262        K: Borrow<Q>,
263        Q: Ord + ?Sized,
264    {
265        self.data.binary_search_by(|(x, _)| x.borrow().cmp(key))
266    }
267
268    #[inline]
269    fn range_slice_indices<R>(&self, range: R) -> (usize, usize)
270    where
271        R: RangeBounds<K>,
272    {
273        let start = match range.start_bound() {
274            Bound::Included(k) => match self.lookup_index_for(k) {
275                Ok(index) | Err(index) => index,
276            },
277            Bound::Excluded(k) => match self.lookup_index_for(k) {
278                Ok(index) => index + 1,
279                Err(index) => index,
280            },
281            Bound::Unbounded => 0,
282        };
283
284        let end = match range.end_bound() {
285            Bound::Included(k) => match self.lookup_index_for(k) {
286                Ok(index) => index + 1,
287                Err(index) => index,
288            },
289            Bound::Excluded(k) => match self.lookup_index_for(k) {
290                Ok(index) | Err(index) => index,
291            },
292            Bound::Unbounded => self.data.len(),
293        };
294
295        (start, end)
296    }
297
298    #[inline]
299    pub fn contains_key<Q>(&self, key: &Q) -> bool
300    where
301        K: Borrow<Q>,
302        Q: Ord + ?Sized,
303    {
304        self.get(key).is_some()
305    }
306}
307
308impl<K: Ord, V> IntoIterator for SortedMap<K, V> {
309    type Item = (K, V);
310    type IntoIter = std::vec::IntoIter<(K, V)>;
311
312    fn into_iter(self) -> Self::IntoIter {
313        self.data.into_iter()
314    }
315}
316
317impl<'a, K, Q, V> Index<&'a Q> for SortedMap<K, V>
318where
319    K: Ord + Borrow<Q>,
320    Q: Ord + ?Sized,
321{
322    type Output = V;
323
324    fn index(&self, key: &Q) -> &Self::Output {
325        self.get(key).expect("no entry found for key")
326    }
327}
328
329impl<'a, K, Q, V> IndexMut<&'a Q> for SortedMap<K, V>
330where
331    K: Ord + Borrow<Q>,
332    Q: Ord + ?Sized,
333{
334    fn index_mut(&mut self, key: &Q) -> &mut Self::Output {
335        self.get_mut(key).expect("no entry found for key")
336    }
337}
338
339impl<K: Ord, V> FromIterator<(K, V)> for SortedMap<K, V> {
340    fn from_iter<T: IntoIterator<Item = (K, V)>>(iter: T) -> Self {
341        let mut data: Vec<(K, V)> = iter.into_iter().collect();
342
343        data.sort_unstable_by(|(k1, _), (k2, _)| k1.cmp(k2));
344        data.dedup_by(|(k1, _), (k2, _)| k1 == k2);
345
346        SortedMap { data }
347    }
348}
349
350impl<K: HashStable<CTX> + StableOrd, V: HashStable<CTX>, CTX> HashStable<CTX> for SortedMap<K, V> {
351    #[inline]
352    fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) {
353        self.data.hash_stable(ctx, hasher);
354    }
355}
356
357impl<K: Debug, V: Debug> Debug for SortedMap<K, V> {
358    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
359        f.debug_map().entries(self.data.iter().map(|(a, b)| (a, b))).finish()
360    }
361}
362
363#[cfg(test)]
364mod tests;