miri/range_map.rs
1//! Implements a map from integer indices to data.
2//! Rather than storing data for every index, internally, this maps entire ranges to the data.
3//! To this end, the APIs all work on ranges, not on individual integers. Ranges are split as
4//! necessary (e.g., when [0,5) is first associated with X, and then [1,2) is mutated).
5//! Users must not depend on whether a range is coalesced or not, even though this is observable
6//! via the iteration APIs.
7
8use std::ops;
9
10use rustc_abi::Size;
11
12#[derive(Clone, Debug)]
13struct Elem<T> {
14 /// The range covered by this element; never empty.
15 range: ops::Range<u64>,
16 /// The data stored for this element.
17 data: T,
18}
19#[derive(Clone, Debug)]
20pub struct RangeMap<T> {
21 v: Vec<Elem<T>>,
22}
23
24impl<T> RangeMap<T> {
25 /// Creates a new `RangeMap` for the given size, and with the given initial value used for
26 /// the entire range.
27 #[inline(always)]
28 pub fn new(size: Size, init: T) -> RangeMap<T> {
29 let size = size.bytes();
30 let v = if size > 0 { vec![Elem { range: 0..size, data: init }] } else { Vec::new() };
31 RangeMap { v }
32 }
33
34 /// Finds the index containing the given offset.
35 fn find_offset(&self, offset: u64) -> usize {
36 self.v
37 .binary_search_by(|elem| -> std::cmp::Ordering {
38 if offset < elem.range.start {
39 // We are too far right (offset is further left).
40 // (`Greater` means that `elem` is greater than the desired target.)
41 std::cmp::Ordering::Greater
42 } else if offset >= elem.range.end {
43 // We are too far left (offset is further right).
44 std::cmp::Ordering::Less
45 } else {
46 // This is it!
47 std::cmp::Ordering::Equal
48 }
49 })
50 .unwrap()
51 }
52
53 /// Provides read-only iteration over everything in the given range. This does
54 /// *not* split items if they overlap with the edges. Do not use this to mutate
55 /// through interior mutability.
56 ///
57 /// The iterator also provides the range of the given element.
58 /// How exactly the ranges are split can differ even for otherwise identical
59 /// maps, so user-visible behavior should never depend on the exact range.
60 pub fn iter(&self, offset: Size, len: Size) -> impl Iterator<Item = (ops::Range<u64>, &T)> {
61 let offset = offset.bytes();
62 let len = len.bytes();
63 // Compute a slice starting with the elements we care about.
64 let slice: &[Elem<T>] = if len == 0 {
65 // We just need any empty iterator. We don't even want to
66 // yield the element that surrounds this position.
67 &[]
68 } else {
69 let first_idx = self.find_offset(offset);
70 &self.v[first_idx..]
71 };
72 // The first offset that is not included any more.
73 let end = offset + len;
74 assert!(
75 end <= self.v.last().unwrap().range.end,
76 "iterating beyond the bounds of this RangeMap"
77 );
78 slice
79 .iter()
80 .take_while(move |elem| elem.range.start < end)
81 .map(|elem| (elem.range.clone(), &elem.data))
82 }
83
84 /// Provides mutable iteration over all elements.
85 /// The iterator also provides the range of the given element.
86 /// How exactly the ranges are split can differ even for otherwise identical
87 /// maps, so user-visible behavior should never depend on the exact range.
88 pub fn iter_mut_all(&mut self) -> impl Iterator<Item = (ops::Range<u64>, &mut T)> {
89 self.v.iter_mut().map(|elem| (elem.range.clone(), &mut elem.data))
90 }
91
92 /// Provides iteration over all elements.
93 /// The iterator also provides the range of the given element.
94 /// How exactly the ranges are split can differ even for otherwise identical
95 /// maps, so user-visible behavior should never depend on the exact range.
96 pub fn iter_all(&self) -> impl Iterator<Item = (ops::Range<u64>, &T)> {
97 self.v.iter().map(|elem| (elem.range.clone(), &elem.data))
98 }
99
100 // Splits the element situated at the given `index`, such that the 2nd one starts at offset
101 // `split_offset`. Do nothing if the element already starts there.
102 // Returns whether a split was necessary.
103 fn split_index(&mut self, index: usize, split_offset: u64) -> bool
104 where
105 T: Clone,
106 {
107 let elem = &mut self.v[index];
108 if split_offset == elem.range.start || split_offset == elem.range.end {
109 // Nothing to do.
110 return false;
111 }
112 debug_assert!(
113 elem.range.contains(&split_offset),
114 "the `split_offset` is not in the element to be split"
115 );
116
117 // Now we really have to split. Reduce length of first element.
118 let second_range = split_offset..elem.range.end;
119 elem.range.end = split_offset;
120 // Copy the data, and insert second element.
121 let second = Elem { range: second_range, data: elem.data.clone() };
122 self.v.insert(index + 1, second);
123 true
124 }
125
126 /// Provides mutable iteration over everything in the given range. As a side-effect,
127 /// this will split entries in the map that are only partially hit by the given range,
128 /// to make sure that when they are mutated, the effect is constrained to the given range.
129 /// Moreover, this will opportunistically merge neighbouring equal blocks.
130 ///
131 /// The iterator also provides the range of the given element.
132 /// How exactly the ranges are split (both prior to and resulting from the execution of this
133 /// function) can differ even for otherwise identical maps,
134 /// so user-visible behavior should never depend on the exact range.
135 pub fn iter_mut(
136 &mut self,
137 offset: Size,
138 len: Size,
139 ) -> impl Iterator<Item = (ops::Range<u64>, &mut T)>
140 where
141 T: Clone + PartialEq,
142 {
143 let offset = offset.bytes();
144 let len = len.bytes();
145 // Compute a slice containing exactly the elements we care about
146 let slice: &mut [Elem<T>] = if len == 0 {
147 // We just need any empty iterator. We don't even want to
148 // yield the element that surrounds this position, nor do
149 // any splitting.
150 &mut []
151 } else {
152 // Make sure we got a clear beginning
153 let mut first_idx = self.find_offset(offset);
154 if self.split_index(first_idx, offset) {
155 // The newly created 2nd element is ours
156 first_idx += 1;
157 }
158 // No more mutation.
159 let first_idx = first_idx;
160 // Find our end. Linear scan, but that's ok because the iteration
161 // is doing the same linear scan anyway -- no increase in complexity.
162 // We combine this scan with a scan for duplicates that we can merge, to reduce
163 // the number of elements.
164 // We stop searching after the first "block" of size 1, to avoid spending excessive
165 // amounts of time on the merging.
166 let mut equal_since_idx = first_idx;
167 // Once we see too many non-mergeable blocks, we stop.
168 // The initial value is chosen via... magic. Benchmarking and magic.
169 let mut successful_merge_count = 3usize;
170 // When the loop is done, this is the first excluded element.
171 let mut end_idx = first_idx;
172 loop {
173 // Compute if `end` is the last element we need to look at.
174 let done = self.v[end_idx].range.end >= offset + len;
175 // We definitely need to include `end`, so move the index.
176 end_idx += 1;
177 debug_assert!(
178 done || end_idx < self.v.len(),
179 "iter_mut: end-offset {} is out-of-bounds",
180 offset + len
181 );
182 // see if we want to merge everything in `equal_since..end` (exclusive at the end!)
183 if successful_merge_count > 0 {
184 if done || self.v[end_idx].data != self.v[equal_since_idx].data {
185 // Everything in `equal_since..end` was equal. Make them just one element covering
186 // the entire range.
187 let removed_elems = end_idx - equal_since_idx - 1; // number of elements that we would remove
188 if removed_elems > 0 {
189 // Adjust the range of the first element to cover all of them.
190 let equal_until = self.v[end_idx - 1].range.end; // end of range of last of the equal elements
191 self.v[equal_since_idx].range.end = equal_until;
192 // Delete the rest of them.
193 self.v.splice(equal_since_idx + 1..end_idx, std::iter::empty());
194 // Adjust `end_idx` because we made the list shorter.
195 end_idx -= removed_elems;
196 // Adjust the count for the cutoff.
197 successful_merge_count += removed_elems;
198 } else {
199 // Adjust the count for the cutoff.
200 successful_merge_count -= 1;
201 }
202 // Go on scanning for the next block starting here.
203 equal_since_idx = end_idx;
204 }
205 }
206 // Leave loop if this is the last element.
207 if done {
208 break;
209 }
210 }
211 // Move to last included instead of first excluded index.
212 let end_idx = end_idx - 1;
213 // We need to split the end as well. Even if this performs a
214 // split, we don't have to adjust our index as we only care about
215 // the first part of the split.
216 self.split_index(end_idx, offset + len);
217 // Now we yield the slice. `end` is inclusive.
218 &mut self.v[first_idx..=end_idx]
219 };
220 slice.iter_mut().map(|elem| (elem.range.clone(), &mut elem.data))
221 }
222
223 /// Remove all adjacent duplicates
224 pub fn merge_adjacent_thorough(&mut self)
225 where
226 T: PartialEq,
227 {
228 let clean = Vec::with_capacity(self.v.len());
229 for elem in std::mem::replace(&mut self.v, clean) {
230 if let Some(prev) = self.v.last_mut() {
231 if prev.data == elem.data {
232 assert_eq!(prev.range.end, elem.range.start);
233 prev.range.end = elem.range.end;
234 continue;
235 }
236 }
237 self.v.push(elem);
238 }
239 }
240}
241
242#[cfg(test)]
243mod tests {
244 use super::*;
245
246 /// Query the map at every offset in the range and collect the results.
247 fn to_vec<T: Copy>(map: &RangeMap<T>, offset: u64, len: u64) -> Vec<T> {
248 (offset..offset + len)
249 .map(|i| {
250 map.iter(Size::from_bytes(i), Size::from_bytes(1)).next().map(|(_, &t)| t).unwrap()
251 })
252 .collect()
253 }
254
255 #[test]
256 fn basic_insert() {
257 let mut map = RangeMap::<i32>::new(Size::from_bytes(20), -1);
258 // Insert.
259 for (_, x) in map.iter_mut(Size::from_bytes(10), Size::from_bytes(1)) {
260 *x = 42;
261 }
262 // Check.
263 assert_eq!(to_vec(&map, 10, 1), vec![42]);
264 assert_eq!(map.v.len(), 3);
265
266 // Insert with size 0.
267 for (_, x) in map.iter_mut(Size::from_bytes(10), Size::from_bytes(0)) {
268 *x = 19;
269 }
270 for (_, x) in map.iter_mut(Size::from_bytes(11), Size::from_bytes(0)) {
271 *x = 19;
272 }
273 assert_eq!(to_vec(&map, 10, 2), vec![42, -1]);
274 assert_eq!(map.v.len(), 3);
275 }
276
277 #[test]
278 fn gaps() {
279 let mut map = RangeMap::<i32>::new(Size::from_bytes(20), -1);
280 for (_, x) in map.iter_mut(Size::from_bytes(11), Size::from_bytes(1)) {
281 *x = 42;
282 }
283 for (_, x) in map.iter_mut(Size::from_bytes(15), Size::from_bytes(1)) {
284 *x = 43;
285 }
286 assert_eq!(map.v.len(), 5);
287 assert_eq!(to_vec(&map, 10, 10), vec![-1, 42, -1, -1, -1, 43, -1, -1, -1, -1]);
288
289 for (_, x) in map.iter_mut(Size::from_bytes(10), Size::from_bytes(10)) {
290 if *x < 42 {
291 *x = 23;
292 }
293 }
294 assert_eq!(map.v.len(), 6);
295 assert_eq!(to_vec(&map, 10, 10), vec![23, 42, 23, 23, 23, 43, 23, 23, 23, 23]);
296 assert_eq!(to_vec(&map, 13, 5), vec![23, 23, 43, 23, 23]);
297
298 for (_, x) in map.iter_mut(Size::from_bytes(15), Size::from_bytes(5)) {
299 *x = 19;
300 }
301 assert_eq!(map.v.len(), 6);
302 assert_eq!(to_vec(&map, 10, 10), vec![23, 42, 23, 23, 23, 19, 19, 19, 19, 19]);
303 // Should be seeing two blocks with 19.
304 assert_eq!(
305 map.iter(Size::from_bytes(15), Size::from_bytes(2))
306 .map(|(_, &t)| t)
307 .collect::<Vec<_>>(),
308 vec![19, 19]
309 );
310
311 // A NOP `iter_mut` should trigger merging.
312 for _ in map.iter_mut(Size::from_bytes(15), Size::from_bytes(5)) {}
313 assert_eq!(map.v.len(), 5);
314 assert_eq!(to_vec(&map, 10, 10), vec![23, 42, 23, 23, 23, 19, 19, 19, 19, 19]);
315 }
316
317 #[test]
318 #[should_panic]
319 fn out_of_range_iter_mut() {
320 let mut map = RangeMap::<i32>::new(Size::from_bytes(20), -1);
321 let _ = map.iter_mut(Size::from_bytes(11), Size::from_bytes(11));
322 }
323
324 #[test]
325 #[should_panic]
326 fn out_of_range_iter() {
327 let map = RangeMap::<i32>::new(Size::from_bytes(20), -1);
328 let _ = map.iter(Size::from_bytes(11), Size::from_bytes(11));
329 }
330}