miri/
range_map.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
//! Implements a map from integer indices to data.
//! Rather than storing data for every index, internally, this maps entire ranges to the data.
//! To this end, the APIs all work on ranges, not on individual integers. Ranges are split as
//! necessary (e.g., when [0,5) is first associated with X, and then [1,2) is mutated).
//! Users must not depend on whether a range is coalesced or not, even though this is observable
//! via the iteration APIs.

use std::ops;

use rustc_abi::Size;

#[derive(Clone, Debug)]
struct Elem<T> {
    /// The range covered by this element; never empty.
    range: ops::Range<u64>,
    /// The data stored for this element.
    data: T,
}
#[derive(Clone, Debug)]
pub struct RangeMap<T> {
    v: Vec<Elem<T>>,
}

impl<T> RangeMap<T> {
    /// Creates a new `RangeMap` for the given size, and with the given initial value used for
    /// the entire range.
    #[inline(always)]
    pub fn new(size: Size, init: T) -> RangeMap<T> {
        let size = size.bytes();
        let v = if size > 0 { vec![Elem { range: 0..size, data: init }] } else { Vec::new() };
        RangeMap { v }
    }

    /// Finds the index containing the given offset.
    fn find_offset(&self, offset: u64) -> usize {
        self.v
            .binary_search_by(|elem| -> std::cmp::Ordering {
                if offset < elem.range.start {
                    // We are too far right (offset is further left).
                    // (`Greater` means that `elem` is greater than the desired target.)
                    std::cmp::Ordering::Greater
                } else if offset >= elem.range.end {
                    // We are too far left (offset is further right).
                    std::cmp::Ordering::Less
                } else {
                    // This is it!
                    std::cmp::Ordering::Equal
                }
            })
            .unwrap()
    }

    /// Provides read-only iteration over everything in the given range. This does
    /// *not* split items if they overlap with the edges. Do not use this to mutate
    /// through interior mutability.
    ///
    /// The iterator also provides the range of the given element.
    /// How exactly the ranges are split can differ even for otherwise identical
    /// maps, so user-visible behavior should never depend on the exact range.
    pub fn iter(&self, offset: Size, len: Size) -> impl Iterator<Item = (ops::Range<u64>, &T)> {
        let offset = offset.bytes();
        let len = len.bytes();
        // Compute a slice starting with the elements we care about.
        let slice: &[Elem<T>] = if len == 0 {
            // We just need any empty iterator. We don't even want to
            // yield the element that surrounds this position.
            &[]
        } else {
            let first_idx = self.find_offset(offset);
            &self.v[first_idx..]
        };
        // The first offset that is not included any more.
        let end = offset + len;
        assert!(
            end <= self.v.last().unwrap().range.end,
            "iterating beyond the bounds of this RangeMap"
        );
        slice
            .iter()
            .take_while(move |elem| elem.range.start < end)
            .map(|elem| (elem.range.clone(), &elem.data))
    }

    /// Provides mutable iteration over all elements.
    /// The iterator also provides the range of the given element.
    /// How exactly the ranges are split can differ even for otherwise identical
    /// maps, so user-visible behavior should never depend on the exact range.
    pub fn iter_mut_all(&mut self) -> impl Iterator<Item = (ops::Range<u64>, &mut T)> {
        self.v.iter_mut().map(|elem| (elem.range.clone(), &mut elem.data))
    }

    /// Provides iteration over all elements.
    /// The iterator also provides the range of the given element.
    /// How exactly the ranges are split can differ even for otherwise identical
    /// maps, so user-visible behavior should never depend on the exact range.
    pub fn iter_all(&self) -> impl Iterator<Item = (ops::Range<u64>, &T)> {
        self.v.iter().map(|elem| (elem.range.clone(), &elem.data))
    }

    // Splits the element situated at the given `index`, such that the 2nd one starts at offset
    // `split_offset`. Do nothing if the element already starts there.
    // Returns whether a split was necessary.
    fn split_index(&mut self, index: usize, split_offset: u64) -> bool
    where
        T: Clone,
    {
        let elem = &mut self.v[index];
        if split_offset == elem.range.start || split_offset == elem.range.end {
            // Nothing to do.
            return false;
        }
        debug_assert!(
            elem.range.contains(&split_offset),
            "the `split_offset` is not in the element to be split"
        );

        // Now we really have to split. Reduce length of first element.
        let second_range = split_offset..elem.range.end;
        elem.range.end = split_offset;
        // Copy the data, and insert second element.
        let second = Elem { range: second_range, data: elem.data.clone() };
        self.v.insert(index + 1, second);
        true
    }

    /// Provides mutable iteration over everything in the given range. As a side-effect,
    /// this will split entries in the map that are only partially hit by the given range,
    /// to make sure that when they are mutated, the effect is constrained to the given range.
    /// Moreover, this will opportunistically merge neighbouring equal blocks.
    ///
    /// The iterator also provides the range of the given element.
    /// How exactly the ranges are split (both prior to and resulting from the execution of this
    /// function) can differ even for otherwise identical maps,
    /// so user-visible behavior should never depend on the exact range.
    pub fn iter_mut(
        &mut self,
        offset: Size,
        len: Size,
    ) -> impl Iterator<Item = (ops::Range<u64>, &mut T)>
    where
        T: Clone + PartialEq,
    {
        let offset = offset.bytes();
        let len = len.bytes();
        // Compute a slice containing exactly the elements we care about
        let slice: &mut [Elem<T>] = if len == 0 {
            // We just need any empty iterator. We don't even want to
            // yield the element that surrounds this position, nor do
            // any splitting.
            &mut []
        } else {
            // Make sure we got a clear beginning
            let mut first_idx = self.find_offset(offset);
            if self.split_index(first_idx, offset) {
                // The newly created 2nd element is ours
                first_idx += 1;
            }
            // No more mutation.
            let first_idx = first_idx;
            // Find our end. Linear scan, but that's ok because the iteration
            // is doing the same linear scan anyway -- no increase in complexity.
            // We combine this scan with a scan for duplicates that we can merge, to reduce
            // the number of elements.
            // We stop searching after the first "block" of size 1, to avoid spending excessive
            // amounts of time on the merging.
            let mut equal_since_idx = first_idx;
            // Once we see too many non-mergeable blocks, we stop.
            // The initial value is chosen via... magic. Benchmarking and magic.
            let mut successful_merge_count = 3usize;
            // When the loop is done, this is the first excluded element.
            let mut end_idx = first_idx;
            loop {
                // Compute if `end` is the last element we need to look at.
                let done = self.v[end_idx].range.end >= offset + len;
                // We definitely need to include `end`, so move the index.
                end_idx += 1;
                debug_assert!(
                    done || end_idx < self.v.len(),
                    "iter_mut: end-offset {} is out-of-bounds",
                    offset + len
                );
                // see if we want to merge everything in `equal_since..end` (exclusive at the end!)
                if successful_merge_count > 0 {
                    if done || self.v[end_idx].data != self.v[equal_since_idx].data {
                        // Everything in `equal_since..end` was equal. Make them just one element covering
                        // the entire range.
                        let removed_elems = end_idx - equal_since_idx - 1; // number of elements that we would remove
                        if removed_elems > 0 {
                            // Adjust the range of the first element to cover all of them.
                            let equal_until = self.v[end_idx - 1].range.end; // end of range of last of the equal elements
                            self.v[equal_since_idx].range.end = equal_until;
                            // Delete the rest of them.
                            self.v.splice(equal_since_idx + 1..end_idx, std::iter::empty());
                            // Adjust `end_idx` because we made the list shorter.
                            end_idx -= removed_elems;
                            // Adjust the count for the cutoff.
                            successful_merge_count += removed_elems;
                        } else {
                            // Adjust the count for the cutoff.
                            successful_merge_count -= 1;
                        }
                        // Go on scanning for the next block starting here.
                        equal_since_idx = end_idx;
                    }
                }
                // Leave loop if this is the last element.
                if done {
                    break;
                }
            }
            // Move to last included instead of first excluded index.
            let end_idx = end_idx - 1;
            // We need to split the end as well. Even if this performs a
            // split, we don't have to adjust our index as we only care about
            // the first part of the split.
            self.split_index(end_idx, offset + len);
            // Now we yield the slice. `end` is inclusive.
            &mut self.v[first_idx..=end_idx]
        };
        slice.iter_mut().map(|elem| (elem.range.clone(), &mut elem.data))
    }

    /// Remove all adjacent duplicates
    pub fn merge_adjacent_thorough(&mut self)
    where
        T: PartialEq,
    {
        let clean = Vec::with_capacity(self.v.len());
        for elem in std::mem::replace(&mut self.v, clean) {
            if let Some(prev) = self.v.last_mut() {
                if prev.data == elem.data {
                    assert_eq!(prev.range.end, elem.range.start);
                    prev.range.end = elem.range.end;
                    continue;
                }
            }
            self.v.push(elem);
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Query the map at every offset in the range and collect the results.
    fn to_vec<T: Copy>(map: &RangeMap<T>, offset: u64, len: u64) -> Vec<T> {
        (offset..offset + len)
            .map(|i| {
                map.iter(Size::from_bytes(i), Size::from_bytes(1)).next().map(|(_, &t)| t).unwrap()
            })
            .collect()
    }

    #[test]
    fn basic_insert() {
        let mut map = RangeMap::<i32>::new(Size::from_bytes(20), -1);
        // Insert.
        for (_, x) in map.iter_mut(Size::from_bytes(10), Size::from_bytes(1)) {
            *x = 42;
        }
        // Check.
        assert_eq!(to_vec(&map, 10, 1), vec![42]);
        assert_eq!(map.v.len(), 3);

        // Insert with size 0.
        for (_, x) in map.iter_mut(Size::from_bytes(10), Size::from_bytes(0)) {
            *x = 19;
        }
        for (_, x) in map.iter_mut(Size::from_bytes(11), Size::from_bytes(0)) {
            *x = 19;
        }
        assert_eq!(to_vec(&map, 10, 2), vec![42, -1]);
        assert_eq!(map.v.len(), 3);
    }

    #[test]
    fn gaps() {
        let mut map = RangeMap::<i32>::new(Size::from_bytes(20), -1);
        for (_, x) in map.iter_mut(Size::from_bytes(11), Size::from_bytes(1)) {
            *x = 42;
        }
        for (_, x) in map.iter_mut(Size::from_bytes(15), Size::from_bytes(1)) {
            *x = 43;
        }
        assert_eq!(map.v.len(), 5);
        assert_eq!(to_vec(&map, 10, 10), vec![-1, 42, -1, -1, -1, 43, -1, -1, -1, -1]);

        for (_, x) in map.iter_mut(Size::from_bytes(10), Size::from_bytes(10)) {
            if *x < 42 {
                *x = 23;
            }
        }
        assert_eq!(map.v.len(), 6);
        assert_eq!(to_vec(&map, 10, 10), vec![23, 42, 23, 23, 23, 43, 23, 23, 23, 23]);
        assert_eq!(to_vec(&map, 13, 5), vec![23, 23, 43, 23, 23]);

        for (_, x) in map.iter_mut(Size::from_bytes(15), Size::from_bytes(5)) {
            *x = 19;
        }
        assert_eq!(map.v.len(), 6);
        assert_eq!(to_vec(&map, 10, 10), vec![23, 42, 23, 23, 23, 19, 19, 19, 19, 19]);
        // Should be seeing two blocks with 19.
        assert_eq!(
            map.iter(Size::from_bytes(15), Size::from_bytes(2))
                .map(|(_, &t)| t)
                .collect::<Vec<_>>(),
            vec![19, 19]
        );

        // A NOP `iter_mut` should trigger merging.
        for _ in map.iter_mut(Size::from_bytes(15), Size::from_bytes(5)) {}
        assert_eq!(map.v.len(), 5);
        assert_eq!(to_vec(&map, 10, 10), vec![23, 42, 23, 23, 23, 19, 19, 19, 19, 19]);
    }

    #[test]
    #[should_panic]
    fn out_of_range_iter_mut() {
        let mut map = RangeMap::<i32>::new(Size::from_bytes(20), -1);
        let _ = map.iter_mut(Size::from_bytes(11), Size::from_bytes(11));
    }

    #[test]
    #[should_panic]
    fn out_of_range_iter() {
        let map = RangeMap::<i32>::new(Size::from_bytes(20), -1);
        let _ = map.iter(Size::from_bytes(11), Size::from_bytes(11));
    }
}