rustdoc/html/render/search_index/
encode.rs

/// Appends `n` to `string` as a self-terminating hex number with the sign
/// folded into the lowest bit.
///
/// Leading zero hexits are omitted, but at least one character is always
/// written, so `0` becomes a single `` ` ``.
///
/// # Panics
///
/// Panics if `n` is `i32::MIN`: its magnitude needs all 32 bits, leaving no
/// room for the sign bit.
pub(crate) fn write_signed_vlqhex_to_string(n: i32, string: &mut String) {
    assert!(
        n != i32::MIN,
        "i32::MIN has no vlqhex encoding: its magnitude leaves no room for the sign bit"
    );
    // zig-zag encoding: magnitude in the upper 31 bits, sign in bit 0.
    // The assertion above guarantees the shift cannot drop a set bit.
    let value: u32 = (n.unsigned_abs() << 1) | u32::from(n < 0);
    // Self-terminating hex uses capital letters (`@` through `O`) for every
    // hexit but the least significant one, which is lowercase (`` ` ``
    // through `o`). For example, decimal 17 would be `Aa` if zig-zag
    // encoding weren't used.
    //
    // Zig-zag encoding, however, stores the sign bit as the last bit.
    // This means, in the last hexit, 1 is actually `b`, -1 is `c`
    // (`a` is the imaginary -0), and, because all the bits are shifted
    // by one, `` A` `` is actually 8 and `Aa` is -8.
    //
    // https://rust-lang.github.io/rustc-dev-guide/rustdoc-internals/search.html
    // describes the encoding in more detail.
    let mut seen_nonzero = false;
    for shift in (0..=28u32).rev().step_by(4) {
        let hexit = (value >> shift) & 0xF;
        let is_last = shift == 0;
        // Skip leading zeroes, but never the final hexit: it terminates
        // the number and must always be present.
        if hexit == 0 && !seen_nonzero && !is_last {
            continue;
        }
        seen_nonzero = true;
        let base = if is_last { '`' } else { '@' };
        // `base + hexit` is at most `o` (0x6F), so this is always a valid char.
        let hex = char::from_u32(u32::from(base) + hexit).expect("hexit stays within ASCII");
        string.push(hex);
    }
}
37
/// Decodes one signed, self-terminating hex number from the front of `string`.
///
/// Each byte contributes its low four bits as one hexit, most significant
/// first. A byte `>= 96` (`` ` `` through `o`) marks the final hexit. The
/// lowest bit of the assembled number is the sign and the remaining bits are
/// the magnitude.
///
/// Returns the decoded value together with the number of bytes consumed, or
/// `None` if the input ends before a terminating byte is seen or the number
/// does not fit in 32 bits.
pub fn read_signed_vlqhex_from_string(string: &[u8]) -> Option<(i32, usize)> {
    // Accumulate unsigned: the top bit is payload, not a sign, and the
    // `>> 1` below must be a logical shift rather than a sign-extending one.
    let mut n = 0u32;
    for (i, &c) in string.iter().enumerate() {
        if n > (u32::MAX >> 4) {
            // Another hexit would push set bits off the top.
            return None;
        }
        n = (n << 4) | u32::from(c & 0xF);
        if c >= 96 {
            // zig-zag encoding
            // After the shift the top bit is clear, so the cast is lossless.
            let magnitude = (n >> 1) as i32;
            let value = if n & 1 == 0 { magnitude } else { -magnitude };
            return Some((value, i + 1));
        }
    }
    None
}
53
54pub fn write_postings_to_string(postings: &[Vec<u32>], buf: &mut Vec<u8>) {
55    for list in postings {
56        if list.is_empty() {
57            buf.push(0);
58            continue;
59        }
60        let len_before = buf.len();
61        stringdex::internals::encode::write_bitmap_to_bytes(&list, &mut *buf).unwrap();
62        let len_after = buf.len();
63        if len_after - len_before > 1 + (4 * list.len()) && list.len() < 0x3a {
64            buf.truncate(len_before);
65            buf.push(list.len() as u8);
66            for &item in list {
67                buf.push(item as u8);
68                buf.push((item >> 8) as u8);
69                buf.push((item >> 16) as u8);
70                buf.push((item >> 24) as u8);
71            }
72        }
73    }
74}
75
76pub fn read_postings_from_string(postings: &mut Vec<Vec<u32>>, mut buf: &[u8]) {
77    use stringdex::internals::decode::RoaringBitmap;
78    while let Some(&c) = buf.get(0) {
79        if c < 0x3a {
80            buf = &buf[1..];
81            let mut slot = Vec::new();
82            for _ in 0..c {
83                slot.push(
84                    (buf[0] as u32)
85                        | ((buf[1] as u32) << 8)
86                        | ((buf[2] as u32) << 16)
87                        | ((buf[3] as u32) << 24),
88                );
89                buf = &buf[4..];
90            }
91            postings.push(slot);
92        } else {
93            let (bitmap, consumed_bytes_len) =
94                RoaringBitmap::from_bytes(buf).unwrap_or_else(|| (RoaringBitmap::default(), 0));
95            assert_ne!(consumed_bytes_len, 0);
96            postings.push(bitmap.to_vec());
97            buf = &buf[consumed_bytes_len..];
98        }
99    }
100}