rustdoc/html/render/search_index/
encode.rs
1use base64::prelude::*;
2
3pub(crate) fn write_vlqhex_to_string(n: i32, string: &mut String) {
4 let (sign, magnitude): (bool, u32) =
5 if n >= 0 { (false, n.try_into().unwrap()) } else { (true, (-n).try_into().unwrap()) };
6 let value: u32 = (magnitude << 1) | (if sign { 1 } else { 0 });
8 let mut shift: u32 = 28;
20 let mut mask: u32 = 0xF0_00_00_00;
21 while shift < 32 {
23 let hexit = (value & mask) >> shift;
24 if hexit != 0 || shift == 0 {
25 break;
26 }
27 shift = shift.wrapping_sub(4);
28 mask >>= 4;
29 }
30 while shift < 32 {
32 let hexit = (value & mask) >> shift;
33 let hex = char::try_from(if shift == 0 { '`' } else { '@' } as u32 + hexit).unwrap();
34 string.push(hex);
35 shift = shift.wrapping_sub(4);
36 mask >>= 4;
37 }
38}
39
40enum Container {
42 Bits(Box<[u64; 1024]>),
44 Array(Vec<u16>),
46 Run(Vec<(u16, u16)>),
48}
49impl Container {
50 fn popcount(&self) -> u32 {
51 match self {
52 Container::Bits(bits) => bits.iter().copied().map(|x| x.count_ones()).sum(),
53 Container::Array(array) => {
54 array.len().try_into().expect("array can't be bigger than 2**32")
55 }
56 Container::Run(runs) => {
57 runs.iter().copied().map(|(_, lenm1)| u32::from(lenm1) + 1).sum()
58 }
59 }
60 }
61 fn push(&mut self, value: u16) {
62 match self {
63 Container::Bits(bits) => bits[value as usize >> 6] |= 1 << (value & 0x3F),
64 Container::Array(array) => {
65 array.push(value);
66 if array.len() >= 4096 {
67 let array = std::mem::take(array);
68 *self = Container::Bits(Box::new([0; 1024]));
69 for value in array {
70 self.push(value);
71 }
72 }
73 }
74 Container::Run(runs) => {
75 if let Some(r) = runs.last_mut()
76 && r.0 + r.1 + 1 == value
77 {
78 r.1 += 1;
79 } else {
80 runs.push((value, 0));
81 }
82 }
83 }
84 }
85 fn try_make_run(&mut self) -> bool {
86 match self {
87 Container::Bits(bits) => {
88 let mut r: u64 = 0;
89 for (i, chunk) in bits.iter().copied().enumerate() {
90 let next_chunk =
91 i.checked_add(1).and_then(|i| bits.get(i)).copied().unwrap_or(0);
92 r += !chunk & u64::from((chunk << 1).count_ones());
93 r += !next_chunk & u64::from((chunk >> 63).count_ones());
94 }
95 if (2 + 4 * r) >= 8192 {
96 return false;
97 }
98 let bits = std::mem::replace(bits, Box::new([0; 1024]));
99 *self = Container::Run(Vec::new());
100 for (i, bits) in bits.iter().copied().enumerate() {
101 if bits == 0 {
102 continue;
103 }
104 for j in 0..64 {
105 let value = (u16::try_from(i).unwrap() << 6) | j;
106 if bits & (1 << j) != 0 {
107 self.push(value);
108 }
109 }
110 }
111 true
112 }
113 Container::Array(array) if array.len() <= 5 => false,
114 Container::Array(array) => {
115 let mut r = 0;
116 let mut prev = None;
117 for value in array.iter().copied() {
118 if value.checked_sub(1) != prev {
119 r += 1;
120 }
121 prev = Some(value);
122 }
123 if 2 + 4 * r >= 2 * array.len() + 2 {
124 return false;
125 }
126 let array = std::mem::take(array);
127 *self = Container::Run(Vec::new());
128 for value in array {
129 self.push(value);
130 }
131 true
132 }
133 Container::Run(_) => true,
134 }
135 }
136}
137
138pub(crate) fn write_bitmap_to_bytes(
141 domain: &[u32],
142 mut out: impl std::io::Write,
143) -> std::io::Result<()> {
144 let mut keys = Vec::<u16>::new();
146 let mut containers = Vec::<Container>::new();
147 let mut key: u16;
148 let mut domain_iter = domain.iter().copied().peekable();
149 let mut has_run = false;
150 while let Some(entry) = domain_iter.next() {
151 key = (entry >> 16).try_into().expect("shifted off the top 16 bits, so it should fit");
152 let value: u16 = (entry & 0x00_00_FF_FF).try_into().expect("AND 16 bits, so it should fit");
153 let mut container = Container::Array(vec![value]);
154 while let Some(entry) = domain_iter.peek().copied() {
155 let entry_key: u16 =
156 (entry >> 16).try_into().expect("shifted off the top 16 bits, so it should fit");
157 if entry_key != key {
158 break;
159 }
160 domain_iter.next().expect("peeking just succeeded");
161 container
162 .push((entry & 0x00_00_FF_FF).try_into().expect("AND 16 bits, so it should fit"));
163 }
164 keys.push(key);
165 has_run = container.try_make_run() || has_run;
166 containers.push(container);
167 }
168 const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346;
170 const SERIAL_COOKIE: u32 = 12347;
171 const NO_OFFSET_THRESHOLD: u32 = 4;
172 let size: u32 = containers.len().try_into().unwrap();
173 let start_offset = if has_run {
174 out.write_all(&u32::to_le_bytes(SERIAL_COOKIE | ((size - 1) << 16)))?;
175 for set in containers.chunks(8) {
176 let mut b = 0;
177 for (i, container) in set.iter().enumerate() {
178 if matches!(container, &Container::Run(..)) {
179 b |= 1 << i;
180 }
181 }
182 out.write_all(&[b])?;
183 }
184 if size < NO_OFFSET_THRESHOLD {
185 4 + 4 * size + ((size + 7) / 8)
186 } else {
187 4 + 8 * size + ((size + 7) / 8)
188 }
189 } else {
190 out.write_all(&u32::to_le_bytes(SERIAL_COOKIE_NO_RUNCONTAINER))?;
191 out.write_all(&u32::to_le_bytes(containers.len().try_into().unwrap()))?;
192 4 + 4 + 4 * size + 4 * size
193 };
194 for (&key, container) in keys.iter().zip(&containers) {
195 let key: u32 = key.into();
197 let count: u32 = container.popcount() - 1;
198 out.write_all(&u32::to_le_bytes((count << 16) | key))?;
199 }
200 if !has_run || size >= NO_OFFSET_THRESHOLD {
201 let mut starting_offset = start_offset;
203 for container in &containers {
204 out.write_all(&u32::to_le_bytes(starting_offset))?;
205 starting_offset += match container {
206 Container::Bits(_) => 8192u32,
207 Container::Array(array) => u32::try_from(array.len()).unwrap() * 2,
208 Container::Run(runs) => 2 + u32::try_from(runs.len()).unwrap() * 4,
209 };
210 }
211 }
212 for container in &containers {
213 match container {
214 Container::Bits(bits) => {
215 for chunk in bits.iter() {
216 out.write_all(&u64::to_le_bytes(*chunk))?;
217 }
218 }
219 Container::Array(array) => {
220 for value in array.iter() {
221 out.write_all(&u16::to_le_bytes(*value))?;
222 }
223 }
224 Container::Run(runs) => {
225 out.write_all(&u16::to_le_bytes(runs.len().try_into().unwrap()))?;
226 for (start, lenm1) in runs.iter().copied() {
227 out.write_all(&u16::to_le_bytes(start))?;
228 out.write_all(&u16::to_le_bytes(lenm1))?;
229 }
230 }
231 }
232 }
233 Ok(())
234}
235
236pub(crate) fn bitmap_to_string(domain: &[u32]) -> String {
237 let mut buf = Vec::new();
238 let mut strbuf = String::new();
239 write_bitmap_to_bytes(domain, &mut buf).unwrap();
240 BASE64_STANDARD.encode_string(&buf, &mut strbuf);
241 strbuf
242}