use base64::prelude::*;
pub(crate) fn write_vlqhex_to_string(n: i32, string: &mut String) {
let (sign, magnitude): (bool, u32) =
if n >= 0 { (false, n.try_into().unwrap()) } else { (true, (-n).try_into().unwrap()) };
let value: u32 = (magnitude << 1) | (if sign { 1 } else { 0 });
let mut shift: u32 = 28;
let mut mask: u32 = 0xF0_00_00_00;
while shift < 32 {
let hexit = (value & mask) >> shift;
if hexit != 0 || shift == 0 {
break;
}
shift = shift.wrapping_sub(4);
mask = mask >> 4;
}
while shift < 32 {
let hexit = (value & mask) >> shift;
let hex = char::try_from(if shift == 0 { '`' } else { '@' } as u32 + hexit).unwrap();
string.push(hex);
shift = shift.wrapping_sub(4);
mask = mask >> 4;
}
}
enum Container {
Bits(Box<[u64; 1024]>),
Array(Vec<u16>),
Run(Vec<(u16, u16)>),
}
impl Container {
fn popcount(&self) -> u32 {
match self {
Container::Bits(bits) => bits.iter().copied().map(|x| x.count_ones()).sum(),
Container::Array(array) => {
array.len().try_into().expect("array can't be bigger than 2**32")
}
Container::Run(runs) => {
runs.iter().copied().map(|(_, lenm1)| u32::from(lenm1) + 1).sum()
}
}
}
fn push(&mut self, value: u16) {
match self {
Container::Bits(bits) => bits[value as usize >> 6] |= 1 << (value & 0x3F),
Container::Array(array) => {
array.push(value);
if array.len() >= 4096 {
let array = std::mem::replace(array, Vec::new());
*self = Container::Bits(Box::new([0; 1024]));
for value in array {
self.push(value);
}
}
}
Container::Run(runs) => {
if let Some(r) = runs.last_mut()
&& r.0 + r.1 + 1 == value
{
r.1 += 1;
} else {
runs.push((value, 0));
}
}
}
}
fn try_make_run(&mut self) -> bool {
match self {
Container::Bits(bits) => {
let mut r: u64 = 0;
for (i, chunk) in bits.iter().copied().enumerate() {
let next_chunk =
i.checked_add(1).and_then(|i| bits.get(i)).copied().unwrap_or(0);
r += !chunk & u64::from((chunk << 1).count_ones());
r += !next_chunk & u64::from((chunk >> 63).count_ones());
}
if (2 + 4 * r) >= 8192 {
return false;
}
let bits = std::mem::replace(bits, Box::new([0; 1024]));
*self = Container::Run(Vec::new());
for (i, bits) in bits.iter().copied().enumerate() {
if bits == 0 {
continue;
}
for j in 0..64 {
let value = (u16::try_from(i).unwrap() << 6) | j;
if bits & (1 << j) != 0 {
self.push(value);
}
}
}
true
}
Container::Array(array) if array.len() <= 5 => false,
Container::Array(array) => {
let mut r = 0;
let mut prev = None;
for value in array.iter().copied() {
if value.checked_sub(1) != prev {
r += 1;
}
prev = Some(value);
}
if 2 + 4 * r >= 2 * array.len() + 2 {
return false;
}
let array = std::mem::replace(array, Vec::new());
*self = Container::Run(Vec::new());
for value in array {
self.push(value);
}
true
}
Container::Run(_) => true,
}
}
}
pub(crate) fn write_bitmap_to_bytes(
domain: &[u32],
mut out: impl std::io::Write,
) -> std::io::Result<()> {
let mut keys = Vec::<u16>::new();
let mut containers = Vec::<Container>::new();
let mut key: u16;
let mut domain_iter = domain.into_iter().copied().peekable();
let mut has_run = false;
while let Some(entry) = domain_iter.next() {
key = (entry >> 16).try_into().expect("shifted off the top 16 bits, so it should fit");
let value: u16 = (entry & 0x00_00_FF_FF).try_into().expect("AND 16 bits, so it should fit");
let mut container = Container::Array(vec![value]);
while let Some(entry) = domain_iter.peek().copied() {
let entry_key: u16 =
(entry >> 16).try_into().expect("shifted off the top 16 bits, so it should fit");
if entry_key != key {
break;
}
domain_iter.next().expect("peeking just succeeded");
container
.push((entry & 0x00_00_FF_FF).try_into().expect("AND 16 bits, so it should fit"));
}
keys.push(key);
has_run = container.try_make_run() || has_run;
containers.push(container);
}
const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346;
const SERIAL_COOKIE: u32 = 12347;
const NO_OFFSET_THRESHOLD: u32 = 4;
let size: u32 = containers.len().try_into().unwrap();
let start_offset = if has_run {
out.write_all(&u32::to_le_bytes(SERIAL_COOKIE | ((size - 1) << 16)))?;
for set in containers.chunks(8) {
let mut b = 0;
for (i, container) in set.iter().enumerate() {
if matches!(container, &Container::Run(..)) {
b |= 1 << i;
}
}
out.write_all(&[b])?;
}
if size < NO_OFFSET_THRESHOLD {
4 + 4 * size + ((size + 7) / 8)
} else {
4 + 8 * size + ((size + 7) / 8)
}
} else {
out.write_all(&u32::to_le_bytes(SERIAL_COOKIE_NO_RUNCONTAINER))?;
out.write_all(&u32::to_le_bytes(containers.len().try_into().unwrap()))?;
4 + 4 + 4 * size + 4 * size
};
for (&key, container) in keys.iter().zip(&containers) {
let key: u32 = key.into();
let count: u32 = container.popcount() - 1;
out.write_all(&u32::to_le_bytes((count << 16) | key))?;
}
if !has_run || size >= NO_OFFSET_THRESHOLD {
let mut starting_offset = start_offset;
for container in &containers {
out.write_all(&u32::to_le_bytes(starting_offset))?;
starting_offset += match container {
Container::Bits(_) => 8192u32,
Container::Array(array) => u32::try_from(array.len()).unwrap() * 2,
Container::Run(runs) => 2 + u32::try_from(runs.len()).unwrap() * 4,
};
}
}
for container in &containers {
match container {
Container::Bits(bits) => {
for chunk in bits.iter() {
out.write_all(&u64::to_le_bytes(*chunk))?;
}
}
Container::Array(array) => {
for value in array.iter() {
out.write_all(&u16::to_le_bytes(*value))?;
}
}
Container::Run(runs) => {
out.write_all(&u16::to_le_bytes(runs.len().try_into().unwrap()))?;
for (start, lenm1) in runs.iter().copied() {
out.write_all(&u16::to_le_bytes(start))?;
out.write_all(&u16::to_le_bytes(lenm1))?;
}
}
}
}
Ok(())
}
pub(crate) fn bitmap_to_string(domain: &[u32]) -> String {
let mut buf = Vec::new();
let mut strbuf = String::new();
write_bitmap_to_bytes(&domain, &mut buf).unwrap();
BASE64_STANDARD.encode_string(&buf, &mut strbuf);
strbuf
}