use std::{cmp, mem::size_of};
use bytes::Bytes;
use crate::data_type::{AsBytes, ByteArray, FixedLenByteArray, Int96};
use crate::errors::{ParquetError, Result};
use crate::util::bit_pack::{unpack16, unpack32, unpack64, unpack8};
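/// Converts the first `N` bytes of `bs` into a fixed-size array, returning an
/// error if the slice holds fewer than `N` bytes.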
#[inline]
fn array_from_slice<const N: usize>(bs: &[u8]) -> Result<[u8; N]> {
match bs.get(..N) {
Some(b) => Ok(b.try_into().unwrap()),
None => Err(general_err!(
"error converting value, expected {} bytes got {}",
N,
bs.len()
)),
}
}
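/// Types that can be reconstructed from little-endian bytes.
///
/// `BIT_CAPACITY` is the widest bit-packed width supported for the type; it is
/// 0 for types such as [`ByteArray`] and [`Int96`] that cannot be bit-packed
/// and therefore cannot be used with [`BitReader::get_batch`].
///
/// # Safety
///
/// For types with a non-zero `BIT_CAPACITY`, [`BitReader::get_batch`]
/// reinterprets `&mut [Self]` as a slice of the unsigned integer of the same
/// byte width, so every bit pattern of at most `BIT_CAPACITY` bits must be a
/// valid value of the implementing type.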
pub unsafe trait FromBytes: Sized {
const BIT_CAPACITY: usize;
type Buffer: AsMut<[u8]> + Default;
fn try_from_le_slice(b: &[u8]) -> Result<Self>;
fn from_le_bytes(bs: Self::Buffer) -> Self;
}
macro_rules! from_le_bytes {
($($ty: ty),*) => {
$(
unsafe impl FromBytes for $ty {
const BIT_CAPACITY: usize = std::mem::size_of::<$ty>() * 8;
type Buffer = [u8; size_of::<Self>()];
fn try_from_le_slice(b: &[u8]) -> Result<Self> {
Ok(Self::from_le_bytes(array_from_slice(b)?))
}
fn from_le_bytes(bs: Self::Buffer) -> Self {
<$ty>::from_le_bytes(bs)
}
}
)*
};
}
from_le_bytes! { u8, u16, u32, u64, i8, i16, i32, i64, f32, f64 }
unsafe impl FromBytes for bool {
const BIT_CAPACITY: usize = 1;
type Buffer = [u8; 1];
fn try_from_le_slice(b: &[u8]) -> Result<Self> {
Ok(Self::from_le_bytes(array_from_slice(b)?))
}
fn from_le_bytes(bs: Self::Buffer) -> Self {
bs[0] != 0
}
}
unsafe impl FromBytes for Int96 {
const BIT_CAPACITY: usize = 0;
type Buffer = [u8; 12];
fn try_from_le_slice(b: &[u8]) -> Result<Self> {
let bs: [u8; 12] = array_from_slice(b)?;
let mut i = Int96::new();
i.set_data(
u32::try_from_le_slice(&bs[0..4])?,
u32::try_from_le_slice(&bs[4..8])?,
u32::try_from_le_slice(&bs[8..12])?,
);
Ok(i)
}
fn from_le_bytes(bs: Self::Buffer) -> Self {
let mut i = Int96::new();
i.set_data(
u32::try_from_le_slice(&bs[0..4]).unwrap(),
u32::try_from_le_slice(&bs[4..8]).unwrap(),
u32::try_from_le_slice(&bs[8..12]).unwrap(),
);
i
}
}
unsafe impl FromBytes for ByteArray {
const BIT_CAPACITY: usize = 0;
type Buffer = Vec<u8>;
fn try_from_le_slice(b: &[u8]) -> Result<Self> {
Ok(b.to_vec().into())
}
fn from_le_bytes(bs: Self::Buffer) -> Self {
bs.into()
}
}
unsafe impl FromBytes for FixedLenByteArray {
const BIT_CAPACITY: usize = 0;
type Buffer = Vec<u8>;
fn try_from_le_slice(b: &[u8]) -> Result<Self> {
Ok(b.to_vec().into())
}
fn from_le_bytes(bs: Self::Buffer) -> Self {
bs.into()
}
}
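/// Reads the first `size` bytes of `src` as a little-endian, zero-extended
/// value of type `T`. Panics if `src` holds fewer than `size` bytes or if
/// `size` exceeds the byte width of `T::Buffer`.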
pub(crate) fn read_num_bytes<T>(size: usize, src: &[u8]) -> T
where
T: FromBytes,
{
assert!(size <= src.len());
let mut buffer = <T as FromBytes>::Buffer::default();
buffer.as_mut()[..size].copy_from_slice(&src[..size]);
<T>::from_le_bytes(buffer)
}
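/// Returns the ceiling of `value / divisor`, e.g. `ceil(9, 8) == 2`.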
#[inline]
pub fn ceil<T: num::Integer>(value: T, divisor: T) -> T {
num::Integer::div_ceil(&value, &divisor)
}
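/// Returns the `num_bits` least-significant bits of `v`; returns `v` unchanged
/// when `num_bits >= 64`.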
#[inline]
pub fn trailing_bits(v: u64, num_bits: usize) -> u64 {
if num_bits >= 64 {
v
} else {
v & ((1 << num_bits) - 1)
}
}
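/// Returns the minimum number of bits needed to represent `x`,
/// e.g. `num_required_bits(0) == 0` and `num_required_bits(10) == 4`.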
#[inline]
pub fn num_required_bits(x: u64) -> u8 {
64 - x.leading_zeros() as u8
}
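/// Single-bit masks indexed by bit position within a byte.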
static BIT_MASK: [u8; 8] = [1, 2, 4, 8, 16, 32, 64, 128];
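/// Returns whether bit `i` of `data` is set, counting bits least-significant
/// first within each byte.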
#[inline]
pub fn get_bit(data: &[u8], i: usize) -> bool {
(data[i >> 3] & BIT_MASK[i & 7]) != 0
}
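/// Writer that composes a byte buffer from bit-packed and/or byte-aligned
/// values. Bits are written least-significant first; complete bytes are
/// emitted into the internal buffer while partial bits are staged until
/// `flush` or `consume` is called.
///
/// A minimal usage sketch (values and bit widths are illustrative only; the
/// block is not compiled as a doctest because module visibility depends on
/// crate features):
///
/// ```ignore
/// let mut writer = BitWriter::new(16);
/// writer.put_value(0b101, 3); // bit-packed: 3 bits
/// writer.put_vlq_int(300);    // byte-aligned VLQ/LEB128 integer
/// let bytes: Vec<u8> = writer.consume();
/// ```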
pub struct BitWriter {
/// Destination buffer; only complete bytes are appended here
buffer: Vec<u8>,
/// Bits that have been written but do not yet form complete bytes
buffered_values: u64,
/// Number of valid bits in `buffered_values`
bit_offset: u8,
}
impl BitWriter {
pub fn new(initial_capacity: usize) -> Self {
Self {
buffer: Vec::with_capacity(initial_capacity),
buffered_values: 0,
bit_offset: 0,
}
}
pub fn new_from_buf(buffer: Vec<u8>) -> Self {
Self {
buffer,
buffered_values: 0,
bit_offset: 0,
}
}
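/// Flushes any pending bits and returns the underlying buffer, consuming the
/// writer.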
#[inline]
pub fn consume(mut self) -> Vec<u8> {
self.flush();
self.buffer
}
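/// Flushes any pending bits and returns a slice of all bytes written so far.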
#[inline]
pub fn flush_buffer(&mut self) -> &[u8] {
self.flush();
self.buffer()
}
#[inline]
pub fn clear(&mut self) {
self.buffer.clear();
self.buffered_values = 0;
self.bit_offset = 0;
}
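/// Flushes the partially filled `buffered_values` into the buffer, padding the
/// final byte with zero bits, and resets the bit-level state.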
#[inline]
pub fn flush(&mut self) {
let num_bytes = ceil(self.bit_offset, 8);
let slice = &self.buffered_values.to_le_bytes()[..num_bytes as usize];
self.buffer.extend_from_slice(slice);
self.buffered_values = 0;
self.bit_offset = 0;
}
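/// Flushes pending bits, then appends `num_bytes` zero bytes to the buffer.
/// Returns the byte offset at which the skipped region starts, so it can be
/// back-filled later with `write_at` or `put_aligned_offset`.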
#[inline]
pub fn skip(&mut self, num_bytes: usize) -> usize {
self.flush();
let result = self.buffer.len();
self.buffer.extend(std::iter::repeat(0).take(num_bytes));
result
}
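/// Flushes pending bits and returns a mutable slice over `num_bytes` newly
/// reserved (zeroed) bytes at the end of the buffer.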
#[inline]
pub fn get_next_byte_ptr(&mut self, num_bytes: usize) -> &mut [u8] {
let offset = self.skip(num_bytes);
&mut self.buffer[offset..offset + num_bytes]
}
#[inline]
pub fn bytes_written(&self) -> usize {
self.buffer.len() + ceil(self.bit_offset, 8) as usize
}
#[inline]
pub fn buffer(&self) -> &[u8] {
&self.buffer
}
#[inline]
pub fn byte_offset(&self) -> usize {
self.buffer.len()
}
pub fn write_at(&mut self, offset: usize, value: u8) {
self.buffer[offset] = value;
}
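/// Writes the `num_bits` least-significant bits of `v`, bit-packed on top of
/// any previously written bits.
///
/// # Panics
///
/// Panics if `num_bits > 64` or if `v` does not fit in `num_bits` bits.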
#[inline]
pub fn put_value(&mut self, v: u64, num_bits: usize) {
assert!(num_bits <= 64);
let num_bits = num_bits as u8;
// the value being written must fit in `num_bits` bits
assert_eq!(v.checked_shr(num_bits as u32).unwrap_or(0), 0);
self.buffered_values |= v << self.bit_offset;
self.bit_offset += num_bits;
if let Some(remaining) = self.bit_offset.checked_sub(64) {
self.buffer
.extend_from_slice(&self.buffered_values.to_le_bytes());
self.bit_offset = remaining;
self.buffered_values = v
.checked_shr((num_bits - self.bit_offset) as u32)
.unwrap_or(0);
}
}
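/// Flushes pending bits, then writes at most the first `num_bytes` bytes of
/// `val` (as returned by `AsBytes::as_bytes`) starting at a byte boundary.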
#[inline]
pub fn put_aligned<T: AsBytes>(&mut self, val: T, num_bytes: usize) {
self.flush();
let slice = val.as_bytes();
let len = num_bytes.min(slice.len());
self.buffer.extend_from_slice(&slice[..len]);
}
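/// Writes at most `num_bytes` bytes of `val` directly at byte `offset`,
/// overwriting bytes previously reserved, e.g. via `skip`. Panics if the
/// target range is out of bounds of the buffer.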
#[inline]
pub fn put_aligned_offset<T: AsBytes>(&mut self, val: T, num_bytes: usize, offset: usize) {
let slice = val.as_bytes();
let len = num_bytes.min(slice.len());
self.buffer[offset..offset + len].copy_from_slice(&slice[..len])
}
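/// Writes `v` as a byte-aligned VLQ/LEB128 varint: 7 bits per byte, with the
/// high bit set on every byte except the last.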
#[inline]
pub fn put_vlq_int(&mut self, mut v: u64) {
while v & 0xFFFFFFFFFFFFFF80 != 0 {
self.put_aligned::<u8>(((v & 0x7F) | 0x80) as u8, 1);
v >>= 7;
}
self.put_aligned::<u8>((v & 0x7F) as u8, 1);
}
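/// ZigZag-encodes `v` (0, -1, 1, -2, ... map to 0, 1, 2, 3, ...) and writes it
/// as a VLQ varint.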
#[inline]
pub fn put_zigzag_vlq_int(&mut self, v: i64) {
let u: u64 = ((v << 1) ^ (v >> 63)) as u64;
self.put_vlq_int(u)
}
pub fn estimated_memory_size(&self) -> usize {
self.buffer.capacity() * size_of::<u8>()
}
}
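/// Maximum number of bytes needed to VLQ-encode a 64-bit integer.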
pub const MAX_VLQ_BYTE_LEN: usize = 10;
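/// Reader for streams produced by [`BitWriter`]: values can be read bit-packed
/// (`get_value`, `get_batch`) or byte-aligned (`get_aligned`, `get_vlq_int`).
///
/// A minimal usage sketch (input bytes are illustrative only; the block is not
/// compiled as a doctest because module visibility depends on crate features):
///
/// ```ignore
/// let mut reader = BitReader::from(vec![0b10110101]);
/// assert_eq!(reader.get_value::<u32>(3), Some(0b101)); // least-significant bits first
/// ```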
pub struct BitReader {
/// Source buffer
buffer: Bytes,
/// Up to 8 bytes of `buffer`, loaded starting at `byte_offset`
buffered_values: u64,
/// Byte offset into `buffer` of the currently buffered window
byte_offset: usize,
/// Number of bits of `buffered_values` that have already been consumed
bit_offset: usize,
}
impl BitReader {
pub fn new(buffer: Bytes) -> Self {
BitReader {
buffer,
buffered_values: 0,
byte_offset: 0,
bit_offset: 0,
}
}
pub fn reset(&mut self, buffer: Bytes) {
self.buffer = buffer;
self.buffered_values = 0;
self.byte_offset = 0;
self.bit_offset = 0;
}
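/// Returns the reader's position in whole bytes, rounding any partially read
/// byte up.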
#[inline]
pub fn get_byte_offset(&self) -> usize {
self.byte_offset + ceil(self.bit_offset, 8)
}
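/// Reads the next `num_bits` bits, zero-extends them, and converts the result
/// to `T`; returns `None` if fewer than `num_bits` bits remain.
///
/// # Panics
///
/// Panics if `num_bits > 64` or exceeds the bit width of `T`.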
pub fn get_value<T: FromBytes>(&mut self, num_bits: usize) -> Option<T> {
assert!(num_bits <= 64);
assert!(num_bits <= size_of::<T>() * 8);
if self.byte_offset * 8 + self.bit_offset + num_bits > self.buffer.len() * 8 {
return None;
}
if self.bit_offset == 0 {
self.load_buffered_values()
}
let mut v =
trailing_bits(self.buffered_values, self.bit_offset + num_bits) >> self.bit_offset;
self.bit_offset += num_bits;
if self.bit_offset >= 64 {
self.byte_offset += 8;
self.bit_offset -= 64;
if self.bit_offset != 0 {
self.load_buffered_values();
v |= trailing_bits(self.buffered_values, self.bit_offset)
.wrapping_shl((num_bits - self.bit_offset) as u32);
}
}
T::try_from_le_slice(v.as_bytes()).ok()
}
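/// Reads up to `batch.len()` bit-packed values of `num_bits` each into
/// `batch`, returning the number of values actually read. Values are read one
/// at a time until the reader is byte-aligned, then in bulk via the
/// `unpack8`/`unpack16`/`unpack32`/`unpack64` kernels, and finally one at a
/// time for any remainder.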
pub fn get_batch<T: FromBytes>(&mut self, batch: &mut [T], num_bits: usize) -> usize {
assert!(num_bits <= size_of::<T>() * 8);
let mut values_to_read = batch.len();
let needed_bits = num_bits * values_to_read;
let remaining_bits = (self.buffer.len() - self.byte_offset) * 8 - self.bit_offset;
if remaining_bits < needed_bits {
values_to_read = remaining_bits / num_bits;
}
let mut i = 0;
if self.bit_offset != 0 {
while i < values_to_read && self.bit_offset != 0 {
batch[i] = self
.get_value(num_bits)
.expect("expected to have more data");
i += 1;
}
}
assert_ne!(T::BIT_CAPACITY, 0);
assert!(num_bits <= T::BIT_CAPACITY);
match size_of::<T>() {
1 => {
let ptr = batch.as_mut_ptr() as *mut u8;
let out = unsafe { std::slice::from_raw_parts_mut(ptr, batch.len()) };
while values_to_read - i >= 8 {
let out_slice = (&mut out[i..i + 8]).try_into().unwrap();
unpack8(&self.buffer[self.byte_offset..], out_slice, num_bits);
self.byte_offset += num_bits;
i += 8;
}
}
2 => {
let ptr = batch.as_mut_ptr() as *mut u16;
let out = unsafe { std::slice::from_raw_parts_mut(ptr, batch.len()) };
while values_to_read - i >= 16 {
let out_slice = (&mut out[i..i + 16]).try_into().unwrap();
unpack16(&self.buffer[self.byte_offset..], out_slice, num_bits);
self.byte_offset += 2 * num_bits;
i += 16;
}
}
4 => {
let ptr = batch.as_mut_ptr() as *mut u32;
let out = unsafe { std::slice::from_raw_parts_mut(ptr, batch.len()) };
while values_to_read - i >= 32 {
let out_slice = (&mut out[i..i + 32]).try_into().unwrap();
unpack32(&self.buffer[self.byte_offset..], out_slice, num_bits);
self.byte_offset += 4 * num_bits;
i += 32;
}
}
8 => {
let ptr = batch.as_mut_ptr() as *mut u64;
let out = unsafe { std::slice::from_raw_parts_mut(ptr, batch.len()) };
while values_to_read - i >= 64 {
let out_slice = (&mut out[i..i + 64]).try_into().unwrap();
unpack64(&self.buffer[self.byte_offset..], out_slice, num_bits);
self.byte_offset += 8 * num_bits;
i += 64;
}
}
_ => unreachable!(),
}
if size_of::<T>() > 4 && values_to_read - i >= 32 && num_bits <= 32 {
let mut out_buf = [0_u32; 32];
unpack32(&self.buffer[self.byte_offset..], &mut out_buf, num_bits);
self.byte_offset += 4 * num_bits;
for out in out_buf {
let mut out_bytes = T::Buffer::default();
out_bytes.as_mut()[..4].copy_from_slice(&out.to_le_bytes());
batch[i] = T::from_le_bytes(out_bytes);
i += 1;
}
}
if size_of::<T>() > 2 && values_to_read - i >= 16 && num_bits <= 16 {
let mut out_buf = [0_u16; 16];
unpack16(&self.buffer[self.byte_offset..], &mut out_buf, num_bits);
self.byte_offset += 2 * num_bits;
for out in out_buf {
let mut out_bytes = T::Buffer::default();
out_bytes.as_mut()[..2].copy_from_slice(&out.to_le_bytes());
batch[i] = T::from_le_bytes(out_bytes);
i += 1;
}
}
if size_of::<T>() > 1 && values_to_read - i >= 8 && num_bits <= 8 {
let mut out_buf = [0_u8; 8];
unpack8(&self.buffer[self.byte_offset..], &mut out_buf, num_bits);
self.byte_offset += num_bits;
for out in out_buf {
let mut out_bytes = T::Buffer::default();
out_bytes.as_mut()[..1].copy_from_slice(&out.to_le_bytes());
batch[i] = T::from_le_bytes(out_bytes);
i += 1;
}
}
while i < values_to_read {
let value = self
.get_value(num_bits)
.expect("expected to have more data");
batch[i] = value;
i += 1;
}
values_to_read
}
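/// Skips up to `num_values` values of `num_bits` each, returning the number of
/// values actually skipped (limited by the remaining buffer).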
pub fn skip(&mut self, num_values: usize, num_bits: usize) -> usize {
assert!(num_bits <= 64);
let needed_bits = num_bits * num_values;
let remaining_bits = (self.buffer.len() - self.byte_offset) * 8 - self.bit_offset;
let values_to_read = match remaining_bits < needed_bits {
true => remaining_bits / num_bits,
false => num_values,
};
let end_bit_offset = self.byte_offset * 8 + values_to_read * num_bits + self.bit_offset;
self.byte_offset = end_bit_offset / 8;
self.bit_offset = end_bit_offset % 8;
if self.bit_offset != 0 {
self.load_buffered_values()
}
values_to_read
}
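/// Advances to the next byte boundary and appends up to `num_bytes` raw bytes
/// to `buf`, returning the number of bytes copied.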
pub(crate) fn get_aligned_bytes(&mut self, buf: &mut Vec<u8>, num_bytes: usize) -> usize {
self.byte_offset = self.get_byte_offset();
self.bit_offset = 0;
let src = &self.buffer[self.byte_offset..];
let to_read = num_bytes.min(src.len());
buf.extend_from_slice(&src[..to_read]);
self.byte_offset += to_read;
to_read
}
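/// Advances to the next byte boundary and reads `num_bytes` bytes as a
/// little-endian, zero-extended value of type `T`; returns `None` if not
/// enough bytes remain.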
pub fn get_aligned<T: FromBytes>(&mut self, num_bytes: usize) -> Option<T> {
self.byte_offset = self.get_byte_offset();
self.bit_offset = 0;
if self.byte_offset + num_bytes > self.buffer.len() {
return None;
}
let v = read_num_bytes::<T>(num_bytes, &self.buffer[self.byte_offset..]);
self.byte_offset += num_bytes;
Some(v)
}
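/// Reads a byte-aligned VLQ/LEB128 varint, returning `None` if the buffer ends
/// before the final byte. Panics if the encoding spans more than
/// [`MAX_VLQ_BYTE_LEN`] bytes.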
pub fn get_vlq_int(&mut self) -> Option<i64> {
let mut shift = 0;
let mut v: i64 = 0;
while let Some(byte) = self.get_aligned::<u8>(1) {
v |= ((byte & 0x7F) as i64) << shift;
shift += 7;
assert!(
shift <= MAX_VLQ_BYTE_LEN * 7,
"Num of bytes exceed MAX_VLQ_BYTE_LEN ({MAX_VLQ_BYTE_LEN})"
);
if byte & 0x80 == 0 {
return Some(v);
}
}
None
}
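/// Reads a VLQ varint and ZigZag-decodes it back to a signed value.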
#[inline]
pub fn get_zigzag_vlq_int(&mut self) -> Option<i64> {
self.get_vlq_int().map(|v| {
let u = v as u64;
(u >> 1) as i64 ^ -((u & 1) as i64)
})
}
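/// Loads up to 8 bytes starting at `byte_offset` into `buffered_values`.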
#[inline]
fn load_buffered_values(&mut self) {
let bytes_to_read = cmp::min(self.buffer.len() - self.byte_offset, 8);
self.buffered_values =
read_num_bytes::<u64>(bytes_to_read, &self.buffer[self.byte_offset..]);
}
}
impl From<Vec<u8>> for BitReader {
#[inline]
fn from(buffer: Vec<u8>) -> Self {
BitReader::new(buffer.into())
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::util::test_common::rand_gen::random_numbers;
use rand::distributions::{Distribution, Standard};
use std::fmt::Debug;
#[test]
fn test_ceil() {
assert_eq!(ceil(0, 1), 0);
assert_eq!(ceil(1, 1), 1);
assert_eq!(ceil(1, 2), 1);
assert_eq!(ceil(1, 8), 1);
assert_eq!(ceil(7, 8), 1);
assert_eq!(ceil(8, 8), 1);
assert_eq!(ceil(9, 8), 2);
assert_eq!(ceil(9, 9), 1);
assert_eq!(ceil(10000000000_u64, 10), 1000000000);
assert_eq!(ceil(10_u64, 10000000000), 1);
assert_eq!(ceil(10000000000_u64, 1000000000), 10);
}
#[test]
fn test_bit_reader_get_byte_offset() {
let buffer = vec![255; 10];
let mut bit_reader = BitReader::from(buffer);
assert_eq!(bit_reader.get_byte_offset(), 0); // 0 bits read
bit_reader.get_value::<i32>(6);
assert_eq!(bit_reader.get_byte_offset(), 1); // 6 bits read
bit_reader.get_value::<i32>(10);
assert_eq!(bit_reader.get_byte_offset(), 2); // 16 bits read
bit_reader.get_value::<i32>(20);
assert_eq!(bit_reader.get_byte_offset(), 5); // 36 bits read
bit_reader.get_value::<i32>(30);
assert_eq!(bit_reader.get_byte_offset(), 9); // 66 bits read
}
#[test]
fn test_bit_reader_get_value() {
let buffer = vec![255, 0];
let mut bit_reader = BitReader::from(buffer);
assert_eq!(bit_reader.get_value::<i32>(1), Some(1));
assert_eq!(bit_reader.get_value::<i32>(2), Some(3));
assert_eq!(bit_reader.get_value::<i32>(3), Some(7));
assert_eq!(bit_reader.get_value::<i32>(4), Some(3));
}
#[test]
fn test_bit_reader_skip() {
let buffer = vec![255, 0];
let mut bit_reader = BitReader::from(buffer);
let skipped = bit_reader.skip(1, 1);
assert_eq!(skipped, 1);
assert_eq!(bit_reader.get_value::<i32>(1), Some(1));
let skipped = bit_reader.skip(2, 2);
assert_eq!(skipped, 2);
assert_eq!(bit_reader.get_value::<i32>(2), Some(3));
let skipped = bit_reader.skip(4, 1);
assert_eq!(skipped, 4);
assert_eq!(bit_reader.get_value::<i32>(4), Some(0));
let skipped = bit_reader.skip(1, 1);
assert_eq!(skipped, 0);
}
#[test]
fn test_bit_reader_get_value_boundary() {
let buffer = vec![10, 0, 0, 0, 20, 0, 30, 0, 0, 0, 40, 0];
let mut bit_reader = BitReader::from(buffer);
assert_eq!(bit_reader.get_value::<i64>(32), Some(10));
assert_eq!(bit_reader.get_value::<i64>(16), Some(20));
assert_eq!(bit_reader.get_value::<i64>(32), Some(30));
assert_eq!(bit_reader.get_value::<i64>(16), Some(40));
}
#[test]
fn test_bit_reader_skip_boundary() {
let buffer = vec![10, 0, 0, 0, 20, 0, 30, 0, 0, 0, 40, 0];
let mut bit_reader = BitReader::from(buffer);
assert_eq!(bit_reader.get_value::<i64>(32), Some(10));
assert_eq!(bit_reader.skip(1, 16), 1);
assert_eq!(bit_reader.get_value::<i64>(32), Some(30));
assert_eq!(bit_reader.get_value::<i64>(16), Some(40));
}
#[test]
fn test_bit_reader_get_aligned() {
let buffer = Bytes::from(vec![0x75, 0xCB]);
let mut bit_reader = BitReader::new(buffer.clone());
assert_eq!(bit_reader.get_value::<i32>(3), Some(5));
assert_eq!(bit_reader.get_aligned::<i32>(1), Some(203));
assert_eq!(bit_reader.get_value::<i32>(1), None);
bit_reader.reset(buffer.clone());
assert_eq!(bit_reader.get_aligned::<i32>(3), None);
}
#[test]
fn test_bit_reader_get_vlq_int() {
let buffer: Vec<u8> = vec![0x89, 0x01, 0xF2, 0xB5, 0x06];
let mut bit_reader = BitReader::from(buffer);
assert_eq!(bit_reader.get_vlq_int(), Some(137));
assert_eq!(bit_reader.get_vlq_int(), Some(105202));
}
#[test]
fn test_bit_reader_get_zigzag_vlq_int() {
let buffer: Vec<u8> = vec![0, 1, 2, 3];
let mut bit_reader = BitReader::from(buffer);
assert_eq!(bit_reader.get_zigzag_vlq_int(), Some(0));
assert_eq!(bit_reader.get_zigzag_vlq_int(), Some(-1));
assert_eq!(bit_reader.get_zigzag_vlq_int(), Some(1));
assert_eq!(bit_reader.get_zigzag_vlq_int(), Some(-2));
}
#[test]
fn test_num_required_bits() {
assert_eq!(num_required_bits(0), 0);
assert_eq!(num_required_bits(1), 1);
assert_eq!(num_required_bits(2), 2);
assert_eq!(num_required_bits(4), 3);
assert_eq!(num_required_bits(8), 4);
assert_eq!(num_required_bits(10), 4);
assert_eq!(num_required_bits(12), 4);
assert_eq!(num_required_bits(16), 5);
assert_eq!(num_required_bits(u64::MAX), 64);
}
#[test]
fn test_get_bit() {
assert!(get_bit(&[0b00001101], 0));
assert!(!get_bit(&[0b00001101], 1));
assert!(get_bit(&[0b00001101], 2));
assert!(get_bit(&[0b00001101], 3));
assert!(get_bit(&[0b01001001, 0b01010010], 0));
assert!(!get_bit(&[0b01001001, 0b01010010], 1));
assert!(!get_bit(&[0b01001001, 0b01010010], 2));
assert!(get_bit(&[0b01001001, 0b01010010], 3));
assert!(!get_bit(&[0b01001001, 0b01010010], 4));
assert!(!get_bit(&[0b01001001, 0b01010010], 5));
assert!(get_bit(&[0b01001001, 0b01010010], 6));
assert!(!get_bit(&[0b01001001, 0b01010010], 7));
assert!(!get_bit(&[0b01001001, 0b01010010], 8));
assert!(get_bit(&[0b01001001, 0b01010010], 9));
assert!(!get_bit(&[0b01001001, 0b01010010], 10));
assert!(!get_bit(&[0b01001001, 0b01010010], 11));
assert!(get_bit(&[0b01001001, 0b01010010], 12));
assert!(!get_bit(&[0b01001001, 0b01010010], 13));
assert!(get_bit(&[0b01001001, 0b01010010], 14));
assert!(!get_bit(&[0b01001001, 0b01010010], 15));
}
#[test]
fn test_skip() {
let mut writer = BitWriter::new(5);
let old_offset = writer.skip(1);
writer.put_aligned(42, 4);
writer.put_aligned_offset(0x10, 1, old_offset);
let result = writer.consume();
assert_eq!(result.as_slice(), [0x10, 42, 0, 0, 0]);
writer = BitWriter::new(4);
let result = writer.skip(5);
assert_eq!(result, 0);
assert_eq!(writer.buffer(), &[0; 5])
}
#[test]
fn test_get_next_byte_ptr() {
let mut writer = BitWriter::new(5);
{
let first_byte = writer.get_next_byte_ptr(1);
first_byte[0] = 0x10;
}
writer.put_aligned(42, 4);
let result = writer.consume();
assert_eq!(result.as_slice(), [0x10, 42, 0, 0, 0]);
}
#[test]
fn test_consume_flush_buffer() {
let mut writer1 = BitWriter::new(3);
let mut writer2 = BitWriter::new(3);
for i in 1..10 {
writer1.put_value(i, 4);
writer2.put_value(i, 4);
}
let res1 = writer1.flush_buffer();
let res2 = writer2.consume();
assert_eq!(res1, &res2[..]);
}
#[test]
fn test_put_get_bool() {
let len = 8;
let mut writer = BitWriter::new(len);
for i in 0..8 {
writer.put_value(i % 2, 1);
}
writer.flush();
{
let buffer = writer.buffer();
assert_eq!(buffer[0], 0b10101010);
}
for i in 0..8 {
match i {
0 | 1 | 4 | 5 => writer.put_value(false as u64, 1),
_ => writer.put_value(true as u64, 1),
}
}
writer.flush();
{
let buffer = writer.buffer();
assert_eq!(buffer[0], 0b10101010);
assert_eq!(buffer[1], 0b11001100);
}
let mut reader = BitReader::from(writer.consume());
for i in 0..8 {
let val = reader
.get_value::<u8>(1)
.expect("get_value() should return OK");
assert_eq!(val, i % 2);
}
for i in 0..8 {
let val = reader
.get_value::<bool>(1)
.expect("get_value() should return OK");
match i {
0 | 1 | 4 | 5 => assert!(!val),
_ => assert!(val),
}
}
}
#[test]
fn test_put_value_roundtrip() {
test_put_value_rand_numbers(32, 2);
test_put_value_rand_numbers(32, 3);
test_put_value_rand_numbers(32, 4);
test_put_value_rand_numbers(32, 5);
test_put_value_rand_numbers(32, 6);
test_put_value_rand_numbers(32, 7);
test_put_value_rand_numbers(32, 8);
test_put_value_rand_numbers(64, 16);
test_put_value_rand_numbers(64, 24);
test_put_value_rand_numbers(64, 32);
}
fn test_put_value_rand_numbers(total: usize, num_bits: usize) {
assert!(num_bits < 64);
let num_bytes = ceil(num_bits, 8);
let mut writer = BitWriter::new(num_bytes * total);
let values: Vec<u64> = random_numbers::<u64>(total)
.iter()
.map(|v| v & ((1 << num_bits) - 1))
.collect();
(0..total).for_each(|i| writer.put_value(values[i], num_bits));
let mut reader = BitReader::from(writer.consume());
(0..total).for_each(|i| {
let v = reader
.get_value::<u64>(num_bits)
.expect("get_value() should return OK");
assert_eq!(
v, values[i],
"[{}]: expected {} but got {}",
i, values[i], v
);
});
}
#[test]
fn test_get_batch() {
const SIZE: &[usize] = &[1, 31, 32, 33, 128, 129];
for s in SIZE {
for i in 0..=64 {
match i {
0..=8 => test_get_batch_helper::<u8>(*s, i),
9..=16 => test_get_batch_helper::<u16>(*s, i),
17..=32 => test_get_batch_helper::<u32>(*s, i),
_ => test_get_batch_helper::<u64>(*s, i),
}
}
}
}
fn test_get_batch_helper<T>(total: usize, num_bits: usize)
where
T: FromBytes + Default + Clone + Debug + Eq,
{
assert!(num_bits <= 64);
let num_bytes = ceil(num_bits, 8);
let mut writer = BitWriter::new(num_bytes * total);
let mask = match num_bits {
64 => u64::MAX,
_ => (1 << num_bits) - 1,
};
let values: Vec<u64> = random_numbers::<u64>(total)
.iter()
.map(|v| v & mask)
.collect();
let expected_values: Vec<T> = values
.iter()
.map(|v| T::try_from_le_slice(v.as_bytes()).unwrap())
.collect();
(0..total).for_each(|i| writer.put_value(values[i], num_bits));
let buf = writer.consume();
let mut reader = BitReader::from(buf);
let mut batch = vec![T::default(); values.len()];
let values_read = reader.get_batch::<T>(&mut batch, num_bits);
assert_eq!(values_read, values.len());
for i in 0..batch.len() {
assert_eq!(
batch[i],
expected_values[i],
"max_num_bits = {}, num_bits = {}, index = {}",
size_of::<T>() * 8,
num_bits,
i
);
}
}
#[test]
fn test_put_aligned_roundtrip() {
test_put_aligned_rand_numbers::<u8>(4, 3);
test_put_aligned_rand_numbers::<u8>(16, 5);
test_put_aligned_rand_numbers::<i16>(32, 7);
test_put_aligned_rand_numbers::<i16>(32, 9);
test_put_aligned_rand_numbers::<i32>(32, 11);
test_put_aligned_rand_numbers::<i32>(32, 13);
test_put_aligned_rand_numbers::<i64>(32, 17);
test_put_aligned_rand_numbers::<i64>(32, 23);
}
fn test_put_aligned_rand_numbers<T>(total: usize, num_bits: usize)
where
T: Copy + FromBytes + AsBytes + Debug + PartialEq,
Standard: Distribution<T>,
{
assert!(num_bits <= 32);
assert!(total % 2 == 0);
let aligned_value_byte_width = std::mem::size_of::<T>();
let value_byte_width = ceil(num_bits, 8);
let mut writer =
BitWriter::new((total / 2) * (aligned_value_byte_width + value_byte_width));
let values: Vec<u32> = random_numbers::<u32>(total / 2)
.iter()
.map(|v| v & ((1 << num_bits) - 1))
.collect();
let aligned_values = random_numbers::<T>(total / 2);
for i in 0..total {
let j = i / 2;
if i % 2 == 0 {
writer.put_value(values[j] as u64, num_bits);
} else {
writer.put_aligned::<T>(aligned_values[j], aligned_value_byte_width)
}
}
let mut reader = BitReader::from(writer.consume());
for i in 0..total {
let j = i / 2;
if i % 2 == 0 {
let v = reader
.get_value::<u64>(num_bits)
.expect("get_value() should return OK");
assert_eq!(
v, values[j] as u64,
"[{}]: expected {} but got {}",
i, values[j], v
);
} else {
let v = reader
.get_aligned::<T>(aligned_value_byte_width)
.expect("get_aligned() should return OK");
assert_eq!(
v, aligned_values[j],
"[{}]: expected {:?} but got {:?}",
i, aligned_values[j], v
);
}
}
}
#[test]
fn test_put_vlq_int() {
let total = 64;
let mut writer = BitWriter::new(total * 32);
let values = random_numbers::<u32>(total);
(0..total).for_each(|i| writer.put_vlq_int(values[i] as u64));
let mut reader = BitReader::from(writer.consume());
(0..total).for_each(|i| {
let v = reader
.get_vlq_int()
.expect("get_vlq_int() should return OK");
assert_eq!(
v as u32, values[i],
"[{}]: expected {} but got {}",
i, values[i], v
);
});
}
#[test]
fn test_put_zigzag_vlq_int() {
let total = 64;
let mut writer = BitWriter::new(total * 32);
let values = random_numbers::<i32>(total);
(0..total).for_each(|i| writer.put_zigzag_vlq_int(values[i] as i64));
let mut reader = BitReader::from(writer.consume());
(0..total).for_each(|i| {
let v = reader
.get_zigzag_vlq_int()
.expect("get_zigzag_vlq_int() should return OK");
assert_eq!(
v as i32, values[i],
"[{}]: expected {} but got {}",
i, values[i], v
);
});
}
#[test]
fn test_get_batch_zero_extend() {
let to_read = vec![0xFF; 4];
let mut reader = BitReader::from(to_read);
let mut output = [u64::MAX; 32];
reader.get_batch(&mut output, 1);
for v in output {
assert_eq!(v, 1);
}
}
}