1use crate::bitmap::container::{Container, ARRAY_LIMIT};
2use crate::bitmap::store::{
3 ArrayStore, BitmapStore, Interval, Store, BITMAP_BYTES, BITMAP_LENGTH, RUN_ELEMENT_BYTES,
4 RUN_NUM_BYTES,
5};
6use crate::RoaringBitmap;
7use bytemuck::cast_slice_mut;
8use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
9use core::convert::Infallible;
10use std::error::Error;
11use std::io;
12
13use super::store::IntervalStore;
14
15pub(crate) const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346;
16pub(crate) const SERIAL_COOKIE: u16 = 12347;
17pub(crate) const NO_OFFSET_THRESHOLD: usize = 4;
18
19pub(crate) const COOKIE_BYTES: usize = 4;
21pub(crate) const SIZE_BYTES: usize = 4;
22pub(crate) const DESCRIPTION_BYTES: usize = 4;
23pub(crate) const OFFSET_BYTES: usize = 4;
24
25impl RoaringBitmap {
26 pub fn serialized_size(&self) -> usize {
42 let mut has_run_containers = false;
43 let size = self.containers.len();
44 let container_sizes: usize = self
45 .containers
46 .iter()
47 .map(|container| match container.store {
48 Store::Array(ref values) => values.byte_size(),
49 Store::Bitmap(..) => BITMAP_BYTES,
50 Store::Run(ref intervals) => {
51 has_run_containers = true;
52 intervals.byte_size()
53 }
54 })
55 .sum();
56
57 header_size(size, has_run_containers) + container_sizes
59 }
60
61 pub fn serialize_into<W: io::Write>(&self, mut writer: W) -> io::Result<()> {
79 let has_run_containers = self.containers.iter().any(|c| matches!(c.store, Store::Run(_)));
80 let size = self.containers.len();
81
82 if has_run_containers {
84 let cookie = SERIAL_COOKIE as u32 | ((size as u32 - 1) << 16);
86 writer.write_u32::<LittleEndian>(cookie)?;
87 let run_container_bitmap_size = size.div_ceil(8);
89 let mut run_container_bitmap = vec![0; run_container_bitmap_size];
90 for (i, container) in self.containers.iter().enumerate() {
91 if let Store::Run(_) = container.store {
92 run_container_bitmap[i / 8] |= 1 << (i % 8);
93 }
94 }
95 writer.write_all(&run_container_bitmap)?;
96 } else {
97 writer.write_u32::<LittleEndian>(SERIAL_COOKIE_NO_RUNCONTAINER)?;
99 writer.write_u32::<LittleEndian>(size as u32)?;
100 }
101
102 for container in &self.containers {
104 writer.write_u16::<LittleEndian>(container.key)?;
105 writer.write_u16::<LittleEndian>((container.len() - 1) as u16)?;
106 }
107
108 let mut offset = header_size(size, has_run_containers) as u32;
109 let has_offsets = if has_run_containers { size >= OFFSET_BYTES } else { true };
110 if has_offsets {
111 for container in &self.containers {
112 writer.write_u32::<LittleEndian>(offset)?;
113 match container.store {
114 Store::Array(ref values) => {
115 offset += values.len() as u32 * 2;
116 }
117 Store::Bitmap(..) => {
118 offset += 8 * 1024;
119 }
120 Store::Run(ref intervals) => {
121 offset += (RUN_NUM_BYTES
122 + (intervals.run_amount() as usize * RUN_ELEMENT_BYTES))
123 as u32;
124 }
125 }
126 }
127 }
128
129 for container in &self.containers {
130 match container.store {
131 Store::Array(ref values) => {
132 for &value in values.iter() {
133 writer.write_u16::<LittleEndian>(value)?;
134 }
135 }
136 Store::Bitmap(ref bits) => {
137 for &value in bits.as_array() {
138 writer.write_u64::<LittleEndian>(value)?;
139 }
140 }
141 Store::Run(ref intervals) => {
142 writer.write_u16::<LittleEndian>(intervals.run_amount() as u16)?;
143 for iv in intervals.iter_intervals() {
144 writer.write_u16::<LittleEndian>(iv.start())?;
145 writer.write_u16::<LittleEndian>(iv.end() - iv.start())?;
146 }
147 }
148 }
149 }
150
151 Ok(())
152 }
153
154 pub fn deserialize_from<R: io::Read>(reader: R) -> io::Result<RoaringBitmap> {
175 RoaringBitmap::deserialize_from_impl(reader, ArrayStore::try_from, BitmapStore::try_from)
176 }
177
178 pub fn deserialize_unchecked_from<R: io::Read>(reader: R) -> io::Result<RoaringBitmap> {
198 RoaringBitmap::deserialize_from_impl::<R, _, Infallible, _, Infallible>(
199 reader,
200 |values| Ok(ArrayStore::from_vec_unchecked(values)),
201 |len, values| Ok(BitmapStore::from_unchecked(len, values)),
202 )
203 }
204
205 fn deserialize_from_impl<R, A, AErr, B, BErr>(
206 mut reader: R,
207 a: A,
208 b: B,
209 ) -> io::Result<RoaringBitmap>
210 where
211 R: io::Read,
212 A: Fn(Vec<u16>) -> Result<ArrayStore, AErr>,
213 AErr: Error + Send + Sync + 'static,
214 B: Fn(u64, Box<[u64; 1024]>) -> Result<BitmapStore, BErr>,
215 BErr: Error + Send + Sync + 'static,
216 {
217 let (size, has_offsets, has_run_containers) = {
219 let cookie = reader.read_u32::<LittleEndian>()?;
220 if cookie == SERIAL_COOKIE_NO_RUNCONTAINER {
221 (reader.read_u32::<LittleEndian>()? as usize, true, false)
222 } else if (cookie as u16) == SERIAL_COOKIE {
223 let size = ((cookie >> 16) + 1) as usize;
224 (size, size >= NO_OFFSET_THRESHOLD, true)
225 } else {
226 return Err(io::Error::other("unknown cookie value"));
227 }
228 };
229
230 let run_container_bitmap = if has_run_containers {
232 let mut bitmap = vec![0u8; size.div_ceil(8)];
233 reader.read_exact(&mut bitmap)?;
234 Some(bitmap)
235 } else {
236 None
237 };
238
239 if size > u16::MAX as usize + 1 {
240 return Err(io::Error::other("size is greater than supported"));
241 }
242
243 let mut description_bytes = vec![0u8; size * DESCRIPTION_BYTES];
245 reader.read_exact(&mut description_bytes)?;
246 let mut description_bytes = &description_bytes[..];
247
248 if has_offsets {
249 let mut offsets = vec![0u8; size * OFFSET_BYTES];
250 reader.read_exact(&mut offsets)?;
251 drop(offsets); }
253
254 let mut containers = Vec::with_capacity(size);
255
256 let mut last_key = None::<u16>;
257 for i in 0..size {
259 let key = description_bytes.read_u16::<LittleEndian>()?;
260 if let Some(last_key) = last_key.replace(key) {
261 if key <= last_key {
262 return Err(io::Error::new(
263 io::ErrorKind::InvalidData,
264 "container keys are not sorted",
265 ));
266 }
267 }
268 let cardinality = u64::from(description_bytes.read_u16::<LittleEndian>()?) + 1;
269
270 let is_run_container =
272 run_container_bitmap.as_ref().is_some_and(|bm| bm[i / 8] & (1 << (i % 8)) != 0);
273
274 let store = if is_run_container {
275 let runs = reader.read_u16::<LittleEndian>()?;
276 if runs == 0 {
277 return Err(io::Error::new(
278 io::ErrorKind::InvalidData,
279 "run container with zero runs",
280 ));
281 }
282 let mut intervals = vec![[0, 0]; runs as usize];
283 reader.read_exact(cast_slice_mut(&mut intervals))?;
284 intervals.iter_mut().for_each(|[s, len]| {
285 *s = u16::from_le(*s);
286 *len = u16::from_le(*len);
287 });
288
289 let mut last_end = None::<u16>;
290 let store = IntervalStore::from_vec_unchecked(
291 intervals
292 .into_iter()
293 .map(|[s, len]| -> Result<Interval, io::ErrorKind> {
294 let end = s.checked_add(len).ok_or(io::ErrorKind::InvalidData)?;
295 if let Some(last_end) = last_end.replace(end) {
296 if s <= last_end.saturating_add(1) {
297 return Err(io::ErrorKind::InvalidData);
299 }
300 }
301 Ok(Interval::new_unchecked(s, end))
302 })
303 .collect::<Result<_, _>>()?,
304 );
305 Store::Run(store)
306 } else if cardinality <= ARRAY_LIMIT {
307 let mut values = vec![0; cardinality as usize];
308 reader.read_exact(cast_slice_mut(&mut values))?;
309 values.iter_mut().for_each(|n| *n = u16::from_le(*n));
310 let array = a(values).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
311 Store::Array(array)
312 } else {
313 let mut values = Box::new([0; BITMAP_LENGTH]);
314 reader.read_exact(cast_slice_mut(&mut values[..]))?;
315 values.iter_mut().for_each(|n| *n = u64::from_le(*n));
316 let bitmap = b(cardinality, values)
317 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
318 Store::Bitmap(bitmap)
319 };
320
321 containers.push(Container { key, store });
322 }
323
324 Ok(RoaringBitmap { containers })
325 }
326}
327
328fn header_size(size: usize, has_run_containers: bool) -> usize {
329 if has_run_containers {
330 let run_container_bitmap_size = size.div_ceil(8);
333 if size >= NO_OFFSET_THRESHOLD {
335 COOKIE_BYTES + ((DESCRIPTION_BYTES + OFFSET_BYTES) * size) + run_container_bitmap_size
336 } else {
337 COOKIE_BYTES + (DESCRIPTION_BYTES * size) + run_container_bitmap_size
338 }
339 } else {
340 COOKIE_BYTES + SIZE_BYTES + ((DESCRIPTION_BYTES + OFFSET_BYTES) * size)
343 }
344}
345
346#[cfg(test)]
347mod test {
348 use crate::{bitmap::store::BITMAP_LENGTH, RoaringBitmap};
349 use proptest::prelude::*;
350
351 proptest! {
352 #[test]
353 fn test_serialization(
354 bitmap in RoaringBitmap::arbitrary(),
355 ) {
356 let mut buffer = Vec::new();
357 bitmap.serialize_into(&mut buffer).unwrap();
358 prop_assert_eq!(bitmap, RoaringBitmap::deserialize_from(buffer.as_slice()).unwrap());
359 }
360 }
361
362 #[test]
363 fn test_from_lsb0_bytes() {
364 const CONTAINER_OFFSET: u32 = u64::BITS * BITMAP_LENGTH as u32;
365 const CONTAINER_OFFSET_IN_BYTES: u32 = CONTAINER_OFFSET / 8;
366 let mut bytes = vec![0xff; CONTAINER_OFFSET_IN_BYTES as usize];
367 bytes.extend([0x00; CONTAINER_OFFSET_IN_BYTES as usize]);
368 bytes.extend([0b00000001, 0b00000010, 0b00000011, 0b00000100]);
369
370 let offset = 32;
371 let rb = RoaringBitmap::from_lsb0_bytes(offset, &bytes);
372 for i in 0..offset {
373 assert!(!rb.contains(i), "{i} should not be in the bitmap");
374 }
375 for i in offset..offset + CONTAINER_OFFSET {
376 assert!(rb.contains(i), "{i} should be in the bitmap");
377 }
378 for i in offset + CONTAINER_OFFSET..offset + CONTAINER_OFFSET * 2 {
379 assert!(!rb.contains(i), "{i} should not be in the bitmap");
380 }
381 for bit in [0, 9, 16, 17, 26] {
382 let i = bit + offset + CONTAINER_OFFSET * 2;
383 assert!(rb.contains(i), "{i} should be in the bitmap");
384 }
385
386 assert_eq!(rb.len(), CONTAINER_OFFSET as u64 + 5);
387
388 let mut bytes = vec![0x00u8; CONTAINER_OFFSET_IN_BYTES as usize];
390 bytes.extend([0xff]);
391 let rb = RoaringBitmap::from_lsb0_bytes(0, &bytes);
392 assert_eq!(rb.min(), Some(CONTAINER_OFFSET));
393
394 let rb = RoaringBitmap::from_lsb0_bytes(8, &bytes);
395 assert_eq!(rb.min(), Some(CONTAINER_OFFSET + 8));
396
397 let bytes = [0x80];
399 let rb = RoaringBitmap::from_lsb0_bytes(0xFFFFFFF8, &bytes);
400 assert_eq!(rb.len(), 1);
401 assert!(rb.contains(u32::MAX));
402
403 let bytes = vec![0xFF; 0x1_0000 / 8];
405 let rb = RoaringBitmap::from_lsb0_bytes(0xFFFF0000, &bytes);
406 assert_eq!(rb.len(), 0x1_0000);
407 assert!(rb.contains(u32::MAX));
408 }
409
410 #[test]
411 fn test_from_lsb0_bytes_not_multiple_of_8() {
412 const CONTAINER_OFFSET: u32 = u64::BITS * BITMAP_LENGTH as u32;
413 const CONTAINER_OFFSET_IN_BYTES: u32 = CONTAINER_OFFSET / 8;
414
415 let mut bytes = vec![0b0101_1001];
416 bytes.extend([0x00; CONTAINER_OFFSET_IN_BYTES as usize]);
417 bytes.extend([0b00000001, 0b00000010, 0b00000011, 0b00000100]);
418
419 let mut indices = vec![0, 3, 4, 6];
420 indices.extend([0, 9, 16, 17, 26].map(|i| 8 + CONTAINER_OFFSET + i));
421
422 for offset in 0..8 {
423 let rb = RoaringBitmap::from_lsb0_bytes(offset, &bytes);
424 for i in indices.iter().map(|&i| i + offset) {
425 assert!(rb.contains(i), "{i} should be in the bitmap");
426 }
427 }
428 }
429
430 #[test]
431 #[should_panic(expected = "<= 2^32")]
432 fn test_from_lsb0_bytes_overflow() {
433 let bytes = [0x01, 0x01];
434 RoaringBitmap::from_lsb0_bytes(u32::MAX - 7, &bytes);
435 }
436
437 #[test]
438 fn test_deserialize_overflow_s_plus_len() {
439 let data = vec![59, 48, 0, 0, 255, 130, 254, 59, 48, 2, 0, 41, 255, 255, 166, 197, 4, 0, 2];
440 let res = RoaringBitmap::deserialize_from(data.as_slice());
441 assert!(res.is_err());
442 }
443}