1extern crate columnar_derive;
11pub use columnar_derive::Columnar;
12
13pub mod adts;
14
15pub trait Columnar : 'static {
19
20 type Ref<'a>;
24 fn copy_from<'a>(&mut self, other: Self::Ref<'a>) where Self: Sized {
28 *self = Self::into_owned(other);
29 }
30 fn into_owned<'a>(other: Self::Ref<'a>) -> Self;
32
33 type Container: Len + Clear + Default + for<'a> Push<&'a Self> + for<'a> Push<Self::Ref<'a>> + Container<Self> + Clone + Send;
38
39 fn as_columns<'a, I>(selves: I) -> Self::Container where I: IntoIterator<Item =&'a Self>, Self: 'a {
41 let mut columns: Self::Container = Default::default();
42 for item in selves {
43 columns.push(item);
44 }
45 columns
46 }
47 fn into_columns<I>(selves: I) -> Self::Container where I: IntoIterator<Item = Self>, Self: Sized {
52 let mut columns: Self::Container = Default::default();
53 for item in selves {
54 columns.push(&item);
55 }
56 columns
57 }
58}
59
60pub trait Container<C: Columnar + ?Sized> {
64 type Borrowed<'a>: Copy + Len + AsBytes<'a> + FromBytes<'a> + Index<Ref = C::Ref<'a>> where Self: 'a;
68 fn borrow<'a>(&'a self) -> Self::Borrowed<'a>;
70}
71
72pub use common::{Clear, Len, Push, IndexMut, Index, IndexAs, HeapSize, Slice, AsBytes, FromBytes};
73pub mod common {
75
76 pub trait Len {
78 fn len(&self) -> usize;
80 fn is_empty(&self) -> bool {
82 self.len() == 0
83 }
84 }
85 impl<L: Len> Len for &L {
86 #[inline(always)] fn len(&self) -> usize { L::len(*self) }
87 }
88 impl<L: Len> Len for &mut L {
89 #[inline(always)] fn len(&self) -> usize { L::len(*self) }
90 }
91 impl<T> Len for Vec<T> {
92 #[inline(always)] fn len(&self) -> usize { self.len() }
93 }
94 impl<T> Len for [T] {
95 #[inline(always)] fn len(&self) -> usize { <[T]>::len(self) }
96 }
97 impl<T> Len for &[T] {
98 #[inline(always)] fn len(&self) -> usize { <[T]>::len(self) }
99 }
100
101 pub trait Push<T> {
103 fn push(&mut self, item: T);
105 #[inline(always)] fn extend(&mut self, iter: impl IntoIterator<Item=T>) {
107 for item in iter {
108 self.push(item);
109 }
110 }
111 }
112 impl<T> Push<T> for Vec<T> {
113 #[inline(always)] fn push(&mut self, item: T) { self.push(item) }
114
115 #[inline(always)]
116 fn extend(&mut self, iter: impl IntoIterator<Item=T>) {
117 std::iter::Extend::extend(self, iter)
118 }
119 }
120 impl<'a, T: Clone> Push<&'a T> for Vec<T> {
121 #[inline(always)] fn push(&mut self, item: &'a T) { self.push(item.clone()) }
122
123 #[inline(always)]
124 fn extend(&mut self, iter: impl IntoIterator<Item=&'a T>) {
125 std::iter::Extend::extend(self, iter.into_iter().cloned())
126 }
127 }
128 impl<'a, T: Clone> Push<&'a [T]> for Vec<T> {
129 #[inline(always)] fn push(&mut self, item: &'a [T]) { self.clone_from_slice(item) }
130 }
131
132
133 pub use index::{Index, IndexMut, IndexAs};
134 pub mod index {
140
141 use crate::Len;
142 use crate::common::IterOwn;
143
144 pub trait IndexMut {
146 type IndexMut<'a> where Self: 'a;
148 fn get_mut(& mut self, index: usize) -> Self::IndexMut<'_>;
149 #[inline(always)] fn last_mut(&mut self) -> Option<Self::IndexMut<'_>> where Self: Len {
151 if self.is_empty() { None }
152 else { Some(self.get_mut(self.len()-1)) }
153 }
154 }
155
156 impl<'t, T: IndexMut + ?Sized> IndexMut for &'t mut T {
157 type IndexMut<'a> = T::IndexMut<'a> where Self: 'a;
158 #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
159 T::get_mut(*self, index)
160 }
161 }
162 impl<T> IndexMut for Vec<T> {
163 type IndexMut<'a> = &'a mut T where Self: 'a;
164 #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> { &mut self[index] }
165 }
166 impl<T> IndexMut for [T] {
167 type IndexMut<'a> = &'a mut T where Self: 'a;
168 #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> { &mut self[index] }
169 }
170
171 pub trait Index {
181 type Ref;
185 fn get(&self, index: usize) -> Self::Ref;
186 #[inline(always)] fn last(&self) -> Option<Self::Ref> where Self: Len {
187 if self.is_empty() { None }
188 else { Some(self.get(self.len()-1)) }
189 }
190 #[inline(always)]
194 fn index_iter(&self) -> IterOwn<&Self> {
195 IterOwn {
196 index: 0,
197 slice: self,
198 }
199 }
200 #[inline(always)]
204 fn into_index_iter(self) -> IterOwn<Self> where Self: Sized {
205 IterOwn {
206 index: 0,
207 slice: self,
208 }
209 }
210 }
211
212 impl<'a, T> Index for &'a [T] {
214 type Ref = &'a T;
215 #[inline(always)] fn get(&self, index: usize) -> Self::Ref { &self[index] }
216 }
217 impl<T: Copy> Index for [T] {
218 type Ref = T;
219 #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self[index] }
220 }
221 impl<'a, T> Index for &'a Vec<T> {
222 type Ref = &'a T;
223 #[inline(always)] fn get(&self, index: usize) -> Self::Ref { &self[index] }
224 }
225 impl<T: Copy> Index for Vec<T> {
226 type Ref = T;
227 #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self[index] }
228 }
229
230
231 pub trait CopyAs<T> {
236 fn copy_as(self) -> T;
237 }
238 impl<T: Copy> CopyAs<T> for &T {
239 fn copy_as(self) -> T { *self }
240 }
241 impl<T> CopyAs<T> for T {
242 fn copy_as(self) -> T { self }
243 }
244
245 pub trait IndexAs<T> {
246 fn index_as(&self, index: usize) -> T;
247 fn last(&self) -> Option<T> where Self: Len {
248 if self.is_empty() { None }
249 else { Some(self.index_as(self.len()-1)) }
250 }
251 }
252
253 impl<T: Index, S> IndexAs<S> for T where T::Ref: CopyAs<S> {
254 fn index_as(&self, index: usize) -> S { self.get(index).copy_as() }
255 }
256 }
257
258 pub trait Clear {
262 fn clear(&mut self);
264 }
265 impl<T> Clear for Vec<T> {
267 #[inline(always)] fn clear(&mut self) { self.clear() }
268 }
269 impl<'a, T> Clear for &'a [T] {
271 #[inline(always)] fn clear(&mut self) { *self = &[]; }
272 }
273
274 pub trait HeapSize {
275 fn heap_size(&self) -> (usize, usize) { (0, 0) }
278 }
279 impl HeapSize for serde_json::Number { }
280 impl HeapSize for String {
281 fn heap_size(&self) -> (usize, usize) {
282 (self.len(), self.capacity())
283 }
284 }
285 impl<T: HeapSize> HeapSize for [T] {
286 fn heap_size(&self) -> (usize, usize) {
287 let mut l = std::mem::size_of_val(self);
288 let mut c = std::mem::size_of_val(self);
289 for item in self.iter() {
290 let (il, ic) = item.heap_size();
291 l += il;
292 c += ic;
293 }
294 (l, c)
295 }
296 }
297 impl<T: HeapSize> HeapSize for Vec<T> {
298 fn heap_size(&self) -> (usize, usize) {
299 let mut l = std::mem::size_of::<T>() * self.len();
300 let mut c = std::mem::size_of::<T>() * self.capacity();
301 for item in (self[..]).iter() {
302 let (il, ic) = item.heap_size();
303 l += il;
304 c += ic;
305 }
306 (l, c)
307 }
308 }
309
310 #[derive(Copy, Clone, Debug)]
314 pub struct Slice<S> {
315 lower: usize,
316 upper: usize,
317 slice: S,
318 }
319
320 impl<S> Slice<S> {
321 pub fn slice<R: std::ops::RangeBounds<usize>>(self, range: R) -> Self {
322 use std::ops::Bound;
323 let lower = match range.start_bound() {
324 Bound::Included(s) => std::cmp::max(self.lower, *s),
325 Bound::Excluded(s) => std::cmp::max(self.lower, *s+1),
326 Bound::Unbounded => self.lower,
327 };
328 let upper = match range.end_bound() {
329 Bound::Included(s) => std::cmp::min(self.upper, *s+1),
330 Bound::Excluded(s) => std::cmp::min(self.upper, *s),
331 Bound::Unbounded => self.upper,
332 };
333 assert!(lower <= upper);
334 Self { lower, upper, slice: self.slice }
335 }
336 pub fn new(lower: u64, upper: u64, slice: S) -> Self {
337 let lower: usize = lower.try_into().expect("slice bounds must fit in `usize`");
338 let upper: usize = upper.try_into().expect("slice bounds must fit in `usize`");
339 Self { lower, upper, slice }
340 }
341 pub fn len(&self) -> usize { self.upper - self.lower }
342 }
343
344 impl<S: Index> PartialEq for Slice<S> where S::Ref: PartialEq {
345 fn eq(&self, other: &Self) -> bool {
346 if self.len() != other.len() { return false; }
347 for i in 0 .. self.len() {
348 if self.get(i) != other.get(i) { return false; }
349 }
350 true
351 }
352 }
353 impl<S: Index> PartialEq<[S::Ref]> for Slice<S> where S::Ref: PartialEq {
354 fn eq(&self, other: &[S::Ref]) -> bool {
355 if self.len() != other.len() { return false; }
356 for i in 0 .. self.len() {
357 if self.get(i) != other[i] { return false; }
358 }
359 true
360 }
361 }
362 impl<S: Index> PartialEq<Vec<S::Ref>> for Slice<S> where S::Ref: PartialEq {
363 fn eq(&self, other: &Vec<S::Ref>) -> bool {
364 if self.len() != other.len() { return false; }
365 for i in 0 .. self.len() {
366 if self.get(i) != other[i] { return false; }
367 }
368 true
369 }
370 }
371
372 impl<S: Index> Eq for Slice<S> where S::Ref: Eq { }
373
374 impl<S: Index> PartialOrd for Slice<S> where S::Ref: PartialOrd {
375 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
376 use std::cmp::Ordering;
377 let len = std::cmp::min(self.len(), other.len());
378
379 for i in 0 .. len {
380 match self.get(i).partial_cmp(&other.get(i)) {
381 Some(Ordering::Equal) => (),
382 not_equal => return not_equal,
383 }
384 }
385
386 self.len().partial_cmp(&other.len())
387 }
388 }
389
390 impl<S: Index> Ord for Slice<S> where S::Ref: Ord + Eq {
391 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
392 use std::cmp::Ordering;
393 let len = std::cmp::min(self.len(), other.len());
394
395 for i in 0 .. len {
396 match self.get(i).cmp(&other.get(i)) {
397 Ordering::Equal => (),
398 not_equal => return not_equal,
399 }
400 }
401
402 self.len().cmp(&other.len())
403 }
404 }
405
406 impl<S> Len for Slice<S> {
407 #[inline(always)] fn len(&self) -> usize { self.len() }
408 }
409
410 impl<S: Index> Index for Slice<S> {
411 type Ref = S::Ref;
412 #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
413 assert!(index < self.upper - self.lower);
414 self.slice.get(self.lower + index)
415 }
416 }
417 impl<'a, S> Index for &'a Slice<S>
418 where
419 &'a S : Index,
420 {
421 type Ref = <&'a S as Index>::Ref;
422 #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
423 assert!(index < self.upper - self.lower);
424 (&self.slice).get(self.lower + index)
425 }
426 }
427
428 impl<S: IndexMut> IndexMut for Slice<S> {
429 type IndexMut<'a> = S::IndexMut<'a> where S: 'a;
430 #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
431 assert!(index < self.upper - self.lower);
432 self.slice.get_mut(self.lower + index)
433 }
434 }
435
436 impl<S: Index + Len> IntoIterator for Slice<S> {
437 type Item = S::Ref;
438 type IntoIter = IterOwn<Slice<S>>;
439 fn into_iter(self) -> Self::IntoIter {
440 self.into_index_iter()
441 }
442 }
443
444 pub struct IterOwn<S> {
445 index: usize,
446 slice: S,
447 }
448
449 impl<S> IterOwn<S> {
450 pub fn new(index: usize, slice: S) -> Self {
451 Self { index, slice }
452 }
453 }
454
455 impl<S: Index + Len> Iterator for IterOwn<S> {
456 type Item = S::Ref;
457 #[inline(always)] fn next(&mut self) -> Option<Self::Item> {
458 if self.index < self.slice.len() {
459 let result = self.slice.get(self.index);
460 self.index += 1;
461 Some(result)
462 } else {
463 None
464 }
465 }
466 }
467
468 pub trait AsBytes<'a> {
472 fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])>;
474 }
475
476 pub trait FromBytes<'a> {
481 fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self;
486 }
487
488}
489
490pub mod bytes {
494
495 use crate::AsBytes;
496
497 pub trait EncodeDecode {
499 fn length_in_words<'a, A>(bytes: &A) -> usize where A : AsBytes<'a>;
501 fn length_in_bytes<'a, A>(bytes: &A) -> usize where A : AsBytes<'a> { 8 * Self::length_in_words(bytes) }
505 fn encode<'a, A>(store: &mut Vec<u64>, bytes: &A) where A : AsBytes<'a>;
507 fn write<'a, A, W: std::io::Write>(writer: W, bytes: &A) -> std::io::Result<()> where A : AsBytes<'a>;
509 fn decode<'a>(store: &'a [u64]) -> impl Iterator<Item=&'a [u8]>;
511 }
512
513 pub use serialization::Sequence;
518 mod serialization {
519
520 use crate::AsBytes;
521
522 pub struct Sequence;
524 impl super::EncodeDecode for Sequence {
525 fn length_in_words<'a, A>(bytes: &A) -> usize where A : AsBytes<'a> {
526 bytes.as_bytes().map(|(_align, bytes)| 1 + (bytes.len() + 7)/8).sum()
528 }
529 fn encode<'a, A>(store: &mut Vec<u64>, bytes: &A) where A : AsBytes<'a> {
530 encode(store, bytes.as_bytes())
531 }
532 fn write<'a, A, W: std::io::Write>(writer: W, bytes: &A) -> std::io::Result<()> where A : AsBytes<'a> {
533 write(writer, bytes.as_bytes())
534 }
535 fn decode<'a>(store: &'a [u64]) -> impl Iterator<Item=&'a [u8]> {
536 decode(store)
537 }
538 }
539
540 pub fn encode<'a>(store: &mut Vec<u64>, bytes: impl Iterator<Item=(u64, &'a [u8])>) {
545 for (align, bytes) in bytes {
546 assert!(align <= 8);
547 store.push(bytes.len() as u64);
548 let whole_words = 8 * (bytes.len() / 8);
549 if let Ok(words) = bytemuck::try_cast_slice(&bytes[.. whole_words]) {
552 store.extend_from_slice(words);
553 }
554 else {
555 let store_len = store.len();
556 store.resize(store_len + whole_words/8, 0);
557 let slice = bytemuck::try_cast_slice_mut(&mut store[store_len..]).expect("&[u64] should convert to &[u8]");
558 slice.copy_from_slice(&bytes[.. whole_words]);
559 }
560 let remaining_bytes = &bytes[whole_words..];
561 if !remaining_bytes.is_empty() {
562 let mut remainder = 0u64;
563 let transmute: &mut [u8] = bytemuck::try_cast_slice_mut(std::slice::from_mut(&mut remainder)).expect("&[u64] should convert to &[u8]");
564 for (i, byte) in remaining_bytes.iter().enumerate() {
565 transmute[i] = *byte;
566 }
567 store.push(remainder);
568 }
569 }
570 }
571
572 pub fn write<'a>(mut writer: impl std::io::Write, bytes: impl Iterator<Item=(u64, &'a [u8])>) -> std::io::Result<()> {
577 for (align, bytes) in bytes {
581 assert!(align <= 8);
582 let length = u64::try_from(bytes.len()).unwrap();
583 writer.write_all(bytemuck::cast_slice(std::slice::from_ref(&length)))?;
584 writer.write_all(bytes)?;
585 let padding = usize::try_from((8 - (length % 8)) % 8).unwrap();
586 writer.write_all(&[0; 8][..padding])?;
587 }
588 Ok(())
589 }
590
591 pub fn decode(store: &[u64]) -> Decoder<'_> {
596 Decoder { store }
597 }
598
599 pub struct Decoder<'a> {
601 store: &'a [u64],
602 }
603
604 impl<'a> Iterator for Decoder<'a> {
605 type Item = &'a [u8];
606 fn next(&mut self) -> Option<Self::Item> {
607 if let Some(length) = self.store.first() {
608 let length = *length as usize;
609 self.store = &self.store[1..];
610 let whole_words = if length % 8 == 0 { length / 8 } else { length / 8 + 1 };
611 let bytes: &[u8] = bytemuck::try_cast_slice(&self.store[..whole_words]).expect("&[u64] should convert to &[u8]");
612 self.store = &self.store[whole_words..];
613 Some(&bytes[..length])
614 } else {
615 None
616 }
617 }
618 }
619 }
620
621 pub use serialization_neu::Indexed;
627 pub mod serialization_neu {
628
629 use crate::AsBytes;
630
631 pub struct Indexed;
633 impl super::EncodeDecode for Indexed {
634 fn length_in_words<'a, A>(bytes: &A) -> usize where A : AsBytes<'a> {
635 1 + bytes.as_bytes().map(|(_align, bytes)| 1 + (bytes.len() + 7)/8).sum::<usize>()
636 }
637 fn encode<'a, A>(store: &mut Vec<u64>, bytes: &A) where A : AsBytes<'a> {
638 encode(store, bytes)
639 }
640 fn write<'a, A, W: std::io::Write>(writer: W, bytes: &A) -> std::io::Result<()> where A : AsBytes<'a> {
641 write(writer, bytes)
642 }
643 fn decode<'a>(store: &'a [u64]) -> impl Iterator<Item=&'a [u8]> {
644 decode(store)
645 }
646 }
647
648 pub fn encode<'a, A>(store: &mut Vec<u64>, iter: &A)
661 where A : AsBytes<'a>,
662 {
663 let offsets = 1 + iter.as_bytes().count();
666 let offsets_end: u64 = TryInto::<u64>::try_into((offsets) * std::mem::size_of::<u64>()).unwrap();
667 store.push(offsets_end);
668 let mut position_bytes = offsets_end;
670 for (align, bytes) in iter.as_bytes() {
671 assert!(align <= 8);
672 let to_push: u64 = position_bytes + TryInto::<u64>::try_into(bytes.len()).unwrap();
674 store.push(to_push);
675 let round_len: u64 = ((bytes.len() + 7) & !7).try_into().unwrap();
676 position_bytes += round_len;
677 }
678 for (_align, bytes) in iter.as_bytes() {
680 let whole_words = 8 * (bytes.len() / 8);
681 if let Ok(words) = bytemuck::try_cast_slice(&bytes[.. whole_words]) {
684 store.extend_from_slice(words);
685 }
686 else {
687 let store_len = store.len();
688 store.resize(store_len + whole_words/8, 0);
689 let slice = bytemuck::try_cast_slice_mut(&mut store[store_len..]).expect("&[u64] should convert to &[u8]");
690 slice.copy_from_slice(&bytes[.. whole_words]);
691 }
692 let remaining_bytes = &bytes[whole_words..];
693 if !remaining_bytes.is_empty() {
694 let mut remainder = 0u64;
695 let transmute: &mut [u8] = bytemuck::try_cast_slice_mut(std::slice::from_mut(&mut remainder)).expect("&[u64] should convert to &[u8]");
696 for (i, byte) in remaining_bytes.iter().enumerate() {
697 transmute[i] = *byte;
698 }
699 store.push(remainder);
700 }
701 }
702 }
703
704 pub fn write<'a, A, W>(mut writer: W, iter: &A) -> std::io::Result<()>
705 where
706 A: AsBytes<'a>,
707 W: std::io::Write,
708 {
709 let offsets = 1 + iter.as_bytes().count();
711 let offsets_end: u64 = TryInto::<u64>::try_into((offsets) * std::mem::size_of::<u64>()).unwrap();
712 writer.write_all(bytemuck::cast_slice(std::slice::from_ref(&offsets_end)))?;
713 let mut position_bytes = offsets_end;
715 for (align, bytes) in iter.as_bytes() {
716 assert!(align <= 8);
717 let to_push: u64 = position_bytes + TryInto::<u64>::try_into(bytes.len()).unwrap();
719 writer.write_all(bytemuck::cast_slice(std::slice::from_ref(&to_push)))?;
720 let round_len: u64 = ((bytes.len() + 7) & !7).try_into().unwrap();
721 position_bytes += round_len;
722 }
723 for (_align, bytes) in iter.as_bytes() {
725 writer.write_all(bytes)?;
726 let padding = ((bytes.len() + 7) & !7) - bytes.len();
727 if padding > 0 {
728 writer.write_all(&[0u8;8][..padding])?;
729 }
730 }
731
732 Ok(())
733 }
734
735 pub fn decode(store: &[u64]) -> impl Iterator<Item=&[u8]> {
737 assert!(store[0] % 8 == 0);
738 let slices = (store[0] / 8) - 1;
739 (0 .. slices).map(|i| decode_index(store, i))
740 }
741
742 #[inline(always)]
744 pub fn decode_index(store: &[u64], index: u64) -> &[u8] {
745 debug_assert!(index + 1 < store[0]/8);
746 let index: usize = index.try_into().unwrap();
747 let lower: usize = ((store[index] + 7) & !7).try_into().unwrap();
748 let upper: usize = (store[index + 1]).try_into().unwrap();
749 let bytes: &[u8] = bytemuck::try_cast_slice(store).expect("&[u64] should convert to &[u8]");
750 &bytes[lower .. upper]
751 }
752
753 #[cfg(test)]
754 mod test {
755
756 use crate::{Columnar, Container};
757 use crate::common::Push;
758 use crate::AsBytes;
759
760 use super::{encode, decode};
761
762 fn assert_roundtrip<'a, AB: AsBytes<'a>>(item: &AB) {
763 let mut store = Vec::new();
764 encode(&mut store, item);
765 assert!(item.as_bytes().map(|x| x.1).eq(decode(&store)));
766 }
767
768 #[test]
769 fn round_trip() {
770
771 let mut column: <Result<u64, String> as Columnar>::Container = Default::default();
772 for i in 0..10000u64 {
773 column.push(&Ok::<u64, String>(i));
774 column.push(&Err::<u64, String>(format!("{:?}", i)));
775 }
776
777 assert_roundtrip(&column.borrow());
778 }
779 }
780 }
781
782 #[cfg(test)]
783 mod test {
784 #[test]
785 fn round_trip() {
786
787 use crate::{Columnar, Container};
788 use crate::common::{Push, HeapSize, Len, Index};
789 use crate::{AsBytes, FromBytes};
790
791 let mut column: <Result<u64, u64> as Columnar>::Container = Default::default();
792 for i in 0..100u64 {
793 column.push(Ok::<u64, u64>(i));
794 column.push(Err::<u64, u64>(i));
795 }
796
797 assert_eq!(column.len(), 200);
798 assert_eq!(column.heap_size(), (1624, 2080));
799
800 for i in 0..100 {
801 assert_eq!(column.get(2*i+0), Ok(i as u64));
802 assert_eq!(column.get(2*i+1), Err(i as u64));
803 }
804
805 let column2 = crate::Results::<&[u64], &[u64], &[u64], &[u64], &u64>::from_bytes(&mut column.borrow().as_bytes().map(|(_, bytes)| bytes));
806 for i in 0..100 {
807 assert_eq!(column.get(2*i+0), column2.get(2*i+0).copied().map_err(|e| *e));
808 assert_eq!(column.get(2*i+1), column2.get(2*i+1).copied().map_err(|e| *e));
809 }
810
811 let column3 = crate::Results::<&[u64], &[u64], &[u64], &[u64], &u64>::from_bytes(&mut column2.as_bytes().map(|(_, bytes)| bytes));
812 for i in 0..100 {
813 assert_eq!(column3.get(2*i+0), column2.get(2*i+0));
814 assert_eq!(column3.get(2*i+1), column2.get(2*i+1));
815 }
816 }
817 }
818}
819
820pub mod primitive {
822
823 macro_rules! implement_columnable {
825 ($($index_type:ty),*) => { $(
826 impl crate::Columnar for $index_type {
827 type Ref<'a> = &'a $index_type;
828 fn into_owned<'a>(other: Self::Ref<'a>) -> Self { *other }
829
830 type Container = Vec<$index_type>;
831 }
832 impl crate::Container<$index_type> for Vec<$index_type> {
833 type Borrowed<'a> = &'a [$index_type];
834 fn borrow<'a>(&'a self) -> Self::Borrowed<'a> { &self[..] }
835 }
836
837 impl crate::HeapSize for $index_type { }
838
839 impl<'a> crate::AsBytes<'a> for &'a [$index_type] {
840 #[inline(always)]
841 fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
842 std::iter::once((std::mem::align_of::<$index_type>() as u64, bytemuck::cast_slice(&self[..])))
843 }
844 }
845 impl<'a> crate::FromBytes<'a> for &'a [$index_type] {
846 fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
847 bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()
849 }
850 }
851 )* }
852 }
853
854 implement_columnable!(u8, u16, u32, u64, u128);
855 implement_columnable!(i8, i16, i32, i64, i128);
856 implement_columnable!(f32, f64);
857
858 pub use sizes::{Usizes, Isizes};
859 mod sizes {
861
862 use crate::{Clear, Columnar, Len, IndexMut, Index, IndexAs, Push, HeapSize};
863
864 #[derive(Copy, Clone, Default)]
865 pub struct Usizes<CV = Vec<u64>> { pub values: CV }
866
867 impl Columnar for usize {
868 type Ref<'a> = usize;
869 fn into_owned<'a>(other: Self::Ref<'a>) -> Self { other }
870 type Container = Usizes;
871 }
872
873 impl<CV: crate::Container<u64>> crate::Container<usize> for Usizes<CV> {
874 type Borrowed<'a> = Usizes<CV::Borrowed<'a>> where CV: 'a;
875 fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
876 Usizes { values: self.values.borrow() }
877 }
878 }
879
880 impl<CV: Len> Len for Usizes<CV> { fn len(&self) -> usize { self.values.len() }}
881 impl IndexMut for Usizes {
882 type IndexMut<'a> = &'a mut u64;
883 #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> { &mut self.values[index] }
884 }
885 impl<CV: IndexAs<u64>> Index for Usizes<CV> {
886 type Ref = usize;
887 #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self.values.index_as(index).try_into().expect("Usizes values should fit in `usize`") }
888 }
889 impl<'a, CV: IndexAs<u64>> Index for &'a Usizes<CV> {
890 type Ref = usize;
891 #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self.values.index_as(index).try_into().expect("Usizes values should fit in `usize`") }
892 }
893 impl Push<usize> for Usizes {
894 #[inline]
895 fn push(&mut self, item: usize) { self.values.push(item.try_into().expect("usize must fit in a u64")) }
896 }
897 impl Push<&usize> for Usizes {
898 #[inline]
899 fn push(&mut self, item: &usize) { self.values.push((*item).try_into().expect("usize must fit in a u64")) }
900 }
901 impl<CV: Clear> Clear for Usizes<CV> { fn clear(&mut self) { self.values.clear() }}
902
903 impl<CV: HeapSize> HeapSize for Usizes<CV> {
904 fn heap_size(&self) -> (usize, usize) {
905 self.values.heap_size()
906 }
907 }
908
909 impl<'a, CV: crate::AsBytes<'a>> crate::AsBytes<'a> for crate::primitive::Usizes<CV> {
910 #[inline(always)]
911 fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
912 self.values.as_bytes()
913 }
914 }
915
916 impl<'a, CV: crate::FromBytes<'a>> crate::FromBytes<'a> for crate::primitive::Usizes<CV> {
917 fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
918 Self { values: CV::from_bytes(bytes) }
919 }
920 }
921
922
923 #[derive(Copy, Clone, Default)]
924 pub struct Isizes<CV = Vec<i64>> { pub values: CV }
925
926 impl Columnar for isize {
927 type Ref<'a> = isize;
928 fn into_owned<'a>(other: Self::Ref<'a>) -> Self { other }
929 type Container = Isizes;
930 }
931
932 impl<CV: crate::Container<i64>> crate::Container<isize> for Isizes<CV> {
933 type Borrowed<'a> = Isizes<CV::Borrowed<'a>> where CV: 'a;
934 fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
935 Isizes { values: self.values.borrow() }
936 }
937 }
938
939 impl<CV: Len> Len for Isizes<CV> { fn len(&self) -> usize { self.values.len() }}
940 impl IndexMut for Isizes {
941 type IndexMut<'a> = &'a mut i64;
942 #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> { &mut self.values[index] }
943 }
944 impl<CV: IndexAs<i64>> Index for Isizes<CV> {
945 type Ref = isize;
946 #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self.values.index_as(index).try_into().expect("Isizes values should fit in `isize`") }
947 }
948 impl<'a, CV: IndexAs<i64>> Index for &'a Isizes<CV> {
949 type Ref = isize;
950 #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self.values.index_as(index).try_into().expect("Isizes values should fit in `isize`") }
951 }
952 impl Push<isize> for Isizes {
953 #[inline]
954 fn push(&mut self, item: isize) { self.values.push(item.try_into().expect("isize must fit in a i64")) }
955 }
956 impl Push<&isize> for Isizes {
957 #[inline]
958 fn push(&mut self, item: &isize) { self.values.push((*item).try_into().expect("isize must fit in a i64")) }
959 }
960 impl<CV: Clear> Clear for Isizes<CV> { fn clear(&mut self) { self.values.clear() }}
961
962 impl<CV: HeapSize> HeapSize for Isizes<CV> {
963 fn heap_size(&self) -> (usize, usize) {
964 self.values.heap_size()
965 }
966 }
967
968 impl<'a, CV: crate::AsBytes<'a>> crate::AsBytes<'a> for crate::primitive::Isizes<CV> {
969 #[inline(always)]
970 fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
971 self.values.as_bytes()
972 }
973 }
974
975 impl<'a, CV: crate::FromBytes<'a>> crate::FromBytes<'a> for crate::primitive::Isizes<CV> {
976 fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
977 Self { values: CV::from_bytes(bytes) }
978 }
979 }
980 }
981
982 pub use empty::Empties;
983 mod empty {
985
986 use crate::common::index::CopyAs;
987 use crate::{Clear, Columnar, Len, IndexMut, Index, Push, HeapSize};
988
989 #[derive(Copy, Clone, Debug, Default)]
990 pub struct Empties<CC = u64> { pub count: CC, pub empty: () }
991
992 impl Columnar for () {
993 type Ref<'a> = ();
994 fn into_owned<'a>(_other: Self::Ref<'a>) -> Self { () }
995 type Container = Empties;
996 }
997
998 impl crate::Container<()> for Empties {
999 type Borrowed<'a> = Empties<&'a u64>;
1000 fn borrow<'a>(&'a self) -> Self::Borrowed<'a> { Empties { count: &self.count, empty: () } }
1001 }
1002
1003 impl<CC: CopyAs<u64> + Copy> Len for Empties<CC> {
1004 fn len(&self) -> usize { self.count.copy_as() as usize }
1005 }
1006 impl<CC> IndexMut for Empties<CC> {
1007 type IndexMut<'a> = &'a mut () where CC: 'a;
1008 #[inline(always)] fn get_mut(&mut self, _index: usize) -> Self::IndexMut<'_> { &mut self.empty }
1010 }
1011 impl<CC> Index for Empties<CC> {
1012 type Ref = ();
1013 fn get(&self, _index: usize) -> Self::Ref { () }
1014 }
1015 impl<'a, CC> Index for &'a Empties<CC> {
1016 type Ref = &'a ();
1017 fn get(&self, _index: usize) -> Self::Ref { &() }
1018 }
1019 impl Push<()> for Empties {
1020 #[inline(always)]
1022 fn push(&mut self, _item: ()) { self.count += 1; }
1023 }
1024 impl Push<&()> for Empties {
1025 #[inline(always)]
1027 fn push(&mut self, _item: &()) { self.count += 1; }
1028 }
1029
1030 impl HeapSize for Empties {
1031 fn heap_size(&self) -> (usize, usize) { (0, 0) }
1032 }
1033 impl Clear for Empties {
1034 fn clear(&mut self) { self.count = 0; }
1035 }
1036
1037 impl<'a> crate::AsBytes<'a> for crate::primitive::Empties<&'a u64> {
1038 #[inline(always)]
1039 fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1040 std::iter::once((8, bytemuck::cast_slice(std::slice::from_ref(self.count))))
1041 }
1042 }
1043 impl<'a> crate::FromBytes<'a> for crate::primitive::Empties<&'a u64> {
1044 fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1045 Self { count: &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0], empty: () }
1046 }
1047 }
1048 }
1049
1050 pub use boolean::Bools;
1051 mod boolean {
1053
1054 use crate::common::index::CopyAs;
1055 use crate::{Clear, Len, Index, IndexAs, Push, HeapSize};
1056
1057 #[derive(Copy, Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
1059 pub struct Bools<VC = Vec<u64>, WC = u64> {
1060 pub values: VC,
1062 pub last_word: WC,
1064 pub last_bits: WC,
1066 }
1067
1068 impl crate::Columnar for bool {
1069 type Ref<'a> = bool;
1070 fn into_owned<'a>(other: Self::Ref<'a>) -> Self { other }
1071 type Container = Bools;
1072 }
1073
1074 impl<VC: crate::Container<u64>> crate::Container<bool> for Bools<VC> {
1075 type Borrowed<'a> = Bools<VC::Borrowed<'a>, &'a u64> where VC: 'a;
1076 fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1077 Bools {
1078 values: self.values.borrow(),
1079 last_word: &self.last_word,
1080 last_bits: &self.last_bits,
1081 }
1082 }
1083 }
1084
1085 impl<'a, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for crate::primitive::Bools<VC, &'a u64> {
1086 #[inline(always)]
1087 fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1088 let iter = self.values.as_bytes();
1089 let iter = crate::chain_one(iter, (std::mem::align_of::<u64>() as u64, bytemuck::cast_slice(std::slice::from_ref(self.last_word))));
1090 crate::chain_one(iter, (std::mem::align_of::<u64>() as u64, bytemuck::cast_slice(std::slice::from_ref(self.last_bits))))
1091 }
1092 }
1093
1094 impl<'a, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for crate::primitive::Bools<VC, &'a u64> {
1095 fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1096 let values = crate::FromBytes::from_bytes(bytes);
1097 let last_word = &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0];
1098 let last_bits = &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0];
1099 Self { values, last_word, last_bits }
1100 }
1101 }
1102
1103 impl<VC: Len, WC: Copy + CopyAs<u64>> Len for Bools<VC, WC> {
1104 #[inline(always)] fn len(&self) -> usize { self.values.len() * 64 + (self.last_bits.copy_as() as usize) }
1105 }
1106
1107 impl<VC: Len + IndexAs<u64>, WC: Copy + CopyAs<u64>> Index for Bools<VC, WC> {
1108 type Ref = bool;
1109 #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
1110 let block = index / 64;
1111 let word = if block == self.values.len() {
1112 self.last_word.copy_as()
1113 } else {
1114 self.values.index_as(block)
1115 };
1116 let bit = index % 64;
1117 (word >> bit) & 1 == 1
1118 }
1119 }
1120
1121 impl<VC: Len + IndexAs<u64>, WC: Copy + CopyAs<u64>> Index for &Bools<VC, WC> {
1122 type Ref = bool;
1123 #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
1124 (*self).get(index)
1125 }
1126 }
1127
1128 impl<VC: Push<u64>> Push<bool> for Bools<VC> {
1129 #[inline]
1130 fn push(&mut self, bit: bool) {
1131 self.last_word |= (bit as u64) << self.last_bits;
1132 self.last_bits += 1;
1133 if self.last_bits == 64 {
1135 self.values.push(self.last_word);
1136 self.last_word = 0;
1137 self.last_bits = 0;
1138 }
1139 }
1140 }
1141 impl<'a, VC: Push<u64>> Push<&'a bool> for Bools<VC> {
1142 #[inline(always)]
1143 fn push(&mut self, bit: &'a bool) {
1144 self.push(*bit)
1145 }
1146 }
1147
1148
1149 impl<VC: Clear> Clear for Bools<VC> {
1150 fn clear(&mut self) {
1151 self.values.clear();
1152 self.last_word = 0;
1153 self.last_bits = 0;
1154 }
1155 }
1156
1157 impl<VC: HeapSize> HeapSize for Bools<VC> {
1158 fn heap_size(&self) -> (usize, usize) {
1159 self.values.heap_size()
1160 }
1161 }
1162 }
1163
1164 pub use duration::Durations;
1165 mod duration {
1167
1168 use std::time::Duration;
1169 use crate::{Len, Index, IndexAs, Push, Clear, HeapSize};
1170
1171 #[derive(Copy, Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
1173 pub struct Durations<SC = Vec<u64>, NC = Vec<u32>> {
1174 pub seconds: SC,
1175 pub nanoseconds: NC,
1176 }
1177
1178 impl crate::Columnar for Duration {
1179 type Ref<'a> = Duration;
1180 fn into_owned<'a>(other: Self::Ref<'a>) -> Self { other }
1181 type Container = Durations;
1182 }
1183
1184 impl<SC: crate::Container<u64>, NC: crate::Container<u32>> crate::Container<Duration> for Durations<SC, NC> {
1185 type Borrowed<'a> = Durations<SC::Borrowed<'a>, NC::Borrowed<'a>> where SC: 'a, NC: 'a;
1186 fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1187 Durations {
1188 seconds: self.seconds.borrow(),
1189 nanoseconds: self.nanoseconds.borrow(),
1190 }
1191 }
1192 }
1193
1194 impl<'a, SC: crate::AsBytes<'a>, NC: crate::AsBytes<'a>> crate::AsBytes<'a> for crate::primitive::Durations<SC, NC> {
1195 #[inline(always)]
1196 fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1197 crate::chain(self.seconds.as_bytes(), self.nanoseconds.as_bytes())
1198 }
1199 }
1200 impl<'a, SC: crate::FromBytes<'a>, NC: crate::FromBytes<'a>> crate::FromBytes<'a> for crate::primitive::Durations<SC, NC> {
1201 fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1202 Self {
1203 seconds: crate::FromBytes::from_bytes(bytes),
1204 nanoseconds: crate::FromBytes::from_bytes(bytes),
1205 }
1206 }
1207 }
1208
1209 impl<SC: Len, NC> Len for Durations<SC, NC> {
1210 #[inline(always)] fn len(&self) -> usize { self.seconds.len() }
1211 }
1212
1213 impl<SC: IndexAs<u64>, NC: IndexAs<u32>> Index for Durations<SC, NC> {
1214 type Ref = Duration;
1215 #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
1216 Duration::new(self.seconds.index_as(index), self.nanoseconds.index_as(index))
1217 }
1218 }
1219
1220 impl<SC: Push<u64>, NC: Push<u32>> Push<std::time::Duration> for Durations<SC, NC> {
1221 #[inline]
1222 fn push(&mut self, item: std::time::Duration) {
1223 self.seconds.push(item.as_secs());
1224 self.nanoseconds.push(item.subsec_nanos());
1225 }
1226 }
1227 impl<'a, SC: Push<u64>, NC: Push<u32>> Push<&'a std::time::Duration> for Durations<SC, NC> {
1228 #[inline]
1229 fn push(&mut self, item: &'a std::time::Duration) {
1230 self.push(*item)
1231 }
1232 }
1233 impl<'a, SC: Push<&'a u64>, NC: Push<&'a u32>> Push<(&'a u64, &'a u32)> for Durations<SC, NC> {
1234 #[inline]
1235 fn push(&mut self, item: (&'a u64, &'a u32)) {
1236 self.seconds.push(item.0);
1237 self.nanoseconds.push(item.1);
1238 }
1239 }
1240
1241 impl<SC: Clear, NC: Clear> Clear for Durations<SC, NC> {
1242 fn clear(&mut self) {
1243 self.seconds.clear();
1244 self.nanoseconds.clear();
1245 }
1246 }
1247
1248 impl<SC: HeapSize, NC: HeapSize> HeapSize for Durations<SC, NC> {
1249 fn heap_size(&self) -> (usize, usize) {
1250 let (l0, c0) = self.seconds.heap_size();
1251 let (l1, c1) = self.nanoseconds.heap_size();
1252 (l0 + l1, c0 + c1)
1253 }
1254 }
1255 }
1256}
1257
1258pub use string::Strings;
1259pub mod string {
1260
1261 use super::{Clear, Columnar, Len, Index, IndexAs, Push, HeapSize};
1262
1263 #[derive(Copy, Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
1265 pub struct Strings<BC = Vec<u64>, VC = Vec<u8>> {
1266 pub bounds: BC,
1268 pub values: VC,
1270 }
1271
1272 impl Columnar for String {
1273 type Ref<'a> = &'a str;
1274 fn copy_from<'a>(&mut self, other: Self::Ref<'a>) {
1275 self.clear();
1276 self.push_str(other);
1277 }
1278 fn into_owned<'a>(other: Self::Ref<'a>) -> Self { other.to_string() }
1279 type Container = Strings;
1280 }
1281
1282 impl<'b, BC: crate::Container<u64>> crate::Container<String> for Strings<BC, &'b [u8]> {
1283 type Borrowed<'a> = Strings<BC::Borrowed<'a>, &'a [u8]> where BC: 'a, 'b: 'a;
1284 fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1285 Strings {
1286 bounds: self.bounds.borrow(),
1287 values: self.values,
1288 }
1289 }
1290 }
1291 impl<BC: crate::Container<u64>> crate::Container<String> for Strings<BC, Vec<u8>> {
1292 type Borrowed<'a> = Strings<BC::Borrowed<'a>, &'a [u8]> where BC: 'a;
1293 fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1294 Strings {
1295 bounds: self.bounds.borrow(),
1296 values: self.values.borrow(),
1297 }
1298 }
1299 }
1300
1301 impl<'a, BC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for Strings<BC, VC> {
1302 #[inline(always)]
1303 fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1304 crate::chain(self.bounds.as_bytes(), self.values.as_bytes())
1305 }
1306 }
1307 impl<'a, BC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for Strings<BC, VC> {
1308 fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1309 Self {
1310 bounds: crate::FromBytes::from_bytes(bytes),
1311 values: crate::FromBytes::from_bytes(bytes),
1312 }
1313 }
1314 }
1315
1316 impl<BC: Len, VC> Len for Strings<BC, VC> {
1317 #[inline(always)] fn len(&self) -> usize { self.bounds.len() }
1318 }
1319
1320 impl<'a, BC: Len+IndexAs<u64>> Index for Strings<BC, &'a [u8]> {
1321 type Ref = &'a str;
1322 #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
1323 let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
1324 let upper = self.bounds.index_as(index);
1325 let lower: usize = lower.try_into().expect("bounds must fit in `usize`");
1326 let upper: usize = upper.try_into().expect("bounds must fit in `usize`");
1327 std::str::from_utf8(&self.values[lower .. upper]).expect("&[u8] must be valid utf8")
1328 }
1329 }
1330 impl<'a, BC: Len+IndexAs<u64>> Index for &'a Strings<BC, Vec<u8>> {
1331 type Ref = &'a str;
1332 #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
1333 let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
1334 let upper = self.bounds.index_as(index);
1335 let lower: usize = lower.try_into().expect("bounds must fit in `usize`");
1336 let upper: usize = upper.try_into().expect("bounds must fit in `usize`");
1337 std::str::from_utf8(&self.values[lower .. upper]).expect("&[u8] must be valid utf8")
1338 }
1339 }
1340
1341 impl<BC: Push<u64>> Push<&String> for Strings<BC> {
1342 #[inline(always)] fn push(&mut self, item: &String) {
1343 self.values.extend_from_slice(item.as_bytes());
1344 self.bounds.push(self.values.len() as u64);
1345 }
1346 }
1347 impl<BC: Push<u64>> Push<&str> for Strings<BC> {
1348 #[inline]
1349 fn push(&mut self, item: &str) {
1350 self.values.extend_from_slice(item.as_bytes());
1351 self.bounds.push(self.values.len() as u64);
1352 }
1353 }
1354 impl<'a, BC: Push<u64>> Push<std::fmt::Arguments<'a>> for Strings<BC> {
1355 fn push(&mut self, item: std::fmt::Arguments<'a>) {
1356 use std::io::Write;
1357 self.values.write_fmt(item).expect("write_fmt failed");
1358 self.bounds.push(self.values.len() as u64);
1359 }
1360 }
1361 impl<'a, 'b, BC: Push<u64>> Push<&'b std::fmt::Arguments<'a>> for Strings<BC> {
1362 fn push(&mut self, item: &'b std::fmt::Arguments<'a>) {
1363 use std::io::Write;
1364 self.values.write_fmt(*item).expect("write_fmt failed");
1365 self.bounds.push(self.values.len() as u64);
1366 }
1367 }
1368 impl<BC: Clear, VC: Clear> Clear for Strings<BC, VC> {
1369 fn clear(&mut self) {
1370 self.bounds.clear();
1371 self.values.clear();
1372 }
1373 }
1374 impl<BC: HeapSize, VC: HeapSize> HeapSize for Strings<BC, VC> {
1375 fn heap_size(&self) -> (usize, usize) {
1376 let (l0, c0) = self.bounds.heap_size();
1377 let (l1, c1) = self.values.heap_size();
1378 (l0 + l1, c0 + c1)
1379 }
1380 }
1381}
1382
1383pub use vector::Vecs;
1384pub mod vector {
1385
1386 use super::{Clear, Columnar, Len, IndexMut, Index, IndexAs, Push, HeapSize, Slice};
1387
1388 #[derive(Debug, Default, Copy, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
1390 pub struct Vecs<TC, BC = Vec<u64>> {
1391 pub bounds: BC,
1392 pub values: TC,
1393 }
1394
1395 impl<T: Columnar> Columnar for Vec<T> {
1396 type Ref<'a> = Slice<<T::Container as crate::Container<T>>::Borrowed<'a>> where T: 'a;
1397 fn copy_from<'a>(&mut self, other: Self::Ref<'a>) {
1398 self.truncate(other.len());
1399 let mut other_iter = other.into_iter();
1400 for (s, o) in self.iter_mut().zip(&mut other_iter) {
1401 T::copy_from(s, o);
1402 }
1403 for o in other_iter {
1404 self.push(T::into_owned(o));
1405 }
1406 }
1407 fn into_owned<'a>(other: Self::Ref<'a>) -> Self {
1408 other.into_iter().map(|x| T::into_owned(x)).collect()
1409 }
1410 type Container = Vecs<T::Container>;
1411 }
1412
1413 impl<T: Columnar, const N: usize> Columnar for [T; N] {
1414 type Ref<'a> = Slice<<T::Container as crate::Container<T>>::Borrowed<'a>> where T: 'a;
1415 fn copy_from<'a>(&mut self, other: Self::Ref<'a>) {
1416 for (s, o) in self.iter_mut().zip(other.into_iter()) {
1417 T::copy_from(s, o);
1418 }
1419 }
1420 fn into_owned<'a>(other: Self::Ref<'a>) -> Self {
1421 let vec: Vec<_> = other.into_iter().map(|x| T::into_owned(x)).collect();
1422 match vec.try_into() {
1423 Ok(array) => array,
1424 Err(_) => panic!("wrong length"),
1425 }
1426 }
1427 type Container = Vecs<T::Container>;
1428 }
1429
1430 impl<T: Columnar<Container = TC>, BC: crate::Container<u64>, TC: crate::Container<T>> crate::Container<Vec<T>> for Vecs<TC, BC> {
1431 type Borrowed<'a> = Vecs<TC::Borrowed<'a>, BC::Borrowed<'a>> where BC: 'a, TC: 'a, T: 'a;
1432 fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1433 Vecs {
1434 bounds: self.bounds.borrow(),
1435 values: self.values.borrow(),
1436 }
1437 }
1438 }
1439
1440 impl<T: Columnar<Container = TC>, BC: crate::Container<u64>, TC: crate::Container<T>, const N: usize> crate::Container<[T; N]> for Vecs<TC, BC> {
1441 type Borrowed<'a> = Vecs<TC::Borrowed<'a>, BC::Borrowed<'a>> where BC: 'a, TC: 'a, T: 'a;
1442 fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1443 Vecs {
1444 bounds: self.bounds.borrow(),
1445 values: self.values.borrow(),
1446 }
1447 }
1448 }
1449
1450 impl<'a, TC: crate::AsBytes<'a>, BC: crate::AsBytes<'a>> crate::AsBytes<'a> for Vecs<TC, BC> {
1451 #[inline(always)]
1452 fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1453 crate::chain(self.bounds.as_bytes(), self.values.as_bytes())
1454 }
1455 }
1456 impl<'a, TC: crate::FromBytes<'a>, BC: crate::FromBytes<'a>> crate::FromBytes<'a> for Vecs<TC, BC> {
1457 fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1458 Self {
1459 bounds: crate::FromBytes::from_bytes(bytes),
1460 values: crate::FromBytes::from_bytes(bytes),
1461 }
1462 }
1463 }
1464
1465 impl<TC: Len> Vecs<TC> {
1466 #[inline]
1467 pub fn push_iter<I>(&mut self, iter: I) where I: IntoIterator, TC: Push<I::Item> {
1468 self.values.extend(iter);
1469 self.bounds.push(self.values.len() as u64);
1470 }
1471 }
1472
1473 impl<TC, BC: Len> Len for Vecs<TC, BC> {
1474 #[inline(always)] fn len(&self) -> usize { self.bounds.len() }
1475 }
1476
1477 impl<TC: Copy, BC: Len+IndexAs<u64>> Index for Vecs<TC, BC> {
1478 type Ref = Slice<TC>;
1479 #[inline(always)]
1480 fn get(&self, index: usize) -> Self::Ref {
1481 let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
1482 let upper = self.bounds.index_as(index);
1483 Slice::new(lower, upper, self.values)
1484 }
1485 }
1486 impl<'a, TC, BC: Len+IndexAs<u64>> Index for &'a Vecs<TC, BC> {
1487 type Ref = Slice<&'a TC>;
1488 #[inline(always)]
1489 fn get(&self, index: usize) -> Self::Ref {
1490 let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
1491 let upper = self.bounds.index_as(index);
1492 Slice::new(lower, upper, &self.values)
1493 }
1494 }
1495 impl<TC, BC: Len+IndexAs<u64>> IndexMut for Vecs<TC, BC> {
1496 type IndexMut<'a> = Slice<&'a mut TC> where TC: 'a, BC: 'a;
1497
1498 #[inline(always)]
1499 fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
1500 let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
1501 let upper = self.bounds.index_as(index);
1502 Slice::new(lower, upper, &mut self.values)
1503 }
1504 }
1505
1506 impl<I: IntoIterator, TC: Push<I::Item> + Len> Push<I> for Vecs<TC> {
1507 #[inline]
1508 fn push(&mut self, item: I) {
1509 self.values.extend(item.into_iter());
1510 self.bounds.push(self.values.len() as u64);
1511 }
1512 }
1513
1514 impl<TC: Clear> Clear for Vecs<TC> {
1515 fn clear(&mut self) {
1516 self.bounds.clear();
1517 self.values.clear();
1518 }
1519 }
1520
1521 impl<TC: HeapSize, BC: HeapSize> HeapSize for Vecs<TC, BC> {
1522 fn heap_size(&self) -> (usize, usize) {
1523 let (l0, c0) = self.bounds.heap_size();
1524 let (l1, c1) = self.values.heap_size();
1525 (l0 + l1, c0 + c1)
1526 }
1527 }
1528}
1529
1530#[allow(non_snake_case)]
1531pub mod tuple {
1532
1533 use super::{Clear, Columnar, Len, IndexMut, Index, Push, HeapSize};
1534
1535 macro_rules! tuple_impl {
1539 ( $($name:ident,$name2:ident)+) => (
1540
1541 impl<$($name: Columnar),*> Columnar for ($($name,)*) {
1542 type Ref<'a> = ($($name::Ref<'a>,)*) where $($name: 'a,)*;
1543 fn copy_from<'a>(&mut self, other: Self::Ref<'a>) {
1544 let ($($name,)*) = self;
1545 let ($($name2,)*) = other;
1546 $(crate::Columnar::copy_from($name, $name2);)*
1547 }
1548 fn into_owned<'a>(other: Self::Ref<'a>) -> Self {
1549 let ($($name2,)*) = other;
1550 ($($name::into_owned($name2),)*)
1551 }
1552 type Container = ($($name::Container,)*);
1553 }
1554 impl<$($name: crate::Columnar, $name2: crate::Container<$name>,)*> crate::Container<($($name,)*)> for ($($name2,)*) {
1555 type Borrowed<'a> = ($($name2::Borrowed<'a>,)*) where $($name: 'a, $name2: 'a,)*;
1556 fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1557 let ($($name,)*) = self;
1558 ($($name.borrow(),)*)
1559 }
1560 }
1561
1562 #[allow(non_snake_case)]
1563 impl<'a, $($name: crate::AsBytes<'a>),*> crate::AsBytes<'a> for ($($name,)*) {
1564 #[inline(always)]
1565 fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1566 let ($($name,)*) = self;
1567 let iter = None.into_iter();
1568 $( let iter = crate::chain(iter, $name.as_bytes()); )*
1569 iter
1570 }
1571 }
1572 impl<'a, $($name: crate::FromBytes<'a>),*> crate::FromBytes<'a> for ($($name,)*) {
1573 #[allow(non_snake_case)]
1574 fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1575 $(let $name = crate::FromBytes::from_bytes(bytes);)*
1576 ($($name,)*)
1577 }
1578 }
1579
1580 impl<$($name: Len),*> Len for ($($name,)*) {
1581 fn len(&self) -> usize {
1582 self.0.len()
1583 }
1584 }
1585 impl<$($name: Clear),*> Clear for ($($name,)*) {
1586 fn clear(&mut self) {
1587 let ($($name,)*) = self;
1588 $($name.clear();)*
1589 }
1590 }
1591 impl<$($name: HeapSize),*> HeapSize for ($($name,)*) {
1592 fn heap_size(&self) -> (usize, usize) {
1593 let ($($name,)*) = self;
1594 let mut l = 0;
1595 let mut c = 0;
1596 $(let (l0, c0) = $name.heap_size(); l += l0; c += c0;)*
1597 (l, c)
1598 }
1599 }
1600 impl<$($name: Index),*> Index for ($($name,)*) {
1601 type Ref = ($($name::Ref,)*);
1602 fn get(&self, index: usize) -> Self::Ref {
1603 let ($($name,)*) = self;
1604 ($($name.get(index),)*)
1605 }
1606 }
1607 impl<'a, $($name),*> Index for &'a ($($name,)*) where $( &'a $name: Index),* {
1608 type Ref = ($(<&'a $name as Index>::Ref,)*);
1609 fn get(&self, index: usize) -> Self::Ref {
1610 let ($($name,)*) = self;
1611 ($($name.get(index),)*)
1612 }
1613 }
1614
1615 impl<$($name: IndexMut),*> IndexMut for ($($name,)*) {
1616 type IndexMut<'a> = ($($name::IndexMut<'a>,)*) where $($name: 'a),*;
1617 fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
1618 let ($($name,)*) = self;
1619 ($($name.get_mut(index),)*)
1620 }
1621 }
1622 impl<$($name2, $name: Push<$name2>),*> Push<($($name2,)*)> for ($($name,)*) {
1623 #[inline]
1624 fn push(&mut self, item: ($($name2,)*)) {
1625 let ($($name,)*) = self;
1626 let ($($name2,)*) = item;
1627 $($name.push($name2);)*
1628 }
1629 }
1630 impl<'a, $($name2, $name: Push<&'a $name2>),*> Push<&'a ($($name2,)*)> for ($($name,)*) {
1631 #[inline]
1632 fn push(&mut self, item: &'a ($($name2,)*)) {
1633 let ($($name,)*) = self;
1634 let ($($name2,)*) = item;
1635 $($name.push($name2);)*
1636 }
1637 }
1638 )
1639 }
1640
1641 tuple_impl!(A,AA);
1642 tuple_impl!(A,AA B,BB);
1643 tuple_impl!(A,AA B,BB C,CC);
1644 tuple_impl!(A,AA B,BB C,CC D,DD);
1645 tuple_impl!(A,AA B,BB C,CC D,DD E,EE);
1646 tuple_impl!(A,AA B,BB C,CC D,DD E,EE F,FF);
1647 tuple_impl!(A,AA B,BB C,CC D,DD E,EE F,FF G,GG);
1648 tuple_impl!(A,AA B,BB C,CC D,DD E,EE F,FF G,GG H,HH);
1649 tuple_impl!(A,AA B,BB C,CC D,DD E,EE F,FF G,GG H,HH I,II);
1650 tuple_impl!(A,AA B,BB C,CC D,DD E,EE F,FF G,GG H,HH I,II J,JJ);
1651
1652 #[cfg(test)]
1653 mod test {
1654 #[test]
1655 fn round_trip() {
1656
1657 use crate::Columnar;
1658 use crate::common::{Index, Push, HeapSize, Len};
1659
1660 let mut column: <(u64, u8, String) as Columnar>::Container = Default::default();
1661 for i in 0..100 {
1662 column.push((i, i as u8, &i.to_string()));
1663 column.push((i, i as u8, &"".to_string()));
1664 }
1665
1666 assert_eq!(column.len(), 200);
1667 assert_eq!(column.heap_size(), (3590, 4608));
1668
1669 for i in 0..100u64 {
1670 assert_eq!((&column).get((2*i+0) as usize), (&i, &(i as u8), i.to_string().as_str()));
1671 assert_eq!((&column).get((2*i+1) as usize), (&i, &(i as u8), ""));
1672 }
1673
1674 let mut column: Vec<(u64, u8, String)> = Default::default();
1676 for i in 0..100 {
1677 column.push((i, i as u8, i.to_string()));
1678 column.push((i, i as u8, "".to_string()));
1679 }
1680 assert_eq!(column.heap_size(), (8190, 11040));
1681
1682 }
1683 }
1684}
1685
1686pub use sums::{rank_select::RankSelect, result::Results, option::Options};
1687pub mod sums {
1692
1693 pub mod rank_select {
1701
1702 use crate::primitive::Bools;
1703 use crate::common::index::CopyAs;
1704 use crate::{Len, Index, IndexAs, Push, Clear, HeapSize};
1705
1706 #[derive(Copy, Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
1712 pub struct RankSelect<CC = Vec<u64>, VC = Vec<u64>, WC = u64> {
1713 pub counts: CC,
1715 pub values: Bools<VC, WC>,
1717 }
1718
1719 impl<CC: crate::Container<u64>, VC: crate::Container<u64>> RankSelect<CC, VC> {
1720 pub fn borrow<'a>(&'a self) -> RankSelect<CC::Borrowed<'a>, VC::Borrowed<'a>, &'a u64> {
1721 use crate::Container;
1722 RankSelect {
1723 counts: self.counts.borrow(),
1724 values: self.values.borrow(),
1725 }
1726 }
1727 }
1728
1729 impl<'a, CC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for RankSelect<CC, VC, &'a u64> {
1730 #[inline(always)]
1731 fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1732 crate::chain(self.counts.as_bytes(), self.values.as_bytes())
1733 }
1734 }
1735 impl<'a, CC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for RankSelect<CC, VC, &'a u64> {
1736 fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1737 Self {
1738 counts: crate::FromBytes::from_bytes(bytes),
1739 values: crate::FromBytes::from_bytes(bytes),
1740 }
1741 }
1742 }
1743
1744
1745 impl<CC, VC: Len + IndexAs<u64>, WC: Copy+CopyAs<u64>> RankSelect<CC, VC, WC> {
1746 #[inline]
1747 pub fn get(&self, index: usize) -> bool {
1748 Index::get(&self.values, index)
1749 }
1750 }
1751 impl<CC: Len + IndexAs<u64>, VC: Len + IndexAs<u64>, WC: Copy+CopyAs<u64>> RankSelect<CC, VC, WC> {
1752 pub fn rank(&self, index: usize) -> usize {
1758 let bit = index % 64;
1759 let block = index / 64;
1760 let chunk = block / 16;
1761 let mut count = if chunk > 0 { self.counts.index_as(chunk - 1) as usize } else { 0 };
1762 for pos in (16 * chunk) .. block {
1763 count += self.values.values.index_as(pos).count_ones() as usize;
1764 }
1765 let intra_word = if block == self.values.values.len() { self.values.last_word.copy_as() } else { self.values.values.index_as(block) };
1767 count += (intra_word & ((1 << bit) - 1)).count_ones() as usize;
1768 count
1769 }
1770 pub fn select(&self, rank: u64) -> Option<usize> {
1772 let mut chunk = 0;
1773 while chunk < self.counts.len() && self.counts.index_as(chunk) <= rank {
1777 chunk += 1;
1778 }
1779 let mut count = if chunk < self.counts.len() { self.counts.index_as(chunk) } else { 0 };
1780 let mut block = 16 * chunk;
1782 while block < self.values.values.len() && count + (self.values.values.index_as(block).count_ones() as u64) <= rank {
1783 count += self.values.values.index_as(block).count_ones() as u64;
1784 block += 1;
1785 }
1786 let last_bits = if block == self.values.values.len() { self.values.last_bits.copy_as() as usize } else { 64 };
1788 let last_word = if block == self.values.values.len() { self.values.last_word.copy_as() } else { self.values.values.index_as(block) };
1789 for shift in 0 .. last_bits {
1790 if ((last_word >> shift) & 0x01 == 0x01) && count + 1 == rank {
1791 return Some(64 * block + shift);
1792 }
1793 count += (last_word >> shift) & 0x01;
1794 }
1795
1796 None
1797 }
1798 }
1799
1800 impl<CC, VC: Len, WC: Copy + CopyAs<u64>> RankSelect<CC, VC, WC> {
1801 pub fn len(&self) -> usize {
1802 self.values.len()
1803 }
1804 }
1805
1806 impl<CC: Push<u64> + Len + IndexAs<u64>, VC: Push<u64> + Len + IndexAs<u64>> RankSelect<CC, VC> {
1809 #[inline]
1810 pub fn push(&mut self, bit: bool) {
1811 self.values.push(bit);
1812 while self.counts.len() < self.values.len() / 1024 {
1813 let mut count = self.counts.last().unwrap_or(0);
1814 let lower = 16 * self.counts.len();
1815 let upper = lower + 16;
1816 for i in lower .. upper {
1817 count += self.values.values.index_as(i).count_ones() as u64;
1818 }
1819 self.counts.push(count);
1820 }
1821 }
1822 }
1823 impl<CC: Clear, VC: Clear> Clear for RankSelect<CC, VC> {
1824 fn clear(&mut self) {
1825 self.counts.clear();
1826 self.values.clear();
1827 }
1828 }
1829 impl<CC: HeapSize, VC: HeapSize> HeapSize for RankSelect<CC, VC> {
1830 fn heap_size(&self) -> (usize, usize) {
1831 let (l0, c0) = self.counts.heap_size();
1832 let (l1, c1) = self.values.heap_size();
1833 (l0 + l1, c0 + c1)
1834 }
1835 }
1836 }
1837
1838 pub mod result {
1839
1840 use crate::common::index::CopyAs;
1841 use crate::{Clear, Columnar, Len, IndexMut, Index, IndexAs, Push, HeapSize};
1842 use crate::RankSelect;
1843
1844 #[derive(Copy, Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
1845 pub struct Results<SC, TC, CC=Vec<u64>, VC=Vec<u64>, WC=u64> {
1846 pub indexes: RankSelect<CC, VC, WC>,
1848 pub oks: SC,
1849 pub errs: TC,
1850 }
1851
1852 impl<S: Columnar, T: Columnar> Columnar for Result<S, T> {
1853 type Ref<'a> = Result<S::Ref<'a>, T::Ref<'a>> where S: 'a, T: 'a;
1854 fn copy_from<'a>(&mut self, other: Self::Ref<'a>) {
1855 match (&mut *self, other) {
1856 (Ok(x), Ok(y)) => x.copy_from(y),
1857 (Err(x), Err(y)) => x.copy_from(y),
1858 (_, other) => { *self = Self::into_owned(other); },
1859 }
1860 }
1861 fn into_owned<'a>(other: Self::Ref<'a>) -> Self {
1862 match other {
1863 Ok(y) => Ok(S::into_owned(y)),
1864 Err(y) => Err(T::into_owned(y)),
1865 }
1866 }
1867 type Container = Results<S::Container, T::Container>;
1868 }
1869
1870 impl<S: Columnar, T: Columnar, SC: crate::Container<S>, TC: crate::Container<T>> crate::Container<Result<S, T>> for Results<SC, TC> {
1871 type Borrowed<'a> = Results<SC::Borrowed<'a>, TC::Borrowed<'a>, &'a [u64], &'a [u64], &'a u64> where SC: 'a, TC: 'a, S:'a, T: 'a;
1872 fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1873 Results {
1874 indexes: self.indexes.borrow(),
1875 oks: self.oks.borrow(),
1876 errs: self.errs.borrow(),
1877 }
1878 }
1879 }
1880
1881 impl<'a, SC: crate::AsBytes<'a>, TC: crate::AsBytes<'a>, CC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for Results<SC, TC, CC, VC, &'a u64> {
1882 #[inline(always)]
1883 fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1884 let iter = self.indexes.as_bytes();
1885 let iter = crate::chain(iter, self.oks.as_bytes());
1886 crate::chain(iter, self.errs.as_bytes())
1887 }
1888 }
1889 impl<'a, SC: crate::FromBytes<'a>, TC: crate::FromBytes<'a>, CC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for Results<SC, TC, CC, VC, &'a u64> {
1890 fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1891 Self {
1892 indexes: crate::FromBytes::from_bytes(bytes),
1893 oks: crate::FromBytes::from_bytes(bytes),
1894 errs: crate::FromBytes::from_bytes(bytes),
1895 }
1896 }
1897 }
1898
1899 impl<SC, TC, CC, VC: Len, WC: Copy+CopyAs<u64>> Len for Results<SC, TC, CC, VC, WC> {
1900 #[inline(always)] fn len(&self) -> usize { self.indexes.len() }
1901 }
1902
1903 impl<SC, TC, CC, VC, WC> Index for Results<SC, TC, CC, VC, WC>
1904 where
1905 SC: Index,
1906 TC: Index,
1907 CC: IndexAs<u64> + Len,
1908 VC: IndexAs<u64> + Len,
1909 WC: Copy + CopyAs<u64>,
1910 {
1911 type Ref = Result<SC::Ref, TC::Ref>;
1912 fn get(&self, index: usize) -> Self::Ref {
1913 if self.indexes.get(index) {
1914 Ok(self.oks.get(self.indexes.rank(index)))
1915 } else {
1916 Err(self.errs.get(index - self.indexes.rank(index)))
1917 }
1918 }
1919 }
1920 impl<'a, SC, TC, CC, VC, WC> Index for &'a Results<SC, TC, CC, VC, WC>
1921 where
1922 &'a SC: Index,
1923 &'a TC: Index,
1924 CC: IndexAs<u64> + Len,
1925 VC: IndexAs<u64> + Len,
1926 WC: Copy + CopyAs<u64>,
1927 {
1928 type Ref = Result<<&'a SC as Index>::Ref, <&'a TC as Index>::Ref>;
1929 fn get(&self, index: usize) -> Self::Ref {
1930 if self.indexes.get(index) {
1931 Ok((&self.oks).get(self.indexes.rank(index)))
1932 } else {
1933 Err((&self.errs).get(index - self.indexes.rank(index)))
1934 }
1935 }
1936 }
1937
1938 impl<SC: IndexMut, TC: IndexMut, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len> IndexMut for Results<SC, TC, CC, VC> {
1940 type IndexMut<'a> = Result<SC::IndexMut<'a>, TC::IndexMut<'a>> where SC: 'a, TC: 'a, CC: 'a, VC: 'a;
1941 fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
1942 if self.indexes.get(index) {
1943 Ok(self.oks.get_mut(self.indexes.rank(index)))
1944 } else {
1945 Err(self.errs.get_mut(index - self.indexes.rank(index)))
1946 }
1947 }
1948 }
1949
1950 impl<S, SC: Push<S>, T, TC: Push<T>> Push<Result<S, T>> for Results<SC, TC> {
1951 #[inline]
1952 fn push(&mut self, item: Result<S, T>) {
1953 match item {
1954 Ok(item) => {
1955 self.indexes.push(true);
1956 self.oks.push(item);
1957 }
1958 Err(item) => {
1959 self.indexes.push(false);
1960 self.errs.push(item);
1961 }
1962 }
1963 }
1964 }
1965 impl<'a, S, SC: Push<&'a S>, T, TC: Push<&'a T>> Push<&'a Result<S, T>> for Results<SC, TC> {
1966 #[inline]
1967 fn push(&mut self, item: &'a Result<S, T>) {
1968 match item {
1969 Ok(item) => {
1970 self.indexes.push(true);
1971 self.oks.push(item);
1972 }
1973 Err(item) => {
1974 self.indexes.push(false);
1975 self.errs.push(item);
1976 }
1977 }
1978 }
1979 }
1980
1981 impl<SC: Clear, TC: Clear> Clear for Results<SC, TC> {
1982 fn clear(&mut self) {
1983 self.indexes.clear();
1984 self.oks.clear();
1985 self.errs.clear();
1986 }
1987 }
1988
1989 impl<SC: HeapSize, TC: HeapSize> HeapSize for Results<SC, TC> {
1990 fn heap_size(&self) -> (usize, usize) {
1991 let (l0, c0) = self.oks.heap_size();
1992 let (l1, c1) = self.errs.heap_size();
1993 let (li, ci) = self.indexes.heap_size();
1994 (l0 + l1 + li, c0 + c1 + ci)
1995 }
1996 }
1997
1998 #[cfg(test)]
1999 mod test {
2000 #[test]
2001 fn round_trip() {
2002
2003 use crate::Columnar;
2004 use crate::common::{Index, Push, HeapSize, Len};
2005
2006 let mut column: <Result<u64, u64> as Columnar>::Container = Default::default();
2007 for i in 0..100 {
2008 column.push(Ok::<u64, u64>(i));
2009 column.push(Err::<u64, u64>(i));
2010 }
2011
2012 assert_eq!(column.len(), 200);
2013 assert_eq!(column.heap_size(), (1624, 2080));
2014
2015 for i in 0..100 {
2016 assert_eq!(column.get(2*i+0), Ok(i as u64));
2017 assert_eq!(column.get(2*i+1), Err(i as u64));
2018 }
2019
2020 let mut column: <Result<u64, u8> as Columnar>::Container = Default::default();
2021 for i in 0..100 {
2022 column.push(Ok::<u64, u8>(i as u64));
2023 column.push(Err::<u64, u8>(i as u8));
2024 }
2025
2026 assert_eq!(column.len(), 200);
2027 assert_eq!(column.heap_size(), (924, 1184));
2028
2029 for i in 0..100 {
2030 assert_eq!(column.get(2*i+0), Ok(i as u64));
2031 assert_eq!(column.get(2*i+1), Err(i as u8));
2032 }
2033 }
2034 }
2035 }
2036
2037 pub mod option {
2038
2039 use crate::common::index::CopyAs;
2040 use crate::{Clear, Columnar, Len, IndexMut, Index, IndexAs, Push, HeapSize};
2041 use crate::RankSelect;
2042
2043 #[derive(Copy, Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
2044 pub struct Options<TC, CC=Vec<u64>, VC=Vec<u64>, WC=u64> {
2045 pub indexes: RankSelect<CC, VC, WC>,
2048 pub somes: TC,
2049 }
2050
2051 impl<T: Columnar> Columnar for Option<T> {
2052 type Ref<'a> = Option<T::Ref<'a>> where T: 'a;
2053 fn copy_from<'a>(&mut self, other: Self::Ref<'a>) {
2054 match (&mut *self, other) {
2055 (Some(x), Some(y)) => { x.copy_from(y); }
2056 (_, other) => { *self = Self::into_owned(other); }
2057 }
2058 }
2059 fn into_owned<'a>(other: Self::Ref<'a>) -> Self {
2060 other.map(|x| T::into_owned(x))
2061 }
2062 type Container = Options<T::Container>;
2063 }
2064
2065 impl<T: Columnar, TC: crate::Container<T>> crate::Container<Option<T>> for Options<TC> {
2066 type Borrowed<'a> = Options<TC::Borrowed<'a>, &'a [u64], &'a [u64], &'a u64> where TC: 'a, T: 'a;
2067 fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
2068 Options {
2069 indexes: self.indexes.borrow(),
2070 somes: self.somes.borrow(),
2071 }
2072 }
2073 }
2074
2075 impl<'a, TC: crate::AsBytes<'a>, CC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for Options<TC, CC, VC, &'a u64> {
2076 #[inline(always)]
2077 fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
2078 crate::chain(self.indexes.as_bytes(), self.somes.as_bytes())
2079 }
2080 }
2081
2082 impl <'a, TC: crate::FromBytes<'a>, CC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for Options<TC, CC, VC, &'a u64> {
2083 fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
2084 Self {
2085 indexes: crate::FromBytes::from_bytes(bytes),
2086 somes: crate::FromBytes::from_bytes(bytes),
2087 }
2088 }
2089 }
2090
2091 impl<T, CC, VC: Len, WC: Copy + CopyAs<u64>> Len for Options<T, CC, VC, WC> {
2092 #[inline(always)] fn len(&self) -> usize { self.indexes.len() }
2093 }
2094
2095 impl<TC: Index, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len, WC: Copy+CopyAs<u64>> Index for Options<TC, CC, VC, WC> {
2096 type Ref = Option<TC::Ref>;
2097 fn get(&self, index: usize) -> Self::Ref {
2098 if self.indexes.get(index) {
2099 Some(self.somes.get(self.indexes.rank(index)))
2100 } else {
2101 None
2102 }
2103 }
2104 }
2105 impl<'a, TC, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len, WC: Copy+CopyAs<u64>> Index for &'a Options<TC, CC, VC, WC>
2106 where &'a TC: Index
2107 {
2108 type Ref = Option<<&'a TC as Index>::Ref>;
2109 fn get(&self, index: usize) -> Self::Ref {
2110 if self.indexes.get(index) {
2111 Some((&self.somes).get(self.indexes.rank(index)))
2112 } else {
2113 None
2114 }
2115 }
2116 }
2117 impl<TC: IndexMut, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len> IndexMut for Options<TC, CC, VC> {
2118 type IndexMut<'a> = Option<TC::IndexMut<'a>> where TC: 'a, CC: 'a, VC: 'a;
2119 fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
2120 if self.indexes.get(index) {
2121 Some(self.somes.get_mut(self.indexes.rank(index)))
2122 } else {
2123 None
2124 }
2125 }
2126 }
2127
2128 impl<T, TC: Push<T> + Len> Push<Option<T>> for Options<TC> {
2129 #[inline]
2130 fn push(&mut self, item: Option<T>) {
2131 match item {
2132 Some(item) => {
2133 self.indexes.push(true);
2134 self.somes.push(item);
2135 }
2136 None => {
2137 self.indexes.push(false);
2138 }
2139 }
2140 }
2141 }
2142 impl<'a, T, TC: Push<&'a T> + Len> Push<&'a Option<T>> for Options<TC> {
2143 #[inline]
2144 fn push(&mut self, item: &'a Option<T>) {
2145 match item {
2146 Some(item) => {
2147 self.indexes.push(true);
2148 self.somes.push(item);
2149 }
2150 None => {
2151 self.indexes.push(false);
2152 }
2153 }
2154 }
2155 }
2156
2157 impl<TC: Clear> Clear for Options<TC> {
2158 fn clear(&mut self) {
2159 self.indexes.clear();
2160 self.somes.clear();
2161 }
2162 }
2163
2164 impl<TC: HeapSize> HeapSize for Options<TC> {
2165 fn heap_size(&self) -> (usize, usize) {
2166 let (l0, c0) = self.somes.heap_size();
2167 let (li, ci) = self.indexes.heap_size();
2168 (l0 + li, c0 + ci)
2169 }
2170 }
2171
2172 #[cfg(test)]
2173 mod test {
2174
2175 use crate::Columnar;
2176 use crate::common::{Index, HeapSize, Len};
2177 use crate::Options;
2178
2179 #[test]
2180 fn round_trip_some() {
2181 let store: Options<Vec<i32>> = Columnar::into_columns((0..100).map(Some));
2183 assert_eq!(store.len(), 100);
2184 assert!((&store).index_iter().zip(0..100).all(|(a, b)| a == Some(&b)));
2185 assert_eq!(store.heap_size(), (408, 544));
2186 }
2187
2188 #[test]
2189 fn round_trip_none() {
2190 let store = Columnar::into_columns((0..100).map(|_x| None::<i32>));
2191 assert_eq!(store.len(), 100);
2192 let foo = &store;
2193 assert!(foo.index_iter().zip(0..100).all(|(a, _b)| a == None));
2194 assert_eq!(store.heap_size(), (8, 32));
2195 }
2196
2197 #[test]
2198 fn round_trip_mixed() {
2199 let store: Options<Vec<i32>> = Columnar::into_columns((0..100).map(|x| if x % 2 == 0 { Some(x) } else { None }));
2201 assert_eq!(store.len(), 100);
2202 assert!((&store).index_iter().zip(0..100).all(|(a, b)| a == if b % 2 == 0 { Some(&b) } else { None }));
2203 assert_eq!(store.heap_size(), (208, 288));
2204 }
2205 }
2206 }
2207}
2208
2209pub use lookback::{Repeats, Lookbacks};
2210pub mod lookback {
2215
2216 use crate::{Options, Results, Push, Index, Len, HeapSize};
2217
2218 #[derive(Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
2220 pub struct Repeats<TC, const N: u8 = 255> {
2221 pub inner: Options<TC>,
2223 }
2224
2225 impl<T: PartialEq, TC: Push<T> + Len, const N: u8> Push<T> for Repeats<TC, N>
2226 where
2227 for<'a> &'a TC: Index,
2228 for<'a> <&'a TC as Index>::Ref : PartialEq<T>,
2229 {
2230 #[inline]
2231 fn push(&mut self, item: T) {
2232 let insert: Option<T> = if (&self.inner.somes).last().map(|x| x.eq(&item)) == Some(true) {
2234 None
2235 } else {
2236 Some(item)
2237 };
2238 self.inner.push(insert);
2239 }
2240 }
2241
2242 impl<TC: Len, const N: u8> Len for Repeats<TC, N> {
2243 #[inline(always)] fn len(&self) -> usize { self.inner.len() }
2244 }
2245
2246 impl<TC: Index, const N: u8> Index for Repeats<TC, N> {
2247 type Ref = TC::Ref;
2248 #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
2249 match self.inner.get(index) {
2250 Some(item) => item,
2251 None => {
2252 let pos = self.inner.indexes.rank(index) - 1;
2253 self.inner.somes.get(pos)
2254 },
2255 }
2256 }
2257 }
2258
2259 impl<TC: HeapSize, const N: u8> HeapSize for Repeats<TC, N> {
2260 fn heap_size(&self) -> (usize, usize) {
2261 self.inner.heap_size()
2262 }
2263 }
2264
2265 #[derive(Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
2266 pub struct Lookbacks<TC, VC = Vec<u8>, const N: u8 = 255> {
2267 pub inner: Results<TC, VC>,
2269 }
2270
2271 impl<T: PartialEq, TC: Push<T> + Len, VC: Push<u8>, const N: u8> Push<T> for Lookbacks<TC, VC, N>
2272 where
2273 for<'a> &'a TC: Index,
2274 for<'a> <&'a TC as Index>::Ref : PartialEq<T>,
2275 {
2276 #[inline]
2277 fn push(&mut self, item: T) {
2278 let oks_len = self.inner.oks.len();
2280 let find = (0u8 .. N).take(self.inner.oks.len()).find(|i| (&self.inner.oks).get(oks_len - (*i as usize) - 1) == item);
2281 let insert: Result<T, u8> = if let Some(back) = find { Err(back) } else { Ok(item) };
2282 self.inner.push(insert);
2283 }
2284 }
2285
2286 impl<TC, VC, const N: u8> Len for Lookbacks<TC, VC, N> {
2287 #[inline(always)] fn len(&self) -> usize { self.inner.len() }
2288 }
2289
2290 impl<TC: Index, VC: Index<Ref=u8>, const N: u8> Index for Lookbacks<TC, VC, N> {
2291 type Ref = TC::Ref;
2292 #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
2293 match self.inner.get(index) {
2294 Ok(item) => item,
2295 Err(back) => {
2296 let pos = self.inner.indexes.rank(index) - 1;
2297 self.inner.oks.get(pos - (back as usize))
2298 },
2299 }
2300 }
2301 }
2302 impl<'a, TC, const N: u8> Index for &'a Lookbacks<TC, Vec<u8>, N>
2303 where
2304 &'a TC: Index,
2305 {
2306 type Ref = <&'a TC as Index>::Ref;
2307 #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
2308 match (&self.inner).get(index) {
2309 Ok(item) => item,
2310 Err(back) => {
2311 let pos = self.inner.indexes.rank(index) - 1;
2312 (&self.inner.oks).get(pos - (*back as usize))
2313 },
2314 }
2315 }
2316 }
2317
2318 impl<TC: HeapSize, VC: HeapSize, const N: u8> HeapSize for Lookbacks<TC, VC, N> {
2319 fn heap_size(&self) -> (usize, usize) {
2320 self.inner.heap_size()
2321 }
2322 }
2323}
2324
2325mod maps {
2327
2328 use crate::{Len, Push};
2329 use crate::Options;
2330
2331 pub struct KeyMaps<CK, CV> {
2337 _keys: CK,
2338 vals: Vec<CV>,
2339 }
2340
2341 impl<CK, CV: Len> Len for KeyMaps<CK, CV> {
2342 fn len(&self) -> usize {
2343 self.vals[0].len()
2345 }
2346 }
2347
2348 impl<K: PartialOrd, V, CV: Push<K>> Push<Vec<(K, V)>> for KeyMaps<Vec<K>, CV> {
2352 #[inline]
2353 fn push(&mut self, _item: Vec<(K, V)>) {
2354
2355 }
2356 }
2357
2358 pub struct ListMaps<CV> {
2362 vals: Vec<Options<CV>>,
2363 }
2364
2365 impl<CV> Default for ListMaps<CV> {
2366 fn default() -> Self {
2367 ListMaps { vals: Default::default() }
2368 }
2369 }
2370
2371 impl<CV: Len> Len for ListMaps<CV> {
2372 fn len(&self) -> usize {
2373 self.vals[0].len()
2374 }
2375 }
2376
2377 impl<'a, V, CV: Push<&'a V> + Len + Default> Push<&'a Vec<V>> for ListMaps<CV> {
2378 #[inline]
2379 fn push(&mut self, item: &'a Vec<V>) {
2380 let mut item_len = item.len();
2381 let self_len = if self.vals.is_empty() { 0 } else { self.vals[0].len() };
2382 while self.vals.len() < item_len {
2383 let mut new_store: Options<CV> = Default::default();
2384 for _ in 0..self_len {
2385 new_store.push(None);
2386 }
2387 self.vals.push(new_store);
2388 }
2389 for (store, i) in self.vals.iter_mut().zip(item) {
2390 store.push(Some(i));
2391 }
2392 while item_len < self.vals.len() {
2393 self.vals[item_len].push(None);
2394 item_len += 1;
2395 }
2396 }
2397 }
2398
2399 #[cfg(test)]
2400 mod test {
2401
2402 use crate::common::{Len, Push};
2403 use crate::{Results, Strings};
2404
2405 #[test]
2406 fn round_trip_listmap() {
2407
2408 let records = (0 .. 1024).map(|i|
2410 vec![
2411 Ok(i),
2412 Err(format!("{:?}", i)),
2413 if i % 2 == 0 { Ok(i) } else { Err(format!("{:?}", i)) },
2414 ]
2415 );
2416
2417 let mut store: super::ListMaps<Results<Vec<i32>, Strings>> = Default::default();
2419 for record in records {
2420 store.push(&record);
2421 }
2422
2423 let field0: Option<&[i32]> = if store.vals[0].somes.oks.len() == store.vals[0].len() {
2426 Some(&store.vals[0].somes.oks)
2427 } else { None };
2428
2429 let field1: Option<&Strings> = if store.vals[1].somes.errs.len() == store.vals[1].len() {
2430 Some(&store.vals[1].somes.errs)
2431 } else { None };
2432
2433 let field2: Option<&[i32]> = if store.vals[2].somes.oks.len() == store.vals[2].len() {
2434 Some(&store.vals[2].somes.oks)
2435 } else { None };
2436
2437 assert!(field0.is_some());
2438 assert!(field1.is_some());
2439 assert!(field2.is_none());
2440 }
2441 }
2442
2443}
2444
2445mod sizes {
2450
2451 use crate::Push;
2452 use crate::Results;
2453
2454 struct Sizes<C0, C1, C2, C3> {
2456 inner: Results<Results<C0, C1>, Results<C2, C3>>,
2458 }
2459
2460 impl<C0: Default, C1: Default, C2: Default, C3: Default> Default for Sizes<C0, C1, C2, C3> {
2461 fn default() -> Self {
2462 Sizes { inner: Default::default() }
2463 }
2464 }
2465
2466 impl<C0: Push<u8>, C1: Push<u16>, C2: Push<u32>, C3: Push<u64>> Push<usize> for Sizes<C0, C1, C2, C3> {
2467 #[inline]
2468 fn push(&mut self, item: usize) {
2469 if let Ok(item) = TryInto::<u8>::try_into(item) {
2470 self.inner.push(Ok(Ok(item)))
2471 } else if let Ok(item) = TryInto::<u16>::try_into(item) {
2472 self.inner.push(Ok(Err(item)))
2473 } else if let Ok(item) = TryInto::<u32>::try_into(item) {
2474 self.inner.push(Err(Ok(item)))
2475 } else if let Ok(item) = TryInto::<u64>::try_into(item) {
2476 self.inner.push(Err(Err(item)))
2477 } else {
2478 panic!("usize exceeds bounds of u64")
2479 }
2480 }
2481 }
2482
2483 impl<C0: Push<i8>, C1: Push<i16>, C2: Push<i32>, C3: Push<i64>> Push<isize> for Sizes<C0, C1, C2, C3> {
2484 #[inline]
2485 fn push(&mut self, item: isize) {
2486 if let Ok(item) = TryInto::<i8>::try_into(item) {
2487 self.inner.push(Ok(Ok(item)))
2488 } else if let Ok(item) = TryInto::<i16>::try_into(item) {
2489 self.inner.push(Ok(Err(item)))
2490 } else if let Ok(item) = TryInto::<i32>::try_into(item) {
2491 self.inner.push(Err(Ok(item)))
2492 } else if let Ok(item) = TryInto::<i64>::try_into(item) {
2493 self.inner.push(Err(Err(item)))
2494 } else {
2495 panic!("isize exceeds bounds of i64")
2496 }
2497 }
2498 }
2499}
2500
2501pub mod roaring {
2503
2504 use crate::Results;
2505
2506 pub struct RoaringBits {
2514 _inner: Results<[u64; 1024], Vec<u16>>,
2515 }
2516}
2517
2518pub use chain_mod::{chain, chain_one};
2519
2520pub mod chain_mod {
2521 #[inline(always)]
2531 pub fn chain<A: IntoIterator, B: IntoIterator<Item=A::Item>>(a: A, b: B) -> Chain<A::IntoIter, B::IntoIter> {
2532 Chain { a: Some(a.into_iter()), b: Some(b.into_iter()) }
2533 }
2534
2535 pub struct Chain<A, B> {
2536 a: Option<A>,
2537 b: Option<B>,
2538 }
2539
2540 impl<A, B> Iterator for Chain<A, B>
2541 where
2542 A: Iterator,
2543 B: Iterator<Item=A::Item>,
2544 {
2545 type Item = A::Item;
2546
2547 #[inline(always)]
2548 fn next(&mut self) -> Option<Self::Item> {
2549 if let Some(a) = self.a.as_mut() {
2550 let x = a.next();
2551 if x.is_none() {
2552 self.a = None;
2553 } else {
2554 return x;
2555 }
2556 }
2557 if let Some(b) = self.b.as_mut() {
2558 let x = b.next();
2559 if x.is_none() {
2560 self.b = None;
2561 } else {
2562 return x;
2563 }
2564 }
2565 None
2566 }
2567 }
2568
2569 #[inline(always)]
2573 pub fn chain_one<A: IntoIterator>(a: A, b: A::Item) -> ChainOne<A::IntoIter> {
2574 ChainOne { a: Some(a.into_iter()), b: Some(b) }
2575 }
2576
2577 pub struct ChainOne<A: Iterator> {
2578 a: Option<A>,
2579 b: Option<A::Item>,
2580 }
2581
2582 impl<A: Iterator> Iterator for ChainOne<A> {
2583 type Item = A::Item;
2584
2585 #[inline(always)]
2586 fn next(&mut self) -> Option<Self::Item> {
2587 if let Some(a) = self.a.as_mut() {
2588 let x = a.next();
2589 if x.is_none() {
2590 self.a = None;
2591 self.b.take()
2592 } else {
2593 x
2594 }
2595 } else {
2596 None
2597 }
2598 }
2599 }
2600
2601 #[cfg(test)]
2602 mod test {
2603 use super::*;
2604
2605 #[test]
2606 fn test_chain() {
2607 let a = [1, 2, 3];
2608 let b = [4, 5, 6];
2609 let mut chain = chain(a, b);
2610 assert_eq!(chain.next(), Some(1));
2611 assert_eq!(chain.next(), Some(2));
2612 assert_eq!(chain.next(), Some(3));
2613 assert_eq!(chain.next(), Some(4));
2614 assert_eq!(chain.next(), Some(5));
2615 assert_eq!(chain.next(), Some(6));
2616 assert_eq!(chain.next(), None);
2617 }
2618
2619 #[test]
2620 fn test_chain_one() {
2621 let a = [1, 2, 3];
2622 let b = 4;
2623 let mut chain = chain_one(a, b);
2624 assert_eq!(chain.next(), Some(1));
2625 assert_eq!(chain.next(), Some(2));
2626 assert_eq!(chain.next(), Some(3));
2627 assert_eq!(chain.next(), Some(4));
2628 assert_eq!(chain.next(), None);
2629 }
2630 }
2631}