columnar/
lib.rs

1//! Types supporting flat / "columnar" layout for complex types.
2//!
3//! The intent is to re-layout `Vec<T>` types into vectors of reduced
4//! complexity, repeatedly. One should be able to push and pop easily,
5//! but indexing will be more complicated because we likely won't have
6//! a real `T` lying around to return as a reference. Instead, we will
7//! use Generic Associated Types (GATs) to provide alternate references.
8
9// Re-export derive crate.
10extern crate columnar_derive;
11pub use columnar_derive::Columnar;
12
13pub mod adts;
14
15/// A type that can be represented in columnar form.
16///
17/// For a running example, a type like `(A, Vec<B>)`.
18pub trait Columnar : 'static {
19
20    /// For each lifetime, a reference with that lifetime.
21    ///
22    /// As an example, `(&'a A, &'a [B])`.
23    type Ref<'a>;
24    /// Repopulates `self` from a reference.
25    ///
26    /// By default this just calls `into_owned()`, but it can be overridden.
27    fn copy_from<'a>(&mut self, other: Self::Ref<'a>) where Self: Sized {
28        *self = Self::into_owned(other);
29    }
30    /// Produce an instance of `Self` from `Self::Ref<'a>`.
31    fn into_owned<'a>(other: Self::Ref<'a>) -> Self;
32
33    /// The type that stores the columnar representation.
34    ///
35    /// The container must support pushing both `&Self` and `Self::Ref<'_>`.
36    /// In our running example this might be `(Vec<A>, Vecs<Vec<B>>)`.
37    type Container: Len + Clear + Default + for<'a> Push<&'a Self> + for<'a> Push<Self::Ref<'a>> + Container<Self> + Clone + Send;
38
39    /// Converts a sequence of references to the type into columnar form.
40    fn as_columns<'a, I>(selves: I) -> Self::Container where I: IntoIterator<Item = &'a Self>, Self: 'a {
41        let mut columns: Self::Container = Default::default();
42        for item in selves {
43            columns.push(item);
44        }
45        columns
46    }
47    /// Converts a sequence of the type into columnar form.
48    ///
49    /// This consumes the owned `Self` types but uses them only by reference.
50    /// Consider `as_columns()` instead if it is equally ergonomic.
51    fn into_columns<I>(selves: I) -> Self::Container where I: IntoIterator<Item = Self>, Self: Sized {
52        let mut columns: Self::Container = Default::default();
53        for item in selves {
54            columns.push(&item);
55        }
56        columns
57    }
58}
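
// A minimal usage sketch for `Columnar`, using the `u64` implementation provided by
// the `primitive` module below (`type Container = Vec<u64>`). The module name is new;
// everything it exercises is defined elsewhere in this file.
#[cfg(test)]
mod columnar_usage_sketch {
    use crate::{Columnar, Index};

    #[test]
    fn as_columns_roundtrip() {
        // Collect references into the columnar container, here simply a `Vec<u64>`.
        let values = [0u64, 1, 2, 3];
        let columns = <u64 as Columnar>::as_columns(values.iter());
        assert_eq!(columns.len(), 4);
        // `Vec<u64>` implements `Index` with `Ref = u64`, so elements copy back out.
        assert_eq!(columns.get(2), 2);
        // A reference can be turned back into an owned value.
        assert_eq!(<u64 as Columnar>::into_owned(&values[3]), 3);
    }
}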
59
60/// A container that can hold `C`, and provide its preferred references.
61///
62/// As an example, `(Vec<A>, Vecs<Vec<B>>)`.
63pub trait Container<C: Columnar + ?Sized> {
64    /// The type of a borrowed container.
65    ///
66    /// Corresponding to our example, `(&'a [A], Vecs<&'a [B], &'a [u64]>)`.
67    type Borrowed<'a>: Copy + Len + AsBytes<'a> + FromBytes<'a> + Index<Ref = C::Ref<'a>> where Self: 'a;
68    /// Converts a reference to the type to a borrowed variant.
69    fn borrow<'a>(&'a self) -> Self::Borrowed<'a>;
70}
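
// Sketch: `Container::borrow` yields a lightweight view of the columns. For the
// `Vec<u64>` container (implemented in the `primitive` module below) the borrowed
// form is `&[u64]`, which is `Copy` and cheap to pass around.
#[cfg(test)]
mod container_borrow_sketch {
    use crate::Container;

    #[test]
    fn borrow_is_a_view() {
        let column: Vec<u64> = vec![5, 6, 7];
        let borrowed = <Vec<u64> as Container<u64>>::borrow(&column);
        assert_eq!(borrowed, &[5u64, 6, 7][..]);
    }
}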
71
72pub use common::{Clear, Len, Push, IndexMut, Index, IndexAs, HeapSize, Slice, AsBytes, FromBytes};
73/// Common traits and types that are re-used throughout the module.
74pub mod common {
75
76    /// A type with a length.
77    pub trait Len {
78        /// The number of contained elements.
79        fn len(&self) -> usize;
80        /// Whether this contains no elements.
81        fn is_empty(&self) -> bool {
82            self.len() == 0
83        }
84    }
85    impl<L: Len> Len for &L {
86        #[inline(always)] fn len(&self) -> usize { L::len(*self) }
87    }
88    impl<L: Len> Len for &mut L {
89        #[inline(always)] fn len(&self) -> usize { L::len(*self) }
90    }
91    impl<T> Len for Vec<T> {
92        #[inline(always)] fn len(&self) -> usize { self.len() }
93    }
94    impl<T> Len for [T] {
95        #[inline(always)] fn len(&self) -> usize { <[T]>::len(self) }
96    }
97    impl<T> Len for &[T] {
98        #[inline(always)] fn len(&self) -> usize { <[T]>::len(self) }
99    }
100
101    /// A type that can accept items of type `T`.
102    pub trait Push<T> {
103        /// Pushes an item onto `self`.
104        fn push(&mut self, item: T);
105        /// Pushes elements of an iterator onto `self`.
106        #[inline(always)] fn extend(&mut self, iter: impl IntoIterator<Item=T>) {
107            for item in iter {
108                self.push(item);
109            }
110        }
111    }
112    impl<T> Push<T> for Vec<T> {
113        #[inline(always)] fn push(&mut self, item: T) { self.push(item) }
114
115        #[inline(always)]
116        fn extend(&mut self, iter: impl IntoIterator<Item=T>) {
117            std::iter::Extend::extend(self, iter)
118        }
119    }
120    impl<'a, T: Clone> Push<&'a T> for Vec<T> {
121        #[inline(always)] fn push(&mut self, item: &'a T) { self.push(item.clone()) }
122
123        #[inline(always)]
124        fn extend(&mut self, iter: impl IntoIterator<Item=&'a T>) {
125            std::iter::Extend::extend(self, iter.into_iter().cloned())
126        }
127    }
128    impl<'a, T: Clone> Push<&'a [T]> for Vec<T> {
129        #[inline(always)] fn push(&mut self, item: &'a [T]) { self.extend_from_slice(item) }
130    }
131
132
133    pub use index::{Index, IndexMut, IndexAs};
134    /// Traits for accessing elements by `usize` indexes.
135    ///
136    /// There are several traits, with a core distinction being whether the returned reference depends on the lifetime of `&self`.
137    /// For the trait `Index` the result does not depend on this lifetime, and `IndexAs` copies elements out as owned values.
138    /// A third trait, `IndexMut`, allows mutable access, and may be less commonly implemented.
139    pub mod index {
140
141        use crate::Len;
142        use crate::common::IterOwn;
143
144        /// A type that can be mutably accessed by `usize`.
145        pub trait IndexMut {
146            /// Type mutably referencing an indexed element.
147            type IndexMut<'a> where Self: 'a;
148            fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_>;
149            /// A reference to the last element, should one exist.
150            #[inline(always)] fn last_mut(&mut self) -> Option<Self::IndexMut<'_>> where Self: Len {
151                if self.is_empty() { None }
152                else { Some(self.get_mut(self.len()-1)) }
153            }
154        }
155
156        impl<'t, T: IndexMut + ?Sized> IndexMut for &'t mut T {
157            type IndexMut<'a> = T::IndexMut<'a> where Self: 'a;
158            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
159                T::get_mut(*self, index)
160            }
161        }
162        impl<T> IndexMut for Vec<T> {
163            type IndexMut<'a> = &'a mut T where Self: 'a;
164            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> { &mut self[index] }
165        }
166        impl<T> IndexMut for [T] {
167            type IndexMut<'a> = &'a mut T where Self: 'a;
168            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> { &mut self[index] }
169        }
170
171        /// A type that can be accessed by `usize` but without borrowing `self`.
172        ///
173        /// This can be useful for types which include their own lifetimes, and
174        /// which wish to express that their reference has the same lifetime.
175        /// With a GAT-style `Ref<'a>` (as in `IndexMut`), the lifetime would be tied to the `&self` borrow.
176        ///
177        /// This trait may be challenging to implement for owning containers,
178        /// for example `Vec<_>`, which would need their `Ref` type to depend
179        /// on the lifetime of the `&self` borrow in the `get()` function.
180        pub trait Index {
181            /// The type returned by the `get` method.
182            ///
183            /// Notably, this does not vary with lifetime, and will not depend on the lifetime of `&self`.
184            type Ref;
185            fn get(&self, index: usize) -> Self::Ref;
186            #[inline(always)] fn last(&self) -> Option<Self::Ref> where Self: Len {
187                if self.is_empty() { None }
188                else { Some(self.get(self.len()-1)) }
189            }
190            /// Converts `&self` into an iterator.
191            ///
192            /// This has an awkward name to avoid collision with `iter()`, which may also be implemented.
193            #[inline(always)]
194            fn index_iter(&self) -> IterOwn<&Self> {
195                IterOwn {
196                    index: 0,
197                    slice: self,
198                }
199            }
200            /// Converts `self` into an iterator.
201            ///
202            /// This has an awkward name to avoid collision with `into_iter()`, which may also be implemented.
203            #[inline(always)]
204            fn into_index_iter(self) -> IterOwn<Self> where Self: Sized {
205                IterOwn {
206                    index: 0,
207                    slice: self,
208                }
209            }
210        }
211
212        // These implementations aim to reveal a lifetime longer than that of `&self`, or to copy results out so that no lifetime is needed.
213        impl<'a, T> Index for &'a [T] {
214            type Ref = &'a T;
215            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { &self[index] }
216        }
217        impl<T: Copy> Index for [T] {
218            type Ref = T;
219            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self[index] }
220        }
221        impl<'a, T> Index for &'a Vec<T> {
222            type Ref = &'a T;
223            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { &self[index] }
224        }
225        impl<T: Copy> Index for Vec<T> {
226            type Ref = T;
227            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self[index] }
228        }
229
230
231        /// Types that can be converted into another type by copying.
232        ///
233        /// We use this trait to unify the ability of `T` and `&T` to be converted into `T`.
234        /// This is handy for copy types that we'd like to use, like `u8`, `u64` and `usize`.
235        pub trait CopyAs<T> {
236            fn copy_as(self) -> T;
237        }
238        impl<T: Copy> CopyAs<T> for &T {
239            fn copy_as(self) -> T { *self }
240        }
241        impl<T> CopyAs<T> for T {
242            fn copy_as(self) -> T { self }
243        }
244
245        pub trait IndexAs<T> {
246            fn index_as(&self, index: usize) -> T;
247            fn last(&self) -> Option<T> where Self: Len {
248                if self.is_empty() { None }
249                else { Some(self.index_as(self.len()-1)) }
250            }
251        }
252
253        impl<T: Index, S> IndexAs<S> for T where T::Ref: CopyAs<S> {
254            fn index_as(&self, index: usize) -> S { self.get(index).copy_as() }
255        }
256    }
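
    // Sketch: `IndexAs` copies elements out as an owned type, smoothing over whether
    // the underlying `Index` implementation returns `T` or `&T`.
    #[cfg(test)]
    mod index_as_sketch {
        use crate::IndexAs;

        #[test]
        fn copies_out_owned_values() {
            let owned: Vec<u64> = vec![1, 2, 3];
            let borrowed: &[u64] = &owned[..];
            // `Vec<u64>` indexes as `u64` and `&[u64]` indexes as `&u64`; both read out as `u64`.
            assert_eq!(<Vec<u64> as IndexAs<u64>>::index_as(&owned, 0), 1);
            assert_eq!(<&[u64] as IndexAs<u64>>::index_as(&borrowed, 2), 3);
        }
    }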
257
258    /// A type that can remove its contents and return to an empty state.
259    ///
260    /// Generally, this method does not release resources, and is used to make the container available for re-insertion.
261    pub trait Clear {
262        /// Clears `self`, without changing its capacity.
263        fn clear(&mut self);
264    }
265    // Vectors can be cleared.
266    impl<T> Clear for Vec<T> {
267        #[inline(always)] fn clear(&mut self) { self.clear() }
268    }
269    // Slice references can be cleared.
270    impl<'a, T> Clear for &'a [T] {
271        #[inline(always)] fn clear(&mut self) { *self = &[]; }
272    }
273
274    pub trait HeapSize {
275        /// Active (len) and allocated (cap) heap sizes in bytes.
276        /// This should not include the size of `self` itself.
277        fn heap_size(&self) -> (usize, usize) { (0, 0) }
278    }
279    impl HeapSize for serde_json::Number { }
280    impl HeapSize for String {
281        fn heap_size(&self) -> (usize, usize) {
282            (self.len(), self.capacity())
283        }
284    }
285    impl<T: HeapSize> HeapSize for [T] {
286        fn heap_size(&self) -> (usize, usize) {
287            let mut l = std::mem::size_of_val(self);
288            let mut c = std::mem::size_of_val(self);
289            for item in self.iter() {
290                let (il, ic) = item.heap_size();
291                l += il;
292                c += ic;
293            }
294            (l, c)
295        }
296    }
297    impl<T: HeapSize> HeapSize for Vec<T> {
298        fn heap_size(&self) -> (usize, usize) {
299            let mut l = std::mem::size_of::<T>() * self.len();
300            let mut c = std::mem::size_of::<T>() * self.capacity();
301            for item in (self[..]).iter() {
302                let (il, ic) = item.heap_size();
303                l += il;
304                c += ic;
305            }
306            (l, c)
307        }
308    }
309
310    /// A struct representing a slice of a range of values.
311    ///
312    /// The lower and upper bounds should be meaningfully set on construction.
313    #[derive(Copy, Clone, Debug)]
314    pub struct Slice<S> {
315        lower: usize,
316        upper: usize,
317        slice: S,
318    }
319
320    impl<S> Slice<S> {
321        pub fn slice<R: std::ops::RangeBounds<usize>>(self, range: R) -> Self {
322            use std::ops::Bound;
323            let lower = match range.start_bound() {
324                Bound::Included(s) => std::cmp::max(self.lower, *s),
325                Bound::Excluded(s) => std::cmp::max(self.lower, *s+1),
326                Bound::Unbounded => self.lower,
327            };
328            let upper = match range.end_bound() {
329                Bound::Included(s) => std::cmp::min(self.upper, *s+1),
330                Bound::Excluded(s) => std::cmp::min(self.upper, *s),
331                Bound::Unbounded => self.upper,
332            };
333            assert!(lower <= upper);
334            Self { lower, upper, slice: self.slice }
335        }
336        pub fn new(lower: u64, upper: u64, slice: S) -> Self {
337            let lower: usize = lower.try_into().expect("slice bounds must fit in `usize`");
338            let upper: usize = upper.try_into().expect("slice bounds must fit in `usize`");
339            Self { lower, upper, slice }
340        }
341        pub fn len(&self) -> usize { self.upper - self.lower }
342    }
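
    // A small sketch of `Slice` over a borrowed store. Note that the range passed to
    // `slice()` is expressed in the coordinates of the underlying store (clamped to the
    // current bounds), not relative to the current slice.
    #[cfg(test)]
    mod slice_sketch {
        use crate::{Index, Slice};

        #[test]
        fn narrows_a_range() {
            let values: &[u64] = &[10, 20, 30, 40];
            // View elements 1 .. 3, i.e. `[20, 30]`.
            let slice = Slice::new(1, 3, values);
            assert_eq!(slice.len(), 2);
            assert_eq!(slice.get(0), &20u64);
            // Narrow to element 2 of the underlying store, i.e. `[30]`.
            let narrowed = slice.slice(2 ..);
            assert_eq!(narrowed.get(0), &30u64);
        }
    }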
343
344    impl<S: Index> PartialEq for Slice<S> where S::Ref: PartialEq {
345        fn eq(&self, other: &Self) -> bool {
346            if self.len() != other.len() { return false; }
347            for i in 0 .. self.len() {
348                if self.get(i) != other.get(i) { return false; }
349            }
350            true
351        }
352    }
353    impl<S: Index> PartialEq<[S::Ref]> for Slice<S> where S::Ref: PartialEq {
354        fn eq(&self, other: &[S::Ref]) -> bool {
355            if self.len() != other.len() { return false; }
356            for i in 0 .. self.len() {
357                if self.get(i) != other[i] { return false; }
358            }
359            true
360        }
361    }
362    impl<S: Index> PartialEq<Vec<S::Ref>> for Slice<S> where S::Ref: PartialEq {
363        fn eq(&self, other: &Vec<S::Ref>) -> bool {
364            if self.len() != other.len() { return false; }
365            for i in 0 .. self.len() {
366                if self.get(i) != other[i] { return false; }
367            }
368            true
369        }
370    }
371
372    impl<S: Index> Eq for Slice<S> where S::Ref: Eq { }
373
374    impl<S: Index> PartialOrd for Slice<S> where S::Ref: PartialOrd {
375        fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
376            use std::cmp::Ordering;
377            let len = std::cmp::min(self.len(), other.len());
378
379            for i in 0 .. len {
380                match self.get(i).partial_cmp(&other.get(i)) {
381                    Some(Ordering::Equal) => (),
382                    not_equal => return not_equal,
383                }
384            }
385
386            self.len().partial_cmp(&other.len())
387        }
388    }
389
390    impl<S: Index> Ord for Slice<S> where S::Ref: Ord + Eq {
391        fn cmp(&self, other: &Self) -> std::cmp::Ordering {
392            use std::cmp::Ordering;
393            let len = std::cmp::min(self.len(), other.len());
394
395            for i in 0 .. len {
396                match self.get(i).cmp(&other.get(i)) {
397                    Ordering::Equal => (),
398                    not_equal => return not_equal,
399                }
400            }
401
402            self.len().cmp(&other.len())
403        }
404    }
405
406    impl<S> Len for Slice<S> {
407        #[inline(always)] fn len(&self) -> usize { self.len() }
408    }
409
410    impl<S: Index> Index for Slice<S> {
411        type Ref = S::Ref;
412        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
413            assert!(index < self.upper - self.lower);
414            self.slice.get(self.lower + index)
415        }
416    }
417    impl<'a, S> Index for &'a Slice<S>
418    where
419        &'a S : Index,
420    {
421        type Ref = <&'a S as Index>::Ref;
422        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
423            assert!(index < self.upper - self.lower);
424            (&self.slice).get(self.lower + index)
425        }
426    }
427
428    impl<S: IndexMut> IndexMut for Slice<S> {
429        type IndexMut<'a> = S::IndexMut<'a> where S: 'a;
430        #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
431            assert!(index < self.upper - self.lower);
432            self.slice.get_mut(self.lower + index)
433        }
434    }
435
436    impl<S: Index + Len> IntoIterator for Slice<S> {
437        type Item = S::Ref;
438        type IntoIter = IterOwn<Slice<S>>;
439        fn into_iter(self) -> Self::IntoIter {
440            self.into_index_iter()
441        }
442    }
443
444    pub struct IterOwn<S> {
445        index: usize,
446        slice: S,
447    }
448
449    impl<S> IterOwn<S> {
450        pub fn new(index: usize, slice: S) -> Self {
451            Self { index, slice }
452        }
453    }
454
455    impl<S: Index + Len> Iterator for IterOwn<S> {
456        type Item = S::Ref;
457        #[inline(always)] fn next(&mut self) -> Option<Self::Item> {
458            if self.index < self.slice.len() {
459                let result = self.slice.get(self.index);
460                self.index += 1;
461                Some(result)
462            } else {
463                None
464            }
465        }
466    }
467
468    /// A type that can be viewed as byte slices with lifetime `'a`.
469    ///
470    /// Implementors of this trait almost certainly reference the lifetime `'a` themselves.
471    pub trait AsBytes<'a> {
472        /// Presents `self` as a sequence of byte slices, with their required alignment.
473        fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])>;
474    }
475
476    /// A type that can be reconstituted from byte slices with lifetime `'a`.
477    ///
478    /// Implementors of this trait almost certainly reference the lifetime `'a` themselves,
479    /// unless they actively deserialize the bytes (vs sit on the slices, as if zero-copy).
480    pub trait FromBytes<'a> {
481        /// Reconstructs `self` from a sequence of correctly aligned and sized bytes slices.
482        ///
483        /// The implementation is expected to consume the right number of items from the iterator,
484        /// which may go on to be used by other implementations of `FromBytes`.
485        fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self;
486    }
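
    // Sketch: for a primitive column such as `&[u64]`, `AsBytes` exposes the backing
    // bytes (with their alignment) and `FromBytes` reconstitutes the slice without
    // copying. Both implementations live in the `primitive` module below.
    #[cfg(test)]
    mod bytes_roundtrip_sketch {
        use crate::{AsBytes, FromBytes};

        #[test]
        fn primitive_slice_roundtrip() {
            let values: Vec<u64> = (0 .. 4).collect();
            let column: &[u64] = &values[..];
            // One aligned byte slice per primitive column.
            let mut bytes = column.as_bytes().map(|(_align, bytes)| bytes);
            let decoded: &[u64] = FromBytes::from_bytes(&mut bytes);
            assert_eq!(decoded, column);
        }
    }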
487
488}
489
490/// Logic related to the transformation to and from bytes.
491///
492/// The methods here line up with the `AsBytes` and `FromBytes` traits.
493pub mod bytes {
494
495    use crate::AsBytes;
496
497    /// A coupled encode/decode pair for byte sequences.
498    pub trait EncodeDecode {
499        /// Encoded length in number of `u64` words required.
500        fn length_in_words<'a, A>(bytes: &A) -> usize where A : AsBytes<'a>;
501        /// Encoded length in number of `u8` bytes required.
502        ///
503        /// This method should always be eight times `Self::length_in_words`, and is provided for convenience and clarity.
504        fn length_in_bytes<'a, A>(bytes: &A) -> usize where A : AsBytes<'a> { 8 * Self::length_in_words(bytes) }
505        /// Encodes `bytes` into a sequence of `u64`.
506        fn encode<'a, A>(store: &mut Vec<u64>, bytes: &A) where A : AsBytes<'a>;
507        /// Writes `bytes` in the encoded format to an arbitrary writer.
508        fn write<'a, A, W: std::io::Write>(writer: W, bytes: &A) -> std::io::Result<()> where A : AsBytes<'a>;
509        /// Decodes bytes from a sequence of `u64`.
510        fn decode<'a>(store: &'a [u64]) -> impl Iterator<Item=&'a [u8]>;
511    }
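
    // Sketch: `Sequence` (below) implements `EncodeDecode`. Encoding a `&[u64]` column
    // into `u64` words and decoding it back should reproduce the `as_bytes` slices.
    #[cfg(test)]
    mod encode_decode_sketch {
        use crate::AsBytes;
        use super::{EncodeDecode, Sequence};

        #[test]
        fn sequence_roundtrip() {
            let column: Vec<u64> = (0 .. 10).collect();
            let borrowed: &[u64] = &column[..];
            let mut store = Vec::new();
            Sequence::encode(&mut store, &borrowed);
            // One word for the length, plus ten words of payload.
            assert_eq!(store.len(), Sequence::length_in_words(&borrowed));
            assert!(Sequence::decode(&store[..]).eq(borrowed.as_bytes().map(|(_, bytes)| bytes)));
        }
    }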
512
513    /// A sequential byte layout for `AsBytes` and `FromBytes` implementors.
514    ///
515    /// The layout is aligned like a sequence of `u64`, where we repeatedly announce a length,
516    /// and then follow it by that many bytes. We may need to follow this with padding bytes.
517    pub use serialization::Sequence;
518    mod serialization {
519
520        use crate::AsBytes;
521
522        /// Encodes and decodes byte sequences, by writing each slice's length followed by its bytes, padded to whole `u64` words.
523        pub struct Sequence;
524        impl super::EncodeDecode for Sequence {
525            fn length_in_words<'a, A>(bytes: &A) -> usize where A : AsBytes<'a> {
526                // Each byte slice has one `u64` for the length, and then as many `u64`s as needed to hold all bytes.
527                bytes.as_bytes().map(|(_align, bytes)| 1 + (bytes.len() + 7)/8).sum()
528            }
529            fn encode<'a, A>(store: &mut Vec<u64>, bytes: &A) where A : AsBytes<'a> {
530                encode(store, bytes.as_bytes())
531            }
532            fn write<'a, A, W: std::io::Write>(writer: W, bytes: &A) -> std::io::Result<()> where A : AsBytes<'a> {
533                write(writer, bytes.as_bytes())
534            }
535            fn decode<'a>(store: &'a [u64]) -> impl Iterator<Item=&'a [u8]> {
536                decode(store)
537            }
538        }
539
540        /// Encodes a sequence of byte slices as their length followed by their bytes, aligned to 8 bytes.
541        ///
542        /// Each length will be exactly 8 bytes, and the bytes that follow are padded out to a multiple of 8 bytes.
543        /// When reading the data, the length is in bytes, and one should consume those bytes and advance over padding.
544        pub fn encode<'a>(store: &mut Vec<u64>, bytes: impl Iterator<Item=(u64, &'a [u8])>) {
545            for (align, bytes) in bytes {
546                assert!(align <= 8);
547                store.push(bytes.len() as u64);
548                let whole_words = 8 * (bytes.len() / 8);
549                // We want to extend `store` by `bytes`, but `bytes` may not be `u64` aligned.
550                // In the latter case, init `store` and cast and copy onto it as a byte slice.
551                if let Ok(words) = bytemuck::try_cast_slice(&bytes[.. whole_words]) {
552                    store.extend_from_slice(words);
553                }
554                else {
555                    let store_len = store.len();
556                    store.resize(store_len + whole_words/8, 0);
557                    let slice = bytemuck::try_cast_slice_mut(&mut store[store_len..]).expect("&[u64] should convert to &[u8]");
558                    slice.copy_from_slice(&bytes[.. whole_words]);
559                }
560                let remaining_bytes = &bytes[whole_words..];
561                if !remaining_bytes.is_empty() {
562                    let mut remainder = 0u64;
563                    let transmute: &mut [u8] = bytemuck::try_cast_slice_mut(std::slice::from_mut(&mut remainder)).expect("&[u64] should convert to &[u8]");
564                    for (i, byte) in remaining_bytes.iter().enumerate() {
565                        transmute[i] = *byte;
566                    }
567                    store.push(remainder);
568                }
569            }
570        }
571
572        /// Writes a sequence of byte slices as their length followed by their bytes, padded to 8 bytes.
573        ///
574        /// Each length will be exactly 8 bytes, and the bytes that follow are padded out to a multiple of 8 bytes.
575        /// When reading the data, the length is in bytes, and one should consume those bytes and advance over padding.
576        pub fn write<'a>(mut writer: impl std::io::Write, bytes: impl Iterator<Item=(u64, &'a [u8])>) -> std::io::Result<()> {
577            // Columnar data is serialized as a sequence of `u64` values, with each `[u8]` slice
578            // serialized first as its length in bytes, and then as many `u64` values as needed.
579            // Padding should be added, but only for alignment; no specific values are required.
580            for (align, bytes) in bytes {
581                assert!(align <= 8);
582                let length = u64::try_from(bytes.len()).unwrap();
583                writer.write_all(bytemuck::cast_slice(std::slice::from_ref(&length)))?;
584                writer.write_all(bytes)?;
585                let padding = usize::try_from((8 - (length % 8)) % 8).unwrap();
586                writer.write_all(&[0; 8][..padding])?;
587            }
588            Ok(())
589        }
590
591        /// Decodes a sequence of byte slices from their length followed by their bytes.
592        ///
593        /// This decoder matches the `encode` function above.
594        /// In particular, it anticipates padding bytes when the length is not a multiple of eight.
595        pub fn decode(store: &[u64]) -> Decoder<'_> {
596            Decoder { store }
597        }
598
599        /// An iterator over byte slices, decoding from a sequence of lengths followed by bytes.
600        pub struct Decoder<'a> {
601            store: &'a [u64],
602        }
603
604        impl<'a> Iterator for Decoder<'a> {
605            type Item = &'a [u8];
606            fn next(&mut self) -> Option<Self::Item> {
607                if let Some(length) = self.store.first() {
608                    let length = *length as usize;
609                    self.store = &self.store[1..];
610                    let whole_words = if length % 8 == 0 { length / 8 } else { length / 8 + 1 };
611                    let bytes: &[u8] = bytemuck::try_cast_slice(&self.store[..whole_words]).expect("&[u64] should convert to &[u8]");
612                    self.store = &self.store[whole_words..];
613                    Some(&bytes[..length])
614                } else {
615                    None
616                }
617            }
618        }
619    }
620
621    /// A binary encoding of sequences of byte slices.
622    ///
623    /// The encoding starts with a sequence of n+1 offsets describing where to find the n slices in the bytes that follow.
624    /// Treating the offsets themselves as a byte slice too, each offset indicates the location (in bytes) of the end of its slice.
625    /// Each byte slice can be found from a pair of adjacent offsets, where the first is rounded up to a multiple of eight.
626    pub use serialization_neu::Indexed;
627    pub mod serialization_neu {
628
629        use crate::AsBytes;
630
631        /// Encodes and decodes bytes sequences, using an index of offsets.
632        pub struct Indexed;
633        impl super::EncodeDecode for Indexed {
634            fn length_in_words<'a, A>(bytes: &A) -> usize where A : AsBytes<'a> {
635                1 + bytes.as_bytes().map(|(_align, bytes)| 1 + (bytes.len() + 7)/8).sum::<usize>()
636            }
637            fn encode<'a, A>(store: &mut Vec<u64>, bytes: &A) where A : AsBytes<'a> {
638                encode(store, bytes)
639            }
640            fn write<'a, A, W: std::io::Write>(writer: W, bytes: &A) -> std::io::Result<()> where A : AsBytes<'a> {
641                write(writer, bytes)
642            }
643            fn decode<'a>(store: &'a [u64]) -> impl Iterator<Item=&'a [u8]> {
644                decode(store)
645            }
646        }
647
648        /// Encodes `item` into `u64` aligned words.
649        ///
650        /// The sequence of byte slices are appended, with padding to have each slice start `u64` aligned.
651        /// The sequence is then pre-pended with as many byte offsets as there are slices in `item`, plus one.
652        /// The byte offsets indicate where each slice ends, and by rounding up to `u64` alignment where the next slice begins.
653        /// The first offset indicates where the list of offsets itself ends, and where the first slice begins.
654        ///
655        /// We will need to visit `as_bytes` three times to extract this information, so the method should be efficient and inlined.
656        /// The first read writes the first offset, the second writes each other offset, and the third writes the bytes themselves.
657        ///
658        /// The offsets are zero-based, rather than based on `store.len()`.
659        /// If you call the method with a non-empty `store` be careful decoding.
660        pub fn encode<'a, A>(store: &mut Vec<u64>, iter: &A)
661        where A : AsBytes<'a>,
662        {
663            // Read 1: Number of offsets we will record, equal to the number of slices plus one.
664            // TODO: right-size `store` before first call to `push`.
665            let offsets = 1 + iter.as_bytes().count();
666            let offsets_end: u64 = TryInto::<u64>::try_into((offsets) * std::mem::size_of::<u64>()).unwrap();
667            store.push(offsets_end);
668            // Read 2: Establish each of the offsets based on lengths of byte slices.
669            let mut position_bytes = offsets_end;
670            for (align, bytes) in iter.as_bytes() {
671                assert!(align <= 8);
672                // Write length in bytes, but round up to words before updating `position_bytes`.
673                let to_push: u64 = position_bytes + TryInto::<u64>::try_into(bytes.len()).unwrap();
674                store.push(to_push);
675                let round_len: u64 = ((bytes.len() + 7) & !7).try_into().unwrap();
676                position_bytes += round_len;
677            }
678            // Read 3: Append each byte slice, with padding to align starts to `u64`.
679            for (_align, bytes) in iter.as_bytes() {
680                let whole_words = 8 * (bytes.len() / 8);
681                // We want to extend `store` by `bytes`, but `bytes` may not be `u64` aligned.
682                // In the latter case, init `store` and cast and copy onto it as a byte slice.
683                if let Ok(words) = bytemuck::try_cast_slice(&bytes[.. whole_words]) {
684                    store.extend_from_slice(words);
685                }
686                else {
687                    let store_len = store.len();
688                    store.resize(store_len + whole_words/8, 0);
689                    let slice = bytemuck::try_cast_slice_mut(&mut store[store_len..]).expect("&[u64] should convert to &[u8]");
690                    slice.copy_from_slice(&bytes[.. whole_words]);
691                }
692                let remaining_bytes = &bytes[whole_words..];
693                if !remaining_bytes.is_empty() {
694                    let mut remainder = 0u64;
695                    let transmute: &mut [u8] = bytemuck::try_cast_slice_mut(std::slice::from_mut(&mut remainder)).expect("&[u64] should convert to &[u8]");
696                    for (i, byte) in remaining_bytes.iter().enumerate() {
697                        transmute[i] = *byte;
698                    }
699                    store.push(remainder);
700                }
701            }
702        }
703
704        pub fn write<'a, A, W>(mut writer: W, iter: &A) -> std::io::Result<()>
705        where
706            A: AsBytes<'a>,
707            W: std::io::Write,
708        {
709            // Read 1: Number of offsets we will record, equal to the number of slices plus one.
710            let offsets = 1 + iter.as_bytes().count();
711            let offsets_end: u64 = TryInto::<u64>::try_into((offsets) * std::mem::size_of::<u64>()).unwrap();
712            writer.write_all(bytemuck::cast_slice(std::slice::from_ref(&offsets_end)))?;
713            // Read 2: Establish each of the offsets based on lengths of byte slices.
714            let mut position_bytes = offsets_end;
715            for (align, bytes) in iter.as_bytes() {
716                assert!(align <= 8);
717                // Write length in bytes, but round up to words before updating `position_bytes`.
718                let to_push: u64 = position_bytes + TryInto::<u64>::try_into(bytes.len()).unwrap();
719                writer.write_all(bytemuck::cast_slice(std::slice::from_ref(&to_push)))?;
720                let round_len: u64 = ((bytes.len() + 7) & !7).try_into().unwrap();
721                position_bytes += round_len;
722            }
723            // Read 3: Append each byte slice, with padding to align starts to `u64`.
724            for (_align, bytes) in iter.as_bytes() {
725                writer.write_all(bytes)?;
726                let padding = ((bytes.len() + 7) & !7) - bytes.len();
727                if padding > 0 {
728                    writer.write_all(&[0u8;8][..padding])?;
729                }
730            }
731
732            Ok(())
733        }
734
735        /// Decodes an encoded sequence of byte slices. Each result will be `u64` aligned.
736        pub fn decode(store: &[u64]) -> impl Iterator<Item=&[u8]> {
737            assert!(store[0] % 8 == 0);
738            let slices = (store[0] / 8) - 1;
739            (0 .. slices).map(|i| decode_index(store, i))
740        }
741
742        /// Decodes a specific byte slice by index. It will be `u64` aligned.
743        #[inline(always)]
744        pub fn decode_index(store: &[u64], index: u64) -> &[u8] {
745            debug_assert!(index + 1 < store[0]/8);
746            let index: usize = index.try_into().unwrap();
747            let lower: usize = ((store[index] + 7) & !7).try_into().unwrap();
748            let upper: usize = (store[index + 1]).try_into().unwrap();
749            let bytes: &[u8] = bytemuck::try_cast_slice(store).expect("&[u64] should convert to &[u8]");
750            &bytes[lower .. upper]
751        }
752
753        #[cfg(test)]
754        mod test {
755
756            use crate::{Columnar, Container};
757            use crate::common::Push;
758            use crate::AsBytes;
759
760            use super::{encode, decode};
761
762            fn assert_roundtrip<'a, AB: AsBytes<'a>>(item: &AB) {
763                let mut store = Vec::new();
764                encode(&mut store, item);
765                assert!(item.as_bytes().map(|x| x.1).eq(decode(&store)));
766            }
767
768            #[test]
769            fn round_trip() {
770
771                let mut column: <Result<u64, String> as Columnar>::Container = Default::default();
772                for i in 0..10000u64 {
773                    column.push(&Ok::<u64, String>(i));
774                    column.push(&Err::<u64, String>(format!("{:?}", i)));
775                }
776
777                assert_roundtrip(&column.borrow());
778            }
779        }
780    }
781
782    #[cfg(test)]
783    mod test {
784        #[test]
785        fn round_trip() {
786
787            use crate::{Columnar, Container};
788            use crate::common::{Push, HeapSize, Len, Index};
789            use crate::{AsBytes, FromBytes};
790
791            let mut column: <Result<u64, u64> as Columnar>::Container = Default::default();
792            for i in 0..100u64 {
793                column.push(Ok::<u64, u64>(i));
794                column.push(Err::<u64, u64>(i));
795            }
796
797            assert_eq!(column.len(), 200);
798            assert_eq!(column.heap_size(), (1624, 2080));
799
800            for i in 0..100 {
801                assert_eq!(column.get(2*i+0), Ok(i as u64));
802                assert_eq!(column.get(2*i+1), Err(i as u64));
803            }
804
805            let column2 = crate::Results::<&[u64], &[u64], &[u64], &[u64], &u64>::from_bytes(&mut column.borrow().as_bytes().map(|(_, bytes)| bytes));
806            for i in 0..100 {
807                assert_eq!(column.get(2*i+0), column2.get(2*i+0).copied().map_err(|e| *e));
808                assert_eq!(column.get(2*i+1), column2.get(2*i+1).copied().map_err(|e| *e));
809            }
810
811            let column3 = crate::Results::<&[u64], &[u64], &[u64], &[u64], &u64>::from_bytes(&mut column2.as_bytes().map(|(_, bytes)| bytes));
812            for i in 0..100 {
813                assert_eq!(column3.get(2*i+0), column2.get(2*i+0));
814                assert_eq!(column3.get(2*i+1), column2.get(2*i+1));
815            }
816        }
817    }
818}
819
820/// Types that prefer to be represented by `Vec<T>`.
821pub mod primitive {
822
823    /// An implementation of opinions for types that want to use `Vec<T>`.
824    macro_rules! implement_columnable {
825        ($($index_type:ty),*) => { $(
826            impl crate::Columnar for $index_type {
827                type Ref<'a> = &'a $index_type;
828                fn into_owned<'a>(other: Self::Ref<'a>) -> Self { *other }
829
830                type Container = Vec<$index_type>;
831            }
832            impl crate::Container<$index_type> for Vec<$index_type> {
833                type Borrowed<'a> = &'a [$index_type];
834                fn borrow<'a>(&'a self) -> Self::Borrowed<'a> { &self[..] }
835            }
836
837            impl crate::HeapSize for $index_type { }
838
839            impl<'a> crate::AsBytes<'a> for &'a [$index_type] {
840                #[inline(always)]
841                fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
842                    std::iter::once((std::mem::align_of::<$index_type>() as u64, bytemuck::cast_slice(&self[..])))
843                }
844            }
845            impl<'a> crate::FromBytes<'a> for &'a [$index_type] {
846                fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
847                    // We use `unwrap()` here in order to panic with the `bytemuck` error, which may be informative.
848                    bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()
849                }
850            }
851        )* }
852    }
853
854    implement_columnable!(u8, u16, u32, u64, u128);
855    implement_columnable!(i8, i16, i32, i64, i128);
856    implement_columnable!(f32, f64);
857
858    pub use sizes::{Usizes, Isizes};
859    /// Columnar stores for `usize` and `isize`, stored as 64 bits.
860    mod sizes {
861
862        use crate::{Clear, Columnar, Len, IndexMut, Index, IndexAs, Push, HeapSize};
863
864        #[derive(Copy, Clone, Default)]
865        pub struct Usizes<CV = Vec<u64>> { pub values: CV }
866
867        impl Columnar for usize {
868            type Ref<'a> = usize;
869            fn into_owned<'a>(other: Self::Ref<'a>) -> Self { other }
870            type Container = Usizes;
871        }
872
873        impl<CV: crate::Container<u64>> crate::Container<usize> for Usizes<CV> {
874            type Borrowed<'a> = Usizes<CV::Borrowed<'a>> where CV: 'a;
875            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
876                Usizes { values: self.values.borrow() }
877            }
878        }
879
880        impl<CV: Len> Len for Usizes<CV> { fn len(&self) -> usize { self.values.len() }}
881        impl IndexMut for Usizes {
882            type IndexMut<'a> = &'a mut u64;
883            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> { &mut self.values[index] }
884        }
885        impl<CV: IndexAs<u64>> Index for Usizes<CV> {
886            type Ref = usize;
887            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self.values.index_as(index).try_into().expect("Usizes values should fit in `usize`") }
888        }
889        impl<'a, CV: IndexAs<u64>> Index for &'a Usizes<CV> {
890            type Ref = usize;
891            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self.values.index_as(index).try_into().expect("Usizes values should fit in `usize`") }
892        }
893        impl Push<usize> for Usizes {
894            #[inline]
895            fn push(&mut self, item: usize) { self.values.push(item.try_into().expect("usize must fit in a u64")) }
896        }
897        impl Push<&usize> for Usizes {
898            #[inline]
899            fn push(&mut self, item: &usize) { self.values.push((*item).try_into().expect("usize must fit in a u64")) }
900        }
901        impl<CV: Clear> Clear for Usizes<CV> { fn clear(&mut self) { self.values.clear() }}
902
903        impl<CV: HeapSize> HeapSize for Usizes<CV> {
904            fn heap_size(&self) -> (usize, usize) {
905                self.values.heap_size()
906            }
907        }
908
909        impl<'a, CV: crate::AsBytes<'a>> crate::AsBytes<'a> for crate::primitive::Usizes<CV> {
910            #[inline(always)]
911            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
912                self.values.as_bytes()
913            }
914        }
915
916        impl<'a, CV: crate::FromBytes<'a>> crate::FromBytes<'a> for crate::primitive::Usizes<CV> {
917            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
918                Self { values: CV::from_bytes(bytes) }
919            }
920        }
921
922
923        #[derive(Copy, Clone, Default)]
924        pub struct Isizes<CV = Vec<i64>> { pub values: CV }
925
926        impl Columnar for isize {
927            type Ref<'a> = isize;
928            fn into_owned<'a>(other: Self::Ref<'a>) -> Self { other }
929            type Container = Isizes;
930        }
931
932        impl<CV: crate::Container<i64>> crate::Container<isize> for Isizes<CV> {
933            type Borrowed<'a> = Isizes<CV::Borrowed<'a>> where CV: 'a;
934            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
935                Isizes { values: self.values.borrow() }
936            }
937        }
938
939        impl<CV: Len> Len for Isizes<CV> { fn len(&self) -> usize { self.values.len() }}
940        impl IndexMut for Isizes {
941            type IndexMut<'a> = &'a mut i64;
942            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> { &mut self.values[index] }
943        }
944        impl<CV: IndexAs<i64>> Index for Isizes<CV> {
945            type Ref = isize;
946            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self.values.index_as(index).try_into().expect("Isizes values should fit in `isize`") }
947        }
948        impl<'a, CV: IndexAs<i64>> Index for &'a Isizes<CV> {
949            type Ref = isize;
950            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self.values.index_as(index).try_into().expect("Isizes values should fit in `isize`") }
951        }
952        impl Push<isize> for Isizes {
953            #[inline]
954            fn push(&mut self, item: isize) { self.values.push(item.try_into().expect("isize must fit in a i64")) }
955        }
956        impl Push<&isize> for Isizes {
957            #[inline]
958            fn push(&mut self, item: &isize) { self.values.push((*item).try_into().expect("isize must fit in a i64")) }
959        }
960        impl<CV: Clear> Clear for Isizes<CV> { fn clear(&mut self) { self.values.clear() }}
961
962        impl<CV: HeapSize> HeapSize for Isizes<CV> {
963            fn heap_size(&self) -> (usize, usize) {
964                self.values.heap_size()
965            }
966        }
967
968        impl<'a, CV: crate::AsBytes<'a>> crate::AsBytes<'a> for crate::primitive::Isizes<CV> {
969            #[inline(always)]
970            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
971                self.values.as_bytes()
972            }
973        }
974
975        impl<'a, CV: crate::FromBytes<'a>> crate::FromBytes<'a> for crate::primitive::Isizes<CV> {
976            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
977                Self { values: CV::from_bytes(bytes) }
978            }
979        }
980    }
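
    // Sketch: `Usizes` stores `usize` values as `u64` and converts back on read.
    #[cfg(test)]
    mod usizes_sketch {
        use crate::{Index, Len, Push};
        use super::Usizes;

        #[test]
        fn push_and_read() {
            let mut column: Usizes = Default::default();
            column.push(7usize);
            column.push(&11usize);
            assert_eq!(column.len(), 2);
            assert_eq!(column.get(1), 11);
        }
    }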
981
982    pub use empty::Empties;
983    /// A columnar store for `()`.
984    mod empty {
985
986        use crate::common::index::CopyAs;
987        use crate::{Clear, Columnar, Len, IndexMut, Index, Push, HeapSize};
988
989        #[derive(Copy, Clone, Debug, Default)]
990        pub struct Empties<CC = u64> { pub count: CC, pub empty: () }
991
992        impl Columnar for () {
993            type Ref<'a> = ();
994            fn into_owned<'a>(_other: Self::Ref<'a>) -> Self { () }
995            type Container = Empties;
996        }
997
998        impl crate::Container<()> for Empties {
999            type Borrowed<'a> = Empties<&'a u64>;
1000            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> { Empties { count: &self.count, empty: () } }
1001        }
1002
1003        impl<CC: CopyAs<u64> + Copy> Len for Empties<CC> {
1004            fn len(&self) -> usize { self.count.copy_as() as usize }
1005        }
1006        impl<CC> IndexMut for Empties<CC> {
1007            type IndexMut<'a> = &'a mut () where CC: 'a;
1008            // TODO: panic if out of bounds?
1009            #[inline(always)] fn get_mut(&mut self, _index: usize) -> Self::IndexMut<'_> { &mut self.empty }
1010        }
1011        impl<CC> Index for Empties<CC> {
1012            type Ref = ();
1013            fn get(&self, _index: usize) -> Self::Ref { () }
1014        }
1015        impl<'a, CC> Index for &'a Empties<CC> {
1016            type Ref = &'a ();
1017            fn get(&self, _index: usize) -> Self::Ref { &() }
1018        }
1019        impl Push<()> for Empties {
1020            // TODO: check for overflow?
1021            #[inline(always)]
1022            fn push(&mut self, _item: ()) { self.count += 1; }
1023        }
1024        impl Push<&()> for Empties {
1025            // TODO: check for overflow?
1026            #[inline(always)]
1027            fn push(&mut self, _item: &()) { self.count += 1; }
1028        }
1029
1030        impl HeapSize for Empties {
1031            fn heap_size(&self) -> (usize, usize) { (0, 0) }
1032        }
1033        impl Clear for Empties {
1034            fn clear(&mut self) { self.count = 0; }
1035        }
1036
1037        impl<'a> crate::AsBytes<'a> for crate::primitive::Empties<&'a u64> {
1038            #[inline(always)]
1039            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1040                std::iter::once((8, bytemuck::cast_slice(std::slice::from_ref(self.count))))
1041            }
1042        }
1043        impl<'a> crate::FromBytes<'a> for crate::primitive::Empties<&'a u64> {
1044            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1045                Self { count: &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0], empty: () }
1046            }
1047        }
1048    }
1049
1050    pub use boolean::Bools;
1051    /// A columnar store for `bool`.
1052    mod boolean {
1053
1054        use crate::common::index::CopyAs;
1055        use crate::{Clear, Len, Index, IndexAs, Push, HeapSize};
1056
1057        /// A store for maintaining `Vec<bool>`.
1058        #[derive(Copy, Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
1059        pub struct Bools<VC = Vec<u64>, WC = u64> {
1060            /// The bundles of bits that form complete `u64` values.
1061            pub values: VC,
1062            /// The work-in-progress bits that are not yet complete.
1063            pub last_word: WC,
1064            /// The number of bits of `last_word` that are currently populated.
1065            pub last_bits: WC,
1066        }
1067
1068        impl crate::Columnar for bool {
1069            type Ref<'a> = bool;
1070            fn into_owned<'a>(other: Self::Ref<'a>) -> Self { other }
1071            type Container = Bools;
1072        }
1073
1074        impl<VC: crate::Container<u64>> crate::Container<bool> for Bools<VC> {
1075            type Borrowed<'a> = Bools<VC::Borrowed<'a>, &'a u64> where VC: 'a;
1076            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1077                Bools {
1078                    values: self.values.borrow(),
1079                    last_word: &self.last_word,
1080                    last_bits: &self.last_bits,
1081                }
1082            }
1083        }
1084
1085        impl<'a, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for crate::primitive::Bools<VC, &'a u64> {
1086            #[inline(always)]
1087            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1088                let iter = self.values.as_bytes();
1089                let iter = crate::chain_one(iter, (std::mem::align_of::<u64>() as u64, bytemuck::cast_slice(std::slice::from_ref(self.last_word))));
1090                crate::chain_one(iter, (std::mem::align_of::<u64>() as u64, bytemuck::cast_slice(std::slice::from_ref(self.last_bits))))
1091            }
1092        }
1093
1094        impl<'a, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for crate::primitive::Bools<VC, &'a u64> {
1095            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1096                let values = crate::FromBytes::from_bytes(bytes);
1097                let last_word = &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0];
1098                let last_bits = &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0];
1099                Self { values, last_word, last_bits }
1100            }
1101        }
1102
1103        impl<VC: Len, WC: Copy + CopyAs<u64>> Len for Bools<VC, WC> {
1104            #[inline(always)] fn len(&self) -> usize { self.values.len() * 64 + (self.last_bits.copy_as() as usize) }
1105        }
1106
1107        impl<VC: Len + IndexAs<u64>, WC: Copy + CopyAs<u64>> Index for Bools<VC, WC> {
1108            type Ref = bool;
1109            #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
1110                let block = index / 64;
1111                let word = if block == self.values.len() {
1112                    self.last_word.copy_as()
1113                } else {
1114                    self.values.index_as(block)
1115                };
1116                let bit = index % 64;
1117                (word >> bit) & 1 == 1
1118            }
1119        }
1120
1121        impl<VC: Len + IndexAs<u64>, WC: Copy + CopyAs<u64>> Index for &Bools<VC, WC> {
1122            type Ref = bool;
1123            #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
1124                (*self).get(index)
1125            }
1126        }
1127
1128        impl<VC: Push<u64>> Push<bool> for Bools<VC> {
1129            #[inline]
1130            fn push(&mut self, bit: bool) {
1131                self.last_word |= (bit as u64) << self.last_bits;
1132                self.last_bits += 1;
1133                // If we have a fully formed word, commit it to `self.values`.
1134                if self.last_bits == 64 {
1135                    self.values.push(self.last_word);
1136                    self.last_word = 0;
1137                    self.last_bits = 0;
1138                }
1139            }
1140        }
1141        impl<'a, VC: Push<u64>> Push<&'a bool> for Bools<VC> {
1142            #[inline(always)]
1143            fn push(&mut self, bit: &'a bool) {
1144                self.push(*bit)
1145            }
1146        }
1147
1148
1149        impl<VC: Clear> Clear for Bools<VC> {
1150            fn clear(&mut self) {
1151                self.values.clear();
1152                self.last_word = 0;
1153                self.last_bits = 0;
1154            }
1155        }
1156
1157        impl<VC: HeapSize> HeapSize for Bools<VC> {
1158            fn heap_size(&self) -> (usize, usize) {
1159                self.values.heap_size()
1160            }
1161        }
1162    }
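
    // Sketch: `Bools` packs pushed bits into `u64` words, holding the incomplete
    // trailing word in `last_word` / `last_bits`.
    #[cfg(test)]
    mod bools_sketch {
        use crate::{Index, Len, Push};
        use super::Bools;

        #[test]
        fn push_and_read_bits() {
            let mut column: Bools = Default::default();
            for i in 0 .. 130 {
                column.push(i % 3 == 0);
            }
            // Two complete words are committed; two bits remain in `last_word`.
            assert_eq!(column.values.len(), 2);
            assert_eq!(column.last_bits, 2);
            assert_eq!(column.len(), 130);
            assert!(column.get(0));
            assert!(!column.get(1));
            assert!(column.get(129));
        }
    }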
1163
1164    pub use duration::Durations;
1165    /// A columnar store for `std::time::Duration`.
1166    mod duration {
1167
1168        use std::time::Duration;
1169        use crate::{Len, Index, IndexAs, Push, Clear, HeapSize};
1170
1171        // `std::time::Duration` is equivalent to `(u64, u32)`, corresponding to seconds and nanoseconds.
1172        #[derive(Copy, Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
1173        pub struct Durations<SC = Vec<u64>, NC = Vec<u32>> {
1174            pub seconds: SC,
1175            pub nanoseconds: NC,
1176        }
1177
1178        impl crate::Columnar for Duration {
1179            type Ref<'a> = Duration;
1180            fn into_owned<'a>(other: Self::Ref<'a>) -> Self { other }
1181            type Container = Durations;
1182        }
1183
1184        impl<SC: crate::Container<u64>, NC: crate::Container<u32>> crate::Container<Duration> for Durations<SC, NC> {
1185            type Borrowed<'a> = Durations<SC::Borrowed<'a>, NC::Borrowed<'a>> where SC: 'a, NC: 'a;
1186            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1187                Durations {
1188                    seconds: self.seconds.borrow(),
1189                    nanoseconds: self.nanoseconds.borrow(),
1190                }
1191            }
1192        }
1193
1194        impl<'a, SC: crate::AsBytes<'a>, NC: crate::AsBytes<'a>> crate::AsBytes<'a> for crate::primitive::Durations<SC, NC> {
1195            #[inline(always)]
1196            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1197                crate::chain(self.seconds.as_bytes(), self.nanoseconds.as_bytes())
1198            }
1199        }
1200        impl<'a, SC: crate::FromBytes<'a>, NC: crate::FromBytes<'a>> crate::FromBytes<'a> for crate::primitive::Durations<SC, NC> {
1201            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1202                Self {
1203                    seconds: crate::FromBytes::from_bytes(bytes),
1204                    nanoseconds: crate::FromBytes::from_bytes(bytes),
1205                }
1206            }
1207        }
1208
1209        impl<SC: Len, NC> Len for Durations<SC, NC> {
1210            #[inline(always)] fn len(&self) -> usize { self.seconds.len() }
1211        }
1212
1213        impl<SC: IndexAs<u64>, NC: IndexAs<u32>> Index for Durations<SC, NC> {
1214            type Ref = Duration;
1215            #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
1216                Duration::new(self.seconds.index_as(index), self.nanoseconds.index_as(index))
1217            }
1218        }
1219
1220        impl<SC: Push<u64>, NC: Push<u32>> Push<std::time::Duration> for Durations<SC, NC> {
1221            #[inline]
1222            fn push(&mut self, item: std::time::Duration) {
1223                self.seconds.push(item.as_secs());
1224                self.nanoseconds.push(item.subsec_nanos());
1225            }
1226        }
1227        impl<'a, SC: Push<u64>, NC: Push<u32>> Push<&'a std::time::Duration> for Durations<SC, NC> {
1228            #[inline]
1229            fn push(&mut self, item: &'a std::time::Duration) {
1230                self.push(*item)
1231            }
1232        }
1233        impl<'a, SC: Push<&'a u64>, NC: Push<&'a u32>> Push<(&'a u64, &'a u32)> for Durations<SC, NC> {
1234            #[inline]
1235            fn push(&mut self, item: (&'a u64, &'a u32)) {
1236                self.seconds.push(item.0);
1237                self.nanoseconds.push(item.1);
1238            }
1239        }
1240
1241        impl<SC: Clear, NC: Clear> Clear for Durations<SC, NC> {
1242            fn clear(&mut self) {
1243                self.seconds.clear();
1244                self.nanoseconds.clear();
1245            }
1246        }
1247
1248        impl<SC: HeapSize, NC: HeapSize> HeapSize for Durations<SC, NC> {
1249            fn heap_size(&self) -> (usize, usize) {
1250                let (l0, c0) = self.seconds.heap_size();
1251                let (l1, c1) = self.nanoseconds.heap_size();
1252                (l0 + l1, c0 + c1)
1253            }
1254        }
1255    }
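
    // Sketch: `Durations` stores seconds and nanoseconds as two parallel columns.
    #[cfg(test)]
    mod durations_sketch {
        use std::time::Duration;
        use crate::{Index, Len, Push};
        use super::Durations;

        #[test]
        fn push_and_read() {
            let mut column: Durations = Default::default();
            column.push(Duration::new(3, 500));
            column.push(&Duration::new(4, 0));
            assert_eq!(column.len(), 2);
            assert_eq!(column.get(0), Duration::new(3, 500));
            assert_eq!(column.get(1), Duration::new(4, 0));
        }
    }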
1256}
1257
1258pub use string::Strings;
1259pub mod string {
1260
1261    use super::{Clear, Columnar, Len, Index, IndexAs, Push, HeapSize};
1262
1263    /// A stand-in for `Vec<String>`.
1264    #[derive(Copy, Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
1265    pub struct Strings<BC = Vec<u64>, VC = Vec<u8>> {
1266        /// Bounds container; provides indexed access to offsets.
1267        pub bounds: BC,
1268        /// Values container; provides slice access to bytes.
1269        pub values: VC,
1270    }
1271
1272    impl Columnar for String {
1273        type Ref<'a> = &'a str;
1274        fn copy_from<'a>(&mut self, other: Self::Ref<'a>) {
1275            self.clear();
1276            self.push_str(other);
1277        }
1278        fn into_owned<'a>(other: Self::Ref<'a>) -> Self { other.to_string() }
1279        type Container = Strings;
1280    }
1281
1282    impl<'b, BC: crate::Container<u64>> crate::Container<String> for Strings<BC, &'b [u8]> {
1283        type Borrowed<'a> = Strings<BC::Borrowed<'a>, &'a [u8]> where BC: 'a, 'b: 'a;
1284        fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1285            Strings {
1286                bounds: self.bounds.borrow(),
1287                values: self.values,
1288            }
1289        }
1290    }
1291    impl<BC: crate::Container<u64>> crate::Container<String> for Strings<BC, Vec<u8>> {
1292        type Borrowed<'a> = Strings<BC::Borrowed<'a>, &'a [u8]> where BC: 'a;
1293        fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1294            Strings {
1295                bounds: self.bounds.borrow(),
1296                values: self.values.borrow(),
1297            }
1298        }
1299    }
1300
1301    impl<'a, BC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for Strings<BC, VC> {
1302        #[inline(always)]
1303        fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1304            crate::chain(self.bounds.as_bytes(), self.values.as_bytes())
1305        }
1306    }
1307    impl<'a, BC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for Strings<BC, VC> {
1308        fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1309            Self {
1310                bounds: crate::FromBytes::from_bytes(bytes),
1311                values: crate::FromBytes::from_bytes(bytes),
1312            }
1313        }
1314    }
1315
1316    impl<BC: Len, VC> Len for Strings<BC, VC> {
1317        #[inline(always)] fn len(&self) -> usize { self.bounds.len() }
1318    }
1319
1320    impl<'a, BC: Len+IndexAs<u64>> Index for Strings<BC, &'a [u8]> {
1321        type Ref = &'a str;
1322        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
1323            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
1324            let upper = self.bounds.index_as(index);
1325            let lower: usize = lower.try_into().expect("bounds must fit in `usize`");
1326            let upper: usize = upper.try_into().expect("bounds must fit in `usize`");
1327            std::str::from_utf8(&self.values[lower .. upper]).expect("&[u8] must be valid utf8")
1328        }
1329    }
1330    impl<'a, BC: Len+IndexAs<u64>> Index for &'a Strings<BC, Vec<u8>> {
1331        type Ref = &'a str;
1332        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
1333            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
1334            let upper = self.bounds.index_as(index);
1335            let lower: usize = lower.try_into().expect("bounds must fit in `usize`");
1336            let upper: usize = upper.try_into().expect("bounds must fit in `usize`");
1337            std::str::from_utf8(&self.values[lower .. upper]).expect("&[u8] must be valid utf8")
1338        }
1339    }
1340
1341    impl<BC: Push<u64>> Push<&String> for Strings<BC> {
1342        #[inline(always)] fn push(&mut self, item: &String) {
1343            self.values.extend_from_slice(item.as_bytes());
1344            self.bounds.push(self.values.len() as u64);
1345        }
1346    }
1347    impl<BC: Push<u64>> Push<&str> for Strings<BC> {
1348        #[inline]
1349        fn push(&mut self, item: &str) {
1350            self.values.extend_from_slice(item.as_bytes());
1351            self.bounds.push(self.values.len() as u64);
1352        }
1353    }
1354    impl<'a, BC: Push<u64>> Push<std::fmt::Arguments<'a>> for Strings<BC> {
1355        fn push(&mut self, item: std::fmt::Arguments<'a>) {
1356            use std::io::Write;
1357            self.values.write_fmt(item).expect("write_fmt failed");
1358            self.bounds.push(self.values.len() as u64);
1359        }
1360    }
1361    impl<'a, 'b, BC: Push<u64>> Push<&'b std::fmt::Arguments<'a>> for Strings<BC> {
1362        fn push(&mut self, item: &'b std::fmt::Arguments<'a>) {
1363            use std::io::Write;
1364            self.values.write_fmt(*item).expect("write_fmt failed");
1365            self.bounds.push(self.values.len() as u64);
1366        }
1367    }
1368    impl<BC: Clear, VC: Clear> Clear for Strings<BC, VC> {
1369        fn clear(&mut self) {
1370            self.bounds.clear();
1371            self.values.clear();
1372        }
1373    }
1374    impl<BC: HeapSize, VC: HeapSize> HeapSize for Strings<BC, VC> {
1375        fn heap_size(&self) -> (usize, usize) {
1376            let (l0, c0) = self.bounds.heap_size();
1377            let (l1, c1) = self.values.heap_size();
1378            (l0 + l1, c0 + c1)
1379        }
1380    }
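    // A minimal sketch, not from the original source: it pushes a few strings into the
    // default `Strings` container and reads them back through `Index`, using only the
    // impls defined in this module.
    #[cfg(test)]
    mod test {
        use crate::common::{Index, Len, Push};

        #[test]
        fn push_and_get() {
            let mut store: super::Strings = Default::default();
            store.push("grape");
            store.push(&"nectarine".to_string());
            store.push(format_args!("plum-{}", 3));
            assert_eq!(store.len(), 3);
            assert_eq!((&store).get(0), "grape");
            assert_eq!((&store).get(1), "nectarine");
            assert_eq!((&store).get(2), "plum-3");
        }
    }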
1381}
1382
1383pub use vector::Vecs;
1384pub mod vector {
1385
1386    use super::{Clear, Columnar, Len, IndexMut, Index, IndexAs, Push, HeapSize, Slice};
1387
1388    /// A stand-in for `Vec<Vec<T>>` for complex `T`.
1389    #[derive(Debug, Default, Copy, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
1390    pub struct Vecs<TC, BC = Vec<u64>> {
1391        pub bounds: BC,
1392        pub values: TC,
1393    }
1394
1395    impl<T: Columnar> Columnar for Vec<T> {
1396        type Ref<'a> = Slice<<T::Container as crate::Container<T>>::Borrowed<'a>> where T: 'a;
1397        fn copy_from<'a>(&mut self, other: Self::Ref<'a>) {
1398            self.truncate(other.len());
1399            let mut other_iter = other.into_iter();
1400            for (s, o) in self.iter_mut().zip(&mut other_iter) {
1401                T::copy_from(s, o);
1402            }
1403            for o in other_iter {
1404                self.push(T::into_owned(o));
1405            }
1406        }
1407        fn into_owned<'a>(other: Self::Ref<'a>) -> Self {
1408            other.into_iter().map(|x| T::into_owned(x)).collect()
1409        }
1410        type Container = Vecs<T::Container>;
1411    }
1412
1413    impl<T: Columnar, const N: usize> Columnar for [T; N] {
1414        type Ref<'a> = Slice<<T::Container as crate::Container<T>>::Borrowed<'a>> where T: 'a;
1415        fn copy_from<'a>(&mut self, other: Self::Ref<'a>) {
1416            for (s, o) in self.iter_mut().zip(other.into_iter()) {
1417                T::copy_from(s, o);
1418            }
1419        }
1420        fn into_owned<'a>(other: Self::Ref<'a>) -> Self {
1421            let vec: Vec<_> = other.into_iter().map(|x| T::into_owned(x)).collect();
1422            match vec.try_into() {
1423                Ok(array) => array,
1424                Err(_) => panic!("wrong length"),
1425            }
1426        }
1427        type Container = Vecs<T::Container>;
1428    }
1429
1430    impl<T: Columnar<Container = TC>, BC: crate::Container<u64>, TC: crate::Container<T>> crate::Container<Vec<T>> for Vecs<TC, BC> {
1431        type Borrowed<'a> = Vecs<TC::Borrowed<'a>, BC::Borrowed<'a>> where BC: 'a, TC: 'a, T: 'a;
1432        fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1433            Vecs {
1434                bounds: self.bounds.borrow(),
1435                values: self.values.borrow(),
1436            }
1437        }
1438    }
1439
1440    impl<T: Columnar<Container = TC>, BC: crate::Container<u64>, TC: crate::Container<T>, const N: usize> crate::Container<[T; N]> for Vecs<TC, BC> {
1441        type Borrowed<'a> = Vecs<TC::Borrowed<'a>, BC::Borrowed<'a>> where BC: 'a, TC: 'a, T: 'a;
1442        fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1443            Vecs {
1444                bounds: self.bounds.borrow(),
1445                values: self.values.borrow(),
1446            }
1447        }
1448    }
1449
1450    impl<'a, TC: crate::AsBytes<'a>, BC: crate::AsBytes<'a>> crate::AsBytes<'a> for Vecs<TC, BC> {
1451        #[inline(always)]
1452        fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1453            crate::chain(self.bounds.as_bytes(), self.values.as_bytes())
1454        }
1455    }
1456    impl<'a, TC: crate::FromBytes<'a>, BC: crate::FromBytes<'a>> crate::FromBytes<'a> for Vecs<TC, BC> {
1457        fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1458            Self {
1459                bounds: crate::FromBytes::from_bytes(bytes),
1460                values: crate::FromBytes::from_bytes(bytes),
1461            }
1462        }
1463    }
1464
1465    impl<TC: Len> Vecs<TC> {
1466        #[inline]
1467        pub fn push_iter<I>(&mut self, iter: I) where I: IntoIterator, TC: Push<I::Item> {
1468            self.values.extend(iter);
1469            self.bounds.push(self.values.len() as u64);
1470        }
1471    }
1472
1473    impl<TC, BC: Len> Len for Vecs<TC, BC> {
1474        #[inline(always)] fn len(&self) -> usize { self.bounds.len() }
1475    }
1476
1477    impl<TC: Copy, BC: Len+IndexAs<u64>> Index for Vecs<TC, BC> {
1478        type Ref = Slice<TC>;
1479        #[inline(always)]
1480        fn get(&self, index: usize) -> Self::Ref {
1481            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
1482            let upper = self.bounds.index_as(index);
1483            Slice::new(lower, upper, self.values)
1484        }
1485    }
1486    impl<'a, TC, BC: Len+IndexAs<u64>> Index for &'a Vecs<TC, BC> {
1487        type Ref = Slice<&'a TC>;
1488        #[inline(always)]
1489        fn get(&self, index: usize) -> Self::Ref {
1490            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
1491            let upper = self.bounds.index_as(index);
1492            Slice::new(lower, upper, &self.values)
1493        }
1494    }
1495    impl<TC, BC: Len+IndexAs<u64>> IndexMut for Vecs<TC, BC> {
1496        type IndexMut<'a> = Slice<&'a mut TC> where TC: 'a, BC: 'a;
1497
1498        #[inline(always)]
1499        fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
1500            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
1501            let upper = self.bounds.index_as(index);
1502            Slice::new(lower, upper, &mut self.values)
1503        }
1504    }
1505
1506    impl<I: IntoIterator, TC: Push<I::Item> + Len> Push<I> for Vecs<TC> {
1507        #[inline]
1508        fn push(&mut self, item: I) {
1509            self.values.extend(item.into_iter());
1510            self.bounds.push(self.values.len() as u64);
1511        }
1512    }
1513
1514    impl<TC: Clear> Clear for Vecs<TC> {
1515        fn clear(&mut self) {
1516            self.bounds.clear();
1517            self.values.clear();
1518        }
1519    }
1520
1521    impl<TC: HeapSize, BC: HeapSize> HeapSize for Vecs<TC, BC> {
1522        fn heap_size(&self) -> (usize, usize) {
1523            let (l0, c0) = self.bounds.heap_size();
1524            let (l1, c1) = self.values.heap_size();
1525            (l0 + l1, c0 + c1)
1526        }
1527    }
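    // A minimal sketch, not from the original source: it pushes a few sequences into the
    // default `Vecs` container (including one via `push_iter`) and checks the lengths of
    // the `Slice`s handed back through `Index`.
    #[cfg(test)]
    mod test {
        use crate::common::{Index, Len, Push};

        #[test]
        fn push_and_get() {
            let mut store: super::Vecs<Vec<u64>> = Default::default();
            store.push(vec![0u64, 1, 2]);
            store.push(Vec::<u64>::new());
            store.push_iter(3u64 .. 7);
            assert_eq!(store.len(), 3);
            assert_eq!((&store).get(0).len(), 3);
            assert_eq!((&store).get(1).len(), 0);
            assert_eq!((&store).get(2).len(), 4);
        }
    }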
1528}
1529
1530#[allow(non_snake_case)]
1531pub mod tuple {
1532
1533    use super::{Clear, Columnar, Len, IndexMut, Index, Push, HeapSize};
1534
1535    // Implementations for tuple types.
1536    // These are all macro based, because the implementations are very similar.
1537    // The macro requires two names, one for the store and one for pushable types.
1538    macro_rules! tuple_impl {
1539        ( $($name:ident,$name2:ident)+) => (
1540
1541            impl<$($name: Columnar),*> Columnar for ($($name,)*) {
1542                type Ref<'a> = ($($name::Ref<'a>,)*) where $($name: 'a,)*;
1543                fn copy_from<'a>(&mut self, other: Self::Ref<'a>) {
1544                    let ($($name,)*) = self;
1545                    let ($($name2,)*) = other;
1546                    $(crate::Columnar::copy_from($name, $name2);)*
1547                }
1548                fn into_owned<'a>(other: Self::Ref<'a>) -> Self {
1549                    let ($($name2,)*) = other;
1550                    ($($name::into_owned($name2),)*)
1551                }
1552                type Container = ($($name::Container,)*);
1553            }
1554            impl<$($name: crate::Columnar, $name2: crate::Container<$name>,)*> crate::Container<($($name,)*)> for ($($name2,)*) {
1555                type Borrowed<'a> = ($($name2::Borrowed<'a>,)*) where $($name: 'a, $name2: 'a,)*;
1556                fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1557                    let ($($name,)*) = self;
1558                    ($($name.borrow(),)*)
1559                }
1560            }
1561
1562            #[allow(non_snake_case)]
1563            impl<'a, $($name: crate::AsBytes<'a>),*> crate::AsBytes<'a> for ($($name,)*) {
1564                #[inline(always)]
1565                fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1566                    let ($($name,)*) = self;
1567                    let iter = None.into_iter();
1568                    $( let iter = crate::chain(iter, $name.as_bytes()); )*
1569                    iter
1570                }
1571            }
1572            impl<'a, $($name: crate::FromBytes<'a>),*> crate::FromBytes<'a> for ($($name,)*) {
1573                #[allow(non_snake_case)]
1574                fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1575                    $(let $name = crate::FromBytes::from_bytes(bytes);)*
1576                    ($($name,)*)
1577                }
1578            }
1579
1580            impl<$($name: Len),*> Len for ($($name,)*) {
1581                fn len(&self) -> usize {
1582                    self.0.len()
1583                }
1584            }
1585            impl<$($name: Clear),*> Clear for ($($name,)*) {
1586                fn clear(&mut self) {
1587                    let ($($name,)*) = self;
1588                    $($name.clear();)*
1589                }
1590            }
1591            impl<$($name: HeapSize),*> HeapSize for ($($name,)*) {
1592                fn heap_size(&self) -> (usize, usize) {
1593                    let ($($name,)*) = self;
1594                    let mut l = 0;
1595                    let mut c = 0;
1596                    $(let (l0, c0) = $name.heap_size(); l += l0; c += c0;)*
1597                    (l, c)
1598                }
1599            }
1600            impl<$($name: Index),*> Index for ($($name,)*) {
1601                type Ref = ($($name::Ref,)*);
1602                fn get(&self, index: usize) -> Self::Ref {
1603                    let ($($name,)*) = self;
1604                    ($($name.get(index),)*)
1605                }
1606            }
1607            impl<'a, $($name),*> Index for &'a ($($name,)*) where $( &'a $name: Index),* {
1608                type Ref = ($(<&'a $name as Index>::Ref,)*);
1609                fn get(&self, index: usize) -> Self::Ref {
1610                    let ($($name,)*) = self;
1611                    ($($name.get(index),)*)
1612                }
1613            }
1614
1615            impl<$($name: IndexMut),*> IndexMut for ($($name,)*) {
1616                type IndexMut<'a> = ($($name::IndexMut<'a>,)*) where $($name: 'a),*;
1617                fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
1618                    let ($($name,)*) = self;
1619                    ($($name.get_mut(index),)*)
1620                }
1621            }
1622            impl<$($name2, $name: Push<$name2>),*> Push<($($name2,)*)> for ($($name,)*) {
1623                #[inline]
1624                fn push(&mut self, item: ($($name2,)*)) {
1625                    let ($($name,)*) = self;
1626                    let ($($name2,)*) = item;
1627                    $($name.push($name2);)*
1628                }
1629            }
1630            impl<'a, $($name2, $name: Push<&'a $name2>),*> Push<&'a ($($name2,)*)> for ($($name,)*) {
1631                #[inline]
1632                fn push(&mut self, item: &'a ($($name2,)*)) {
1633                    let ($($name,)*) = self;
1634                    let ($($name2,)*) = item;
1635                    $($name.push($name2);)*
1636                }
1637            }
1638        )
1639    }
1640
1641    tuple_impl!(A,AA);
1642    tuple_impl!(A,AA B,BB);
1643    tuple_impl!(A,AA B,BB C,CC);
1644    tuple_impl!(A,AA B,BB C,CC D,DD);
1645    tuple_impl!(A,AA B,BB C,CC D,DD E,EE);
1646    tuple_impl!(A,AA B,BB C,CC D,DD E,EE F,FF);
1647    tuple_impl!(A,AA B,BB C,CC D,DD E,EE F,FF G,GG);
1648    tuple_impl!(A,AA B,BB C,CC D,DD E,EE F,FF G,GG H,HH);
1649    tuple_impl!(A,AA B,BB C,CC D,DD E,EE F,FF G,GG H,HH I,II);
1650    tuple_impl!(A,AA B,BB C,CC D,DD E,EE F,FF G,GG H,HH I,II J,JJ);
1651
1652    #[cfg(test)]
1653    mod test {
1654        #[test]
1655        fn round_trip() {
1656
1657            use crate::Columnar;
1658            use crate::common::{Index, Push, HeapSize, Len};
1659
1660            let mut column: <(u64, u8, String) as Columnar>::Container = Default::default();
1661            for i in 0..100 {
1662                column.push((i, i as u8, &i.to_string()));
1663                column.push((i, i as u8, &"".to_string()));
1664            }
1665
1666            assert_eq!(column.len(), 200);
1667            assert_eq!(column.heap_size(), (3590, 4608));
1668
1669            for i in 0..100u64 {
1670                assert_eq!((&column).get((2*i+0) as usize), (&i, &(i as u8), i.to_string().as_str()));
1671                assert_eq!((&column).get((2*i+1) as usize), (&i, &(i as u8), ""));
1672            }
1673
1674            // Compare to the heap size of a `Vec<(u64, u8, String)>`.
1675            let mut column: Vec<(u64, u8, String)> = Default::default();
1676            for i in 0..100 {
1677                column.push((i, i as u8, i.to_string()));
1678                column.push((i, i as u8, "".to_string()));
1679            }
1680            assert_eq!(column.heap_size(), (8190, 11040));
1681
1682        }
1683    }
1684}
1685
1686pub use sums::{rank_select::RankSelect, result::Results, option::Options};
1687/// Containers for enumerations ("sum types") that store variants separately.
1688///
1689/// The main work of these types is storing a discriminant and index efficiently,
1690/// as containers for each of the variant types can hold the actual data.
1691pub mod sums {
1692
1693    /// Stores for maintaining discriminants, and associated sequential indexes.
1694    ///
1695    /// The sequential indexes are not explicitly maintained, but are supported
1696    /// by a `rank(index)` function that indicates how many of a certain variant
1697    /// precede the given index. While this could potentially be done with a scan
1698    /// of all preceding discriminants, the stores maintain running accumulations
1699    /// that make the operation constant time (using additional amortized memory).
1700    pub mod rank_select {
1701
1702        use crate::primitive::Bools;
1703        use crate::common::index::CopyAs;
1704        use crate::{Len, Index, IndexAs, Push, Clear, HeapSize};
1705
1706        /// A store for maintaining `Vec<bool>` with fast `rank` and `select` access.
1707        ///
1708        /// The design is to have `u64` running counts for each block of 1024 bits,
1709        /// which is roughly the size of a cache line or two. The counts add roughly
1710        /// 6% overhead above the bits themselves, which seems pretty solid.
1711        #[derive(Copy, Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
1712        pub struct RankSelect<CC = Vec<u64>, VC = Vec<u64>, WC = u64> {
1713            /// Cumulative counts of set (true) bits, recorded *after* each block of 1024 bits.
1714            pub counts: CC,
1715            /// The bits themselves.
1716            pub values: Bools<VC, WC>,
1717        }
1718
1719        impl<CC: crate::Container<u64>, VC: crate::Container<u64>> RankSelect<CC, VC> {
1720            pub fn borrow<'a>(&'a self) -> RankSelect<CC::Borrowed<'a>, VC::Borrowed<'a>, &'a u64> {
1721                use crate::Container;
1722                RankSelect {
1723                    counts: self.counts.borrow(),
1724                    values: self.values.borrow(),
1725                }
1726            }
1727        }
1728
1729        impl<'a, CC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for RankSelect<CC, VC, &'a u64> {
1730            #[inline(always)]
1731            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1732                crate::chain(self.counts.as_bytes(), self.values.as_bytes())
1733            }
1734        }
1735        impl<'a, CC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for RankSelect<CC, VC, &'a u64> {
1736            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1737                Self {
1738                    counts: crate::FromBytes::from_bytes(bytes),
1739                    values: crate::FromBytes::from_bytes(bytes),
1740                }
1741            }
1742        }
1743
1744
1745        impl<CC, VC: Len + IndexAs<u64>, WC: Copy+CopyAs<u64>> RankSelect<CC, VC, WC> {
1746            #[inline]
1747            pub fn get(&self, index: usize) -> bool {
1748                Index::get(&self.values, index)
1749            }
1750        }
1751        impl<CC: Len + IndexAs<u64>, VC: Len + IndexAs<u64>, WC: Copy+CopyAs<u64>> RankSelect<CC, VC, WC> {
1752            /// The number of set bits *strictly* preceding `index`.
1753            ///
1754            /// This number is accumulated first by reading out of `self.counts` at the correct position,
1755            /// then by summing the ones in strictly prior `u64` entries, then by counting the ones in the
1756            /// masked `u64` in which the bit lives.
1757            pub fn rank(&self, index: usize) -> usize {
1758                let bit = index % 64;
1759                let block = index / 64;
1760                let chunk = block / 16;
1761                let mut count = if chunk > 0 { self.counts.index_as(chunk - 1) as usize } else { 0 };
1762                for pos in (16 * chunk) .. block {
1763                    count += self.values.values.index_as(pos).count_ones() as usize;
1764                }
1765                // TODO: Panic if out of bounds?
1766                let intra_word = if block == self.values.values.len() { self.values.last_word.copy_as() } else { self.values.values.index_as(block) };
1767                count += (intra_word & ((1 << bit) - 1)).count_ones() as usize;
1768                count
1769            }
1770            /// The index of the `rank`th set bit (counting from zero, to match `rank`), should one exist.
1771            pub fn select(&self, rank: u64) -> Option<usize> {
1772                let mut chunk = 0;
1773                // Step one is to find the position in `counts` where we go from `rank` to `rank + 1`.
1774                // The position we are looking for is within that chunk of bits.
1775                // TODO: Binary search is likely better at many scales. Rust's binary search is .. not helpful with ties.
1776                while chunk < self.counts.len() && self.counts.index_as(chunk) <= rank {
1777                    chunk += 1;
1778                }
1779                let mut count = if chunk > 0 { self.counts.index_as(chunk - 1) } else { 0 }; // set bits before this chunk
1780                // Step two is to find the position within that chunk where the `rank`th bit is.
1781                let mut block = 16 * chunk;
1782                while block < self.values.values.len() && count + (self.values.values.index_as(block).count_ones() as u64) <= rank {
1783                    count += self.values.values.index_as(block).count_ones() as u64;
1784                    block += 1;
1785                }
1786                // Step three is to search the last word for the location, or return `None` if we run out of bits.
1787                let last_bits = if block == self.values.values.len() { self.values.last_bits.copy_as() as usize } else { 64 };
1788                let last_word = if block == self.values.values.len() { self.values.last_word.copy_as() } else { self.values.values.index_as(block) };
1789                for shift in 0 .. last_bits {
1790                    if ((last_word >> shift) & 0x01 == 0x01) && count == rank {
1791                        return Some(64 * block + shift);
1792                    }
1793                    count += (last_word >> shift) & 0x01;
1794                }
1795
1796                None
1797            }
1798        }
1799
1800        impl<CC, VC: Len, WC: Copy + CopyAs<u64>> RankSelect<CC, VC, WC> {
1801            pub fn len(&self) -> usize {
1802                self.values.len()
1803            }
1804        }
1805
1806        // This implementation probably only works when both `CC` and `VC` are `Vec<u64>`, but we could fix that.
1807        // Partly, it's hard to name the `Index` flavor that allows one to get back a `u64`.
1808        impl<CC: Push<u64> + Len + IndexAs<u64>, VC: Push<u64> + Len + IndexAs<u64>> RankSelect<CC, VC> {
1809            #[inline]
1810            pub fn push(&mut self, bit: bool) {
1811                self.values.push(bit);
1812                while self.counts.len() < self.values.len() / 1024 {
1813                    let mut count = self.counts.last().unwrap_or(0);
1814                    let lower = 16 * self.counts.len();
1815                    let upper = lower + 16;
1816                    for i in lower .. upper {
1817                        count += self.values.values.index_as(i).count_ones() as u64;
1818                    }
1819                    self.counts.push(count);
1820                }
1821            }
1822        }
1823        impl<CC: Clear, VC: Clear> Clear for RankSelect<CC, VC> {
1824            fn clear(&mut self) {
1825                self.counts.clear();
1826                self.values.clear();
1827            }
1828        }
1829        impl<CC: HeapSize, VC: HeapSize> HeapSize for RankSelect<CC, VC> {
1830            fn heap_size(&self) -> (usize, usize) {
1831                let (l0, c0) = self.counts.heap_size();
1832                let (l1, c1) = self.values.heap_size();
1833                (l0 + l1, c0 + c1)
1834            }
1835        }
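        // A minimal sketch, not from the original source: it pushes a few thousand
        // alternating bits and spot-checks `rank`, which counts the set bits strictly
        // preceding an index. It relies only on the impls defined in this module.
        #[cfg(test)]
        mod test {
            use super::RankSelect;

            #[test]
            fn rank_alternating() {
                let mut store: RankSelect = Default::default();
                for index in 0 .. 2000 {
                    store.push(index % 2 == 0);
                }
                assert_eq!(store.len(), 2000);
                assert_eq!(store.rank(0), 0);
                assert_eq!(store.rank(1), 1);
                assert_eq!(store.rank(2), 1);
                assert_eq!(store.rank(1999), 1000);
            }
        }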
1836    }
1837
1838    pub mod result {
1839
1840        use crate::common::index::CopyAs;
1841        use crate::{Clear, Columnar, Len, IndexMut, Index, IndexAs, Push, HeapSize};
1842        use crate::RankSelect;
1843
1844        #[derive(Copy, Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
1845        pub struct Results<SC, TC, CC=Vec<u64>, VC=Vec<u64>, WC=u64> {
1846            /// Bits set to `true` correspond to `Ok` variants.
1847            pub indexes: RankSelect<CC, VC, WC>,
1848            pub oks: SC,
1849            pub errs: TC,
1850        }
1851
1852        impl<S: Columnar, T: Columnar> Columnar for Result<S, T> {
1853            type Ref<'a> = Result<S::Ref<'a>, T::Ref<'a>> where S: 'a, T: 'a;
1854            fn copy_from<'a>(&mut self, other: Self::Ref<'a>) {
1855                match (&mut *self, other) {
1856                    (Ok(x), Ok(y)) => x.copy_from(y),
1857                    (Err(x), Err(y)) => x.copy_from(y),
1858                    (_, other) => { *self = Self::into_owned(other); },
1859                }
1860            }
1861            fn into_owned<'a>(other: Self::Ref<'a>) -> Self {
1862                match other {
1863                    Ok(y) => Ok(S::into_owned(y)),
1864                    Err(y) => Err(T::into_owned(y)),
1865                }
1866            }
1867            type Container = Results<S::Container, T::Container>;
1868        }
1869
1870        impl<S: Columnar, T: Columnar, SC: crate::Container<S>, TC: crate::Container<T>> crate::Container<Result<S, T>> for Results<SC, TC> {
1871            type Borrowed<'a> = Results<SC::Borrowed<'a>, TC::Borrowed<'a>, &'a [u64], &'a [u64], &'a u64> where SC: 'a, TC: 'a, S:'a, T: 'a;
1872            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1873                Results {
1874                    indexes: self.indexes.borrow(),
1875                    oks: self.oks.borrow(),
1876                    errs: self.errs.borrow(),
1877                }
1878            }
1879        }
1880
1881        impl<'a, SC: crate::AsBytes<'a>, TC: crate::AsBytes<'a>, CC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for Results<SC, TC, CC, VC, &'a u64> {
1882            #[inline(always)]
1883            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1884                let iter = self.indexes.as_bytes();
1885                let iter = crate::chain(iter, self.oks.as_bytes());
1886                crate::chain(iter, self.errs.as_bytes())
1887            }
1888        }
1889        impl<'a, SC: crate::FromBytes<'a>, TC: crate::FromBytes<'a>, CC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for Results<SC, TC, CC, VC, &'a u64> {
1890            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1891                Self {
1892                    indexes: crate::FromBytes::from_bytes(bytes),
1893                    oks: crate::FromBytes::from_bytes(bytes),
1894                    errs: crate::FromBytes::from_bytes(bytes),
1895                }
1896            }
1897        }
1898
1899        impl<SC, TC, CC, VC: Len, WC: Copy+CopyAs<u64>> Len for Results<SC, TC, CC, VC, WC> {
1900            #[inline(always)] fn len(&self) -> usize { self.indexes.len() }
1901        }
1902
1903        impl<SC, TC, CC, VC, WC> Index for Results<SC, TC, CC, VC, WC>
1904        where
1905            SC: Index,
1906            TC: Index,
1907            CC: IndexAs<u64> + Len,
1908            VC: IndexAs<u64> + Len,
1909            WC: Copy + CopyAs<u64>,
1910        {
1911            type Ref = Result<SC::Ref, TC::Ref>;
1912            fn get(&self, index: usize) -> Self::Ref {
1913                if self.indexes.get(index) {
1914                    Ok(self.oks.get(self.indexes.rank(index)))
1915                } else {
1916                    Err(self.errs.get(index - self.indexes.rank(index)))
1917                }
1918            }
1919        }
1920        impl<'a, SC, TC, CC, VC, WC> Index for &'a Results<SC, TC, CC, VC, WC>
1921        where
1922            &'a SC: Index,
1923            &'a TC: Index,
1924            CC: IndexAs<u64> + Len,
1925            VC: IndexAs<u64> + Len,
1926            WC: Copy + CopyAs<u64>,
1927        {
1928            type Ref = Result<<&'a SC as Index>::Ref, <&'a TC as Index>::Ref>;
1929            fn get(&self, index: usize) -> Self::Ref {
1930                if self.indexes.get(index) {
1931                    Ok((&self.oks).get(self.indexes.rank(index)))
1932                } else {
1933                    Err((&self.errs).get(index - self.indexes.rank(index)))
1934                }
1935            }
1936        }
1937
1938        // NB: You are not allowed to change the variant, but can change its contents.
1939        impl<SC: IndexMut, TC: IndexMut, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len> IndexMut for Results<SC, TC, CC, VC> {
1940            type IndexMut<'a> = Result<SC::IndexMut<'a>, TC::IndexMut<'a>> where SC: 'a, TC: 'a, CC: 'a, VC: 'a;
1941            fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
1942                if self.indexes.get(index) {
1943                    Ok(self.oks.get_mut(self.indexes.rank(index)))
1944                } else {
1945                    Err(self.errs.get_mut(index - self.indexes.rank(index)))
1946                }
1947            }
1948        }
1949
1950        impl<S, SC: Push<S>, T, TC: Push<T>> Push<Result<S, T>> for Results<SC, TC> {
1951            #[inline]
1952            fn push(&mut self, item: Result<S, T>) {
1953                match item {
1954                    Ok(item) => {
1955                        self.indexes.push(true);
1956                        self.oks.push(item);
1957                    }
1958                    Err(item) => {
1959                        self.indexes.push(false);
1960                        self.errs.push(item);
1961                    }
1962                }
1963            }
1964        }
1965        impl<'a, S, SC: Push<&'a S>, T, TC: Push<&'a T>> Push<&'a Result<S, T>> for Results<SC, TC> {
1966            #[inline]
1967            fn push(&mut self, item: &'a Result<S, T>) {
1968                match item {
1969                    Ok(item) => {
1970                        self.indexes.push(true);
1971                        self.oks.push(item);
1972                    }
1973                    Err(item) => {
1974                        self.indexes.push(false);
1975                        self.errs.push(item);
1976                    }
1977                }
1978            }
1979        }
1980
1981        impl<SC: Clear, TC: Clear> Clear for Results<SC, TC> {
1982            fn clear(&mut self) {
1983                self.indexes.clear();
1984                self.oks.clear();
1985                self.errs.clear();
1986            }
1987        }
1988
1989        impl<SC: HeapSize, TC: HeapSize> HeapSize for Results<SC, TC> {
1990            fn heap_size(&self) -> (usize, usize) {
1991                let (l0, c0) = self.oks.heap_size();
1992                let (l1, c1) = self.errs.heap_size();
1993                let (li, ci) = self.indexes.heap_size();
1994                (l0 + l1 + li, c0 + c1 + ci)
1995            }
1996        }
1997
1998        #[cfg(test)]
1999        mod test {
2000            #[test]
2001            fn round_trip() {
2002
2003                use crate::Columnar;
2004                use crate::common::{Index, Push, HeapSize, Len};
2005
2006                let mut column: <Result<u64, u64> as Columnar>::Container = Default::default();
2007                for i in 0..100 {
2008                    column.push(Ok::<u64, u64>(i));
2009                    column.push(Err::<u64, u64>(i));
2010                }
2011
2012                assert_eq!(column.len(), 200);
2013                assert_eq!(column.heap_size(), (1624, 2080));
2014
2015                for i in 0..100 {
2016                    assert_eq!(column.get(2*i+0), Ok(i as u64));
2017                    assert_eq!(column.get(2*i+1), Err(i as u64));
2018                }
2019
2020                let mut column: <Result<u64, u8> as Columnar>::Container = Default::default();
2021                for i in 0..100 {
2022                    column.push(Ok::<u64, u8>(i as u64));
2023                    column.push(Err::<u64, u8>(i as u8));
2024                }
2025
2026                assert_eq!(column.len(), 200);
2027                assert_eq!(column.heap_size(), (924, 1184));
2028
2029                for i in 0..100 {
2030                    assert_eq!(column.get(2*i+0), Ok(i as u64));
2031                    assert_eq!(column.get(2*i+1), Err(i as u8));
2032                }
2033            }
2034        }
2035    }
2036
2037    pub mod option {
2038
2039        use crate::common::index::CopyAs;
2040        use crate::{Clear, Columnar, Len, IndexMut, Index, IndexAs, Push, HeapSize};
2041        use crate::RankSelect;
2042
2043        #[derive(Copy, Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
2044        pub struct Options<TC, CC=Vec<u64>, VC=Vec<u64>, WC=u64> {
2045            /// Uses one bit for each item to indicate the variant, plus amortized counts
2046            /// to enable efficient rank determination.
2047            pub indexes: RankSelect<CC, VC, WC>,
2048            pub somes: TC,
2049        }
2050
2051        impl<T: Columnar> Columnar for Option<T> {
2052            type Ref<'a> = Option<T::Ref<'a>> where T: 'a;
2053            fn copy_from<'a>(&mut self, other: Self::Ref<'a>) {
2054                match (&mut *self, other) {
2055                    (Some(x), Some(y)) => { x.copy_from(y); }
2056                    (_, other) => { *self = Self::into_owned(other); }
2057                }
2058            }
2059            fn into_owned<'a>(other: Self::Ref<'a>) -> Self {
2060                other.map(|x| T::into_owned(x))
2061            }
2062            type Container = Options<T::Container>;
2063        }
2064
2065        impl<T: Columnar, TC: crate::Container<T>> crate::Container<Option<T>> for Options<TC> {
2066            type Borrowed<'a> = Options<TC::Borrowed<'a>, &'a [u64], &'a [u64], &'a u64> where TC: 'a, T: 'a;
2067            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
2068                Options {
2069                    indexes: self.indexes.borrow(),
2070                    somes: self.somes.borrow(),
2071                }
2072            }
2073        }
2074
2075        impl<'a, TC: crate::AsBytes<'a>, CC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for Options<TC, CC, VC, &'a u64> {
2076            #[inline(always)]
2077            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
2078                crate::chain(self.indexes.as_bytes(), self.somes.as_bytes())
2079            }
2080        }
2081
2082        impl <'a, TC: crate::FromBytes<'a>, CC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for Options<TC, CC, VC, &'a u64> {
2083            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
2084                Self {
2085                    indexes: crate::FromBytes::from_bytes(bytes),
2086                    somes: crate::FromBytes::from_bytes(bytes),
2087                }
2088            }
2089        }
2090
2091        impl<T, CC, VC: Len, WC: Copy + CopyAs<u64>> Len for Options<T, CC, VC, WC> {
2092            #[inline(always)] fn len(&self) -> usize { self.indexes.len() }
2093        }
2094
2095        impl<TC: Index, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len, WC: Copy+CopyAs<u64>> Index for Options<TC, CC, VC, WC> {
2096            type Ref = Option<TC::Ref>;
2097            fn get(&self, index: usize) -> Self::Ref {
2098                if self.indexes.get(index) {
2099                    Some(self.somes.get(self.indexes.rank(index)))
2100                } else {
2101                    None
2102                }
2103            }
2104        }
2105        impl<'a, TC, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len, WC: Copy+CopyAs<u64>> Index for &'a Options<TC, CC, VC, WC>
2106        where &'a TC: Index
2107        {
2108            type Ref = Option<<&'a TC as Index>::Ref>;
2109            fn get(&self, index: usize) -> Self::Ref {
2110                if self.indexes.get(index) {
2111                    Some((&self.somes).get(self.indexes.rank(index)))
2112                } else {
2113                    None
2114                }
2115            }
2116        }
2117        impl<TC: IndexMut, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len> IndexMut for Options<TC, CC, VC> {
2118            type IndexMut<'a> = Option<TC::IndexMut<'a>> where TC: 'a, CC: 'a, VC: 'a;
2119            fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
2120                if self.indexes.get(index) {
2121                    Some(self.somes.get_mut(self.indexes.rank(index)))
2122                } else {
2123                    None
2124                }
2125            }
2126        }
2127
2128        impl<T, TC: Push<T> + Len> Push<Option<T>> for Options<TC> {
2129            #[inline]
2130            fn push(&mut self, item: Option<T>) {
2131                match item {
2132                    Some(item) => {
2133                        self.indexes.push(true);
2134                        self.somes.push(item);
2135                    }
2136                    None => {
2137                        self.indexes.push(false);
2138                    }
2139                }
2140            }
2141        }
2142        impl<'a, T, TC: Push<&'a T> + Len> Push<&'a Option<T>> for Options<TC> {
2143            #[inline]
2144            fn push(&mut self, item: &'a Option<T>) {
2145                match item {
2146                    Some(item) => {
2147                        self.indexes.push(true);
2148                        self.somes.push(item);
2149                    }
2150                    None => {
2151                        self.indexes.push(false);
2152                    }
2153                }
2154            }
2155        }
2156
2157        impl<TC: Clear> Clear for Options<TC> {
2158            fn clear(&mut self) {
2159                self.indexes.clear();
2160                self.somes.clear();
2161            }
2162        }
2163
2164        impl<TC: HeapSize> HeapSize for Options<TC> {
2165            fn heap_size(&self) -> (usize, usize) {
2166                let (l0, c0) = self.somes.heap_size();
2167                let (li, ci) = self.indexes.heap_size();
2168                (l0 + li, c0 + ci)
2169            }
2170        }
2171
2172        #[cfg(test)]
2173        mod test {
2174
2175            use crate::Columnar;
2176            use crate::common::{Index, HeapSize, Len};
2177            use crate::Options;
2178
2179            #[test]
2180            fn round_trip_some() {
2181                // Type annotation is important to avoid some inference overflow.
2182                let store: Options<Vec<i32>> = Columnar::into_columns((0..100).map(Some));
2183                assert_eq!(store.len(), 100);
2184                assert!((&store).index_iter().zip(0..100).all(|(a, b)| a == Some(&b)));
2185                assert_eq!(store.heap_size(), (408, 544));
2186            }
2187
2188            #[test]
2189            fn round_trip_none() {
2190                let store = Columnar::into_columns((0..100).map(|_x| None::<i32>));
2191                assert_eq!(store.len(), 100);
2192                let foo = &store;
2193                assert!(foo.index_iter().zip(0..100).all(|(a, _b)| a == None));
2194                assert_eq!(store.heap_size(), (8, 32));
2195            }
2196
2197            #[test]
2198            fn round_trip_mixed() {
2199                // Type annotation is important to avoid some inference overflow.
2200                let store: Options<Vec<i32>>  = Columnar::into_columns((0..100).map(|x| if x % 2 == 0 { Some(x) } else { None }));
2201                assert_eq!(store.len(), 100);
2202                assert!((&store).index_iter().zip(0..100).all(|(a, b)| a == if b % 2 == 0 { Some(&b) } else { None }));
2203                assert_eq!(store.heap_size(), (208, 288));
2204            }
2205        }
2206    }
2207}
2208
2209pub use lookback::{Repeats, Lookbacks};
2210/// Containers that can store either values, or offsets to prior values.
2211///
2212/// This has the potential to be more efficient than a list of `T` when many values repeat in
2213/// close proximity. Values must be equatable, and the degree of lookback can be configured.
2214pub mod lookback {
2215
2216    use crate::{Options, Results, Push, Index, Len, HeapSize};
2217
2218    /// A container that encodes repeated values with a `None` variant, at the cost of extra bits for every record.
2219    #[derive(Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
2220    pub struct Repeats<TC, const N: u8 = 255> {
2221        /// Some(x) encodes a new value, and None indicates a repeat of the most recent value.
2222        pub inner: Options<TC>,
2223    }
2224
2225    impl<T: PartialEq, TC: Push<T> + Len, const N: u8> Push<T> for Repeats<TC, N>
2226    where
2227        for<'a> &'a TC: Index,
2228        for<'a> <&'a TC as Index>::Ref : PartialEq<T>,
2229    {
2230        #[inline]
2231        fn push(&mut self, item: T) {
2232            // Look at the last `somes` value for a potential match.
2233            let insert: Option<T> = if (&self.inner.somes).last().map(|x| x.eq(&item)) == Some(true) {
2234                None
2235            } else {
2236                Some(item)
2237            };
2238            self.inner.push(insert);
2239        }
2240    }
2241
2242    impl<TC: Len, const N: u8> Len for Repeats<TC, N> {
2243        #[inline(always)] fn len(&self) -> usize { self.inner.len() }
2244    }
2245
2246    impl<TC: Index, const N: u8> Index for Repeats<TC, N> {
2247        type Ref = TC::Ref;
2248        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
2249            match self.inner.get(index) {
2250                Some(item) => item,
2251                None => {
2252                    let pos = self.inner.indexes.rank(index) - 1;
2253                    self.inner.somes.get(pos)
2254                },
2255            }
2256        }
2257    }
2258
2259    impl<TC: HeapSize, const N: u8> HeapSize for Repeats<TC, N> {
2260        fn heap_size(&self) -> (usize, usize) {
2261            self.inner.heap_size()
2262        }
2263    }
2264
2265    #[derive(Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
2266    pub struct Lookbacks<TC, VC = Vec<u8>, const N: u8 = 255> {
2267        /// Ok(x) encodes a new value, and Err(y) indicates the value `y` positions back among the `Ok`s.
2268        pub inner: Results<TC, VC>,
2269    }
2270
2271    impl<T: PartialEq, TC: Push<T> + Len, VC: Push<u8>, const N: u8> Push<T> for Lookbacks<TC, VC, N>
2272    where
2273        for<'a> &'a TC: Index,
2274        for<'a> <&'a TC as Index>::Ref : PartialEq<T>,
2275    {
2276        #[inline]
2277        fn push(&mut self, item: T) {
2278            // Look back through up to `N` prior values for a matching value.
2279            let oks_len = self.inner.oks.len();
2280            let find = (0u8 .. N).take(self.inner.oks.len()).find(|i| (&self.inner.oks).get(oks_len - (*i as usize) - 1) == item);
2281            let insert: Result<T, u8> = if let Some(back) = find { Err(back) } else { Ok(item) };
2282            self.inner.push(insert);
2283        }
2284    }
2285
2286    impl<TC, VC, const N: u8> Len for Lookbacks<TC, VC, N> {
2287        #[inline(always)] fn len(&self) -> usize { self.inner.len() }
2288    }
2289
2290    impl<TC: Index, VC: Index<Ref=u8>, const N: u8> Index for Lookbacks<TC, VC, N> {
2291        type Ref = TC::Ref;
2292        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
2293            match self.inner.get(index) {
2294                Ok(item) => item,
2295                Err(back) => {
2296                    let pos = self.inner.indexes.rank(index) - 1;
2297                    self.inner.oks.get(pos - (back as usize))
2298                },
2299            }
2300        }
2301    }
2302    impl<'a, TC, const N: u8> Index for &'a Lookbacks<TC, Vec<u8>, N>
2303    where
2304        &'a TC: Index,
2305    {
2306        type Ref = <&'a TC as Index>::Ref;
2307        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
2308            match (&self.inner).get(index) {
2309                Ok(item) => item,
2310                Err(back) => {
2311                    let pos = self.inner.indexes.rank(index) - 1;
2312                    (&self.inner.oks).get(pos - (*back as usize))
2313                },
2314            }
2315        }
2316    }
2317
2318    impl<TC: HeapSize, VC: HeapSize, const N: u8> HeapSize for Lookbacks<TC, VC, N> {
2319        fn heap_size(&self) -> (usize, usize) {
2320            self.inner.heap_size()
2321        }
2322    }
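    // A minimal sketch, not from the original source: it round-trips small sequences with
    // repeated values through `Repeats` and `Lookbacks`, checking that `Index` reproduces
    // the original values. It relies only on the impls defined in this module.
    #[cfg(test)]
    mod test {
        use crate::common::{Index, Len, Push};

        #[test]
        fn repeats_round_trip() {
            let mut store: super::Repeats<Vec<u64>> = Default::default();
            let items = [0u64, 0, 1, 1, 1, 2, 0, 0];
            for item in &items {
                store.push(item);
            }
            assert_eq!(store.len(), items.len());
            for (index, item) in items.iter().enumerate() {
                assert_eq!(store.get(index), *item);
            }
        }

        #[test]
        fn lookbacks_round_trip() {
            let mut store: super::Lookbacks<Vec<u64>> = Default::default();
            let items = [5u64, 5, 6, 5, 7];
            for item in &items {
                store.push(item);
            }
            assert_eq!(store.len(), items.len());
            for (index, item) in items.iter().enumerate() {
                assert_eq!(store.get(index), *item);
            }
        }
    }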
2323}
2324
2325/// Containers for `Vec<(K, V)>` that form columns by `K` keys.
2326mod maps {
2327
2328    use crate::{Len, Push};
2329    use crate::Options;
2330
2331    /// A container for `Vec<(K, V)>` items.
2332    ///
2333    /// Each inserted map is expected to have at most one `val` for any `key`.
2334    /// Each is stored with `None` variants for absent keys. As such,
2335    /// this type is not meant for large sparse key spaces.
2336    pub struct KeyMaps<CK, CV> {
2337        _keys: CK,
2338        vals: Vec<CV>,
2339    }
2340
2341    impl<CK, CV: Len> Len for KeyMaps<CK, CV> {
2342        fn len(&self) -> usize {
2343            // This panics if we have no keys, because `vals` is then empty.
2344            self.vals[0].len()
2345        }
2346    }
2347
2348    // Should this implementation preserve the order of the key-val pairs?
2349    // That might want an associated `Vec<usize>` for each, to order the keys.
2350    // If they are all identical, it shouldn't take up any space, though.
2351    impl<K: PartialOrd, V, CV: Push<K>> Push<Vec<(K, V)>> for KeyMaps<Vec<K>, CV> {
2352        #[inline]
2353        fn push(&mut self, _item: Vec<(K, V)>) {
2354            // Intentionally left unimplemented while the design questions above are unresolved.
2355        }
2356    }
2357
2358    /// A container for `Vec<V>` items sliced by index.
2359    ///
2360    /// The container puts each `item[i]` element into the `i`th column.
2361    pub struct ListMaps<CV> {
2362        vals: Vec<Options<CV>>,
2363    }
2364
2365    impl<CV> Default for ListMaps<CV> {
2366        fn default() -> Self {
2367            ListMaps { vals: Default::default() }
2368        }
2369    }
2370
2371    impl<CV: Len> Len for ListMaps<CV> {
2372        fn len(&self) -> usize {
2373            self.vals[0].len()
2374        }
2375    }
2376
2377    impl<'a, V, CV: Push<&'a V> + Len + Default> Push<&'a Vec<V>> for ListMaps<CV> {
2378        #[inline]
2379        fn push(&mut self, item: &'a Vec<V>) {
2380            let mut item_len = item.len();
2381            let self_len = if self.vals.is_empty() { 0 } else { self.vals[0].len() };
2382            while self.vals.len() < item_len {
2383                let mut new_store: Options<CV> = Default::default();
2384                for _ in 0..self_len {
2385                    new_store.push(None);
2386                }
2387                self.vals.push(new_store);
2388            }
2389            for (store, i) in self.vals.iter_mut().zip(item) {
2390                store.push(Some(i));
2391            }
2392            while item_len < self.vals.len() {
2393                self.vals[item_len].push(None);
2394                item_len += 1;
2395            }
2396        }
2397    }
2398
2399    #[cfg(test)]
2400    mod test {
2401
2402        use crate::common::{Len, Push};
2403        use crate::{Results, Strings};
2404
2405        #[test]
2406        fn round_trip_listmap() {
2407
2408            // Each record is a list whose first two elements are homogeneous and whose last is heterogeneous.
2409            let records = (0 .. 1024).map(|i|
2410                vec![
2411                    Ok(i),
2412                    Err(format!("{:?}", i)),
2413                    if i % 2 == 0 { Ok(i) } else { Err(format!("{:?}", i)) },
2414                ]
2415            );
2416
2417            // We'll stash all the records in a store whose type matches their shape.
2418            let mut store: super::ListMaps<Results<Vec<i32>, Strings>> = Default::default();
2419            for record in records {
2420                store.push(&record);
2421            }
2422
2423            // Demonstrate type-safe restructuring.
2424            // We expect the first two columns to be homogeneous, and the third to be mixed.
2425            let field0: Option<&[i32]> = if store.vals[0].somes.oks.len() == store.vals[0].len() {
2426                Some(&store.vals[0].somes.oks)
2427            } else { None };
2428
2429            let field1: Option<&Strings> = if store.vals[1].somes.errs.len() == store.vals[1].len() {
2430                Some(&store.vals[1].somes.errs)
2431            } else { None };
2432
2433            let field2: Option<&[i32]> = if store.vals[2].somes.oks.len() == store.vals[2].len() {
2434                Some(&store.vals[2].somes.oks)
2435            } else { None };
2436
2437            assert!(field0.is_some());
2438            assert!(field1.is_some());
2439            assert!(field2.is_none());
2440        }
2441    }
2442
2443}
2444
2445/// Containers for `isize` and `usize` that adapt to the size of the data.
2446///
2447/// Similar structures could be used for containers of `u8`, `u16`, `u32`, and `u64`,
2448/// without losing their type information, if one didn't need the bespoke compression.
2449mod sizes {
2450
2451    use crate::Push;
2452    use crate::Results;
2453
2454    /// A four-variant container for integers of varying sizes.
2455    struct Sizes<C0, C1, C2, C3> {
2456        /// Four variants stored separately.
2457        inner: Results<Results<C0, C1>, Results<C2, C3>>,
2458    }
2459
2460    impl<C0: Default, C1: Default, C2: Default, C3: Default> Default for Sizes<C0, C1, C2, C3> {
2461        fn default() -> Self {
2462            Sizes { inner: Default::default() }
2463        }
2464    }
2465
2466    impl<C0: Push<u8>, C1: Push<u16>, C2: Push<u32>, C3: Push<u64>> Push<usize> for Sizes<C0, C1, C2, C3> {
2467        #[inline]
2468        fn push(&mut self, item: usize) {
2469            if let Ok(item) = TryInto::<u8>::try_into(item) {
2470                self.inner.push(Ok(Ok(item)))
2471            } else if let Ok(item) = TryInto::<u16>::try_into(item) {
2472                self.inner.push(Ok(Err(item)))
2473            } else if let Ok(item) = TryInto::<u32>::try_into(item) {
2474                self.inner.push(Err(Ok(item)))
2475            } else if let Ok(item) = TryInto::<u64>::try_into(item) {
2476                self.inner.push(Err(Err(item)))
2477            } else {
2478                panic!("usize exceeds bounds of u64")
2479            }
2480        }
2481    }
2482
2483    impl<C0: Push<i8>, C1: Push<i16>, C2: Push<i32>, C3: Push<i64>> Push<isize> for Sizes<C0, C1, C2, C3> {
2484        #[inline]
2485        fn push(&mut self, item: isize) {
2486            if let Ok(item) = TryInto::<i8>::try_into(item) {
2487                self.inner.push(Ok(Ok(item)))
2488            } else if let Ok(item) = TryInto::<i16>::try_into(item) {
2489                self.inner.push(Ok(Err(item)))
2490            } else if let Ok(item) = TryInto::<i32>::try_into(item) {
2491                self.inner.push(Err(Ok(item)))
2492            } else if let Ok(item) = TryInto::<i64>::try_into(item) {
2493                self.inner.push(Err(Err(item)))
2494            } else {
2495                panic!("isize exceeds bounds of i64")
2496            }
2497        }
2498    }
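    // A minimal sketch, not from the original source: it pushes `usize` values of
    // increasing magnitude and checks that each lands in the variant sized to fit it,
    // by inspecting the nested `Results` directly.
    #[cfg(test)]
    mod test {
        use crate::common::Push;

        #[test]
        fn variant_selection() {
            let mut store: super::Sizes<Vec<u8>, Vec<u16>, Vec<u32>, Vec<u64>> = Default::default();
            store.push(1usize);
            store.push(1_000usize);
            store.push(1_000_000usize);
            assert_eq!(store.inner.oks.oks.len(), 1);
            assert_eq!(store.inner.oks.errs.len(), 1);
            assert_eq!(store.inner.errs.oks.len(), 1);
            assert_eq!(store.inner.errs.errs.len(), 0);
        }
    }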
2499}
2500
2501/// Roaring bitmap (and similar) containers.
2502pub mod roaring {
2503
2504    use crate::Results;
2505
2506    /// A container for `bool` that uses techniques from Roaring bitmaps.
2507    ///
2508    /// These techniques partition the bits into blocks of 2^16 bits,
2509    /// and encode each block based on its density: either a bitmap
2510    /// for dense blocks or a list of set bit positions for sparse blocks.
2511    ///
2512    /// Additionally, other representations encode runs of set bits.
2513    pub struct RoaringBits {
2514        _inner: Results<[u64; 1024], Vec<u16>>,
2515    }
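    // A minimal sketch, not from the original source: it illustrates the density rule the
    // comment above describes. A 2^16-bit block stored as a bitmap costs 1024 `u64`s
    // (8192 bytes), while a list of `u16` positions costs two bytes per set bit, so the
    // bitmap wins once a block holds at least 4096 set bits. The `encode_block` helper is
    // purely illustrative and is not part of `RoaringBits`.
    #[cfg(test)]
    mod test {
        /// Encode one 2^16-bit block as either a bitmap (dense) or a list of positions (sparse).
        fn encode_block(set_bits: &[u16]) -> Result<[u64; 1024], Vec<u16>> {
            if set_bits.len() >= 4096 {
                let mut bitmap = [0u64; 1024];
                for bit in set_bits {
                    bitmap[(*bit / 64) as usize] |= 1 << (*bit % 64);
                }
                Ok(bitmap)
            } else {
                Err(set_bits.to_vec())
            }
        }

        #[test]
        fn density_threshold() {
            assert!(encode_block(&[0, 3, 7]).is_err());
            let dense: Vec<u16> = (0u16 .. 8192).collect();
            assert!(encode_block(&dense).is_ok());
        }
    }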
2516}
2517
2518pub use chain_mod::{chain, chain_one};
2519
2520pub mod chain_mod {
2521    //! Chain two iterators, or an iterator and a single item. These iterators may improve
2522    //! inlining, at the expense of not providing any iterator traits beyond `Iterator` itself.
2523
2524    /// Chain two iterators together. The result first iterates over `a`, then `b`, until both are
2525    /// exhausted.
2526    ///
2527    /// This addresses a quirk where deep iterators would not be optimized to their full potential.
2528    /// Here, functions are marked with `#[inline(always)]` to indicate that the compiler should
2529    /// try hard to inline the iterators.
2530    #[inline(always)]
2531    pub fn chain<A: IntoIterator, B: IntoIterator<Item=A::Item>>(a: A, b: B) -> Chain<A::IntoIter, B::IntoIter> {
2532        Chain { a: Some(a.into_iter()), b: Some(b.into_iter()) }
2533    }
2534
2535    pub struct Chain<A, B> {
2536        a: Option<A>,
2537        b: Option<B>,
2538    }
2539
2540    impl<A, B> Iterator for Chain<A, B>
2541    where
2542        A: Iterator,
2543        B: Iterator<Item=A::Item>,
2544    {
2545        type Item = A::Item;
2546
2547        #[inline(always)]
2548        fn next(&mut self) -> Option<Self::Item> {
2549            if let Some(a) = self.a.as_mut() {
2550                let x = a.next();
2551                if x.is_none() {
2552                    self.a = None;
2553                } else {
2554                    return x;
2555                }
2556            }
2557            if let Some(b) = self.b.as_mut() {
2558                let x = b.next();
2559                if x.is_none() {
2560                    self.b = None;
2561                } else {
2562                    return x;
2563                }
2564            }
2565            None
2566        }
2567    }
2568
2569    /// Chain a single item onto the end of an iterator. The result first iterates over `a`,
2570    /// then yields `b`. As with [`chain`], the functions here are marked `#[inline(always)]`,
2571    /// which in some situations causes better inlining behavior with current Rust versions.
2572    #[inline(always)]
2573    pub fn chain_one<A: IntoIterator>(a: A, b: A::Item) -> ChainOne<A::IntoIter> {
2574        ChainOne { a: Some(a.into_iter()), b: Some(b) }
2575    }
2576
2577    pub struct ChainOne<A: Iterator> {
2578        a: Option<A>,
2579        b: Option<A::Item>,
2580    }
2581
2582    impl<A: Iterator> Iterator for ChainOne<A> {
2583        type Item = A::Item;
2584
2585        #[inline(always)]
2586        fn next(&mut self) -> Option<Self::Item> {
2587            if let Some(a) = self.a.as_mut() {
2588                let x = a.next();
2589                if x.is_none() {
2590                    self.a = None;
2591                    self.b.take()
2592                } else {
2593                    x
2594                }
2595            } else {
2596                None
2597            }
2598        }
2599    }
2600
2601    #[cfg(test)]
2602    mod test {
2603        use super::*;
2604
2605        #[test]
2606        fn test_chain() {
2607            let a = [1, 2, 3];
2608            let b = [4, 5, 6];
2609            let mut chain = chain(a, b);
2610            assert_eq!(chain.next(), Some(1));
2611            assert_eq!(chain.next(), Some(2));
2612            assert_eq!(chain.next(), Some(3));
2613            assert_eq!(chain.next(), Some(4));
2614            assert_eq!(chain.next(), Some(5));
2615            assert_eq!(chain.next(), Some(6));
2616            assert_eq!(chain.next(), None);
2617        }
2618
2619        #[test]
2620        fn test_chain_one() {
2621            let a = [1, 2, 3];
2622            let b = 4;
2623            let mut chain = chain_one(a, b);
2624            assert_eq!(chain.next(), Some(1));
2625            assert_eq!(chain.next(), Some(2));
2626            assert_eq!(chain.next(), Some(3));
2627            assert_eq!(chain.next(), Some(4));
2628            assert_eq!(chain.next(), None);
2629        }
2630    }
2631}