columnar/
lib.rs

1//! Types supporting flat / "columnar" layout for complex types.
2//!
3//! The intent is to re-layout `Vec<T>` types into vectors of reduced
4//! complexity, repeatedly. One should be able to push and pop easily,
5//! but indexing will be more complicated because we likely won't have
6//! a real `T` lying around to return as a reference. Instead, we will
7//! use Generic Associated Types (GATs) to provide alternate references.
8
9// Re-export derive crate.
10extern crate columnar_derive;
11pub use columnar_derive::Columnar;
12
13pub mod adts;
14pub mod boxed;
15
16pub use bytemuck;
17
18/// A type that can be represented in columnar form.
19///
20/// For a running example, a type like `(A, Vec<B>)`.
21pub trait Columnar : 'static {
22    /// Repopulates `self` from a reference.
23    ///
24    /// By default this just calls `into_owned()`, but it can be overridden.
25    fn copy_from<'a>(&mut self, other: Ref<'a, Self>) where Self: Sized {
26        *self = Self::into_owned(other);
27    }
28    /// Produce an instance of `Self` from `Self::Ref<'a>`.
29    fn into_owned<'a>(other: Ref<'a, Self>) -> Self;
30
31    /// The type that stores the columnar representation.
32    ///
33    /// The container must support pushing both `&Self` and `Self::Ref<'_>`.
34    /// In our running example this might be `(Vec<A>, Vecs<Vec<B>>)`.
35    type Container: ContainerBytes + for<'a> Push<&'a Self>;
36
37    /// Converts a sequence of the references to the type into columnar form.
38    fn as_columns<'a, I>(selves: I) -> Self::Container where I: IntoIterator<Item=&'a Self>, Self: 'a {
39        let mut columns: Self::Container = Default::default();
40        for item in selves {
41            columns.push(item);
42        }
43        columns
44    }
45    /// Converts a sequence of the type into columnar form.
46    ///
47    /// This consumes the owned `Self` types but uses them only by reference.
48    /// Consider `as_columns()` instead if it is equally ergonomic.
49    fn into_columns<I>(selves: I) -> Self::Container where I: IntoIterator<Item = Self>, Self: Sized {
50        let mut columns: Self::Container = Default::default();
51        for item in selves {
52            columns.push(&item);
53        }
54        columns
55    }
56    /// Reborrows the reference type to a shorter lifetime.
57    ///
58    /// Implementations must not change the contents of the reference, and should mark
59    /// the function as `#[inline(always)]`. It is no-op to overcome limitations
60    /// of the borrow checker. In many cases, it is enough to return `thing` as-is.
61    ///
62    /// For example, when comparing two references `Ref<'a>` and `Ref<'b>`, we can
63    /// reborrow both to a local lifetime to compare them. This allows us to keep the
64    /// lifetimes `'a` and `'b` separate, while otherwise Rust would unify them.
65    #[inline(always)] fn reborrow<'b, 'a: 'b>(thing: Ref<'a, Self>) -> Ref<'b, Self> {
66        Self::Container::reborrow_ref(thing)
67    }
68}
69
70/// The container type of columnar type `T`.
71///
72/// Equivalent to `<T as Columnar>::Container`.
73pub type ContainerOf<T> = <T as Columnar>::Container;
74
75/// For a lifetime, the reference type of columnar type `T`.
76///
77/// Equivalent to `<ContainerOf<T> as Container>::Ref<'a>`.
78pub type Ref<'a, T> = <ContainerOf<T> as Container>::Ref<'a>;
79
80/// A container that can hold `C`, and provide its preferred references.
81///
82/// As an example, `(Vec<A>, Vecs<Vec<B>>)`.
83pub trait Container : Len + Clear + for<'a> Push<Self::Ref<'a>> + Clone + Default + Send + 'static {
84    /// For each lifetime, a reference with that lifetime.
85    ///
86    /// As an example, `(&'a A, &'a [B])`.
87    type Ref<'a> : Copy;
88    /// The type of a borrowed container.
89    ///
90    /// Corresponding to our example, `(&'a [A], Vecs<&'a [B], &'a [u64]>)`.
91    type Borrowed<'a>: Copy + Len + Index<Ref = Self::Ref<'a>> where Self: 'a;
92    /// Converts a reference to the type to a borrowed variant.
93    fn borrow<'a>(&'a self) -> Self::Borrowed<'a>;
94    /// Reborrows the borrowed type to a shorter lifetime. See [`Columnar::reborrow`] for details.
95    fn reborrow<'b, 'a: 'b>(item: Self::Borrowed<'a>) -> Self::Borrowed<'b> where Self: 'a;
96    /// Reborrows the borrowed type to a shorter lifetime. See [`Columnar::reborrow`] for details.
97    fn reborrow_ref<'b, 'a: 'b>(item: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a;
98
99
100    /// Allocates an empty container that can be extended by `selves` without reallocation.
101    ///
102    /// This goal is optimistic, and some containers may struggle to size correctly, especially
103    /// if they employ compression or other variable-sizing techniques that respond to the data
104    /// and the order in which is it presented. Best effort, but still useful!
105    fn with_capacity_for<'a, I>(selves: I) -> Self
106    where
107        Self: 'a,
108        I: Iterator<Item = Self::Borrowed<'a>> + Clone
109    {
110        let mut output = Self::default();
111        output.reserve_for(selves);
112        output
113    }
114
115    // Ensure that `self` can extend from `selves` without reallocation.
116    fn reserve_for<'a, I>(&mut self, selves: I)
117    where
118        Self: 'a,
119        I: Iterator<Item = Self::Borrowed<'a>> + Clone;
120
121
122    /// Extends `self` by a range in `other`.
123    ///
124    /// This method has a default implementation, but can and should be specialized when ranges can be copied.
125    /// As an example, lists of lists are often backed by contiguous elements, all of which can be memcopied,
126    /// with only the offsets into them (the bounds) to push either before or after (rather than during).
127    #[inline(always)]
128    fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
129        self.extend(range.map(|i| other.get(i)))
130    }
131}
132
133impl<T: Clone + Send + 'static> Container for Vec<T> {
134    type Ref<'a> = &'a T;
135    type Borrowed<'a> = &'a [T];
136    fn borrow<'a>(&'a self) -> Self::Borrowed<'a> { &self[..] }
137    fn reborrow<'b, 'a: 'b>(item: Self::Borrowed<'a>) -> Self::Borrowed<'b> where Self: 'a { item }
138    fn reborrow_ref<'b, 'a: 'b>(item: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { item }
139    fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
140        self.extend_from_slice(&other[range])
141    }
142    fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
143        self.reserve(selves.map(|x| x.len()).sum::<usize>())
144    }
145}
146
147/// A container that can also be viewed as and reconstituted from bytes.
148pub trait ContainerBytes : for<'a> Container<Borrowed<'a> : AsBytes<'a> + FromBytes<'a>> { }
149impl<C: for<'a> Container<Borrowed<'a> : AsBytes<'a> + FromBytes<'a>>> ContainerBytes for C { }
150
151pub use common::{Clear, Len, Push, IndexMut, Index, IndexAs, HeapSize, Slice, AsBytes, FromBytes};
152/// Common traits and types that are re-used throughout the module.
153pub mod common {
154
155    /// A type with a length.
156    pub trait Len {
157        /// The number of contained elements.
158        fn len(&self) -> usize;
159        /// Whether this contains no elements.
160        fn is_empty(&self) -> bool {
161            self.len() == 0
162        }
163    }
164    impl<L: Len + ?Sized> Len for &L {
165        #[inline(always)] fn len(&self) -> usize { L::len(*self) }
166    }
167    impl<L: Len + ?Sized> Len for &mut L {
168        #[inline(always)] fn len(&self) -> usize { L::len(*self) }
169    }
170    impl<T> Len for Vec<T> {
171        #[inline(always)] fn len(&self) -> usize { self.len() }
172    }
173    impl<T> Len for [T] {
174        #[inline(always)] fn len(&self) -> usize { <[T]>::len(self) }
175    }
176
177    /// A type that can accept items of type `T`.
178    pub trait Push<T> {
179        /// Pushes an item onto `self`.
180        fn push(&mut self, item: T);
181        /// Pushes elements of an iterator onto `self`.
182        #[inline(always)] fn extend(&mut self, iter: impl IntoIterator<Item=T>) {
183            for item in iter {
184                self.push(item);
185            }
186        }
187    }
188    impl<T> Push<T> for Vec<T> {
189        #[inline(always)] fn push(&mut self, item: T) { self.push(item) }
190
191        #[inline(always)]
192        fn extend(&mut self, iter: impl IntoIterator<Item=T>) {
193            std::iter::Extend::extend(self, iter)
194        }
195    }
196    impl<'a, T: Clone> Push<&'a T> for Vec<T> {
197        #[inline(always)] fn push(&mut self, item: &'a T) { self.push(item.clone()) }
198
199        #[inline(always)]
200        fn extend(&mut self, iter: impl IntoIterator<Item=&'a T>) {
201            std::iter::Extend::extend(self, iter.into_iter().cloned())
202        }
203    }
204    impl<'a, T: Clone> Push<&'a [T]> for Vec<T> {
205        #[inline(always)] fn push(&mut self, item: &'a [T]) { self.clone_from_slice(item) }
206    }
207
208
209    pub use index::{Index, IndexMut, IndexAs};
210    /// Traits for accessing elements by `usize` indexes.
211    ///
212    /// There are several traits, with a core distinction being whether the returned reference depends on the lifetime of `&self`.
213    /// For one trait `Index` the result does not depend on this lifetime.
214    /// There is a third trait `IndexMut` that allows mutable access, that may be less commonly implemented.
215    pub mod index {
216
217        use crate::Len;
218        use crate::common::IterOwn;
219
220        /// A type that can be mutably accessed by `usize`.
221        pub trait IndexMut {
222            /// Type mutably referencing an indexed element.
223            type IndexMut<'a> where Self: 'a;
224            fn get_mut(& mut self, index: usize) -> Self::IndexMut<'_>;
225            /// A reference to the last element, should one exist.
226            #[inline(always)] fn last_mut(&mut self) -> Option<Self::IndexMut<'_>> where Self: Len {
227                if self.is_empty() { None }
228                else { Some(self.get_mut(self.len()-1)) }
229            }
230        }
231
232        impl<T: IndexMut + ?Sized> IndexMut for &mut T {
233            type IndexMut<'a> = T::IndexMut<'a> where Self: 'a;
234            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
235                T::get_mut(*self, index)
236            }
237        }
238        impl<T> IndexMut for Vec<T> {
239            type IndexMut<'a> = &'a mut T where Self: 'a;
240            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> { &mut self[index] }
241        }
242        impl<T> IndexMut for [T] {
243            type IndexMut<'a> = &'a mut T where Self: 'a;
244            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> { &mut self[index] }
245        }
246
247        /// A type that can be accessed by `usize` but without borrowing `self`.
248        ///
249        /// This can be useful for types which include their own lifetimes, and
250        /// which wish to express that their reference has the same lifetime.
251        /// In the GAT `Index`, the `Ref<'_>` lifetime would be tied to `&self`.
252        ///
253        /// This trait may be challenging to implement for owning containers,
254        /// for example `Vec<_>`, which would need their `Ref` type to depend
255        /// on the lifetime of the `&self` borrow in the `get()` function.
256        pub trait Index {
257            /// The type returned by the `get` method.
258            ///
259            /// Notably, this does not vary with lifetime, and will not depend on the lifetime of `&self`.
260            type Ref;
261            fn get(&self, index: usize) -> Self::Ref;
262            #[inline(always)] fn last(&self) -> Option<Self::Ref> where Self: Len {
263                if self.is_empty() { None }
264                else { Some(self.get(self.len()-1)) }
265            }
266            /// Converts `&self` into an iterator.
267            ///
268            /// This has an awkward name to avoid collision with `iter()`, which may also be implemented.
269            #[inline(always)]
270            fn index_iter(&self) -> IterOwn<&Self> {
271                IterOwn {
272                    index: 0,
273                    slice: self,
274                }
275            }
276            /// Converts `self` into an iterator.
277            ///
278            /// This has an awkward name to avoid collision with `into_iter()`, which may also be implemented.
279            #[inline(always)]
280            fn into_index_iter(self) -> IterOwn<Self> where Self: Sized {
281                IterOwn {
282                    index: 0,
283                    slice: self,
284                }
285            }
286        }
287
288        // These implementations aim to reveal a longer lifetime, or to copy results to avoid a lifetime.
289        impl<'a, T> Index for &'a [T] {
290            type Ref = &'a T;
291            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { &self[index] }
292        }
293        impl<T: Copy> Index for [T] {
294            type Ref = T;
295            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self[index] }
296        }
297        impl<'a, T> Index for &'a Vec<T> {
298            type Ref = &'a T;
299            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { &self[index] }
300        }
301        impl<T: Copy> Index for Vec<T> {
302            type Ref = T;
303            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self[index] }
304        }
305
306
307        /// Types that can be converted into another type by copying.
308        ///
309        /// We use this trait to unify the ability of `T` and `&T` to be converted into `T`.
310        /// This is handy for copy types that we'd like to use, like `u8`, `u64` and `usize`.
311        pub trait CopyAs<T> : Copy {
312            fn copy_as(self) -> T;
313        }
314        impl<T: Copy> CopyAs<T> for &T {
315            #[inline(always)] fn copy_as(self) -> T { *self }
316        }
317        impl<T: Copy> CopyAs<T> for T {
318            #[inline(always)] fn copy_as(self) -> T { self }
319        }
320
321        pub trait IndexAs<T> {
322            fn index_as(&self, index: usize) -> T;
323            #[inline(always)] fn last(&self) -> Option<T> where Self: Len {
324                if self.is_empty() { None }
325                else { Some(self.index_as(self.len()-1)) }
326            }
327        }
328
329        impl<T: Index, S> IndexAs<S> for T where T::Ref: CopyAs<S> {
330            #[inline(always)] fn index_as(&self, index: usize) -> S { self.get(index).copy_as() }
331        }
332
333    }
334
335    use crate::Container;
336    use crate::common::index::CopyAs;
337    /// A composite trait which captures the ability `Push<&T>` and `Index<Ref = T>`.
338    ///
339    /// Implement `CopyAs<T>` for the reference type, and `Push<&'a T>` for the container.
340    pub trait PushIndexAs<T> : for<'a> Container<Ref<'a>: CopyAs<T>> + for<'a> Push<&'a T> { }
341    impl<T, C: for<'a> Container<Ref<'a>: CopyAs<T>> + for<'a> Push<&'a T>> PushIndexAs<T> for C { }
342
343    /// A type that can remove its contents and return to an empty state.
344    ///
345    /// Generally, this method does not release resources, and is used to make the container available for re-insertion.
346    pub trait Clear {
347        /// Clears `self`, without changing its capacity.
348        fn clear(&mut self);
349    }
350    // Vectors can be cleared.
351    impl<T> Clear for Vec<T> {
352        #[inline(always)] fn clear(&mut self) { self.clear() }
353    }
354    // Slice references can be cleared.
355    impl<T> Clear for &[T] {
356        #[inline(always)] fn clear(&mut self) { *self = &[]; }
357    }
358
359    pub trait HeapSize {
360        /// Active (len) and allocated (cap) heap sizes in bytes.
361        /// This should not include the size of `self` itself.
362        fn heap_size(&self) -> (usize, usize) { (0, 0) }
363    }
364
365    impl HeapSize for String {
366        fn heap_size(&self) -> (usize, usize) {
367            (self.len(), self.capacity())
368        }
369    }
370    impl<T: HeapSize> HeapSize for [T] {
371        fn heap_size(&self) -> (usize, usize) {
372            let mut l = std::mem::size_of_val(self);
373            let mut c = std::mem::size_of_val(self);
374            for item in self.iter() {
375                let (il, ic) = item.heap_size();
376                l += il;
377                c += ic;
378            }
379            (l, c)
380        }
381    }
382    impl<T: HeapSize> HeapSize for Vec<T> {
383        fn heap_size(&self) -> (usize, usize) {
384            let mut l = std::mem::size_of::<T>() * self.len();
385            let mut c = std::mem::size_of::<T>() * self.capacity();
386            for item in (self[..]).iter() {
387                let (il, ic) = item.heap_size();
388                l += il;
389                c += ic;
390            }
391            (l, c)
392        }
393    }
394
395    /// A struct representing a slice of a range of values.
396    ///
397    /// The lower and upper bounds should be meaningfully set on construction.
398    #[derive(Copy, Clone, Debug)]
399    pub struct Slice<S> {
400        pub lower: usize,
401        pub upper: usize,
402        pub slice: S,
403    }
404
405    impl<S> std::hash::Hash for Slice<S> where Self: Index<Ref: std::hash::Hash> {
406        fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
407            self.len().hash(state);
408            for i in 0 .. self.len() {
409                self.get(i).hash(state);
410            }
411        }
412    }
413
414    impl<S> Slice<S> {
415        pub fn slice<R: std::ops::RangeBounds<usize>>(self, range: R) -> Self {
416            use std::ops::Bound;
417            let lower = match range.start_bound() {
418                Bound::Included(s) => std::cmp::max(self.lower, *s),
419                Bound::Excluded(s) => std::cmp::max(self.lower, *s+1),
420                Bound::Unbounded => self.lower,
421            };
422            let upper = match range.end_bound() {
423                Bound::Included(s) => std::cmp::min(self.upper, *s+1),
424                Bound::Excluded(s) => std::cmp::min(self.upper, *s),
425                Bound::Unbounded => self.upper,
426            };
427            assert!(lower <= upper);
428            Self { lower, upper, slice: self.slice }
429        }
430        pub fn new(lower: u64, upper: u64, slice: S) -> Self {
431            let lower: usize = lower.try_into().expect("slice bounds must fit in `usize`");
432            let upper: usize = upper.try_into().expect("slice bounds must fit in `usize`");
433            Self { lower, upper, slice }
434        }
435        pub fn len(&self) -> usize { self.upper - self.lower }
436        /// Map the slice to another type.
437        pub(crate) fn map<T>(self, f: impl Fn(S) -> T) -> Slice<T> {
438            Slice {
439                lower: self.lower,
440                upper: self.upper,
441                slice: f(self.slice),
442            }
443        }
444    }
445
446    impl<S: Index> PartialEq for Slice<S> where S::Ref: PartialEq {
447        fn eq(&self, other: &Self) -> bool {
448            if self.len() != other.len() { return false; }
449            for i in 0 .. self.len() {
450                if self.get(i) != other.get(i) { return false; }
451            }
452            true
453        }
454    }
455    impl<S: Index> PartialEq<[S::Ref]> for Slice<S> where S::Ref: PartialEq {
456        fn eq(&self, other: &[S::Ref]) -> bool {
457            if self.len() != other.len() { return false; }
458            for i in 0 .. self.len() {
459                if self.get(i) != other[i] { return false; }
460            }
461            true
462        }
463    }
464    impl<S: Index> PartialEq<Vec<S::Ref>> for Slice<S> where S::Ref: PartialEq {
465        fn eq(&self, other: &Vec<S::Ref>) -> bool {
466            if self.len() != other.len() { return false; }
467            for i in 0 .. self.len() {
468                if self.get(i) != other[i] { return false; }
469            }
470            true
471        }
472    }
473
474    impl<S: Index> Eq for Slice<S> where S::Ref: Eq { }
475
476    impl<S: Index> PartialOrd for Slice<S> where S::Ref: PartialOrd {
477        fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
478            use std::cmp::Ordering;
479            let len = std::cmp::min(self.len(), other.len());
480
481            for i in 0 .. len {
482                match self.get(i).partial_cmp(&other.get(i)) {
483                    Some(Ordering::Equal) => (),
484                    not_equal => return not_equal,
485                }
486            }
487
488            self.len().partial_cmp(&other.len())
489        }
490    }
491
492    impl<S: Index> Ord for Slice<S> where S::Ref: Ord + Eq {
493        fn cmp(&self, other: &Self) -> std::cmp::Ordering {
494            use std::cmp::Ordering;
495            let len = std::cmp::min(self.len(), other.len());
496
497            for i in 0 .. len {
498                match self.get(i).cmp(&other.get(i)) {
499                    Ordering::Equal => (),
500                    not_equal => return not_equal,
501                }
502            }
503
504            self.len().cmp(&other.len())
505        }
506    }
507
508    impl<S> Len for Slice<S> {
509        #[inline(always)] fn len(&self) -> usize { self.len() }
510    }
511
512    impl<S: Index> Index for Slice<S> {
513        type Ref = S::Ref;
514        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
515            assert!(index < self.upper - self.lower);
516            self.slice.get(self.lower + index)
517        }
518    }
519    impl<'a, S> Index for &'a Slice<S>
520    where
521        &'a S : Index,
522    {
523        type Ref = <&'a S as Index>::Ref;
524        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
525            assert!(index < self.upper - self.lower);
526            (&self.slice).get(self.lower + index)
527        }
528    }
529
530    impl<S: IndexMut> IndexMut for Slice<S> {
531        type IndexMut<'a> = S::IndexMut<'a> where S: 'a;
532        #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
533            assert!(index < self.upper - self.lower);
534            self.slice.get_mut(self.lower + index)
535        }
536    }
537
538    impl<S: Index + Len> Slice<S> {
539        /// Converts the slice into an iterator.
540        ///
541        /// This method exists rather than an `IntoIterator` implementation to avoid
542        /// a conflicting implementation for pushing an `I: IntoIterator` into `Vecs`.
543        pub fn into_iter(self) -> IterOwn<Slice<S>> {
544            self.into_index_iter()
545        }
546    }
547
548    impl<'a, T> Slice<&'a [T]> {
549        pub fn as_slice(&self) -> &'a [T] {
550            &self.slice[self.lower .. self.upper]
551        }
552    }
553
554    pub struct IterOwn<S> {
555        index: usize,
556        slice: S,
557    }
558
559    impl<S> IterOwn<S> {
560        pub fn new(index: usize, slice: S) -> Self {
561            Self { index, slice }
562        }
563    }
564
565    impl<S: Index + Len> Iterator for IterOwn<S> {
566        type Item = S::Ref;
567        #[inline(always)] fn next(&mut self) -> Option<Self::Item> {
568            if self.index < self.slice.len() {
569                let result = self.slice.get(self.index);
570                self.index += 1;
571                Some(result)
572            } else {
573                None
574            }
575        }
576        #[inline(always)]
577        fn size_hint(&self) -> (usize, Option<usize>) {
578            (self.slice.len() - self.index, Some(self.slice.len() - self.index))
579        }
580    }
581
582    impl<S: Index + Len> ExactSizeIterator for IterOwn<S> { }
583
584    /// A type that can be viewed as byte slices with lifetime `'a`.
585    ///
586    /// Implementors of this trait almost certainly reference the lifetime `'a` themselves.
587    pub trait AsBytes<'a> {
588        /// Presents `self` as a sequence of byte slices, with their required alignment.
589        fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])>;
590    }
591
592    /// A type that can be reconstituted from byte slices with lifetime `'a`.
593    ///
594    /// Implementors of this trait almost certainly reference the lifetime `'a` themselves,
595    /// unless they actively deserialize the bytes (vs sit on the slices, as if zero-copy).
596    pub trait FromBytes<'a> {
597        /// Reconstructs `self` from a sequence of correctly aligned and sized bytes slices.
598        ///
599        /// The implementation is expected to consume the right number of items from the iterator,
600        /// which may go on to be used by other implementations of `FromBytes`.
601        ///
602        /// The implementation should aim for only doing trivial work, as it backs calls like
603        /// `borrow` for serialized containers.
604        ///
605        /// Implementations should almost always be marked as `#[inline(always)]` to ensure that
606        /// they are inlined. A single non-inlined function on a tree of `from_bytes` calls
607        /// can cause the performance to drop significantly.
608        fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self;
609    }
610
611}
612
613/// Logic related to the transformation to and from bytes.
614///
615/// The methods here line up with the `AsBytes` and `FromBytes` traits.
616pub mod bytes {
617
618    use crate::AsBytes;
619
620    /// A coupled encode/decode pair for byte sequences.
621    pub trait EncodeDecode {
622        /// Encoded length in number of `u64` words required.
623        fn length_in_words<'a, A>(bytes: &A) -> usize where A : AsBytes<'a>;
624        /// Encoded length in number of `u8` bytes required.
625        ///
626        /// This method should always be eight times `Self::length_in_words`, and is provided for convenience and clarity.
627        fn length_in_bytes<'a, A>(bytes: &A) -> usize where A : AsBytes<'a> { 8 * Self::length_in_words(bytes) }
628        /// Encodes `bytes` into a sequence of `u64`.
629        fn encode<'a, A>(store: &mut Vec<u64>, bytes: &A) where A : AsBytes<'a>;
630        /// Writes `bytes` in the encoded format to an arbitrary writer.
631        fn write<'a, A, W: std::io::Write>(writer: W, bytes: &A) -> std::io::Result<()> where A : AsBytes<'a>;
632        /// Decodes bytes from a sequence of `u64`.
633        fn decode(store: &[u64]) -> impl Iterator<Item=&[u8]>;
634    }
635
636    /// A sequential byte layout for `AsBytes` and `FromBytes` implementors.
637    ///
638    /// The layout is aligned like a sequence of `u64`, where we repeatedly announce a length,
639    /// and then follow it by that many bytes. We may need to follow this with padding bytes.
640    pub use serialization::Sequence;
641    mod serialization {
642
643        use crate::AsBytes;
644
645        /// Encodes and decodes bytes sequences, by prepending the length and appending the all sequences.
646        pub struct Sequence;
647        impl super::EncodeDecode for Sequence {
648            fn length_in_words<'a, A>(bytes: &A) -> usize where A : AsBytes<'a> {
649                // Each byte slice has one `u64` for the length, and then as many `u64`s as needed to hold all bytes.
650                bytes.as_bytes().map(|(_align, bytes)| 1 + bytes.len().div_ceil(8)).sum()
651            }
652            fn encode<'a, A>(store: &mut Vec<u64>, bytes: &A) where A : AsBytes<'a> {
653                encode(store, bytes.as_bytes())
654            }
655            fn write<'a, A, W: std::io::Write>(writer: W, bytes: &A) -> std::io::Result<()> where A : AsBytes<'a> {
656                write(writer, bytes.as_bytes())
657            }
658            fn decode(store: &[u64]) -> impl Iterator<Item=&[u8]> {
659                decode(store)
660            }
661        }
662
663        /// Encodes a sequence of byte slices as their length followed by their bytes, aligned to 8 bytes.
664        ///
665        /// Each length will be exactly 8 bytes, and the bytes that follow are padded out to a multiple of 8 bytes.
666        /// When reading the data, the length is in bytes, and one should consume those bytes and advance over padding.
667        pub fn encode<'a>(store: &mut Vec<u64>, bytes: impl Iterator<Item=(u64, &'a [u8])>) {
668            for (align, bytes) in bytes {
669                assert!(align <= 8);
670                store.push(bytes.len() as u64);
671                let whole_words = 8 * (bytes.len() / 8);
672                // We want to extend `store` by `bytes`, but `bytes` may not be `u64` aligned.
673                // In the latter case, init `store` and cast and copy onto it as a byte slice.
674                if let Ok(words) = bytemuck::try_cast_slice(&bytes[.. whole_words]) {
675                    store.extend_from_slice(words);
676                }
677                else {
678                    let store_len = store.len();
679                    store.resize(store_len + whole_words/8, 0);
680                    let slice = bytemuck::try_cast_slice_mut(&mut store[store_len..]).expect("&[u64] should convert to &[u8]");
681                    slice.copy_from_slice(&bytes[.. whole_words]);
682                }
683                let remaining_bytes = &bytes[whole_words..];
684                if !remaining_bytes.is_empty() {
685                    let mut remainder = 0u64;
686                    let transmute: &mut [u8] = bytemuck::try_cast_slice_mut(std::slice::from_mut(&mut remainder)).expect("&[u64] should convert to &[u8]");
687                    for (i, byte) in remaining_bytes.iter().enumerate() {
688                        transmute[i] = *byte;
689                    }
690                    store.push(remainder);
691                }
692            }
693        }
694
695        /// Writes a sequence of byte slices as their length followed by their bytes, padded to 8 bytes.
696        ///
697        /// Each length will be exactly 8 bytes, and the bytes that follow are padded out to a multiple of 8 bytes.
698        /// When reading the data, the length is in bytes, and one should consume those bytes and advance over padding.
699        pub fn write<'a>(mut writer: impl std::io::Write, bytes: impl Iterator<Item=(u64, &'a [u8])>) -> std::io::Result<()> {
700            // Columnar data is serialized as a sequence of `u64` values, with each `[u8]` slice
701            // serialize as first its length in bytes, and then as many `u64` values as needed.
702            // Padding should be added, but only for alignment; no specific values are required.
703            for (align, bytes) in bytes {
704                assert!(align <= 8);
705                let length = u64::try_from(bytes.len()).unwrap();
706                writer.write_all(bytemuck::cast_slice(std::slice::from_ref(&length)))?;
707                writer.write_all(bytes)?;
708                let padding = usize::try_from((8 - (length % 8)) % 8).unwrap();
709                writer.write_all(&[0; 8][..padding])?;
710            }
711            Ok(())
712        }
713
714        /// Decodes a sequence of byte slices from their length followed by their bytes.
715        ///
716        /// This decoder matches the `encode` function above.
717        /// In particular, it anticipates padding bytes when the length is not a multiple of eight.
718        pub fn decode(store: &[u64]) -> Decoder<'_> {
719            Decoder { store }
720        }
721
722        /// An iterator over byte slices, decoding from a sequence of lengths followed by bytes.
723        pub struct Decoder<'a> {
724            store: &'a [u64],
725        }
726
727        impl<'a> Iterator for Decoder<'a> {
728            type Item = &'a [u8];
729            fn next(&mut self) -> Option<Self::Item> {
730                if let Some(length) = self.store.first() {
731                    let length = *length as usize;
732                    self.store = &self.store[1..];
733                    let whole_words = if length % 8 == 0 { length / 8 } else { length / 8 + 1 };
734                    let bytes: &[u8] = bytemuck::try_cast_slice(&self.store[..whole_words]).expect("&[u64] should convert to &[u8]");
735                    self.store = &self.store[whole_words..];
736                    Some(&bytes[..length])
737                } else {
738                    None
739                }
740            }
741        }
742    }
743
744    /// A binary encoding of sequences of byte slices.
745    ///
746    /// The encoding starts with a sequence of n+1 offsets describing where to find the n slices in the bytes that follow.
747    /// Treating the offsets as a byte slice too, the each offset indicates the location (in bytes) of the end of its slice.
748    /// Each byte slice can be found from a pair of adjacent offsets, where the first is rounded up to a multiple of eight.
749    pub use serialization_neu::Indexed;
750    pub mod serialization_neu {
751
752        use crate::AsBytes;
753
754        /// Encodes and decodes bytes sequences, using an index of offsets.
755        pub struct Indexed;
756        impl super::EncodeDecode for Indexed {
757            fn length_in_words<'a, A>(bytes: &A) -> usize where A : AsBytes<'a> {
758                1 + bytes.as_bytes().map(|(_align, bytes)| 1 + bytes.len().div_ceil(8)).sum::<usize>()
759            }
760            fn encode<'a, A>(store: &mut Vec<u64>, bytes: &A) where A : AsBytes<'a> {
761                encode(store, bytes)
762            }
763            fn write<'a, A, W: std::io::Write>(writer: W, bytes: &A) -> std::io::Result<()> where A : AsBytes<'a> {
764                write(writer, bytes)
765            }
766            fn decode(store: &[u64]) -> impl Iterator<Item=&[u8]> {
767                decode(store)
768            }
769        }
770
771        /// Encodes `item` into `u64` aligned words.
772        ///
773        /// The sequence of byte slices are appended, with padding to have each slice start `u64` aligned.
774        /// The sequence is then pre-pended with as many byte offsets as there are slices in `item`, plus one.
775        /// The byte offsets indicate where each slice ends, and by rounding up to `u64` alignemnt where the next slice begins.
776        /// The first offset indicates where the list of offsets itself ends, and where the first slice begins.
777        ///
778        /// We will need to visit `as_bytes` three times to extract this information, so the method should be efficient and inlined.
779        /// The first read writes the first offset, the second writes each other offset, and the third writes the bytes themselves.
780        ///
781        /// The offsets are zero-based, rather than based on `store.len()`.
782        /// If you call the method with a non-empty `store` be careful decoding.
783        pub fn encode<'a, A>(store: &mut Vec<u64>, iter: &A)
784        where A : AsBytes<'a>,
785        {
786            // Read 1: Number of offsets we will record, equal to the number of slices plus one.
787            // TODO: right-size `store` before first call to `push`.
788            let offsets = 1 + iter.as_bytes().count();
789            let offsets_end: u64 = TryInto::<u64>::try_into((offsets) * std::mem::size_of::<u64>()).unwrap();
790            store.push(offsets_end);
791            // Read 2: Establish each of the offsets based on lengths of byte slices.
792            let mut position_bytes = offsets_end;
793            for (align, bytes) in iter.as_bytes() {
794                assert!(align <= 8);
795                // Write length in bytes, but round up to words before updating `position_bytes`.
796                let to_push: u64 = position_bytes + TryInto::<u64>::try_into(bytes.len()).unwrap();
797                store.push(to_push);
798                let round_len: u64 = ((bytes.len() + 7) & !7).try_into().unwrap();
799                position_bytes += round_len;
800            }
801            // Read 3: Append each byte slice, with padding to align starts to `u64`.
802            for (_align, bytes) in iter.as_bytes() {
803                let whole_words = 8 * (bytes.len() / 8);
804                // We want to extend `store` by `bytes`, but `bytes` may not be `u64` aligned.
805                // In the latter case, init `store` and cast and copy onto it as a byte slice.
806                if let Ok(words) = bytemuck::try_cast_slice(&bytes[.. whole_words]) {
807                    store.extend_from_slice(words);
808                }
809                else {
810                    let store_len = store.len();
811                    store.resize(store_len + whole_words/8, 0);
812                    let slice = bytemuck::try_cast_slice_mut(&mut store[store_len..]).expect("&[u64] should convert to &[u8]");
813                    slice.copy_from_slice(&bytes[.. whole_words]);
814                }
815                let remaining_bytes = &bytes[whole_words..];
816                if !remaining_bytes.is_empty() {
817                    let mut remainder = 0u64;
818                    let transmute: &mut [u8] = bytemuck::try_cast_slice_mut(std::slice::from_mut(&mut remainder)).expect("&[u64] should convert to &[u8]");
819                    for (i, byte) in remaining_bytes.iter().enumerate() {
820                        transmute[i] = *byte;
821                    }
822                    store.push(remainder);
823                }
824            }
825        }
826
827        pub fn write<'a, A, W>(mut writer: W, iter: &A) -> std::io::Result<()>
828        where
829            A: AsBytes<'a>,
830            W: std::io::Write,
831        {
832            // Read 1: Number of offsets we will record, equal to the number of slices plus one.
833            let offsets = 1 + iter.as_bytes().count();
834            let offsets_end: u64 = TryInto::<u64>::try_into((offsets) * std::mem::size_of::<u64>()).unwrap();
835            writer.write_all(bytemuck::cast_slice(std::slice::from_ref(&offsets_end)))?;
836            // Read 2: Establish each of the offsets based on lengths of byte slices.
837            let mut position_bytes = offsets_end;
838            for (align, bytes) in iter.as_bytes() {
839                assert!(align <= 8);
840                // Write length in bytes, but round up to words before updating `position_bytes`.
841                let to_push: u64 = position_bytes + TryInto::<u64>::try_into(bytes.len()).unwrap();
842                writer.write_all(bytemuck::cast_slice(std::slice::from_ref(&to_push)))?;
843                let round_len: u64 = ((bytes.len() + 7) & !7).try_into().unwrap();
844                position_bytes += round_len;
845            }
846            // Read 3: Append each byte slice, with padding to align starts to `u64`.
847            for (_align, bytes) in iter.as_bytes() {
848                writer.write_all(bytes)?;
849                let padding = ((bytes.len() + 7) & !7) - bytes.len();
850                if padding > 0 {
851                    writer.write_all(&[0u8;8][..padding])?;
852                }
853            }
854
855            Ok(())
856        }
857
858        /// Decodes an encoded sequence of byte slices. Each result will be `u64` aligned.
859        pub fn decode(store: &[u64]) -> impl Iterator<Item=&[u8]> {
860            assert!(store[0] % 8 == 0);
861            let slices = (store[0] / 8) - 1;
862            (0 .. slices).map(|i| decode_index(store, i))
863        }
864
865        /// Decodes a specific byte slice by index. It will be `u64` aligned.
866        #[inline(always)]
867        pub fn decode_index(store: &[u64], index: u64) -> &[u8] {
868            debug_assert!(index + 1 < store[0]/8);
869            let index: usize = index.try_into().unwrap();
870            let lower: usize = ((store[index] + 7) & !7).try_into().unwrap();
871            let upper: usize = (store[index + 1]).try_into().unwrap();
872            let bytes: &[u8] = bytemuck::try_cast_slice(store).expect("&[u64] should convert to &[u8]");
873            &bytes[lower .. upper]
874        }
875
876        #[cfg(test)]
877        mod test {
878
879            use crate::{Container, ContainerOf};
880            use crate::common::Push;
881            use crate::AsBytes;
882
883            use super::{encode, decode};
884
885            fn assert_roundtrip<'a, AB: AsBytes<'a>>(item: &AB) {
886                let mut store = Vec::new();
887                encode(&mut store, item);
888                assert!(item.as_bytes().map(|x| x.1).eq(decode(&store)));
889            }
890
891            #[test]
892            fn round_trip() {
893
894                let mut column: ContainerOf<Result<u64, String>> = Default::default();
895                for i in 0..10000u64 {
896                    column.push(&Ok::<u64, String>(i));
897                    column.push(&Err::<u64, String>(format!("{:?}", i)));
898                }
899
900                assert_roundtrip(&column.borrow());
901            }
902        }
903    }
904
905    #[cfg(test)]
906    mod test {
907        use crate::ContainerOf;
908
909        #[test]
910        fn round_trip() {
911
912            use crate::common::{Push, HeapSize, Len, Index};
913            use crate::{Container, AsBytes, FromBytes};
914
915            let mut column: ContainerOf<Result<u64, u64>> = Default::default();
916            for i in 0..100u64 {
917                column.push(Ok::<u64, u64>(i));
918                column.push(Err::<u64, u64>(i));
919            }
920
921            assert_eq!(column.len(), 200);
922            assert_eq!(column.heap_size(), (1624, 2080));
923
924            for i in 0..100 {
925                assert_eq!(column.get(2*i+0), Ok(i as u64));
926                assert_eq!(column.get(2*i+1), Err(i as u64));
927            }
928
929            let column2 = crate::Results::<&[u64], &[u64], &[u64], &[u64], &u64>::from_bytes(&mut column.borrow().as_bytes().map(|(_, bytes)| bytes));
930            for i in 0..100 {
931                assert_eq!(column.get(2*i+0), column2.get(2*i+0).copied().map_err(|e| *e));
932                assert_eq!(column.get(2*i+1), column2.get(2*i+1).copied().map_err(|e| *e));
933            }
934
935            let column3 = crate::Results::<&[u64], &[u64], &[u64], &[u64], &u64>::from_bytes(&mut column2.as_bytes().map(|(_, bytes)| bytes));
936            for i in 0..100 {
937                assert_eq!(column3.get(2*i+0), column2.get(2*i+0));
938                assert_eq!(column3.get(2*i+1), column2.get(2*i+1));
939            }
940        }
941    }
942}
943
944/// Types that prefer to be represented by `Vec<T>`.
945pub mod primitive {
946    use std::num::Wrapping;
947
948    /// An implementation of opinions for types that want to use `Vec<T>`.
949    macro_rules! implement_columnable {
950        ($($index_type:ty),*) => { $(
951            impl crate::Columnar for $index_type {
952                #[inline(always)]
953                fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { *other }
954
955                type Container = Vec<$index_type>;
956            }
957
958            impl crate::HeapSize for $index_type { }
959
960            impl<'a> crate::AsBytes<'a> for &'a [$index_type] {
961                #[inline(always)]
962                fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
963                    std::iter::once((std::mem::align_of::<$index_type>() as u64, bytemuck::cast_slice(&self[..])))
964                }
965            }
966            impl<'a> crate::FromBytes<'a> for &'a [$index_type] {
967                #[inline(always)]
968                fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
969                    // We use `unwrap()` here in order to panic with the `bytemuck` error, which may be informative.
970                    bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()
971                }
972            }
973            impl<'a, const N: usize> crate::AsBytes<'a> for &'a [[$index_type; N]] {
974                #[inline(always)]
975                fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
976                    std::iter::once((std::mem::align_of::<$index_type>() as u64, bytemuck::cast_slice(&self[..])))
977                }
978            }
979            impl<'a, const N: usize> crate::FromBytes<'a> for &'a [[$index_type; N]] {
980                #[inline(always)]
981                fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
982                    // We use `unwrap()` here in order to panic with the `bytemuck` error, which may be informative.
983                    bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()
984                }
985            }
986        )* }
987    }
988
989    implement_columnable!(u8, u16, u32, u64);
990    implement_columnable!(i8, i16, i32, i64);
991    implement_columnable!(f32, f64);
992    implement_columnable!(Wrapping<u8>, Wrapping<u16>, Wrapping<u32>, Wrapping<u64>);
993    implement_columnable!(Wrapping<i8>, Wrapping<i16>, Wrapping<i32>, Wrapping<i64>);
994
995    pub use sizes::{Usizes, Isizes};
996    /// Columnar stores for `usize` and `isize`, stored as 64 bits.
997    mod sizes {
998
999        use crate::{Clear, Columnar, Container, Len, IndexMut, Index, IndexAs, Push, HeapSize};
1000        use crate::common::PushIndexAs;
1001
1002        #[derive(Copy, Clone, Default)]
1003        pub struct Usizes<CV = Vec<u64>> { pub values: CV }
1004
1005        impl Columnar for usize {
1006            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
1007            type Container = Usizes;
1008        }
1009
1010        impl<CV: PushIndexAs<u64>> Container for Usizes<CV> {
1011            type Ref<'a> = usize;
1012            type Borrowed<'a> = Usizes<CV::Borrowed<'a>> where CV: 'a;
1013            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1014                Usizes { values: self.values.borrow() }
1015            }
1016            #[inline(always)]
1017            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where CV: 'a {
1018                Usizes { values: CV::reborrow(thing.values) }
1019            }
1020            #[inline(always)]
1021            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1022
1023            #[inline(always)]
1024            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1025                self.values.extend_from_self(other.values, range)
1026            }
1027
1028            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1029                self.values.reserve_for(selves.map(|x| x.values))
1030            }
1031        }
1032
1033        impl<CV: Len> Len for Usizes<CV> { fn len(&self) -> usize { self.values.len() }}
1034        impl IndexMut for Usizes {
1035            type IndexMut<'a> = &'a mut u64;
1036            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> { &mut self.values[index] }
1037        }
1038        impl<CV: IndexAs<u64>> Index for Usizes<CV> {
1039            type Ref = usize;
1040            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self.values.index_as(index).try_into().expect("Usizes values should fit in `usize`") }
1041        }
1042        impl<CV: IndexAs<u64>> Index for &Usizes<CV> {
1043            type Ref = usize;
1044            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self.values.index_as(index).try_into().expect("Usizes values should fit in `usize`") }
1045        }
1046        impl<CV: for<'a> Push<&'a u64>> Push<usize> for Usizes<CV> {
1047            #[inline]
1048            fn push(&mut self, item: usize) { self.values.push(&item.try_into().expect("usize must fit in a u64")) }
1049        }
1050        impl Push<&usize> for Usizes {
1051            #[inline]
1052            fn push(&mut self, item: &usize) { self.values.push((*item).try_into().expect("usize must fit in a u64")) }
1053        }
1054        impl<CV: Clear> Clear for Usizes<CV> { fn clear(&mut self) { self.values.clear() }}
1055
1056        impl<CV: HeapSize> HeapSize for Usizes<CV> {
1057            fn heap_size(&self) -> (usize, usize) {
1058                self.values.heap_size()
1059            }
1060        }
1061
1062        impl<'a, CV: crate::AsBytes<'a>> crate::AsBytes<'a> for crate::primitive::Usizes<CV> {
1063            #[inline(always)]
1064            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1065                self.values.as_bytes()
1066            }
1067        }
1068
1069        impl<'a, CV: crate::FromBytes<'a>> crate::FromBytes<'a> for crate::primitive::Usizes<CV> {
1070            #[inline(always)]
1071            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1072                Self { values: CV::from_bytes(bytes) }
1073            }
1074        }
1075
1076
1077        #[derive(Copy, Clone, Default)]
1078        pub struct Isizes<CV = Vec<i64>> { pub values: CV }
1079
1080        impl Columnar for isize {
1081            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
1082            type Container = Isizes;
1083        }
1084
1085        impl<CV: PushIndexAs<i64>> Container for Isizes<CV> {
1086            type Ref<'a> = isize;
1087            type Borrowed<'a> = Isizes<CV::Borrowed<'a>> where CV: 'a;
1088            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1089                Isizes { values: self.values.borrow() }
1090            }
1091            #[inline(always)]
1092            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where CV: 'a {
1093                Isizes { values: CV::reborrow(thing.values) }
1094            }
1095            #[inline(always)]
1096            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1097
1098            #[inline(always)]
1099            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1100                self.values.extend_from_self(other.values, range)
1101            }
1102
1103            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1104                self.values.reserve_for(selves.map(|x| x.values))
1105            }
1106        }
1107
1108        impl<CV: Len> Len for Isizes<CV> { fn len(&self) -> usize { self.values.len() }}
1109        impl IndexMut for Isizes {
1110            type IndexMut<'a> = &'a mut i64;
1111            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> { &mut self.values[index] }
1112        }
1113        impl<CV: IndexAs<i64>> Index for Isizes<CV> {
1114            type Ref = isize;
1115            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self.values.index_as(index).try_into().expect("Isizes values should fit in `isize`") }
1116        }
1117        impl<CV: IndexAs<i64>> Index for &Isizes<CV> {
1118            type Ref = isize;
1119            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self.values.index_as(index).try_into().expect("Isizes values should fit in `isize`") }
1120        }
1121        impl<CV: for<'a> Push<&'a i64>> Push<isize> for Isizes<CV> {
1122            #[inline]
1123            fn push(&mut self, item: isize) { self.values.push(&item.try_into().expect("isize must fit in a i64")) }
1124        }
1125        impl Push<&isize> for Isizes {
1126            #[inline]
1127            fn push(&mut self, item: &isize) { self.values.push((*item).try_into().expect("isize must fit in a i64")) }
1128        }
1129        impl<CV: Clear> Clear for Isizes<CV> { fn clear(&mut self) { self.values.clear() }}
1130
1131        impl<CV: HeapSize> HeapSize for Isizes<CV> {
1132            fn heap_size(&self) -> (usize, usize) {
1133                self.values.heap_size()
1134            }
1135        }
1136
1137        impl<'a, CV: crate::AsBytes<'a>> crate::AsBytes<'a> for crate::primitive::Isizes<CV> {
1138            #[inline(always)]
1139            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1140                self.values.as_bytes()
1141            }
1142        }
1143
1144        impl<'a, CV: crate::FromBytes<'a>> crate::FromBytes<'a> for crate::primitive::Isizes<CV> {
1145            #[inline(always)]
1146            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1147                Self { values: CV::from_bytes(bytes) }
1148            }
1149        }
1150    }
1151
1152    pub use chars::{Chars};
1153    /// Columnar store for `char`, stored as a `u32`.
1154    mod chars {
1155
1156        use crate::{Clear, Columnar, Container, Len, Index, IndexAs, Push, HeapSize};
1157        use crate::common::PushIndexAs;
1158
1159        type Encoded = u32;
1160
1161        #[derive(Copy, Clone, Default)]
1162        pub struct Chars<CV = Vec<Encoded>> { pub values: CV }
1163
1164        impl Columnar for char {
1165            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
1166            type Container = Chars;
1167        }
1168
1169        impl<CV: PushIndexAs<Encoded>> Container for Chars<CV> {
1170            type Ref<'a> = char;
1171            type Borrowed<'a> = Chars<CV::Borrowed<'a>> where CV: 'a;
1172            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1173                Chars { values: self.values.borrow() }
1174            }
1175            #[inline(always)]
1176            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where CV: 'a {
1177                Chars { values: CV::reborrow(thing.values) }
1178            }
1179            #[inline(always)]
1180            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1181
1182            #[inline(always)]
1183            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1184                self.values.extend_from_self(other.values, range)
1185            }
1186
1187            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1188                self.values.reserve_for(selves.map(|x| x.values))
1189            }
1190        }
1191
1192        impl<CV: Len> Len for Chars<CV> { fn len(&self) -> usize { self.values.len() }}
1193        impl<CV: IndexAs<Encoded>> Index for Chars<CV> {
1194            type Ref = char;
1195            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { char::from_u32(self.values.index_as(index)).unwrap() }
1196        }
1197        impl<CV: IndexAs<Encoded>> Index for &Chars<CV> {
1198            type Ref = char;
1199            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { char::from_u32(self.values.index_as(index)).unwrap() }
1200        }
1201        impl<CV: for<'a> Push<&'a Encoded>> Push<char> for Chars<CV> {
1202            #[inline]
1203            fn push(&mut self, item: char) { self.values.push(&u32::from(item)) }
1204        }
1205        impl Push<&char> for Chars {
1206            #[inline]
1207            fn push(&mut self, item: &char) { self.values.push(u32::from(*item)) }
1208        }
1209        impl<CV: Clear> Clear for Chars<CV> { fn clear(&mut self) { self.values.clear() }}
1210
1211        impl<CV: HeapSize> HeapSize for Chars<CV> {
1212            fn heap_size(&self) -> (usize, usize) {
1213                self.values.heap_size()
1214            }
1215        }
1216
1217        impl<'a, CV: crate::AsBytes<'a>> crate::AsBytes<'a> for Chars<CV> {
1218            #[inline(always)]
1219            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1220                self.values.as_bytes()
1221            }
1222        }
1223
1224        impl<'a, CV: crate::FromBytes<'a>> crate::FromBytes<'a> for Chars<CV> {
1225            #[inline(always)]
1226            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1227                Self { values: CV::from_bytes(bytes) }
1228            }
1229        }
1230    }
1231
1232    pub use larges::{U128s, I128s};
1233    /// Columnar stores for `u128` and `i128`, stored as [u8; 16] bits.
1234    mod larges {
1235
1236        use crate::{Clear, Columnar, Container, Len, Index, IndexAs, Push, HeapSize};
1237        use crate::common::PushIndexAs;
1238
1239        type Encoded = [u8; 16];
1240
1241        #[derive(Copy, Clone, Default)]
1242        pub struct U128s<CV = Vec<Encoded>> { pub values: CV }
1243
1244        impl Columnar for u128 {
1245            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
1246            type Container = U128s;
1247        }
1248
1249        impl<CV: PushIndexAs<Encoded>> Container for U128s<CV> {
1250            type Ref<'a> = u128;
1251            type Borrowed<'a> = U128s<CV::Borrowed<'a>> where CV: 'a;
1252            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1253                U128s { values: self.values.borrow() }
1254            }
1255            #[inline(always)]
1256            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where CV: 'a {
1257                U128s { values: CV::reborrow(thing.values) }
1258            }
1259            #[inline(always)]
1260            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1261
1262            #[inline(always)]
1263            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1264                self.values.extend_from_self(other.values, range)
1265            }
1266
1267            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1268                self.values.reserve_for(selves.map(|x| x.values))
1269            }
1270        }
1271
1272        impl<CV: Len> Len for U128s<CV> { fn len(&self) -> usize { self.values.len() }}
1273        impl<CV: IndexAs<Encoded>> Index for U128s<CV> {
1274            type Ref = u128;
1275            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { u128::from_le_bytes(self.values.index_as(index)) }
1276        }
1277        impl<CV: IndexAs<Encoded>> Index for &U128s<CV> {
1278            type Ref = u128;
1279            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { u128::from_le_bytes(self.values.index_as(index)) }
1280        }
1281        impl<CV: for<'a> Push<&'a Encoded>> Push<u128> for U128s<CV> {
1282            #[inline]
1283            fn push(&mut self, item: u128) { self.values.push(&item.to_le_bytes()) }
1284        }
1285        impl Push<&u128> for U128s {
1286            #[inline]
1287            fn push(&mut self, item: &u128) { self.values.push(item.to_le_bytes()) }
1288        }
1289        impl<CV: Clear> Clear for U128s<CV> { fn clear(&mut self) { self.values.clear() }}
1290
1291        impl<CV: HeapSize> HeapSize for U128s<CV> {
1292            fn heap_size(&self) -> (usize, usize) {
1293                self.values.heap_size()
1294            }
1295        }
1296
1297        impl<'a, CV: crate::AsBytes<'a>> crate::AsBytes<'a> for U128s<CV> {
1298            #[inline(always)]
1299            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1300                self.values.as_bytes()
1301            }
1302        }
1303
1304        impl<'a, CV: crate::FromBytes<'a>> crate::FromBytes<'a> for U128s<CV> {
1305            #[inline(always)]
1306            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1307                Self { values: CV::from_bytes(bytes) }
1308            }
1309        }
1310
1311        #[derive(Copy, Clone, Default)]
1312        pub struct I128s<CV = Vec<Encoded>> { pub values: CV }
1313
1314        impl Columnar for i128 {
1315            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
1316            type Container = I128s;
1317        }
1318
1319        impl<CV: PushIndexAs<Encoded>> Container for I128s<CV> {
1320            type Ref<'a> = i128;
1321            type Borrowed<'a> = I128s<CV::Borrowed<'a>> where CV: 'a;
1322            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1323                I128s { values: self.values.borrow() }
1324            }
1325            #[inline(always)]
1326            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where CV: 'a {
1327                I128s { values: CV::reborrow(thing.values) }
1328            }
1329            #[inline(always)]
1330            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1331
1332            #[inline(always)]
1333            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1334                self.values.extend_from_self(other.values, range)
1335            }
1336
1337            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1338                self.values.reserve_for(selves.map(|x| x.values))
1339            }
1340        }
1341
1342        impl<CV: Len> Len for I128s<CV> { fn len(&self) -> usize { self.values.len() }}
1343        impl<CV: IndexAs<Encoded>> Index for I128s<CV> {
1344            type Ref = i128;
1345            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { i128::from_le_bytes(self.values.index_as(index)) }
1346        }
1347        impl<CV: IndexAs<Encoded>> Index for &I128s<CV> {
1348            type Ref = i128;
1349            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { i128::from_le_bytes(self.values.index_as(index)) }
1350        }
1351        impl<CV: for<'a> Push<&'a Encoded>> Push<i128> for I128s<CV> {
1352            #[inline]
1353            fn push(&mut self, item: i128) { self.values.push(&item.to_le_bytes()) }
1354        }
1355        impl Push<&i128> for I128s {
1356            #[inline]
1357            fn push(&mut self, item: &i128) { self.values.push(item.to_le_bytes()) }
1358        }
1359        impl<CV: Clear> Clear for I128s<CV> { fn clear(&mut self) { self.values.clear() }}
1360
1361        impl<CV: HeapSize> HeapSize for I128s<CV> {
1362            fn heap_size(&self) -> (usize, usize) {
1363                self.values.heap_size()
1364            }
1365        }
1366
1367        impl<'a, CV: crate::AsBytes<'a>> crate::AsBytes<'a> for I128s<CV> {
1368            #[inline(always)]
1369            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1370                self.values.as_bytes()
1371            }
1372        }
1373
1374        impl<'a, CV: crate::FromBytes<'a>> crate::FromBytes<'a> for I128s<CV> {
1375            #[inline(always)]
1376            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1377                Self { values: CV::from_bytes(bytes) }
1378            }
1379        }
1380    }
1381
1382    /// Columnar stores for non-decreasing `u64`, stored in various ways.
1383    ///
1384    /// The venerable `Vec<u64>` works as a general container for arbitrary offests,
1385    /// but it can be non-optimal for various patterns of offset, including constant
1386    /// inter-offset spacing, and relatively short runs (compared to a `RankSelect`).
1387    pub mod offsets {
1388
1389        pub use array::Fixeds;
1390        pub use stride::Strides;
1391
1392        /// An offset container that encodes a constant spacing in its type.
1393        ///
1394        /// Any attempt to push any value will result in pushing the next value
1395        /// at the specified spacing. This type is only appropriate in certain
1396        /// contexts, for example when storing `[T; K]` array types, or having
1397        /// introspected a `Strides` and found it to be only one constant stride.
1398        mod array {
1399
1400            use crate::{Container, Index, Len, Push};
1401            use crate::common::index::CopyAs;
1402
1403            /// An offset container that encodes a constant `K` spacing.
1404            #[derive(Copy, Clone, Debug, Default)]
1405            pub struct Fixeds<const K: u64, CC = u64> { pub count: CC }
1406
1407            impl<const K: u64> Container for Fixeds<K> {
1408                type Ref<'a> = u64;
1409                type Borrowed<'a> = Fixeds<K, &'a u64>;
1410                #[inline(always)]
1411                fn borrow<'a>(&'a self) -> Self::Borrowed<'a> { Fixeds { count: &self.count } }
1412                #[inline(always)]
1413                fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where Self: 'a {
1414                    Fixeds { count: thing.count }
1415                }
1416                #[inline(always)]
1417                fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1418
1419                #[inline(always)]
1420                fn extend_from_self(&mut self, _other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1421                    self.count += range.len() as u64;
1422                }
1423
1424                fn reserve_for<'a, I>(&mut self, _selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone { }
1425            }
1426
1427            impl<const K: u64, CC: CopyAs<u64>> Len for Fixeds<K, CC> {
1428                #[inline(always)] fn len(&self) -> usize { self.count.copy_as() as usize }
1429            }
1430
1431            impl<const K: u64, CC> Index for Fixeds<K, CC> {
1432                type Ref = u64;
1433                #[inline(always)]
1434                fn get(&self, index: usize) -> Self::Ref { (index as u64 + 1) * K }
1435            }
1436            impl<'a, const K: u64, CC> Index for &'a Fixeds<K, CC> {
1437                type Ref = u64;
1438                #[inline(always)]
1439                fn get(&self, index: usize) -> Self::Ref { (index as u64 + 1) * K }
1440            }
1441
1442            impl<'a, const K: u64, T> Push<T> for Fixeds<K> {
1443                // TODO: check for overflow?
1444                #[inline(always)]
1445                fn push(&mut self, _item: T) { self.count += 1; }
1446                #[inline(always)]
1447                fn extend(&mut self, iter: impl IntoIterator<Item=T>) {
1448                    self.count += iter.into_iter().count() as u64;
1449                }
1450            }
1451
1452            impl<const K: u64> crate::HeapSize for Fixeds<K> {
1453                #[inline(always)]
1454                fn heap_size(&self) -> (usize, usize) { (0, 0) }
1455            }
1456            impl<const K: u64> crate::Clear for Fixeds<K> {
1457                #[inline(always)]
1458                fn clear(&mut self) { self.count = 0; }
1459            }
1460
1461            impl<'a, const K: u64> crate::AsBytes<'a> for Fixeds<K, &'a u64> {
1462                #[inline(always)]
1463                fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1464                    std::iter::once((8, bytemuck::cast_slice(std::slice::from_ref(self.count))))
1465                }
1466            }
1467            impl<'a, const K: u64> crate::FromBytes<'a> for Fixeds<K, &'a u64> {
1468                #[inline(always)]
1469                fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1470                    Self { count: &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0] }
1471                }
1472            }
1473
1474            use super::Strides;
1475            impl<const K: u64, BC: Len, CC: CopyAs<u64>> std::convert::TryFrom<Strides<BC, CC>> for Fixeds<K, CC> {
1476                // On error we return the original.
1477                type Error = Strides<BC, CC>;
1478                fn try_from(item: Strides<BC, CC>) -> Result<Self, Self::Error> {
1479                    if item.strided() == Some(K) { Ok( Self { count: item.length } ) } else { Err(item) }
1480                }
1481            }
1482        }
1483
1484        /// An general offset container optimized for fixed inter-offset sizes.
1485        ///
1486        /// Although it can handle general offsets, it starts with the optimistic
1487        /// assumption that the offsets will be evenly spaced from zero, and while
1488        /// that holds it will maintain the stride and length. Should it stop being
1489        /// true, when a non-confirming offset is pushed, it will start to store
1490        /// the offsets in a general container.
1491        mod stride {
1492
1493            use std::ops::Deref;
1494            use crate::{Container, Index, Len, Push, Clear, AsBytes, FromBytes};
1495            use crate::common::index::CopyAs;
1496
1497            /// The first two integers describe a stride pattern, [stride, length].
1498            ///
1499            /// If the length is zero the collection is empty. The first `item` pushed
1500            /// always becomes the first list element. The next element is the number of
1501            /// items at position `i` whose value is `item * (i+1)`. After this comes
1502            /// the remaining entries in the bounds container.
1503            #[derive(Copy, Clone, Debug, Default)]
1504            pub struct Strides<BC = Vec<u64>, CC = u64> {
1505                pub stride: CC,
1506                pub length: CC,
1507                pub bounds: BC,
1508            }
1509
1510            impl Container for Strides {
1511                type Ref<'a> = u64;
1512                type Borrowed<'a> = Strides<&'a [u64], &'a u64>;
1513
1514                #[inline(always)] fn borrow<'a>(&'a self) -> Self::Borrowed<'a> { Strides { stride: &self.stride, length: &self.length, bounds: &self.bounds[..] } }
1515                /// Reborrows the borrowed type to a shorter lifetime. See [`Columnar::reborrow`] for details.
1516                #[inline(always)] fn reborrow<'b, 'a: 'b>(item: Self::Borrowed<'a>) -> Self::Borrowed<'b> where Self: 'a {
1517                    Strides { stride: item.stride, length: item.length, bounds: item.bounds }
1518                }
1519                /// Reborrows the borrowed type to a shorter lifetime. See [`Columnar::reborrow`] for details.
1520                 #[inline(always)]fn reborrow_ref<'b, 'a: 'b>(item: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { item }
1521
1522                fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1523                    self.bounds.reserve_for(selves.map(|x| x.bounds))
1524                }
1525            }
1526
1527            impl<'a> Push<&'a u64> for Strides { #[inline(always)] fn push(&mut self, item: &'a u64) { self.push(*item) } }
1528            impl Push<u64> for Strides { #[inline(always)] fn push(&mut self, item: u64) { self.push(item) } }
1529            impl Clear for Strides { #[inline(always)] fn clear(&mut self) { self.clear() } }
1530
1531            impl<BC: Len, CC: CopyAs<u64>> Len for Strides<BC, CC> {
1532                #[inline(always)]
1533                fn len(&self) -> usize { self.length.copy_as() as usize + self.bounds.len() }
1534            }
1535            impl Index for Strides<&[u64], &u64> {
1536                type Ref = u64;
1537                #[inline(always)]
1538                fn get(&self, index: usize) -> Self::Ref {
1539                    let index = index as u64;
1540                    if index < *self.length { (index+1) * self.stride } else { self.bounds[(index - self.length) as usize] }
1541                }
1542            }
1543
1544            impl<'a, BC: AsBytes<'a>> AsBytes<'a> for Strides<BC, &'a u64> {
1545                #[inline(always)]
1546                fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1547                    let stride = std::iter::once((8, bytemuck::cast_slice(std::slice::from_ref(self.stride))));
1548                    let length = std::iter::once((8, bytemuck::cast_slice(std::slice::from_ref(self.length))));
1549                    let bounds = self.bounds.as_bytes();
1550                    crate::chain(stride, crate::chain(length, bounds))
1551                }
1552            }
1553            impl<'a, BC: FromBytes<'a>> FromBytes<'a> for Strides<BC, &'a u64> {
1554                #[inline(always)]
1555                fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1556                    let stride = &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0];
1557                    let length = &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0];
1558                    let bounds = BC::from_bytes(bytes);
1559                    Self { stride, length, bounds }
1560                }
1561            }
1562
1563            impl Strides {
1564                pub fn new(stride: u64, length: u64) -> Self {
1565                    Self { stride, length, bounds: Vec::default() }
1566                }
1567                #[inline(always)]
1568                pub fn push(&mut self, item: u64) {
1569                    if self.length == 0 {
1570                        self.stride = item;
1571                        self.length = 1;
1572                    }
1573                    else if !self.bounds.is_empty() {
1574                        self.bounds.push(item);
1575                    }
1576                    else if item == self.stride * (self.length + 1) {
1577                        self.length += 1;
1578                    }
1579                    else {
1580                        self.bounds.push(item);
1581                    }
1582                }
1583                #[inline(always)]
1584                pub fn clear(&mut self) {
1585                    self.stride = 0;
1586                    self.length = 0;
1587                    self.bounds.clear();
1588                }
1589            }
1590
1591            impl<BC: Deref<Target=[u64]>, CC: CopyAs<u64>> Strides<BC, CC> {
1592                #[inline(always)]
1593                pub fn bounds(&self, index: usize) -> (usize, usize) {
1594                    let stride = self.stride.copy_as();
1595                    let length = self.length.copy_as();
1596                    let index = index as u64;
1597                    let lower = if index == 0 { 0 } else {
1598                        let index = index - 1;
1599                        if index < length { (index+1) * stride } else { self.bounds[(index - length) as usize] }
1600                    } as usize;
1601                    let upper = if index < length { (index+1) * stride } else { self.bounds[(index - length) as usize] } as usize;
1602                    (lower, upper)
1603                }
1604            }
1605            impl<BC: Len, CC: CopyAs<u64>> Strides<BC, CC> {
1606                #[inline(always)] pub fn strided(&self) -> Option<u64> {
1607                    if self.bounds.is_empty() {
1608                        Some(self.stride.copy_as())
1609                    }
1610                    else { None }
1611                }
1612            }
1613        }
1614
1615        #[cfg(test)]
1616        mod test {
1617            #[test]
1618            fn round_trip() {
1619
1620                use crate::common::{Index, Push, Len};
1621                use crate::{Container, Vecs};
1622                use crate::primitive::offsets::{Strides, Fixeds};
1623
1624                let mut cols = Vecs::<Vec::<i32>, Strides>::default();
1625                for i in 0 .. 100 {
1626                    cols.push(&[1i32, 2, i]);
1627                }
1628
1629                let cols = Vecs {
1630                    bounds: TryInto::<Fixeds<3>>::try_into(cols.bounds).unwrap(),
1631                    values: cols.values,
1632                };
1633
1634                assert_eq!(cols.borrow().len(), 100);
1635                for i in 0 .. 100 {
1636                    assert_eq!(cols.borrow().get(i).len(), 3);
1637                }
1638
1639                let mut cols = Vecs {
1640                    bounds: Strides::new(3, cols.bounds.count),
1641                    values: cols.values
1642                };
1643
1644                cols.push(&[0, 0]);
1645                assert!(TryInto::<Fixeds<3>>::try_into(cols.bounds).is_err());
1646            }
1647        }
1648    }
1649
1650    pub use empty::Empties;
1651    /// A columnar store for `()`.
1652    mod empty {
1653
1654        use crate::common::index::CopyAs;
1655        use crate::{Clear, Columnar, Container, Len, IndexMut, Index, Push, HeapSize};
1656
1657        #[derive(Copy, Clone, Debug, Default)]
1658        pub struct Empties<CC = u64> { pub count: CC, pub empty: () }
1659
1660        impl Columnar for () {
1661            #[inline(always)]
1662            fn into_owned<'a>(_other: crate::Ref<'a, Self>) -> Self { }
1663            type Container = Empties;
1664        }
1665
1666        impl Container for Empties {
1667            type Ref<'a> = ();
1668            type Borrowed<'a> = Empties<&'a u64>;
1669            #[inline(always)]
1670            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> { Empties { count: &self.count, empty: () } }
1671            #[inline(always)]
1672            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where Self: 'a {
1673                Empties { count: thing.count, empty: () }
1674            }
1675            #[inline(always)]
1676            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1677
1678            #[inline(always)]
1679            fn extend_from_self(&mut self, _other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1680                self.count += range.len() as u64;
1681            }
1682
1683            fn reserve_for<'a, I>(&mut self, _selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone { }
1684        }
1685
1686        impl<CC: CopyAs<u64>> Len for Empties<CC> {
1687            #[inline(always)] fn len(&self) -> usize { self.count.copy_as() as usize }
1688        }
1689        impl<CC> IndexMut for Empties<CC> {
1690            type IndexMut<'a> = &'a mut () where CC: 'a;
1691            // TODO: panic if out of bounds?
1692            #[inline(always)] fn get_mut(&mut self, _index: usize) -> Self::IndexMut<'_> { &mut self.empty }
1693        }
1694        impl<CC> Index for Empties<CC> {
1695            type Ref = ();
1696            #[inline(always)]
1697            fn get(&self, _index: usize) -> Self::Ref { }
1698        }
1699        impl<'a, CC> Index for &'a Empties<CC> {
1700            type Ref = &'a ();
1701            #[inline(always)]
1702            fn get(&self, _index: usize) -> Self::Ref { &() }
1703        }
1704        impl Push<()> for Empties {
1705            // TODO: check for overflow?
1706            #[inline(always)]
1707            fn push(&mut self, _item: ()) { self.count += 1; }
1708            #[inline(always)]
1709            fn extend(&mut self, iter: impl IntoIterator<Item=()>) {
1710                self.count += iter.into_iter().count() as u64;
1711            }
1712        }
1713        impl<'a> Push<&'a ()> for Empties {
1714            // TODO: check for overflow?
1715            #[inline(always)]
1716            fn push(&mut self, _item: &()) { self.count += 1; }
1717            #[inline(always)]
1718            fn extend(&mut self, iter: impl IntoIterator<Item=&'a ()>) {
1719                self.count += iter.into_iter().count() as u64;
1720            }
1721        }
1722
1723        impl HeapSize for Empties {
1724            #[inline(always)]
1725            fn heap_size(&self) -> (usize, usize) { (0, 0) }
1726        }
1727        impl Clear for Empties {
1728            #[inline(always)]
1729            fn clear(&mut self) { self.count = 0; }
1730        }
1731
1732        impl<'a> crate::AsBytes<'a> for crate::primitive::Empties<&'a u64> {
1733            #[inline(always)]
1734            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1735                std::iter::once((8, bytemuck::cast_slice(std::slice::from_ref(self.count))))
1736            }
1737        }
1738        impl<'a> crate::FromBytes<'a> for crate::primitive::Empties<&'a u64> {
1739            #[inline(always)]
1740            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1741                Self { count: &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0], empty: () }
1742            }
1743        }
1744    }
1745
1746    pub use boolean::Bools;
1747    /// A columnar store for `bool`.
1748    mod boolean {
1749
1750        use crate::common::index::CopyAs;
1751        use crate::{Container, Clear, Len, Index, IndexAs, Push, HeapSize};
1752
1753        /// A store for maintaining `Vec<bool>`.
1754        #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1755        #[derive(Copy, Clone, Debug, Default, PartialEq)]
1756        pub struct Bools<VC = Vec<u64>, WC = u64> {
1757            /// The bundles of bits that form complete `u64` values.
1758            pub values: VC,
1759            /// The work-in-progress bits that are not yet complete.
1760            pub last_word: WC,
1761            /// The number of set bits in `bits.last()`.
1762            pub last_bits: WC,
1763        }
1764
1765        impl crate::Columnar for bool {
1766            #[inline(always)]
1767            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
1768            type Container = Bools;
1769        }
1770
1771        impl<VC: crate::common::PushIndexAs<u64>> Container for Bools<VC> {
1772            type Ref<'a> = bool;
1773            type Borrowed<'a> = Bools<VC::Borrowed<'a>, &'a u64> where VC: 'a;
1774            #[inline(always)]
1775            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1776                Bools {
1777                    values: self.values.borrow(),
1778                    last_word: &self.last_word,
1779                    last_bits: &self.last_bits,
1780                }
1781            }
1782            #[inline(always)]
1783            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where VC: 'a {
1784                Bools {
1785                    values: VC::reborrow(thing.values),
1786                    last_word: thing.last_word,
1787                    last_bits: thing.last_bits,
1788                }
1789            }
1790            #[inline(always)]
1791            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1792
1793            // TODO: There is probably a smart way to implement `extend_from_slice`, but it isn't trivial due to alignment.
1794
1795            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1796                self.values.reserve_for(selves.map(|x| x.values))
1797            }
1798        }
1799
1800        impl<'a, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for crate::primitive::Bools<VC, &'a u64> {
1801            #[inline(always)]
1802            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1803                let iter = self.values.as_bytes();
1804                let iter = crate::chain_one(iter, (std::mem::align_of::<u64>() as u64, bytemuck::cast_slice(std::slice::from_ref(self.last_word))));
1805                crate::chain_one(iter, (std::mem::align_of::<u64>() as u64, bytemuck::cast_slice(std::slice::from_ref(self.last_bits))))
1806            }
1807        }
1808
1809        impl<'a, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for crate::primitive::Bools<VC, &'a u64> {
1810            #[inline(always)]
1811            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1812                let values = crate::FromBytes::from_bytes(bytes);
1813                let last_word = &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0];
1814                let last_bits = &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0];
1815                Self { values, last_word, last_bits }
1816            }
1817        }
1818
1819        impl<VC: Len, WC: CopyAs<u64>> Len for Bools<VC, WC> {
1820            #[inline(always)] fn len(&self) -> usize { self.values.len() * 64 + (self.last_bits.copy_as() as usize) }
1821        }
1822
1823        impl<VC: Len + IndexAs<u64>, WC: CopyAs<u64>> Index for Bools<VC, WC> {
1824            type Ref = bool;
1825            #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
1826                let block = index / 64;
1827                let word = if block == self.values.len() {
1828                    self.last_word.copy_as()
1829                } else {
1830                    self.values.index_as(block)
1831                };
1832                let bit = index % 64;
1833                (word >> bit) & 1 == 1
1834            }
1835        }
1836
1837        impl<VC: Len + IndexAs<u64>, WC: CopyAs<u64>> Index for &Bools<VC, WC> {
1838            type Ref = bool;
1839            #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
1840                (*self).get(index)
1841            }
1842        }
1843
1844        impl<VC: for<'a> Push<&'a u64>> Push<bool> for Bools<VC> {
1845            #[inline]
1846            fn push(&mut self, bit: bool) {
1847                self.last_word |= (bit as u64) << self.last_bits;
1848                self.last_bits += 1;
1849                // If we have a fully formed word, commit it to `self.values`.
1850                if self.last_bits == 64 {
1851                    self.values.push(&self.last_word);
1852                    self.last_word = 0;
1853                    self.last_bits = 0;
1854                }
1855            }
1856        }
1857        impl<'a, VC: for<'b> Push<&'b u64>> Push<&'a bool> for Bools<VC> {
1858            #[inline(always)]
1859            fn push(&mut self, bit: &'a bool) {
1860                self.push(*bit)
1861            }
1862        }
1863
1864
1865        impl<VC: Clear> Clear for Bools<VC> {
1866            #[inline(always)]
1867            fn clear(&mut self) {
1868                self.values.clear();
1869                self.last_word = 0;
1870                self.last_bits = 0;
1871            }
1872        }
1873
1874        impl<VC: HeapSize> HeapSize for Bools<VC> {
1875            #[inline(always)]
1876            fn heap_size(&self) -> (usize, usize) {
1877                self.values.heap_size()
1878            }
1879        }
1880    }
1881
1882    pub use duration::Durations;
1883    /// A columnar store for `std::time::Duration`.
1884    mod duration {
1885
1886        use std::time::Duration;
1887        use crate::{Container, Len, Index, IndexAs, Push, Clear, HeapSize};
1888
1889        // `std::time::Duration` is equivalent to `(u64, u32)`, corresponding to seconds and nanoseconds.
1890        #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1891        #[derive(Copy, Clone, Debug, Default, PartialEq)]
1892        pub struct Durations<SC = Vec<u64>, NC = Vec<u32>> {
1893            pub seconds: SC,
1894            pub nanoseconds: NC,
1895        }
1896
1897        impl crate::Columnar for Duration {
1898            #[inline(always)]
1899            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
1900            type Container = Durations;
1901        }
1902
1903        impl<SC: crate::common::PushIndexAs<u64>, NC: crate::common::PushIndexAs<u32>> Container for Durations<SC, NC> {
1904            type Ref<'a> = Duration;
1905            type Borrowed<'a> = Durations<SC::Borrowed<'a>, NC::Borrowed<'a>> where SC: 'a, NC: 'a;
1906            #[inline(always)]
1907            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1908                Durations {
1909                    seconds: self.seconds.borrow(),
1910                    nanoseconds: self.nanoseconds.borrow(),
1911                }
1912            }
1913            #[inline(always)]
1914            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where SC: 'a, NC: 'a {
1915                Durations {
1916                    seconds: SC::reborrow(thing.seconds),
1917                    nanoseconds: NC::reborrow(thing.nanoseconds),
1918                }
1919            }
1920            #[inline(always)]
1921            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1922
1923            #[inline(always)]
1924            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1925                self.seconds.extend_from_self(other.seconds, range.clone());
1926                self.nanoseconds.extend_from_self(other.nanoseconds, range);
1927            }
1928
1929            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1930                self.seconds.reserve_for(selves.clone().map(|x| x.seconds));
1931                self.nanoseconds.reserve_for(selves.map(|x| x.nanoseconds));
1932            }
1933        }
1934
1935        impl<'a, SC: crate::AsBytes<'a>, NC: crate::AsBytes<'a>> crate::AsBytes<'a> for crate::primitive::Durations<SC, NC> {
1936            #[inline(always)]
1937            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1938                crate::chain(self.seconds.as_bytes(), self.nanoseconds.as_bytes())
1939            }
1940        }
1941        impl<'a, SC: crate::FromBytes<'a>, NC: crate::FromBytes<'a>> crate::FromBytes<'a> for crate::primitive::Durations<SC, NC> {
1942            #[inline(always)]
1943            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1944                Self {
1945                    seconds: crate::FromBytes::from_bytes(bytes),
1946                    nanoseconds: crate::FromBytes::from_bytes(bytes),
1947                }
1948            }
1949        }
1950
1951        impl<SC: Len, NC> Len for Durations<SC, NC> {
1952            #[inline(always)] fn len(&self) -> usize { self.seconds.len() }
1953        }
1954
1955        impl<SC: IndexAs<u64>, NC: IndexAs<u32>> Index for Durations<SC, NC> {
1956            type Ref = Duration;
1957            #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
1958                Duration::new(self.seconds.index_as(index), self.nanoseconds.index_as(index))
1959            }
1960        }
1961
1962        impl<SC: for<'a> Push<&'a u64>, NC: for<'a> Push<&'a u32>> Push<std::time::Duration> for Durations<SC, NC> {
1963            #[inline]
1964            fn push(&mut self, item: std::time::Duration) {
1965                self.seconds.push(&item.as_secs());
1966                self.nanoseconds.push(&item.subsec_nanos());
1967            }
1968        }
1969        impl<'a, SC: for<'b> Push<&'b u64>, NC: for<'b> Push<&'b u32>> Push<&'a std::time::Duration> for Durations<SC, NC> {
1970            #[inline]
1971            fn push(&mut self, item: &'a std::time::Duration) {
1972                self.push(*item)
1973            }
1974        }
1975        impl<'a, SC: Push<&'a u64>, NC: Push<&'a u32>> Push<(&'a u64, &'a u32)> for Durations<SC, NC> {
1976            #[inline]
1977            fn push(&mut self, item: (&'a u64, &'a u32)) {
1978                self.seconds.push(item.0);
1979                self.nanoseconds.push(item.1);
1980            }
1981        }
1982
1983        impl<SC: Clear, NC: Clear> Clear for Durations<SC, NC> {
1984            #[inline(always)]
1985            fn clear(&mut self) {
1986                self.seconds.clear();
1987                self.nanoseconds.clear();
1988            }
1989        }
1990
1991        impl<SC: HeapSize, NC: HeapSize> HeapSize for Durations<SC, NC> {
1992            #[inline(always)]
1993            fn heap_size(&self) -> (usize, usize) {
1994                let (l0, c0) = self.seconds.heap_size();
1995                let (l1, c1) = self.nanoseconds.heap_size();
1996                (l0 + l1, c0 + c1)
1997            }
1998        }
1999    }
2000}
2001
2002pub use string::Strings;
2003pub mod string {
2004
2005    use super::{Clear, Columnar, Container, Len, Index, IndexAs, Push, HeapSize};
2006
2007    /// A stand-in for `Vec<String>`.
2008    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
2009    #[derive(Copy, Clone, Debug, Default, PartialEq)]
2010    pub struct Strings<BC = Vec<u64>, VC = Vec<u8>> {
2011        /// Bounds container; provides indexed access to offsets.
2012        pub bounds: BC,
2013        /// Values container; provides slice access to bytes.
2014        pub values: VC,
2015    }
2016
2017    impl Columnar for String {
2018        #[inline(always)]
2019        fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
2020            self.clear();
2021            self.push_str(other);
2022        }
2023        #[inline(always)]
2024        fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other.to_string() }
2025        type Container = Strings;
2026    }
2027
2028    impl Columnar for Box<str> {
2029        #[inline(always)]
2030        fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
2031            let mut s = String::from(std::mem::take(self));
2032            s.clear();
2033            s.push_str(other);
2034            *self = s.into_boxed_str();
2035        }
2036        #[inline(always)]
2037        fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { Self::from(other) }
2038        type Container = Strings;
2039    }
2040
2041    impl<BC: crate::common::PushIndexAs<u64>> Container for Strings<BC, Vec<u8>> {
2042        type Ref<'a> = &'a str;
2043        type Borrowed<'a> = Strings<BC::Borrowed<'a>, &'a [u8]> where BC: 'a;
2044        #[inline(always)]
2045        fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
2046            Strings {
2047                bounds: self.bounds.borrow(),
2048                values: self.values.borrow(),
2049            }
2050        }
2051        #[inline(always)]
2052        fn reborrow<'c, 'a: 'c>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'c> where BC: 'a {
2053            Strings {
2054                bounds: BC::reborrow(thing.bounds),
2055                values: thing.values,
2056            }
2057        }
2058        #[inline(always)]
2059        fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
2060
2061        #[inline(always)]
2062        fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
2063            if !range.is_empty() {
2064                // Imported bounds will be relative to this starting offset.
2065                let values_len = self.values.len() as u64;
2066
2067                // Push all bytes that we can, all at once.
2068                let other_lower = if range.start == 0 { 0 } else { other.bounds.index_as(range.start-1) };
2069                let other_upper = other.bounds.index_as(range.end-1);
2070                self.values.extend_from_self(other.values, other_lower as usize .. other_upper as usize);
2071
2072                // Each bound needs to be shifted by `values_len - other_lower`.
2073                if values_len == other_lower {
2074                    self.bounds.extend_from_self(other.bounds, range);
2075                }
2076                else {
2077                    for index in range {
2078                        let shifted = other.bounds.index_as(index) - other_lower + values_len;
2079                        self.bounds.push(&shifted)
2080                    }
2081                }
2082            }
2083        }
2084
2085        fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
2086            self.bounds.reserve_for(selves.clone().map(|x| x.bounds));
2087            self.values.reserve_for(selves.map(|x| x.values));
2088        }
2089
2090    }
2091
2092    impl<'a, BC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for Strings<BC, VC> {
2093        #[inline(always)]
2094        fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
2095            crate::chain(self.bounds.as_bytes(), self.values.as_bytes())
2096        }
2097    }
2098    impl<'a, BC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for Strings<BC, VC> {
2099        #[inline(always)]
2100        fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
2101            Self {
2102                bounds: crate::FromBytes::from_bytes(bytes),
2103                values: crate::FromBytes::from_bytes(bytes),
2104            }
2105        }
2106    }
2107
2108    impl<BC: Len, VC> Len for Strings<BC, VC> {
2109        #[inline(always)] fn len(&self) -> usize { self.bounds.len() }
2110    }
2111
2112    impl<'a, BC: Len+IndexAs<u64>> Index for Strings<BC, &'a [u8]> {
2113        type Ref = &'a str;
2114        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
2115            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
2116            let upper = self.bounds.index_as(index);
2117            let lower: usize = lower.try_into().expect("bounds must fit in `usize`");
2118            let upper: usize = upper.try_into().expect("bounds must fit in `usize`");
2119            std::str::from_utf8(&self.values[lower .. upper]).expect("&[u8] must be valid utf8")
2120        }
2121    }
2122    impl<'a, BC: Len+IndexAs<u64>> Index for &'a Strings<BC, Vec<u8>> {
2123        type Ref = &'a str;
2124        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
2125            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
2126            let upper = self.bounds.index_as(index);
2127            let lower: usize = lower.try_into().expect("bounds must fit in `usize`");
2128            let upper: usize = upper.try_into().expect("bounds must fit in `usize`");
2129            std::str::from_utf8(&self.values[lower .. upper]).expect("&[u8] must be valid utf8")
2130        }
2131    }
2132
2133    // This is a simpler implementation, but it leads to a performance regression
2134    // for Strings and str because it loses access to `Vec::extend_from_slice`.
2135    //
2136    // impl<BC: Push<u64>, D: std::fmt::Display> Push<D> for Strings<BC> {
2137    //     #[inline(always)]
2138    //     fn push(&mut self, item: D) {
2139    //         use std::io::Write;
2140    //         write!(self.values, "{}", item).unwrap();
2141    //         self.bounds.push(self.values.len() as u64);
2142    //     }
2143    // }
2144
2145    impl<BC: for<'a> Push<&'a u64>> Push<&String> for Strings<BC> {
2146        #[inline(always)] fn push(&mut self, item: &String) {
2147            self.values.extend_from_slice(item.as_bytes());
2148            self.bounds.push(&(self.values.len() as u64));
2149        }
2150    }
2151    impl<BC: for<'a> Push<&'a u64>> Push<&str> for Strings<BC> {
2152        #[inline]
2153        fn push(&mut self, item: &str) {
2154            self.values.extend_from_slice(item.as_bytes());
2155            self.bounds.push(&(self.values.len() as u64));
2156        }
2157    }
2158    impl<BC: for<'a> Push<&'a u64>> Push<&Box<str>> for Strings<BC> {
2159        #[inline]
2160        fn push(&mut self, item: &Box<str>) {
2161            self.values.extend_from_slice(item.as_bytes());
2162            self.bounds.push(&(self.values.len() as u64));
2163        }
2164    }
2165    impl<'a, BC: for<'b> Push<&'b u64>> Push<std::fmt::Arguments<'a>> for Strings<BC> {
2166        #[inline]
2167        fn push(&mut self, item: std::fmt::Arguments<'a>) {
2168            use std::io::Write;
2169            self.values.write_fmt(item).expect("write_fmt failed");
2170            self.bounds.push(&(self.values.len() as u64));
2171        }
2172    }
2173    impl<'a, 'b, BC: for<'c> Push<&'c u64>> Push<&'b std::fmt::Arguments<'a>> for Strings<BC> {
2174        #[inline]
2175        fn push(&mut self, item: &'b std::fmt::Arguments<'a>) {
2176            use std::io::Write;
2177            self.values.write_fmt(*item).expect("write_fmt failed");
2178            self.bounds.push(&(self.values.len() as u64));
2179        }
2180    }
2181    impl<BC: Clear, VC: Clear> Clear for Strings<BC, VC> {
2182        #[inline(always)]
2183        fn clear(&mut self) {
2184            self.bounds.clear();
2185            self.values.clear();
2186        }
2187    }
2188    impl<BC: HeapSize, VC: HeapSize> HeapSize for Strings<BC, VC> {
2189        #[inline(always)]
2190        fn heap_size(&self) -> (usize, usize) {
2191            let (l0, c0) = self.bounds.heap_size();
2192            let (l1, c1) = self.values.heap_size();
2193            (l0 + l1, c0 + c1)
2194        }
2195    }
2196}
2197
2198pub use vector::Vecs;
2199pub mod vector {
2200
2201    use super::{Clear, Columnar, Container, Len, IndexMut, Index, IndexAs, Push, HeapSize, Slice};
2202
2203    /// A stand-in for `Vec<Vec<T>>` for complex `T`.
2204    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
2205    #[derive(Debug, Default, Copy, Clone, PartialEq)]
2206    pub struct Vecs<TC, BC = Vec<u64>> {
2207        pub bounds: BC,
2208        pub values: TC,
2209    }
2210
2211    impl<T: Columnar> Columnar for Vec<T> {
2212        #[inline(always)]
2213        fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
2214            self.truncate(other.len());
2215            let mut other_iter = other.into_iter();
2216            for (s, o) in self.iter_mut().zip(&mut other_iter) {
2217                T::copy_from(s, o);
2218            }
2219            for o in other_iter {
2220                self.push(T::into_owned(o));
2221            }
2222        }
2223        #[inline(always)]
2224        fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
2225            other.into_iter().map(|x| T::into_owned(x)).collect()
2226        }
2227        type Container = Vecs<T::Container>;
2228    }
2229
2230    impl<T: Columnar, const N: usize> Columnar for [T; N] {
2231        #[inline(always)]
2232        fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
2233            for (s, o) in self.iter_mut().zip(other.into_iter()) {
2234                T::copy_from(s, o);
2235            }
2236        }
2237        #[inline(always)]
2238        fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
2239            let vec: Vec<_> = other.into_iter().map(|x| T::into_owned(x)).collect();
2240            match vec.try_into() {
2241                Ok(array) => array,
2242                Err(_) => panic!("wrong length"),
2243            }
2244        }
2245        type Container = Vecs<T::Container>;
2246    }
2247
2248    impl<T: Columnar, const N: usize> Columnar for smallvec::SmallVec<[T; N]> {
2249        #[inline(always)]
2250        fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
2251            self.truncate(other.len());
2252            let mut other_iter = other.into_iter();
2253            for (s, o) in self.iter_mut().zip(&mut other_iter) {
2254                T::copy_from(s, o);
2255            }
2256            for o in other_iter {
2257                self.push(T::into_owned(o));
2258            }
2259        }
2260        #[inline(always)]
2261        fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
2262            other.into_iter().map(|x| T::into_owned(x)).collect()
2263        }
2264        type Container = Vecs<T::Container>;
2265    }
2266
2267    impl<BC: crate::common::PushIndexAs<u64>, TC: Container> Container for Vecs<TC, BC> {
2268        type Ref<'a> = Slice<TC::Borrowed<'a>> where TC: 'a;
2269        type Borrowed<'a> = Vecs<TC::Borrowed<'a>, BC::Borrowed<'a>> where BC: 'a, TC: 'a;
2270        #[inline(always)]
2271        fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
2272            Vecs {
2273                bounds: self.bounds.borrow(),
2274                values: self.values.borrow(),
2275            }
2276        }
2277        #[inline(always)]
2278        fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where BC: 'a, TC: 'a {
2279            Vecs {
2280                bounds: BC::reborrow(thing.bounds),
2281                values: TC::reborrow(thing.values),
2282            }
2283        }
2284        #[inline(always)]
2285        fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a {
2286            thing.map(|x| TC::reborrow(x))
2287        }
2288
2289        #[inline(always)]
2290        fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
2291            if !range.is_empty() {
2292                // Imported bounds will be relative to this starting offset.
2293                let values_len = self.values.len() as u64;
2294
2295                // Push all bytes that we can, all at once.
2296                let other_lower = if range.start == 0 { 0 } else { other.bounds.index_as(range.start-1) };
2297                let other_upper = other.bounds.index_as(range.end-1);
2298                self.values.extend_from_self(other.values, other_lower as usize .. other_upper as usize);
2299
2300                // Each bound needs to be shifted by `values_len - other_lower`.
2301                if values_len == other_lower {
2302                    self.bounds.extend_from_self(other.bounds, range);
2303                }
2304                else {
2305                    for index in range {
2306                        let shifted = other.bounds.index_as(index) - other_lower + values_len;
2307                        self.bounds.push(&shifted)
2308                    }
2309                }
2310            }
2311        }
2312
2313        fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
2314            self.bounds.reserve_for(selves.clone().map(|x| x.bounds));
2315            self.values.reserve_for(selves.map(|x| x.values));
2316        }
2317    }
2318
2319    impl<'a, TC: crate::AsBytes<'a>, BC: crate::AsBytes<'a>> crate::AsBytes<'a> for Vecs<TC, BC> {
2320        #[inline(always)]
2321        fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
2322            crate::chain(self.bounds.as_bytes(), self.values.as_bytes())
2323        }
2324    }
2325    impl<'a, TC: crate::FromBytes<'a>, BC: crate::FromBytes<'a>> crate::FromBytes<'a> for Vecs<TC, BC> {
2326        #[inline(always)]
2327        fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
2328            Self {
2329                bounds: crate::FromBytes::from_bytes(bytes),
2330                values: crate::FromBytes::from_bytes(bytes),
2331            }
2332        }
2333    }
2334
2335    impl<TC: Len> Vecs<TC> {
2336        #[inline]
2337        pub fn push_iter<I>(&mut self, iter: I) where I: IntoIterator, TC: Push<I::Item> {
2338            self.values.extend(iter);
2339            self.bounds.push(self.values.len() as u64);
2340        }
2341    }
2342
2343    impl<TC, BC: Len> Len for Vecs<TC, BC> {
2344        #[inline(always)] fn len(&self) -> usize { self.bounds.len() }
2345    }
2346
2347    impl<TC: Copy, BC: Len+IndexAs<u64>> Index for Vecs<TC, BC> {
2348        type Ref = Slice<TC>;
2349        #[inline(always)]
2350        fn get(&self, index: usize) -> Self::Ref {
2351            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
2352            let upper = self.bounds.index_as(index);
2353            Slice::new(lower, upper, self.values)
2354        }
2355    }
2356    impl<'a, TC, BC: Len+IndexAs<u64>> Index for &'a Vecs<TC, BC> {
2357        type Ref = Slice<&'a TC>;
2358        #[inline(always)]
2359        fn get(&self, index: usize) -> Self::Ref {
2360            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
2361            let upper = self.bounds.index_as(index);
2362            Slice::new(lower, upper, &self.values)
2363        }
2364    }
2365    impl<TC, BC: Len+IndexAs<u64>> IndexMut for Vecs<TC, BC> {
2366        type IndexMut<'a> = Slice<&'a mut TC> where TC: 'a, BC: 'a;
2367
2368        #[inline(always)]
2369        fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
2370            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
2371            let upper = self.bounds.index_as(index);
2372            Slice::new(lower, upper, &mut self.values)
2373        }
2374    }
2375
2376    impl<'a, TC: Container, BC: for<'b> Push<&'b u64>> Push<Slice<TC::Borrowed<'a>>> for Vecs<TC, BC> {
2377        #[inline]
2378        fn push(&mut self, item: Slice<TC::Borrowed<'a>>) {
2379            self.values.extend_from_self(item.slice, item.lower .. item.upper);
2380            self.bounds.push(&(self.values.len() as u64));
2381        }
2382    }
2383
2384    impl<I: IntoIterator, TC: Push<I::Item> + Len, BC: for<'a> Push<&'a u64>> Push<I> for Vecs<TC, BC> {
2385        #[inline]
2386        fn push(&mut self, item: I) {
2387            self.values.extend(item);
2388            self.bounds.push(&(self.values.len() as u64));
2389        }
2390    }
2391
2392    impl<TC: Clear, BC: Clear> Clear for Vecs<TC, BC> {
2393        #[inline(always)]
2394        fn clear(&mut self) {
2395            self.bounds.clear();
2396            self.values.clear();
2397        }
2398    }
2399
2400    impl<TC: HeapSize, BC: HeapSize> HeapSize for Vecs<TC, BC> {
2401        fn heap_size(&self) -> (usize, usize) {
2402            let (l0, c0) = self.bounds.heap_size();
2403            let (l1, c1) = self.values.heap_size();
2404            (l0 + l1, c0 + c1)
2405        }
2406    }
2407}
2408
2409#[allow(non_snake_case)]
2410pub mod tuple {
2411
2412    use super::{Clear, Columnar, Container, Len, IndexMut, Index, Push, HeapSize};
2413
2414    // Implementations for tuple types.
2415    // These are all macro based, because the implementations are very similar.
2416    // The macro requires two names, one for the store and one for pushable types.
2417    macro_rules! tuple_impl {
2418        ( $($name:ident,$name2:ident,$idx:tt)+) => (
2419
2420            impl<$($name: Columnar),*> Columnar for ($($name,)*) {
2421                #[inline(always)]
2422                fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
2423                    let ($($name,)*) = self;
2424                    let ($($name2,)*) = other;
2425                    $(crate::Columnar::copy_from($name, $name2);)*
2426                }
2427                #[inline(always)]
2428                fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
2429                    let ($($name2,)*) = other;
2430                    ($($name::into_owned($name2),)*)
2431                }
2432                type Container = ($($name::Container,)*);
2433            }
2434            impl<$($name2: Container,)*> Container for ($($name2,)*) {
2435                type Ref<'a> = ($($name2::Ref<'a>,)*) where $($name2: 'a,)*;
2436                type Borrowed<'a> = ($($name2::Borrowed<'a>,)*) where $($name2: 'a,)*;
2437                #[inline(always)]
2438                fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
2439                    let ($($name,)*) = self;
2440                    ($($name.borrow(),)*)
2441                }
2442                #[inline(always)]
2443                fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where $($name2: 'a,)* {
2444                    let ($($name,)*) = thing;
2445                    ($($name2::reborrow($name),)*)
2446                }
2447                #[inline(always)]
2448                fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a {
2449                    let ($($name2,)*) = thing;
2450                    ($($name2::reborrow_ref($name2),)*)
2451                }
2452
2453                #[inline(always)]
2454                fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
2455                    let ($($name,)*) = self;
2456                    let ($($name2,)*) = other;
2457                    $( $name.extend_from_self($name2, range.clone()); )*
2458                }
2459
2460                fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
2461                    let ($($name,)*) = self;
2462                    $( $name.reserve_for(selves.clone().map(|x| x.$idx)); )*
2463                }
2464            }
2465
2466            #[allow(non_snake_case)]
2467            impl<'a, $($name: crate::AsBytes<'a>),*> crate::AsBytes<'a> for ($($name,)*) {
2468                #[inline(always)]
2469                fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
2470                    let ($($name,)*) = self;
2471                    let iter = None.into_iter();
2472                    $( let iter = crate::chain(iter, $name.as_bytes()); )*
2473                    iter
2474                }
2475            }
2476            impl<'a, $($name: crate::FromBytes<'a>),*> crate::FromBytes<'a> for ($($name,)*) {
2477                #[inline(always)]
2478                #[allow(non_snake_case)]
2479                fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
2480                    $(let $name = crate::FromBytes::from_bytes(bytes);)*
2481                    ($($name,)*)
2482                }
2483            }
2484
2485            impl<$($name: Len),*> Len for ($($name,)*) {
2486                #[inline(always)]
2487                fn len(&self) -> usize {
2488                    self.0.len()
2489                }
2490            }
2491            impl<$($name: Clear),*> Clear for ($($name,)*) {
2492                #[inline(always)]
2493                fn clear(&mut self) {
2494                    let ($($name,)*) = self;
2495                    $($name.clear();)*
2496                }
2497            }
2498            impl<$($name: HeapSize),*> HeapSize for ($($name,)*) {
2499                #[inline(always)]
2500                fn heap_size(&self) -> (usize, usize) {
2501                    let ($($name,)*) = self;
2502                    let mut l = 0;
2503                    let mut c = 0;
2504                    $(let (l0, c0) = $name.heap_size(); l += l0; c += c0;)*
2505                    (l, c)
2506                }
2507            }
2508            impl<$($name: Index),*> Index for ($($name,)*) {
2509                type Ref = ($($name::Ref,)*);
2510                #[inline(always)]
2511                fn get(&self, index: usize) -> Self::Ref {
2512                    let ($($name,)*) = self;
2513                    ($($name.get(index),)*)
2514                }
2515            }
2516            impl<'a, $($name),*> Index for &'a ($($name,)*) where $( &'a $name: Index),* {
2517                type Ref = ($(<&'a $name as Index>::Ref,)*);
2518                #[inline(always)]
2519                fn get(&self, index: usize) -> Self::Ref {
2520                    let ($($name,)*) = self;
2521                    ($($name.get(index),)*)
2522                }
2523            }
2524
2525            impl<$($name: IndexMut),*> IndexMut for ($($name,)*) {
2526                type IndexMut<'a> = ($($name::IndexMut<'a>,)*) where $($name: 'a),*;
2527                #[inline(always)]
2528                fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
2529                    let ($($name,)*) = self;
2530                    ($($name.get_mut(index),)*)
2531                }
2532            }
2533            impl<$($name2, $name: Push<$name2>),*> Push<($($name2,)*)> for ($($name,)*) {
2534                #[inline]
2535                fn push(&mut self, item: ($($name2,)*)) {
2536                    let ($($name,)*) = self;
2537                    let ($($name2,)*) = item;
2538                    $($name.push($name2);)*
2539                }
2540            }
2541            impl<'a, $($name2, $name: Push<&'a $name2>),*> Push<&'a ($($name2,)*)> for ($($name,)*) {
2542                #[inline]
2543                fn push(&mut self, item: &'a ($($name2,)*)) {
2544                    let ($($name,)*) = self;
2545                    let ($($name2,)*) = item;
2546                    $($name.push($name2);)*
2547                }
2548            }
2549        )
2550    }
2551
2552    tuple_impl!(A,AA,0);
2553    tuple_impl!(A,AA,0 B,BB,1);
2554    tuple_impl!(A,AA,0 B,BB,1 C,CC,2);
2555    tuple_impl!(A,AA,0 B,BB,1 C,CC,2 D,DD,3);
2556    tuple_impl!(A,AA,0 B,BB,1 C,CC,2 D,DD,3 E,EE,4);
2557    tuple_impl!(A,AA,0 B,BB,1 C,CC,2 D,DD,3 E,EE,4 F,FF,5);
2558    tuple_impl!(A,AA,0 B,BB,1 C,CC,2 D,DD,3 E,EE,4 F,FF,5 G,GG,6);
2559    tuple_impl!(A,AA,0 B,BB,1 C,CC,2 D,DD,3 E,EE,4 F,FF,5 G,GG,6 H,HH,7);
2560    tuple_impl!(A,AA,0 B,BB,1 C,CC,2 D,DD,3 E,EE,4 F,FF,5 G,GG,6 H,HH,7 I,II,8);
2561    tuple_impl!(A,AA,0 B,BB,1 C,CC,2 D,DD,3 E,EE,4 F,FF,5 G,GG,6 H,HH,7 I,II,8 J,JJ,9);
2562
2563    #[cfg(test)]
2564    mod test {
2565        #[test]
2566        fn round_trip() {
2567
2568            use crate::common::{Index, Push, HeapSize, Len};
2569
2570            let mut column: crate::ContainerOf<(u64, u8, String)> = Default::default();
2571            for i in 0..100 {
2572                column.push((i, i as u8, &i.to_string()));
2573                column.push((i, i as u8, &"".to_string()));
2574            }
2575
2576            assert_eq!(column.len(), 200);
2577            assert_eq!(column.heap_size(), (3590, 4608));
2578
2579            for i in 0..100u64 {
2580                assert_eq!((&column).get((2*i+0) as usize), (&i, &(i as u8), i.to_string().as_str()));
2581                assert_eq!((&column).get((2*i+1) as usize), (&i, &(i as u8), ""));
2582            }
2583
2584            // Compare to the heap size of a `Vec<Option<usize>>`.
2585            let mut column: Vec<(u64, u8, String)> = Default::default();
2586            for i in 0..100 {
2587                column.push((i, i as u8, i.to_string()));
2588                column.push((i, i as u8, "".to_string()));
2589            }
2590            // NB: Rust seems to change the capacities across versions (1.88 != 1.89),
2591            // so we just compare the allocated regions to avoid updating the MSRV.
2592            assert_eq!(column.heap_size().0, 8190);
2593
2594        }
2595    }
2596}
2597
2598pub use sums::{rank_select::RankSelect, result::Results, option::Options};
2599/// Containers for enumerations ("sum types") that store variants separately.
2600///
2601/// The main work of these types is storing a discriminant and index efficiently,
2602/// as containers for each of the variant types can hold the actual data.
2603pub mod sums {
2604
2605    /// Stores for maintaining discriminants, and associated sequential indexes.
2606    ///
2607    /// The sequential indexes are not explicitly maintained, but are supported
2608    /// by a `rank(index)` function that indicates how many of a certain variant
2609    /// precede the given index. While this could potentially be done with a scan
2610    /// of all preceding discriminants, the stores maintain running accumulations
2611    /// that make the operation constant time (using additional amortized memory).
2612    pub mod rank_select {
2613
2614        use crate::primitive::Bools;
2615        use crate::common::index::CopyAs;
2616        use crate::{Container, Len, Index, IndexAs, Push, Clear, HeapSize};
2617
2618        /// A store for maintaining `Vec<bool>` with fast `rank` and `select` access.
2619        ///
2620        /// The design is to have `u64` running counts for each block of 1024 bits,
2621        /// which are roughly the size of a cache line. This is roughly 6% overhead,
2622        /// above the bits themselves, which seems pretty solid.
2623    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
2624        #[derive(Copy, Clone, Debug, Default, PartialEq)]
2625        pub struct RankSelect<CC = Vec<u64>, VC = Vec<u64>, WC = u64> {
2626            /// Counts of the number of cumulative set (true) bits, *after* each block of 1024 bits.
2627            pub counts: CC,
2628            /// The bits themselves.
2629            pub values: Bools<VC, WC>,
2630        }
2631
2632        impl<CC: crate::common::PushIndexAs<u64>, VC: crate::common::PushIndexAs<u64>> RankSelect<CC, VC> {
2633            pub fn borrow<'a>(&'a self) -> RankSelect<CC::Borrowed<'a>, VC::Borrowed<'a>, &'a u64> {
2634                RankSelect {
2635                    counts: self.counts.borrow(),
2636                    values: self.values.borrow(),
2637                }
2638            }
2639            #[inline(always)]
2640            pub fn reborrow<'b, 'a: 'b>(thing: RankSelect<CC::Borrowed<'a>, VC::Borrowed<'a>, &'a u64>) -> RankSelect<CC::Borrowed<'b>, VC::Borrowed<'b>, &'b u64> {
2641                RankSelect {
2642                    counts: CC::reborrow(thing.counts),
2643                    values: Bools::<VC, u64>::reborrow(thing.values),
2644                }
2645            }
2646        }
2647
2648        impl<'a, CC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for RankSelect<CC, VC, &'a u64> {
2649            #[inline(always)]
2650            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
2651                crate::chain(self.counts.as_bytes(), self.values.as_bytes())
2652            }
2653        }
2654        impl<'a, CC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for RankSelect<CC, VC, &'a u64> {
2655            #[inline(always)]
2656            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
2657                Self {
2658                    counts: crate::FromBytes::from_bytes(bytes),
2659                    values: crate::FromBytes::from_bytes(bytes),
2660                }
2661            }
2662        }
2663
2664
2665        impl<CC, VC: Len + IndexAs<u64>, WC: CopyAs<u64>> RankSelect<CC, VC, WC> {
2666            #[inline(always)]
2667            pub fn get(&self, index: usize) -> bool {
2668                Index::get(&self.values, index)
2669            }
2670        }
2671        impl<CC: Len + IndexAs<u64>, VC: Len + IndexAs<u64>, WC: CopyAs<u64>> RankSelect<CC, VC, WC> {
2672            /// The number of set bits *strictly* preceding `index`.
2673            ///
2674            /// This number is accumulated first by reading out of `self.counts` at the correct position,
2675            /// then by summing the ones in strictly prior `u64` entries, then by counting the ones in the
2676            /// masked `u64` in which the bit lives.
2677            pub fn rank(&self, index: usize) -> usize {
2678                let bit = index % 64;
2679                let block = index / 64;
2680                let chunk = block / 16;
2681                let mut count = if chunk > 0 { self.counts.index_as(chunk - 1) as usize } else { 0 };
2682                for pos in (16 * chunk) .. block {
2683                    count += self.values.values.index_as(pos).count_ones() as usize;
2684                }
2685                // TODO: Panic if out of bounds?
2686                let intra_word = if block == self.values.values.len() { self.values.last_word.copy_as() } else { self.values.values.index_as(block) };
2687                count += (intra_word & ((1 << bit) - 1)).count_ones() as usize;
2688                count
2689            }
2690            /// The index of the `rank`th set bit, should one exist.
2691            pub fn select(&self, rank: u64) -> Option<usize> {
2692                let mut chunk = 0;
2693                // Step one is to find the position in `counts` where we go from `rank` to `rank + 1`.
2694                // The position we are looking for is within that chunk of bits.
2695                // TODO: Binary search is likely better at many scales. Rust's binary search is .. not helpful with ties.
2696                while chunk < self.counts.len() && self.counts.index_as(chunk) <= rank {
2697                    chunk += 1;
2698                }
2699                let mut count = if chunk < self.counts.len() { self.counts.index_as(chunk) } else { 0 };
2700                // Step two is to find the position within that chunk where the `rank`th bit is.
2701                let mut block = 16 * chunk;
2702                while block < self.values.values.len() && count + (self.values.values.index_as(block).count_ones() as u64) <= rank {
2703                    count += self.values.values.index_as(block).count_ones() as u64;
2704                    block += 1;
2705                }
2706                // Step three is to search the last word for the location, or return `None` if we run out of bits.
2707                let last_bits = if block == self.values.values.len() { self.values.last_bits.copy_as() as usize } else { 64 };
2708                let last_word = if block == self.values.values.len() { self.values.last_word.copy_as() } else { self.values.values.index_as(block) };
2709                for shift in 0 .. last_bits {
2710                    if ((last_word >> shift) & 0x01 == 0x01) && count + 1 == rank {
2711                        return Some(64 * block + shift);
2712                    }
2713                    count += (last_word >> shift) & 0x01;
2714                }
2715
2716                None
2717            }
2718        }
2719
2720        impl<CC, VC: Len, WC: CopyAs<u64>> RankSelect<CC, VC, WC> {
2721            pub fn len(&self) -> usize {
2722                self.values.len()
2723            }
2724        }
2725
2726        // This implementation probably only works for `Vec<u64>` and `Vec<u64>`, but we could fix that.
2727        // Partly, it's hard to name the `Index` flavor that allows one to get back a `u64`.
2728        impl<CC: for<'a> Push<&'a u64> + Len + IndexAs<u64>, VC: for<'a> Push<&'a u64> + Len + IndexAs<u64>> RankSelect<CC, VC> {
2729            #[inline]
2730            pub fn push(&mut self, bit: bool) {
2731                self.values.push(&bit);
2732                while self.counts.len() < self.values.len() / 1024 {
2733                    let mut count = self.counts.last().unwrap_or(0);
2734                    let lower = 16 * self.counts.len();
2735                    let upper = lower + 16;
2736                    for i in lower .. upper {
2737                        count += self.values.values.index_as(i).count_ones() as u64;
2738                    }
2739                    self.counts.push(&count);
2740                }
2741            }
2742        }
2743        impl<CC: Clear, VC: Clear> Clear for RankSelect<CC, VC> {
2744            fn clear(&mut self) {
2745                self.counts.clear();
2746                self.values.clear();
2747            }
2748        }
2749        impl<CC: HeapSize, VC: HeapSize> HeapSize for RankSelect<CC, VC> {
2750            fn heap_size(&self) -> (usize, usize) {
2751                let (l0, c0) = self.counts.heap_size();
2752                let (l1, c1) = self.values.heap_size();
2753                (l0 + l1, c0 + c1)
2754            }
2755        }
2756    }
2757
2758    pub mod result {
2759
2760        use crate::common::index::CopyAs;
2761        use crate::{Clear, Columnar, Container, Len, IndexMut, Index, IndexAs, Push, HeapSize};
2762        use crate::RankSelect;
2763
2764        #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
2765        #[derive(Copy, Clone, Debug, Default, PartialEq)]
2766        pub struct Results<SC, TC, CC=Vec<u64>, VC=Vec<u64>, WC=u64> {
2767            /// Bits set to `true` correspond to `Ok` variants.
2768            pub indexes: RankSelect<CC, VC, WC>,
2769            pub oks: SC,
2770            pub errs: TC,
2771        }
2772
2773        impl<S: Columnar, T: Columnar> Columnar for Result<S, T> {
2774            fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
2775                match (&mut *self, other) {
2776                    (Ok(x), Ok(y)) => x.copy_from(y),
2777                    (Err(x), Err(y)) => x.copy_from(y),
2778                    (_, other) => { *self = Self::into_owned(other); },
2779                }
2780            }
2781            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
2782                match other {
2783                    Ok(y) => Ok(S::into_owned(y)),
2784                    Err(y) => Err(T::into_owned(y)),
2785                }
2786            }
2787            type Container = Results<S::Container, T::Container>;
2788        }
2789
2790        impl<SC: Container, TC: Container> Container for Results<SC, TC> {
2791            type Ref<'a> = Result<SC::Ref<'a>, TC::Ref<'a>> where SC: 'a, TC: 'a;
2792            type Borrowed<'a> = Results<SC::Borrowed<'a>, TC::Borrowed<'a>, &'a [u64], &'a [u64], &'a u64> where SC: 'a, TC: 'a;
2793            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
2794                Results {
2795                    indexes: self.indexes.borrow(),
2796                    oks: self.oks.borrow(),
2797                    errs: self.errs.borrow(),
2798                }
2799            }
2800            #[inline(always)]
2801            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where SC: 'a, TC: 'a {
2802                Results {
2803                    indexes: RankSelect::<Vec<u64>, Vec<u64>>::reborrow(thing.indexes),
2804                    oks: SC::reborrow(thing.oks),
2805                    errs: TC::reborrow(thing.errs),
2806                }
2807            }
2808            #[inline(always)]
2809            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a {
2810                match thing {
2811                    Ok(y) => Ok(SC::reborrow_ref(y)),
2812                    Err(y) => Err(TC::reborrow_ref(y)),
2813                }
2814            }
2815
2816            #[inline(always)]
2817            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
2818                if !range.is_empty() {
2819                    // Starting offsets of each variant in `other`.
2820                    let oks_start = other.indexes.rank(range.start);
2821                    let errs_start = range.start - oks_start;
2822
2823                    // Count the number of `Ok` and `Err` variants as we push, to determine the range.
2824                    // TODO: This could probably be `popcnt` somehow.
2825                    let mut oks = 0;
2826                    for index in range.clone() {
2827                        let bit = other.indexes.get(index);
2828                        self.indexes.push(bit);
2829                        if bit { oks += 1; }
2830                    }
2831                    let errs = range.len() - oks;
2832
2833                    self.oks.extend_from_self(other.oks, oks_start .. oks_start + oks);
2834                    self.errs.extend_from_self(other.errs, errs_start .. errs_start + errs);
2835                }
2836            }
2837
2838            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
2839                // TODO: reserve room in `self.indexes`.
2840                self.oks.reserve_for(selves.clone().map(|x| x.oks));
2841                self.errs.reserve_for(selves.map(|x| x.errs));
2842            }
2843        }
2844
2845        impl<'a, SC: crate::AsBytes<'a>, TC: crate::AsBytes<'a>, CC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for Results<SC, TC, CC, VC, &'a u64> {
2846            #[inline(always)]
2847            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
2848                let iter = self.indexes.as_bytes();
2849                let iter = crate::chain(iter, self.oks.as_bytes());
2850                crate::chain(iter, self.errs.as_bytes())
2851            }
2852        }
2853        impl<'a, SC: crate::FromBytes<'a>, TC: crate::FromBytes<'a>, CC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for Results<SC, TC, CC, VC, &'a u64> {
2854            #[inline(always)]
2855            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
2856                Self {
2857                    indexes: crate::FromBytes::from_bytes(bytes),
2858                    oks: crate::FromBytes::from_bytes(bytes),
2859                    errs: crate::FromBytes::from_bytes(bytes),
2860                }
2861            }
2862        }
2863
2864        impl<SC, TC, CC, VC: Len, WC: CopyAs<u64>> Len for Results<SC, TC, CC, VC, WC> {
2865            #[inline(always)] fn len(&self) -> usize { self.indexes.len() }
2866        }
2867
2868        impl<SC, TC, CC, VC, WC> Index for Results<SC, TC, CC, VC, WC>
2869        where
2870            SC: Index,
2871            TC: Index,
2872            CC: IndexAs<u64> + Len,
2873            VC: IndexAs<u64> + Len,
2874            WC: CopyAs<u64>,
2875        {
2876            type Ref = Result<SC::Ref, TC::Ref>;
2877            #[inline(always)]
2878            fn get(&self, index: usize) -> Self::Ref {
2879                if self.indexes.get(index) {
2880                    Ok(self.oks.get(self.indexes.rank(index)))
2881                } else {
2882                    Err(self.errs.get(index - self.indexes.rank(index)))
2883                }
2884            }
2885        }
2886        impl<'a, SC, TC, CC, VC, WC> Index for &'a Results<SC, TC, CC, VC, WC>
2887        where
2888            &'a SC: Index,
2889            &'a TC: Index,
2890            CC: IndexAs<u64> + Len,
2891            VC: IndexAs<u64> + Len,
2892            WC: CopyAs<u64>,
2893        {
2894            type Ref = Result<<&'a SC as Index>::Ref, <&'a TC as Index>::Ref>;
2895            #[inline(always)]
2896            fn get(&self, index: usize) -> Self::Ref {
2897                if self.indexes.get(index) {
2898                    Ok((&self.oks).get(self.indexes.rank(index)))
2899                } else {
2900                    Err((&self.errs).get(index - self.indexes.rank(index)))
2901                }
2902            }
2903        }
2904
2905        // NB: You are not allowed to change the variant, but can change its contents.
2906        impl<SC: IndexMut, TC: IndexMut, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len> IndexMut for Results<SC, TC, CC, VC> {
2907            type IndexMut<'a> = Result<SC::IndexMut<'a>, TC::IndexMut<'a>> where SC: 'a, TC: 'a, CC: 'a, VC: 'a;
2908            #[inline(always)]
2909            fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
2910                if self.indexes.get(index) {
2911                    Ok(self.oks.get_mut(self.indexes.rank(index)))
2912                } else {
2913                    Err(self.errs.get_mut(index - self.indexes.rank(index)))
2914                }
2915            }
2916        }
2917
2918        impl<S, SC: Push<S>, T, TC: Push<T>> Push<Result<S, T>> for Results<SC, TC> {
2919            #[inline]
2920            fn push(&mut self, item: Result<S, T>) {
2921                match item {
2922                    Ok(item) => {
2923                        self.indexes.push(true);
2924                        self.oks.push(item);
2925                    }
2926                    Err(item) => {
2927                        self.indexes.push(false);
2928                        self.errs.push(item);
2929                    }
2930                }
2931            }
2932        }
2933        impl<'a, S, SC: Push<&'a S>, T, TC: Push<&'a T>> Push<&'a Result<S, T>> for Results<SC, TC> {
2934            #[inline]
2935            fn push(&mut self, item: &'a Result<S, T>) {
2936                match item {
2937                    Ok(item) => {
2938                        self.indexes.push(true);
2939                        self.oks.push(item);
2940                    }
2941                    Err(item) => {
2942                        self.indexes.push(false);
2943                        self.errs.push(item);
2944                    }
2945                }
2946            }
2947        }
2948
2949        impl<SC: Clear, TC: Clear> Clear for Results<SC, TC> {
2950            fn clear(&mut self) {
2951                self.indexes.clear();
2952                self.oks.clear();
2953                self.errs.clear();
2954            }
2955        }
2956
2957        impl<SC: HeapSize, TC: HeapSize> HeapSize for Results<SC, TC> {
2958            fn heap_size(&self) -> (usize, usize) {
2959                let (l0, c0) = self.oks.heap_size();
2960                let (l1, c1) = self.errs.heap_size();
2961                let (li, ci) = self.indexes.heap_size();
2962                (l0 + l1 + li, c0 + c1 + ci)
2963            }
2964        }
2965
2966        impl<SC, TC, CC, VC, WC> Results<SC, TC, CC, VC, WC> {
2967            /// Returns ok values if no errors exist.
2968            pub fn unwrap(self) -> SC where TC: Len {
2969                assert!(self.errs.is_empty());
2970                self.oks
2971            }
2972            /// Returns error values if no oks exist.
2973            pub fn unwrap_err(self) -> TC where SC: Len {
2974                assert!(self.oks.is_empty());
2975                self.errs
2976            }
2977        }
2978        #[cfg(test)]
2979        mod test {
2980            #[test]
2981            fn round_trip() {
2982
2983                use crate::common::{Index, Push, HeapSize, Len};
2984
2985                let mut column: crate::ContainerOf<Result<u64, u64>> = Default::default();
2986                for i in 0..100 {
2987                    column.push(Ok::<u64, u64>(i));
2988                    column.push(Err::<u64, u64>(i));
2989                }
2990
2991                assert_eq!(column.len(), 200);
2992                assert_eq!(column.heap_size(), (1624, 2080));
2993
2994                for i in 0..100 {
2995                    assert_eq!(column.get(2*i+0), Ok(i as u64));
2996                    assert_eq!(column.get(2*i+1), Err(i as u64));
2997                }
2998
2999                let mut column: crate::ContainerOf<Result<u64, u8>> = Default::default();
3000                for i in 0..100 {
3001                    column.push(Ok::<u64, u8>(i as u64));
3002                    column.push(Err::<u64, u8>(i as u8));
3003                }
3004
3005                assert_eq!(column.len(), 200);
3006                assert_eq!(column.heap_size(), (924, 1184));
3007
3008                for i in 0..100 {
3009                    assert_eq!(column.get(2*i+0), Ok(i as u64));
3010                    assert_eq!(column.get(2*i+1), Err(i as u8));
3011                }
3012            }
3013        }
3014    }
3015
3016    pub mod option {
3017
3018        use crate::common::index::CopyAs;
3019        use crate::{Clear, Columnar, Container, Len, IndexMut, Index, IndexAs, Push, HeapSize};
3020        use crate::RankSelect;
3021
3022    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3023        #[derive(Copy, Clone, Debug, Default, PartialEq)]
3024        pub struct Options<TC, CC=Vec<u64>, VC=Vec<u64>, WC=u64> {
3025            /// Uses two bits for each item, one to indicate the variant and one (amortized)
3026            /// to enable efficient rank determination.
3027            pub indexes: RankSelect<CC, VC, WC>,
3028            pub somes: TC,
3029        }
3030
3031        impl<T: Columnar> Columnar for Option<T> {
3032            fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
3033                match (&mut *self, other) {
3034                    (Some(x), Some(y)) => { x.copy_from(y); }
3035                    (_, other) => { *self = Self::into_owned(other); }
3036                }
3037            }
3038            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
3039                other.map(|x| T::into_owned(x))
3040            }
3041            type Container = Options<T::Container>;
3042        }
3043
3044        impl<TC: Container> Container for Options<TC> {
3045            type Ref<'a> = Option<TC::Ref<'a>> where TC: 'a;
3046            type Borrowed<'a> = Options<TC::Borrowed<'a>, &'a [u64], &'a [u64], &'a u64> where TC: 'a;
3047            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
3048                Options {
3049                    indexes: self.indexes.borrow(),
3050                    somes: self.somes.borrow(),
3051                }
3052            }
3053            #[inline(always)]
3054            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where TC: 'a {
3055                Options {
3056                    indexes: RankSelect::<Vec<u64>, Vec<u64>>::reborrow(thing.indexes),
3057                    somes: TC::reborrow(thing.somes),
3058                }
3059            }
3060            #[inline(always)]
3061            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a {
3062                thing.map(TC::reborrow_ref)
3063            }
3064
3065            #[inline(always)]
3066            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
3067                if !range.is_empty() {
3068                    // Starting offsets of `Some` variants in `other`.
3069                    let somes_start = other.indexes.rank(range.start);
3070
3071                    // Count the number of `Some` variants as we push, to determine the range.
3072                    // TODO: This could probably be `popcnt` somehow.
3073                    let mut somes = 0;
3074                    for index in range {
3075                        let bit = other.indexes.get(index);
3076                        self.indexes.push(bit);
3077                        if bit { somes += 1; }
3078                    }
3079
3080                    self.somes.extend_from_self(other.somes, somes_start .. somes_start + somes);
3081                }
3082            }
3083
3084            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
3085                // TODO: reserve room in `self.indexes`.
3086                self.somes.reserve_for(selves.map(|x| x.somes));
3087            }
3088        }
3089
3090        impl<'a, TC: crate::AsBytes<'a>, CC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for Options<TC, CC, VC, &'a u64> {
3091            #[inline(always)]
3092            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
3093                crate::chain(self.indexes.as_bytes(), self.somes.as_bytes())
3094            }
3095        }
3096
3097        impl <'a, TC: crate::FromBytes<'a>, CC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for Options<TC, CC, VC, &'a u64> {
3098            #[inline(always)]
3099            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
3100                Self {
3101                    indexes: crate::FromBytes::from_bytes(bytes),
3102                    somes: crate::FromBytes::from_bytes(bytes),
3103                }
3104            }
3105        }
3106
3107        impl<T, CC, VC: Len, WC: CopyAs<u64>> Len for Options<T, CC, VC, WC> {
3108            #[inline(always)] fn len(&self) -> usize { self.indexes.len() }
3109        }
3110
3111        impl<TC: Index, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len, WC: CopyAs<u64>> Index for Options<TC, CC, VC, WC> {
3112            type Ref = Option<TC::Ref>;
3113            #[inline(always)]
3114            fn get(&self, index: usize) -> Self::Ref {
3115                if self.indexes.get(index) {
3116                    Some(self.somes.get(self.indexes.rank(index)))
3117                } else {
3118                    None
3119                }
3120            }
3121        }
3122        impl<'a, TC, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len, WC: CopyAs<u64>> Index for &'a Options<TC, CC, VC, WC>
3123        where &'a TC: Index
3124        {
3125            type Ref = Option<<&'a TC as Index>::Ref>;
3126            #[inline(always)]
3127            fn get(&self, index: usize) -> Self::Ref {
3128                if self.indexes.get(index) {
3129                    Some((&self.somes).get(self.indexes.rank(index)))
3130                } else {
3131                    None
3132                }
3133            }
3134        }
3135        impl<TC: IndexMut, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len> IndexMut for Options<TC, CC, VC> {
3136            type IndexMut<'a> = Option<TC::IndexMut<'a>> where TC: 'a, CC: 'a, VC: 'a;
3137            #[inline(always)]
3138            fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
3139                if self.indexes.get(index) {
3140                    Some(self.somes.get_mut(self.indexes.rank(index)))
3141                } else {
3142                    None
3143                }
3144            }
3145        }
3146
3147        impl<T, TC: Push<T> + Len> Push<Option<T>> for Options<TC> {
3148            #[inline]
3149            fn push(&mut self, item: Option<T>) {
3150                match item {
3151                    Some(item) => {
3152                        self.indexes.push(true);
3153                        self.somes.push(item);
3154                    }
3155                    None => {
3156                        self.indexes.push(false);
3157                    }
3158                }
3159            }
3160        }
3161        impl<'a, T, TC: Push<&'a T> + Len> Push<&'a Option<T>> for Options<TC> {
3162            #[inline]
3163            fn push(&mut self, item: &'a Option<T>) {
3164                match item {
3165                    Some(item) => {
3166                        self.indexes.push(true);
3167                        self.somes.push(item);
3168                    }
3169                    None => {
3170                        self.indexes.push(false);
3171                    }
3172                }
3173            }
3174        }
3175
3176        impl<TC: Clear> Clear for Options<TC> {
3177            fn clear(&mut self) {
3178                self.indexes.clear();
3179                self.somes.clear();
3180            }
3181        }
3182
3183        impl<TC: HeapSize> HeapSize for Options<TC> {
3184            fn heap_size(&self) -> (usize, usize) {
3185                let (l0, c0) = self.somes.heap_size();
3186                let (li, ci) = self.indexes.heap_size();
3187                (l0 + li, c0 + ci)
3188            }
3189        }
3190
3191        #[cfg(test)]
3192        mod test {
3193
3194            use crate::Columnar;
3195            use crate::common::{Index, HeapSize, Len};
3196            use crate::Options;
3197
3198            #[test]
3199            fn round_trip_some() {
3200                // Type annotation is important to avoid some inference overflow.
3201                let store: Options<Vec<i32>> = Columnar::into_columns((0..100).map(Some));
3202                assert_eq!(store.len(), 100);
3203                assert!((&store).index_iter().zip(0..100).all(|(a, b)| a == Some(&b)));
3204                assert_eq!(store.heap_size(), (408, 544));
3205            }
3206
3207            #[test]
3208            fn round_trip_none() {
3209                let store = Columnar::into_columns((0..100).map(|_x| None::<i32>));
3210                assert_eq!(store.len(), 100);
3211                let foo = &store;
3212                assert!(foo.index_iter().zip(0..100).all(|(a, _b)| a == None));
3213                assert_eq!(store.heap_size(), (8, 32));
3214            }
3215
3216            #[test]
3217            fn round_trip_mixed() {
3218                // Type annotation is important to avoid some inference overflow.
3219                let store: Options<Vec<i32>>  = Columnar::into_columns((0..100).map(|x| if x % 2 == 0 { Some(x) } else { None }));
3220                assert_eq!(store.len(), 100);
3221                assert!((&store).index_iter().zip(0..100).all(|(a, b)| a == if b % 2 == 0 { Some(&b) } else { None }));
3222                assert_eq!(store.heap_size(), (208, 288));
3223            }
3224        }
3225    }
3226}
3227
3228pub use lookback::{Repeats, Lookbacks};
3229/// Containers that can store either values, or offsets to prior values.
3230///
3231/// This has the potential to be more efficient than a list of `T` when many values repeat in
3232/// close proximity. Values must be equatable, and the degree of lookback can be configured.
3233pub mod lookback {
3234
3235    use crate::{Options, Results, Push, Index, Len, HeapSize};
3236
3237    /// A container that encodes repeated values with a `None` variant, at the cost of extra bits for every record.
3238    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3239    #[derive(Clone, Debug, Default, PartialEq)]
3240    pub struct Repeats<TC, const N: u8 = 255> {
3241        /// Some(x) encodes a value, and None indicates the prior `x` value.
3242        pub inner: Options<TC>,
3243    }
3244
3245    impl<T: PartialEq, TC: Push<T> + Len, const N: u8> Push<T> for Repeats<TC, N>
3246    where
3247        for<'a> &'a TC: Index,
3248        for<'a> <&'a TC as Index>::Ref : PartialEq<T>,
3249    {
3250        #[inline]
3251        fn push(&mut self, item: T) {
3252            // Look at the last `somes` value for a potential match.
3253            let insert: Option<T> = if (&self.inner.somes).last().map(|x| x.eq(&item)) == Some(true) {
3254                None
3255            } else {
3256                Some(item)
3257            };
3258            self.inner.push(insert);
3259        }
3260    }
3261
3262    impl<TC: Len, const N: u8> Len for Repeats<TC, N> {
3263        #[inline(always)] fn len(&self) -> usize { self.inner.len() }
3264    }
3265
3266    impl<TC: Index, const N: u8> Index for Repeats<TC, N> {
3267        type Ref = TC::Ref;
3268        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
3269            match self.inner.get(index) {
3270                Some(item) => item,
3271                None => {
3272                    let pos = self.inner.indexes.rank(index) - 1;
3273                    self.inner.somes.get(pos)
3274                },
3275            }
3276        }
3277    }
3278
3279    impl<TC: HeapSize, const N: u8> HeapSize for Repeats<TC, N> {
3280        fn heap_size(&self) -> (usize, usize) {
3281            self.inner.heap_size()
3282        }
3283    }
3284
3285    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3286    #[derive(Clone, Debug, Default, PartialEq)]
3287    pub struct Lookbacks<TC, VC = Vec<u8>, const N: u8 = 255> {
3288        /// Ok(x) encodes a value, and Err(y) indicates a value `y` back.
3289        pub inner: Results<TC, VC>,
3290    }
3291
3292    impl<T: PartialEq, TC: Push<T> + Len, VC: Push<u8>, const N: u8> Push<T> for Lookbacks<TC, VC, N>
3293    where
3294        for<'a> &'a TC: Index,
3295        for<'a> <&'a TC as Index>::Ref : PartialEq<T>,
3296    {
3297        #[inline]
3298        fn push(&mut self, item: T) {
3299            // Look backwards through (0 .. N) to look for a matching value.
3300            let oks_len = self.inner.oks.len();
3301            let find = (0u8 .. N).take(self.inner.oks.len()).find(|i| (&self.inner.oks).get(oks_len - (*i as usize) - 1) == item);
3302            let insert: Result<T, u8> = if let Some(back) = find { Err(back) } else { Ok(item) };
3303            self.inner.push(insert);
3304        }
3305    }
3306
3307    impl<TC, VC, const N: u8> Len for Lookbacks<TC, VC, N> {
3308        #[inline(always)] fn len(&self) -> usize { self.inner.len() }
3309    }
3310
3311    impl<TC: Index, VC: Index<Ref=u8>, const N: u8> Index for Lookbacks<TC, VC, N> {
3312        type Ref = TC::Ref;
3313        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
3314            match self.inner.get(index) {
3315                Ok(item) => item,
3316                Err(back) => {
3317                    let pos = self.inner.indexes.rank(index) - 1;
3318                    self.inner.oks.get(pos - (back as usize))
3319                },
3320            }
3321        }
3322    }
3323    impl<'a, TC, const N: u8> Index for &'a Lookbacks<TC, Vec<u8>, N>
3324    where
3325        &'a TC: Index,
3326    {
3327        type Ref = <&'a TC as Index>::Ref;
3328        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
3329            match (&self.inner).get(index) {
3330                Ok(item) => item,
3331                Err(back) => {
3332                    let pos = self.inner.indexes.rank(index) - 1;
3333                    (&self.inner.oks).get(pos - (*back as usize))
3334                },
3335            }
3336        }
3337    }
3338
3339    impl<TC: HeapSize, VC: HeapSize, const N: u8> HeapSize for Lookbacks<TC, VC, N> {
3340        fn heap_size(&self) -> (usize, usize) {
3341            self.inner.heap_size()
3342        }
3343    }
3344}
3345
3346/// Roaring bitmap (and similar) containers.
3347pub mod roaring {
3348
3349    use crate::Results;
3350
3351    /// A container for `bool` that uses techniques from Roaring bitmaps.
3352    ///
3353    /// These techniques are to block the bits into blocks of 2^16 bits,
3354    /// and to encode each block based on its density. Either a bitmap
3355    /// for dense blocks or a list of set bits for sparse blocks.
3356    ///
3357    /// Additionally, other representations encode runs of set bits.
3358    pub struct RoaringBits {
3359        _inner: Results<[u64; 1024], Vec<u16>>,
3360    }
3361}
3362
3363pub use chain_mod::{chain, chain_one};
3364
3365pub mod chain_mod {
3366    //! Chain iterators, or iterators and an item. Iterators that might improve inlining, at the
3367    //! expense of not providing iterator maker traits.
3368
3369    /// Chain two iterators together. The result first iterates over `a`, then `b`, until both are
3370    /// exhausted.
3371    ///
3372    /// This addresses a quirk where deep iterators would not be optimized to their full potential.
3373    /// Here, functions are marked with `#[inline(always)]` to indicate that the compiler should
3374    /// try hard to inline the iterators.
3375    #[inline(always)]
3376    pub fn chain<A: IntoIterator, B: IntoIterator<Item=A::Item>>(a: A, b: B) -> Chain<A::IntoIter, B::IntoIter> {
3377        Chain { a: Some(a.into_iter()), b: Some(b.into_iter()) }
3378    }
3379
3380    pub struct Chain<A, B> {
3381        a: Option<A>,
3382        b: Option<B>,
3383    }
3384
3385    impl<A, B> Iterator for Chain<A, B>
3386    where
3387        A: Iterator,
3388        B: Iterator<Item=A::Item>,
3389    {
3390        type Item = A::Item;
3391
3392        #[inline(always)]
3393        fn next(&mut self) -> Option<Self::Item> {
3394            if let Some(a) = self.a.as_mut() {
3395                let x = a.next();
3396                if x.is_none() {
3397                    self.a = None;
3398                } else {
3399                    return x;
3400                }
3401            }
3402            if let Some(b) = self.b.as_mut() {
3403                let x = b.next();
3404                if x.is_none() {
3405                    self.b = None;
3406                } else {
3407                    return x;
3408                }
3409            }
3410            None
3411        }
3412
3413        #[inline]
3414        fn fold<Acc, F>(self, mut acc: Acc, mut f: F) -> Acc
3415        where
3416            F: FnMut(Acc, Self::Item) -> Acc,
3417        {
3418            if let Some(a) = self.a {
3419                acc = a.fold(acc, &mut f);
3420            }
3421            if let Some(b) = self.b {
3422                acc = b.fold(acc, f);
3423            }
3424            acc
3425        }
3426    }
3427
3428    /// Chain a single item to an iterator. The resulting iterator first iterates over `a`,
3429    /// then `b`. The resulting iterator is marked as `#[inline(always)]`, which in some situations
3430    /// causes better inlining behavior with current Rust versions.
3431    #[inline(always)]
3432    pub fn chain_one<A: IntoIterator>(a: A, b: A::Item) -> ChainOne<A::IntoIter> {
3433        ChainOne { a: Some(a.into_iter()), b: Some(b) }
3434    }
3435
3436    pub struct ChainOne<A: Iterator> {
3437        a: Option<A>,
3438        b: Option<A::Item>,
3439    }
3440
3441    impl<A: Iterator> Iterator for ChainOne<A> {
3442        type Item = A::Item;
3443
3444        #[inline(always)]
3445        fn next(&mut self) -> Option<Self::Item> {
3446            if let Some(a) = self.a.as_mut() {
3447                let x = a.next();
3448                if x.is_none() {
3449                    self.a = None;
3450                    self.b.take()
3451                } else {
3452                    x
3453                }
3454            } else {
3455                None
3456            }
3457        }
3458    }
3459
3460    #[cfg(test)]
3461    mod test {
3462        use super::*;
3463
3464        #[test]
3465        fn test_chain() {
3466            let a = [1, 2, 3];
3467            let b = [4, 5, 6];
3468            let mut chain = chain(a, b);
3469            assert_eq!(chain.next(), Some(1));
3470            assert_eq!(chain.next(), Some(2));
3471            assert_eq!(chain.next(), Some(3));
3472            assert_eq!(chain.next(), Some(4));
3473            assert_eq!(chain.next(), Some(5));
3474            assert_eq!(chain.next(), Some(6));
3475            assert_eq!(chain.next(), None);
3476        }
3477
3478        #[test]
3479        fn test_chain_one() {
3480            let a = [1, 2, 3];
3481            let b = 4;
3482            let mut chain = chain_one(a, b);
3483            assert_eq!(chain.next(), Some(1));
3484            assert_eq!(chain.next(), Some(2));
3485            assert_eq!(chain.next(), Some(3));
3486            assert_eq!(chain.next(), Some(4));
3487            assert_eq!(chain.next(), None);
3488        }
3489    }
3490}