columnar/
lib.rs

1//! Types supporting flat / "columnar" layout for complex types.
2//!
3//! The intent is to re-layout `Vec<T>` types into vectors of reduced
4//! complexity, repeatedly. One should be able to push and pop easily,
5//! but indexing will be more complicated because we likely won't have
6//! a real `T` lying around to return as a reference. Instead, we will
7//! use Generic Associated Types (GATs) to provide alternate references.
8
9// Re-export derive crate.
10extern crate columnar_derive;
11pub use columnar_derive::Columnar;
12
13pub mod adts;
14pub mod boxed;
15mod arc;
16mod rc;
17
18pub use bytemuck;
19
20/// A type that can be represented in columnar form.
21///
22/// For a running example, a type like `(A, Vec<B>)`.
23pub trait Columnar : 'static {
24    /// Repopulates `self` from a reference.
25    ///
26    /// By default this just calls `into_owned()`, but it can be overridden.
27    fn copy_from<'a>(&mut self, other: Ref<'a, Self>) where Self: Sized {
28        *self = Self::into_owned(other);
29    }
30    /// Produce an instance of `Self` from `Self::Ref<'a>`.
31    fn into_owned<'a>(other: Ref<'a, Self>) -> Self;
32
33    /// The type that stores the columnar representation.
34    ///
35    /// The container must support pushing both `&Self` and `Self::Ref<'_>`.
36    /// In our running example this might be `(Vec<A>, Vecs<Vec<B>>)`.
37    type Container: ContainerBytes + for<'a> Push<&'a Self>;
38
39    /// Converts a sequence of the references to the type into columnar form.
40    fn as_columns<'a, I>(selves: I) -> Self::Container where I: IntoIterator<Item=&'a Self>, Self: 'a {
41        let mut columns: Self::Container = Default::default();
42        for item in selves {
43            columns.push(item);
44        }
45        columns
46    }
47    /// Converts a sequence of the type into columnar form.
48    ///
49    /// This consumes the owned `Self` types but uses them only by reference.
50    /// Consider `as_columns()` instead if it is equally ergonomic.
51    fn into_columns<I>(selves: I) -> Self::Container where I: IntoIterator<Item = Self>, Self: Sized {
52        let mut columns: Self::Container = Default::default();
53        for item in selves {
54            columns.push(&item);
55        }
56        columns
57    }
58    /// Reborrows the reference type to a shorter lifetime.
59    ///
60    /// Implementations must not change the contents of the reference, and should mark
61    /// the function as `#[inline(always)]`. It is no-op to overcome limitations
62    /// of the borrow checker. In many cases, it is enough to return `thing` as-is.
63    ///
64    /// For example, when comparing two references `Ref<'a>` and `Ref<'b>`, we can
65    /// reborrow both to a local lifetime to compare them. This allows us to keep the
66    /// lifetimes `'a` and `'b` separate, while otherwise Rust would unify them.
67    #[inline(always)] fn reborrow<'b, 'a: 'b>(thing: Ref<'a, Self>) -> Ref<'b, Self> {
68        Self::Container::reborrow_ref(thing)
69    }
70}
71
72/// The container type of columnar type `T`.
73///
74/// Equivalent to `<T as Columnar>::Container`.
75pub type ContainerOf<T> = <T as Columnar>::Container;
76
77/// For a lifetime, the reference type of columnar type `T`.
78///
79/// Equivalent to `<ContainerOf<T> as Borrow>::Ref<'a>`.
80pub type Ref<'a, T> = <ContainerOf<T> as Borrow>::Ref<'a>;
81
82/// A type that can be borrowed into a preferred reference type.
83pub trait Borrow: Len + Clone + 'static {
84    /// For each lifetime, a reference with that lifetime.
85    ///
86    /// As an example, `(&'a A, &'a [B])`.
87    type Ref<'a> : Copy;
88    /// The type of a borrowed container.
89    ///
90    /// Corresponding to our example, `(&'a [A], Vecs<&'a [B], &'a [u64]>)`.
91    type Borrowed<'a>: Copy + Len + Index<Ref = Self::Ref<'a>> where Self: 'a;
92    /// Converts a reference to the type to a borrowed variant.
93    fn borrow<'a>(&'a self) -> Self::Borrowed<'a>;
94    /// Reborrows the borrowed type to a shorter lifetime. See [`Columnar::reborrow`] for details.
95    fn reborrow<'b, 'a: 'b>(item: Self::Borrowed<'a>) -> Self::Borrowed<'b> where Self: 'a;
96    /// Reborrows the borrowed type to a shorter lifetime. See [`Columnar::reborrow`] for details.
97    fn reborrow_ref<'b, 'a: 'b>(item: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a;
98}
99
100
101/// A container that can hold `C`, and provide its preferred references through [`Borrow`].
102///
103/// As an example, `(Vec<A>, Vecs<Vec<B>>)`.
104pub trait Container : Borrow + Clear + for<'a> Push<Self::Ref<'a>> + Default + Send {
105    /// Allocates an empty container that can be extended by `selves` without reallocation.
106    ///
107    /// This goal is optimistic, and some containers may struggle to size correctly, especially
108    /// if they employ compression or other variable-sizing techniques that respond to the data
109    /// and the order in which is it presented. Best effort, but still useful!
110    fn with_capacity_for<'a, I>(selves: I) -> Self
111    where
112        Self: 'a,
113        I: Iterator<Item = Self::Borrowed<'a>> + Clone
114    {
115        let mut output = Self::default();
116        output.reserve_for(selves);
117        output
118    }
119
120    // Ensure that `self` can extend from `selves` without reallocation.
121    fn reserve_for<'a, I>(&mut self, selves: I)
122    where
123        Self: 'a,
124        I: Iterator<Item = Self::Borrowed<'a>> + Clone;
125
126
127    /// Extends `self` by a range in `other`.
128    ///
129    /// This method has a default implementation, but can and should be specialized when ranges can be copied.
130    /// As an example, lists of lists are often backed by contiguous elements, all of which can be memcopied,
131    /// with only the offsets into them (the bounds) to push either before or after (rather than during).
132    #[inline(always)]
133    fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
134        self.extend(range.map(|i| other.get(i)))
135    }
136}
137
138impl<T: Clone + 'static> Borrow for Vec<T> {
139    type Ref<'a> = &'a T;
140    type Borrowed<'a> = &'a [T];
141    #[inline(always)] fn borrow<'a>(&'a self) -> Self::Borrowed<'a> { &self[..] }
142    #[inline(always)] fn reborrow<'b, 'a: 'b>(item: Self::Borrowed<'a>) -> Self::Borrowed<'b> where Self: 'a { item }
143    #[inline(always)] fn reborrow_ref<'b, 'a: 'b>(item: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { item }
144}
145
146impl<T: Clone + Send + 'static> Container for Vec<T> {
147    fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
148        self.extend_from_slice(&other[range])
149    }
150    fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
151        self.reserve(selves.map(|x| x.len()).sum::<usize>())
152    }
153}
154
155/// A container that can also be viewed as and reconstituted from bytes.
156pub trait ContainerBytes : Container + for<'a> Borrow<Borrowed<'a> : AsBytes<'a> + FromBytes<'a>> { }
157impl<C: Container + for<'a> Borrow<Borrowed<'a> : AsBytes<'a> + FromBytes<'a>>> ContainerBytes for C { }
158
159pub use common::{Clear, Len, Push, IndexMut, Index, IndexAs, HeapSize, Slice, AsBytes, FromBytes};
160/// Common traits and types that are re-used throughout the module.
161pub mod common {
162
163    /// A type with a length.
164    pub trait Len {
165        /// The number of contained elements.
166        fn len(&self) -> usize;
167        /// Whether this contains no elements.
168        fn is_empty(&self) -> bool {
169            self.len() == 0
170        }
171    }
172    impl<L: Len + ?Sized> Len for &L {
173        #[inline(always)] fn len(&self) -> usize { L::len(*self) }
174    }
175    impl<L: Len + ?Sized> Len for &mut L {
176        #[inline(always)] fn len(&self) -> usize { L::len(*self) }
177    }
178    impl<T> Len for Vec<T> {
179        #[inline(always)] fn len(&self) -> usize { self.len() }
180    }
181    impl<T> Len for [T] {
182        #[inline(always)] fn len(&self) -> usize { <[T]>::len(self) }
183    }
184
185    /// A type that can accept items of type `T`.
186    pub trait Push<T> {
187        /// Pushes an item onto `self`.
188        fn push(&mut self, item: T);
189        /// Pushes elements of an iterator onto `self`.
190        #[inline(always)] fn extend(&mut self, iter: impl IntoIterator<Item=T>) {
191            for item in iter {
192                self.push(item);
193            }
194        }
195    }
196    impl<T> Push<T> for Vec<T> {
197        #[inline(always)] fn push(&mut self, item: T) { self.push(item) }
198
199        #[inline(always)]
200        fn extend(&mut self, iter: impl IntoIterator<Item=T>) {
201            std::iter::Extend::extend(self, iter)
202        }
203    }
204    impl<'a, T: Clone> Push<&'a T> for Vec<T> {
205        #[inline(always)] fn push(&mut self, item: &'a T) { self.push(item.clone()) }
206
207        #[inline(always)]
208        fn extend(&mut self, iter: impl IntoIterator<Item=&'a T>) {
209            std::iter::Extend::extend(self, iter.into_iter().cloned())
210        }
211    }
212    impl<'a, T: Clone> Push<&'a [T]> for Vec<T> {
213        #[inline(always)] fn push(&mut self, item: &'a [T]) { self.clone_from_slice(item) }
214    }
215
216
217    pub use index::{Index, IndexMut, IndexAs};
218    /// Traits for accessing elements by `usize` indexes.
219    ///
220    /// There are several traits, with a core distinction being whether the returned reference depends on the lifetime of `&self`.
221    /// For one trait `Index` the result does not depend on this lifetime.
222    /// There is a third trait `IndexMut` that allows mutable access, that may be less commonly implemented.
223    pub mod index {
224
225        use crate::Len;
226        use crate::common::IterOwn;
227
228        /// A type that can be mutably accessed by `usize`.
229        pub trait IndexMut {
230            /// Type mutably referencing an indexed element.
231            type IndexMut<'a> where Self: 'a;
232            fn get_mut(& mut self, index: usize) -> Self::IndexMut<'_>;
233            /// A reference to the last element, should one exist.
234            #[inline(always)] fn last_mut(&mut self) -> Option<Self::IndexMut<'_>> where Self: Len {
235                if self.is_empty() { None }
236                else { Some(self.get_mut(self.len()-1)) }
237            }
238        }
239
240        impl<T: IndexMut + ?Sized> IndexMut for &mut T {
241            type IndexMut<'a> = T::IndexMut<'a> where Self: 'a;
242            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
243                T::get_mut(*self, index)
244            }
245        }
246        impl<T> IndexMut for Vec<T> {
247            type IndexMut<'a> = &'a mut T where Self: 'a;
248            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> { &mut self[index] }
249        }
250        impl<T> IndexMut for [T] {
251            type IndexMut<'a> = &'a mut T where Self: 'a;
252            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> { &mut self[index] }
253        }
254
255        /// A type that can be accessed by `usize` but without borrowing `self`.
256        ///
257        /// This can be useful for types which include their own lifetimes, and
258        /// which wish to express that their reference has the same lifetime.
259        /// In the GAT `Index`, the `Ref<'_>` lifetime would be tied to `&self`.
260        ///
261        /// This trait may be challenging to implement for owning containers,
262        /// for example `Vec<_>`, which would need their `Ref` type to depend
263        /// on the lifetime of the `&self` borrow in the `get()` function.
264        pub trait Index {
265            /// The type returned by the `get` method.
266            ///
267            /// Notably, this does not vary with lifetime, and will not depend on the lifetime of `&self`.
268            type Ref;
269            fn get(&self, index: usize) -> Self::Ref;
270            #[inline(always)] fn last(&self) -> Option<Self::Ref> where Self: Len {
271                if self.is_empty() { None }
272                else { Some(self.get(self.len()-1)) }
273            }
274            /// Converts `&self` into an iterator.
275            ///
276            /// This has an awkward name to avoid collision with `iter()`, which may also be implemented.
277            #[inline(always)]
278            fn index_iter(&self) -> IterOwn<&Self> {
279                IterOwn {
280                    index: 0,
281                    slice: self,
282                }
283            }
284            /// Converts `self` into an iterator.
285            ///
286            /// This has an awkward name to avoid collision with `into_iter()`, which may also be implemented.
287            #[inline(always)]
288            fn into_index_iter(self) -> IterOwn<Self> where Self: Sized {
289                IterOwn {
290                    index: 0,
291                    slice: self,
292                }
293            }
294        }
295
296        // These implementations aim to reveal a longer lifetime, or to copy results to avoid a lifetime.
297        impl<'a, T> Index for &'a [T] {
298            type Ref = &'a T;
299            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { &self[index] }
300        }
301        impl<T: Copy> Index for [T] {
302            type Ref = T;
303            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self[index] }
304        }
305        impl<'a, T> Index for &'a Vec<T> {
306            type Ref = &'a T;
307            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { &self[index] }
308        }
309        impl<T: Copy> Index for Vec<T> {
310            type Ref = T;
311            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self[index] }
312        }
313
314
315        /// Types that can be converted into another type by copying.
316        ///
317        /// We use this trait to unify the ability of `T` and `&T` to be converted into `T`.
318        /// This is handy for copy types that we'd like to use, like `u8`, `u64` and `usize`.
319        pub trait CopyAs<T> : Copy {
320            fn copy_as(self) -> T;
321        }
322        impl<T: Copy> CopyAs<T> for &T {
323            #[inline(always)] fn copy_as(self) -> T { *self }
324        }
325        impl<T: Copy> CopyAs<T> for T {
326            #[inline(always)] fn copy_as(self) -> T { self }
327        }
328
329        pub trait IndexAs<T> {
330            fn index_as(&self, index: usize) -> T;
331            #[inline(always)] fn last(&self) -> Option<T> where Self: Len {
332                if self.is_empty() { None }
333                else { Some(self.index_as(self.len()-1)) }
334            }
335        }
336
337        impl<T: Index, S> IndexAs<S> for T where T::Ref: CopyAs<S> {
338            #[inline(always)] fn index_as(&self, index: usize) -> S { self.get(index).copy_as() }
339        }
340
341    }
342
343    use crate::{Borrow, Container};
344    use crate::common::index::CopyAs;
345    /// A composite trait which captures the ability `Index<Ref = T>`.
346    ///
347    /// Implement `CopyAs<T>` for the reference type.
348    pub trait BorrowIndexAs<T> : for<'a> Borrow<Ref<'a>: CopyAs<T>> { }
349    impl<T, C: for<'a> Borrow<Ref<'a>: CopyAs<T>>> BorrowIndexAs<T> for C { }
350    /// A composite trait which captures the ability `Push<&T>` and `Index<Ref = T>`.
351    ///
352    /// Implement `CopyAs<T>` for the reference type, and `Push<&'a T>` for the container.
353    pub trait PushIndexAs<T> : BorrowIndexAs<T> + Container + for<'a> Push<&'a T> { }
354    impl<T, C: BorrowIndexAs<T> + Container + for<'a> Push<&'a T>> PushIndexAs<T> for C { }
355
356    /// A type that can remove its contents and return to an empty state.
357    ///
358    /// Generally, this method does not release resources, and is used to make the container available for re-insertion.
359    pub trait Clear {
360        /// Clears `self`, without changing its capacity.
361        fn clear(&mut self);
362    }
363    // Vectors can be cleared.
364    impl<T> Clear for Vec<T> {
365        #[inline(always)] fn clear(&mut self) { self.clear() }
366    }
367    // Slice references can be cleared.
368    impl<T> Clear for &[T] {
369        #[inline(always)] fn clear(&mut self) { *self = &[]; }
370    }
371
372    pub trait HeapSize {
373        /// Active (len) and allocated (cap) heap sizes in bytes.
374        /// This should not include the size of `self` itself.
375        fn heap_size(&self) -> (usize, usize) { (0, 0) }
376    }
377
378    impl HeapSize for String {
379        fn heap_size(&self) -> (usize, usize) {
380            (self.len(), self.capacity())
381        }
382    }
383    impl<T: HeapSize> HeapSize for [T] {
384        fn heap_size(&self) -> (usize, usize) {
385            let mut l = std::mem::size_of_val(self);
386            let mut c = std::mem::size_of_val(self);
387            for item in self.iter() {
388                let (il, ic) = item.heap_size();
389                l += il;
390                c += ic;
391            }
392            (l, c)
393        }
394    }
395    impl<T: HeapSize> HeapSize for Vec<T> {
396        fn heap_size(&self) -> (usize, usize) {
397            let mut l = std::mem::size_of::<T>() * self.len();
398            let mut c = std::mem::size_of::<T>() * self.capacity();
399            for item in (self[..]).iter() {
400                let (il, ic) = item.heap_size();
401                l += il;
402                c += ic;
403            }
404            (l, c)
405        }
406    }
407
408    /// A struct representing a slice of a range of values.
409    ///
410    /// The lower and upper bounds should be meaningfully set on construction.
411    #[derive(Copy, Clone, Debug)]
412    pub struct Slice<S> {
413        pub lower: usize,
414        pub upper: usize,
415        pub slice: S,
416    }
417
418    impl<S> std::hash::Hash for Slice<S> where Self: Index<Ref: std::hash::Hash> {
419        fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
420            self.len().hash(state);
421            for i in 0 .. self.len() {
422                self.get(i).hash(state);
423            }
424        }
425    }
426
427    impl<S> Slice<S> {
428        pub fn slice<R: std::ops::RangeBounds<usize>>(self, range: R) -> Self {
429            use std::ops::Bound;
430            let lower = match range.start_bound() {
431                Bound::Included(s) => std::cmp::max(self.lower, *s),
432                Bound::Excluded(s) => std::cmp::max(self.lower, *s+1),
433                Bound::Unbounded => self.lower,
434            };
435            let upper = match range.end_bound() {
436                Bound::Included(s) => std::cmp::min(self.upper, *s+1),
437                Bound::Excluded(s) => std::cmp::min(self.upper, *s),
438                Bound::Unbounded => self.upper,
439            };
440            assert!(lower <= upper);
441            Self { lower, upper, slice: self.slice }
442        }
443        pub fn new(lower: u64, upper: u64, slice: S) -> Self {
444            let lower: usize = lower.try_into().expect("slice bounds must fit in `usize`");
445            let upper: usize = upper.try_into().expect("slice bounds must fit in `usize`");
446            Self { lower, upper, slice }
447        }
448        pub fn len(&self) -> usize { self.upper - self.lower }
449        /// Map the slice to another type.
450        pub(crate) fn map<T>(self, f: impl Fn(S) -> T) -> Slice<T> {
451            Slice {
452                lower: self.lower,
453                upper: self.upper,
454                slice: f(self.slice),
455            }
456        }
457    }
458
459    impl<S: Index> PartialEq for Slice<S> where S::Ref: PartialEq {
460        fn eq(&self, other: &Self) -> bool {
461            if self.len() != other.len() { return false; }
462            for i in 0 .. self.len() {
463                if self.get(i) != other.get(i) { return false; }
464            }
465            true
466        }
467    }
468    impl<S: Index> PartialEq<[S::Ref]> for Slice<S> where S::Ref: PartialEq {
469        fn eq(&self, other: &[S::Ref]) -> bool {
470            if self.len() != other.len() { return false; }
471            for i in 0 .. self.len() {
472                if self.get(i) != other[i] { return false; }
473            }
474            true
475        }
476    }
477    impl<S: Index> PartialEq<Vec<S::Ref>> for Slice<S> where S::Ref: PartialEq {
478        fn eq(&self, other: &Vec<S::Ref>) -> bool {
479            if self.len() != other.len() { return false; }
480            for i in 0 .. self.len() {
481                if self.get(i) != other[i] { return false; }
482            }
483            true
484        }
485    }
486
487    impl<S: Index> Eq for Slice<S> where S::Ref: Eq { }
488
489    impl<S: Index> PartialOrd for Slice<S> where S::Ref: PartialOrd {
490        fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
491            use std::cmp::Ordering;
492            let len = std::cmp::min(self.len(), other.len());
493
494            for i in 0 .. len {
495                match self.get(i).partial_cmp(&other.get(i)) {
496                    Some(Ordering::Equal) => (),
497                    not_equal => return not_equal,
498                }
499            }
500
501            self.len().partial_cmp(&other.len())
502        }
503    }
504
505    impl<S: Index> Ord for Slice<S> where S::Ref: Ord + Eq {
506        fn cmp(&self, other: &Self) -> std::cmp::Ordering {
507            use std::cmp::Ordering;
508            let len = std::cmp::min(self.len(), other.len());
509
510            for i in 0 .. len {
511                match self.get(i).cmp(&other.get(i)) {
512                    Ordering::Equal => (),
513                    not_equal => return not_equal,
514                }
515            }
516
517            self.len().cmp(&other.len())
518        }
519    }
520
521    impl<S> Len for Slice<S> {
522        #[inline(always)] fn len(&self) -> usize { self.len() }
523    }
524
525    impl<S: Index> Index for Slice<S> {
526        type Ref = S::Ref;
527        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
528            assert!(index < self.upper - self.lower);
529            self.slice.get(self.lower + index)
530        }
531    }
532    impl<'a, S> Index for &'a Slice<S>
533    where
534        &'a S : Index,
535    {
536        type Ref = <&'a S as Index>::Ref;
537        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
538            assert!(index < self.upper - self.lower);
539            (&self.slice).get(self.lower + index)
540        }
541    }
542
543    impl<S: IndexMut> IndexMut for Slice<S> {
544        type IndexMut<'a> = S::IndexMut<'a> where S: 'a;
545        #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
546            assert!(index < self.upper - self.lower);
547            self.slice.get_mut(self.lower + index)
548        }
549    }
550
551    impl<S: Index + Len> Slice<S> {
552        /// Converts the slice into an iterator.
553        ///
554        /// This method exists rather than an `IntoIterator` implementation to avoid
555        /// a conflicting implementation for pushing an `I: IntoIterator` into `Vecs`.
556        pub fn into_iter(self) -> IterOwn<Slice<S>> {
557            self.into_index_iter()
558        }
559    }
560
561    impl<'a, T> Slice<&'a [T]> {
562        pub fn as_slice(&self) -> &'a [T] {
563            &self.slice[self.lower .. self.upper]
564        }
565    }
566
567    pub struct IterOwn<S> {
568        index: usize,
569        slice: S,
570    }
571
572    impl<S> IterOwn<S> {
573        pub fn new(index: usize, slice: S) -> Self {
574            Self { index, slice }
575        }
576    }
577
578    impl<S: Index + Len> Iterator for IterOwn<S> {
579        type Item = S::Ref;
580        #[inline(always)] fn next(&mut self) -> Option<Self::Item> {
581            if self.index < self.slice.len() {
582                let result = self.slice.get(self.index);
583                self.index += 1;
584                Some(result)
585            } else {
586                None
587            }
588        }
589        #[inline(always)]
590        fn size_hint(&self) -> (usize, Option<usize>) {
591            (self.slice.len() - self.index, Some(self.slice.len() - self.index))
592        }
593    }
594
595    impl<S: Index + Len> ExactSizeIterator for IterOwn<S> { }
596
597    /// A type that can be viewed as byte slices with lifetime `'a`.
598    ///
599    /// Implementors of this trait almost certainly reference the lifetime `'a` themselves.
600    pub trait AsBytes<'a> {
601        /// Presents `self` as a sequence of byte slices, with their required alignment.
602        fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])>;
603    }
604
605    /// A type that can be reconstituted from byte slices with lifetime `'a`.
606    ///
607    /// Implementors of this trait almost certainly reference the lifetime `'a` themselves,
608    /// unless they actively deserialize the bytes (vs sit on the slices, as if zero-copy).
609    pub trait FromBytes<'a> {
610        /// Reconstructs `self` from a sequence of correctly aligned and sized bytes slices.
611        ///
612        /// The implementation is expected to consume the right number of items from the iterator,
613        /// which may go on to be used by other implementations of `FromBytes`.
614        ///
615        /// The implementation should aim for only doing trivial work, as it backs calls like
616        /// `borrow` for serialized containers.
617        ///
618        /// Implementations should almost always be marked as `#[inline(always)]` to ensure that
619        /// they are inlined. A single non-inlined function on a tree of `from_bytes` calls
620        /// can cause the performance to drop significantly.
621        fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self;
622    }
623
624}
625
626/// Logic related to the transformation to and from bytes.
627///
628/// The methods here line up with the `AsBytes` and `FromBytes` traits.
629pub mod bytes {
630
631    use crate::AsBytes;
632
633    /// A coupled encode/decode pair for byte sequences.
634    pub trait EncodeDecode {
635        /// Encoded length in number of `u64` words required.
636        fn length_in_words<'a, A>(bytes: &A) -> usize where A : AsBytes<'a>;
637        /// Encoded length in number of `u8` bytes required.
638        ///
639        /// This method should always be eight times `Self::length_in_words`, and is provided for convenience and clarity.
640        fn length_in_bytes<'a, A>(bytes: &A) -> usize where A : AsBytes<'a> { 8 * Self::length_in_words(bytes) }
641        /// Encodes `bytes` into a sequence of `u64`.
642        fn encode<'a, A>(store: &mut Vec<u64>, bytes: &A) where A : AsBytes<'a>;
643        /// Writes `bytes` in the encoded format to an arbitrary writer.
644        fn write<'a, A, W: std::io::Write>(writer: W, bytes: &A) -> std::io::Result<()> where A : AsBytes<'a>;
645        /// Decodes bytes from a sequence of `u64`.
646        fn decode(store: &[u64]) -> impl Iterator<Item=&[u8]>;
647    }
648
649    /// A sequential byte layout for `AsBytes` and `FromBytes` implementors.
650    ///
651    /// The layout is aligned like a sequence of `u64`, where we repeatedly announce a length,
652    /// and then follow it by that many bytes. We may need to follow this with padding bytes.
653    pub use serialization::Sequence;
654    mod serialization {
655
656        use crate::AsBytes;
657
658        /// Encodes and decodes bytes sequences, by prepending the length and appending the all sequences.
659        pub struct Sequence;
660        impl super::EncodeDecode for Sequence {
661            fn length_in_words<'a, A>(bytes: &A) -> usize where A : AsBytes<'a> {
662                // Each byte slice has one `u64` for the length, and then as many `u64`s as needed to hold all bytes.
663                bytes.as_bytes().map(|(_align, bytes)| 1 + bytes.len().div_ceil(8)).sum()
664            }
665            fn encode<'a, A>(store: &mut Vec<u64>, bytes: &A) where A : AsBytes<'a> {
666                encode(store, bytes.as_bytes())
667            }
668            fn write<'a, A, W: std::io::Write>(writer: W, bytes: &A) -> std::io::Result<()> where A : AsBytes<'a> {
669                write(writer, bytes.as_bytes())
670            }
671            fn decode(store: &[u64]) -> impl Iterator<Item=&[u8]> {
672                decode(store)
673            }
674        }
675
676        /// Encodes a sequence of byte slices as their length followed by their bytes, aligned to 8 bytes.
677        ///
678        /// Each length will be exactly 8 bytes, and the bytes that follow are padded out to a multiple of 8 bytes.
679        /// When reading the data, the length is in bytes, and one should consume those bytes and advance over padding.
680        pub fn encode<'a>(store: &mut Vec<u64>, bytes: impl Iterator<Item=(u64, &'a [u8])>) {
681            for (align, bytes) in bytes {
682                assert!(align <= 8);
683                store.push(bytes.len() as u64);
684                let whole_words = 8 * (bytes.len() / 8);
685                // We want to extend `store` by `bytes`, but `bytes` may not be `u64` aligned.
686                // In the latter case, init `store` and cast and copy onto it as a byte slice.
687                if let Ok(words) = bytemuck::try_cast_slice(&bytes[.. whole_words]) {
688                    store.extend_from_slice(words);
689                }
690                else {
691                    let store_len = store.len();
692                    store.resize(store_len + whole_words/8, 0);
693                    let slice = bytemuck::try_cast_slice_mut(&mut store[store_len..]).expect("&[u64] should convert to &[u8]");
694                    slice.copy_from_slice(&bytes[.. whole_words]);
695                }
696                let remaining_bytes = &bytes[whole_words..];
697                if !remaining_bytes.is_empty() {
698                    let mut remainder = 0u64;
699                    let transmute: &mut [u8] = bytemuck::try_cast_slice_mut(std::slice::from_mut(&mut remainder)).expect("&[u64] should convert to &[u8]");
700                    for (i, byte) in remaining_bytes.iter().enumerate() {
701                        transmute[i] = *byte;
702                    }
703                    store.push(remainder);
704                }
705            }
706        }
707
708        /// Writes a sequence of byte slices as their length followed by their bytes, padded to 8 bytes.
709        ///
710        /// Each length will be exactly 8 bytes, and the bytes that follow are padded out to a multiple of 8 bytes.
711        /// When reading the data, the length is in bytes, and one should consume those bytes and advance over padding.
712        pub fn write<'a>(mut writer: impl std::io::Write, bytes: impl Iterator<Item=(u64, &'a [u8])>) -> std::io::Result<()> {
713            // Columnar data is serialized as a sequence of `u64` values, with each `[u8]` slice
714            // serialize as first its length in bytes, and then as many `u64` values as needed.
715            // Padding should be added, but only for alignment; no specific values are required.
716            for (align, bytes) in bytes {
717                assert!(align <= 8);
718                let length = u64::try_from(bytes.len()).unwrap();
719                writer.write_all(bytemuck::cast_slice(std::slice::from_ref(&length)))?;
720                writer.write_all(bytes)?;
721                let padding = usize::try_from((8 - (length % 8)) % 8).unwrap();
722                writer.write_all(&[0; 8][..padding])?;
723            }
724            Ok(())
725        }
726
727        /// Decodes a sequence of byte slices from their length followed by their bytes.
728        ///
729        /// This decoder matches the `encode` function above.
730        /// In particular, it anticipates padding bytes when the length is not a multiple of eight.
731        pub fn decode(store: &[u64]) -> Decoder<'_> {
732            Decoder { store }
733        }
734
735        /// An iterator over byte slices, decoding from a sequence of lengths followed by bytes.
736        pub struct Decoder<'a> {
737            store: &'a [u64],
738        }
739
740        impl<'a> Iterator for Decoder<'a> {
741            type Item = &'a [u8];
742            fn next(&mut self) -> Option<Self::Item> {
743                if let Some(length) = self.store.first() {
744                    let length = *length as usize;
745                    self.store = &self.store[1..];
746                    let whole_words = if length % 8 == 0 { length / 8 } else { length / 8 + 1 };
747                    let bytes: &[u8] = bytemuck::try_cast_slice(&self.store[..whole_words]).expect("&[u64] should convert to &[u8]");
748                    self.store = &self.store[whole_words..];
749                    Some(&bytes[..length])
750                } else {
751                    None
752                }
753            }
754        }
755    }
756
757    /// A binary encoding of sequences of byte slices.
758    ///
759    /// The encoding starts with a sequence of n+1 offsets describing where to find the n slices in the bytes that follow.
760    /// Treating the offsets as a byte slice too, the each offset indicates the location (in bytes) of the end of its slice.
761    /// Each byte slice can be found from a pair of adjacent offsets, where the first is rounded up to a multiple of eight.
762    pub use serialization_neu::Indexed;
763    pub mod serialization_neu {
764
765        use crate::AsBytes;
766
767        /// Encodes and decodes bytes sequences, using an index of offsets.
768        pub struct Indexed;
769        impl super::EncodeDecode for Indexed {
770            fn length_in_words<'a, A>(bytes: &A) -> usize where A : AsBytes<'a> {
771                1 + bytes.as_bytes().map(|(_align, bytes)| 1 + bytes.len().div_ceil(8)).sum::<usize>()
772            }
773            fn encode<'a, A>(store: &mut Vec<u64>, bytes: &A) where A : AsBytes<'a> {
774                encode(store, bytes)
775            }
776            fn write<'a, A, W: std::io::Write>(writer: W, bytes: &A) -> std::io::Result<()> where A : AsBytes<'a> {
777                write(writer, bytes)
778            }
779            fn decode(store: &[u64]) -> impl Iterator<Item=&[u8]> {
780                decode(store)
781            }
782        }
783
784        /// Encodes `item` into `u64` aligned words.
785        ///
786        /// The sequence of byte slices are appended, with padding to have each slice start `u64` aligned.
787        /// The sequence is then pre-pended with as many byte offsets as there are slices in `item`, plus one.
788        /// The byte offsets indicate where each slice ends, and by rounding up to `u64` alignemnt where the next slice begins.
789        /// The first offset indicates where the list of offsets itself ends, and where the first slice begins.
790        ///
791        /// We will need to visit `as_bytes` three times to extract this information, so the method should be efficient and inlined.
792        /// The first read writes the first offset, the second writes each other offset, and the third writes the bytes themselves.
793        ///
794        /// The offsets are zero-based, rather than based on `store.len()`.
795        /// If you call the method with a non-empty `store` be careful decoding.
796        pub fn encode<'a, A>(store: &mut Vec<u64>, iter: &A)
797        where A : AsBytes<'a>,
798        {
799            // Read 1: Number of offsets we will record, equal to the number of slices plus one.
800            // TODO: right-size `store` before first call to `push`.
801            let offsets = 1 + iter.as_bytes().count();
802            let offsets_end: u64 = TryInto::<u64>::try_into((offsets) * std::mem::size_of::<u64>()).unwrap();
803            store.push(offsets_end);
804            // Read 2: Establish each of the offsets based on lengths of byte slices.
805            let mut position_bytes = offsets_end;
806            for (align, bytes) in iter.as_bytes() {
807                assert!(align <= 8);
808                // Write length in bytes, but round up to words before updating `position_bytes`.
809                let to_push: u64 = position_bytes + TryInto::<u64>::try_into(bytes.len()).unwrap();
810                store.push(to_push);
811                let round_len: u64 = ((bytes.len() + 7) & !7).try_into().unwrap();
812                position_bytes += round_len;
813            }
814            // Read 3: Append each byte slice, with padding to align starts to `u64`.
815            for (_align, bytes) in iter.as_bytes() {
816                let whole_words = 8 * (bytes.len() / 8);
817                // We want to extend `store` by `bytes`, but `bytes` may not be `u64` aligned.
818                // In the latter case, init `store` and cast and copy onto it as a byte slice.
819                if let Ok(words) = bytemuck::try_cast_slice(&bytes[.. whole_words]) {
820                    store.extend_from_slice(words);
821                }
822                else {
823                    let store_len = store.len();
824                    store.resize(store_len + whole_words/8, 0);
825                    let slice = bytemuck::try_cast_slice_mut(&mut store[store_len..]).expect("&[u64] should convert to &[u8]");
826                    slice.copy_from_slice(&bytes[.. whole_words]);
827                }
828                let remaining_bytes = &bytes[whole_words..];
829                if !remaining_bytes.is_empty() {
830                    let mut remainder = 0u64;
831                    let transmute: &mut [u8] = bytemuck::try_cast_slice_mut(std::slice::from_mut(&mut remainder)).expect("&[u64] should convert to &[u8]");
832                    for (i, byte) in remaining_bytes.iter().enumerate() {
833                        transmute[i] = *byte;
834                    }
835                    store.push(remainder);
836                }
837            }
838        }
839
840        pub fn write<'a, A, W>(mut writer: W, iter: &A) -> std::io::Result<()>
841        where
842            A: AsBytes<'a>,
843            W: std::io::Write,
844        {
845            // Read 1: Number of offsets we will record, equal to the number of slices plus one.
846            let offsets = 1 + iter.as_bytes().count();
847            let offsets_end: u64 = TryInto::<u64>::try_into((offsets) * std::mem::size_of::<u64>()).unwrap();
848            writer.write_all(bytemuck::cast_slice(std::slice::from_ref(&offsets_end)))?;
849            // Read 2: Establish each of the offsets based on lengths of byte slices.
850            let mut position_bytes = offsets_end;
851            for (align, bytes) in iter.as_bytes() {
852                assert!(align <= 8);
853                // Write length in bytes, but round up to words before updating `position_bytes`.
854                let to_push: u64 = position_bytes + TryInto::<u64>::try_into(bytes.len()).unwrap();
855                writer.write_all(bytemuck::cast_slice(std::slice::from_ref(&to_push)))?;
856                let round_len: u64 = ((bytes.len() + 7) & !7).try_into().unwrap();
857                position_bytes += round_len;
858            }
859            // Read 3: Append each byte slice, with padding to align starts to `u64`.
860            for (_align, bytes) in iter.as_bytes() {
861                writer.write_all(bytes)?;
862                let padding = ((bytes.len() + 7) & !7) - bytes.len();
863                if padding > 0 {
864                    writer.write_all(&[0u8;8][..padding])?;
865                }
866            }
867
868            Ok(())
869        }
870
871        /// Decodes an encoded sequence of byte slices. Each result will be `u64` aligned.
872        pub fn decode(store: &[u64]) -> impl Iterator<Item=&[u8]> {
873            assert!(store[0] % 8 == 0);
874            let slices = (store[0] / 8) - 1;
875            (0 .. slices).map(|i| decode_index(store, i))
876        }
877
878        /// Decodes a specific byte slice by index. It will be `u64` aligned.
879        #[inline(always)]
880        pub fn decode_index(store: &[u64], index: u64) -> &[u8] {
881            debug_assert!(index + 1 < store[0]/8);
882            let index: usize = index.try_into().unwrap();
883            let lower: usize = ((store[index] + 7) & !7).try_into().unwrap();
884            let upper: usize = (store[index + 1]).try_into().unwrap();
885            let bytes: &[u8] = bytemuck::try_cast_slice(store).expect("&[u64] should convert to &[u8]");
886            &bytes[lower .. upper]
887        }
888
889        #[cfg(test)]
890        mod test {
891
892            use crate::{Borrow, ContainerOf};
893            use crate::common::Push;
894            use crate::AsBytes;
895
896            use super::{encode, decode};
897
898            fn assert_roundtrip<'a, AB: AsBytes<'a>>(item: &AB) {
899                let mut store = Vec::new();
900                encode(&mut store, item);
901                assert!(item.as_bytes().map(|x| x.1).eq(decode(&store)));
902            }
903
904            #[test]
905            fn round_trip() {
906
907                let mut column: ContainerOf<Result<u64, String>> = Default::default();
908                for i in 0..10000u64 {
909                    column.push(&Ok::<u64, String>(i));
910                    column.push(&Err::<u64, String>(format!("{:?}", i)));
911                }
912
913                assert_roundtrip(&column.borrow());
914            }
915        }
916    }
917
918    #[cfg(test)]
919    mod test {
920        use crate::ContainerOf;
921
922        #[test]
923        fn round_trip() {
924
925            use crate::common::{Push, HeapSize, Len, Index};
926            use crate::{Borrow, AsBytes, FromBytes};
927
928            let mut column: ContainerOf<Result<u64, u64>> = Default::default();
929            for i in 0..100u64 {
930                column.push(Ok::<u64, u64>(i));
931                column.push(Err::<u64, u64>(i));
932            }
933
934            assert_eq!(column.len(), 200);
935            assert_eq!(column.heap_size(), (1624, 2080));
936
937            for i in 0..100 {
938                assert_eq!(column.get(2*i+0), Ok(i as u64));
939                assert_eq!(column.get(2*i+1), Err(i as u64));
940            }
941
942            let column2 = crate::Results::<&[u64], &[u64], &[u64], &[u64], &u64>::from_bytes(&mut column.borrow().as_bytes().map(|(_, bytes)| bytes));
943            for i in 0..100 {
944                assert_eq!(column.get(2*i+0), column2.get(2*i+0).copied().map_err(|e| *e));
945                assert_eq!(column.get(2*i+1), column2.get(2*i+1).copied().map_err(|e| *e));
946            }
947
948            let column3 = crate::Results::<&[u64], &[u64], &[u64], &[u64], &u64>::from_bytes(&mut column2.as_bytes().map(|(_, bytes)| bytes));
949            for i in 0..100 {
950                assert_eq!(column3.get(2*i+0), column2.get(2*i+0));
951                assert_eq!(column3.get(2*i+1), column2.get(2*i+1));
952            }
953        }
954    }
955}
956
957/// Types that prefer to be represented by `Vec<T>`.
958pub mod primitive {
959    use std::num::Wrapping;
960
961    /// An implementation of opinions for types that want to use `Vec<T>`.
962    macro_rules! implement_columnable {
963        ($($index_type:ty),*) => { $(
964            impl crate::Columnar for $index_type {
965                #[inline(always)]
966                fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { *other }
967
968                type Container = Vec<$index_type>;
969            }
970
971            impl crate::HeapSize for $index_type { }
972
973            impl<'a> crate::AsBytes<'a> for &'a [$index_type] {
974                #[inline(always)]
975                fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
976                    std::iter::once((std::mem::align_of::<$index_type>() as u64, bytemuck::cast_slice(&self[..])))
977                }
978            }
979            impl<'a> crate::FromBytes<'a> for &'a [$index_type] {
980                #[inline(always)]
981                fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
982                    // We use `unwrap()` here in order to panic with the `bytemuck` error, which may be informative.
983                    bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()
984                }
985            }
986            impl<'a, const N: usize> crate::AsBytes<'a> for &'a [[$index_type; N]] {
987                #[inline(always)]
988                fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
989                    std::iter::once((std::mem::align_of::<$index_type>() as u64, bytemuck::cast_slice(&self[..])))
990                }
991            }
992            impl<'a, const N: usize> crate::FromBytes<'a> for &'a [[$index_type; N]] {
993                #[inline(always)]
994                fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
995                    // We use `unwrap()` here in order to panic with the `bytemuck` error, which may be informative.
996                    bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()
997                }
998            }
999        )* }
1000    }
1001
1002    implement_columnable!(u8, u16, u32, u64);
1003    implement_columnable!(i8, i16, i32, i64);
1004    implement_columnable!(f32, f64);
1005    implement_columnable!(Wrapping<u8>, Wrapping<u16>, Wrapping<u32>, Wrapping<u64>);
1006    implement_columnable!(Wrapping<i8>, Wrapping<i16>, Wrapping<i32>, Wrapping<i64>);
1007
1008    pub use sizes::{Usizes, Isizes};
1009    /// Columnar stores for `usize` and `isize`, stored as 64 bits.
1010    mod sizes {
1011
1012        use crate::*;
1013        use crate::common::{BorrowIndexAs, PushIndexAs};
1014
1015        #[derive(Copy, Clone, Default)]
1016        pub struct Usizes<CV = Vec<u64>> { pub values: CV }
1017
1018        impl Columnar for usize {
1019            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
1020            type Container = Usizes;
1021        }
1022
1023        impl<CV: BorrowIndexAs<u64> + Len> Borrow for Usizes<CV> {
1024            type Ref<'a> = usize;
1025            type Borrowed<'a> = Usizes<CV::Borrowed<'a>> where CV: 'a;
1026            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1027                Usizes { values: self.values.borrow() }
1028            }
1029            #[inline(always)]
1030            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where CV: 'a {
1031                Usizes { values: CV::reborrow(thing.values) }
1032            }
1033            #[inline(always)]
1034            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1035        }
1036
1037        impl<CV: PushIndexAs<u64>> Container for Usizes<CV> {
1038            #[inline(always)]
1039            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1040                self.values.extend_from_self(other.values, range)
1041            }
1042
1043            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1044                self.values.reserve_for(selves.map(|x| x.values))
1045            }
1046        }
1047
1048        impl<CV: Len> Len for Usizes<CV> { fn len(&self) -> usize { self.values.len() }}
1049        impl IndexMut for Usizes {
1050            type IndexMut<'a> = &'a mut u64;
1051            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> { &mut self.values[index] }
1052        }
1053        impl<CV: IndexAs<u64>> Index for Usizes<CV> {
1054            type Ref = usize;
1055            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self.values.index_as(index).try_into().expect("Usizes values should fit in `usize`") }
1056        }
1057        impl<CV: IndexAs<u64>> Index for &Usizes<CV> {
1058            type Ref = usize;
1059            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self.values.index_as(index).try_into().expect("Usizes values should fit in `usize`") }
1060        }
1061        impl<CV: for<'a> Push<&'a u64>> Push<usize> for Usizes<CV> {
1062            #[inline]
1063            fn push(&mut self, item: usize) { self.values.push(&item.try_into().expect("usize must fit in a u64")) }
1064        }
1065        impl Push<&usize> for Usizes {
1066            #[inline]
1067            fn push(&mut self, item: &usize) { self.values.push((*item).try_into().expect("usize must fit in a u64")) }
1068        }
1069        impl<CV: Clear> Clear for Usizes<CV> { fn clear(&mut self) { self.values.clear() }}
1070
1071        impl<CV: HeapSize> HeapSize for Usizes<CV> {
1072            fn heap_size(&self) -> (usize, usize) {
1073                self.values.heap_size()
1074            }
1075        }
1076
1077        impl<'a, CV: crate::AsBytes<'a>> crate::AsBytes<'a> for crate::primitive::Usizes<CV> {
1078            #[inline(always)]
1079            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1080                self.values.as_bytes()
1081            }
1082        }
1083
1084        impl<'a, CV: crate::FromBytes<'a>> crate::FromBytes<'a> for crate::primitive::Usizes<CV> {
1085            #[inline(always)]
1086            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1087                Self { values: CV::from_bytes(bytes) }
1088            }
1089        }
1090
1091
1092        #[derive(Copy, Clone, Default)]
1093        pub struct Isizes<CV = Vec<i64>> { pub values: CV }
1094
1095        impl Columnar for isize {
1096            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
1097            type Container = Isizes;
1098        }
1099
1100        impl<CV: BorrowIndexAs<i64>> Borrow for Isizes<CV> {
1101            type Ref<'a> = isize;
1102            type Borrowed<'a> = Isizes<CV::Borrowed<'a>> where CV: 'a;
1103            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1104                Isizes { values: self.values.borrow() }
1105            }
1106            #[inline(always)]
1107            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where CV: 'a {
1108                Isizes { values: CV::reborrow(thing.values) }
1109            }
1110            #[inline(always)]
1111            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1112        }
1113
1114        impl<CV: PushIndexAs<i64>> Container for Isizes<CV> {
1115            #[inline(always)]
1116            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1117                self.values.extend_from_self(other.values, range)
1118            }
1119
1120            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1121                self.values.reserve_for(selves.map(|x| x.values))
1122            }
1123        }
1124
1125        impl<CV: Len> Len for Isizes<CV> { fn len(&self) -> usize { self.values.len() }}
1126        impl IndexMut for Isizes {
1127            type IndexMut<'a> = &'a mut i64;
1128            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> { &mut self.values[index] }
1129        }
1130        impl<CV: IndexAs<i64>> Index for Isizes<CV> {
1131            type Ref = isize;
1132            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self.values.index_as(index).try_into().expect("Isizes values should fit in `isize`") }
1133        }
1134        impl<CV: IndexAs<i64>> Index for &Isizes<CV> {
1135            type Ref = isize;
1136            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self.values.index_as(index).try_into().expect("Isizes values should fit in `isize`") }
1137        }
1138        impl<CV: for<'a> Push<&'a i64>> Push<isize> for Isizes<CV> {
1139            #[inline]
1140            fn push(&mut self, item: isize) { self.values.push(&item.try_into().expect("isize must fit in a i64")) }
1141        }
1142        impl Push<&isize> for Isizes {
1143            #[inline]
1144            fn push(&mut self, item: &isize) { self.values.push((*item).try_into().expect("isize must fit in a i64")) }
1145        }
1146        impl<CV: Clear> Clear for Isizes<CV> { fn clear(&mut self) { self.values.clear() }}
1147
1148        impl<CV: HeapSize> HeapSize for Isizes<CV> {
1149            fn heap_size(&self) -> (usize, usize) {
1150                self.values.heap_size()
1151            }
1152        }
1153
1154        impl<'a, CV: crate::AsBytes<'a>> crate::AsBytes<'a> for crate::primitive::Isizes<CV> {
1155            #[inline(always)]
1156            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1157                self.values.as_bytes()
1158            }
1159        }
1160
1161        impl<'a, CV: crate::FromBytes<'a>> crate::FromBytes<'a> for crate::primitive::Isizes<CV> {
1162            #[inline(always)]
1163            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1164                Self { values: CV::from_bytes(bytes) }
1165            }
1166        }
1167    }
1168
1169    pub use chars::{Chars};
1170    /// Columnar store for `char`, stored as a `u32`.
1171    mod chars {
1172
1173        use crate::*;
1174        use crate::common::{BorrowIndexAs, PushIndexAs};
1175
1176        type Encoded = u32;
1177
1178        #[derive(Copy, Clone, Default)]
1179        pub struct Chars<CV = Vec<Encoded>> { pub values: CV }
1180
1181        impl Columnar for char {
1182            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
1183            type Container = Chars;
1184        }
1185
1186        impl<CV: BorrowIndexAs<Encoded>> Borrow for Chars<CV> {
1187            type Ref<'a> = char;
1188            type Borrowed<'a> = Chars<CV::Borrowed<'a>> where CV: 'a;
1189            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1190                Chars { values: self.values.borrow() }
1191            }
1192            #[inline(always)]
1193            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where CV: 'a {
1194                Chars { values: CV::reborrow(thing.values) }
1195            }
1196            #[inline(always)]
1197            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1198        }
1199
1200        impl<CV: PushIndexAs<Encoded>> Container for Chars<CV> {
1201            #[inline(always)]
1202            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1203                self.values.extend_from_self(other.values, range)
1204            }
1205
1206            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1207                self.values.reserve_for(selves.map(|x| x.values))
1208            }
1209        }
1210
1211        impl<CV: Len> Len for Chars<CV> { fn len(&self) -> usize { self.values.len() }}
1212        impl<CV: IndexAs<Encoded>> Index for Chars<CV> {
1213            type Ref = char;
1214            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { char::from_u32(self.values.index_as(index)).unwrap() }
1215        }
1216        impl<CV: IndexAs<Encoded>> Index for &Chars<CV> {
1217            type Ref = char;
1218            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { char::from_u32(self.values.index_as(index)).unwrap() }
1219        }
1220        impl<CV: for<'a> Push<&'a Encoded>> Push<char> for Chars<CV> {
1221            #[inline]
1222            fn push(&mut self, item: char) { self.values.push(&u32::from(item)) }
1223        }
1224        impl Push<&char> for Chars {
1225            #[inline]
1226            fn push(&mut self, item: &char) { self.values.push(u32::from(*item)) }
1227        }
1228        impl<CV: Clear> Clear for Chars<CV> { fn clear(&mut self) { self.values.clear() }}
1229
1230        impl<CV: HeapSize> HeapSize for Chars<CV> {
1231            fn heap_size(&self) -> (usize, usize) {
1232                self.values.heap_size()
1233            }
1234        }
1235
1236        impl<'a, CV: crate::AsBytes<'a>> crate::AsBytes<'a> for Chars<CV> {
1237            #[inline(always)]
1238            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1239                self.values.as_bytes()
1240            }
1241        }
1242
1243        impl<'a, CV: crate::FromBytes<'a>> crate::FromBytes<'a> for Chars<CV> {
1244            #[inline(always)]
1245            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1246                Self { values: CV::from_bytes(bytes) }
1247            }
1248        }
1249    }
1250
1251    pub use larges::{U128s, I128s};
1252    /// Columnar stores for `u128` and `i128`, stored as [u8; 16] bits.
1253    mod larges {
1254
1255        use crate::*;
1256        use crate::common::{BorrowIndexAs, PushIndexAs};
1257
1258        type Encoded = [u8; 16];
1259
1260        #[derive(Copy, Clone, Default)]
1261        pub struct U128s<CV = Vec<Encoded>> { pub values: CV }
1262
1263        impl Columnar for u128 {
1264            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
1265            type Container = U128s;
1266        }
1267
1268        impl<CV: BorrowIndexAs<Encoded>> Borrow for U128s<CV> {
1269            type Ref<'a> = u128;
1270            type Borrowed<'a> = U128s<CV::Borrowed<'a>> where CV: 'a;
1271            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1272                U128s { values: self.values.borrow() }
1273            }
1274            #[inline(always)]
1275            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where CV: 'a {
1276                U128s { values: CV::reborrow(thing.values) }
1277            }
1278            #[inline(always)]
1279            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1280        }
1281
1282        impl<CV: PushIndexAs<Encoded>> Container for U128s<CV> {
1283            #[inline(always)]
1284            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1285                self.values.extend_from_self(other.values, range)
1286            }
1287
1288            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1289                self.values.reserve_for(selves.map(|x| x.values))
1290            }
1291        }
1292
1293        impl<CV: Len> Len for U128s<CV> { fn len(&self) -> usize { self.values.len() }}
1294        impl<CV: IndexAs<Encoded>> Index for U128s<CV> {
1295            type Ref = u128;
1296            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { u128::from_le_bytes(self.values.index_as(index)) }
1297        }
1298        impl<CV: IndexAs<Encoded>> Index for &U128s<CV> {
1299            type Ref = u128;
1300            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { u128::from_le_bytes(self.values.index_as(index)) }
1301        }
1302        impl<CV: for<'a> Push<&'a Encoded>> Push<u128> for U128s<CV> {
1303            #[inline]
1304            fn push(&mut self, item: u128) { self.values.push(&item.to_le_bytes()) }
1305        }
1306        impl Push<&u128> for U128s {
1307            #[inline]
1308            fn push(&mut self, item: &u128) { self.values.push(item.to_le_bytes()) }
1309        }
1310        impl<CV: Clear> Clear for U128s<CV> { fn clear(&mut self) { self.values.clear() }}
1311
1312        impl<CV: HeapSize> HeapSize for U128s<CV> {
1313            fn heap_size(&self) -> (usize, usize) {
1314                self.values.heap_size()
1315            }
1316        }
1317
1318        impl<'a, CV: crate::AsBytes<'a>> crate::AsBytes<'a> for U128s<CV> {
1319            #[inline(always)]
1320            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1321                self.values.as_bytes()
1322            }
1323        }
1324
1325        impl<'a, CV: crate::FromBytes<'a>> crate::FromBytes<'a> for U128s<CV> {
1326            #[inline(always)]
1327            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1328                Self { values: CV::from_bytes(bytes) }
1329            }
1330        }
1331
1332        #[derive(Copy, Clone, Default)]
1333        pub struct I128s<CV = Vec<Encoded>> { pub values: CV }
1334
1335        impl Columnar for i128 {
1336            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
1337            type Container = I128s;
1338        }
1339
1340        impl<CV: BorrowIndexAs<Encoded>> Borrow for I128s<CV> {
1341            type Ref<'a> = i128;
1342            type Borrowed<'a> = I128s<CV::Borrowed<'a>> where CV: 'a;
1343            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1344                I128s { values: self.values.borrow() }
1345            }
1346            #[inline(always)]
1347            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where CV: 'a {
1348                I128s { values: CV::reborrow(thing.values) }
1349            }
1350            #[inline(always)]
1351            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1352        }
1353
1354        impl<CV: PushIndexAs<Encoded>> Container for I128s<CV> {
1355            #[inline(always)]
1356            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1357                self.values.extend_from_self(other.values, range)
1358            }
1359
1360            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1361                self.values.reserve_for(selves.map(|x| x.values))
1362            }
1363        }
1364
1365        impl<CV: Len> Len for I128s<CV> { fn len(&self) -> usize { self.values.len() }}
1366        impl<CV: IndexAs<Encoded>> Index for I128s<CV> {
1367            type Ref = i128;
1368            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { i128::from_le_bytes(self.values.index_as(index)) }
1369        }
1370        impl<CV: IndexAs<Encoded>> Index for &I128s<CV> {
1371            type Ref = i128;
1372            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { i128::from_le_bytes(self.values.index_as(index)) }
1373        }
1374        impl<CV: for<'a> Push<&'a Encoded>> Push<i128> for I128s<CV> {
1375            #[inline]
1376            fn push(&mut self, item: i128) { self.values.push(&item.to_le_bytes()) }
1377        }
1378        impl Push<&i128> for I128s {
1379            #[inline]
1380            fn push(&mut self, item: &i128) { self.values.push(item.to_le_bytes()) }
1381        }
1382        impl<CV: Clear> Clear for I128s<CV> { fn clear(&mut self) { self.values.clear() }}
1383
1384        impl<CV: HeapSize> HeapSize for I128s<CV> {
1385            fn heap_size(&self) -> (usize, usize) {
1386                self.values.heap_size()
1387            }
1388        }
1389
1390        impl<'a, CV: crate::AsBytes<'a>> crate::AsBytes<'a> for I128s<CV> {
1391            #[inline(always)]
1392            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1393                self.values.as_bytes()
1394            }
1395        }
1396
1397        impl<'a, CV: crate::FromBytes<'a>> crate::FromBytes<'a> for I128s<CV> {
1398            #[inline(always)]
1399            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1400                Self { values: CV::from_bytes(bytes) }
1401            }
1402        }
1403    }
1404
1405    /// Columnar stores for non-decreasing `u64`, stored in various ways.
1406    ///
1407    /// The venerable `Vec<u64>` works as a general container for arbitrary offests,
1408    /// but it can be non-optimal for various patterns of offset, including constant
1409    /// inter-offset spacing, and relatively short runs (compared to a `RankSelect`).
1410    pub mod offsets {
1411
1412        pub use array::Fixeds;
1413        pub use stride::Strides;
1414
1415        /// An offset container that encodes a constant spacing in its type.
1416        ///
1417        /// Any attempt to push any value will result in pushing the next value
1418        /// at the specified spacing. This type is only appropriate in certain
1419        /// contexts, for example when storing `[T; K]` array types, or having
1420        /// introspected a `Strides` and found it to be only one constant stride.
1421        mod array {
1422
1423            use crate::{Container, Borrow, Index, Len, Push};
1424            use crate::common::index::CopyAs;
1425
1426            /// An offset container that encodes a constant `K` spacing.
1427            #[derive(Copy, Clone, Debug, Default)]
1428            pub struct Fixeds<const K: u64, CC = u64> { pub count: CC }
1429
1430            impl<const K: u64> Borrow for Fixeds<K> {
1431                type Ref<'a> = u64;
1432                type Borrowed<'a> = Fixeds<K, &'a u64>;
1433                #[inline(always)]
1434                fn borrow<'a>(&'a self) -> Self::Borrowed<'a> { Fixeds { count: &self.count } }
1435                #[inline(always)]
1436                fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where Self: 'a {
1437                    Fixeds { count: thing.count }
1438                }
1439                #[inline(always)]
1440                fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1441            }
1442
1443            impl<const K: u64> Container for Fixeds<K> {
1444                #[inline(always)]
1445                fn extend_from_self(&mut self, _other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1446                    self.count += range.len() as u64;
1447                }
1448
1449                fn reserve_for<'a, I>(&mut self, _selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone { }
1450            }
1451
1452            impl<const K: u64, CC: CopyAs<u64>> Len for Fixeds<K, CC> {
1453                #[inline(always)] fn len(&self) -> usize { self.count.copy_as() as usize }
1454            }
1455
1456            impl<const K: u64, CC> Index for Fixeds<K, CC> {
1457                type Ref = u64;
1458                #[inline(always)]
1459                fn get(&self, index: usize) -> Self::Ref { (index as u64 + 1) * K }
1460            }
1461            impl<'a, const K: u64, CC> Index for &'a Fixeds<K, CC> {
1462                type Ref = u64;
1463                #[inline(always)]
1464                fn get(&self, index: usize) -> Self::Ref { (index as u64 + 1) * K }
1465            }
1466
1467            impl<'a, const K: u64, T> Push<T> for Fixeds<K> {
1468                // TODO: check for overflow?
1469                #[inline(always)]
1470                fn push(&mut self, _item: T) { self.count += 1; }
1471                #[inline(always)]
1472                fn extend(&mut self, iter: impl IntoIterator<Item=T>) {
1473                    self.count += iter.into_iter().count() as u64;
1474                }
1475            }
1476
1477            impl<const K: u64> crate::HeapSize for Fixeds<K> {
1478                #[inline(always)]
1479                fn heap_size(&self) -> (usize, usize) { (0, 0) }
1480            }
1481            impl<const K: u64> crate::Clear for Fixeds<K> {
1482                #[inline(always)]
1483                fn clear(&mut self) { self.count = 0; }
1484            }
1485
1486            impl<'a, const K: u64> crate::AsBytes<'a> for Fixeds<K, &'a u64> {
1487                #[inline(always)]
1488                fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1489                    std::iter::once((8, bytemuck::cast_slice(std::slice::from_ref(self.count))))
1490                }
1491            }
1492            impl<'a, const K: u64> crate::FromBytes<'a> for Fixeds<K, &'a u64> {
1493                #[inline(always)]
1494                fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1495                    Self { count: &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0] }
1496                }
1497            }
1498
1499            use super::Strides;
1500            impl<const K: u64, BC: Len, CC: CopyAs<u64>> std::convert::TryFrom<Strides<BC, CC>> for Fixeds<K, CC> {
1501                // On error we return the original.
1502                type Error = Strides<BC, CC>;
1503                fn try_from(item: Strides<BC, CC>) -> Result<Self, Self::Error> {
1504                    if item.strided() == Some(K) { Ok( Self { count: item.length } ) } else { Err(item) }
1505                }
1506            }
1507        }
1508
1509        /// An general offset container optimized for fixed inter-offset sizes.
1510        ///
1511        /// Although it can handle general offsets, it starts with the optimistic
1512        /// assumption that the offsets will be evenly spaced from zero, and while
1513        /// that holds it will maintain the stride and length. Should it stop being
1514        /// true, when a non-confirming offset is pushed, it will start to store
1515        /// the offsets in a general container.
1516        mod stride {
1517
1518            use std::ops::Deref;
1519            use crate::{Container, Borrow, Index, Len, Push, Clear, AsBytes, FromBytes};
1520            use crate::common::index::CopyAs;
1521
1522            /// The first two integers describe a stride pattern, [stride, length].
1523            ///
1524            /// If the length is zero the collection is empty. The first `item` pushed
1525            /// always becomes the first list element. The next element is the number of
1526            /// items at position `i` whose value is `item * (i+1)`. After this comes
1527            /// the remaining entries in the bounds container.
1528            #[derive(Copy, Clone, Debug, Default)]
1529            pub struct Strides<BC = Vec<u64>, CC = u64> {
1530                pub stride: CC,
1531                pub length: CC,
1532                pub bounds: BC,
1533            }
1534
1535            impl Borrow for Strides {
1536                type Ref<'a> = u64;
1537                type Borrowed<'a> = Strides<&'a [u64], &'a u64>;
1538
1539                #[inline(always)] fn borrow<'a>(&'a self) -> Self::Borrowed<'a> { Strides { stride: &self.stride, length: &self.length, bounds: &self.bounds[..] } }
1540                /// Reborrows the borrowed type to a shorter lifetime. See [`Columnar::reborrow`] for details.
1541                #[inline(always)] fn reborrow<'b, 'a: 'b>(item: Self::Borrowed<'a>) -> Self::Borrowed<'b> where Self: 'a {
1542                    Strides { stride: item.stride, length: item.length, bounds: item.bounds }
1543                }
1544                /// Reborrows the borrowed type to a shorter lifetime. See [`Columnar::reborrow`] for details.
1545                 #[inline(always)]fn reborrow_ref<'b, 'a: 'b>(item: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { item }
1546            }
1547
1548            impl Container for Strides {
1549                fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1550                    self.bounds.reserve_for(selves.map(|x| x.bounds))
1551                }
1552            }
1553
1554            impl<'a> Push<&'a u64> for Strides { #[inline(always)] fn push(&mut self, item: &'a u64) { self.push(*item) } }
1555            impl Push<u64> for Strides { #[inline(always)] fn push(&mut self, item: u64) { self.push(item) } }
1556            impl Clear for Strides { #[inline(always)] fn clear(&mut self) { self.clear() } }
1557
1558            impl<BC: Len, CC: CopyAs<u64>> Len for Strides<BC, CC> {
1559                #[inline(always)]
1560                fn len(&self) -> usize { self.length.copy_as() as usize + self.bounds.len() }
1561            }
1562            impl Index for Strides<&[u64], &u64> {
1563                type Ref = u64;
1564                #[inline(always)]
1565                fn get(&self, index: usize) -> Self::Ref {
1566                    let index = index as u64;
1567                    if index < *self.length { (index+1) * self.stride } else { self.bounds[(index - self.length) as usize] }
1568                }
1569            }
1570
1571            impl<'a, BC: AsBytes<'a>> AsBytes<'a> for Strides<BC, &'a u64> {
1572                #[inline(always)]
1573                fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1574                    let stride = std::iter::once((8, bytemuck::cast_slice(std::slice::from_ref(self.stride))));
1575                    let length = std::iter::once((8, bytemuck::cast_slice(std::slice::from_ref(self.length))));
1576                    let bounds = self.bounds.as_bytes();
1577                    crate::chain(stride, crate::chain(length, bounds))
1578                }
1579            }
1580            impl<'a, BC: FromBytes<'a>> FromBytes<'a> for Strides<BC, &'a u64> {
1581                #[inline(always)]
1582                fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1583                    let stride = &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0];
1584                    let length = &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0];
1585                    let bounds = BC::from_bytes(bytes);
1586                    Self { stride, length, bounds }
1587                }
1588            }
1589
1590            impl Strides {
1591                pub fn new(stride: u64, length: u64) -> Self {
1592                    Self { stride, length, bounds: Vec::default() }
1593                }
1594                #[inline(always)]
1595                pub fn push(&mut self, item: u64) {
1596                    if self.length == 0 {
1597                        self.stride = item;
1598                        self.length = 1;
1599                    }
1600                    else if !self.bounds.is_empty() {
1601                        self.bounds.push(item);
1602                    }
1603                    else if item == self.stride * (self.length + 1) {
1604                        self.length += 1;
1605                    }
1606                    else {
1607                        self.bounds.push(item);
1608                    }
1609                }
1610                #[inline(always)]
1611                pub fn clear(&mut self) {
1612                    self.stride = 0;
1613                    self.length = 0;
1614                    self.bounds.clear();
1615                }
1616            }
1617
1618            impl<BC: Deref<Target=[u64]>, CC: CopyAs<u64>> Strides<BC, CC> {
1619                #[inline(always)]
1620                pub fn bounds(&self, index: usize) -> (usize, usize) {
1621                    let stride = self.stride.copy_as();
1622                    let length = self.length.copy_as();
1623                    let index = index as u64;
1624                    let lower = if index == 0 { 0 } else {
1625                        let index = index - 1;
1626                        if index < length { (index+1) * stride } else { self.bounds[(index - length) as usize] }
1627                    } as usize;
1628                    let upper = if index < length { (index+1) * stride } else { self.bounds[(index - length) as usize] } as usize;
1629                    (lower, upper)
1630                }
1631            }
1632            impl<BC: Len, CC: CopyAs<u64>> Strides<BC, CC> {
1633                #[inline(always)] pub fn strided(&self) -> Option<u64> {
1634                    if self.bounds.is_empty() {
1635                        Some(self.stride.copy_as())
1636                    }
1637                    else { None }
1638                }
1639            }
1640        }
1641
1642        #[cfg(test)]
1643        mod test {
1644            #[test]
1645            fn round_trip() {
1646
1647                use crate::common::{Index, Push, Len};
1648                use crate::{Borrow, Vecs};
1649                use crate::primitive::offsets::{Strides, Fixeds};
1650
1651                let mut cols = Vecs::<Vec::<i32>, Strides>::default();
1652                for i in 0 .. 100 {
1653                    cols.push(&[1i32, 2, i]);
1654                }
1655
1656                let cols = Vecs {
1657                    bounds: TryInto::<Fixeds<3>>::try_into(cols.bounds).unwrap(),
1658                    values: cols.values,
1659                };
1660
1661                assert_eq!(cols.borrow().len(), 100);
1662                for i in 0 .. 100 {
1663                    assert_eq!(cols.borrow().get(i).len(), 3);
1664                }
1665
1666                let mut cols = Vecs {
1667                    bounds: Strides::new(3, cols.bounds.count),
1668                    values: cols.values
1669                };
1670
1671                cols.push(&[0, 0]);
1672                assert!(TryInto::<Fixeds<3>>::try_into(cols.bounds).is_err());
1673            }
1674        }
1675    }
1676
1677    pub use empty::Empties;
1678    /// A columnar store for `()`.
1679    mod empty {
1680
1681        use crate::common::index::CopyAs;
1682        use crate::{Clear, Columnar, Container, Len, IndexMut, Index, Push, HeapSize, Borrow};
1683
1684        #[derive(Copy, Clone, Debug, Default)]
1685        pub struct Empties<CC = u64> { pub count: CC, pub empty: () }
1686
1687        impl Columnar for () {
1688            #[inline(always)]
1689            fn into_owned<'a>(_other: crate::Ref<'a, Self>) -> Self { }
1690            type Container = Empties;
1691        }
1692
1693        impl Borrow for Empties {
1694            type Ref<'a> = ();
1695            type Borrowed<'a> = Empties<&'a u64>;
1696            #[inline(always)]
1697            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> { Empties { count: &self.count, empty: () } }
1698            #[inline(always)]
1699            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where Self: 'a {
1700                Empties { count: thing.count, empty: () }
1701            }
1702            #[inline(always)]
1703            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1704        }
1705
1706        impl Container for Empties {
1707            #[inline(always)]
1708            fn extend_from_self(&mut self, _other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1709                self.count += range.len() as u64;
1710            }
1711
1712            fn reserve_for<'a, I>(&mut self, _selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone { }
1713        }
1714
1715        impl<CC: CopyAs<u64>> Len for Empties<CC> {
1716            #[inline(always)] fn len(&self) -> usize { self.count.copy_as() as usize }
1717        }
1718        impl<CC> IndexMut for Empties<CC> {
1719            type IndexMut<'a> = &'a mut () where CC: 'a;
1720            // TODO: panic if out of bounds?
1721            #[inline(always)] fn get_mut(&mut self, _index: usize) -> Self::IndexMut<'_> { &mut self.empty }
1722        }
1723        impl<CC> Index for Empties<CC> {
1724            type Ref = ();
1725            #[inline(always)]
1726            fn get(&self, _index: usize) -> Self::Ref { }
1727        }
1728        impl<'a, CC> Index for &'a Empties<CC> {
1729            type Ref = &'a ();
1730            #[inline(always)]
1731            fn get(&self, _index: usize) -> Self::Ref { &() }
1732        }
1733        impl Push<()> for Empties {
1734            // TODO: check for overflow?
1735            #[inline(always)]
1736            fn push(&mut self, _item: ()) { self.count += 1; }
1737            #[inline(always)]
1738            fn extend(&mut self, iter: impl IntoIterator<Item=()>) {
1739                self.count += iter.into_iter().count() as u64;
1740            }
1741        }
1742        impl<'a> Push<&'a ()> for Empties {
1743            // TODO: check for overflow?
1744            #[inline(always)]
1745            fn push(&mut self, _item: &()) { self.count += 1; }
1746            #[inline(always)]
1747            fn extend(&mut self, iter: impl IntoIterator<Item=&'a ()>) {
1748                self.count += iter.into_iter().count() as u64;
1749            }
1750        }
1751
1752        impl HeapSize for Empties {
1753            #[inline(always)]
1754            fn heap_size(&self) -> (usize, usize) { (0, 0) }
1755        }
1756        impl Clear for Empties {
1757            #[inline(always)]
1758            fn clear(&mut self) { self.count = 0; }
1759        }
1760
1761        impl<'a> crate::AsBytes<'a> for crate::primitive::Empties<&'a u64> {
1762            #[inline(always)]
1763            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1764                std::iter::once((8, bytemuck::cast_slice(std::slice::from_ref(self.count))))
1765            }
1766        }
1767        impl<'a> crate::FromBytes<'a> for crate::primitive::Empties<&'a u64> {
1768            #[inline(always)]
1769            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1770                Self { count: &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0], empty: () }
1771            }
1772        }
1773    }
1774
1775    pub use boolean::Bools;
1776    /// A columnar store for `bool`.
1777    mod boolean {
1778
1779        use crate::common::index::CopyAs;
1780        use crate::{Container, Clear, Len, Index, IndexAs, Push, HeapSize, Borrow};
1781
1782        /// A store for maintaining `Vec<bool>`.
1783        #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1784        #[derive(Copy, Clone, Debug, Default, PartialEq)]
1785        pub struct Bools<VC = Vec<u64>, WC = u64> {
1786            /// The bundles of bits that form complete `u64` values.
1787            pub values: VC,
1788            /// The work-in-progress bits that are not yet complete.
1789            pub last_word: WC,
1790            /// The number of set bits in `bits.last()`.
1791            pub last_bits: WC,
1792        }
1793
1794        impl crate::Columnar for bool {
1795            #[inline(always)]
1796            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
1797            type Container = Bools;
1798        }
1799
1800        impl<VC: crate::common::BorrowIndexAs<u64>> Borrow for Bools<VC> {
1801            type Ref<'a> = bool;
1802            type Borrowed<'a> = Bools<VC::Borrowed<'a>, &'a u64> where VC: 'a;
1803            #[inline(always)]
1804            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1805                Bools {
1806                    values: self.values.borrow(),
1807                    last_word: &self.last_word,
1808                    last_bits: &self.last_bits,
1809                }
1810            }
1811            #[inline(always)]
1812            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where VC: 'a {
1813                Bools {
1814                    values: VC::reborrow(thing.values),
1815                    last_word: thing.last_word,
1816                    last_bits: thing.last_bits,
1817                }
1818            }
1819            #[inline(always)]
1820            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1821        }
1822
1823        impl<VC: crate::common::PushIndexAs<u64>> Container for Bools<VC> {
1824            // TODO: There is probably a smart way to implement `extend_from_slice`, but it isn't trivial due to alignment.
1825
1826            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1827                self.values.reserve_for(selves.map(|x| x.values))
1828            }
1829        }
1830
1831        impl<'a, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for crate::primitive::Bools<VC, &'a u64> {
1832            #[inline(always)]
1833            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1834                let iter = self.values.as_bytes();
1835                let iter = crate::chain_one(iter, (std::mem::align_of::<u64>() as u64, bytemuck::cast_slice(std::slice::from_ref(self.last_word))));
1836                crate::chain_one(iter, (std::mem::align_of::<u64>() as u64, bytemuck::cast_slice(std::slice::from_ref(self.last_bits))))
1837            }
1838        }
1839
1840        impl<'a, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for crate::primitive::Bools<VC, &'a u64> {
1841            #[inline(always)]
1842            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1843                let values = crate::FromBytes::from_bytes(bytes);
1844                let last_word = &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0];
1845                let last_bits = &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0];
1846                Self { values, last_word, last_bits }
1847            }
1848        }
1849
1850        impl<VC: Len, WC: CopyAs<u64>> Len for Bools<VC, WC> {
1851            #[inline(always)] fn len(&self) -> usize { self.values.len() * 64 + (self.last_bits.copy_as() as usize) }
1852        }
1853
1854        impl<VC: Len + IndexAs<u64>, WC: CopyAs<u64>> Index for Bools<VC, WC> {
1855            type Ref = bool;
1856            #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
1857                let block = index / 64;
1858                let word = if block == self.values.len() {
1859                    self.last_word.copy_as()
1860                } else {
1861                    self.values.index_as(block)
1862                };
1863                let bit = index % 64;
1864                (word >> bit) & 1 == 1
1865            }
1866        }
1867
1868        impl<VC: Len + IndexAs<u64>, WC: CopyAs<u64>> Index for &Bools<VC, WC> {
1869            type Ref = bool;
1870            #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
1871                (*self).get(index)
1872            }
1873        }
1874
1875        impl<VC: for<'a> Push<&'a u64>> Push<bool> for Bools<VC> {
1876            #[inline]
1877            fn push(&mut self, bit: bool) {
1878                self.last_word |= (bit as u64) << self.last_bits;
1879                self.last_bits += 1;
1880                // If we have a fully formed word, commit it to `self.values`.
1881                if self.last_bits == 64 {
1882                    self.values.push(&self.last_word);
1883                    self.last_word = 0;
1884                    self.last_bits = 0;
1885                }
1886            }
1887        }
1888        impl<'a, VC: for<'b> Push<&'b u64>> Push<&'a bool> for Bools<VC> {
1889            #[inline(always)]
1890            fn push(&mut self, bit: &'a bool) {
1891                self.push(*bit)
1892            }
1893        }
1894
1895
1896        impl<VC: Clear> Clear for Bools<VC> {
1897            #[inline(always)]
1898            fn clear(&mut self) {
1899                self.values.clear();
1900                self.last_word = 0;
1901                self.last_bits = 0;
1902            }
1903        }
1904
1905        impl<VC: HeapSize> HeapSize for Bools<VC> {
1906            #[inline(always)]
1907            fn heap_size(&self) -> (usize, usize) {
1908                self.values.heap_size()
1909            }
1910        }
1911    }
1912
1913    pub use duration::Durations;
1914    /// A columnar store for `std::time::Duration`.
1915    mod duration {
1916
1917        use std::time::Duration;
1918        use crate::{Container, Len, Index, IndexAs, Push, Clear, HeapSize, Borrow};
1919
1920        // `std::time::Duration` is equivalent to `(u64, u32)`, corresponding to seconds and nanoseconds.
1921        #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1922        #[derive(Copy, Clone, Debug, Default, PartialEq)]
1923        pub struct Durations<SC = Vec<u64>, NC = Vec<u32>> {
1924            pub seconds: SC,
1925            pub nanoseconds: NC,
1926        }
1927
1928        impl crate::Columnar for Duration {
1929            #[inline(always)]
1930            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
1931            type Container = Durations;
1932        }
1933
1934        impl<SC: crate::common::BorrowIndexAs<u64>, NC: crate::common::BorrowIndexAs<u32>> Borrow for Durations<SC, NC> {
1935            type Ref<'a> = Duration;
1936            type Borrowed<'a> = Durations<SC::Borrowed<'a>, NC::Borrowed<'a>> where SC: 'a, NC: 'a;
1937            #[inline(always)]
1938            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1939                Durations {
1940                    seconds: self.seconds.borrow(),
1941                    nanoseconds: self.nanoseconds.borrow(),
1942                }
1943            }
1944            #[inline(always)]
1945            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where SC: 'a, NC: 'a {
1946                Durations {
1947                    seconds: SC::reborrow(thing.seconds),
1948                    nanoseconds: NC::reborrow(thing.nanoseconds),
1949                }
1950            }
1951            #[inline(always)]
1952            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1953        }
1954
1955        impl<SC: crate::common::PushIndexAs<u64>, NC: crate::common::PushIndexAs<u32>> Container for Durations<SC, NC> {
1956            #[inline(always)]
1957            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1958                self.seconds.extend_from_self(other.seconds, range.clone());
1959                self.nanoseconds.extend_from_self(other.nanoseconds, range);
1960            }
1961
1962            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1963                self.seconds.reserve_for(selves.clone().map(|x| x.seconds));
1964                self.nanoseconds.reserve_for(selves.map(|x| x.nanoseconds));
1965            }
1966        }
1967
1968        impl<'a, SC: crate::AsBytes<'a>, NC: crate::AsBytes<'a>> crate::AsBytes<'a> for crate::primitive::Durations<SC, NC> {
1969            #[inline(always)]
1970            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1971                crate::chain(self.seconds.as_bytes(), self.nanoseconds.as_bytes())
1972            }
1973        }
1974        impl<'a, SC: crate::FromBytes<'a>, NC: crate::FromBytes<'a>> crate::FromBytes<'a> for crate::primitive::Durations<SC, NC> {
1975            #[inline(always)]
1976            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1977                Self {
1978                    seconds: crate::FromBytes::from_bytes(bytes),
1979                    nanoseconds: crate::FromBytes::from_bytes(bytes),
1980                }
1981            }
1982        }
1983
1984        impl<SC: Len, NC> Len for Durations<SC, NC> {
1985            #[inline(always)] fn len(&self) -> usize { self.seconds.len() }
1986        }
1987
1988        impl<SC: IndexAs<u64>, NC: IndexAs<u32>> Index for Durations<SC, NC> {
1989            type Ref = Duration;
1990            #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
1991                Duration::new(self.seconds.index_as(index), self.nanoseconds.index_as(index))
1992            }
1993        }
1994
1995        impl<SC: for<'a> Push<&'a u64>, NC: for<'a> Push<&'a u32>> Push<std::time::Duration> for Durations<SC, NC> {
1996            #[inline]
1997            fn push(&mut self, item: std::time::Duration) {
1998                self.seconds.push(&item.as_secs());
1999                self.nanoseconds.push(&item.subsec_nanos());
2000            }
2001        }
2002        impl<'a, SC: for<'b> Push<&'b u64>, NC: for<'b> Push<&'b u32>> Push<&'a std::time::Duration> for Durations<SC, NC> {
2003            #[inline]
2004            fn push(&mut self, item: &'a std::time::Duration) {
2005                self.push(*item)
2006            }
2007        }
2008        impl<'a, SC: Push<&'a u64>, NC: Push<&'a u32>> Push<(&'a u64, &'a u32)> for Durations<SC, NC> {
2009            #[inline]
2010            fn push(&mut self, item: (&'a u64, &'a u32)) {
2011                self.seconds.push(item.0);
2012                self.nanoseconds.push(item.1);
2013            }
2014        }
2015
2016        impl<SC: Clear, NC: Clear> Clear for Durations<SC, NC> {
2017            #[inline(always)]
2018            fn clear(&mut self) {
2019                self.seconds.clear();
2020                self.nanoseconds.clear();
2021            }
2022        }
2023
2024        impl<SC: HeapSize, NC: HeapSize> HeapSize for Durations<SC, NC> {
2025            #[inline(always)]
2026            fn heap_size(&self) -> (usize, usize) {
2027                let (l0, c0) = self.seconds.heap_size();
2028                let (l1, c1) = self.nanoseconds.heap_size();
2029                (l0 + l1, c0 + c1)
2030            }
2031        }
2032    }
2033}
2034
2035pub use string::Strings;
2036pub mod string {
2037
2038    use super::{Clear, Columnar, Container, Len, Index, IndexAs, Push, HeapSize, Borrow};
2039
2040    /// A stand-in for `Vec<String>`.
2041    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
2042    #[derive(Copy, Clone, Debug, Default, PartialEq)]
2043    pub struct Strings<BC = Vec<u64>, VC = Vec<u8>> {
2044        /// Bounds container; provides indexed access to offsets.
2045        pub bounds: BC,
2046        /// Values container; provides slice access to bytes.
2047        pub values: VC,
2048    }
2049
2050    impl Columnar for String {
2051        #[inline(always)]
2052        fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
2053            self.clear();
2054            self.push_str(other);
2055        }
2056        #[inline(always)]
2057        fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other.to_string() }
2058        type Container = Strings;
2059    }
2060
2061    impl Columnar for Box<str> {
2062        #[inline(always)]
2063        fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
2064            let mut s = String::from(std::mem::take(self));
2065            s.clear();
2066            s.push_str(other);
2067            *self = s.into_boxed_str();
2068        }
2069        #[inline(always)]
2070        fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { Self::from(other) }
2071        type Container = Strings;
2072    }
2073
2074    impl<BC: crate::common::BorrowIndexAs<u64>> Borrow for Strings<BC, Vec<u8>> {
2075        type Ref<'a> = &'a str;
2076        type Borrowed<'a> = Strings<BC::Borrowed<'a>, &'a [u8]> where BC: 'a;
2077        #[inline(always)]
2078        fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
2079            Strings {
2080                bounds: self.bounds.borrow(),
2081                values: self.values.borrow(),
2082            }
2083        }
2084        #[inline(always)]
2085        fn reborrow<'c, 'a: 'c>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'c> where BC: 'a {
2086            Strings {
2087                bounds: BC::reborrow(thing.bounds),
2088                values: thing.values,
2089            }
2090        }
2091        #[inline(always)]
2092        fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
2093    }
2094
2095    impl<BC: crate::common::PushIndexAs<u64>> Container for Strings<BC, Vec<u8>> {
2096        #[inline(always)]
2097        fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
2098            if !range.is_empty() {
2099                // Imported bounds will be relative to this starting offset.
2100                let values_len = self.values.len() as u64;
2101
2102                // Push all bytes that we can, all at once.
2103                let other_lower = if range.start == 0 { 0 } else { other.bounds.index_as(range.start-1) };
2104                let other_upper = other.bounds.index_as(range.end-1);
2105                self.values.extend_from_self(other.values, other_lower as usize .. other_upper as usize);
2106
2107                // Each bound needs to be shifted by `values_len - other_lower`.
2108                if values_len == other_lower {
2109                    self.bounds.extend_from_self(other.bounds, range);
2110                }
2111                else {
2112                    for index in range {
2113                        let shifted = other.bounds.index_as(index) - other_lower + values_len;
2114                        self.bounds.push(&shifted)
2115                    }
2116                }
2117            }
2118        }
2119
2120        fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
2121            self.bounds.reserve_for(selves.clone().map(|x| x.bounds));
2122            self.values.reserve_for(selves.map(|x| x.values));
2123        }
2124
2125    }
2126
2127    impl<'a, BC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for Strings<BC, VC> {
2128        #[inline(always)]
2129        fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
2130            crate::chain(self.bounds.as_bytes(), self.values.as_bytes())
2131        }
2132    }
2133    impl<'a, BC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for Strings<BC, VC> {
2134        #[inline(always)]
2135        fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
2136            Self {
2137                bounds: crate::FromBytes::from_bytes(bytes),
2138                values: crate::FromBytes::from_bytes(bytes),
2139            }
2140        }
2141    }
2142
2143    impl<BC: Len, VC> Len for Strings<BC, VC> {
2144        #[inline(always)] fn len(&self) -> usize { self.bounds.len() }
2145    }
2146
2147    impl<'a, BC: Len+IndexAs<u64>> Index for Strings<BC, &'a [u8]> {
2148        type Ref = &'a str;
2149        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
2150            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
2151            let upper = self.bounds.index_as(index);
2152            let lower: usize = lower.try_into().expect("bounds must fit in `usize`");
2153            let upper: usize = upper.try_into().expect("bounds must fit in `usize`");
2154            std::str::from_utf8(&self.values[lower .. upper]).expect("&[u8] must be valid utf8")
2155        }
2156    }
2157    impl<'a, BC: Len+IndexAs<u64>> Index for &'a Strings<BC, Vec<u8>> {
2158        type Ref = &'a str;
2159        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
2160            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
2161            let upper = self.bounds.index_as(index);
2162            let lower: usize = lower.try_into().expect("bounds must fit in `usize`");
2163            let upper: usize = upper.try_into().expect("bounds must fit in `usize`");
2164            std::str::from_utf8(&self.values[lower .. upper]).expect("&[u8] must be valid utf8")
2165        }
2166    }
2167
2168    // This is a simpler implementation, but it leads to a performance regression
2169    // for Strings and str because it loses access to `Vec::extend_from_slice`.
2170    //
2171    // impl<BC: Push<u64>, D: std::fmt::Display> Push<D> for Strings<BC> {
2172    //     #[inline(always)]
2173    //     fn push(&mut self, item: D) {
2174    //         use std::io::Write;
2175    //         write!(self.values, "{}", item).unwrap();
2176    //         self.bounds.push(self.values.len() as u64);
2177    //     }
2178    // }
2179
2180    impl<BC: for<'a> Push<&'a u64>> Push<&String> for Strings<BC> {
2181        #[inline(always)] fn push(&mut self, item: &String) {
2182            self.values.extend_from_slice(item.as_bytes());
2183            self.bounds.push(&(self.values.len() as u64));
2184        }
2185    }
2186    impl<BC: for<'a> Push<&'a u64>> Push<&str> for Strings<BC> {
2187        #[inline]
2188        fn push(&mut self, item: &str) {
2189            self.values.extend_from_slice(item.as_bytes());
2190            self.bounds.push(&(self.values.len() as u64));
2191        }
2192    }
2193    impl<BC: for<'a> Push<&'a u64>> Push<&Box<str>> for Strings<BC> {
2194        #[inline]
2195        fn push(&mut self, item: &Box<str>) {
2196            self.values.extend_from_slice(item.as_bytes());
2197            self.bounds.push(&(self.values.len() as u64));
2198        }
2199    }
2200    impl<'a, BC: for<'b> Push<&'b u64>> Push<std::fmt::Arguments<'a>> for Strings<BC> {
2201        #[inline]
2202        fn push(&mut self, item: std::fmt::Arguments<'a>) {
2203            use std::io::Write;
2204            self.values.write_fmt(item).expect("write_fmt failed");
2205            self.bounds.push(&(self.values.len() as u64));
2206        }
2207    }
2208    impl<'a, 'b, BC: for<'c> Push<&'c u64>> Push<&'b std::fmt::Arguments<'a>> for Strings<BC> {
2209        #[inline]
2210        fn push(&mut self, item: &'b std::fmt::Arguments<'a>) {
2211            use std::io::Write;
2212            self.values.write_fmt(*item).expect("write_fmt failed");
2213            self.bounds.push(&(self.values.len() as u64));
2214        }
2215    }
2216    impl<BC: Clear, VC: Clear> Clear for Strings<BC, VC> {
2217        #[inline(always)]
2218        fn clear(&mut self) {
2219            self.bounds.clear();
2220            self.values.clear();
2221        }
2222    }
2223    impl<BC: HeapSize, VC: HeapSize> HeapSize for Strings<BC, VC> {
2224        #[inline(always)]
2225        fn heap_size(&self) -> (usize, usize) {
2226            let (l0, c0) = self.bounds.heap_size();
2227            let (l1, c1) = self.values.heap_size();
2228            (l0 + l1, c0 + c1)
2229        }
2230    }
2231}
2232
2233pub use vector::Vecs;
2234pub mod vector {
2235
2236    use super::{Clear, Columnar, Container, Len, IndexMut, Index, IndexAs, Push, HeapSize, Slice, Borrow};
2237
2238    /// A stand-in for `Vec<Vec<T>>` for complex `T`.
2239    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
2240    #[derive(Debug, Default, Copy, Clone, PartialEq)]
2241    pub struct Vecs<TC, BC = Vec<u64>> {
2242        pub bounds: BC,
2243        pub values: TC,
2244    }
2245
2246    impl<T: Columnar> Columnar for Vec<T> {
2247        #[inline(always)]
2248        fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
2249            self.truncate(other.len());
2250            let mut other_iter = other.into_iter();
2251            for (s, o) in self.iter_mut().zip(&mut other_iter) {
2252                T::copy_from(s, o);
2253            }
2254            for o in other_iter {
2255                self.push(T::into_owned(o));
2256            }
2257        }
2258        #[inline(always)]
2259        fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
2260            other.into_iter().map(|x| T::into_owned(x)).collect()
2261        }
2262        type Container = Vecs<T::Container>;
2263    }
2264
2265    impl<T: Columnar, const N: usize> Columnar for [T; N] {
2266        #[inline(always)]
2267        fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
2268            for (s, o) in self.iter_mut().zip(other.into_iter()) {
2269                T::copy_from(s, o);
2270            }
2271        }
2272        #[inline(always)]
2273        fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
2274            let vec: Vec<_> = other.into_iter().map(|x| T::into_owned(x)).collect();
2275            match vec.try_into() {
2276                Ok(array) => array,
2277                Err(_) => panic!("wrong length"),
2278            }
2279        }
2280        type Container = Vecs<T::Container>;
2281    }
2282
2283    impl<T: Columnar, const N: usize> Columnar for smallvec::SmallVec<[T; N]> {
2284        #[inline(always)]
2285        fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
2286            self.truncate(other.len());
2287            let mut other_iter = other.into_iter();
2288            for (s, o) in self.iter_mut().zip(&mut other_iter) {
2289                T::copy_from(s, o);
2290            }
2291            for o in other_iter {
2292                self.push(T::into_owned(o));
2293            }
2294        }
2295        #[inline(always)]
2296        fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
2297            other.into_iter().map(|x| T::into_owned(x)).collect()
2298        }
2299        type Container = Vecs<T::Container>;
2300    }
2301
2302    impl<BC: crate::common::BorrowIndexAs<u64>, TC: Container> Borrow for Vecs<TC, BC> {
2303        type Ref<'a> = Slice<TC::Borrowed<'a>> where TC: 'a;
2304        type Borrowed<'a> = Vecs<TC::Borrowed<'a>, BC::Borrowed<'a>> where BC: 'a, TC: 'a;
2305        #[inline(always)]
2306        fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
2307            Vecs {
2308                bounds: self.bounds.borrow(),
2309                values: self.values.borrow(),
2310            }
2311        }
2312        #[inline(always)]
2313        fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where BC: 'a, TC: 'a {
2314            Vecs {
2315                bounds: BC::reborrow(thing.bounds),
2316                values: TC::reborrow(thing.values),
2317            }
2318        }
2319        #[inline(always)]
2320        fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a {
2321            thing.map(|x| TC::reborrow(x))
2322        }
2323    }
2324
2325    impl<BC: crate::common::PushIndexAs<u64>, TC: Container> Container for Vecs<TC, BC> {
2326        #[inline(always)]
2327        fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
2328            if !range.is_empty() {
2329                // Imported bounds will be relative to this starting offset.
2330                let values_len = self.values.len() as u64;
2331
2332                // Push all bytes that we can, all at once.
2333                let other_lower = if range.start == 0 { 0 } else { other.bounds.index_as(range.start-1) };
2334                let other_upper = other.bounds.index_as(range.end-1);
2335                self.values.extend_from_self(other.values, other_lower as usize .. other_upper as usize);
2336
2337                // Each bound needs to be shifted by `values_len - other_lower`.
2338                if values_len == other_lower {
2339                    self.bounds.extend_from_self(other.bounds, range);
2340                }
2341                else {
2342                    for index in range {
2343                        let shifted = other.bounds.index_as(index) - other_lower + values_len;
2344                        self.bounds.push(&shifted)
2345                    }
2346                }
2347            }
2348        }
2349
2350        fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
2351            self.bounds.reserve_for(selves.clone().map(|x| x.bounds));
2352            self.values.reserve_for(selves.map(|x| x.values));
2353        }
2354    }
2355
2356    impl<'a, TC: crate::AsBytes<'a>, BC: crate::AsBytes<'a>> crate::AsBytes<'a> for Vecs<TC, BC> {
2357        #[inline(always)]
2358        fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
2359            crate::chain(self.bounds.as_bytes(), self.values.as_bytes())
2360        }
2361    }
2362    impl<'a, TC: crate::FromBytes<'a>, BC: crate::FromBytes<'a>> crate::FromBytes<'a> for Vecs<TC, BC> {
2363        #[inline(always)]
2364        fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
2365            Self {
2366                bounds: crate::FromBytes::from_bytes(bytes),
2367                values: crate::FromBytes::from_bytes(bytes),
2368            }
2369        }
2370    }
2371
2372    impl<TC: Len> Vecs<TC> {
2373        #[inline]
2374        pub fn push_iter<I>(&mut self, iter: I) where I: IntoIterator, TC: Push<I::Item> {
2375            self.values.extend(iter);
2376            self.bounds.push(self.values.len() as u64);
2377        }
2378    }
2379
2380    impl<TC, BC: Len> Len for Vecs<TC, BC> {
2381        #[inline(always)] fn len(&self) -> usize { self.bounds.len() }
2382    }
2383
2384    impl<TC: Copy, BC: Len+IndexAs<u64>> Index for Vecs<TC, BC> {
2385        type Ref = Slice<TC>;
2386        #[inline(always)]
2387        fn get(&self, index: usize) -> Self::Ref {
2388            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
2389            let upper = self.bounds.index_as(index);
2390            Slice::new(lower, upper, self.values)
2391        }
2392    }
2393    impl<'a, TC, BC: Len+IndexAs<u64>> Index for &'a Vecs<TC, BC> {
2394        type Ref = Slice<&'a TC>;
2395        #[inline(always)]
2396        fn get(&self, index: usize) -> Self::Ref {
2397            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
2398            let upper = self.bounds.index_as(index);
2399            Slice::new(lower, upper, &self.values)
2400        }
2401    }
2402    impl<TC, BC: Len+IndexAs<u64>> IndexMut for Vecs<TC, BC> {
2403        type IndexMut<'a> = Slice<&'a mut TC> where TC: 'a, BC: 'a;
2404
2405        #[inline(always)]
2406        fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
2407            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
2408            let upper = self.bounds.index_as(index);
2409            Slice::new(lower, upper, &mut self.values)
2410        }
2411    }
2412
2413    impl<'a, TC: Container, BC: for<'b> Push<&'b u64>> Push<Slice<TC::Borrowed<'a>>> for Vecs<TC, BC> {
2414        #[inline]
2415        fn push(&mut self, item: Slice<TC::Borrowed<'a>>) {
2416            self.values.extend_from_self(item.slice, item.lower .. item.upper);
2417            self.bounds.push(&(self.values.len() as u64));
2418        }
2419    }
2420
2421    impl<I: IntoIterator, TC: Push<I::Item> + Len, BC: for<'a> Push<&'a u64>> Push<I> for Vecs<TC, BC> {
2422        #[inline]
2423        fn push(&mut self, item: I) {
2424            self.values.extend(item);
2425            self.bounds.push(&(self.values.len() as u64));
2426        }
2427    }
2428
2429    impl<TC: Clear, BC: Clear> Clear for Vecs<TC, BC> {
2430        #[inline(always)]
2431        fn clear(&mut self) {
2432            self.bounds.clear();
2433            self.values.clear();
2434        }
2435    }
2436
2437    impl<TC: HeapSize, BC: HeapSize> HeapSize for Vecs<TC, BC> {
2438        fn heap_size(&self) -> (usize, usize) {
2439            let (l0, c0) = self.bounds.heap_size();
2440            let (l1, c1) = self.values.heap_size();
2441            (l0 + l1, c0 + c1)
2442        }
2443    }
2444}
2445
2446#[allow(non_snake_case)]
2447pub mod tuple {
2448
2449    use crate::*;
2450
2451    // Implementations for tuple types.
2452    // These are all macro based, because the implementations are very similar.
2453    // The macro requires two names, one for the store and one for pushable types.
2454    macro_rules! tuple_impl {
2455        ( $($name:ident,$name2:ident,$idx:tt)+) => (
2456
2457            impl<$($name: Columnar),*> Columnar for ($($name,)*) {
2458                #[inline(always)]
2459                fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
2460                    let ($($name,)*) = self;
2461                    let ($($name2,)*) = other;
2462                    $(crate::Columnar::copy_from($name, $name2);)*
2463                }
2464                #[inline(always)]
2465                fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
2466                    let ($($name2,)*) = other;
2467                    ($($name::into_owned($name2),)*)
2468                }
2469                type Container = ($($name::Container,)*);
2470            }
2471            impl<$($name2: Borrow,)*> Borrow for ($($name2,)*) {
2472                type Ref<'a> = ($($name2::Ref<'a>,)*) where $($name2: 'a,)*;
2473                type Borrowed<'a> = ($($name2::Borrowed<'a>,)*) where $($name2: 'a,)*;
2474                #[inline(always)]
2475                fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
2476                    let ($($name,)*) = self;
2477                    ($($name.borrow(),)*)
2478                }
2479                #[inline(always)]
2480                fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where $($name2: 'a,)* {
2481                    let ($($name,)*) = thing;
2482                    ($($name2::reborrow($name),)*)
2483                }
2484                #[inline(always)]
2485                fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a {
2486                    let ($($name2,)*) = thing;
2487                    ($($name2::reborrow_ref($name2),)*)
2488                }
2489            }
2490            impl<$($name2: Container,)*> Container for ($($name2,)*) {
2491                #[inline(always)]
2492                fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
2493                    let ($($name,)*) = self;
2494                    let ($($name2,)*) = other;
2495                    $( $name.extend_from_self($name2, range.clone()); )*
2496                }
2497
2498                fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
2499                    let ($($name,)*) = self;
2500                    $( $name.reserve_for(selves.clone().map(|x| x.$idx)); )*
2501                }
2502            }
2503
2504            #[allow(non_snake_case)]
2505            impl<'a, $($name: crate::AsBytes<'a>),*> crate::AsBytes<'a> for ($($name,)*) {
2506                #[inline(always)]
2507                fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
2508                    let ($($name,)*) = self;
2509                    let iter = None.into_iter();
2510                    $( let iter = crate::chain(iter, $name.as_bytes()); )*
2511                    iter
2512                }
2513            }
2514            impl<'a, $($name: crate::FromBytes<'a>),*> crate::FromBytes<'a> for ($($name,)*) {
2515                #[inline(always)]
2516                #[allow(non_snake_case)]
2517                fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
2518                    $(let $name = crate::FromBytes::from_bytes(bytes);)*
2519                    ($($name,)*)
2520                }
2521            }
2522
2523            impl<$($name: Len),*> Len for ($($name,)*) {
2524                #[inline(always)]
2525                fn len(&self) -> usize {
2526                    self.0.len()
2527                }
2528            }
2529            impl<$($name: Clear),*> Clear for ($($name,)*) {
2530                #[inline(always)]
2531                fn clear(&mut self) {
2532                    let ($($name,)*) = self;
2533                    $($name.clear();)*
2534                }
2535            }
2536            impl<$($name: HeapSize),*> HeapSize for ($($name,)*) {
2537                #[inline(always)]
2538                fn heap_size(&self) -> (usize, usize) {
2539                    let ($($name,)*) = self;
2540                    let mut l = 0;
2541                    let mut c = 0;
2542                    $(let (l0, c0) = $name.heap_size(); l += l0; c += c0;)*
2543                    (l, c)
2544                }
2545            }
2546            impl<$($name: Index),*> Index for ($($name,)*) {
2547                type Ref = ($($name::Ref,)*);
2548                #[inline(always)]
2549                fn get(&self, index: usize) -> Self::Ref {
2550                    let ($($name,)*) = self;
2551                    ($($name.get(index),)*)
2552                }
2553            }
2554            impl<'a, $($name),*> Index for &'a ($($name,)*) where $( &'a $name: Index),* {
2555                type Ref = ($(<&'a $name as Index>::Ref,)*);
2556                #[inline(always)]
2557                fn get(&self, index: usize) -> Self::Ref {
2558                    let ($($name,)*) = self;
2559                    ($($name.get(index),)*)
2560                }
2561            }
2562
2563            impl<$($name: IndexMut),*> IndexMut for ($($name,)*) {
2564                type IndexMut<'a> = ($($name::IndexMut<'a>,)*) where $($name: 'a),*;
2565                #[inline(always)]
2566                fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
2567                    let ($($name,)*) = self;
2568                    ($($name.get_mut(index),)*)
2569                }
2570            }
2571            impl<$($name2, $name: Push<$name2>),*> Push<($($name2,)*)> for ($($name,)*) {
2572                #[inline]
2573                fn push(&mut self, item: ($($name2,)*)) {
2574                    let ($($name,)*) = self;
2575                    let ($($name2,)*) = item;
2576                    $($name.push($name2);)*
2577                }
2578            }
2579            impl<'a, $($name2, $name: Push<&'a $name2>),*> Push<&'a ($($name2,)*)> for ($($name,)*) {
2580                #[inline]
2581                fn push(&mut self, item: &'a ($($name2,)*)) {
2582                    let ($($name,)*) = self;
2583                    let ($($name2,)*) = item;
2584                    $($name.push($name2);)*
2585                }
2586            }
2587        )
2588    }
2589
2590    tuple_impl!(A,AA,0);
2591    tuple_impl!(A,AA,0 B,BB,1);
2592    tuple_impl!(A,AA,0 B,BB,1 C,CC,2);
2593    tuple_impl!(A,AA,0 B,BB,1 C,CC,2 D,DD,3);
2594    tuple_impl!(A,AA,0 B,BB,1 C,CC,2 D,DD,3 E,EE,4);
2595    tuple_impl!(A,AA,0 B,BB,1 C,CC,2 D,DD,3 E,EE,4 F,FF,5);
2596    tuple_impl!(A,AA,0 B,BB,1 C,CC,2 D,DD,3 E,EE,4 F,FF,5 G,GG,6);
2597    tuple_impl!(A,AA,0 B,BB,1 C,CC,2 D,DD,3 E,EE,4 F,FF,5 G,GG,6 H,HH,7);
2598    tuple_impl!(A,AA,0 B,BB,1 C,CC,2 D,DD,3 E,EE,4 F,FF,5 G,GG,6 H,HH,7 I,II,8);
2599    tuple_impl!(A,AA,0 B,BB,1 C,CC,2 D,DD,3 E,EE,4 F,FF,5 G,GG,6 H,HH,7 I,II,8 J,JJ,9);
2600
2601    #[cfg(test)]
2602    mod test {
2603        #[test]
2604        fn round_trip() {
2605
2606            use crate::common::{Index, Push, HeapSize, Len};
2607
2608            let mut column: crate::ContainerOf<(u64, u8, String)> = Default::default();
2609            for i in 0..100 {
2610                column.push((i, i as u8, &i.to_string()));
2611                column.push((i, i as u8, &"".to_string()));
2612            }
2613
2614            assert_eq!(column.len(), 200);
2615            assert_eq!(column.heap_size(), (3590, 4608));
2616
2617            for i in 0..100u64 {
2618                assert_eq!((&column).get((2*i+0) as usize), (&i, &(i as u8), i.to_string().as_str()));
2619                assert_eq!((&column).get((2*i+1) as usize), (&i, &(i as u8), ""));
2620            }
2621
2622            // Compare to the heap size of a `Vec<Option<usize>>`.
2623            let mut column: Vec<(u64, u8, String)> = Default::default();
2624            for i in 0..100 {
2625                column.push((i, i as u8, i.to_string()));
2626                column.push((i, i as u8, "".to_string()));
2627            }
2628            // NB: Rust seems to change the capacities across versions (1.88 != 1.89),
2629            // so we just compare the allocated regions to avoid updating the MSRV.
2630            assert_eq!(column.heap_size().0, 8190);
2631
2632        }
2633    }
2634}
2635
2636pub use sums::{rank_select::RankSelect, result::Results, option::Options};
2637/// Containers for enumerations ("sum types") that store variants separately.
2638///
2639/// The main work of these types is storing a discriminant and index efficiently,
2640/// as containers for each of the variant types can hold the actual data.
2641pub mod sums {
2642
2643    /// Stores for maintaining discriminants, and associated sequential indexes.
2644    ///
2645    /// The sequential indexes are not explicitly maintained, but are supported
2646    /// by a `rank(index)` function that indicates how many of a certain variant
2647    /// precede the given index. While this could potentially be done with a scan
2648    /// of all preceding discriminants, the stores maintain running accumulations
2649    /// that make the operation constant time (using additional amortized memory).
2650    pub mod rank_select {
2651
2652        use crate::primitive::Bools;
2653        use crate::common::index::CopyAs;
2654        use crate::{Borrow, Len, Index, IndexAs, Push, Clear, HeapSize};
2655
2656        /// A store for maintaining `Vec<bool>` with fast `rank` and `select` access.
2657        ///
2658        /// The design is to have `u64` running counts for each block of 1024 bits,
2659        /// which are roughly the size of a cache line. This is roughly 6% overhead,
2660        /// above the bits themselves, which seems pretty solid.
2661        #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
2662        #[derive(Copy, Clone, Debug, Default, PartialEq)]
2663        pub struct RankSelect<CC = Vec<u64>, VC = Vec<u64>, WC = u64> {
2664            /// Counts of the number of cumulative set (true) bits, *after* each block of 1024 bits.
2665            pub counts: CC,
2666            /// The bits themselves.
2667            pub values: Bools<VC, WC>,
2668        }
2669
2670        impl<CC: crate::common::BorrowIndexAs<u64>, VC: crate::common::BorrowIndexAs<u64>> RankSelect<CC, VC> {
2671            #[inline(always)]
2672            pub fn borrow<'a>(&'a self) -> RankSelect<CC::Borrowed<'a>, VC::Borrowed<'a>, &'a u64> {
2673                RankSelect {
2674                    counts: self.counts.borrow(),
2675                    values: self.values.borrow(),
2676                }
2677            }
2678            #[inline(always)]
2679            pub fn reborrow<'b, 'a: 'b>(thing: RankSelect<CC::Borrowed<'a>, VC::Borrowed<'a>, &'a u64>) -> RankSelect<CC::Borrowed<'b>, VC::Borrowed<'b>, &'b u64> {
2680                RankSelect {
2681                    counts: CC::reborrow(thing.counts),
2682                    values: Bools::<VC, u64>::reborrow(thing.values),
2683                }
2684            }
2685        }
2686
2687        impl<'a, CC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for RankSelect<CC, VC, &'a u64> {
2688            #[inline(always)]
2689            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
2690                crate::chain(self.counts.as_bytes(), self.values.as_bytes())
2691            }
2692        }
2693        impl<'a, CC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for RankSelect<CC, VC, &'a u64> {
2694            #[inline(always)]
2695            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
2696                Self {
2697                    counts: crate::FromBytes::from_bytes(bytes),
2698                    values: crate::FromBytes::from_bytes(bytes),
2699                }
2700            }
2701        }
2702
2703
2704        impl<CC, VC: Len + IndexAs<u64>, WC: CopyAs<u64>> RankSelect<CC, VC, WC> {
2705            #[inline(always)]
2706            pub fn get(&self, index: usize) -> bool {
2707                Index::get(&self.values, index)
2708            }
2709        }
2710        impl<CC: Len + IndexAs<u64>, VC: Len + IndexAs<u64>, WC: CopyAs<u64>> RankSelect<CC, VC, WC> {
2711            /// The number of set bits *strictly* preceding `index`.
2712            ///
2713            /// This number is accumulated first by reading out of `self.counts` at the correct position,
2714            /// then by summing the ones in strictly prior `u64` entries, then by counting the ones in the
2715            /// masked `u64` in which the bit lives.
2716            pub fn rank(&self, index: usize) -> usize {
2717                let bit = index % 64;
2718                let block = index / 64;
2719                let chunk = block / 16;
2720                let mut count = if chunk > 0 { self.counts.index_as(chunk - 1) as usize } else { 0 };
2721                for pos in (16 * chunk) .. block {
2722                    count += self.values.values.index_as(pos).count_ones() as usize;
2723                }
2724                // TODO: Panic if out of bounds?
2725                let intra_word = if block == self.values.values.len() { self.values.last_word.copy_as() } else { self.values.values.index_as(block) };
2726                count += (intra_word & ((1 << bit) - 1)).count_ones() as usize;
2727                count
2728            }
2729            /// The index of the `rank`th set bit, should one exist.
2730            pub fn select(&self, rank: u64) -> Option<usize> {
2731                let mut chunk = 0;
2732                // Step one is to find the position in `counts` where we go from `rank` to `rank + 1`.
2733                // The position we are looking for is within that chunk of bits.
2734                // TODO: Binary search is likely better at many scales. Rust's binary search is .. not helpful with ties.
2735                while chunk < self.counts.len() && self.counts.index_as(chunk) <= rank {
2736                    chunk += 1;
2737                }
2738                let mut count = if chunk < self.counts.len() { self.counts.index_as(chunk) } else { 0 };
2739                // Step two is to find the position within that chunk where the `rank`th bit is.
2740                let mut block = 16 * chunk;
2741                while block < self.values.values.len() && count + (self.values.values.index_as(block).count_ones() as u64) <= rank {
2742                    count += self.values.values.index_as(block).count_ones() as u64;
2743                    block += 1;
2744                }
2745                // Step three is to search the last word for the location, or return `None` if we run out of bits.
2746                let last_bits = if block == self.values.values.len() { self.values.last_bits.copy_as() as usize } else { 64 };
2747                let last_word = if block == self.values.values.len() { self.values.last_word.copy_as() } else { self.values.values.index_as(block) };
2748                for shift in 0 .. last_bits {
2749                    if ((last_word >> shift) & 0x01 == 0x01) && count + 1 == rank {
2750                        return Some(64 * block + shift);
2751                    }
2752                    count += (last_word >> shift) & 0x01;
2753                }
2754
2755                None
2756            }
2757        }
2758
2759        impl<CC, VC: Len, WC: CopyAs<u64>> RankSelect<CC, VC, WC> {
2760            pub fn len(&self) -> usize {
2761                self.values.len()
2762            }
2763        }
2764
2765        // This implementation probably only works for `Vec<u64>` and `Vec<u64>`, but we could fix that.
2766        // Partly, it's hard to name the `Index` flavor that allows one to get back a `u64`.
2767        impl<CC: for<'a> Push<&'a u64> + Len + IndexAs<u64>, VC: for<'a> Push<&'a u64> + Len + IndexAs<u64>> RankSelect<CC, VC> {
2768            #[inline]
2769            pub fn push(&mut self, bit: bool) {
2770                self.values.push(&bit);
2771                while self.counts.len() < self.values.len() / 1024 {
2772                    let mut count = self.counts.last().unwrap_or(0);
2773                    let lower = 16 * self.counts.len();
2774                    let upper = lower + 16;
2775                    for i in lower .. upper {
2776                        count += self.values.values.index_as(i).count_ones() as u64;
2777                    }
2778                    self.counts.push(&count);
2779                }
2780            }
2781        }
2782        impl<CC: Clear, VC: Clear> Clear for RankSelect<CC, VC> {
2783            fn clear(&mut self) {
2784                self.counts.clear();
2785                self.values.clear();
2786            }
2787        }
2788        impl<CC: HeapSize, VC: HeapSize> HeapSize for RankSelect<CC, VC> {
2789            fn heap_size(&self) -> (usize, usize) {
2790                let (l0, c0) = self.counts.heap_size();
2791                let (l1, c1) = self.values.heap_size();
2792                (l0 + l1, c0 + c1)
2793            }
2794        }
2795    }
2796
2797    pub mod result {
2798
2799        use crate::common::index::CopyAs;
2800        use crate::{Clear, Columnar, Container, Len, IndexMut, Index, IndexAs, Push, HeapSize, Borrow};
2801        use crate::RankSelect;
2802
2803        #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
2804        #[derive(Copy, Clone, Debug, Default, PartialEq)]
2805        pub struct Results<SC, TC, CC=Vec<u64>, VC=Vec<u64>, WC=u64> {
2806            /// Bits set to `true` correspond to `Ok` variants.
2807            pub indexes: RankSelect<CC, VC, WC>,
2808            pub oks: SC,
2809            pub errs: TC,
2810        }
2811
2812        impl<S: Columnar, T: Columnar> Columnar for Result<S, T> {
2813            fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
2814                match (&mut *self, other) {
2815                    (Ok(x), Ok(y)) => x.copy_from(y),
2816                    (Err(x), Err(y)) => x.copy_from(y),
2817                    (_, other) => { *self = Self::into_owned(other); },
2818                }
2819            }
2820            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
2821                match other {
2822                    Ok(y) => Ok(S::into_owned(y)),
2823                    Err(y) => Err(T::into_owned(y)),
2824                }
2825            }
2826            type Container = Results<S::Container, T::Container>;
2827        }
2828
2829        impl<SC: Borrow, TC: Borrow> Borrow for Results<SC, TC> {
2830            type Ref<'a> = Result<SC::Ref<'a>, TC::Ref<'a>> where SC: 'a, TC: 'a;
2831            type Borrowed<'a> = Results<SC::Borrowed<'a>, TC::Borrowed<'a>, &'a [u64], &'a [u64], &'a u64> where SC: 'a, TC: 'a;
2832            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
2833                Results {
2834                    indexes: self.indexes.borrow(),
2835                    oks: self.oks.borrow(),
2836                    errs: self.errs.borrow(),
2837                }
2838            }
2839            #[inline(always)]
2840            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where SC: 'a, TC: 'a {
2841                Results {
2842                    indexes: RankSelect::<Vec<u64>, Vec<u64>>::reborrow(thing.indexes),
2843                    oks: SC::reborrow(thing.oks),
2844                    errs: TC::reborrow(thing.errs),
2845                }
2846            }
2847            #[inline(always)]
2848            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a {
2849                match thing {
2850                    Ok(y) => Ok(SC::reborrow_ref(y)),
2851                    Err(y) => Err(TC::reborrow_ref(y)),
2852                }
2853            }
2854        }
2855
2856        impl<SC: Container, TC: Container> Container for Results<SC, TC> {
2857            #[inline(always)]
2858            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
2859                if !range.is_empty() {
2860                    // Starting offsets of each variant in `other`.
2861                    let oks_start = other.indexes.rank(range.start);
2862                    let errs_start = range.start - oks_start;
2863
2864                    // Count the number of `Ok` and `Err` variants as we push, to determine the range.
2865                    // TODO: This could probably be `popcnt` somehow.
2866                    let mut oks = 0;
2867                    for index in range.clone() {
2868                        let bit = other.indexes.get(index);
2869                        self.indexes.push(bit);
2870                        if bit { oks += 1; }
2871                    }
2872                    let errs = range.len() - oks;
2873
2874                    self.oks.extend_from_self(other.oks, oks_start .. oks_start + oks);
2875                    self.errs.extend_from_self(other.errs, errs_start .. errs_start + errs);
2876                }
2877            }
2878
2879            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
2880                // TODO: reserve room in `self.indexes`.
2881                self.oks.reserve_for(selves.clone().map(|x| x.oks));
2882                self.errs.reserve_for(selves.map(|x| x.errs));
2883            }
2884        }
2885
2886        impl<'a, SC: crate::AsBytes<'a>, TC: crate::AsBytes<'a>, CC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for Results<SC, TC, CC, VC, &'a u64> {
2887            #[inline(always)]
2888            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
2889                let iter = self.indexes.as_bytes();
2890                let iter = crate::chain(iter, self.oks.as_bytes());
2891                crate::chain(iter, self.errs.as_bytes())
2892            }
2893        }
2894        impl<'a, SC: crate::FromBytes<'a>, TC: crate::FromBytes<'a>, CC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for Results<SC, TC, CC, VC, &'a u64> {
2895            #[inline(always)]
2896            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
2897                Self {
2898                    indexes: crate::FromBytes::from_bytes(bytes),
2899                    oks: crate::FromBytes::from_bytes(bytes),
2900                    errs: crate::FromBytes::from_bytes(bytes),
2901                }
2902            }
2903        }
2904
2905        impl<SC, TC, CC, VC: Len, WC: CopyAs<u64>> Len for Results<SC, TC, CC, VC, WC> {
2906            #[inline(always)] fn len(&self) -> usize { self.indexes.len() }
2907        }
2908
2909        impl<SC, TC, CC, VC, WC> Index for Results<SC, TC, CC, VC, WC>
2910        where
2911            SC: Index,
2912            TC: Index,
2913            CC: IndexAs<u64> + Len,
2914            VC: IndexAs<u64> + Len,
2915            WC: CopyAs<u64>,
2916        {
2917            type Ref = Result<SC::Ref, TC::Ref>;
2918            #[inline(always)]
2919            fn get(&self, index: usize) -> Self::Ref {
2920                if self.indexes.get(index) {
2921                    Ok(self.oks.get(self.indexes.rank(index)))
2922                } else {
2923                    Err(self.errs.get(index - self.indexes.rank(index)))
2924                }
2925            }
2926        }
2927        impl<'a, SC, TC, CC, VC, WC> Index for &'a Results<SC, TC, CC, VC, WC>
2928        where
2929            &'a SC: Index,
2930            &'a TC: Index,
2931            CC: IndexAs<u64> + Len,
2932            VC: IndexAs<u64> + Len,
2933            WC: CopyAs<u64>,
2934        {
2935            type Ref = Result<<&'a SC as Index>::Ref, <&'a TC as Index>::Ref>;
2936            #[inline(always)]
2937            fn get(&self, index: usize) -> Self::Ref {
2938                if self.indexes.get(index) {
2939                    Ok((&self.oks).get(self.indexes.rank(index)))
2940                } else {
2941                    Err((&self.errs).get(index - self.indexes.rank(index)))
2942                }
2943            }
2944        }
2945
2946        // NB: You are not allowed to change the variant, but can change its contents.
2947        impl<SC: IndexMut, TC: IndexMut, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len> IndexMut for Results<SC, TC, CC, VC> {
2948            type IndexMut<'a> = Result<SC::IndexMut<'a>, TC::IndexMut<'a>> where SC: 'a, TC: 'a, CC: 'a, VC: 'a;
2949            #[inline(always)]
2950            fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
2951                if self.indexes.get(index) {
2952                    Ok(self.oks.get_mut(self.indexes.rank(index)))
2953                } else {
2954                    Err(self.errs.get_mut(index - self.indexes.rank(index)))
2955                }
2956            }
2957        }
2958
2959        impl<S, SC: Push<S>, T, TC: Push<T>> Push<Result<S, T>> for Results<SC, TC> {
2960            #[inline]
2961            fn push(&mut self, item: Result<S, T>) {
2962                match item {
2963                    Ok(item) => {
2964                        self.indexes.push(true);
2965                        self.oks.push(item);
2966                    }
2967                    Err(item) => {
2968                        self.indexes.push(false);
2969                        self.errs.push(item);
2970                    }
2971                }
2972            }
2973        }
2974        impl<'a, S, SC: Push<&'a S>, T, TC: Push<&'a T>> Push<&'a Result<S, T>> for Results<SC, TC> {
2975            #[inline]
2976            fn push(&mut self, item: &'a Result<S, T>) {
2977                match item {
2978                    Ok(item) => {
2979                        self.indexes.push(true);
2980                        self.oks.push(item);
2981                    }
2982                    Err(item) => {
2983                        self.indexes.push(false);
2984                        self.errs.push(item);
2985                    }
2986                }
2987            }
2988        }
2989
2990        impl<SC: Clear, TC: Clear> Clear for Results<SC, TC> {
2991            fn clear(&mut self) {
2992                self.indexes.clear();
2993                self.oks.clear();
2994                self.errs.clear();
2995            }
2996        }
2997
2998        impl<SC: HeapSize, TC: HeapSize> HeapSize for Results<SC, TC> {
2999            fn heap_size(&self) -> (usize, usize) {
3000                let (l0, c0) = self.oks.heap_size();
3001                let (l1, c1) = self.errs.heap_size();
3002                let (li, ci) = self.indexes.heap_size();
3003                (l0 + l1 + li, c0 + c1 + ci)
3004            }
3005        }
3006
3007        impl<SC, TC, CC, VC, WC> Results<SC, TC, CC, VC, WC> {
3008            /// Returns ok values if no errors exist.
3009            pub fn unwrap(self) -> SC where TC: Len {
3010                assert!(self.errs.is_empty());
3011                self.oks
3012            }
3013            /// Returns error values if no oks exist.
3014            pub fn unwrap_err(self) -> TC where SC: Len {
3015                assert!(self.oks.is_empty());
3016                self.errs
3017            }
3018        }
3019        #[cfg(test)]
3020        mod test {
3021            #[test]
3022            fn round_trip() {
3023
3024                use crate::common::{Index, Push, HeapSize, Len};
3025
3026                let mut column: crate::ContainerOf<Result<u64, u64>> = Default::default();
3027                for i in 0..100 {
3028                    column.push(Ok::<u64, u64>(i));
3029                    column.push(Err::<u64, u64>(i));
3030                }
3031
3032                assert_eq!(column.len(), 200);
3033                assert_eq!(column.heap_size(), (1624, 2080));
3034
3035                for i in 0..100 {
3036                    assert_eq!(column.get(2*i+0), Ok(i as u64));
3037                    assert_eq!(column.get(2*i+1), Err(i as u64));
3038                }
3039
3040                let mut column: crate::ContainerOf<Result<u64, u8>> = Default::default();
3041                for i in 0..100 {
3042                    column.push(Ok::<u64, u8>(i as u64));
3043                    column.push(Err::<u64, u8>(i as u8));
3044                }
3045
3046                assert_eq!(column.len(), 200);
3047                assert_eq!(column.heap_size(), (924, 1184));
3048
3049                for i in 0..100 {
3050                    assert_eq!(column.get(2*i+0), Ok(i as u64));
3051                    assert_eq!(column.get(2*i+1), Err(i as u8));
3052                }
3053            }
3054        }
3055    }
3056
3057    pub mod option {
3058
3059        use crate::common::index::CopyAs;
3060        use crate::{Clear, Columnar, Container, Len, IndexMut, Index, IndexAs, Push, HeapSize, Borrow};
3061        use crate::RankSelect;
3062
3063    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3064        #[derive(Copy, Clone, Debug, Default, PartialEq)]
3065        pub struct Options<TC, CC=Vec<u64>, VC=Vec<u64>, WC=u64> {
3066            /// Uses two bits for each item, one to indicate the variant and one (amortized)
3067            /// to enable efficient rank determination.
3068            pub indexes: RankSelect<CC, VC, WC>,
3069            pub somes: TC,
3070        }
3071
3072        impl<T: Columnar> Columnar for Option<T> {
3073            fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
3074                match (&mut *self, other) {
3075                    (Some(x), Some(y)) => { x.copy_from(y); }
3076                    (_, other) => { *self = Self::into_owned(other); }
3077                }
3078            }
3079            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
3080                other.map(|x| T::into_owned(x))
3081            }
3082            type Container = Options<T::Container>;
3083        }
3084
3085        impl<TC: Borrow> Borrow for Options<TC> {
3086            type Ref<'a> = Option<TC::Ref<'a>> where TC: 'a;
3087            type Borrowed<'a> = Options<TC::Borrowed<'a>, &'a [u64], &'a [u64], &'a u64> where TC: 'a;
3088            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
3089                Options {
3090                    indexes: self.indexes.borrow(),
3091                    somes: self.somes.borrow(),
3092                }
3093            }
3094            #[inline(always)]
3095            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where TC: 'a {
3096                Options {
3097                    indexes: RankSelect::<Vec<u64>, Vec<u64>>::reborrow(thing.indexes),
3098                    somes: TC::reborrow(thing.somes),
3099                }
3100            }
3101            #[inline(always)]
3102            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a {
3103                thing.map(TC::reborrow_ref)
3104            }
3105        }
3106
3107        impl<TC: Container> Container for Options<TC> {
3108            #[inline(always)]
3109            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
3110                if !range.is_empty() {
3111                    // Starting offsets of `Some` variants in `other`.
3112                    let somes_start = other.indexes.rank(range.start);
3113
3114                    // Count the number of `Some` variants as we push, to determine the range.
3115                    // TODO: This could probably be `popcnt` somehow.
3116                    let mut somes = 0;
3117                    for index in range {
3118                        let bit = other.indexes.get(index);
3119                        self.indexes.push(bit);
3120                        if bit { somes += 1; }
3121                    }
3122
3123                    self.somes.extend_from_self(other.somes, somes_start .. somes_start + somes);
3124                }
3125            }
3126
3127            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
3128                // TODO: reserve room in `self.indexes`.
3129                self.somes.reserve_for(selves.map(|x| x.somes));
3130            }
3131        }
3132
3133        impl<'a, TC: crate::AsBytes<'a>, CC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for Options<TC, CC, VC, &'a u64> {
3134            #[inline(always)]
3135            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
3136                crate::chain(self.indexes.as_bytes(), self.somes.as_bytes())
3137            }
3138        }
3139
3140        impl <'a, TC: crate::FromBytes<'a>, CC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for Options<TC, CC, VC, &'a u64> {
3141            #[inline(always)]
3142            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
3143                Self {
3144                    indexes: crate::FromBytes::from_bytes(bytes),
3145                    somes: crate::FromBytes::from_bytes(bytes),
3146                }
3147            }
3148        }
3149
3150        impl<T, CC, VC: Len, WC: CopyAs<u64>> Len for Options<T, CC, VC, WC> {
3151            #[inline(always)] fn len(&self) -> usize { self.indexes.len() }
3152        }
3153
3154        impl<TC: Index, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len, WC: CopyAs<u64>> Index for Options<TC, CC, VC, WC> {
3155            type Ref = Option<TC::Ref>;
3156            #[inline(always)]
3157            fn get(&self, index: usize) -> Self::Ref {
3158                if self.indexes.get(index) {
3159                    Some(self.somes.get(self.indexes.rank(index)))
3160                } else {
3161                    None
3162                }
3163            }
3164        }
3165        impl<'a, TC, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len, WC: CopyAs<u64>> Index for &'a Options<TC, CC, VC, WC>
3166        where &'a TC: Index
3167        {
3168            type Ref = Option<<&'a TC as Index>::Ref>;
3169            #[inline(always)]
3170            fn get(&self, index: usize) -> Self::Ref {
3171                if self.indexes.get(index) {
3172                    Some((&self.somes).get(self.indexes.rank(index)))
3173                } else {
3174                    None
3175                }
3176            }
3177        }
3178        impl<TC: IndexMut, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len> IndexMut for Options<TC, CC, VC> {
3179            type IndexMut<'a> = Option<TC::IndexMut<'a>> where TC: 'a, CC: 'a, VC: 'a;
3180            #[inline(always)]
3181            fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
3182                if self.indexes.get(index) {
3183                    Some(self.somes.get_mut(self.indexes.rank(index)))
3184                } else {
3185                    None
3186                }
3187            }
3188        }
3189
3190        impl<T, TC: Push<T> + Len> Push<Option<T>> for Options<TC> {
3191            #[inline]
3192            fn push(&mut self, item: Option<T>) {
3193                match item {
3194                    Some(item) => {
3195                        self.indexes.push(true);
3196                        self.somes.push(item);
3197                    }
3198                    None => {
3199                        self.indexes.push(false);
3200                    }
3201                }
3202            }
3203        }
3204        impl<'a, T, TC: Push<&'a T> + Len> Push<&'a Option<T>> for Options<TC> {
3205            #[inline]
3206            fn push(&mut self, item: &'a Option<T>) {
3207                match item {
3208                    Some(item) => {
3209                        self.indexes.push(true);
3210                        self.somes.push(item);
3211                    }
3212                    None => {
3213                        self.indexes.push(false);
3214                    }
3215                }
3216            }
3217        }
3218
3219        impl<TC: Clear> Clear for Options<TC> {
3220            fn clear(&mut self) {
3221                self.indexes.clear();
3222                self.somes.clear();
3223            }
3224        }
3225
3226        impl<TC: HeapSize> HeapSize for Options<TC> {
3227            fn heap_size(&self) -> (usize, usize) {
3228                let (l0, c0) = self.somes.heap_size();
3229                let (li, ci) = self.indexes.heap_size();
3230                (l0 + li, c0 + ci)
3231            }
3232        }
3233
3234        #[cfg(test)]
3235        mod test {
3236
3237            use crate::Columnar;
3238            use crate::common::{Index, HeapSize, Len};
3239            use crate::Options;
3240
3241            #[test]
3242            fn round_trip_some() {
3243                // Type annotation is important to avoid some inference overflow.
3244                let store: Options<Vec<i32>> = Columnar::into_columns((0..100).map(Some));
3245                assert_eq!(store.len(), 100);
3246                assert!((&store).index_iter().zip(0..100).all(|(a, b)| a == Some(&b)));
3247                assert_eq!(store.heap_size(), (408, 544));
3248            }
3249
3250            #[test]
3251            fn round_trip_none() {
3252                let store = Columnar::into_columns((0..100).map(|_x| None::<i32>));
3253                assert_eq!(store.len(), 100);
3254                let foo = &store;
3255                assert!(foo.index_iter().zip(0..100).all(|(a, _b)| a == None));
3256                assert_eq!(store.heap_size(), (8, 32));
3257            }
3258
3259            #[test]
3260            fn round_trip_mixed() {
3261                // Type annotation is important to avoid some inference overflow.
3262                let store: Options<Vec<i32>>  = Columnar::into_columns((0..100).map(|x| if x % 2 == 0 { Some(x) } else { None }));
3263                assert_eq!(store.len(), 100);
3264                assert!((&store).index_iter().zip(0..100).all(|(a, b)| a == if b % 2 == 0 { Some(&b) } else { None }));
3265                assert_eq!(store.heap_size(), (208, 288));
3266            }
3267        }
3268    }
3269}
3270
3271pub use lookback::{Repeats, Lookbacks};
3272/// Containers that can store either values, or offsets to prior values.
3273///
3274/// This has the potential to be more efficient than a list of `T` when many values repeat in
3275/// close proximity. Values must be equatable, and the degree of lookback can be configured.
3276pub mod lookback {
3277
3278    use crate::{Options, Results, Push, Index, Len, HeapSize};
3279
3280    /// A container that encodes repeated values with a `None` variant, at the cost of extra bits for every record.
3281    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3282    #[derive(Clone, Debug, Default, PartialEq)]
3283    pub struct Repeats<TC, const N: u8 = 255> {
3284        /// Some(x) encodes a value, and None indicates the prior `x` value.
3285        pub inner: Options<TC>,
3286    }
3287
3288    impl<T: PartialEq, TC: Push<T> + Len, const N: u8> Push<T> for Repeats<TC, N>
3289    where
3290        for<'a> &'a TC: Index,
3291        for<'a> <&'a TC as Index>::Ref : PartialEq<T>,
3292    {
3293        #[inline]
3294        fn push(&mut self, item: T) {
3295            // Look at the last `somes` value for a potential match.
3296            let insert: Option<T> = if (&self.inner.somes).last().map(|x| x.eq(&item)) == Some(true) {
3297                None
3298            } else {
3299                Some(item)
3300            };
3301            self.inner.push(insert);
3302        }
3303    }
3304
3305    impl<TC: Len, const N: u8> Len for Repeats<TC, N> {
3306        #[inline(always)] fn len(&self) -> usize { self.inner.len() }
3307    }
3308
3309    impl<TC: Index, const N: u8> Index for Repeats<TC, N> {
3310        type Ref = TC::Ref;
3311        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
3312            match self.inner.get(index) {
3313                Some(item) => item,
3314                None => {
3315                    let pos = self.inner.indexes.rank(index) - 1;
3316                    self.inner.somes.get(pos)
3317                },
3318            }
3319        }
3320    }
3321
3322    impl<TC: HeapSize, const N: u8> HeapSize for Repeats<TC, N> {
3323        fn heap_size(&self) -> (usize, usize) {
3324            self.inner.heap_size()
3325        }
3326    }
3327
3328    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3329    #[derive(Clone, Debug, Default, PartialEq)]
3330    pub struct Lookbacks<TC, VC = Vec<u8>, const N: u8 = 255> {
3331        /// Ok(x) encodes a value, and Err(y) indicates a value `y` back.
3332        pub inner: Results<TC, VC>,
3333    }
3334
3335    impl<T: PartialEq, TC: Push<T> + Len, VC: Push<u8>, const N: u8> Push<T> for Lookbacks<TC, VC, N>
3336    where
3337        for<'a> &'a TC: Index,
3338        for<'a> <&'a TC as Index>::Ref : PartialEq<T>,
3339    {
3340        #[inline]
3341        fn push(&mut self, item: T) {
3342            // Look backwards through (0 .. N) to look for a matching value.
3343            let oks_len = self.inner.oks.len();
3344            let find = (0u8 .. N).take(self.inner.oks.len()).find(|i| (&self.inner.oks).get(oks_len - (*i as usize) - 1) == item);
3345            let insert: Result<T, u8> = if let Some(back) = find { Err(back) } else { Ok(item) };
3346            self.inner.push(insert);
3347        }
3348    }
3349
3350    impl<TC, VC, const N: u8> Len for Lookbacks<TC, VC, N> {
3351        #[inline(always)] fn len(&self) -> usize { self.inner.len() }
3352    }
3353
3354    impl<TC: Index, VC: Index<Ref=u8>, const N: u8> Index for Lookbacks<TC, VC, N> {
3355        type Ref = TC::Ref;
3356        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
3357            match self.inner.get(index) {
3358                Ok(item) => item,
3359                Err(back) => {
3360                    let pos = self.inner.indexes.rank(index) - 1;
3361                    self.inner.oks.get(pos - (back as usize))
3362                },
3363            }
3364        }
3365    }
3366    impl<'a, TC, const N: u8> Index for &'a Lookbacks<TC, Vec<u8>, N>
3367    where
3368        &'a TC: Index,
3369    {
3370        type Ref = <&'a TC as Index>::Ref;
3371        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
3372            match (&self.inner).get(index) {
3373                Ok(item) => item,
3374                Err(back) => {
3375                    let pos = self.inner.indexes.rank(index) - 1;
3376                    (&self.inner.oks).get(pos - (*back as usize))
3377                },
3378            }
3379        }
3380    }
3381
3382    impl<TC: HeapSize, VC: HeapSize, const N: u8> HeapSize for Lookbacks<TC, VC, N> {
3383        fn heap_size(&self) -> (usize, usize) {
3384            self.inner.heap_size()
3385        }
3386    }
3387}
3388
3389/// Roaring bitmap (and similar) containers.
3390pub mod roaring {
3391
3392    use crate::Results;
3393
3394    /// A container for `bool` that uses techniques from Roaring bitmaps.
3395    ///
3396    /// These techniques are to block the bits into blocks of 2^16 bits,
3397    /// and to encode each block based on its density. Either a bitmap
3398    /// for dense blocks or a list of set bits for sparse blocks.
3399    ///
3400    /// Additionally, other representations encode runs of set bits.
3401    pub struct RoaringBits {
3402        _inner: Results<[u64; 1024], Vec<u16>>,
3403    }
3404}
3405
3406pub use chain_mod::{chain, chain_one};
3407
3408pub mod chain_mod {
3409    //! Chain iterators, or iterators and an item. Iterators that might improve inlining, at the
3410    //! expense of not providing iterator maker traits.
3411
3412    /// Chain two iterators together. The result first iterates over `a`, then `b`, until both are
3413    /// exhausted.
3414    ///
3415    /// This addresses a quirk where deep iterators would not be optimized to their full potential.
3416    /// Here, functions are marked with `#[inline(always)]` to indicate that the compiler should
3417    /// try hard to inline the iterators.
3418    #[inline(always)]
3419    pub fn chain<A: IntoIterator, B: IntoIterator<Item=A::Item>>(a: A, b: B) -> Chain<A::IntoIter, B::IntoIter> {
3420        Chain { a: Some(a.into_iter()), b: Some(b.into_iter()) }
3421    }
3422
3423    pub struct Chain<A, B> {
3424        a: Option<A>,
3425        b: Option<B>,
3426    }
3427
3428    impl<A, B> Iterator for Chain<A, B>
3429    where
3430        A: Iterator,
3431        B: Iterator<Item=A::Item>,
3432    {
3433        type Item = A::Item;
3434
3435        #[inline(always)]
3436        fn next(&mut self) -> Option<Self::Item> {
3437            if let Some(a) = self.a.as_mut() {
3438                let x = a.next();
3439                if x.is_none() {
3440                    self.a = None;
3441                } else {
3442                    return x;
3443                }
3444            }
3445            if let Some(b) = self.b.as_mut() {
3446                let x = b.next();
3447                if x.is_none() {
3448                    self.b = None;
3449                } else {
3450                    return x;
3451                }
3452            }
3453            None
3454        }
3455
3456        #[inline]
3457        fn fold<Acc, F>(self, mut acc: Acc, mut f: F) -> Acc
3458        where
3459            F: FnMut(Acc, Self::Item) -> Acc,
3460        {
3461            if let Some(a) = self.a {
3462                acc = a.fold(acc, &mut f);
3463            }
3464            if let Some(b) = self.b {
3465                acc = b.fold(acc, f);
3466            }
3467            acc
3468        }
3469    }
3470
3471    /// Chain a single item to an iterator. The resulting iterator first iterates over `a`,
3472    /// then `b`. The resulting iterator is marked as `#[inline(always)]`, which in some situations
3473    /// causes better inlining behavior with current Rust versions.
3474    #[inline(always)]
3475    pub fn chain_one<A: IntoIterator>(a: A, b: A::Item) -> ChainOne<A::IntoIter> {
3476        ChainOne { a: Some(a.into_iter()), b: Some(b) }
3477    }
3478
3479    pub struct ChainOne<A: Iterator> {
3480        a: Option<A>,
3481        b: Option<A::Item>,
3482    }
3483
3484    impl<A: Iterator> Iterator for ChainOne<A> {
3485        type Item = A::Item;
3486
3487        #[inline(always)]
3488        fn next(&mut self) -> Option<Self::Item> {
3489            if let Some(a) = self.a.as_mut() {
3490                let x = a.next();
3491                if x.is_none() {
3492                    self.a = None;
3493                    self.b.take()
3494                } else {
3495                    x
3496                }
3497            } else {
3498                None
3499            }
3500        }
3501    }
3502
3503    #[cfg(test)]
3504    mod test {
3505        use super::*;
3506
3507        #[test]
3508        fn test_chain() {
3509            let a = [1, 2, 3];
3510            let b = [4, 5, 6];
3511            let mut chain = chain(a, b);
3512            assert_eq!(chain.next(), Some(1));
3513            assert_eq!(chain.next(), Some(2));
3514            assert_eq!(chain.next(), Some(3));
3515            assert_eq!(chain.next(), Some(4));
3516            assert_eq!(chain.next(), Some(5));
3517            assert_eq!(chain.next(), Some(6));
3518            assert_eq!(chain.next(), None);
3519        }
3520
3521        #[test]
3522        fn test_chain_one() {
3523            let a = [1, 2, 3];
3524            let b = 4;
3525            let mut chain = chain_one(a, b);
3526            assert_eq!(chain.next(), Some(1));
3527            assert_eq!(chain.next(), Some(2));
3528            assert_eq!(chain.next(), Some(3));
3529            assert_eq!(chain.next(), Some(4));
3530            assert_eq!(chain.next(), None);
3531        }
3532    }
3533}