columnar/
lib.rs

1//! Types supporting flat / "columnar" layout for complex types.
2//!
3//! The intent is to re-layout `Vec<T>` types into vectors of reduced
4//! complexity, repeatedly. One should be able to push and pop easily,
5//! but indexing will be more complicated because we likely won't have
6//! a real `T` lying around to return as a reference. Instead, we will
7//! use Generic Associated Types (GATs) to provide alternate references.
8
9// Re-export derive crate.
10extern crate columnar_derive;
11pub use columnar_derive::Columnar;
12
13pub mod adts;
14
15pub use bytemuck;
16
17/// A type that can be represented in columnar form.
18///
19/// For a running example, a type like `(A, Vec<B>)`.
20pub trait Columnar : 'static {
21    /// Repopulates `self` from a reference.
22    ///
23    /// By default this just calls `into_owned()`, but it can be overridden.
24    fn copy_from<'a>(&mut self, other: Ref<'a, Self>) where Self: Sized {
25        *self = Self::into_owned(other);
26    }
27    /// Produce an instance of `Self` from `Self::Ref<'a>`.
28    fn into_owned<'a>(other: Ref<'a, Self>) -> Self;
29
30    /// The type that stores the columnar representation.
31    ///
32    /// The container must support pushing both `&Self` and `Self::Ref<'_>`.
33    /// In our running example this might be `(Vec<A>, Vecs<Vec<B>>)`.
34    type Container: ContainerBytes + for<'a> Push<&'a Self>;
35
36    /// Converts a sequence of the references to the type into columnar form.
37    fn as_columns<'a, I>(selves: I) -> Self::Container where I: IntoIterator<Item=&'a Self>, Self: 'a {
38        let mut columns: Self::Container = Default::default();
39        for item in selves {
40            columns.push(item);
41        }
42        columns
43    }
44    /// Converts a sequence of the type into columnar form.
45    ///
46    /// This consumes the owned `Self` types but uses them only by reference.
47    /// Consider `as_columns()` instead if it is equally ergonomic.
48    fn into_columns<I>(selves: I) -> Self::Container where I: IntoIterator<Item = Self>, Self: Sized {
49        let mut columns: Self::Container = Default::default();
50        for item in selves {
51            columns.push(&item);
52        }
53        columns
54    }
55    /// Reborrows the reference type to a shorter lifetime.
56    ///
57    /// Implementations must not change the contents of the reference, and should mark
58    /// the function as `#[inline(always)]`. It is no-op to overcome limitations
59    /// of the borrow checker. In many cases, it is enough to return `thing` as-is.
60    ///
61    /// For example, when comparing two references `Ref<'a>` and `Ref<'b>`, we can
62    /// reborrow both to a local lifetime to compare them. This allows us to keep the
63    /// lifetimes `'a` and `'b` separate, while otherwise Rust would unify them.
64    #[inline(always)] fn reborrow<'b, 'a: 'b>(thing: Ref<'a, Self>) -> Ref<'b, Self> {
65        Self::Container::reborrow_ref(thing)
66    }
67}
68
69/// The container type of columnar type `T`.
70///
71/// Equivalent to `<T as Columnar>::Container`.
72pub type ContainerOf<T> = <T as Columnar>::Container;
73
74/// For a lifetime, the reference type of columnar type `T`.
75///
76/// Equivalent to `<ContainerOf<T> as Container>::Ref<'a>`.
77pub type Ref<'a, T> = <ContainerOf<T> as Container>::Ref<'a>;
78
79/// A container that can hold `C`, and provide its preferred references.
80///
81/// As an example, `(Vec<A>, Vecs<Vec<B>>)`.
82pub trait Container : Len + Clear + for<'a> Push<Self::Ref<'a>> + Clone + Default + Send + 'static {
83    /// For each lifetime, a reference with that lifetime.
84    ///
85    /// As an example, `(&'a A, &'a [B])`.
86    type Ref<'a> : Copy;
87    /// The type of a borrowed container.
88    ///
89    /// Corresponding to our example, `(&'a [A], Vecs<&'a [B], &'a [u64]>)`.
90    type Borrowed<'a>: Copy + Len + Index<Ref = Self::Ref<'a>> where Self: 'a;
91    /// Converts a reference to the type to a borrowed variant.
92    fn borrow<'a>(&'a self) -> Self::Borrowed<'a>;
93    /// Reborrows the borrowed type to a shorter lifetime. See [`Columnar::reborrow`] for details.
94    fn reborrow<'b, 'a: 'b>(item: Self::Borrowed<'a>) -> Self::Borrowed<'b> where Self: 'a;
95    /// Reborrows the borrowed type to a shorter lifetime. See [`Columnar::reborrow`] for details.
96    fn reborrow_ref<'b, 'a: 'b>(item: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a;
97
98
99    /// Allocates an empty container that can be extended by `selves` without reallocation.
100    ///
101    /// This goal is optimistic, and some containers may struggle to size correctly, especially
102    /// if they employ compression or other variable-sizing techniques that respond to the data
103    /// and the order in which is it presented. Best effort, but still useful!
104    fn with_capacity_for<'a, I>(selves: I) -> Self
105    where
106        Self: 'a,
107        I: Iterator<Item = Self::Borrowed<'a>> + Clone
108    {
109        let mut output = Self::default();
110        output.reserve_for(selves);
111        output
112    }
113
114    // Ensure that `self` can extend from `selves` without reallocation.
115    fn reserve_for<'a, I>(&mut self, selves: I)
116    where
117        Self: 'a,
118        I: Iterator<Item = Self::Borrowed<'a>> + Clone;
119
120
121    /// Extends `self` by a range in `other`.
122    ///
123    /// This method has a default implementation, but can and should be specialized when ranges can be copied.
124    /// As an example, lists of lists are often backed by contiguous elements, all of which can be memcopied,
125    /// with only the offsets into them (the bounds) to push either before or after (rather than during).
126    #[inline(always)]
127    fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
128        self.extend(range.map(|i| other.get(i)))
129    }
130}
131
132impl<T: Clone + Send + 'static> Container for Vec<T> {
133    type Ref<'a> = &'a T;
134    type Borrowed<'a> = &'a [T];
135    fn borrow<'a>(&'a self) -> Self::Borrowed<'a> { &self[..] }
136    fn reborrow<'b, 'a: 'b>(item: Self::Borrowed<'a>) -> Self::Borrowed<'b> where Self: 'a { item }
137    fn reborrow_ref<'b, 'a: 'b>(item: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { item }
138    fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
139        self.extend_from_slice(&other[range])
140    }
141    fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
142        self.reserve(selves.map(|x| x.len()).sum::<usize>())
143    }
144}
145
146/// A container that can also be viewed as and reconstituted from bytes.
147pub trait ContainerBytes : for<'a> Container<Borrowed<'a> : AsBytes<'a> + FromBytes<'a>> { }
148impl<C: for<'a> Container<Borrowed<'a> : AsBytes<'a> + FromBytes<'a>>> ContainerBytes for C { }
149
150pub use common::{Clear, Len, Push, IndexMut, Index, IndexAs, HeapSize, Slice, AsBytes, FromBytes};
151/// Common traits and types that are re-used throughout the module.
152pub mod common {
153
154    /// A type with a length.
155    pub trait Len {
156        /// The number of contained elements.
157        fn len(&self) -> usize;
158        /// Whether this contains no elements.
159        fn is_empty(&self) -> bool {
160            self.len() == 0
161        }
162    }
163    impl<L: Len + ?Sized> Len for &L {
164        #[inline(always)] fn len(&self) -> usize { L::len(*self) }
165    }
166    impl<L: Len + ?Sized> Len for &mut L {
167        #[inline(always)] fn len(&self) -> usize { L::len(*self) }
168    }
169    impl<T> Len for Vec<T> {
170        #[inline(always)] fn len(&self) -> usize { self.len() }
171    }
172    impl<T> Len for [T] {
173        #[inline(always)] fn len(&self) -> usize { <[T]>::len(self) }
174    }
175
176    /// A type that can accept items of type `T`.
177    pub trait Push<T> {
178        /// Pushes an item onto `self`.
179        fn push(&mut self, item: T);
180        /// Pushes elements of an iterator onto `self`.
181        #[inline(always)] fn extend(&mut self, iter: impl IntoIterator<Item=T>) {
182            for item in iter {
183                self.push(item);
184            }
185        }
186    }
187    impl<T> Push<T> for Vec<T> {
188        #[inline(always)] fn push(&mut self, item: T) { self.push(item) }
189
190        #[inline(always)]
191        fn extend(&mut self, iter: impl IntoIterator<Item=T>) {
192            std::iter::Extend::extend(self, iter)
193        }
194    }
195    impl<'a, T: Clone> Push<&'a T> for Vec<T> {
196        #[inline(always)] fn push(&mut self, item: &'a T) { self.push(item.clone()) }
197
198        #[inline(always)]
199        fn extend(&mut self, iter: impl IntoIterator<Item=&'a T>) {
200            std::iter::Extend::extend(self, iter.into_iter().cloned())
201        }
202    }
203    impl<'a, T: Clone> Push<&'a [T]> for Vec<T> {
204        #[inline(always)] fn push(&mut self, item: &'a [T]) { self.clone_from_slice(item) }
205    }
206
207
208    pub use index::{Index, IndexMut, IndexAs};
209    /// Traits for accessing elements by `usize` indexes.
210    ///
211    /// There are several traits, with a core distinction being whether the returned reference depends on the lifetime of `&self`.
212    /// For one trait `Index` the result does not depend on this lifetime.
213    /// There is a third trait `IndexMut` that allows mutable access, that may be less commonly implemented.
214    pub mod index {
215
216        use crate::Len;
217        use crate::common::IterOwn;
218
219        /// A type that can be mutably accessed by `usize`.
220        pub trait IndexMut {
221            /// Type mutably referencing an indexed element.
222            type IndexMut<'a> where Self: 'a;
223            fn get_mut(& mut self, index: usize) -> Self::IndexMut<'_>;
224            /// A reference to the last element, should one exist.
225            #[inline(always)] fn last_mut(&mut self) -> Option<Self::IndexMut<'_>> where Self: Len {
226                if self.is_empty() { None }
227                else { Some(self.get_mut(self.len()-1)) }
228            }
229        }
230
231        impl<T: IndexMut + ?Sized> IndexMut for &mut T {
232            type IndexMut<'a> = T::IndexMut<'a> where Self: 'a;
233            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
234                T::get_mut(*self, index)
235            }
236        }
237        impl<T> IndexMut for Vec<T> {
238            type IndexMut<'a> = &'a mut T where Self: 'a;
239            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> { &mut self[index] }
240        }
241        impl<T> IndexMut for [T] {
242            type IndexMut<'a> = &'a mut T where Self: 'a;
243            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> { &mut self[index] }
244        }
245
246        /// A type that can be accessed by `usize` but without borrowing `self`.
247        ///
248        /// This can be useful for types which include their own lifetimes, and
249        /// which wish to express that their reference has the same lifetime.
250        /// In the GAT `Index`, the `Ref<'_>` lifetime would be tied to `&self`.
251        ///
252        /// This trait may be challenging to implement for owning containers,
253        /// for example `Vec<_>`, which would need their `Ref` type to depend
254        /// on the lifetime of the `&self` borrow in the `get()` function.
255        pub trait Index {
256            /// The type returned by the `get` method.
257            ///
258            /// Notably, this does not vary with lifetime, and will not depend on the lifetime of `&self`.
259            type Ref;
260            fn get(&self, index: usize) -> Self::Ref;
261            #[inline(always)] fn last(&self) -> Option<Self::Ref> where Self: Len {
262                if self.is_empty() { None }
263                else { Some(self.get(self.len()-1)) }
264            }
265            /// Converts `&self` into an iterator.
266            ///
267            /// This has an awkward name to avoid collision with `iter()`, which may also be implemented.
268            #[inline(always)]
269            fn index_iter(&self) -> IterOwn<&Self> {
270                IterOwn {
271                    index: 0,
272                    slice: self,
273                }
274            }
275            /// Converts `self` into an iterator.
276            ///
277            /// This has an awkward name to avoid collision with `into_iter()`, which may also be implemented.
278            #[inline(always)]
279            fn into_index_iter(self) -> IterOwn<Self> where Self: Sized {
280                IterOwn {
281                    index: 0,
282                    slice: self,
283                }
284            }
285        }
286
287        // These implementations aim to reveal a longer lifetime, or to copy results to avoid a lifetime.
288        impl<'a, T> Index for &'a [T] {
289            type Ref = &'a T;
290            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { &self[index] }
291        }
292        impl<T: Copy> Index for [T] {
293            type Ref = T;
294            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self[index] }
295        }
296        impl<'a, T> Index for &'a Vec<T> {
297            type Ref = &'a T;
298            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { &self[index] }
299        }
300        impl<T: Copy> Index for Vec<T> {
301            type Ref = T;
302            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self[index] }
303        }
304
305
306        /// Types that can be converted into another type by copying.
307        ///
308        /// We use this trait to unify the ability of `T` and `&T` to be converted into `T`.
309        /// This is handy for copy types that we'd like to use, like `u8`, `u64` and `usize`.
310        pub trait CopyAs<T> : Copy {
311            fn copy_as(self) -> T;
312        }
313        impl<T: Copy> CopyAs<T> for &T {
314            #[inline(always)] fn copy_as(self) -> T { *self }
315        }
316        impl<T: Copy> CopyAs<T> for T {
317            #[inline(always)] fn copy_as(self) -> T { self }
318        }
319
320        pub trait IndexAs<T> {
321            fn index_as(&self, index: usize) -> T;
322            #[inline(always)] fn last(&self) -> Option<T> where Self: Len {
323                if self.is_empty() { None }
324                else { Some(self.index_as(self.len()-1)) }
325            }
326        }
327
328        impl<T: Index, S> IndexAs<S> for T where T::Ref: CopyAs<S> {
329            #[inline(always)] fn index_as(&self, index: usize) -> S { self.get(index).copy_as() }
330        }
331
332    }
333
334    use crate::Container;
335    use crate::common::index::CopyAs;
336    /// A composite trait which captures the ability `Push<&T>` and `Index<Ref = T>`.
337    ///
338    /// Implement `CopyAs<T>` for the reference type, and `Push<&'a T>` for the container.
339    pub trait PushIndexAs<T> : for<'a> Container<Ref<'a>: CopyAs<T>> + for<'a> Push<&'a T> { }
340    impl<T, C: for<'a> Container<Ref<'a>: CopyAs<T>> + for<'a> Push<&'a T>> PushIndexAs<T> for C { }
341
342    /// A type that can remove its contents and return to an empty state.
343    ///
344    /// Generally, this method does not release resources, and is used to make the container available for re-insertion.
345    pub trait Clear {
346        /// Clears `self`, without changing its capacity.
347        fn clear(&mut self);
348    }
349    // Vectors can be cleared.
350    impl<T> Clear for Vec<T> {
351        #[inline(always)] fn clear(&mut self) { self.clear() }
352    }
353    // Slice references can be cleared.
354    impl<T> Clear for &[T] {
355        #[inline(always)] fn clear(&mut self) { *self = &[]; }
356    }
357
358    pub trait HeapSize {
359        /// Active (len) and allocated (cap) heap sizes in bytes.
360        /// This should not include the size of `self` itself.
361        fn heap_size(&self) -> (usize, usize) { (0, 0) }
362    }
363
364    impl HeapSize for String {
365        fn heap_size(&self) -> (usize, usize) {
366            (self.len(), self.capacity())
367        }
368    }
369    impl<T: HeapSize> HeapSize for [T] {
370        fn heap_size(&self) -> (usize, usize) {
371            let mut l = std::mem::size_of_val(self);
372            let mut c = std::mem::size_of_val(self);
373            for item in self.iter() {
374                let (il, ic) = item.heap_size();
375                l += il;
376                c += ic;
377            }
378            (l, c)
379        }
380    }
381    impl<T: HeapSize> HeapSize for Vec<T> {
382        fn heap_size(&self) -> (usize, usize) {
383            let mut l = std::mem::size_of::<T>() * self.len();
384            let mut c = std::mem::size_of::<T>() * self.capacity();
385            for item in (self[..]).iter() {
386                let (il, ic) = item.heap_size();
387                l += il;
388                c += ic;
389            }
390            (l, c)
391        }
392    }
393
394    /// A struct representing a slice of a range of values.
395    ///
396    /// The lower and upper bounds should be meaningfully set on construction.
397    #[derive(Copy, Clone, Debug)]
398    pub struct Slice<S> {
399        pub lower: usize,
400        pub upper: usize,
401        pub slice: S,
402    }
403
404    impl<S> std::hash::Hash for Slice<S> where Self: Index<Ref: std::hash::Hash> {
405        fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
406            self.len().hash(state);
407            for i in 0 .. self.len() {
408                self.get(i).hash(state);
409            }
410        }
411    }
412
413    impl<S> Slice<S> {
414        pub fn slice<R: std::ops::RangeBounds<usize>>(self, range: R) -> Self {
415            use std::ops::Bound;
416            let lower = match range.start_bound() {
417                Bound::Included(s) => std::cmp::max(self.lower, *s),
418                Bound::Excluded(s) => std::cmp::max(self.lower, *s+1),
419                Bound::Unbounded => self.lower,
420            };
421            let upper = match range.end_bound() {
422                Bound::Included(s) => std::cmp::min(self.upper, *s+1),
423                Bound::Excluded(s) => std::cmp::min(self.upper, *s),
424                Bound::Unbounded => self.upper,
425            };
426            assert!(lower <= upper);
427            Self { lower, upper, slice: self.slice }
428        }
429        pub fn new(lower: u64, upper: u64, slice: S) -> Self {
430            let lower: usize = lower.try_into().expect("slice bounds must fit in `usize`");
431            let upper: usize = upper.try_into().expect("slice bounds must fit in `usize`");
432            Self { lower, upper, slice }
433        }
434        pub fn len(&self) -> usize { self.upper - self.lower }
435        /// Map the slice to another type.
436        pub(crate) fn map<T>(self, f: impl Fn(S) -> T) -> Slice<T> {
437            Slice {
438                lower: self.lower,
439                upper: self.upper,
440                slice: f(self.slice),
441            }
442        }
443    }
444
445    impl<S: Index> PartialEq for Slice<S> where S::Ref: PartialEq {
446        fn eq(&self, other: &Self) -> bool {
447            if self.len() != other.len() { return false; }
448            for i in 0 .. self.len() {
449                if self.get(i) != other.get(i) { return false; }
450            }
451            true
452        }
453    }
454    impl<S: Index> PartialEq<[S::Ref]> for Slice<S> where S::Ref: PartialEq {
455        fn eq(&self, other: &[S::Ref]) -> bool {
456            if self.len() != other.len() { return false; }
457            for i in 0 .. self.len() {
458                if self.get(i) != other[i] { return false; }
459            }
460            true
461        }
462    }
463    impl<S: Index> PartialEq<Vec<S::Ref>> for Slice<S> where S::Ref: PartialEq {
464        fn eq(&self, other: &Vec<S::Ref>) -> bool {
465            if self.len() != other.len() { return false; }
466            for i in 0 .. self.len() {
467                if self.get(i) != other[i] { return false; }
468            }
469            true
470        }
471    }
472
473    impl<S: Index> Eq for Slice<S> where S::Ref: Eq { }
474
475    impl<S: Index> PartialOrd for Slice<S> where S::Ref: PartialOrd {
476        fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
477            use std::cmp::Ordering;
478            let len = std::cmp::min(self.len(), other.len());
479
480            for i in 0 .. len {
481                match self.get(i).partial_cmp(&other.get(i)) {
482                    Some(Ordering::Equal) => (),
483                    not_equal => return not_equal,
484                }
485            }
486
487            self.len().partial_cmp(&other.len())
488        }
489    }
490
491    impl<S: Index> Ord for Slice<S> where S::Ref: Ord + Eq {
492        fn cmp(&self, other: &Self) -> std::cmp::Ordering {
493            use std::cmp::Ordering;
494            let len = std::cmp::min(self.len(), other.len());
495
496            for i in 0 .. len {
497                match self.get(i).cmp(&other.get(i)) {
498                    Ordering::Equal => (),
499                    not_equal => return not_equal,
500                }
501            }
502
503            self.len().cmp(&other.len())
504        }
505    }
506
507    impl<S> Len for Slice<S> {
508        #[inline(always)] fn len(&self) -> usize { self.len() }
509    }
510
511    impl<S: Index> Index for Slice<S> {
512        type Ref = S::Ref;
513        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
514            assert!(index < self.upper - self.lower);
515            self.slice.get(self.lower + index)
516        }
517    }
518    impl<'a, S> Index for &'a Slice<S>
519    where
520        &'a S : Index,
521    {
522        type Ref = <&'a S as Index>::Ref;
523        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
524            assert!(index < self.upper - self.lower);
525            (&self.slice).get(self.lower + index)
526        }
527    }
528
529    impl<S: IndexMut> IndexMut for Slice<S> {
530        type IndexMut<'a> = S::IndexMut<'a> where S: 'a;
531        #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
532            assert!(index < self.upper - self.lower);
533            self.slice.get_mut(self.lower + index)
534        }
535    }
536
537    impl<S: Index + Len> Slice<S> {
538        /// Converts the slice into an iterator.
539        ///
540        /// This method exists rather than an `IntoIterator` implementation to avoid
541        /// a conflicting implementation for pushing an `I: IntoIterator` into `Vecs`.
542        pub fn into_iter(self) -> IterOwn<Slice<S>> {
543            self.into_index_iter()
544        }
545    }
546
547    impl<'a, T> Slice<&'a [T]> {
548        pub fn as_slice(&self) -> &'a [T] {
549            &self.slice[self.lower .. self.upper]
550        }
551    }
552
553    pub struct IterOwn<S> {
554        index: usize,
555        slice: S,
556    }
557
558    impl<S> IterOwn<S> {
559        pub fn new(index: usize, slice: S) -> Self {
560            Self { index, slice }
561        }
562    }
563
564    impl<S: Index + Len> Iterator for IterOwn<S> {
565        type Item = S::Ref;
566        #[inline(always)] fn next(&mut self) -> Option<Self::Item> {
567            if self.index < self.slice.len() {
568                let result = self.slice.get(self.index);
569                self.index += 1;
570                Some(result)
571            } else {
572                None
573            }
574        }
575        #[inline(always)]
576        fn size_hint(&self) -> (usize, Option<usize>) {
577            (self.slice.len() - self.index, Some(self.slice.len() - self.index))
578        }
579    }
580
581    impl<S: Index + Len> ExactSizeIterator for IterOwn<S> { }
582
583    /// A type that can be viewed as byte slices with lifetime `'a`.
584    ///
585    /// Implementors of this trait almost certainly reference the lifetime `'a` themselves.
586    pub trait AsBytes<'a> {
587        /// Presents `self` as a sequence of byte slices, with their required alignment.
588        fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])>;
589    }
590
591    /// A type that can be reconstituted from byte slices with lifetime `'a`.
592    ///
593    /// Implementors of this trait almost certainly reference the lifetime `'a` themselves,
594    /// unless they actively deserialize the bytes (vs sit on the slices, as if zero-copy).
595    pub trait FromBytes<'a> {
596        /// Reconstructs `self` from a sequence of correctly aligned and sized bytes slices.
597        ///
598        /// The implementation is expected to consume the right number of items from the iterator,
599        /// which may go on to be used by other implementations of `FromBytes`.
600        ///
601        /// The implementation should aim for only doing trivial work, as it backs calls like
602        /// `borrow` for serialized containers.
603        ///
604        /// Implementations should almost always be marked as `#[inline(always)]` to ensure that
605        /// they are inlined. A single non-inlined function on a tree of `from_bytes` calls
606        /// can cause the performance to drop significantly.
607        fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self;
608    }
609
610}
611
612/// Logic related to the transformation to and from bytes.
613///
614/// The methods here line up with the `AsBytes` and `FromBytes` traits.
615pub mod bytes {
616
617    use crate::AsBytes;
618
619    /// A coupled encode/decode pair for byte sequences.
620    pub trait EncodeDecode {
621        /// Encoded length in number of `u64` words required.
622        fn length_in_words<'a, A>(bytes: &A) -> usize where A : AsBytes<'a>;
623        /// Encoded length in number of `u8` bytes required.
624        ///
625        /// This method should always be eight times `Self::length_in_words`, and is provided for convenience and clarity.
626        fn length_in_bytes<'a, A>(bytes: &A) -> usize where A : AsBytes<'a> { 8 * Self::length_in_words(bytes) }
627        /// Encodes `bytes` into a sequence of `u64`.
628        fn encode<'a, A>(store: &mut Vec<u64>, bytes: &A) where A : AsBytes<'a>;
629        /// Writes `bytes` in the encoded format to an arbitrary writer.
630        fn write<'a, A, W: std::io::Write>(writer: W, bytes: &A) -> std::io::Result<()> where A : AsBytes<'a>;
631        /// Decodes bytes from a sequence of `u64`.
632        fn decode(store: &[u64]) -> impl Iterator<Item=&[u8]>;
633    }
634
635    /// A sequential byte layout for `AsBytes` and `FromBytes` implementors.
636    ///
637    /// The layout is aligned like a sequence of `u64`, where we repeatedly announce a length,
638    /// and then follow it by that many bytes. We may need to follow this with padding bytes.
639    pub use serialization::Sequence;
640    mod serialization {
641
642        use crate::AsBytes;
643
644        /// Encodes and decodes bytes sequences, by prepending the length and appending the all sequences.
645        pub struct Sequence;
646        impl super::EncodeDecode for Sequence {
647            fn length_in_words<'a, A>(bytes: &A) -> usize where A : AsBytes<'a> {
648                // Each byte slice has one `u64` for the length, and then as many `u64`s as needed to hold all bytes.
649                bytes.as_bytes().map(|(_align, bytes)| 1 + bytes.len().div_ceil(8)).sum()
650            }
651            fn encode<'a, A>(store: &mut Vec<u64>, bytes: &A) where A : AsBytes<'a> {
652                encode(store, bytes.as_bytes())
653            }
654            fn write<'a, A, W: std::io::Write>(writer: W, bytes: &A) -> std::io::Result<()> where A : AsBytes<'a> {
655                write(writer, bytes.as_bytes())
656            }
657            fn decode(store: &[u64]) -> impl Iterator<Item=&[u8]> {
658                decode(store)
659            }
660        }
661
662        /// Encodes a sequence of byte slices as their length followed by their bytes, aligned to 8 bytes.
663        ///
664        /// Each length will be exactly 8 bytes, and the bytes that follow are padded out to a multiple of 8 bytes.
665        /// When reading the data, the length is in bytes, and one should consume those bytes and advance over padding.
666        pub fn encode<'a>(store: &mut Vec<u64>, bytes: impl Iterator<Item=(u64, &'a [u8])>) {
667            for (align, bytes) in bytes {
668                assert!(align <= 8);
669                store.push(bytes.len() as u64);
670                let whole_words = 8 * (bytes.len() / 8);
671                // We want to extend `store` by `bytes`, but `bytes` may not be `u64` aligned.
672                // In the latter case, init `store` and cast and copy onto it as a byte slice.
673                if let Ok(words) = bytemuck::try_cast_slice(&bytes[.. whole_words]) {
674                    store.extend_from_slice(words);
675                }
676                else {
677                    let store_len = store.len();
678                    store.resize(store_len + whole_words/8, 0);
679                    let slice = bytemuck::try_cast_slice_mut(&mut store[store_len..]).expect("&[u64] should convert to &[u8]");
680                    slice.copy_from_slice(&bytes[.. whole_words]);
681                }
682                let remaining_bytes = &bytes[whole_words..];
683                if !remaining_bytes.is_empty() {
684                    let mut remainder = 0u64;
685                    let transmute: &mut [u8] = bytemuck::try_cast_slice_mut(std::slice::from_mut(&mut remainder)).expect("&[u64] should convert to &[u8]");
686                    for (i, byte) in remaining_bytes.iter().enumerate() {
687                        transmute[i] = *byte;
688                    }
689                    store.push(remainder);
690                }
691            }
692        }
693
694        /// Writes a sequence of byte slices as their length followed by their bytes, padded to 8 bytes.
695        ///
696        /// Each length will be exactly 8 bytes, and the bytes that follow are padded out to a multiple of 8 bytes.
697        /// When reading the data, the length is in bytes, and one should consume those bytes and advance over padding.
698        pub fn write<'a>(mut writer: impl std::io::Write, bytes: impl Iterator<Item=(u64, &'a [u8])>) -> std::io::Result<()> {
699            // Columnar data is serialized as a sequence of `u64` values, with each `[u8]` slice
700            // serialize as first its length in bytes, and then as many `u64` values as needed.
701            // Padding should be added, but only for alignment; no specific values are required.
702            for (align, bytes) in bytes {
703                assert!(align <= 8);
704                let length = u64::try_from(bytes.len()).unwrap();
705                writer.write_all(bytemuck::cast_slice(std::slice::from_ref(&length)))?;
706                writer.write_all(bytes)?;
707                let padding = usize::try_from((8 - (length % 8)) % 8).unwrap();
708                writer.write_all(&[0; 8][..padding])?;
709            }
710            Ok(())
711        }
712
713        /// Decodes a sequence of byte slices from their length followed by their bytes.
714        ///
715        /// This decoder matches the `encode` function above.
716        /// In particular, it anticipates padding bytes when the length is not a multiple of eight.
717        pub fn decode(store: &[u64]) -> Decoder<'_> {
718            Decoder { store }
719        }
720
721        /// An iterator over byte slices, decoding from a sequence of lengths followed by bytes.
722        pub struct Decoder<'a> {
723            store: &'a [u64],
724        }
725
726        impl<'a> Iterator for Decoder<'a> {
727            type Item = &'a [u8];
728            fn next(&mut self) -> Option<Self::Item> {
729                if let Some(length) = self.store.first() {
730                    let length = *length as usize;
731                    self.store = &self.store[1..];
732                    let whole_words = if length % 8 == 0 { length / 8 } else { length / 8 + 1 };
733                    let bytes: &[u8] = bytemuck::try_cast_slice(&self.store[..whole_words]).expect("&[u64] should convert to &[u8]");
734                    self.store = &self.store[whole_words..];
735                    Some(&bytes[..length])
736                } else {
737                    None
738                }
739            }
740        }
741    }
742
743    /// A binary encoding of sequences of byte slices.
744    ///
745    /// The encoding starts with a sequence of n+1 offsets describing where to find the n slices in the bytes that follow.
746    /// Treating the offsets as a byte slice too, the each offset indicates the location (in bytes) of the end of its slice.
747    /// Each byte slice can be found from a pair of adjacent offsets, where the first is rounded up to a multiple of eight.
748    pub use serialization_neu::Indexed;
749    pub mod serialization_neu {
750
751        use crate::AsBytes;
752
753        /// Encodes and decodes bytes sequences, using an index of offsets.
754        pub struct Indexed;
755        impl super::EncodeDecode for Indexed {
756            fn length_in_words<'a, A>(bytes: &A) -> usize where A : AsBytes<'a> {
757                1 + bytes.as_bytes().map(|(_align, bytes)| 1 + bytes.len().div_ceil(8)).sum::<usize>()
758            }
759            fn encode<'a, A>(store: &mut Vec<u64>, bytes: &A) where A : AsBytes<'a> {
760                encode(store, bytes)
761            }
762            fn write<'a, A, W: std::io::Write>(writer: W, bytes: &A) -> std::io::Result<()> where A : AsBytes<'a> {
763                write(writer, bytes)
764            }
765            fn decode(store: &[u64]) -> impl Iterator<Item=&[u8]> {
766                decode(store)
767            }
768        }
769
770        /// Encodes `item` into `u64` aligned words.
771        ///
772        /// The sequence of byte slices are appended, with padding to have each slice start `u64` aligned.
773        /// The sequence is then pre-pended with as many byte offsets as there are slices in `item`, plus one.
774        /// The byte offsets indicate where each slice ends, and by rounding up to `u64` alignemnt where the next slice begins.
775        /// The first offset indicates where the list of offsets itself ends, and where the first slice begins.
776        ///
777        /// We will need to visit `as_bytes` three times to extract this information, so the method should be efficient and inlined.
778        /// The first read writes the first offset, the second writes each other offset, and the third writes the bytes themselves.
779        ///
780        /// The offsets are zero-based, rather than based on `store.len()`.
781        /// If you call the method with a non-empty `store` be careful decoding.
782        pub fn encode<'a, A>(store: &mut Vec<u64>, iter: &A)
783        where A : AsBytes<'a>,
784        {
785            // Read 1: Number of offsets we will record, equal to the number of slices plus one.
786            // TODO: right-size `store` before first call to `push`.
787            let offsets = 1 + iter.as_bytes().count();
788            let offsets_end: u64 = TryInto::<u64>::try_into((offsets) * std::mem::size_of::<u64>()).unwrap();
789            store.push(offsets_end);
790            // Read 2: Establish each of the offsets based on lengths of byte slices.
791            let mut position_bytes = offsets_end;
792            for (align, bytes) in iter.as_bytes() {
793                assert!(align <= 8);
794                // Write length in bytes, but round up to words before updating `position_bytes`.
795                let to_push: u64 = position_bytes + TryInto::<u64>::try_into(bytes.len()).unwrap();
796                store.push(to_push);
797                let round_len: u64 = ((bytes.len() + 7) & !7).try_into().unwrap();
798                position_bytes += round_len;
799            }
800            // Read 3: Append each byte slice, with padding to align starts to `u64`.
801            for (_align, bytes) in iter.as_bytes() {
802                let whole_words = 8 * (bytes.len() / 8);
803                // We want to extend `store` by `bytes`, but `bytes` may not be `u64` aligned.
804                // In the latter case, init `store` and cast and copy onto it as a byte slice.
805                if let Ok(words) = bytemuck::try_cast_slice(&bytes[.. whole_words]) {
806                    store.extend_from_slice(words);
807                }
808                else {
809                    let store_len = store.len();
810                    store.resize(store_len + whole_words/8, 0);
811                    let slice = bytemuck::try_cast_slice_mut(&mut store[store_len..]).expect("&[u64] should convert to &[u8]");
812                    slice.copy_from_slice(&bytes[.. whole_words]);
813                }
814                let remaining_bytes = &bytes[whole_words..];
815                if !remaining_bytes.is_empty() {
816                    let mut remainder = 0u64;
817                    let transmute: &mut [u8] = bytemuck::try_cast_slice_mut(std::slice::from_mut(&mut remainder)).expect("&[u64] should convert to &[u8]");
818                    for (i, byte) in remaining_bytes.iter().enumerate() {
819                        transmute[i] = *byte;
820                    }
821                    store.push(remainder);
822                }
823            }
824        }
825
826        pub fn write<'a, A, W>(mut writer: W, iter: &A) -> std::io::Result<()>
827        where
828            A: AsBytes<'a>,
829            W: std::io::Write,
830        {
831            // Read 1: Number of offsets we will record, equal to the number of slices plus one.
832            let offsets = 1 + iter.as_bytes().count();
833            let offsets_end: u64 = TryInto::<u64>::try_into((offsets) * std::mem::size_of::<u64>()).unwrap();
834            writer.write_all(bytemuck::cast_slice(std::slice::from_ref(&offsets_end)))?;
835            // Read 2: Establish each of the offsets based on lengths of byte slices.
836            let mut position_bytes = offsets_end;
837            for (align, bytes) in iter.as_bytes() {
838                assert!(align <= 8);
839                // Write length in bytes, but round up to words before updating `position_bytes`.
840                let to_push: u64 = position_bytes + TryInto::<u64>::try_into(bytes.len()).unwrap();
841                writer.write_all(bytemuck::cast_slice(std::slice::from_ref(&to_push)))?;
842                let round_len: u64 = ((bytes.len() + 7) & !7).try_into().unwrap();
843                position_bytes += round_len;
844            }
845            // Read 3: Append each byte slice, with padding to align starts to `u64`.
846            for (_align, bytes) in iter.as_bytes() {
847                writer.write_all(bytes)?;
848                let padding = ((bytes.len() + 7) & !7) - bytes.len();
849                if padding > 0 {
850                    writer.write_all(&[0u8;8][..padding])?;
851                }
852            }
853
854            Ok(())
855        }
856
857        /// Decodes an encoded sequence of byte slices. Each result will be `u64` aligned.
858        pub fn decode(store: &[u64]) -> impl Iterator<Item=&[u8]> {
859            assert!(store[0] % 8 == 0);
860            let slices = (store[0] / 8) - 1;
861            (0 .. slices).map(|i| decode_index(store, i))
862        }
863
864        /// Decodes a specific byte slice by index. It will be `u64` aligned.
865        #[inline(always)]
866        pub fn decode_index(store: &[u64], index: u64) -> &[u8] {
867            debug_assert!(index + 1 < store[0]/8);
868            let index: usize = index.try_into().unwrap();
869            let lower: usize = ((store[index] + 7) & !7).try_into().unwrap();
870            let upper: usize = (store[index + 1]).try_into().unwrap();
871            let bytes: &[u8] = bytemuck::try_cast_slice(store).expect("&[u64] should convert to &[u8]");
872            &bytes[lower .. upper]
873        }
874
875        #[cfg(test)]
876        mod test {
877
878            use crate::{Container, ContainerOf};
879            use crate::common::Push;
880            use crate::AsBytes;
881
882            use super::{encode, decode};
883
884            fn assert_roundtrip<'a, AB: AsBytes<'a>>(item: &AB) {
885                let mut store = Vec::new();
886                encode(&mut store, item);
887                assert!(item.as_bytes().map(|x| x.1).eq(decode(&store)));
888            }
889
890            #[test]
891            fn round_trip() {
892
893                let mut column: ContainerOf<Result<u64, String>> = Default::default();
894                for i in 0..10000u64 {
895                    column.push(&Ok::<u64, String>(i));
896                    column.push(&Err::<u64, String>(format!("{:?}", i)));
897                }
898
899                assert_roundtrip(&column.borrow());
900            }
901        }
902    }
903
904    #[cfg(test)]
905    mod test {
906        use crate::ContainerOf;
907
908        #[test]
909        fn round_trip() {
910
911            use crate::common::{Push, HeapSize, Len, Index};
912            use crate::{Container, AsBytes, FromBytes};
913
914            let mut column: ContainerOf<Result<u64, u64>> = Default::default();
915            for i in 0..100u64 {
916                column.push(Ok::<u64, u64>(i));
917                column.push(Err::<u64, u64>(i));
918            }
919
920            assert_eq!(column.len(), 200);
921            assert_eq!(column.heap_size(), (1624, 2080));
922
923            for i in 0..100 {
924                assert_eq!(column.get(2*i+0), Ok(i as u64));
925                assert_eq!(column.get(2*i+1), Err(i as u64));
926            }
927
928            let column2 = crate::Results::<&[u64], &[u64], &[u64], &[u64], &u64>::from_bytes(&mut column.borrow().as_bytes().map(|(_, bytes)| bytes));
929            for i in 0..100 {
930                assert_eq!(column.get(2*i+0), column2.get(2*i+0).copied().map_err(|e| *e));
931                assert_eq!(column.get(2*i+1), column2.get(2*i+1).copied().map_err(|e| *e));
932            }
933
934            let column3 = crate::Results::<&[u64], &[u64], &[u64], &[u64], &u64>::from_bytes(&mut column2.as_bytes().map(|(_, bytes)| bytes));
935            for i in 0..100 {
936                assert_eq!(column3.get(2*i+0), column2.get(2*i+0));
937                assert_eq!(column3.get(2*i+1), column2.get(2*i+1));
938            }
939        }
940    }
941}
942
943/// Types that prefer to be represented by `Vec<T>`.
944pub mod primitive {
945    use std::num::Wrapping;
946
947    /// An implementation of opinions for types that want to use `Vec<T>`.
948    macro_rules! implement_columnable {
949        ($($index_type:ty),*) => { $(
950            impl crate::Columnar for $index_type {
951                #[inline(always)]
952                fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { *other }
953
954                type Container = Vec<$index_type>;
955            }
956
957            impl crate::HeapSize for $index_type { }
958
959            impl<'a> crate::AsBytes<'a> for &'a [$index_type] {
960                #[inline(always)]
961                fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
962                    std::iter::once((std::mem::align_of::<$index_type>() as u64, bytemuck::cast_slice(&self[..])))
963                }
964            }
965            impl<'a> crate::FromBytes<'a> for &'a [$index_type] {
966                #[inline(always)]
967                fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
968                    // We use `unwrap()` here in order to panic with the `bytemuck` error, which may be informative.
969                    bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()
970                }
971            }
972            impl<'a, const N: usize> crate::AsBytes<'a> for &'a [[$index_type; N]] {
973                #[inline(always)]
974                fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
975                    std::iter::once((std::mem::align_of::<$index_type>() as u64, bytemuck::cast_slice(&self[..])))
976                }
977            }
978            impl<'a, const N: usize> crate::FromBytes<'a> for &'a [[$index_type; N]] {
979                #[inline(always)]
980                fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
981                    // We use `unwrap()` here in order to panic with the `bytemuck` error, which may be informative.
982                    bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()
983                }
984            }
985        )* }
986    }
987
988    implement_columnable!(u8, u16, u32, u64);
989    implement_columnable!(i8, i16, i32, i64);
990    implement_columnable!(f32, f64);
991    implement_columnable!(Wrapping<u8>, Wrapping<u16>, Wrapping<u32>, Wrapping<u64>);
992    implement_columnable!(Wrapping<i8>, Wrapping<i16>, Wrapping<i32>, Wrapping<i64>);
993
994    pub use sizes::{Usizes, Isizes};
995    /// Columnar stores for `usize` and `isize`, stored as 64 bits.
996    mod sizes {
997
998        use crate::{Clear, Columnar, Container, Len, IndexMut, Index, IndexAs, Push, HeapSize};
999        use crate::common::PushIndexAs;
1000
1001        #[derive(Copy, Clone, Default)]
1002        pub struct Usizes<CV = Vec<u64>> { pub values: CV }
1003
1004        impl Columnar for usize {
1005            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
1006            type Container = Usizes;
1007        }
1008
1009        impl<CV: PushIndexAs<u64>> Container for Usizes<CV> {
1010            type Ref<'a> = usize;
1011            type Borrowed<'a> = Usizes<CV::Borrowed<'a>> where CV: 'a;
1012            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1013                Usizes { values: self.values.borrow() }
1014            }
1015            #[inline(always)]
1016            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where CV: 'a {
1017                Usizes { values: CV::reborrow(thing.values) }
1018            }
1019            #[inline(always)]
1020            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1021
1022            #[inline(always)]
1023            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1024                self.values.extend_from_self(other.values, range)
1025            }
1026
1027            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1028                self.values.reserve_for(selves.map(|x| x.values))
1029            }
1030        }
1031
1032        impl<CV: Len> Len for Usizes<CV> { fn len(&self) -> usize { self.values.len() }}
1033        impl IndexMut for Usizes {
1034            type IndexMut<'a> = &'a mut u64;
1035            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> { &mut self.values[index] }
1036        }
1037        impl<CV: IndexAs<u64>> Index for Usizes<CV> {
1038            type Ref = usize;
1039            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self.values.index_as(index).try_into().expect("Usizes values should fit in `usize`") }
1040        }
1041        impl<CV: IndexAs<u64>> Index for &Usizes<CV> {
1042            type Ref = usize;
1043            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self.values.index_as(index).try_into().expect("Usizes values should fit in `usize`") }
1044        }
1045        impl<CV: for<'a> Push<&'a u64>> Push<usize> for Usizes<CV> {
1046            #[inline]
1047            fn push(&mut self, item: usize) { self.values.push(&item.try_into().expect("usize must fit in a u64")) }
1048        }
1049        impl Push<&usize> for Usizes {
1050            #[inline]
1051            fn push(&mut self, item: &usize) { self.values.push((*item).try_into().expect("usize must fit in a u64")) }
1052        }
1053        impl<CV: Clear> Clear for Usizes<CV> { fn clear(&mut self) { self.values.clear() }}
1054
1055        impl<CV: HeapSize> HeapSize for Usizes<CV> {
1056            fn heap_size(&self) -> (usize, usize) {
1057                self.values.heap_size()
1058            }
1059        }
1060
1061        impl<'a, CV: crate::AsBytes<'a>> crate::AsBytes<'a> for crate::primitive::Usizes<CV> {
1062            #[inline(always)]
1063            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1064                self.values.as_bytes()
1065            }
1066        }
1067
1068        impl<'a, CV: crate::FromBytes<'a>> crate::FromBytes<'a> for crate::primitive::Usizes<CV> {
1069            #[inline(always)]
1070            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1071                Self { values: CV::from_bytes(bytes) }
1072            }
1073        }
1074
1075
1076        #[derive(Copy, Clone, Default)]
1077        pub struct Isizes<CV = Vec<i64>> { pub values: CV }
1078
1079        impl Columnar for isize {
1080            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
1081            type Container = Isizes;
1082        }
1083
1084        impl<CV: PushIndexAs<i64>> Container for Isizes<CV> {
1085            type Ref<'a> = isize;
1086            type Borrowed<'a> = Isizes<CV::Borrowed<'a>> where CV: 'a;
1087            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1088                Isizes { values: self.values.borrow() }
1089            }
1090            #[inline(always)]
1091            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where CV: 'a {
1092                Isizes { values: CV::reborrow(thing.values) }
1093            }
1094            #[inline(always)]
1095            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1096
1097            #[inline(always)]
1098            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1099                self.values.extend_from_self(other.values, range)
1100            }
1101
1102            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1103                self.values.reserve_for(selves.map(|x| x.values))
1104            }
1105        }
1106
1107        impl<CV: Len> Len for Isizes<CV> { fn len(&self) -> usize { self.values.len() }}
1108        impl IndexMut for Isizes {
1109            type IndexMut<'a> = &'a mut i64;
1110            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> { &mut self.values[index] }
1111        }
1112        impl<CV: IndexAs<i64>> Index for Isizes<CV> {
1113            type Ref = isize;
1114            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self.values.index_as(index).try_into().expect("Isizes values should fit in `isize`") }
1115        }
1116        impl<CV: IndexAs<i64>> Index for &Isizes<CV> {
1117            type Ref = isize;
1118            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self.values.index_as(index).try_into().expect("Isizes values should fit in `isize`") }
1119        }
1120        impl<CV: for<'a> Push<&'a i64>> Push<isize> for Isizes<CV> {
1121            #[inline]
1122            fn push(&mut self, item: isize) { self.values.push(&item.try_into().expect("isize must fit in a i64")) }
1123        }
1124        impl Push<&isize> for Isizes {
1125            #[inline]
1126            fn push(&mut self, item: &isize) { self.values.push((*item).try_into().expect("isize must fit in a i64")) }
1127        }
1128        impl<CV: Clear> Clear for Isizes<CV> { fn clear(&mut self) { self.values.clear() }}
1129
1130        impl<CV: HeapSize> HeapSize for Isizes<CV> {
1131            fn heap_size(&self) -> (usize, usize) {
1132                self.values.heap_size()
1133            }
1134        }
1135
1136        impl<'a, CV: crate::AsBytes<'a>> crate::AsBytes<'a> for crate::primitive::Isizes<CV> {
1137            #[inline(always)]
1138            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1139                self.values.as_bytes()
1140            }
1141        }
1142
1143        impl<'a, CV: crate::FromBytes<'a>> crate::FromBytes<'a> for crate::primitive::Isizes<CV> {
1144            #[inline(always)]
1145            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1146                Self { values: CV::from_bytes(bytes) }
1147            }
1148        }
1149    }
1150
1151    pub use chars::{Chars};
1152    /// Columnar store for `char`, stored as a `u32`.
1153    mod chars {
1154
1155        use crate::{Clear, Columnar, Container, Len, Index, IndexAs, Push, HeapSize};
1156        use crate::common::PushIndexAs;
1157
1158        type Encoded = u32;
1159
1160        #[derive(Copy, Clone, Default)]
1161        pub struct Chars<CV = Vec<Encoded>> { pub values: CV }
1162
1163        impl Columnar for char {
1164            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
1165            type Container = Chars;
1166        }
1167
1168        impl<CV: PushIndexAs<Encoded>> Container for Chars<CV> {
1169            type Ref<'a> = char;
1170            type Borrowed<'a> = Chars<CV::Borrowed<'a>> where CV: 'a;
1171            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1172                Chars { values: self.values.borrow() }
1173            }
1174            #[inline(always)]
1175            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where CV: 'a {
1176                Chars { values: CV::reborrow(thing.values) }
1177            }
1178            #[inline(always)]
1179            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1180
1181            #[inline(always)]
1182            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1183                self.values.extend_from_self(other.values, range)
1184            }
1185
1186            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1187                self.values.reserve_for(selves.map(|x| x.values))
1188            }
1189        }
1190
1191        impl<CV: Len> Len for Chars<CV> { fn len(&self) -> usize { self.values.len() }}
1192        impl<CV: IndexAs<Encoded>> Index for Chars<CV> {
1193            type Ref = char;
1194            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { char::from_u32(self.values.index_as(index)).unwrap() }
1195        }
1196        impl<CV: IndexAs<Encoded>> Index for &Chars<CV> {
1197            type Ref = char;
1198            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { char::from_u32(self.values.index_as(index)).unwrap() }
1199        }
1200        impl<CV: for<'a> Push<&'a Encoded>> Push<char> for Chars<CV> {
1201            #[inline]
1202            fn push(&mut self, item: char) { self.values.push(&u32::from(item)) }
1203        }
1204        impl Push<&char> for Chars {
1205            #[inline]
1206            fn push(&mut self, item: &char) { self.values.push(u32::from(*item)) }
1207        }
1208        impl<CV: Clear> Clear for Chars<CV> { fn clear(&mut self) { self.values.clear() }}
1209
1210        impl<CV: HeapSize> HeapSize for Chars<CV> {
1211            fn heap_size(&self) -> (usize, usize) {
1212                self.values.heap_size()
1213            }
1214        }
1215
1216        impl<'a, CV: crate::AsBytes<'a>> crate::AsBytes<'a> for Chars<CV> {
1217            #[inline(always)]
1218            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1219                self.values.as_bytes()
1220            }
1221        }
1222
1223        impl<'a, CV: crate::FromBytes<'a>> crate::FromBytes<'a> for Chars<CV> {
1224            #[inline(always)]
1225            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1226                Self { values: CV::from_bytes(bytes) }
1227            }
1228        }
1229    }
1230
1231    pub use larges::{U128s, I128s};
1232    /// Columnar stores for `u128` and `i128`, stored as [u8; 16] bits.
1233    mod larges {
1234
1235        use crate::{Clear, Columnar, Container, Len, Index, IndexAs, Push, HeapSize};
1236        use crate::common::PushIndexAs;
1237
1238        type Encoded = [u8; 16];
1239
1240        #[derive(Copy, Clone, Default)]
1241        pub struct U128s<CV = Vec<Encoded>> { pub values: CV }
1242
1243        impl Columnar for u128 {
1244            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
1245            type Container = U128s;
1246        }
1247
1248        impl<CV: PushIndexAs<Encoded>> Container for U128s<CV> {
1249            type Ref<'a> = u128;
1250            type Borrowed<'a> = U128s<CV::Borrowed<'a>> where CV: 'a;
1251            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1252                U128s { values: self.values.borrow() }
1253            }
1254            #[inline(always)]
1255            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where CV: 'a {
1256                U128s { values: CV::reborrow(thing.values) }
1257            }
1258            #[inline(always)]
1259            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1260
1261            #[inline(always)]
1262            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1263                self.values.extend_from_self(other.values, range)
1264            }
1265
1266            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1267                self.values.reserve_for(selves.map(|x| x.values))
1268            }
1269        }
1270
1271        impl<CV: Len> Len for U128s<CV> { fn len(&self) -> usize { self.values.len() }}
1272        impl<CV: IndexAs<Encoded>> Index for U128s<CV> {
1273            type Ref = u128;
1274            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { u128::from_le_bytes(self.values.index_as(index)) }
1275        }
1276        impl<CV: IndexAs<Encoded>> Index for &U128s<CV> {
1277            type Ref = u128;
1278            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { u128::from_le_bytes(self.values.index_as(index)) }
1279        }
1280        impl<CV: for<'a> Push<&'a Encoded>> Push<u128> for U128s<CV> {
1281            #[inline]
1282            fn push(&mut self, item: u128) { self.values.push(&item.to_le_bytes()) }
1283        }
1284        impl Push<&u128> for U128s {
1285            #[inline]
1286            fn push(&mut self, item: &u128) { self.values.push(item.to_le_bytes()) }
1287        }
1288        impl<CV: Clear> Clear for U128s<CV> { fn clear(&mut self) { self.values.clear() }}
1289
1290        impl<CV: HeapSize> HeapSize for U128s<CV> {
1291            fn heap_size(&self) -> (usize, usize) {
1292                self.values.heap_size()
1293            }
1294        }
1295
1296        impl<'a, CV: crate::AsBytes<'a>> crate::AsBytes<'a> for U128s<CV> {
1297            #[inline(always)]
1298            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1299                self.values.as_bytes()
1300            }
1301        }
1302
1303        impl<'a, CV: crate::FromBytes<'a>> crate::FromBytes<'a> for U128s<CV> {
1304            #[inline(always)]
1305            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1306                Self { values: CV::from_bytes(bytes) }
1307            }
1308        }
1309
1310        #[derive(Copy, Clone, Default)]
1311        pub struct I128s<CV = Vec<Encoded>> { pub values: CV }
1312
1313        impl Columnar for i128 {
1314            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
1315            type Container = I128s;
1316        }
1317
1318        impl<CV: PushIndexAs<Encoded>> Container for I128s<CV> {
1319            type Ref<'a> = i128;
1320            type Borrowed<'a> = I128s<CV::Borrowed<'a>> where CV: 'a;
1321            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1322                I128s { values: self.values.borrow() }
1323            }
1324            #[inline(always)]
1325            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where CV: 'a {
1326                I128s { values: CV::reborrow(thing.values) }
1327            }
1328            #[inline(always)]
1329            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1330
1331            #[inline(always)]
1332            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1333                self.values.extend_from_self(other.values, range)
1334            }
1335
1336            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1337                self.values.reserve_for(selves.map(|x| x.values))
1338            }
1339        }
1340
1341        impl<CV: Len> Len for I128s<CV> { fn len(&self) -> usize { self.values.len() }}
1342        impl<CV: IndexAs<Encoded>> Index for I128s<CV> {
1343            type Ref = i128;
1344            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { i128::from_le_bytes(self.values.index_as(index)) }
1345        }
1346        impl<CV: IndexAs<Encoded>> Index for &I128s<CV> {
1347            type Ref = i128;
1348            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { i128::from_le_bytes(self.values.index_as(index)) }
1349        }
1350        impl<CV: for<'a> Push<&'a Encoded>> Push<i128> for I128s<CV> {
1351            #[inline]
1352            fn push(&mut self, item: i128) { self.values.push(&item.to_le_bytes()) }
1353        }
1354        impl Push<&i128> for I128s {
1355            #[inline]
1356            fn push(&mut self, item: &i128) { self.values.push(item.to_le_bytes()) }
1357        }
1358        impl<CV: Clear> Clear for I128s<CV> { fn clear(&mut self) { self.values.clear() }}
1359
1360        impl<CV: HeapSize> HeapSize for I128s<CV> {
1361            fn heap_size(&self) -> (usize, usize) {
1362                self.values.heap_size()
1363            }
1364        }
1365
1366        impl<'a, CV: crate::AsBytes<'a>> crate::AsBytes<'a> for I128s<CV> {
1367            #[inline(always)]
1368            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1369                self.values.as_bytes()
1370            }
1371        }
1372
1373        impl<'a, CV: crate::FromBytes<'a>> crate::FromBytes<'a> for I128s<CV> {
1374            #[inline(always)]
1375            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1376                Self { values: CV::from_bytes(bytes) }
1377            }
1378        }
1379    }
1380
1381    /// Columnar stores for non-decreasing `u64`, stored in various ways.
1382    ///
1383    /// The venerable `Vec<u64>` works as a general container for arbitrary offests,
1384    /// but it can be non-optimal for various patterns of offset, including constant
1385    /// inter-offset spacing, and relatively short runs (compared to a `RankSelect`).
1386    pub mod offsets {
1387
1388        pub use array::Fixeds;
1389        pub use stride::Strides;
1390
1391        /// An offset container that encodes a constant spacing in its type.
1392        ///
1393        /// Any attempt to push any value will result in pushing the next value
1394        /// at the specified spacing. This type is only appropriate in certain
1395        /// contexts, for example when storing `[T; K]` array types, or having
1396        /// introspected a `Strides` and found it to be only one constant stride.
1397        mod array {
1398
1399            use crate::{Container, Index, Len, Push};
1400            use crate::common::index::CopyAs;
1401
1402            /// An offset container that encodes a constant `K` spacing.
1403            #[derive(Copy, Clone, Debug, Default)]
1404            pub struct Fixeds<const K: u64, CC = u64> { pub count: CC }
1405
1406            impl<const K: u64> Container for Fixeds<K> {
1407                type Ref<'a> = u64;
1408                type Borrowed<'a> = Fixeds<K, &'a u64>;
1409                #[inline(always)]
1410                fn borrow<'a>(&'a self) -> Self::Borrowed<'a> { Fixeds { count: &self.count } }
1411                #[inline(always)]
1412                fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where Self: 'a {
1413                    Fixeds { count: thing.count }
1414                }
1415                #[inline(always)]
1416                fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1417
1418                #[inline(always)]
1419                fn extend_from_self(&mut self, _other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1420                    self.count += range.len() as u64;
1421                }
1422
1423                fn reserve_for<'a, I>(&mut self, _selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone { }
1424            }
1425
1426            impl<const K: u64, CC: CopyAs<u64>> Len for Fixeds<K, CC> {
1427                #[inline(always)] fn len(&self) -> usize { self.count.copy_as() as usize }
1428            }
1429
1430            impl<const K: u64, CC> Index for Fixeds<K, CC> {
1431                type Ref = u64;
1432                #[inline(always)]
1433                fn get(&self, index: usize) -> Self::Ref { (index as u64 + 1) * K }
1434            }
1435            impl<'a, const K: u64, CC> Index for &'a Fixeds<K, CC> {
1436                type Ref = u64;
1437                #[inline(always)]
1438                fn get(&self, index: usize) -> Self::Ref { (index as u64 + 1) * K }
1439            }
1440
1441            impl<'a, const K: u64, T> Push<T> for Fixeds<K> {
1442                // TODO: check for overflow?
1443                #[inline(always)]
1444                fn push(&mut self, _item: T) { self.count += 1; }
1445                #[inline(always)]
1446                fn extend(&mut self, iter: impl IntoIterator<Item=T>) {
1447                    self.count += iter.into_iter().count() as u64;
1448                }
1449            }
1450
1451            impl<const K: u64> crate::HeapSize for Fixeds<K> {
1452                #[inline(always)]
1453                fn heap_size(&self) -> (usize, usize) { (0, 0) }
1454            }
1455            impl<const K: u64> crate::Clear for Fixeds<K> {
1456                #[inline(always)]
1457                fn clear(&mut self) { self.count = 0; }
1458            }
1459
1460            impl<'a, const K: u64> crate::AsBytes<'a> for Fixeds<K, &'a u64> {
1461                #[inline(always)]
1462                fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1463                    std::iter::once((8, bytemuck::cast_slice(std::slice::from_ref(self.count))))
1464                }
1465            }
1466            impl<'a, const K: u64> crate::FromBytes<'a> for Fixeds<K, &'a u64> {
1467                #[inline(always)]
1468                fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1469                    Self { count: &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0] }
1470                }
1471            }
1472
1473            use super::Strides;
1474            impl<const K: u64, BC: Len, CC: CopyAs<u64>> std::convert::TryFrom<Strides<BC, CC>> for Fixeds<K, CC> {
1475                // On error we return the original.
1476                type Error = Strides<BC, CC>;
1477                fn try_from(item: Strides<BC, CC>) -> Result<Self, Self::Error> {
1478                    if item.strided() == Some(K) { Ok( Self { count: item.length } ) } else { Err(item) }
1479                }
1480            }
1481        }
1482
1483        /// An general offset container optimized for fixed inter-offset sizes.
1484        ///
1485        /// Although it can handle general offsets, it starts with the optimistic
1486        /// assumption that the offsets will be evenly spaced from zero, and while
1487        /// that holds it will maintain the stride and length. Should it stop being
1488        /// true, when a non-confirming offset is pushed, it will start to store
1489        /// the offsets in a general container.
1490        mod stride {
1491
1492            use std::ops::Deref;
1493            use crate::{Container, Index, Len, Push, Clear, AsBytes, FromBytes};
1494            use crate::common::index::CopyAs;
1495
1496            /// The first two integers describe a stride pattern, [stride, length].
1497            ///
1498            /// If the length is zero the collection is empty. The first `item` pushed
1499            /// always becomes the first list element. The next element is the number of
1500            /// items at position `i` whose value is `item * (i+1)`. After this comes
1501            /// the remaining entries in the bounds container.
1502            #[derive(Copy, Clone, Debug, Default)]
1503            pub struct Strides<BC = Vec<u64>, CC = u64> {
1504                pub stride: CC,
1505                pub length: CC,
1506                pub bounds: BC,
1507            }
1508
1509            impl Container for Strides {
1510                type Ref<'a> = u64;
1511                type Borrowed<'a> = Strides<&'a [u64], &'a u64>;
1512
1513                #[inline(always)] fn borrow<'a>(&'a self) -> Self::Borrowed<'a> { Strides { stride: &self.stride, length: &self.length, bounds: &self.bounds[..] } }
1514                /// Reborrows the borrowed type to a shorter lifetime. See [`Columnar::reborrow`] for details.
1515                #[inline(always)] fn reborrow<'b, 'a: 'b>(item: Self::Borrowed<'a>) -> Self::Borrowed<'b> where Self: 'a {
1516                    Strides { stride: item.stride, length: item.length, bounds: item.bounds }
1517                }
1518                /// Reborrows the borrowed type to a shorter lifetime. See [`Columnar::reborrow`] for details.
1519                 #[inline(always)]fn reborrow_ref<'b, 'a: 'b>(item: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { item }
1520
1521                fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1522                    self.bounds.reserve_for(selves.map(|x| x.bounds))
1523                }
1524            }
1525
1526            impl<'a> Push<&'a u64> for Strides { #[inline(always)] fn push(&mut self, item: &'a u64) { self.push(*item) } }
1527            impl Push<u64> for Strides { #[inline(always)] fn push(&mut self, item: u64) { self.push(item) } }
1528            impl Clear for Strides { #[inline(always)] fn clear(&mut self) { self.clear() } }
1529
1530            impl<BC: Len, CC: CopyAs<u64>> Len for Strides<BC, CC> {
1531                #[inline(always)]
1532                fn len(&self) -> usize { self.length.copy_as() as usize + self.bounds.len() }
1533            }
1534            impl Index for Strides<&[u64], &u64> {
1535                type Ref = u64;
1536                #[inline(always)]
1537                fn get(&self, index: usize) -> Self::Ref {
1538                    let index = index as u64;
1539                    if index < *self.length { (index+1) * self.stride } else { self.bounds[(index - self.length) as usize] }
1540                }
1541            }
1542
1543            impl<'a, BC: AsBytes<'a>> AsBytes<'a> for Strides<BC, &'a u64> {
1544                #[inline(always)]
1545                fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1546                    let stride = std::iter::once((8, bytemuck::cast_slice(std::slice::from_ref(self.stride))));
1547                    let length = std::iter::once((8, bytemuck::cast_slice(std::slice::from_ref(self.length))));
1548                    let bounds = self.bounds.as_bytes();
1549                    crate::chain(stride, crate::chain(length, bounds))
1550                }
1551            }
1552            impl<'a, BC: FromBytes<'a>> FromBytes<'a> for Strides<BC, &'a u64> {
1553                #[inline(always)]
1554                fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1555                    let stride = &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0];
1556                    let length = &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0];
1557                    let bounds = BC::from_bytes(bytes);
1558                    Self { stride, length, bounds }
1559                }
1560            }
1561
1562            impl Strides {
1563                pub fn new(stride: u64, length: u64) -> Self {
1564                    Self { stride, length, bounds: Vec::default() }
1565                }
1566                #[inline(always)]
1567                pub fn push(&mut self, item: u64) {
1568                    if self.length == 0 {
1569                        self.stride = item;
1570                        self.length = 1;
1571                    }
1572                    else if !self.bounds.is_empty() {
1573                        self.bounds.push(item);
1574                    }
1575                    else if item == self.stride * (self.length + 1) {
1576                        self.length += 1;
1577                    }
1578                    else {
1579                        self.bounds.push(item);
1580                    }
1581                }
1582                #[inline(always)]
1583                pub fn clear(&mut self) {
1584                    self.stride = 0;
1585                    self.length = 0;
1586                    self.bounds.clear();
1587                }
1588            }
1589
1590            impl<BC: Deref<Target=[u64]>, CC: CopyAs<u64>> Strides<BC, CC> {
1591                #[inline(always)]
1592                pub fn bounds(&self, index: usize) -> (usize, usize) {
1593                    let stride = self.stride.copy_as();
1594                    let length = self.length.copy_as();
1595                    let index = index as u64;
1596                    let lower = if index == 0 { 0 } else {
1597                        let index = index - 1;
1598                        if index < length { (index+1) * stride } else { self.bounds[(index - length) as usize] }
1599                    } as usize;
1600                    let upper = if index < length { (index+1) * stride } else { self.bounds[(index - length) as usize] } as usize;
1601                    (lower, upper)
1602                }
1603            }
1604            impl<BC: Len, CC: CopyAs<u64>> Strides<BC, CC> {
1605                #[inline(always)] pub fn strided(&self) -> Option<u64> {
1606                    if self.bounds.is_empty() {
1607                        Some(self.stride.copy_as())
1608                    }
1609                    else { None }
1610                }
1611            }
1612        }
1613
1614        #[cfg(test)]
1615        mod test {
1616            #[test]
1617            fn round_trip() {
1618
1619                use crate::common::{Index, Push, Len};
1620                use crate::{Container, Vecs};
1621                use crate::primitive::offsets::{Strides, Fixeds};
1622
1623                let mut cols = Vecs::<Vec::<i32>, Strides>::default();
1624                for i in 0 .. 100 {
1625                    cols.push(&[1i32, 2, i]);
1626                }
1627
1628                let cols = Vecs {
1629                    bounds: TryInto::<Fixeds<3>>::try_into(cols.bounds).unwrap(),
1630                    values: cols.values,
1631                };
1632
1633                assert_eq!(cols.borrow().len(), 100);
1634                for i in 0 .. 100 {
1635                    assert_eq!(cols.borrow().get(i).len(), 3);
1636                }
1637
1638                let mut cols = Vecs {
1639                    bounds: Strides::new(3, cols.bounds.count),
1640                    values: cols.values
1641                };
1642
1643                cols.push(&[0, 0]);
1644                assert!(TryInto::<Fixeds<3>>::try_into(cols.bounds).is_err());
1645            }
1646        }
1647    }
1648
1649    pub use empty::Empties;
1650    /// A columnar store for `()`.
1651    mod empty {
1652
1653        use crate::common::index::CopyAs;
1654        use crate::{Clear, Columnar, Container, Len, IndexMut, Index, Push, HeapSize};
1655
1656        #[derive(Copy, Clone, Debug, Default)]
1657        pub struct Empties<CC = u64> { pub count: CC, pub empty: () }
1658
1659        impl Columnar for () {
1660            #[inline(always)]
1661            fn into_owned<'a>(_other: crate::Ref<'a, Self>) -> Self { }
1662            type Container = Empties;
1663        }
1664
1665        impl Container for Empties {
1666            type Ref<'a> = ();
1667            type Borrowed<'a> = Empties<&'a u64>;
1668            #[inline(always)]
1669            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> { Empties { count: &self.count, empty: () } }
1670            #[inline(always)]
1671            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where Self: 'a {
1672                Empties { count: thing.count, empty: () }
1673            }
1674            #[inline(always)]
1675            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1676
1677            #[inline(always)]
1678            fn extend_from_self(&mut self, _other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1679                self.count += range.len() as u64;
1680            }
1681
1682            fn reserve_for<'a, I>(&mut self, _selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone { }
1683        }
1684
1685        impl<CC: CopyAs<u64>> Len for Empties<CC> {
1686            #[inline(always)] fn len(&self) -> usize { self.count.copy_as() as usize }
1687        }
1688        impl<CC> IndexMut for Empties<CC> {
1689            type IndexMut<'a> = &'a mut () where CC: 'a;
1690            // TODO: panic if out of bounds?
1691            #[inline(always)] fn get_mut(&mut self, _index: usize) -> Self::IndexMut<'_> { &mut self.empty }
1692        }
1693        impl<CC> Index for Empties<CC> {
1694            type Ref = ();
1695            #[inline(always)]
1696            fn get(&self, _index: usize) -> Self::Ref { }
1697        }
1698        impl<'a, CC> Index for &'a Empties<CC> {
1699            type Ref = &'a ();
1700            #[inline(always)]
1701            fn get(&self, _index: usize) -> Self::Ref { &() }
1702        }
1703        impl Push<()> for Empties {
1704            // TODO: check for overflow?
1705            #[inline(always)]
1706            fn push(&mut self, _item: ()) { self.count += 1; }
1707            #[inline(always)]
1708            fn extend(&mut self, iter: impl IntoIterator<Item=()>) {
1709                self.count += iter.into_iter().count() as u64;
1710            }
1711        }
1712        impl<'a> Push<&'a ()> for Empties {
1713            // TODO: check for overflow?
1714            #[inline(always)]
1715            fn push(&mut self, _item: &()) { self.count += 1; }
1716            #[inline(always)]
1717            fn extend(&mut self, iter: impl IntoIterator<Item=&'a ()>) {
1718                self.count += iter.into_iter().count() as u64;
1719            }
1720        }
1721
1722        impl HeapSize for Empties {
1723            #[inline(always)]
1724            fn heap_size(&self) -> (usize, usize) { (0, 0) }
1725        }
1726        impl Clear for Empties {
1727            #[inline(always)]
1728            fn clear(&mut self) { self.count = 0; }
1729        }
1730
1731        impl<'a> crate::AsBytes<'a> for crate::primitive::Empties<&'a u64> {
1732            #[inline(always)]
1733            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1734                std::iter::once((8, bytemuck::cast_slice(std::slice::from_ref(self.count))))
1735            }
1736        }
1737        impl<'a> crate::FromBytes<'a> for crate::primitive::Empties<&'a u64> {
1738            #[inline(always)]
1739            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1740                Self { count: &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0], empty: () }
1741            }
1742        }
1743    }
1744
1745    pub use boolean::Bools;
1746    /// A columnar store for `bool`.
1747    mod boolean {
1748
1749        use crate::common::index::CopyAs;
1750        use crate::{Container, Clear, Len, Index, IndexAs, Push, HeapSize};
1751
1752        /// A store for maintaining `Vec<bool>`.
1753        #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1754        #[derive(Copy, Clone, Debug, Default, PartialEq)]
1755        pub struct Bools<VC = Vec<u64>, WC = u64> {
1756            /// The bundles of bits that form complete `u64` values.
1757            pub values: VC,
1758            /// The work-in-progress bits that are not yet complete.
1759            pub last_word: WC,
1760            /// The number of set bits in `bits.last()`.
1761            pub last_bits: WC,
1762        }
1763
1764        impl crate::Columnar for bool {
1765            #[inline(always)]
1766            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
1767            type Container = Bools;
1768        }
1769
1770        impl<VC: crate::common::PushIndexAs<u64>> Container for Bools<VC> {
1771            type Ref<'a> = bool;
1772            type Borrowed<'a> = Bools<VC::Borrowed<'a>, &'a u64> where VC: 'a;
1773            #[inline(always)]
1774            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1775                Bools {
1776                    values: self.values.borrow(),
1777                    last_word: &self.last_word,
1778                    last_bits: &self.last_bits,
1779                }
1780            }
1781            #[inline(always)]
1782            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where VC: 'a {
1783                Bools {
1784                    values: VC::reborrow(thing.values),
1785                    last_word: thing.last_word,
1786                    last_bits: thing.last_bits,
1787                }
1788            }
1789            #[inline(always)]
1790            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1791
1792            // TODO: There is probably a smart way to implement `extend_from_slice`, but it isn't trivial due to alignment.
1793
1794            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1795                self.values.reserve_for(selves.map(|x| x.values))
1796            }
1797        }
1798
1799        impl<'a, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for crate::primitive::Bools<VC, &'a u64> {
1800            #[inline(always)]
1801            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1802                let iter = self.values.as_bytes();
1803                let iter = crate::chain_one(iter, (std::mem::align_of::<u64>() as u64, bytemuck::cast_slice(std::slice::from_ref(self.last_word))));
1804                crate::chain_one(iter, (std::mem::align_of::<u64>() as u64, bytemuck::cast_slice(std::slice::from_ref(self.last_bits))))
1805            }
1806        }
1807
1808        impl<'a, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for crate::primitive::Bools<VC, &'a u64> {
1809            #[inline(always)]
1810            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1811                let values = crate::FromBytes::from_bytes(bytes);
1812                let last_word = &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0];
1813                let last_bits = &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0];
1814                Self { values, last_word, last_bits }
1815            }
1816        }
1817
1818        impl<VC: Len, WC: CopyAs<u64>> Len for Bools<VC, WC> {
1819            #[inline(always)] fn len(&self) -> usize { self.values.len() * 64 + (self.last_bits.copy_as() as usize) }
1820        }
1821
1822        impl<VC: Len + IndexAs<u64>, WC: CopyAs<u64>> Index for Bools<VC, WC> {
1823            type Ref = bool;
1824            #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
1825                let block = index / 64;
1826                let word = if block == self.values.len() {
1827                    self.last_word.copy_as()
1828                } else {
1829                    self.values.index_as(block)
1830                };
1831                let bit = index % 64;
1832                (word >> bit) & 1 == 1
1833            }
1834        }
1835
1836        impl<VC: Len + IndexAs<u64>, WC: CopyAs<u64>> Index for &Bools<VC, WC> {
1837            type Ref = bool;
1838            #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
1839                (*self).get(index)
1840            }
1841        }
1842
1843        impl<VC: for<'a> Push<&'a u64>> Push<bool> for Bools<VC> {
1844            #[inline]
1845            fn push(&mut self, bit: bool) {
1846                self.last_word |= (bit as u64) << self.last_bits;
1847                self.last_bits += 1;
1848                // If we have a fully formed word, commit it to `self.values`.
1849                if self.last_bits == 64 {
1850                    self.values.push(&self.last_word);
1851                    self.last_word = 0;
1852                    self.last_bits = 0;
1853                }
1854            }
1855        }
1856        impl<'a, VC: for<'b> Push<&'b u64>> Push<&'a bool> for Bools<VC> {
1857            #[inline(always)]
1858            fn push(&mut self, bit: &'a bool) {
1859                self.push(*bit)
1860            }
1861        }
1862
1863
1864        impl<VC: Clear> Clear for Bools<VC> {
1865            #[inline(always)]
1866            fn clear(&mut self) {
1867                self.values.clear();
1868                self.last_word = 0;
1869                self.last_bits = 0;
1870            }
1871        }
1872
1873        impl<VC: HeapSize> HeapSize for Bools<VC> {
1874            #[inline(always)]
1875            fn heap_size(&self) -> (usize, usize) {
1876                self.values.heap_size()
1877            }
1878        }
1879    }
1880
1881    pub use duration::Durations;
1882    /// A columnar store for `std::time::Duration`.
1883    mod duration {
1884
1885        use std::time::Duration;
1886        use crate::{Container, Len, Index, IndexAs, Push, Clear, HeapSize};
1887
1888        // `std::time::Duration` is equivalent to `(u64, u32)`, corresponding to seconds and nanoseconds.
1889        #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1890        #[derive(Copy, Clone, Debug, Default, PartialEq)]
1891        pub struct Durations<SC = Vec<u64>, NC = Vec<u32>> {
1892            pub seconds: SC,
1893            pub nanoseconds: NC,
1894        }
1895
1896        impl crate::Columnar for Duration {
1897            #[inline(always)]
1898            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
1899            type Container = Durations;
1900        }
1901
1902        impl<SC: crate::common::PushIndexAs<u64>, NC: crate::common::PushIndexAs<u32>> Container for Durations<SC, NC> {
1903            type Ref<'a> = Duration;
1904            type Borrowed<'a> = Durations<SC::Borrowed<'a>, NC::Borrowed<'a>> where SC: 'a, NC: 'a;
1905            #[inline(always)]
1906            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1907                Durations {
1908                    seconds: self.seconds.borrow(),
1909                    nanoseconds: self.nanoseconds.borrow(),
1910                }
1911            }
1912            #[inline(always)]
1913            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where SC: 'a, NC: 'a {
1914                Durations {
1915                    seconds: SC::reborrow(thing.seconds),
1916                    nanoseconds: NC::reborrow(thing.nanoseconds),
1917                }
1918            }
1919            #[inline(always)]
1920            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1921
1922            #[inline(always)]
1923            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1924                self.seconds.extend_from_self(other.seconds, range.clone());
1925                self.nanoseconds.extend_from_self(other.nanoseconds, range);
1926            }
1927
1928            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1929                self.seconds.reserve_for(selves.clone().map(|x| x.seconds));
1930                self.nanoseconds.reserve_for(selves.map(|x| x.nanoseconds));
1931            }
1932        }
1933
1934        impl<'a, SC: crate::AsBytes<'a>, NC: crate::AsBytes<'a>> crate::AsBytes<'a> for crate::primitive::Durations<SC, NC> {
1935            #[inline(always)]
1936            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1937                crate::chain(self.seconds.as_bytes(), self.nanoseconds.as_bytes())
1938            }
1939        }
1940        impl<'a, SC: crate::FromBytes<'a>, NC: crate::FromBytes<'a>> crate::FromBytes<'a> for crate::primitive::Durations<SC, NC> {
1941            #[inline(always)]
1942            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1943                Self {
1944                    seconds: crate::FromBytes::from_bytes(bytes),
1945                    nanoseconds: crate::FromBytes::from_bytes(bytes),
1946                }
1947            }
1948        }
1949
1950        impl<SC: Len, NC> Len for Durations<SC, NC> {
1951            #[inline(always)] fn len(&self) -> usize { self.seconds.len() }
1952        }
1953
1954        impl<SC: IndexAs<u64>, NC: IndexAs<u32>> Index for Durations<SC, NC> {
1955            type Ref = Duration;
1956            #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
1957                Duration::new(self.seconds.index_as(index), self.nanoseconds.index_as(index))
1958            }
1959        }
1960
1961        impl<SC: for<'a> Push<&'a u64>, NC: for<'a> Push<&'a u32>> Push<std::time::Duration> for Durations<SC, NC> {
1962            #[inline]
1963            fn push(&mut self, item: std::time::Duration) {
1964                self.seconds.push(&item.as_secs());
1965                self.nanoseconds.push(&item.subsec_nanos());
1966            }
1967        }
1968        impl<'a, SC: for<'b> Push<&'b u64>, NC: for<'b> Push<&'b u32>> Push<&'a std::time::Duration> for Durations<SC, NC> {
1969            #[inline]
1970            fn push(&mut self, item: &'a std::time::Duration) {
1971                self.push(*item)
1972            }
1973        }
1974        impl<'a, SC: Push<&'a u64>, NC: Push<&'a u32>> Push<(&'a u64, &'a u32)> for Durations<SC, NC> {
1975            #[inline]
1976            fn push(&mut self, item: (&'a u64, &'a u32)) {
1977                self.seconds.push(item.0);
1978                self.nanoseconds.push(item.1);
1979            }
1980        }
1981
1982        impl<SC: Clear, NC: Clear> Clear for Durations<SC, NC> {
1983            #[inline(always)]
1984            fn clear(&mut self) {
1985                self.seconds.clear();
1986                self.nanoseconds.clear();
1987            }
1988        }
1989
1990        impl<SC: HeapSize, NC: HeapSize> HeapSize for Durations<SC, NC> {
1991            #[inline(always)]
1992            fn heap_size(&self) -> (usize, usize) {
1993                let (l0, c0) = self.seconds.heap_size();
1994                let (l1, c1) = self.nanoseconds.heap_size();
1995                (l0 + l1, c0 + c1)
1996            }
1997        }
1998    }
1999}
2000
2001pub use string::Strings;
2002pub mod string {
2003
2004    use super::{Clear, Columnar, Container, Len, Index, IndexAs, Push, HeapSize};
2005
2006    /// A stand-in for `Vec<String>`.
2007    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
2008    #[derive(Copy, Clone, Debug, Default, PartialEq)]
2009    pub struct Strings<BC = Vec<u64>, VC = Vec<u8>> {
2010        /// Bounds container; provides indexed access to offsets.
2011        pub bounds: BC,
2012        /// Values container; provides slice access to bytes.
2013        pub values: VC,
2014    }
2015
2016    impl Columnar for String {
2017        #[inline(always)]
2018        fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
2019            self.clear();
2020            self.push_str(other);
2021        }
2022        #[inline(always)]
2023        fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other.to_string() }
2024        type Container = Strings;
2025    }
2026
2027    impl<BC: crate::common::PushIndexAs<u64>> Container for Strings<BC, Vec<u8>> {
2028        type Ref<'a> = &'a str;
2029        type Borrowed<'a> = Strings<BC::Borrowed<'a>, &'a [u8]> where BC: 'a;
2030        #[inline(always)]
2031        fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
2032            Strings {
2033                bounds: self.bounds.borrow(),
2034                values: self.values.borrow(),
2035            }
2036        }
2037        #[inline(always)]
2038        fn reborrow<'c, 'a: 'c>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'c> where BC: 'a {
2039            Strings {
2040                bounds: BC::reborrow(thing.bounds),
2041                values: thing.values,
2042            }
2043        }
2044        #[inline(always)]
2045        fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
2046
2047        #[inline(always)]
2048        fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
2049            if !range.is_empty() {
2050                // Imported bounds will be relative to this starting offset.
2051                let values_len = self.values.len() as u64;
2052
2053                // Push all bytes that we can, all at once.
2054                let other_lower = if range.start == 0 { 0 } else { other.bounds.index_as(range.start-1) };
2055                let other_upper = other.bounds.index_as(range.end-1);
2056                self.values.extend_from_self(other.values, other_lower as usize .. other_upper as usize);
2057
2058                // Each bound needs to be shifted by `values_len - other_lower`.
2059                if values_len == other_lower {
2060                    self.bounds.extend_from_self(other.bounds, range);
2061                }
2062                else {
2063                    for index in range {
2064                        let shifted = other.bounds.index_as(index) - other_lower + values_len;
2065                        self.bounds.push(&shifted)
2066                    }
2067                }
2068            }
2069        }
2070
2071        fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
2072            self.bounds.reserve_for(selves.clone().map(|x| x.bounds));
2073            self.values.reserve_for(selves.map(|x| x.values));
2074        }
2075
2076    }
2077
2078    impl<'a, BC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for Strings<BC, VC> {
2079        #[inline(always)]
2080        fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
2081            crate::chain(self.bounds.as_bytes(), self.values.as_bytes())
2082        }
2083    }
2084    impl<'a, BC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for Strings<BC, VC> {
2085        #[inline(always)]
2086        fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
2087            Self {
2088                bounds: crate::FromBytes::from_bytes(bytes),
2089                values: crate::FromBytes::from_bytes(bytes),
2090            }
2091        }
2092    }
2093
2094    impl<BC: Len, VC> Len for Strings<BC, VC> {
2095        #[inline(always)] fn len(&self) -> usize { self.bounds.len() }
2096    }
2097
2098    impl<'a, BC: Len+IndexAs<u64>> Index for Strings<BC, &'a [u8]> {
2099        type Ref = &'a str;
2100        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
2101            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
2102            let upper = self.bounds.index_as(index);
2103            let lower: usize = lower.try_into().expect("bounds must fit in `usize`");
2104            let upper: usize = upper.try_into().expect("bounds must fit in `usize`");
2105            std::str::from_utf8(&self.values[lower .. upper]).expect("&[u8] must be valid utf8")
2106        }
2107    }
2108    impl<'a, BC: Len+IndexAs<u64>> Index for &'a Strings<BC, Vec<u8>> {
2109        type Ref = &'a str;
2110        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
2111            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
2112            let upper = self.bounds.index_as(index);
2113            let lower: usize = lower.try_into().expect("bounds must fit in `usize`");
2114            let upper: usize = upper.try_into().expect("bounds must fit in `usize`");
2115            std::str::from_utf8(&self.values[lower .. upper]).expect("&[u8] must be valid utf8")
2116        }
2117    }
2118
2119    // This is a simpler implementation, but it leads to a performance regression
2120    // for Strings and str because it loses access to `Vec::extend_from_slice`.
2121    //
2122    // impl<BC: Push<u64>, D: std::fmt::Display> Push<D> for Strings<BC> {
2123    //     #[inline(always)]
2124    //     fn push(&mut self, item: D) {
2125    //         use std::io::Write;
2126    //         write!(self.values, "{}", item).unwrap();
2127    //         self.bounds.push(self.values.len() as u64);
2128    //     }
2129    // }
2130
2131    impl<BC: for<'a> Push<&'a u64>> Push<&String> for Strings<BC> {
2132        #[inline(always)] fn push(&mut self, item: &String) {
2133            self.values.extend_from_slice(item.as_bytes());
2134            self.bounds.push(&(self.values.len() as u64));
2135        }
2136    }
2137    impl<BC: for<'a> Push<&'a u64>> Push<&str> for Strings<BC> {
2138        #[inline]
2139        fn push(&mut self, item: &str) {
2140            self.values.extend_from_slice(item.as_bytes());
2141            self.bounds.push(&(self.values.len() as u64));
2142        }
2143    }
2144    impl<'a, BC: for<'b> Push<&'b u64>> Push<std::fmt::Arguments<'a>> for Strings<BC> {
2145        #[inline]
2146        fn push(&mut self, item: std::fmt::Arguments<'a>) {
2147            use std::io::Write;
2148            self.values.write_fmt(item).expect("write_fmt failed");
2149            self.bounds.push(&(self.values.len() as u64));
2150        }
2151    }
2152    impl<'a, 'b, BC: for<'c> Push<&'c u64>> Push<&'b std::fmt::Arguments<'a>> for Strings<BC> {
2153        #[inline]
2154        fn push(&mut self, item: &'b std::fmt::Arguments<'a>) {
2155            use std::io::Write;
2156            self.values.write_fmt(*item).expect("write_fmt failed");
2157            self.bounds.push(&(self.values.len() as u64));
2158        }
2159    }
2160    impl<BC: Clear, VC: Clear> Clear for Strings<BC, VC> {
2161        #[inline(always)]
2162        fn clear(&mut self) {
2163            self.bounds.clear();
2164            self.values.clear();
2165        }
2166    }
2167    impl<BC: HeapSize, VC: HeapSize> HeapSize for Strings<BC, VC> {
2168        #[inline(always)]
2169        fn heap_size(&self) -> (usize, usize) {
2170            let (l0, c0) = self.bounds.heap_size();
2171            let (l1, c1) = self.values.heap_size();
2172            (l0 + l1, c0 + c1)
2173        }
2174    }
2175}
2176
2177pub use vector::Vecs;
2178pub mod vector {
2179
2180    use super::{Clear, Columnar, Container, Len, IndexMut, Index, IndexAs, Push, HeapSize, Slice};
2181
2182    /// A stand-in for `Vec<Vec<T>>` for complex `T`.
2183    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
2184    #[derive(Debug, Default, Copy, Clone, PartialEq)]
2185    pub struct Vecs<TC, BC = Vec<u64>> {
2186        pub bounds: BC,
2187        pub values: TC,
2188    }
2189
2190    impl<T: Columnar> Columnar for Vec<T> {
2191        #[inline(always)]
2192        fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
2193            self.truncate(other.len());
2194            let mut other_iter = other.into_iter();
2195            for (s, o) in self.iter_mut().zip(&mut other_iter) {
2196                T::copy_from(s, o);
2197            }
2198            for o in other_iter {
2199                self.push(T::into_owned(o));
2200            }
2201        }
2202        #[inline(always)]
2203        fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
2204            other.into_iter().map(|x| T::into_owned(x)).collect()
2205        }
2206        type Container = Vecs<T::Container>;
2207    }
2208
2209    impl<T: Columnar, const N: usize> Columnar for [T; N] {
2210        #[inline(always)]
2211        fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
2212            for (s, o) in self.iter_mut().zip(other.into_iter()) {
2213                T::copy_from(s, o);
2214            }
2215        }
2216        #[inline(always)]
2217        fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
2218            let vec: Vec<_> = other.into_iter().map(|x| T::into_owned(x)).collect();
2219            match vec.try_into() {
2220                Ok(array) => array,
2221                Err(_) => panic!("wrong length"),
2222            }
2223        }
2224        type Container = Vecs<T::Container>;
2225    }
2226
2227    impl<T: Columnar, const N: usize> Columnar for smallvec::SmallVec<[T; N]> {
2228        #[inline(always)]
2229        fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
2230            self.truncate(other.len());
2231            let mut other_iter = other.into_iter();
2232            for (s, o) in self.iter_mut().zip(&mut other_iter) {
2233                T::copy_from(s, o);
2234            }
2235            for o in other_iter {
2236                self.push(T::into_owned(o));
2237            }
2238        }
2239        #[inline(always)]
2240        fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
2241            other.into_iter().map(|x| T::into_owned(x)).collect()
2242        }
2243        type Container = Vecs<T::Container>;
2244    }
2245
2246    impl<BC: crate::common::PushIndexAs<u64>, TC: Container> Container for Vecs<TC, BC> {
2247        type Ref<'a> = Slice<TC::Borrowed<'a>> where TC: 'a;
2248        type Borrowed<'a> = Vecs<TC::Borrowed<'a>, BC::Borrowed<'a>> where BC: 'a, TC: 'a;
2249        #[inline(always)]
2250        fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
2251            Vecs {
2252                bounds: self.bounds.borrow(),
2253                values: self.values.borrow(),
2254            }
2255        }
2256        #[inline(always)]
2257        fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where BC: 'a, TC: 'a {
2258            Vecs {
2259                bounds: BC::reborrow(thing.bounds),
2260                values: TC::reborrow(thing.values),
2261            }
2262        }
2263        #[inline(always)]
2264        fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a {
2265            thing.map(|x| TC::reborrow(x))
2266        }
2267
2268        #[inline(always)]
2269        fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
2270            if !range.is_empty() {
2271                // Imported bounds will be relative to this starting offset.
2272                let values_len = self.values.len() as u64;
2273
2274                // Push all bytes that we can, all at once.
2275                let other_lower = if range.start == 0 { 0 } else { other.bounds.index_as(range.start-1) };
2276                let other_upper = other.bounds.index_as(range.end-1);
2277                self.values.extend_from_self(other.values, other_lower as usize .. other_upper as usize);
2278
2279                // Each bound needs to be shifted by `values_len - other_lower`.
2280                if values_len == other_lower {
2281                    self.bounds.extend_from_self(other.bounds, range);
2282                }
2283                else {
2284                    for index in range {
2285                        let shifted = other.bounds.index_as(index) - other_lower + values_len;
2286                        self.bounds.push(&shifted)
2287                    }
2288                }
2289            }
2290        }
2291
2292        fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
2293            self.bounds.reserve_for(selves.clone().map(|x| x.bounds));
2294            self.values.reserve_for(selves.map(|x| x.values));
2295        }
2296    }
2297
2298    impl<'a, TC: crate::AsBytes<'a>, BC: crate::AsBytes<'a>> crate::AsBytes<'a> for Vecs<TC, BC> {
2299        #[inline(always)]
2300        fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
2301            crate::chain(self.bounds.as_bytes(), self.values.as_bytes())
2302        }
2303    }
2304    impl<'a, TC: crate::FromBytes<'a>, BC: crate::FromBytes<'a>> crate::FromBytes<'a> for Vecs<TC, BC> {
2305        #[inline(always)]
2306        fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
2307            Self {
2308                bounds: crate::FromBytes::from_bytes(bytes),
2309                values: crate::FromBytes::from_bytes(bytes),
2310            }
2311        }
2312    }
2313
2314    impl<TC: Len> Vecs<TC> {
2315        #[inline]
2316        pub fn push_iter<I>(&mut self, iter: I) where I: IntoIterator, TC: Push<I::Item> {
2317            self.values.extend(iter);
2318            self.bounds.push(self.values.len() as u64);
2319        }
2320    }
2321
2322    impl<TC, BC: Len> Len for Vecs<TC, BC> {
2323        #[inline(always)] fn len(&self) -> usize { self.bounds.len() }
2324    }
2325
2326    impl<TC: Copy, BC: Len+IndexAs<u64>> Index for Vecs<TC, BC> {
2327        type Ref = Slice<TC>;
2328        #[inline(always)]
2329        fn get(&self, index: usize) -> Self::Ref {
2330            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
2331            let upper = self.bounds.index_as(index);
2332            Slice::new(lower, upper, self.values)
2333        }
2334    }
2335    impl<'a, TC, BC: Len+IndexAs<u64>> Index for &'a Vecs<TC, BC> {
2336        type Ref = Slice<&'a TC>;
2337        #[inline(always)]
2338        fn get(&self, index: usize) -> Self::Ref {
2339            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
2340            let upper = self.bounds.index_as(index);
2341            Slice::new(lower, upper, &self.values)
2342        }
2343    }
2344    impl<TC, BC: Len+IndexAs<u64>> IndexMut for Vecs<TC, BC> {
2345        type IndexMut<'a> = Slice<&'a mut TC> where TC: 'a, BC: 'a;
2346
2347        #[inline(always)]
2348        fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
2349            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
2350            let upper = self.bounds.index_as(index);
2351            Slice::new(lower, upper, &mut self.values)
2352        }
2353    }
2354
2355    impl<'a, TC: Container, BC: for<'b> Push<&'b u64>> Push<Slice<TC::Borrowed<'a>>> for Vecs<TC, BC> {
2356        #[inline]
2357        fn push(&mut self, item: Slice<TC::Borrowed<'a>>) {
2358            self.values.extend_from_self(item.slice, item.lower .. item.upper);
2359            self.bounds.push(&(self.values.len() as u64));
2360        }
2361    }
2362
2363    impl<I: IntoIterator, TC: Push<I::Item> + Len, BC: for<'a> Push<&'a u64>> Push<I> for Vecs<TC, BC> {
2364        #[inline]
2365        fn push(&mut self, item: I) {
2366            self.values.extend(item);
2367            self.bounds.push(&(self.values.len() as u64));
2368        }
2369    }
2370
2371    impl<TC: Clear, BC: Clear> Clear for Vecs<TC, BC> {
2372        #[inline(always)]
2373        fn clear(&mut self) {
2374            self.bounds.clear();
2375            self.values.clear();
2376        }
2377    }
2378
2379    impl<TC: HeapSize, BC: HeapSize> HeapSize for Vecs<TC, BC> {
2380        fn heap_size(&self) -> (usize, usize) {
2381            let (l0, c0) = self.bounds.heap_size();
2382            let (l1, c1) = self.values.heap_size();
2383            (l0 + l1, c0 + c1)
2384        }
2385    }
2386}
2387
2388#[allow(non_snake_case)]
2389pub mod tuple {
2390
2391    use super::{Clear, Columnar, Container, Len, IndexMut, Index, Push, HeapSize};
2392
2393    // Implementations for tuple types.
2394    // These are all macro based, because the implementations are very similar.
2395    // The macro requires two names, one for the store and one for pushable types.
2396    macro_rules! tuple_impl {
2397        ( $($name:ident,$name2:ident,$idx:tt)+) => (
2398
2399            impl<$($name: Columnar),*> Columnar for ($($name,)*) {
2400                #[inline(always)]
2401                fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
2402                    let ($($name,)*) = self;
2403                    let ($($name2,)*) = other;
2404                    $(crate::Columnar::copy_from($name, $name2);)*
2405                }
2406                #[inline(always)]
2407                fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
2408                    let ($($name2,)*) = other;
2409                    ($($name::into_owned($name2),)*)
2410                }
2411                type Container = ($($name::Container,)*);
2412            }
2413            impl<$($name2: Container,)*> Container for ($($name2,)*) {
2414                type Ref<'a> = ($($name2::Ref<'a>,)*) where $($name2: 'a,)*;
2415                type Borrowed<'a> = ($($name2::Borrowed<'a>,)*) where $($name2: 'a,)*;
2416                #[inline(always)]
2417                fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
2418                    let ($($name,)*) = self;
2419                    ($($name.borrow(),)*)
2420                }
2421                #[inline(always)]
2422                fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where $($name2: 'a,)* {
2423                    let ($($name,)*) = thing;
2424                    ($($name2::reborrow($name),)*)
2425                }
2426                #[inline(always)]
2427                fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a {
2428                    let ($($name2,)*) = thing;
2429                    ($($name2::reborrow_ref($name2),)*)
2430                }
2431
2432                #[inline(always)]
2433                fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
2434                    let ($($name,)*) = self;
2435                    let ($($name2,)*) = other;
2436                    $( $name.extend_from_self($name2, range.clone()); )*
2437                }
2438
2439                fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
2440                    let ($($name,)*) = self;
2441                    $( $name.reserve_for(selves.clone().map(|x| x.$idx)); )*
2442                }
2443            }
2444
2445            #[allow(non_snake_case)]
2446            impl<'a, $($name: crate::AsBytes<'a>),*> crate::AsBytes<'a> for ($($name,)*) {
2447                #[inline(always)]
2448                fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
2449                    let ($($name,)*) = self;
2450                    let iter = None.into_iter();
2451                    $( let iter = crate::chain(iter, $name.as_bytes()); )*
2452                    iter
2453                }
2454            }
2455            impl<'a, $($name: crate::FromBytes<'a>),*> crate::FromBytes<'a> for ($($name,)*) {
2456                #[inline(always)]
2457                #[allow(non_snake_case)]
2458                fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
2459                    $(let $name = crate::FromBytes::from_bytes(bytes);)*
2460                    ($($name,)*)
2461                }
2462            }
2463
2464            impl<$($name: Len),*> Len for ($($name,)*) {
2465                #[inline(always)]
2466                fn len(&self) -> usize {
2467                    self.0.len()
2468                }
2469            }
2470            impl<$($name: Clear),*> Clear for ($($name,)*) {
2471                #[inline(always)]
2472                fn clear(&mut self) {
2473                    let ($($name,)*) = self;
2474                    $($name.clear();)*
2475                }
2476            }
2477            impl<$($name: HeapSize),*> HeapSize for ($($name,)*) {
2478                #[inline(always)]
2479                fn heap_size(&self) -> (usize, usize) {
2480                    let ($($name,)*) = self;
2481                    let mut l = 0;
2482                    let mut c = 0;
2483                    $(let (l0, c0) = $name.heap_size(); l += l0; c += c0;)*
2484                    (l, c)
2485                }
2486            }
2487            impl<$($name: Index),*> Index for ($($name,)*) {
2488                type Ref = ($($name::Ref,)*);
2489                #[inline(always)]
2490                fn get(&self, index: usize) -> Self::Ref {
2491                    let ($($name,)*) = self;
2492                    ($($name.get(index),)*)
2493                }
2494            }
2495            impl<'a, $($name),*> Index for &'a ($($name,)*) where $( &'a $name: Index),* {
2496                type Ref = ($(<&'a $name as Index>::Ref,)*);
2497                #[inline(always)]
2498                fn get(&self, index: usize) -> Self::Ref {
2499                    let ($($name,)*) = self;
2500                    ($($name.get(index),)*)
2501                }
2502            }
2503
2504            impl<$($name: IndexMut),*> IndexMut for ($($name,)*) {
2505                type IndexMut<'a> = ($($name::IndexMut<'a>,)*) where $($name: 'a),*;
2506                #[inline(always)]
2507                fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
2508                    let ($($name,)*) = self;
2509                    ($($name.get_mut(index),)*)
2510                }
2511            }
2512            impl<$($name2, $name: Push<$name2>),*> Push<($($name2,)*)> for ($($name,)*) {
2513                #[inline]
2514                fn push(&mut self, item: ($($name2,)*)) {
2515                    let ($($name,)*) = self;
2516                    let ($($name2,)*) = item;
2517                    $($name.push($name2);)*
2518                }
2519            }
2520            impl<'a, $($name2, $name: Push<&'a $name2>),*> Push<&'a ($($name2,)*)> for ($($name,)*) {
2521                #[inline]
2522                fn push(&mut self, item: &'a ($($name2,)*)) {
2523                    let ($($name,)*) = self;
2524                    let ($($name2,)*) = item;
2525                    $($name.push($name2);)*
2526                }
2527            }
2528        )
2529    }
2530
2531    tuple_impl!(A,AA,0);
2532    tuple_impl!(A,AA,0 B,BB,1);
2533    tuple_impl!(A,AA,0 B,BB,1 C,CC,2);
2534    tuple_impl!(A,AA,0 B,BB,1 C,CC,2 D,DD,3);
2535    tuple_impl!(A,AA,0 B,BB,1 C,CC,2 D,DD,3 E,EE,4);
2536    tuple_impl!(A,AA,0 B,BB,1 C,CC,2 D,DD,3 E,EE,4 F,FF,5);
2537    tuple_impl!(A,AA,0 B,BB,1 C,CC,2 D,DD,3 E,EE,4 F,FF,5 G,GG,6);
2538    tuple_impl!(A,AA,0 B,BB,1 C,CC,2 D,DD,3 E,EE,4 F,FF,5 G,GG,6 H,HH,7);
2539    tuple_impl!(A,AA,0 B,BB,1 C,CC,2 D,DD,3 E,EE,4 F,FF,5 G,GG,6 H,HH,7 I,II,8);
2540    tuple_impl!(A,AA,0 B,BB,1 C,CC,2 D,DD,3 E,EE,4 F,FF,5 G,GG,6 H,HH,7 I,II,8 J,JJ,9);
2541
2542    #[cfg(test)]
2543    mod test {
2544        #[test]
2545        fn round_trip() {
2546
2547            use crate::common::{Index, Push, HeapSize, Len};
2548
2549            let mut column: crate::ContainerOf<(u64, u8, String)> = Default::default();
2550            for i in 0..100 {
2551                column.push((i, i as u8, &i.to_string()));
2552                column.push((i, i as u8, &"".to_string()));
2553            }
2554
2555            assert_eq!(column.len(), 200);
2556            assert_eq!(column.heap_size(), (3590, 4608));
2557
2558            for i in 0..100u64 {
2559                assert_eq!((&column).get((2*i+0) as usize), (&i, &(i as u8), i.to_string().as_str()));
2560                assert_eq!((&column).get((2*i+1) as usize), (&i, &(i as u8), ""));
2561            }
2562
2563            // Compare to the heap size of a `Vec<Option<usize>>`.
2564            let mut column: Vec<(u64, u8, String)> = Default::default();
2565            for i in 0..100 {
2566                column.push((i, i as u8, i.to_string()));
2567                column.push((i, i as u8, "".to_string()));
2568            }
2569            // NB: Rust seems to change the capacities across versions (1.88 != 1.89),
2570            // so we just compare the allocated regions to avoid updating the MSRV.
2571            assert_eq!(column.heap_size().0, 8190);
2572
2573        }
2574    }
2575}
2576
2577pub use sums::{rank_select::RankSelect, result::Results, option::Options};
2578/// Containers for enumerations ("sum types") that store variants separately.
2579///
2580/// The main work of these types is storing a discriminant and index efficiently,
2581/// as containers for each of the variant types can hold the actual data.
2582pub mod sums {
2583
2584    /// Stores for maintaining discriminants, and associated sequential indexes.
2585    ///
2586    /// The sequential indexes are not explicitly maintained, but are supported
2587    /// by a `rank(index)` function that indicates how many of a certain variant
2588    /// precede the given index. While this could potentially be done with a scan
2589    /// of all preceding discriminants, the stores maintain running accumulations
2590    /// that make the operation constant time (using additional amortized memory).
2591    pub mod rank_select {
2592
2593        use crate::primitive::Bools;
2594        use crate::common::index::CopyAs;
2595        use crate::{Container, Len, Index, IndexAs, Push, Clear, HeapSize};
2596
2597        /// A store for maintaining `Vec<bool>` with fast `rank` and `select` access.
2598        ///
2599        /// The design is to have `u64` running counts for each block of 1024 bits,
2600        /// which are roughly the size of a cache line. This is roughly 6% overhead,
2601        /// above the bits themselves, which seems pretty solid.
2602    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
2603        #[derive(Copy, Clone, Debug, Default, PartialEq)]
2604        pub struct RankSelect<CC = Vec<u64>, VC = Vec<u64>, WC = u64> {
2605            /// Counts of the number of cumulative set (true) bits, *after* each block of 1024 bits.
2606            pub counts: CC,
2607            /// The bits themselves.
2608            pub values: Bools<VC, WC>,
2609        }
2610
2611        impl<CC: crate::common::PushIndexAs<u64>, VC: crate::common::PushIndexAs<u64>> RankSelect<CC, VC> {
2612            pub fn borrow<'a>(&'a self) -> RankSelect<CC::Borrowed<'a>, VC::Borrowed<'a>, &'a u64> {
2613                RankSelect {
2614                    counts: self.counts.borrow(),
2615                    values: self.values.borrow(),
2616                }
2617            }
2618            #[inline(always)]
2619            pub fn reborrow<'b, 'a: 'b>(thing: RankSelect<CC::Borrowed<'a>, VC::Borrowed<'a>, &'a u64>) -> RankSelect<CC::Borrowed<'b>, VC::Borrowed<'b>, &'b u64> {
2620                RankSelect {
2621                    counts: CC::reborrow(thing.counts),
2622                    values: Bools::<VC, u64>::reborrow(thing.values),
2623                }
2624            }
2625        }
2626
2627        impl<'a, CC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for RankSelect<CC, VC, &'a u64> {
2628            #[inline(always)]
2629            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
2630                crate::chain(self.counts.as_bytes(), self.values.as_bytes())
2631            }
2632        }
2633        impl<'a, CC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for RankSelect<CC, VC, &'a u64> {
2634            #[inline(always)]
2635            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
2636                Self {
2637                    counts: crate::FromBytes::from_bytes(bytes),
2638                    values: crate::FromBytes::from_bytes(bytes),
2639                }
2640            }
2641        }
2642
2643
2644        impl<CC, VC: Len + IndexAs<u64>, WC: CopyAs<u64>> RankSelect<CC, VC, WC> {
2645            #[inline(always)]
2646            pub fn get(&self, index: usize) -> bool {
2647                Index::get(&self.values, index)
2648            }
2649        }
2650        impl<CC: Len + IndexAs<u64>, VC: Len + IndexAs<u64>, WC: CopyAs<u64>> RankSelect<CC, VC, WC> {
2651            /// The number of set bits *strictly* preceding `index`.
2652            ///
2653            /// This number is accumulated first by reading out of `self.counts` at the correct position,
2654            /// then by summing the ones in strictly prior `u64` entries, then by counting the ones in the
2655            /// masked `u64` in which the bit lives.
2656            pub fn rank(&self, index: usize) -> usize {
2657                let bit = index % 64;
2658                let block = index / 64;
2659                let chunk = block / 16;
2660                let mut count = if chunk > 0 { self.counts.index_as(chunk - 1) as usize } else { 0 };
2661                for pos in (16 * chunk) .. block {
2662                    count += self.values.values.index_as(pos).count_ones() as usize;
2663                }
2664                // TODO: Panic if out of bounds?
2665                let intra_word = if block == self.values.values.len() { self.values.last_word.copy_as() } else { self.values.values.index_as(block) };
2666                count += (intra_word & ((1 << bit) - 1)).count_ones() as usize;
2667                count
2668            }
2669            /// The index of the `rank`th set bit, should one exist.
2670            pub fn select(&self, rank: u64) -> Option<usize> {
2671                let mut chunk = 0;
2672                // Step one is to find the position in `counts` where we go from `rank` to `rank + 1`.
2673                // The position we are looking for is within that chunk of bits.
2674                // TODO: Binary search is likely better at many scales. Rust's binary search is .. not helpful with ties.
2675                while chunk < self.counts.len() && self.counts.index_as(chunk) <= rank {
2676                    chunk += 1;
2677                }
2678                let mut count = if chunk < self.counts.len() { self.counts.index_as(chunk) } else { 0 };
2679                // Step two is to find the position within that chunk where the `rank`th bit is.
2680                let mut block = 16 * chunk;
2681                while block < self.values.values.len() && count + (self.values.values.index_as(block).count_ones() as u64) <= rank {
2682                    count += self.values.values.index_as(block).count_ones() as u64;
2683                    block += 1;
2684                }
2685                // Step three is to search the last word for the location, or return `None` if we run out of bits.
2686                let last_bits = if block == self.values.values.len() { self.values.last_bits.copy_as() as usize } else { 64 };
2687                let last_word = if block == self.values.values.len() { self.values.last_word.copy_as() } else { self.values.values.index_as(block) };
2688                for shift in 0 .. last_bits {
2689                    if ((last_word >> shift) & 0x01 == 0x01) && count + 1 == rank {
2690                        return Some(64 * block + shift);
2691                    }
2692                    count += (last_word >> shift) & 0x01;
2693                }
2694
2695                None
2696            }
2697        }
2698
2699        impl<CC, VC: Len, WC: CopyAs<u64>> RankSelect<CC, VC, WC> {
2700            pub fn len(&self) -> usize {
2701                self.values.len()
2702            }
2703        }
2704
2705        // This implementation probably only works for `Vec<u64>` and `Vec<u64>`, but we could fix that.
2706        // Partly, it's hard to name the `Index` flavor that allows one to get back a `u64`.
2707        impl<CC: for<'a> Push<&'a u64> + Len + IndexAs<u64>, VC: for<'a> Push<&'a u64> + Len + IndexAs<u64>> RankSelect<CC, VC> {
2708            #[inline]
2709            pub fn push(&mut self, bit: bool) {
2710                self.values.push(&bit);
2711                while self.counts.len() < self.values.len() / 1024 {
2712                    let mut count = self.counts.last().unwrap_or(0);
2713                    let lower = 16 * self.counts.len();
2714                    let upper = lower + 16;
2715                    for i in lower .. upper {
2716                        count += self.values.values.index_as(i).count_ones() as u64;
2717                    }
2718                    self.counts.push(&count);
2719                }
2720            }
2721        }
2722        impl<CC: Clear, VC: Clear> Clear for RankSelect<CC, VC> {
2723            fn clear(&mut self) {
2724                self.counts.clear();
2725                self.values.clear();
2726            }
2727        }
2728        impl<CC: HeapSize, VC: HeapSize> HeapSize for RankSelect<CC, VC> {
2729            fn heap_size(&self) -> (usize, usize) {
2730                let (l0, c0) = self.counts.heap_size();
2731                let (l1, c1) = self.values.heap_size();
2732                (l0 + l1, c0 + c1)
2733            }
2734        }
2735    }
2736
2737    pub mod result {
2738
2739        use crate::common::index::CopyAs;
2740        use crate::{Clear, Columnar, Container, Len, IndexMut, Index, IndexAs, Push, HeapSize};
2741        use crate::RankSelect;
2742
2743        #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
2744        #[derive(Copy, Clone, Debug, Default, PartialEq)]
2745        pub struct Results<SC, TC, CC=Vec<u64>, VC=Vec<u64>, WC=u64> {
2746            /// Bits set to `true` correspond to `Ok` variants.
2747            pub indexes: RankSelect<CC, VC, WC>,
2748            pub oks: SC,
2749            pub errs: TC,
2750        }
2751
2752        impl<S: Columnar, T: Columnar> Columnar for Result<S, T> {
2753            fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
2754                match (&mut *self, other) {
2755                    (Ok(x), Ok(y)) => x.copy_from(y),
2756                    (Err(x), Err(y)) => x.copy_from(y),
2757                    (_, other) => { *self = Self::into_owned(other); },
2758                }
2759            }
2760            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
2761                match other {
2762                    Ok(y) => Ok(S::into_owned(y)),
2763                    Err(y) => Err(T::into_owned(y)),
2764                }
2765            }
2766            type Container = Results<S::Container, T::Container>;
2767        }
2768
2769        impl<SC: Container, TC: Container> Container for Results<SC, TC> {
2770            type Ref<'a> = Result<SC::Ref<'a>, TC::Ref<'a>> where SC: 'a, TC: 'a;
2771            type Borrowed<'a> = Results<SC::Borrowed<'a>, TC::Borrowed<'a>, &'a [u64], &'a [u64], &'a u64> where SC: 'a, TC: 'a;
2772            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
2773                Results {
2774                    indexes: self.indexes.borrow(),
2775                    oks: self.oks.borrow(),
2776                    errs: self.errs.borrow(),
2777                }
2778            }
2779            #[inline(always)]
2780            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where SC: 'a, TC: 'a {
2781                Results {
2782                    indexes: RankSelect::<Vec<u64>, Vec<u64>>::reborrow(thing.indexes),
2783                    oks: SC::reborrow(thing.oks),
2784                    errs: TC::reborrow(thing.errs),
2785                }
2786            }
2787            #[inline(always)]
2788            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a {
2789                match thing {
2790                    Ok(y) => Ok(SC::reborrow_ref(y)),
2791                    Err(y) => Err(TC::reborrow_ref(y)),
2792                }
2793            }
2794
2795            #[inline(always)]
2796            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
2797                if !range.is_empty() {
2798                    // Starting offsets of each variant in `other`.
2799                    let oks_start = other.indexes.rank(range.start);
2800                    let errs_start = range.start - oks_start;
2801
2802                    // Count the number of `Ok` and `Err` variants as we push, to determine the range.
2803                    // TODO: This could probably be `popcnt` somehow.
2804                    let mut oks = 0;
2805                    for index in range.clone() {
2806                        let bit = other.indexes.get(index);
2807                        self.indexes.push(bit);
2808                        if bit { oks += 1; }
2809                    }
2810                    let errs = range.len() - oks;
2811
2812                    self.oks.extend_from_self(other.oks, oks_start .. oks_start + oks);
2813                    self.errs.extend_from_self(other.errs, errs_start .. errs_start + errs);
2814                }
2815            }
2816
2817            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
2818                // TODO: reserve room in `self.indexes`.
2819                self.oks.reserve_for(selves.clone().map(|x| x.oks));
2820                self.errs.reserve_for(selves.map(|x| x.errs));
2821            }
2822        }
2823
2824        impl<'a, SC: crate::AsBytes<'a>, TC: crate::AsBytes<'a>, CC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for Results<SC, TC, CC, VC, &'a u64> {
2825            #[inline(always)]
2826            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
2827                let iter = self.indexes.as_bytes();
2828                let iter = crate::chain(iter, self.oks.as_bytes());
2829                crate::chain(iter, self.errs.as_bytes())
2830            }
2831        }
2832        impl<'a, SC: crate::FromBytes<'a>, TC: crate::FromBytes<'a>, CC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for Results<SC, TC, CC, VC, &'a u64> {
2833            #[inline(always)]
2834            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
2835                Self {
2836                    indexes: crate::FromBytes::from_bytes(bytes),
2837                    oks: crate::FromBytes::from_bytes(bytes),
2838                    errs: crate::FromBytes::from_bytes(bytes),
2839                }
2840            }
2841        }
2842
2843        impl<SC, TC, CC, VC: Len, WC: CopyAs<u64>> Len for Results<SC, TC, CC, VC, WC> {
2844            #[inline(always)] fn len(&self) -> usize { self.indexes.len() }
2845        }
2846
2847        impl<SC, TC, CC, VC, WC> Index for Results<SC, TC, CC, VC, WC>
2848        where
2849            SC: Index,
2850            TC: Index,
2851            CC: IndexAs<u64> + Len,
2852            VC: IndexAs<u64> + Len,
2853            WC: CopyAs<u64>,
2854        {
2855            type Ref = Result<SC::Ref, TC::Ref>;
2856            #[inline(always)]
2857            fn get(&self, index: usize) -> Self::Ref {
2858                if self.indexes.get(index) {
2859                    Ok(self.oks.get(self.indexes.rank(index)))
2860                } else {
2861                    Err(self.errs.get(index - self.indexes.rank(index)))
2862                }
2863            }
2864        }
2865        impl<'a, SC, TC, CC, VC, WC> Index for &'a Results<SC, TC, CC, VC, WC>
2866        where
2867            &'a SC: Index,
2868            &'a TC: Index,
2869            CC: IndexAs<u64> + Len,
2870            VC: IndexAs<u64> + Len,
2871            WC: CopyAs<u64>,
2872        {
2873            type Ref = Result<<&'a SC as Index>::Ref, <&'a TC as Index>::Ref>;
2874            #[inline(always)]
2875            fn get(&self, index: usize) -> Self::Ref {
2876                if self.indexes.get(index) {
2877                    Ok((&self.oks).get(self.indexes.rank(index)))
2878                } else {
2879                    Err((&self.errs).get(index - self.indexes.rank(index)))
2880                }
2881            }
2882        }
2883
2884        // NB: You are not allowed to change the variant, but can change its contents.
2885        impl<SC: IndexMut, TC: IndexMut, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len> IndexMut for Results<SC, TC, CC, VC> {
2886            type IndexMut<'a> = Result<SC::IndexMut<'a>, TC::IndexMut<'a>> where SC: 'a, TC: 'a, CC: 'a, VC: 'a;
2887            #[inline(always)]
2888            fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
2889                if self.indexes.get(index) {
2890                    Ok(self.oks.get_mut(self.indexes.rank(index)))
2891                } else {
2892                    Err(self.errs.get_mut(index - self.indexes.rank(index)))
2893                }
2894            }
2895        }
2896
2897        impl<S, SC: Push<S>, T, TC: Push<T>> Push<Result<S, T>> for Results<SC, TC> {
2898            #[inline]
2899            fn push(&mut self, item: Result<S, T>) {
2900                match item {
2901                    Ok(item) => {
2902                        self.indexes.push(true);
2903                        self.oks.push(item);
2904                    }
2905                    Err(item) => {
2906                        self.indexes.push(false);
2907                        self.errs.push(item);
2908                    }
2909                }
2910            }
2911        }
2912        impl<'a, S, SC: Push<&'a S>, T, TC: Push<&'a T>> Push<&'a Result<S, T>> for Results<SC, TC> {
2913            #[inline]
2914            fn push(&mut self, item: &'a Result<S, T>) {
2915                match item {
2916                    Ok(item) => {
2917                        self.indexes.push(true);
2918                        self.oks.push(item);
2919                    }
2920                    Err(item) => {
2921                        self.indexes.push(false);
2922                        self.errs.push(item);
2923                    }
2924                }
2925            }
2926        }
2927
2928        impl<SC: Clear, TC: Clear> Clear for Results<SC, TC> {
2929            fn clear(&mut self) {
2930                self.indexes.clear();
2931                self.oks.clear();
2932                self.errs.clear();
2933            }
2934        }
2935
2936        impl<SC: HeapSize, TC: HeapSize> HeapSize for Results<SC, TC> {
2937            fn heap_size(&self) -> (usize, usize) {
2938                let (l0, c0) = self.oks.heap_size();
2939                let (l1, c1) = self.errs.heap_size();
2940                let (li, ci) = self.indexes.heap_size();
2941                (l0 + l1 + li, c0 + c1 + ci)
2942            }
2943        }
2944
2945        impl<SC, TC, CC, VC, WC> Results<SC, TC, CC, VC, WC> {
2946            /// Returns ok values if no errors exist.
2947            pub fn unwrap(self) -> SC where TC: Len {
2948                assert!(self.errs.is_empty());
2949                self.oks
2950            }
2951            /// Returns error values if no oks exist.
2952            pub fn unwrap_err(self) -> TC where SC: Len {
2953                assert!(self.oks.is_empty());
2954                self.errs
2955            }
2956        }
2957        #[cfg(test)]
2958        mod test {
2959            #[test]
2960            fn round_trip() {
2961
2962                use crate::common::{Index, Push, HeapSize, Len};
2963
2964                let mut column: crate::ContainerOf<Result<u64, u64>> = Default::default();
2965                for i in 0..100 {
2966                    column.push(Ok::<u64, u64>(i));
2967                    column.push(Err::<u64, u64>(i));
2968                }
2969
2970                assert_eq!(column.len(), 200);
2971                assert_eq!(column.heap_size(), (1624, 2080));
2972
2973                for i in 0..100 {
2974                    assert_eq!(column.get(2*i+0), Ok(i as u64));
2975                    assert_eq!(column.get(2*i+1), Err(i as u64));
2976                }
2977
2978                let mut column: crate::ContainerOf<Result<u64, u8>> = Default::default();
2979                for i in 0..100 {
2980                    column.push(Ok::<u64, u8>(i as u64));
2981                    column.push(Err::<u64, u8>(i as u8));
2982                }
2983
2984                assert_eq!(column.len(), 200);
2985                assert_eq!(column.heap_size(), (924, 1184));
2986
2987                for i in 0..100 {
2988                    assert_eq!(column.get(2*i+0), Ok(i as u64));
2989                    assert_eq!(column.get(2*i+1), Err(i as u8));
2990                }
2991            }
2992        }
2993    }
2994
2995    pub mod option {
2996
2997        use crate::common::index::CopyAs;
2998        use crate::{Clear, Columnar, Container, Len, IndexMut, Index, IndexAs, Push, HeapSize};
2999        use crate::RankSelect;
3000
3001    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3002        #[derive(Copy, Clone, Debug, Default, PartialEq)]
3003        pub struct Options<TC, CC=Vec<u64>, VC=Vec<u64>, WC=u64> {
3004            /// Uses two bits for each item, one to indicate the variant and one (amortized)
3005            /// to enable efficient rank determination.
3006            pub indexes: RankSelect<CC, VC, WC>,
3007            pub somes: TC,
3008        }
3009
3010        impl<T: Columnar> Columnar for Option<T> {
3011            fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
3012                match (&mut *self, other) {
3013                    (Some(x), Some(y)) => { x.copy_from(y); }
3014                    (_, other) => { *self = Self::into_owned(other); }
3015                }
3016            }
3017            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
3018                other.map(|x| T::into_owned(x))
3019            }
3020            type Container = Options<T::Container>;
3021        }
3022
3023        impl<TC: Container> Container for Options<TC> {
3024            type Ref<'a> = Option<TC::Ref<'a>> where TC: 'a;
3025            type Borrowed<'a> = Options<TC::Borrowed<'a>, &'a [u64], &'a [u64], &'a u64> where TC: 'a;
3026            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
3027                Options {
3028                    indexes: self.indexes.borrow(),
3029                    somes: self.somes.borrow(),
3030                }
3031            }
3032            #[inline(always)]
3033            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where TC: 'a {
3034                Options {
3035                    indexes: RankSelect::<Vec<u64>, Vec<u64>>::reborrow(thing.indexes),
3036                    somes: TC::reborrow(thing.somes),
3037                }
3038            }
3039            #[inline(always)]
3040            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a {
3041                thing.map(TC::reborrow_ref)
3042            }
3043
3044            #[inline(always)]
3045            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
3046                if !range.is_empty() {
3047                    // Starting offsets of `Some` variants in `other`.
3048                    let somes_start = other.indexes.rank(range.start);
3049
3050                    // Count the number of `Some` variants as we push, to determine the range.
3051                    // TODO: This could probably be `popcnt` somehow.
3052                    let mut somes = 0;
3053                    for index in range {
3054                        let bit = other.indexes.get(index);
3055                        self.indexes.push(bit);
3056                        if bit { somes += 1; }
3057                    }
3058
3059                    self.somes.extend_from_self(other.somes, somes_start .. somes_start + somes);
3060                }
3061            }
3062
3063            fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
3064                // TODO: reserve room in `self.indexes`.
3065                self.somes.reserve_for(selves.map(|x| x.somes));
3066            }
3067        }
3068
3069        impl<'a, TC: crate::AsBytes<'a>, CC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for Options<TC, CC, VC, &'a u64> {
3070            #[inline(always)]
3071            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
3072                crate::chain(self.indexes.as_bytes(), self.somes.as_bytes())
3073            }
3074        }
3075
3076        impl <'a, TC: crate::FromBytes<'a>, CC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for Options<TC, CC, VC, &'a u64> {
3077            #[inline(always)]
3078            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
3079                Self {
3080                    indexes: crate::FromBytes::from_bytes(bytes),
3081                    somes: crate::FromBytes::from_bytes(bytes),
3082                }
3083            }
3084        }
3085
3086        impl<T, CC, VC: Len, WC: CopyAs<u64>> Len for Options<T, CC, VC, WC> {
3087            #[inline(always)] fn len(&self) -> usize { self.indexes.len() }
3088        }
3089
3090        impl<TC: Index, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len, WC: CopyAs<u64>> Index for Options<TC, CC, VC, WC> {
3091            type Ref = Option<TC::Ref>;
3092            #[inline(always)]
3093            fn get(&self, index: usize) -> Self::Ref {
3094                if self.indexes.get(index) {
3095                    Some(self.somes.get(self.indexes.rank(index)))
3096                } else {
3097                    None
3098                }
3099            }
3100        }
3101        impl<'a, TC, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len, WC: CopyAs<u64>> Index for &'a Options<TC, CC, VC, WC>
3102        where &'a TC: Index
3103        {
3104            type Ref = Option<<&'a TC as Index>::Ref>;
3105            #[inline(always)]
3106            fn get(&self, index: usize) -> Self::Ref {
3107                if self.indexes.get(index) {
3108                    Some((&self.somes).get(self.indexes.rank(index)))
3109                } else {
3110                    None
3111                }
3112            }
3113        }
3114        impl<TC: IndexMut, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len> IndexMut for Options<TC, CC, VC> {
3115            type IndexMut<'a> = Option<TC::IndexMut<'a>> where TC: 'a, CC: 'a, VC: 'a;
3116            #[inline(always)]
3117            fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
3118                if self.indexes.get(index) {
3119                    Some(self.somes.get_mut(self.indexes.rank(index)))
3120                } else {
3121                    None
3122                }
3123            }
3124        }
3125
3126        impl<T, TC: Push<T> + Len> Push<Option<T>> for Options<TC> {
3127            #[inline]
3128            fn push(&mut self, item: Option<T>) {
3129                match item {
3130                    Some(item) => {
3131                        self.indexes.push(true);
3132                        self.somes.push(item);
3133                    }
3134                    None => {
3135                        self.indexes.push(false);
3136                    }
3137                }
3138            }
3139        }
3140        impl<'a, T, TC: Push<&'a T> + Len> Push<&'a Option<T>> for Options<TC> {
3141            #[inline]
3142            fn push(&mut self, item: &'a Option<T>) {
3143                match item {
3144                    Some(item) => {
3145                        self.indexes.push(true);
3146                        self.somes.push(item);
3147                    }
3148                    None => {
3149                        self.indexes.push(false);
3150                    }
3151                }
3152            }
3153        }
3154
3155        impl<TC: Clear> Clear for Options<TC> {
3156            fn clear(&mut self) {
3157                self.indexes.clear();
3158                self.somes.clear();
3159            }
3160        }
3161
3162        impl<TC: HeapSize> HeapSize for Options<TC> {
3163            fn heap_size(&self) -> (usize, usize) {
3164                let (l0, c0) = self.somes.heap_size();
3165                let (li, ci) = self.indexes.heap_size();
3166                (l0 + li, c0 + ci)
3167            }
3168        }
3169
3170        #[cfg(test)]
3171        mod test {
3172
3173            use crate::Columnar;
3174            use crate::common::{Index, HeapSize, Len};
3175            use crate::Options;
3176
3177            #[test]
3178            fn round_trip_some() {
3179                // Type annotation is important to avoid some inference overflow.
3180                let store: Options<Vec<i32>> = Columnar::into_columns((0..100).map(Some));
3181                assert_eq!(store.len(), 100);
3182                assert!((&store).index_iter().zip(0..100).all(|(a, b)| a == Some(&b)));
3183                assert_eq!(store.heap_size(), (408, 544));
3184            }
3185
3186            #[test]
3187            fn round_trip_none() {
3188                let store = Columnar::into_columns((0..100).map(|_x| None::<i32>));
3189                assert_eq!(store.len(), 100);
3190                let foo = &store;
3191                assert!(foo.index_iter().zip(0..100).all(|(a, _b)| a == None));
3192                assert_eq!(store.heap_size(), (8, 32));
3193            }
3194
3195            #[test]
3196            fn round_trip_mixed() {
3197                // Type annotation is important to avoid some inference overflow.
3198                let store: Options<Vec<i32>>  = Columnar::into_columns((0..100).map(|x| if x % 2 == 0 { Some(x) } else { None }));
3199                assert_eq!(store.len(), 100);
3200                assert!((&store).index_iter().zip(0..100).all(|(a, b)| a == if b % 2 == 0 { Some(&b) } else { None }));
3201                assert_eq!(store.heap_size(), (208, 288));
3202            }
3203        }
3204    }
3205}
3206
3207pub use lookback::{Repeats, Lookbacks};
3208/// Containers that can store either values, or offsets to prior values.
3209///
3210/// This has the potential to be more efficient than a list of `T` when many values repeat in
3211/// close proximity. Values must be equatable, and the degree of lookback can be configured.
3212pub mod lookback {
3213
3214    use crate::{Options, Results, Push, Index, Len, HeapSize};
3215
3216    /// A container that encodes repeated values with a `None` variant, at the cost of extra bits for every record.
3217    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3218    #[derive(Clone, Debug, Default, PartialEq)]
3219    pub struct Repeats<TC, const N: u8 = 255> {
3220        /// Some(x) encodes a value, and None indicates the prior `x` value.
3221        pub inner: Options<TC>,
3222    }
3223
3224    impl<T: PartialEq, TC: Push<T> + Len, const N: u8> Push<T> for Repeats<TC, N>
3225    where
3226        for<'a> &'a TC: Index,
3227        for<'a> <&'a TC as Index>::Ref : PartialEq<T>,
3228    {
3229        #[inline]
3230        fn push(&mut self, item: T) {
3231            // Look at the last `somes` value for a potential match.
3232            let insert: Option<T> = if (&self.inner.somes).last().map(|x| x.eq(&item)) == Some(true) {
3233                None
3234            } else {
3235                Some(item)
3236            };
3237            self.inner.push(insert);
3238        }
3239    }
3240
3241    impl<TC: Len, const N: u8> Len for Repeats<TC, N> {
3242        #[inline(always)] fn len(&self) -> usize { self.inner.len() }
3243    }
3244
3245    impl<TC: Index, const N: u8> Index for Repeats<TC, N> {
3246        type Ref = TC::Ref;
3247        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
3248            match self.inner.get(index) {
3249                Some(item) => item,
3250                None => {
3251                    let pos = self.inner.indexes.rank(index) - 1;
3252                    self.inner.somes.get(pos)
3253                },
3254            }
3255        }
3256    }
3257
3258    impl<TC: HeapSize, const N: u8> HeapSize for Repeats<TC, N> {
3259        fn heap_size(&self) -> (usize, usize) {
3260            self.inner.heap_size()
3261        }
3262    }
3263
3264    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3265    #[derive(Clone, Debug, Default, PartialEq)]
3266    pub struct Lookbacks<TC, VC = Vec<u8>, const N: u8 = 255> {
3267        /// Ok(x) encodes a value, and Err(y) indicates a value `y` back.
3268        pub inner: Results<TC, VC>,
3269    }
3270
3271    impl<T: PartialEq, TC: Push<T> + Len, VC: Push<u8>, const N: u8> Push<T> for Lookbacks<TC, VC, N>
3272    where
3273        for<'a> &'a TC: Index,
3274        for<'a> <&'a TC as Index>::Ref : PartialEq<T>,
3275    {
3276        #[inline]
3277        fn push(&mut self, item: T) {
3278            // Look backwards through (0 .. N) to look for a matching value.
3279            let oks_len = self.inner.oks.len();
3280            let find = (0u8 .. N).take(self.inner.oks.len()).find(|i| (&self.inner.oks).get(oks_len - (*i as usize) - 1) == item);
3281            let insert: Result<T, u8> = if let Some(back) = find { Err(back) } else { Ok(item) };
3282            self.inner.push(insert);
3283        }
3284    }
3285
3286    impl<TC, VC, const N: u8> Len for Lookbacks<TC, VC, N> {
3287        #[inline(always)] fn len(&self) -> usize { self.inner.len() }
3288    }
3289
3290    impl<TC: Index, VC: Index<Ref=u8>, const N: u8> Index for Lookbacks<TC, VC, N> {
3291        type Ref = TC::Ref;
3292        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
3293            match self.inner.get(index) {
3294                Ok(item) => item,
3295                Err(back) => {
3296                    let pos = self.inner.indexes.rank(index) - 1;
3297                    self.inner.oks.get(pos - (back as usize))
3298                },
3299            }
3300        }
3301    }
3302    impl<'a, TC, const N: u8> Index for &'a Lookbacks<TC, Vec<u8>, N>
3303    where
3304        &'a TC: Index,
3305    {
3306        type Ref = <&'a TC as Index>::Ref;
3307        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
3308            match (&self.inner).get(index) {
3309                Ok(item) => item,
3310                Err(back) => {
3311                    let pos = self.inner.indexes.rank(index) - 1;
3312                    (&self.inner.oks).get(pos - (*back as usize))
3313                },
3314            }
3315        }
3316    }
3317
3318    impl<TC: HeapSize, VC: HeapSize, const N: u8> HeapSize for Lookbacks<TC, VC, N> {
3319        fn heap_size(&self) -> (usize, usize) {
3320            self.inner.heap_size()
3321        }
3322    }
3323}
3324
3325/// Roaring bitmap (and similar) containers.
3326pub mod roaring {
3327
3328    use crate::Results;
3329
3330    /// A container for `bool` that uses techniques from Roaring bitmaps.
3331    ///
3332    /// These techniques are to block the bits into blocks of 2^16 bits,
3333    /// and to encode each block based on its density. Either a bitmap
3334    /// for dense blocks or a list of set bits for sparse blocks.
3335    ///
3336    /// Additionally, other representations encode runs of set bits.
3337    pub struct RoaringBits {
3338        _inner: Results<[u64; 1024], Vec<u16>>,
3339    }
3340}
3341
3342pub use chain_mod::{chain, chain_one};
3343
3344pub mod chain_mod {
3345    //! Chain iterators, or iterators and an item. Iterators that might improve inlining, at the
3346    //! expense of not providing iterator maker traits.
3347
3348    /// Chain two iterators together. The result first iterates over `a`, then `b`, until both are
3349    /// exhausted.
3350    ///
3351    /// This addresses a quirk where deep iterators would not be optimized to their full potential.
3352    /// Here, functions are marked with `#[inline(always)]` to indicate that the compiler should
3353    /// try hard to inline the iterators.
3354    #[inline(always)]
3355    pub fn chain<A: IntoIterator, B: IntoIterator<Item=A::Item>>(a: A, b: B) -> Chain<A::IntoIter, B::IntoIter> {
3356        Chain { a: Some(a.into_iter()), b: Some(b.into_iter()) }
3357    }
3358
3359    pub struct Chain<A, B> {
3360        a: Option<A>,
3361        b: Option<B>,
3362    }
3363
3364    impl<A, B> Iterator for Chain<A, B>
3365    where
3366        A: Iterator,
3367        B: Iterator<Item=A::Item>,
3368    {
3369        type Item = A::Item;
3370
3371        #[inline(always)]
3372        fn next(&mut self) -> Option<Self::Item> {
3373            if let Some(a) = self.a.as_mut() {
3374                let x = a.next();
3375                if x.is_none() {
3376                    self.a = None;
3377                } else {
3378                    return x;
3379                }
3380            }
3381            if let Some(b) = self.b.as_mut() {
3382                let x = b.next();
3383                if x.is_none() {
3384                    self.b = None;
3385                } else {
3386                    return x;
3387                }
3388            }
3389            None
3390        }
3391
3392        #[inline]
3393        fn fold<Acc, F>(self, mut acc: Acc, mut f: F) -> Acc
3394        where
3395            F: FnMut(Acc, Self::Item) -> Acc,
3396        {
3397            if let Some(a) = self.a {
3398                acc = a.fold(acc, &mut f);
3399            }
3400            if let Some(b) = self.b {
3401                acc = b.fold(acc, f);
3402            }
3403            acc
3404        }
3405    }
3406
3407    /// Chain a single item to an iterator. The resulting iterator first iterates over `a`,
3408    /// then `b`. The resulting iterator is marked as `#[inline(always)]`, which in some situations
3409    /// causes better inlining behavior with current Rust versions.
3410    #[inline(always)]
3411    pub fn chain_one<A: IntoIterator>(a: A, b: A::Item) -> ChainOne<A::IntoIter> {
3412        ChainOne { a: Some(a.into_iter()), b: Some(b) }
3413    }
3414
3415    pub struct ChainOne<A: Iterator> {
3416        a: Option<A>,
3417        b: Option<A::Item>,
3418    }
3419
3420    impl<A: Iterator> Iterator for ChainOne<A> {
3421        type Item = A::Item;
3422
3423        #[inline(always)]
3424        fn next(&mut self) -> Option<Self::Item> {
3425            if let Some(a) = self.a.as_mut() {
3426                let x = a.next();
3427                if x.is_none() {
3428                    self.a = None;
3429                    self.b.take()
3430                } else {
3431                    x
3432                }
3433            } else {
3434                None
3435            }
3436        }
3437    }
3438
3439    #[cfg(test)]
3440    mod test {
3441        use super::*;
3442
3443        #[test]
3444        fn test_chain() {
3445            let a = [1, 2, 3];
3446            let b = [4, 5, 6];
3447            let mut chain = chain(a, b);
3448            assert_eq!(chain.next(), Some(1));
3449            assert_eq!(chain.next(), Some(2));
3450            assert_eq!(chain.next(), Some(3));
3451            assert_eq!(chain.next(), Some(4));
3452            assert_eq!(chain.next(), Some(5));
3453            assert_eq!(chain.next(), Some(6));
3454            assert_eq!(chain.next(), None);
3455        }
3456
3457        #[test]
3458        fn test_chain_one() {
3459            let a = [1, 2, 3];
3460            let b = 4;
3461            let mut chain = chain_one(a, b);
3462            assert_eq!(chain.next(), Some(1));
3463            assert_eq!(chain.next(), Some(2));
3464            assert_eq!(chain.next(), Some(3));
3465            assert_eq!(chain.next(), Some(4));
3466            assert_eq!(chain.next(), None);
3467        }
3468    }
3469}