Skip to main content

columnar/
sums.rs

1//! Containers for enumerations ("sum types") that store variants separately.
2//!
3//! The main work of these types is storing a discriminant and index efficiently,
4//! as containers for each of the variant types can hold the actual data.
5
6/// Stores for maintaining discriminants, and associated sequential indexes.
7///
8/// The sequential indexes are not explicitly maintained, but are supported
9/// by a `rank(index)` function that indicates how many of a certain variant
10/// precede the given index. While this could potentially be done with a scan
11/// of all preceding discriminants, the stores maintain running accumulations
12/// that make the operation constant time (using additional amortized memory).
13pub mod rank_select {
14
15    use alloc::{vec::Vec, string::String};
16    use crate::primitive::Bools;
17
18    use crate::{Borrow, Len, Index, IndexAs, Push, Clear};
19
20    /// A store for maintaining `Vec<bool>` with fast `rank` and `select` access.
21    ///
22    /// The design is to have `u64` running counts for each block of 1024 bits,
23    /// which are roughly the size of a cache line. This is roughly 6% overhead,
24    /// above the bits themselves, which seems pretty solid.
25    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
26    #[derive(Copy, Clone, Debug, Default, PartialEq)]
27    pub struct RankSelect<CC = Vec<u64>, VC = Vec<u64>, WC = [u64; 2]> {
28        /// Counts of the number of cumulative set (true) bits, *after* each block of 1024 bits.
29        pub counts: CC,
30        /// The bits themselves.
31        pub values: Bools<VC, WC>,
32    }
33
34    impl<CC: crate::common::BorrowIndexAs<u64>, VC: crate::common::BorrowIndexAs<u64>> RankSelect<CC, VC> {
35        #[inline(always)]
36        pub fn borrow<'a>(&'a self) -> RankSelect<CC::Borrowed<'a>, VC::Borrowed<'a>, &'a [u64]> {
37            RankSelect {
38                counts: self.counts.borrow(),
39                values: self.values.borrow(),
40            }
41        }
42        #[inline(always)]
43        pub fn reborrow<'b, 'a: 'b>(thing: RankSelect<CC::Borrowed<'a>, VC::Borrowed<'a>, &'a [u64]>) -> RankSelect<CC::Borrowed<'b>, VC::Borrowed<'b>, &'b [u64]> {
44            RankSelect {
45                counts: CC::reborrow(thing.counts),
46                values: Bools::<VC, [u64; 2]>::reborrow(thing.values),
47            }
48        }
49    }
50
51    impl<'a, CC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for RankSelect<CC, VC, &'a [u64]> {
52        #[inline(always)]
53        fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
54            crate::chain(self.counts.as_bytes(), self.values.as_bytes())
55        }
56    }
57    impl<'a, CC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for RankSelect<CC, VC, &'a [u64]> {
58        const SLICE_COUNT: usize = CC::SLICE_COUNT + <crate::primitive::Bools<VC, &'a [u64]>>::SLICE_COUNT;
59        #[inline(always)]
60        fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
61            Self {
62                counts: crate::FromBytes::from_bytes(bytes),
63                values: crate::FromBytes::from_bytes(bytes),
64            }
65        }
66        #[inline(always)]
67        fn from_store(store: &crate::bytes::indexed::DecodedStore<'a>, offset: &mut usize) -> Self {
68            Self {
69                counts: CC::from_store(store, offset),
70                values: <crate::primitive::Bools<VC, &'a [u64]>>::from_store(store, offset),
71            }
72        }
73        fn element_sizes(sizes: &mut Vec<usize>) -> Result<(), String> {
74            CC::element_sizes(sizes)?;
75            <crate::primitive::Bools<VC, &'a [u64]>>::element_sizes(sizes)?;
76            Ok(())
77        }
78    }
79
80
81    impl<CC, VC: Len + IndexAs<u64>, WC: IndexAs<u64>> RankSelect<CC, VC, WC> {
82        #[inline(always)]
83        pub fn get(&self, index: usize) -> bool {
84            Index::get(&self.values, index)
85        }
86    }
87    impl<CC: Len + IndexAs<u64>, VC: Len + IndexAs<u64>, WC: IndexAs<u64>> RankSelect<CC, VC, WC> {
88        /// The number of set bits *strictly* preceding `index`.
89        ///
90        /// This number is accumulated first by reading out of `self.counts` at the correct position,
91        /// then by summing the ones in strictly prior `u64` entries, then by counting the ones in the
92        /// masked `u64` in which the bit lives.
93        pub fn rank(&self, index: usize) -> usize {
94            let bit = index % 64;
95            let block = index / 64;
96            let chunk = block / 16;
97            let mut count = if chunk > 0 { self.counts.index_as(chunk - 1) as usize } else { 0 };
98            for pos in (16 * chunk) .. block {
99                count += self.values.values.index_as(pos).count_ones() as usize;
100            }
101            // TODO: Panic if out of bounds?
102            let intra_word = if block == self.values.values.len() { self.values.tail.index_as(0) } else { self.values.values.index_as(block) };
103            count += (intra_word & ((1 << bit) - 1)).count_ones() as usize;
104            count
105        }
106        /// The index of the `rank`th set bit, should one exist.
107        pub fn select(&self, rank: u64) -> Option<usize> {
108            let mut chunk = 0;
109            // Step one is to find the position in `counts` where we go from `rank` to `rank + 1`.
110            // The position we are looking for is within that chunk of bits.
111            // TODO: Binary search is likely better at many scales. Rust's binary search is .. not helpful with ties.
112            while chunk < self.counts.len() && self.counts.index_as(chunk) <= rank {
113                chunk += 1;
114            }
115            let mut count = if chunk < self.counts.len() { self.counts.index_as(chunk) } else { 0 };
116            // Step two is to find the position within that chunk where the `rank`th bit is.
117            let mut block = 16 * chunk;
118            while block < self.values.values.len() && count + (self.values.values.index_as(block).count_ones() as u64) <= rank {
119                count += self.values.values.index_as(block).count_ones() as u64;
120                block += 1;
121            }
122            // Step three is to search the last word for the location, or return `None` if we run out of bits.
123            let last_bits = if block == self.values.values.len() { self.values.tail.index_as(1) as usize } else { 64 };
124            let last_word = if block == self.values.values.len() { self.values.tail.index_as(0) } else { self.values.values.index_as(block) };
125            for shift in 0 .. last_bits {
126                if ((last_word >> shift) & 0x01 == 0x01) && count + 1 == rank {
127                    return Some(64 * block + shift);
128                }
129                count += (last_word >> shift) & 0x01;
130            }
131
132            None
133        }
134    }
135
136    impl<CC, VC: Len, WC: IndexAs<u64>> RankSelect<CC, VC, WC> {
137        pub fn len(&self) -> usize {
138            self.values.len()
139        }
140    }
141
142    // This implementation probably only works for `Vec<u64>` and `Vec<u64>`, but we could fix that.
143    // Partly, it's hard to name the `Index` flavor that allows one to get back a `u64`.
144    impl<CC: for<'a> Push<&'a u64> + Len + IndexAs<u64>, VC: for<'a> Push<&'a u64> + Len + IndexAs<u64>> RankSelect<CC, VC> {
145        #[inline]
146        pub fn push(&mut self, bit: bool) {
147            self.values.push(&bit);
148            while self.counts.len() < self.values.len() / 1024 {
149                let mut count = self.counts.last().unwrap_or(0);
150                let lower = 16 * self.counts.len();
151                let upper = lower + 16;
152                for i in lower .. upper {
153                    count += self.values.values.index_as(i).count_ones() as u64;
154                }
155                self.counts.push(&count);
156            }
157        }
158    }
159    impl<CC: Clear, VC: Clear> Clear for RankSelect<CC, VC> {
160        fn clear(&mut self) {
161            self.counts.clear();
162            self.values.clear();
163        }
164    }
165}
166
167pub mod result {
168
169    use alloc::{vec::Vec, string::String};
170
171    use crate::{Clear, Columnar, Container, Len, IndexMut, Index, IndexAs, Push, Borrow};
172    use crate::RankSelect;
173
174    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
175    #[derive(Copy, Clone, Debug, Default, PartialEq)]
176    pub struct Results<SC, TC, CC=Vec<u64>, VC=Vec<u64>, WC=[u64; 2]> {
177        /// Bits set to `true` correspond to `Ok` variants.
178        pub indexes: RankSelect<CC, VC, WC>,
179        pub oks: SC,
180        pub errs: TC,
181    }
182
183    impl<S: Columnar, T: Columnar> Columnar for Result<S, T> {
184        fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
185            match (&mut *self, other) {
186                (Ok(x), Ok(y)) => x.copy_from(y),
187                (Err(x), Err(y)) => x.copy_from(y),
188                (_, other) => { *self = Self::into_owned(other); },
189            }
190        }
191        fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
192            match other {
193                Ok(y) => Ok(S::into_owned(y)),
194                Err(y) => Err(T::into_owned(y)),
195            }
196        }
197        type Container = Results<S::Container, T::Container>;
198    }
199
200    impl<SC: Borrow, TC: Borrow> Borrow for Results<SC, TC> {
201        type Ref<'a> = Result<SC::Ref<'a>, TC::Ref<'a>> where SC: 'a, TC: 'a;
202        type Borrowed<'a> = Results<SC::Borrowed<'a>, TC::Borrowed<'a>, &'a [u64], &'a [u64], &'a [u64]> where SC: 'a, TC: 'a;
203        fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
204            Results {
205                indexes: self.indexes.borrow(),
206                oks: self.oks.borrow(),
207                errs: self.errs.borrow(),
208            }
209        }
210        #[inline(always)]
211        fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where SC: 'a, TC: 'a {
212            Results {
213                indexes: RankSelect::<Vec<u64>, Vec<u64>>::reborrow(thing.indexes),
214                oks: SC::reborrow(thing.oks),
215                errs: TC::reborrow(thing.errs),
216            }
217        }
218        #[inline(always)]
219        fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a {
220            match thing {
221                Ok(y) => Ok(SC::reborrow_ref(y)),
222                Err(y) => Err(TC::reborrow_ref(y)),
223            }
224        }
225    }
226
227    impl<SC: Container, TC: Container> Container for Results<SC, TC> {
228        #[inline(always)]
229        fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: core::ops::Range<usize>) {
230            if !range.is_empty() {
231                // Starting offsets of each variant in `other`.
232                let oks_start = other.indexes.rank(range.start);
233                let errs_start = range.start - oks_start;
234
235                // Count the number of `Ok` and `Err` variants as we push, to determine the range.
236                // TODO: This could probably be `popcnt` somehow.
237                let mut oks = 0;
238                for index in range.clone() {
239                    let bit = other.indexes.get(index);
240                    self.indexes.push(bit);
241                    if bit { oks += 1; }
242                }
243                let errs = range.len() - oks;
244
245                self.oks.extend_from_self(other.oks, oks_start .. oks_start + oks);
246                self.errs.extend_from_self(other.errs, errs_start .. errs_start + errs);
247            }
248        }
249
250        fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
251            // TODO: reserve room in `self.indexes`.
252            self.oks.reserve_for(selves.clone().map(|x| x.oks));
253            self.errs.reserve_for(selves.map(|x| x.errs));
254        }
255    }
256
257    impl<'a, SC: crate::AsBytes<'a>, TC: crate::AsBytes<'a>, CC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for Results<SC, TC, CC, VC, &'a [u64]> {
258        #[inline(always)]
259        fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
260            let iter = self.indexes.as_bytes();
261            let iter = crate::chain(iter, self.oks.as_bytes());
262            crate::chain(iter, self.errs.as_bytes())
263        }
264    }
265    impl<'a, SC: crate::FromBytes<'a>, TC: crate::FromBytes<'a>, CC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for Results<SC, TC, CC, VC, &'a [u64]> {
266        const SLICE_COUNT: usize = <RankSelect<CC, VC, &'a [u64]>>::SLICE_COUNT + SC::SLICE_COUNT + TC::SLICE_COUNT;
267        #[inline(always)]
268        fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
269            Self {
270                indexes: crate::FromBytes::from_bytes(bytes),
271                oks: crate::FromBytes::from_bytes(bytes),
272                errs: crate::FromBytes::from_bytes(bytes),
273            }
274        }
275        #[inline(always)]
276        fn from_store(store: &crate::bytes::indexed::DecodedStore<'a>, offset: &mut usize) -> Self {
277            Self {
278                indexes: crate::FromBytes::from_store(store, offset),
279                oks: SC::from_store(store, offset),
280                errs: TC::from_store(store, offset),
281            }
282        }
283        fn element_sizes(sizes: &mut Vec<usize>) -> Result<(), String> {
284            <RankSelect<CC, VC, &'a [u64]>>::element_sizes(sizes)?;
285            SC::element_sizes(sizes)?;
286            TC::element_sizes(sizes)?;
287            Ok(())
288        }
289    }
290
291    impl<SC, TC, CC, VC: Len, WC: IndexAs<u64>> Len for Results<SC, TC, CC, VC, WC> {
292        #[inline(always)] fn len(&self) -> usize { self.indexes.len() }
293    }
294
295    impl<SC, TC, CC, VC, WC> Index for Results<SC, TC, CC, VC, WC>
296    where
297        SC: Index,
298        TC: Index,
299        CC: IndexAs<u64> + Len,
300        VC: IndexAs<u64> + Len,
301        WC: IndexAs<u64>,
302    {
303        type Ref = Result<SC::Ref, TC::Ref>;
304        #[inline(always)]
305        fn get(&self, index: usize) -> Self::Ref {
306            if self.indexes.get(index) {
307                Ok(self.oks.get(self.indexes.rank(index)))
308            } else {
309                Err(self.errs.get(index - self.indexes.rank(index)))
310            }
311        }
312    }
313    impl<'a, SC, TC, CC, VC, WC> Index for &'a Results<SC, TC, CC, VC, WC>
314    where
315        &'a SC: Index,
316        &'a TC: Index,
317        CC: IndexAs<u64> + Len,
318        VC: IndexAs<u64> + Len,
319        WC: IndexAs<u64>,
320    {
321        type Ref = Result<<&'a SC as Index>::Ref, <&'a TC as Index>::Ref>;
322        #[inline(always)]
323        fn get(&self, index: usize) -> Self::Ref {
324            if self.indexes.get(index) {
325                Ok((&self.oks).get(self.indexes.rank(index)))
326            } else {
327                Err((&self.errs).get(index - self.indexes.rank(index)))
328            }
329        }
330    }
331
332    // NB: You are not allowed to change the variant, but can change its contents.
333    impl<SC: IndexMut, TC: IndexMut, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len> IndexMut for Results<SC, TC, CC, VC> {
334        type IndexMut<'a> = Result<SC::IndexMut<'a>, TC::IndexMut<'a>> where SC: 'a, TC: 'a, CC: 'a, VC: 'a;
335        #[inline(always)]
336        fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
337            if self.indexes.get(index) {
338                Ok(self.oks.get_mut(self.indexes.rank(index)))
339            } else {
340                Err(self.errs.get_mut(index - self.indexes.rank(index)))
341            }
342        }
343    }
344
345    impl<S, SC: Push<S>, T, TC: Push<T>> Push<Result<S, T>> for Results<SC, TC> {
346        #[inline]
347        fn push(&mut self, item: Result<S, T>) {
348            match item {
349                Ok(item) => {
350                    self.indexes.push(true);
351                    self.oks.push(item);
352                }
353                Err(item) => {
354                    self.indexes.push(false);
355                    self.errs.push(item);
356                }
357            }
358        }
359    }
360    impl<'a, S, SC: Push<&'a S>, T, TC: Push<&'a T>> Push<&'a Result<S, T>> for Results<SC, TC> {
361        #[inline]
362        fn push(&mut self, item: &'a Result<S, T>) {
363            match item {
364                Ok(item) => {
365                    self.indexes.push(true);
366                    self.oks.push(item);
367                }
368                Err(item) => {
369                    self.indexes.push(false);
370                    self.errs.push(item);
371                }
372            }
373        }
374    }
375
376    impl<SC: Clear, TC: Clear> Clear for Results<SC, TC> {
377        fn clear(&mut self) {
378            self.indexes.clear();
379            self.oks.clear();
380            self.errs.clear();
381        }
382    }
383
384    impl<SC, TC, CC, VC, WC> Results<SC, TC, CC, VC, WC> {
385        /// Returns ok values if no errors exist.
386        pub fn unwrap(self) -> SC where TC: Len {
387            assert!(self.errs.is_empty());
388            self.oks
389        }
390        /// Returns error values if no oks exist.
391        pub fn unwrap_err(self) -> TC where SC: Len {
392            assert!(self.oks.is_empty());
393            self.errs
394        }
395        /// Returns ok values if no errors exist, or `None`.
396        pub fn try_unwrap(self) -> Option<SC> where TC: Len {
397            if self.errs.is_empty() { Some(self.oks) } else { None }
398        }
399        /// Returns error values if no oks exist, or `None`.
400        pub fn try_unwrap_err(self) -> Option<TC> where SC: Len {
401            if self.oks.is_empty() { Some(self.errs) } else { None }
402        }
403    }
404    #[cfg(test)]
405    mod test {
406        #[test]
407        fn round_trip() {
408
409            use crate::common::{Index, Push, Len};
410
411            let mut column: crate::ContainerOf<Result<u64, u64>> = Default::default();
412            for i in 0..100 {
413                column.push(Ok::<u64, u64>(i));
414                column.push(Err::<u64, u64>(i));
415            }
416
417            assert_eq!(column.len(), 200);
418
419            for i in 0..100 {
420                assert_eq!(column.get(2*i+0), Ok(i as u64));
421                assert_eq!(column.get(2*i+1), Err(i as u64));
422            }
423
424            let mut column: crate::ContainerOf<Result<u64, u8>> = Default::default();
425            for i in 0..100 {
426                column.push(Ok::<u64, u8>(i as u64));
427                column.push(Err::<u64, u8>(i as u8));
428            }
429
430            assert_eq!(column.len(), 200);
431
432            for i in 0..100 {
433                assert_eq!(column.get(2*i+0), Ok(i as u64));
434                assert_eq!(column.get(2*i+1), Err(i as u8));
435            }
436        }
437    }
438}
439
440pub mod option {
441
442    use alloc::{vec::Vec, string::String};
443
444    use crate::{Clear, Columnar, Container, Len, IndexMut, Index, IndexAs, Push, Borrow};
445    use crate::RankSelect;
446
447#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
448    #[derive(Copy, Clone, Debug, Default, PartialEq)]
449    pub struct Options<TC, CC=Vec<u64>, VC=Vec<u64>, WC=[u64; 2]> {
450        /// Uses two bits for each item, one to indicate the variant and one (amortized)
451        /// to enable efficient rank determination.
452        pub indexes: RankSelect<CC, VC, WC>,
453        pub somes: TC,
454    }
455
456    impl<T: Columnar> Columnar for Option<T> {
457        fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
458            match (&mut *self, other) {
459                (Some(x), Some(y)) => { x.copy_from(y); }
460                (_, other) => { *self = Self::into_owned(other); }
461            }
462        }
463        fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
464            other.map(|x| T::into_owned(x))
465        }
466        type Container = Options<T::Container>;
467    }
468
469    impl<TC: Borrow> Borrow for Options<TC> {
470        type Ref<'a> = Option<TC::Ref<'a>> where TC: 'a;
471        type Borrowed<'a> = Options<TC::Borrowed<'a>, &'a [u64], &'a [u64], &'a [u64]> where TC: 'a;
472        fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
473            Options {
474                indexes: self.indexes.borrow(),
475                somes: self.somes.borrow(),
476            }
477        }
478        #[inline(always)]
479        fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where TC: 'a {
480            Options {
481                indexes: RankSelect::<Vec<u64>, Vec<u64>>::reborrow(thing.indexes),
482                somes: TC::reborrow(thing.somes),
483            }
484        }
485        #[inline(always)]
486        fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a {
487            thing.map(TC::reborrow_ref)
488        }
489    }
490
491    impl<TC: Container> Container for Options<TC> {
492        #[inline(always)]
493        fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: core::ops::Range<usize>) {
494            if !range.is_empty() {
495                // Starting offsets of `Some` variants in `other`.
496                let somes_start = other.indexes.rank(range.start);
497
498                // Count the number of `Some` variants as we push, to determine the range.
499                // TODO: This could probably be `popcnt` somehow.
500                let mut somes = 0;
501                for index in range {
502                    let bit = other.indexes.get(index);
503                    self.indexes.push(bit);
504                    if bit { somes += 1; }
505                }
506
507                self.somes.extend_from_self(other.somes, somes_start .. somes_start + somes);
508            }
509        }
510
511        fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
512            // TODO: reserve room in `self.indexes`.
513            self.somes.reserve_for(selves.map(|x| x.somes));
514        }
515    }
516
517    impl<'a, TC: crate::AsBytes<'a>, CC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for Options<TC, CC, VC, &'a [u64]> {
518        #[inline(always)]
519        fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
520            crate::chain(self.indexes.as_bytes(), self.somes.as_bytes())
521        }
522    }
523
524    impl <'a, TC: crate::FromBytes<'a>, CC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for Options<TC, CC, VC, &'a [u64]> {
525        const SLICE_COUNT: usize = <RankSelect<CC, VC, &'a [u64]>>::SLICE_COUNT + TC::SLICE_COUNT;
526        #[inline(always)]
527        fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
528            Self {
529                indexes: crate::FromBytes::from_bytes(bytes),
530                somes: crate::FromBytes::from_bytes(bytes),
531            }
532        }
533        #[inline(always)]
534        fn from_store(store: &crate::bytes::indexed::DecodedStore<'a>, offset: &mut usize) -> Self {
535            Self {
536                indexes: crate::FromBytes::from_store(store, offset),
537                somes: TC::from_store(store, offset),
538            }
539        }
540        fn element_sizes(sizes: &mut Vec<usize>) -> Result<(), String> {
541            <RankSelect<CC, VC, &'a [u64]>>::element_sizes(sizes)?;
542            TC::element_sizes(sizes)?;
543            Ok(())
544        }
545    }
546
547    impl<T, CC, VC: Len, WC: IndexAs<u64>> Len for Options<T, CC, VC, WC> {
548        #[inline(always)] fn len(&self) -> usize { self.indexes.len() }
549    }
550
551    impl<TC: Index, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len, WC: IndexAs<u64>> Index for Options<TC, CC, VC, WC> {
552        type Ref = Option<TC::Ref>;
553        #[inline(always)]
554        fn get(&self, index: usize) -> Self::Ref {
555            if self.indexes.get(index) {
556                Some(self.somes.get(self.indexes.rank(index)))
557            } else {
558                None
559            }
560        }
561    }
562    impl<'a, TC, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len, WC: IndexAs<u64>> Index for &'a Options<TC, CC, VC, WC>
563    where &'a TC: Index
564    {
565        type Ref = Option<<&'a TC as Index>::Ref>;
566        #[inline(always)]
567        fn get(&self, index: usize) -> Self::Ref {
568            if self.indexes.get(index) {
569                Some((&self.somes).get(self.indexes.rank(index)))
570            } else {
571                None
572            }
573        }
574    }
575    impl<TC: IndexMut, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len> IndexMut for Options<TC, CC, VC> {
576        type IndexMut<'a> = Option<TC::IndexMut<'a>> where TC: 'a, CC: 'a, VC: 'a;
577        #[inline(always)]
578        fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
579            if self.indexes.get(index) {
580                Some(self.somes.get_mut(self.indexes.rank(index)))
581            } else {
582                None
583            }
584        }
585    }
586
587    impl<T, TC: Push<T> + Len> Push<Option<T>> for Options<TC> {
588        #[inline]
589        fn push(&mut self, item: Option<T>) {
590            match item {
591                Some(item) => {
592                    self.indexes.push(true);
593                    self.somes.push(item);
594                }
595                None => {
596                    self.indexes.push(false);
597                }
598            }
599        }
600    }
601    impl<'a, T, TC: Push<&'a T> + Len> Push<&'a Option<T>> for Options<TC> {
602        #[inline]
603        fn push(&mut self, item: &'a Option<T>) {
604            match item {
605                Some(item) => {
606                    self.indexes.push(true);
607                    self.somes.push(item);
608                }
609                None => {
610                    self.indexes.push(false);
611                }
612            }
613        }
614    }
615
616    impl<TC, CC, VC, WC> Options<TC, CC, VC, WC> {
617        /// Returns the inner container if all elements are `Some`, or `None`.
618        pub fn try_unwrap(self) -> Option<TC> where TC: Len, VC: Len, WC: IndexAs<u64> {
619            if self.somes.len() == self.indexes.len() { Some(self.somes) } else { None }
620        }
621        /// True if all elements are `None`.
622        pub fn is_all_none(&self) -> bool where TC: Len {
623            self.somes.is_empty()
624        }
625    }
626
627    impl<TC: Clear> Clear for Options<TC> {
628        fn clear(&mut self) {
629            self.indexes.clear();
630            self.somes.clear();
631        }
632    }
633
634    #[cfg(test)]
635    mod test {
636        use alloc::vec::Vec;
637
638        use crate::Columnar;
639        use crate::common::{Index, Len};
640        use crate::Options;
641
642        #[test]
643        fn round_trip_some() {
644            // Type annotation is important to avoid some inference overflow.
645            let store: Options<Vec<i32>> = Columnar::into_columns((0..100).map(Some));
646            assert_eq!(store.len(), 100);
647            assert!((&store).index_iter().zip(0..100).all(|(a, b)| a == Some(&b)));
648        }
649
650        #[test]
651        fn round_trip_none() {
652            let store = Columnar::into_columns((0..100).map(|_x| None::<i32>));
653            assert_eq!(store.len(), 100);
654            let foo = &store;
655            assert!(foo.index_iter().zip(0..100).all(|(a, _b)| a == None));
656        }
657
658        #[test]
659        fn round_trip_mixed() {
660            // Type annotation is important to avoid some inference overflow.
661            let store: Options<Vec<i32>>  = Columnar::into_columns((0..100).map(|x| if x % 2 == 0 { Some(x) } else { None }));
662            assert_eq!(store.len(), 100);
663            assert!((&store).index_iter().zip(0..100).all(|(a, b)| a == if b % 2 == 0 { Some(&b) } else { None }));
664        }
665    }
666}
667
668pub mod discriminant {
669
670    use alloc::{vec::Vec, string::String};
671    use crate::{Clear, Container, Len, Index, IndexAs, Borrow};
672
673    /// Tracks variant discriminants and offsets for enum containers.
674    ///
675    /// Uses two arrays (`variant` and `offset`) with three states:
676    /// - **Empty**: both arrays empty, length is 0.
677    /// - **Homogeneous**: `variant` is empty, `offset` holds `[tag, count]` where
678    ///   `tag = variant_index + 1`. All elements share a single variant with
679    ///   identity offsets (element `i` maps to offset `i`).
680    /// - **Heterogeneous**: `variant` has per-element discriminants (`u8`),
681    ///   `offset` has per-element offsets into variant containers (`u64`).
682    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
683    #[derive(Clone, Debug, Default, PartialEq)]
684    pub struct Discriminant<CVar = Vec<u8>, COff = Vec<u64>> {
685        /// Per-element variant discriminants; empty when homogeneous.
686        pub variant: CVar,
687        /// Per-element offsets (heterogeneous), or `[tag, count]` (homogeneous), or empty.
688        pub offset: COff,
689    }
690
691    impl<CVar: Copy, COff: Copy> Copy for Discriminant<CVar, COff> {}
692
693    impl Discriminant {
694        /// Push a variant discriminant and the offset into its variant container.
695        #[inline]
696        pub fn push(&mut self, variant: u8, offset: u64) {
697            let tag = variant as u64 + 1;
698            if self.variant.is_empty() {
699                if self.offset.is_empty() {
700                    // Empty → start homogeneous: offset = [tag, 1].
701                    self.offset.push(tag);
702                    self.offset.push(1);
703                } else if self.offset[0] == tag {
704                    // Same variant; stay homogeneous, increment count.
705                    self.offset[1] += 1;
706                } else {
707                    // Different variant; transition to heterogeneous.
708                    let prev = (self.offset[0] - 1) as u8;
709                    let count = self.offset[1];
710                    self.variant.reserve(count as usize + 1);
711                    self.offset.clear();
712                    self.offset.reserve(count as usize + 1);
713                    for i in 0..count {
714                        self.variant.push(prev);
715                        self.offset.push(i);
716                    }
717                    self.variant.push(variant);
718                    self.offset.push(offset);
719                }
720            } else {
721                // Already heterogeneous.
722                self.variant.push(variant);
723                self.offset.push(offset);
724            }
725        }
726
727        /// Pre-allocate for the given borrowed discriminants.
728        pub fn reserve_for<'a>(&mut self, selves: impl Iterator<Item = Discriminant<&'a [u8], &'a [u64]>> + Clone) {
729            self.variant.reserve_for(selves.clone().map(|x| x.variant));
730            self.offset.reserve_for(selves.map(|x| x.offset));
731        }
732    }
733
734    impl<CVar: Len, COff: Len> Discriminant<CVar, COff> {
735        /// True if elements have mixed variants, with per-element discriminants and offsets.
736        #[inline]
737        pub fn is_heterogeneous(&self) -> bool {
738            !self.variant.is_empty()
739        }
740        /// Returns `Some(variant)` if all elements share a single variant.
741        #[inline]
742        pub fn homogeneous(&self) -> Option<u8> where COff: IndexAs<u64> {
743            if self.variant.is_empty() && self.offset.len() >= 2 {
744                Some((self.offset.index_as(0) - 1) as u8)
745            } else {
746                None
747            }
748        }
749        /// Returns `(variant, offset)` for the element at `index`.
750        #[inline(always)]
751        pub fn get(&self, index: usize) -> (u8, u64) where CVar: IndexAs<u8>, COff: IndexAs<u64> {
752            if self.is_heterogeneous() {
753                (self.variant.index_as(index), self.offset.index_as(index))
754            } else {
755                let tag: u64 = self.offset.index_as(0);
756                ((tag - 1) as u8, index as u64)
757            }
758        }
759    }
760
761    impl<CVar: Len, COff: Len + IndexAs<u64>> Len for Discriminant<CVar, COff> {
762        #[inline(always)]
763        fn len(&self) -> usize {
764            if self.is_heterogeneous() { self.variant.len() }
765            else if self.offset.len() >= 2 { self.offset.index_as(1) as usize }
766            else { 0 }
767        }
768    }
769
770    // Index for the borrowed form: returns (variant, offset).
771    impl<'a> Index for Discriminant<&'a [u8], &'a [u64]> {
772        type Ref = (u8, u64);
773        #[inline(always)]
774        fn get(&self, index: usize) -> (u8, u64) {
775            if self.is_heterogeneous() {
776                (self.variant.index_as(index), self.offset.index_as(index))
777            } else {
778                ((self.offset[0] - 1) as u8, index as u64)
779            }
780        }
781    }
782
783    // Borrow
784    impl Borrow for Discriminant {
785        type Ref<'a> = (u8, u64);
786        type Borrowed<'a> = Discriminant<&'a [u8], &'a [u64]>;
787        #[inline(always)]
788        fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
789            Discriminant {
790                variant: &self.variant[..],
791                offset: &self.offset[..],
792            }
793        }
794        #[inline(always)]
795        fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> {
796            Discriminant {
797                variant: thing.variant,
798                offset: thing.offset,
799            }
800        }
801        #[inline(always)]
802        fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> { thing }
803    }
804
805    impl<CVar: Clear, COff: Clear> Clear for Discriminant<CVar, COff> {
806        #[inline(always)]
807        fn clear(&mut self) {
808            self.variant.clear();
809            self.offset.clear();
810        }
811    }
812
813
814    // AsBytes for borrowed form
815    impl<'a> crate::AsBytes<'a> for Discriminant<&'a [u8], &'a [u64]> {
816        #[inline(always)]
817        fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
818            let variant = self.variant.as_bytes();
819            let offset = self.offset.as_bytes();
820            crate::chain(variant, offset)
821        }
822    }
823
824    // FromBytes for borrowed form
825    impl<'a> crate::FromBytes<'a> for Discriminant<&'a [u8], &'a [u64]> {
826        const SLICE_COUNT: usize = <&'a [u8]>::SLICE_COUNT + <&'a [u64]>::SLICE_COUNT;
827        #[inline(always)]
828        fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
829            let variant = crate::FromBytes::from_bytes(bytes);
830            let offset = crate::FromBytes::from_bytes(bytes);
831            Self { variant, offset }
832        }
833        #[inline(always)]
834        fn from_store(store: &crate::bytes::indexed::DecodedStore<'a>, offset: &mut usize) -> Self {
835            let variant = crate::FromBytes::from_store(store, offset);
836            let offset_field = crate::FromBytes::from_store(store, offset);
837            Self { variant, offset: offset_field }
838        }
839        fn element_sizes(sizes: &mut Vec<usize>) -> Result<(), String> {
840            <&[u8]>::element_sizes(sizes)?;
841            <&[u64]>::element_sizes(sizes)?;
842            Ok(())
843        }
844    }
845
846    #[cfg(test)]
847    mod test {
848        use crate::Len;
849
850        #[test]
851        fn homogeneous_push() {
852            let mut d = super::Discriminant::default();
853            d.push(2, 0);
854            d.push(2, 1);
855            d.push(2, 2);
856            assert_eq!(d.len(), 3);
857            assert_eq!(d.homogeneous(), Some(2));
858            assert!(d.variant.is_empty());
859            // offset holds [tag, count] = [3, 3] in homogeneous mode.
860            assert_eq!(d.offset, vec![3, 3]);
861        }
862
863        #[test]
864        fn heterogeneous_transition() {
865            let mut d = super::Discriminant::default();
866            d.push(0, 0);
867            d.push(0, 1);
868            d.push(1, 0); // transition
869            assert_eq!(d.len(), 3);
870            assert_eq!(d.homogeneous(), None);
871            assert_eq!(d.variant, vec![0, 0, 1]);
872            assert_eq!(d.offset, vec![0, 1, 0]);
873        }
874
875        #[test]
876        fn clear_resets() {
877            use crate::Clear;
878            let mut d = super::Discriminant::default();
879            d.push(1, 0);
880            d.push(1, 1);
881            d.clear();
882            assert_eq!(d.len(), 0);
883            // After clear, first push starts homogeneous again.
884            d.push(3, 0);
885            assert_eq!(d.homogeneous(), Some(3));
886            assert_eq!(d.len(), 1);
887        }
888
889        #[test]
890        fn borrow_index() {
891            use crate::Borrow;
892            let mut d = super::Discriminant::default();
893            d.push(2, 0);
894            d.push(2, 1);
895            d.push(2, 2);
896            let b = d.borrow();
897            assert_eq!(b.get(0), (2, 0));
898            assert_eq!(b.get(1), (2, 1));
899            assert_eq!(b.get(2), (2, 2));
900        }
901
902        #[test]
903        fn borrow_index_heterogeneous() {
904            use crate::Borrow;
905            let mut d = super::Discriminant::default();
906            d.push(0, 0);
907            d.push(1, 0);
908            d.push(0, 1);
909            let b = d.borrow();
910            assert_eq!(b.get(0), (0, 0));
911            assert_eq!(b.get(1), (1, 0));
912            assert_eq!(b.get(2), (0, 1));
913        }
914    }
915}