Skip to main content

columnar/adts/
tree.rs

1//! Columnar representations of tree-structured data.
2//!
3//! A `Tree<T>` is a node with a value of type `T` and a list of children.
4//! `Trees<TC>` stores a collection of trees with columnar storage for node values.
5use alloc::{vec::Vec, string::String};
6
7use crate::{Borrow, Index, IndexAs, Len, Clear, Push};
8
9/// A tree node with a value and children.
10#[derive(Clone)]
11pub struct Tree<T> {
12    pub data: T,
13    pub kids: Vec<Tree<T>>,
14}
15
16impl Tree<usize> {
17    pub fn sum(&self) -> usize {
18        self.data + self.kids.iter().map(|x| x.sum()).sum::<usize>()
19    }
20}
21
22/// A stand-in for `Vec<Tree<T>>`, with columnar storage for node values.
23///
24/// Nodes are stored in BFS order. `groups` tracks tree boundaries (cumulative
25/// node counts with a leading 0). `bounds` tracks child ranges per node
26/// (also with a leading 0 sentinel).
27#[derive(Copy, Clone)]
28pub struct Trees<TC, BC = Vec<u64>> {
29    /// Cumulative node counts: tree `i` starts at node `groups[i]`.
30    pub groups: BC,
31    /// Child delimiters: node `j`'s children are at `bounds[j]..bounds[j+1]`
32    /// (root nodes use `bounds[j]+1..bounds[j+1]` to skip themselves).
33    pub bounds: BC,
34    /// Columnar container for node values in BFS order.
35    pub values: TC,
36}
37
38impl<TC: Default> Default for Trees<TC> {
39    fn default() -> Self {
40        Self {
41            groups: vec![0u64],
42            bounds: vec![0u64],
43            values: TC::default(),
44        }
45    }
46}
47
48/// A reference to a single node within a `Trees` container.
49///
50/// Holds a copy of the borrowed values and bounds containers,
51/// plus the node's index and child range. Navigation to children
52/// constructs new `TreesRef` values.
53pub struct TreesRef<V, B> {
54    index: usize,
55    lower: usize,
56    upper: usize,
57    values: V,
58    bounds: B,
59}
60
61impl<V: Copy, B: Copy> Clone for TreesRef<V, B> {
62    fn clone(&self) -> Self { *self }
63}
64impl<V: Copy, B: Copy> Copy for TreesRef<V, B> {}
65
66impl<V: Index, B: IndexAs<u64>> TreesRef<V, B> {
67    /// The value at this node.
68    #[inline(always)]
69    pub fn value(&self) -> V::Ref {
70        self.values.get(self.index)
71    }
72    /// The number of children of this node.
73    #[inline(always)]
74    pub fn kids(&self) -> usize {
75        self.upper - self.lower
76    }
77}
78impl<V: Index + Copy, B: IndexAs<u64> + Copy> TreesRef<V, B> {
79    /// A reference to the `index`-th child of this node.
80    #[inline(always)]
81    pub fn child(&self, index: usize) -> Self {
82        assert!(index < self.upper - self.lower);
83        let child = self.lower + index;
84        TreesRef {
85            index: child,
86            lower: self.bounds.index_as(child) as usize,
87            upper: self.bounds.index_as(child + 1) as usize,
88            values: self.values,
89            bounds: self.bounds,
90        }
91    }
92}
93
94impl<TC, BC: Len> Len for Trees<TC, BC> {
95    #[inline(always)]
96    fn len(&self) -> usize { self.groups.len() - 1 }
97}
98
99impl<TC: Index + Copy, BC: IndexAs<u64> + Len + Copy> Index for Trees<TC, BC> {
100    type Ref = TreesRef<TC, BC>;
101    #[inline(always)]
102    fn get(&self, index: usize) -> Self::Ref {
103        let root = self.groups.index_as(index) as usize;
104        TreesRef {
105            index: root,
106            lower: self.bounds.index_as(root) as usize + 1,
107            upper: self.bounds.index_as(root + 1) as usize,
108            values: self.values,
109            bounds: self.bounds,
110        }
111    }
112}
113
114impl<'a, TC, BC: IndexAs<u64> + Len> Index for &'a Trees<TC, BC>
115where
116    &'a TC: Index,
117    &'a BC: IndexAs<u64>,
118{
119    type Ref = TreesRef<&'a TC, &'a BC>;
120    #[inline(always)]
121    fn get(&self, index: usize) -> Self::Ref {
122        let root = self.groups.index_as(index) as usize;
123        TreesRef {
124            index: root,
125            lower: self.bounds.index_as(root) as usize + 1,
126            upper: self.bounds.index_as(root + 1) as usize,
127            values: &self.values,
128            bounds: &self.bounds,
129        }
130    }
131}
132
133impl<TC: Borrow> Borrow for Trees<TC> {
134    type Ref<'a> = TreesRef<TC::Borrowed<'a>, &'a [u64]> where TC: 'a;
135    type Borrowed<'a> = Trees<TC::Borrowed<'a>, &'a [u64]> where TC: 'a;
136    #[inline(always)]
137    fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
138        Trees {
139            groups: &self.groups[..],
140            bounds: &self.bounds[..],
141            values: self.values.borrow(),
142        }
143    }
144    #[inline(always)]
145    fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where TC: 'a {
146        Trees {
147            groups: thing.groups,
148            bounds: thing.bounds,
149            values: TC::reborrow(thing.values),
150        }
151    }
152    #[inline(always)]
153    fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a {
154        TreesRef {
155            index: thing.index,
156            lower: thing.lower,
157            upper: thing.upper,
158            values: TC::reborrow(thing.values),
159            bounds: thing.bounds,
160        }
161    }
162}
163
164impl<TC: Clear> Clear for Trees<TC> {
165    fn clear(&mut self) {
166        self.groups.clear();
167        self.groups.push(0u64);
168        self.bounds.clear();
169        self.bounds.push(0u64);
170        self.values.clear();
171    }
172}
173
174impl<TC: Len> Trees<TC> {
175    /// Pushes a tree into the container, storing nodes in BFS order.
176    pub fn push_tree<T>(&mut self, tree: Tree<T>) where TC: for<'a> Push<&'a T> {
177        let mut todo = alloc::collections::VecDeque::default();
178        todo.push_back(tree);
179        while let Some(node) = todo.pop_front() {
180            let cursor = self.values.len() + todo.len() + 1;
181            self.values.push(&node.data);
182            self.bounds.push((cursor + node.kids.len()) as u64);
183            for child in node.kids.into_iter() {
184                todo.push_back(child);
185            }
186        }
187        self.groups.push(self.values.len() as u64);
188    }
189}
190
191impl<'a, TC: crate::AsBytes<'a>, BC: crate::AsBytes<'a>> crate::AsBytes<'a> for Trees<TC, BC> {
192    const SLICE_COUNT: usize = BC::SLICE_COUNT + BC::SLICE_COUNT + TC::SLICE_COUNT;
193    #[inline]
194    fn get_byte_slice(&self, index: usize) -> (u64, &'a [u8]) {
195        debug_assert!(index < Self::SLICE_COUNT);
196        if index < BC::SLICE_COUNT {
197            self.groups.get_byte_slice(index)
198        } else if index < BC::SLICE_COUNT + BC::SLICE_COUNT {
199            self.bounds.get_byte_slice(index - BC::SLICE_COUNT)
200        } else {
201            self.values.get_byte_slice(index - BC::SLICE_COUNT - BC::SLICE_COUNT)
202        }
203    }
204}
205
206impl<'a, TC: crate::FromBytes<'a>, BC: crate::FromBytes<'a>> crate::FromBytes<'a> for Trees<TC, BC> {
207    const SLICE_COUNT: usize = BC::SLICE_COUNT + BC::SLICE_COUNT + TC::SLICE_COUNT;
208    #[inline(always)]
209    fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
210        Self {
211            groups: crate::FromBytes::from_bytes(bytes),
212            bounds: crate::FromBytes::from_bytes(bytes),
213            values: crate::FromBytes::from_bytes(bytes),
214        }
215    }
216    #[inline(always)]
217    fn from_store(store: &crate::bytes::indexed::DecodedStore<'a>, offset: &mut usize) -> Self {
218        Self {
219            groups: BC::from_store(store, offset),
220            bounds: BC::from_store(store, offset),
221            values: TC::from_store(store, offset),
222        }
223    }
224    fn element_sizes(sizes: &mut Vec<usize>) -> Result<(), String> {
225        BC::element_sizes(sizes)?;
226        BC::element_sizes(sizes)?;
227        TC::element_sizes(sizes)?;
228        Ok(())
229    }
230}
231
232/// LOUDS (level ordered unary degree sequence) is a succinct tree representation.
233mod louds {
234
235    // The tree is encoded by traversing it in a BFS order, with each node producing
236    // as many `1`s as it has children, followed by a `0`. There is also a `1` at the
237    // beginning of the sequence, for the root.
238    //
239    // The logic to determine the child of a node is as follows:
240    //
241    //      child(x, i) = select0(rank1(x) + i − 1) + 1
242    //
243    // It is possible that `i` here starts at 1, which we should fix to be `0`.
244
245}
246
247#[cfg(test)]
248mod test {
249
250    use alloc::{vec, vec::Vec, string::ToString};
251    use crate::common::{Index, Len, Clear};
252    use crate::{Borrow, AsBytes, FromBytes};
253    use super::{Tree, Trees};
254
255    fn leaf<T>(data: T) -> Tree<T> {
256        Tree { data, kids: vec![] }
257    }
258    fn branch<T>(data: T, kids: Vec<Tree<T>>) -> Tree<T> {
259        Tree { data, kids }
260    }
261
262    #[test]
263    fn push_and_index() {
264        let mut trees: Trees<Vec<u64>> = Default::default();
265        // Tree: 10 -> [20, 30 -> [40]]
266        let tree = branch(10u64, vec![leaf(20), branch(30, vec![leaf(40)])]);
267        trees.push_tree(tree);
268
269        assert_eq!(trees.len(), 1);
270
271        let borrowed = trees.borrow();
272        let root = borrowed.get(0);
273        assert_eq!(*root.value(), 10);
274        assert_eq!(root.kids(), 2);
275
276        let c0 = root.child(0);
277        assert_eq!(*c0.value(), 20);
278        assert_eq!(c0.kids(), 0);
279
280        let c1 = root.child(1);
281        assert_eq!(*c1.value(), 30);
282        assert_eq!(c1.kids(), 1);
283
284        let c1_0 = c1.child(0);
285        assert_eq!(*c1_0.value(), 40);
286        assert_eq!(c1_0.kids(), 0);
287    }
288
289    #[test]
290    fn multiple_trees() {
291        let mut trees: Trees<Vec<u64>> = Default::default();
292        trees.push_tree(branch(1u64, vec![leaf(2), leaf(3)]));
293        trees.push_tree(leaf(100u64));
294        trees.push_tree(branch(200u64, vec![leaf(300)]));
295
296        assert_eq!(trees.len(), 3);
297
298        let borrowed = trees.borrow();
299
300        let t0 = borrowed.get(0);
301        assert_eq!(*t0.value(), 1);
302        assert_eq!(t0.kids(), 2);
303        assert_eq!(*t0.child(0).value(), 2);
304        assert_eq!(*t0.child(1).value(), 3);
305
306        let t1 = borrowed.get(1);
307        assert_eq!(*t1.value(), 100);
308        assert_eq!(t1.kids(), 0);
309
310        let t2 = borrowed.get(2);
311        assert_eq!(*t2.value(), 200);
312        assert_eq!(t2.kids(), 1);
313        assert_eq!(*t2.child(0).value(), 300);
314    }
315
316    #[test]
317    fn ref_index() {
318        let mut trees: Trees<Vec<u64>> = Default::default();
319        trees.push_tree(branch(1u64, vec![leaf(2)]));
320
321        let root = (&trees).get(0);
322        assert_eq!(*root.value(), 1);
323        assert_eq!(*root.child(0).value(), 2);
324    }
325
326    #[test]
327    fn clear_and_reuse() {
328        let mut trees: Trees<Vec<u64>> = Default::default();
329        trees.push_tree(leaf(42u64));
330        assert_eq!(trees.len(), 1);
331
332        trees.clear();
333        assert_eq!(trees.len(), 0);
334
335        trees.push_tree(leaf(99u64));
336        assert_eq!(trees.len(), 1);
337        assert_eq!(*trees.borrow().get(0).value(), 99);
338    }
339
340    #[test]
341    fn as_from_bytes() {
342        let mut trees: Trees<Vec<u64>> = Default::default();
343        trees.push_tree(branch(10u64, vec![leaf(20), branch(30, vec![leaf(40)])]));
344        trees.push_tree(leaf(100u64));
345
346        let borrowed = trees.borrow();
347        let rebuilt = Trees::<&[u64], &[u64]>::from_bytes(
348            &mut borrowed.as_bytes().map(|(_, bytes)| bytes)
349        );
350        assert_eq!(rebuilt.len(), 2);
351
352        let root = rebuilt.get(0);
353        assert_eq!(*root.value(), 10);
354        assert_eq!(root.kids(), 2);
355        assert_eq!(*root.child(0).value(), 20);
356        assert_eq!(*root.child(1).value(), 30);
357        assert_eq!(*root.child(1).child(0).value(), 40);
358
359        let t1 = rebuilt.get(1);
360        assert_eq!(*t1.value(), 100);
361        assert_eq!(t1.kids(), 0);
362    }
363
364    #[test]
365    fn columnar_strings() {
366        use crate::Strings;
367
368        let mut trees: Trees<Strings> = Default::default();
369        trees.push_tree(branch(
370            "root".to_string(),
371            vec![leaf("left".to_string()), leaf("right".to_string())],
372        ));
373
374        let borrowed = trees.borrow();
375        let root = borrowed.get(0);
376        assert_eq!(root.value(), b"root");
377        assert_eq!(root.kids(), 2);
378        assert_eq!(root.child(0).value(), b"left");
379        assert_eq!(root.child(1).value(), b"right");
380    }
381
382    #[test]
383    fn deep_tree() {
384        let mut trees: Trees<Vec<u64>> = Default::default();
385        // Build a chain: 0 -> 1 -> 2 -> 3 -> 4
386        let mut tree = leaf(4u64);
387        for i in (0..4).rev() {
388            tree = branch(i, vec![tree]);
389        }
390        trees.push_tree(tree);
391
392        let borrowed = trees.borrow();
393        let mut node = borrowed.get(0);
394        for i in 0..5u64 {
395            assert_eq!(*node.value(), i);
396            if i < 4 {
397                assert_eq!(node.kids(), 1);
398                node = node.child(0);
399            } else {
400                assert_eq!(node.kids(), 0);
401            }
402        }
403    }
404}