Skip to main content

columnar/adts/
tree.rs

1//! Columnar representations of tree-structured data.
2//!
3//! A `Tree<T>` is a node with a value of type `T` and a list of children.
4//! `Trees<TC>` stores a collection of trees with columnar storage for node values.
5use alloc::{vec::Vec, string::String};
6
7use crate::{Borrow, Index, IndexAs, Len, Clear, Push};
8
9/// A tree node with a value and children.
10#[derive(Clone)]
11pub struct Tree<T> {
12    pub data: T,
13    pub kids: Vec<Tree<T>>,
14}
15
16impl Tree<usize> {
17    pub fn sum(&self) -> usize {
18        self.data + self.kids.iter().map(|x| x.sum()).sum::<usize>()
19    }
20}
21
22/// A stand-in for `Vec<Tree<T>>`, with columnar storage for node values.
23///
24/// Nodes are stored in BFS order. `groups` tracks tree boundaries (cumulative
25/// node counts with a leading 0). `bounds` tracks child ranges per node
26/// (also with a leading 0 sentinel).
27#[derive(Copy, Clone)]
28pub struct Trees<TC, BC = Vec<u64>> {
29    /// Cumulative node counts: tree `i` starts at node `groups[i]`.
30    pub groups: BC,
31    /// Child delimiters: node `j`'s children are at `bounds[j]..bounds[j+1]`
32    /// (root nodes use `bounds[j]+1..bounds[j+1]` to skip themselves).
33    pub bounds: BC,
34    /// Columnar container for node values in BFS order.
35    pub values: TC,
36}
37
38impl<TC: Default> Default for Trees<TC> {
39    fn default() -> Self {
40        Self {
41            groups: vec![0u64],
42            bounds: vec![0u64],
43            values: TC::default(),
44        }
45    }
46}
47
48/// A reference to a single node within a `Trees` container.
49///
50/// Holds a copy of the borrowed values and bounds containers,
51/// plus the node's index and child range. Navigation to children
52/// constructs new `TreesRef` values.
53pub struct TreesRef<V, B> {
54    index: usize,
55    lower: usize,
56    upper: usize,
57    values: V,
58    bounds: B,
59}
60
61impl<V: Copy, B: Copy> Clone for TreesRef<V, B> {
62    fn clone(&self) -> Self { *self }
63}
64impl<V: Copy, B: Copy> Copy for TreesRef<V, B> {}
65
66impl<V: Index, B: IndexAs<u64>> TreesRef<V, B> {
67    /// The value at this node.
68    #[inline(always)]
69    pub fn value(&self) -> V::Ref {
70        self.values.get(self.index)
71    }
72    /// The number of children of this node.
73    #[inline(always)]
74    pub fn kids(&self) -> usize {
75        self.upper - self.lower
76    }
77}
78impl<V: Index + Copy, B: IndexAs<u64> + Copy> TreesRef<V, B> {
79    /// A reference to the `index`-th child of this node.
80    #[inline(always)]
81    pub fn child(&self, index: usize) -> Self {
82        assert!(index < self.upper - self.lower);
83        let child = self.lower + index;
84        TreesRef {
85            index: child,
86            lower: self.bounds.index_as(child) as usize,
87            upper: self.bounds.index_as(child + 1) as usize,
88            values: self.values,
89            bounds: self.bounds,
90        }
91    }
92}
93
94impl<TC, BC: Len> Len for Trees<TC, BC> {
95    #[inline(always)]
96    fn len(&self) -> usize { self.groups.len() - 1 }
97}
98
99impl<TC: Index + Copy, BC: IndexAs<u64> + Len + Copy> Index for Trees<TC, BC> {
100    type Ref = TreesRef<TC, BC>;
101    #[inline(always)]
102    fn get(&self, index: usize) -> Self::Ref {
103        let root = self.groups.index_as(index) as usize;
104        TreesRef {
105            index: root,
106            lower: self.bounds.index_as(root) as usize + 1,
107            upper: self.bounds.index_as(root + 1) as usize,
108            values: self.values,
109            bounds: self.bounds,
110        }
111    }
112}
113
114impl<'a, TC, BC: IndexAs<u64> + Len> Index for &'a Trees<TC, BC>
115where
116    &'a TC: Index,
117    &'a BC: IndexAs<u64>,
118{
119    type Ref = TreesRef<&'a TC, &'a BC>;
120    #[inline(always)]
121    fn get(&self, index: usize) -> Self::Ref {
122        let root = self.groups.index_as(index) as usize;
123        TreesRef {
124            index: root,
125            lower: self.bounds.index_as(root) as usize + 1,
126            upper: self.bounds.index_as(root + 1) as usize,
127            values: &self.values,
128            bounds: &self.bounds,
129        }
130    }
131}
132
133impl<TC: Borrow> Borrow for Trees<TC> {
134    type Ref<'a> = TreesRef<TC::Borrowed<'a>, &'a [u64]> where TC: 'a;
135    type Borrowed<'a> = Trees<TC::Borrowed<'a>, &'a [u64]> where TC: 'a;
136    #[inline(always)]
137    fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
138        Trees {
139            groups: &self.groups[..],
140            bounds: &self.bounds[..],
141            values: self.values.borrow(),
142        }
143    }
144    #[inline(always)]
145    fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where TC: 'a {
146        Trees {
147            groups: thing.groups,
148            bounds: thing.bounds,
149            values: TC::reborrow(thing.values),
150        }
151    }
152    #[inline(always)]
153    fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a {
154        TreesRef {
155            index: thing.index,
156            lower: thing.lower,
157            upper: thing.upper,
158            values: TC::reborrow(thing.values),
159            bounds: thing.bounds,
160        }
161    }
162}
163
164impl<TC: Clear> Clear for Trees<TC> {
165    fn clear(&mut self) {
166        self.groups.clear();
167        self.groups.push(0u64);
168        self.bounds.clear();
169        self.bounds.push(0u64);
170        self.values.clear();
171    }
172}
173
174impl<TC: Len> Trees<TC> {
175    /// Pushes a tree into the container, storing nodes in BFS order.
176    pub fn push_tree<T>(&mut self, tree: Tree<T>) where TC: for<'a> Push<&'a T> {
177        let mut todo = alloc::collections::VecDeque::default();
178        todo.push_back(tree);
179        while let Some(node) = todo.pop_front() {
180            let cursor = self.values.len() + todo.len() + 1;
181            self.values.push(&node.data);
182            self.bounds.push((cursor + node.kids.len()) as u64);
183            for child in node.kids.into_iter() {
184                todo.push_back(child);
185            }
186        }
187        self.groups.push(self.values.len() as u64);
188    }
189}
190
191impl<'a, TC: crate::AsBytes<'a>, BC: crate::AsBytes<'a>> crate::AsBytes<'a> for Trees<TC, BC> {
192    #[inline(always)]
193    fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
194        let iter = self.groups.as_bytes();
195        let iter = crate::chain(iter, self.bounds.as_bytes());
196        crate::chain(iter, self.values.as_bytes())
197    }
198}
199
200impl<'a, TC: crate::FromBytes<'a>, BC: crate::FromBytes<'a>> crate::FromBytes<'a> for Trees<TC, BC> {
201    const SLICE_COUNT: usize = BC::SLICE_COUNT + BC::SLICE_COUNT + TC::SLICE_COUNT;
202    #[inline(always)]
203    fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
204        Self {
205            groups: crate::FromBytes::from_bytes(bytes),
206            bounds: crate::FromBytes::from_bytes(bytes),
207            values: crate::FromBytes::from_bytes(bytes),
208        }
209    }
210    #[inline(always)]
211    fn from_store(store: &crate::bytes::indexed::DecodedStore<'a>, offset: &mut usize) -> Self {
212        Self {
213            groups: BC::from_store(store, offset),
214            bounds: BC::from_store(store, offset),
215            values: TC::from_store(store, offset),
216        }
217    }
218    fn element_sizes(sizes: &mut Vec<usize>) -> Result<(), String> {
219        BC::element_sizes(sizes)?;
220        BC::element_sizes(sizes)?;
221        TC::element_sizes(sizes)?;
222        Ok(())
223    }
224}
225
226/// LOUDS (level ordered unary degree sequence) is a succinct tree representation.
227mod louds {
228
229    // The tree is encoded by traversing it in a BFS order, with each node producing
230    // as many `1`s as it has children, followed by a `0`. There is also a `1` at the
231    // beginning of the sequence, for the root.
232    //
233    // The logic to determine the child of a node is as follows:
234    //
235    //      child(x, i) = select0(rank1(x) + i − 1) + 1
236    //
237    // It is possible that `i` here starts at 1, which we should fix to be `0`.
238
239}
240
241#[cfg(test)]
242mod test {
243
244    use alloc::{vec, vec::Vec, string::ToString};
245    use crate::common::{Index, Len, Clear};
246    use crate::{Borrow, AsBytes, FromBytes};
247    use super::{Tree, Trees};
248
249    fn leaf<T>(data: T) -> Tree<T> {
250        Tree { data, kids: vec![] }
251    }
252    fn branch<T>(data: T, kids: Vec<Tree<T>>) -> Tree<T> {
253        Tree { data, kids }
254    }
255
256    #[test]
257    fn push_and_index() {
258        let mut trees: Trees<Vec<u64>> = Default::default();
259        // Tree: 10 -> [20, 30 -> [40]]
260        let tree = branch(10u64, vec![leaf(20), branch(30, vec![leaf(40)])]);
261        trees.push_tree(tree);
262
263        assert_eq!(trees.len(), 1);
264
265        let borrowed = trees.borrow();
266        let root = borrowed.get(0);
267        assert_eq!(*root.value(), 10);
268        assert_eq!(root.kids(), 2);
269
270        let c0 = root.child(0);
271        assert_eq!(*c0.value(), 20);
272        assert_eq!(c0.kids(), 0);
273
274        let c1 = root.child(1);
275        assert_eq!(*c1.value(), 30);
276        assert_eq!(c1.kids(), 1);
277
278        let c1_0 = c1.child(0);
279        assert_eq!(*c1_0.value(), 40);
280        assert_eq!(c1_0.kids(), 0);
281    }
282
283    #[test]
284    fn multiple_trees() {
285        let mut trees: Trees<Vec<u64>> = Default::default();
286        trees.push_tree(branch(1u64, vec![leaf(2), leaf(3)]));
287        trees.push_tree(leaf(100u64));
288        trees.push_tree(branch(200u64, vec![leaf(300)]));
289
290        assert_eq!(trees.len(), 3);
291
292        let borrowed = trees.borrow();
293
294        let t0 = borrowed.get(0);
295        assert_eq!(*t0.value(), 1);
296        assert_eq!(t0.kids(), 2);
297        assert_eq!(*t0.child(0).value(), 2);
298        assert_eq!(*t0.child(1).value(), 3);
299
300        let t1 = borrowed.get(1);
301        assert_eq!(*t1.value(), 100);
302        assert_eq!(t1.kids(), 0);
303
304        let t2 = borrowed.get(2);
305        assert_eq!(*t2.value(), 200);
306        assert_eq!(t2.kids(), 1);
307        assert_eq!(*t2.child(0).value(), 300);
308    }
309
310    #[test]
311    fn ref_index() {
312        let mut trees: Trees<Vec<u64>> = Default::default();
313        trees.push_tree(branch(1u64, vec![leaf(2)]));
314
315        let root = (&trees).get(0);
316        assert_eq!(*root.value(), 1);
317        assert_eq!(*root.child(0).value(), 2);
318    }
319
320    #[test]
321    fn clear_and_reuse() {
322        let mut trees: Trees<Vec<u64>> = Default::default();
323        trees.push_tree(leaf(42u64));
324        assert_eq!(trees.len(), 1);
325
326        trees.clear();
327        assert_eq!(trees.len(), 0);
328
329        trees.push_tree(leaf(99u64));
330        assert_eq!(trees.len(), 1);
331        assert_eq!(*trees.borrow().get(0).value(), 99);
332    }
333
334    #[test]
335    fn as_from_bytes() {
336        let mut trees: Trees<Vec<u64>> = Default::default();
337        trees.push_tree(branch(10u64, vec![leaf(20), branch(30, vec![leaf(40)])]));
338        trees.push_tree(leaf(100u64));
339
340        let borrowed = trees.borrow();
341        let rebuilt = Trees::<&[u64], &[u64]>::from_bytes(
342            &mut borrowed.as_bytes().map(|(_, bytes)| bytes)
343        );
344        assert_eq!(rebuilt.len(), 2);
345
346        let root = rebuilt.get(0);
347        assert_eq!(*root.value(), 10);
348        assert_eq!(root.kids(), 2);
349        assert_eq!(*root.child(0).value(), 20);
350        assert_eq!(*root.child(1).value(), 30);
351        assert_eq!(*root.child(1).child(0).value(), 40);
352
353        let t1 = rebuilt.get(1);
354        assert_eq!(*t1.value(), 100);
355        assert_eq!(t1.kids(), 0);
356    }
357
358    #[test]
359    fn columnar_strings() {
360        use crate::Strings;
361
362        let mut trees: Trees<Strings> = Default::default();
363        trees.push_tree(branch(
364            "root".to_string(),
365            vec![leaf("left".to_string()), leaf("right".to_string())],
366        ));
367
368        let borrowed = trees.borrow();
369        let root = borrowed.get(0);
370        assert_eq!(root.value(), b"root");
371        assert_eq!(root.kids(), 2);
372        assert_eq!(root.child(0).value(), b"left");
373        assert_eq!(root.child(1).value(), b"right");
374    }
375
376    #[test]
377    fn deep_tree() {
378        let mut trees: Trees<Vec<u64>> = Default::default();
379        // Build a chain: 0 -> 1 -> 2 -> 3 -> 4
380        let mut tree = leaf(4u64);
381        for i in (0..4).rev() {
382            tree = branch(i, vec![tree]);
383        }
384        trees.push_tree(tree);
385
386        let borrowed = trees.borrow();
387        let mut node = borrowed.get(0);
388        for i in 0..5u64 {
389            assert_eq!(*node.value(), i);
390            if i < 4 {
391                assert_eq!(node.kids(), 1);
392                node = node.child(0);
393            } else {
394                assert_eq!(node.kids(), 0);
395            }
396        }
397    }
398}