columnar/adts/
json.rs

1use serde_json::Value as JsonJson;
2
3use crate::{Push, Len, Index, HeapSize};
4use crate::{Vecs, Strings, Lookbacks};
5
/// Stand in for JSON, from `serde_json`.
///
/// The variants correspond one-to-one with those of `serde_json::Value`, except
/// that an object is held as a vector of `(key, value)` pairs rather than a map.
#[derive(Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum Json {
    /// The JSON `null` value.
    Null,
    /// A JSON boolean.
    Bool(bool),
    /// A JSON number, kept as a `serde_json::Number`.
    Number(serde_json::Number),
    /// A JSON string.
    String(String),
    /// A JSON array of nested values.
    Array(Vec<Json>),
    /// A JSON object as `(key, value)` pairs; `Json::from_json` emits them sorted by key.
    Object(Vec<(String, Json)>),
}
16
17impl HeapSize for Json {
18    fn heap_size(&self) -> (usize, usize) {
19        match self {
20            Json::Null => (0, 0),
21            Json::Bool(_) => (0, 0),
22            Json::Number(_) => (0, 0),
23            Json::String(s) => (0, s.len()),
24            Json::Array(a) => a.heap_size(),
25            Json::Object(o) => o.heap_size(),
26        }
27    }
28}
29
30impl Json {
31    pub fn from_json(json: JsonJson) -> Self {
32        match json {
33            JsonJson::Null => { Json::Null },
34            JsonJson::Bool(b) => { Json::Bool(b) },
35            JsonJson::Number(n) => { Json::Number(n) },
36            JsonJson::String(s) => { Json::String(s) },
37            JsonJson::Array(a) => { Json::Array(a.into_iter().map(Json::from_json).collect()) },
38            JsonJson::Object(o) => {
39                let mut list: Vec<_> = o.into_iter().map(|(s,j)| (s, Json::from_json(j))).collect();
40                list.sort_by(|x,y| x.0.cmp(&y.0));
41                Json::Object(list)
42            },
43        }
44    }
45}
46
/// Sum type indicating where to find the data for each variant.
///
/// `Null` and `Bool` carry their value inline; the other variants carry a
/// position that `Jsons::dereference` resolves against the matching column of
/// a `Jsons` store.
#[derive(Copy, Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum JsonIdx {
    /// The `null` value; nothing is stored for it.
    Null,
    /// A boolean, stored inline.
    Bool(bool),
    /// Position of the number in `Jsons::numbers`.
    Number(usize),
    /// Position of the string in `Jsons::strings`.
    String(usize),
    /// Position of the array in `Jsons::arrays`.
    Array(usize),
    /// Position of the object in `Jsons::objects`.
    Object(usize),
}
57
58impl HeapSize for JsonIdx {
59    fn heap_size(&self) -> (usize, usize) { (0, 0) }
60}
61
// Design sketch of an alternative stand-in for `Vec<Json>` (kept for reference, not compiled).
//
// This approach uses `indexes` which contains discriminants, which should allow
// an efficient representation of offset information. Unfortunately, both `arrays`
// and `objects` just list their intended offsets directly, rather than encoding the
// offsets using unary degree sequences, which seemed hard to thread through the
// other abstractions. Their `Vec<usize>` container can probably be made smarter,
// in particular by an `Option<usize>` container where `None` indicates increment.
// struct Jsons {
//     pub indexes: Vec<JsonDiscriminant>,     // Container for `JsonDiscriminant`.
//     pub numbers: Vec<serde_json::Number>,   // Any `Number` container.
//     pub strings: Strings,                   // Any `String` container.
//     pub arrays: Vecs<Vec<usize>>,           // Any `Vec<usize>` container.
//     pub objects: Vecs<(Lookbacks<Strings>, Vec<usize>)>,
// }
77
/// Stand-in for `Vec<Json>`.
///
/// The `roots` vector indicates the root of each stored `Json`.
/// The (transitive) contents of each `Json` are stored throughout,
/// at locations that may not necessarily be found in `roots`.
///
/// `Null` and `Bool` values have no column of their own: they are carried
/// inline by the `JsonIdx` that refers to them.
#[derive(Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct Jsons {
    /// One entry per pushed `Json`, locating its top-level value.
    pub roots: Vec<JsonIdx>,               // Any `JsonIdx` container.
    // pub nulls: Vec<()>,                  // No need to store null Jsons.
    // pub bools: Vec<bool>,                // No need to store bool Jsons.
    /// Every number, addressed by `JsonIdx::Number`.
    pub numbers: Vec<serde_json::Number>,   // Any `Number` container.
    /// Every string value, addressed by `JsonIdx::String`.
    pub strings: Lookbacks<Strings>,
    /// Every array as a list of element locations, addressed by `JsonIdx::Array`.
    pub arrays: Vecs<Vec<JsonIdx>>,
    /// Every object as a list of `(key, value location)` pairs, addressed by `JsonIdx::Object`.
    pub objects: Vecs<(Lookbacks<Strings>, Vec<JsonIdx>)>,
}
93
94impl HeapSize for Jsons {
95    fn heap_size(&self) -> (usize, usize) {
96        let (l0, c0) = self.roots.heap_size();
97        let (l1, c1) = self.numbers.heap_size();
98        let (l2, c2) = self.strings.heap_size();
99        let (l3, c3) = self.arrays.heap_size();
100        let (l4, c4) = self.objects.heap_size();
101        (l0 + l1 + l2 + l3 + l4, c0 + c1 + c2 + c3 + c4)
102    }
103}
104
/// Stand-in for `&'a Json`.
///
/// Produced by `Jsons::dereference`. Scalars are exposed directly; arrays and
/// objects are exposed as handles that still point into the originating store.
#[derive(Debug)]
pub enum JsonsRef<'a> {
    /// The `null` value.
    Null,
    /// A boolean, by value.
    Bool(bool),
    /// A borrowed number from the store.
    Number(&'a serde_json::Number),
    /// A borrowed string from the store.
    String(&'a str),
    /// A handle to an array held in the store.
    Array(ArrRef<'a>),
    /// A handle to an object held in the store.
    Object(ObjRef<'a>),
}
115
/// Stand-in for `&'a [Json]`.
///
/// A position in `store.arrays` paired with the store needed to resolve the
/// elements found there.
#[derive(Debug)]
pub struct ArrRef<'a> {
    /// Reference into `store.arrays`.
    pub index: usize,
    /// The store that `index` refers into.
    pub store: &'a Jsons,
}
123
/// Stand-in for `&'a [(String, Json)]`.
///
/// A position in `store.objects` paired with the store needed to resolve the
/// member values found there.
#[derive(Debug)]
pub struct ObjRef<'a> {
    /// Reference into `store.objects`.
    pub index: usize,
    /// The store that `index` refers into.
    pub store: &'a Jsons,
}
131
132impl<'a> PartialEq<Json> for JsonsRef<'a> {
133    #[inline(always)] fn eq(&self, other: &Json) -> bool {
134        match (self, other) {
135            (JsonsRef::Null, Json::Null) => { true },
136            (JsonsRef::Bool(b0), Json::Bool(b1)) => { b0 == b1 },
137            (JsonsRef::Number(n0), Json::Number(n1)) => { *n0 == n1 },
138            (JsonsRef::String(s0), Json::String(s1)) => { *s0 == s1 },
139            (JsonsRef::Array(a0), Json::Array(a1)) => {
140                let slice: crate::Slice<&Vec<JsonIdx>> = (&a0.store.arrays).get(a0.index);
141                slice.len() == a1.len() && slice.into_iter().zip(a1).all(|(a,b)| a0.store.dereference(*a).eq(b))
142            },
143            (JsonsRef::Object(o0), Json::Object(o1)) => {
144                let slice: crate::Slice<&(_, _)> = (&o0.store.objects).get(o0.index);
145                slice.len() == o1.len() && slice.into_iter().zip(o1).all(|((xs, xv),(ys, yv))| xs == ys && o0.store.dereference(*xv).eq(yv))
146            },
147            _ => { false }
148        }
149    }
150}
151
152impl Push<Json> for Jsons {
153    fn push(&mut self, json: Json) {
154        let mut worker = JsonQueues::new_from(self);
155        let json_idx = worker.copy(&json);
156        worker.store.roots.push(json_idx);
157        worker.finish();
158    }
159    // It would be nice to implement `extend`, but lifetimes seem to prevent this.
160    // Because the iterator produces owned content, we would need to collect the Jsons
161    // so that their lifetimes can outlive the `JsonQueues` instance.
162}
163impl<'a> Push<&'a Json> for Jsons {
164    fn push(&mut self, json: &'a Json) {
165        let mut worker = JsonQueues::new_from(self);
166        let json_idx = worker.copy(json);
167        worker.store.roots.push(json_idx);
168        worker.finish();
169    }
170    fn extend(&mut self, jsons: impl IntoIterator<Item=&'a Json>) {
171        let mut worker = JsonQueues::new_from(self);
172        for json in jsons {
173            let json_idx = worker.copy(json);
174            worker.store.roots.push(json_idx);
175            worker.finish();
176        }
177    }
178}
179
180impl Len for Jsons {
181    fn len(&self) -> usize {
182        self.roots.len()
183    }
184}
185
186// impl IndexGat for Jsons {
187//     type Ref<'a> = JsonsRef<'a>;
188//     fn get(&self, index: usize) -> Self::Ref<'_> {
189//         self.dereference(self.roots[index])
190//     }
191// }
192impl<'a> Index for &'a Jsons {
193    type Ref = JsonsRef<'a>;
194    #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
195        self.dereference(self.roots[index])
196    }
197}
198
199impl Jsons {
200    #[inline(always)] pub fn dereference(&self, index: JsonIdx) -> JsonsRef<'_> {
201        match index {
202            JsonIdx::Null => JsonsRef::Null,
203            JsonIdx::Bool(i) => JsonsRef::Bool(i),
204            JsonIdx::Number(i) => JsonsRef::Number((&self.numbers).get(i)),
205            JsonIdx::String(i) => JsonsRef::String((&self.strings).get(i)),
206            JsonIdx::Array(i) => {
207                JsonsRef::Array(ArrRef {
208                    index: i,
209                    store: self,
210                })
211            },
212            JsonIdx::Object(i) => {
213                JsonsRef::Object(ObjRef {
214                    index: i,
215                    store: self,
216                })
217            }
218        }
219    }
220}
221
/// Work queues used while copying `Json` values into a `Jsons` store.
///
/// Arrays and objects are not written immediately when encountered; they wait
/// in these queues until `finish` drains them into the store.
struct JsonQueues<'a> {
    /// Arrays that have been assigned an index but are not yet in `store.arrays`.
    arr_todo: std::collections::VecDeque<&'a [Json]>,
    /// Objects that have been assigned an index but are not yet in `store.objects`.
    obj_todo: std::collections::VecDeque<&'a [(String, Json)]>,
    /// The store being appended to.
    store: &'a mut Jsons,
}
227
impl<'a> JsonQueues<'a> {
    /// Creates a new `JsonQueues` from a `Jsons`, with both queues empty.
    fn new_from(store: &'a mut Jsons) -> Self {
        Self {
            arr_todo: Default::default(),
            obj_todo: Default::default(),
            store,
        }
    }

    /// Copies a Json, into either the store or a queue.
    ///
    /// `Null` and `Bool` are encoded in the returned `JsonIdx` itself. Numbers and
    /// strings are appended to the store at once and the returned index is their
    /// position. Arrays and objects are only enqueued; the returned index is the
    /// position they will occupy once `finish` has moved them into the store.
    fn copy(&mut self, json: &'a Json) -> JsonIdx {
        match json {
            Json::Null => JsonIdx::Null,
            Json::Bool(b) => JsonIdx::Bool(*b),
            Json::Number(n) => {
                self.store.numbers.push(n.clone());
                JsonIdx::Number(self.store.numbers.len() - 1)
            },
            Json::String(s) => {
                self.store.strings.push(s);
                JsonIdx::String(self.store.strings.len() - 1)
            },
            Json::Array(a) => {
                self.arr_todo.push_back(a);
                // Future position: arrays already stored, plus those queued ahead of
                // (and including) this one, minus one for zero-based indexing.
                JsonIdx::Array(self.store.arrays.len() + self.arr_todo.len() - 1)
            },
            Json::Object(o) => {
                self.obj_todo.push_back(o);
                // Same computation as for arrays, against the object column and queue.
                JsonIdx::Object(self.store.objects.len() + self.obj_todo.len() - 1)
            },
        }
    }
    /// Drains all queues, so that `store` is fully populated.
    ///
    /// Copying the children of a queued array or object may enqueue further
    /// arrays and objects, so the two queues are drained alternately until both
    /// are empty.
    fn finish(&mut self) {
        // Scratch buffer for the child indexes of the container being processed;
        // it is emptied by `drain(..)` on every iteration and reused.
        let mut temp = Vec::default();
        while !self.arr_todo.is_empty() || !self.obj_todo.is_empty() {
            // Odd logic, but: need the queue to retain the element so that `self.copy` produces
            // the correct indexes for any nested arrays.
            while let Some(json) = self.arr_todo.front().cloned() {
                Extend::extend(&mut temp, json.iter().map(|v| self.copy(v)));
                // Only now leave the queue, immediately before landing in the store, so
                // that `store.arrays.len() + arr_todo.len()` is the same before and after.
                self.arr_todo.pop_front();
                self.store.arrays.push_iter(temp.drain(..));
            }
            // Odd logic, but: need the queue to retain the element so that `self.copy` produces
            // the correct indexes for any nested objects.
            while let Some(pairs) = self.obj_todo.front().cloned() {
                Extend::extend(&mut temp, pairs.iter().map(|(_,v)| self.copy(v)));
                self.obj_todo.pop_front();
                // Re-pair each copied value index with the key it came from.
                self.store.objects.push_iter(temp.drain(..).zip(pairs).map(|(v,(s,_))| (s, v)));
            }
        }
    }
}