1use std::collections::BTreeMap;
11use std::fmt::Debug;
12
13use mz_proto::{IntoRustIfSome, ProtoType, RustType, TryFromProtoError};
14use proptest::prelude::*;
15use proptest::strategy::{Strategy, Union};
16use serde_json::json;
17
18use crate::stats::primitive::{PrimitiveStats, any_primitive_stats};
19use crate::stats::{
20 DynStats, ProtoJsonMapElementStats, ProtoJsonMapStats, ProtoJsonStats, TrimStats,
21 proto_json_stats,
22};
23
24#[derive(Clone)]
30pub enum JsonStats {
31 None,
33 Mixed,
36 JsonNulls,
38 Bools(PrimitiveStats<bool>),
40 Strings(PrimitiveStats<String>),
42 Numerics(PrimitiveStats<Vec<u8>>),
46 Lists,
53 Maps(BTreeMap<String, JsonMapElementStats>),
56}
57
58#[derive(Default, Clone)]
59pub struct JsonMapElementStats {
60 pub len: usize,
61 pub stats: JsonStats,
62}
63
64impl Default for JsonStats {
65 fn default() -> Self {
66 JsonStats::None
67 }
68}
69
70impl Debug for JsonStats {
71 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
72 Debug::fmt(&self.debug_json(), f)
73 }
74}
75
76impl JsonStats {
77 pub fn debug_json(&self) -> serde_json::Value {
78 match self {
79 JsonStats::None => json!({}),
80 JsonStats::Mixed => "json_mixed".into(),
81 JsonStats::JsonNulls => "json_nulls".into(),
82 JsonStats::Bools(x) => x.debug_json(),
83 JsonStats::Strings(x) => x.debug_json(),
84 JsonStats::Numerics(x) => x.debug_json(),
85 JsonStats::Lists => "json_lists".into(),
86 JsonStats::Maps(x) => x
87 .iter()
88 .map(|(k, v)| (k.clone(), v.debug_json()))
89 .collect::<serde_json::Map<_, _>>()
90 .into(),
91 }
92 }
93}
94
95impl JsonMapElementStats {
96 pub fn debug_json(&self) -> serde_json::Value {
97 json!({"len": self.len, "stats": self.stats.debug_json()})
98 }
99}
100
101impl RustType<ProtoJsonStats> for JsonStats {
102 fn into_proto(&self) -> ProtoJsonStats {
103 ProtoJsonStats {
104 kind: Some(match self {
105 JsonStats::None => proto_json_stats::Kind::None(()),
106 JsonStats::Mixed => proto_json_stats::Kind::Mixed(()),
107 JsonStats::JsonNulls => proto_json_stats::Kind::JsonNulls(()),
108 JsonStats::Bools(x) => proto_json_stats::Kind::Bools(RustType::into_proto(x)),
109 JsonStats::Strings(x) => proto_json_stats::Kind::Strings(RustType::into_proto(x)),
110 JsonStats::Numerics(x) => proto_json_stats::Kind::Numerics(RustType::into_proto(x)),
111 JsonStats::Lists => proto_json_stats::Kind::Lists(()),
112 JsonStats::Maps(x) => proto_json_stats::Kind::Maps(ProtoJsonMapStats {
113 elements: x
114 .iter()
115 .map(|(k, v)| ProtoJsonMapElementStats {
116 name: k.into_proto(),
117 len: v.len.into_proto(),
118 stats: Some(RustType::into_proto(&v.stats)),
119 })
120 .collect(),
121 }),
122 }),
123 }
124 }
125
126 fn from_proto(proto: ProtoJsonStats) -> Result<Self, TryFromProtoError> {
127 Ok(match proto.kind {
128 Some(proto_json_stats::Kind::None(())) => JsonStats::None,
129 Some(proto_json_stats::Kind::Mixed(())) => JsonStats::Mixed,
130 Some(proto_json_stats::Kind::JsonNulls(())) => JsonStats::JsonNulls,
131 Some(proto_json_stats::Kind::Bools(x)) => JsonStats::Bools(x.into_rust()?),
132 Some(proto_json_stats::Kind::Strings(x)) => JsonStats::Strings(x.into_rust()?),
133 Some(proto_json_stats::Kind::Numerics(x)) => JsonStats::Numerics(x.into_rust()?),
134 Some(proto_json_stats::Kind::Lists(())) => JsonStats::Lists,
135 Some(proto_json_stats::Kind::Maps(x)) => {
136 let mut elements = BTreeMap::new();
137 for x in x.elements {
138 let stats = JsonMapElementStats {
139 len: x.len.into_rust()?,
140 stats: x.stats.into_rust_if_some("JsonMapElementStats::stats")?,
141 };
142 elements.insert(x.name.into_rust()?, stats);
143 }
144 JsonStats::Maps(elements)
145 }
146 None => JsonStats::Mixed,
148 })
149 }
150}
151
152impl TrimStats for ProtoJsonStats {
153 fn trim(&mut self) {
154 use proto_json_stats::*;
155 match &mut self.kind {
156 Some(Kind::Strings(stats)) => {
157 stats.trim();
158 }
159 Some(Kind::Maps(stats)) => {
160 for value in &mut stats.elements {
161 if let Some(stats) = &mut value.stats {
162 stats.trim();
163 }
164 }
165 }
166 Some(
167 Kind::None(_)
168 | Kind::Mixed(_)
169 | Kind::JsonNulls(_)
170 | Kind::Bools(_)
171 | Kind::Numerics(_)
172 | Kind::Lists(_),
173 ) => {}
174 None => {}
175 }
176 }
177}
178
179pub(crate) fn any_json_stats() -> impl Strategy<Value = JsonStats> {
181 let leaf = Union::new(vec![
182 any::<()>().prop_map(|_| JsonStats::None).boxed(),
183 any::<()>().prop_map(|_| JsonStats::Mixed).boxed(),
184 any::<()>().prop_map(|_| JsonStats::JsonNulls).boxed(),
185 any_primitive_stats::<bool>()
186 .prop_map(JsonStats::Bools)
187 .boxed(),
188 any_primitive_stats::<String>()
189 .prop_map(JsonStats::Strings)
190 .boxed(),
191 any::<()>().prop_map(|_| JsonStats::Lists).boxed(),
192 ]);
193 leaf.prop_recursive(2, 5, 3, |inner| {
194 (proptest::collection::btree_map(any::<String>(), inner, 0..3)).prop_map(|cols| {
195 let cols = cols
196 .into_iter()
197 .map(|(k, stats)| (k, JsonMapElementStats { len: 1, stats }))
198 .collect();
199 JsonStats::Maps(cols)
200 })
201 })
202}
203
#[cfg(test)]
mod tests {
    use prost::Message;

    use super::*;
    use crate::stats::trim_to_budget_jsonb;

    /// Encodes `stats` to proto and returns the map payload.
    ///
    /// Panics if the encoded message is not of the `Maps` kind.
    #[track_caller]
    fn encode_as_map(stats: &JsonStats) -> ProtoJsonMapStats {
        let proto: ProtoJsonStats = RustType::into_proto(stats);
        let Some(proto_json_stats::Kind::Maps(map)) = proto.kind else {
            panic!("serialized produced wrong type!");
        };
        map
    }

    #[mz_ore::test]
    fn jsonb_trim_to_budget() {
        /// Builds a map with one numeric column per `(key, cost)` pair, then
        /// trims it repeatedly while halving the budget each round.
        #[track_caller]
        fn testcase(cols: &[(&str, usize)], required: Option<&str>) {
            let mut columns = BTreeMap::new();
            for &(key, cost) in cols {
                let stats = JsonStats::Numerics(PrimitiveStats {
                    lower: vec![],
                    upper: vec![0u8; cost],
                });
                let len = stats.debug_json().to_string().len();
                columns.insert(key.to_owned(), JsonMapElementStats { len, stats });
            }

            let mut stats = encode_as_map(&JsonStats::Maps(columns));

            let mut budget = stats.encoded_len().next_power_of_two();
            while budget > 0 {
                let cost_before = stats.encoded_len();
                trim_to_budget_jsonb(&mut stats, &mut budget, &|col| Some(col) == required);
                let cost_after = stats.encoded_len();
                // Trimming must never grow the encoded size.
                assert!(cost_before >= cost_after);

                match required {
                    // A required column must survive every round of trimming.
                    Some(required) => {
                        assert!(
                            stats
                                .elements
                                .iter()
                                .any(|element| element.name == required)
                        );
                    }
                    // With nothing required, the result must fit the budget.
                    None => {
                        assert!(cost_after <= budget);
                    }
                }

                budget /= 2;
            }
        }

        testcase(&[], None);
        testcase(&[("a", 100)], None);
        testcase(&[("a", 1), ("b", 2), ("c", 4)], None);
        testcase(&[("a", 1), ("b", 2), ("c", 4)], Some("b"));
    }

    #[mz_ore::test]
    fn jsonb_trim_to_budget_smoke() {
        // Nested map: one large numeric column "b" and one small bool column "c".
        let context_stats = JsonStats::Maps(BTreeMap::from([
            (
                "b".to_string(),
                JsonMapElementStats {
                    len: 99,
                    stats: JsonStats::Numerics(PrimitiveStats {
                        lower: vec![],
                        upper: vec![42u8; 99],
                    }),
                },
            ),
            (
                "c".to_string(),
                JsonMapElementStats {
                    len: 1,
                    stats: JsonStats::Bools(PrimitiveStats {
                        lower: false,
                        upper: true,
                    }),
                },
            ),
        ]));

        // Top-level map: a small string column "a" plus the nested "context".
        let og_stats = JsonStats::Maps(BTreeMap::from([
            (
                "a".to_string(),
                JsonMapElementStats {
                    len: 1,
                    stats: JsonStats::Strings(PrimitiveStats {
                        lower: "foobar".to_string(),
                        upper: "foobaz".to_string(),
                    }),
                },
            ),
            (
                "context".to_string(),
                JsonMapElementStats {
                    len: 100,
                    stats: context_stats,
                },
            ),
        ]));

        // Round one: nothing is force-kept.
        let mut stats = encode_as_map(&og_stats);
        let mut budget_shortfall = 50;
        trim_to_budget_jsonb(&mut stats, &mut budget_shortfall, &|_name| false);

        let mut elements: BTreeMap<String, _> = stats
            .elements
            .into_iter()
            .map(|element| (element.name.clone(), element))
            .collect();
        assert!(elements.remove("a").is_some());

        let context = elements.remove("context").expect("trimmed too much");
        let Some(ProtoJsonStats {
            kind: Some(proto_json_stats::Kind::Maps(context)),
        }) = context.stats
        else {
            panic!("serialized produced wrong type!")
        };

        // Only "c" is expected to remain inside "context".
        assert_eq!(context.elements.len(), 1);
        assert_eq!(context.elements[0].name, "c");

        // Round two: start over from the original stats, force-keeping "b".
        let mut stats = encode_as_map(&og_stats);
        let mut budget_shortfall = 50;
        trim_to_budget_jsonb(&mut stats, &mut budget_shortfall, &|name| name == "b");

        assert_eq!(stats.elements.len(), 1);
        assert_eq!(stats.elements[0].name, "context");

        let Some(ProtoJsonStats {
            kind: Some(proto_json_stats::Kind::Maps(context)),
        }) = &stats.elements[0].stats
        else {
            panic!("serialized produced wrong type!")
        };

        // This time "b" is the single survivor inside "context".
        assert_eq!(context.elements.len(), 1);
        assert_eq!(context.elements[0].name, "b");
    }

    #[mz_ore::test]
    fn stats_trim_regression_json() {
        /// Asserts that trimming strictly shrinks the encoded size of `stats`.
        #[track_caller]
        fn testcase(stats: JsonStats) {
            let mut encoded: ProtoJsonStats = RustType::into_proto(&stats);
            let before = encoded.encoded_len();
            encoded.trim();
            let after = encoded.encoded_len();
            assert!(after < before, "{} vs {}: {:?}", after, before, encoded);
        }

        let col = JsonStats::Strings(PrimitiveStats {
            lower: "foobar".into(),
            upper: "foobaz".into(),
        });
        // Top-level string stats must trim...
        testcase(col.clone());

        // ...and so must the same stats nested under a map key.
        let cols = BTreeMap::from([("col".into(), JsonMapElementStats { len: 1, stats: col })]);
        testcase(JsonStats::Maps(cols));
    }
}