1use std::collections::BTreeMap;
11
12use mz_proto::{ProtoType, RustType, TryFromProtoError};
13use proptest::prelude::*;
14use proptest::strategy::Strategy;
15use proptest_derive::Arbitrary;
16use serde::ser::{SerializeMap, SerializeStruct};
17
18use crate::stats::{
19 ColumnStatKinds, ColumnStats, ColumnarStats, DynStats, OptionStats, ProtoStructStats,
20 TrimStats, any_columnar_stats, proto_dyn_stats,
21};
22
23#[derive(Arbitrary, Default, Clone)]
26pub struct StructStats {
27 pub len: usize,
29 #[proptest(strategy = "any_struct_stats_cols()")]
34 pub cols: BTreeMap<String, ColumnarStats>,
35}
36
37impl std::fmt::Debug for StructStats {
38 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
39 std::fmt::Display::fmt(&self.debug_json(), f)
40 }
41}
42
43impl serde::Serialize for StructStats {
44 fn serialize<S: serde::Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
45 let StructStats { len, cols } = self;
46 let mut s = s.serialize_struct("StructStats", 2)?;
47 let () = s.serialize_field("len", len)?;
48 let () = s.serialize_field("cols", &DynStatsCols(cols))?;
49 s.end()
50 }
51}
52
53impl DynStats for StructStats {
54 fn debug_json(&self) -> serde_json::Value {
55 let mut cols = serde_json::Map::new();
56 cols.insert("len".to_owned(), self.len.into());
57 for (name, stats) in self.cols.iter() {
58 cols.insert(name.clone(), stats.debug_json());
59 }
60 cols.into()
61 }
62 fn into_columnar_stats(self) -> ColumnarStats {
63 ColumnarStats {
64 nulls: None,
65 values: ColumnStatKinds::Struct(self),
66 }
67 }
68}
69
70impl StructStats {
71 pub fn col(&self, name: &str) -> Option<&ColumnarStats> {
76 self.cols.get(name)
77 }
78}
79
80impl ColumnStats for StructStats {
81 type Ref<'a> = ();
82
83 fn lower<'a>(&'a self) -> Option<Self::Ref<'a>> {
84 None
86 }
87 fn upper<'a>(&'a self) -> Option<Self::Ref<'a>> {
88 None
90 }
91 fn none_count(&self) -> usize {
92 0
93 }
94}
95
96impl ColumnStats for OptionStats<StructStats> {
97 type Ref<'a> = Option<()>;
98
99 fn lower<'a>(&'a self) -> Option<Self::Ref<'a>> {
100 self.some.lower().map(Some)
101 }
102 fn upper<'a>(&'a self) -> Option<Self::Ref<'a>> {
103 self.some.upper().map(Some)
104 }
105 fn none_count(&self) -> usize {
106 self.none
107 }
108}
109
110impl RustType<ProtoStructStats> for StructStats {
111 fn into_proto(&self) -> ProtoStructStats {
112 ProtoStructStats {
113 len: self.len.into_proto(),
114 cols: self
115 .cols
116 .iter()
117 .map(|(k, v)| (k.into_proto(), RustType::into_proto(v)))
118 .collect(),
119 }
120 }
121
122 fn from_proto(proto: ProtoStructStats) -> Result<Self, TryFromProtoError> {
123 let mut cols = BTreeMap::new();
124 for (k, v) in proto.cols {
125 cols.insert(k.into_rust()?, v.into_rust()?);
126 }
127 Ok(StructStats {
128 len: proto.len.into_rust()?,
129 cols,
130 })
131 }
132}
133
134impl TrimStats for ProtoStructStats {
135 fn trim(&mut self) {
136 use proto_dyn_stats::*;
137
138 for value in self.cols.values_mut() {
139 match &mut value.kind {
140 Some(Kind::Primitive(stats)) => stats.trim(),
141 Some(Kind::Bytes(stats)) => stats.trim(),
142 Some(Kind::Struct(stats)) => stats.trim(),
143 Some(Kind::None(())) => (),
144 None => {}
145 }
146 }
147 }
148}
149
150struct DynStatsCols<'a>(&'a BTreeMap<String, ColumnarStats>);
151
152impl serde::Serialize for DynStatsCols<'_> {
153 fn serialize<S: serde::Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
154 let mut s = s.serialize_map(Some(self.0.len()))?;
155 for (k, v) in self.0.iter() {
156 let v = v.debug_json();
157 let () = s.serialize_entry(k, &v)?;
158 }
159 s.end()
160 }
161}
162
163pub(crate) fn any_struct_stats_cols() -> impl Strategy<Value = BTreeMap<String, ColumnarStats>> {
165 proptest::collection::btree_map(any::<String>(), any_columnar_stats(), 1..5)
166}
167
168#[cfg(test)]
169mod tests {
170 use prost::Message;
171
172 use super::*;
173 use crate::stats::primitive::PrimitiveStats;
174 use crate::stats::{BytesStats, ColumnNullStats, ColumnStatKinds, trim_to_budget};
175
176 #[mz_ore::test]
177 fn struct_trim_to_budget() {
178 #[track_caller]
179 fn testcase(cols: &[(&str, usize)], required: Option<&str>) {
180 let cols = cols
181 .iter()
182 .map(|(key, cost)| {
183 let stats = BytesStats::Primitive(PrimitiveStats {
184 lower: vec![],
185 upper: vec![0u8; *cost],
186 });
187 let stats = ColumnarStats {
188 nulls: None,
189 values: ColumnStatKinds::Bytes(stats),
190 };
191 ((*key).to_owned(), stats)
192 })
193 .collect();
194 let mut stats: ProtoStructStats = RustType::into_proto(&StructStats { len: 0, cols });
195 let mut budget = stats.encoded_len().next_power_of_two();
196 while budget > 0 {
197 let cost_before = stats.encoded_len();
198 let trimmed = trim_to_budget(&mut stats, budget, |col| Some(col) == required);
199 let cost_after = stats.encoded_len();
200 assert!(cost_before >= cost_after);
201 assert_eq!(trimmed, cost_before - cost_after);
202 if let Some(required) = required {
203 assert!(stats.cols.contains_key(required));
204 } else {
205 assert!(cost_after <= budget);
206 }
207 budget = budget / 2;
208 }
209 }
210
211 testcase(&[], None);
212 testcase(&[("a", 100)], None);
213 testcase(&[("a", 1), ("b", 2), ("c", 4)], None);
214 testcase(&[("a", 1), ("b", 2), ("c", 4)], Some("b"));
215 }
216
217 #[mz_ore::test]
219 fn trim_order_regression() {
220 fn column_stats(lower: &'static str, upper: &'static str) -> ColumnarStats {
221 let stats = PrimitiveStats {
222 lower: lower.to_owned(),
223 upper: upper.to_owned(),
224 };
225 ColumnarStats {
226 nulls: None,
227 values: ColumnStatKinds::Primitive(stats.into()),
228 }
229 }
230 let stats = StructStats {
231 len: 2,
232 cols: BTreeMap::from([
233 ("foo".to_owned(), column_stats("a", "b")),
234 (
235 "bar".to_owned(),
236 column_stats("aaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaab"),
237 ),
238 ]),
239 };
240
241 let mut proto_stats = RustType::into_proto(&stats);
244 trim_to_budget(&mut proto_stats, 30, |_| false);
245 assert!(proto_stats.cols.contains_key("foo"));
246 assert!(!proto_stats.cols.contains_key("bar"));
247 }
248
249 #[mz_ore::test]
254 fn stats_trim_to_budget_regression_recursion() {
255 fn str_stats(n: usize, l: &str, u: &str) -> ColumnarStats {
256 ColumnarStats {
257 nulls: Some(ColumnNullStats { count: n }),
258 values: ColumnStatKinds::Primitive(
259 PrimitiveStats {
260 lower: l.to_owned(),
261 upper: u.to_owned(),
262 }
263 .into(),
264 ),
265 }
266 }
267
268 const BIG: usize = 100;
269
270 let mut cols = BTreeMap::new();
273 for col in 'a'..='z' {
274 let col = col.to_string();
275 let stats = str_stats(2, "", &col.repeat(BIG));
276 cols.insert(col, stats);
277 }
278 cols.insert("foo_timestamp".to_string(), str_stats(2, "foo", "foo"));
279 let source_data_stats = StructStats {
280 len: 2,
281 cols: BTreeMap::from([
282 ("err".to_owned(), str_stats(2, "", "")),
283 (
284 "ok".to_owned(),
285 ColumnarStats {
286 nulls: None,
287 values: ColumnStatKinds::Struct(StructStats { len: 2, cols }),
288 },
289 ),
290 ]),
291 };
292 let mut proto_stats = RustType::into_proto(&source_data_stats);
293 let trimmed = trim_to_budget(&mut proto_stats, BIG, |x| {
294 x.ends_with("timestamp") || x == "err"
295 });
296 assert!(trimmed > 0);
298 assert!(proto_stats.cols.contains_key("ok"));
300 assert!(proto_stats.cols.contains_key("err"));
301 let ok = proto_stats.cols.get("ok").unwrap();
303 let proto_dyn_stats::Kind::Struct(ok_struct) = ok.kind.as_ref().unwrap() else {
304 panic!("ok was of unexpected type {:?}", ok);
305 };
306 assert!(ok_struct.cols.contains_key("foo_timestamp"));
307 }
308}