mz_persist_types/stats/
bytes.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10use std::fmt::{Debug, Formatter};
11
12use mz_ore::str::redact;
13use mz_proto::{IntoRustIfSome, ProtoType, RustType, TryFromProtoError};
14use proptest::strategy::{Just, Strategy, Union};
15use serde::Serialize;
16
17use crate::stats::json::{JsonStats, any_json_stats};
18use crate::stats::primitive::{PrimitiveStats, any_primitive_vec_u8_stats};
19use crate::stats::{
20    ColumnStatKinds, ColumnStats, ColumnarStats, DynStats, OptionStats, ProtoAtomicBytesStats,
21    ProtoBytesStats, ProtoFixedSizeBytesStats, TrimStats, proto_bytes_stats,
22    proto_fixed_size_bytes_stats,
23};
24
/// `PrimitiveStats<Vec<u8>>` that cannot safely be trimmed.
///
/// The bounds are carried verbatim: the `TrimStats` implementation for
/// `ProtoBytesStats` in this module explicitly leaves the atomic variant
/// untouched.
#[derive(Clone)]
pub struct AtomicBytesStats {
    /// Lower bound, as raw bytes. See [PrimitiveStats::lower]
    pub lower: Vec<u8>,
    /// Upper bound, as raw bytes. See [PrimitiveStats::upper]
    pub upper: Vec<u8>,
}
33
34impl Debug for AtomicBytesStats {
35    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
36        f.debug_struct("AtomicBytesStats")
37            .field("lower", &redact(&hex::encode(&self.lower)))
38            .field("upper", &redact(&hex::encode(&self.upper)))
39            .finish()
40    }
41}
42
43impl AtomicBytesStats {
44    fn debug_json(&self) -> serde_json::Value {
45        serde_json::json!({
46            "lower": hex::encode(&self.lower),
47            "upper": hex::encode(&self.upper),
48        })
49    }
50}
51
52impl RustType<ProtoAtomicBytesStats> for AtomicBytesStats {
53    fn into_proto(&self) -> ProtoAtomicBytesStats {
54        ProtoAtomicBytesStats {
55            lower: self.lower.into_proto(),
56            upper: self.upper.into_proto(),
57        }
58    }
59
60    fn from_proto(proto: ProtoAtomicBytesStats) -> Result<Self, TryFromProtoError> {
61        Ok(AtomicBytesStats {
62            lower: proto.lower.into_rust()?,
63            upper: proto.upper.into_rust()?,
64        })
65    }
66}
67
/// `PrimitiveStats<Vec<u8>>` for types that implement [`FixedSizeCodec`] and
/// cannot safely be trimmed.
///
/// The bounds are carried verbatim: the `TrimStats` implementation for
/// `ProtoBytesStats` in this module explicitly leaves the fixed-size variant
/// untouched.
///
/// [`FixedSizeCodec`]: crate::columnar::FixedSizeCodec
#[derive(Clone)]
pub struct FixedSizeBytesStats {
    /// Lower bound, as raw bytes. See [PrimitiveStats::lower]
    pub lower: Vec<u8>,
    /// Upper bound, as raw bytes. See [PrimitiveStats::upper]
    pub upper: Vec<u8>,
    /// The kind of data these stats represent.
    pub kind: FixedSizeBytesStatsKind,
}
81
82impl Debug for FixedSizeBytesStats {
83    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
84        f.debug_struct("FixedSizeBytesStats")
85            .field("lower", &redact(&hex::encode(&self.lower)))
86            .field("upper", &redact(&hex::encode(&self.upper)))
87            .field("kind", &self.kind)
88            .finish()
89    }
90}
91
92impl FixedSizeBytesStats {
93    fn debug_json(&self) -> serde_json::Value {
94        serde_json::json!({
95            "lower": hex::encode(&self.lower),
96            "upper": hex::encode(&self.upper),
97            "kind": self.kind,
98        })
99    }
100}
101
102impl RustType<ProtoFixedSizeBytesStats> for FixedSizeBytesStats {
103    fn into_proto(&self) -> ProtoFixedSizeBytesStats {
104        ProtoFixedSizeBytesStats {
105            lower: self.lower.into_proto(),
106            upper: self.upper.into_proto(),
107            kind: Some(self.kind.into_proto()),
108        }
109    }
110
111    fn from_proto(proto: ProtoFixedSizeBytesStats) -> Result<Self, TryFromProtoError> {
112        Ok(FixedSizeBytesStats {
113            lower: proto.lower.into_rust()?,
114            upper: proto.upper.into_rust()?,
115            kind: proto
116                .kind
117                .into_rust_if_some("missing field ProtoFixedSizeBytesStats::kind")?,
118        })
119    }
120}
121
/// The type of data encoded in a [`FixedSizeBytesStats`].
///
/// Serialized with kebab-case variant names (see the `rename_all` attribute
/// below); this is the form that appears under `"kind"` in
/// `FixedSizeBytesStats::debug_json`.
///
/// NOTE(review): the byte layout behind each variant is defined by the
/// corresponding codec, which is not part of this module.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "kebab-case")]
pub enum FixedSizeBytesStatsKind {
    /// Serializes as `"packed-time"`.
    PackedTime,
    /// Serializes as `"packed-date-time"`.
    PackedDateTime,
    /// Serializes as `"packed-interval"`.
    PackedInterval,
    /// Serializes as `"packed-numeric"`.
    PackedNumeric,
    /// Serializes as `"uuid"`.
    Uuid,
}
132
133impl RustType<proto_fixed_size_bytes_stats::Kind> for FixedSizeBytesStatsKind {
134    fn into_proto(&self) -> proto_fixed_size_bytes_stats::Kind {
135        match self {
136            FixedSizeBytesStatsKind::PackedTime => {
137                proto_fixed_size_bytes_stats::Kind::PackedTime(())
138            }
139            FixedSizeBytesStatsKind::PackedDateTime => {
140                proto_fixed_size_bytes_stats::Kind::PackedDateTime(())
141            }
142            FixedSizeBytesStatsKind::PackedInterval => {
143                proto_fixed_size_bytes_stats::Kind::PackedInterval(())
144            }
145            FixedSizeBytesStatsKind::PackedNumeric => {
146                proto_fixed_size_bytes_stats::Kind::PackedNumeric(())
147            }
148            FixedSizeBytesStatsKind::Uuid => proto_fixed_size_bytes_stats::Kind::Uuid(()),
149        }
150    }
151
152    fn from_proto(proto: proto_fixed_size_bytes_stats::Kind) -> Result<Self, TryFromProtoError> {
153        let kind = match proto {
154            proto_fixed_size_bytes_stats::Kind::PackedTime(_) => {
155                FixedSizeBytesStatsKind::PackedTime
156            }
157            proto_fixed_size_bytes_stats::Kind::PackedDateTime(_) => {
158                FixedSizeBytesStatsKind::PackedDateTime
159            }
160            proto_fixed_size_bytes_stats::Kind::PackedInterval(_) => {
161                FixedSizeBytesStatsKind::PackedInterval
162            }
163            proto_fixed_size_bytes_stats::Kind::PackedNumeric(_) => {
164                FixedSizeBytesStatsKind::PackedNumeric
165            }
166            proto_fixed_size_bytes_stats::Kind::Uuid(_) => FixedSizeBytesStatsKind::Uuid,
167        };
168        Ok(kind)
169    }
170}
171
/// Statistics about a column of `Vec<u8>`.
///
/// The variants differ in whether they expose bounds through `ColumnStats`
/// and in whether the `TrimStats` impl for `ProtoBytesStats` is allowed to
/// shrink their protobuf form.
#[derive(Clone)]
pub enum BytesStats {
    /// Plain byte-string bounds; the protobuf form is trimmed by `TrimStats`.
    Primitive(PrimitiveStats<Vec<u8>>),
    /// JSON stats; reports no lower/upper bound through `ColumnStats`, and
    /// the protobuf form is trimmed by `TrimStats`.
    Json(JsonStats),
    /// Byte-string bounds that are never trimmed.
    Atomic(AtomicBytesStats),
    /// Bounds for fixed-size encoded values, tagged with their kind; never
    /// trimmed.
    FixedSize(FixedSizeBytesStats),
}
180
181impl Debug for BytesStats {
182    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
183        match self {
184            BytesStats::Primitive(stats) => stats.fmt(f),
185            BytesStats::Json(stats) => stats.fmt(f),
186            BytesStats::Atomic(stats) => stats.fmt(f),
187            BytesStats::FixedSize(stats) => stats.fmt(f),
188        }
189    }
190}
191
192impl DynStats for BytesStats {
193    fn debug_json(&self) -> serde_json::Value {
194        match self {
195            BytesStats::Primitive(x) => x.debug_json(),
196            BytesStats::Json(x) => x.debug_json(),
197            BytesStats::Atomic(x) => x.debug_json(),
198            BytesStats::FixedSize(x) => x.debug_json(),
199        }
200    }
201
202    fn into_columnar_stats(self) -> ColumnarStats {
203        ColumnarStats {
204            nulls: None,
205            values: ColumnStatKinds::Bytes(self),
206        }
207    }
208}
209
210impl ColumnStats for BytesStats {
211    type Ref<'a> = &'a [u8];
212
213    fn lower<'a>(&'a self) -> Option<Self::Ref<'a>> {
214        match self {
215            BytesStats::Primitive(x) => Some(x.lower.as_slice()),
216            BytesStats::Json(_) => None,
217            BytesStats::Atomic(x) => Some(&x.lower),
218            BytesStats::FixedSize(x) => Some(&x.lower),
219        }
220    }
221    fn upper<'a>(&'a self) -> Option<Self::Ref<'a>> {
222        match self {
223            BytesStats::Primitive(x) => Some(x.upper.as_slice()),
224            BytesStats::Json(_) => None,
225            BytesStats::Atomic(x) => Some(&x.upper),
226            BytesStats::FixedSize(x) => Some(&x.upper),
227        }
228    }
229    fn none_count(&self) -> usize {
230        0
231    }
232}
233
234impl ColumnStats for OptionStats<BytesStats> {
235    type Ref<'a> = Option<&'a [u8]>;
236
237    fn lower<'a>(&'a self) -> Option<Self::Ref<'a>> {
238        self.some.lower().map(Some)
239    }
240    fn upper<'a>(&'a self) -> Option<Self::Ref<'a>> {
241        self.some.upper().map(Some)
242    }
243    fn none_count(&self) -> usize {
244        self.none
245    }
246}
247
248impl RustType<ProtoBytesStats> for BytesStats {
249    fn into_proto(&self) -> ProtoBytesStats {
250        let kind = match self {
251            BytesStats::Primitive(x) => proto_bytes_stats::Kind::Primitive(RustType::into_proto(x)),
252            BytesStats::Json(x) => proto_bytes_stats::Kind::Json(RustType::into_proto(x)),
253            BytesStats::Atomic(x) => proto_bytes_stats::Kind::Atomic(RustType::into_proto(x)),
254            BytesStats::FixedSize(x) => proto_bytes_stats::Kind::FixedSize(RustType::into_proto(x)),
255        };
256        ProtoBytesStats { kind: Some(kind) }
257    }
258
259    fn from_proto(proto: ProtoBytesStats) -> Result<Self, TryFromProtoError> {
260        match proto.kind {
261            Some(proto_bytes_stats::Kind::Primitive(x)) => Ok(BytesStats::Primitive(
262                PrimitiveStats::<Vec<u8>>::from_proto(x)?,
263            )),
264            Some(proto_bytes_stats::Kind::Json(x)) => {
265                Ok(BytesStats::Json(JsonStats::from_proto(x)?))
266            }
267            Some(proto_bytes_stats::Kind::Atomic(x)) => {
268                Ok(BytesStats::Atomic(AtomicBytesStats::from_proto(x)?))
269            }
270            Some(proto_bytes_stats::Kind::FixedSize(x)) => {
271                Ok(BytesStats::FixedSize(FixedSizeBytesStats::from_proto(x)?))
272            }
273            None => Err(TryFromProtoError::missing_field("ProtoBytesStats::kind")),
274        }
275    }
276}
277
278impl TrimStats for ProtoBytesStats {
279    fn trim(&mut self) {
280        use proto_bytes_stats::*;
281        match &mut self.kind {
282            Some(Kind::Primitive(stats)) => stats.trim(),
283            Some(Kind::Json(stats)) => stats.trim(),
284            // We explicitly don't trim atomic stats!
285            Some(Kind::Atomic(_)) => {}
286            // We explicitly don't trim fixed size stats!
287            Some(Kind::FixedSize(_)) => {}
288            None => {}
289        }
290    }
291}
292
293/// Returns a [`Strategy`] for generating arbitrary [`BytesStats`].
294pub(crate) fn any_bytes_stats() -> impl Strategy<Value = BytesStats> {
295    let kind_of_packed = Union::new(vec![
296        Just(FixedSizeBytesStatsKind::PackedTime),
297        Just(FixedSizeBytesStatsKind::PackedInterval),
298        Just(FixedSizeBytesStatsKind::PackedNumeric),
299        Just(FixedSizeBytesStatsKind::Uuid),
300    ]);
301
302    Union::new(vec![
303        any_primitive_vec_u8_stats()
304            .prop_map(BytesStats::Primitive)
305            .boxed(),
306        any_json_stats().prop_map(BytesStats::Json).boxed(),
307        any_primitive_vec_u8_stats()
308            .prop_map(|x| {
309                BytesStats::Atomic(AtomicBytesStats {
310                    lower: x.lower,
311                    upper: x.upper,
312                })
313            })
314            .boxed(),
315        (any_primitive_vec_u8_stats(), kind_of_packed)
316            .prop_map(|(x, kind)| {
317                BytesStats::FixedSize(FixedSizeBytesStats {
318                    lower: x.lower,
319                    upper: x.upper,
320                    kind,
321                })
322            })
323            .boxed(),
324    ])
325}