mz_persist_types/stats/
bytes.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10use std::fmt::Debug;
11
12use mz_proto::{IntoRustIfSome, ProtoType, RustType, TryFromProtoError};
13use proptest::strategy::{Just, Strategy, Union};
14use serde::Serialize;
15
16use crate::stats::json::{JsonStats, any_json_stats};
17use crate::stats::primitive::{PrimitiveStats, any_primitive_vec_u8_stats};
18use crate::stats::{
19    ColumnStatKinds, ColumnStats, ColumnarStats, DynStats, OptionStats, ProtoAtomicBytesStats,
20    ProtoBytesStats, ProtoFixedSizeBytesStats, TrimStats, proto_bytes_stats,
21    proto_fixed_size_bytes_stats,
22};
23
/// Lower and upper bounds for a `Vec<u8>` column whose bounds must be kept
/// whole.
///
/// This carries the same information as `PrimitiveStats<Vec<u8>>`, but the
/// bounds cannot safely be trimmed.
#[derive(Clone, Debug)]
pub struct AtomicBytesStats {
    /// Lower bound of the column; see [PrimitiveStats::lower].
    pub lower: Vec<u8>,
    /// Upper bound of the column; see [PrimitiveStats::upper].
    pub upper: Vec<u8>,
}
32
33impl AtomicBytesStats {
34    fn debug_json(&self) -> serde_json::Value {
35        serde_json::json!({
36            "lower": hex::encode(&self.lower),
37            "upper": hex::encode(&self.upper),
38        })
39    }
40}
41
42impl RustType<ProtoAtomicBytesStats> for AtomicBytesStats {
43    fn into_proto(&self) -> ProtoAtomicBytesStats {
44        ProtoAtomicBytesStats {
45            lower: self.lower.into_proto(),
46            upper: self.upper.into_proto(),
47        }
48    }
49
50    fn from_proto(proto: ProtoAtomicBytesStats) -> Result<Self, TryFromProtoError> {
51        Ok(AtomicBytesStats {
52            lower: proto.lower.into_rust()?,
53            upper: proto.upper.into_rust()?,
54        })
55    }
56}
57
58/// `PrimitiveStats<Vec<u8>>` for types that implement [`FixedSizeCodec`] and
59/// cannot safely be trimmed.
60///
61/// [`FixedSizeCodec`]: crate::columnar::FixedSizeCodec
62#[derive(Debug, Clone)]
63pub struct FixedSizeBytesStats {
64    /// See [PrimitiveStats::lower]
65    pub lower: Vec<u8>,
66    /// See [PrimitiveStats::upper]
67    pub upper: Vec<u8>,
68    /// The kind of data these stats represent.
69    pub kind: FixedSizeBytesStatsKind,
70}
71
72impl FixedSizeBytesStats {
73    fn debug_json(&self) -> serde_json::Value {
74        serde_json::json!({
75            "lower": hex::encode(&self.lower),
76            "upper": hex::encode(&self.upper),
77            "kind": self.kind,
78        })
79    }
80}
81
82impl RustType<ProtoFixedSizeBytesStats> for FixedSizeBytesStats {
83    fn into_proto(&self) -> ProtoFixedSizeBytesStats {
84        ProtoFixedSizeBytesStats {
85            lower: self.lower.into_proto(),
86            upper: self.upper.into_proto(),
87            kind: Some(self.kind.into_proto()),
88        }
89    }
90
91    fn from_proto(proto: ProtoFixedSizeBytesStats) -> Result<Self, TryFromProtoError> {
92        Ok(FixedSizeBytesStats {
93            lower: proto.lower.into_rust()?,
94            upper: proto.upper.into_rust()?,
95            kind: proto
96                .kind
97                .into_rust_if_some("missing field ProtoFixedSizeBytesStats::kind")?,
98        })
99    }
100}
101
102/// The type of data encoded in an [`FixedSizeBytesStats`].
103#[derive(Debug, Clone, Serialize)]
104#[serde(rename_all = "kebab-case")]
105pub enum FixedSizeBytesStatsKind {
106    PackedTime,
107    PackedDateTime,
108    PackedInterval,
109    PackedNumeric,
110    Uuid,
111}
112
113impl RustType<proto_fixed_size_bytes_stats::Kind> for FixedSizeBytesStatsKind {
114    fn into_proto(&self) -> proto_fixed_size_bytes_stats::Kind {
115        match self {
116            FixedSizeBytesStatsKind::PackedTime => {
117                proto_fixed_size_bytes_stats::Kind::PackedTime(())
118            }
119            FixedSizeBytesStatsKind::PackedDateTime => {
120                proto_fixed_size_bytes_stats::Kind::PackedDateTime(())
121            }
122            FixedSizeBytesStatsKind::PackedInterval => {
123                proto_fixed_size_bytes_stats::Kind::PackedInterval(())
124            }
125            FixedSizeBytesStatsKind::PackedNumeric => {
126                proto_fixed_size_bytes_stats::Kind::PackedNumeric(())
127            }
128            FixedSizeBytesStatsKind::Uuid => proto_fixed_size_bytes_stats::Kind::Uuid(()),
129        }
130    }
131
132    fn from_proto(proto: proto_fixed_size_bytes_stats::Kind) -> Result<Self, TryFromProtoError> {
133        let kind = match proto {
134            proto_fixed_size_bytes_stats::Kind::PackedTime(_) => {
135                FixedSizeBytesStatsKind::PackedTime
136            }
137            proto_fixed_size_bytes_stats::Kind::PackedDateTime(_) => {
138                FixedSizeBytesStatsKind::PackedDateTime
139            }
140            proto_fixed_size_bytes_stats::Kind::PackedInterval(_) => {
141                FixedSizeBytesStatsKind::PackedInterval
142            }
143            proto_fixed_size_bytes_stats::Kind::PackedNumeric(_) => {
144                FixedSizeBytesStatsKind::PackedNumeric
145            }
146            proto_fixed_size_bytes_stats::Kind::Uuid(_) => FixedSizeBytesStatsKind::Uuid,
147        };
148        Ok(kind)
149    }
150}
151
152/// Statistics about a column of `Vec<u8>`.
153#[derive(Clone)]
154pub enum BytesStats {
155    Primitive(PrimitiveStats<Vec<u8>>),
156    Json(JsonStats),
157    Atomic(AtomicBytesStats),
158    FixedSize(FixedSizeBytesStats),
159}
160
161impl Debug for BytesStats {
162    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
163        Debug::fmt(&self.debug_json(), f)
164    }
165}
166
167impl DynStats for BytesStats {
168    fn debug_json(&self) -> serde_json::Value {
169        match self {
170            BytesStats::Primitive(x) => x.debug_json(),
171            BytesStats::Json(x) => x.debug_json(),
172            BytesStats::Atomic(x) => x.debug_json(),
173            BytesStats::FixedSize(x) => x.debug_json(),
174        }
175    }
176
177    fn into_columnar_stats(self) -> ColumnarStats {
178        ColumnarStats {
179            nulls: None,
180            values: ColumnStatKinds::Bytes(self),
181        }
182    }
183}
184
185impl ColumnStats for BytesStats {
186    type Ref<'a> = &'a [u8];
187
188    fn lower<'a>(&'a self) -> Option<Self::Ref<'a>> {
189        match self {
190            BytesStats::Primitive(x) => Some(x.lower.as_slice()),
191            BytesStats::Json(_) => None,
192            BytesStats::Atomic(x) => Some(&x.lower),
193            BytesStats::FixedSize(x) => Some(&x.lower),
194        }
195    }
196    fn upper<'a>(&'a self) -> Option<Self::Ref<'a>> {
197        match self {
198            BytesStats::Primitive(x) => Some(x.upper.as_slice()),
199            BytesStats::Json(_) => None,
200            BytesStats::Atomic(x) => Some(&x.upper),
201            BytesStats::FixedSize(x) => Some(&x.upper),
202        }
203    }
204    fn none_count(&self) -> usize {
205        0
206    }
207}
208
209impl ColumnStats for OptionStats<BytesStats> {
210    type Ref<'a> = Option<&'a [u8]>;
211
212    fn lower<'a>(&'a self) -> Option<Self::Ref<'a>> {
213        self.some.lower().map(Some)
214    }
215    fn upper<'a>(&'a self) -> Option<Self::Ref<'a>> {
216        self.some.upper().map(Some)
217    }
218    fn none_count(&self) -> usize {
219        self.none
220    }
221}
222
223impl RustType<ProtoBytesStats> for BytesStats {
224    fn into_proto(&self) -> ProtoBytesStats {
225        let kind = match self {
226            BytesStats::Primitive(x) => proto_bytes_stats::Kind::Primitive(RustType::into_proto(x)),
227            BytesStats::Json(x) => proto_bytes_stats::Kind::Json(RustType::into_proto(x)),
228            BytesStats::Atomic(x) => proto_bytes_stats::Kind::Atomic(RustType::into_proto(x)),
229            BytesStats::FixedSize(x) => proto_bytes_stats::Kind::FixedSize(RustType::into_proto(x)),
230        };
231        ProtoBytesStats { kind: Some(kind) }
232    }
233
234    fn from_proto(proto: ProtoBytesStats) -> Result<Self, TryFromProtoError> {
235        match proto.kind {
236            Some(proto_bytes_stats::Kind::Primitive(x)) => Ok(BytesStats::Primitive(
237                PrimitiveStats::<Vec<u8>>::from_proto(x)?,
238            )),
239            Some(proto_bytes_stats::Kind::Json(x)) => {
240                Ok(BytesStats::Json(JsonStats::from_proto(x)?))
241            }
242            Some(proto_bytes_stats::Kind::Atomic(x)) => {
243                Ok(BytesStats::Atomic(AtomicBytesStats::from_proto(x)?))
244            }
245            Some(proto_bytes_stats::Kind::FixedSize(x)) => {
246                Ok(BytesStats::FixedSize(FixedSizeBytesStats::from_proto(x)?))
247            }
248            None => Err(TryFromProtoError::missing_field("ProtoBytesStats::kind")),
249        }
250    }
251}
252
253impl TrimStats for ProtoBytesStats {
254    fn trim(&mut self) {
255        use proto_bytes_stats::*;
256        match &mut self.kind {
257            Some(Kind::Primitive(stats)) => stats.trim(),
258            Some(Kind::Json(stats)) => stats.trim(),
259            // We explicitly don't trim atomic stats!
260            Some(Kind::Atomic(_)) => {}
261            // We explicitly don't trim fixed size stats!
262            Some(Kind::FixedSize(_)) => {}
263            None => {}
264        }
265    }
266}
267
268/// Returns a [`Strategy`] for generating arbitrary [`BytesStats`].
269pub(crate) fn any_bytes_stats() -> impl Strategy<Value = BytesStats> {
270    let kind_of_packed = Union::new(vec![
271        Just(FixedSizeBytesStatsKind::PackedTime),
272        Just(FixedSizeBytesStatsKind::PackedInterval),
273        Just(FixedSizeBytesStatsKind::PackedNumeric),
274        Just(FixedSizeBytesStatsKind::Uuid),
275    ]);
276
277    Union::new(vec![
278        any_primitive_vec_u8_stats()
279            .prop_map(BytesStats::Primitive)
280            .boxed(),
281        any_json_stats().prop_map(BytesStats::Json).boxed(),
282        any_primitive_vec_u8_stats()
283            .prop_map(|x| {
284                BytesStats::Atomic(AtomicBytesStats {
285                    lower: x.lower,
286                    upper: x.upper,
287                })
288            })
289            .boxed(),
290        (any_primitive_vec_u8_stats(), kind_of_packed)
291            .prop_map(|(x, kind)| {
292                BytesStats::FixedSize(FixedSizeBytesStats {
293                    lower: x.lower,
294                    upper: x.upper,
295                    kind,
296                })
297            })
298            .boxed(),
299    ])
300}