mz_persist_client/
stats.rs1use std::borrow::Cow;
13use std::sync::Arc;
14
15use mz_dyncfg::{Config, ConfigSet};
16
17use crate::batch::UntrimmableColumns;
18use crate::metrics::Metrics;
19use crate::read::LazyPartStats;
20
21use crate::ShardId;
22
23pub(crate) const STATS_AUDIT_PERCENT: Config<usize> = Config::new(
25 "persist_stats_audit_percent",
26 1,
27 "Percent of filtered data to opt in to correctness auditing (Materialize).",
28);
29
30pub const STATS_AUDIT_PANIC: Config<bool> = Config::new(
32 "persist_stats_audit_panic",
33 true,
34 "If set (as it is by default), panic on any auditing failure. If not, report an error but \
35 pass along the data as normal. This should almost certainly be paired with an audit rate of 100%, \
36 so all parts are audited, for consistency.",
37);
38
39pub(crate) const STATS_COLLECTION_ENABLED: Config<bool> = Config::new(
44 "persist_stats_collection_enabled",
45 true,
46 "\
47 Whether to calculate and record statistics about the data stored in \
48 persist to be used at read time, see persist_stats_filter_enabled \
49 (Materialize).",
50);
51
52pub const STATS_FILTER_ENABLED: Config<bool> = Config::new(
57 "persist_stats_filter_enabled",
58 true,
59 "\
60 Whether to use recorded statistics about the data stored in persist to \
61 filter at read time, see persist_stats_collection_enabled (Materialize).",
62);
63
64pub(crate) const STATS_BUDGET_BYTES: Config<usize> = Config::new(
68 "persist_stats_budget_bytes",
69 1024,
70 "The budget (in bytes) of how many stats to maintain per batch part.",
71);
72
73pub(crate) const STATS_UNTRIMMABLE_COLUMNS_EQUALS: Config<fn() -> String> = Config::new(
74 "persist_stats_untrimmable_columns_equals",
75 || {
76 [
77 "err",
80 "ts",
81 "receivedat",
82 "createdat",
83 "_fivetran_deleted",
87 ]
88 .join(",")
89 },
90 "\
91 Which columns to always retain during persist stats trimming. Any column \
92 with a name exactly equal (case-insensitive) to one of these will be kept. \
93 Comma separated list.",
94);
95
96pub(crate) const STATS_UNTRIMMABLE_COLUMNS_PREFIX: Config<fn() -> String> = Config::new(
97 "persist_stats_untrimmable_columns_prefix",
98 || ["last_"].join(","),
99 "\
100 Which columns to always retain during persist stats trimming. Any column \
101 with a name starting with (case-insensitive) one of these will be kept. \
102 Comma separated list.",
103);
104
105pub(crate) const STATS_UNTRIMMABLE_COLUMNS_SUFFIX: Config<fn() -> String> = Config::new(
106 "persist_stats_untrimmable_columns_suffix",
107 || ["timestamp", "time", "_at", "_tstamp"].join(","),
108 "\
109 Which columns to always retain during persist stats trimming. Any column \
110 with a name ending with (case-insensitive) one of these will be kept. \
111 Comma separated list.",
112);
113
114pub(crate) fn untrimmable_columns(cfg: &ConfigSet) -> UntrimmableColumns {
115 fn split(x: String) -> Vec<Cow<'static, str>> {
116 x.split(',')
117 .filter(|x| !x.is_empty())
118 .map(|x| x.to_owned().into())
119 .collect()
120 }
121 UntrimmableColumns {
122 equals: split(STATS_UNTRIMMABLE_COLUMNS_EQUALS.get(cfg)),
123 prefixes: split(STATS_UNTRIMMABLE_COLUMNS_PREFIX.get(cfg)),
124 suffixes: split(STATS_UNTRIMMABLE_COLUMNS_SUFFIX.get(cfg)),
125 }
126}
127
128#[derive(Debug)]
132pub struct SnapshotStats {
133 pub shard_id: ShardId,
135 pub num_updates: usize,
147}
148
149#[derive(Debug)]
151pub struct SnapshotPartsStats {
152 pub metrics: Arc<Metrics>,
155 pub shard_id: ShardId,
157 pub parts: Vec<SnapshotPartStats>,
159}
160
161#[derive(Debug)]
163pub struct SnapshotPartStats {
164 pub encoded_size_bytes: usize,
166 pub stats: Option<LazyPartStats>,
168}