mz_persist_client/
stats.rs1use std::borrow::Cow;
13use std::sync::Arc;
14
15use mz_dyncfg::{Config, ConfigSet};
16
17use crate::batch::UntrimmableColumns;
18use crate::metrics::Metrics;
19use crate::read::LazyPartStats;
20
21use crate::ShardId;
22
/// Dynamic config `persist_stats_audit_percent`: the percent of filtered data
/// that is opted in to correctness auditing.
///
/// The default passed to `Config::new` is `1`.
pub(crate) const STATS_AUDIT_PERCENT: Config<usize> = Config::new(
    "persist_stats_audit_percent",
    1,
    "Percent of filtered data to opt in to correctness auditing (Materialize).",
);
29
30pub(crate) const STATS_COLLECTION_ENABLED: Config<bool> = Config::new(
35 "persist_stats_collection_enabled",
36 true,
37 "\
38 Whether to calculate and record statistics about the data stored in \
39 persist to be used at read time, see persist_stats_filter_enabled \
40 (Materialize).",
41);
42
43pub const STATS_FILTER_ENABLED: Config<bool> = Config::new(
48 "persist_stats_filter_enabled",
49 true,
50 "\
51 Whether to use recorded statistics about the data stored in persist to \
52 filter at read time, see persist_stats_collection_enabled (Materialize).",
53);
54
/// Dynamic config `persist_stats_budget_bytes`: the budget, in bytes, of how
/// many stats to maintain per batch part.
///
/// The default passed to `Config::new` is `1024`.
// NOTE(review): how the budget is enforced (and what gets dropped when it is
// exceeded) is not visible in this file.
pub(crate) const STATS_BUDGET_BYTES: Config<usize> = Config::new(
    "persist_stats_budget_bytes",
    1024,
    "The budget (in bytes) of how many stats to maintain per batch part.",
);
63
64pub(crate) const STATS_UNTRIMMABLE_COLUMNS_EQUALS: Config<fn() -> String> = Config::new(
65 "persist_stats_untrimmable_columns_equals",
66 || {
67 [
68 "err",
71 "ts",
72 "receivedat",
73 "createdat",
74 "_fivetran_deleted",
78 ]
79 .join(",")
80 },
81 "\
82 Which columns to always retain during persist stats trimming. Any column \
83 with a name exactly equal (case-insensitive) to one of these will be kept. \
84 Comma separated list.",
85);
86
87pub(crate) const STATS_UNTRIMMABLE_COLUMNS_PREFIX: Config<fn() -> String> = Config::new(
88 "persist_stats_untrimmable_columns_prefix",
89 || ["last_"].join(","),
90 "\
91 Which columns to always retain during persist stats trimming. Any column \
92 with a name starting with (case-insensitive) one of these will be kept. \
93 Comma separated list.",
94);
95
96pub(crate) const STATS_UNTRIMMABLE_COLUMNS_SUFFIX: Config<fn() -> String> = Config::new(
97 "persist_stats_untrimmable_columns_suffix",
98 || ["timestamp", "time", "_at", "_tstamp"].join(","),
99 "\
100 Which columns to always retain during persist stats trimming. Any column \
101 with a name ending with (case-insensitive) one of these will be kept. \
102 Comma separated list.",
103);
104
105pub(crate) fn untrimmable_columns(cfg: &ConfigSet) -> UntrimmableColumns {
106 fn split(x: String) -> Vec<Cow<'static, str>> {
107 x.split(',')
108 .filter(|x| !x.is_empty())
109 .map(|x| x.to_owned().into())
110 .collect()
111 }
112 UntrimmableColumns {
113 equals: split(STATS_UNTRIMMABLE_COLUMNS_EQUALS.get(cfg)),
114 prefixes: split(STATS_UNTRIMMABLE_COLUMNS_PREFIX.get(cfg)),
115 suffixes: split(STATS_UNTRIMMABLE_COLUMNS_SUFFIX.get(cfg)),
116 }
117}
118
/// Summary statistics for a snapshot of a single shard.
///
/// NOTE(review): how and at what point in time these values are computed is not
/// visible in this file; confirm against the code that constructs this struct.
#[derive(Debug)]
pub struct SnapshotStats {
    /// Identifier of the shard (presumably the shard these statistics describe).
    pub shard_id: ShardId,
    /// A count of updates.
    ///
    /// NOTE(review): whether this is exact or an estimate, and whether it is
    /// counted before or after consolidation, should be confirmed with the
    /// producer of this value.
    pub num_updates: usize,
}
139
/// Statistics for a snapshot of a shard, broken down by part.
///
/// NOTE(review): how these values are gathered is not visible in this file;
/// confirm against the code that constructs this struct.
#[derive(Debug)]
pub struct SnapshotPartsStats {
    /// Shared handle to the persist [`Metrics`].
    ///
    /// NOTE(review): presumably exposed so consumers of these stats can report
    /// counters; confirm against callers.
    pub metrics: Arc<Metrics>,
    /// Identifier of the shard (presumably the shard these statistics describe).
    pub shard_id: ShardId,
    /// Statistics entries, one [`SnapshotPartStats`] per element.
    pub parts: Vec<SnapshotPartStats>,
}
151
/// Statistics for a single part; used as the element type of
/// `SnapshotPartsStats::parts`.
#[derive(Debug)]
pub struct SnapshotPartStats {
    /// Encoded size in bytes.
    ///
    /// NOTE(review): presumably the size of the part's encoded data; confirm
    /// exactly what is included.
    pub encoded_size_bytes: usize,
    /// Lazily decoded stats, or `None` when none are present.
    ///
    /// NOTE(review): the conditions under which this is `None` are not visible
    /// in this file.
    pub stats: Option<LazyPartStats>,
}