mz_compute_types/
dyncfgs.rs

// Copyright Materialize, Inc. and contributors. All rights reserved.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0.

//! Dyncfgs used by the compute layer.

use std::time::Duration;

use mz_dyncfg::{Config, ConfigSet};

16/// Whether rendering should use `mz_join_core` rather than DD's `JoinCore::join_core`.
17pub const ENABLE_MZ_JOIN_CORE: Config<bool> = Config::new(
18    "enable_mz_join_core",
19    true,
20    "Whether compute should use `mz_join_core` rather than DD's `JoinCore::join_core` to render \
21     linear joins.",
22);
23
24/// Whether rendering should use the new MV sink correction buffer implementation.
25pub const ENABLE_CORRECTION_V2: Config<bool> = Config::new(
26    "enable_compute_correction_v2",
27    false,
28    "Whether compute should use the new MV sink correction buffer implementation.",
29);
30
31/// Whether to enable temporal bucketing in compute.
32pub const ENABLE_TEMPORAL_BUCKETING: Config<bool> = Config::new(
33    "enable_compute_temporal_bucketing",
34    false,
35    "Whether to enable temporal bucketing in compute.",
36);
37
38/// The summary to apply to the frontier in temporal bucketing in compute.
39pub const TEMPORAL_BUCKETING_SUMMARY: Config<Duration> = Config::new(
40    "compute_temporal_bucketing_summary",
41    Duration::from_secs(2),
42    "The summary to apply to frontiers in temporal bucketing in compute.",
43);
44
45/// The yielding behavior with which linear joins should be rendered.
46pub const LINEAR_JOIN_YIELDING: Config<&str> = Config::new(
47    "linear_join_yielding",
48    "work:1000000,time:100",
49    "The yielding behavior compute rendering should apply for linear join operators. Either \
50     'work:<amount>' or 'time:<milliseconds>' or 'work:<amount>,time:<milliseconds>'. Note \
51     that omitting one of 'work' or 'time' will entirely disable join yielding by time or \
52     work, respectively, rather than falling back to some default.",
53);
54
55/// Enable lgalloc.
56pub const ENABLE_LGALLOC: Config<bool> = Config::new("enable_lgalloc", true, "Enable lgalloc.");
57
58/// Enable lgalloc's eager memory return/reclamation feature.
59pub const ENABLE_LGALLOC_EAGER_RECLAMATION: Config<bool> = Config::new(
60    "enable_lgalloc_eager_reclamation",
61    true,
62    "Enable lgalloc's eager return behavior.",
63);
64
65/// The interval at which the background thread wakes.
66pub const LGALLOC_BACKGROUND_INTERVAL: Config<Duration> = Config::new(
67    "lgalloc_background_interval",
68    Duration::from_secs(1),
69    "Scheduling interval for lgalloc's background worker.",
70);
71
72/// Enable lgalloc's eager memory return/reclamation feature.
73pub const LGALLOC_FILE_GROWTH_DAMPENER: Config<usize> = Config::new(
74    "lgalloc_file_growth_dampener",
75    2,
76    "Lgalloc's file growth dampener parameter.",
77);
78
79/// Enable lgalloc's eager memory return/reclamation feature.
80pub const LGALLOC_LOCAL_BUFFER_BYTES: Config<usize> = Config::new(
81    "lgalloc_local_buffer_bytes",
82    64 << 20,
83    "Lgalloc's local buffer bytes parameter.",
84);
85
86/// The bytes to reclaim (slow path) per size class, for each background thread activation.
87pub const LGALLOC_SLOW_CLEAR_BYTES: Config<usize> = Config::new(
88    "lgalloc_slow_clear_bytes",
89    128 << 20,
90    "Clear byte size per size class for every invocation",
91);
92
93/// Interval to run the memory limiter. A zero duration disables the limiter.
94pub const MEMORY_LIMITER_INTERVAL: Config<Duration> = Config::new(
95    "memory_limiter_interval",
96    Duration::from_secs(10),
97    "Interval to run the memory limiter. A zero duration disables the limiter.",
98);
99
100/// Bias to the memory limiter usage factor.
101pub const MEMORY_LIMITER_USAGE_BIAS: Config<f64> = Config::new(
102    "memory_limiter_usage_bias",
103    1.,
104    "Multiplicative bias to the memory limiter's limit.",
105);
106
107/// Burst factor to memory limit.
108pub const MEMORY_LIMITER_BURST_FACTOR: Config<f64> = Config::new(
109    "memory_limiter_burst_factor",
110    0.,
111    "Multiplicative burst factor to the memory limiter's limit.",
112);
113
114/// Enable lgalloc for columnation.
115pub const ENABLE_COLUMNATION_LGALLOC: Config<bool> = Config::new(
116    "enable_columnation_lgalloc",
117    true,
118    "Enable allocating regions from lgalloc.",
119);
120
121/// Enable lgalloc for columnar.
122pub const ENABLE_COLUMNAR_LGALLOC: Config<bool> = Config::new(
123    "enable_columnar_lgalloc",
124    true,
125    "Enable allocating aligned regions in columnar from lgalloc.",
126);
127
128/// The interval at which the compute server performs maintenance tasks.
129pub const COMPUTE_SERVER_MAINTENANCE_INTERVAL: Config<Duration> = Config::new(
130    "compute_server_maintenance_interval",
131    Duration::from_millis(10),
132    "The interval at which the compute server performs maintenance tasks. Zero enables maintenance on every iteration.",
133);
134
135/// Maximum number of in-flight bytes emitted by persist_sources feeding dataflows.
136pub const DATAFLOW_MAX_INFLIGHT_BYTES: Config<Option<usize>> = Config::new(
137    "compute_dataflow_max_inflight_bytes",
138    None,
139    "The maximum number of in-flight bytes emitted by persist_sources feeding \
140     compute dataflows in non-cc clusters.",
141);
142
143/// The "physical backpressure" of `compute_dataflow_max_inflight_bytes_cc` has
144/// been replaced in cc replicas by persist lgalloc and we intend to remove it
145/// once everything has switched to cc. In the meantime, this is a CYA to turn
146/// it back on if absolutely necessary.
147pub const DATAFLOW_MAX_INFLIGHT_BYTES_CC: Config<Option<usize>> = Config::new(
148    "compute_dataflow_max_inflight_bytes_cc",
149    None,
150    "The maximum number of in-flight bytes emitted by persist_sources feeding \
151     compute dataflows in cc clusters.",
152);
153
154/// The term `n` in the growth rate `1 + 1/(n + 1)` for `ConsolidatingVec`.
155/// The smallest value `0` corresponds to the greatest allowed growth, of doubling.
156pub const CONSOLIDATING_VEC_GROWTH_DAMPENER: Config<usize> = Config::new(
157    "consolidating_vec_growth_dampener",
158    1,
159    "Dampener in growth rate for consolidating vector size",
160);
161
162/// The number of dataflows that may hydrate concurrently.
163pub const HYDRATION_CONCURRENCY: Config<usize> = Config::new(
164    "compute_hydration_concurrency",
165    4,
166    "Controls how many compute dataflows may hydrate concurrently.",
167);
168
169/// See `src/storage-operators/src/s3_oneshot_sink/parquet.rs` for more details.
170pub const COPY_TO_S3_PARQUET_ROW_GROUP_FILE_RATIO: Config<usize> = Config::new(
171    "copy_to_s3_parquet_row_group_file_ratio",
172    20,
173    "The ratio (defined as a percentage) of row-group size to max-file-size. \
174        Must be <= 100.",
175);
176
177/// See `src/storage-operators/src/s3_oneshot_sink/parquet.rs` for more details.
178pub const COPY_TO_S3_ARROW_BUILDER_BUFFER_RATIO: Config<usize> = Config::new(
179    "copy_to_s3_arrow_builder_buffer_ratio",
180    150,
181    "The ratio (defined as a percentage) of arrow-builder size to row-group size. \
182        Must be >= 100.",
183);
184
185/// The size of each part in the multi-part upload to use when uploading files to S3.
186pub const COPY_TO_S3_MULTIPART_PART_SIZE_BYTES: Config<usize> = Config::new(
187    "copy_to_s3_multipart_part_size_bytes",
188    1024 * 1024 * 8,
189    "The size of each part in a multipart upload to S3.",
190);
191
192/// Main switch to enable or disable replica expiration.
193///
194/// Changes affect existing replicas only after restart.
195pub const ENABLE_COMPUTE_REPLICA_EXPIRATION: Config<bool> = Config::new(
196    "enable_compute_replica_expiration",
197    true,
198    "Main switch to disable replica expiration.",
199);
200
201/// The maximum lifetime of a replica configured as an offset to the replica start time.
202/// Used in temporal filters to drop diffs generated at timestamps beyond the expiration time.
203///
204/// A zero duration implies no expiration. Changing this value does not affect existing replicas,
205/// even when they are restarted.
206pub const COMPUTE_REPLICA_EXPIRATION_OFFSET: Config<Duration> = Config::new(
207    "compute_replica_expiration_offset",
208    Duration::ZERO,
209    "The expiration time offset for replicas. Zero disables expiration.",
210);
211
212/// When enabled, applies the column demands from a MapFilterProject onto the RelationDesc used to
213/// read out of Persist. This allows Persist to prune unneeded columns as a performance
214/// optimization.
215pub const COMPUTE_APPLY_COLUMN_DEMANDS: Config<bool> = Config::new(
216    "compute_apply_column_demands",
217    true,
218    "When enabled, passes applys column demands to the RelationDesc used to read out of Persist.",
219);
220
221/// The amount of output the flat-map operator produces before yielding. Set to a high value to
222/// avoid yielding, or to a low value to yield frequently.
223pub const COMPUTE_FLAT_MAP_FUEL: Config<usize> = Config::new(
224    "compute_flat_map_fuel",
225    1_000_000,
226    "The amount of output the flat-map operator produces before yielding.",
227);
228
229/// Whether to render `as_specific_collection` using a fueled flat-map operator.
230pub const ENABLE_COMPUTE_RENDER_FUELED_AS_SPECIFIC_COLLECTION: Config<bool> = Config::new(
231    "enable_compute_render_fueled_as_specific_collection",
232    true,
233    "When enabled, renders `as_specific_collection` using a fueled flat-map operator.",
234);
235
236/// Whether to apply logical backpressure in compute dataflows.
237pub const ENABLE_COMPUTE_LOGICAL_BACKPRESSURE: Config<bool> = Config::new(
238    "enable_compute_logical_backpressure",
239    false,
240    "When enabled, compute dataflows will apply logical backpressure.",
241);
242
243/// Maximal number of capabilities retained by the logical backpressure operator.
244///
245/// Selecting this value is subtle. If it's too small, it'll diminish the effectiveness of the
246/// logical backpressure operators. If it's too big, we can slow down hydration and cause state
247/// in the operator's implementation to build up.
248///
249/// The default value represents a compromise between these two extremes. We retain some metrics
250/// for 30 days, and the metrics update every minute. The default is exactly this number.
251pub const COMPUTE_LOGICAL_BACKPRESSURE_MAX_RETAINED_CAPABILITIES: Config<Option<usize>> =
252    Config::new(
253        "compute_logical_backpressure_max_retained_capabilities",
254        Some(30 * 24 * 60),
255        "The maximum number of capabilities retained by the logical backpressure operator.",
256    );
257
258/// The slack to round observed timestamps up to.
259///
260/// The default corresponds to Mz's default tick interval, but does not need to do so. Ideally,
261/// it is not smaller than the tick interval, but it can be larger.
262pub const COMPUTE_LOGICAL_BACKPRESSURE_INFLIGHT_SLACK: Config<Duration> = Config::new(
263    "compute_logical_backpressure_inflight_slack",
264    Duration::from_secs(1),
265    "Round observed timestamps to slack.",
266);
267
268/// Whether to use `drop_dataflow` to actively cancel dataflows.
269pub const ENABLE_ACTIVE_DATAFLOW_CANCELATION: Config<bool> = Config::new(
270    "enable_compute_active_dataflow_cancelation",
271    false,
272    "Whether to use `drop_dataflow` to actively cancel compute dataflows.",
273);
274
275/// Whether to enable the peek response stash, for sending back large peek
276/// responses. The response stash will only be used for results that exceed
277/// `compute_peek_response_stash_threshold_bytes`.
278pub const ENABLE_PEEK_RESPONSE_STASH: Config<bool> = Config::new(
279    "enable_compute_peek_response_stash",
280    false,
281    "Whether to enable the peek response stash, for sending back large peek responses. Will only be used for results that exceed compute_peek_response_stash_threshold_bytes.",
282);
283
284/// The threshold for peek response size above which we should use the peek
285/// response stash. Only used if the peek response stash is enabled _and_ if the
286/// query is "streamable" (roughly: doesn't have an ORDER BY).
287pub const PEEK_RESPONSE_STASH_THRESHOLD_BYTES: Config<usize> = Config::new(
288    "compute_peek_response_stash_threshold_bytes",
289    1024 * 1024 * 300, /* 300mb */
290    "The threshold above which to use the peek response stash, for sending back large peek responses.",
291);
292
293/// The target number of maximum runs in the batches written to the stash.
294///
295/// Setting this reasonably low will make it so batches get consolidated/sorted
296/// concurrently with data being written. Which will in turn make it so that we
297/// have to do less work when reading/consolidating those batches in
298/// `environmentd`.
299pub const PEEK_RESPONSE_STASH_BATCH_MAX_RUNS: Config<usize> = Config::new(
300    "compute_peek_response_stash_batch_max_runs",
301    // The lowest possible setting, do as much work as possible on the
302    // `clusterd` side.
303    2,
304    "The target number of maximum runs in the batches written to the stash.",
305);
306
307/// The target size for batches of rows we read out of the peek stash.
308pub const PEEK_RESPONSE_STASH_READ_BATCH_SIZE_BYTES: Config<usize> = Config::new(
309    "compute_peek_response_stash_read_batch_size_bytes",
310    1024 * 1024 * 100, /* 100mb */
311    "The target size for batches of rows we read out of the peek stash.",
312);
313
314/// The memory budget for consolidating stashed peek responses in
315/// `environmentd`.
316pub const PEEK_RESPONSE_STASH_READ_MEMORY_BUDGET_BYTES: Config<usize> = Config::new(
317    "compute_peek_response_stash_read_memory_budget_bytes",
318    1024 * 1024 * 64, /* 64mb */
319    "The memory budget for consolidating stashed peek responses in environmentd.",
320);
321
322/// The number of batches to pump from the peek result iterator when stashing peek responses.
323pub const PEEK_STASH_NUM_BATCHES: Config<usize> = Config::new(
324    "compute_peek_stash_num_batches",
325    100,
326    "The number of batches to pump from the peek result iterator (in one iteration through the worker loop) when stashing peek responses.",
327);
328
329/// The size of each batch, as number of rows, pumped from the peek result
330/// iterator when stashing peek responses.
331pub const PEEK_STASH_BATCH_SIZE: Config<usize> = Config::new(
332    "compute_peek_stash_batch_size",
333    100000,
334    "The size, as number of rows, of each batch pumped from the peek result iterator (in one iteration through the worker loop) when stashing peek responses.",
335);
336
337/// Adds the full set of all compute `Config`s.
338pub fn all_dyncfgs(configs: ConfigSet) -> ConfigSet {
339    configs
340        .add(&ENABLE_MZ_JOIN_CORE)
341        .add(&ENABLE_CORRECTION_V2)
342        .add(&ENABLE_TEMPORAL_BUCKETING)
343        .add(&TEMPORAL_BUCKETING_SUMMARY)
344        .add(&LINEAR_JOIN_YIELDING)
345        .add(&ENABLE_LGALLOC)
346        .add(&LGALLOC_BACKGROUND_INTERVAL)
347        .add(&LGALLOC_FILE_GROWTH_DAMPENER)
348        .add(&LGALLOC_LOCAL_BUFFER_BYTES)
349        .add(&LGALLOC_SLOW_CLEAR_BYTES)
350        .add(&MEMORY_LIMITER_INTERVAL)
351        .add(&MEMORY_LIMITER_USAGE_BIAS)
352        .add(&MEMORY_LIMITER_BURST_FACTOR)
353        .add(&ENABLE_LGALLOC_EAGER_RECLAMATION)
354        .add(&ENABLE_COLUMNATION_LGALLOC)
355        .add(&ENABLE_COLUMNAR_LGALLOC)
356        .add(&COMPUTE_SERVER_MAINTENANCE_INTERVAL)
357        .add(&DATAFLOW_MAX_INFLIGHT_BYTES)
358        .add(&DATAFLOW_MAX_INFLIGHT_BYTES_CC)
359        .add(&HYDRATION_CONCURRENCY)
360        .add(&COPY_TO_S3_PARQUET_ROW_GROUP_FILE_RATIO)
361        .add(&COPY_TO_S3_ARROW_BUILDER_BUFFER_RATIO)
362        .add(&COPY_TO_S3_MULTIPART_PART_SIZE_BYTES)
363        .add(&ENABLE_COMPUTE_REPLICA_EXPIRATION)
364        .add(&COMPUTE_REPLICA_EXPIRATION_OFFSET)
365        .add(&COMPUTE_APPLY_COLUMN_DEMANDS)
366        .add(&COMPUTE_FLAT_MAP_FUEL)
367        .add(&CONSOLIDATING_VEC_GROWTH_DAMPENER)
368        .add(&ENABLE_COMPUTE_RENDER_FUELED_AS_SPECIFIC_COLLECTION)
369        .add(&ENABLE_COMPUTE_LOGICAL_BACKPRESSURE)
370        .add(&COMPUTE_LOGICAL_BACKPRESSURE_MAX_RETAINED_CAPABILITIES)
371        .add(&COMPUTE_LOGICAL_BACKPRESSURE_INFLIGHT_SLACK)
372        .add(&ENABLE_ACTIVE_DATAFLOW_CANCELATION)
373        .add(&ENABLE_PEEK_RESPONSE_STASH)
374        .add(&PEEK_RESPONSE_STASH_THRESHOLD_BYTES)
375        .add(&PEEK_RESPONSE_STASH_BATCH_MAX_RUNS)
376        .add(&PEEK_RESPONSE_STASH_READ_BATCH_SIZE_BYTES)
377        .add(&PEEK_RESPONSE_STASH_READ_MEMORY_BUDGET_BYTES)
378        .add(&PEEK_STASH_NUM_BATCHES)
379        .add(&PEEK_STASH_BATCH_SIZE)
380}