mz_compute_types/
dyncfgs.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Dyncfgs used by the compute layer.
11
12use std::time::Duration;
13
14use mz_dyncfg::{Config, ConfigSet};
15
16/// Whether rendering should use `mz_join_core` rather than DD's `JoinCore::join_core`.
17pub const ENABLE_MZ_JOIN_CORE: Config<bool> = Config::new(
18    "enable_mz_join_core",
19    true,
20    "Whether compute should use `mz_join_core` rather than DD's `JoinCore::join_core` to render \
21     linear joins.",
22);
23
24/// Whether rendering should use `mz_join_core_v2` rather than DD's `JoinCore::join_core`.
25pub const ENABLE_MZ_JOIN_CORE_V2: Config<bool> = Config::new(
26    "enable_mz_join_core_v2",
27    false,
28    "Whether compute should use `mz_join_core_v2` rather than DD's `JoinCore::join_core` to render \
29     linear joins.",
30);
31
32/// Whether rendering should use the new MV sink correction buffer implementation.
33pub const ENABLE_CORRECTION_V2: Config<bool> = Config::new(
34    "enable_compute_correction_v2",
35    false,
36    "Whether compute should use the new MV sink correction buffer implementation.",
37);
38
39/// Whether the MV sink should distribute appends among workers.
40pub const ENABLE_MV_APPEND_SMEARING: Config<bool> = Config::new(
41    "enable_compute_mv_append_smearing",
42    true,
43    "Whether the MV sink should distribute appends among workers.",
44);
45
46/// Whether to enable temporal bucketing in compute.
47pub const ENABLE_TEMPORAL_BUCKETING: Config<bool> = Config::new(
48    "enable_compute_temporal_bucketing",
49    false,
50    "Whether to enable temporal bucketing in compute.",
51);
52
53/// The summary to apply to the frontier in temporal bucketing in compute.
54pub const TEMPORAL_BUCKETING_SUMMARY: Config<Duration> = Config::new(
55    "compute_temporal_bucketing_summary",
56    Duration::from_secs(2),
57    "The summary to apply to frontiers in temporal bucketing in compute.",
58);
59
60/// The yielding behavior with which linear joins should be rendered.
61pub const LINEAR_JOIN_YIELDING: Config<&str> = Config::new(
62    "linear_join_yielding",
63    "work:1000000,time:100",
64    "The yielding behavior compute rendering should apply for linear join operators. Either \
65     'work:<amount>' or 'time:<milliseconds>' or 'work:<amount>,time:<milliseconds>'. Note \
66     that omitting one of 'work' or 'time' will entirely disable join yielding by time or \
67     work, respectively, rather than falling back to some default.",
68);
69
70/// Enable lgalloc.
71pub const ENABLE_LGALLOC: Config<bool> = Config::new("enable_lgalloc", true, "Enable lgalloc.");
72
73/// Enable lgalloc's eager memory return/reclamation feature.
74pub const ENABLE_LGALLOC_EAGER_RECLAMATION: Config<bool> = Config::new(
75    "enable_lgalloc_eager_reclamation",
76    true,
77    "Enable lgalloc's eager return behavior.",
78);
79
80/// The interval at which the background thread wakes.
81pub const LGALLOC_BACKGROUND_INTERVAL: Config<Duration> = Config::new(
82    "lgalloc_background_interval",
83    Duration::from_secs(1),
84    "Scheduling interval for lgalloc's background worker.",
85);
86
87/// Enable lgalloc's eager memory return/reclamation feature.
88pub const LGALLOC_FILE_GROWTH_DAMPENER: Config<usize> = Config::new(
89    "lgalloc_file_growth_dampener",
90    2,
91    "Lgalloc's file growth dampener parameter.",
92);
93
94/// Enable lgalloc's eager memory return/reclamation feature.
95pub const LGALLOC_LOCAL_BUFFER_BYTES: Config<usize> = Config::new(
96    "lgalloc_local_buffer_bytes",
97    64 << 20,
98    "Lgalloc's local buffer bytes parameter.",
99);
100
101/// The bytes to reclaim (slow path) per size class, for each background thread activation.
102pub const LGALLOC_SLOW_CLEAR_BYTES: Config<usize> = Config::new(
103    "lgalloc_slow_clear_bytes",
104    128 << 20,
105    "Clear byte size per size class for every invocation",
106);
107
108/// Interval to run the memory limiter. A zero duration disables the limiter.
109pub const MEMORY_LIMITER_INTERVAL: Config<Duration> = Config::new(
110    "memory_limiter_interval",
111    Duration::from_secs(10),
112    "Interval to run the memory limiter. A zero duration disables the limiter.",
113);
114
115/// Factor of the memory limit that the process will be permitted to use before terminating the process.
116pub const MEMORY_LIMITER_USAGE_FACTOR: Config<f64> = Config::new(
117    "memory_limiter_usage_factor",
118    2.,
119    "Factor of the memory limit that the process will use before terminating the process.",
120);
121
122/// Bias to the memory limiter usage factor.
123pub const MEMORY_LIMITER_USAGE_BIAS: Config<f64> = Config::new(
124    "memory_limiter_usage_bias",
125    1.,
126    "Multiplicative bias to memory_limiter_usage_factor.",
127);
128
129/// Burst factor to memory limit.
130pub const MEMORY_LIMITER_BURST_FACTOR: Config<f64> = Config::new(
131    "memory_limiter_burst_factor",
132    0.,
133    "Multiplicative burst factor to memory limit.",
134);
135
136/// Interval to run the lgalloc limiter. A zero duration disables the limiter.
137pub const LGALLOC_LIMITER_INTERVAL: Config<Duration> = Config::new(
138    "lgalloc_limiter_interval",
139    Duration::from_secs(10),
140    "Interval to run the lgalloc limiter. A zero duration disables the limiter.",
141);
142
143/// Factor of the memory limit that lgalloc will be permitted to use before terminating the process.
144pub const LGALLOC_LIMITER_USAGE_FACTOR: Config<f64> = Config::new(
145    "lgalloc_limiter_usage_factor",
146    2.,
147    "Factor of the memory limit that lgalloc will use before terminating the process.",
148);
149
150/// Bias to the lgalloc limiter usage factor.
151pub const LGALLOC_LIMITER_USAGE_BIAS: Config<f64> = Config::new(
152    "lgalloc_limiter_usage_bias",
153    1.,
154    "Multiplicative bias to lgalloc_limiter_usage_factor.",
155);
156
157/// Burst factor to disk limit.
158pub const LGALLOC_LIMITER_BURST_FACTOR: Config<f64> = Config::new(
159    "lgalloc_limiter_burst_factor",
160    0.,
161    "Multiplicative burst factor to disk limit.",
162);
163
164/// Enable lgalloc for columnation.
165pub const ENABLE_COLUMNATION_LGALLOC: Config<bool> = Config::new(
166    "enable_columnation_lgalloc",
167    true,
168    "Enable allocating regions from lgalloc.",
169);
170
171/// Enable lgalloc for columnar.
172pub const ENABLE_COLUMNAR_LGALLOC: Config<bool> = Config::new(
173    "enable_columnar_lgalloc",
174    true,
175    "Enable allocating aligned regions in columnar from lgalloc.",
176);
177
178/// The interval at which the compute server performs maintenance tasks.
179pub const COMPUTE_SERVER_MAINTENANCE_INTERVAL: Config<Duration> = Config::new(
180    "compute_server_maintenance_interval",
181    Duration::from_millis(10),
182    "The interval at which the compute server performs maintenance tasks. Zero enables maintenance on every iteration.",
183);
184
185/// Maximum number of in-flight bytes emitted by persist_sources feeding dataflows.
186pub const DATAFLOW_MAX_INFLIGHT_BYTES: Config<Option<usize>> = Config::new(
187    "compute_dataflow_max_inflight_bytes",
188    None,
189    "The maximum number of in-flight bytes emitted by persist_sources feeding \
190     compute dataflows in non-cc clusters.",
191);
192
193/// The "physical backpressure" of `compute_dataflow_max_inflight_bytes_cc` has
194/// been replaced in cc replicas by persist lgalloc and we intend to remove it
195/// once everything has switched to cc. In the meantime, this is a CYA to turn
196/// it back on if absolutely necessary.
197pub const DATAFLOW_MAX_INFLIGHT_BYTES_CC: Config<Option<usize>> = Config::new(
198    "compute_dataflow_max_inflight_bytes_cc",
199    None,
200    "The maximum number of in-flight bytes emitted by persist_sources feeding \
201     compute dataflows in cc clusters.",
202);
203
204/// The term `n` in the growth rate `1 + 1/(n + 1)` for `ConsolidatingVec`.
205/// The smallest value `0` corresponds to the greatest allowed growth, of doubling.
206pub const CONSOLIDATING_VEC_GROWTH_DAMPENER: Config<usize> = Config::new(
207    "consolidating_vec_growth_dampener",
208    1,
209    "Dampener in growth rate for consolidating vector size",
210);
211
212/// The number of dataflows that may hydrate concurrently.
213pub const HYDRATION_CONCURRENCY: Config<usize> = Config::new(
214    "compute_hydration_concurrency",
215    4,
216    "Controls how many compute dataflows may hydrate concurrently.",
217);
218
219/// See `src/storage-operators/src/s3_oneshot_sink/parquet.rs` for more details.
220pub const COPY_TO_S3_PARQUET_ROW_GROUP_FILE_RATIO: Config<usize> = Config::new(
221    "copy_to_s3_parquet_row_group_file_ratio",
222    20,
223    "The ratio (defined as a percentage) of row-group size to max-file-size. \
224        Must be <= 100.",
225);
226
227/// See `src/storage-operators/src/s3_oneshot_sink/parquet.rs` for more details.
228pub const COPY_TO_S3_ARROW_BUILDER_BUFFER_RATIO: Config<usize> = Config::new(
229    "copy_to_s3_arrow_builder_buffer_ratio",
230    150,
231    "The ratio (defined as a percentage) of arrow-builder size to row-group size. \
232        Must be >= 100.",
233);
234
235/// The size of each part in the multi-part upload to use when uploading files to S3.
236pub const COPY_TO_S3_MULTIPART_PART_SIZE_BYTES: Config<usize> = Config::new(
237    "copy_to_s3_multipart_part_size_bytes",
238    1024 * 1024 * 8,
239    "The size of each part in a multipart upload to S3.",
240);
241
242/// Main switch to enable or disable replica expiration.
243///
244/// Changes affect existing replicas only after restart.
245pub const ENABLE_COMPUTE_REPLICA_EXPIRATION: Config<bool> = Config::new(
246    "enable_compute_replica_expiration",
247    true,
248    "Main switch to disable replica expiration.",
249);
250
251/// The maximum lifetime of a replica configured as an offset to the replica start time.
252/// Used in temporal filters to drop diffs generated at timestamps beyond the expiration time.
253///
254/// A zero duration implies no expiration. Changing this value does not affect existing replicas,
255/// even when they are restarted.
256pub const COMPUTE_REPLICA_EXPIRATION_OFFSET: Config<Duration> = Config::new(
257    "compute_replica_expiration_offset",
258    Duration::ZERO,
259    "The expiration time offset for replicas. Zero disables expiration.",
260);
261
262/// When enabled, applies the column demands from a MapFilterProject onto the RelationDesc used to
263/// read out of Persist. This allows Persist to prune unneeded columns as a performance
264/// optimization.
265pub const COMPUTE_APPLY_COLUMN_DEMANDS: Config<bool> = Config::new(
266    "compute_apply_column_demands",
267    true,
268    "When enabled, passes applys column demands to the RelationDesc used to read out of Persist.",
269);
270
271/// The amount of output the flat-map operator produces before yielding. Set to a high value to
272/// avoid yielding, or to a low value to yield frequently.
273pub const COMPUTE_FLAT_MAP_FUEL: Config<usize> = Config::new(
274    "compute_flat_map_fuel",
275    1_000_000,
276    "The amount of output the flat-map operator produces before yielding.",
277);
278
279/// Whether to render `as_specific_collection` using a fueled flat-map operator.
280pub const ENABLE_COMPUTE_RENDER_FUELED_AS_SPECIFIC_COLLECTION: Config<bool> = Config::new(
281    "enable_compute_render_fueled_as_specific_collection",
282    true,
283    "When enabled, renders `as_specific_collection` using a fueled flat-map operator.",
284);
285
286/// Whether to apply logical backpressure in compute dataflows.
287pub const ENABLE_COMPUTE_LOGICAL_BACKPRESSURE: Config<bool> = Config::new(
288    "enable_compute_logical_backpressure",
289    false,
290    "When enabled, compute dataflows will apply logical backpressure.",
291);
292
293/// Maximal number of capabilities retained by the logical backpressure operator.
294///
295/// Selecting this value is subtle. If it's too small, it'll diminish the effectiveness of the
296/// logical backpressure operators. If it's too big, we can slow down hydration and cause state
297/// in the operator's implementation to build up.
298///
299/// The default value represents a compromise between these two extremes. We retain some metrics
300/// for 30 days, and the metrics update every minute. The default is exactly this number.
301pub const COMPUTE_LOGICAL_BACKPRESSURE_MAX_RETAINED_CAPABILITIES: Config<Option<usize>> =
302    Config::new(
303        "compute_logical_backpressure_max_retained_capabilities",
304        Some(30 * 24 * 60),
305        "The maximum number of capabilities retained by the logical backpressure operator.",
306    );
307
308/// The slack to round observed timestamps up to.
309///
310/// The default corresponds to Mz's default tick interval, but does not need to do so. Ideally,
311/// it is not smaller than the tick interval, but it can be larger.
312pub const COMPUTE_LOGICAL_BACKPRESSURE_INFLIGHT_SLACK: Config<Duration> = Config::new(
313    "compute_logical_backpressure_inflight_slack",
314    Duration::from_secs(1),
315    "Round observed timestamps to slack.",
316);
317
318/// Whether to use `drop_dataflow` to actively cancel dataflows.
319pub const ENABLE_ACTIVE_DATAFLOW_CANCELATION: Config<bool> = Config::new(
320    "enable_compute_active_dataflow_cancelation",
321    false,
322    "Whether to use `drop_dataflow` to actively cancel compute dataflows.",
323);
324
325/// Whether to enable the peek response stash, for sending back large peek
326/// responses. The response stash will only be used for results that exceed
327/// `compute_peek_response_stash_threshold_bytes`.
328pub const ENABLE_PEEK_RESPONSE_STASH: Config<bool> = Config::new(
329    "enable_compute_peek_response_stash",
330    false,
331    "Whether to enable the peek response stash, for sending back large peek responses. Will only be used for results that exceed compute_peek_response_stash_threshold_bytes.",
332);
333
334/// The threshold for peek response size above which we should use the peek
335/// response stash. Only used if the peek response stash is enabled _and_ if the
336/// query is "streamable" (roughly: doesn't have an ORDER BY).
337pub const PEEK_RESPONSE_STASH_THRESHOLD_BYTES: Config<usize> = Config::new(
338    "compute_peek_response_stash_threshold_bytes",
339    1024 * 1024 * 300, /* 300mb */
340    "The threshold above which to use the peek response stash, for sending back large peek responses.",
341);
342
343/// The target number of maximum runs in the batches written to the stash.
344///
345/// Setting this reasonably low will make it so batches get consolidated/sorted
346/// concurrently with data being written. Which will in turn make it so that we
347/// have to do less work when reading/consolidating those batches in
348/// `environmentd`.
349pub const PEEK_RESPONSE_STASH_BATCH_MAX_RUNS: Config<usize> = Config::new(
350    "compute_peek_response_stash_batch_max_runs",
351    // The lowest possible setting, do as much work as possible on the
352    // `clusterd` side.
353    2,
354    "The target number of maximum runs in the batches written to the stash.",
355);
356
357/// The target size for batches of rows we read out of the peek stash.
358pub const PEEK_RESPONSE_STASH_READ_BATCH_SIZE_BYTES: Config<usize> = Config::new(
359    "compute_peek_response_stash_read_batch_size_bytes",
360    1024 * 1024 * 100, /* 100mb */
361    "The target size for batches of rows we read out of the peek stash.",
362);
363
364/// The memory budget for consolidating stashed peek responses in
365/// `environmentd`.
366pub const PEEK_RESPONSE_STASH_READ_MEMORY_BUDGET_BYTES: Config<usize> = Config::new(
367    "compute_peek_response_stash_read_memory_budget_bytes",
368    1024 * 1024 * 64, /* 64mb */
369    "The memory budget for consolidating stashed peek responses in environmentd.",
370);
371
372/// The number of batches to pump from the peek result iterator when stashing peek responses.
373pub const PEEK_STASH_NUM_BATCHES: Config<usize> = Config::new(
374    "compute_peek_stash_num_batches",
375    100,
376    "The number of batches to pump from the peek result iterator (in one iteration through the worker loop) when stashing peek responses.",
377);
378
379/// The size of each batch, as number of rows, pumped from the peek result
380/// iterator when stashing peek responses.
381pub const PEEK_STASH_BATCH_SIZE: Config<usize> = Config::new(
382    "compute_peek_stash_batch_size",
383    100000,
384    "The size, as number of rows, of each batch pumped from the peek result iterator (in one iteration through the worker loop) when stashing peek responses.",
385);
386
387/// Adds the full set of all compute `Config`s.
388pub fn all_dyncfgs(configs: ConfigSet) -> ConfigSet {
389    configs
390        .add(&ENABLE_MZ_JOIN_CORE)
391        .add(&ENABLE_MZ_JOIN_CORE_V2)
392        .add(&ENABLE_CORRECTION_V2)
393        .add(&ENABLE_MV_APPEND_SMEARING)
394        .add(&ENABLE_TEMPORAL_BUCKETING)
395        .add(&TEMPORAL_BUCKETING_SUMMARY)
396        .add(&LINEAR_JOIN_YIELDING)
397        .add(&ENABLE_LGALLOC)
398        .add(&LGALLOC_BACKGROUND_INTERVAL)
399        .add(&LGALLOC_FILE_GROWTH_DAMPENER)
400        .add(&LGALLOC_LOCAL_BUFFER_BYTES)
401        .add(&LGALLOC_SLOW_CLEAR_BYTES)
402        .add(&MEMORY_LIMITER_INTERVAL)
403        .add(&MEMORY_LIMITER_USAGE_FACTOR)
404        .add(&MEMORY_LIMITER_USAGE_BIAS)
405        .add(&MEMORY_LIMITER_BURST_FACTOR)
406        .add(&LGALLOC_LIMITER_INTERVAL)
407        .add(&LGALLOC_LIMITER_USAGE_FACTOR)
408        .add(&LGALLOC_LIMITER_USAGE_BIAS)
409        .add(&LGALLOC_LIMITER_BURST_FACTOR)
410        .add(&ENABLE_LGALLOC_EAGER_RECLAMATION)
411        .add(&ENABLE_COLUMNATION_LGALLOC)
412        .add(&ENABLE_COLUMNAR_LGALLOC)
413        .add(&COMPUTE_SERVER_MAINTENANCE_INTERVAL)
414        .add(&DATAFLOW_MAX_INFLIGHT_BYTES)
415        .add(&DATAFLOW_MAX_INFLIGHT_BYTES_CC)
416        .add(&HYDRATION_CONCURRENCY)
417        .add(&COPY_TO_S3_PARQUET_ROW_GROUP_FILE_RATIO)
418        .add(&COPY_TO_S3_ARROW_BUILDER_BUFFER_RATIO)
419        .add(&COPY_TO_S3_MULTIPART_PART_SIZE_BYTES)
420        .add(&ENABLE_COMPUTE_REPLICA_EXPIRATION)
421        .add(&COMPUTE_REPLICA_EXPIRATION_OFFSET)
422        .add(&COMPUTE_APPLY_COLUMN_DEMANDS)
423        .add(&COMPUTE_FLAT_MAP_FUEL)
424        .add(&CONSOLIDATING_VEC_GROWTH_DAMPENER)
425        .add(&ENABLE_COMPUTE_RENDER_FUELED_AS_SPECIFIC_COLLECTION)
426        .add(&ENABLE_COMPUTE_LOGICAL_BACKPRESSURE)
427        .add(&COMPUTE_LOGICAL_BACKPRESSURE_MAX_RETAINED_CAPABILITIES)
428        .add(&COMPUTE_LOGICAL_BACKPRESSURE_INFLIGHT_SLACK)
429        .add(&ENABLE_ACTIVE_DATAFLOW_CANCELATION)
430        .add(&ENABLE_PEEK_RESPONSE_STASH)
431        .add(&PEEK_RESPONSE_STASH_THRESHOLD_BYTES)
432        .add(&PEEK_RESPONSE_STASH_BATCH_MAX_RUNS)
433        .add(&PEEK_RESPONSE_STASH_READ_BATCH_SIZE_BYTES)
434        .add(&PEEK_RESPONSE_STASH_READ_MEMORY_BUDGET_BYTES)
435        .add(&PEEK_STASH_NUM_BATCHES)
436        .add(&PEEK_STASH_BATCH_SIZE)
437}