mz_clusterd_test_driver/
dataflow.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Assembly of compute [`DataflowDescription`]s for the headless test driver.
11//!
12//! [`DataflowBuilder`] is the generic boundary between tests and the dataflow
13//! assembly mechanism. A test describes its dataflow in terms of persist imports,
14//! MIR objects to compute, and index exports; the builder owns the parts that are
15//! hard and reusable — the MIR-to-LIR lowering, the [`RenderPlan`] conversion, the
16//! [`CollectionMetadata`] attachment, and the `SqlRelationType`-versus-
17//! `ReprRelationType` bookkeeping — and produces a
18//! `DataflowDescription<RenderPlan, CollectionMetadata>` ready to ship as
19//! [`ComputeCommand::CreateDataflow`].
20//!
21//! [`index_dataflow`] is thin sugar over the builder for the common single-index
22//! shape.
23//!
24//! [`ComputeCommand::CreateDataflow`]: mz_compute_client::protocol::command::ComputeCommand::CreateDataflow
25
26use std::collections::BTreeMap;
27
28use mz_compute_types::dataflows::{
29    BuildDesc, DataflowDescription, IndexDesc, IndexImport, SourceImport,
30};
31use mz_compute_types::plan::LirRelationExpr;
32use mz_compute_types::plan::render_plan::RenderPlan;
33use mz_compute_types::sinks::{
34    ComputeSinkConnection, ComputeSinkDesc, MaterializedViewSinkConnection, MetricSinkConnection,
35    SubscribeSinkConnection,
36};
37use mz_compute_types::sources::SourceInstanceDesc;
38use mz_expr::{
39    AggregateExpr, AggregateFunc, MirRelationExpr, MirScalarExpr, OptimizedMirRelationExpr,
40};
41use mz_persist_types::{PersistLocation, ShardId};
42use mz_repr::optimize::OptimizerFeatures;
43use mz_repr::{GlobalId, RelationDesc, ReprRelationType, Timestamp};
44use mz_storage_types::controller::CollectionMetadata;
45use mz_transform::dataflow::DataflowMetainfo;
46use mz_transform::typecheck::empty_typechecking_context;
47use mz_transform::{EmptyStatisticsOracle, IndexOracle, TransformCtx, optimize_dataflow};
48use timely::progress::Antichain;
49
50/// A persist-backed storage collection to import into a dataflow.
51///
52/// `upper` is the exclusive upper bound of the shard's written data (the next
53/// timestamp after the last written one): for data written at a single timestamp
54/// `t`, pass `t + 1`; for data spread across `0..n_ts`, pass `n_ts`. The compute
55/// instance uses it to know when the source's data is fully available.
56#[derive(Clone, Debug)]
57pub struct PersistSource {
58    /// The data shard backing the collection.
59    pub shard: ShardId,
60    /// The persist location (blob + consensus) the shard lives in.
61    pub location: PersistLocation,
62    /// The relation schema of the collection.
63    pub desc: RelationDesc,
64    /// The exclusive upper bound of the shard's written data.
65    pub upper: Timestamp,
66}
67
68/// A persist-backed target shard for a materialized-view sink to write to.
69#[derive(Clone, Debug)]
70pub struct PersistSink {
71    /// The data shard the sink writes its output to.
72    pub shard: ShardId,
73    /// The persist location (blob + consensus) the shard lives in.
74    pub location: PersistLocation,
75}
76
77/// An [`IndexOracle`] over a dataflow's own `index_imports`, exposing exactly the
78/// arrangements this dataflow may read.
79///
80/// The real `environmentd` optimizer is handed a catalog-backed oracle that knows
81/// every index on the cluster; the test driver has no catalog, but a dataflow's
82/// `index_imports` already name exactly the arrangements available to it, so they
83/// are the correct — and only — index information to expose. Without this, the
84/// optimizer would not recognize an imported index and would re-plan a `Get` over
85/// the indexed collection as a (non-existent) persist read.
86#[derive(Debug)]
87struct ImportedIndexOracle {
88    /// `on_id` -> the `(index_id, key)` arrangements imported on it.
89    by_on_id: BTreeMap<GlobalId, Vec<(GlobalId, Vec<MirScalarExpr>)>>,
90}
91
92impl ImportedIndexOracle {
93    /// Build the oracle from a dataflow's `index_imports`, grouping by arranged id.
94    fn new(index_imports: &BTreeMap<GlobalId, IndexImport>) -> Self {
95        let mut by_on_id: BTreeMap<GlobalId, Vec<(GlobalId, Vec<MirScalarExpr>)>> = BTreeMap::new();
96        for (index_id, import) in index_imports {
97            by_on_id
98                .entry(import.desc.on_id)
99                .or_default()
100                .push((*index_id, import.desc.key.clone()));
101        }
102        ImportedIndexOracle { by_on_id }
103    }
104}
105
106impl IndexOracle for ImportedIndexOracle {
107    fn indexes_on(
108        &self,
109        id: GlobalId,
110    ) -> Box<dyn Iterator<Item = (GlobalId, &[MirScalarExpr])> + '_> {
111        match self.by_on_id.get(&id) {
112            Some(indexes) => Box::new(indexes.iter().map(|(id, key)| (*id, key.as_slice()))),
113            None => Box::new(std::iter::empty()),
114        }
115    }
116}
117
118/// A handle to an imported collection or built object, used to reference it when
119/// constructing MIR for further objects.
120#[derive(Clone, Debug)]
121pub struct Input {
122    id: GlobalId,
123    typ: ReprRelationType,
124}
125
126impl Input {
127    /// The id this input is bound to in the dataflow.
128    pub fn id(&self) -> GlobalId {
129        self.id
130    }
131
132    /// A MIR `Get` of this input, carrying its relation type, for use as a leaf
133    /// when building a computation over it.
134    pub fn get(&self) -> MirRelationExpr {
135        MirRelationExpr::global_get(self.id, self.typ.clone())
136    }
137}
138
139/// Builds a compute dataflow from generic parts, hiding the lowering and persist
140/// wiring mechanism.
141///
142/// # Contract
143///
144/// By default the caller supplies MIR and the builder lowers it *faithfully*,
145/// attaching the persist wiring without optimizing — so a hand-built minimal plan
146/// lowers exactly as written. Optimization — fusion, predicate pushdown, and notably
147/// join-implementation selection — is opt-in via [`Self::optimize`], paid for only
148/// by callers that need it. A `Join` whose `implementation` is left `Unimplemented`
149/// is rejected by the LIR lowering, so a plan containing one requires `optimize`,
150/// which runs [`mz_transform::optimize_dataflow`] to fill the implementation first.
151/// When optimizing, the builder hands the optimizer an index oracle built from its
152/// own `index_imports` (`ImportedIndexOracle`), so imported arrangements are
153/// recognized — the same index information `environmentd`'s catalog oracle would
154/// supply for these imports.
155///
156/// # Construction strategy
157///
158/// The builder deliberately does *not* hand-roll the [`RenderPlan`]: the [`LirId`]s
159/// used to stitch nodes together have no public constructor, and the [`LetFreePlan`]
160/// invariants (notably a valid `topological_order`) are easy to get wrong. Instead
161/// it mirrors exactly what the real compute controller does:
162///
163///  1. Accumulate a MIR-level [`DataflowDescription<OptimizedMirRelationExpr, ()>`]
164///     using the same [`import_source`] / [`insert_plan`] / [`export_index`] helpers
165///     the optimizer uses.
166///  2. Lower it to LIR via [`LirRelationExpr::finalize_dataflow`], yielding
167///     [`DataflowDescription<LirRelationExpr, ()>`].
168///  3. Augment it into [`DataflowDescription<RenderPlan, CollectionMetadata>`] by
169///     converting each object's [`LirRelationExpr`] via [`RenderPlan::try_from`] and attaching
170///     the storage [`CollectionMetadata`] to each source import — the same step
171///     performed in `compute-client`'s `Instance::create_dataflow`.
172///
173/// This guarantees the emitted plan is structurally identical to one produced by a
174/// live `environmentd`, at the cost of running the (cheap, deterministic) lowering
175/// in-process.
176///
177/// [`LirId`]: mz_compute_types::plan::LirId
178/// [`LetFreePlan`]: mz_compute_types::plan::render_plan::LetFreePlan
179/// [`import_source`]: DataflowDescription::import_source
180/// [`insert_plan`]: DataflowDescription::insert_plan
181/// [`export_index`]: DataflowDescription::export_index
182/// [`DataflowDescription<OptimizedMirRelationExpr, ()>`]: DataflowDescription
183/// [`DataflowDescription<Plan, ()>`]: DataflowDescription
184/// [`DataflowDescription<RenderPlan, CollectionMetadata>`]: DataflowDescription
185pub struct DataflowBuilder {
186    /// The MIR-level description being accumulated.
187    mir: DataflowDescription<OptimizedMirRelationExpr, ()>,
188    /// Persist metadata per imported source id, consumed by the augment step.
189    sources: BTreeMap<GlobalId, PersistSource>,
190    /// Target storage metadata per materialized-view sink id, consumed by the
191    /// augment step to fill the sink connection's `storage_metadata`.
192    sinks: BTreeMap<GlobalId, CollectionMetadata>,
193    /// Relation type per referenceable id (imports and built objects), so
194    /// `export_index` can derive the `on_type` instead of taking it as an argument.
195    types: BTreeMap<GlobalId, ReprRelationType>,
196    /// Whether `finish` runs the MIR dataflow optimizer before lowering. Off by
197    /// default (faithful lowering of the caller's MIR); see [`Self::optimize`].
198    optimize: bool,
199}
200
201impl DataflowBuilder {
202    /// Start an empty builder. `name` becomes the dataflow's debug name.
203    pub fn new(name: impl Into<String>) -> Self {
204        DataflowBuilder {
205            mir: DataflowDescription::new(name.into()),
206            sources: BTreeMap::new(),
207            sinks: BTreeMap::new(),
208            types: BTreeMap::new(),
209            optimize: false,
210        }
211    }
212
213    /// Import a persist-backed storage collection as `id`.
214    ///
215    /// Registers the source on the MIR description and records the persist metadata
216    /// for the augment step. Returns an [`Input`] handle whose [`Input::get`] yields
217    /// a correctly typed `Get` node, so callers never construct a [`ReprRelationType`]
218    /// by hand.
219    pub fn import_persist(&mut self, id: GlobalId, source: PersistSource) -> Input {
220        // `import_source` takes the `SqlRelationType`; the `Get`/export path wants the
221        // `ReprRelationType`. Both are derived from the single `desc`.
222        let sql_typ = source.desc.typ().clone();
223        let repr_typ = ReprRelationType::from(source.desc.typ());
224        // `monotonic: false` matches the verified-structure requirement.
225        self.mir.import_source(id, sql_typ, false);
226        self.sources.insert(id, source);
227        self.types.insert(id, repr_typ.clone());
228        Input { id, typ: repr_typ }
229    }
230
231    /// Import a previously-exported index, making the collection it arranges
232    /// (`on_id`) available to this dataflow as an in-memory arrangement.
233    ///
234    /// Unlike [`Self::import_persist`], this imports no storage collection: the
235    /// arrangement is served from the replica's existing, hydrated index, so the
236    /// dataflow needs no [`CollectionMetadata`] and the augment step leaves the
237    /// index import untouched. The MIR-to-LIR lowering registers the imported
238    /// arrangement under `Get(on_id)` automatically, so a faithful (unoptimized)
239    /// `Get(on_id)` picks it up. Returns an [`Input`] referencing `on_id` — the
240    /// id a computation `Get`s, not the index id itself.
241    pub fn import_index(
242        &mut self,
243        index_id: GlobalId,
244        on_id: GlobalId,
245        key_cols: Vec<usize>,
246        on_type: ReprRelationType,
247        monotonic: bool,
248    ) -> Input {
249        let key: Vec<MirScalarExpr> = key_cols.into_iter().map(MirScalarExpr::column).collect();
250        self.mir.import_index(
251            index_id,
252            IndexDesc { on_id, key },
253            on_type.clone(),
254            monotonic,
255        );
256        self.types.insert(on_id, on_type.clone());
257        Input {
258            id: on_id,
259            typ: on_type,
260        }
261    }
262
263    /// A typed `Get` of an already-imported or built id, for callers that
264    /// assemble MIR by id rather than threading [`Input`] handles — notably the
265    /// JSON MIR translator. Errors if `id` was never imported or built, so a bad
266    /// reference surfaces cleanly instead of constructing an ill-typed `Get`.
267    pub fn get(&self, id: GlobalId) -> anyhow::Result<MirRelationExpr> {
268        let typ = self
269            .types
270            .get(&id)
271            .ok_or_else(|| anyhow::anyhow!("get of unknown id {id}; import or build it first"))?
272            .clone();
273        Ok(MirRelationExpr::global_get(id, typ))
274    }
275
276    /// Insert a MIR object to compute, bound to `id`.
277    ///
278    /// `expr` is wrapped via [`OptimizedMirRelationExpr::declare_optimized`]; the
279    /// caller is responsible for any optimization (see the type-level contract). The
280    /// object's relation type is recorded so a later [`Self::export_index`] over `id`
281    /// can derive its `on_type`.
282    pub fn build(&mut self, id: GlobalId, expr: MirRelationExpr) -> &mut Self {
283        self.types.insert(id, expr.typ());
284        self.mir
285            .insert_plan(id, OptimizedMirRelationExpr::declare_optimized(expr));
286        self
287    }
288
289    /// Export an index `index_id` arranging `on_id` by `key_cols`.
290    ///
291    /// `on_id` may be an imported source or a built object; either way the lowering
292    /// synthesizes the `ArrangeBy`. The `on_type` is derived from the referenced id,
293    /// which must have been imported or built first.
294    pub fn export_index(
295        &mut self,
296        index_id: GlobalId,
297        on_id: GlobalId,
298        key_cols: Vec<usize>,
299    ) -> &mut Self {
300        let on_type = self
301            .types
302            .get(&on_id)
303            .unwrap_or_else(|| panic!("export_index on unknown id {on_id}"))
304            .clone();
305        let key: Vec<MirScalarExpr> = key_cols.into_iter().map(MirScalarExpr::column).collect();
306        self.mir
307            .export_index(index_id, IndexDesc { on_id, key }, on_type);
308        self
309    }
310
311    /// Export a materialized-view persist sink `sink_id` writing the collection
312    /// `from_id` to a target persist shard (a materialized view).
313    ///
314    /// `value_desc` is the output relation schema; it must match `from_id`'s type
315    /// (validated by the caller). The target shard is identified by `target`, whose
316    /// `CollectionMetadata` the augment step splices into the sink connection — the
317    /// compute persist sink opens it as `SourceData/()/Timestamp/StorageDiff`, the
318    /// same codec a storage collection uses, so the shard reads back like any other.
319    ///
320    /// `up_to` is always the empty antichain: the persist sink does not implement
321    /// `UP TO` (it panics during rendering otherwise), and the real optimizer
322    /// likewise leaves a materialized view's `up_to` empty — it is a subscribe-only
323    /// concept.
324    pub fn export_materialized_view(
325        &mut self,
326        sink_id: GlobalId,
327        from_id: GlobalId,
328        value_desc: RelationDesc,
329        target: PersistSink,
330    ) -> &mut Self {
331        let metadata = CollectionMetadata {
332            persist_location: target.location,
333            data_shard: target.shard,
334            relation_desc: value_desc.clone(),
335            txns_shard: None,
336        };
337        self.sinks.insert(sink_id, metadata);
338        // The MIR-level description carries the unit storage metadata; the augment
339        // step replaces it with the `CollectionMetadata` recorded above.
340        let desc = ComputeSinkDesc {
341            from: from_id,
342            from_desc: value_desc.clone(),
343            connection: ComputeSinkConnection::MaterializedView(MaterializedViewSinkConnection {
344                value_desc,
345                storage_metadata: (),
346            }),
347            with_snapshot: true,
348            up_to: Antichain::new(),
349            non_null_assertions: vec![],
350            refresh_schedule: None,
351        };
352        self.mir.export_sink(sink_id, desc);
353        self
354    }
355
356    /// Export a subscribe sink `sink_id` streaming changes of the collection
357    /// `from_id` back as `ComputeResponse::SubscribeResponse` batches.
358    ///
359    /// Unlike a materialized view, a subscribe writes no shard, so it needs no
360    /// storage metadata. `value_desc` is the output schema (must match `from_id`'s
361    /// type); `up_to` is the exclusive upper at which the subscribe completes. The
362    /// empty `output` ordering leaves intra-timestamp order unconstrained — the
363    /// driver consolidates and sorts the updates for a deterministic golden.
364    pub fn export_subscribe(
365        &mut self,
366        sink_id: GlobalId,
367        from_id: GlobalId,
368        value_desc: RelationDesc,
369        up_to: Antichain<Timestamp>,
370    ) -> &mut Self {
371        let desc = ComputeSinkDesc {
372            from: from_id,
373            from_desc: value_desc,
374            connection: ComputeSinkConnection::Subscribe(SubscribeSinkConnection {
375                output: vec![],
376            }),
377            with_snapshot: true,
378            up_to,
379            non_null_assertions: vec![],
380            refresh_schedule: None,
381        };
382        self.mir.export_sink(sink_id, desc);
383        self
384    }
385
386    /// Export a metric sink `sink_id` publishing the collection `from_id` into the replica's
387    /// in-process Prometheus registry.
388    ///
389    /// Like a subscribe, a metric sink writes no shard, so it needs no storage metadata.
390    /// `from_desc` must be the shaped canonical row shape the operator reads: `metric_name`,
391    /// `metric_type`, `labels`, `value`, `help`, plus the planner-computed `metric_kind` and
392    /// `name_valid` columns (see `mz_adapter::optimize::metric_sink::shape_metric_sink_source`).
393    /// The sink has no upper bound, matching a maintained (non-`UP TO`) export.
394    pub fn export_metric_sink(
395        &mut self,
396        sink_id: GlobalId,
397        from_id: GlobalId,
398        from_desc: RelationDesc,
399    ) -> &mut Self {
400        let desc = ComputeSinkDesc {
401            from: from_id,
402            from_desc,
403            connection: ComputeSinkConnection::MetricSink(MetricSinkConnection {}),
404            with_snapshot: true,
405            up_to: Antichain::new(),
406            non_null_assertions: vec![],
407            refresh_schedule: None,
408        };
409        self.mir.export_sink(sink_id, desc);
410        self
411    }
412
413    /// Set the dataflow's `as_of` (the read frontier hydration starts from).
414    pub fn as_of(&mut self, t: Timestamp) -> &mut Self {
415        self.mir.as_of = Some(Antichain::from_elem(t));
416        self
417    }
418
419    /// Set the dataflow's `until` (the exclusive upper bound past which output is
420    /// dropped). Defaults to the empty antichain (no bound).
421    pub fn until(&mut self, t: Timestamp) -> &mut Self {
422        self.mir.until = Antichain::from_elem(t);
423        self
424    }
425
426    /// Run the MIR dataflow optimizer in [`Self::finish`] before lowering.
427    ///
428    /// Off by default: the builder otherwise lowers the caller's MIR faithfully (the
429    /// contract above). Enable it for plans that don't lower from raw MIR — notably a
430    /// `Join`, whose `implementation` defaults to `Unimplemented` and is rejected by
431    /// the LIR lowering until [`mz_transform::optimize_dataflow`]'s `JoinImplementation`
432    /// fills it in — or to reproduce the plan `environmentd` would ship for a logical
433    /// expression rather than the literal one written.
434    pub fn optimize(&mut self) -> &mut Self {
435        self.optimize = true;
436        self
437    }
438
439    /// Lower the accumulated MIR and attach persist wiring, producing the
440    /// `DataflowDescription` the compute protocol expects.
441    ///
442    /// Returns an error rather than panicking on a malformed plan (e.g. a key
443    /// column out of range, or an unbalanced object graph), so a caller driving
444    /// this from external input — notably the script reader — can surface a clean
445    /// error instead of crashing the process.
446    pub fn finish(mut self) -> anyhow::Result<DataflowDescription<RenderPlan, CollectionMetadata>> {
447        let features = OptimizerFeatures::default();
448        // Optionally run the MIR dataflow optimizer first (e.g. to fill a `Join`'s
449        // implementation). The index oracle is built from this dataflow's own
450        // `index_imports`, so the optimizer recognizes imported arrangements and
451        // plans `Get`s over them as arrangement reads (not persist reads); the
452        // statistics oracle is empty — no catalog stats — so join planning falls
453        // back to a differential join, which lowers.
454        if self.optimize {
455            let indexes = ImportedIndexOracle::new(&self.mir.index_imports);
456            let typecheck_ctx = empty_typechecking_context();
457            let mut df_meta = DataflowMetainfo::default();
458            let mut ctx = TransformCtx::global(
459                &indexes,
460                &EmptyStatisticsOracle,
461                &features,
462                &typecheck_ctx,
463                &mut df_meta,
464                None,
465            );
466            optimize_dataflow(&mut self.mir, &mut ctx, false)
467                .map_err(|e| anyhow::anyhow!("optimizing dataflow failed: {e}"))?;
468        }
469        // Lower MIR -> LIR. Deterministic and self-contained.
470        let lowered: DataflowDescription<LirRelationExpr, ()> =
471            LirRelationExpr::finalize_dataflow(self.mir, &features, None)
472                .map_err(|e| anyhow::anyhow!("lowering dataflow failed: {e}"))?;
473        augment(lowered, &self.sources, &self.sinks)
474    }
475}
476
477/// Build a single-index dataflow over a persist shard.
478///
479/// Thin sugar over [`DataflowBuilder`] for the common shape: import the collection
480/// backed by `shard` as `source_id`, set `as_of`, and export an index `index_id`
481/// arranging the collection by `key_cols`.
482///
483/// `shard_upper` is the exclusive upper bound of the shard's written data; see
484/// [`PersistSource::upper`].
485pub fn index_dataflow(
486    source_id: GlobalId,
487    index_id: GlobalId,
488    shard: ShardId,
489    location: PersistLocation,
490    desc: RelationDesc,
491    key_cols: Vec<usize>,
492    as_of: Timestamp,
493    shard_upper: Timestamp,
494) -> anyhow::Result<DataflowDescription<RenderPlan, CollectionMetadata>> {
495    let mut builder = DataflowBuilder::new("headless-index");
496    builder.import_persist(
497        source_id,
498        PersistSource {
499            shard,
500            location,
501            desc,
502            upper: shard_upper,
503        },
504    );
505    builder.as_of(as_of);
506    builder.export_index(index_id, source_id, key_cols);
507    builder.finish()
508}
509
510/// Build a dataflow that counts the rows of an existing index and exports the
511/// count as a new, peekable index.
512///
513/// Imports index `index_id` (arranging `on_id`, schema `on_type`, key `key_cols`),
514/// computes `Reduce` with a single `count(*)` aggregate and an empty group key over
515/// `Get(on_id)`, and exports `out_index_id` arranging the one-column count by `[0]`.
516/// This is the compute-side realization of a row-count assertion: the count runs
517/// through a real reduce operator rather than being tallied in the driver.
518///
519/// The result collection has one `bigint` column. Over an empty input the reduce
520/// emits no rows (SQL's default-zero is added higher up), so a peek of the output
521/// yields `[]`, which callers read as a count of `0`.
522pub fn count_over_index(
523    index_id: GlobalId,
524    on_id: GlobalId,
525    on_type: ReprRelationType,
526    key_cols: Vec<usize>,
527    reduce_id: GlobalId,
528    out_index_id: GlobalId,
529    as_of: Timestamp,
530) -> anyhow::Result<DataflowDescription<RenderPlan, CollectionMetadata>> {
531    let mut builder = DataflowBuilder::new("headless-count");
532    // `monotonic: false` keeps the import faithful to a general (non-append-only)
533    // index; the count reduce does not require monotonicity.
534    let input = builder.import_index(index_id, on_id, key_cols, on_type, false);
535    // `count(*)`: count over a non-null literal, so every row contributes.
536    let count = AggregateExpr {
537        func: AggregateFunc::Count,
538        expr: MirScalarExpr::literal_true(),
539        distinct: false,
540    };
541    let reduce = MirRelationExpr::Reduce {
542        input: Box::new(input.get()),
543        group_key: vec![],
544        aggregates: vec![count],
545        monotonic: false,
546        expected_group_size: None,
547    };
548    builder.build(reduce_id, reduce);
549    builder.as_of(as_of);
550    // The reduce output is a single column; arrange it by that column so the
551    // exported index is peekable.
552    builder.export_index(out_index_id, reduce_id, vec![0]);
553    builder.finish()
554}
555
556/// Convert a lowered `DataflowDescription<Plan, ()>` into the
557/// `<RenderPlan, CollectionMetadata>` form expected by the compute protocol.
558///
559/// Mirrors `compute-client`'s `Instance::create_dataflow`: each object's [`LirRelationExpr`]
560/// is flattened into a [`RenderPlan`], and every source import is augmented with the
561/// storage [`CollectionMetadata`] needed by the compute instance to read it. The
562/// per-id [`PersistSource`] supplies the metadata and the exclusive `upper` telling
563/// the compute instance up to which timestamp the shard's data is available.
564fn augment(
565    lowered: DataflowDescription<LirRelationExpr, ()>,
566    sources: &BTreeMap<GlobalId, PersistSource>,
567    sinks: &BTreeMap<GlobalId, CollectionMetadata>,
568) -> anyhow::Result<DataflowDescription<RenderPlan, CollectionMetadata>> {
569    // Attach the storage metadata to each source import, looked up by id. In a live
570    // controller the `upper` is the storage collection's real write frontier; the
571    // caller provides it via `PersistSource::upper` to reflect the written data.
572    let mut source_imports = BTreeMap::new();
573    for (id, import) in lowered.source_imports {
574        let source = sources
575            .get(&id)
576            .ok_or_else(|| anyhow::anyhow!("no persist metadata registered for source {id}"))?;
577        let metadata = CollectionMetadata {
578            persist_location: source.location.clone(),
579            data_shard: source.shard,
580            relation_desc: source.desc.clone(),
581            txns_shard: None,
582        };
583        let desc = SourceInstanceDesc {
584            storage_metadata: metadata,
585            arguments: import.desc.arguments,
586            typ: import.desc.typ,
587        };
588        source_imports.insert(
589            id,
590            SourceImport {
591                desc,
592                monotonic: import.monotonic,
593                with_snapshot: import.with_snapshot,
594                upper: Antichain::from_elem(source.upper),
595            },
596        );
597    }
598
599    let objects_to_build = lowered
600        .objects_to_build
601        .into_iter()
602        .map(|object| {
603            // `RenderPlan::try_from` fails (with `()`) on a structurally invalid
604            // lowered plan; surface it as an error rather than panicking.
605            let plan = RenderPlan::try_from(object.plan)
606                .map_err(|()| anyhow::anyhow!("RenderPlan conversion failed for {}", object.id))?;
607            Ok::<_, anyhow::Error>(BuildDesc {
608                id: object.id,
609                plan,
610            })
611        })
612        .collect::<anyhow::Result<Vec<_>>>()?;
613
614    // Splice the storage metadata into each sink export, mirroring how
615    // `compute-client`'s `Instance::create_dataflow` fills the materialized-view
616    // sink's `storage_metadata` from the storage controller. A subscribe carries no
617    // metadata; copy-to is not built by this driver.
618    let mut sink_exports = BTreeMap::new();
619    for (id, sink) in lowered.sink_exports {
620        let connection = match sink.connection {
621            ComputeSinkConnection::MaterializedView(conn) => {
622                let metadata = sinks.get(&id).ok_or_else(|| {
623                    anyhow::anyhow!("no target metadata registered for materialized-view sink {id}")
624                })?;
625                ComputeSinkConnection::MaterializedView(MaterializedViewSinkConnection {
626                    value_desc: conn.value_desc,
627                    storage_metadata: metadata.clone(),
628                })
629            }
630            ComputeSinkConnection::Subscribe(conn) => ComputeSinkConnection::Subscribe(conn),
631            // A metric sink writes into the process-local metrics registry, not persist, so it
632            // carries no storage metadata to splice.
633            ComputeSinkConnection::MetricSink(conn) => ComputeSinkConnection::MetricSink(conn),
634            ComputeSinkConnection::CopyToS3Oneshot(_) => {
635                anyhow::bail!("copy-to-s3 sink {id} is not implemented")
636            }
637        };
638        sink_exports.insert(
639            id,
640            ComputeSinkDesc {
641                from: sink.from,
642                from_desc: sink.from_desc,
643                connection,
644                with_snapshot: sink.with_snapshot,
645                up_to: sink.up_to,
646                non_null_assertions: sink.non_null_assertions,
647                refresh_schedule: sink.refresh_schedule,
648            },
649        );
650    }
651
652    Ok(DataflowDescription {
653        source_imports,
654        objects_to_build,
655        // The remaining fields carry over unchanged from the lowered dataflow.
656        index_imports: lowered.index_imports,
657        index_exports: lowered.index_exports,
658        sink_exports,
659        as_of: lowered.as_of,
660        until: lowered.until,
661        initial_storage_as_of: lowered.initial_storage_as_of,
662        refresh_schedule: lowered.refresh_schedule,
663        debug_name: lowered.debug_name,
664        time_dependence: lowered.time_dependence,
665    })
666}
667
668#[cfg(test)]
669mod tests {
670    use super::*;
671
672    use mz_compute_types::plan::GetPlan;
673    use mz_compute_types::plan::render_plan::Expr;
674    use mz_compute_types::plan::scalar::LirScalarExpr;
675    use mz_expr::Id;
676
677    /// Assert the assembled dataflow matches the verified structure: a single
678    /// source import, a single object building `Get(source) -> ArrangeBy(key)`,
679    /// and a single index export over the source.
680    #[mz_ore::test]
681    #[cfg_attr(miri, ignore)] // error: unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux`
682    fn index_dataflow_structure() {
683        let desc = crate::data::sample_desc();
684        let loc = PersistLocation {
685            blob_uri: "mem://".parse().unwrap(),
686            consensus_uri: "mem://".parse().unwrap(),
687        };
688        let df = index_dataflow(
689            GlobalId::User(1000),
690            GlobalId::User(1001),
691            ShardId::new(),
692            loc,
693            desc,
694            vec![0],
695            Timestamp::from(0),
696            Timestamp::from(1),
697        )
698        .unwrap();
699        // Structural assertions mirroring the spec.
700        assert_eq!(df.source_imports.len(), 1);
701        assert_eq!(df.objects_to_build.len(), 1);
702        assert_eq!(df.index_exports.len(), 1);
703        assert!(df.sink_exports.is_empty());
704        assert!(df.index_imports.is_empty());
705        assert_eq!(df.as_of, Some(Antichain::from_elem(Timestamp::from(0))));
706        assert_eq!(df.debug_name, "headless-index");
707
708        let (sid, si) = df.source_imports.iter().next().unwrap();
709        assert_eq!(*sid, GlobalId::User(1000));
710        assert!(si.with_snapshot);
711        assert!(!si.monotonic);
712        assert_eq!(si.upper, Antichain::from_elem(Timestamp::from(1)));
713        assert!(si.desc.arguments.operators.is_none());
714
715        let (iid, (idesc, _typ)) = df.index_exports.iter().next().unwrap();
716        assert_eq!(*iid, GlobalId::User(1001));
717        assert_eq!(idesc.on_id, GlobalId::User(1000));
718        assert_eq!(idesc.key, vec![MirScalarExpr::column(0)]);
719
720        // The built object is `Get(source) -> ArrangeBy(key)`. Destructure the
721        // `RenderPlan` and verify the root arranges, keyed by `Column(0)`, over a
722        // `Get` of the source collection.
723        let plan = &df.objects_to_build[0].plan;
724        assert!(plan.binds.is_empty());
725        let (nodes, root, _order) = plan.body.clone().destruct();
726        let root_node = &nodes[&root];
727        let Expr::ArrangeBy {
728            input,
729            forms,
730            strategy,
731            ..
732        } = &root_node.expr
733        else {
734            panic!("expected root ArrangeBy, got {:?}", root_node.expr);
735        };
736        assert_eq!(forms.arranged.len(), 1);
737        assert_eq!(forms.arranged[0].0, vec![LirScalarExpr::column(0)]);
738        assert_eq!(
739            *strategy,
740            mz_compute_types::plan::ArrangementStrategy::Direct
741        );
742        let input_node = &nodes[input];
743        let Expr::Get { id, plan, .. } = &input_node.expr else {
744            panic!("expected ArrangeBy input Get, got {:?}", input_node.expr);
745        };
746        assert_eq!(*id, Id::Global(GlobalId::User(1000)));
747        assert!(matches!(plan, GetPlan::PassArrangements));
748    }
749
750    /// Exercise the general `build` path: import a source, compute a `Project` over
751    /// it, and export an index on the computed object. The computation and the
752    /// arrange must lower to two distinct objects, and the index export must
753    /// reference the built object rather than the source.
754    #[mz_ore::test]
755    #[cfg_attr(miri, ignore)] // error: unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux`
756    fn build_computed_object_lowers() {
757        let desc = crate::data::sample_desc();
758        let loc = PersistLocation {
759            blob_uri: "mem://".parse().unwrap(),
760            consensus_uri: "mem://".parse().unwrap(),
761        };
762        let (source_id, comp_id, index_id) = (
763            GlobalId::User(1000),
764            GlobalId::User(1001),
765            GlobalId::User(1002),
766        );
767
768        let mut builder = DataflowBuilder::new("headless-build");
769        let src = builder.import_persist(
770            source_id,
771            PersistSource {
772                shard: ShardId::new(),
773                location: loc,
774                desc,
775                upper: Timestamp::from(1),
776            },
777        );
778        // Project away the payload column, keeping only `id` (column 0).
779        builder.build(comp_id, src.get().project(vec![0]));
780        builder.as_of(Timestamp::from(0));
781        builder.export_index(index_id, comp_id, vec![0]);
782        let df = builder.finish().unwrap();
783
784        // One source import; the index export references the computed object.
785        assert_eq!(df.source_imports.len(), 1);
786        assert!(df.source_imports.contains_key(&source_id));
787        let (iid, (idesc, _typ)) = df.index_exports.iter().next().unwrap();
788        assert_eq!(*iid, index_id);
789        assert_eq!(idesc.on_id, comp_id);
790
791        // The computation and the arrange lower to two distinct build objects.
792        assert_eq!(df.objects_to_build.len(), 2);
793        let ids: Vec<_> = df.objects_to_build.iter().map(|o| o.id).collect();
794        assert!(ids.contains(&comp_id));
795        assert!(ids.contains(&index_id));
796    }
797
798    /// A `Join` does not lower from raw MIR — its `implementation` defaults to
799    /// `Unimplemented` and the LIR lowering rejects it — but `optimize()` runs the
800    /// MIR optimizer first, which fills the implementation, so the same dataflow
801    /// then lowers. This is exactly what the `optimize` flag buys.
802    #[mz_ore::test]
803    #[cfg_attr(miri, ignore)] // error: unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux`
804    fn join_lowers_only_with_optimize() {
805        let loc = PersistLocation {
806            blob_uri: "mem://".parse().unwrap(),
807            consensus_uri: "mem://".parse().unwrap(),
808        };
809        // Build a two-source equi-join (`#0 = #2` across the concatenated columns)
810        // and export an index over it. `optimize` selects whether the MIR optimizer
811        // runs in `finish`.
812        let assemble = |optimize: bool| {
813            let mut builder = DataflowBuilder::new("headless-join-test");
814            let left = builder.import_persist(
815                GlobalId::User(1000),
816                PersistSource {
817                    shard: ShardId::new(),
818                    location: loc.clone(),
819                    desc: crate::data::sample_desc(),
820                    upper: Timestamp::from(1),
821                },
822            );
823            let right = builder.import_persist(
824                GlobalId::User(1001),
825                PersistSource {
826                    shard: ShardId::new(),
827                    location: loc.clone(),
828                    desc: crate::data::sample_desc(),
829                    upper: Timestamp::from(1),
830                },
831            );
832            let join = MirRelationExpr::join_scalars(
833                vec![left.get(), right.get()],
834                vec![vec![MirScalarExpr::column(0), MirScalarExpr::column(2)]],
835            );
836            builder.build(GlobalId::User(2000), join);
837            if optimize {
838                builder.optimize();
839            }
840            builder.as_of(Timestamp::from(0));
841            builder.export_index(GlobalId::User(2001), GlobalId::User(2000), vec![0]);
842            builder.finish()
843        };
844
845        // Without the optimizer the `Unimplemented` join is rejected by the lowering.
846        assert!(assemble(false).is_err());
847        // With it, the optimizer fills the join implementation and the dataflow lowers.
848        assert!(assemble(true).is_ok());
849    }
850
851    /// A single dataflow can export both an index and a materialized view over the
852    /// same built object (binding). Both exports reference that object; the index
853    /// arranges it and the MV sink writes it to a target shard.
854    #[mz_ore::test]
855    #[cfg_attr(miri, ignore)] // error: unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux`
856    fn index_and_mv_same_binding() {
857        let desc = crate::data::sample_desc();
858        let loc = PersistLocation {
859            blob_uri: "mem://".parse().unwrap(),
860            consensus_uri: "mem://".parse().unwrap(),
861        };
862        let (source_id, view_id, index_id, sink_id) = (
863            GlobalId::User(1000),
864            GlobalId::User(1001),
865            GlobalId::User(1002),
866            GlobalId::User(1003),
867        );
868
869        let mut builder = DataflowBuilder::new("headless-index-and-mv");
870        let src = builder.import_persist(
871            source_id,
872            PersistSource {
873                shard: ShardId::new(),
874                location: loc.clone(),
875                desc: desc.clone(),
876                upper: Timestamp::from(1),
877            },
878        );
879        // A view over the source is the shared binding both exports reference.
880        builder.build(
881            view_id,
882            src.get().filter(vec![MirScalarExpr::literal_true()]),
883        );
884        builder.as_of(Timestamp::from(0));
885        builder.export_index(index_id, view_id, vec![0]);
886        builder.export_materialized_view(
887            sink_id,
888            view_id,
889            desc,
890            PersistSink {
891                shard: ShardId::new(),
892                location: loc,
893            },
894        );
895        let df = builder.finish().unwrap();
896
897        // Both exports are present and reference the same view binding.
898        assert_eq!(df.index_exports.len(), 1);
899        assert_eq!(df.sink_exports.len(), 1);
900        let (_iid, (idesc, _typ)) = df.index_exports.iter().next().unwrap();
901        assert_eq!(idesc.on_id, view_id);
902        let (sid, sink) = df.sink_exports.iter().next().unwrap();
903        assert_eq!(*sid, sink_id);
904        assert_eq!(sink.from, view_id);
905        // The MV sink carries the target shard's storage metadata after augment.
906        assert!(matches!(
907            sink.connection,
908            ComputeSinkConnection::MaterializedView(_)
909        ));
910    }
911
912    /// A metric sink assembles like any other export: a source import, a view binding built over
913    /// it, and one sink export whose connection is a payload-free `MetricSink`. Unlike a
914    /// materialized view, the augment step splices no storage metadata into it.
915    #[mz_ore::test]
916    #[cfg_attr(miri, ignore)] // error: unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux`
917    fn metric_sink_dataflow_structure() {
918        let desc = crate::data::sample_desc();
919        let loc = PersistLocation {
920            blob_uri: "mem://".parse().unwrap(),
921            consensus_uri: "mem://".parse().unwrap(),
922        };
923        let (source_id, view_id, sink_id) = (
924            GlobalId::User(1000),
925            GlobalId::User(1001),
926            GlobalId::User(1002),
927        );
928
929        let mut builder = DataflowBuilder::new("headless-metric-sink");
930        let src = builder.import_persist(
931            source_id,
932            PersistSource {
933                shard: ShardId::new(),
934                location: loc,
935                desc: desc.clone(),
936                upper: Timestamp::from(1),
937            },
938        );
939        builder.build(
940            view_id,
941            src.get().filter(vec![MirScalarExpr::literal_true()]),
942        );
943        builder.as_of(Timestamp::from(0));
944        builder.export_metric_sink(sink_id, view_id, desc);
945        let df = builder.finish().unwrap();
946
947        assert_eq!(df.sink_exports.len(), 1);
948        let (sid, sink) = df.sink_exports.iter().next().unwrap();
949        assert_eq!(*sid, sink_id);
950        assert_eq!(sink.from, view_id);
951        // The metric sink carries a payload-free connection and no storage metadata.
952        assert!(matches!(
953            sink.connection,
954            ComputeSinkConnection::MetricSink(MetricSinkConnection {})
955        ));
956    }
957
958    /// With `optimize` on, the optimizer is handed an index oracle built from the
959    /// dataflow's `index_imports`, so a `Get` over an imported (but not persisted)
960    /// collection is recognized as an arrangement read. Were the oracle empty, the
961    /// optimizer would re-plan that `Get` as a persist read of a collection that has
962    /// no source import, and `finish` would fail — so success here, with one index
963    /// import and no source imports, is the proof the index information reached the
964    /// optimizer.
965    #[mz_ore::test]
966    #[cfg_attr(miri, ignore)] // error: unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux`
967    fn optimize_uses_imported_index() {
968        let desc = crate::data::sample_desc();
969        let on_type = ReprRelationType::from(desc.typ());
970        let (index_id, on_id, view_id, out_index_id) = (
971            GlobalId::User(1001),
972            GlobalId::User(1000),
973            GlobalId::User(2000),
974            GlobalId::User(2001),
975        );
976
977        let mut builder = DataflowBuilder::new("headless-optimize-imported-index");
978        let input = builder.import_index(index_id, on_id, vec![0], on_type, false);
979        // A view over the imported arrangement; with `optimize` the optimizer must
980        // recognize the import to plan the `Get` as an arrangement read.
981        builder.build(view_id, input.get().project(vec![0]));
982        builder.optimize();
983        builder.as_of(Timestamp::from(0));
984        builder.export_index(out_index_id, view_id, vec![0]);
985        let df = builder.finish().unwrap();
986
987        // The collection is read from the imported arrangement, not from persist:
988        // exactly one index import, no source imports.
989        assert_eq!(df.index_imports.len(), 1);
990        assert!(df.source_imports.is_empty());
991        let (iid, import) = df.index_imports.iter().next().unwrap();
992        assert_eq!(*iid, index_id);
993        assert_eq!(import.desc.on_id, on_id);
994    }
995
996    /// A count-over-index dataflow imports the index (no storage source), builds
997    /// the reduce and its arrange as two objects, and exports the count index.
998    #[mz_ore::test]
999    #[cfg_attr(miri, ignore)] // error: unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux`
1000    fn count_over_index_structure() {
1001        let desc = crate::data::sample_desc();
1002        let on_type = ReprRelationType::from(desc.typ());
1003        let df = count_over_index(
1004            GlobalId::User(1001), // existing index to import
1005            GlobalId::User(1000), // collection it arranges
1006            on_type,
1007            vec![0],              // its key
1008            GlobalId::User(2000), // reduce build object
1009            GlobalId::User(2001), // exported count index
1010            Timestamp::from(0),
1011        )
1012        .unwrap();
1013
1014        // Imports the arrangement, not a storage collection.
1015        assert_eq!(df.index_imports.len(), 1);
1016        assert!(df.source_imports.is_empty());
1017        let (iid, import) = df.index_imports.iter().next().unwrap();
1018        assert_eq!(*iid, GlobalId::User(1001));
1019        assert_eq!(import.desc.on_id, GlobalId::User(1000));
1020        assert_eq!(import.desc.key, vec![MirScalarExpr::column(0)]);
1021
1022        // Reduce + arrange lower to two build objects; the count index exports.
1023        assert_eq!(df.objects_to_build.len(), 2);
1024        assert_eq!(df.index_exports.len(), 1);
1025        let (eid, (edesc, _typ)) = df.index_exports.iter().next().unwrap();
1026        assert_eq!(*eid, GlobalId::User(2001));
1027        assert_eq!(edesc.on_id, GlobalId::User(2000));
1028        assert_eq!(edesc.key, vec![MirScalarExpr::column(0)]);
1029    }
1030}
mz_clusterd_test_driver/dataflow.rs

mz_clusterd_test_driver/
dataflow.rs