mz_clusterd_test_driver/
script.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Executes a text command script against `clusterd`.
11//!
12//! Instead of recompiling a Rust scenario, a test (or an agent) writes a
13//! [`crate::text`] script: a sequence of commands, each with an expected output
14//! block (`----`) that is the assertion. The coarse orchestration verbs map almost
15//! directly to [`Driver`] calls; `define` carries arbitrary MIR (as pretty-form
16//! specs parsed by `mz-expr-parser`, the `.spec` test syntax) over the full
17//! [`DataflowBuilder`] surface, including index imports, while `define_index`
18//! stays as sugar for the common single-index shape. Explicit `write_rows`
19//! payloads are typed against the schema token-by-token via `cell_from_token`
20//! (reusing `mz_repr::strconv`) rather than `Row`'s opaque serde.
21//!
22//! # Execution
23//!
24//! [`run`] parses the script, executes each command, and compares its golden
25//! output to the expected block — failing the run on a mismatch, or rewriting the
26//! file when `REWRITE` is set. A command that fails renders as `error: <message>`,
27//! so an expected failure is asserted by its golden block. Assertions are
28//! level-triggered waits on monotonic frontiers, so a single sequential script is
29//! deterministic regardless of how the dataflows interleave.
30//!
31//! Shards are referenced by a string alias; the first command naming an alias
32//! allocates a fresh [`ShardId`] for it. Object ids are raw `u64`s mapped to
33//! [`GlobalId::User`].
34
35use std::collections::BTreeMap;
36use std::path::Path;
37use std::time::Duration;
38
39use anyhow::Context;
40use mz_compute_client::protocol::command::{ComputeCommand, PeekTarget};
41use mz_dyncfg::{ConfigType, ConfigUpdates, ConfigVal};
42use mz_expr::visit::Visit;
43use mz_expr::{Id, MirRelationExpr};
44use mz_expr_parser::{TestCatalog, try_parse_mir};
45use mz_persist_client::PersistClient;
46use mz_persist_types::{PersistLocation, ShardId};
47use mz_repr::{
48    GlobalId, RelationDesc, ReprRelationType, Row, SqlColumnType, SqlRelationType, SqlScalarType,
49    Timestamp, strconv,
50};
51use mz_storage_types::controller::CollectionMetadata;
52use serde::{Deserialize, Serialize};
53use timely::progress::Antichain;
54
55use crate::data::{
56    Cell, pack_cells, sample_desc, synth_rows, write_rows_single_ts, write_rows_spread,
57};
58use crate::dataflow::{
59    DataflowBuilder, PersistSink, PersistSource, count_over_index, index_dataflow,
60};
61use crate::driver::Driver;
62
/// The default payload padding (bytes) for synthetic rows when a command omits it.
///
/// Documented as the fallback for the `row_bytes` field of the synthetic-write
/// commands.
const DEFAULT_ROW_BYTES: usize = 64;
/// The default timeout (seconds) for `await_frontier` when a command omits it.
///
/// Also documented as the fallback for `AwaitSubscribe::timeout_secs`, and used
/// as the fixed hydration wait in `ScriptState::count_via_reduce`.
const DEFAULT_TIMEOUT_SECS: u64 = 600;
67
/// A column declaration in a `define_schema` command.
///
/// `relation_desc` turns a slice of these into a [`RelationDesc`], resolving
/// `ty` through `scalar_type_from_str`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ColumnSpec {
    /// Column name.
    pub name: String,
    /// Scalar type name; see `scalar_type_from_str`. Serialized under the key
    /// `type` (the field cannot be spelled `type` in Rust, hence the rename).
    #[serde(rename = "type")]
    pub ty: String,
    /// Whether the column admits `NULL`. Defaults to `false` when omitted.
    #[serde(default)]
    pub nullable: bool,
}
80
/// A single dyncfg update in an `update-configuration` command: a config name, a
/// type tag selecting how `value` is parsed (`bool`/`u32`/`usize`/`f64`/`string`/
/// `duration`), and the value. Typed against [`mz_dyncfg`] at execution.
///
/// The `(ty, value)` pair is what `parse_config_val` accepts; the value stays an
/// unparsed string until then.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ConfigSetting {
    /// The dyncfg name (sent to the replica by name; unknown names are ignored).
    pub name: String,
    /// The type tag selecting the [`ConfigVal`] variant. Serialized under the
    /// key `type`.
    #[serde(rename = "type")]
    pub ty: String,
    /// The value, parsed against `ty`.
    pub value: String,
}
94
/// A collection to import in a dataflow definition (see
/// [`Command::CreateDataflow`]): a persist source or an existing index.
/// Externally tagged: `{"source": {…}}` or `{"index": {…}}`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ImportSpec {
    /// Import a persist-backed storage collection, as `define_index` does.
    Source {
        /// The imported source's global id.
        id: u64,
        /// Shard alias to import; allocated on first use.
        shard: String,
        /// Schema name; defaults to the built-in sample schema.
        #[serde(default)]
        schema: Option<String>,
        /// The shard's exclusive write upper (see `PersistSource::upper`).
        upper: u64,
    },
    /// Import an existing index by its global id; its arranged collection, key,
    /// and type are taken from the registry, so it must have been defined first.
    Index {
        /// The index's global id.
        index_id: u64,
    },
}
119
/// A MIR object to build in a dataflow definition (see
/// [`Command::CreateDataflow`]).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BuildSpec {
    /// The built object's global id.
    pub id: u64,
    /// The computation, as a pretty-form MIR spec parsed by `mz-expr-parser`
    /// (e.g. `Reduce aggregates=[count(*)]` over `Get u1000`). It references
    /// imported or previously-built objects by their global-id name (`u<n>`); the
    /// leaf `Get`'s type is resolved from the import, not authored.
    pub expr: String,
}
131
132/// An export in a `create-dataflow` command, mirroring the export kinds a real
133/// dataflow produces (see [`mz_compute_types::sinks::ComputeSinkConnection`]).
134/// `copy-to` is intentionally absent: the parser rejects it as unimplemented.
135#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
136#[serde(rename_all = "kebab-case")]
137pub enum ExportSpec {
138    /// An arrangement, peekable as an index and importable by later dataflows.
139    Index {
140        /// The exported index's global id.
141        index_id: u64,
142        /// The imported or built id the index arranges.
143        on_id: u64,
144        /// Columns to arrange by.
145        key: Vec<usize>,
146    },
147    /// A persist sink writing the collection to a shard (a materialized view),
148    /// verified by reading the shard back with a persist `peek` of the sink id.
149    MaterializedView {
150        /// The sink's global id (scheduled and frontier-tracked under this id).
151        sink_id: u64,
152        /// The imported or built id the sink writes.
153        on_id: u64,
154        /// Target shard alias; allocated on first use.
155        shard: String,
156        /// Output schema; defaults to the sample schema. Must match `on_id`'s type.
157        schema: Option<String>,
158    },
159    /// A subscribe sink streaming changes back as responses, collected by
160    /// `await-subscribe`.
161    Subscribe {
162        /// The sink's global id.
163        sink_id: u64,
164        /// The imported or built id the sink streams.
165        on_id: u64,
166        /// Output schema; defaults to the sample schema. Must match `on_id`'s type.
167        schema: Option<String>,
168        /// Exclusive upper at which the subscribe completes; unbounded if absent.
169        up_to: Option<u64>,
170    },
171}
172
173/// Map a JSON type name to a [`SqlScalarType`]. The supported set is intentionally
174/// small and matches [`crate::data::Cell`]; extend both together.
175fn scalar_type_from_str(s: &str) -> anyhow::Result<SqlScalarType> {
176    Ok(match s.to_ascii_lowercase().as_str() {
177        "int16" | "smallint" => SqlScalarType::Int16,
178        "int32" | "int" | "integer" => SqlScalarType::Int32,
179        "int64" | "bigint" => SqlScalarType::Int64,
180        "bool" | "boolean" => SqlScalarType::Bool,
181        "string" | "text" => SqlScalarType::String,
182        "bytes" | "bytea" => SqlScalarType::Bytes,
183        other => anyhow::bail!("unsupported column type {other:?}"),
184    })
185}
186
187/// Parse a configuration value string into a [`ConfigVal`], selecting the variant
188/// by a type tag. Each numeric/bool/duration type reuses [`mz_dyncfg`]'s own
189/// `ConfigType::parse` (so the accepted syntax — e.g. `on`/`off` for bool, humantime
190/// for duration — matches the rest of the codebase); `string` is taken verbatim.
191fn parse_config_val(ty: &str, value: &str) -> anyhow::Result<ConfigVal> {
192    let err = |e: String| anyhow::anyhow!("config value {value:?} is not a valid {ty}: {e}");
193    Ok(match ty {
194        "bool" => <bool as ConfigType>::parse(value).map_err(err)?.into(),
195        "u32" => <u32 as ConfigType>::parse(value).map_err(err)?.into(),
196        "usize" => <usize as ConfigType>::parse(value).map_err(err)?.into(),
197        "f64" => <f64 as ConfigType>::parse(value).map_err(err)?.into(),
198        "duration" => <Duration as ConfigType>::parse(value).map_err(err)?.into(),
199        "string" => ConfigVal::String(value.to_string()),
200        other => anyhow::bail!(
201            "unsupported config type {other:?}; use bool/u32/usize/f64/duration/string"
202        ),
203    })
204}
205
206/// Build a [`RelationDesc`] from column specs.
207fn relation_desc(columns: &[ColumnSpec]) -> anyhow::Result<RelationDesc> {
208    let mut builder = RelationDesc::builder();
209    for col in columns {
210        builder = builder.with_column(
211            col.name.as_str(),
212            SqlColumnType {
213                scalar_type: scalar_type_from_str(&col.ty)?,
214                nullable: col.nullable,
215            },
216        );
217    }
218    Ok(builder.finish())
219}
220
/// Remove one pair of surrounding double quotes from `s`.
///
/// The token is returned unchanged unless it both starts and ends with `"`
/// (as two distinct characters), so a lone `"` or a half-quoted token passes
/// through untouched.
fn unquote(s: &str) -> &str {
    match s.strip_prefix('"') {
        // Only drop the leading quote when a matching trailing one exists.
        Some(rest) => rest.strip_suffix('"').unwrap_or(s),
        None => s,
    }
}
227
228/// Type a raw row-value token against its column into an owned [`Cell`].
229///
230/// The bare token `null` is SQL `NULL` (only in a nullable column); quote it
231/// (`"null"`) for the literal string. Numeric and boolean tokens go through
232/// [`mz_repr::strconv`] — the canonical PostgreSQL-compatible text parser the rest
233/// of the codebase uses — so the accepted syntax matches `mz_pgrepr`'s text decode.
234/// `string`/`bytes` columns take the (unquoted) token verbatim; `bytes` is its
235/// UTF-8 encoding.
236fn cell_from_token(token: &str, col: &SqlColumnType) -> anyhow::Result<Cell> {
237    if token == "null" {
238        anyhow::ensure!(col.nullable, "null value in non-nullable column");
239        return Ok(Cell::Null);
240    }
241    let parse =
242        |kind: &str, e: strconv::ParseError| anyhow::anyhow!("parsing {token:?} as {kind}: {e}");
243    let cell = match col.scalar_type {
244        SqlScalarType::Int16 => {
245            Cell::Int16(strconv::parse_int16(token).map_err(|e| parse("int16", e))?)
246        }
247        SqlScalarType::Int32 => {
248            Cell::Int32(strconv::parse_int32(token).map_err(|e| parse("int32", e))?)
249        }
250        SqlScalarType::Int64 => {
251            Cell::Int64(strconv::parse_int64(token).map_err(|e| parse("int64", e))?)
252        }
253        SqlScalarType::Bool => {
254            Cell::Bool(strconv::parse_bool(token).map_err(|e| parse("bool", e))?)
255        }
256        SqlScalarType::String => Cell::Str(unquote(token).to_string()),
257        SqlScalarType::Bytes => Cell::Bytes(unquote(token).as_bytes().to_vec()),
258        ref other => anyhow::bail!("unsupported column type {other:?}"),
259    };
260    Ok(cell)
261}
262
263/// Pack explicit row tokens against `desc`, validating arity per row.
264fn rows_from_tokens(desc: &RelationDesc, rows: &[Vec<String>]) -> anyhow::Result<Vec<Row>> {
265    let cols: Vec<&SqlColumnType> = desc.iter_types().collect();
266    let mut out = Vec::with_capacity(rows.len());
267    for (r, row) in rows.iter().enumerate() {
268        anyhow::ensure!(
269            row.len() == cols.len(),
270            "row {r} has {} values but schema has {} columns",
271            row.len(),
272            cols.len()
273        );
274        // Arity validated above, so indexing `cols` by position is in bounds.
275        let cells = row
276            .iter()
277            .enumerate()
278            .map(|(c, v)| cell_from_token(v, cols[c]))
279            .collect::<anyhow::Result<Vec<Cell>>>()?;
280        out.push(pack_cells(&cells));
281    }
282    Ok(out)
283}
284
285/// Register a referenceable object in the parser `catalog` under its global-id
286/// name (e.g. `u1000`), recording the name-to-id mapping so the parsed `Get`s —
287/// which carry the catalog's own assigned ids — can be remapped back to the
288/// script's ids.
289fn register_catalog_object(
290    catalog: &mut TestCatalog,
291    name_to_id: &mut BTreeMap<String, GlobalId>,
292    id: GlobalId,
293    sql_typ: SqlRelationType,
294) -> anyhow::Result<()> {
295    let name = id.to_string();
296    // Column names are only used for display; `Get` references columns by `#n`,
297    // so synthetic `c0..cN` names suffice.
298    let cols = (0..sql_typ.column_types.len())
299        .map(|i| format!("c{i}"))
300        .collect();
301    catalog
302        .insert(&name, cols, sql_typ, false)
303        .map_err(|e| anyhow::anyhow!("registering {name} in catalog: {e}"))?;
304    name_to_id.insert(name, id);
305    Ok(())
306}
307
308/// Rewrite every global `Get` in `expr` from the catalog's assigned id back to
309/// the script's id, looked up by the object's name.
310fn remap_gets(
311    expr: &mut MirRelationExpr,
312    catalog: &TestCatalog,
313    name_to_id: &BTreeMap<String, GlobalId>,
314) -> anyhow::Result<()> {
315    expr.try_visit_mut_post::<_, anyhow::Error>(&mut |e| {
316        if let MirRelationExpr::Get {
317            id: Id::Global(g), ..
318        } = e
319        {
320            let name = catalog
321                .get_source_name(g)
322                .ok_or_else(|| anyhow::anyhow!("get of unknown catalog object {g}"))?;
323            let id = name_to_id
324                .get(name)
325                .ok_or_else(|| anyhow::anyhow!("get of unregistered object {name}"))?;
326            *g = *id;
327        }
328        Ok(())
329    })
330}
331
332/// A command read from the script stream.
333///
334/// Tagged on `"cmd"`, snake_case, e.g.
335/// `{"cmd":"write_single_ts","shard":"s1","ts":0,"rows":1000}`.
336#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
337#[serde(tag = "cmd", rename_all = "snake_case")]
338pub enum Command {
339    /// Declare a named relation schema for later `schema` references.
340    DefineSchema {
341        /// Schema name, referenced by `schema` fields on other commands.
342        name: String,
343        /// Ordered column declarations.
344        columns: Vec<ColumnSpec>,
345    },
346    /// Write `count` synthetic rows to `shard` at a single timestamp `ts`.
347    WriteSingleTs {
348        /// Shard alias; allocated on first use.
349        shard: String,
350        /// Schema name; defaults to the built-in `(bigint, text)` sample schema.
351        #[serde(default)]
352        schema: Option<String>,
353        /// The timestamp to write at.
354        ts: u64,
355        /// Number of synthetic rows to write.
356        count: u64,
357        /// First synthetic row index, so successive batches can use disjoint id
358        /// ranges (`start..start + count`) that never consolidate. Defaults to 0.
359        #[serde(default)]
360        start: u64,
361        /// Payload padding per row; defaults to `DEFAULT_ROW_BYTES`.
362        #[serde(default)]
363        row_bytes: Option<usize>,
364    },
365    /// Write `count` synthetic rows to `shard`, spread across `n_ts` timestamps in a
366    /// single append.
367    WriteSpread {
368        /// Shard alias; allocated on first use.
369        shard: String,
370        /// Schema name; defaults to the built-in sample schema.
371        #[serde(default)]
372        schema: Option<String>,
373        /// Number of synthetic rows to write.
374        count: u64,
375        /// Number of distinct timestamps to spread the rows across.
376        n_ts: u64,
377        /// First synthetic row index (see [`Command::WriteSingleTs`]). Defaults to 0.
378        #[serde(default)]
379        start: u64,
380        /// Payload padding per row; defaults to `DEFAULT_ROW_BYTES`.
381        #[serde(default)]
382        row_bytes: Option<usize>,
383    },
384    /// Write explicit rows to `shard` at a single timestamp `ts`. Each row is an
385    /// array of JSON values matching the schema's columns in order.
386    WriteRows {
387        /// Shard alias; allocated on first use.
388        shard: String,
389        /// Schema name; defaults to the built-in sample schema.
390        #[serde(default)]
391        schema: Option<String>,
392        /// The timestamp to write at.
393        ts: u64,
394        /// Rows as arrays of raw value tokens, typed against the schema by
395        /// `cell_from_token`.
396        rows: Vec<Vec<String>>,
397    },
398    /// Submit (without scheduling) an index dataflow over `shard`.
399    DefineIndex {
400        /// The imported source's global id.
401        source_id: u64,
402        /// The exported index's global id.
403        index_id: u64,
404        /// Shard alias to import; must already exist.
405        shard: String,
406        /// Schema name; defaults to the built-in sample schema. Must match what was
407        /// written to `shard`.
408        #[serde(default)]
409        schema: Option<String>,
410        /// Columns to arrange by.
411        key: Vec<usize>,
412        /// The dataflow's `as_of`.
413        as_of: u64,
414        /// The shard's exclusive write upper (see `PersistSource::upper`).
415        upper: u64,
416    },
417    /// Schedule a previously-submitted collection so it makes progress.
418    Schedule {
419        /// The collection's global id.
420        id: u64,
421    },
422    /// Advance an index's read frontier (`since`) via `AllowCompaction`.
423    AllowCompaction {
424        /// The index's global id.
425        id: u64,
426        /// The new read frontier.
427        frontier: u64,
428    },
429    /// Take a collection out of read-only mode via `AllowWrites`, letting its
430    /// persist sink begin writing. Every dataflow starts read-only; indexes,
431    /// subscribes, and peeks work regardless, but a materialized-view sink withholds
432    /// all persist writes until this is sent for its sink id.
433    AllowWrites {
434        /// The sink's global id.
435        id: u64,
436    },
437    /// Wait until `id`'s output frontier reaches `ts`, or fail after the timeout.
438    AwaitFrontier {
439        /// The collection's global id.
440        id: u64,
441        /// The target output-frontier timestamp.
442        ts: u64,
443        /// Timeout in seconds; defaults to `DEFAULT_TIMEOUT_SECS`.
444        #[serde(default)]
445        timeout_secs: Option<u64>,
446        /// If true, a timeout is reported (`status: timeout`) without failing the
447        /// run. Used by reproductions where not reaching the frontier is an
448        /// expected outcome, not an assertion failure.
449        #[serde(default)]
450        allow_timeout: bool,
451    },
452    /// Count `id`'s rows at `ts` and emit the count.
453    ///
454    /// Sugar over a `Reduce`: builds an ephemeral dataflow that index-imports `id`,
455    /// computes `count(*)` over it, and peeks the single-row result — so the count
456    /// runs through a real reduce operator rather than being tallied in the driver.
457    /// `id` must have been registered by a prior `define_index` (or `define`
458    /// export). The golden output is the count; the script's `----` block asserts it.
459    Count {
460        /// The index's global id.
461        id: u64,
462        /// The timestamp to count at.
463        ts: u64,
464    },
465    /// Submit (without scheduling) a dataflow built from generic MIR — the
466    /// abstraction behind index / materialized-view / subscribe / copy-to.
467    ///
468    /// A projection of [`DataflowBuilder`]: import sources and/or existing indexes,
469    /// build MIR objects (each a pretty-form MIR spec; see [`BuildSpec`]), and
470    /// export over them. Exports are index, materialized-view, or subscribe (see
471    /// [`ExportSpec`]); copy-to is not implemented. Exported indexes are registered
472    /// for later import or count assertion; subscribe sinks register a response
473    /// buffer for `await-subscribe`. `define-index` is sugar over this. The optional
474    /// `optimize` flag runs the MIR optimizer before lowering (needed for joins).
475    CreateDataflow {
476        /// Debug name for the dataflow; defaults to `headless-create-dataflow`.
477        #[serde(default)]
478        name: Option<String>,
479        /// Collections to import (persist sources and/or existing indexes).
480        #[serde(default)]
481        imports: Vec<ImportSpec>,
482        /// MIR objects to compute, each bound to an id.
483        #[serde(default)]
484        builds: Vec<BuildSpec>,
485        /// Exports over imported or built ids.
486        #[serde(default)]
487        exports: Vec<ExportSpec>,
488        /// The dataflow's `as_of`.
489        as_of: u64,
490        /// Run the MIR optimizer before lowering (needed for e.g. joins). Off by
491        /// default, so the caller's MIR is lowered faithfully.
492        #[serde(default)]
493        optimize: bool,
494    },
495    /// Peek `id` at `ts` and emit the returned rows (sorted, one per line). The
496    /// generic output assertion: the script's `----` block holds the expected rows.
497    Peek {
498        /// The index's global id.
499        id: u64,
500        /// Schema name describing the peek's output; defaults to the sample schema.
501        #[serde(default)]
502        schema: Option<String>,
503        /// The timestamp to peek at.
504        ts: u64,
505    },
506    /// Wait for subscribe sink `id`'s upper to reach `up_to`, then emit its
507    /// accumulated updates as `<ts> <diff> <datums>` lines (consolidated, sorted).
508    /// The output assertion for a subscribe sink.
509    AwaitSubscribe {
510        /// The subscribe sink's global id.
511        id: u64,
512        /// The exclusive upper to wait for (typically the sink's `up_to`).
513        up_to: u64,
514        /// Timeout in seconds; defaults to `DEFAULT_TIMEOUT_SECS`.
515        timeout_secs: Option<u64>,
516    },
517    /// Send `CreateInstance`, opening the compute instance (and the reconciliation
518    /// window). The settable [`InstanceConfig`] knobs default to the values a plain
519    /// `create-instance` supplies; the peek-stash location is always the host's, and
520    /// the peek-response stash is force-disabled (see `Driver::create_instance`).
521    ///
522    /// [`InstanceConfig`]: mz_compute_client::protocol::command::InstanceConfig
523    CreateInstance {
524        /// Replica expiration offset (a duration like `30s`); none if absent.
525        #[serde(default)]
526        expiration_offset: Option<String>,
527        /// Whether arrangements use dictionary compression.
528        #[serde(default)]
529        arrangement_dictionary_compression: bool,
530        /// The create-time dyncfg snapshot the controller would supply (`name type value` rows).
531        /// Applied to the replica's worker config before create-time setup, so a scenario can
532        /// assert that create-time work observes synced values rather than dyncfg defaults.
533        #[serde(default)]
534        initial_config: Vec<ConfigSetting>,
535    },
536    /// Send `UpdateConfiguration` with a table of dyncfg updates (`name type value`
537    /// rows). Generic over any configuration; the peek-response stash is not settable
538    /// here (it is force-disabled at instance creation).
539    UpdateConfiguration {
540        /// The dyncfg updates to apply.
541        #[serde(default)]
542        updates: Vec<ConfigSetting>,
543    },
544    /// Drop the current connection and reconnect, sending only `Hello`. Re-issue
545    /// `create_instance`, replay the dataflows the replica should keep, then
546    /// `initialization_complete` to close the reconciliation window.
547    Reconnect,
548    /// Send `InitializationComplete`, closing the reconciliation window.
549    InitializationComplete,
550}
551
/// What the registry remembers about an exported index, so a later index
/// import or count assertion can reconstruct the import without re-declaring it.
///
/// `ScriptState::count_via_reduce` reads all three fields to build its
/// ephemeral counting dataflow.
struct IndexEntry {
    /// The id of the collection the index arranges.
    on_id: u64,
    /// The columns the index is arranged by.
    key: Vec<usize>,
    /// The arranged collection's relation type (for `import_index`).
    on_type: ReprRelationType,
}
562
/// The base for ephemeral global ids the count sugar allocates. Far above any
/// id a script would use, so its dataflows never collide with user objects.
///
/// `ScriptState::alloc_internal` hands out ids counting up from this value.
/// NOTE(review): nothing visible here rejects a script id at or above this
/// base — the separation relies on scripts using small ids.
const INTERNAL_ID_BASE: u64 = u64::MAX / 2;
566
/// Mutable state threaded through a script run.
///
/// Holds the connection to the replica plus the registries (schemas, shard
/// aliases, exported indexes, materialized-view outputs) that let later commands
/// refer back to what earlier commands declared.
pub struct ScriptState {
    /// The connected driver through which dataflows are submitted, scheduled,
    /// awaited, and peeked.
    driver: Driver,
    /// Persist client opened from the driver's host in `new`; used to write rows
    /// to shards.
    client: PersistClient,
    /// The persist location passed to `new`.
    // NOTE(review): presumably used when building persist sources/sinks — the
    // consuming code is outside this view; confirm.
    loc: PersistLocation,
    /// Named schemas declared via `define_schema`.
    schemas: BTreeMap<String, RelationDesc>,
    /// Alias-to-shard map; aliases are allocated lazily on first use.
    shards: BTreeMap<String, ShardId>,
    /// Exported indexes, by global id, for later import / count assertions.
    indexes: BTreeMap<u64, IndexEntry>,
    /// Materialized-view sink outputs, by sink global id: the target shard's
    /// metadata, so a `peek` of the sink id reads its shard via a persist peek.
    mv_outputs: BTreeMap<u64, CollectionMetadata>,
    /// Next ephemeral id for the count sugar's dataflows.
    next_internal: u64,
}
584
585impl ScriptState {
586    /// Build the state from a connected driver and its persist location, opening a
587    /// persist client.
588    pub async fn new(driver: Driver, loc: PersistLocation) -> anyhow::Result<Self> {
589        let client = driver.host.client().await?;
590        Ok(ScriptState {
591            driver,
592            client,
593            loc,
594            schemas: BTreeMap::new(),
595            shards: BTreeMap::new(),
596            indexes: BTreeMap::new(),
597            mv_outputs: BTreeMap::new(),
598            next_internal: INTERNAL_ID_BASE,
599        })
600    }
601
602    /// Resolve a shard alias, allocating a fresh [`ShardId`] on first use.
603    fn shard_id(&mut self, alias: &str) -> ShardId {
604        *self
605            .shards
606            .entry(alias.to_string())
607            .or_insert_with(ShardId::new)
608    }
609
610    /// Allocate a fresh ephemeral global id for an internally-built dataflow.
611    fn alloc_internal(&mut self) -> GlobalId {
612        let id = self.next_internal;
613        self.next_internal += 1;
614        GlobalId::User(id)
615    }
616
617    /// Count the rows of a registered index at `ts` by running a `count(*)`
618    /// `Reduce` over it: build an ephemeral dataflow that index-imports `index_id`,
619    /// schedule and hydrate it, then peek its single-row output. An empty result
620    /// (the reduce emits no row over empty input) reads as a count of `0`.
621    async fn count_via_reduce(&mut self, index_id: u64, ts: u64) -> anyhow::Result<u64> {
622        let entry = self.indexes.get(&index_id).ok_or_else(|| {
623            anyhow::anyhow!("unknown index {index_id}; define it with define_index first")
624        })?;
625        let on_id = entry.on_id;
626        let key = entry.key.clone();
627        let on_type = entry.on_type.clone();
628
629        let reduce_id = self.alloc_internal();
630        let out_index_id = self.alloc_internal();
631        let df = count_over_index(
632            GlobalId::User(index_id),
633            GlobalId::User(on_id),
634            on_type,
635            key,
636            reduce_id,
637            out_index_id,
638            Timestamp::from(ts),
639        )?;
640        self.driver.submit_dataflow(df)?;
641        self.driver.schedule(out_index_id)?;
642        // The count is final once the output frontier passes `ts`.
643        self.driver
644            .expect_frontier(
645                out_index_id,
646                Timestamp::from(ts).step_forward(),
647                Duration::from_secs(DEFAULT_TIMEOUT_SECS),
648            )
649            .await?;
650
651        // The reduce output is a single non-null bigint column.
652        let count_desc = RelationDesc::builder()
653            .with_column(
654                "count",
655                SqlColumnType {
656                    scalar_type: SqlScalarType::Int64,
657                    nullable: false,
658                },
659            )
660            .finish();
661        let rows = self
662            .driver
663            .peek(
664                PeekTarget::Index { id: out_index_id },
665                count_desc,
666                Timestamp::from(ts),
667            )
668            .await?;
669        match rows.as_slice() {
670            // No row over empty input: count is zero.
671            [] => Ok(0),
672            [row] => {
673                let count = row.unpack_first().unwrap_int64();
674                Ok(u64::try_from(count)?)
675            }
676            other => anyhow::bail!(
677                "count reduce returned {} rows, expected 0 or 1",
678                other.len()
679            ),
680        }
681    }
682
683    /// Resolve a schema name, defaulting to the built-in sample schema when absent.
684    fn resolve_schema(&self, name: &Option<String>) -> anyhow::Result<RelationDesc> {
685        match name {
686            None => Ok(sample_desc()),
687            Some(name) => self.schemas.get(name).cloned().ok_or_else(|| {
688                anyhow::anyhow!("unknown schema {name:?}; declare it with define_schema first")
689            }),
690        }
691    }
692
693    /// Validate that a sink's declared output schema `desc` matches the column types
694    /// of the object `on_id` it exports. Compares column types (not inferred keys),
695    /// so a mismatched arity or type fails before the dataflow is submitted.
696    fn check_sink_schema(
697        &self,
698        builder: &DataflowBuilder,
699        on_id: u64,
700        desc: &RelationDesc,
701    ) -> anyhow::Result<()> {
702        let on_type = builder.get(GlobalId::User(on_id))?.typ();
703        let want = ReprRelationType::from(desc.typ());
704        anyhow::ensure!(
705            on_type.column_types == want.column_types,
706            "sink output schema does not match object {on_id}: \
707             declared {:?}, object is {:?}",
708            want.column_types,
709            on_type.column_types
710        );
711        Ok(())
712    }
713
714    /// Execute a single command, returning its golden output text.
715    pub async fn execute(&mut self, cmd: Command) -> anyhow::Result<String> {
716        match cmd {
717            Command::DefineSchema { name, columns } => {
718                let desc = relation_desc(&columns)?;
719                self.schemas.insert(name, desc);
720                Ok("ok".to_string())
721            }
722            Command::WriteSingleTs {
723                shard,
724                schema,
725                ts,
726                count,
727                start,
728                row_bytes,
729            } => {
730                let desc = self.resolve_schema(&schema)?;
731                let shard = self.shard_id(&shard);
732                let pad = row_bytes.unwrap_or(DEFAULT_ROW_BYTES);
733                let batch = synth_rows(&desc, start, count, pad);
734                write_rows_single_ts(&self.client, shard, &desc, &batch, Timestamp::from(ts))
735                    .await?;
736                Ok(format!("wrote {count}"))
737            }
738            Command::WriteSpread {
739                shard,
740                schema,
741                count,
742                n_ts,
743                start,
744                row_bytes,
745            } => {
746                let desc = self.resolve_schema(&schema)?;
747                let shard = self.shard_id(&shard);
748                let pad = row_bytes.unwrap_or(DEFAULT_ROW_BYTES);
749                let batch = synth_rows(&desc, start, count, pad);
750                write_rows_spread(&self.client, shard, &desc, &batch, n_ts).await?;
751                Ok(format!("wrote {count}"))
752            }
753            Command::WriteRows {
754                shard,
755                schema,
756                ts,
757                rows,
758            } => {
759                let desc = self.resolve_schema(&schema)?;
760                let batch = rows_from_tokens(&desc, &rows)?;
761                let written = batch.len();
762                let shard = self.shard_id(&shard);
763                write_rows_single_ts(&self.client, shard, &desc, &batch, Timestamp::from(ts))
764                    .await?;
765                Ok(format!("wrote {written}"))
766            }
767            Command::DefineIndex {
768                source_id,
769                index_id,
770                shard,
771                schema,
772                key,
773                as_of,
774                upper,
775            } => {
776                let desc = self.resolve_schema(&schema)?;
777                // Validate the key columns against the schema up front, so a bad
778                // index (e.g. key past the last column) yields a clean error rather
779                // than reaching the lowering with an out-of-range column reference.
780                let arity = desc.arity();
781                for &col in &key {
782                    anyhow::ensure!(
783                        col < arity,
784                        "key column {col} out of range for a {arity}-column schema"
785                    );
786                }
787                let shard = self.shard_id(&shard);
788                let on_type = ReprRelationType::from(desc.typ());
789                let df = index_dataflow(
790                    GlobalId::User(source_id),
791                    GlobalId::User(index_id),
792                    shard,
793                    self.loc.clone(),
794                    desc,
795                    key.clone(),
796                    Timestamp::from(as_of),
797                    Timestamp::from(upper),
798                )?;
799                self.driver.submit_dataflow(df)?;
800                // Register only after a successful submit, so a rejected index is
801                // not later importable or countable.
802                self.indexes.insert(
803                    index_id,
804                    IndexEntry {
805                        on_id: source_id,
806                        key,
807                        on_type,
808                    },
809                );
810                Ok("ok".to_string())
811            }
812            Command::Schedule { id } => {
813                self.driver.schedule(GlobalId::User(id))?;
814                Ok("ok".to_string())
815            }
816            Command::AllowCompaction { id, frontier } => {
817                self.driver.send(ComputeCommand::AllowCompaction {
818                    id: GlobalId::User(id),
819                    frontier: Antichain::from_elem(Timestamp::from(frontier)),
820                })?;
821                Ok("ok".to_string())
822            }
823            Command::AllowWrites { id } => {
824                self.driver
825                    .send(ComputeCommand::AllowWrites(GlobalId::User(id)))?;
826                Ok("ok".to_string())
827            }
828            Command::AwaitFrontier {
829                id,
830                ts,
831                timeout_secs,
832                allow_timeout,
833            } => {
834                let timeout = Duration::from_secs(timeout_secs.unwrap_or(DEFAULT_TIMEOUT_SECS));
835                let result = self
836                    .driver
837                    .expect_frontier(GlobalId::User(id), Timestamp::from(ts), timeout)
838                    .await;
839                if allow_timeout {
840                    // The outcome is intentionally unobserved: emit a fixed token so
841                    // the golden output stays deterministic whether or not the
842                    // frontier was reached (see `multi_dataflow`, whose hydration is
843                    // nondeterministic).
844                    Ok("awaited".to_string())
845                } else {
846                    result?;
847                    Ok("ok".to_string())
848                }
849            }
850            Command::Count { id, ts } => {
851                let count = self.count_via_reduce(id, ts).await?;
852                Ok(count.to_string())
853            }
854            Command::CreateDataflow {
855                name,
856                imports,
857                builds,
858                exports,
859                as_of,
860                optimize,
861            } => {
862                let mut builder = DataflowBuilder::new(
863                    name.unwrap_or_else(|| "headless-create-dataflow".to_string()),
864                );
865                if optimize {
866                    builder.optimize();
867                }
868                // The parser's catalog resolves `Get u<n>` leaves by name; it
869                // assigns its own global ids, so we keep a name->our-id map and
870                // remap the parsed `Get`s back to the script's ids afterwards.
871                let mut catalog = TestCatalog::default();
872                let mut name_to_id: BTreeMap<String, GlobalId> = BTreeMap::new();
873                for import in imports {
874                    match import {
875                        ImportSpec::Source {
876                            id,
877                            shard,
878                            schema,
879                            upper,
880                        } => {
881                            let desc = self.resolve_schema(&schema)?;
882                            let id = GlobalId::User(id);
883                            register_catalog_object(
884                                &mut catalog,
885                                &mut name_to_id,
886                                id,
887                                desc.typ().clone(),
888                            )?;
889                            let shard = self.shard_id(&shard);
890                            builder.import_persist(
891                                id,
892                                PersistSource {
893                                    shard,
894                                    location: self.loc.clone(),
895                                    desc,
896                                    upper: Timestamp::from(upper),
897                                },
898                            );
899                        }
900                        ImportSpec::Index { index_id } => {
901                            let entry = self.indexes.get(&index_id).ok_or_else(|| {
902                                anyhow::anyhow!(
903                                    "unknown index {index_id}; define it before importing it"
904                                )
905                            })?;
906                            let on_id = GlobalId::User(entry.on_id);
907                            let key = entry.key.clone();
908                            let on_type = entry.on_type.clone();
909                            register_catalog_object(
910                                &mut catalog,
911                                &mut name_to_id,
912                                on_id,
913                                SqlRelationType::from_repr(&on_type),
914                            )?;
915                            builder.import_index(
916                                GlobalId::User(index_id),
917                                on_id,
918                                key,
919                                on_type,
920                                false,
921                            );
922                        }
923                    }
924                }
925                for build in builds {
926                    // Parse the pretty MIR spec against the catalog, then remap
927                    // its catalog-assigned `Get` ids to the script's ids.
928                    let mut expr = try_parse_mir(&catalog, &build.expr)
929                        .map_err(|e| anyhow::anyhow!("parsing MIR for object {}: {e}", build.id))?;
930                    remap_gets(&mut expr, &catalog, &name_to_id)?;
931                    let id = GlobalId::User(build.id);
932                    // Register the built object so later builds can `get` it.
933                    register_catalog_object(
934                        &mut catalog,
935                        &mut name_to_id,
936                        id,
937                        SqlRelationType::from_repr(&expr.typ()),
938                    )?;
939                    builder.build(id, expr);
940                }
941                // Wire each export onto the builder. Index exports are captured for
942                // later import / count assertions; sink exports route their output
943                // either to a target shard (materialized view) or back as responses
944                // (subscribe). Sink output schemas must match the exported object's
945                // type, validated here so a mismatch fails before submission.
946                let mut new_indexes = Vec::new();
947                let mut new_subscribes = Vec::new();
948                let mut new_mv_outputs = Vec::new();
949                for export in exports {
950                    match export {
951                        ExportSpec::Index {
952                            index_id,
953                            on_id,
954                            key,
955                        } => {
956                            let on_type = builder.get(GlobalId::User(on_id))?.typ();
957                            builder.export_index(
958                                GlobalId::User(index_id),
959                                GlobalId::User(on_id),
960                                key.clone(),
961                            );
962                            new_indexes.push((index_id, on_id, key, on_type));
963                        }
964                        ExportSpec::MaterializedView {
965                            sink_id,
966                            on_id,
967                            shard,
968                            schema,
969                        } => {
970                            let desc = self.resolve_schema(&schema)?;
971                            self.check_sink_schema(&builder, on_id, &desc)?;
972                            let shard = self.shard_id(&shard);
973                            let location = self.loc.clone();
974                            builder.export_materialized_view(
975                                GlobalId::User(sink_id),
976                                GlobalId::User(on_id),
977                                desc.clone(),
978                                PersistSink {
979                                    shard,
980                                    location: location.clone(),
981                                },
982                            );
983                            // Record the target shard so a later `peek` of the sink
984                            // id reads it via a persist peek (the `SELECT * FROM mv`
985                            // path), with no separate read-back command.
986                            new_mv_outputs.push((
987                                sink_id,
988                                CollectionMetadata {
989                                    persist_location: location,
990                                    data_shard: shard,
991                                    relation_desc: desc,
992                                    txns_shard: None,
993                                },
994                            ));
995                        }
996                        ExportSpec::Subscribe {
997                            sink_id,
998                            on_id,
999                            schema,
1000                            up_to,
1001                        } => {
1002                            let desc = self.resolve_schema(&schema)?;
1003                            self.check_sink_schema(&builder, on_id, &desc)?;
1004                            builder.export_subscribe(
1005                                GlobalId::User(sink_id),
1006                                GlobalId::User(on_id),
1007                                desc,
1008                                up_to_antichain(up_to),
1009                            );
1010                            new_subscribes.push(sink_id);
1011                        }
1012                    }
1013                }
1014                builder.as_of(Timestamp::from(as_of));
1015                let df = builder.finish()?;
1016                self.driver.submit_dataflow(df)?;
1017                // Register only after a successful submit, so a rejected dataflow
1018                // leaves no dangling index entry or subscribe buffer.
1019                for (index_id, on_id, key, on_type) in new_indexes {
1020                    self.indexes.insert(
1021                        index_id,
1022                        IndexEntry {
1023                            on_id,
1024                            key,
1025                            on_type,
1026                        },
1027                    );
1028                }
1029                for sink_id in new_subscribes {
1030                    self.driver.register_subscribe(GlobalId::User(sink_id));
1031                }
1032                for (sink_id, metadata) in new_mv_outputs {
1033                    self.mv_outputs.insert(sink_id, metadata);
1034                }
1035                Ok("ok".to_string())
1036            }
1037            Command::Peek { id, schema, ts } => {
1038                let desc = self.resolve_schema(&schema)?;
1039                // A materialized-view sink id resolves to a persist peek of its
1040                // output shard; any other id is an index peek. The persist peek
1041                // blocks until the shard seals through `ts`, so it doubles as a wait
1042                // for the writing sink to catch up.
1043                let target = match self.mv_outputs.get(&id) {
1044                    Some(metadata) => PeekTarget::Persist {
1045                        id: GlobalId::User(id),
1046                        metadata: metadata.clone(),
1047                    },
1048                    None => PeekTarget::Index {
1049                        id: GlobalId::User(id),
1050                    },
1051                };
1052                let rows = self.driver.peek(target, desc, Timestamp::from(ts)).await?;
1053                Ok(render_rows(&rows))
1054            }
1055            Command::AwaitSubscribe {
1056                id,
1057                up_to,
1058                timeout_secs,
1059            } => {
1060                let timeout = Duration::from_secs(timeout_secs.unwrap_or(DEFAULT_TIMEOUT_SECS));
1061                let updates = self
1062                    .driver
1063                    .await_subscribe(GlobalId::User(id), Timestamp::from(up_to), timeout)
1064                    .await?;
1065                Ok(render_updates(&updates))
1066            }
1067            Command::CreateInstance {
1068                expiration_offset,
1069                arrangement_dictionary_compression,
1070                initial_config,
1071            } => {
1072                let expiration_offset = expiration_offset
1073                    .as_deref()
1074                    .map(|s| {
1075                        <Duration as ConfigType>::parse(s).map_err(|e| {
1076                            anyhow::anyhow!("expiration-offset {s:?} is not a duration: {e}")
1077                        })
1078                    })
1079                    .transpose()?;
1080                let mut initial = ConfigUpdates::default();
1081                for setting in &initial_config {
1082                    initial.add_dynamic(
1083                        &setting.name,
1084                        parse_config_val(&setting.ty, &setting.value)?,
1085                    );
1086                }
1087                self.driver.create_instance(
1088                    expiration_offset,
1089                    arrangement_dictionary_compression,
1090                    initial,
1091                )?;
1092                Ok("ok".to_string())
1093            }
1094            Command::UpdateConfiguration { updates } => {
1095                let mut dyncfg_updates = ConfigUpdates::default();
1096                for setting in &updates {
1097                    dyncfg_updates.add_dynamic(
1098                        &setting.name,
1099                        parse_config_val(&setting.ty, &setting.value)?,
1100                    );
1101                }
1102                self.driver.update_configuration(dyncfg_updates)?;
1103                Ok("ok".to_string())
1104            }
1105            Command::Reconnect => {
1106                self.driver.reconnect().await?;
1107                Ok("ok".to_string())
1108            }
1109            Command::InitializationComplete => {
1110                self.driver.send(ComputeCommand::InitializationComplete)?;
1111                Ok("ok".to_string())
1112            }
1113        }
1114    }
1115}
1116
1117/// Run a script: parse `content` into stanzas, execute each command, and either
1118/// compare its output to the stanza's expected block or — when `REWRITE` is set
1119/// and `path` is given — rewrite the file in place with the actual outputs.
1120///
1121/// Returns `Err` if any stanza's output differs from its expected block, so a
1122/// scripted run exits non-zero on a mismatch (and CI fails). A command that fails
1123/// renders as `error: <message>`, so an expected failure is asserted by its
1124/// golden block rather than a special command.
1125pub async fn run(
1126    driver: Driver,
1127    loc: PersistLocation,
1128    content: &str,
1129    path: Option<&Path>,
1130) -> anyhow::Result<()> {
1131    let items = crate::text::parse_file(content)?;
1132    let mut state = ScriptState::new(driver, loc).await?;
1133    let rewrite = std::env::var_os("REWRITE").is_some();
1134
1135    let mut actuals = Vec::new();
1136    let mut mismatches = 0usize;
1137    for item in &items {
1138        let crate::text::Item::Stanza(stanza) = item else {
1139            continue;
1140        };
1141        let actual = match state.execute(stanza.command.clone()).await {
1142            Ok(output) => output,
1143            Err(e) => format!("error: {e}"),
1144        };
1145        let directive = stanza.input.lines().next().unwrap_or_default();
1146        if rewrite {
1147            println!("{directive} => {actual}");
1148        } else if actual == stanza.expected {
1149            println!("ok: {directive}");
1150        } else {
1151            mismatches += 1;
1152            println!(
1153                "MISMATCH: {directive}\n  expected: {:?}\n  actual:   {:?}",
1154                stanza.expected, actual
1155            );
1156        }
1157        actuals.push(actual);
1158    }
1159
1160    if rewrite {
1161        let path = path.context("REWRITE is set but the script came from stdin")?;
1162        std::fs::write(path, crate::text::rewrite(&items, &actuals))
1163            .with_context(|| format!("rewriting {}", path.display()))?;
1164        return Ok(());
1165    }
1166    if mismatches > 0 {
1167        anyhow::bail!("{mismatches} stanza(s) did not match their expected output");
1168    }
1169    Ok(())
1170}
1171
1172/// Render peeked rows as deterministic golden text: each row's datums joined by
1173/// spaces, with the rows sorted so the output is independent of arrangement order.
1174fn render_rows(rows: &[Row]) -> String {
1175    let mut lines: Vec<String> = rows
1176        .iter()
1177        .map(|row| {
1178            row.unpack()
1179                .iter()
1180                .map(|datum| datum.to_string())
1181                .collect::<Vec<_>>()
1182                .join(" ")
1183        })
1184        .collect();
1185    lines.sort();
1186    lines.join("\n")
1187}
1188
1189/// Convert an optional `up_to` timestamp into a sink's exclusive upper antichain;
1190/// `None` is the empty antichain (no bound — the sink runs indefinitely).
1191fn up_to_antichain(up_to: Option<u64>) -> Antichain<Timestamp> {
1192    match up_to {
1193        Some(t) => Antichain::from_elem(Timestamp::from(t)),
1194        None => Antichain::new(),
1195    }
1196}
1197
1198/// Render a subscribe's updates as golden text: `<ts> <diff> <datums>` per line.
1199/// Updates are consolidated by `(time, row)` — so split batches and retractions
1200/// collapse — net-zero rows dropped, and the lines sorted for determinism.
1201fn render_updates(updates: &[(Row, Timestamp, i64)]) -> String {
1202    let mut by_key: BTreeMap<(Timestamp, Row), i64> = BTreeMap::new();
1203    for (row, ts, diff) in updates {
1204        *by_key.entry((*ts, row.clone())).or_default() += diff;
1205    }
1206    let mut lines: Vec<String> = by_key
1207        .into_iter()
1208        .filter(|(_, diff)| *diff != 0)
1209        .map(|((ts, row), diff)| {
1210            let datums = row
1211                .unpack()
1212                .iter()
1213                .map(|datum| datum.to_string())
1214                .collect::<Vec<_>>()
1215                .join(" ");
1216            format!("{ts} {diff} {datums}")
1217        })
1218        .collect();
1219    lines.sort();
1220    lines.join("\n")
1221}
1222
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `ColumnSpec` from borrowed parts.
    fn column(name: &str, ty: &str, nullable: bool) -> ColumnSpec {
        ColumnSpec {
            name: name.to_string(),
            ty: ty.to_string(),
            nullable,
        }
    }

    /// Build a `SqlColumnType` for the given scalar type and nullability.
    fn column_type(scalar_type: SqlScalarType, nullable: bool) -> SqlColumnType {
        SqlColumnType {
            scalar_type,
            nullable,
        }
    }

    /// Column specs become a `RelationDesc` of the same arity with the expected
    /// scalar types and nullability, `synth_rows` produces the requested number
    /// of rows for it, and an unknown type name is rejected.
    #[mz_ore::test]
    fn schema_parse_and_synth() {
        let columns = vec![
            column("k", "bigint", false),
            column("flag", "boolean", false),
            column("v", "text", true),
        ];
        let desc = relation_desc(&columns).unwrap();
        assert_eq!(desc.arity(), 3);

        let types: Vec<_> = desc.iter_types().collect();
        assert_eq!(types[0].scalar_type, SqlScalarType::Int64);
        assert_eq!(types[1].scalar_type, SqlScalarType::Bool);
        assert!(types[2].nullable);

        let rows = synth_rows(&desc, 0, 4, 8);
        assert_eq!(rows.len(), 4);

        assert!(scalar_type_from_str("nope").is_err());
    }

    /// Tokens are typed into the `Cell` matching their column: quotes around a
    /// string are stripped, a bare `null` is SQL null (and an error for a
    /// non-nullable column), and a non-numeric token in an int column errors.
    #[mz_ore::test]
    fn cell_from_token_maps_values() {
        let int_col = column_type(SqlScalarType::Int64, false);
        let str_col = column_type(SqlScalarType::String, true);

        let seven = cell_from_token("7", &int_col).unwrap();
        assert_eq!(seven, Cell::Int64(7));

        let bare = cell_from_token("hi", &str_col).unwrap();
        assert_eq!(bare, Cell::Str("hi".to_string()));

        // The contents of a quoted string are kept; only the quotes go away.
        let quoted = cell_from_token("\"a b\"", &str_col).unwrap();
        assert_eq!(quoted, Cell::Str("a b".to_string()));

        let null = cell_from_token("null", &str_col).unwrap();
        assert_eq!(null, Cell::Null);

        // A non-nullable column does not accept null.
        assert!(cell_from_token("null", &int_col).is_err());
        // An int column does not accept a non-numeric token.
        assert!(cell_from_token("x", &int_col).is_err());
    }
}
mz_clusterd_test_driver/script.rs

mz_clusterd_test_driver/
script.rs