Skip to main content

mz_deploy/
types.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Data-contract system for external dependencies.
11//!
12//! When a project references objects it does not own (e.g. tables created by an
13//! upstream ingestion pipeline), mz-deploy needs to know their column schemas so
14//! it can type-check views that depend on them. This module manages that contract
15//! through the `types.lock` file.
16//!
17//! ## Lock File Lifecycle
18//!
19//! 1. **Capture** — Column schemas are queried from the live environment and
20//!    written to `types.lock`.
21//! 2. **Compile** — The lock file is loaded and its schemas are used to resolve
22//!    external dependency columns during compilation.
23//! 3. **Validate** — During incremental typechecking, external dependency
24//!    schemas are provided to the validation backend when dirty objects
25//!    reference them.
26//!
27//! ## Compiler Integration
28//!
29//! Incremental runtime typechecking is owned by
30//! [`crate::project::compiler::typecheck`]. That subsystem persists per-object
31//! validation artifacts for consumers such as `explain` and the LSP.
32//!
33//! This module owns:
34//!
35//! - the `types.lock` contract format
36//! - shared type/schema utilities such as `type_hash`
37//!
38//! ## Key Types
39//!
40//! - [`Types`] — In-memory representation of a `types.lock` file: a versioned
41//!   map from fully-qualified object names to column schemas, plus optional
42//!   object-level comments from `COMMENT ON` in the source database.
43//! - [`ColumnType`] — A single column's type name, nullability, and optional
44//!   `COMMENT ON COLUMN` description.
45
46use crate::project::ir::object_id::ObjectId;
47use serde::{Deserialize, Serialize};
48use std::collections::BTreeMap;
49use std::fmt;
50use std::fs;
51use std::path::{Path, PathBuf};
52use std::str::FromStr;
53use thiserror::Error;
54
/// The kind of database object recorded in a `types.lock` entry.
///
/// `TableFromSource` is treated as `Table` from a contract perspective — both
/// represent row-producing relations that can serve as FK targets.
///
/// Serde (de)serializes each variant under its kebab-case name (for example
/// `materialized-view`), which is the same spelling returned by
/// `ObjectKind::as_str` and accepted by the `FromStr` implementation.
#[derive(Deserialize, Serialize, Debug, Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
pub enum ObjectKind {
    /// Serialized as `table`.
    Table,
    /// Serialized as `view`.
    View,
    /// Serialized as `materialized-view`.
    MaterializedView,
    /// Serialized as `source`.
    Source,
    /// Serialized as `sink`.
    Sink,
    /// Serialized as `secret`.
    Secret,
    /// Serialized as `connection`.
    Connection,
}
70
71impl FromStr for ObjectKind {
72    type Err = String;
73
74    fn from_str(s: &str) -> Result<Self, Self::Err> {
75        match s {
76            "table" => Ok(ObjectKind::Table),
77            "view" => Ok(ObjectKind::View),
78            "materialized-view" => Ok(ObjectKind::MaterializedView),
79            "source" => Ok(ObjectKind::Source),
80            "sink" => Ok(ObjectKind::Sink),
81            "secret" => Ok(ObjectKind::Secret),
82            "connection" => Ok(ObjectKind::Connection),
83            _ => Err(format!("unknown object kind: {}", s)),
84        }
85    }
86}
87
88impl ObjectKind {
89    /// Parse from the kebab-case string stored in SQLite.
90    pub fn from_db_str(s: &str) -> Self {
91        match s {
92            "table" => ObjectKind::Table,
93            "view" => ObjectKind::View,
94            "materialized-view" => ObjectKind::MaterializedView,
95            "source" => ObjectKind::Source,
96            "sink" => ObjectKind::Sink,
97            "secret" => ObjectKind::Secret,
98            "connection" => ObjectKind::Connection,
99            _ => ObjectKind::Table,
100        }
101    }
102
103    /// Returns the kebab-case string matching the serde serialization format.
104    pub fn as_str(self) -> &'static str {
105        match self {
106            ObjectKind::Table => "table",
107            ObjectKind::View => "view",
108            ObjectKind::MaterializedView => "materialized-view",
109            ObjectKind::Source => "source",
110            ObjectKind::Sink => "sink",
111            ObjectKind::Secret => "secret",
112            ObjectKind::Connection => "connection",
113        }
114    }
115}
116
117impl fmt::Display for ObjectKind {
118    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
119        match self {
120            ObjectKind::Table => write!(f, "table"),
121            ObjectKind::View => write!(f, "view"),
122            ObjectKind::MaterializedView => write!(f, "materialized view"),
123            ObjectKind::Source => write!(f, "source"),
124            ObjectKind::Sink => write!(f, "sink"),
125            ObjectKind::Secret => write!(f, "secret"),
126            ObjectKind::Connection => write!(f, "connection"),
127        }
128    }
129}
130
/// Directory name for mz-deploy build artifacts.
///
/// Crate-visible. It is not referenced by the code in this module, so it is
/// presumably consumed by other modules in the crate — verify against callers.
pub(crate) const BUILD_DIR: &str = "target";
133
/// Errors that can occur when reading, writing, or parsing `types.lock` files.
///
/// The I/O and parse variants carry the path involved and keep the underlying
/// error as their `source`.
#[derive(Error, Debug)]
pub enum TypesError {
    /// A compiler-cache error, forwarded transparently. `#[from]` lets `?`
    /// convert a `CacheError` into this variant automatically.
    #[error(transparent)]
    BuildArtifactFailed(#[from] crate::project::compiler::cache::CacheError),

    /// Reading the `types.lock` file at `path` failed.
    #[error("failed to read types.lock at {path}")]
    FileReadFailed {
        path: PathBuf,
        #[source]
        source: std::io::Error,
    },
    /// Writing the `types.lock` file at `path` failed.
    #[error("failed to write types.lock at {path}")]
    FileWriteFailed {
        path: PathBuf,
        #[source]
        source: std::io::Error,
    },
    /// The contents of the `types.lock` file at `path` could not be
    /// deserialized from TOML.
    #[error("failed to parse types.lock at {path}")]
    ParseFailed {
        path: PathBuf,
        #[source]
        source: toml::de::Error,
    },
    /// Creating the directory at `path` failed.
    #[error("failed to create directory {path}")]
    DirectoryCreationFailed {
        path: PathBuf,
        #[source]
        source: std::io::Error,
    },
    /// A project dependency error, forwarded transparently. `#[from]` lets
    /// `?` convert a `DependencyError` into this variant automatically.
    #[error(transparent)]
    DependencyError(#[from] crate::project::error::DependencyError),
}
167
/// A single column's type name, nullability, and optional comment in a data contract.
///
/// The column's name is not stored here; it is the key of the map that holds
/// this value (see `Types::tables`).
#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)]
pub struct ColumnType {
    /// SQL type syntax used when recreating cached dependencies as stub tables.
    pub r#type: String,
    /// Whether the column is nullable.
    pub nullable: bool,
    /// Original column position from the database schema.
    ///
    /// Defaults to `0` when absent from serialized input. When a `types.lock`
    /// file is loaded, this is the column's index within the object's
    /// `columns` array.
    #[serde(default)]
    pub position: usize,
    /// Optional `COMMENT ON COLUMN` description from the source database.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub comment: Option<String>,
}
181
/// In-memory representation of a `types.lock` file.
///
/// Maps `ObjectId` (fully-qualified `database.schema.object`) to column
/// schemas. Used for type-checking views against external dependencies.
/// Optionally includes object-level and column-level comments from
/// `COMMENT ON` statements in the source database.
#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)]
pub struct Types {
    /// Lock-file format version. `Types::default()` uses `1`.
    pub version: u8,
    /// Column schemas per object: object id → (column name → column type).
    pub tables: BTreeMap<ObjectId, BTreeMap<String, ColumnType>>,
    /// Kind of each object. An id may be missing here; `Types::get_kind`
    /// treats a missing entry as `ObjectKind::Table`.
    pub kinds: BTreeMap<ObjectId, ObjectKind>,
    /// Object-level comments from `COMMENT ON` in the source database.
    #[serde(default)]
    pub comments: BTreeMap<ObjectId, String>,
}
197
198impl Default for Types {
199    fn default() -> Self {
200        Types {
201            version: 1,
202            tables: BTreeMap::new(),
203            kinds: BTreeMap::new(),
204            comments: BTreeMap::new(),
205        }
206    }
207}
208
/// TOML serialization format for types.lock
///
/// Objects are grouped by kind: each field below is one top-level array whose
/// TOML key is the kind's kebab-case name. Every array defaults to empty when
/// its key is absent from the file; `version` is required.
#[derive(Serialize, Deserialize)]
struct TypesLock {
    /// Lock-file format version.
    version: u8,
    /// Entries under the `table` key.
    #[serde(default)]
    table: Vec<ObjectLock>,
    /// Entries under the `view` key.
    #[serde(default)]
    view: Vec<ObjectLock>,
    /// Entries under the `materialized-view` key (renamed from the field name).
    #[serde(default, rename = "materialized-view")]
    materialized_view: Vec<ObjectLock>,
    /// Entries under the `source` key.
    #[serde(default)]
    source: Vec<ObjectLock>,
    /// Entries under the `sink` key.
    #[serde(default)]
    sink: Vec<ObjectLock>,
    /// Entries under the `secret` key.
    #[serde(default)]
    secret: Vec<ObjectLock>,
    /// Entries under the `connection` key.
    #[serde(default)]
    connection: Vec<ObjectLock>,
}
228
229impl Default for TypesLock {
230    fn default() -> Self {
231        Self {
232            version: 1,
233            table: vec![],
234            view: vec![],
235            materialized_view: vec![],
236            source: vec![],
237            sink: vec![],
238            secret: vec![],
239            connection: vec![],
240        }
241    }
242}
243
244impl TypesLock {
245    /// Collect all objects paired with their kind into a vec.
246    fn all_objects(&self) -> Vec<(ObjectKind, &ObjectLock)> {
247        let mut result = Vec::new();
248        for obj in &self.table {
249            result.push((ObjectKind::Table, obj));
250        }
251        for obj in &self.view {
252            result.push((ObjectKind::View, obj));
253        }
254        for obj in &self.materialized_view {
255            result.push((ObjectKind::MaterializedView, obj));
256        }
257        for obj in &self.source {
258            result.push((ObjectKind::Source, obj));
259        }
260        for obj in &self.sink {
261            result.push((ObjectKind::Sink, obj));
262        }
263        for obj in &self.secret {
264            result.push((ObjectKind::Secret, obj));
265        }
266        for obj in &self.connection {
267            result.push((ObjectKind::Connection, obj));
268        }
269        result
270    }
271
272    /// Return a mutable reference to the vec for a given kind.
273    fn vec_for_kind(&mut self, kind: ObjectKind) -> &mut Vec<ObjectLock> {
274        match kind {
275            ObjectKind::Table => &mut self.table,
276            ObjectKind::View => &mut self.view,
277            ObjectKind::MaterializedView => &mut self.materialized_view,
278            ObjectKind::Source => &mut self.source,
279            ObjectKind::Sink => &mut self.sink,
280            ObjectKind::Secret => &mut self.secret,
281            ObjectKind::Connection => &mut self.connection,
282        }
283    }
284}
285
/// One object entry in the `types.lock` serialization format.
///
/// The object's kind is not stored in the entry; it is implied by which
/// per-kind array of `TypesLock` contains it.
#[derive(Serialize, Deserialize)]
struct ObjectLock {
    /// Identifier of the object this entry describes.
    name: ObjectId,
    /// Optional object-level comment; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    comment: Option<String>,
    /// The object's columns. Required when deserializing (no serde default).
    columns: Vec<ColumnLock>,
}
293
/// One column entry in the `types.lock` serialization format.
///
/// There is no explicit position field: a column's position is its index in
/// the enclosing `ObjectLock::columns` array.
#[derive(Serialize, Deserialize)]
struct ColumnLock {
    /// Column name.
    name: String,
    /// Column type, stored under the `type` key.
    #[serde(rename = "type")]
    r#type: String,
    /// Whether the column is nullable.
    nullable: bool,
    /// Optional column-level comment; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    comment: Option<String>,
}
303
304impl From<&Types> for TypesLock {
305    fn from(types: &Types) -> Self {
306        let mut lock = TypesLock {
307            version: types.version,
308            table: Vec::new(),
309            view: Vec::new(),
310            materialized_view: Vec::new(),
311            source: Vec::new(),
312            sink: Vec::new(),
313            secret: Vec::new(),
314            connection: Vec::new(),
315        };
316
317        for (id, columns) in &types.tables {
318            let mut cols: Vec<_> = columns.iter().collect();
319            cols.sort_by_key(|(_, ct)| ct.position);
320            let cols: Vec<ColumnLock> = cols
321                .into_iter()
322                .map(|(col_name, col_type)| ColumnLock {
323                    name: col_name.clone(),
324                    r#type: col_type.r#type.clone(),
325                    nullable: col_type.nullable,
326                    comment: col_type.comment.clone(),
327                })
328                .collect();
329
330            let kind = types
331                .kinds
332                .get(id)
333                .unwrap_or_else(|| panic!("no kind for type {}", id.clone()));
334            let comment = types.comments.get(id).cloned();
335
336            let obj = ObjectLock {
337                name: id.clone(),
338                comment,
339                columns: cols,
340            };
341
342            lock.vec_for_kind(*kind).push(obj);
343        }
344
345        lock
346    }
347}
348
349impl From<TypesLock> for Types {
350    fn from(lock: TypesLock) -> Self {
351        let mut tables = BTreeMap::new();
352        let mut kinds = BTreeMap::new();
353        let mut comments = BTreeMap::new();
354
355        for (kind, obj) in lock.all_objects() {
356            let id = obj.name.clone();
357            let mut columns = BTreeMap::new();
358            for (position, col) in obj.columns.iter().enumerate() {
359                columns.insert(
360                    col.name.clone(),
361                    ColumnType {
362                        r#type: col.r#type.clone(),
363                        nullable: col.nullable,
364                        position,
365                        comment: col.comment.clone(),
366                    },
367                );
368            }
369            kinds.insert(id.clone(), kind);
370            if let Some(comment) = &obj.comment {
371                comments.insert(id.clone(), comment.clone());
372            }
373            tables.insert(id, columns);
374        }
375
376        Types {
377            version: lock.version,
378            tables,
379            kinds,
380            comments,
381        }
382    }
383}
384
/// Escape a string for use as a TOML basic string value.
///
/// Backslash, double quote, newline, carriage return, and tab use their short
/// escapes. Any other character for which `char::is_control` is true is
/// written as `\uXXXX` with four uppercase hex digits. Everything else is
/// copied through unchanged. The surrounding quotes are not added here.
fn escape_toml_string(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        // Characters with a dedicated short escape sequence.
        let short = match ch {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        match short {
            Some(sequence) => escaped.push_str(sequence),
            None if ch.is_control() => {
                escaped.push_str(&format!("\\u{:04X}", u32::from(ch)));
            }
            None => escaped.push(ch),
        }
    }
    escaped
}
403
404/// Hand-format a `TypesLock` as TOML with per-kind sections and inline columns.
405fn write_toml(lock: &TypesLock) -> String {
406    let mut out = String::new();
407    out.push_str("# This file is automatically @generated by mz-deploy.\n");
408    out.push_str("# It is not intended for manual editing.\n");
409    out.push_str(&format!("version = {}\n", lock.version));
410
411    let sections: &[(ObjectKind, &Vec<ObjectLock>)] = &[
412        (ObjectKind::Secret, &lock.secret),
413        (ObjectKind::Connection, &lock.connection),
414        (ObjectKind::Source, &lock.source),
415        (ObjectKind::Table, &lock.table),
416        (ObjectKind::View, &lock.view),
417        (ObjectKind::MaterializedView, &lock.materialized_view),
418        (ObjectKind::Sink, &lock.sink),
419    ];
420
421    for (kind, objs) in sections {
422        for obj in *objs {
423            out.push('\n');
424            out.push_str(&format!("[[{}]]\n", kind.as_str()));
425            out.push_str(&format!(
426                "name = \"{}\"\n",
427                escape_toml_string(&obj.name.to_string())
428            ));
429            if let Some(comment) = &obj.comment {
430                out.push_str(&format!("comment = \"{}\"\n", escape_toml_string(comment)));
431            }
432            out.push_str("columns = [\n");
433            for col in &obj.columns {
434                let mut parts = format!(
435                    "name = \"{}\", type = \"{}\", nullable = {}",
436                    escape_toml_string(&col.name),
437                    escape_toml_string(&col.r#type),
438                    col.nullable,
439                );
440                if let Some(comment) = &col.comment {
441                    parts.push_str(&format!(", comment = \"{}\"", escape_toml_string(comment)));
442                }
443                out.push_str(&format!("    {{ {} }},\n", parts));
444            }
445            out.push_str("]\n");
446        }
447    }
448
449    out
450}
451
452/// Load the types.lock file from the specified directory.
453/// Returns an error if the file doesn't exist or cannot be parsed.
454pub(crate) fn load_types_lock(directory: &Path) -> Result<Types, TypesError> {
455    let path = directory.join("types.lock");
456
457    let contents = fs::read_to_string(&path).map_err(|source| TypesError::FileReadFailed {
458        path: path.clone(),
459        source,
460    })?;
461
462    let lock: TypesLock =
463        toml::from_str(&contents).map_err(|source| TypesError::ParseFailed { path, source })?;
464    Ok(lock.into())
465}
466
467impl Types {
468    /// Write the types.lock file to the specified directory.
469    /// Overwrites any existing file at that location.
470    pub fn write_types_lock(&self, directory: &Path) -> Result<(), TypesError> {
471        let path = directory.join("types.lock");
472
473        let lock = TypesLock::from(self);
474        let contents = write_toml(&lock);
475
476        fs::write(&path, contents).map_err(|source| TypesError::FileWriteFailed { path, source })
477    }
478
479    /// Get the column schema for an object.
480    pub fn get_table(&self, id: &ObjectId) -> Option<&BTreeMap<String, ColumnType>> {
481        self.tables.get(id)
482    }
483
484    /// Get the object kind for an object.
485    ///
486    /// Returns `Table` if the id is not in the kinds map, which can happen
487    /// when `Types` is constructed programmatically (e.g., from `type_info`).
488    pub fn get_kind(&self, id: &ObjectId) -> ObjectKind {
489        self.kinds.get(id).copied().unwrap_or(ObjectKind::Table)
490    }
491}
492
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeMap;

    /// Parses an object name into an `ObjectId`, panicking on invalid input.
    fn oid(name: &str) -> ObjectId {
        name.parse::<ObjectId>().unwrap()
    }

    /// Builds a `ColumnType` fixture.
    fn column(ty: &str, nullable: bool, position: usize, comment: Option<&str>) -> ColumnType {
        ColumnType {
            r#type: ty.to_string(),
            nullable,
            position,
            comment: comment.map(|text| text.to_string()),
        }
    }

    /// Builds a column map from `(name, column)` pairs.
    fn columns(entries: Vec<(&str, ColumnType)>) -> BTreeMap<String, ColumnType> {
        let mut map = BTreeMap::new();
        for (name, col) in entries {
            map.insert(name.to_string(), col);
        }
        map
    }

    /// Writes `types` to a fresh temp directory, loads it back, and asserts
    /// that the loaded value equals the original.
    fn assert_round_trips(types: &Types) {
        let dir = tempfile::tempdir().expect("failed to create temp dir");
        types
            .write_types_lock(dir.path())
            .expect("failed to write types.lock");

        let loaded = load_types_lock(dir.path()).expect("failed to load types.lock");
        assert_eq!(types, &loaded);
    }

    #[mz_ore::test]
    fn test_write_and_read_types_lock_round_trip() {
        let mut tables = BTreeMap::new();
        tables.insert(
            oid("app.ingest.orders"),
            columns(vec![
                ("amount", column("numeric", true, 0, None)),
                ("id", column("integer", false, 1, None)),
                ("user_id", column("integer", true, 2, None)),
            ]),
        );
        tables.insert(
            oid("app.ingest.users"),
            columns(vec![
                ("name", column("text", true, 0, None)),
                ("user_id", column("integer", false, 1, None)),
            ]),
        );

        let mut kinds = BTreeMap::new();
        kinds.insert(oid("app.ingest.orders"), ObjectKind::Table);
        kinds.insert(oid("app.ingest.users"), ObjectKind::Table);

        assert_round_trips(&Types {
            version: 1,
            tables,
            kinds,
            comments: BTreeMap::new(),
        });
    }

    #[mz_ore::test]
    fn test_round_trip_with_kind() {
        let cols = columns(vec![("id", column("integer", false, 0, None))]);

        let mut tables = BTreeMap::new();
        tables.insert(oid("app.ingest.orders"), cols.clone());
        tables.insert(oid("app.ingest.order_summary"), cols);

        let mut kinds = BTreeMap::new();
        kinds.insert(oid("app.ingest.orders"), ObjectKind::Table);
        kinds.insert(
            oid("app.ingest.order_summary"),
            ObjectKind::MaterializedView,
        );

        assert_round_trips(&Types {
            version: 1,
            tables,
            kinds,
            comments: BTreeMap::new(),
        });
    }

    #[mz_ore::test]
    fn test_round_trip_with_comments() {
        let mut tables = BTreeMap::new();
        tables.insert(
            oid("app.ingest.orders"),
            columns(vec![
                ("id", column("integer", false, 0, Some("Primary key"))),
                ("name", column("text", true, 1, None)),
            ]),
        );

        let mut kinds = BTreeMap::new();
        kinds.insert(oid("app.ingest.orders"), ObjectKind::Table);

        let mut comments = BTreeMap::new();
        comments.insert(
            oid("app.ingest.orders"),
            "All incoming customer orders".to_string(),
        );

        assert_round_trips(&Types {
            version: 1,
            tables,
            kinds,
            comments,
        });
    }

    #[mz_ore::test]
    fn test_backward_compat_no_comments() {
        // A types.lock file without comment fields should parse successfully
        let toml = r#"
version = 1

[[table]]
name = "app.ingest.orders"
columns = [
    { name = "id", type = "integer", nullable = false },
]
"#;
        let dir = tempfile::tempdir().expect("failed to create temp dir");
        fs::write(dir.path().join("types.lock"), toml).unwrap();

        let loaded = load_types_lock(dir.path()).expect("should parse without comments");
        assert_eq!(loaded.tables.len(), 1);
        assert!(loaded.comments.is_empty());

        let orders = oid("app.ingest.orders");
        let cols = loaded.tables.get(&orders).unwrap();
        assert!(cols.get("id").unwrap().comment.is_none());
    }
}
703}