mz_deploy/project/ir/graph.rs
1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Dependency-aware project graph.
11//!
12//! [`Project`] is the final output of compilation — the result type of
13//! [`compile_sync`](crate::project::compiler::compile_sync). It combines a
14//! hierarchical view of the project with a flat dependency graph:
15//!
16//! - **`databases`** — the `database > schema > object` hierarchy used for
17//! iteration, deployment ordering, and module-statement execution.
18//! - **`dependency_graph`** — a flat adjacency list (`ObjectId → {ObjectId}`)
19//! used for topological sort, change propagation, and reverse-dependency
20//! lookups.
21//! - **`external_dependencies`** — objects referenced by the project but not
22//! defined in it (e.g., pre-existing sources). These appear as values in
23//! `dependency_graph` entries but are excluded from topological sorts and
24//! deployment.
25//! - **`cluster_dependencies`** and **`replacement_schemas`** — deployment
26//! metadata extracted during graph assembly.
27//!
28//! **Invariant:** every `ObjectId` reachable through `databases` has an entry
29//! in `dependency_graph`. External dependencies appear only in dependency-set
30//! values and in the `external_dependencies` set.
31
32use super::super::ast::Cluster;
33use crate::project::SchemaQualifier;
34use crate::project::ir::compiled;
35use crate::project::ir::object_id::ObjectId;
36use mz_sql_parser::ast::*;
37use std::collections::{BTreeMap, BTreeSet};
38use std::fmt::Display;
39use std::str::FromStr;
40
/// A database object with its dependencies.
///
/// One node of the project graph: it bundles the object's identifier, its
/// compiled definition, and the set of objects it depends on.
#[derive(Debug)]
pub struct DatabaseObject {
    /// The object identifier
    pub id: ObjectId,
    /// The validated compiled object
    pub typed_object: compiled::DatabaseObject,
    /// Set of objects this object depends on
    ///
    /// NOTE(review): presumably mirrors this object's entry in
    /// `Project::dependency_graph` and may therefore name external
    /// dependencies — confirm against the graph builder.
    pub dependencies: BTreeSet<ObjectId>,
}
51
/// A module-level statement with context about where it should be executed.
///
/// Module statements are executed before object statements and come from
/// database.sql or schema.sql files. They're used for setup like grants,
/// comments, and other database/schema-level configuration.
///
/// Every field is a borrow tied to `'a`: the names and the statement are
/// referenced, not copied, so a `ModStatement` cannot outlive the data it
/// points into.
#[derive(Debug)]
pub enum ModStatement<'a> {
    /// Database-level statement (from database.sql file)
    Database {
        /// The database name
        database: &'a str,
        /// The statement to execute
        statement: &'a Statement<Raw>,
    },
    /// Schema-level statement (from schema.sql file)
    Schema {
        /// The database name
        database: &'a str,
        /// The schema name
        schema: &'a str,
        /// The statement to execute
        statement: &'a Statement<Raw>,
    },
}
76
/// The type of objects contained in a schema.
///
/// Schemas are segregated by object type to prevent accidental recreation:
/// - Storage schemas contain tables, sinks, and tables from sources
/// - Compute schemas contain views and materialized views
/// - Empty schemas contain no objects
///
/// The [`Display`] and [`FromStr`] implementations round-trip through the
/// lowercase names `"storage"`, `"compute"`, and `"empty"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SchemaType {
    /// Schema contains storage objects (tables, sinks)
    Storage,
    /// Schema contains computation objects (views, materialized views)
    Compute,
    /// Schema contains no objects
    Empty,
}

impl Display for SchemaType {
    /// Writes the lowercase name of the variant.
    ///
    /// The name is emitted through `Formatter::pad` rather than `write!`, so
    /// width, fill, and alignment flags (e.g. `{:>10}`) are honoured. Plain
    /// `{}` output is unchanged.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            SchemaType::Storage => "storage",
            SchemaType::Compute => "compute",
            SchemaType::Empty => "empty",
        };
        f.pad(name)
    }
}

impl FromStr for SchemaType {
    type Err = String;

    /// Parses a schema type name, ignoring ASCII case.
    ///
    /// # Errors
    ///
    /// Returns `"unknown schema type <input>"` when `s` is not one of
    /// `storage`, `compute`, or `empty`. The input is not trimmed, so
    /// surrounding whitespace makes the parse fail.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // All accepted names are pure ASCII, so an ASCII case-insensitive
        // comparison accepts the same inputs as lowercasing first, without
        // allocating a temporary `String`.
        const NAMES: [(&str, SchemaType); 3] = [
            ("storage", SchemaType::Storage),
            ("compute", SchemaType::Compute),
            ("empty", SchemaType::Empty),
        ];

        NAMES
            .iter()
            .find(|(name, _)| s.eq_ignore_ascii_case(name))
            .map(|&(_, schema_type)| schema_type)
            .ok_or_else(|| format!("unknown schema type {}", s))
    }
}
115
/// A schema containing objects with dependency information.
///
/// Sits in the middle of the `database > schema > object` hierarchy: it is
/// owned by a [`Database`] and owns the [`DatabaseObject`]s defined in it.
#[derive(Debug)]
pub struct Schema {
    /// The schema name
    pub name: String,
    /// Objects defined in this schema, each carrying its own dependency set
    pub objects: Vec<DatabaseObject>,
    /// Optional module-level statements (from schema.sql file)
    ///
    /// NOTE(review): presumably `None` when the schema has no schema.sql
    /// file — confirm against the loader.
    pub mod_statements: Option<Vec<Statement<Raw>>>,
    /// The type of objects in this schema (Storage, Compute, or Empty)
    pub schema_type: SchemaType,
}
126
/// A database containing schemas with dependency information.
///
/// Top level of the `database > schema > object` hierarchy held by
/// [`Project`].
#[derive(Debug)]
pub struct Database {
    /// The database name
    pub name: String,
    /// Schemas belonging to this database
    pub schemas: Vec<Schema>,
    /// Optional module-level statements (from database.sql file)
    ///
    /// NOTE(review): presumably `None` when the database has no database.sql
    /// file — confirm against the loader.
    pub mod_statements: Option<Vec<Statement<Raw>>>,
}
135
/// A project graph with full dependency tracking.
///
/// Pairs the hierarchical `databases` view with the flat `dependency_graph`
/// and carries the metadata gathered alongside them: external and cluster
/// dependencies, unit tests, replacement schemas, and the set of objects that
/// were recompiled in this run.
#[derive(Debug)]
pub struct Project {
    /// The `database > schema > object` hierarchy of the project.
    pub databases: Vec<Database>,
    /// Global dependency graph: object_id -> set of dependencies
    ///
    /// The dependency sets may name objects that are not defined in the
    /// project; those are also listed in `external_dependencies`.
    pub dependency_graph: BTreeMap<ObjectId, BTreeSet<ObjectId>>,
    /// External dependencies: objects referenced but not defined in this project
    pub external_dependencies: BTreeSet<ObjectId>,
    /// Cluster dependencies: clusters referenced by indexes and materialized views
    pub cluster_dependencies: BTreeSet<Cluster>,
    /// Unit tests defined in the project, organized by the object they test
    ///
    /// Each entry pairs the `ObjectId` of the tested object with one test.
    pub tests: Vec<(ObjectId, crate::project::ir::unit_test::UnitTest)>,
    /// Schemas that use replacement materialized views, derived from
    /// `SET api = stable` statements.
    pub replacement_schemas: BTreeSet<SchemaQualifier>,
    /// Objects whose compiled artifact was a cache miss in this run.
    /// Drives incremental typechecking: a miss means the file content
    /// changed (or never compiled before), so the object's typecheck
    /// result must be recomputed.
    pub compile_dirty: BTreeSet<ObjectId>,
}