mz_deploy/project.rs
1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Project compilation, graph assembly, and deployment analysis.
11//!
12//! This module defines the compile contract for a Materialize project rooted on
13//! disk. The result of compilation is an [`ir::graph::Project`].
14//!
15//! Compilation has two behavioral layers:
16//!
17//! 1. **Object compilation** — each logical object is discovered from source
18//! files, parsed, validated, and normalized independently. These object-local
19//! results are the unit of parallelism and the unit of persistent cache reuse.
20//! 2. **Graph assembly** — the current object set is assembled into a compiled
21//! project and then into a dependency-aware project graph, where cross-object
22//! constraints and deployment ordering are enforced.
23//!
24//! The project module is organized by compiler responsibility:
25//!
26//! - **`compiler`** — compile orchestration, object validation, incremental
27//! caching, and assembly
28//! - **`syntax`** — source-file discovery, parsed input structures, parser
29//! integration, profile variants, and variable substitution
30//! - **`resolve`** — name qualification, normalization, and lowering transforms
31//! - **`analysis`** — dependency extraction, topology, deployment snapshots,
32//! dirty propagation, and graph-wide validations
33//! - **`ir`** — semantic identifiers, compiled project IR, and dependency graph IR
34//!
35//! [`plan_sync()`] is the canonical synchronous compiler entrypoint. It uses the
36//! incremental compiler in [`compiler`] to reuse persisted object artifacts
37//! across invocations. [`plan()`] is an async wrapper that runs this compile
38//! contract on a blocking thread pool.
39//!
40//! The sibling modules in `analysis/` operate on the assembled project graph to
41//! answer deployment questions such as which objects changed, which downstream
42//! objects must be restaged, and whether runtime cluster rules are satisfied.
43
44use std::collections::{BTreeMap, BTreeSet};
45use std::path::{Path, PathBuf};
46
47pub(crate) mod analysis;
48pub(crate) mod ast;
49pub(crate) mod clusters;
50pub(crate) mod compiler;
51pub(crate) mod error;
52pub(crate) mod ir;
53pub(crate) mod network_policies;
54pub(crate) mod resolve;
55pub(crate) mod roles;
56pub(crate) mod syntax;
57
58// Re-export commonly used types
59pub(crate) use ir::graph::ModStatement;
60
/// A `(database_name, schema_name)` pair identifying a schema within a project.
///
/// The derived `PartialOrd`/`Ord` implementations compare fields in
/// declaration order, so qualifiers sort by `database` first and by `schema`
/// second. Keep the field order stable if that ordering matters to callers.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, serde::Serialize)]
pub struct SchemaQualifier {
    /// Name of the database that contains the schema.
    pub database: String,
    /// Name of the schema within `database`.
    pub schema: String,
}
67
68impl SchemaQualifier {
69 pub fn new(database: String, schema: String) -> Self {
70 Self { database, schema }
71 }
72
73 /// Collect the distinct `(database, schema)` pairs from a slice of objects.
74 pub fn collect_from(objs: &[&ir::graph::DatabaseObject]) -> BTreeSet<Self> {
75 objs.iter()
76 .map(|obj| {
77 Self::new(
78 obj.id.expect_database().to_string(),
79 obj.id.schema().to_string(),
80 )
81 })
82 .collect()
83 }
84}
85
86/// Async wrapper around [`plan_sync`] that runs the CPU-bound compiler on a
87/// blocking thread pool.
88pub(crate) async fn plan(
89 root: PathBuf,
90 profile: Option<String>,
91 profile_suffix: Option<String>,
92 variables: BTreeMap<String, String>,
93 fs: crate::fs::FileSystem,
94) -> Result<ir::graph::Project, error::ProjectError> {
95 mz_ore::task::spawn_blocking(
96 || "project::plan",
97 move || {
98 plan_sync(
99 &fs,
100 root,
101 profile.as_deref(),
102 profile_suffix.as_deref(),
103 &variables,
104 )
105 },
106 )
107 .await
108}
109
#[cfg(test)]
mod plan_tests {
    use super::*;

    /// Builds the object id `database.schema.object` from string slices.
    fn object_id(database: &str, schema: &str, object: &str) -> ir::object_id::ObjectId {
        ir::object_id::ObjectId::new(
            database.to_string(),
            schema.to_string(),
            object.to_string(),
        )
    }

    /// Creates `models/<database>/<schema>` beneath `root` and returns the
    /// path of the new directory.
    fn make_schema_dir(root: &Path, database: &str, schema: &str) -> PathBuf {
        let dir = root.join(format!("models/{database}/{schema}"));
        std::fs::create_dir_all(&dir).unwrap();
        dir
    }

    /// An overlay entry takes precedence over the file on disk: the on-disk
    /// SQL here cannot be parsed, yet planning succeeds once the overlay
    /// provides valid SQL for the same path.
    #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux`
    #[mz_ore::test]
    fn plan_sync_uses_overlay_content() {
        let project_dir = tempfile::tempdir().unwrap();
        let root = project_dir.path();
        std::fs::write(root.join("project.toml"), "[project]\nname = \"t\"\n").unwrap();
        let sql_path = make_schema_dir(root, "mydb", "public").join("foo.sql");
        // What lives on disk is deliberately not SQL.
        std::fs::write(&sql_path, "THIS IS NOT VALID SQL").unwrap();

        let no_variables: BTreeMap<String, String> = BTreeMap::new();

        // Disk only: the broken file must surface as an error.
        let disk_only = crate::fs::FileSystem::new();
        let disk_result = plan_sync(&disk_only, root, None, None, &no_variables);
        assert!(
            disk_result.is_err(),
            "disk-only plan should fail on unparseable SQL"
        );

        // Overlay present: the same path now resolves to valid SQL.
        let overlay = BTreeMap::from([(
            sql_path,
            "CREATE VIEW foo AS SELECT 1 AS x;\n".to_string(),
        )]);
        let overlaid = crate::fs::FileSystem::with_overlay(overlay);
        let project = plan_sync(&overlaid, root, None, None, &no_variables)
            .expect("overlay should supply parseable SQL");

        let id = object_id("mydb", "public", "foo");
        assert!(
            project.find_object(&id).is_some(),
            "overlay-defined view should be present in planned project"
        );
    }

    /// A CREATE statement may qualify itself with the plain, directory-derived
    /// database name even while a profile suffix is active. The suffix is
    /// applied during assembly, so both the resulting ObjectId and the
    /// compiled statement end up with the suffixed database name.
    #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux`
    #[mz_ore::test]
    fn plan_sync_with_profile_suffix_compiles_unsuffixed_qualified_name() {
        let project_dir = tempfile::tempdir().unwrap();
        let root = project_dir.path();
        std::fs::write(root.join("project.toml"), "").unwrap();
        let schema_dir = make_schema_dir(root, "mydb", "public");
        // The SQL names the database without any suffix.
        std::fs::write(
            schema_dir.join("foo.sql"),
            "CREATE VIEW mydb.public.foo AS SELECT 1 AS x;\n",
        )
        .unwrap();

        let no_variables: BTreeMap<String, String> = BTreeMap::new();
        let fs = crate::fs::FileSystem::new();
        let project = plan_sync(&fs, root, None, Some("_dev"), &no_variables)
            .expect("unsuffixed qualified name should validate when suffix is active");

        let id = object_id("mydb_dev", "public", "foo");
        let obj = project
            .find_object(&id)
            .expect("planned project should expose object under suffixed database");
        let ident = obj.typed_object.stmt.ident();
        let compiled_database = ident.database.as_ref().map(|name| name.as_str());
        assert_eq!(
            compiled_database,
            Some("mydb_dev"),
            "compiled CREATE statement should carry the suffixed database name",
        );
    }

    /// A view body that refers to an object in another project-owned database
    /// has that reference rewritten to the suffixed database as well, keeping
    /// the emitted SQL self-consistent.
    #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux`
    #[mz_ore::test]
    fn plan_sync_with_profile_suffix_rewrites_cross_database_dependencies() {
        let project_dir = tempfile::tempdir().unwrap();
        let root = project_dir.path();
        std::fs::write(root.join("project.toml"), "").unwrap();

        // db_a owns the base table.
        let a_dir = make_schema_dir(root, "db_a", "public");
        std::fs::write(
            a_dir.join("base.sql"),
            "CREATE TABLE db_a.public.base (id INT);\n",
        )
        .unwrap();

        // db_b owns a view that selects from db_a using the plain name.
        let b_dir = make_schema_dir(root, "db_b", "public");
        std::fs::write(
            b_dir.join("derived.sql"),
            "CREATE VIEW db_b.public.derived AS SELECT id FROM db_a.public.base;\n",
        )
        .unwrap();

        let no_variables: BTreeMap<String, String> = BTreeMap::new();
        let fs = crate::fs::FileSystem::new();
        let project = plan_sync(&fs, root, None, Some("_dev"), &no_variables)
            .expect("cross-database references should compile under a profile suffix");

        let derived_id = object_id("db_b_dev", "public", "derived");
        let derived = project
            .find_object(&derived_id)
            .expect("derived view should appear under the suffixed database");

        // The recorded dependency must point at the suffixed db_a rather than
        // at the name that appears in the user's SELECT.
        let base_id = object_id("db_a_dev", "public", "base");
        assert!(
            derived.dependencies.contains(&base_id),
            "cross-database dependency should be rewritten to the suffixed name; \
             got {:?}",
            derived.dependencies,
        );

        // The rendered statement must use suffixed names for both the body
        // reference and the statement's own name; this guards against a
        // rewrite that skips either one. "db_a.public.base" is not a substring
        // of "db_a_dev.public.base" because `_dev` sits between `db_a` and the
        // dot, so a plain `contains` is sufficient to prove the original name
        // no longer appears.
        let serialized = derived.typed_object.stmt.to_string();
        assert!(
            serialized.contains("db_a_dev.public.base"),
            "body should reference the suffixed db_a; got: {serialized}",
        );
        assert!(
            !serialized.contains("db_a.public.base"),
            "body should not retain the original db_a name; got: {serialized}",
        );
        assert!(
            serialized.contains("db_b_dev.public.derived"),
            "statement's own name should be suffixed; got: {serialized}",
        );
    }
}
274
275/// Compile a project root into a planned deployment representation.
276///
277/// Behaviorally, this function:
278///
279/// - discovers project-owned objects and mod statements
280/// - reuses any valid persisted object artifacts for the active compile context
281/// - recompiles cache misses in parallel
282/// - assembles the current typed project and lowers it into a planned project
283///
284/// The returned plan is defined by the project sources, the active profile
285/// configuration, and the compile-time variable bindings. Cached artifacts may
286/// accelerate evaluation, but they do not change the result.
287pub(crate) fn plan_sync<P: AsRef<Path>>(
288 fs: &crate::fs::FileSystem,
289 root: P,
290 profile: Option<&str>,
291 profile_suffix: Option<&str>,
292 variables: &BTreeMap<String, String>,
293) -> Result<ir::graph::Project, error::ProjectError> {
294 compiler::compile_sync(fs, root, profile, profile_suffix, variables)
295}