Skip to main content

mz_deploy/
project.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Project compilation, graph assembly, and deployment analysis.
11//!
12//! This module defines the compile contract for a Materialize project rooted on
13//! disk. The result of compilation is an [`ir::graph::Project`].
14//!
15//! Compilation has two behavioral layers:
16//!
17//! 1. **Object compilation** — each logical object is discovered from source
18//!    files, parsed, validated, and normalized independently. These object-local
19//!    results are the unit of parallelism and the unit of persistent cache reuse.
20//! 2. **Graph assembly** — the current object set is assembled into a compiled
21//!    project and then into a dependency-aware project graph, where cross-object
22//!    constraints and deployment ordering are enforced.
23//!
24//! The project module is organized by compiler responsibility:
25//!
26//! - **`compiler`** — compile orchestration, object validation, incremental
27//!   caching, and assembly
28//! - **`syntax`** — source-file discovery, parsed input structures, parser
29//!   integration, profile variants, and variable substitution
30//! - **`resolve`** — name qualification, normalization, and lowering transforms
31//! - **`analysis`** — dependency extraction, topology, deployment snapshots,
32//!   dirty propagation, and graph-wide validations
33//! - **`ir`** — semantic identifiers, compiled project IR, and dependency graph IR
34//!
35//! [`plan_sync()`] is the canonical synchronous compiler entrypoint. It uses the
36//! incremental compiler in [`compiler`] to reuse persisted object artifacts
37//! across invocations. [`plan()`] is an async wrapper that runs this compile
38//! contract on a blocking thread pool.
39//!
40//! The sibling modules in `analysis/` operate on the assembled project graph to
41//! answer deployment questions such as which objects changed, which downstream
42//! objects must be restaged, and whether runtime cluster rules are satisfied.
43
44use std::collections::{BTreeMap, BTreeSet};
45use std::path::{Path, PathBuf};
46
47pub(crate) mod analysis;
48pub(crate) mod ast;
49pub(crate) mod clusters;
50pub(crate) mod compiler;
51pub(crate) mod error;
52pub(crate) mod ir;
53pub(crate) mod network_policies;
54pub(crate) mod resolve;
55pub(crate) mod roles;
56pub(crate) mod syntax;
57
58// Re-export commonly used types
59pub(crate) use ir::graph::ModStatement;
60
61/// A `(database_name, schema_name)` pair identifying a schema within a project.
62#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, serde::Serialize)]
63pub struct SchemaQualifier {
64    pub database: String,
65    pub schema: String,
66}
67
68impl SchemaQualifier {
69    pub fn new(database: String, schema: String) -> Self {
70        Self { database, schema }
71    }
72
73    /// Collect the distinct `(database, schema)` pairs from a slice of objects.
74    pub fn collect_from(objs: &[&ir::graph::DatabaseObject]) -> BTreeSet<Self> {
75        objs.iter()
76            .map(|obj| {
77                Self::new(
78                    obj.id.expect_database().to_string(),
79                    obj.id.schema().to_string(),
80                )
81            })
82            .collect()
83    }
84}
85
86/// Async wrapper around [`plan_sync`] that runs the CPU-bound compiler on a
87/// blocking thread pool.
88pub(crate) async fn plan(
89    root: PathBuf,
90    profile: Option<String>,
91    profile_suffix: Option<String>,
92    variables: BTreeMap<String, String>,
93    fs: crate::fs::FileSystem,
94) -> Result<ir::graph::Project, error::ProjectError> {
95    mz_ore::task::spawn_blocking(
96        || "project::plan",
97        move || {
98            plan_sync(
99                &fs,
100                root,
101                profile.as_deref(),
102                profile_suffix.as_deref(),
103                &variables,
104            )
105        },
106    )
107    .await
108}
109
#[cfg(test)]
mod plan_tests {
    use super::*;

    /// Shorthand for building an [`ir::object_id::ObjectId`] from string
    /// slices.
    fn object_id(database: &str, schema: &str, name: &str) -> ir::object_id::ObjectId {
        ir::object_id::ObjectId::new(
            database.to_string(),
            schema.to_string(),
            name.to_string(),
        )
    }

    /// Create `relative` (and any missing parents) beneath `root`, returning
    /// the full path of the created directory.
    fn create_dir_under(root: &Path, relative: &str) -> PathBuf {
        let dir = root.join(relative);
        std::fs::create_dir_all(&dir).unwrap();
        dir
    }

    /// An overlay entry takes precedence over the file on disk: the disk copy
    /// of `foo.sql` is unparseable, so a disk-only plan fails, while a plan
    /// whose overlay supplies valid SQL for the same path succeeds.
    #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux`
    #[mz_ore::test]
    fn plan_sync_uses_overlay_content() {
        let project_root = tempfile::tempdir().unwrap();
        let root = project_root.path();
        std::fs::write(root.join("project.toml"), "[project]\nname = \"t\"\n").unwrap();
        let sql_path = create_dir_under(root, "models/mydb/public").join("foo.sql");
        // The on-disk contents deliberately fail to parse.
        std::fs::write(&sql_path, "THIS IS NOT VALID SQL").unwrap();

        let no_vars: BTreeMap<String, String> = BTreeMap::new();

        // Reading straight from disk must surface the parse failure.
        let disk_only = crate::fs::FileSystem::new();
        let disk_result = plan_sync(&disk_only, root, None, None, &no_vars);
        assert!(
            disk_result.is_err(),
            "disk-only plan should fail on unparseable SQL"
        );

        // Overlaying valid SQL on the same path makes the plan succeed.
        let overlay = BTreeMap::from([(
            sql_path,
            "CREATE VIEW foo AS SELECT 1 AS x;\n".to_string(),
        )]);
        let overlaid = crate::fs::FileSystem::with_overlay(overlay);
        let project = plan_sync(&overlaid, root, None, None, &no_vars)
            .expect("overlay should supply parseable SQL");

        let foo_id = object_id("mydb", "public", "foo");
        assert!(
            project.find_object(&foo_id).is_some(),
            "overlay-defined view should be present in planned project"
        );
    }

    /// A CREATE statement may qualify itself with the unsuffixed,
    /// directory-derived database name even when a profile suffix is active.
    /// The suffix is applied during assembly, so both the resulting ObjectId
    /// and the compiled statement carry the suffixed database name.
    #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux`
    #[mz_ore::test]
    fn plan_sync_with_profile_suffix_compiles_unsuffixed_qualified_name() {
        let project_root = tempfile::tempdir().unwrap();
        let root = project_root.path();
        std::fs::write(root.join("project.toml"), "").unwrap();
        let schema_dir = create_dir_under(root, "models/mydb/public");
        // The SQL names the database exactly as the directory does (no suffix).
        std::fs::write(
            schema_dir.join("foo.sql"),
            "CREATE VIEW mydb.public.foo AS SELECT 1 AS x;\n",
        )
        .unwrap();

        let no_vars: BTreeMap<String, String> = BTreeMap::new();
        let fs = crate::fs::FileSystem::new();
        let project = plan_sync(&fs, root, None, Some("_dev"), &no_vars)
            .expect("unsuffixed qualified name should validate when suffix is active");

        let suffixed_id = object_id("mydb_dev", "public", "foo");
        let obj = project
            .find_object(&suffixed_id)
            .expect("planned project should expose object under suffixed database");
        let ident = obj.typed_object.stmt.ident();
        let compiled_database = ident
            .database
            .as_ref()
            .map(mz_sql_parser::ast::Ident::as_str);
        assert_eq!(
            compiled_database,
            Some("mydb_dev"),
            "compiled CREATE statement should carry the suffixed database name",
        );
    }

    /// References from a view body to another project-owned database are
    /// rewritten to the suffixed form together with the view's own database,
    /// keeping the emitted SQL internally consistent.
    #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux`
    #[mz_ore::test]
    fn plan_sync_with_profile_suffix_rewrites_cross_database_dependencies() {
        let project_root = tempfile::tempdir().unwrap();
        let root = project_root.path();
        std::fs::write(root.join("project.toml"), "").unwrap();

        // db_a owns the base table.
        let a_dir = create_dir_under(root, "models/db_a/public");
        std::fs::write(
            a_dir.join("base.sql"),
            "CREATE TABLE db_a.public.base (id INT);\n",
        )
        .unwrap();

        // db_b owns a view that selects from db_a.public.base, written with
        // the unsuffixed database name.
        let b_dir = create_dir_under(root, "models/db_b/public");
        std::fs::write(
            b_dir.join("derived.sql"),
            "CREATE VIEW db_b.public.derived AS SELECT id FROM db_a.public.base;\n",
        )
        .unwrap();

        let no_vars: BTreeMap<String, String> = BTreeMap::new();
        let fs = crate::fs::FileSystem::new();
        let project = plan_sync(&fs, root, None, Some("_dev"), &no_vars)
            .expect("cross-database references should compile under a profile suffix");

        let derived_id = object_id("db_b_dev", "public", "derived");
        let derived = project
            .find_object(&derived_id)
            .expect("derived view should appear under the suffixed database");

        // The recorded dependency must point at the *suffixed* db_a rather
        // than the raw name that appears in the user's SELECT body.
        let base_id = object_id("db_a_dev", "public", "base");
        assert!(
            derived.dependencies.contains(&base_id),
            "cross-database dependency should be rewritten to the suffixed name; \
             got {:?}",
            derived.dependencies,
        );

        // The serialized statement must mention the suffixed reference and
        // must no longer contain the user's original one. This guards against
        // the AST rewrite skipping either the body or the statement's own
        // name. Because `_dev` sits between "db_a" and ".public.base", the
        // unsuffixed "db_a.public.base" is never a substring of
        // "db_a_dev.public.base", so plain `contains` checks are sufficient.
        let serialized = derived.typed_object.stmt.to_string();
        assert!(
            serialized.contains("db_a_dev.public.base"),
            "body should reference the suffixed db_a; got: {serialized}",
        );
        assert!(
            !serialized.contains("db_a.public.base"),
            "body should not retain the original db_a name; got: {serialized}",
        );
        assert!(
            serialized.contains("db_b_dev.public.derived"),
            "statement's own name should be suffixed; got: {serialized}",
        );
    }
}
274
275/// Compile a project root into a planned deployment representation.
276///
277/// Behaviorally, this function:
278///
279/// - discovers project-owned objects and mod statements
280/// - reuses any valid persisted object artifacts for the active compile context
281/// - recompiles cache misses in parallel
282/// - assembles the current typed project and lowers it into a planned project
283///
284/// The returned plan is defined by the project sources, the active profile
285/// configuration, and the compile-time variable bindings. Cached artifacts may
286/// accelerate evaluation, but they do not change the result.
287pub(crate) fn plan_sync<P: AsRef<Path>>(
288    fs: &crate::fs::FileSystem,
289    root: P,
290    profile: Option<&str>,
291    profile_suffix: Option<&str>,
292    variables: &BTreeMap<String, String>,
293) -> Result<ir::graph::Project, error::ProjectError> {
294    compiler::compile_sync(fs, root, profile, profile_suffix, variables)
295}