Skip to main content

mz_deploy/project/ir/
compiled.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Compiled project IR.
11//!
12//! This module contains the validated semantic structures produced by object
13//! compilation and project assembly — the layer between raw parsed SQL and
14//! the dependency-aware graph in [`ir::graph`](super::graph).
15//!
16//! - [`DatabaseObject`] is a single `.sql` file after validation and name
17//!   normalization: one primary CREATE statement plus its indexes,
18//!   grants, comments, and tests. All identifiers are fully qualified.
19//! - [`Project`] groups validated objects by `(database, schema)` with
20//!   module-level statements, but has **no dependency graph**. Dependency
21//!   extraction happens downstream in [`analysis::deps`](crate::project::analysis::deps),
22//!   which produces the final [`graph::Project`](super::graph::Project).
23
24use super::super::ast::Statement;
25use crate::project::SchemaQualifier;
26use crate::project::error::ValidationError;
27use crate::project::error::ValidationErrorKind;
28use crate::project::ir::object_id::ObjectId;
29use crate::project::resolve::normalize::{ClusterTransformer, NameTransformer, NormalizingVisitor};
30use mz_sql_parser::ast::*;
31use std::collections::{BTreeMap, BTreeSet};
32use std::path::PathBuf;
33
34/// Fully qualified name parsed from file path structure.
35///
36/// Represents the canonical `database.schema.object` name based on directory structure.
37/// File path format: `<root>/<database>/<schema>/<object>.sql`
38///
39/// This struct is created during object validation and is used to:
40/// - Normalize statement names to be fully qualified
41/// - Validate that SQL statement names match the directory structure
42/// - Provide a consistent FQN for error messages and validation
43#[derive(Debug, Clone, PartialEq, Eq)]
44pub struct FullyQualifiedName {
45    id: ObjectId,
46    pub path: PathBuf,
47    item_name: UnresolvedItemName,
48}
49
50impl FullyQualifiedName {
51    /// Get the database name.
52    ///
53    /// `FullyQualifiedName` is constructed only for project-internal user
54    /// objects, so the database is always present.
55    #[inline]
56    pub fn database(&self) -> &str {
57        self.id.expect_database()
58    }
59
60    /// Get the schema name.
61    #[inline]
62    pub fn schema(&self) -> &str {
63        self.id.schema()
64    }
65
66    /// Get the object name.
67    #[inline]
68    pub fn object(&self) -> &str {
69        self.id.object()
70    }
71
72    /// Get the ObjectId.
73    pub fn object_id(&self) -> &ObjectId {
74        &self.id
75    }
76
77    /// Get the UnresolvedItemName for updating statement names.
78    pub fn to_item_name(&self) -> UnresolvedItemName {
79        self.item_name.clone()
80    }
81
82    /// Create a FullyQualifiedName with explicit database and schema names.
83    ///
84    /// Unlike `TryFrom<(&Path, &str)>` which derives names from the file path,
85    /// this constructor accepts the names directly. This is needed when a suffix
86    /// has been applied to the database name, so the path-derived name no longer
87    /// matches the desired database name.
88    pub fn with_names(
89        path: &std::path::Path,
90        object_name: &str,
91        database: &str,
92        schema: &str,
93    ) -> Result<Self, ValidationError> {
94        let database_ident = Ident::new(database).map_err(|e| {
95            ValidationError::with_file(
96                ValidationErrorKind::InvalidIdentifier {
97                    name: database.to_string(),
98                    reason: e.to_string(),
99                },
100                path.to_path_buf(),
101            )
102        })?;
103
104        let schema_ident = Ident::new(schema).map_err(|e| {
105            ValidationError::with_file(
106                ValidationErrorKind::InvalidIdentifier {
107                    name: schema.to_string(),
108                    reason: e.to_string(),
109                },
110                path.to_path_buf(),
111            )
112        })?;
113
114        let object_ident = Ident::new(object_name).map_err(|e| {
115            ValidationError::with_file(
116                ValidationErrorKind::InvalidIdentifier {
117                    name: object_name.to_string(),
118                    reason: e.to_string(),
119                },
120                path.to_path_buf(),
121            )
122        })?;
123
124        let item_name = UnresolvedItemName(vec![database_ident, schema_ident, object_ident]);
125
126        let id = ObjectId::new(
127            database.to_string(),
128            schema.to_string(),
129            object_name.to_string(),
130        );
131
132        Ok(FullyQualifiedName {
133            id,
134            path: path.to_path_buf(),
135            item_name,
136        })
137    }
138}
139
140impl TryFrom<UnresolvedItemName> for FullyQualifiedName {
141    type Error = &'static str;
142
143    fn try_from(value: UnresolvedItemName) -> Result<Self, Self::Error> {
144        if value.0.len() < 3 {
145            return Err("fully qualified names require database, schema, and object parts");
146        }
147        let id = ObjectId::new(
148            value.0[0].to_string(),
149            value.0[1].to_string(),
150            value.0[2].to_string(),
151        );
152        Ok(Self {
153            id,
154            path: PathBuf::new(),
155            item_name: value,
156        })
157    }
158}
159
160impl TryFrom<(&std::path::Path, &str)> for FullyQualifiedName {
161    type Error = ValidationError;
162
163    /// Extract fully qualified name from file path.
164    ///
165    /// Path format: `<root>/<database>/<schema>/<object>.sql`
166    /// Returns error if path structure is invalid.
167    fn try_from(value: (&std::path::Path, &str)) -> Result<Self, Self::Error> {
168        let (path, object_name) = value;
169
170        // Extract schema (parent directory)
171        let schema = path
172            .parent()
173            .and_then(|p| p.file_name())
174            .and_then(|s| s.to_str())
175            .ok_or_else(|| {
176                ValidationError::with_file(
177                    ValidationErrorKind::SchemaExtractionFailed,
178                    path.to_path_buf(),
179                )
180            })?;
181
182        // Extract database (parent of schema directory)
183        let database = path
184            .parent()
185            .and_then(|p| p.parent())
186            .and_then(|p| p.file_name())
187            .and_then(|s| s.to_str())
188            .ok_or_else(|| {
189                ValidationError::with_file(
190                    ValidationErrorKind::DatabaseExtractionFailed,
191                    path.to_path_buf(),
192                )
193            })?;
194
195        // Create Ident instances for each component
196        let database_ident = Ident::new(database).map_err(|e| {
197            ValidationError::with_file(
198                ValidationErrorKind::InvalidIdentifier {
199                    name: database.to_string(),
200                    reason: e.to_string(),
201                },
202                path.to_path_buf(),
203            )
204        })?;
205
206        let schema_ident = Ident::new(schema).map_err(|e| {
207            ValidationError::with_file(
208                ValidationErrorKind::InvalidIdentifier {
209                    name: schema.to_string(),
210                    reason: e.to_string(),
211                },
212                path.to_path_buf(),
213            )
214        })?;
215
216        let object_ident = Ident::new(object_name).map_err(|e| {
217            ValidationError::with_file(
218                ValidationErrorKind::InvalidIdentifier {
219                    name: object_name.to_string(),
220                    reason: e.to_string(),
221                },
222                path.to_path_buf(),
223            )
224        })?;
225
226        // Create the UnresolvedItemName
227        let item_name = UnresolvedItemName(vec![database_ident, schema_ident, object_ident]);
228
229        // Create ObjectId
230        let id = ObjectId::new(
231            database.to_string(),
232            schema.to_string(),
233            object_name.to_string(),
234        );
235
236        Ok(FullyQualifiedName {
237            id,
238            path: path.to_path_buf(),
239            item_name,
240        })
241    }
242}
243
244impl From<ObjectId> for FullyQualifiedName {
245    fn from(id: ObjectId) -> Self {
246        let item_name = UnresolvedItemName(vec![
247            Ident::new(id.expect_database()).expect("validated database identifier"),
248            Ident::new(id.schema()).expect("validated schema identifier"),
249            Ident::new(id.object()).expect("validated object identifier"),
250        ]);
251        Self {
252            id,
253            path: PathBuf::new(),
254            item_name,
255        }
256    }
257}
258
259/// The primary CREATE statement for a database object.
260impl Statement {
261    /// Normalizes the statement name to be fully qualified.
262    pub fn normalize_stmt(self, fqn: &FullyQualifiedName) -> Self {
263        let mut visitor = NormalizingVisitor::fully_qualifying(fqn);
264        self.normalize_name_with(&visitor, &fqn.to_item_name())
265            .normalize_dependencies_with(&mut visitor)
266    }
267
268    /// Normalizes the statement name using a custom transformer.
269    pub fn normalize_name_with<T: NameTransformer>(
270        self,
271        visitor: &NormalizingVisitor<T>,
272        item_name: &UnresolvedItemName,
273    ) -> Self {
274        let transformed_name = visitor.transformer().transform_own_name(item_name);
275
276        match self {
277            Statement::CreateSink(mut s) => {
278                s.name = Some(transformed_name);
279                Statement::CreateSink(s)
280            }
281            Statement::CreateView(mut s) => {
282                s.definition.name = transformed_name;
283                Statement::CreateView(s)
284            }
285            Statement::CreateMaterializedView(mut s) => {
286                s.name = transformed_name;
287                Statement::CreateMaterializedView(s)
288            }
289            Statement::CreateTable(mut s) => {
290                s.name = transformed_name;
291                Statement::CreateTable(s)
292            }
293            Statement::CreateTableFromSource(mut s) => {
294                s.name = transformed_name;
295                Statement::CreateTableFromSource(s)
296            }
297            Statement::CreateSource(mut s) => {
298                s.name = transformed_name;
299                Statement::CreateSource(s)
300            }
301            Statement::CreateSecret(mut s) => {
302                s.name = transformed_name;
303                Statement::CreateSecret(s)
304            }
305            Statement::CreateConnection(mut s) => {
306                s.name = transformed_name;
307                Statement::CreateConnection(s)
308            }
309        }
310    }
311
312    /// Normalizes all object references within the statement using a custom transformer.
313    pub fn normalize_dependencies_with<T: NameTransformer>(
314        self,
315        visitor: &mut NormalizingVisitor<T>,
316    ) -> Self {
317        match self {
318            Statement::CreateView(mut s) => {
319                visitor.normalize_query(&mut s.definition.query);
320                Statement::CreateView(s)
321            }
322            Statement::CreateMaterializedView(mut s) => {
323                visitor.normalize_query(&mut s.query);
324                Statement::CreateMaterializedView(s)
325            }
326            Statement::CreateTableFromSource(mut s) => {
327                visitor.normalize_raw_item_name(&mut s.source);
328                Statement::CreateTableFromSource(s)
329            }
330            Statement::CreateSink(mut s) => {
331                visitor.normalize_raw_item_name(&mut s.from);
332                visitor.normalize_sink_connection(&mut s.connection);
333                Statement::CreateSink(s)
334            }
335            Statement::CreateConnection(mut s) => {
336                visitor.normalize_connection_options(&mut s.values);
337                Statement::CreateConnection(s)
338            }
339            Statement::CreateSource(mut s) => {
340                visitor.normalize_source_connection(&mut s.connection);
341                Statement::CreateSource(s)
342            }
343            // These statements don't have dependencies on other database objects
344            Statement::CreateTable(_) | Statement::CreateSecret(_) => self,
345        }
346    }
347
348    /// Normalize cluster references using a ClusterTransformer.
349    ///
350    /// This method is separate from normalize_dependencies_with because cluster
351    /// normalization is only needed for staging environments, not regular deployments.
352    pub fn normalize_cluster_with<T: ClusterTransformer>(
353        self,
354        visitor: &NormalizingVisitor<T>,
355    ) -> Self {
356        match self {
357            Statement::CreateMaterializedView(mut s) => {
358                visitor.normalize_cluster_name(&mut s.in_cluster);
359                Statement::CreateMaterializedView(s)
360            }
361            Statement::CreateSink(mut s) => {
362                visitor.normalize_cluster_name(&mut s.in_cluster);
363                Statement::CreateSink(s)
364            }
365            Statement::CreateSource(mut s) => {
366                visitor.normalize_cluster_name(&mut s.in_cluster);
367                Statement::CreateSource(s)
368            }
369            // These statements don't have cluster references
370            Statement::CreateView(_)
371            | Statement::CreateTable(_)
372            | Statement::CreateTableFromSource(_)
373            | Statement::CreateSecret(_)
374            | Statement::CreateConnection(_) => self,
375        }
376    }
377}
378
379/// A validated database object with its primary statement and supporting declarations.
380///
381/// Represents a single database object (table, view, source, etc.) that has been
382/// validated to ensure:
383/// - Exactly one primary CREATE statement exists
384/// - The object name matches the file name
385/// - All supporting statements (indexes, grants, comments) reference this object
386/// - Object types are consistent across statements
387///
388/// # Structure
389///
390/// Each `DatabaseObject` is loaded from a single `.sql` file and contains:
391/// - One primary statement (CREATE TABLE, CREATE VIEW, etc.)
392/// - Zero or more CREATE INDEX statements (for indexable objects)
393/// - Zero or more GRANT statements
394/// - Zero or more COMMENT statements
395///
396/// # Example
397///
398/// For a file `my_schema/users.sql`:
399/// ```sql
400/// CREATE TABLE users (
401///     id INT,
402///     name TEXT
403/// );
404///
405/// CREATE INDEX users_id_idx ON users (id);
406/// GRANT SELECT ON users TO analyst_role;
407/// COMMENT ON TABLE users IS 'User account information';
408/// ```
409///
410/// This would be validated and represented as a single `DatabaseObject`.
411#[derive(Debug, Clone)]
412pub struct DatabaseObject {
413    /// Path to the source `.sql` file that defined this object.
414    ///
415    /// Carried from [`input::ObjectVariant::path`](crate::project::syntax::input::ObjectVariant)
416    /// through validation so downstream consumers (LSP, diagnostics) can cite
417    /// the source file without reconstructing it from the object's name.
418    pub path: PathBuf,
419    /// The primary CREATE statement for this object
420    pub stmt: Statement,
421    /// Indexes defined on this object
422    pub indexes: Vec<CreateIndexStatement<Raw>>,
423    /// Grant statements for this object
424    pub grants: Vec<GrantPrivilegesStatement<Raw>>,
425    /// Comment statements for this object or its columns
426    pub comments: Vec<CommentStatement<Raw>>,
427    /// Unit tests for this object
428    pub tests: Vec<ExecuteUnitTestStatement<Raw>>,
429}
430
431impl DatabaseObject {
432    pub fn clusters(&self) -> BTreeSet<String> {
433        let mut cluster_set = BTreeSet::new();
434
435        let in_cluster = match &self.stmt {
436            Statement::CreateMaterializedView(mv) => mv.in_cluster.as_ref(),
437            Statement::CreateSink(sink) => sink.in_cluster.as_ref(),
438            Statement::CreateSource(source) => source.in_cluster.as_ref(),
439            Statement::CreateView(_)
440            | Statement::CreateTable(_)
441            | Statement::CreateTableFromSource(_)
442            | Statement::CreateSecret(_)
443            | Statement::CreateConnection(_) => None,
444        };
445        if let Some(RawClusterName::Unresolved(cluster_name)) = in_cluster {
446            cluster_set.insert(cluster_name.to_string());
447        }
448
449        for index in &self.indexes {
450            if let Some(RawClusterName::Unresolved(cluster_name)) = &index.in_cluster {
451                cluster_set.insert(cluster_name.to_string());
452            }
453        }
454        cluster_set
455    }
456
457    /// Convert the statement to a `Query<Raw>` for type checking purposes.
458    pub fn to_query(&self) -> Option<Query<Raw>> {
459        match &self.stmt {
460            Statement::CreateView(stmt) => Some(stmt.definition.query.clone()),
461            Statement::CreateMaterializedView(stmt) => Some(stmt.query.clone()),
462            Statement::CreateTable(_)
463            | Statement::CreateSecret(_)
464            | Statement::CreateConnection(_)
465            | Statement::CreateTableFromSource(_)
466            | Statement::CreateSink(_)
467            | Statement::CreateSource(_) => None,
468        }
469    }
470
471    /// Rewrite cluster references using the given cluster name map.
472    ///
473    /// For any `RawClusterName::Unresolved(ident)` where the ident matches a key
474    /// in the map, replace it with the suffixed name. This applies to `IN CLUSTER`
475    /// clauses in the main statement and in index definitions.
476    pub fn rewrite_cluster_references(&mut self, cluster_map: &BTreeMap<String, String>) {
477        // Rewrite IN CLUSTER on the main statement
478        match &mut self.stmt {
479            Statement::CreateMaterializedView(s) => {
480                rewrite_in_cluster(&mut s.in_cluster, cluster_map);
481            }
482            Statement::CreateSink(s) => {
483                rewrite_in_cluster(&mut s.in_cluster, cluster_map);
484            }
485            Statement::CreateSource(s) => {
486                rewrite_in_cluster(&mut s.in_cluster, cluster_map);
487            }
488            Statement::CreateView(_)
489            | Statement::CreateTable(_)
490            | Statement::CreateTableFromSource(_)
491            | Statement::CreateSecret(_)
492            | Statement::CreateConnection(_) => {}
493        }
494
495        // Rewrite IN CLUSTER on indexes
496        for index in &mut self.indexes {
497            rewrite_in_cluster(&mut index.in_cluster, cluster_map);
498        }
499    }
500
501    /// Rewrite cross-database references using the given database name map.
502    ///
503    /// For any 3-part `UnresolvedItemName` where the database part matches an
504    /// original name in the map, replace it with the suffixed name. This
505    /// includes the statement's own name (which after per-object validation
506    /// carries the original directory-derived database), supporting statements
507    /// (indexes, grants, comments), and all dependency references in the body.
508    /// External databases (not in the map) are untouched.
509    pub fn rewrite_database_references(&mut self, db_map: &BTreeMap<String, String>) {
510        let ident = self.stmt.ident();
511        let database = ident
512            .database
513            .clone()
514            .unwrap_or_else(|| Ident::new_unchecked("unknown"));
515        let schema = ident
516            .schema
517            .clone()
518            .unwrap_or_else(|| Ident::new_unchecked("unknown"));
519        let own_name = UnresolvedItemName(vec![database, schema, ident.object.clone()]);
520        let fqn = FullyQualifiedName::try_from(own_name.clone())
521            .expect("database, schema, and object are always present");
522        let mut visitor = NormalizingVisitor::fully_qualifying_with_db_map(&fqn, Some(db_map));
523        self.stmt = self
524            .stmt
525            .clone()
526            .normalize_name_with(&visitor, &own_name)
527            .normalize_dependencies_with(&mut visitor);
528
529        visitor.normalize_index_references(&mut self.indexes);
530        visitor.normalize_grant_references(&mut self.grants);
531        visitor.normalize_comment_references(&mut self.comments);
532    }
533}
534
535/// A validated schema containing multiple database objects.
536///
537/// Represents a schema directory in the project structure. Each schema contains
538/// multiple database objects (tables, views, sources, etc.) that have all been
539/// validated.
540///
541/// # Directory Mapping
542///
543/// A schema corresponds to a directory in the project structure:
544/// ```text
545/// database_name/
546///   schema_name/        <- Schema
547///     object1.sql       <- DatabaseObject
548///     object2.sql       <- DatabaseObject
549///     mod.sql           (optional, not represented in HIR)
550/// ```
551///
552/// Note: `mod.sql` files are parsed as source inputs and carried here as
553/// schema-level setup statements.
554#[derive(Debug)]
555pub struct Schema {
556    /// The name of the schema (directory name)
557    pub name: String,
558    /// All validated database objects in this schema
559    pub objects: Vec<DatabaseObject>,
560    /// Optional module-level statements (from schema.sql file)
561    pub mod_statements: Option<Vec<mz_sql_parser::ast::Statement<Raw>>>,
562}
563
564/// A validated database containing multiple schemas.
565///
566/// Represents a database directory in the project structure. Each database contains
567/// multiple schemas, each of which contains multiple database objects.
568///
569/// # Directory Mapping
570///
571/// A database corresponds to a directory in the project structure:
572/// ```text
573/// project_root/
574///   database_name/      <- Database
575///     schema1/          <- Schema
576///       object1.sql
577///     schema2/          <- Schema
578///       object2.sql
579///     mod.sql           (optional, not represented in HIR)
580/// ```
581#[derive(Debug)]
582pub struct Database {
583    /// The name of the database (directory name)
584    pub name: String,
585    /// All validated schemas in this database
586    pub schemas: Vec<Schema>,
587    /// Optional module-level statements (from database.sql file)
588    pub mod_statements: Option<Vec<mz_sql_parser::ast::Statement<Raw>>>,
589}
590
591/// A fully validated compiled project.
592///
593/// Represents the complete validated project structure, containing all databases,
594/// schemas, and objects. This is the top-level compiled project produced after
595/// validating project source inputs.
596///
597/// Compiled projects are produced by [`crate::project::plan_sync`] and the
598/// internal compiler entrypoints in [`crate::project::compiler`].
599///
600/// # Validation Guarantees
601///
602/// A successfully created `Project` guarantees:
603/// - All object names match their file names
604/// - All qualified names match the directory structure
605/// - All supporting statements reference the correct objects
606/// - All object types are consistent across statements
607/// - No unsupported statement types are present
608#[derive(Debug)]
609pub struct Project {
610    /// All validated databases in this project
611    pub databases: Vec<Database>,
612    /// Schemas that use replacement materialized views, derived from
613    /// `SET api = stable` statements.
614    /// Each entry is a `(database, schema)` pair.
615    pub replacement_schemas: BTreeSet<SchemaQualifier>,
616}
617
618impl Project {
619    /// Rewrite all cluster references in the project using the given cluster name map.
620    ///
621    /// Walks all objects in all databases/schemas and replaces cluster names in
622    /// `IN CLUSTER` clauses when the cluster name is a key in the map.
623    pub fn rewrite_cluster_references(&mut self, cluster_map: &BTreeMap<String, String>) {
624        for db in &mut self.databases {
625            for schema in &mut db.schemas {
626                for obj in &mut schema.objects {
627                    obj.rewrite_cluster_references(cluster_map);
628                }
629            }
630        }
631    }
632
633    /// Rewrite all cross-database references in the project using the given database name map.
634    ///
635    /// This walks all objects, mod_statements, etc. and replaces database names in
636    /// 3-part qualified references when the database is a project-owned database
637    /// that appears in the map.
638    pub fn rewrite_database_references(&mut self, db_map: &BTreeMap<String, String>) {
639        for db in &mut self.databases {
640            for schema in &mut db.schemas {
641                for obj in &mut schema.objects {
642                    obj.rewrite_database_references(db_map);
643                }
644            }
645        }
646    }
647}
648
649/// Rewrite an optional `RawClusterName::Unresolved` ident if it matches a key in the map.
650fn rewrite_in_cluster(
651    in_cluster: &mut Option<RawClusterName>,
652    cluster_map: &BTreeMap<String, String>,
653) {
654    if let Some(RawClusterName::Unresolved(ident)) = in_cluster {
655        let name = ident.to_string();
656        if let Some(suffixed) = cluster_map.get(&name) {
657            *ident = Ident::new(suffixed).expect("valid cluster identifier");
658        }
659    }
660}