mz_deploy/project/ir/compiled.rs
1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Compiled project IR.
11//!
12//! This module contains the validated semantic structures produced by object
13//! compilation and project assembly — the layer between raw parsed SQL and
14//! the dependency-aware graph in [`ir::graph`](super::graph).
15//!
16//! - [`DatabaseObject`] is a single `.sql` file after validation and name
17//! normalization: one primary CREATE statement plus its indexes,
18//! grants, comments, and tests. All identifiers are fully qualified.
19//! - [`Project`] groups validated objects by `(database, schema)` with
20//! module-level statements, but has **no dependency graph**. Dependency
21//! extraction happens downstream in [`analysis::deps`](crate::project::analysis::deps),
22//! which produces the final [`graph::Project`](super::graph::Project).
23
24use super::super::ast::Statement;
25use crate::project::SchemaQualifier;
26use crate::project::error::ValidationError;
27use crate::project::error::ValidationErrorKind;
28use crate::project::ir::object_id::ObjectId;
29use crate::project::resolve::normalize::{ClusterTransformer, NameTransformer, NormalizingVisitor};
30use mz_sql_parser::ast::*;
31use std::collections::{BTreeMap, BTreeSet};
32use std::path::PathBuf;
33
/// Fully qualified name parsed from file path structure.
///
/// Represents the canonical `database.schema.object` name based on directory structure.
/// File path format: `<root>/<database>/<schema>/<object>.sql`
///
/// This struct is created during object validation and is used to:
/// - Normalize statement names to be fully qualified
/// - Validate that SQL statement names match the directory structure
/// - Provide a consistent FQN for error messages and validation
///
/// Equality is derived over all fields, so two values naming the same object
/// but carrying different `path`s compare as unequal.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FullyQualifiedName {
    /// Identifier built from the database, schema, and object names.
    id: ObjectId,
    /// Source file path for the object.
    ///
    /// Set by the path-based constructors; the conversions from
    /// `UnresolvedItemName` and `ObjectId` leave it as an empty `PathBuf`.
    pub path: PathBuf,
    /// The name as an `UnresolvedItemName`, handed out (cloned) by `to_item_name`.
    item_name: UnresolvedItemName,
}
49
50impl FullyQualifiedName {
51 /// Get the database name.
52 ///
53 /// `FullyQualifiedName` is constructed only for project-internal user
54 /// objects, so the database is always present.
55 #[inline]
56 pub fn database(&self) -> &str {
57 self.id.expect_database()
58 }
59
60 /// Get the schema name.
61 #[inline]
62 pub fn schema(&self) -> &str {
63 self.id.schema()
64 }
65
66 /// Get the object name.
67 #[inline]
68 pub fn object(&self) -> &str {
69 self.id.object()
70 }
71
72 /// Get the ObjectId.
73 pub fn object_id(&self) -> &ObjectId {
74 &self.id
75 }
76
77 /// Get the UnresolvedItemName for updating statement names.
78 pub fn to_item_name(&self) -> UnresolvedItemName {
79 self.item_name.clone()
80 }
81
82 /// Create a FullyQualifiedName with explicit database and schema names.
83 ///
84 /// Unlike `TryFrom<(&Path, &str)>` which derives names from the file path,
85 /// this constructor accepts the names directly. This is needed when a suffix
86 /// has been applied to the database name, so the path-derived name no longer
87 /// matches the desired database name.
88 pub fn with_names(
89 path: &std::path::Path,
90 object_name: &str,
91 database: &str,
92 schema: &str,
93 ) -> Result<Self, ValidationError> {
94 let database_ident = Ident::new(database).map_err(|e| {
95 ValidationError::with_file(
96 ValidationErrorKind::InvalidIdentifier {
97 name: database.to_string(),
98 reason: e.to_string(),
99 },
100 path.to_path_buf(),
101 )
102 })?;
103
104 let schema_ident = Ident::new(schema).map_err(|e| {
105 ValidationError::with_file(
106 ValidationErrorKind::InvalidIdentifier {
107 name: schema.to_string(),
108 reason: e.to_string(),
109 },
110 path.to_path_buf(),
111 )
112 })?;
113
114 let object_ident = Ident::new(object_name).map_err(|e| {
115 ValidationError::with_file(
116 ValidationErrorKind::InvalidIdentifier {
117 name: object_name.to_string(),
118 reason: e.to_string(),
119 },
120 path.to_path_buf(),
121 )
122 })?;
123
124 let item_name = UnresolvedItemName(vec![database_ident, schema_ident, object_ident]);
125
126 let id = ObjectId::new(
127 database.to_string(),
128 schema.to_string(),
129 object_name.to_string(),
130 );
131
132 Ok(FullyQualifiedName {
133 id,
134 path: path.to_path_buf(),
135 item_name,
136 })
137 }
138}
139
140impl TryFrom<UnresolvedItemName> for FullyQualifiedName {
141 type Error = &'static str;
142
143 fn try_from(value: UnresolvedItemName) -> Result<Self, Self::Error> {
144 if value.0.len() < 3 {
145 return Err("fully qualified names require database, schema, and object parts");
146 }
147 let id = ObjectId::new(
148 value.0[0].to_string(),
149 value.0[1].to_string(),
150 value.0[2].to_string(),
151 );
152 Ok(Self {
153 id,
154 path: PathBuf::new(),
155 item_name: value,
156 })
157 }
158}
159
160impl TryFrom<(&std::path::Path, &str)> for FullyQualifiedName {
161 type Error = ValidationError;
162
163 /// Extract fully qualified name from file path.
164 ///
165 /// Path format: `<root>/<database>/<schema>/<object>.sql`
166 /// Returns error if path structure is invalid.
167 fn try_from(value: (&std::path::Path, &str)) -> Result<Self, Self::Error> {
168 let (path, object_name) = value;
169
170 // Extract schema (parent directory)
171 let schema = path
172 .parent()
173 .and_then(|p| p.file_name())
174 .and_then(|s| s.to_str())
175 .ok_or_else(|| {
176 ValidationError::with_file(
177 ValidationErrorKind::SchemaExtractionFailed,
178 path.to_path_buf(),
179 )
180 })?;
181
182 // Extract database (parent of schema directory)
183 let database = path
184 .parent()
185 .and_then(|p| p.parent())
186 .and_then(|p| p.file_name())
187 .and_then(|s| s.to_str())
188 .ok_or_else(|| {
189 ValidationError::with_file(
190 ValidationErrorKind::DatabaseExtractionFailed,
191 path.to_path_buf(),
192 )
193 })?;
194
195 // Create Ident instances for each component
196 let database_ident = Ident::new(database).map_err(|e| {
197 ValidationError::with_file(
198 ValidationErrorKind::InvalidIdentifier {
199 name: database.to_string(),
200 reason: e.to_string(),
201 },
202 path.to_path_buf(),
203 )
204 })?;
205
206 let schema_ident = Ident::new(schema).map_err(|e| {
207 ValidationError::with_file(
208 ValidationErrorKind::InvalidIdentifier {
209 name: schema.to_string(),
210 reason: e.to_string(),
211 },
212 path.to_path_buf(),
213 )
214 })?;
215
216 let object_ident = Ident::new(object_name).map_err(|e| {
217 ValidationError::with_file(
218 ValidationErrorKind::InvalidIdentifier {
219 name: object_name.to_string(),
220 reason: e.to_string(),
221 },
222 path.to_path_buf(),
223 )
224 })?;
225
226 // Create the UnresolvedItemName
227 let item_name = UnresolvedItemName(vec![database_ident, schema_ident, object_ident]);
228
229 // Create ObjectId
230 let id = ObjectId::new(
231 database.to_string(),
232 schema.to_string(),
233 object_name.to_string(),
234 );
235
236 Ok(FullyQualifiedName {
237 id,
238 path: path.to_path_buf(),
239 item_name,
240 })
241 }
242}
243
244impl From<ObjectId> for FullyQualifiedName {
245 fn from(id: ObjectId) -> Self {
246 let item_name = UnresolvedItemName(vec![
247 Ident::new(id.expect_database()).expect("validated database identifier"),
248 Ident::new(id.schema()).expect("validated schema identifier"),
249 Ident::new(id.object()).expect("validated object identifier"),
250 ]);
251 Self {
252 id,
253 path: PathBuf::new(),
254 item_name,
255 }
256 }
257}
258
259/// The primary CREATE statement for a database object.
260impl Statement {
261 /// Normalizes the statement name to be fully qualified.
262 pub fn normalize_stmt(self, fqn: &FullyQualifiedName) -> Self {
263 let mut visitor = NormalizingVisitor::fully_qualifying(fqn);
264 self.normalize_name_with(&visitor, &fqn.to_item_name())
265 .normalize_dependencies_with(&mut visitor)
266 }
267
268 /// Normalizes the statement name using a custom transformer.
269 pub fn normalize_name_with<T: NameTransformer>(
270 self,
271 visitor: &NormalizingVisitor<T>,
272 item_name: &UnresolvedItemName,
273 ) -> Self {
274 let transformed_name = visitor.transformer().transform_own_name(item_name);
275
276 match self {
277 Statement::CreateSink(mut s) => {
278 s.name = Some(transformed_name);
279 Statement::CreateSink(s)
280 }
281 Statement::CreateView(mut s) => {
282 s.definition.name = transformed_name;
283 Statement::CreateView(s)
284 }
285 Statement::CreateMaterializedView(mut s) => {
286 s.name = transformed_name;
287 Statement::CreateMaterializedView(s)
288 }
289 Statement::CreateTable(mut s) => {
290 s.name = transformed_name;
291 Statement::CreateTable(s)
292 }
293 Statement::CreateTableFromSource(mut s) => {
294 s.name = transformed_name;
295 Statement::CreateTableFromSource(s)
296 }
297 Statement::CreateSource(mut s) => {
298 s.name = transformed_name;
299 Statement::CreateSource(s)
300 }
301 Statement::CreateSecret(mut s) => {
302 s.name = transformed_name;
303 Statement::CreateSecret(s)
304 }
305 Statement::CreateConnection(mut s) => {
306 s.name = transformed_name;
307 Statement::CreateConnection(s)
308 }
309 }
310 }
311
312 /// Normalizes all object references within the statement using a custom transformer.
313 pub fn normalize_dependencies_with<T: NameTransformer>(
314 self,
315 visitor: &mut NormalizingVisitor<T>,
316 ) -> Self {
317 match self {
318 Statement::CreateView(mut s) => {
319 visitor.normalize_query(&mut s.definition.query);
320 Statement::CreateView(s)
321 }
322 Statement::CreateMaterializedView(mut s) => {
323 visitor.normalize_query(&mut s.query);
324 Statement::CreateMaterializedView(s)
325 }
326 Statement::CreateTableFromSource(mut s) => {
327 visitor.normalize_raw_item_name(&mut s.source);
328 Statement::CreateTableFromSource(s)
329 }
330 Statement::CreateSink(mut s) => {
331 visitor.normalize_raw_item_name(&mut s.from);
332 visitor.normalize_sink_connection(&mut s.connection);
333 Statement::CreateSink(s)
334 }
335 Statement::CreateConnection(mut s) => {
336 visitor.normalize_connection_options(&mut s.values);
337 Statement::CreateConnection(s)
338 }
339 Statement::CreateSource(mut s) => {
340 visitor.normalize_source_connection(&mut s.connection);
341 Statement::CreateSource(s)
342 }
343 // These statements don't have dependencies on other database objects
344 Statement::CreateTable(_) | Statement::CreateSecret(_) => self,
345 }
346 }
347
348 /// Normalize cluster references using a ClusterTransformer.
349 ///
350 /// This method is separate from normalize_dependencies_with because cluster
351 /// normalization is only needed for staging environments, not regular deployments.
352 pub fn normalize_cluster_with<T: ClusterTransformer>(
353 self,
354 visitor: &NormalizingVisitor<T>,
355 ) -> Self {
356 match self {
357 Statement::CreateMaterializedView(mut s) => {
358 visitor.normalize_cluster_name(&mut s.in_cluster);
359 Statement::CreateMaterializedView(s)
360 }
361 Statement::CreateSink(mut s) => {
362 visitor.normalize_cluster_name(&mut s.in_cluster);
363 Statement::CreateSink(s)
364 }
365 Statement::CreateSource(mut s) => {
366 visitor.normalize_cluster_name(&mut s.in_cluster);
367 Statement::CreateSource(s)
368 }
369 // These statements don't have cluster references
370 Statement::CreateView(_)
371 | Statement::CreateTable(_)
372 | Statement::CreateTableFromSource(_)
373 | Statement::CreateSecret(_)
374 | Statement::CreateConnection(_) => self,
375 }
376 }
377}
378
/// A validated database object with its primary statement and supporting declarations.
///
/// Represents a single database object (table, view, source, etc.) that has been
/// validated to ensure:
/// - Exactly one primary CREATE statement exists
/// - The object name matches the file name
/// - All supporting statements (indexes, grants, comments) reference this object
/// - Object types are consistent across statements
///
/// # Structure
///
/// Each `DatabaseObject` is loaded from a single `.sql` file and contains:
/// - One primary statement (CREATE TABLE, CREATE VIEW, etc.)
/// - Zero or more CREATE INDEX statements (for indexable objects)
/// - Zero or more GRANT statements
/// - Zero or more COMMENT statements
/// - Zero or more unit test statements
///
/// # Example
///
/// For a file `my_schema/users.sql`:
/// ```sql
/// CREATE TABLE users (
///     id INT,
///     name TEXT
/// );
///
/// CREATE INDEX users_id_idx ON users (id);
/// GRANT SELECT ON users TO analyst_role;
/// COMMENT ON TABLE users IS 'User account information';
/// ```
///
/// This would be validated and represented as a single `DatabaseObject`.
#[derive(Debug, Clone)]
pub struct DatabaseObject {
    /// Path to the source `.sql` file that defined this object.
    ///
    /// Carried from [`input::ObjectVariant::path`](crate::project::syntax::input::ObjectVariant)
    /// through validation so downstream consumers (LSP, diagnostics) can cite
    /// the source file without reconstructing it from the object's name.
    pub path: PathBuf,
    /// The primary CREATE statement for this object
    pub stmt: Statement,
    /// Indexes defined on this object
    pub indexes: Vec<CreateIndexStatement<Raw>>,
    /// Grant statements for this object
    pub grants: Vec<GrantPrivilegesStatement<Raw>>,
    /// Comment statements for this object or its columns
    pub comments: Vec<CommentStatement<Raw>>,
    /// Unit tests for this object
    pub tests: Vec<ExecuteUnitTestStatement<Raw>>,
}
430
431impl DatabaseObject {
432 pub fn clusters(&self) -> BTreeSet<String> {
433 let mut cluster_set = BTreeSet::new();
434
435 let in_cluster = match &self.stmt {
436 Statement::CreateMaterializedView(mv) => mv.in_cluster.as_ref(),
437 Statement::CreateSink(sink) => sink.in_cluster.as_ref(),
438 Statement::CreateSource(source) => source.in_cluster.as_ref(),
439 Statement::CreateView(_)
440 | Statement::CreateTable(_)
441 | Statement::CreateTableFromSource(_)
442 | Statement::CreateSecret(_)
443 | Statement::CreateConnection(_) => None,
444 };
445 if let Some(RawClusterName::Unresolved(cluster_name)) = in_cluster {
446 cluster_set.insert(cluster_name.to_string());
447 }
448
449 for index in &self.indexes {
450 if let Some(RawClusterName::Unresolved(cluster_name)) = &index.in_cluster {
451 cluster_set.insert(cluster_name.to_string());
452 }
453 }
454 cluster_set
455 }
456
457 /// Convert the statement to a `Query<Raw>` for type checking purposes.
458 pub fn to_query(&self) -> Option<Query<Raw>> {
459 match &self.stmt {
460 Statement::CreateView(stmt) => Some(stmt.definition.query.clone()),
461 Statement::CreateMaterializedView(stmt) => Some(stmt.query.clone()),
462 Statement::CreateTable(_)
463 | Statement::CreateSecret(_)
464 | Statement::CreateConnection(_)
465 | Statement::CreateTableFromSource(_)
466 | Statement::CreateSink(_)
467 | Statement::CreateSource(_) => None,
468 }
469 }
470
471 /// Rewrite cluster references using the given cluster name map.
472 ///
473 /// For any `RawClusterName::Unresolved(ident)` where the ident matches a key
474 /// in the map, replace it with the suffixed name. This applies to `IN CLUSTER`
475 /// clauses in the main statement and in index definitions.
476 pub fn rewrite_cluster_references(&mut self, cluster_map: &BTreeMap<String, String>) {
477 // Rewrite IN CLUSTER on the main statement
478 match &mut self.stmt {
479 Statement::CreateMaterializedView(s) => {
480 rewrite_in_cluster(&mut s.in_cluster, cluster_map);
481 }
482 Statement::CreateSink(s) => {
483 rewrite_in_cluster(&mut s.in_cluster, cluster_map);
484 }
485 Statement::CreateSource(s) => {
486 rewrite_in_cluster(&mut s.in_cluster, cluster_map);
487 }
488 Statement::CreateView(_)
489 | Statement::CreateTable(_)
490 | Statement::CreateTableFromSource(_)
491 | Statement::CreateSecret(_)
492 | Statement::CreateConnection(_) => {}
493 }
494
495 // Rewrite IN CLUSTER on indexes
496 for index in &mut self.indexes {
497 rewrite_in_cluster(&mut index.in_cluster, cluster_map);
498 }
499 }
500
501 /// Rewrite cross-database references using the given database name map.
502 ///
503 /// For any 3-part `UnresolvedItemName` where the database part matches an
504 /// original name in the map, replace it with the suffixed name. This
505 /// includes the statement's own name (which after per-object validation
506 /// carries the original directory-derived database), supporting statements
507 /// (indexes, grants, comments), and all dependency references in the body.
508 /// External databases (not in the map) are untouched.
509 pub fn rewrite_database_references(&mut self, db_map: &BTreeMap<String, String>) {
510 let ident = self.stmt.ident();
511 let database = ident
512 .database
513 .clone()
514 .unwrap_or_else(|| Ident::new_unchecked("unknown"));
515 let schema = ident
516 .schema
517 .clone()
518 .unwrap_or_else(|| Ident::new_unchecked("unknown"));
519 let own_name = UnresolvedItemName(vec![database, schema, ident.object.clone()]);
520 let fqn = FullyQualifiedName::try_from(own_name.clone())
521 .expect("database, schema, and object are always present");
522 let mut visitor = NormalizingVisitor::fully_qualifying_with_db_map(&fqn, Some(db_map));
523 self.stmt = self
524 .stmt
525 .clone()
526 .normalize_name_with(&visitor, &own_name)
527 .normalize_dependencies_with(&mut visitor);
528
529 visitor.normalize_index_references(&mut self.indexes);
530 visitor.normalize_grant_references(&mut self.grants);
531 visitor.normalize_comment_references(&mut self.comments);
532 }
533}
534
/// A validated schema containing multiple database objects.
///
/// Represents a schema directory in the project structure. Each schema contains
/// multiple database objects (tables, views, sources, etc.) that have all been
/// validated.
///
/// # Directory Mapping
///
/// A schema corresponds to a directory in the project structure:
/// ```text
/// database_name/
///   schema_name/          <- Schema
///     object1.sql         <- DatabaseObject
///     object2.sql         <- DatabaseObject
///     mod.sql             (optional, carried in `mod_statements`)
/// ```
///
/// Note: `mod.sql` files are parsed as source inputs and carried here as
/// schema-level setup statements.
#[derive(Debug)]
pub struct Schema {
    /// The name of the schema (directory name)
    pub name: String,
    /// All validated database objects in this schema
    pub objects: Vec<DatabaseObject>,
    /// Optional module-level statements for this schema.
    ///
    /// NOTE(review): the docs on this type call the source file `mod.sql`,
    /// while this field was described as coming from a `schema.sql` file —
    /// confirm which name is current.
    pub mod_statements: Option<Vec<mz_sql_parser::ast::Statement<Raw>>>,
}
563
/// A validated database containing multiple schemas.
///
/// Represents a database directory in the project structure. Each database contains
/// multiple schemas, each of which contains multiple database objects.
///
/// # Directory Mapping
///
/// A database corresponds to a directory in the project structure:
/// ```text
/// project_root/
///   database_name/        <- Database
///     schema1/            <- Schema
///       object1.sql
///     schema2/            <- Schema
///       object2.sql
///     mod.sql             (optional, carried in `mod_statements`)
/// ```
#[derive(Debug)]
pub struct Database {
    /// The name of the database (directory name)
    pub name: String,
    /// All validated schemas in this database
    pub schemas: Vec<Schema>,
    /// Optional module-level statements for this database.
    ///
    /// NOTE(review): the diagram above calls the source file `mod.sql`,
    /// while this field was described as coming from a `database.sql` file —
    /// confirm which name is current.
    pub mod_statements: Option<Vec<mz_sql_parser::ast::Statement<Raw>>>,
}
590
/// A fully validated compiled project.
///
/// Represents the complete validated project structure, containing all databases,
/// schemas, and objects. This is the top-level compiled project produced after
/// validating project source inputs.
///
/// Compiled projects are produced by [`crate::project::plan_sync`] and the
/// internal compiler entrypoints in [`crate::project::compiler`].
///
/// # Validation Guarantees
///
/// A successfully created `Project` guarantees:
/// - All object names match their file names
/// - All qualified names match the directory structure
/// - All supporting statements reference the correct objects
/// - All object types are consistent across statements
/// - No unsupported statement types are present
#[derive(Debug)]
pub struct Project {
    /// All validated databases in this project
    pub databases: Vec<Database>,
    /// Schemas that use replacement materialized views, derived from
    /// `SET api = stable` statements.
    ///
    /// Each entry is a `(database, schema)` pair, stored as a
    /// `SchemaQualifier` in an ordered set.
    pub replacement_schemas: BTreeSet<SchemaQualifier>,
}
617
618impl Project {
619 /// Rewrite all cluster references in the project using the given cluster name map.
620 ///
621 /// Walks all objects in all databases/schemas and replaces cluster names in
622 /// `IN CLUSTER` clauses when the cluster name is a key in the map.
623 pub fn rewrite_cluster_references(&mut self, cluster_map: &BTreeMap<String, String>) {
624 for db in &mut self.databases {
625 for schema in &mut db.schemas {
626 for obj in &mut schema.objects {
627 obj.rewrite_cluster_references(cluster_map);
628 }
629 }
630 }
631 }
632
633 /// Rewrite all cross-database references in the project using the given database name map.
634 ///
635 /// This walks all objects, mod_statements, etc. and replaces database names in
636 /// 3-part qualified references when the database is a project-owned database
637 /// that appears in the map.
638 pub fn rewrite_database_references(&mut self, db_map: &BTreeMap<String, String>) {
639 for db in &mut self.databases {
640 for schema in &mut db.schemas {
641 for obj in &mut schema.objects {
642 obj.rewrite_database_references(db_map);
643 }
644 }
645 }
646 }
647}
648
649/// Rewrite an optional `RawClusterName::Unresolved` ident if it matches a key in the map.
650fn rewrite_in_cluster(
651 in_cluster: &mut Option<RawClusterName>,
652 cluster_map: &BTreeMap<String, String>,
653) {
654 if let Some(RawClusterName::Unresolved(ident)) = in_cluster {
655 let name = ident.to_string();
656 if let Some(suffixed) = cluster_map.get(&name) {
657 *ident = Ident::new(suffixed).expect("valid cluster identifier");
658 }
659 }
660}