Skip to main content

mz_deploy/project/syntax/
parser.rs

// Copyright Materialize, Inc. and contributors. All rights reserved.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0.
9
//! SQL parsing with `mz_sql_parser`.
//!
//! Wraps `mz_sql_parser` to parse `.sql` files into AST statements, attaching
//! file-path context to error messages so that parse failures point back to
//! the originating source file.
//!
//! ## Variable Resolution and Parsing
//!
//! [`parse_statements_with_context`] runs variable resolution *before* parsing:
//! 1. Resolve psql-style variables (`:foo`, `:'foo'`, `:"foo"`) via
//!    [`super::variables::resolve_variables`]
//! 2. Check for unresolved variables — error or warning based on pragma
//! 3. Parse the resolved SQL via `mz_sql_parser`
//! 4. Wrap any parse errors with file path and SQL content for context
24
25use super::variables::VariableError;
26use crate::info;
27use crate::project::error::ParseError;
28use mz_sql_parser::ast::{Raw, Statement};
29use std::collections::BTreeMap;
30use std::path::PathBuf;
31
/// Parses every SQL string in `raw` and returns all resulting statements in
/// input order.
///
/// Test-only helper: it parses without any file context, so errors carry only
/// the parser's message. Parsing stops at the first string that fails and the
/// failure is returned as [`ParseError::StatementsParseFailed`].
#[cfg(test)]
pub(crate) fn parse_statements<I, S>(raw: I) -> Result<Vec<Statement<Raw>>, ParseError>
where
    I: IntoIterator<Item = S>,
    S: AsRef<str>,
{
    let mut statements = Vec::new();

    for chunk in raw {
        // The parser returns a nested `Result`: the outer error is reported as
        // a parser limit error, the inner one as an ordinary parse error.
        let results = match mz_sql_parser::parser::parse_statements_with_limit(chunk.as_ref()) {
            Ok(Ok(results)) => results,
            Ok(Err(e)) => {
                return Err(ParseError::StatementsParseFailed {
                    message: format!("Parse error: {}", e.error),
                });
            }
            Err(e) => {
                return Err(ParseError::StatementsParseFailed {
                    message: format!("Parser limit error: {}", e),
                });
            }
        };

        // Only the AST is needed here; the per-statement source text is dropped.
        statements.extend(results.into_iter().map(|result| result.ast));
    }

    Ok(statements)
}
61
62/// A parsed SQL statement paired with its byte offset within the source file.
63#[derive(Debug, Clone)]
64pub struct LocatedStatement {
65    /// The parsed AST node.
66    pub ast: Statement<Raw>,
67    /// Byte offset of the statement's start within the (resolved) SQL text.
68    pub byte_offset: usize,
69}
70
71/// Parse SQL statements and add file context to any errors.
72///
73/// Resolves psql-style variables (`:foo`, `:'foo'`, `:"foo"`) before parsing.
74/// Returns each statement together with its byte offset within the resolved SQL
75/// so that downstream validation errors can point to the exact location.
76///
77/// `profile_set` informs the unresolved-variables error display so that the
78/// hint can direct the user to set a profile when none is active.
79pub(crate) fn parse_statements_with_context(
80    sql: &str,
81    path: PathBuf,
82    variables: &BTreeMap<String, String>,
83    profile_set: bool,
84) -> Result<Vec<LocatedStatement>, ParseError> {
85    let resolved = super::variables::resolve_variables(sql, variables);
86
87    if !resolved.unresolved.is_empty() {
88        if resolved.has_warn_pragma {
89            let formatted: Vec<String> = resolved
90                .unresolved
91                .iter()
92                .map(|v| format!(":{}", v.name))
93                .collect();
94            info!(
95                "\x1b[33mwarning\x1b[0m: unresolved variables in {}: {}",
96                path.display(),
97                formatted.join(", ")
98            );
99        } else {
100            return Err(ParseError::UnresolvedVariables(VariableError {
101                unresolved: resolved.unresolved,
102                path,
103                profile_set,
104            }));
105        }
106    }
107
108    let sql = resolved.sql;
109
110    let mut statements = vec![];
111
112    let parsed_results = mz_sql_parser::parser::parse_statements_with_limit(&sql)
113        .map_err(|e| ParseError::StatementsParseFailed {
114            message: format!("Parser limit error in file {}: {}", path.display(), e),
115        })?
116        .map_err(|e| ParseError::SqlParseFailed {
117            path: path.clone(),
118            sql: sql.to_string(),
119            source: e,
120        })?;
121
122    // Compute byte offsets via pointer arithmetic on `StatementParseResult.sql`.
123    //
124    // `result.sql` is a `&'a str` subslice of the input we passed to
125    // `parse_statements_with_limit` — the parser produces it by indexing
126    // `self.sql[before..after].trim()` inside `Parser::parse_statement`.
127    // Rust's lifetime parameter on `StatementParseResult<'a>` enforces this
128    // at the type level: the returned slice cannot outlive the input.
129    //
130    // Because both pointers reference the same allocation, subtracting the
131    // base pointer from the slice pointer yields a valid byte offset.
132    //
133    // Note: offsets are relative to the *variable-resolved* SQL text (the
134    // `sql` local above), not the raw file contents. When the LSP converts
135    // these to line/column positions it must build the Rope from the same
136    // resolved text, or re-resolve variables before lookup.
137    #[allow(clippy::as_conversions)]
138    let sql_base = sql.as_ptr() as usize;
139    let mut parsed: Vec<LocatedStatement> = parsed_results
140        .into_iter()
141        .map(|result| {
142            #[allow(clippy::as_conversions)]
143            let byte_offset = result.sql.as_ptr() as usize - sql_base;
144            LocatedStatement {
145                ast: result.ast,
146                byte_offset,
147            }
148        })
149        .collect();
150
151    statements.append(&mut parsed);
152
153    Ok(statements)
154}
155
156/// Get a human-readable name for a statement type.
157///
158/// Used by resource definition modules (clusters, roles) to produce clear
159/// error messages when an unsupported statement type is encountered.
160pub(crate) fn statement_type_name(stmt: &Statement<Raw>) -> &'static str {
161    match stmt {
162        Statement::CreateTable(_) => "CREATE TABLE",
163        Statement::CreateView(_) => "CREATE VIEW",
164        Statement::CreateMaterializedView(_) => "CREATE MATERIALIZED VIEW",
165        Statement::CreateSource(_) => "CREATE SOURCE",
166        Statement::CreateSink(_) => "CREATE SINK",
167        Statement::CreateIndex(_) => "CREATE INDEX",
168        Statement::CreateCluster(_) => "CREATE CLUSTER",
169        Statement::CreateConnection(_) => "CREATE CONNECTION",
170        Statement::CreateSecret(_) => "CREATE SECRET",
171        Statement::CreateSchema(_) => "CREATE SCHEMA",
172        Statement::CreateDatabase(_) => "CREATE DATABASE",
173        Statement::CreateRole(_) => "CREATE ROLE",
174        Statement::CreateNetworkPolicy(_) => "CREATE NETWORK POLICY",
175        Statement::AlterRole(_) => "ALTER ROLE",
176        Statement::AlterCluster(_) => "ALTER CLUSTER",
177        Statement::GrantRole(_) => "GRANT ROLE",
178        Statement::GrantPrivileges(_) => "GRANT",
179        Statement::Comment(_) => "COMMENT",
180        _ => "unsupported statement",
181    }
182}
183
#[cfg(test)]
mod test {
    use super::parse_statements;

    /// Smoke test: a `CREATE CLUSTER` statement with an option list parses.
    #[mz_ore::test]
    fn validate() {
        let sql = "CREATE CLUSTER c (INTROSPECTION INTERVAL = 0)";
        let _statements = parse_statements([sql]).unwrap();
    }

    /// A materialized view with an explicit `IN CLUSTER` clause parses.
    #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux`
    #[mz_ore::test]
    fn test_mv_in_cluster() {
        let sql = "CREATE MATERIALIZED VIEW mv IN CLUSTER quickstart AS SELECT 1";
        let outcome = parse_statements([sql]);
        assert!(
            outcome.is_ok(),
            "Failed to parse MV with IN CLUSTER: {:?}",
            outcome.err()
        );
    }
}