mz_sql/ast/
transform.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Provides a publicly available interface to transform our SQL ASTs.
11
12use std::collections::{BTreeMap, BTreeSet};
13
14use mz_ore::str::StrExt;
15use mz_repr::CatalogItemId;
16use mz_sql_parser::ast::CreateTableFromSourceStatement;
17
18use crate::ast::visit::{self, Visit};
19use crate::ast::visit_mut::{self, VisitMut};
20use crate::ast::{
21    AstInfo, CreateConnectionStatement, CreateIndexStatement, CreateMaterializedViewStatement,
22    CreateSecretStatement, CreateSinkStatement, CreateSourceStatement, CreateSubsourceStatement,
23    CreateTableStatement, CreateViewStatement, CreateWebhookSourceStatement, Expr, Ident, Query,
24    Raw, RawItemName, Statement, UnresolvedItemName, ViewDefinition,
25};
26use crate::names::FullItemName;
27
28/// Given a [`Statement`] rewrites all references of the schema name `cur_schema_name` to
29/// `new_schema_name`.
30pub fn create_stmt_rename_schema_refs(
31    create_stmt: &mut Statement<Raw>,
32    database: &str,
33    cur_schema: &str,
34    new_schema: &str,
35) -> Result<(), (String, String)> {
36    match create_stmt {
37        stmt @ Statement::CreateConnection(_)
38        | stmt @ Statement::CreateDatabase(_)
39        | stmt @ Statement::CreateSchema(_)
40        | stmt @ Statement::CreateWebhookSource(_)
41        | stmt @ Statement::CreateSource(_)
42        | stmt @ Statement::CreateSubsource(_)
43        | stmt @ Statement::CreateSink(_)
44        | stmt @ Statement::CreateView(_)
45        | stmt @ Statement::CreateMaterializedView(_)
46        | stmt @ Statement::CreateTable(_)
47        | stmt @ Statement::CreateTableFromSource(_)
48        | stmt @ Statement::CreateIndex(_)
49        | stmt @ Statement::CreateType(_)
50        | stmt @ Statement::CreateSecret(_) => {
51            let mut visitor = CreateSqlRewriteSchema {
52                database,
53                cur_schema,
54                new_schema,
55                error: None,
56            };
57            visitor.visit_statement_mut(stmt);
58
59            if let Some(e) = visitor.error.take() {
60                Err(e)
61            } else {
62                Ok(())
63            }
64        }
65        stmt => {
66            unreachable!("Internal error: only catalog items need to update item refs. {stmt:?}")
67        }
68    }
69}
70
71struct CreateSqlRewriteSchema<'a> {
72    database: &'a str,
73    cur_schema: &'a str,
74    new_schema: &'a str,
75    error: Option<(String, String)>,
76}
77
78impl<'a> CreateSqlRewriteSchema<'a> {
79    fn maybe_rewrite_idents(&mut self, name: &mut [Ident]) {
80        match name {
81            [schema, item] if schema.as_str() == self.cur_schema => {
82                // TODO(parkmycar): I _think_ when the database component is not specified we can
83                // always infer we're using the current database. But I'm not positive, so for now
84                // we'll bail in this case.
85                if self.error.is_none() {
86                    self.error = Some((schema.to_string(), item.to_string()));
87                }
88            }
89            [database, schema, _item] => {
90                if database.as_str() == self.database && schema.as_str() == self.cur_schema {
91                    *schema = Ident::new_unchecked(self.new_schema);
92                }
93            }
94            _ => (),
95        }
96    }
97}
98
99impl<'a, 'ast> VisitMut<'ast, Raw> for CreateSqlRewriteSchema<'a> {
100    fn visit_expr_mut(&mut self, e: &'ast mut Expr<Raw>) {
101        match e {
102            Expr::Identifier(id) => {
103                // The last ID component is a column name that should not be
104                // considered in the rewrite.
105                let i = id.len() - 1;
106                self.maybe_rewrite_idents(&mut id[..i]);
107            }
108            Expr::QualifiedWildcard(id) => {
109                self.maybe_rewrite_idents(id);
110            }
111            _ => visit_mut::visit_expr_mut(self, e),
112        }
113    }
114
115    fn visit_unresolved_item_name_mut(
116        &mut self,
117        unresolved_item_name: &'ast mut UnresolvedItemName,
118    ) {
119        self.maybe_rewrite_idents(&mut unresolved_item_name.0);
120    }
121
122    fn visit_item_name_mut(
123        &mut self,
124        item_name: &'ast mut <mz_sql_parser::ast::Raw as AstInfo>::ItemName,
125    ) {
126        match item_name {
127            RawItemName::Name(n) | RawItemName::Id(_, n, _) => self.maybe_rewrite_idents(&mut n.0),
128        }
129    }
130}
131
132/// Changes the `name` used in an item's `CREATE` statement. To complete a
133/// rename operation, you must also call `create_stmt_rename_refs` on all dependent
134/// items.
135pub fn create_stmt_rename(create_stmt: &mut Statement<Raw>, to_item_name: String) {
136    // TODO(sploiselle): Support renaming schemas and databases.
137    match create_stmt {
138        Statement::CreateIndex(CreateIndexStatement { name, .. }) => {
139            *name = Some(Ident::new_unchecked(to_item_name));
140        }
141        Statement::CreateSink(CreateSinkStatement {
142            name: Some(name), ..
143        })
144        | Statement::CreateSource(CreateSourceStatement { name, .. })
145        | Statement::CreateSubsource(CreateSubsourceStatement { name, .. })
146        | Statement::CreateView(CreateViewStatement {
147            definition: ViewDefinition { name, .. },
148            ..
149        })
150        | Statement::CreateMaterializedView(CreateMaterializedViewStatement { name, .. })
151        | Statement::CreateTable(CreateTableStatement { name, .. })
152        | Statement::CreateTableFromSource(CreateTableFromSourceStatement { name, .. })
153        | Statement::CreateSecret(CreateSecretStatement { name, .. })
154        | Statement::CreateConnection(CreateConnectionStatement { name, .. })
155        | Statement::CreateWebhookSource(CreateWebhookSourceStatement { name, .. }) => {
156            // The last name in an ItemName is the item name. The item name
157            // does not have a fixed index.
158            // TODO: https://github.com/MaterializeInc/database-issues/issues/1721
159            let item_name_len = name.0.len() - 1;
160            name.0[item_name_len] = Ident::new_unchecked(to_item_name);
161        }
162        item => unreachable!("Internal error: only catalog items can be renamed {item:?}"),
163    }
164}
165
166/// Updates all references of `from_name` in `create_stmt` to `to_name` or
167/// errors if request is ambiguous.
168///
169/// Requests are considered ambiguous if `create_stmt` is a
170/// `Statement::CreateView`, and any of the following apply to its `query`:
171/// - `to_name.item` is used as an [`Ident`] in `query`.
172/// - `from_name.item` does not unambiguously refer to an item in the query,
173///   e.g. it is also used as a schema, or not all references to the item are
174///   sufficiently qualified.
175/// - `to_name.item` does not unambiguously refer to an item in the query after
176///   the rename. Right now, given the first condition, this is just a coherence
177///   check, but will be more meaningful once the first restriction is lifted.
178pub fn create_stmt_rename_refs(
179    create_stmt: &mut Statement<Raw>,
180    from_name: FullItemName,
181    to_item_name: String,
182) -> Result<(), String> {
183    let from_item = UnresolvedItemName::from(from_name.clone());
184    let maybe_update_item_name = |item_name: &mut UnresolvedItemName| {
185        if item_name.0 == from_item.0 {
186            // The last name in an ItemName is the item name. The item name
187            // does not have a fixed index.
188            // TODO: https://github.com/MaterializeInc/database-issues/issues/1721
189            let item_name_len = item_name.0.len() - 1;
190            item_name.0[item_name_len] = Ident::new_unchecked(to_item_name.clone());
191        }
192    };
193
194    // TODO(sploiselle): Support renaming schemas and databases.
195    match create_stmt {
196        Statement::CreateIndex(CreateIndexStatement { on_name, .. }) => {
197            maybe_update_item_name(on_name.name_mut());
198        }
199        Statement::CreateSink(CreateSinkStatement { from, .. }) => {
200            maybe_update_item_name(from.name_mut());
201        }
202        Statement::CreateView(CreateViewStatement {
203            definition: ViewDefinition { query, .. },
204            ..
205        })
206        | Statement::CreateMaterializedView(CreateMaterializedViewStatement { query, .. }) => {
207            rewrite_query(from_name, to_item_name, query)?;
208        }
209        Statement::CreateSource(_)
210        | Statement::CreateSubsource(_)
211        | Statement::CreateTable(_)
212        | Statement::CreateTableFromSource(_)
213        | Statement::CreateSecret(_)
214        | Statement::CreateConnection(_)
215        | Statement::CreateWebhookSource(_) => {}
216        item => {
217            unreachable!("Internal error: only catalog items need to update item refs {item:?}")
218        }
219    }
220
221    Ok(())
222}
223
224/// Rewrites `query`'s references of `from` to `to` or errors if too ambiguous.
225fn rewrite_query(from: FullItemName, to: String, query: &mut Query<Raw>) -> Result<(), String> {
226    let from_ident = Ident::new_unchecked(from.item.clone());
227    let to_ident = Ident::new_unchecked(to);
228    let qual_depth =
229        QueryIdentAgg::determine_qual_depth(&from_ident, Some(to_ident.clone()), query)?;
230    CreateSqlRewriter::rewrite_query_with_qual_depth(from, to_ident.clone(), qual_depth, query);
231    // Ensure that our rewrite didn't didn't introduce ambiguous
232    // references to `to_name`.
233    match QueryIdentAgg::determine_qual_depth(&to_ident, None, query) {
234        Ok(_) => Ok(()),
235        Err(e) => Err(e),
236    }
237}
238
239fn ambiguous_err(n: &Ident, t: &str) -> String {
240    format!(
241        "{} potentially used ambiguously as item and {}",
242        n.as_str().quoted(),
243        t
244    )
245}
246
247/// Visits a [`Query`], assessing catalog item [`Ident`]s' use of a specified `Ident`.
248struct QueryIdentAgg<'a> {
249    /// The name whose usage you want to assess.
250    name: &'a Ident,
251    /// Tracks all second-level qualifiers used on `name` in a `BTreeMap`, as
252    /// well as any third-level qualifiers used on those second-level qualifiers
253    /// in a `BTreeSet`.
254    qualifiers: BTreeMap<Ident, BTreeSet<Ident>>,
255    /// Tracks the least qualified instance of `name` seen.
256    min_qual_depth: usize,
257    /// Provides an option to fail the visit if encounters a specified `Ident`.
258    fail_on: Option<Ident>,
259    err: Option<String>,
260}
261
262impl<'a> QueryIdentAgg<'a> {
263    /// Determines the depth of qualification needed to unambiguously reference
264    /// catalog items in a [`Query`].
265    ///
266    /// Includes an option to fail if a given `Ident` is encountered.
267    ///
268    /// `Result`s of `Ok(usize)` indicate that `name` can be unambiguously
269    /// referred to with `usize` parts, e.g. 2 requires schema and item name
270    /// qualification.
271    ///
272    /// `Result`s of `Err` indicate that we cannot unambiguously reference
273    /// `name` or encountered `fail_on`, if it's provided.
274    fn determine_qual_depth(
275        name: &Ident,
276        fail_on: Option<Ident>,
277        query: &Query<Raw>,
278    ) -> Result<usize, String> {
279        let mut v = QueryIdentAgg {
280            qualifiers: BTreeMap::new(),
281            min_qual_depth: usize::MAX,
282            err: None,
283            name,
284            fail_on,
285        };
286
287        // Aggregate identities in `v`.
288        v.visit_query(query);
289        // Not possible to have a qualification depth of 0;
290        assert!(v.min_qual_depth > 0);
291
292        if let Some(e) = v.err {
293            return Err(e);
294        }
295
296        // Check if there was more than one 3rd-level (e.g.
297        // database) qualification used for any reference to `name`.
298        let req_depth = if v.qualifiers.values().any(|v| v.len() > 1) {
299            3
300        // Check if there was more than one 2nd-level (e.g. schema)
301        // qualification used for any reference to `name`.
302        } else if v.qualifiers.len() > 1 {
303            2
304        } else {
305            1
306        };
307
308        if v.min_qual_depth < req_depth {
309            Err(format!(
310                "{} is not sufficiently qualified to support renaming",
311                name.as_str().quoted()
312            ))
313        } else {
314            Ok(req_depth)
315        }
316    }
317
318    // Assesses `v` for uses of `self.name` and `self.fail_on`.
319    fn check_failure(&mut self, v: &[Ident]) {
320        // Fail if we encounter `self.fail_on`.
321        if let Some(f) = &self.fail_on {
322            if v.iter().any(|i| i == f) {
323                self.err = Some(format!(
324                    "found reference to {}; cannot rename {} to any identity \
325                    used in any existing view definitions",
326                    f.as_str().quoted(),
327                    self.name.as_str().quoted()
328                ));
329            }
330        }
331    }
332}
333
334impl<'a, 'ast> Visit<'ast, Raw> for QueryIdentAgg<'a> {
335    fn visit_expr(&mut self, e: &'ast Expr<Raw>) {
336        match e {
337            Expr::Identifier(i) => {
338                self.check_failure(i);
339                if let Some(p) = i.iter().rposition(|e| e == self.name) {
340                    if p == i.len() - 1 {
341                        // `self.name` used as a column if it's in the final
342                        // position here, e.g. `SELECT view.col FROM ...`
343                        self.err = Some(ambiguous_err(self.name, "column"));
344                        return;
345                    }
346                    self.min_qual_depth = std::cmp::min(p + 1, self.min_qual_depth);
347                }
348            }
349            Expr::QualifiedWildcard(i) => {
350                self.check_failure(i);
351                if let Some(p) = i.iter().rposition(|e| e == self.name) {
352                    self.min_qual_depth = std::cmp::min(p + 1, self.min_qual_depth);
353                }
354            }
355            _ => visit::visit_expr(self, e),
356        }
357    }
358
359    fn visit_ident(&mut self, ident: &'ast Ident) {
360        self.check_failure(&[ident.clone()]);
361        // This is an unqualified item using `self.name`, e.g. an alias, which
362        // we cannot unambiguously resolve.
363        if ident == self.name {
364            self.err = Some(ambiguous_err(self.name, "alias or column"));
365        }
366    }
367
368    fn visit_unresolved_item_name(&mut self, unresolved_item_name: &'ast UnresolvedItemName) {
369        let names = &unresolved_item_name.0;
370        self.check_failure(names);
371        // Every item is used as an `ItemName` at least once, which
372        // lets use track all items named `self.name`.
373        if let Some(p) = names.iter().rposition(|e| e == self.name) {
374            // Name used as last element of `<db>.<schema>.<item>`
375            if p == names.len() - 1 && names.len() == 3 {
376                self.qualifiers
377                    .entry(names[1].clone())
378                    .or_default()
379                    .insert(names[0].clone());
380                self.min_qual_depth = std::cmp::min(3, self.min_qual_depth);
381            } else {
382                // Any other use is a database or schema
383                self.err = Some(ambiguous_err(self.name, "database, schema, or function"))
384            }
385        }
386    }
387
388    fn visit_item_name(&mut self, item_name: &'ast <Raw as AstInfo>::ItemName) {
389        match item_name {
390            RawItemName::Name(n) | RawItemName::Id(_, n, _) => self.visit_unresolved_item_name(n),
391        }
392    }
393}
394
395struct CreateSqlRewriter {
396    from: Vec<Ident>,
397    to: Ident,
398}
399
400impl CreateSqlRewriter {
401    fn rewrite_query_with_qual_depth(
402        from_name: FullItemName,
403        to_name: Ident,
404        qual_depth: usize,
405        query: &mut Query<Raw>,
406    ) {
407        let from = match qual_depth {
408            1 => vec![Ident::new_unchecked(from_name.item)],
409            2 => vec![
410                Ident::new_unchecked(from_name.schema),
411                Ident::new_unchecked(from_name.item),
412            ],
413            3 => vec![
414                Ident::new_unchecked(from_name.database.to_string()),
415                Ident::new_unchecked(from_name.schema),
416                Ident::new_unchecked(from_name.item),
417            ],
418            _ => unreachable!(),
419        };
420        let mut v = CreateSqlRewriter { from, to: to_name };
421        v.visit_query_mut(query);
422    }
423
424    fn maybe_rewrite_idents(&self, name: &mut [Ident]) {
425        if name.len() > 0 && name.ends_with(&self.from) {
426            name[name.len() - 1] = self.to.clone();
427        }
428    }
429}
430
431impl<'ast> VisitMut<'ast, Raw> for CreateSqlRewriter {
432    fn visit_expr_mut(&mut self, e: &'ast mut Expr<Raw>) {
433        match e {
434            Expr::Identifier(id) => {
435                // The last ID component is a column name that should not be
436                // considered in the rewrite.
437                let i = id.len() - 1;
438                self.maybe_rewrite_idents(&mut id[..i]);
439            }
440            Expr::QualifiedWildcard(id) => {
441                self.maybe_rewrite_idents(id);
442            }
443            _ => visit_mut::visit_expr_mut(self, e),
444        }
445    }
446    fn visit_unresolved_item_name_mut(
447        &mut self,
448        unresolved_item_name: &'ast mut UnresolvedItemName,
449    ) {
450        self.maybe_rewrite_idents(&mut unresolved_item_name.0);
451    }
452    fn visit_item_name_mut(
453        &mut self,
454        item_name: &'ast mut <mz_sql_parser::ast::Raw as AstInfo>::ItemName,
455    ) {
456        match item_name {
457            RawItemName::Name(n) | RawItemName::Id(_, n, _) => self.maybe_rewrite_idents(&mut n.0),
458        }
459    }
460}
461
462/// Updates all `CatalogItemId`s from the keys of `ids` to the values of `ids` within `create_stmt`.
463pub fn create_stmt_replace_ids(
464    create_stmt: &mut Statement<Raw>,
465    ids: &BTreeMap<CatalogItemId, CatalogItemId>,
466) {
467    let mut id_replacer = CreateSqlIdReplacer { ids };
468    id_replacer.visit_statement_mut(create_stmt);
469}
470
471struct CreateSqlIdReplacer<'a> {
472    ids: &'a BTreeMap<CatalogItemId, CatalogItemId>,
473}
474
475impl<'ast> VisitMut<'ast, Raw> for CreateSqlIdReplacer<'_> {
476    fn visit_item_name_mut(
477        &mut self,
478        item_name: &'ast mut <mz_sql_parser::ast::Raw as AstInfo>::ItemName,
479    ) {
480        match item_name {
481            RawItemName::Id(id, _, _) => {
482                let old_id = match id.parse() {
483                    Ok(old_id) => old_id,
484                    Err(_) => panic!("invalid persisted global id {id}"),
485                };
486                if let Some(new_id) = self.ids.get(&old_id) {
487                    *id = new_id.to_string();
488                }
489            }
490            RawItemName::Name(_) => {}
491        }
492    }
493}