Skip to main content

mz_sql_parser/ast/
metadata.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License in the LICENSE file at the
6// root of this repository, or online at
7//
8//     http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16use std::fmt::{self, Debug};
17use std::hash::Hash;
18
19use crate::ast::display::{self, AstDisplay, AstFormatter};
20use crate::ast::fold::{Fold, FoldNode};
21use crate::ast::{
22    Ident, Statement, UnresolvedDatabaseName, UnresolvedItemName, UnresolvedObjectName,
23    UnresolvedSchemaName, Version,
24};
25
/// This represents the metadata that lives next to an AST, as we take it through
/// various stages in the planning process.
///
/// Conceptually, when we first receive an AST from the parsing process, it only
/// represents the syntax that the user input, and has no semantic information
/// embedded in it. Later in this process, we want to be able to walk the tree
/// and add additional information to it piecemeal, perhaps without going down
/// the full planning pipeline. `AstInfo` represents the various bits of
/// information that get stored in the tree: for instance, at first, table names
/// are only represented by the names the user input (in the `Raw` implementor
/// of this trait), but later on, we replace them with both the name and the ID
/// that it gets resolved to.
///
/// Currently this process brings an `Ast<Raw>` to `Ast<Aug>`, and lives in
/// sql/src/names.rs:resolve.
pub trait AstInfo: Clone {
    /// The type used for nested statements.
    type NestedStatement: AstDisplay + Clone + Hash + Debug + Eq;
    /// The type used for item references. Items are the subset of objects that are namespaced by a
    /// database and schema.
    type ItemName: AstDisplay + Clone + Hash + Debug + Eq + Ord;
    /// The type used to specify a column.
    ///
    /// n.b. when implementing visitors, you likely want to build the visitor to
    /// visit [`crate::ast::ColumnName`] instead of visiting this struct
    /// directly. The visitor on this should usually just return an error.
    type ColumnReference: AstDisplay + Clone + Hash + Debug + Eq + Ord;
    /// The type used for schema names.
    type SchemaName: AstDisplay + Clone + Hash + Debug + Eq + Ord;
    /// The type used for database names.
    type DatabaseName: AstDisplay + Clone + Hash + Debug + Eq + Ord;
    /// The type used for cluster names.
    type ClusterName: AstDisplay + Clone + Hash + Debug + Eq + Ord;
    /// The type used for data types.
    type DataType: AstDisplay + Clone + Hash + Debug + Eq + Ord;
    /// The type stored next to CTEs for their assigned ID.
    ///
    /// Unlike the other associated types, this one is not required to
    /// implement `AstDisplay`.
    type CteId: Clone + Hash + Debug + Eq + Ord;
    /// The type used for role references.
    type RoleName: AstDisplay + Clone + Hash + Debug + Eq + Ord;
    /// The type used for network policy references.
    type NetworkPolicyName: AstDisplay + Clone + Hash + Debug + Eq + Ord;
    /// The type used for any object names. Objects are the superset of all objects in Materialize.
    type ObjectName: AstDisplay + Clone + Hash + Debug + Eq + Ord;
}
70
/// Marker type for the "raw" flavor of the AST: the `AstInfo` implementor in
/// which names are still represented the way the user wrote them.
///
/// It is a unit struct and carries no data of its own; it only selects the
/// associated types used throughout the tree.
#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone, Default)]
pub struct Raw;
73
// Binds every piece of AST metadata to its raw representation.
impl AstInfo for Raw {
    // Nested statements are themselves raw statements.
    type NestedStatement = Statement<Raw>;
    type ItemName = RawItemName;
    // Columns and roles are referred to by a bare identifier.
    type ColumnReference = Ident;
    type SchemaName = UnresolvedSchemaName;
    type DatabaseName = UnresolvedDatabaseName;
    type ClusterName = RawClusterName;
    type DataType = RawDataType;
    // The raw AST stores nothing next to CTEs.
    type CteId = ();
    type RoleName = Ident;
    type NetworkPolicyName = RawNetworkPolicyName;
    type ObjectName = UnresolvedObjectName;
}
87
/// A reference to an item as it appears in the raw AST.
#[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone)]
pub enum RawItemName {
    /// A reference by name only; displayed as the name itself.
    Name(UnresolvedItemName),
    /// A reference carrying an ID string, a name, and an optional version;
    /// displayed as `[<id> AS <name>]`, with ` VERSION <version>` appended
    /// before the closing bracket when a version is present.
    Id(String, UnresolvedItemName, Option<Version>),
}
93
94impl RawItemName {
95    pub fn name(&self) -> &UnresolvedItemName {
96        match self {
97            RawItemName::Name(name) => name,
98            RawItemName::Id(_, name, _) => name,
99        }
100    }
101
102    pub fn name_mut(&mut self) -> &mut UnresolvedItemName {
103        match self {
104            RawItemName::Name(name) => name,
105            RawItemName::Id(_, name, _) => name,
106        }
107    }
108}
109
110impl AstDisplay for RawItemName {
111    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
112        match self {
113            RawItemName::Name(o) => f.write_node(o),
114            RawItemName::Id(id, o, v) => {
115                // `id` is parsed from an identifier token (`[<id> AS …]`), so a
116                // crafted id with spaces/keywords must be quoted to reparse as a
117                // single identifier. A normal global id like `u1` is printed
118                // bare in every mode — including the stable mode `pg_get_viewdef`
119                // renders in, which would otherwise force-quote it.
120                f.write_str("[");
121                let id = Ident::new_unchecked(id.clone());
122                if id.can_be_printed_bare() {
123                    f.write_str(id.as_str());
124                } else {
125                    f.write_node(&id);
126                }
127                f.write_str(" AS ");
128                f.write_node(o);
129                if let Some(v) = v {
130                    f.write_str(" VERSION ");
131                    f.write_node(v);
132                }
133                f.write_str("]");
134            }
135        }
136    }
137}
138impl_display!(RawItemName);
139
140impl<T> FoldNode<Raw, T> for RawItemName
141where
142    T: AstInfo,
143{
144    type Folded = T::ItemName;
145
146    fn fold<F>(self, f: &mut F) -> Self::Folded
147    where
148        F: Fold<Raw, T>,
149    {
150        f.fold_item_name(self)
151    }
152}
153
/// A reference to a cluster as it appears in the raw AST.
#[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone)]
pub enum RawClusterName {
    /// A cluster referred to by an identifier; displayed as that identifier.
    Unresolved(Ident),
    /// A cluster referred to by a string that is displayed wrapped in square
    /// brackets (`[<string>]`).
    // NOTE(review): presumably the string is a cluster ID — confirm.
    Resolved(String),
}
159
160impl AstDisplay for RawClusterName {
161    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
162        match self {
163            RawClusterName::Unresolved(id) => f.write_node(id),
164            RawClusterName::Resolved(id) => {
165                f.write_str(format!("[{}]", id));
166            }
167        }
168    }
169}
170impl_display!(RawClusterName);
171
172impl<T> FoldNode<Raw, T> for RawClusterName
173where
174    T: AstInfo,
175{
176    type Folded = T::ClusterName;
177
178    fn fold<F>(self, f: &mut F) -> Self::Folded
179    where
180        F: Fold<Raw, T>,
181    {
182        f.fold_cluster_name(self)
183    }
184}
185
/// A reference to a network policy as it appears in the raw AST.
#[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone)]
pub enum RawNetworkPolicyName {
    /// A network policy referred to by an identifier; displayed as that
    /// identifier.
    Unresolved(Ident),
    /// A network policy referred to by a string that is displayed wrapped in
    /// square brackets (`[<string>]`).
    // NOTE(review): presumably the string is a network policy ID — confirm.
    Resolved(String),
}
191
192impl AstDisplay for RawNetworkPolicyName {
193    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
194        match self {
195            RawNetworkPolicyName::Unresolved(id) => f.write_node(id),
196            RawNetworkPolicyName::Resolved(id) => {
197                f.write_str(format!("[{}]", id));
198            }
199        }
200    }
201}
202impl_display!(RawNetworkPolicyName);
203
204impl<T> FoldNode<Raw, T> for RawNetworkPolicyName
205where
206    T: AstInfo,
207{
208    type Folded = T::NetworkPolicyName;
209
210    fn fold<F>(self, f: &mut F) -> Self::Folded
211    where
212        F: Fold<Raw, T>,
213    {
214        f.fold_network_policy_name(self)
215    }
216}
217
/// SQL data types as they appear in the raw AST.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum RawDataType {
    /// An array of the inner type; displayed as `<type>[]`.
    Array(Box<RawDataType>),
    /// A list of the inner type; displayed as `<type> list`.
    List(Box<RawDataType>),
    /// A map; displayed as `map[<key_type>=><value_type>]`.
    Map {
        /// The type of the map's keys.
        key_type: Box<RawDataType>,
        /// The type of the map's values.
        value_type: Box<RawDataType>,
    },
    /// Types that don't embed other types, e.g. INT.
    Other {
        /// The name of the type.
        name: RawItemName,
        /// Type modifiers appended to the type name, e.g. `numeric(38,0)`.
        typ_mod: Vec<i64>,
    },
}
237
238impl AstDisplay for RawDataType {
239    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
240        match self {
241            RawDataType::Array(ty) => {
242                f.write_node(&ty);
243                f.write_str("[]");
244            }
245            RawDataType::List(ty) => {
246                f.write_node(&ty);
247                f.write_str(" list");
248            }
249            RawDataType::Map {
250                key_type,
251                value_type,
252            } => {
253                f.write_str("map[");
254                f.write_node(&key_type);
255                f.write_str("=>");
256                f.write_node(&value_type);
257                f.write_str("]");
258            }
259            RawDataType::Other { name, typ_mod } => {
260                // If the first component of the type name clashes with a
261                // keyword that `parse_data_type` either dispatches into a
262                // special grammar (`map[...]`) or canonicalizes to a
263                // different spelling (`string` → `text`, `bigint` → `int8`,
264                // …), an unquoted emit would reparse to a different AST —
265                // either the canonicalizing branch fires and replaces the
266                // name outright, or (for a qualified name) the dispatch
267                // consumes the first part before the rest reaches the
268                // type-name parser. Force the always-quoted stable form in
269                // those cases. Keywords whose canonicalized name matches
270                // the keyword text itself (`bpchar`, `varchar`, `time`,
271                // `timestamp`, `timestamptz`) round-trip unquoted via the
272                // keyword path, so they're omitted here.
273                let first_ident_clashes = name
274                    .name()
275                    .0
276                    .first()
277                    .and_then(|id| id.as_keyword())
278                    .map(data_type_keyword_needs_quoting)
279                    .unwrap_or(false);
280                if first_ident_clashes {
281                    f.write_str(&name.to_ast_string_stable());
282                } else {
283                    f.write_node(name);
284                }
285                if typ_mod.len() > 0 {
286                    f.write_str("(");
287                    f.write_node(&display::comma_separated(typ_mod));
288                    f.write_str(")");
289                }
290            }
291        }
292    }
293}
294impl_display!(RawDataType);
295
296/// Reports whether `kw`, as the first component of a data type name, would be
297/// reparsed differently by `Parser::parse_data_type` — either
298/// dispatched into a special grammar (`map[...]`) or canonicalized to a
299/// different spelling (`string` → `text`, `bigint` → `int8`, …). Such names
300/// must be emitted always-quoted to round-trip.
301///
302/// Keywords that `parse_data_type` parses back to themselves verbatim
303/// (`bpchar`, `varchar`, `time`, `timestamp`, `timestamptz`) round-trip
304/// unquoted and are intentionally excluded. Keep this in sync with the keyword
305/// arms of `parse_data_type`.
306fn data_type_keyword_needs_quoting(kw: mz_sql_lexer::keywords::Keyword) -> bool {
307    use mz_sql_lexer::keywords::*;
308    matches!(
309        kw,
310        MAP | STRING
311            | BIGINT
312            | SMALLINT
313            | DEC
314            | DECIMAL
315            | DOUBLE
316            | FLOAT
317            | INT
318            | INTEGER
319            | REAL
320            | BOOLEAN
321            | BYTES
322            | JSON
323            | CHAR
324            | CHARACTER
325    )
326}
327
328impl<T> FoldNode<Raw, T> for RawDataType
329where
330    T: AstInfo,
331{
332    type Folded = T::DataType;
333
334    fn fold<F>(self, f: &mut F) -> Self::Folded
335    where
336        F: Fold<Raw, T>,
337    {
338        f.fold_data_type(self)
339    }
340}