Skip to main content

mz_sql_lexer/
keywords.rs

1// Copyright 2018 sqlparser-rs contributors. All rights reserved.
2// Copyright Materialize, Inc. and contributors. All rights reserved.
3//
4// This file is derived from the sqlparser-rs project, available at
5// https://github.com/andygrove/sqlparser-rs. It was incorporated
6// directly into Materialize on December 21, 2019.
7//
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License in the LICENSE file at the
11// root of this repository, or online at
12//
13//     http://www.apache.org/licenses/LICENSE-2.0
14//
15// Unless required by applicable law or agreed to in writing, software
16// distributed under the License is distributed on an "AS IS" BASIS,
17// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18// See the License for the specific language governing permissions and
19// limitations under the License.
20
21use std::fmt;
22use std::str::FromStr;
23
24use uncased::UncasedStr;
25
26// The `Keyword` type and the keyword constants are automatically generated from
27// the list in keywords.txt by the crate's build script.
28//
29// We go to the trouble of code generation primarily to create a "perfect hash
30// function" at compile time via the phf crate, which enables very fast,
31// case-insensitive keyword parsing. From there it's easy to generate a few
32// more convenience functions and accessors.
33//
34// If the only keywords were `Insert` and `Select`, we'd generate the following
35// code:
36//
37//     pub enum Keyword {
38//         Insert,
39//         Select,
40//     }
41//
42//     pub const INSERT: Keyword = Keyword::Insert;
43//     pub const SELECT: Keyword = Keyword::Select;
44//
45//     impl Keyword {
46//         pub fn as_str(&self) -> &'static str {
47//             match self {
48//                 Keyword::Insert => "INSERT",
49//                 Keyword::Select => "SELECT",
50//             }
51//         }
52//     }
53//
54//     static KEYWORDS: phf::Map<&'static UncasedStr, Keyword> = { /* ... */ };
55//
56include!(concat!(env!("OUT_DIR"), "/keywords.rs"));
57
58impl Keyword {
59    /// Reports whether this keyword requires quoting when used as an
60    /// identifier in any context.
61    ///
62    /// The only exception to the rule is when the keyword follows `AS` in a
63    /// column or table alias.
64    pub fn is_always_reserved(self) -> bool {
65        matches!(
66            self,
67            // Keywords that can appear at the top-level of a SELECT
68            // statement.
69            WITH | SELECT | FROM | WHERE | GROUP | HAVING |
70            QUALIFY | WINDOW | ORDER | LIMIT | OFFSET | FETCH |
71            OPTIONS | RETURNING |
72            // Set operations.
73            UNION | EXCEPT | INTERSECT
74        )
75    }
76
77    /// Reports whether this keyword begins a query body (`SELECT`, `VALUES`,
78    /// `TABLE …`, etc.).
79    ///
80    /// When `AstDisplay` parenthesizes an expression (e.g. to disambiguate a
81    /// field access like `(expr).f`) and that expression's leading token is a
82    /// bare identifier with one of these names, the re-parser treats the
83    /// parentheses as a subquery and the identifier as its leading clause
84    /// (e.g. `(table & x)` parses as a `TABLE`-query). Such identifiers must be
85    /// quoted to round-trip. `SELECT`/`WITH` are already `is_always_reserved`.
86    pub fn begins_query_body(self) -> bool {
87        matches!(self, WITH | SELECT | VALUES | SHOW | TABLE)
88    }
89
90    /// Reports whether this keyword requires quoting when used in scalar expressions.
91    ///
92    /// These are the keywords `Parser::parse_prefix` won't parse as an identifier.
93    /// (Note that for some keywords `parse_prefix` checks whether they are followed by an opening
94    /// parenthesis before treating them as keywords. These keywords do not need to be marked as
95    /// reserved here.)
96    ///
97    /// This refers to the PostgreSQL notion of "reserved" keywords,
98    /// which generally refers to built in tables, functions, and
99    /// constructs that cannot be used as identifiers without quoting.
100    /// See <https://www.postgresql.org/docs/current/sql-keywords-appendix.html>
101    /// for more details.
102    pub fn is_reserved_in_scalar_expression(self) -> bool {
103        matches!(self, TRUE | FALSE | NULL | ARRAY | CASE | CAST | NOT) || self.is_always_reserved()
104    }
105
106    /// Reports whether this keyword requires quoting when used as a table
107    /// alias.
108    ///
109    /// Note that this rule is only applies when the table alias is "bare";
110    /// i.e., when the table alias is not preceded by `AS`.
111    ///
112    /// Ensures that `FROM <table_name> <table_alias>` can be parsed
113    /// unambiguously.
114    pub fn is_reserved_in_table_alias(self) -> bool {
115        matches!(
116            self,
117            // These keywords are ambiguous when used as a table alias, as they
118            // conflict with the syntax for joins.
119            ON | JOIN | INNER | CROSS | FULL | LEFT | RIGHT | NATURAL | USING |
120            // Needed for UPDATE.
121            SET |
122            // `OUTER` is not strictly ambiguous, but it prevents `a OUTER JOIN
123            // b` from parsing as `a AS outer JOIN b`, instead producing a nice
124            // syntax error.
125            OUTER
126        ) || self.is_always_reserved()
127    }
128
129    /// Reports whether this keyword requires quoting when used as a column
130    /// alias.
131    ///
132    /// Note that this rule is only applies when the column alias is "bare";
133    /// i.e., when the column alias is not preceded by `AS`.
134    ///
135    /// Ensures that `SELECT <column_name> <column_alias>` can be parsed
136    /// unambiguously.
137    pub fn is_reserved_in_column_alias(self) -> bool {
138        matches!(
139            self,
140            // These timelike keywords conflict with interval timeframe
141            // suffixes. They are not strictly ambiguous, but marking them
142            // reserved prevents e.g. `SELECT pg_catalog.interval '1' year` from
143            // parsing as `SELECT pg_catalog.interval '1' AS YEAR`.
144            YEAR | MONTH | DAY | HOUR | MINUTE | SECOND
145        ) || self.is_always_reserved()
146    }
147
148    /// Reports whether a keyword is considered reserved in any context:
149    /// either in table aliases, column aliases, or in all contexts.
150    pub fn is_sometimes_reserved(self) -> bool {
151        self.is_always_reserved()
152            || self.is_reserved_in_table_alias()
153            || self.is_reserved_in_column_alias()
154            || self.is_reserved_in_scalar_expression()
155    }
156
157    /// Reports whether a keyword has a special parser-dispatch form (e.g.
158    /// `POSITION(expr IN expr)`, `MAP[K => V]`) such that an unquoted
159    /// occurrence in expression position triggers the special grammar
160    /// rather than parsing as a plain identifier. The parser itself
161    /// disambiguates by looking at the next token, but `AstDisplay` has no
162    /// such context — so when emitting an `Ident` whose name matches one
163    /// of these, we force quoting to keep the round trip stable.
164    pub fn is_context_sensitive_keyword(self) -> bool {
165        matches!(
166            self,
167            ALL | ANY
168                | COALESCE
169                | EXISTS
170                | EXTRACT
171                | GREATEST
172                | LEAST
173                | MAP
174                | NORMALIZE
175                | NULLIF
176                | POSITION
177                | ROW
178                | SOME
179                | SUBSTRING
180                | TRIM
181        )
182    }
183}
184
185impl FromStr for Keyword {
186    type Err = ();
187
188    fn from_str(s: &str) -> Result<Keyword, ()> {
189        match KEYWORDS.get(UncasedStr::new(s)) {
190            Some(kw) => Ok(*kw),
191            None => Err(()),
192        }
193    }
194}
195
196impl fmt::Display for Keyword {
197    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
198        f.write_str(self.as_str())
199    }
200}