Skip to main content

mz_sql_parser/ast/defs/
name.rs

1// Copyright 2018 sqlparser-rs contributors. All rights reserved.
2// Copyright Materialize, Inc. and contributors. All rights reserved.
3//
4// This file is derived from the sqlparser-rs project, available at
5// https://github.com/andygrove/sqlparser-rs. It was incorporated
6// directly into Materialize on December 21, 2019.
7//
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License in the LICENSE file at the
11// root of this repository, or online at
12//
13//     http://www.apache.org/licenses/LICENSE-2.0
14//
15// Unless required by applicable law or agreed to in writing, software
16// distributed under the License is distributed on an "AS IS" BASIS,
17// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18// See the License for the specific language governing permissions and
19// limitations under the License.
20
21use mz_ore::str::StrExt;
22use mz_sql_lexer::keywords::{ALL, ANY, AS, DISTINCT, INTO, Keyword, LIST, PREPARE, SOME, WHEN};
23use mz_sql_lexer::lexer::{IdentString, MAX_IDENTIFIER_LENGTH};
24use serde::{Deserialize, Serialize};
25use std::fmt;
26
27use crate::ast::display::{self, AstDisplay, AstFormatter};
28use crate::ast::{AstInfo, QualifiedReplica};
29
30/// An identifier.
31#[derive(
32    Debug,
33    Clone,
34    PartialEq,
35    Eq,
36    Hash,
37    PartialOrd,
38    Ord,
39    Serialize,
40    Deserialize
41)]
42pub struct Ident(pub(crate) String);
43
44impl Ident {
45    /// Maximum length of an identifier in Materialize.
46    pub const MAX_LENGTH: usize = MAX_IDENTIFIER_LENGTH;
47
48    /// Create a new [`Ident`] with the given value, checking our invariants.
49    ///
50    /// # Examples
51    ///
52    /// ```
53    /// use mz_sql_parser::ast::Ident;
54    ///
55    /// let id = Ident::new("hello_world").unwrap();
56    /// assert_eq!(id.as_str(), "hello_world");
57    ///
58    /// let too_long = "I am a very long identifier that is more than 255 bytes long which is the max length for idents.\
59    /// 😊😁😅😂😬🍻😮‍💨😮🗽🛰️🌈😊😁😅😂😬🍻😮‍💨😮🗽🛰️🌈😊😁😅😂😬🍻😮‍💨😮🗽🛰️🌈";
60    /// assert_eq!(too_long.len(), 258);
61    ///
62    /// let too_long_id = Ident::new(too_long);
63    /// assert!(too_long_id.is_err());
64    ///
65    /// let invalid_name_dot = Ident::new(".");
66    /// assert!(invalid_name_dot.is_err());
67    ///
68    /// let invalid_name_dot_dot = Ident::new("..");
69    /// assert!(invalid_name_dot_dot.is_err());
70    /// ```
71    ///
72    pub fn new<S>(s: S) -> Result<Self, IdentError>
73    where
74        S: TryInto<IdentString>,
75        <S as TryInto<IdentString>>::Error: fmt::Display,
76    {
77        let s = s
78            .try_into()
79            .map_err(|e| IdentError::TooLong(e.to_string()))?;
80
81        if &*s == "." || &*s == ".." {
82            return Err(IdentError::Invalid(s.into_inner()));
83        }
84
85        Ok(Ident(s.into_inner()))
86    }
87
88    /// Create a new [`Ident`] modifying the given value as necessary to meet our invariants.
89    ///
90    /// # Examples
91    ///
92    /// ```
93    /// use mz_sql_parser::ast::Ident;
94    ///
95    /// let too_long = "🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
96    /// 🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵\
97    /// 🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴";
98    ///
99    /// let id = Ident::new_lossy(too_long);
100    ///
101    /// // `new_lossy` will truncate the provided string, since it's too long. Note the missing
102    /// // `🔴` characters.
103    /// assert_eq!(id.as_str(), "🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵");
104    /// ```
105    pub fn new_lossy<S: Into<String>>(value: S) -> Self {
106        let s: String = value.into();
107        if s.len() <= Self::MAX_LENGTH {
108            return Ident(s);
109        }
110
111        let mut byte_length = 0;
112        let s_truncated = s
113            .chars()
114            .take_while(|c| {
115                byte_length += c.len_utf8();
116                byte_length <= Self::MAX_LENGTH
117            })
118            .collect();
119
120        Ident(s_truncated)
121    }
122
123    /// Create a new [`Ident`] _without checking any of our invariants_.
124    ///
125    /// NOTE: Generally you __should not use this function__! If you're trying to create an
126    /// [`Ident`] from a `&'static str` you know is valid, use the [`ident!`] macro. For all other
127    /// use cases, see [`Ident::new`] which correctly checks our invariants.
128    ///
129    /// [`ident!`]: [`mz_sql_parser::ident`]
130    pub fn new_unchecked<S: Into<String>>(value: S) -> Self {
131        let s = value.into();
132        mz_ore::soft_assert_no_log!(s.len() <= Self::MAX_LENGTH);
133
134        Ident(s)
135    }
136
137    /// Generate a valid [`Ident`] with the provided `prefix` and `suffix`.
138    ///
139    /// # Examples
140    ///
141    /// ```
142    /// use mz_sql_parser::ast::{Ident, IdentError};
143    ///
144    /// let good_id =
145    ///   Ident::try_generate_name("hello", "_world", |_| Ok::<_, IdentError>(true)).unwrap();
146    /// assert_eq!(good_id.as_str(), "hello_world");
147    ///
148    /// // Return invalid once.
149    /// let mut attempts = 0;
150    /// let one_failure = Ident::try_generate_name("hello", "_world", |_candidate| {
151    ///     if attempts == 0 {
152    ///         attempts += 1;
153    ///         Ok::<_, IdentError>(false)
154    ///     } else {
155    ///         Ok(true)
156    ///     }
157    /// })
158    /// .unwrap();
159    ///
160    /// // "hello_world" was rejected once, so "_1" was appended.
161    /// assert_eq!(one_failure.as_str(), "hello_world_1");
162    /// ```
163    pub fn try_generate_name<P, S, F, E>(prefix: P, suffix: S, mut is_valid: F) -> Result<Self, E>
164    where
165        P: Into<String>,
166        S: Into<String>,
167        E: From<IdentError>,
168        F: FnMut(&Ident) -> Result<bool, E>,
169    {
170        const MAX_ATTEMPTS: usize = 1000;
171
172        let prefix: String = prefix.into();
173        let suffix: String = suffix.into();
174
175        // First just append the prefix and suffix.
176        let mut candidate = Ident(prefix.clone());
177        candidate.append_lossy(suffix.clone());
178        if is_valid(&candidate)? {
179            return Ok(candidate);
180        }
181
182        // Otherwise, append a number to the back.
183        for i in 1..MAX_ATTEMPTS {
184            let mut candidate = Ident(prefix.clone());
185            candidate.append_lossy(format!("{suffix}_{i}"));
186
187            if is_valid(&candidate)? {
188                return Ok(candidate);
189            }
190        }
191
192        // Couldn't find any valid name!
193        Err(E::from(IdentError::FailedToGenerate {
194            prefix,
195            suffix,
196            attempts: MAX_ATTEMPTS,
197        }))
198    }
199
200    /// Append the provided `suffix`, truncating `self` as necessary to satisfy our invariants.
201    ///
202    /// Note: We soft-assert that the provided `suffix` is not too long, if it is, we'll
203    /// truncate it.
204    ///
205    /// # Examples
206    ///
207    /// ```
208    /// use mz_sql_parser::{
209    ///     ident,
210    ///     ast::Ident,
211    /// };
212    ///
213    /// let mut id = ident!("🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵");
214    /// id.append_lossy("🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴");
215    ///
216    /// // We truncated the original ident, removing all '🔵' chars.
217    /// assert_eq!(id.as_str(), "🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴");
218    /// ```
219    ///
220    /// ### Too long suffix
221    /// If the provided suffix is too long, we'll also truncate that.
222    ///
223    /// ```
224    /// # mz_ore::assert::SOFT_ASSERTIONS.store(false, std::sync::atomic::Ordering::Relaxed);
225    /// use mz_sql_parser::{
226    ///     ident,
227    ///     ast::Ident,
228    /// };
229    ///
230    /// let mut stem = ident!("hello___world");
231    ///
232    /// let too_long_suffix = "\
233    /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
234    /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
235    /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
236    /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🔵\
237    /// ";
238    ///
239    /// stem.append_lossy(too_long_suffix);
240    ///
241    /// // Notice the "hello___world" stem got truncated, as did the "🔵🔵" characters from the suffix.
242    /// let result = "hello___wor\
243    /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
244    /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
245    /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
246    /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
247    /// ";
248    /// assert_eq!(stem.as_str(), result);
249    /// ```
250    pub fn append_lossy<S: Into<String>>(&mut self, suffix: S) {
251        // Make sure our suffix at least leaves a bit of room for the original ident.
252        const MAX_SUFFIX_LENGTH: usize = Ident::MAX_LENGTH - 8;
253
254        let mut suffix: String = suffix.into();
255        mz_ore::soft_assert_or_log!(suffix.len() <= MAX_SUFFIX_LENGTH, "suffix too long");
256
257        // Truncate the suffix as necessary.
258        if suffix.len() > MAX_SUFFIX_LENGTH {
259            let mut byte_length = 0;
260            suffix = suffix
261                .chars()
262                .take_while(|c| {
263                    byte_length += c.len_utf8();
264                    byte_length <= MAX_SUFFIX_LENGTH
265                })
266                .collect();
267        }
268
269        // Truncate ourselves as necessary.
270        let available_length = Ident::MAX_LENGTH - suffix.len();
271        if self.0.len() > available_length {
272            let mut byte_length = 0;
273            self.0 = self
274                .0
275                .chars()
276                .take_while(|c| {
277                    byte_length += c.len_utf8();
278                    byte_length <= available_length
279                })
280                .collect();
281        }
282
283        // Append the suffix.
284        self.0.push_str(&suffix);
285    }
286
287    /// Reports whether the identifier matches the regex `[a-z_][a-z0-9_]*`,
288    /// i.e. it is composed only of characters that never require quoting.
289    ///
290    /// This is the character-level half of [`Ident::can_be_printed_bare`]. It
291    /// deliberately does *not* consider keywords: whether a keyword-named
292    /// identifier needs quoting depends on the surrounding grammar (a
293    /// reparsing concern), not on its characters. Contexts that only need
294    /// legible, unambiguous output — rather than a SQL round-trip — should use
295    /// this instead (see `HumanizedExplain::humanize_ident`).
296    pub fn has_only_bare_chars(&self) -> bool {
297        let mut chars = self.0.chars();
298        chars
299            .next()
300            .map(|ch| matches!(ch, 'a'..='z' | '_'))
301            .unwrap_or(false)
302            && chars.all(|ch| matches!(ch, 'a'..='z' | '0'..='9' | '_'))
303    }
304
305    /// An identifier can be printed in bare mode if
306    ///  * it matches the regex `[a-z_][a-z0-9_]*` and
307    ///  * it is not a "reserved keyword."
308    pub fn can_be_printed_bare(&self) -> bool {
309        self.has_only_bare_chars()
310            && !self
311                .as_keyword()
312                .map(|kw| {
313                    kw.is_sometimes_reserved()
314                        || kw.begins_query_body()
315                        // `AS` at the start of a SELECT item is consumed as the
316                        // `AS OF` timestamp keyword (an empty projection), so a
317                        // bare `as` identifier/function name fails to reparse.
318                        || kw == AS
319                        // `ANY`/`ALL`/`SOME` after a comparison operator start a
320                        // quantified-comparison (`x op ANY (...)`), so a bare such
321                        // identifier — e.g. `0 # some` — reparses as the start of a
322                        // quantifier rather than an identifier.
323                        || matches!(kw, ANY | ALL | SOME)
324                        // `ALL`/`DISTINCT` right after `SELECT` are consumed as the
325                        // projection quantifier, so a bare `"all"` / `"distinct"`
326                        // column reference reparses to a quantifier with an empty
327                        // projection instead of an identifier. (`ALL` is already
328                        // covered above; quoting these keeps display-only — unlike
329                        // marking them always-reserved, which also rejects `WHERE
330                        // distinct = 1` at parse time.)
331                        || kw == DISTINCT
332                        // `LIST` followed by `[` re-lexes as a `LIST[...]` literal
333                        // (`list[1]` is a valid one-element list), so a bare `list`
334                        // identifier that gets subscripted — `"list"[1]` — would
335                        // reparse as a list literal instead of a subscript. (`ARRAY`
336                        // is reserved-in-scalar-expression and so already quoted;
337                        // `MAP[...]` requires `=>`, so `map[1]` is unambiguously a
338                        // subscript.)
339                        || kw == LIST
340                        // `DEALLOCATE [PREPARE] <name>` accepts an optional
341                        // `PREPARE` keyword before the name, so a bare `prepare`
342                        // name is consumed as that keyword on reparse, leaving no
343                        // name (`DEALLOCATE prepare` -> `DEALLOCATE` + the optional
344                        // keyword + a missing name).
345                        || kw == PREPARE
346                        // `CASE` treats a leading `WHEN` as the start of the
347                        // first arm (a searched `CASE` with no operand), so a
348                        // bare `when` identifier used as the `CASE` operand —
349                        // `CASE when.a WHEN ...` — reparses as `CASE WHEN .a ...`
350                        // ("expected an expression, found dot"). Quoting it keeps
351                        // the operand an identifier.
352                        || kw == WHEN
353                        // `COPY [INTO] <table> FROM …` accepts an optional `INTO`
354                        // keyword before the relation name, so a bare `into`
355                        // relation is consumed as that keyword on reparse
356                        // (`COPY into FROM x` -> `COPY INTO <name=from> …`, which
357                        // then fails expecting the FROM/TO direction).
358                        || kw == INTO
359                })
360                .unwrap_or(false)
361    }
362
363    pub fn as_str(&self) -> &str {
364        &self.0
365    }
366
367    pub fn as_keyword(&self) -> Option<Keyword> {
368        self.0.parse().ok()
369    }
370
371    pub fn into_string(self) -> String {
372        self.0
373    }
374}
375
376/// More-or-less a direct translation of the Postgres function for doing the same thing:
377///
378///   <https://github.com/postgres/postgres/blob/master/src/backend/utils/adt/ruleutils.c#L10730-L10812>
379///
380/// Quotation is forced when printing in Stable mode.
381impl AstDisplay for Ident {
382    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
383        if self.can_be_printed_bare() && !f.stable() {
384            f.write_str(&self.0);
385        } else {
386            f.write_str("\"");
387            for ch in self.0.chars() {
388                // Double up on double-quotes.
389                if ch == '"' {
390                    f.write_str("\"");
391                }
392                f.write_str(ch);
393            }
394            f.write_str("\"");
395        }
396    }
397}
398impl_display!(Ident);
399
400#[derive(Clone, Debug, thiserror::Error)]
401pub enum IdentError {
402    #[error("identifier too long (len: {}, max: {}, value: {})", .0.len(), Ident::MAX_LENGTH, .0.quoted())]
403    TooLong(String),
404    #[error(
405        "failed to generate identifier with prefix '{prefix}' and suffix '{suffix}' after {attempts} attempts"
406    )]
407    FailedToGenerate {
408        prefix: String,
409        suffix: String,
410        attempts: usize,
411    },
412
413    #[error("invalid identifier: {}", .0.quoted())]
414    Invalid(String),
415}
416
417/// A name of a table, view, custom type, etc. that lives in a schema, possibly multi-part, i.e. db.schema.obj
418#[derive(
419    Debug,
420    Clone,
421    PartialEq,
422    Eq,
423    Hash,
424    PartialOrd,
425    Ord,
426    Serialize,
427    Deserialize
428)]
429pub struct UnresolvedItemName(pub Vec<Ident>);
430
431pub enum CatalogName {
432    ItemName(Vec<Ident>),
433    FuncName(Vec<Ident>),
434}
435
436impl UnresolvedItemName {
437    /// Creates an `ItemName` with a single [`Ident`], i.e. it appears as
438    /// "unqualified".
439    pub fn unqualified(ident: Ident) -> UnresolvedItemName {
440        UnresolvedItemName(vec![ident])
441    }
442
443    /// Creates an `ItemName` with an [`Ident`] for each element of `n`.
444    ///
445    /// Panics if passed an in ineligible `&[&str]` whose length is 0 or greater
446    /// than 3.
447    pub fn qualified(n: &[Ident]) -> UnresolvedItemName {
448        assert!(n.len() <= 3 && n.len() > 0);
449        UnresolvedItemName(n.iter().cloned().collect::<Vec<_>>())
450    }
451}
452
453impl AstDisplay for UnresolvedItemName {
454    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
455        display::separated(&self.0, ".").fmt(f);
456    }
457}
458impl_display!(UnresolvedItemName);
459
460impl AstDisplay for &UnresolvedItemName {
461    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
462        display::separated(&self.0, ".").fmt(f);
463    }
464}
465
466/// A name of a schema
467#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
468pub struct UnresolvedSchemaName(pub Vec<Ident>);
469
470impl AstDisplay for UnresolvedSchemaName {
471    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
472        display::separated(&self.0, ".").fmt(f);
473    }
474}
475impl_display!(UnresolvedSchemaName);
476
477/// A name of a database
478#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
479pub struct UnresolvedDatabaseName(pub Ident);
480
481impl AstDisplay for UnresolvedDatabaseName {
482    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
483        f.write_node(&self.0);
484    }
485}
486impl_display!(UnresolvedDatabaseName);
487
488// The name of an item not yet created during name resolution, which should be
489// resolveable as an item name later.
490#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
491pub enum DeferredItemName<T: AstInfo> {
492    Named(T::ItemName),
493    Deferred(UnresolvedItemName),
494}
495
496impl<T: AstInfo> AstDisplay for DeferredItemName<T> {
497    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
498        match self {
499            DeferredItemName::Named(o) => f.write_node(o),
500            DeferredItemName::Deferred(o) => f.write_node(o),
501        }
502    }
503}
504impl_display_t!(DeferredItemName);
505
506#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
507pub enum UnresolvedObjectName {
508    Cluster(Ident),
509    ClusterReplica(QualifiedReplica),
510    Database(UnresolvedDatabaseName),
511    Schema(UnresolvedSchemaName),
512    Role(Ident),
513    Item(UnresolvedItemName),
514    NetworkPolicy(Ident),
515}
516
517impl AstDisplay for UnresolvedObjectName {
518    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
519        match self {
520            UnresolvedObjectName::Cluster(n) => f.write_node(n),
521            UnresolvedObjectName::ClusterReplica(n) => f.write_node(n),
522            UnresolvedObjectName::Database(n) => f.write_node(n),
523            UnresolvedObjectName::Schema(n) => f.write_node(n),
524            UnresolvedObjectName::Role(n) => f.write_node(n),
525            UnresolvedObjectName::Item(n) => f.write_node(n),
526            UnresolvedObjectName::NetworkPolicy(n) => f.write_node(n),
527        }
528    }
529}
530impl_display!(UnresolvedObjectName);