mz_sql_parser/ast/defs/name.rs
1// Copyright 2018 sqlparser-rs contributors. All rights reserved.
2// Copyright Materialize, Inc. and contributors. All rights reserved.
3//
4// This file is derived from the sqlparser-rs project, available at
5// https://github.com/andygrove/sqlparser-rs. It was incorporated
6// directly into Materialize on December 21, 2019.
7//
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License in the LICENSE file at the
11// root of this repository, or online at
12//
13// http://www.apache.org/licenses/LICENSE-2.0
14//
15// Unless required by applicable law or agreed to in writing, software
16// distributed under the License is distributed on an "AS IS" BASIS,
17// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18// See the License for the specific language governing permissions and
19// limitations under the License.
20
21use mz_ore::str::StrExt;
22use mz_sql_lexer::keywords::{ALL, ANY, AS, DISTINCT, INTO, Keyword, LIST, PREPARE, SOME, WHEN};
23use mz_sql_lexer::lexer::{IdentString, MAX_IDENTIFIER_LENGTH};
24use serde::{Deserialize, Serialize};
25use std::fmt;
26
27use crate::ast::display::{self, AstDisplay, AstFormatter};
28use crate::ast::{AstInfo, QualifiedReplica};
29
/// An identifier.
///
/// This is a newtype around the identifier's text. The inner `String` is only
/// visible within this crate; use [`Ident::as_str`] or [`Ident::into_string`]
/// to read it, and one of the constructors on [`Ident`] (e.g. [`Ident::new`],
/// which checks our invariants) to create one.
#[derive(
    Debug,
    Clone,
    PartialEq,
    Eq,
    Hash,
    PartialOrd,
    Ord,
    Serialize,
    Deserialize
)]
pub struct Ident(pub(crate) String);
43
44impl Ident {
/// Maximum length of an identifier in Materialize, measured in bytes.
///
/// Mirrors the lexer's `MAX_IDENTIFIER_LENGTH`.
pub const MAX_LENGTH: usize = MAX_IDENTIFIER_LENGTH;
47
48 /// Create a new [`Ident`] with the given value, checking our invariants.
49 ///
50 /// # Examples
51 ///
52 /// ```
53 /// use mz_sql_parser::ast::Ident;
54 ///
55 /// let id = Ident::new("hello_world").unwrap();
56 /// assert_eq!(id.as_str(), "hello_world");
57 ///
58 /// let too_long = "I am a very long identifier that is more than 255 bytes long which is the max length for idents.\
59 /// 😊😁😅😂😬🍻😮💨😮🗽🛰️🌈😊😁😅😂😬🍻😮💨😮🗽🛰️🌈😊😁😅😂😬🍻😮💨😮🗽🛰️🌈";
60 /// assert_eq!(too_long.len(), 258);
61 ///
62 /// let too_long_id = Ident::new(too_long);
63 /// assert!(too_long_id.is_err());
64 ///
65 /// let invalid_name_dot = Ident::new(".");
66 /// assert!(invalid_name_dot.is_err());
67 ///
68 /// let invalid_name_dot_dot = Ident::new("..");
69 /// assert!(invalid_name_dot_dot.is_err());
70 /// ```
71 ///
72 pub fn new<S>(s: S) -> Result<Self, IdentError>
73 where
74 S: TryInto<IdentString>,
75 <S as TryInto<IdentString>>::Error: fmt::Display,
76 {
77 let s = s
78 .try_into()
79 .map_err(|e| IdentError::TooLong(e.to_string()))?;
80
81 if &*s == "." || &*s == ".." {
82 return Err(IdentError::Invalid(s.into_inner()));
83 }
84
85 Ok(Ident(s.into_inner()))
86 }
87
88 /// Create a new [`Ident`] modifying the given value as necessary to meet our invariants.
89 ///
90 /// # Examples
91 ///
92 /// ```
93 /// use mz_sql_parser::ast::Ident;
94 ///
95 /// let too_long = "🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
96 /// 🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵\
97 /// 🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴";
98 ///
99 /// let id = Ident::new_lossy(too_long);
100 ///
101 /// // `new_lossy` will truncate the provided string, since it's too long. Note the missing
102 /// // `🔴` characters.
103 /// assert_eq!(id.as_str(), "🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵");
104 /// ```
105 pub fn new_lossy<S: Into<String>>(value: S) -> Self {
106 let s: String = value.into();
107 if s.len() <= Self::MAX_LENGTH {
108 return Ident(s);
109 }
110
111 let mut byte_length = 0;
112 let s_truncated = s
113 .chars()
114 .take_while(|c| {
115 byte_length += c.len_utf8();
116 byte_length <= Self::MAX_LENGTH
117 })
118 .collect();
119
120 Ident(s_truncated)
121 }
122
123 /// Create a new [`Ident`] _without checking any of our invariants_.
124 ///
125 /// NOTE: Generally you __should not use this function__! If you're trying to create an
126 /// [`Ident`] from a `&'static str` you know is valid, use the [`ident!`] macro. For all other
127 /// use cases, see [`Ident::new`] which correctly checks our invariants.
128 ///
129 /// [`ident!`]: [`mz_sql_parser::ident`]
130 pub fn new_unchecked<S: Into<String>>(value: S) -> Self {
131 let s = value.into();
132 mz_ore::soft_assert_no_log!(s.len() <= Self::MAX_LENGTH);
133
134 Ident(s)
135 }
136
137 /// Generate a valid [`Ident`] with the provided `prefix` and `suffix`.
138 ///
139 /// # Examples
140 ///
141 /// ```
142 /// use mz_sql_parser::ast::{Ident, IdentError};
143 ///
144 /// let good_id =
145 /// Ident::try_generate_name("hello", "_world", |_| Ok::<_, IdentError>(true)).unwrap();
146 /// assert_eq!(good_id.as_str(), "hello_world");
147 ///
148 /// // Return invalid once.
149 /// let mut attempts = 0;
150 /// let one_failure = Ident::try_generate_name("hello", "_world", |_candidate| {
151 /// if attempts == 0 {
152 /// attempts += 1;
153 /// Ok::<_, IdentError>(false)
154 /// } else {
155 /// Ok(true)
156 /// }
157 /// })
158 /// .unwrap();
159 ///
160 /// // We "hello_world" was invalid, so we appended "_1".
161 /// assert_eq!(one_failure.as_str(), "hello_world_1");
162 /// ```
163 pub fn try_generate_name<P, S, F, E>(prefix: P, suffix: S, mut is_valid: F) -> Result<Self, E>
164 where
165 P: Into<String>,
166 S: Into<String>,
167 E: From<IdentError>,
168 F: FnMut(&Ident) -> Result<bool, E>,
169 {
170 const MAX_ATTEMPTS: usize = 1000;
171
172 let prefix: String = prefix.into();
173 let suffix: String = suffix.into();
174
175 // First just append the prefix and suffix.
176 let mut candidate = Ident(prefix.clone());
177 candidate.append_lossy(suffix.clone());
178 if is_valid(&candidate)? {
179 return Ok(candidate);
180 }
181
182 // Otherwise, append a number to the back.
183 for i in 1..MAX_ATTEMPTS {
184 let mut candidate = Ident(prefix.clone());
185 candidate.append_lossy(format!("{suffix}_{i}"));
186
187 if is_valid(&candidate)? {
188 return Ok(candidate);
189 }
190 }
191
192 // Couldn't find any valid name!
193 Err(E::from(IdentError::FailedToGenerate {
194 prefix,
195 suffix,
196 attempts: MAX_ATTEMPTS,
197 }))
198 }
199
200 /// Append the provided `suffix`, truncating `self` as necessary to satisfy our invariants.
201 ///
202 /// Note: We soft-assert that the provided `suffix` is not too long, if it is, we'll
203 /// truncate it.
204 ///
205 /// # Examples
206 ///
207 /// ```
208 /// use mz_sql_parser::{
209 /// ident,
210 /// ast::Ident,
211 /// };
212 ///
213 /// let mut id = ident!("🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵");
214 /// id.append_lossy("🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴");
215 ///
216 /// // We truncated the original ident, removing all '🔵' chars.
217 /// assert_eq!(id.as_str(), "🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴");
218 /// ```
219 ///
220 /// ### Too long suffix
221 /// If the provided suffix is too long, we'll also truncate that.
222 ///
223 /// ```
224 /// # mz_ore::assert::SOFT_ASSERTIONS.store(false, std::sync::atomic::Ordering::Relaxed);
225 /// use mz_sql_parser::{
226 /// ident,
227 /// ast::Ident,
228 /// };
229 ///
230 /// let mut stem = ident!("hello___world");
231 ///
232 /// let too_long_suffix = "\
233 /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
234 /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
235 /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
236 /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🔵\
237 /// ";
238 ///
239 /// stem.append_lossy(too_long_suffix);
240 ///
241 /// // Notice the "hello___world" stem got truncated, as did the "🔵🔵" characters from the suffix.
242 /// let result = "hello___wor\
243 /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
244 /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
245 /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
246 /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
247 /// ";
248 /// assert_eq!(stem.as_str(), result);
249 /// ```
250 pub fn append_lossy<S: Into<String>>(&mut self, suffix: S) {
251 // Make sure our suffix at least leaves a bit of room for the original ident.
252 const MAX_SUFFIX_LENGTH: usize = Ident::MAX_LENGTH - 8;
253
254 let mut suffix: String = suffix.into();
255 mz_ore::soft_assert_or_log!(suffix.len() <= MAX_SUFFIX_LENGTH, "suffix too long");
256
257 // Truncate the suffix as necessary.
258 if suffix.len() > MAX_SUFFIX_LENGTH {
259 let mut byte_length = 0;
260 suffix = suffix
261 .chars()
262 .take_while(|c| {
263 byte_length += c.len_utf8();
264 byte_length <= MAX_SUFFIX_LENGTH
265 })
266 .collect();
267 }
268
269 // Truncate ourselves as necessary.
270 let available_length = Ident::MAX_LENGTH - suffix.len();
271 if self.0.len() > available_length {
272 let mut byte_length = 0;
273 self.0 = self
274 .0
275 .chars()
276 .take_while(|c| {
277 byte_length += c.len_utf8();
278 byte_length <= available_length
279 })
280 .collect();
281 }
282
283 // Append the suffix.
284 self.0.push_str(&suffix);
285 }
286
287 /// Reports whether the identifier matches the regex `[a-z_][a-z0-9_]*`,
288 /// i.e. it is composed only of characters that never require quoting.
289 ///
290 /// This is the character-level half of [`Ident::can_be_printed_bare`]. It
291 /// deliberately does *not* consider keywords: whether a keyword-named
292 /// identifier needs quoting depends on the surrounding grammar (a
293 /// reparsing concern), not on its characters. Contexts that only need
294 /// legible, unambiguous output — rather than a SQL round-trip — should use
295 /// this instead (see `HumanizedExplain::humanize_ident`).
296 pub fn has_only_bare_chars(&self) -> bool {
297 let mut chars = self.0.chars();
298 chars
299 .next()
300 .map(|ch| matches!(ch, 'a'..='z' | '_'))
301 .unwrap_or(false)
302 && chars.all(|ch| matches!(ch, 'a'..='z' | '0'..='9' | '_'))
303 }
304
305 /// An identifier can be printed in bare mode if
306 /// * it matches the regex `[a-z_][a-z0-9_]*` and
307 /// * it is not a "reserved keyword."
308 pub fn can_be_printed_bare(&self) -> bool {
309 self.has_only_bare_chars()
310 && !self
311 .as_keyword()
312 .map(|kw| {
313 kw.is_sometimes_reserved()
314 || kw.begins_query_body()
315 // `AS` at the start of a SELECT item is consumed as the
316 // `AS OF` timestamp keyword (an empty projection), so a
317 // bare `as` identifier/function name fails to reparse.
318 || kw == AS
319 // `ANY`/`ALL`/`SOME` after a comparison operator start a
320 // quantified-comparison (`x op ANY (...)`), so a bare such
321 // identifier — e.g. `0 # some` — reparses as the start of a
322 // quantifier rather than an identifier.
323 || matches!(kw, ANY | ALL | SOME)
324 // `ALL`/`DISTINCT` right after `SELECT` are consumed as the
325 // projection quantifier, so a bare `"all"` / `"distinct"`
326 // column reference reparses to a quantifier with an empty
327 // projection instead of an identifier. (`ALL` is already
328 // covered above; quoting these keeps display-only — unlike
329 // marking them always-reserved, which also rejects `WHERE
330 // distinct = 1` at parse time.)
331 || kw == DISTINCT
332 // `LIST` followed by `[` re-lexes as a `LIST[...]` literal
333 // (`list[1]` is a valid one-element list), so a bare `list`
334 // identifier that gets subscripted — `"list"[1]` — would
335 // reparse as a list literal instead of a subscript. (`ARRAY`
336 // is reserved-in-scalar-expression and so already quoted;
337 // `MAP[...]` requires `=>`, so `map[1]` is unambiguously a
338 // subscript.)
339 || kw == LIST
340 // `DEALLOCATE [PREPARE] <name>` accepts an optional
341 // `PREPARE` keyword before the name, so a bare `prepare`
342 // name is consumed as that keyword on reparse, leaving no
343 // name (`DEALLOCATE prepare` -> `DEALLOCATE` + the optional
344 // keyword + a missing name).
345 || kw == PREPARE
346 // `CASE` treats a leading `WHEN` as the start of the
347 // first arm (a searched `CASE` with no operand), so a
348 // bare `when` identifier used as the `CASE` operand —
349 // `CASE when.a WHEN ...` — reparses as `CASE WHEN .a ...`
350 // ("expected an expression, found dot"). Quoting it keeps
351 // the operand an identifier.
352 || kw == WHEN
353 // `COPY [INTO] <table> FROM …` accepts an optional `INTO`
354 // keyword before the relation name, so a bare `into`
355 // relation is consumed as that keyword on reparse
356 // (`COPY into FROM x` -> `COPY INTO <name=from> …`, which
357 // then fails expecting the FROM/TO direction).
358 || kw == INTO
359 })
360 .unwrap_or(false)
361 }
362
363 pub fn as_str(&self) -> &str {
364 &self.0
365 }
366
367 pub fn as_keyword(&self) -> Option<Keyword> {
368 self.0.parse().ok()
369 }
370
371 pub fn into_string(self) -> String {
372 self.0
373 }
374}
375
376/// More-or-less a direct translation of the Postgres function for doing the same thing:
377///
378/// <https://github.com/postgres/postgres/blob/master/src/backend/utils/adt/ruleutils.c#L10730-L10812>
379///
380/// Quotation is forced when printing in Stable mode.
381impl AstDisplay for Ident {
382 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
383 if self.can_be_printed_bare() && !f.stable() {
384 f.write_str(&self.0);
385 } else {
386 f.write_str("\"");
387 for ch in self.0.chars() {
388 // Double up on double-quotes.
389 if ch == '"' {
390 f.write_str("\"");
391 }
392 f.write_str(ch);
393 }
394 f.write_str("\"");
395 }
396 }
397}
398impl_display!(Ident);
399
/// Errors returned when creating or generating an [`Ident`].
#[derive(Clone, Debug, thiserror::Error)]
pub enum IdentError {
    /// The value could not be converted into an identifier string. [`Ident::new`] stores
    /// the text of the failed conversion's error here.
    #[error("identifier too long (len: {}, max: {}, value: {})", .0.len(), Ident::MAX_LENGTH, .0.quoted())]
    TooLong(String),
    /// [`Ident::try_generate_name`] ran out of candidates without finding one that was
    /// accepted.
    #[error(
        "failed to generate identifier with prefix '{prefix}' and suffix '{suffix}' after {attempts} attempts"
    )]
    FailedToGenerate {
        /// The prefix that was requested.
        prefix: String,
        /// The suffix that was requested.
        suffix: String,
        /// The attempt limit that was reached.
        attempts: usize,
    },

    /// The value is not allowed as an identifier; [`Ident::new`] returns this for `.`
    /// and `..`.
    #[error("invalid identifier: {}", .0.quoted())]
    Invalid(String),
}
416
/// The name of a table, view, custom type, etc. that lives in a schema. The name may be
/// multi-part, e.g. `db.schema.obj`, with one [`Ident`] per part.
#[derive(
    Debug,
    Clone,
    PartialEq,
    Eq,
    Hash,
    PartialOrd,
    Ord,
    Serialize,
    Deserialize
)]
pub struct UnresolvedItemName(pub Vec<Ident>);
430
/// A list of name parts tagged with the kind of catalog object it names.
///
/// NOTE(review): the variant names suggest "item" vs. "function" names; this file does
/// not use the type, so confirm the exact semantics against its callers.
pub enum CatalogName {
    /// Name parts tagged as an item name.
    ItemName(Vec<Ident>),
    /// Name parts tagged as a function name.
    FuncName(Vec<Ident>),
}
435
436impl UnresolvedItemName {
437 /// Creates an `ItemName` with a single [`Ident`], i.e. it appears as
438 /// "unqualified".
439 pub fn unqualified(ident: Ident) -> UnresolvedItemName {
440 UnresolvedItemName(vec![ident])
441 }
442
443 /// Creates an `ItemName` with an [`Ident`] for each element of `n`.
444 ///
445 /// Panics if passed an in ineligible `&[&str]` whose length is 0 or greater
446 /// than 3.
447 pub fn qualified(n: &[Ident]) -> UnresolvedItemName {
448 assert!(n.len() <= 3 && n.len() > 0);
449 UnresolvedItemName(n.iter().cloned().collect::<Vec<_>>())
450 }
451}
452
453impl AstDisplay for UnresolvedItemName {
454 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
455 display::separated(&self.0, ".").fmt(f);
456 }
457}
458impl_display!(UnresolvedItemName);
459
460impl AstDisplay for &UnresolvedItemName {
461 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
462 display::separated(&self.0, ".").fmt(f);
463 }
464}
465
/// The name of a schema, stored as a list of [`Ident`] parts.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct UnresolvedSchemaName(pub Vec<Ident>);
469
470impl AstDisplay for UnresolvedSchemaName {
471 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
472 display::separated(&self.0, ".").fmt(f);
473 }
474}
475impl_display!(UnresolvedSchemaName);
476
/// The name of a database, which is always a single [`Ident`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct UnresolvedDatabaseName(pub Ident);
480
481impl AstDisplay for UnresolvedDatabaseName {
482 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
483 f.write_node(&self.0);
484 }
485}
486impl_display!(UnresolvedDatabaseName);
487
/// The name of an item not yet created during name resolution, which should be
/// resolvable as an item name later.
#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum DeferredItemName<T: AstInfo> {
    /// A name in the form of the AST's own item-name type, `T::ItemName`.
    Named(T::ItemName),
    /// A name that is still unresolved.
    Deferred(UnresolvedItemName),
}
495
496impl<T: AstInfo> AstDisplay for DeferredItemName<T> {
497 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
498 match self {
499 DeferredItemName::Named(o) => f.write_node(o),
500 DeferredItemName::Deferred(o) => f.write_node(o),
501 }
502 }
503}
504impl_display_t!(DeferredItemName);
505
/// An unresolved name of any kind of object, tagged with the kind of object it names.
#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum UnresolvedObjectName {
    /// A cluster, named by a single identifier.
    Cluster(Ident),
    /// A cluster replica, named by a [`QualifiedReplica`].
    ClusterReplica(QualifiedReplica),
    /// A database.
    Database(UnresolvedDatabaseName),
    /// A schema.
    Schema(UnresolvedSchemaName),
    /// A role, named by a single identifier.
    Role(Ident),
    /// An item that lives in a schema (see [`UnresolvedItemName`]).
    Item(UnresolvedItemName),
    /// A network policy, named by a single identifier.
    NetworkPolicy(Ident),
}
516
517impl AstDisplay for UnresolvedObjectName {
518 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
519 match self {
520 UnresolvedObjectName::Cluster(n) => f.write_node(n),
521 UnresolvedObjectName::ClusterReplica(n) => f.write_node(n),
522 UnresolvedObjectName::Database(n) => f.write_node(n),
523 UnresolvedObjectName::Schema(n) => f.write_node(n),
524 UnresolvedObjectName::Role(n) => f.write_node(n),
525 UnresolvedObjectName::Item(n) => f.write_node(n),
526 UnresolvedObjectName::NetworkPolicy(n) => f.write_node(n),
527 }
528 }
529}
530impl_display!(UnresolvedObjectName);