mz_sql_parser/ast/defs/name.rs
1// Copyright 2018 sqlparser-rs contributors. All rights reserved.
2// Copyright Materialize, Inc. and contributors. All rights reserved.
3//
4// This file is derived from the sqlparser-rs project, available at
5// https://github.com/andygrove/sqlparser-rs. It was incorporated
6// directly into Materialize on December 21, 2019.
7//
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License in the LICENSE file at the
11// root of this repository, or online at
12//
13// http://www.apache.org/licenses/LICENSE-2.0
14//
15// Unless required by applicable law or agreed to in writing, software
16// distributed under the License is distributed on an "AS IS" BASIS,
17// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18// See the License for the specific language governing permissions and
19// limitations under the License.
20
21use mz_ore::str::StrExt;
22use mz_sql_lexer::keywords::Keyword;
23use mz_sql_lexer::lexer::{IdentString, MAX_IDENTIFIER_LENGTH};
24use serde::{Deserialize, Serialize};
25use std::fmt;
26
27use crate::ast::display::{self, AstDisplay, AstFormatter};
28use crate::ast::{AstInfo, QualifiedReplica};
29
/// An identifier.
///
/// A newtype around the identifier's text. The inner `String` is only
/// visible within this crate; other code builds values through the
/// constructors on [`Ident`] and reads the text back with [`Ident::as_str`]
/// or [`Ident::into_string`].
#[derive(
    Debug,
    Clone,
    PartialEq,
    Eq,
    Hash,
    PartialOrd,
    Ord,
    Serialize,
    Deserialize
)]
pub struct Ident(pub(crate) String);
43
44impl Ident {
    /// Maximum length of an identifier in Materialize.
    ///
    /// The constructors on [`Ident`] compare this against `String::len`, so the limit is
    /// measured in bytes, not characters.
    pub const MAX_LENGTH: usize = MAX_IDENTIFIER_LENGTH;
47
48 /// Create a new [`Ident`] with the given value, checking our invariants.
49 ///
50 /// # Examples
51 ///
52 /// ```
53 /// use mz_sql_parser::ast::Ident;
54 ///
55 /// let id = Ident::new("hello_world").unwrap();
56 /// assert_eq!(id.as_str(), "hello_world");
57 ///
58 /// let too_long = "I am a very long identifier that is more than 255 bytes long which is the max length for idents.\
59 /// 😊😁😅😂😬🍻😮💨😮🗽🛰️🌈😊😁😅😂😬🍻😮💨😮🗽🛰️🌈😊😁😅😂😬🍻😮💨😮🗽🛰️🌈";
60 /// assert_eq!(too_long.len(), 258);
61 ///
62 /// let too_long_id = Ident::new(too_long);
63 /// assert!(too_long_id.is_err());
64 ///
65 /// let invalid_name_dot = Ident::new(".");
66 /// assert!(invalid_name_dot.is_err());
67 ///
68 /// let invalid_name_dot_dot = Ident::new("..");
69 /// assert!(invalid_name_dot_dot.is_err());
70 /// ```
71 ///
72 pub fn new<S>(s: S) -> Result<Self, IdentError>
73 where
74 S: TryInto<IdentString>,
75 <S as TryInto<IdentString>>::Error: fmt::Display,
76 {
77 let s = s
78 .try_into()
79 .map_err(|e| IdentError::TooLong(e.to_string()))?;
80
81 if &*s == "." || &*s == ".." {
82 return Err(IdentError::Invalid(s.into_inner()));
83 }
84
85 Ok(Ident(s.into_inner()))
86 }
87
88 /// Create a new [`Ident`] modifying the given value as necessary to meet our invariants.
89 ///
90 /// # Examples
91 ///
92 /// ```
93 /// use mz_sql_parser::ast::Ident;
94 ///
95 /// let too_long = "🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
96 /// 🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵\
97 /// 🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴";
98 ///
99 /// let id = Ident::new_lossy(too_long);
100 ///
101 /// // `new_lossy` will truncate the provided string, since it's too long. Note the missing
102 /// // `🔴` characters.
103 /// assert_eq!(id.as_str(), "🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵");
104 /// ```
105 pub fn new_lossy<S: Into<String>>(value: S) -> Self {
106 let s: String = value.into();
107 if s.len() <= Self::MAX_LENGTH {
108 return Ident(s);
109 }
110
111 let mut byte_length = 0;
112 let s_truncated = s
113 .chars()
114 .take_while(|c| {
115 byte_length += c.len_utf8();
116 byte_length <= Self::MAX_LENGTH
117 })
118 .collect();
119
120 Ident(s_truncated)
121 }
122
123 /// Create a new [`Ident`] _without checking any of our invariants_.
124 ///
125 /// NOTE: Generally you __should not use this function__! If you're trying to create an
126 /// [`Ident`] from a `&'static str` you know is valid, use the [`ident!`] macro. For all other
127 /// use cases, see [`Ident::new`] which correctly checks our invariants.
128 ///
129 /// [`ident!`]: [`mz_sql_parser::ident`]
130 pub fn new_unchecked<S: Into<String>>(value: S) -> Self {
131 let s = value.into();
132 mz_ore::soft_assert_no_log!(s.len() <= Self::MAX_LENGTH);
133
134 Ident(s)
135 }
136
137 /// Generate a valid [`Ident`] with the provided `prefix` and `suffix`.
138 ///
139 /// # Examples
140 ///
141 /// ```
142 /// use mz_sql_parser::ast::{Ident, IdentError};
143 ///
144 /// let good_id =
145 /// Ident::try_generate_name("hello", "_world", |_| Ok::<_, IdentError>(true)).unwrap();
146 /// assert_eq!(good_id.as_str(), "hello_world");
147 ///
148 /// // Return invalid once.
149 /// let mut attempts = 0;
150 /// let one_failure = Ident::try_generate_name("hello", "_world", |_candidate| {
151 /// if attempts == 0 {
152 /// attempts += 1;
153 /// Ok::<_, IdentError>(false)
154 /// } else {
155 /// Ok(true)
156 /// }
157 /// })
158 /// .unwrap();
159 ///
160 /// // We "hello_world" was invalid, so we appended "_1".
161 /// assert_eq!(one_failure.as_str(), "hello_world_1");
162 /// ```
163 pub fn try_generate_name<P, S, F, E>(prefix: P, suffix: S, mut is_valid: F) -> Result<Self, E>
164 where
165 P: Into<String>,
166 S: Into<String>,
167 E: From<IdentError>,
168 F: FnMut(&Ident) -> Result<bool, E>,
169 {
170 const MAX_ATTEMPTS: usize = 1000;
171
172 let prefix: String = prefix.into();
173 let suffix: String = suffix.into();
174
175 // First just append the prefix and suffix.
176 let mut candidate = Ident(prefix.clone());
177 candidate.append_lossy(suffix.clone());
178 if is_valid(&candidate)? {
179 return Ok(candidate);
180 }
181
182 // Otherwise, append a number to the back.
183 for i in 1..MAX_ATTEMPTS {
184 let mut candidate = Ident(prefix.clone());
185 candidate.append_lossy(format!("{suffix}_{i}"));
186
187 if is_valid(&candidate)? {
188 return Ok(candidate);
189 }
190 }
191
192 // Couldn't find any valid name!
193 Err(E::from(IdentError::FailedToGenerate {
194 prefix,
195 suffix,
196 attempts: MAX_ATTEMPTS,
197 }))
198 }
199
200 /// Append the provided `suffix`, truncating `self` as necessary to satisfy our invariants.
201 ///
202 /// Note: We soft-assert that the provided `suffix` is not too long, if it is, we'll
203 /// truncate it.
204 ///
205 /// # Examples
206 ///
207 /// ```
208 /// use mz_sql_parser::{
209 /// ident,
210 /// ast::Ident,
211 /// };
212 ///
213 /// let mut id = ident!("🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵");
214 /// id.append_lossy("🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴");
215 ///
216 /// // We truncated the original ident, removing all '🔵' chars.
217 /// assert_eq!(id.as_str(), "🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴");
218 /// ```
219 ///
220 /// ### Too long suffix
221 /// If the provided suffix is too long, we'll also truncate that.
222 ///
223 /// ```
224 /// # mz_ore::assert::SOFT_ASSERTIONS.store(false, std::sync::atomic::Ordering::Relaxed);
225 /// use mz_sql_parser::{
226 /// ident,
227 /// ast::Ident,
228 /// };
229 ///
230 /// let mut stem = ident!("hello___world");
231 ///
232 /// let too_long_suffix = "\
233 /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
234 /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
235 /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
236 /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🔵\
237 /// ";
238 ///
239 /// stem.append_lossy(too_long_suffix);
240 ///
241 /// // Notice the "hello___world" stem got truncated, as did the "🔵🔵" characters from the suffix.
242 /// let result = "hello___wor\
243 /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
244 /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
245 /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
246 /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
247 /// ";
248 /// assert_eq!(stem.as_str(), result);
249 /// ```
250 pub fn append_lossy<S: Into<String>>(&mut self, suffix: S) {
251 // Make sure our suffix at least leaves a bit of room for the original ident.
252 const MAX_SUFFIX_LENGTH: usize = Ident::MAX_LENGTH - 8;
253
254 let mut suffix: String = suffix.into();
255 mz_ore::soft_assert_or_log!(suffix.len() <= MAX_SUFFIX_LENGTH, "suffix too long");
256
257 // Truncate the suffix as necessary.
258 if suffix.len() > MAX_SUFFIX_LENGTH {
259 let mut byte_length = 0;
260 suffix = suffix
261 .chars()
262 .take_while(|c| {
263 byte_length += c.len_utf8();
264 byte_length <= MAX_SUFFIX_LENGTH
265 })
266 .collect();
267 }
268
269 // Truncate ourselves as necessary.
270 let available_length = Ident::MAX_LENGTH - suffix.len();
271 if self.0.len() > available_length {
272 let mut byte_length = 0;
273 self.0 = self
274 .0
275 .chars()
276 .take_while(|c| {
277 byte_length += c.len_utf8();
278 byte_length <= available_length
279 })
280 .collect();
281 }
282
283 // Append the suffix.
284 self.0.push_str(&suffix);
285 }
286
287 /// An identifier can be printed in bare mode if
288 /// * it matches the regex `[a-z_][a-z0-9_]*` and
289 /// * it is not a "reserved keyword."
290 pub fn can_be_printed_bare(&self) -> bool {
291 let mut chars = self.0.chars();
292 chars
293 .next()
294 .map(|ch| matches!(ch, 'a'..='z' | '_'))
295 .unwrap_or(false)
296 && chars.all(|ch| matches!(ch, 'a'..='z' | '0'..='9' | '_'))
297 && !self
298 .as_keyword()
299 .map(Keyword::is_sometimes_reserved)
300 .unwrap_or(false)
301 }
302
    /// Returns the identifier's text as a string slice.
    pub fn as_str(&self) -> &str {
        &self.0
    }
306
    /// Attempts to parse the identifier's text as a [`Keyword`], returning `None` if the
    /// parse fails.
    pub fn as_keyword(&self) -> Option<Keyword> {
        self.0.parse().ok()
    }
310
    /// Consumes the identifier, returning its text as an owned `String`.
    pub fn into_string(self) -> String {
        self.0
    }
314}
315
316/// More-or-less a direct translation of the Postgres function for doing the same thing:
317///
318/// <https://github.com/postgres/postgres/blob/master/src/backend/utils/adt/ruleutils.c#L10730-L10812>
319///
320/// Quotation is forced when printing in Stable mode.
321impl AstDisplay for Ident {
322 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
323 if self.can_be_printed_bare() && !f.stable() {
324 f.write_str(&self.0);
325 } else {
326 f.write_str("\"");
327 for ch in self.0.chars() {
328 // Double up on double-quotes.
329 if ch == '"' {
330 f.write_str("\"");
331 }
332 f.write_str(ch);
333 }
334 f.write_str("\"");
335 }
336 }
337}
338impl_display!(Ident);
339
/// Errors that can occur while constructing or generating an [`Ident`].
#[derive(Clone, Debug, thiserror::Error)]
pub enum IdentError {
    /// The value did not fit within [`Ident::MAX_LENGTH`]. The payload is reported in the
    /// message as the rejected value, together with its byte length.
    #[error("identifier too long (len: {}, max: {}, value: {})", .0.len(), Ident::MAX_LENGTH, .0.quoted())]
    TooLong(String),
    /// [`Ident::try_generate_name`] ran out of attempts before any candidate was accepted.
    #[error(
        "failed to generate identifier with prefix '{prefix}' and suffix '{suffix}' after {attempts} attempts"
    )]
    FailedToGenerate {
        /// The prefix that was requested.
        prefix: String,
        /// The suffix that was requested.
        suffix: String,
        /// How many candidates were tried.
        attempts: usize,
    },

    /// The value is not an acceptable identifier; [`Ident::new`] returns this for `.` and `..`.
    #[error("invalid identifier: {}", .0.quoted())]
    Invalid(String),
}
356
/// A name of a table, view, custom type, etc. that lives in a schema, possibly multi-part, i.e. db.schema.obj
///
/// The parts are stored in the order in which they are printed; the `AstDisplay`
/// implementation joins them with `.`.
#[derive(
    Debug,
    Clone,
    PartialEq,
    Eq,
    Hash,
    PartialOrd,
    Ord,
    Serialize,
    Deserialize
)]
pub struct UnresolvedItemName(pub Vec<Ident>);
370
/// A multi-part name tagged with the kind of catalog entry it refers to.
///
/// NOTE(review): nothing in the visible part of this file uses this type, and unlike the
/// other name types here it derives no traits — confirm that this is intentional.
pub enum CatalogName {
    /// The parts of an item name.
    ItemName(Vec<Ident>),
    /// The parts of a function name.
    FuncName(Vec<Ident>),
}
375
376impl UnresolvedItemName {
377 /// Creates an `ItemName` with a single [`Ident`], i.e. it appears as
378 /// "unqualified".
379 pub fn unqualified(ident: Ident) -> UnresolvedItemName {
380 UnresolvedItemName(vec![ident])
381 }
382
383 /// Creates an `ItemName` with an [`Ident`] for each element of `n`.
384 ///
385 /// Panics if passed an in ineligible `&[&str]` whose length is 0 or greater
386 /// than 3.
387 pub fn qualified(n: &[Ident]) -> UnresolvedItemName {
388 assert!(n.len() <= 3 && n.len() > 0);
389 UnresolvedItemName(n.iter().cloned().collect::<Vec<_>>())
390 }
391}
392
393impl AstDisplay for UnresolvedItemName {
394 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
395 display::separated(&self.0, ".").fmt(f);
396 }
397}
398impl_display!(UnresolvedItemName);
399
400impl AstDisplay for &UnresolvedItemName {
401 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
402 display::separated(&self.0, ".").fmt(f);
403 }
404}
405
/// A name of a schema
///
/// Holds one [`Ident`] per part of the name; the `AstDisplay` implementation joins the
/// parts with `.`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct UnresolvedSchemaName(pub Vec<Ident>);
409
410impl AstDisplay for UnresolvedSchemaName {
411 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
412 display::separated(&self.0, ".").fmt(f);
413 }
414}
415impl_display!(UnresolvedSchemaName);
416
/// A name of a database
///
/// Unlike schema and item names, this is always a single [`Ident`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct UnresolvedDatabaseName(pub Ident);
420
421impl AstDisplay for UnresolvedDatabaseName {
422 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
423 f.write_node(&self.0);
424 }
425}
426impl_display!(UnresolvedDatabaseName);
427
/// The name of an item not yet created during name resolution, which should be
/// resolvable as an item name later.
#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum DeferredItemName<T: AstInfo> {
    /// A name expressed in the `AstInfo` implementation's own item-name type.
    Named(T::ItemName),
    /// A name that is still held in its unresolved, syntactic form.
    Deferred(UnresolvedItemName),
}
435
436impl<T: AstInfo> AstDisplay for DeferredItemName<T> {
437 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
438 match self {
439 DeferredItemName::Named(o) => f.write_node(o),
440 DeferredItemName::Deferred(o) => f.write_node(o),
441 }
442 }
443}
444impl_display_t!(DeferredItemName);
445
/// An unresolved name of an object, tagged with the kind of object it refers to.
#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum UnresolvedObjectName {
    /// A cluster, named by a single identifier.
    Cluster(Ident),
    /// A cluster replica, named by a [`QualifiedReplica`].
    ClusterReplica(QualifiedReplica),
    /// A database.
    Database(UnresolvedDatabaseName),
    /// A schema.
    Schema(UnresolvedSchemaName),
    /// A role, named by a single identifier.
    Role(Ident),
    /// An item such as a table or view.
    Item(UnresolvedItemName),
    /// A network policy, named by a single identifier.
    NetworkPolicy(Ident),
}
456
457impl AstDisplay for UnresolvedObjectName {
458 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
459 match self {
460 UnresolvedObjectName::Cluster(n) => f.write_node(n),
461 UnresolvedObjectName::ClusterReplica(n) => f.write_node(n),
462 UnresolvedObjectName::Database(n) => f.write_node(n),
463 UnresolvedObjectName::Schema(n) => f.write_node(n),
464 UnresolvedObjectName::Role(n) => f.write_node(n),
465 UnresolvedObjectName::Item(n) => f.write_node(n),
466 UnresolvedObjectName::NetworkPolicy(n) => f.write_node(n),
467 }
468 }
469}
470impl_display!(UnresolvedObjectName);