mz_sql_parser/ast/
display.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
// Copyright 2020 sqlparser-rs contributors. All rights reserved.
// Copyright Materialize, Inc. and contributors. All rights reserved.
//
// This file is derived from the sqlparser-rs project, available at
// https://github.com/andygrove/sqlparser-rs. It was incorporated
// directly into Materialize on December 21, 2019.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License in the LICENSE file at the
// root of this repository, or online at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fmt;

/// Prints the items of a borrowed slice one after another, placing `sep`
/// between each adjacent pair.
///
/// Build one with [`separated`] or [`comma_separated`].
pub struct DisplaySeparated<'a, T: AstDisplay> {
    /// Items to print, in order.
    slice: &'a [T],
    /// Text written between consecutive items.
    sep: &'static str,
}

impl<'a, T: AstDisplay> AstDisplay for DisplaySeparated<'a, T> {
    /// Writes every item of the slice in order, emitting the separator before
    /// each item except the first. An empty slice produces no output.
    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
        for (position, item) in self.slice.iter().enumerate() {
            if position > 0 {
                f.write_str(self.sep);
            }
            item.fmt(f);
        }
    }
}

impl<'a, T> std::fmt::Display for DisplaySeparated<'a, T>
where
    T: AstDisplay,
{
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        AstFormatter::new(f, FormatMode::Simple).write_node(self);
        Ok(())
    }
}

/// Wraps `slice` so that it prints with `sep` between adjacent items.
pub fn separated<'a, T: AstDisplay>(slice: &'a [T], sep: &'static str) -> DisplaySeparated<'a, T> {
    DisplaySeparated { slice, sep }
}

/// Wraps `slice` so that it prints with `", "` between adjacent items.
pub fn comma_separated<T: AstDisplay>(slice: &[T]) -> DisplaySeparated<'_, T> {
    const COMMA: &str = ", ";
    DisplaySeparated { slice, sep: COMMA }
}

/// Describes the context in which to print an AST.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum FormatMode {
    /// Simple is the normal way of printing for human consumption. Identifiers are quoted only if
    /// necessary and sensitive information is redacted.
    Simple,
    /// SimpleRedacted is like Simple, but strips out string and number literals.
    /// This turns SQL queries into "usage data" rather than "customer data" according to our
    /// data management policy, allowing us to introspect them.
    SimpleRedacted,
    /// Stable prints out the AST in a form more suitable for persistence. All identifiers are
    /// quoted, even if not necessary. This mode is used when persisting table information to the
    /// catalog.
    Stable,
}

/// Formatting state handed to [`AstDisplay::fmt`]: the output sink together
/// with the [`FormatMode`] that governs how nodes should print themselves.
#[derive(Debug)]
pub struct AstFormatter<W> {
    /// Destination that formatted text is written to.
    buf: W,
    /// Mode currently in effect; it can be changed with `set_mode`.
    mode: FormatMode,
}

impl<W> AstFormatter<W>
where
    W: fmt::Write,
{
    pub fn write_node<T: AstDisplay>(&mut self, s: &T) {
        s.fmt(self);
    }

    // TODO(justin): make this only accept a &str so that we don't accidentally pass an AstDisplay
    // to it.
    pub fn write_str<T: fmt::Display>(&mut self, s: T) {
        write!(self.buf, "{}", s).expect("unexpected error in fmt::Display implementation");
    }

    // Whether the AST should be optimized for persistence.
    pub fn stable(&self) -> bool {
        self.mode == FormatMode::Stable
    }

    /// Whether the AST should be printed out in a more human readable format.
    pub fn simple(&self) -> bool {
        matches!(self.mode, FormatMode::Simple | FormatMode::SimpleRedacted)
    }

    /// Whether the AST should be printed in redacted form
    pub fn redacted(&self) -> bool {
        self.mode == FormatMode::SimpleRedacted
    }

    /// Sets the current mode to a compatible version that does not redact
    /// values; returns the current mode, which should be reset when the
    /// unredacted printing is complete using [`Self::set_mode`].
    ///
    /// Note that this is the simplest means of unredacting values opt-out
    /// rather than opt-in. We must monitor usage of this API carefully to
    /// ensure we don't end up leaking values.
    pub fn unredact(&mut self) -> FormatMode {
        match self.mode {
            FormatMode::Simple => FormatMode::Simple,
            FormatMode::SimpleRedacted => {
                self.mode = FormatMode::Simple;
                FormatMode::SimpleRedacted
            }
            FormatMode::Stable => FormatMode::Stable,
        }
    }

    pub fn set_mode(&mut self, mode: FormatMode) {
        self.mode = mode;
    }

    pub fn new(buf: W, mode: FormatMode) -> Self {
        AstFormatter { buf, mode }
    }
}

/// AstDisplay is an alternative to fmt::Display to be used for formatting ASTs. It permits
/// configuration global to a printing of a given AST.
pub trait AstDisplay {
    /// Writes this node to `f`, consulting the formatter for its mode.
    fn fmt<W>(&self, f: &mut AstFormatter<W>)
    where
        W: fmt::Write;

    /// Renders this node to a new `String` in [`FormatMode::Simple`].
    fn to_ast_string(&self) -> String {
        let mut rendered = String::new();
        self.fmt(&mut AstFormatter::new(&mut rendered, FormatMode::Simple));
        rendered
    }

    /// Renders this node to a new `String` in [`FormatMode::Stable`].
    fn to_ast_string_stable(&self) -> String {
        let mut rendered = String::new();
        self.fmt(&mut AstFormatter::new(&mut rendered, FormatMode::Stable));
        rendered
    }

    /// Renders this node to a new `String` in [`FormatMode::SimpleRedacted`].
    fn to_ast_string_redacted(&self) -> String {
        let mut rendered = String::new();
        self.fmt(&mut AstFormatter::new(
            &mut rendered,
            FormatMode::SimpleRedacted,
        ));
        rendered
    }
}

/// Derive a fmt::Display implementation for types implementing AstDisplay.
///
/// The generated impl prints the node in `FormatMode::Simple` and always
/// returns `Ok(())`.
#[macro_export]
macro_rules! impl_display {
    ($name:ident) => {
        impl std::fmt::Display for $name {
            fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
                let mut formatter = $crate::ast::display::AstFormatter::new(
                    f,
                    $crate::ast::display::FormatMode::Simple,
                );
                formatter.write_node(self);
                Ok(())
            }
        }
    };
}

/// Like `impl_display!`, but for types that take a single `T: AstInfo`
/// parameter: generates `impl<T: AstInfo> Display for $name<T>` that prints the
/// node in `FormatMode::Simple` and always returns `Ok(())`.
macro_rules! impl_display_t {
    ($name:ident) => {
        impl<T: AstInfo> std::fmt::Display for $name<T> {
            fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
                let mut formatter = crate::ast::display::AstFormatter::new(
                    f,
                    crate::ast::display::FormatMode::Simple,
                );
                formatter.write_node(self);
                Ok(())
            }
        }
    };
}

/// Functions that generalize to AST nodes representing the "name" of a `WITH`
/// option.
pub trait WithOptionName {
    /// Expresses whether or not values should be redacted based on the option
    /// name (i.e. the option's "key").
    ///
    /// Returns `true` when the value paired with this option name must be
    /// redacted. The default implementation returns `true` for every option.
    ///
    /// # WARNING
    ///
    /// Whenever implementing this trait consider very carefully whether or not
    /// this value could contain sensitive user data.
    ///
    /// # Context
    /// Many statements in MZ use the format `WITH (<options>...)` to modify the
    /// resulting behavior of the statement. Most often these are modeled in the
    /// AST as a struct with two fields: an option name and a value.
    ///
    /// We do not type check the values of the types until planning, so most
    /// values represent arbitrary user input. To prevent leaking any PII in
    /// that data, we default to replacing values with the string `<REDACTED>`.
    ///
    /// However, in some cases, the values do not need to be redacted. For our
    /// `WITH` options, knowing which option we're dealing with should be
    /// sufficient to understand if a value needs redaction, so this trait
    /// controls redaction on a per-option basis.
    ///
    /// ## Genericizing `WITH` options
    /// It would be nice to force every AST node we consider a `WITH` option to
    /// conform to a particular structure. However, we have a proc macro that
    /// generates visitors over all of our nodes that inhibits our ability to do
    /// this easily. This means, unfortunately, that we cannot rely on
    /// compilation guarantees for this and instead must use the honor system.
    ///
    /// ## Nothing is ever redacted...
    ///
    /// In the initial implementation of this trait, no option requires its
    /// values to be redacted (except for the one test case). That doesn't mean
    /// there won't be in the future. When in doubt, take the more conservative
    /// approach.
    fn redact_value(&self) -> bool {
        // We conservatively assume that all values should be redacted.
        true
    }
}

/// To allow `WITH` option AST nodes to be printed without redaction, you should
/// use this macro's implementation of `AstDisplay`. For more details, consult
/// the doc strings on the functions used on its implementation.
///
/// The generated impl prints `name`, and when `value` is `Some`, follows it
/// with ` = ` and the value.
macro_rules! impl_display_for_with_option {
    ($name:ident) => {
        impl<T: AstInfo> AstDisplay for $name<T> {
            fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
                f.write_node(&self.name);

                let value = match &self.value {
                    Some(value) => value,
                    None => return,
                };
                f.write_str(" = ");

                // When the formatter is redacting but this option's name says
                // its value need not be redacted, lift redaction just for the
                // value and remember the mode to restore afterwards.
                let saved_mode = if f.redacted() && !self.name.redact_value() {
                    Some(f.unredact())
                } else {
                    None
                };
                f.write_node(value);
                if let Some(mode) = saved_mode {
                    f.set_mode(mode);
                }
            }
        }
    };
}

impl<T: AstDisplay> AstDisplay for &Box<T> {
    /// Forwards formatting of a borrowed box to the `Box<T>` implementation.
    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
        // `**self` names the `Box<T>` itself, so this call resolves to the
        // `Box<T>` impl rather than recursing into this one.
        (**self).fmt(f);
    }
}

impl<T: AstDisplay> AstDisplay for Box<T> {
    /// Formats the boxed value using `T`'s own implementation.
    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
        let inner: &T = self;
        inner.fmt(f);
    }
}

// u32 used directly to represent, e.g., oids
impl AstDisplay for u32 {
    /// Writes the number using its `fmt::Display` form.
    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
        f.write_str(*self);
    }
}

// u64 used directly to represent, e.g., type modifiers
impl AstDisplay for u64 {
    /// Writes the number using its `fmt::Display` form.
    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
        f.write_str(*self);
    }
}

impl AstDisplay for i64 {
    /// Writes the number using its `fmt::Display` form.
    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
        f.write_str(*self);
    }
}

/// Wrapper around a borrowed string that prints it with every single quote
/// (`'`) doubled. It does not add surrounding quotes.
pub struct EscapeSingleQuoteString<'a>(&'a str);

impl<'a> AstDisplay for EscapeSingleQuoteString<'a> {
    /// Writes the wrapped string with each `'` replaced by `''`.
    ///
    /// The text is written one quote-free run at a time rather than one
    /// character at a time, so the number of formatted writes grows with the
    /// number of quotes instead of the length of the string.
    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
        // `split` always yields at least one (possibly empty) segment; every
        // segment after the first was preceded by a quote in the input.
        let mut segments = self.0.split('\'');
        if let Some(head) = segments.next() {
            f.write_str(head);
        }
        for segment in segments {
            f.write_str("''");
            f.write_str(segment);
        }
    }
}

impl<'a> fmt::Display for EscapeSingleQuoteString<'a> {
    /// Renders the escaped text into a temporary `String` and writes that to
    /// `f`, returning the result of the write.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let rendered = self.to_ast_string();
        f.write_str(rendered.as_str())
    }
}

/// Wraps `s` in an [`EscapeSingleQuoteString`], which prints `s` with every
/// single quote doubled and without surrounding quotes.
pub fn escape_single_quote_string(s: &str) -> EscapeSingleQuoteString<'_> {
    EscapeSingleQuoteString(s)
}

/// Wrapper around a borrowed string that prints it as a single-quoted literal:
/// an opening `'`, the text with embedded single quotes escaped, and a closing
/// `'`.
pub struct EscapedStringLiteral<'a>(&'a str);

impl<'a> AstDisplay for EscapedStringLiteral<'a> {
    /// Writes the wrapped string surrounded by single quotes, with the body
    /// passed through `escape_single_quote_string`.
    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
        const QUOTE: &str = "'";
        let body = escape_single_quote_string(self.0);
        f.write_str(QUOTE);
        f.write_node(&body);
        f.write_str(QUOTE);
    }
}

impl<'a> fmt::Display for EscapedStringLiteral<'a> {
    /// Renders the quoted literal into a temporary `String` and writes that to
    /// `f`, returning the result of the write.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let rendered = self.to_ast_string();
        f.write_str(rendered.as_str())
    }
}

/// Wraps `s` in an [`EscapedStringLiteral`], which prints `s` as a
/// single-quoted literal with embedded single quotes escaped.
pub fn escaped_string_literal(s: &str) -> EscapedStringLiteral<'_> {
    EscapedStringLiteral(s)
}