Skip to main content

mz_repr/
explain.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! A set of traits for modeling things that can be explained by a
11//! SQL `EXPLAIN` statement.
12//!
13//! The main trait in this module is [`Explain`].
14//!
15//! An explainable subject `S` implements [`Explain`], and as part of that:
16//!
//! 1. Fixes the *context type* required for the explanation
//!    in [`Explain::Context`].
19//! 2. Fixes the *explanation type* for each [`ExplainFormat`]
20//!    in [`Explain::Text`], [`Explain::Json`], ....
21//! 3. Provides *an explanation type constructor* for each supported
//!    [`ExplainFormat`] from references to `S`, [`ExplainConfig`],
23//!    and the current [`Explain::Context`] in
24//!    [`Explain::explain_text`], [`Explain::explain_json`], ....
25//!
26//! The same *explanation type* can be shared by more than one
27//! [`ExplainFormat`].
28//!
29//! Use [`UnsupportedFormat`] and the default `explain_$format`
30//! constructor for [`Explain`] to indicate that the implementation does
31//! not support this `$format`.
32
33use itertools::Itertools;
34use proptest_derive::Arbitrary;
35use serde::{Deserialize, Serialize};
36use std::borrow::Cow;
37use std::collections::{BTreeMap, BTreeSet};
38use std::fmt;
39use std::fmt::{Display, Formatter};
40
41use mz_ore::stack::RecursionLimitError;
42use mz_ore::str::{Indent, bracketed, separated};
43
44use crate::explain::dot::{DisplayDot, dot_string};
45use crate::explain::json::{DisplayJson, json_string};
46use crate::explain::text::{DisplayText, text_string};
47use crate::optimize::OptimizerFeatureOverrides;
48use crate::{GlobalId, ReprColumnType, ReprScalarType, SqlColumnType, SqlScalarType};
49
50pub mod dot;
51pub mod json;
52pub mod text;
53#[cfg(feature = "tracing")]
54pub mod tracing;
55
56#[cfg(feature = "tracing")]
57pub use crate::explain::tracing::trace_plan;
58
/// The output formats in which an explanation can be rendered.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum ExplainFormat {
    /// Displayed as `TEXT`.
    Text,
    /// Displayed as `JSON`.
    Json,
    /// Displayed as `DOT`.
    Dot,
}

/// Writes the upper-case name of the format.
impl fmt::Display for ExplainFormat {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Text => "TEXT",
            Self::Json => "JSON",
            Self::Dot => "DOT",
        };
        f.write_str(name)
    }
}
76
/// A zero-variant enum to be used as the explanation type in the
/// [`Explain`] implementation for all formats that are not supported
/// for `Self`.
///
/// Because it has no variants, a value of this type can never be
/// constructed.
// No `Debug` impl is provided for this type, so the lint is silenced.
#[allow(missing_debug_implementations)]
pub enum UnsupportedFormat {}
82
/// The type of errors that may occur when an [`Explain::explain`]
/// call goes wrong.
#[derive(Debug)]
pub enum ExplainError {
    /// The requested [`ExplainFormat`] is not supported.
    UnsupportedFormat(ExplainFormat),
    /// Wraps a [`fmt::Error`].
    FormatError(fmt::Error),
    /// Wraps an [`anyhow::Error`].
    AnyhowError(anyhow::Error),
    /// Wraps a [`RecursionLimitError`].
    RecursionLimitError(RecursionLimitError),
    /// Wraps a [`serde_json::Error`].
    SerdeJsonError(serde_json::Error),
    /// The `linear_chains` option was used with `WITH MUTUALLY RECURSIVE`.
    LinearChainsPlusRecursive,
    /// Any other error, described by a free-form message.
    UnknownError(String),
}
95
96impl fmt::Display for ExplainError {
97    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
98        write!(f, "error while rendering explain output: ")?;
99        match self {
100            ExplainError::UnsupportedFormat(format) => {
101                write!(f, "{} format is not supported", format)
102            }
103            ExplainError::FormatError(error) => {
104                write!(f, "{}", error)
105            }
106            ExplainError::AnyhowError(error) => {
107                write!(f, "{}", error)
108            }
109            ExplainError::RecursionLimitError(error) => {
110                write!(f, "{}", error)
111            }
112            ExplainError::SerdeJsonError(error) => {
113                write!(f, "{}", error)
114            }
115            ExplainError::LinearChainsPlusRecursive => {
116                write!(
117                    f,
118                    "The linear_chains option is not supported with WITH MUTUALLY RECURSIVE."
119                )
120            }
121            ExplainError::UnknownError(error) => {
122                write!(f, "{}", error)
123            }
124        }
125    }
126}
127
128impl From<fmt::Error> for ExplainError {
129    fn from(error: fmt::Error) -> Self {
130        ExplainError::FormatError(error)
131    }
132}
133
134impl From<anyhow::Error> for ExplainError {
135    fn from(error: anyhow::Error) -> Self {
136        ExplainError::AnyhowError(error)
137    }
138}
139
140impl From<RecursionLimitError> for ExplainError {
141    fn from(error: RecursionLimitError) -> Self {
142        ExplainError::RecursionLimitError(error)
143    }
144}
145
146impl From<serde_json::Error> for ExplainError {
147    fn from(error: serde_json::Error) -> Self {
148        ExplainError::SerdeJsonError(error)
149    }
150}
151
/// A set of options for controlling the output of [`Explain`] implementations.
///
/// The first group of flags selects which analyses are shown (these are the
/// flags consulted by [`ExplainConfig::requires_analyses`]); the second group
/// controls other display options.
#[derive(Clone, Debug)]
pub struct ExplainConfig {
    // Analyses:
    // (These are shown only if the Analysis is supported by the backing IR.)
    /// Show the `SubtreeSize` Analysis in the explanation.
    pub subtree_size: bool,
    /// Show the number of columns, i.e., the `Arity` Analysis.
    pub arity: bool,
    /// Show the types, i.e., the `SqlRelationType` Analysis.
    pub types: bool,
    /// Show the sets of unique keys, i.e., the `UniqueKeys` Analysis.
    pub keys: bool,
    /// Show the `NonNegative` Analysis.
    pub non_negative: bool,
    /// Show the `Cardinality` Analysis.
    pub cardinality: bool,
    /// Show the `ColumnNames` Analysis.
    pub column_names: bool,
    /// Show the `Equivalences` Analysis.
    pub equivalences: bool,
    // TODO: add an option to show the `Monotonic` Analysis. This is non-trivial, because this
    // Analysis needs the set of monotonic GlobalIds, which are cumbersome to pass around.

    // Other display options:
    /// Render implemented MIR `Join` nodes in a way which reflects the implementation.
    pub join_impls: bool,
    /// Use inferred column names when rendering scalar and aggregate expressions.
    pub humanized_exprs: bool,
    /// Restrict output trees to linear chains. Ignored if `raw_plans` is set.
    pub linear_chains: bool,
    /// Show the slow path plan even if a fast path plan was created. Useful for debugging.
    /// Enforced if `timing` is set.
    pub no_fast_path: bool,
    /// Don't print optimizer hints.
    pub no_notices: bool,
    /// Show node IDs in physical plans.
    pub node_ids: bool,
    /// Don't normalize plans before explaining them.
    pub raw_plans: bool,
    /// Disable virtual syntax in the explanation.
    pub raw_syntax: bool,
    /// Use verbose syntax in the explanation.
    pub verbose_syntax: bool,
    /// Anonymize literals in the plan.
    pub redacted: bool,
    /// Print optimization timings.
    pub timing: bool,
    /// Show MFP pushdown information.
    pub filter_pushdown: bool,

    /// Optimizer feature flags.
    pub features: OptimizerFeatureOverrides,
}
206
207impl Default for ExplainConfig {
208    fn default() -> Self {
209        Self {
210            // Don't redact in debug builds and in CI.
211            redacted: !mz_ore::assert::soft_assertions_enabled(),
212            arity: false,
213            cardinality: false,
214            column_names: false,
215            filter_pushdown: false,
216            humanized_exprs: false,
217            join_impls: true,
218            keys: false,
219            linear_chains: false,
220            no_fast_path: true,
221            no_notices: false,
222            node_ids: false,
223            non_negative: false,
224            raw_plans: true,
225            raw_syntax: false,
226            verbose_syntax: false,
227            subtree_size: false,
228            timing: false,
229            types: false,
230            equivalences: false,
231            features: Default::default(),
232        }
233    }
234}
235
236impl ExplainConfig {
237    pub fn requires_analyses(&self) -> bool {
238        self.subtree_size
239            || self.non_negative
240            || self.arity
241            || self.types
242            || self.keys
243            || self.cardinality
244            || self.column_names
245            || self.equivalences
246    }
247}
248
/// The type of object to be explained.
///
/// The first three variants carry the [`GlobalId`] of the object.
#[derive(Clone, Debug)]
pub enum Explainee {
    /// An existing materialized view.
    MaterializedView(GlobalId),
    /// An existing index.
    Index(GlobalId),
    /// An object that will be served using a dataflow.
    ///
    /// This variant is deprecated and will be removed in database-issues#5301.
    Dataflow(GlobalId),
    /// The object to be explained is a one-off query and may or may not be
    /// served using a dataflow.
    Select,
}
264
265/// A trait that provides a unified interface for objects that
266/// can be explained.
267///
268/// All possible subjects of the various forms of an `EXPLAIN`
269/// SQL statement should implement this trait.
270pub trait Explain<'a>: 'a {
271    /// The type of the immutable context in which
272    /// the explanation will be rendered.
273    type Context;
274
275    /// The explanation type produced by a successful
276    /// [`Explain::explain_text`] call.
277    type Text: DisplayText;
278
279    /// The explanation type produced by a successful
280    /// [`Explain::explain_json`] call.
281    type Json: DisplayJson;
282
283    /// The explanation type produced by a successful
284    /// [`Explain::explain_json`] call.
285    type Dot: DisplayDot;
286
287    /// Explain an instance of [`Self`] within the given
288    /// [`Explain::Context`].
289    ///
290    /// Implementors should never have the need to not rely on
291    /// this default implementation.
292    ///
293    /// # Errors
294    ///
295    /// If the given `format` is not supported, the implementation
296    /// should return an [`ExplainError::UnsupportedFormat`].
297    ///
298    /// If an [`ExplainConfig`] parameter cannot be honored, the
299    /// implementation should silently ignore this parameter and
300    /// proceed without returning a [`Result::Err`].
301    fn explain(
302        &'a mut self,
303        format: &'a ExplainFormat,
304        context: &'a Self::Context,
305    ) -> Result<String, ExplainError> {
306        match format {
307            ExplainFormat::Text => self.explain_text(context).map(|e| text_string(&e)),
308            ExplainFormat::Json => self.explain_json(context).map(|e| json_string(&e)),
309            ExplainFormat::Dot => self.explain_dot(context).map(|e| dot_string(&e)),
310        }
311    }
312
313    /// Construct a [`Result::Ok`] of the [`Explain::Text`] format
314    /// from the config and the context.
315    ///
316    /// # Errors
317    ///
318    /// If the [`ExplainFormat::Text`] is not supported, the implementation
319    /// should return an [`ExplainError::UnsupportedFormat`].
320    ///
321    /// If an [`ExplainConfig`] parameter cannot be honored, the
322    /// implementation should silently ignore this parameter and
323    /// proceed without returning a [`Result::Err`].
324    #[allow(unused_variables)]
325    fn explain_text(&'a mut self, context: &'a Self::Context) -> Result<Self::Text, ExplainError> {
326        Err(ExplainError::UnsupportedFormat(ExplainFormat::Text))
327    }
328
329    /// Construct a [`Result::Ok`] of the [`Explain::Json`] format
330    /// from the config and the context.
331    ///
332    /// # Errors
333    ///
334    /// If the [`ExplainFormat::Json`] is not supported, the implementation
335    /// should return an [`ExplainError::UnsupportedFormat`].
336    ///
337    /// If an [`ExplainConfig`] parameter cannot be honored, the
338    /// implementation should silently ignore this parameter and
339    /// proceed without returning a [`Result::Err`].
340    #[allow(unused_variables)]
341    fn explain_json(&'a mut self, context: &'a Self::Context) -> Result<Self::Json, ExplainError> {
342        Err(ExplainError::UnsupportedFormat(ExplainFormat::Json))
343    }
344
345    /// Construct a [`Result::Ok`] of the [`Explain::Dot`] format
346    /// from the config and the context.
347    ///
348    /// # Errors
349    ///
350    /// If the [`ExplainFormat::Dot`] is not supported, the implementation
351    /// should return an [`ExplainError::UnsupportedFormat`].
352    ///
353    /// If an [`ExplainConfig`] parameter cannot be honored, the
354    /// implementation should silently ignore this parameter and
355    /// proceed without returning a [`Result::Err`].
356    #[allow(unused_variables)]
357    fn explain_dot(&'a mut self, context: &'a Self::Context) -> Result<Self::Dot, ExplainError> {
358        Err(ExplainError::UnsupportedFormat(ExplainFormat::Dot))
359    }
360}
361
362/// A helper struct which will most commonly be used as the generic
363/// rendering context type `C` for various `Explain$Format`
364/// implementations.
365#[derive(Debug)]
366pub struct RenderingContext<'a> {
367    pub indent: Indent,
368    pub humanizer: &'a dyn ExprHumanizer,
369}
370
371impl<'a> RenderingContext<'a> {
372    pub fn new(indent: Indent, humanizer: &'a dyn ExprHumanizer) -> RenderingContext<'a> {
373        RenderingContext { indent, humanizer }
374    }
375}
376
377impl<'a> AsMut<Indent> for RenderingContext<'a> {
378    fn as_mut(&mut self) -> &mut Indent {
379        &mut self.indent
380    }
381}
382
383impl<'a> AsRef<&'a dyn ExprHumanizer> for RenderingContext<'a> {
384    fn as_ref(&self) -> &&'a dyn ExprHumanizer {
385        &self.humanizer
386    }
387}
388
389#[allow(missing_debug_implementations)]
390pub struct PlanRenderingContext<'a, T> {
391    pub indent: Indent,
392    pub humanizer: &'a dyn ExprHumanizer,
393    pub annotations: BTreeMap<&'a T, Analyses>,
394    pub config: &'a ExplainConfig,
395    /// IDs that must be qualified in the output.
396    pub ambiguous_ids: BTreeSet<GlobalId>,
397}
398
399impl<'a, T> PlanRenderingContext<'a, T> {
400    pub fn new(
401        indent: Indent,
402        humanizer: &'a dyn ExprHumanizer,
403        annotations: BTreeMap<&'a T, Analyses>,
404        config: &'a ExplainConfig,
405        ambiguous_ids: BTreeSet<GlobalId>,
406    ) -> PlanRenderingContext<'a, T> {
407        PlanRenderingContext {
408            indent,
409            humanizer,
410            annotations,
411            config,
412            ambiguous_ids,
413        }
414    }
415
416    /// Unqualified names where unambiguous. Qualified names otherwise.
417    pub fn humanize_id_maybe_unqualified(&self, id: GlobalId) -> Option<String> {
418        if self.ambiguous_ids.contains(&id) {
419            self.humanizer.humanize_id(id)
420        } else {
421            self.humanizer.humanize_id_unqualified(id)
422        }
423    }
424}
425
426impl<'a, T> AsMut<Indent> for PlanRenderingContext<'a, T> {
427    fn as_mut(&mut self) -> &mut Indent {
428        &mut self.indent
429    }
430}
431
432impl<'a, T> AsRef<&'a dyn ExprHumanizer> for PlanRenderingContext<'a, T> {
433    fn as_ref(&self) -> &&'a dyn ExprHumanizer {
434        &self.humanizer
435    }
436}
437
438/// A trait for humanizing components of an expression.
439///
440/// This will be most often used as part of the rendering context
441/// type for various `Display$Format` implementation.
442pub trait ExprHumanizer: fmt::Debug + Sync {
443    /// Attempts to return a human-readable string for the relation
444    /// identified by `id`.
445    fn humanize_id(&self, id: GlobalId) -> Option<String>;
446
447    /// Same as above, but without qualifications, e.g., only `foo` for `materialize.public.foo`.
448    fn humanize_id_unqualified(&self, id: GlobalId) -> Option<String>;
449
450    /// Like [`Self::humanize_id`], but returns the constituent parts of the
451    /// name as individual elements.
452    fn humanize_id_parts(&self, id: GlobalId) -> Option<Vec<String>>;
453
454    /// Returns a human-readable name for the specified scalar type.
455    /// Used in, e.g., EXPLAIN and error msgs, in which case exact Postgres compatibility is less
456    /// important than showing as much detail as possible. Also used in `pg_typeof`, where Postgres
457    /// compatibility is more important.
458    fn humanize_sql_scalar_type(&self, ty: &SqlScalarType, postgres_compat: bool) -> String;
459
460    /// Returns a human-readable name for the specified scalar type.
461    ///
462    /// Uses std::fmt::Display, since we don't need to worry about resolving
463    ///  custom type IDs or postgres compatibility.
464    fn humanize_scalar_type(&self, typ: &ReprScalarType) -> String {
465        typ.to_string()
466    }
467
468    /// Returns a human-readable name for the specified column type.
469    /// Used in, e.g., EXPLAIN and error msgs, in which case exact Postgres compatibility is less
470    /// important than showing as much detail as possible. Also used in `pg_typeof`, where Postgres
471    /// compatibility is more important.
472    fn humanize_sql_column_type(&self, typ: &SqlColumnType, postgres_compat: bool) -> String {
473        format!(
474            "{}{}",
475            self.humanize_sql_scalar_type(&typ.scalar_type, postgres_compat),
476            if typ.nullable { "?" } else { "" }
477        )
478    }
479
480    /// Returns a human-readable name for the specified column type.
481    ///
482    /// Uses std::fmt::Display, since we don't need to worry about resolving
483    ///  custom type IDs or postgres compatibility.
484    fn humanize_column_type(&self, typ: &ReprColumnType) -> String {
485        typ.to_string()
486    }
487
488    /// Returns a vector of column names for the relation identified by `id`.
489    fn column_names_for_id(&self, id: GlobalId) -> Option<Vec<String>>;
490
491    /// Returns the `#column` name for the relation identified by `id`.
492    fn humanize_column(&self, id: GlobalId, column: usize) -> Option<String>;
493
494    /// Returns whether the specified id exists.
495    fn id_exists(&self, id: GlobalId) -> bool;
496}
497
/// An [`ExprHumanizer`] that extends the `inner` instance with shadow items
/// that are reported as present, even though they might not exist in `inner`.
#[derive(Debug)]
pub struct ExprHumanizerExt<'a> {
    /// A map of custom items that might not exist in the backing `inner`
    /// humanizer, but are reported as present by this humanizer instance.
    items: BTreeMap<GlobalId, TransientItem>,
    /// The inner humanizer used to resolve queries for [GlobalId] values not
    /// present in the `items` map.
    inner: &'a dyn ExprHumanizer,
}

impl<'a> ExprHumanizerExt<'a> {
    /// Creates a humanizer from the given shadow `items` and the `inner`
    /// humanizer to fall back to.
    pub fn new(items: BTreeMap<GlobalId, TransientItem>, inner: &'a dyn ExprHumanizer) -> Self {
        Self { items, inner }
    }
}
515
516impl<'a> ExprHumanizer for ExprHumanizerExt<'a> {
517    fn humanize_id(&self, id: GlobalId) -> Option<String> {
518        match self.items.get(&id) {
519            Some(item) => item
520                .humanized_id_parts
521                .as_ref()
522                .map(|parts| parts.join(".")),
523            None => self.inner.humanize_id(id),
524        }
525    }
526
527    fn humanize_id_unqualified(&self, id: GlobalId) -> Option<String> {
528        match self.items.get(&id) {
529            Some(item) => item
530                .humanized_id_parts
531                .as_ref()
532                .and_then(|parts| parts.last().cloned()),
533            None => self.inner.humanize_id_unqualified(id),
534        }
535    }
536
537    fn humanize_id_parts(&self, id: GlobalId) -> Option<Vec<String>> {
538        match self.items.get(&id) {
539            Some(item) => item.humanized_id_parts.clone(),
540            None => self.inner.humanize_id_parts(id),
541        }
542    }
543
544    fn humanize_sql_scalar_type(&self, ty: &SqlScalarType, postgres_compat: bool) -> String {
545        self.inner.humanize_sql_scalar_type(ty, postgres_compat)
546    }
547
548    fn column_names_for_id(&self, id: GlobalId) -> Option<Vec<String>> {
549        match self.items.get(&id) {
550            Some(item) => item.column_names.clone(),
551            None => self.inner.column_names_for_id(id),
552        }
553    }
554
555    fn humanize_column(&self, id: GlobalId, column: usize) -> Option<String> {
556        match self.items.get(&id) {
557            Some(item) => match &item.column_names {
558                Some(column_names) => Some(column_names[column].clone()),
559                None => None,
560            },
561            None => self.inner.humanize_column(id, column),
562        }
563    }
564
565    fn id_exists(&self, id: GlobalId) -> bool {
566        self.items.contains_key(&id) || self.inner.id_exists(id)
567    }
568}
569
/// A description of a catalog item that does not exist, but can be reported as
/// present in the catalog by a [`ExprHumanizerExt`] instance that has it in its
/// `items` list.
#[derive(Debug)]
pub struct TransientItem {
    /// The parts of the humanized name of the item, if available.
    humanized_id_parts: Option<Vec<String>>,
    /// The names of the columns of the item, if available.
    column_names: Option<Vec<String>>,
}

impl TransientItem {
    /// Creates an item description from its optional name parts and its
    /// optional column names.
    pub fn new(humanized_id_parts: Option<Vec<String>>, column_names: Option<Vec<String>>) -> Self {
        Self {
            humanized_id_parts,
            column_names,
        }
    }
}
587
588/// A bare-minimum implementation of [`ExprHumanizer`].
589///
590/// The `DummyHumanizer` does a poor job of humanizing expressions. It is
591/// intended for use in contexts where polish is not required, like in tests or
592/// while debugging.
593#[derive(Debug)]
594pub struct DummyHumanizer;
595
596impl ExprHumanizer for DummyHumanizer {
597    fn humanize_id(&self, _: GlobalId) -> Option<String> {
598        // Returning `None` allows the caller to fall back to displaying the
599        // ID, if they so desire.
600        None
601    }
602
603    fn humanize_id_unqualified(&self, _id: GlobalId) -> Option<String> {
604        None
605    }
606
607    fn humanize_id_parts(&self, _id: GlobalId) -> Option<Vec<String>> {
608        None
609    }
610
611    fn humanize_sql_scalar_type(&self, ty: &SqlScalarType, _postgres_compat: bool) -> String {
612        // The debug implementation is better than nothing.
613        format!("{:?}", ty)
614    }
615
616    fn column_names_for_id(&self, _id: GlobalId) -> Option<Vec<String>> {
617        None
618    }
619
620    fn humanize_column(&self, _id: GlobalId, _column: usize) -> Option<String> {
621        None
622    }
623
624    fn id_exists(&self, _id: GlobalId) -> bool {
625        false
626    }
627}
628
/// Pretty-prints a list of indices.
///
/// Wraps a borrowed slice of `usize` values.
#[derive(Debug)]
pub struct Indices<'a>(pub &'a [usize]);
632
/// Pretty-prints a list of scalar expressions that may have runs of column
/// indices as a comma-separated list interleaved with interval expressions.
///
/// Interval expressions are used only for runs of three or more elements.
///
/// Wraps a borrowed slice of expressions. This type is slated for removal
/// (see the `TODO` below).
#[derive(Debug)]
pub struct CompactScalarSeq<'a, T: ScalarOps>(pub &'a [T]); // TODO(cloud#8196) remove this
639
/// Pretty-prints a list of scalar expressions that may have runs of column
/// indices as a comma-separated list interleaved with interval expressions.
///
/// Interval expressions are used only for runs of three or more elements.
///
/// Unlike [`CompactScalarSeq`], this wraps a cloneable iterator over the
/// expressions rather than a slice.
#[derive(Debug)]
pub struct CompactScalars<T, I>(pub I)
where
    T: ScalarOps,
    I: Iterator<Item = T> + Clone;
649
/// The operations that [`CompactScalarSeq`] and [`CompactScalars`] require
/// from the scalar expressions they wrap.
pub trait ScalarOps {
    // NOTE(review): presumably returns `Some(i)` iff `self` is a plain
    // reference to column `i` -- confirm against the implementors.
    fn match_col_ref(&self) -> Option<usize>;

    // NOTE(review): presumably returns whether `self` references the column
    // with index `col_ref` -- confirm against the implementors.
    fn references(&self, col_ref: usize) -> bool;
}
655
/// A somewhat ad-hoc way to carry a plan together with a set
/// of analyses derived for each node in that plan.
#[allow(missing_debug_implementations)]
pub struct AnnotatedPlan<'a, T> {
    /// The plan itself.
    pub plan: &'a T,
    /// The derived analyses, keyed by a reference to a `T`.
    pub annotations: BTreeMap<&'a T, Analyses>,
}
663
/// A container for derived analyses.
///
/// Each field is `None` if the corresponding analysis is not available.
/// The field names mirror the analysis flags of [`ExplainConfig`].
#[derive(Clone, Default, Debug)]
pub struct Analyses {
    pub non_negative: Option<bool>,
    pub subtree_size: Option<usize>,
    pub arity: Option<usize>,
    // An inner `None` is rendered as `(<error>)` by `HumanizedAnalyses`.
    pub types: Option<Option<Vec<ReprColumnType>>>,
    pub keys: Option<Vec<Vec<usize>>>,
    pub cardinality: Option<String>,
    pub column_names: Option<Vec<String>>,
    pub equivalences: Option<String>,
}
676
/// A set of [`Analyses`] bundled with the humanizer and the config that are
/// needed to render it via its [`Display`] implementation.
#[derive(Debug, Clone)]
pub struct HumanizedAnalyses<'a> {
    /// The analyses to render.
    analyses: &'a Analyses,
    /// The humanizer used for rendering column types.
    humanizer: &'a dyn ExprHumanizer,
    /// The config whose flags select the analyses that are rendered.
    config: &'a ExplainConfig,
}

impl<'a> HumanizedAnalyses<'a> {
    /// Bundles the given `analyses` with the humanizer and the config of
    /// the given rendering context.
    pub fn new<T>(analyses: &'a Analyses, ctx: &PlanRenderingContext<'a, T>) -> Self {
        Self {
            analyses,
            humanizer: ctx.humanizer,
            config: ctx.config,
        }
    }
}
693
694impl<'a> Display for HumanizedAnalyses<'a> {
695    // Analysis rendering is guarded by the ExplainConfig flag for each
696    // Analysis. This is needed because we might have derived Analysis that
697    // are not explicitly requested (such as column_names), in which case we
698    // don't want to display them.
699    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
700        let mut builder = f.debug_struct("//");
701
702        if self.config.subtree_size {
703            let subtree_size = self.analyses.subtree_size.expect("subtree_size");
704            builder.field("subtree_size", &subtree_size);
705        }
706
707        if self.config.non_negative {
708            let non_negative = self.analyses.non_negative.expect("non_negative");
709            builder.field("non_negative", &non_negative);
710        }
711
712        if self.config.arity {
713            let arity = self.analyses.arity.expect("arity");
714            builder.field("arity", &arity);
715        }
716
717        if self.config.types {
718            let types = match self.analyses.types.as_ref().expect("types") {
719                Some(types) => {
720                    let types = types
721                        .into_iter()
722                        .map(|c| self.humanizer.humanize_column_type(c))
723                        .collect::<Vec<_>>();
724
725                    bracketed("(", ")", separated(", ", types)).to_string()
726                }
727                None => "(<error>)".to_string(),
728            };
729            builder.field("types", &types);
730        }
731
732        if self.config.keys {
733            let keys = self
734                .analyses
735                .keys
736                .as_ref()
737                .expect("keys")
738                .into_iter()
739                .map(|key| bracketed("[", "]", separated(", ", key)).to_string());
740            let keys = bracketed("(", ")", separated(", ", keys)).to_string();
741            builder.field("keys", &keys);
742        }
743
744        if self.config.cardinality {
745            let cardinality = self.analyses.cardinality.as_ref().expect("cardinality");
746            builder.field("cardinality", cardinality);
747        }
748
749        if self.config.column_names {
750            let column_names = self.analyses.column_names.as_ref().expect("column_names");
751            let column_names = column_names.into_iter().enumerate().map(|(i, c)| {
752                if c.is_empty() {
753                    Cow::Owned(format!("#{i}"))
754                } else {
755                    Cow::Borrowed(c)
756                }
757            });
758            let column_names = bracketed("(", ")", separated(", ", column_names)).to_string();
759            builder.field("column_names", &column_names);
760        }
761
762        if self.config.equivalences {
763            let equivs = self.analyses.equivalences.as_ref().expect("equivalences");
764            builder.field("equivs", equivs);
765        }
766
767        builder.finish()
768    }
769}
770
/// A set of indexes that are used in the explained plan.
///
/// Each element consists of the following components:
/// 1. The id of the index.
/// 2. A vector of [`IndexUsageType`] denoting how the index is used in the plan.
///
/// Using a `BTreeSet` here ensures a deterministic iteration order, which in turn ensures that
/// the corresponding EXPLAIN output is deterministic as well.
#[derive(Clone, Debug, Default)]
pub struct UsedIndexes(BTreeSet<(GlobalId, Vec<IndexUsageType>)>);
781
782impl UsedIndexes {
783    pub fn new(values: BTreeSet<(GlobalId, Vec<IndexUsageType>)>) -> UsedIndexes {
784        UsedIndexes(values)
785    }
786
787    pub fn is_empty(&self) -> bool {
788        self.0.is_empty()
789    }
790
791    /// Find all IDs with colliding (unqualified) humanizations.
792    pub fn ambiguous_ids(&self, humanizer: &dyn ExprHumanizer) -> BTreeSet<GlobalId> {
793        let humanized = self
794            .0
795            .iter()
796            .flat_map(|(id, _)| humanizer.humanize_id_unqualified(*id).map(|hum| (hum, *id)));
797
798        let mut by_humanization = BTreeMap::<String, BTreeSet<GlobalId>>::new();
799        for (hum, id) in humanized {
800            by_humanization.entry(hum).or_default().insert(id);
801        }
802
803        by_humanization
804            .values()
805            .filter(|ids| ids.len() > 1)
806            .flatten()
807            .cloned()
808            .collect()
809    }
810}
811
/// The way in which an index is used in an explained plan.
#[derive(
    Debug,
    Clone,
    Arbitrary,
    Serialize,
    Deserialize,
    Eq,
    PartialEq,
    Ord,
    PartialOrd,
    Hash
)]
pub enum IndexUsageType {
    /// Read the entire index.
    FullScan,
    /// Differential join. The work is proportional to the number of matches.
    DifferentialJoin,
    /// Delta join. The payload refines how the index is used by the join.
    DeltaJoin(DeltaJoinIndexUsageType),
    /// `IndexedFilter`, e.g., something like `WHERE x = 42` with an index on `x`.
    /// This also stores the id of the index that we want to do the lookup from. (This id is already
    /// chosen by `LiteralConstraints`, and then `IndexUsageType::Lookup` communicates this inside
    /// `CollectIndexRequests` from the `IndexedFilter` to the `Get`.)
    Lookup(GlobalId),
    /// This is a rare case that happens when the user creates an index that is identical to an
    /// existing one (i.e., on the same object, and with the same keys). We'll re-use the
    /// arrangement of the existing index. The plan is an `ArrangeBy` + `Get`, where the `ArrangeBy`
    /// is requesting the same key as an already existing index. (`export_index` is what inserts
    /// this `ArrangeBy`.)
    PlanRootNoArrangement,
    /// The index is used for directly writing to a sink. Can happen with a SUBSCRIBE to an indexed
    /// view.
    SinkExport,
    /// The index is used for creating a new index. Note that either a `FullScan` or a
    /// `PlanRootNoArrangement` usage will always accompany an `IndexExport` usage.
    IndexExport,
    /// When a fast path peek has a LIMIT, but no ORDER BY, then we read from the index only as many
    /// records (approximately), as the OFFSET + LIMIT needs.
    /// Note: When a fast path peek does a lookup and also has a limit, the usage type will be
    /// `Lookup`. However, the smart limiting logic will still apply.
    FastPathLimit,
    /// We saw a dangling `ArrangeBy`, i.e., where we have no idea what the arrangement will be used
    /// for. This is an internal error. Can be a bug either in `CollectIndexRequests`, or some
    /// other transform that messed up the plan. It's also possible that somebody is trying to add
    /// an `ArrangeBy` marking for some operator other than a `Join`. (Which is fine, but please
    /// update `CollectIndexRequests`.)
    DanglingArrangeBy,
    /// Internal error in `CollectIndexRequests` or a failed attempt to look up
    /// an index in `DataflowMetainfo::used_indexes`.
    Unknown,
}
863
/// How an index is used by a delta join.
///
/// In a snapshot, one arrangement of the first input is scanned, all the other arrangements (of the
/// first input, and of all other inputs) only get lookups.
/// When later input batches are arriving, all inputs are fully read.
#[derive(
    Debug,
    Clone,
    Arbitrary,
    Serialize,
    Deserialize,
    Eq,
    PartialEq,
    Ord,
    PartialOrd,
    Hash
)]
pub enum DeltaJoinIndexUsageType {
    /// The kind of usage is not known. The `Display` impl of [`IndexUsageType`] renders this as
    /// an internal error.
    Unknown,
    /// The arrangement only gets lookups in a snapshot. (For later update batches, all records
    /// are still read.)
    Lookup,
    /// The arrangement of the first input that is scanned in a snapshot.
    FirstInputFullScan,
}
884
885impl std::fmt::Display for IndexUsageType {
886    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
887        write!(
888            f,
889            "{}",
890            match self {
891                IndexUsageType::FullScan => "*** full scan ***",
892                IndexUsageType::Lookup(_idx_id) => "lookup",
893                IndexUsageType::DifferentialJoin => "differential join",
894                IndexUsageType::DeltaJoin(DeltaJoinIndexUsageType::FirstInputFullScan) =>
895                    "delta join 1st input (full scan)",
896                // Technically, this is a lookup only for a snapshot. For later update batches, all
897                // records are read. However, I wrote lookup here, because in most cases the
898                // lookup/scan distinction matters only for a snapshot. This is because for arriving
899                // update records, something in the system will always do work proportional to the
900                // number of records anyway. In other words, something is always scanning new
901                // updates, but we can avoid scanning records again and again in snapshots.
902                IndexUsageType::DeltaJoin(DeltaJoinIndexUsageType::Lookup) => "delta join lookup",
903                IndexUsageType::DeltaJoin(DeltaJoinIndexUsageType::Unknown) =>
904                    "*** INTERNAL ERROR (unknown delta join usage) ***",
905                IndexUsageType::PlanRootNoArrangement => "plan root (no new arrangement)",
906                IndexUsageType::SinkExport => "sink export",
907                IndexUsageType::IndexExport => "index export",
908                IndexUsageType::FastPathLimit => "fast path limit",
909                IndexUsageType::DanglingArrangeBy => "*** INTERNAL ERROR (dangling ArrangeBy) ***",
910                IndexUsageType::Unknown => "*** INTERNAL ERROR (unknown usage) ***",
911            }
912        )
913    }
914}
915
916impl IndexUsageType {
917    pub fn display_vec<'a, I>(usage_types: I) -> impl Display + Sized + 'a
918    where
919        I: IntoIterator<Item = &'a IndexUsageType>,
920    {
921        separated(", ", usage_types.into_iter().sorted().dedup())
922    }
923}
924
#[cfg(test)]
mod tests {
    use mz_ore::assert_ok;

    use super::*;

    /// Named piece of state that the explanation context borrows mutably.
    struct Environment {
        name: String,
    }

    impl Default for Environment {
        fn default() -> Self {
            Self {
                name: String::from("test env"),
            }
        }
    }

    /// A `since`/`upper` pair, printed by the explanation when timing is enabled.
    struct Frontiers<T> {
        since: T,
        upper: T,
    }

    impl<T> Frontiers<T> {
        fn new(since: T, upper: T) -> Self {
            Frontiers { since, upper }
        }
    }

    /// The explanation context used by the test: environment, config, and frontiers.
    struct ExplainContext<'a> {
        env: &'a mut Environment,
        config: &'a ExplainConfig,
        frontiers: Frontiers<u64>,
    }

    /// A test IR that should be the subject of explanations.
    struct TestExpr {
        lhs: i32,
        rhs: i32,
    }

    /// The text explanation type: the explained expression together with its context.
    struct TestExplanation<'a> {
        expr: &'a TestExpr,
        context: &'a ExplainContext<'a>,
    }

    impl<'a> DisplayText for TestExplanation<'a> {
        /// Prints the expression, then (only if `config.timing` is set) the frontiers, then
        /// the environment name, one per line.
        fn fmt_text(&self, f: &mut fmt::Formatter<'_>, _ctx: &mut ()) -> fmt::Result {
            let TestExpr { lhs, rhs } = self.expr;
            writeln!(f, "expr = {lhs} + {rhs}")?;

            let context = self.context;
            if context.config.timing {
                let Frontiers { since, upper } = &context.frontiers;
                writeln!(f, "at t ∊ [{since}, {upper})")?;
            }

            let name = &context.env.name;
            writeln!(f, "env = {name}")
        }
    }

    impl<'a> Explain<'a> for TestExpr {
        type Context = ExplainContext<'a>;
        type Text = TestExplanation<'a>;
        type Json = UnsupportedFormat;
        type Dot = UnsupportedFormat;

        fn explain_text(
            &'a mut self,
            context: &'a Self::Context,
        ) -> Result<Self::Text, ExplainError> {
            let explanation = TestExplanation {
                expr: self,
                context,
            };
            Ok(explanation)
        }
    }

    /// Explains the fixed expression `1 + 2` in text format, with only `verbose_syntax` and
    /// `timing` switched on in the config.
    fn do_explain(
        env: &mut Environment,
        frontiers: Frontiers<u64>,
    ) -> Result<String, ExplainError> {
        let mut expr = TestExpr { lhs: 1, rhs: 2 };

        let format = ExplainFormat::Text;
        let config = ExplainConfig {
            redacted: false,
            arity: false,
            cardinality: false,
            column_names: false,
            filter_pushdown: false,
            humanized_exprs: false,
            join_impls: false,
            keys: false,
            linear_chains: false,
            no_fast_path: false,
            no_notices: false,
            node_ids: false,
            non_negative: false,
            raw_plans: false,
            raw_syntax: false,
            verbose_syntax: true,
            subtree_size: false,
            equivalences: false,
            timing: true,
            types: false,
            features: Default::default(),
        };
        let context = ExplainContext {
            env,
            config: &config,
            frontiers,
        };

        expr.explain(&format, &context)
    }

    #[mz_ore::test]
    fn test_mutable_context() {
        let mut env = Environment::default();
        let expected = String::from("expr = 1 + 2\nat t ∊ [3, 7)\nenv = test env\n");

        let actual = do_explain(&mut env, Frontiers::new(3, 7));

        assert_ok!(actual);
        assert_eq!(actual.unwrap(), expected);
    }
}