Skip to main content

mz_repr/
explain.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! A set of traits for modeling things that can be explained by a
11//! SQL `EXPLAIN` statement.
12//!
13//! The main trait in this module is [`Explain`].
14//!
15//! An explainable subject `S` implements [`Explain`], and as part of that:
16//!
//! 1. Fixes the *context type* required for the explanation
//!    in [`Explain::Context`].
19//! 2. Fixes the *explanation type* for each [`ExplainFormat`]
20//!    in [`Explain::Text`], [`Explain::Json`], ....
21//! 3. Provides *an explanation type constructor* for each supported
//!    [`ExplainFormat`] from references to `S`, [`ExplainConfig`],
23//!    and the current [`Explain::Context`] in
24//!    [`Explain::explain_text`], [`Explain::explain_json`], ....
25//!
26//! The same *explanation type* can be shared by more than one
27//! [`ExplainFormat`].
28//!
29//! Use [`UnsupportedFormat`] and the default `explain_$format`
30//! constructor for [`Explain`] to indicate that the implementation does
31//! not support this `$format`.
32
33use itertools::Itertools;
34#[cfg(any(test, feature = "proptest"))]
35use proptest_derive::Arbitrary;
36use serde::{Deserialize, Serialize};
37use std::borrow::Cow;
38use std::collections::{BTreeMap, BTreeSet};
39use std::fmt;
40use std::fmt::{Display, Formatter};
41
42use mz_ore::stack::RecursionLimitError;
43use mz_ore::str::{Indent, bracketed, separated};
44
45use crate::explain::dot::{DisplayDot, dot_string};
46use crate::explain::json::{DisplayJson, json_string};
47use crate::explain::text::{DisplayText, text_string};
48use crate::optimize::OptimizerFeatureOverrides;
49use crate::{GlobalId, ReprColumnType, ReprScalarType, SqlColumnType, SqlScalarType};
50
51pub mod dot;
52pub mod json;
53pub mod text;
54#[cfg(feature = "tracing")]
55pub mod tracing;
56
57#[cfg(feature = "tracing")]
58pub use crate::explain::tracing::trace_plan;
59
/// The output formats in which an explanation can be rendered.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum ExplainFormat {
    /// Displayed as `TEXT`.
    Text,
    /// Displayed as `JSON`.
    Json,
    /// Displayed as `DOT`.
    Dot,
}

impl fmt::Display for ExplainFormat {
    /// Writes the upper-case name of the format.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match *self {
            Self::Text => "TEXT",
            Self::Json => "JSON",
            Self::Dot => "DOT",
        };
        f.write_str(name)
    }
}
77
/// A zero-variant enum to be used as the explanation type in the
/// [`Explain`] implementation for all formats that are not supported
/// for `Self`.
///
/// Because the enum has no variants, no value of this type can ever be
/// constructed.
#[allow(missing_debug_implementations)]
pub enum UnsupportedFormat {}
83
/// The type of errors that may occur when an [`Explain::explain`]
/// call goes wrong.
///
/// The [`fmt::Display`] implementation prefixes every message with
/// `error while rendering explain output: `.
#[derive(Debug)]
pub enum ExplainError {
    /// The requested [`ExplainFormat`] is not supported by the explained
    /// subject.
    UnsupportedFormat(ExplainFormat),
    /// A wrapped [`fmt::Error`].
    FormatError(fmt::Error),
    /// A wrapped [`anyhow::Error`].
    AnyhowError(anyhow::Error),
    /// A wrapped [`RecursionLimitError`].
    RecursionLimitError(RecursionLimitError),
    /// A wrapped [`serde_json::Error`].
    SerdeJsonError(serde_json::Error),
    /// The `linear_chains` option was combined with
    /// `WITH MUTUALLY RECURSIVE`, which is not supported.
    LinearChainsPlusRecursive,
    /// Any other failure, described by a free-form message.
    UnknownError(String),
}
96
97impl fmt::Display for ExplainError {
98    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
99        write!(f, "error while rendering explain output: ")?;
100        match self {
101            ExplainError::UnsupportedFormat(format) => {
102                write!(f, "{} format is not supported", format)
103            }
104            ExplainError::FormatError(error) => {
105                write!(f, "{}", error)
106            }
107            ExplainError::AnyhowError(error) => {
108                write!(f, "{}", error)
109            }
110            ExplainError::RecursionLimitError(error) => {
111                write!(f, "{}", error)
112            }
113            ExplainError::SerdeJsonError(error) => {
114                write!(f, "{}", error)
115            }
116            ExplainError::LinearChainsPlusRecursive => {
117                write!(
118                    f,
119                    "The linear_chains option is not supported with WITH MUTUALLY RECURSIVE."
120                )
121            }
122            ExplainError::UnknownError(error) => {
123                write!(f, "{}", error)
124            }
125        }
126    }
127}
128
129impl From<fmt::Error> for ExplainError {
130    fn from(error: fmt::Error) -> Self {
131        ExplainError::FormatError(error)
132    }
133}
134
135impl From<anyhow::Error> for ExplainError {
136    fn from(error: anyhow::Error) -> Self {
137        ExplainError::AnyhowError(error)
138    }
139}
140
141impl From<RecursionLimitError> for ExplainError {
142    fn from(error: RecursionLimitError) -> Self {
143        ExplainError::RecursionLimitError(error)
144    }
145}
146
147impl From<serde_json::Error> for ExplainError {
148    fn from(error: serde_json::Error) -> Self {
149        ExplainError::SerdeJsonError(error)
150    }
151}
152
/// A set of options for controlling the output of [`Explain`] implementations.
///
/// The first group of flags each request that one analysis be shown;
/// [`ExplainConfig::requires_analyses`] reports whether any of them is set.
/// The second group holds other display options.
#[derive(Clone, Debug)]
pub struct ExplainConfig {
    // Analyses:
    // (These are shown only if the Analysis is supported by the backing IR.)
    /// Show the `SubtreeSize` Analysis in the explanation.
    pub subtree_size: bool,
    /// Show the number of columns, i.e., the `Arity` Analysis.
    pub arity: bool,
    /// Show the types, i.e., the `SqlRelationType` Analysis.
    pub types: bool,
    /// Show the sets of unique keys, i.e., the `UniqueKeys` Analysis.
    pub keys: bool,
    /// Show the `NonNegative` Analysis.
    pub non_negative: bool,
    /// Show the `Cardinality` Analysis.
    pub cardinality: bool,
    /// Show the `ColumnNames` Analysis.
    pub column_names: bool,
    /// Show the `Equivalences` Analysis.
    pub equivalences: bool,
    // TODO: add an option to show the `Monotonic` Analysis. This is non-trivial, because this
    // Analysis needs the set of monotonic GlobalIds, which are cumbersome to pass around.

    // Other display options:
    /// Render implemented MIR `Join` nodes in a way which reflects the implementation.
    pub join_impls: bool,
    /// Use inferred column names when rendering scalar and aggregate expressions.
    pub humanized_exprs: bool,
    /// Restrict output trees to linear chains. Ignored if `raw_plans` is set.
    pub linear_chains: bool,
    /// Show the slow path plan even if a fast path plan was created. Useful for debugging.
    /// Enforced if `timing` is set.
    pub no_fast_path: bool,
    /// Don't print optimizer hints.
    pub no_notices: bool,
    /// Show node IDs in physical plans.
    pub node_ids: bool,
    /// Don't normalize plans before explaining them.
    pub raw_plans: bool,
    /// Disable virtual syntax in the explanation.
    pub raw_syntax: bool,
    /// Use verbose syntax in the explanation.
    pub verbose_syntax: bool,
    /// Anonymize literals in the plan.
    pub redacted: bool,
    /// Print optimization timings.
    pub timing: bool,
    /// Show MFP pushdown information.
    pub filter_pushdown: bool,

    /// Optimizer feature flags.
    pub features: OptimizerFeatureOverrides,
}
207
208impl Default for ExplainConfig {
209    fn default() -> Self {
210        Self {
211            // Don't redact in debug builds and in CI.
212            redacted: !mz_ore::assert::soft_assertions_enabled(),
213            arity: false,
214            cardinality: false,
215            column_names: false,
216            filter_pushdown: false,
217            humanized_exprs: false,
218            join_impls: true,
219            keys: false,
220            linear_chains: false,
221            no_fast_path: true,
222            no_notices: false,
223            node_ids: false,
224            non_negative: false,
225            raw_plans: true,
226            raw_syntax: false,
227            verbose_syntax: false,
228            subtree_size: false,
229            timing: false,
230            types: false,
231            equivalences: false,
232            features: Default::default(),
233        }
234    }
235}
236
237impl ExplainConfig {
238    pub fn requires_analyses(&self) -> bool {
239        self.subtree_size
240            || self.non_negative
241            || self.arity
242            || self.types
243            || self.keys
244            || self.cardinality
245            || self.column_names
246            || self.equivalences
247    }
248}
249
/// The type of object to be explained.
///
/// Every variant except [`Explainee::Select`] carries the [`GlobalId`] of
/// the object it refers to.
#[derive(Clone, Debug)]
pub enum Explainee {
    /// An existing materialized view.
    MaterializedView(GlobalId),
    /// An existing index.
    Index(GlobalId),
    /// An object that will be served using a dataflow.
    ///
    /// This variant is deprecated and will be removed in database-issues#5301.
    Dataflow(GlobalId),
    /// The object to be explained is a one-off query and may or may not be
    /// served using a dataflow.
    Select,
}
265
266/// A trait that provides a unified interface for objects that
267/// can be explained.
268///
269/// All possible subjects of the various forms of an `EXPLAIN`
270/// SQL statement should implement this trait.
271pub trait Explain<'a>: 'a {
272    /// The type of the immutable context in which
273    /// the explanation will be rendered.
274    type Context;
275
276    /// The explanation type produced by a successful
277    /// [`Explain::explain_text`] call.
278    type Text: DisplayText;
279
280    /// The explanation type produced by a successful
281    /// [`Explain::explain_json`] call.
282    type Json: DisplayJson;
283
284    /// The explanation type produced by a successful
285    /// [`Explain::explain_json`] call.
286    type Dot: DisplayDot;
287
288    /// Explain an instance of [`Self`] within the given
289    /// [`Explain::Context`].
290    ///
291    /// Implementors should never have the need to not rely on
292    /// this default implementation.
293    ///
294    /// # Errors
295    ///
296    /// If the given `format` is not supported, the implementation
297    /// should return an [`ExplainError::UnsupportedFormat`].
298    ///
299    /// If an [`ExplainConfig`] parameter cannot be honored, the
300    /// implementation should silently ignore this parameter and
301    /// proceed without returning a [`Result::Err`].
302    fn explain(
303        &'a mut self,
304        format: &'a ExplainFormat,
305        context: &'a Self::Context,
306    ) -> Result<String, ExplainError> {
307        match format {
308            ExplainFormat::Text => self.explain_text(context).map(|e| text_string(&e)),
309            ExplainFormat::Json => self.explain_json(context).map(|e| json_string(&e)),
310            ExplainFormat::Dot => self.explain_dot(context).map(|e| dot_string(&e)),
311        }
312    }
313
314    /// Construct a [`Result::Ok`] of the [`Explain::Text`] format
315    /// from the config and the context.
316    ///
317    /// # Errors
318    ///
319    /// If the [`ExplainFormat::Text`] is not supported, the implementation
320    /// should return an [`ExplainError::UnsupportedFormat`].
321    ///
322    /// If an [`ExplainConfig`] parameter cannot be honored, the
323    /// implementation should silently ignore this parameter and
324    /// proceed without returning a [`Result::Err`].
325    #[allow(unused_variables)]
326    fn explain_text(&'a mut self, context: &'a Self::Context) -> Result<Self::Text, ExplainError> {
327        Err(ExplainError::UnsupportedFormat(ExplainFormat::Text))
328    }
329
330    /// Construct a [`Result::Ok`] of the [`Explain::Json`] format
331    /// from the config and the context.
332    ///
333    /// # Errors
334    ///
335    /// If the [`ExplainFormat::Json`] is not supported, the implementation
336    /// should return an [`ExplainError::UnsupportedFormat`].
337    ///
338    /// If an [`ExplainConfig`] parameter cannot be honored, the
339    /// implementation should silently ignore this parameter and
340    /// proceed without returning a [`Result::Err`].
341    #[allow(unused_variables)]
342    fn explain_json(&'a mut self, context: &'a Self::Context) -> Result<Self::Json, ExplainError> {
343        Err(ExplainError::UnsupportedFormat(ExplainFormat::Json))
344    }
345
346    /// Construct a [`Result::Ok`] of the [`Explain::Dot`] format
347    /// from the config and the context.
348    ///
349    /// # Errors
350    ///
351    /// If the [`ExplainFormat::Dot`] is not supported, the implementation
352    /// should return an [`ExplainError::UnsupportedFormat`].
353    ///
354    /// If an [`ExplainConfig`] parameter cannot be honored, the
355    /// implementation should silently ignore this parameter and
356    /// proceed without returning a [`Result::Err`].
357    #[allow(unused_variables)]
358    fn explain_dot(&'a mut self, context: &'a Self::Context) -> Result<Self::Dot, ExplainError> {
359        Err(ExplainError::UnsupportedFormat(ExplainFormat::Dot))
360    }
361}
362
/// A helper struct which will most commonly be used as the generic
/// rendering context type `C` for various `Explain$Format`
/// implementations.
#[derive(Debug)]
pub struct RenderingContext<'a> {
    /// The indentation state; also reachable through `AsMut<Indent>`.
    pub indent: Indent,
    /// The humanizer to use while rendering; also reachable through
    /// `AsRef<&dyn ExprHumanizer>`.
    pub humanizer: &'a dyn ExprHumanizer,
}
371
372impl<'a> RenderingContext<'a> {
373    pub fn new(indent: Indent, humanizer: &'a dyn ExprHumanizer) -> RenderingContext<'a> {
374        RenderingContext { indent, humanizer }
375    }
376}
377
378impl<'a> AsMut<Indent> for RenderingContext<'a> {
379    fn as_mut(&mut self) -> &mut Indent {
380        &mut self.indent
381    }
382}
383
384impl<'a> AsRef<&'a dyn ExprHumanizer> for RenderingContext<'a> {
385    fn as_ref(&self) -> &&'a dyn ExprHumanizer {
386        &self.humanizer
387    }
388}
389
/// A rendering context that, in addition to an indentation state and a
/// humanizer, carries derived analyses, the active [`ExplainConfig`], and
/// the set of IDs that must be rendered with qualified names.
#[allow(missing_debug_implementations)]
pub struct PlanRenderingContext<'a, T> {
    /// The indentation state; also reachable through `AsMut<Indent>`.
    pub indent: Indent,
    /// The humanizer to use while rendering; also reachable through
    /// `AsRef<&dyn ExprHumanizer>`.
    pub humanizer: &'a dyn ExprHumanizer,
    /// Derived [`Analyses`], keyed by references to values of type `T`.
    pub annotations: BTreeMap<&'a T, Analyses>,
    /// The options controlling the explain output.
    pub config: &'a ExplainConfig,
    /// IDs that must be qualified in the output.
    pub ambiguous_ids: BTreeSet<GlobalId>,
}
399
400impl<'a, T> PlanRenderingContext<'a, T> {
401    pub fn new(
402        indent: Indent,
403        humanizer: &'a dyn ExprHumanizer,
404        annotations: BTreeMap<&'a T, Analyses>,
405        config: &'a ExplainConfig,
406        ambiguous_ids: BTreeSet<GlobalId>,
407    ) -> PlanRenderingContext<'a, T> {
408        PlanRenderingContext {
409            indent,
410            humanizer,
411            annotations,
412            config,
413            ambiguous_ids,
414        }
415    }
416
417    /// Unqualified names where unambiguous. Qualified names otherwise.
418    pub fn humanize_id_maybe_unqualified(&self, id: GlobalId) -> Option<String> {
419        if self.ambiguous_ids.contains(&id) {
420            self.humanizer.humanize_id(id)
421        } else {
422            self.humanizer.humanize_id_unqualified(id)
423        }
424    }
425}
426
427impl<'a, T> AsMut<Indent> for PlanRenderingContext<'a, T> {
428    fn as_mut(&mut self) -> &mut Indent {
429        &mut self.indent
430    }
431}
432
433impl<'a, T> AsRef<&'a dyn ExprHumanizer> for PlanRenderingContext<'a, T> {
434    fn as_ref(&self) -> &&'a dyn ExprHumanizer {
435        &self.humanizer
436    }
437}
438
439/// A trait for humanizing components of an expression.
440///
441/// This will be most often used as part of the rendering context
442/// type for various `Display$Format` implementation.
443pub trait ExprHumanizer: fmt::Debug + Sync {
444    /// Attempts to return a human-readable string for the relation
445    /// identified by `id`.
446    fn humanize_id(&self, id: GlobalId) -> Option<String>;
447
448    /// Same as above, but without qualifications, e.g., only `foo` for `materialize.public.foo`.
449    fn humanize_id_unqualified(&self, id: GlobalId) -> Option<String>;
450
451    /// Like [`Self::humanize_id`], but returns the constituent parts of the
452    /// name as individual elements.
453    fn humanize_id_parts(&self, id: GlobalId) -> Option<Vec<String>>;
454
455    /// Returns a human-readable name for the specified scalar type.
456    /// Used in, e.g., EXPLAIN and error msgs, in which case exact Postgres compatibility is less
457    /// important than showing as much detail as possible. Also used in `pg_typeof`, where Postgres
458    /// compatibility is more important.
459    fn humanize_sql_scalar_type(&self, ty: &SqlScalarType, postgres_compat: bool) -> String;
460
461    /// Returns a human-readable name for the specified scalar type.
462    ///
463    /// Uses std::fmt::Display, since we don't need to worry about resolving
464    ///  custom type IDs or postgres compatibility.
465    fn humanize_scalar_type(&self, typ: &ReprScalarType) -> String {
466        typ.to_string()
467    }
468
469    /// Returns a human-readable name for the specified column type.
470    /// Used in, e.g., EXPLAIN and error msgs, in which case exact Postgres compatibility is less
471    /// important than showing as much detail as possible. Also used in `pg_typeof`, where Postgres
472    /// compatibility is more important.
473    fn humanize_sql_column_type(&self, typ: &SqlColumnType, postgres_compat: bool) -> String {
474        format!(
475            "{}{}",
476            self.humanize_sql_scalar_type(&typ.scalar_type, postgres_compat),
477            if typ.nullable { "?" } else { "" }
478        )
479    }
480
481    /// Returns a human-readable name for the specified column type.
482    ///
483    /// Uses std::fmt::Display, since we don't need to worry about resolving
484    ///  custom type IDs or postgres compatibility.
485    fn humanize_column_type(&self, typ: &ReprColumnType) -> String {
486        typ.to_string()
487    }
488
489    /// Returns a vector of column names for the relation identified by `id`.
490    fn column_names_for_id(&self, id: GlobalId) -> Option<Vec<String>>;
491
492    /// Returns the `#column` name for the relation identified by `id`.
493    fn humanize_column(&self, id: GlobalId, column: usize) -> Option<String>;
494
495    /// Returns whether the specified id exists.
496    fn id_exists(&self, id: GlobalId) -> bool;
497}
498
/// An [`ExprHumanizer`] that extends the `inner` instance with shadow items
/// that are reported as present, even though they might not exist in `inner`.
///
/// ID-based lookups consult `items` first and fall back to `inner` only for
/// IDs that are not in the map.
#[derive(Debug)]
pub struct ExprHumanizerExt<'a> {
    /// A map of custom items that might not exist in the backing `inner`
    /// humanizer, but are reported as present by this humanizer instance.
    items: BTreeMap<GlobalId, TransientItem>,
    /// The inner humanizer used to resolve queries for [GlobalId] values not
    /// present in the `items` map.
    inner: &'a dyn ExprHumanizer,
}
510
511impl<'a> ExprHumanizerExt<'a> {
512    pub fn new(items: BTreeMap<GlobalId, TransientItem>, inner: &'a dyn ExprHumanizer) -> Self {
513        Self { items, inner }
514    }
515}
516
517impl<'a> ExprHumanizer for ExprHumanizerExt<'a> {
518    fn humanize_id(&self, id: GlobalId) -> Option<String> {
519        match self.items.get(&id) {
520            Some(item) => item
521                .humanized_id_parts
522                .as_ref()
523                .map(|parts| parts.join(".")),
524            None => self.inner.humanize_id(id),
525        }
526    }
527
528    fn humanize_id_unqualified(&self, id: GlobalId) -> Option<String> {
529        match self.items.get(&id) {
530            Some(item) => item
531                .humanized_id_parts
532                .as_ref()
533                .and_then(|parts| parts.last().cloned()),
534            None => self.inner.humanize_id_unqualified(id),
535        }
536    }
537
538    fn humanize_id_parts(&self, id: GlobalId) -> Option<Vec<String>> {
539        match self.items.get(&id) {
540            Some(item) => item.humanized_id_parts.clone(),
541            None => self.inner.humanize_id_parts(id),
542        }
543    }
544
545    fn humanize_sql_scalar_type(&self, ty: &SqlScalarType, postgres_compat: bool) -> String {
546        self.inner.humanize_sql_scalar_type(ty, postgres_compat)
547    }
548
549    fn column_names_for_id(&self, id: GlobalId) -> Option<Vec<String>> {
550        match self.items.get(&id) {
551            Some(item) => item.column_names.clone(),
552            None => self.inner.column_names_for_id(id),
553        }
554    }
555
556    fn humanize_column(&self, id: GlobalId, column: usize) -> Option<String> {
557        match self.items.get(&id) {
558            Some(item) => match &item.column_names {
559                Some(column_names) => Some(column_names[column].clone()),
560                None => None,
561            },
562            None => self.inner.humanize_column(id, column),
563        }
564    }
565
566    fn id_exists(&self, id: GlobalId) -> bool {
567        self.items.contains_key(&id) || self.inner.id_exists(id)
568    }
569}
570
/// A description of a catalog item that does not exist, but can be reported as
/// present in the catalog by a [`ExprHumanizerExt`] instance that has it in its
/// `items` list.
#[derive(Debug)]
pub struct TransientItem {
    /// The parts of the item's name, if known. [`ExprHumanizerExt`] joins
    /// them with `.` for the qualified name and uses the last part as the
    /// unqualified name.
    humanized_id_parts: Option<Vec<String>>,
    /// The names of the item's columns, if known.
    column_names: Option<Vec<String>>,
}
579
580impl TransientItem {
581    pub fn new(humanized_id_parts: Option<Vec<String>>, column_names: Option<Vec<String>>) -> Self {
582        Self {
583            humanized_id_parts,
584            column_names,
585        }
586    }
587}
588
/// A bare-minimum implementation of [`ExprHumanizer`].
///
/// The `DummyHumanizer` does a poor job of humanizing expressions. It is
/// intended for use in contexts where polish is not required, like in tests or
/// while debugging.
///
/// It knows no IDs: every ID-based lookup returns `None` and `id_exists`
/// always returns `false`.
#[derive(Debug)]
pub struct DummyHumanizer;
596
597impl ExprHumanizer for DummyHumanizer {
598    fn humanize_id(&self, _: GlobalId) -> Option<String> {
599        // Returning `None` allows the caller to fall back to displaying the
600        // ID, if they so desire.
601        None
602    }
603
604    fn humanize_id_unqualified(&self, _id: GlobalId) -> Option<String> {
605        None
606    }
607
608    fn humanize_id_parts(&self, _id: GlobalId) -> Option<Vec<String>> {
609        None
610    }
611
612    fn humanize_sql_scalar_type(&self, ty: &SqlScalarType, _postgres_compat: bool) -> String {
613        // The debug implementation is better than nothing.
614        format!("{:?}", ty)
615    }
616
617    fn column_names_for_id(&self, _id: GlobalId) -> Option<Vec<String>> {
618        None
619    }
620
621    fn humanize_column(&self, _id: GlobalId, _column: usize) -> Option<String> {
622        None
623    }
624
625    fn id_exists(&self, _id: GlobalId) -> bool {
626        false
627    }
628}
629
/// Pretty-prints a list of indices.
///
/// This is a thin wrapper around a borrowed slice of `usize` values.
#[derive(Debug)]
pub struct Indices<'a>(pub &'a [usize]);
633
/// Pretty-prints a list of scalar expressions that may have runs of column
/// indices as a comma-separated list interleaved with interval expressions.
///
/// Interval expressions are used only for runs of three or more elements.
///
/// This variant wraps a borrowed slice of expressions.
#[derive(Debug)]
pub struct CompactScalarSeq<'a, T: ScalarOps>(pub &'a [T]); // TODO(cloud#8196) remove this
640
/// Pretty-prints a list of scalar expressions that may have runs of column
/// indices as a comma-separated list interleaved with interval expressions.
///
/// Interval expressions are used only for runs of three or more elements.
///
/// This variant wraps a cloneable iterator over the expressions.
#[derive(Debug)]
pub struct CompactScalars<T, I>(pub I)
where
    T: ScalarOps,
    I: Iterator<Item = T> + Clone;
650
/// Operations that let a scalar expression be inspected as a column
/// reference.
pub trait ScalarOps {
    /// If this expression is a column-reference, return the column referenced.
    fn match_col_ref(&self) -> Option<usize>;

    /// Returns true if this expression is a reference to the given column.
    fn references(&self, col_ref: usize) -> bool;
}

/// A bare `usize` is treated as a reference to the column with that index.
impl ScalarOps for usize {
    fn match_col_ref(&self) -> Option<usize> {
        let column = *self;
        Some(column)
    }

    fn references(&self, col_ref: usize) -> bool {
        self.match_col_ref() == Some(col_ref)
    }
}
668
/// A somewhat ad-hoc way to carry a plan together with a set
/// of analyses derived for each node in that plan.
#[allow(missing_debug_implementations)]
pub struct AnnotatedPlan<'a, T> {
    /// The plan itself.
    pub plan: &'a T,
    /// The derived [`Analyses`], keyed by a reference to the node they
    /// were derived for.
    pub annotations: BTreeMap<&'a T, Analyses>,
}
676
/// A container for derived analyses.
///
/// Every field is `None` by default. [`HumanizedAnalyses`] renders a field
/// only when the corresponding [`ExplainConfig`] flag is set, and panics if
/// a requested field is `None`.
#[derive(Clone, Default, Debug)]
pub struct Analyses {
    /// Shown when [`ExplainConfig::non_negative`] is set.
    pub non_negative: Option<bool>,
    /// Shown when [`ExplainConfig::subtree_size`] is set.
    pub subtree_size: Option<usize>,
    /// Shown when [`ExplainConfig::arity`] is set.
    pub arity: Option<usize>,
    /// Shown when [`ExplainConfig::types`] is set. An inner `None` is
    /// rendered as `(<error>)`.
    pub types: Option<Option<Vec<ReprColumnType>>>,
    /// Shown when [`ExplainConfig::keys`] is set. Each inner vector is one
    /// key, given as a list of `usize` values.
    pub keys: Option<Vec<Vec<usize>>>,
    /// Shown when [`ExplainConfig::cardinality`] is set.
    pub cardinality: Option<String>,
    /// Shown when [`ExplainConfig::column_names`] is set. An empty name is
    /// rendered as `#i`, where `i` is the position of the column.
    pub column_names: Option<Vec<String>>,
    /// Shown when [`ExplainConfig::equivalences`] is set.
    pub equivalences: Option<String>,
}
689
/// A view over a set of [`Analyses`] whose `Display` implementation renders
/// the analyses requested by the given [`ExplainConfig`].
#[derive(Debug, Clone)]
pub struct HumanizedAnalyses<'a> {
    /// The analyses to render.
    analyses: &'a Analyses,
    /// Used to humanize the column types of the `types` analysis.
    humanizer: &'a dyn ExprHumanizer,
    /// Selects which analyses are rendered.
    config: &'a ExplainConfig,
}
696
697impl<'a> HumanizedAnalyses<'a> {
698    pub fn new<T>(analyses: &'a Analyses, ctx: &PlanRenderingContext<'a, T>) -> Self {
699        Self {
700            analyses,
701            humanizer: ctx.humanizer,
702            config: ctx.config,
703        }
704    }
705}
706
707impl<'a> Display for HumanizedAnalyses<'a> {
708    // Analysis rendering is guarded by the ExplainConfig flag for each
709    // Analysis. This is needed because we might have derived Analysis that
710    // are not explicitly requested (such as column_names), in which case we
711    // don't want to display them.
712    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
713        let mut builder = f.debug_struct("//");
714
715        if self.config.subtree_size {
716            let subtree_size = self.analyses.subtree_size.expect("subtree_size");
717            builder.field("subtree_size", &subtree_size);
718        }
719
720        if self.config.non_negative {
721            let non_negative = self.analyses.non_negative.expect("non_negative");
722            builder.field("non_negative", &non_negative);
723        }
724
725        if self.config.arity {
726            let arity = self.analyses.arity.expect("arity");
727            builder.field("arity", &arity);
728        }
729
730        if self.config.types {
731            let types = match self.analyses.types.as_ref().expect("types") {
732                Some(types) => {
733                    let types = types
734                        .into_iter()
735                        .map(|c| self.humanizer.humanize_column_type(c))
736                        .collect::<Vec<_>>();
737
738                    bracketed("(", ")", separated(", ", types)).to_string()
739                }
740                None => "(<error>)".to_string(),
741            };
742            builder.field("types", &types);
743        }
744
745        if self.config.keys {
746            let keys = self
747                .analyses
748                .keys
749                .as_ref()
750                .expect("keys")
751                .into_iter()
752                .map(|key| bracketed("[", "]", separated(", ", key)).to_string());
753            let keys = bracketed("(", ")", separated(", ", keys)).to_string();
754            builder.field("keys", &keys);
755        }
756
757        if self.config.cardinality {
758            let cardinality = self.analyses.cardinality.as_ref().expect("cardinality");
759            builder.field("cardinality", cardinality);
760        }
761
762        if self.config.column_names {
763            let column_names = self.analyses.column_names.as_ref().expect("column_names");
764            let column_names = column_names.into_iter().enumerate().map(|(i, c)| {
765                if c.is_empty() {
766                    Cow::Owned(format!("#{i}"))
767                } else {
768                    Cow::Borrowed(c)
769                }
770            });
771            let column_names = bracketed("(", ")", separated(", ", column_names)).to_string();
772            builder.field("column_names", &column_names);
773        }
774
775        if self.config.equivalences {
776            let equivs = self.analyses.equivalences.as_ref().expect("equivalences");
777            builder.field("equivs", equivs);
778        }
779
780        builder.finish()
781    }
782}
783
/// A set of indexes that are used in the explained plan.
///
/// Each element consists of the following components:
/// 1. The id of the index.
/// 2. A vector of [IndexUsageType] denoting how the index is used in the plan.
///
/// Using a `BTreeSet` here ensures a deterministic iteration order, which in turn ensures that
/// the corresponding EXPLAIN output is deterministic as well.
///
/// The `Default` value is the empty set.
#[derive(Clone, Debug, Default)]
pub struct UsedIndexes(BTreeSet<(GlobalId, Vec<IndexUsageType>)>);
794
795impl UsedIndexes {
796    pub fn new(values: BTreeSet<(GlobalId, Vec<IndexUsageType>)>) -> UsedIndexes {
797        UsedIndexes(values)
798    }
799
800    pub fn is_empty(&self) -> bool {
801        self.0.is_empty()
802    }
803
804    /// Find all IDs with colliding (unqualified) humanizations.
805    pub fn ambiguous_ids(&self, humanizer: &dyn ExprHumanizer) -> BTreeSet<GlobalId> {
806        let humanized = self
807            .0
808            .iter()
809            .flat_map(|(id, _)| humanizer.humanize_id_unqualified(*id).map(|hum| (hum, *id)));
810
811        let mut by_humanization = BTreeMap::<String, BTreeSet<GlobalId>>::new();
812        for (hum, id) in humanized {
813            by_humanization.entry(hum).or_default().insert(id);
814        }
815
816        by_humanization
817            .values()
818            .filter(|ids| ids.len() > 1)
819            .flatten()
820            .cloned()
821            .collect()
822    }
823}
824
/// The ways in which an index can be used in a plan; see [`UsedIndexes`].
#[derive(
    Debug,
    Clone,
    Serialize,
    Deserialize,
    Eq,
    PartialEq,
    Ord,
    PartialOrd,
    Hash
)]
#[cfg_attr(any(test, feature = "proptest"), derive(Arbitrary))]
pub enum IndexUsageType {
    /// Read the entire index.
    FullScan,
    /// Differential join. The work is proportional to the number of matches.
    DifferentialJoin,
    /// Delta join. The payload describes how the index is used within the delta join.
    DeltaJoin(DeltaJoinIndexUsageType),
    /// `IndexedFilter`, e.g., something like `WHERE x = 42` with an index on `x`.
    /// This also stores the id of the index that we want to do the lookup from. (This id is already
    /// chosen by `LiteralConstraints`, and then `IndexUsageType::Lookup` communicates this inside
    /// `CollectIndexRequests` from the `IndexedFilter` to the `Get`.)
    Lookup(GlobalId),
    /// This is a rare case that happens when the user creates an index that is identical to an
    /// existing one (i.e., on the same object, and with the same keys). We'll re-use the
    /// arrangement of the existing index. The plan is an `ArrangeBy` + `Get`, where the `ArrangeBy`
    /// is requesting the same key as an already existing index. (`export_index` is what inserts
    /// this `ArrangeBy`.)
    PlanRootNoArrangement,
    /// The index is used for directly writing to a sink. Can happen with a SUBSCRIBE to an indexed
    /// view.
    SinkExport,
    /// The index is used for creating a new index. Note that either a `FullScan` or a
    /// `PlanRootNoArrangement` usage will always accompany an `IndexExport` usage.
    IndexExport,
    /// When a fast path peek has a LIMIT, but no ORDER BY, then we read from the index only as many
    /// records (approximately), as the OFFSET + LIMIT needs.
    /// Note: When a fast path peek does a lookup and also has a limit, the usage type will be
    /// `Lookup`. However, the smart limiting logic will still apply.
    FastPathLimit,
    /// We saw a dangling `ArrangeBy`, i.e., where we have no idea what the arrangement will be used
    /// for. This is an internal error. Can be a bug either in `CollectIndexRequests`, or some
    /// other transform that messed up the plan. It's also possible that somebody is trying to add
    /// an `ArrangeBy` marking for some operator other than a `Join`. (Which is fine, but please
    /// update `CollectIndexRequests`.)
    DanglingArrangeBy,
    /// Internal error in `CollectIndexRequests` or a failed attempt to look up
    /// an index in `DataflowMetainfo::used_indexes`.
    Unknown,
}
876
/// How a delta join uses one of its arrangements.
///
/// In a snapshot, one arrangement of the first input is scanned, all the other arrangements (of the
/// first input, and of all other inputs) only get lookups.
/// When later input batches are arriving, all inputs are fully read.
#[derive(
    Debug,
    Clone,
    Serialize,
    Deserialize,
    Eq,
    PartialEq,
    Ord,
    PartialOrd,
    Hash
)]
#[cfg_attr(any(test, feature = "proptest"), derive(Arbitrary))]
pub enum DeltaJoinIndexUsageType {
    /// The kind of usage is not known. `IndexUsageType`'s `Display` implementation prints this as
    /// an internal error.
    Unknown,
    /// The arrangement only gets lookups in a snapshot.
    Lookup,
    /// The arrangement of the first input that is scanned in a snapshot.
    FirstInputFullScan,
}
897
898impl std::fmt::Display for IndexUsageType {
899    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
900        write!(
901            f,
902            "{}",
903            match self {
904                IndexUsageType::FullScan => "*** full scan ***",
905                IndexUsageType::Lookup(_idx_id) => "lookup",
906                IndexUsageType::DifferentialJoin => "differential join",
907                IndexUsageType::DeltaJoin(DeltaJoinIndexUsageType::FirstInputFullScan) =>
908                    "delta join 1st input (full scan)",
909                // Technically, this is a lookup only for a snapshot. For later update batches, all
910                // records are read. However, I wrote lookup here, because in most cases the
911                // lookup/scan distinction matters only for a snapshot. This is because for arriving
912                // update records, something in the system will always do work proportional to the
913                // number of records anyway. In other words, something is always scanning new
914                // updates, but we can avoid scanning records again and again in snapshots.
915                IndexUsageType::DeltaJoin(DeltaJoinIndexUsageType::Lookup) => "delta join lookup",
916                IndexUsageType::DeltaJoin(DeltaJoinIndexUsageType::Unknown) =>
917                    "*** INTERNAL ERROR (unknown delta join usage) ***",
918                IndexUsageType::PlanRootNoArrangement => "plan root (no new arrangement)",
919                IndexUsageType::SinkExport => "sink export",
920                IndexUsageType::IndexExport => "index export",
921                IndexUsageType::FastPathLimit => "fast path limit",
922                IndexUsageType::DanglingArrangeBy => "*** INTERNAL ERROR (dangling ArrangeBy) ***",
923                IndexUsageType::Unknown => "*** INTERNAL ERROR (unknown usage) ***",
924            }
925        )
926    }
927}
928
929impl IndexUsageType {
930    pub fn display_vec<'a, I>(usage_types: I) -> impl Display + Sized + 'a
931    where
932        I: IntoIterator<Item = &'a IndexUsageType>,
933    {
934        separated(", ", usage_types.into_iter().sorted().dedup())
935    }
936}
937
#[cfg(test)]
mod tests {
    use mz_ore::assert_ok;

    use super::*;

    /// A minimal stand-in for external state that an explanation reads from.
    struct Environment {
        name: String,
    }

    impl Default for Environment {
        fn default() -> Self {
            Self {
                name: String::from("test env"),
            }
        }
    }

    /// A `since`/`upper` pair, printed by the test explanation as the interval `[since, upper)`.
    struct Frontiers<T> {
        since: T,
        upper: T,
    }

    impl<T> Frontiers<T> {
        fn new(since: T, upper: T) -> Self {
            Frontiers { since, upper }
        }
    }

    /// The explanation context used in this test. Note that it holds the environment through a
    /// mutable reference.
    struct ExplainContext<'a> {
        env: &'a mut Environment,
        config: &'a ExplainConfig,
        frontiers: Frontiers<u64>,
    }

    /// A toy IR that serves as the subject of explanations.
    struct TestExpr {
        lhs: i32,
        rhs: i32,
    }

    /// The text explanation of a `TestExpr`: the expression plus the context it is explained in.
    struct TestExplanation<'a> {
        expr: &'a TestExpr,
        context: &'a ExplainContext<'a>,
    }

    impl<'a> DisplayText for TestExplanation<'a> {
        /// Prints one line for the expression, one line for the frontiers (only if `timing` is
        /// enabled in the config), and one line for the environment name.
        fn fmt_text(&self, f: &mut fmt::Formatter<'_>, _ctx: &mut ()) -> fmt::Result {
            let TestExpr { lhs, rhs } = self.expr;
            writeln!(f, "expr = {lhs} + {rhs}")?;

            if self.context.config.timing {
                let Frontiers { since, upper } = &self.context.frontiers;
                writeln!(f, "at t ∊ [{since}, {upper})")?;
            }

            let env_name = self.context.env.name.as_str();
            writeln!(f, "env = {env_name}")
        }
    }

    impl<'a> Explain<'a> for TestExpr {
        type Context = ExplainContext<'a>;
        type Text = TestExplanation<'a>;
        type Json = UnsupportedFormat;
        type Dot = UnsupportedFormat;

        fn explain_text(
            &'a mut self,
            context: &'a Self::Context,
        ) -> Result<Self::Text, ExplainError> {
            let explanation = TestExplanation {
                expr: self,
                context,
            };
            Ok(explanation)
        }
    }

    /// Explains the fixed expression `1 + 2` as text, with only `verbose_syntax` and `timing`
    /// enabled in the config.
    fn do_explain(
        env: &mut Environment,
        frontiers: Frontiers<u64>,
    ) -> Result<String, ExplainError> {
        let mut subject = TestExpr { lhs: 1, rhs: 2 };
        let format = ExplainFormat::Text;

        let config = ExplainConfig {
            redacted: false,
            arity: false,
            cardinality: false,
            column_names: false,
            filter_pushdown: false,
            humanized_exprs: false,
            join_impls: false,
            keys: false,
            linear_chains: false,
            no_fast_path: false,
            no_notices: false,
            node_ids: false,
            non_negative: false,
            raw_plans: false,
            raw_syntax: false,
            verbose_syntax: true,
            subtree_size: false,
            equivalences: false,
            timing: true,
            types: false,
            features: Default::default(),
        };
        let context = ExplainContext {
            env,
            config: &config,
            frontiers,
        };

        subject.explain(&format, &context)
    }

    #[mz_ore::test]
    fn test_mutable_context() {
        let mut env = Environment::default();
        let frontiers: Frontiers<u64> = Frontiers::new(3, 7);

        let expected = String::from("expr = 1 + 2\nat t ∊ [3, 7)\nenv = test env\n");
        let actual = do_explain(&mut env, frontiers);

        assert_ok!(actual);
        assert_eq!(actual.unwrap(), expected);
    }
}