mz_repr/
explain.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! A set of traits for modeling things that can be explained by a
11//! SQL `EXPLAIN` statement.
12//!
13//! The main trait in this module is [`Explain`].
14//!
15//! An explainable subject `S` implements [`Explain`], and as part of that:
16//!
//! 1. Fixes the *context type* required for the explanation
//!    in [`Explain::Context`].
19//! 2. Fixes the *explanation type* for each [`ExplainFormat`]
20//!    in [`Explain::Text`], [`Explain::Json`], ....
21//! 3. Provides *an explanation type constructor* for each supported
//!    [`ExplainFormat`] from references to `S`, [`ExplainConfig`],
23//!    and the current [`Explain::Context`] in
24//!    [`Explain::explain_text`], [`Explain::explain_json`], ....
25//!
26//! The same *explanation type* can be shared by more than one
27//! [`ExplainFormat`].
28//!
29//! Use [`UnsupportedFormat`] and the default `explain_$format`
30//! constructor for [`Explain`] to indicate that the implementation does
31//! not support this `$format`.
32
33use itertools::Itertools;
34use proptest_derive::Arbitrary;
35use serde::{Deserialize, Serialize};
36use std::borrow::Cow;
37use std::collections::{BTreeMap, BTreeSet};
38use std::fmt;
39use std::fmt::{Display, Formatter};
40
41use mz_ore::stack::RecursionLimitError;
42use mz_ore::str::{Indent, bracketed, separated};
43
44use crate::explain::dot::{DisplayDot, dot_string};
45use crate::explain::json::{DisplayJson, json_string};
46use crate::explain::text::{DisplayText, text_string};
47use crate::optimize::OptimizerFeatureOverrides;
48use crate::{ColumnType, GlobalId, ScalarType};
49
50pub mod dot;
51pub mod json;
52pub mod text;
53#[cfg(feature = "tracing")]
54pub mod tracing;
55
56#[cfg(feature = "tracing")]
57pub use crate::explain::tracing::trace_plan;
58
/// The output formats in which an explanation can be rendered.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ExplainFormat {
    Text,
    VerboseText,
    Json,
    Dot,
}

impl fmt::Display for ExplainFormat {
    /// Writes the upper-case name of the format; `VerboseText` is written
    /// as the two words `VERBOSE TEXT`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match *self {
            Self::Text => "TEXT",
            Self::VerboseText => "VERBOSE TEXT",
            Self::Json => "JSON",
            Self::Dot => "DOT",
        };
        f.write_str(name)
    }
}
78
/// A zero-variant enum to be used as the explanation type in the
/// [`Explain`] implementation for all formats that are not supported
/// for `Self`.
///
/// Because the enum has no variants, no value of this type can ever be
/// constructed.
#[allow(missing_debug_implementations)]
pub enum UnsupportedFormat {}
84
/// The type of errors that may occur when an [`Explain::explain`]
/// call goes wrong.
#[derive(Debug)]
pub enum ExplainError {
    /// The given [`ExplainFormat`] is not supported.
    UnsupportedFormat(ExplainFormat),
    /// Wraps a [`fmt::Error`].
    FormatError(fmt::Error),
    /// Wraps an [`anyhow::Error`].
    AnyhowError(anyhow::Error),
    /// Wraps a [`RecursionLimitError`].
    RecursionLimitError(RecursionLimitError),
    /// Wraps a [`serde_json::Error`].
    SerdeJsonError(serde_json::Error),
    /// The `linear_chains` option was used together with
    /// `WITH MUTUALLY RECURSIVE`, which is not supported.
    LinearChainsPlusRecursive,
    /// Any other error, carried as a free-form message.
    UnknownError(String),
}
97
98impl fmt::Display for ExplainError {
99    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
100        write!(f, "error while rendering explain output: ")?;
101        match self {
102            ExplainError::UnsupportedFormat(format) => {
103                write!(f, "{} format is not supported", format)
104            }
105            ExplainError::FormatError(error) => {
106                write!(f, "{}", error)
107            }
108            ExplainError::AnyhowError(error) => {
109                write!(f, "{}", error)
110            }
111            ExplainError::RecursionLimitError(error) => {
112                write!(f, "{}", error)
113            }
114            ExplainError::SerdeJsonError(error) => {
115                write!(f, "{}", error)
116            }
117            ExplainError::LinearChainsPlusRecursive => {
118                write!(
119                    f,
120                    "The linear_chains option is not supported with WITH MUTUALLY RECURSIVE."
121                )
122            }
123            ExplainError::UnknownError(error) => {
124                write!(f, "{}", error)
125            }
126        }
127    }
128}
129
130impl From<fmt::Error> for ExplainError {
131    fn from(error: fmt::Error) -> Self {
132        ExplainError::FormatError(error)
133    }
134}
135
136impl From<anyhow::Error> for ExplainError {
137    fn from(error: anyhow::Error) -> Self {
138        ExplainError::AnyhowError(error)
139    }
140}
141
142impl From<RecursionLimitError> for ExplainError {
143    fn from(error: RecursionLimitError) -> Self {
144        ExplainError::RecursionLimitError(error)
145    }
146}
147
148impl From<serde_json::Error> for ExplainError {
149    fn from(error: serde_json::Error) -> Self {
150        ExplainError::SerdeJsonError(error)
151    }
152}
153
/// A set of options for controlling the output of [`Explain`] implementations.
///
/// The fields fall into three groups: flags that request the display of a
/// derived Analysis (these are the flags consulted by
/// [`ExplainConfig::requires_analyses`]), other display options, and the
/// optimizer feature overrides.
#[derive(Clone, Debug)]
pub struct ExplainConfig {
    // Analyses:
    // (These are shown only if the Analysis is supported by the backing IR.)
    /// Show the `SubtreeSize` Analysis in the explanation.
    pub subtree_size: bool,
    /// Show the number of columns, i.e., the `Arity` Analysis.
    pub arity: bool,
    /// Show the types, i.e., the `RelationType` Analysis.
    pub types: bool,
    /// Show the sets of unique keys, i.e., the `UniqueKeys` Analysis.
    pub keys: bool,
    /// Show the `NonNegative` Analysis.
    pub non_negative: bool,
    /// Show the `Cardinality` Analysis.
    pub cardinality: bool,
    /// Show the `ColumnNames` Analysis.
    pub column_names: bool,
    /// Show the `Equivalences` Analysis.
    pub equivalences: bool,
    // TODO: add an option to show the `Monotonic` Analysis. This is non-trivial, because this
    // Analysis needs the set of monotonic GlobalIds, which are cumbersome to pass around.

    // Other display options:
    /// Render implemented MIR `Join` nodes in a way which reflects the implementation.
    pub join_impls: bool,
    /// Use inferred column names when rendering scalar and aggregate expressions.
    pub humanized_exprs: bool,
    /// Restrict output trees to linear chains. Ignored if `raw_plans` is set.
    pub linear_chains: bool,
    /// Show the slow path plan even if a fast path plan was created. Useful for debugging.
    /// Enforced if `timing` is set.
    pub no_fast_path: bool,
    /// Don't print optimizer hints.
    pub no_notices: bool,
    /// Show node IDs in physical plans.
    pub node_ids: bool,
    /// Don't normalize plans before explaining them.
    pub raw_plans: bool,
    /// Disable virtual syntax in the explanation.
    pub raw_syntax: bool,
    /// Anonymize literals in the plan.
    pub redacted: bool,
    /// Print optimization timings.
    pub timing: bool,
    /// Show MFP pushdown information.
    pub filter_pushdown: bool,

    /// Optimizer feature flags.
    pub features: OptimizerFeatureOverrides,
}
206
207impl Default for ExplainConfig {
208    fn default() -> Self {
209        Self {
210            // Don't redact in debug builds and in CI.
211            redacted: !mz_ore::assert::soft_assertions_enabled(),
212            arity: false,
213            cardinality: false,
214            column_names: false,
215            filter_pushdown: false,
216            humanized_exprs: false,
217            join_impls: true,
218            keys: false,
219            linear_chains: false,
220            no_fast_path: true,
221            no_notices: false,
222            node_ids: false,
223            non_negative: false,
224            raw_plans: true,
225            raw_syntax: false,
226            subtree_size: false,
227            timing: false,
228            types: false,
229            equivalences: false,
230            features: Default::default(),
231        }
232    }
233}
234
235impl ExplainConfig {
236    pub fn requires_analyses(&self) -> bool {
237        self.subtree_size
238            || self.non_negative
239            || self.arity
240            || self.types
241            || self.keys
242            || self.cardinality
243            || self.column_names
244            || self.equivalences
245    }
246}
247
/// The type of object to be explained.
///
/// Variants that refer to a specific object carry its [`GlobalId`].
#[derive(Clone, Debug)]
pub enum Explainee {
    /// An existing materialized view.
    MaterializedView(GlobalId),
    /// An existing index.
    Index(GlobalId),
    /// An object that will be served using a dataflow.
    ///
    /// This variant is deprecated and will be removed in database-issues#5301.
    Dataflow(GlobalId),
    /// The object to be explained is a one-off query and may or may not be
    /// served using a dataflow.
    Select,
}
263
264/// A trait that provides a unified interface for objects that
265/// can be explained.
266///
267/// All possible subjects of the various forms of an `EXPLAIN`
268/// SQL statement should implement this trait.
269pub trait Explain<'a>: 'a {
270    /// The type of the immutable context in which
271    /// the explanation will be rendered.
272    type Context;
273
274    /// The explanation type produced by a successful
275    /// [`Explain::explain_text`] call.
276    type Text: DisplayText;
277
278    /// The explanation type produced by a successful
279    /// [`Explain::explain_verbose_text`] call.
280    type VerboseText: DisplayText;
281
282    /// The explanation type produced by a successful
283    /// [`Explain::explain_json`] call.
284    type Json: DisplayJson;
285
286    /// The explanation type produced by a successful
287    /// [`Explain::explain_json`] call.
288    type Dot: DisplayDot;
289
290    /// Explain an instance of [`Self`] within the given
291    /// [`Explain::Context`].
292    ///
293    /// Implementors should never have the need to not rely on
294    /// this default implementation.
295    ///
296    /// # Errors
297    ///
298    /// If the given `format` is not supported, the implementation
299    /// should return an [`ExplainError::UnsupportedFormat`].
300    ///
301    /// If an [`ExplainConfig`] parameter cannot be honored, the
302    /// implementation should silently ignore this parameter and
303    /// proceed without returning a [`Result::Err`].
304    fn explain(
305        &'a mut self,
306        format: &'a ExplainFormat,
307        context: &'a Self::Context,
308    ) -> Result<String, ExplainError> {
309        match format {
310            ExplainFormat::Text => self.explain_text(context).map(|e| text_string(&e)),
311            ExplainFormat::VerboseText => {
312                self.explain_verbose_text(context).map(|e| text_string(&e))
313            }
314            ExplainFormat::Json => self.explain_json(context).map(|e| json_string(&e)),
315            ExplainFormat::Dot => self.explain_dot(context).map(|e| dot_string(&e)),
316        }
317    }
318
319    /// Construct a [`Result::Ok`] of the [`Explain::Text`] format
320    /// from the config and the context.
321    ///
322    /// # Errors
323    ///
324    /// If the [`ExplainFormat::Text`] is not supported, the implementation
325    /// should return an [`ExplainError::UnsupportedFormat`].
326    ///
327    /// If an [`ExplainConfig`] parameter cannot be honored, the
328    /// implementation should silently ignore this parameter and
329    /// proceed without returning a [`Result::Err`].
330    #[allow(unused_variables)]
331    fn explain_text(&'a mut self, context: &'a Self::Context) -> Result<Self::Text, ExplainError> {
332        Err(ExplainError::UnsupportedFormat(ExplainFormat::Text))
333    }
334
335    /// Construct a [`Result::Ok`] of the [`Explain::VerboseText`] format
336    /// from the config and the context.
337    ///
338    /// # Errors
339    ///
340    /// If the [`ExplainFormat::VerboseText`] is not supported, the implementation
341    /// should return an [`ExplainError::UnsupportedFormat`].
342    ///
343    /// If an [`ExplainConfig`] parameter cannot be honored, the
344    /// implementation should silently ignore this parameter and
345    /// proceed without returning a [`Result::Err`].
346    #[allow(unused_variables)]
347    fn explain_verbose_text(
348        &'a mut self,
349        context: &'a Self::Context,
350    ) -> Result<Self::VerboseText, ExplainError> {
351        Err(ExplainError::UnsupportedFormat(ExplainFormat::VerboseText))
352    }
353
354    /// Construct a [`Result::Ok`] of the [`Explain::Json`] format
355    /// from the config and the context.
356    ///
357    /// # Errors
358    ///
359    /// If the [`ExplainFormat::Json`] is not supported, the implementation
360    /// should return an [`ExplainError::UnsupportedFormat`].
361    ///
362    /// If an [`ExplainConfig`] parameter cannot be honored, the
363    /// implementation should silently ignore this parameter and
364    /// proceed without returning a [`Result::Err`].
365    #[allow(unused_variables)]
366    fn explain_json(&'a mut self, context: &'a Self::Context) -> Result<Self::Json, ExplainError> {
367        Err(ExplainError::UnsupportedFormat(ExplainFormat::Json))
368    }
369
370    /// Construct a [`Result::Ok`] of the [`Explain::Dot`] format
371    /// from the config and the context.
372    ///
373    /// # Errors
374    ///
375    /// If the [`ExplainFormat::Dot`] is not supported, the implementation
376    /// should return an [`ExplainError::UnsupportedFormat`].
377    ///
378    /// If an [`ExplainConfig`] parameter cannot be honored, the
379    /// implementation should silently ignore this parameter and
380    /// proceed without returning a [`Result::Err`].
381    #[allow(unused_variables)]
382    fn explain_dot(&'a mut self, context: &'a Self::Context) -> Result<Self::Dot, ExplainError> {
383        Err(ExplainError::UnsupportedFormat(ExplainFormat::Dot))
384    }
385}
386
387/// A helper struct which will most commonly be used as the generic
388/// rendering context type `C` for various `Explain$Format`
389/// implementations.
390#[derive(Debug)]
391pub struct RenderingContext<'a> {
392    pub indent: Indent,
393    pub humanizer: &'a dyn ExprHumanizer,
394}
395
396impl<'a> RenderingContext<'a> {
397    pub fn new(indent: Indent, humanizer: &'a dyn ExprHumanizer) -> RenderingContext<'a> {
398        RenderingContext { indent, humanizer }
399    }
400}
401
402impl<'a> AsMut<Indent> for RenderingContext<'a> {
403    fn as_mut(&mut self) -> &mut Indent {
404        &mut self.indent
405    }
406}
407
408impl<'a> AsRef<&'a dyn ExprHumanizer> for RenderingContext<'a> {
409    fn as_ref(&self) -> &&'a dyn ExprHumanizer {
410        &self.humanizer
411    }
412}
413
414#[allow(missing_debug_implementations)]
415pub struct PlanRenderingContext<'a, T> {
416    pub indent: Indent,
417    pub humanizer: &'a dyn ExprHumanizer,
418    pub annotations: BTreeMap<&'a T, Analyses>,
419    pub config: &'a ExplainConfig,
420}
421
422impl<'a, T> PlanRenderingContext<'a, T> {
423    pub fn new(
424        indent: Indent,
425        humanizer: &'a dyn ExprHumanizer,
426        annotations: BTreeMap<&'a T, Analyses>,
427        config: &'a ExplainConfig,
428    ) -> PlanRenderingContext<'a, T> {
429        PlanRenderingContext {
430            indent,
431            humanizer,
432            annotations,
433            config,
434        }
435    }
436}
437
438impl<'a, T> AsMut<Indent> for PlanRenderingContext<'a, T> {
439    fn as_mut(&mut self) -> &mut Indent {
440        &mut self.indent
441    }
442}
443
444impl<'a, T> AsRef<&'a dyn ExprHumanizer> for PlanRenderingContext<'a, T> {
445    fn as_ref(&self) -> &&'a dyn ExprHumanizer {
446        &self.humanizer
447    }
448}
449
450/// A trait for humanizing components of an expression.
451///
452/// This will be most often used as part of the rendering context
453/// type for various `Display$Format` implementation.
454pub trait ExprHumanizer: fmt::Debug {
455    /// Attempts to return a human-readable string for the relation
456    /// identified by `id`.
457    fn humanize_id(&self, id: GlobalId) -> Option<String>;
458
459    /// Same as above, but without qualifications, e.g., only `foo` for `materialize.public.foo`.
460    fn humanize_id_unqualified(&self, id: GlobalId) -> Option<String>;
461
462    /// Like [`Self::humanize_id`], but returns the constituent parts of the
463    /// name as individual elements.
464    fn humanize_id_parts(&self, id: GlobalId) -> Option<Vec<String>>;
465
466    /// Returns a human-readable name for the specified scalar type.
467    /// Used in, e.g., EXPLAIN and error msgs, in which case exact Postgres compatibility is less
468    /// important than showing as much detail as possible. Also used in `pg_typeof`, where Postgres
469    /// compatibility is more important.
470    fn humanize_scalar_type(&self, ty: &ScalarType, postgres_compat: bool) -> String;
471
472    /// Returns a human-readable name for the specified column type.
473    /// Used in, e.g., EXPLAIN and error msgs, in which case exact Postgres compatibility is less
474    /// important than showing as much detail as possible. Also used in `pg_typeof`, where Postgres
475    /// compatibility is more important.
476    fn humanize_column_type(&self, typ: &ColumnType, postgres_compat: bool) -> String {
477        format!(
478            "{}{}",
479            self.humanize_scalar_type(&typ.scalar_type, postgres_compat),
480            if typ.nullable { "?" } else { "" }
481        )
482    }
483
484    /// Returns a vector of column names for the relation identified by `id`.
485    fn column_names_for_id(&self, id: GlobalId) -> Option<Vec<String>>;
486
487    /// Returns the `#column` name for the relation identified by `id`.
488    fn humanize_column(&self, id: GlobalId, column: usize) -> Option<String>;
489
490    /// Returns whether the specified id exists.
491    fn id_exists(&self, id: GlobalId) -> bool;
492}
493
/// An [`ExprHumanizer`] that extends the `inner` instance with shadow items
/// that are reported as present, even though they might not exist in `inner`.
///
/// Lookups by [`GlobalId`] consult `items` first and fall back to `inner`
/// only for ids that are not in the map.
#[derive(Debug)]
pub struct ExprHumanizerExt<'a> {
    /// A map of custom items that might not exist in the backing `inner`
    /// humanizer, but are reported as present by this humanizer instance.
    items: BTreeMap<GlobalId, TransientItem>,
    /// The inner humanizer used to resolve queries for [GlobalId] values not
    /// present in the `items` map.
    inner: &'a dyn ExprHumanizer,
}

impl<'a> ExprHumanizerExt<'a> {
    /// Creates a humanizer that layers the given shadow `items` over `inner`.
    pub fn new(items: BTreeMap<GlobalId, TransientItem>, inner: &'a dyn ExprHumanizer) -> Self {
        Self { items, inner }
    }
}
511
512impl<'a> ExprHumanizer for ExprHumanizerExt<'a> {
513    fn humanize_id(&self, id: GlobalId) -> Option<String> {
514        match self.items.get(&id) {
515            Some(item) => item
516                .humanized_id_parts
517                .as_ref()
518                .map(|parts| parts.join(".")),
519            None => self.inner.humanize_id(id),
520        }
521    }
522
523    fn humanize_id_unqualified(&self, id: GlobalId) -> Option<String> {
524        match self.items.get(&id) {
525            Some(item) => item
526                .humanized_id_parts
527                .as_ref()
528                .and_then(|parts| parts.last().cloned()),
529            None => self.inner.humanize_id_unqualified(id),
530        }
531    }
532
533    fn humanize_id_parts(&self, id: GlobalId) -> Option<Vec<String>> {
534        match self.items.get(&id) {
535            Some(item) => item.humanized_id_parts.clone(),
536            None => self.inner.humanize_id_parts(id),
537        }
538    }
539
540    fn humanize_scalar_type(&self, ty: &ScalarType, postgres_compat: bool) -> String {
541        self.inner.humanize_scalar_type(ty, postgres_compat)
542    }
543
544    fn column_names_for_id(&self, id: GlobalId) -> Option<Vec<String>> {
545        match self.items.get(&id) {
546            Some(item) => item.column_names.clone(),
547            None => self.inner.column_names_for_id(id),
548        }
549    }
550
551    fn humanize_column(&self, id: GlobalId, column: usize) -> Option<String> {
552        match self.items.get(&id) {
553            Some(item) => match &item.column_names {
554                Some(column_names) => Some(column_names[column].clone()),
555                None => None,
556            },
557            None => self.inner.humanize_column(id, column),
558        }
559    }
560
561    fn id_exists(&self, id: GlobalId) -> bool {
562        self.items.contains_key(&id) || self.inner.id_exists(id)
563    }
564}
565
/// A description of a catalog item that does not exist, but can be reported as
/// present in the catalog by a [`ExprHumanizerExt`] instance that has it in its
/// `items` list.
#[derive(Debug)]
pub struct TransientItem {
    /// The parts of the item's name, if known. [`ExprHumanizerExt`] joins
    /// them with `.` to form the full name and uses the last part as the
    /// unqualified name.
    humanized_id_parts: Option<Vec<String>>,
    /// The names of the item's columns, if known.
    column_names: Option<Vec<String>>,
}

impl TransientItem {
    /// Creates an item description from optional name parts and optional
    /// column names.
    pub fn new(humanized_id_parts: Option<Vec<String>>, column_names: Option<Vec<String>>) -> Self {
        Self {
            humanized_id_parts,
            column_names,
        }
    }
}
583
584/// A bare-minimum implementation of [`ExprHumanizer`].
585///
586/// The `DummyHumanizer` does a poor job of humanizing expressions. It is
587/// intended for use in contexts where polish is not required, like in tests or
588/// while debugging.
589#[derive(Debug)]
590pub struct DummyHumanizer;
591
592impl ExprHumanizer for DummyHumanizer {
593    fn humanize_id(&self, _: GlobalId) -> Option<String> {
594        // Returning `None` allows the caller to fall back to displaying the
595        // ID, if they so desire.
596        None
597    }
598
599    fn humanize_id_unqualified(&self, _id: GlobalId) -> Option<String> {
600        None
601    }
602
603    fn humanize_id_parts(&self, _id: GlobalId) -> Option<Vec<String>> {
604        None
605    }
606
607    fn humanize_scalar_type(&self, ty: &ScalarType, _postgres_compat: bool) -> String {
608        // The debug implementation is better than nothing.
609        format!("{:?}", ty)
610    }
611
612    fn column_names_for_id(&self, _id: GlobalId) -> Option<Vec<String>> {
613        None
614    }
615
616    fn humanize_column(&self, _id: GlobalId, _column: usize) -> Option<String> {
617        None
618    }
619
620    fn id_exists(&self, _id: GlobalId) -> bool {
621        false
622    }
623}
624
/// Pretty-prints a list of indices.
///
/// Wraps a borrowed slice of `usize` values.
#[derive(Debug)]
pub struct Indices<'a>(pub &'a [usize]);
628
/// Pretty-prints a list of scalar expressions that may have runs of column
/// indices as a comma-separated list interleaved with interval expressions.
///
/// Interval expressions are used only for runs of three or more elements.
///
/// Wraps a borrowed slice of expressions implementing [`ScalarOps`].
#[derive(Debug)]
pub struct CompactScalarSeq<'a, T: ScalarOps>(pub &'a [T]); // TODO(cloud#8196) remove this
635
/// Pretty-prints a list of scalar expressions that may have runs of column
/// indices as a comma-separated list interleaved with interval expressions.
///
/// Interval expressions are used only for runs of three or more elements.
///
/// Wraps a cloneable iterator over expressions implementing [`ScalarOps`],
/// rather than a slice.
#[derive(Debug)]
pub struct CompactScalars<T, I>(pub I)
where
    T: ScalarOps,
    I: Iterator<Item = T> + Clone;
645
/// Operations required of the scalar expressions wrapped by
/// [`CompactScalarSeq`] and [`CompactScalars`].
pub trait ScalarOps {
    // NOTE(review): presumably returns `Some(index)` when the expression is
    // a plain column reference and `None` otherwise; confirm against the
    // implementors.
    fn match_col_ref(&self) -> Option<usize>;

    // NOTE(review): presumably reports whether the expression refers to the
    // column with index `col_ref`; confirm against the implementors.
    fn references(&self, col_ref: usize) -> bool;
}
651
/// A somewhat ad-hoc way to carry a plan together with a set
/// of analyses derived for each node in that plan.
#[allow(missing_debug_implementations)]
pub struct AnnotatedPlan<'a, T> {
    /// The plan being annotated.
    pub plan: &'a T,
    /// The derived analyses, keyed by references to `T`.
    pub annotations: BTreeMap<&'a T, Analyses>,
}
659
/// A container for derived analyses.
///
/// Each field is `None` when the corresponding analysis is not available.
/// [`HumanizedAnalyses`] expects a field to be `Some` whenever the matching
/// [`ExplainConfig`] flag is set.
#[derive(Clone, Default, Debug)]
pub struct Analyses {
    /// Value shown for the `non_negative` option.
    pub non_negative: Option<bool>,
    /// Value shown for the `subtree_size` option.
    pub subtree_size: Option<usize>,
    /// Value shown for the `arity` option.
    pub arity: Option<usize>,
    /// Column types shown for the `types` option. An inner `None` is
    /// rendered as `(<error>)`.
    pub types: Option<Option<Vec<ColumnType>>>,
    /// Keys shown for the `keys` option; each key is a list of `usize`
    /// values.
    pub keys: Option<Vec<Vec<usize>>>,
    /// Pre-rendered text shown for the `cardinality` option.
    pub cardinality: Option<String>,
    /// Names shown for the `column_names` option. An empty name is rendered
    /// as `#i`, where `i` is the position of the column.
    pub column_names: Option<Vec<String>>,
    /// Pre-rendered text shown for the `equivalences` option.
    pub equivalences: Option<String>,
}
672
/// A view over [`Analyses`] that can be rendered with `Display`, using a
/// humanizer to render column types and an [`ExplainConfig`] to decide which
/// analyses are shown.
#[derive(Debug, Clone)]
pub struct HumanizedAnalyses<'a> {
    /// The analyses to render.
    analyses: &'a Analyses,
    /// Used to render column types.
    humanizer: &'a dyn ExprHumanizer,
    /// Selects which analyses are rendered.
    config: &'a ExplainConfig,
}

impl<'a> HumanizedAnalyses<'a> {
    /// Creates a view over `analyses` that borrows the humanizer and the
    /// config from the given rendering context.
    pub fn new<T>(analyses: &'a Analyses, ctx: &PlanRenderingContext<'a, T>) -> Self {
        Self {
            analyses,
            humanizer: ctx.humanizer,
            config: ctx.config,
        }
    }
}
689
690impl<'a> Display for HumanizedAnalyses<'a> {
691    // Analysis rendering is guarded by the ExplainConfig flag for each
692    // Analysis. This is needed because we might have derived Analysis that
693    // are not explicitly requested (such as column_names), in which case we
694    // don't want to display them.
695    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
696        let mut builder = f.debug_struct("//");
697
698        if self.config.subtree_size {
699            let subtree_size = self.analyses.subtree_size.expect("subtree_size");
700            builder.field("subtree_size", &subtree_size);
701        }
702
703        if self.config.non_negative {
704            let non_negative = self.analyses.non_negative.expect("non_negative");
705            builder.field("non_negative", &non_negative);
706        }
707
708        if self.config.arity {
709            let arity = self.analyses.arity.expect("arity");
710            builder.field("arity", &arity);
711        }
712
713        if self.config.types {
714            let types = match self.analyses.types.as_ref().expect("types") {
715                Some(types) => {
716                    let types = types
717                        .into_iter()
718                        .map(|c| self.humanizer.humanize_column_type(c, false))
719                        .collect::<Vec<_>>();
720
721                    bracketed("(", ")", separated(", ", types)).to_string()
722                }
723                None => "(<error>)".to_string(),
724            };
725            builder.field("types", &types);
726        }
727
728        if self.config.keys {
729            let keys = self
730                .analyses
731                .keys
732                .as_ref()
733                .expect("keys")
734                .into_iter()
735                .map(|key| bracketed("[", "]", separated(", ", key)).to_string());
736            let keys = bracketed("(", ")", separated(", ", keys)).to_string();
737            builder.field("keys", &keys);
738        }
739
740        if self.config.cardinality {
741            let cardinality = self.analyses.cardinality.as_ref().expect("cardinality");
742            builder.field("cardinality", cardinality);
743        }
744
745        if self.config.column_names {
746            let column_names = self.analyses.column_names.as_ref().expect("column_names");
747            let column_names = column_names.into_iter().enumerate().map(|(i, c)| {
748                if c.is_empty() {
749                    Cow::Owned(format!("#{i}"))
750                } else {
751                    Cow::Borrowed(c)
752                }
753            });
754            let column_names = bracketed("(", ")", separated(", ", column_names)).to_string();
755            builder.field("column_names", &column_names);
756        }
757
758        if self.config.equivalences {
759            let equivs = self.analyses.equivalences.as_ref().expect("equivalences");
760            builder.field("equivs", equivs);
761        }
762
763        builder.finish()
764    }
765}
766
767/// A set of indexes that are used in the explained plan.
768///
769/// Each element consists of the following components:
770/// 1. The id of the index.
771/// 2. A vector of [IndexUsageType] denoting how the index is used in the plan.
772///
773/// Using a `BTreeSet` here ensures a deterministic iteration order, which in turn ensures that
774/// the corresponding EXPLAIN output is deterministic as well.
775#[derive(Clone, Debug, Default)]
776pub struct UsedIndexes(BTreeSet<(GlobalId, Vec<IndexUsageType>)>);
777
778impl UsedIndexes {
779    pub fn new(values: BTreeSet<(GlobalId, Vec<IndexUsageType>)>) -> UsedIndexes {
780        UsedIndexes(values)
781    }
782
783    pub fn is_empty(&self) -> bool {
784        self.0.is_empty()
785    }
786}
787
/// Describes one way in which an index is used in an explained plan.
///
/// The human-readable label of each variant is given by the type's `Display`
/// implementation.
#[derive(Debug, Clone, Arbitrary, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum IndexUsageType {
    /// Read the entire index.
    FullScan,
    /// Differential join. The work is proportional to the number of matches.
    DifferentialJoin,
    /// Delta join. The payload describes how the delta join uses the index.
    DeltaJoin(DeltaJoinIndexUsageType),
    /// `IndexedFilter`, e.g., something like `WHERE x = 42` with an index on `x`.
    /// This also stores the id of the index that we want to do the lookup from. (This id is already
    /// chosen by `LiteralConstraints`, and then `IndexUsageType::Lookup` communicates this inside
    /// `CollectIndexRequests` from the `IndexedFilter` to the `Get`.)
    Lookup(GlobalId),
    /// This is a rare case that happens when the user creates an index that is identical to an
    /// existing one (i.e., on the same object, and with the same keys). We'll re-use the
    /// arrangement of the existing index. The plan is an `ArrangeBy` + `Get`, where the `ArrangeBy`
    /// is requesting the same key as an already existing index. (`export_index` is what inserts
    /// this `ArrangeBy`.)
    PlanRootNoArrangement,
    /// The index is used for directly writing to a sink. Can happen with a SUBSCRIBE to an indexed
    /// view.
    SinkExport,
    /// The index is used for creating a new index. Note that either a `FullScan` or a
    /// `PlanRootNoArrangement` usage will always accompany an `IndexExport` usage.
    IndexExport,
    /// When a fast path peek has a LIMIT, but no ORDER BY, then we read from the index only as many
    /// records (approximately), as the OFFSET + LIMIT needs.
    /// Note: When a fast path peek does a lookup and also has a limit, the usage type will be
    /// `Lookup`. However, the smart limiting logic will still apply.
    FastPathLimit,
    /// We saw a dangling `ArrangeBy`, i.e., where we have no idea what the arrangement will be used
    /// for. This is an internal error. Can be a bug either in `CollectIndexRequests`, or some
    /// other transform that messed up the plan. It's also possible that somebody is trying to add
    /// an `ArrangeBy` marking for some operator other than a `Join`. (Which is fine, but please
    /// update `CollectIndexRequests`.)
    DanglingArrangeBy,
    /// Internal error in `CollectIndexRequests` or a failed attempt to look up
    /// an index in `DataflowMetainfo::used_indexes`.
    Unknown,
}
828
/// How a delta join uses an index.
///
/// In a snapshot, one arrangement of the first input is scanned, all the other arrangements (of the
/// first input, and of all other inputs) only get lookups.
/// When later input batches are arriving, all inputs are fully read.
#[derive(Debug, Clone, Arbitrary, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum DeltaJoinIndexUsageType {
    /// The usage could not be determined; displayed as an internal error.
    Unknown,
    /// Displayed as "delta join lookup".
    Lookup,
    /// Displayed as "delta join 1st input (full scan)".
    FirstInputFullScan,
}
838
839impl std::fmt::Display for IndexUsageType {
840    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
841        write!(
842            f,
843            "{}",
844            match self {
845                IndexUsageType::FullScan => "*** full scan ***",
846                IndexUsageType::Lookup(_idx_id) => "lookup",
847                IndexUsageType::DifferentialJoin => "differential join",
848                IndexUsageType::DeltaJoin(DeltaJoinIndexUsageType::FirstInputFullScan) =>
849                    "delta join 1st input (full scan)",
850                // Technically, this is a lookup only for a snapshot. For later update batches, all
851                // records are read. However, I wrote lookup here, because in most cases the
852                // lookup/scan distinction matters only for a snapshot. This is because for arriving
853                // update records, something in the system will always do work proportional to the
854                // number of records anyway. In other words, something is always scanning new
855                // updates, but we can avoid scanning records again and again in snapshots.
856                IndexUsageType::DeltaJoin(DeltaJoinIndexUsageType::Lookup) => "delta join lookup",
857                IndexUsageType::DeltaJoin(DeltaJoinIndexUsageType::Unknown) =>
858                    "*** INTERNAL ERROR (unknown delta join usage) ***",
859                IndexUsageType::PlanRootNoArrangement => "plan root (no new arrangement)",
860                IndexUsageType::SinkExport => "sink export",
861                IndexUsageType::IndexExport => "index export",
862                IndexUsageType::FastPathLimit => "fast path limit",
863                IndexUsageType::DanglingArrangeBy => "*** INTERNAL ERROR (dangling ArrangeBy) ***",
864                IndexUsageType::Unknown => "*** INTERNAL ERROR (unknown usage) ***",
865            }
866        )
867    }
868}
869
870impl IndexUsageType {
871    pub fn display_vec<'a, I>(usage_types: I) -> impl Display + Sized + 'a
872    where
873        I: IntoIterator<Item = &'a IndexUsageType>,
874    {
875        separated(", ", usage_types.into_iter().sorted().dedup())
876    }
877}
878
879#[cfg(test)]
880mod tests {
881    use mz_ore::assert_ok;
882
883    use super::*;
884
    /// Test-only environment state that an explanation reads from.
    struct Environment {
        /// Name of the environment; `TestExplanation` prints it as `env = {name}`.
        name: String,
    }
888
889    impl Default for Environment {
890        fn default() -> Self {
891            Environment {
892                name: "test env".to_string(),
893            }
894        }
895    }
896
    /// A pair of bounds of type `T`; `TestExplanation` prints them as the half-open interval
    /// `[since, upper)`.
    struct Frontiers<T> {
        /// The bound printed on the closed (left) side of the interval.
        since: T,
        /// The bound printed on the open (right) side of the interval.
        upper: T,
    }
901
902    impl<T> Frontiers<T> {
903        fn new(since: T, upper: T) -> Self {
904            Self { since, upper }
905        }
906    }
907
    /// The context type that `TestExpr` fixes as its `Explain::Context`.
    struct ExplainContext<'a> {
        /// Mutably borrowed environment; `TestExplanation` only reads its `name`.
        env: &'a mut Environment,
        /// The explain configuration; its `timing` flag decides whether the frontiers are printed.
        config: &'a ExplainConfig,
        /// The bounds printed as `[since, upper)` when `config.timing` is set.
        frontiers: Frontiers<u64>,
    }
913
    /// A test IR that should be the subject of explanations.
    ///
    /// `TestExplanation` renders it as `expr = {lhs} + {rhs}`.
    struct TestExpr {
        /// The operand printed on the left of the `+`.
        lhs: i32,
        /// The operand printed on the right of the `+`.
        rhs: i32,
    }
919
    /// The explanation type that `TestExpr` produces for the verbose text format: a borrowed
    /// expression together with the borrowed context it is explained in.
    struct TestExplanation<'a> {
        /// The expression being explained.
        expr: &'a TestExpr,
        /// The context supplying the config, the frontiers, and the environment.
        context: &'a ExplainContext<'a>,
    }
924
925    impl<'a> DisplayText for TestExplanation<'a> {
926        fn fmt_text(&self, f: &mut fmt::Formatter<'_>, _ctx: &mut ()) -> fmt::Result {
927            let lhs = &self.expr.lhs;
928            let rhs = &self.expr.rhs;
929            writeln!(f, "expr = {lhs} + {rhs}")?;
930
931            if self.context.config.timing {
932                let since = &self.context.frontiers.since;
933                let upper = &self.context.frontiers.upper;
934                writeln!(f, "at t ∊ [{since}, {upper})")?;
935            }
936
937            let name = &self.context.env.name;
938            writeln!(f, "env = {name}")?;
939
940            Ok(())
941        }
942    }
943
944    impl<'a> Explain<'a> for TestExpr {
945        type Context = ExplainContext<'a>;
946        type Text = UnsupportedFormat;
947        type VerboseText = TestExplanation<'a>;
948        type Json = UnsupportedFormat;
949        type Dot = UnsupportedFormat;
950
951        fn explain_verbose_text(
952            &'a mut self,
953            context: &'a Self::Context,
954        ) -> Result<Self::VerboseText, ExplainError> {
955            Ok(TestExplanation {
956                expr: self,
957                context,
958            })
959        }
960    }
961
962    fn do_explain(
963        env: &mut Environment,
964        frontiers: Frontiers<u64>,
965    ) -> Result<String, ExplainError> {
966        let mut expr = TestExpr { lhs: 1, rhs: 2 };
967
968        let format = ExplainFormat::VerboseText;
969        let config = &ExplainConfig {
970            redacted: false,
971            arity: false,
972            cardinality: false,
973            column_names: false,
974            filter_pushdown: false,
975            humanized_exprs: false,
976            join_impls: false,
977            keys: false,
978            linear_chains: false,
979            no_fast_path: false,
980            no_notices: false,
981            node_ids: false,
982            non_negative: false,
983            raw_plans: false,
984            raw_syntax: false,
985            subtree_size: false,
986            equivalences: false,
987            timing: true,
988            types: false,
989            features: Default::default(),
990        };
991        let context = ExplainContext {
992            env,
993            config,
994            frontiers,
995        };
996
997        expr.explain(&format, &context)
998    }
999
1000    #[mz_ore::test]
1001    fn test_mutable_context() {
1002        let mut env = Environment::default();
1003        let frontiers = Frontiers::<u64>::new(3, 7);
1004
1005        let act = do_explain(&mut env, frontiers);
1006        let exp = "expr = 1 + 2\nat t ∊ [3, 7)\nenv = test env\n".to_string();
1007
1008        assert_ok!(act);
1009        assert_eq!(act.unwrap(), exp);
1010    }
1011}