mz_transform/
will_distinct.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Transform that pushes down the information that a collection will be subjected to a `Distinct` on specific columns.
11
12use itertools::Itertools;
13use mz_expr::MirRelationExpr;
14
15use crate::analysis::{DerivedView, NonNegative};
16
17use crate::{TransformCtx, TransformError};
18
19/// Pushes down the information that a collection will be subjected to a `Distinct` on specific columns.
20///
21/// This intends to recognize idiomatic stacks of `UNION` from SQL, which look like `Distinct` over `Union`, potentially
22/// over other `Distinct` expressions. It is only able to see patterns of `DISTINCT UNION DISTINCT, ..` and other operators
23/// in between will prevent the necessary insight to remove the second `DISTINCT`.
24///
25/// There are other potential optimizations, for example fusing multiple reads of the same source, where distinctness may
26/// mean they could be a single read (with additional requirements).
27#[derive(Debug, Default)]
28pub struct WillDistinct;
29
30impl crate::Transform for WillDistinct {
31    fn name(&self) -> &'static str {
32        "WillDistinct"
33    }
34
35    #[mz_ore::instrument(
36        target = "optimizer"
37        level = "trace",
38        fields(path.segment = "will_distinct")
39    )]
40    fn actually_perform_transform(
41        &self,
42        relation: &mut MirRelationExpr,
43        ctx: &mut TransformCtx,
44    ) -> Result<(), TransformError> {
45        // Perform bottom-up equivalence class analysis.
46        use crate::analysis::DerivedBuilder;
47        let mut builder = DerivedBuilder::new(ctx.features);
48        builder.require(NonNegative);
49        let derived = builder.visit(relation);
50        let derived = derived.as_view();
51
52        self.apply(relation, derived);
53
54        mz_repr::explain::trace_plan(&*relation);
55        Ok(())
56    }
57}
58
59impl WillDistinct {
60    fn apply(&self, expr: &mut MirRelationExpr, derived: DerivedView) {
61        // Maintain a todo list of triples of 1. expression, 2. child analysis results, and 3. a "will distinct" bit.
62        // The "will distinct" bit says that a subsequent operator will make the specific multiplicities of each record
63        // irrelevant, and the expression only needs to present the correct *multiset* of record, with any non-negative
64        // cardinality allowed.
65        let mut todo = vec![(expr, derived, false)];
66        while let Some((expr, derived, distinct_by)) = todo.pop() {
67            // If we find a `Distinct` expression in the shadow of another `Distinct` that will apply to its key columns,
68            // we can remove this `Distinct` operator as the distinctness will be enforced by the other expression.
69            if let (
70                MirRelationExpr::Reduce {
71                    input,
72                    group_key,
73                    aggregates,
74                    ..
75                },
76                true,
77            ) = (&mut *expr, distinct_by)
78            {
79                if aggregates.is_empty() {
80                    // We can remove the `Distinct`, but we must install a `Map` and a `Project` to implement that
81                    // aspect of the operator. We do this by hand so that we can still descend down `input` and
82                    // continue to remove shadowed `Distinct` operators.
83                    let arity = input.arity();
84                    *expr = MirRelationExpr::Project {
85                        outputs: (arity..arity + group_key.len()).collect::<Vec<_>>(),
86                        input: Box::new(MirRelationExpr::Map {
87                            scalars: group_key.clone(),
88                            input: Box::new(input.take_dangerous()),
89                        }),
90                    };
91                    // We are certain to have a specific pattern of AST nodes, which we need to push through so that
92                    // we can continue recursively.
93                    if let MirRelationExpr::Project { input, .. } = expr {
94                        // `input` is a `Map` node, but it has a single input like the `Distinct` it came from.
95                        // Although it reads a bit weird, this lines up the child of the distinct with its derived
96                        // analysis results.
97                        todo.extend(
98                            input
99                                .children_mut()
100                                .rev()
101                                .zip_eq(derived.children_rev())
102                                .map(|(x, y)| (x, y, true)),
103                        );
104                    }
105                } else {
106                    todo.extend(
107                        expr.children_mut()
108                            .rev()
109                            .zip_eq(derived.children_rev())
110                            .map(|(x, y)| (x, y, false)),
111                    );
112                }
113            } else {
114                match (expr, distinct_by) {
115                    (
116                        MirRelationExpr::Reduce {
117                            input, aggregates, ..
118                        },
119                        _,
120                    ) => {
121                        if aggregates.is_empty() {
122                            todo.push((input, derived.last_child(), true));
123                        } else {
124                            todo.push((input, derived.last_child(), false))
125                        }
126                    }
127                    // If all inputs to the union are non-negative, any distinct enforced above the expression can be
128                    // communicated on to each input.
129                    (MirRelationExpr::Union { base, inputs }, will_distinct) => {
130                        if derived
131                            .children_rev()
132                            .all(|v| *v.value::<NonNegative>().unwrap())
133                        {
134                            let children_rev = inputs.iter_mut().rev().chain(Some(&mut **base));
135                            todo.extend(
136                                children_rev
137                                    .zip_eq(derived.children_rev())
138                                    .map(|(x, y)| (x, y, will_distinct)),
139                            );
140                        } else {
141                            let children_rev = inputs.iter_mut().rev().chain(Some(&mut **base));
142                            todo.extend(
143                                children_rev
144                                    .zip_eq(derived.children_rev())
145                                    .map(|(x, y)| (x, y, false)),
146                            );
147                        }
148                    }
149                    (x, _) => {
150                        todo.extend(
151                            x.children_mut()
152                                .rev()
153                                .zip_eq(derived.children_rev())
154                                .map(|(x, y)| (x, y, false)),
155                        );
156                    }
157                }
158            }
159        }
160    }
161}