mz_transform/
will_distinct.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Transform that pushes down the information that a collection will be subjected to a `Distinct` on specific columns.
11
12use mz_expr::MirRelationExpr;
13
14use crate::analysis::{DerivedView, NonNegative};
15
16use crate::{TransformCtx, TransformError};
17
18/// Pushes down the information that a collection will be subjected to a `Distinct` on specific columns.
19///
20/// This intends to recognize idiomatic stacks of `UNION` from SQL, which look like `Distinct` over `Union`, potentially
21/// over other `Distinct` expressions. It is only able to see patterns of `DISTINCT UNION DISTINCT, ..` and other operators
22/// in between will prevent the necessary insight to remove the second `DISTINCT`.
23///
24/// There are other potential optimizations, for example fusing multiple reads of the same source, where distinctness may
25/// mean they could be a single read (with additional requirements).
26#[derive(Debug, Default)]
27pub struct WillDistinct;
28
29impl crate::Transform for WillDistinct {
30    fn name(&self) -> &'static str {
31        "WillDistinct"
32    }
33
34    #[mz_ore::instrument(
35        target = "optimizer"
36        level = "trace",
37        fields(path.segment = "will_distinct")
38    )]
39    fn actually_perform_transform(
40        &self,
41        relation: &mut MirRelationExpr,
42        ctx: &mut TransformCtx,
43    ) -> Result<(), TransformError> {
44        // Perform bottom-up equivalence class analysis.
45        use crate::analysis::DerivedBuilder;
46        let mut builder = DerivedBuilder::new(ctx.features);
47        builder.require(NonNegative);
48        let derived = builder.visit(relation);
49        let derived = derived.as_view();
50
51        self.apply(relation, derived);
52
53        mz_repr::explain::trace_plan(&*relation);
54        Ok(())
55    }
56}
57
58impl WillDistinct {
59    fn apply(&self, expr: &mut MirRelationExpr, derived: DerivedView) {
60        // Maintain a todo list of triples of 1. expression, 2. child analysis results, and 3. a "will distinct" bit.
61        // The "will distinct" bit says that a subsequent operator will make the specific multiplicities of each record
62        // irrelevant, and the expression only needs to present the correct *multiset* of record, with any non-negative
63        // cardinality allowed.
64        let mut todo = vec![(expr, derived, false)];
65        while let Some((expr, derived, distinct_by)) = todo.pop() {
66            // If we find a `Distinct` expression in the shadow of another `Distinct` that will apply to its key columns,
67            // we can remove this `Distinct` operator as the distinctness will be enforced by the other expression.
68            if let (
69                MirRelationExpr::Reduce {
70                    input,
71                    group_key,
72                    aggregates,
73                    ..
74                },
75                true,
76            ) = (&mut *expr, distinct_by)
77            {
78                if aggregates.is_empty() {
79                    // We can remove the `Distinct`, but we must install a `Map` and a `Project` to implement that
80                    // aspect of the operator. We do this by hand so that we can still descend down `input` and
81                    // continue to remove shadowed `Distinct` operators.
82                    let arity = input.arity();
83                    *expr = MirRelationExpr::Project {
84                        outputs: (arity..arity + group_key.len()).collect::<Vec<_>>(),
85                        input: Box::new(MirRelationExpr::Map {
86                            scalars: group_key.clone(),
87                            input: Box::new(input.take_dangerous()),
88                        }),
89                    };
90                    // We are certain to have a specific pattern of AST nodes, which we need to push through so that
91                    // we can continue recursively.
92                    if let MirRelationExpr::Project { input, .. } = expr {
93                        // `input` is a `Map` node, but it has a single input like the `Distinct` it came from.
94                        // Although it reads a bit weird, this lines up the child of the distinct with its derived
95                        // analysis results.
96                        todo.extend(
97                            input
98                                .children_mut()
99                                .rev()
100                                .zip(derived.children_rev())
101                                .map(|(x, y)| (x, y, true)),
102                        );
103                    }
104                } else {
105                    todo.extend(
106                        expr.children_mut()
107                            .rev()
108                            .zip(derived.children_rev())
109                            .map(|(x, y)| (x, y, false)),
110                    );
111                }
112            } else {
113                match (expr, distinct_by) {
114                    (
115                        MirRelationExpr::Reduce {
116                            input, aggregates, ..
117                        },
118                        _,
119                    ) => {
120                        if aggregates.is_empty() {
121                            todo.push((input, derived.last_child(), true));
122                        } else {
123                            todo.push((input, derived.last_child(), false))
124                        }
125                    }
126                    // If all inputs to the union are non-negative, any distinct enforced above the expression can be
127                    // communicated on to each input.
128                    (MirRelationExpr::Union { base, inputs }, will_distinct) => {
129                        if derived
130                            .children_rev()
131                            .all(|v| *v.value::<NonNegative>().unwrap())
132                        {
133                            let children_rev = inputs.iter_mut().rev().chain(Some(&mut **base));
134                            todo.extend(
135                                children_rev
136                                    .zip(derived.children_rev())
137                                    .map(|(x, y)| (x, y, will_distinct)),
138                            );
139                        } else {
140                            let children_rev = inputs.iter_mut().rev().chain(Some(&mut **base));
141                            todo.extend(
142                                children_rev
143                                    .zip(derived.children_rev())
144                                    .map(|(x, y)| (x, y, false)),
145                            );
146                        }
147                    }
148                    (x, _) => {
149                        todo.extend(
150                            x.children_mut()
151                                .rev()
152                                .zip(derived.children_rev())
153                                .map(|(x, y)| (x, y, false)),
154                        );
155                    }
156                }
157            }
158        }
159    }
160}