mz_transform/will_distinct.rs
1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Transform that pushes down the information that a collection will be subjected to a `Distinct` on specific columns.
11
12use mz_expr::MirRelationExpr;
13
14use crate::analysis::{DerivedView, NonNegative};
15
16use crate::{TransformCtx, TransformError};
17
18/// Pushes down the information that a collection will be subjected to a `Distinct` on specific columns.
19///
20/// This intends to recognize idiomatic stacks of `UNION` from SQL, which look like `Distinct` over `Union`, potentially
21/// over other `Distinct` expressions. It is only able to see patterns of `DISTINCT UNION DISTINCT, ..` and other operators
22/// in between will prevent the necessary insight to remove the second `DISTINCT`.
23///
24/// There are other potential optimizations, for example fusing multiple reads of the same source, where distinctness may
25/// mean they could be a single read (with additional requirements).
26#[derive(Debug, Default)]
27pub struct WillDistinct;
28
29impl crate::Transform for WillDistinct {
30 fn name(&self) -> &'static str {
31 "WillDistinct"
32 }
33
34 #[mz_ore::instrument(
35 target = "optimizer"
36 level = "trace",
37 fields(path.segment = "will_distinct")
38 )]
39 fn actually_perform_transform(
40 &self,
41 relation: &mut MirRelationExpr,
42 ctx: &mut TransformCtx,
43 ) -> Result<(), TransformError> {
44 // Perform bottom-up equivalence class analysis.
45 use crate::analysis::DerivedBuilder;
46 let mut builder = DerivedBuilder::new(ctx.features);
47 builder.require(NonNegative);
48 let derived = builder.visit(relation);
49 let derived = derived.as_view();
50
51 self.apply(relation, derived);
52
53 mz_repr::explain::trace_plan(&*relation);
54 Ok(())
55 }
56}
57
58impl WillDistinct {
59 fn apply(&self, expr: &mut MirRelationExpr, derived: DerivedView) {
60 // Maintain a todo list of triples of 1. expression, 2. child analysis results, and 3. a "will distinct" bit.
61 // The "will distinct" bit says that a subsequent operator will make the specific multiplicities of each record
62 // irrelevant, and the expression only needs to present the correct *multiset* of record, with any non-negative
63 // cardinality allowed.
64 let mut todo = vec![(expr, derived, false)];
65 while let Some((expr, derived, distinct_by)) = todo.pop() {
66 // If we find a `Distinct` expression in the shadow of another `Distinct` that will apply to its key columns,
67 // we can remove this `Distinct` operator as the distinctness will be enforced by the other expression.
68 if let (
69 MirRelationExpr::Reduce {
70 input,
71 group_key,
72 aggregates,
73 ..
74 },
75 true,
76 ) = (&mut *expr, distinct_by)
77 {
78 if aggregates.is_empty() {
79 // We can remove the `Distinct`, but we must install a `Map` and a `Project` to implement that
80 // aspect of the operator. We do this by hand so that we can still descend down `input` and
81 // continue to remove shadowed `Distinct` operators.
82 let arity = input.arity();
83 *expr = MirRelationExpr::Project {
84 outputs: (arity..arity + group_key.len()).collect::<Vec<_>>(),
85 input: Box::new(MirRelationExpr::Map {
86 scalars: group_key.clone(),
87 input: Box::new(input.take_dangerous()),
88 }),
89 };
90 // We are certain to have a specific pattern of AST nodes, which we need to push through so that
91 // we can continue recursively.
92 if let MirRelationExpr::Project { input, .. } = expr {
93 // `input` is a `Map` node, but it has a single input like the `Distinct` it came from.
94 // Although it reads a bit weird, this lines up the child of the distinct with its derived
95 // analysis results.
96 todo.extend(
97 input
98 .children_mut()
99 .rev()
100 .zip(derived.children_rev())
101 .map(|(x, y)| (x, y, true)),
102 );
103 }
104 } else {
105 todo.extend(
106 expr.children_mut()
107 .rev()
108 .zip(derived.children_rev())
109 .map(|(x, y)| (x, y, false)),
110 );
111 }
112 } else {
113 match (expr, distinct_by) {
114 (
115 MirRelationExpr::Reduce {
116 input, aggregates, ..
117 },
118 _,
119 ) => {
120 if aggregates.is_empty() {
121 todo.push((input, derived.last_child(), true));
122 } else {
123 todo.push((input, derived.last_child(), false))
124 }
125 }
126 // If all inputs to the union are non-negative, any distinct enforced above the expression can be
127 // communicated on to each input.
128 (MirRelationExpr::Union { base, inputs }, will_distinct) => {
129 if derived
130 .children_rev()
131 .all(|v| *v.value::<NonNegative>().unwrap())
132 {
133 let children_rev = inputs.iter_mut().rev().chain(Some(&mut **base));
134 todo.extend(
135 children_rev
136 .zip(derived.children_rev())
137 .map(|(x, y)| (x, y, will_distinct)),
138 );
139 } else {
140 let children_rev = inputs.iter_mut().rev().chain(Some(&mut **base));
141 todo.extend(
142 children_rev
143 .zip(derived.children_rev())
144 .map(|(x, y)| (x, y, false)),
145 );
146 }
147 }
148 (x, _) => {
149 todo.extend(
150 x.children_mut()
151 .rev()
152 .zip(derived.children_rev())
153 .map(|(x, y)| (x, y, false)),
154 );
155 }
156 }
157 }
158 }
159 }
160}