mz_transform/will_distinct.rs
1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Transform that pushes down the information that a collection will be subjected to a `Distinct` on specific columns.
11
12use itertools::Itertools;
13use mz_expr::MirRelationExpr;
14
15use crate::analysis::{DerivedView, NonNegative};
16
17use crate::{TransformCtx, TransformError};
18
19/// Pushes down the information that a collection will be subjected to a `Distinct` on specific columns.
20///
21/// This intends to recognize idiomatic stacks of `UNION` from SQL, which look like `Distinct` over `Union`, potentially
22/// over other `Distinct` expressions. It is only able to see patterns of `DISTINCT UNION DISTINCT, ..` and other operators
23/// in between will prevent the necessary insight to remove the second `DISTINCT`.
24///
25/// There are other potential optimizations, for example fusing multiple reads of the same source, where distinctness may
26/// mean they could be a single read (with additional requirements).
27#[derive(Debug, Default)]
28pub struct WillDistinct;
29
30impl crate::Transform for WillDistinct {
31 fn name(&self) -> &'static str {
32 "WillDistinct"
33 }
34
35 #[mz_ore::instrument(
36 target = "optimizer"
37 level = "trace",
38 fields(path.segment = "will_distinct")
39 )]
40 fn actually_perform_transform(
41 &self,
42 relation: &mut MirRelationExpr,
43 ctx: &mut TransformCtx,
44 ) -> Result<(), TransformError> {
45 // Perform bottom-up equivalence class analysis.
46 use crate::analysis::DerivedBuilder;
47 let mut builder = DerivedBuilder::new(ctx.features);
48 builder.require(NonNegative);
49 let derived = builder.visit(relation);
50 let derived = derived.as_view();
51
52 self.apply(relation, derived);
53
54 mz_repr::explain::trace_plan(&*relation);
55 Ok(())
56 }
57}
58
59impl WillDistinct {
60 fn apply(&self, expr: &mut MirRelationExpr, derived: DerivedView) {
61 // Maintain a todo list of triples of 1. expression, 2. child analysis results, and 3. a "will distinct" bit.
62 // The "will distinct" bit says that a subsequent operator will make the specific multiplicities of each record
63 // irrelevant, and the expression only needs to present the correct *multiset* of record, with any non-negative
64 // cardinality allowed.
65 let mut todo = vec![(expr, derived, false)];
66 while let Some((expr, derived, distinct_by)) = todo.pop() {
67 // If we find a `Distinct` expression in the shadow of another `Distinct` that will apply to its key columns,
68 // we can remove this `Distinct` operator as the distinctness will be enforced by the other expression.
69 if let (
70 MirRelationExpr::Reduce {
71 input,
72 group_key,
73 aggregates,
74 ..
75 },
76 true,
77 ) = (&mut *expr, distinct_by)
78 {
79 if aggregates.is_empty() {
80 // We can remove the `Distinct`, but we must install a `Map` and a `Project` to implement that
81 // aspect of the operator. We do this by hand so that we can still descend down `input` and
82 // continue to remove shadowed `Distinct` operators.
83 let arity = input.arity();
84 *expr = MirRelationExpr::Project {
85 outputs: (arity..arity + group_key.len()).collect::<Vec<_>>(),
86 input: Box::new(MirRelationExpr::Map {
87 scalars: group_key.clone(),
88 input: Box::new(input.take_dangerous()),
89 }),
90 };
91 // We are certain to have a specific pattern of AST nodes, which we need to push through so that
92 // we can continue recursively.
93 if let MirRelationExpr::Project { input, .. } = expr {
94 // `input` is a `Map` node, but it has a single input like the `Distinct` it came from.
95 // Although it reads a bit weird, this lines up the child of the distinct with its derived
96 // analysis results.
97 todo.extend(
98 input
99 .children_mut()
100 .rev()
101 .zip_eq(derived.children_rev())
102 .map(|(x, y)| (x, y, true)),
103 );
104 }
105 } else {
106 todo.extend(
107 expr.children_mut()
108 .rev()
109 .zip_eq(derived.children_rev())
110 .map(|(x, y)| (x, y, false)),
111 );
112 }
113 } else {
114 match (expr, distinct_by) {
115 (
116 MirRelationExpr::Reduce {
117 input, aggregates, ..
118 },
119 _,
120 ) => {
121 if aggregates.is_empty() {
122 todo.push((input, derived.last_child(), true));
123 } else {
124 todo.push((input, derived.last_child(), false))
125 }
126 }
127 // If all inputs to the union are non-negative, any distinct enforced above the expression can be
128 // communicated on to each input.
129 (MirRelationExpr::Union { base, inputs }, will_distinct) => {
130 if derived
131 .children_rev()
132 .all(|v| *v.value::<NonNegative>().unwrap())
133 {
134 let children_rev = inputs.iter_mut().rev().chain(Some(&mut **base));
135 todo.extend(
136 children_rev
137 .zip_eq(derived.children_rev())
138 .map(|(x, y)| (x, y, will_distinct)),
139 );
140 } else {
141 let children_rev = inputs.iter_mut().rev().chain(Some(&mut **base));
142 todo.extend(
143 children_rev
144 .zip_eq(derived.children_rev())
145 .map(|(x, y)| (x, y, false)),
146 );
147 }
148 }
149 (x, _) => {
150 todo.extend(
151 x.children_mut()
152 .rev()
153 .zip_eq(derived.children_rev())
154 .map(|(x, y)| (x, y, false)),
155 );
156 }
157 }
158 }
159 }
160 }
161}