mz_transform/column_knowledge.rs
1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Transformations based on pulling information about individual columns from sources.
11
12use std::collections::BTreeMap;
13
14use itertools::{Itertools, zip_eq};
15use mz_expr::JoinImplementation::IndexedFilter;
16use mz_expr::visit::Visit;
17use mz_expr::{
18 EvalError, LetRecLimit, MirRelationExpr, MirScalarExpr, RECURSION_LIMIT, UnaryFunc, func,
19};
20use mz_ore::cast::CastFrom;
21use mz_ore::stack::{CheckedRecursion, RecursionGuard};
22use mz_ore::{assert_none, soft_panic_or_log};
23use mz_repr::{Datum, ReprColumnType, ReprScalarType, Row};
24
25use crate::{TransformCtx, TransformError};
26
27/// Harvest and act upon per-column information.
28#[derive(Debug)]
29pub struct ColumnKnowledge {
30 recursion_guard: RecursionGuard,
31}
32
33impl Default for ColumnKnowledge {
34 fn default() -> ColumnKnowledge {
35 ColumnKnowledge {
36 recursion_guard: RecursionGuard::with_limit(RECURSION_LIMIT),
37 }
38 }
39}
40
41impl CheckedRecursion for ColumnKnowledge {
42 fn recursion_guard(&self) -> &RecursionGuard {
43 &self.recursion_guard
44 }
45}
46
47impl crate::Transform for ColumnKnowledge {
48 fn name(&self) -> &'static str {
49 "ColumnKnowledge"
50 }
51
52 /// Transforms an expression through accumulated knowledge.
53 #[mz_ore::instrument(
54 target = "optimizer",
55 level = "debug",
56 fields(path.segment = "column_knowledge")
57 )]
58 fn actually_perform_transform(
59 &self,
60 expr: &mut MirRelationExpr,
61 _: &mut TransformCtx,
62 ) -> Result<(), TransformError> {
63 let mut knowledge_stack = Vec::<DatumKnowledge>::new();
64 let result = self
65 .harvest(expr, &mut BTreeMap::new(), &mut knowledge_stack)
66 .map(|_| ());
67 mz_repr::explain::trace_plan(&*expr);
68 result
69 }
70}
71
72impl ColumnKnowledge {
73 /// Harvest per-column knowledge.
74 ///
75 /// `knowledge_stack` is a pre-allocated vector but is expected not to contain any elements.
76 fn harvest(
77 &self,
78 expr: &mut MirRelationExpr,
79 knowledge: &mut BTreeMap<mz_expr::Id, Vec<DatumKnowledge>>,
80 knowledge_stack: &mut Vec<DatumKnowledge>,
81 ) -> Result<Vec<DatumKnowledge>, TransformError> {
82 self.checked_recur(|_| {
83 let result = match expr {
84 MirRelationExpr::ArrangeBy { input, .. } => {
85 self.harvest(input, knowledge, knowledge_stack)
86 }
87 MirRelationExpr::Get { id, typ, .. } => {
88 Ok(knowledge.get(id).cloned().unwrap_or_else(|| {
89 typ.column_types.iter().map(DatumKnowledge::from).collect()
90 }))
91 }
92 MirRelationExpr::Constant { rows, typ } => {
93 // TODO: handle multi-row cases with some constant columns.
94 if let Ok([(row, _diff)]) = rows.as_deref() {
95 let knowledge = std::iter::zip(row.iter(), typ.column_types.iter())
96 .map(DatumKnowledge::from)
97 .collect();
98 Ok(knowledge)
99 } else {
100 Ok(typ.column_types.iter().map(DatumKnowledge::from).collect())
101 }
102 }
103 MirRelationExpr::Let { id, value, body } => {
104 let value_knowledge = self.harvest(value, knowledge, knowledge_stack)?;
105 let prior_knowledge =
106 knowledge.insert(mz_expr::Id::Local(id.clone()), value_knowledge);
107 let body_knowledge = self.harvest(body, knowledge, knowledge_stack)?;
108 knowledge.remove(&mz_expr::Id::Local(id.clone()));
109 if let Some(prior_knowledge) = prior_knowledge {
110 knowledge.insert(mz_expr::Id::Local(id.clone()), prior_knowledge);
111 }
112 Ok(body_knowledge)
113 }
114 MirRelationExpr::LetRec {
115 ids,
116 values,
117 limits,
118 body,
119 } => {
120 // Set knowledge[i][j] = DatumKnowledge::bottom() for each
121 // column j and CTE i. This corresponds to the normal
122 // evaluation semantics where each recursive CTE is
123 // initialized to the empty collection.
124 for (id, value) in zip_eq(ids.iter(), values.iter()) {
125 let id = mz_expr::Id::Local(id.clone());
126 let knowledge_new = vec![DatumKnowledge::bottom(); value.arity()];
127 let knowledge_old = knowledge.insert(id, knowledge_new);
128 assert_none!(knowledge_old);
129 }
130
131 // Sum up the arity of all ids in the enclosing LetRec node.
132 let let_rec_arity = ids.iter().fold(0, |acc, id| {
133 let id = mz_expr::Id::Local(id.clone());
134 acc + u64::cast_from(knowledge[&id].len())
135 });
136
137 // Sequentially union knowledge[i][j] with the result of
138 // descending into a clone of values[i]. Repeat until one of
139 // the following conditions is met:
140 //
141 // 1. The knowledge bindings have stabilized at a fixpoint.
142 // 2. No fixpoint was found after `max_iterations`. If this
143 // is the case reset the knowledge vectors for all
144 // recursive CTEs to DatumKnowledge::top().
145 // 3. We reach the user-specified recursion limit of any of the bindings.
146 // In this case, we also give up similarly to 2., because we don't want to
147 // complicate things with handling different limits per binding.
148 let min_max_iter = LetRecLimit::min_max_iter(limits);
149 let max_iterations = 100;
150 let mut curr_iteration = 0;
151 loop {
152 // Check for conditions (2) and (3).
153 if curr_iteration >= max_iterations
154 || min_max_iter
155 .map(|min_max_iter| curr_iteration >= min_max_iter)
156 .unwrap_or(false)
157 {
158 if curr_iteration > 3 * let_rec_arity {
159 soft_panic_or_log!(
160 "LetRec loop in ColumnKnowledge has not converged in 3 * |{}|",
161 let_rec_arity
162 );
163 }
164
165 for (id, value) in zip_eq(ids.iter(), values.iter()) {
166 let id = mz_expr::Id::Local(id.clone());
167 let knowledge_new = vec![DatumKnowledge::top(); value.arity()];
168 knowledge.insert(id, knowledge_new);
169 }
170 break;
171 }
172
173 // Check for condition (1).
174 let mut change = false;
175 for (id, mut value) in zip_eq(ids.iter(), values.iter().cloned()) {
176 let id = mz_expr::Id::Local(id.clone());
177 let value = &mut value;
178 let next_knowledge = self.harvest(value, knowledge, knowledge_stack)?;
179 let curr_knowledge = knowledge.get_mut(&id).unwrap();
180 for (curr, next) in zip_eq(curr_knowledge.iter_mut(), next_knowledge) {
181 let prev = curr.clone();
182 curr.join_assign(&next);
183 change |= prev != *curr;
184 }
185 }
186 if !change {
187 break;
188 }
189
190 curr_iteration += 1;
191 }
192
193 // Descend into the values with the inferred knowledge.
194 for value in values.iter_mut() {
195 self.harvest(value, knowledge, knowledge_stack)?;
196 }
197
198 // Descend and return the knowledge from the body.
199 let body_knowledge = self.harvest(body, knowledge, knowledge_stack)?;
200
201 // Remove shadowed bindings. This is good hygiene, as
202 // otherwise with nested LetRec blocks the `loop { ... }`
203 // above will carry inner LetRec IDs across outer LetRec
204 // iterations. As a consequence, the "no shadowing"
205 // assertion at the beginning of this block will fail at the
206 // inner LetRec for the second outer LetRec iteration.
207 for id in ids.iter() {
208 let id = mz_expr::Id::Local(id.clone());
209 knowledge.remove(&id);
210 }
211
212 Ok(body_knowledge)
213 }
214 MirRelationExpr::Project { input, outputs } => {
215 let input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
216 Ok(outputs
217 .iter()
218 .map(|i| input_knowledge[*i].clone())
219 .collect())
220 }
221 MirRelationExpr::Map { input, scalars } => {
222 let mut input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
223 let mut column_types: Vec<ReprColumnType> = input.typ().column_types;
224 for scalar in scalars.iter_mut() {
225 input_knowledge.push(optimize(
226 scalar,
227 &column_types,
228 &input_knowledge[..],
229 knowledge_stack,
230 )?);
231 column_types.push(scalar.typ(&column_types));
232 }
233 Ok(input_knowledge)
234 }
235 MirRelationExpr::FlatMap { input, func, exprs } => {
236 let mut input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
237 let input_col_types: Vec<ReprColumnType> = input.typ().column_types;
238 for expr in exprs {
239 optimize(
240 expr,
241 &input_col_types,
242 &input_knowledge[..],
243 knowledge_stack,
244 )?;
245 }
246 let func_typ = func.output_type();
247 input_knowledge.extend(func_typ.column_types.iter().map(DatumKnowledge::from));
248 Ok(input_knowledge)
249 }
250 MirRelationExpr::Filter { input, predicates } => {
251 let mut input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
252 let input_col_types: Vec<ReprColumnType> = input.typ().column_types;
253 for predicate in predicates.iter_mut() {
254 optimize(
255 predicate,
256 &input_col_types,
257 &input_knowledge[..],
258 knowledge_stack,
259 )?;
260 }
261 // If any predicate tests a column for equality, truth, or is_null, we learn stuff.
262 for predicate in predicates.iter() {
263 // Equality tests allow us to unify the column knowledge of each input.
264 if let MirScalarExpr::CallBinary {
265 func: mz_expr::BinaryFunc::Eq(_),
266 expr1,
267 expr2,
268 } = predicate
269 {
270 // Collect knowledge about the inputs (for columns and literals).
271 let mut knowledge = DatumKnowledge::top();
272 if let MirScalarExpr::Column(c, _) = &**expr1 {
273 knowledge.meet_assign(&input_knowledge[*c]);
274 }
275 if let MirScalarExpr::Column(c, _) = &**expr2 {
276 knowledge.meet_assign(&input_knowledge[*c]);
277 }
278
279 // Absorb literal knowledge about columns.
280 if let MirScalarExpr::Literal(..) = &**expr1 {
281 knowledge.meet_assign(&DatumKnowledge::from(&**expr1));
282 }
283 if let MirScalarExpr::Literal(..) = &**expr2 {
284 knowledge.meet_assign(&DatumKnowledge::from(&**expr2));
285 }
286
287 // Write back unified knowledge to each column.
288 if let MirScalarExpr::Column(c, _) = &**expr1 {
289 input_knowledge[*c].meet_assign(&knowledge);
290 }
291 if let MirScalarExpr::Column(c, _) = &**expr2 {
292 input_knowledge[*c].meet_assign(&knowledge);
293 }
294 }
295 if let MirScalarExpr::CallUnary {
296 func: UnaryFunc::Not(func::Not),
297 expr,
298 } = predicate
299 {
300 if let MirScalarExpr::CallUnary {
301 func: UnaryFunc::IsNull(func::IsNull),
302 expr,
303 } = &**expr
304 {
305 if let MirScalarExpr::Column(c, _) = &**expr {
306 input_knowledge[*c].meet_assign(&DatumKnowledge::any(false));
307 }
308 }
309 }
310 }
311
312 Ok(input_knowledge)
313 }
314 MirRelationExpr::Join {
315 inputs,
316 equivalences,
317 implementation,
318 ..
319 } => {
320 // Aggregate column knowledge from each input into one `Vec`.
321 let mut knowledges = Vec::new();
322 for input in inputs.iter_mut() {
323 for mut knowledge in self.harvest(input, knowledge, knowledge_stack)? {
324 // Do not propagate error literals beyond join inputs, since that may result
325 // in them being propagated to other inputs of the join and evaluated when
326 // they should not.
327 if let DatumKnowledge::Lit { value: Err(_), .. } = knowledge {
328 knowledge.join_assign(&DatumKnowledge::any(false));
329 }
330 knowledges.push(knowledge);
331 }
332 }
333
334 // This only aggregates the column types of each input, not the
335 // keys of the inputs. It is unnecessary to aggregate the keys
336 // of the inputs since input keys are unnecessary for reducing
337 // `MirScalarExpr`s.
338 let folded_input_col_types: Vec<ReprColumnType> = inputs
339 .iter()
340 .flat_map(|input| input.typ().column_types)
341 .collect();
342
343 for equivalence in equivalences.iter_mut() {
344 let mut knowledge = DatumKnowledge::top();
345
346 // We can produce composite knowledge for everything in the equivalence class.
347 for expr in equivalence.iter_mut() {
348 if !matches!(implementation, IndexedFilter(..)) {
349 optimize(
350 expr,
351 &folded_input_col_types,
352 &knowledges,
353 knowledge_stack,
354 )?;
355 }
356 if let MirScalarExpr::Column(c, _) = expr {
357 knowledge.meet_assign(&knowledges[*c]);
358 }
359 if let MirScalarExpr::Literal(..) = expr {
360 knowledge.meet_assign(&DatumKnowledge::from(&*expr));
361 }
362 }
363 for expr in equivalence.iter_mut() {
364 if let MirScalarExpr::Column(c, _) = expr {
365 knowledges[*c] = knowledge.clone();
366 }
367 }
368 }
369
370 Ok(knowledges)
371 }
372 MirRelationExpr::Reduce {
373 input,
374 group_key,
375 aggregates,
376 monotonic: _,
377 expected_group_size: _,
378 } => {
379 let input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
380 let input_col_types: Vec<ReprColumnType> = input.typ().column_types;
381 let mut output = group_key
382 .iter_mut()
383 .map(|k| {
384 optimize(k, &input_col_types, &input_knowledge[..], knowledge_stack)
385 })
386 .collect::<Result<Vec<_>, _>>()?;
387 for aggregate in aggregates.iter_mut() {
388 use mz_expr::AggregateFunc;
389 let knowledge = optimize(
390 &mut aggregate.expr,
391 &input_col_types,
392 &input_knowledge[..],
393 knowledge_stack,
394 )?;
395 // This could be improved.
396 let knowledge = match aggregate.func {
397 AggregateFunc::MaxInt16
398 | AggregateFunc::MaxInt32
399 | AggregateFunc::MaxInt64
400 | AggregateFunc::MaxUInt16
401 | AggregateFunc::MaxUInt32
402 | AggregateFunc::MaxUInt64
403 | AggregateFunc::MaxMzTimestamp
404 | AggregateFunc::MaxFloat32
405 | AggregateFunc::MaxFloat64
406 | AggregateFunc::MaxBool
407 | AggregateFunc::MaxString
408 | AggregateFunc::MaxDate
409 | AggregateFunc::MaxTimestamp
410 | AggregateFunc::MaxTimestampTz
411 | AggregateFunc::MinInt16
412 | AggregateFunc::MinInt32
413 | AggregateFunc::MinInt64
414 | AggregateFunc::MinUInt16
415 | AggregateFunc::MinUInt32
416 | AggregateFunc::MinUInt64
417 | AggregateFunc::MinMzTimestamp
418 | AggregateFunc::MinFloat32
419 | AggregateFunc::MinFloat64
420 | AggregateFunc::MinBool
421 | AggregateFunc::MinString
422 | AggregateFunc::MinDate
423 | AggregateFunc::MinTimestamp
424 | AggregateFunc::MinTimestampTz
425 | AggregateFunc::Any
426 | AggregateFunc::All => {
427 // These methods propagate constant values exactly.
428 knowledge
429 }
430 AggregateFunc::Count => DatumKnowledge::any(false),
431 _ => {
432 // The remaining aggregates are non-null if
433 // their inputs are non-null. This is correct
434 // because in Mir~ we reduce an empty collection
435 // to an empty collection (in Hir~ the result
436 // often is singleton null collection).
437 DatumKnowledge::any(knowledge.nullable())
438 }
439 };
440 output.push(knowledge);
441 }
442 Ok(output)
443 }
444 MirRelationExpr::TopK { input, limit, .. } => {
445 let input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
446 if let Some(limit) = limit.as_mut() {
447 let input_col_types: Vec<ReprColumnType> = input.typ().column_types;
448 optimize(
449 limit,
450 &input_col_types,
451 &input_knowledge[..],
452 knowledge_stack,
453 )?;
454 }
455 Ok(input_knowledge)
456 }
457 MirRelationExpr::Negate { input } => {
458 self.harvest(input, knowledge, knowledge_stack)
459 }
460 MirRelationExpr::Threshold { input } => {
461 self.harvest(input, knowledge, knowledge_stack)
462 }
463 MirRelationExpr::Union { base, inputs } => {
464 let mut know = self.harvest(base, knowledge, knowledge_stack)?;
465 for input in inputs {
466 know = know
467 .into_iter()
468 .zip_eq(self.harvest(input, knowledge, knowledge_stack)?)
469 .map(|(mut k1, k2)| {
470 k1.join_assign(&k2);
471 k1
472 })
473 .collect();
474 }
475 Ok(know)
476 }
477 }?;
478
479 // println!("# Plan");
480 // println!("{}", expr.pretty());
481 // println!("# Knowledge");
482 // print_knowledge_vec(&result);
483 // println!("---");
484
485 Ok(result)
486 })
487 }
488}
489
490/// Information about a specific column.
491///
492/// The values should form a [complete lattice].
493///
494/// [complete lattice]: https://en.wikipedia.org/wiki/Complete_lattice
495#[derive(Clone, Debug, PartialEq, Eq)]
496enum DatumKnowledge {
497 // Any possible value, optionally known to be NOT NULL.
498 Any {
499 nullable: bool,
500 },
501 // A known literal value of a specific type.
502 Lit {
503 value: Result<mz_repr::Row, EvalError>,
504 typ: ReprScalarType,
505 },
506 // A value that cannot exist.
507 Nothing,
508}
509
510impl From<&MirScalarExpr> for DatumKnowledge {
511 fn from(expr: &MirScalarExpr) -> Self {
512 if let MirScalarExpr::Literal(l, t) = expr {
513 let value = l.clone();
514 let typ = t.scalar_type.clone();
515 Self::Lit { value, typ }
516 } else {
517 Self::top()
518 }
519 }
520}
521
522impl From<(Datum<'_>, &ReprColumnType)> for DatumKnowledge {
523 fn from((d, t): (Datum<'_>, &ReprColumnType)) -> Self {
524 let value = Ok(Row::pack_slice(std::slice::from_ref(&d)));
525 let typ = t.scalar_type.clone();
526 Self::Lit { value, typ }
527 }
528}
529impl From<&ReprColumnType> for DatumKnowledge {
530 fn from(typ: &ReprColumnType) -> Self {
531 let nullable = typ.nullable;
532 Self::Any { nullable }
533 }
534}
535
536impl DatumKnowledge {
537 /// The most general possible knowledge (the top of the complete lattice).
538 fn top() -> Self {
539 Self::Any { nullable: true }
540 }
541
542 /// The strictest possible knowledge (the bottom of the complete lattice).
543 #[allow(dead_code)]
544 fn bottom() -> Self {
545 Self::Nothing
546 }
547
548 /// Create a [`DatumKnowledge::Any`] instance with the given nullable flag.
549 fn any(nullable: bool) -> Self {
550 DatumKnowledge::Any { nullable }
551 }
552
553 /// Unions (weakens) the possible states of a column.
554 fn join_assign(&mut self, other: &Self) {
555 use DatumKnowledge::*;
556
557 // Each of the following `if` statements handles the cases marked with
558 // `x` of the (self x other) cross product (depicted as a rows x cols
559 // table where Self::bottom() is the UL and Self::top() the LR corner).
560 // Cases that are already handled are marked with `+` and cases that are
561 // yet to be handled marked with `-`.
562 //
563 // The order of handling (crossing out) of cases ensures that
564 // `*.clone()` is not called unless necessary.
565 //
566 // The final case after the ifs can assert that both sides are Lit
567 // variants.
568
569 // x - - - : Nothing
570 // x - - - : Lit { _, _ }
571 // x - - - : Any { false }
572 // x x x x : Any { true }
573 if crate::any![
574 matches!(self, Any { nullable: true }),
575 matches!(other, Nothing),
576 ] {
577 // Nothing to do.
578 }
579 // + x x x : Nothing
580 // + - - x : Lit { _, _ }
581 // + - - x : Any { false }
582 // + + + + : Any { true }
583 else if crate::any![
584 matches!(self, Nothing),
585 matches!(other, Any { nullable: true }),
586 ] {
587 *self = other.clone();
588 }
589 // + + + + : Nothing
590 // + - - + : Lit { _, _ }
591 // + x x + : Any { false }
592 // + + + + : Any { true }
593 else if matches!(self, Any { nullable: false }) {
594 if !other.nullable() {
595 // Nothing to do.
596 } else {
597 *self = Self::top() // other: Lit { null, _ }
598 }
599 // Nothing to do.
600 }
601 // + + + + : Nothing
602 // + - x + : Lit { _, _ }
603 // + + + + : Any { false }
604 // + + + + : Any { true }
605 else if matches!(other, Any { nullable: false }) {
606 if !self.nullable() {
607 *self = other.clone();
608 } else {
609 *self = Self::top() // other: Lit { null, _ }
610 }
611 }
612 // + + + + : Nothing
613 // + x + + : Lit { _, _ }
614 // + + + + : Any { false }
615 // + + + + : Any { true }
616 else {
617 let Lit {
618 value: s_val,
619 typ: s_typ,
620 } = self
621 else {
622 unreachable!();
623 };
624 let Lit {
625 value: o_val,
626 typ: o_typ,
627 } = other
628 else {
629 unreachable!();
630 };
631
632 if s_typ != o_typ {
633 soft_panic_or_log!("Undefined join of non-equal repr types {s_typ:?}, {o_typ:?}");
634 *self = Self::top();
635 } else if s_val != o_val {
636 let nullable = self.nullable() || other.nullable();
637 *self = Any { nullable }
638 } else {
639 // Value and type coincide - do nothing!
640 }
641 }
642 }
643
644 /// Intersects (strengthens) the possible states of a column.
645 fn meet_assign(&mut self, other: &Self) {
646 use DatumKnowledge::*;
647
648 // Each of the following `if` statements handles the cases marked with
649 // `x` of the (self x other) cross product (depicted as a rows x cols
650 // table where Self::bottom() is the UL and Self::top() the LR corner).
651 // Cases that are already handled are marked with `+` and cases that are
652 // yet to be handled marked with `-`.
653 //
654 // The order of handling (crossing out) of cases ensures that
655 // `*.clone()` is not called unless necessary.
656 //
657 // The final case after the ifs can assert that both sides are Lit
658 // variants.
659
660 // x x x x : Nothing
661 // - - - x : Lit { _, _ }
662 // - - - x : Any { false }
663 // - - - x : Any { true }
664 if crate::any![
665 matches!(self, Nothing),
666 matches!(other, Any { nullable: true }),
667 ] {
668 // Nothing to do.
669 }
670 // + + + + : Nothing
671 // x - - + : Lit { _, _ }
672 // x - - + : Any { false }
673 // x x x + : Any { true }
674 else if crate::any![
675 matches!(self, Any { nullable: true }),
676 matches!(other, Nothing),
677 ] {
678 *self = other.clone();
679 }
680 // + + + + : Nothing
681 // + - - + : Lit { _, _ }
682 // + x x + : Any { false }
683 // + + + + : Any { true }
684 else if matches!(self, Any { nullable: false }) {
685 match other {
686 Any { .. } => {
687 // Nothing to do.
688 }
689 Lit { .. } => {
690 if other.nullable() {
691 *self = Nothing; // other: Lit { null, _ }
692 } else {
693 *self = other.clone();
694 }
695 }
696 Nothing => unreachable!(),
697 }
698 }
699 // + + + + : Nothing
700 // + - x + : Lit { _, _ }
701 // + + + + : Any { false }
702 // + + + + : Any { true }
703 else if matches!(other, Any { nullable: false }) {
704 if self.nullable() {
705 *self = Nothing // self: Lit { null, _ }
706 }
707 }
708 // + + + + : Nothing
709 // + x + + : Lit { _, _ }
710 // + + + + : Any { false }
711 // + + + + : Any { true }
712 else {
713 let Lit {
714 value: s_val,
715 typ: s_typ,
716 } = self
717 else {
718 unreachable!();
719 };
720 let Lit {
721 value: o_val,
722 typ: o_typ,
723 } = other
724 else {
725 unreachable!();
726 };
727
728 if s_typ != o_typ {
729 soft_panic_or_log!("Undefined meet of non-equal repr types {s_typ:?}, {o_typ:?}");
730 *self = Self::top(); // this really should be Nothing
731 } else if s_val != o_val {
732 *self = Nothing;
733 } else {
734 // Value and type coincide - do nothing!
735 }
736 }
737 }
738
739 fn nullable(&self) -> bool {
740 match self {
741 DatumKnowledge::Any { nullable } => *nullable,
742 DatumKnowledge::Lit { value, .. } => match value {
743 Ok(value) => value.iter().next().unwrap().is_null(),
744 Err(_) => false,
745 },
746 DatumKnowledge::Nothing => false,
747 }
748 }
749}
750
751/// Attempts to optimize
752///
753/// `knowledge_stack` is a pre-allocated vector but is expected not to contain any elements.
754fn optimize(
755 expr: &mut MirScalarExpr,
756 column_types: &[ReprColumnType],
757 column_knowledge: &[DatumKnowledge],
758 knowledge_stack: &mut Vec<DatumKnowledge>,
759) -> Result<DatumKnowledge, TransformError> {
760 // Storage for `DatumKnowledge` being propagated up through the
761 // `MirScalarExpr`. When a node is visited, pop off as many `DatumKnowledge`
762 // as the number of children the node has, and then push the
763 // `DatumKnowledge` corresponding to the node back onto the stack.
764 // Post-order traversal means that if a node has `n` children, the top `n`
765 // `DatumKnowledge` in the stack are the `DatumKnowledge` corresponding to
766 // the children.
767 assert!(knowledge_stack.is_empty());
768 #[allow(deprecated)]
769 expr.visit_mut_pre_post(
770 &mut |e| {
771 if let MirScalarExpr::If { then, els, .. } = e {
772 Some(vec![then, els])
773 } else {
774 None
775 }
776 },
777 &mut |e| {
778 let result = match e {
779 MirScalarExpr::Column(index, _) => {
780 let index = *index;
781 if let DatumKnowledge::Lit { value, typ } = &column_knowledge[index] {
782 *e = MirScalarExpr::Literal(
783 value.clone(),
784 typ.clone().nullable(column_knowledge[index].nullable()),
785 );
786 }
787 column_knowledge[index].clone()
788 }
789 MirScalarExpr::Literal(_, _) | MirScalarExpr::CallUnmaterializable(_) => {
790 DatumKnowledge::from(&*e)
791 }
792 MirScalarExpr::CallUnary { func, expr: _ } => {
793 let knowledge = knowledge_stack.pop().unwrap();
794 if matches!(&knowledge, DatumKnowledge::Lit { .. }) {
795 e.reduce(column_types);
796 } else if func == &UnaryFunc::IsNull(func::IsNull) && !knowledge.nullable() {
797 *e = MirScalarExpr::literal_false();
798 };
799 DatumKnowledge::from(&*e)
800 }
801 MirScalarExpr::CallBinary {
802 func: _,
803 expr1: _,
804 expr2: _,
805 } => {
806 let knowledge2 = knowledge_stack.pop().unwrap();
807 let knowledge1 = knowledge_stack.pop().unwrap();
808 if crate::any![
809 matches!(knowledge1, DatumKnowledge::Lit { .. }),
810 matches!(knowledge2, DatumKnowledge::Lit { .. }),
811 ] {
812 e.reduce(column_types);
813 }
814 DatumKnowledge::from(&*e)
815 }
816 MirScalarExpr::CallVariadic { func: _, exprs } => {
817 // Drain the last `exprs.len()` knowledge, and reduce if any is `Lit`.
818 assert!(knowledge_stack.len() >= exprs.len());
819 if knowledge_stack
820 .drain(knowledge_stack.len() - exprs.len()..)
821 .any(|k| matches!(k, DatumKnowledge::Lit { .. }))
822 {
823 e.reduce(column_types);
824 }
825 DatumKnowledge::from(&*e)
826 }
827 MirScalarExpr::If {
828 cond: _,
829 then: _,
830 els: _,
831 } => {
832 // `cond` has been left un-optimized, as we should not remove the conditional
833 // nature of the evaluation based on column knowledge: the resulting
834 // expression could then move down past a filter or join that provided
835 // the guarantees, and would become wrong.
836 //
837 // Instead, each of the branches have been optimized, and we
838 // can union the states of their columns.
839 let know2 = knowledge_stack.pop().unwrap();
840 let mut know1 = knowledge_stack.pop().unwrap();
841 know1.join_assign(&know2);
842 know1
843 }
844 };
845 knowledge_stack.push(result);
846 },
847 )?;
848 let knowledge_datum = knowledge_stack.pop();
849 assert!(knowledge_stack.is_empty());
850 knowledge_datum.ok_or_else(|| {
851 TransformError::Internal(String::from("unexpectedly empty stack in optimize"))
852 })
853}
854
855#[allow(dead_code)] // keep debugging method around
856fn print_knowledge_map<'a>(
857 knowledge: &BTreeMap<mz_expr::Id, Vec<DatumKnowledge>>,
858 ids: impl Iterator<Item = &'a mz_expr::LocalId>,
859) {
860 for id in ids {
861 let id = mz_expr::Id::Local(id.clone());
862 for (i, k) in knowledge.get(&id).unwrap().iter().enumerate() {
863 println!("{id}.#{i}: {k:?}");
864 }
865 }
866 println!("");
867}
868
869#[allow(dead_code)] // keep debugging method around
870fn print_knowledge_vec(knowledge: &Vec<DatumKnowledge>) {
871 for (i, k) in knowledge.iter().enumerate() {
872 println!("#{i}: {k:?}");
873 }
874 println!("");
875}