mz_transform/column_knowledge.rs
1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Transformations based on pulling information about individual columns from sources.
11
12use std::collections::BTreeMap;
13
14use itertools::{Itertools, zip_eq};
15use mz_expr::JoinImplementation::IndexedFilter;
16use mz_expr::visit::Visit;
17use mz_expr::{
18 EvalError, LetRecLimit, MirRelationExpr, MirScalarExpr, RECURSION_LIMIT, UnaryFunc, func,
19};
20use mz_ore::cast::CastFrom;
21use mz_ore::stack::{CheckedRecursion, RecursionGuard};
22use mz_ore::{assert_none, soft_panic_or_log};
23use mz_repr::{ColumnType, Datum, RelationType, Row, ScalarType};
24
25use crate::{TransformCtx, TransformError};
26
27/// Harvest and act upon per-column information.
28#[derive(Debug)]
29pub struct ColumnKnowledge {
30 recursion_guard: RecursionGuard,
31}
32
33impl Default for ColumnKnowledge {
34 fn default() -> ColumnKnowledge {
35 ColumnKnowledge {
36 recursion_guard: RecursionGuard::with_limit(RECURSION_LIMIT),
37 }
38 }
39}
40
41impl CheckedRecursion for ColumnKnowledge {
42 fn recursion_guard(&self) -> &RecursionGuard {
43 &self.recursion_guard
44 }
45}
46
47impl crate::Transform for ColumnKnowledge {
48 fn name(&self) -> &'static str {
49 "ColumnKnowledge"
50 }
51
52 /// Transforms an expression through accumulated knowledge.
53 #[mz_ore::instrument(
54 target = "optimizer",
55 level = "debug",
56 fields(path.segment = "column_knowledge")
57 )]
58 fn actually_perform_transform(
59 &self,
60 expr: &mut MirRelationExpr,
61 _: &mut TransformCtx,
62 ) -> Result<(), TransformError> {
63 let mut knowledge_stack = Vec::<DatumKnowledge>::new();
64 let result = self
65 .harvest(expr, &mut BTreeMap::new(), &mut knowledge_stack)
66 .map(|_| ());
67 mz_repr::explain::trace_plan(&*expr);
68 result
69 }
70}
71
72impl ColumnKnowledge {
73 /// Harvest per-column knowledge.
74 ///
75 /// `knowledge_stack` is a pre-allocated vector but is expected not to contain any elements.
76 fn harvest(
77 &self,
78 expr: &mut MirRelationExpr,
79 knowledge: &mut BTreeMap<mz_expr::Id, Vec<DatumKnowledge>>,
80 knowledge_stack: &mut Vec<DatumKnowledge>,
81 ) -> Result<Vec<DatumKnowledge>, TransformError> {
82 self.checked_recur(|_| {
83 let result = match expr {
84 MirRelationExpr::ArrangeBy { input, .. } => {
85 self.harvest(input, knowledge, knowledge_stack)
86 }
87 MirRelationExpr::Get { id, typ, .. } => {
88 Ok(knowledge.get(id).cloned().unwrap_or_else(|| {
89 typ.column_types.iter().map(DatumKnowledge::from).collect()
90 }))
91 }
92 MirRelationExpr::Constant { rows, typ } => {
93 // TODO: handle multi-row cases with some constant columns.
94 if let Ok([(row, _diff)]) = rows.as_deref() {
95 let knowledge = std::iter::zip(row.iter(), typ.column_types.iter())
96 .map(DatumKnowledge::from)
97 .collect();
98 Ok(knowledge)
99 } else {
100 Ok(typ.column_types.iter().map(DatumKnowledge::from).collect())
101 }
102 }
103 MirRelationExpr::Let { id, value, body } => {
104 let value_knowledge = self.harvest(value, knowledge, knowledge_stack)?;
105 let prior_knowledge =
106 knowledge.insert(mz_expr::Id::Local(id.clone()), value_knowledge);
107 let body_knowledge = self.harvest(body, knowledge, knowledge_stack)?;
108 knowledge.remove(&mz_expr::Id::Local(id.clone()));
109 if let Some(prior_knowledge) = prior_knowledge {
110 knowledge.insert(mz_expr::Id::Local(id.clone()), prior_knowledge);
111 }
112 Ok(body_knowledge)
113 }
114 MirRelationExpr::LetRec {
115 ids,
116 values,
117 limits,
118 body,
119 } => {
120 // Set knowledge[i][j] = DatumKnowledge::bottom() for each
121 // column j and CTE i. This corresponds to the normal
122 // evaluation semantics where each recursive CTE is
123 // initialized to the empty collection.
124 for (id, value) in zip_eq(ids.iter(), values.iter()) {
125 let id = mz_expr::Id::Local(id.clone());
126 let knowledge_new = vec![DatumKnowledge::bottom(); value.arity()];
127 let knowledge_old = knowledge.insert(id, knowledge_new);
128 assert_none!(knowledge_old);
129 }
130
131 // Sum up the arity of all ids in the enclosing LetRec node.
132 let let_rec_arity = ids.iter().fold(0, |acc, id| {
133 let id = mz_expr::Id::Local(id.clone());
134 acc + u64::cast_from(knowledge[&id].len())
135 });
136
137 // Sequentially union knowledge[i][j] with the result of
138 // descending into a clone of values[i]. Repeat until one of
139 // the following conditions is met:
140 //
141 // 1. The knowledge bindings have stabilized at a fixpoint.
142 // 2. No fixpoint was found after `max_iterations`. If this
143 // is the case reset the knowledge vectors for all
144 // recursive CTEs to DatumKnowledge::top().
145 // 3. We reach the user-specified recursion limit of any of the bindings.
146 // In this case, we also give up similarly to 2., because we don't want to
147 // complicate things with handling different limits per binding.
148 let min_max_iter = LetRecLimit::min_max_iter(limits);
149 let max_iterations = 100;
150 let mut curr_iteration = 0;
151 loop {
152 // Check for conditions (2) and (3).
153 if curr_iteration >= max_iterations
154 || min_max_iter
155 .map(|min_max_iter| curr_iteration >= min_max_iter)
156 .unwrap_or(false)
157 {
158 if curr_iteration > 3 * let_rec_arity {
159 soft_panic_or_log!(
160 "LetRec loop in ColumnKnowledge has not converged in 3 * |{}|",
161 let_rec_arity
162 );
163 }
164
165 for (id, value) in zip_eq(ids.iter(), values.iter()) {
166 let id = mz_expr::Id::Local(id.clone());
167 let knowledge_new = vec![DatumKnowledge::top(); value.arity()];
168 knowledge.insert(id, knowledge_new);
169 }
170 break;
171 }
172
173 // Check for condition (1).
174 let mut change = false;
175 for (id, mut value) in zip_eq(ids.iter(), values.iter().cloned()) {
176 let id = mz_expr::Id::Local(id.clone());
177 let value = &mut value;
178 let next_knowledge = self.harvest(value, knowledge, knowledge_stack)?;
179 let curr_knowledge = knowledge.get_mut(&id).unwrap();
180 for (curr, next) in zip_eq(curr_knowledge.iter_mut(), next_knowledge) {
181 let prev = curr.clone();
182 curr.join_assign(&next);
183 change |= prev != *curr;
184 }
185 }
186 if !change {
187 break;
188 }
189
190 curr_iteration += 1;
191 }
192
193 // Descend into the values with the inferred knowledge.
194 for value in values.iter_mut() {
195 self.harvest(value, knowledge, knowledge_stack)?;
196 }
197
198 // Descend and return the knowledge from the body.
199 let body_knowledge = self.harvest(body, knowledge, knowledge_stack)?;
200
201 // Remove shadowed bindings. This is good hygiene, as
202 // otherwise with nested LetRec blocks the `loop { ... }`
203 // above will carry inner LetRec IDs across outer LetRec
204 // iterations. As a consequence, the "no shadowing"
205 // assertion at the beginning of this block will fail at the
206 // inner LetRec for the second outer LetRec iteration.
207 for id in ids.iter() {
208 let id = mz_expr::Id::Local(id.clone());
209 knowledge.remove(&id);
210 }
211
212 Ok(body_knowledge)
213 }
214 MirRelationExpr::Project { input, outputs } => {
215 let input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
216 Ok(outputs
217 .iter()
218 .map(|i| input_knowledge[*i].clone())
219 .collect())
220 }
221 MirRelationExpr::Map { input, scalars } => {
222 let mut input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
223 let mut column_types = input.typ().column_types;
224 for scalar in scalars.iter_mut() {
225 input_knowledge.push(optimize(
226 scalar,
227 &column_types,
228 &input_knowledge[..],
229 knowledge_stack,
230 )?);
231 column_types.push(scalar.typ(&column_types));
232 }
233 Ok(input_knowledge)
234 }
235 MirRelationExpr::FlatMap { input, func, exprs } => {
236 let mut input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
237 let input_typ = input.typ();
238 for expr in exprs {
239 optimize(
240 expr,
241 &input_typ.column_types,
242 &input_knowledge[..],
243 knowledge_stack,
244 )?;
245 }
246 let func_typ = func.output_type();
247 input_knowledge.extend(func_typ.column_types.iter().map(DatumKnowledge::from));
248 Ok(input_knowledge)
249 }
250 MirRelationExpr::Filter { input, predicates } => {
251 let mut input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
252 let input_typ = input.typ();
253 for predicate in predicates.iter_mut() {
254 optimize(
255 predicate,
256 &input_typ.column_types,
257 &input_knowledge[..],
258 knowledge_stack,
259 )?;
260 }
261 // If any predicate tests a column for equality, truth, or is_null, we learn stuff.
262 for predicate in predicates.iter() {
263 // Equality tests allow us to unify the column knowledge of each input.
264 if let MirScalarExpr::CallBinary {
265 func: mz_expr::BinaryFunc::Eq,
266 expr1,
267 expr2,
268 } = predicate
269 {
270 // Collect knowledge about the inputs (for columns and literals).
271 let mut knowledge = DatumKnowledge::top();
272 if let MirScalarExpr::Column(c) = &**expr1 {
273 knowledge.meet_assign(&input_knowledge[*c]);
274 }
275 if let MirScalarExpr::Column(c) = &**expr2 {
276 knowledge.meet_assign(&input_knowledge[*c]);
277 }
278
279 // Absorb literal knowledge about columns.
280 if let MirScalarExpr::Literal(..) = &**expr1 {
281 knowledge.meet_assign(&DatumKnowledge::from(&**expr1));
282 }
283 if let MirScalarExpr::Literal(..) = &**expr2 {
284 knowledge.meet_assign(&DatumKnowledge::from(&**expr2));
285 }
286
287 // Write back unified knowledge to each column.
288 if let MirScalarExpr::Column(c) = &**expr1 {
289 input_knowledge[*c].meet_assign(&knowledge);
290 }
291 if let MirScalarExpr::Column(c) = &**expr2 {
292 input_knowledge[*c].meet_assign(&knowledge);
293 }
294 }
295 if let MirScalarExpr::CallUnary {
296 func: UnaryFunc::Not(func::Not),
297 expr,
298 } = predicate
299 {
300 if let MirScalarExpr::CallUnary {
301 func: UnaryFunc::IsNull(func::IsNull),
302 expr,
303 } = &**expr
304 {
305 if let MirScalarExpr::Column(c) = &**expr {
306 input_knowledge[*c].meet_assign(&DatumKnowledge::any(false));
307 }
308 }
309 }
310 }
311
312 Ok(input_knowledge)
313 }
314 MirRelationExpr::Join {
315 inputs,
316 equivalences,
317 implementation,
318 ..
319 } => {
320 // Aggregate column knowledge from each input into one `Vec`.
321 let mut knowledges = Vec::new();
322 for input in inputs.iter_mut() {
323 for mut knowledge in self.harvest(input, knowledge, knowledge_stack)? {
324 // Do not propagate error literals beyond join inputs, since that may result
325 // in them being propagated to other inputs of the join and evaluated when
326 // they should not.
327 if let DatumKnowledge::Lit { value: Err(_), .. } = knowledge {
328 knowledge.join_assign(&DatumKnowledge::any(false));
329 }
330 knowledges.push(knowledge);
331 }
332 }
333
334 // This only aggregates the column types of each input, not the
335 // keys of the inputs. It is unnecessary to aggregate the keys
336 // of the inputs since input keys are unnecessary for reducing
337 // `MirScalarExpr`s.
338 let folded_inputs_typ =
339 inputs.iter().fold(RelationType::empty(), |mut typ, input| {
340 typ.column_types.append(&mut input.typ().column_types);
341 typ
342 });
343
344 for equivalence in equivalences.iter_mut() {
345 let mut knowledge = DatumKnowledge::top();
346
347 // We can produce composite knowledge for everything in the equivalence class.
348 for expr in equivalence.iter_mut() {
349 if !matches!(implementation, IndexedFilter(..)) {
350 optimize(
351 expr,
352 &folded_inputs_typ.column_types,
353 &knowledges,
354 knowledge_stack,
355 )?;
356 }
357 if let MirScalarExpr::Column(c) = expr {
358 knowledge.meet_assign(&knowledges[*c]);
359 }
360 if let MirScalarExpr::Literal(..) = expr {
361 knowledge.meet_assign(&DatumKnowledge::from(&*expr));
362 }
363 }
364 for expr in equivalence.iter_mut() {
365 if let MirScalarExpr::Column(c) = expr {
366 knowledges[*c] = knowledge.clone();
367 }
368 }
369 }
370
371 Ok(knowledges)
372 }
373 MirRelationExpr::Reduce {
374 input,
375 group_key,
376 aggregates,
377 monotonic: _,
378 expected_group_size: _,
379 } => {
380 let input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
381 let input_typ = input.typ();
382 let mut output = group_key
383 .iter_mut()
384 .map(|k| {
385 optimize(
386 k,
387 &input_typ.column_types,
388 &input_knowledge[..],
389 knowledge_stack,
390 )
391 })
392 .collect::<Result<Vec<_>, _>>()?;
393 for aggregate in aggregates.iter_mut() {
394 use mz_expr::AggregateFunc;
395 let knowledge = optimize(
396 &mut aggregate.expr,
397 &input_typ.column_types,
398 &input_knowledge[..],
399 knowledge_stack,
400 )?;
401 // This could be improved.
402 let knowledge = match aggregate.func {
403 AggregateFunc::MaxInt16
404 | AggregateFunc::MaxInt32
405 | AggregateFunc::MaxInt64
406 | AggregateFunc::MaxUInt16
407 | AggregateFunc::MaxUInt32
408 | AggregateFunc::MaxUInt64
409 | AggregateFunc::MaxMzTimestamp
410 | AggregateFunc::MaxFloat32
411 | AggregateFunc::MaxFloat64
412 | AggregateFunc::MaxBool
413 | AggregateFunc::MaxString
414 | AggregateFunc::MaxDate
415 | AggregateFunc::MaxTimestamp
416 | AggregateFunc::MaxTimestampTz
417 | AggregateFunc::MinInt16
418 | AggregateFunc::MinInt32
419 | AggregateFunc::MinInt64
420 | AggregateFunc::MinUInt16
421 | AggregateFunc::MinUInt32
422 | AggregateFunc::MinUInt64
423 | AggregateFunc::MinMzTimestamp
424 | AggregateFunc::MinFloat32
425 | AggregateFunc::MinFloat64
426 | AggregateFunc::MinBool
427 | AggregateFunc::MinString
428 | AggregateFunc::MinDate
429 | AggregateFunc::MinTimestamp
430 | AggregateFunc::MinTimestampTz
431 | AggregateFunc::Any
432 | AggregateFunc::All => {
433 // These methods propagate constant values exactly.
434 knowledge
435 }
436 AggregateFunc::Count => DatumKnowledge::any(false),
437 _ => {
438 // The remaining aggregates are non-null if
439 // their inputs are non-null. This is correct
440 // because in Mir~ we reduce an empty collection
441 // to an empty collection (in Hir~ the result
442 // often is singleton null collection).
443 DatumKnowledge::any(knowledge.nullable())
444 }
445 };
446 output.push(knowledge);
447 }
448 Ok(output)
449 }
450 MirRelationExpr::TopK { input, limit, .. } => {
451 let input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
452 if let Some(limit) = limit.as_mut() {
453 optimize(
454 limit,
455 &input.typ().column_types,
456 &input_knowledge[..],
457 knowledge_stack,
458 )?;
459 }
460 Ok(input_knowledge)
461 }
462 MirRelationExpr::Negate { input } => {
463 self.harvest(input, knowledge, knowledge_stack)
464 }
465 MirRelationExpr::Threshold { input } => {
466 self.harvest(input, knowledge, knowledge_stack)
467 }
468 MirRelationExpr::Union { base, inputs } => {
469 let mut know = self.harvest(base, knowledge, knowledge_stack)?;
470 for input in inputs {
471 know = know
472 .into_iter()
473 .zip_eq(self.harvest(input, knowledge, knowledge_stack)?)
474 .map(|(mut k1, k2)| {
475 k1.join_assign(&k2);
476 k1
477 })
478 .collect();
479 }
480 Ok(know)
481 }
482 }?;
483
484 // println!("# Plan");
485 // println!("{}", expr.pretty());
486 // println!("# Knowledge");
487 // print_knowledge_vec(&result);
488 // println!("---");
489
490 Ok(result)
491 })
492 }
493}
494
495/// Information about a specific column.
496///
497/// The values should form a [complete lattice].
498///
499/// [complete lattice]: https://en.wikipedia.org/wiki/Complete_lattice
500#[derive(Clone, Debug, PartialEq, Eq)]
501enum DatumKnowledge {
502 // Any possible value, optionally known to be NOT NULL.
503 Any {
504 nullable: bool,
505 },
506 // A known literal value of a specific type.
507 Lit {
508 value: Result<mz_repr::Row, EvalError>,
509 typ: ScalarType,
510 },
511 // A value that cannot exist.
512 Nothing,
513}
514
515impl From<&MirScalarExpr> for DatumKnowledge {
516 fn from(expr: &MirScalarExpr) -> Self {
517 if let MirScalarExpr::Literal(l, t) = expr {
518 let value = l.clone();
519 let typ = t.scalar_type.clone();
520 Self::Lit { value, typ }
521 } else {
522 Self::top()
523 }
524 }
525}
526
527impl From<(Datum<'_>, &ColumnType)> for DatumKnowledge {
528 fn from((d, t): (Datum<'_>, &ColumnType)) -> Self {
529 let value = Ok(Row::pack_slice(&[d.clone()]));
530 let typ = t.scalar_type.clone();
531 Self::Lit { value, typ }
532 }
533}
534
535impl From<&ColumnType> for DatumKnowledge {
536 fn from(typ: &ColumnType) -> Self {
537 let nullable = typ.nullable;
538 Self::Any { nullable }
539 }
540}
541
542impl DatumKnowledge {
543 /// The most general possible knowledge (the top of the complete lattice).
544 fn top() -> Self {
545 Self::Any { nullable: true }
546 }
547
548 /// The strictest possible knowledge (the bottom of the complete lattice).
549 #[allow(dead_code)]
550 fn bottom() -> Self {
551 Self::Nothing
552 }
553
554 /// Create a [`DatumKnowledge::Any`] instance with the given nullable flag.
555 fn any(nullable: bool) -> Self {
556 DatumKnowledge::Any { nullable }
557 }
558
559 /// Unions (weakens) the possible states of a column.
560 fn join_assign(&mut self, other: &Self) {
561 use DatumKnowledge::*;
562
563 // Each of the following `if` statements handles the cases marked with
564 // `x` of the (self x other) cross product (depicted as a rows x cols
565 // table where Self::bottom() is the UL and Self::top() the LR corner).
566 // Cases that are already handled are marked with `+` and cases that are
567 // yet to be handled marked with `-`.
568 //
569 // The order of handling (crossing out) of cases ensures that
570 // `*.clone()` is not called unless necessary.
571 //
572 // The final case after the ifs can assert that both sides are Lit
573 // variants.
574
575 // x - - - : Nothing
576 // x - - - : Lit { _, _ }
577 // x - - - : Any { false }
578 // x x x x : Any { true }
579 if crate::any![
580 matches!(self, Any { nullable: true }),
581 matches!(other, Nothing),
582 ] {
583 // Nothing to do.
584 }
585 // + x x x : Nothing
586 // + - - x : Lit { _, _ }
587 // + - - x : Any { false }
588 // + + + + : Any { true }
589 else if crate::any![
590 matches!(self, Nothing),
591 matches!(other, Any { nullable: true }),
592 ] {
593 *self = other.clone();
594 }
595 // + + + + : Nothing
596 // + - - + : Lit { _, _ }
597 // + x x + : Any { false }
598 // + + + + : Any { true }
599 else if matches!(self, Any { nullable: false }) {
600 if !other.nullable() {
601 // Nothing to do.
602 } else {
603 *self = Self::top() // other: Lit { null, _ }
604 }
605 // Nothing to do.
606 }
607 // + + + + : Nothing
608 // + - x + : Lit { _, _ }
609 // + + + + : Any { false }
610 // + + + + : Any { true }
611 else if matches!(other, Any { nullable: false }) {
612 if !self.nullable() {
613 *self = other.clone();
614 } else {
615 *self = Self::top() // other: Lit { null, _ }
616 }
617 }
618 // + + + + : Nothing
619 // + x + + : Lit { _, _ }
620 // + + + + : Any { false }
621 // + + + + : Any { true }
622 else {
623 let Lit {
624 value: s_val,
625 typ: s_typ,
626 } = self
627 else {
628 unreachable!();
629 };
630 let Lit {
631 value: o_val,
632 typ: o_typ,
633 } = other
634 else {
635 unreachable!();
636 };
637
638 if !s_typ.base_eq(o_typ) {
639 ::tracing::error!("Undefined join of non-equal base types {s_typ:?} != {o_typ:?}");
640 *self = Self::top();
641 } else if s_val != o_val {
642 let nullable = self.nullable() || other.nullable();
643 *self = Any { nullable }
644 } else if s_typ != o_typ {
645 // Same value but different concrete types - strip all modifiers!
646 // This is identical to what ColumnType::union is doing.
647 *s_typ = s_typ.without_modifiers();
648 } else {
649 // Value and type coincide - do nothing!
650 }
651 }
652 }
653
654 /// Intersects (strengthens) the possible states of a column.
655 fn meet_assign(&mut self, other: &Self) {
656 use DatumKnowledge::*;
657
658 // Each of the following `if` statements handles the cases marked with
659 // `x` of the (self x other) cross product (depicted as a rows x cols
660 // table where Self::bottom() is the UL and Self::top() the LR corner).
661 // Cases that are already handled are marked with `+` and cases that are
662 // yet to be handled marked with `-`.
663 //
664 // The order of handling (crossing out) of cases ensures that
665 // `*.clone()` is not called unless necessary.
666 //
667 // The final case after the ifs can assert that both sides are Lit
668 // variants.
669
670 // x x x x : Nothing
671 // - - - x : Lit { _, _ }
672 // - - - x : Any { false }
673 // - - - x : Any { true }
674 if crate::any![
675 matches!(self, Nothing),
676 matches!(other, Any { nullable: true }),
677 ] {
678 // Nothing to do.
679 }
680 // + + + + : Nothing
681 // x - - + : Lit { _, _ }
682 // x - - + : Any { false }
683 // x x x + : Any { true }
684 else if crate::any![
685 matches!(self, Any { nullable: true }),
686 matches!(other, Nothing),
687 ] {
688 *self = other.clone();
689 }
690 // + + + + : Nothing
691 // + - - + : Lit { _, _ }
692 // + x x + : Any { false }
693 // + + + + : Any { true }
694 else if matches!(self, Any { nullable: false }) {
695 match other {
696 Any { .. } => {
697 // Nothing to do.
698 }
699 Lit { .. } => {
700 if other.nullable() {
701 *self = Nothing; // other: Lit { null, _ }
702 } else {
703 *self = other.clone();
704 }
705 }
706 Nothing => unreachable!(),
707 }
708 }
709 // + + + + : Nothing
710 // + - x + : Lit { _, _ }
711 // + + + + : Any { false }
712 // + + + + : Any { true }
713 else if matches!(other, Any { nullable: false }) {
714 if self.nullable() {
715 *self = Nothing // self: Lit { null, _ }
716 }
717 }
718 // + + + + : Nothing
719 // + x + + : Lit { _, _ }
720 // + + + + : Any { false }
721 // + + + + : Any { true }
722 else {
723 let Lit {
724 value: s_val,
725 typ: s_typ,
726 } = self
727 else {
728 unreachable!();
729 };
730 let Lit {
731 value: o_val,
732 typ: o_typ,
733 } = other
734 else {
735 unreachable!();
736 };
737
738 if !s_typ.base_eq(o_typ) {
739 soft_panic_or_log!("Undefined meet of non-equal base types {s_typ:?} != {o_typ:?}");
740 *self = Self::top(); // this really should be Nothing
741 } else if s_val != o_val {
742 *self = Nothing;
743 } else if s_typ != o_typ {
744 // Same value but different concrete types - strip all
745 // modifiers! We should probably pick the more specific of the
746 // two types if they are ordered or return Nothing otherwise.
747 *s_typ = s_typ.without_modifiers();
748 } else {
749 // Value and type coincide - do nothing!
750 }
751 }
752 }
753
754 fn nullable(&self) -> bool {
755 match self {
756 DatumKnowledge::Any { nullable } => *nullable,
757 DatumKnowledge::Lit { value, .. } => match value {
758 Ok(value) => value.iter().next().unwrap().is_null(),
759 Err(_) => false,
760 },
761 DatumKnowledge::Nothing => false,
762 }
763 }
764}
765
766/// Attempts to optimize
767///
768/// `knowledge_stack` is a pre-allocated vector but is expected not to contain any elements.
769fn optimize(
770 expr: &mut MirScalarExpr,
771 column_types: &[ColumnType],
772 column_knowledge: &[DatumKnowledge],
773 knowledge_stack: &mut Vec<DatumKnowledge>,
774) -> Result<DatumKnowledge, TransformError> {
775 // Storage for `DatumKnowledge` being propagated up through the
776 // `MirScalarExpr`. When a node is visited, pop off as many `DatumKnowledge`
777 // as the number of children the node has, and then push the
778 // `DatumKnowledge` corresponding to the node back onto the stack.
779 // Post-order traversal means that if a node has `n` children, the top `n`
780 // `DatumKnowledge` in the stack are the `DatumKnowledge` corresponding to
781 // the children.
782 assert!(knowledge_stack.is_empty());
783 #[allow(deprecated)]
784 expr.visit_mut_pre_post(
785 &mut |e| {
786 // We should not eagerly memoize `if` branches that might not be taken.
787 // TODO: Memoize expressions in the intersection of `then` and `els`.
788 if let MirScalarExpr::If { then, els, .. } = e {
789 Some(vec![then, els])
790 } else {
791 None
792 }
793 },
794 &mut |e| {
795 let result = match e {
796 MirScalarExpr::Column(index) => {
797 let index = *index;
798 if let DatumKnowledge::Lit { value, typ } = &column_knowledge[index] {
799 let nullable = column_knowledge[index].nullable();
800 *e = MirScalarExpr::Literal(value.clone(), typ.clone().nullable(nullable));
801 }
802 column_knowledge[index].clone()
803 }
804 MirScalarExpr::Literal(_, _) | MirScalarExpr::CallUnmaterializable(_) => {
805 DatumKnowledge::from(&*e)
806 }
807 MirScalarExpr::CallUnary { func, expr: _ } => {
808 let knowledge = knowledge_stack.pop().unwrap();
809 if matches!(&knowledge, DatumKnowledge::Lit { .. }) {
810 e.reduce(column_types);
811 } else if func == &UnaryFunc::IsNull(func::IsNull) && !knowledge.nullable() {
812 *e = MirScalarExpr::literal_false();
813 };
814 DatumKnowledge::from(&*e)
815 }
816 MirScalarExpr::CallBinary {
817 func: _,
818 expr1: _,
819 expr2: _,
820 } => {
821 let knowledge2 = knowledge_stack.pop().unwrap();
822 let knowledge1 = knowledge_stack.pop().unwrap();
823 if crate::any![
824 matches!(knowledge1, DatumKnowledge::Lit { .. }),
825 matches!(knowledge2, DatumKnowledge::Lit { .. }),
826 ] {
827 e.reduce(column_types);
828 }
829 DatumKnowledge::from(&*e)
830 }
831 MirScalarExpr::CallVariadic { func: _, exprs } => {
832 // Drain the last `exprs.len()` knowledge, and reduce if any is `Lit`.
833 assert!(knowledge_stack.len() >= exprs.len());
834 if knowledge_stack
835 .drain(knowledge_stack.len() - exprs.len()..)
836 .any(|k| matches!(k, DatumKnowledge::Lit { .. }))
837 {
838 e.reduce(column_types);
839 }
840 DatumKnowledge::from(&*e)
841 }
842 MirScalarExpr::If {
843 cond: _,
844 then: _,
845 els: _,
846 } => {
847 // `cond` has been left un-optimized, as we should not remove the conditional
848 // nature of the evaluation based on column knowledge: the resulting
849 // expression could then move down past a filter or join that provided
850 // the guarantees, and would become wrong.
851 //
852 // Instead, each of the branches have been optimized, and we
853 // can union the states of their columns.
854 let know2 = knowledge_stack.pop().unwrap();
855 let mut know1 = knowledge_stack.pop().unwrap();
856 know1.join_assign(&know2);
857 know1
858 }
859 };
860 knowledge_stack.push(result);
861 },
862 )?;
863 let knowledge_datum = knowledge_stack.pop();
864 assert!(knowledge_stack.is_empty());
865 knowledge_datum.ok_or_else(|| {
866 TransformError::Internal(String::from("unexpectedly empty stack in optimize"))
867 })
868}
869
870#[allow(dead_code)] // keep debugging method around
871fn print_knowledge_map<'a>(
872 knowledge: &BTreeMap<mz_expr::Id, Vec<DatumKnowledge>>,
873 ids: impl Iterator<Item = &'a mz_expr::LocalId>,
874) {
875 for id in ids {
876 let id = mz_expr::Id::Local(id.clone());
877 for (i, k) in knowledge.get(&id).unwrap().iter().enumerate() {
878 println!("{id}.#{i}: {k:?}");
879 }
880 }
881 println!("");
882}
883
884#[allow(dead_code)] // keep debugging method around
885fn print_knowledge_vec(knowledge: &Vec<DatumKnowledge>) {
886 for (i, k) in knowledge.iter().enumerate() {
887 println!("#{i}: {k:?}");
888 }
889 println!("");
890}