mz_transform/column_knowledge.rs
1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Transformations based on pulling information about individual columns from sources.
11
12use std::collections::BTreeMap;
13
14use itertools::{Itertools, zip_eq};
15use mz_expr::JoinImplementation::IndexedFilter;
16use mz_expr::visit::Visit;
17use mz_expr::{
18 EvalError, LetRecLimit, MirRelationExpr, MirScalarExpr, RECURSION_LIMIT, UnaryFunc, func,
19};
20use mz_ore::cast::CastFrom;
21use mz_ore::stack::{CheckedRecursion, RecursionGuard};
22use mz_ore::{assert_none, soft_panic_or_log};
23use mz_repr::{Datum, Row, SqlColumnType, SqlRelationType, SqlScalarType};
24
25use crate::{TransformCtx, TransformError};
26
27/// Harvest and act upon per-column information.
28#[derive(Debug)]
29pub struct ColumnKnowledge {
30 recursion_guard: RecursionGuard,
31}
32
33impl Default for ColumnKnowledge {
34 fn default() -> ColumnKnowledge {
35 ColumnKnowledge {
36 recursion_guard: RecursionGuard::with_limit(RECURSION_LIMIT),
37 }
38 }
39}
40
41impl CheckedRecursion for ColumnKnowledge {
42 fn recursion_guard(&self) -> &RecursionGuard {
43 &self.recursion_guard
44 }
45}
46
47impl crate::Transform for ColumnKnowledge {
48 fn name(&self) -> &'static str {
49 "ColumnKnowledge"
50 }
51
52 /// Transforms an expression through accumulated knowledge.
53 #[mz_ore::instrument(
54 target = "optimizer",
55 level = "debug",
56 fields(path.segment = "column_knowledge")
57 )]
58 fn actually_perform_transform(
59 &self,
60 expr: &mut MirRelationExpr,
61 _: &mut TransformCtx,
62 ) -> Result<(), TransformError> {
63 let mut knowledge_stack = Vec::<DatumKnowledge>::new();
64 let result = self
65 .harvest(expr, &mut BTreeMap::new(), &mut knowledge_stack)
66 .map(|_| ());
67 mz_repr::explain::trace_plan(&*expr);
68 result
69 }
70}
71
72impl ColumnKnowledge {
73 /// Harvest per-column knowledge.
74 ///
75 /// `knowledge_stack` is a pre-allocated vector but is expected not to contain any elements.
76 fn harvest(
77 &self,
78 expr: &mut MirRelationExpr,
79 knowledge: &mut BTreeMap<mz_expr::Id, Vec<DatumKnowledge>>,
80 knowledge_stack: &mut Vec<DatumKnowledge>,
81 ) -> Result<Vec<DatumKnowledge>, TransformError> {
82 self.checked_recur(|_| {
83 let result = match expr {
84 MirRelationExpr::ArrangeBy { input, .. } => {
85 self.harvest(input, knowledge, knowledge_stack)
86 }
87 MirRelationExpr::Get { id, typ, .. } => {
88 Ok(knowledge.get(id).cloned().unwrap_or_else(|| {
89 typ.column_types.iter().map(DatumKnowledge::from).collect()
90 }))
91 }
92 MirRelationExpr::Constant { rows, typ } => {
93 // TODO: handle multi-row cases with some constant columns.
94 if let Ok([(row, _diff)]) = rows.as_deref() {
95 let knowledge = std::iter::zip(row.iter(), typ.column_types.iter())
96 .map(DatumKnowledge::from)
97 .collect();
98 Ok(knowledge)
99 } else {
100 Ok(typ.column_types.iter().map(DatumKnowledge::from).collect())
101 }
102 }
103 MirRelationExpr::Let { id, value, body } => {
104 let value_knowledge = self.harvest(value, knowledge, knowledge_stack)?;
105 let prior_knowledge =
106 knowledge.insert(mz_expr::Id::Local(id.clone()), value_knowledge);
107 let body_knowledge = self.harvest(body, knowledge, knowledge_stack)?;
108 knowledge.remove(&mz_expr::Id::Local(id.clone()));
109 if let Some(prior_knowledge) = prior_knowledge {
110 knowledge.insert(mz_expr::Id::Local(id.clone()), prior_knowledge);
111 }
112 Ok(body_knowledge)
113 }
114 MirRelationExpr::LetRec {
115 ids,
116 values,
117 limits,
118 body,
119 } => {
120 // Set knowledge[i][j] = DatumKnowledge::bottom() for each
121 // column j and CTE i. This corresponds to the normal
122 // evaluation semantics where each recursive CTE is
123 // initialized to the empty collection.
124 for (id, value) in zip_eq(ids.iter(), values.iter()) {
125 let id = mz_expr::Id::Local(id.clone());
126 let knowledge_new = vec![DatumKnowledge::bottom(); value.arity()];
127 let knowledge_old = knowledge.insert(id, knowledge_new);
128 assert_none!(knowledge_old);
129 }
130
131 // Sum up the arity of all ids in the enclosing LetRec node.
132 let let_rec_arity = ids.iter().fold(0, |acc, id| {
133 let id = mz_expr::Id::Local(id.clone());
134 acc + u64::cast_from(knowledge[&id].len())
135 });
136
137 // Sequentially union knowledge[i][j] with the result of
138 // descending into a clone of values[i]. Repeat until one of
139 // the following conditions is met:
140 //
141 // 1. The knowledge bindings have stabilized at a fixpoint.
142 // 2. No fixpoint was found after `max_iterations`. If this
143 // is the case reset the knowledge vectors for all
144 // recursive CTEs to DatumKnowledge::top().
145 // 3. We reach the user-specified recursion limit of any of the bindings.
146 // In this case, we also give up similarly to 2., because we don't want to
147 // complicate things with handling different limits per binding.
148 let min_max_iter = LetRecLimit::min_max_iter(limits);
149 let max_iterations = 100;
150 let mut curr_iteration = 0;
151 loop {
152 // Check for conditions (2) and (3).
153 if curr_iteration >= max_iterations
154 || min_max_iter
155 .map(|min_max_iter| curr_iteration >= min_max_iter)
156 .unwrap_or(false)
157 {
158 if curr_iteration > 3 * let_rec_arity {
159 soft_panic_or_log!(
160 "LetRec loop in ColumnKnowledge has not converged in 3 * |{}|",
161 let_rec_arity
162 );
163 }
164
165 for (id, value) in zip_eq(ids.iter(), values.iter()) {
166 let id = mz_expr::Id::Local(id.clone());
167 let knowledge_new = vec![DatumKnowledge::top(); value.arity()];
168 knowledge.insert(id, knowledge_new);
169 }
170 break;
171 }
172
173 // Check for condition (1).
174 let mut change = false;
175 for (id, mut value) in zip_eq(ids.iter(), values.iter().cloned()) {
176 let id = mz_expr::Id::Local(id.clone());
177 let value = &mut value;
178 let next_knowledge = self.harvest(value, knowledge, knowledge_stack)?;
179 let curr_knowledge = knowledge.get_mut(&id).unwrap();
180 for (curr, next) in zip_eq(curr_knowledge.iter_mut(), next_knowledge) {
181 let prev = curr.clone();
182 curr.join_assign(&next);
183 change |= prev != *curr;
184 }
185 }
186 if !change {
187 break;
188 }
189
190 curr_iteration += 1;
191 }
192
193 // Descend into the values with the inferred knowledge.
194 for value in values.iter_mut() {
195 self.harvest(value, knowledge, knowledge_stack)?;
196 }
197
198 // Descend and return the knowledge from the body.
199 let body_knowledge = self.harvest(body, knowledge, knowledge_stack)?;
200
201 // Remove shadowed bindings. This is good hygiene, as
202 // otherwise with nested LetRec blocks the `loop { ... }`
203 // above will carry inner LetRec IDs across outer LetRec
204 // iterations. As a consequence, the "no shadowing"
205 // assertion at the beginning of this block will fail at the
206 // inner LetRec for the second outer LetRec iteration.
207 for id in ids.iter() {
208 let id = mz_expr::Id::Local(id.clone());
209 knowledge.remove(&id);
210 }
211
212 Ok(body_knowledge)
213 }
214 MirRelationExpr::Project { input, outputs } => {
215 let input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
216 Ok(outputs
217 .iter()
218 .map(|i| input_knowledge[*i].clone())
219 .collect())
220 }
221 MirRelationExpr::Map { input, scalars } => {
222 let mut input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
223 let mut column_types = input.typ().column_types;
224 for scalar in scalars.iter_mut() {
225 input_knowledge.push(optimize(
226 scalar,
227 &column_types,
228 &input_knowledge[..],
229 knowledge_stack,
230 )?);
231 column_types.push(scalar.typ(&column_types));
232 }
233 Ok(input_knowledge)
234 }
235 MirRelationExpr::FlatMap { input, func, exprs } => {
236 let mut input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
237 let input_typ = input.typ();
238 for expr in exprs {
239 optimize(
240 expr,
241 &input_typ.column_types,
242 &input_knowledge[..],
243 knowledge_stack,
244 )?;
245 }
246 let func_typ = func.output_type();
247 input_knowledge.extend(func_typ.column_types.iter().map(DatumKnowledge::from));
248 Ok(input_knowledge)
249 }
250 MirRelationExpr::Filter { input, predicates } => {
251 let mut input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
252 let input_typ = input.typ();
253 for predicate in predicates.iter_mut() {
254 optimize(
255 predicate,
256 &input_typ.column_types,
257 &input_knowledge[..],
258 knowledge_stack,
259 )?;
260 }
261 // If any predicate tests a column for equality, truth, or is_null, we learn stuff.
262 for predicate in predicates.iter() {
263 // Equality tests allow us to unify the column knowledge of each input.
264 if let MirScalarExpr::CallBinary {
265 func: mz_expr::BinaryFunc::Eq(_),
266 expr1,
267 expr2,
268 } = predicate
269 {
270 // Collect knowledge about the inputs (for columns and literals).
271 let mut knowledge = DatumKnowledge::top();
272 if let MirScalarExpr::Column(c, _) = &**expr1 {
273 knowledge.meet_assign(&input_knowledge[*c]);
274 }
275 if let MirScalarExpr::Column(c, _) = &**expr2 {
276 knowledge.meet_assign(&input_knowledge[*c]);
277 }
278
279 // Absorb literal knowledge about columns.
280 if let MirScalarExpr::Literal(..) = &**expr1 {
281 knowledge.meet_assign(&DatumKnowledge::from(&**expr1));
282 }
283 if let MirScalarExpr::Literal(..) = &**expr2 {
284 knowledge.meet_assign(&DatumKnowledge::from(&**expr2));
285 }
286
287 // Write back unified knowledge to each column.
288 if let MirScalarExpr::Column(c, _) = &**expr1 {
289 input_knowledge[*c].meet_assign(&knowledge);
290 }
291 if let MirScalarExpr::Column(c, _) = &**expr2 {
292 input_knowledge[*c].meet_assign(&knowledge);
293 }
294 }
295 if let MirScalarExpr::CallUnary {
296 func: UnaryFunc::Not(func::Not),
297 expr,
298 } = predicate
299 {
300 if let MirScalarExpr::CallUnary {
301 func: UnaryFunc::IsNull(func::IsNull),
302 expr,
303 } = &**expr
304 {
305 if let MirScalarExpr::Column(c, _) = &**expr {
306 input_knowledge[*c].meet_assign(&DatumKnowledge::any(false));
307 }
308 }
309 }
310 }
311
312 Ok(input_knowledge)
313 }
314 MirRelationExpr::Join {
315 inputs,
316 equivalences,
317 implementation,
318 ..
319 } => {
320 // Aggregate column knowledge from each input into one `Vec`.
321 let mut knowledges = Vec::new();
322 for input in inputs.iter_mut() {
323 for mut knowledge in self.harvest(input, knowledge, knowledge_stack)? {
324 // Do not propagate error literals beyond join inputs, since that may result
325 // in them being propagated to other inputs of the join and evaluated when
326 // they should not.
327 if let DatumKnowledge::Lit { value: Err(_), .. } = knowledge {
328 knowledge.join_assign(&DatumKnowledge::any(false));
329 }
330 knowledges.push(knowledge);
331 }
332 }
333
334 // This only aggregates the column types of each input, not the
335 // keys of the inputs. It is unnecessary to aggregate the keys
336 // of the inputs since input keys are unnecessary for reducing
337 // `MirScalarExpr`s.
338 let folded_inputs_typ =
339 inputs
340 .iter()
341 .fold(SqlRelationType::empty(), |mut typ, input| {
342 typ.column_types.append(&mut input.typ().column_types);
343 typ
344 });
345
346 for equivalence in equivalences.iter_mut() {
347 let mut knowledge = DatumKnowledge::top();
348
349 // We can produce composite knowledge for everything in the equivalence class.
350 for expr in equivalence.iter_mut() {
351 if !matches!(implementation, IndexedFilter(..)) {
352 optimize(
353 expr,
354 &folded_inputs_typ.column_types,
355 &knowledges,
356 knowledge_stack,
357 )?;
358 }
359 if let MirScalarExpr::Column(c, _) = expr {
360 knowledge.meet_assign(&knowledges[*c]);
361 }
362 if let MirScalarExpr::Literal(..) = expr {
363 knowledge.meet_assign(&DatumKnowledge::from(&*expr));
364 }
365 }
366 for expr in equivalence.iter_mut() {
367 if let MirScalarExpr::Column(c, _) = expr {
368 knowledges[*c] = knowledge.clone();
369 }
370 }
371 }
372
373 Ok(knowledges)
374 }
375 MirRelationExpr::Reduce {
376 input,
377 group_key,
378 aggregates,
379 monotonic: _,
380 expected_group_size: _,
381 } => {
382 let input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
383 let input_typ = input.typ();
384 let mut output = group_key
385 .iter_mut()
386 .map(|k| {
387 optimize(
388 k,
389 &input_typ.column_types,
390 &input_knowledge[..],
391 knowledge_stack,
392 )
393 })
394 .collect::<Result<Vec<_>, _>>()?;
395 for aggregate in aggregates.iter_mut() {
396 use mz_expr::AggregateFunc;
397 let knowledge = optimize(
398 &mut aggregate.expr,
399 &input_typ.column_types,
400 &input_knowledge[..],
401 knowledge_stack,
402 )?;
403 // This could be improved.
404 let knowledge = match aggregate.func {
405 AggregateFunc::MaxInt16
406 | AggregateFunc::MaxInt32
407 | AggregateFunc::MaxInt64
408 | AggregateFunc::MaxUInt16
409 | AggregateFunc::MaxUInt32
410 | AggregateFunc::MaxUInt64
411 | AggregateFunc::MaxMzTimestamp
412 | AggregateFunc::MaxFloat32
413 | AggregateFunc::MaxFloat64
414 | AggregateFunc::MaxBool
415 | AggregateFunc::MaxString
416 | AggregateFunc::MaxDate
417 | AggregateFunc::MaxTimestamp
418 | AggregateFunc::MaxTimestampTz
419 | AggregateFunc::MinInt16
420 | AggregateFunc::MinInt32
421 | AggregateFunc::MinInt64
422 | AggregateFunc::MinUInt16
423 | AggregateFunc::MinUInt32
424 | AggregateFunc::MinUInt64
425 | AggregateFunc::MinMzTimestamp
426 | AggregateFunc::MinFloat32
427 | AggregateFunc::MinFloat64
428 | AggregateFunc::MinBool
429 | AggregateFunc::MinString
430 | AggregateFunc::MinDate
431 | AggregateFunc::MinTimestamp
432 | AggregateFunc::MinTimestampTz
433 | AggregateFunc::Any
434 | AggregateFunc::All => {
435 // These methods propagate constant values exactly.
436 knowledge
437 }
438 AggregateFunc::Count => DatumKnowledge::any(false),
439 _ => {
440 // The remaining aggregates are non-null if
441 // their inputs are non-null. This is correct
442 // because in Mir~ we reduce an empty collection
443 // to an empty collection (in Hir~ the result
444 // often is singleton null collection).
445 DatumKnowledge::any(knowledge.nullable())
446 }
447 };
448 output.push(knowledge);
449 }
450 Ok(output)
451 }
452 MirRelationExpr::TopK { input, limit, .. } => {
453 let input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
454 if let Some(limit) = limit.as_mut() {
455 optimize(
456 limit,
457 &input.typ().column_types,
458 &input_knowledge[..],
459 knowledge_stack,
460 )?;
461 }
462 Ok(input_knowledge)
463 }
464 MirRelationExpr::Negate { input } => {
465 self.harvest(input, knowledge, knowledge_stack)
466 }
467 MirRelationExpr::Threshold { input } => {
468 self.harvest(input, knowledge, knowledge_stack)
469 }
470 MirRelationExpr::Union { base, inputs } => {
471 let mut know = self.harvest(base, knowledge, knowledge_stack)?;
472 for input in inputs {
473 know = know
474 .into_iter()
475 .zip_eq(self.harvest(input, knowledge, knowledge_stack)?)
476 .map(|(mut k1, k2)| {
477 k1.join_assign(&k2);
478 k1
479 })
480 .collect();
481 }
482 Ok(know)
483 }
484 }?;
485
486 // println!("# Plan");
487 // println!("{}", expr.pretty());
488 // println!("# Knowledge");
489 // print_knowledge_vec(&result);
490 // println!("---");
491
492 Ok(result)
493 })
494 }
495}
496
497/// Information about a specific column.
498///
499/// The values should form a [complete lattice].
500///
501/// [complete lattice]: https://en.wikipedia.org/wiki/Complete_lattice
502#[derive(Clone, Debug, PartialEq, Eq)]
503enum DatumKnowledge {
504 // Any possible value, optionally known to be NOT NULL.
505 Any {
506 nullable: bool,
507 },
508 // A known literal value of a specific type.
509 Lit {
510 value: Result<mz_repr::Row, EvalError>,
511 typ: SqlScalarType,
512 },
513 // A value that cannot exist.
514 Nothing,
515}
516
517impl From<&MirScalarExpr> for DatumKnowledge {
518 fn from(expr: &MirScalarExpr) -> Self {
519 if let MirScalarExpr::Literal(l, t) = expr {
520 let value = l.clone();
521 let typ = t.scalar_type.clone();
522 Self::Lit { value, typ }
523 } else {
524 Self::top()
525 }
526 }
527}
528
529impl From<(Datum<'_>, &SqlColumnType)> for DatumKnowledge {
530 fn from((d, t): (Datum<'_>, &SqlColumnType)) -> Self {
531 let value = Ok(Row::pack_slice(std::slice::from_ref(&d)));
532 let typ = t.scalar_type.clone();
533 Self::Lit { value, typ }
534 }
535}
536
537impl From<&SqlColumnType> for DatumKnowledge {
538 fn from(typ: &SqlColumnType) -> Self {
539 let nullable = typ.nullable;
540 Self::Any { nullable }
541 }
542}
543
544impl DatumKnowledge {
545 /// The most general possible knowledge (the top of the complete lattice).
546 fn top() -> Self {
547 Self::Any { nullable: true }
548 }
549
550 /// The strictest possible knowledge (the bottom of the complete lattice).
551 #[allow(dead_code)]
552 fn bottom() -> Self {
553 Self::Nothing
554 }
555
556 /// Create a [`DatumKnowledge::Any`] instance with the given nullable flag.
557 fn any(nullable: bool) -> Self {
558 DatumKnowledge::Any { nullable }
559 }
560
561 /// Unions (weakens) the possible states of a column.
562 fn join_assign(&mut self, other: &Self) {
563 use DatumKnowledge::*;
564
565 // Each of the following `if` statements handles the cases marked with
566 // `x` of the (self x other) cross product (depicted as a rows x cols
567 // table where Self::bottom() is the UL and Self::top() the LR corner).
568 // Cases that are already handled are marked with `+` and cases that are
569 // yet to be handled marked with `-`.
570 //
571 // The order of handling (crossing out) of cases ensures that
572 // `*.clone()` is not called unless necessary.
573 //
574 // The final case after the ifs can assert that both sides are Lit
575 // variants.
576
577 // x - - - : Nothing
578 // x - - - : Lit { _, _ }
579 // x - - - : Any { false }
580 // x x x x : Any { true }
581 if crate::any![
582 matches!(self, Any { nullable: true }),
583 matches!(other, Nothing),
584 ] {
585 // Nothing to do.
586 }
587 // + x x x : Nothing
588 // + - - x : Lit { _, _ }
589 // + - - x : Any { false }
590 // + + + + : Any { true }
591 else if crate::any![
592 matches!(self, Nothing),
593 matches!(other, Any { nullable: true }),
594 ] {
595 *self = other.clone();
596 }
597 // + + + + : Nothing
598 // + - - + : Lit { _, _ }
599 // + x x + : Any { false }
600 // + + + + : Any { true }
601 else if matches!(self, Any { nullable: false }) {
602 if !other.nullable() {
603 // Nothing to do.
604 } else {
605 *self = Self::top() // other: Lit { null, _ }
606 }
607 // Nothing to do.
608 }
609 // + + + + : Nothing
610 // + - x + : Lit { _, _ }
611 // + + + + : Any { false }
612 // + + + + : Any { true }
613 else if matches!(other, Any { nullable: false }) {
614 if !self.nullable() {
615 *self = other.clone();
616 } else {
617 *self = Self::top() // other: Lit { null, _ }
618 }
619 }
620 // + + + + : Nothing
621 // + x + + : Lit { _, _ }
622 // + + + + : Any { false }
623 // + + + + : Any { true }
624 else {
625 let Lit {
626 value: s_val,
627 typ: s_typ,
628 } = self
629 else {
630 unreachable!();
631 };
632 let Lit {
633 value: o_val,
634 typ: o_typ,
635 } = other
636 else {
637 unreachable!();
638 };
639
640 if !s_typ.base_eq(o_typ) {
641 ::tracing::error!("Undefined join of non-equal base types {s_typ:?} != {o_typ:?}");
642 *self = Self::top();
643 } else if s_val != o_val {
644 let nullable = self.nullable() || other.nullable();
645 *self = Any { nullable }
646 } else if s_typ != o_typ {
647 // Same value but different concrete types - strip all modifiers!
648 // This is identical to what SqlColumnType::union is doing.
649 *s_typ = s_typ.without_modifiers();
650 } else {
651 // Value and type coincide - do nothing!
652 }
653 }
654 }
655
656 /// Intersects (strengthens) the possible states of a column.
657 fn meet_assign(&mut self, other: &Self) {
658 use DatumKnowledge::*;
659
660 // Each of the following `if` statements handles the cases marked with
661 // `x` of the (self x other) cross product (depicted as a rows x cols
662 // table where Self::bottom() is the UL and Self::top() the LR corner).
663 // Cases that are already handled are marked with `+` and cases that are
664 // yet to be handled marked with `-`.
665 //
666 // The order of handling (crossing out) of cases ensures that
667 // `*.clone()` is not called unless necessary.
668 //
669 // The final case after the ifs can assert that both sides are Lit
670 // variants.
671
672 // x x x x : Nothing
673 // - - - x : Lit { _, _ }
674 // - - - x : Any { false }
675 // - - - x : Any { true }
676 if crate::any![
677 matches!(self, Nothing),
678 matches!(other, Any { nullable: true }),
679 ] {
680 // Nothing to do.
681 }
682 // + + + + : Nothing
683 // x - - + : Lit { _, _ }
684 // x - - + : Any { false }
685 // x x x + : Any { true }
686 else if crate::any![
687 matches!(self, Any { nullable: true }),
688 matches!(other, Nothing),
689 ] {
690 *self = other.clone();
691 }
692 // + + + + : Nothing
693 // + - - + : Lit { _, _ }
694 // + x x + : Any { false }
695 // + + + + : Any { true }
696 else if matches!(self, Any { nullable: false }) {
697 match other {
698 Any { .. } => {
699 // Nothing to do.
700 }
701 Lit { .. } => {
702 if other.nullable() {
703 *self = Nothing; // other: Lit { null, _ }
704 } else {
705 *self = other.clone();
706 }
707 }
708 Nothing => unreachable!(),
709 }
710 }
711 // + + + + : Nothing
712 // + - x + : Lit { _, _ }
713 // + + + + : Any { false }
714 // + + + + : Any { true }
715 else if matches!(other, Any { nullable: false }) {
716 if self.nullable() {
717 *self = Nothing // self: Lit { null, _ }
718 }
719 }
720 // + + + + : Nothing
721 // + x + + : Lit { _, _ }
722 // + + + + : Any { false }
723 // + + + + : Any { true }
724 else {
725 let Lit {
726 value: s_val,
727 typ: s_typ,
728 } = self
729 else {
730 unreachable!();
731 };
732 let Lit {
733 value: o_val,
734 typ: o_typ,
735 } = other
736 else {
737 unreachable!();
738 };
739
740 if !s_typ.base_eq(o_typ) {
741 soft_panic_or_log!("Undefined meet of non-equal base types {s_typ:?} != {o_typ:?}");
742 *self = Self::top(); // this really should be Nothing
743 } else if s_val != o_val {
744 *self = Nothing;
745 } else if s_typ != o_typ {
746 // Same value but different concrete types - strip all
747 // modifiers! We should probably pick the more specific of the
748 // two types if they are ordered or return Nothing otherwise.
749 *s_typ = s_typ.without_modifiers();
750 } else {
751 // Value and type coincide - do nothing!
752 }
753 }
754 }
755
756 fn nullable(&self) -> bool {
757 match self {
758 DatumKnowledge::Any { nullable } => *nullable,
759 DatumKnowledge::Lit { value, .. } => match value {
760 Ok(value) => value.iter().next().unwrap().is_null(),
761 Err(_) => false,
762 },
763 DatumKnowledge::Nothing => false,
764 }
765 }
766}
767
768/// Attempts to optimize
769///
770/// `knowledge_stack` is a pre-allocated vector but is expected not to contain any elements.
771fn optimize(
772 expr: &mut MirScalarExpr,
773 column_types: &[SqlColumnType],
774 column_knowledge: &[DatumKnowledge],
775 knowledge_stack: &mut Vec<DatumKnowledge>,
776) -> Result<DatumKnowledge, TransformError> {
777 // Storage for `DatumKnowledge` being propagated up through the
778 // `MirScalarExpr`. When a node is visited, pop off as many `DatumKnowledge`
779 // as the number of children the node has, and then push the
780 // `DatumKnowledge` corresponding to the node back onto the stack.
781 // Post-order traversal means that if a node has `n` children, the top `n`
782 // `DatumKnowledge` in the stack are the `DatumKnowledge` corresponding to
783 // the children.
784 assert!(knowledge_stack.is_empty());
785 #[allow(deprecated)]
786 expr.visit_mut_pre_post(
787 &mut |e| {
788 if let MirScalarExpr::If { then, els, .. } = e {
789 Some(vec![then, els])
790 } else {
791 None
792 }
793 },
794 &mut |e| {
795 let result = match e {
796 MirScalarExpr::Column(index, _) => {
797 let index = *index;
798 if let DatumKnowledge::Lit { value, typ } = &column_knowledge[index] {
799 let nullable = column_knowledge[index].nullable();
800 *e = MirScalarExpr::Literal(value.clone(), typ.clone().nullable(nullable));
801 }
802 column_knowledge[index].clone()
803 }
804 MirScalarExpr::Literal(_, _) | MirScalarExpr::CallUnmaterializable(_) => {
805 DatumKnowledge::from(&*e)
806 }
807 MirScalarExpr::CallUnary { func, expr: _ } => {
808 let knowledge = knowledge_stack.pop().unwrap();
809 if matches!(&knowledge, DatumKnowledge::Lit { .. }) {
810 e.reduce(column_types);
811 } else if func == &UnaryFunc::IsNull(func::IsNull) && !knowledge.nullable() {
812 *e = MirScalarExpr::literal_false();
813 };
814 DatumKnowledge::from(&*e)
815 }
816 MirScalarExpr::CallBinary {
817 func: _,
818 expr1: _,
819 expr2: _,
820 } => {
821 let knowledge2 = knowledge_stack.pop().unwrap();
822 let knowledge1 = knowledge_stack.pop().unwrap();
823 if crate::any![
824 matches!(knowledge1, DatumKnowledge::Lit { .. }),
825 matches!(knowledge2, DatumKnowledge::Lit { .. }),
826 ] {
827 e.reduce(column_types);
828 }
829 DatumKnowledge::from(&*e)
830 }
831 MirScalarExpr::CallVariadic { func: _, exprs } => {
832 // Drain the last `exprs.len()` knowledge, and reduce if any is `Lit`.
833 assert!(knowledge_stack.len() >= exprs.len());
834 if knowledge_stack
835 .drain(knowledge_stack.len() - exprs.len()..)
836 .any(|k| matches!(k, DatumKnowledge::Lit { .. }))
837 {
838 e.reduce(column_types);
839 }
840 DatumKnowledge::from(&*e)
841 }
842 MirScalarExpr::If {
843 cond: _,
844 then: _,
845 els: _,
846 } => {
847 // `cond` has been left un-optimized, as we should not remove the conditional
848 // nature of the evaluation based on column knowledge: the resulting
849 // expression could then move down past a filter or join that provided
850 // the guarantees, and would become wrong.
851 //
852 // Instead, each of the branches have been optimized, and we
853 // can union the states of their columns.
854 let know2 = knowledge_stack.pop().unwrap();
855 let mut know1 = knowledge_stack.pop().unwrap();
856 know1.join_assign(&know2);
857 know1
858 }
859 };
860 knowledge_stack.push(result);
861 },
862 )?;
863 let knowledge_datum = knowledge_stack.pop();
864 assert!(knowledge_stack.is_empty());
865 knowledge_datum.ok_or_else(|| {
866 TransformError::Internal(String::from("unexpectedly empty stack in optimize"))
867 })
868}
869
870#[allow(dead_code)] // keep debugging method around
871fn print_knowledge_map<'a>(
872 knowledge: &BTreeMap<mz_expr::Id, Vec<DatumKnowledge>>,
873 ids: impl Iterator<Item = &'a mz_expr::LocalId>,
874) {
875 for id in ids {
876 let id = mz_expr::Id::Local(id.clone());
877 for (i, k) in knowledge.get(&id).unwrap().iter().enumerate() {
878 println!("{id}.#{i}: {k:?}");
879 }
880 }
881 println!("");
882}
883
884#[allow(dead_code)] // keep debugging method around
885fn print_knowledge_vec(knowledge: &Vec<DatumKnowledge>) {
886 for (i, k) in knowledge.iter().enumerate() {
887 println!("#{i}: {k:?}");
888 }
889 println!("");
890}