mz_transform/
column_knowledge.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Transformations based on pulling information about individual columns from sources.
11
12use std::collections::BTreeMap;
13
14use itertools::{Itertools, zip_eq};
15use mz_expr::JoinImplementation::IndexedFilter;
16use mz_expr::visit::Visit;
17use mz_expr::{
18    EvalError, LetRecLimit, MirRelationExpr, MirScalarExpr, RECURSION_LIMIT, UnaryFunc, func,
19};
20use mz_ore::cast::CastFrom;
21use mz_ore::stack::{CheckedRecursion, RecursionGuard};
22use mz_ore::{assert_none, soft_panic_or_log};
23use mz_repr::{Datum, Row, SqlColumnType, SqlRelationType, SqlScalarType};
24
25use crate::{TransformCtx, TransformError};
26
27/// Harvest and act upon per-column information.
28#[derive(Debug)]
29pub struct ColumnKnowledge {
30    recursion_guard: RecursionGuard,
31}
32
33impl Default for ColumnKnowledge {
34    fn default() -> ColumnKnowledge {
35        ColumnKnowledge {
36            recursion_guard: RecursionGuard::with_limit(RECURSION_LIMIT),
37        }
38    }
39}
40
41impl CheckedRecursion for ColumnKnowledge {
42    fn recursion_guard(&self) -> &RecursionGuard {
43        &self.recursion_guard
44    }
45}
46
47impl crate::Transform for ColumnKnowledge {
48    fn name(&self) -> &'static str {
49        "ColumnKnowledge"
50    }
51
52    /// Transforms an expression through accumulated knowledge.
53    #[mz_ore::instrument(
54        target = "optimizer",
55        level = "debug",
56        fields(path.segment = "column_knowledge")
57    )]
58    fn actually_perform_transform(
59        &self,
60        expr: &mut MirRelationExpr,
61        _: &mut TransformCtx,
62    ) -> Result<(), TransformError> {
63        let mut knowledge_stack = Vec::<DatumKnowledge>::new();
64        let result = self
65            .harvest(expr, &mut BTreeMap::new(), &mut knowledge_stack)
66            .map(|_| ());
67        mz_repr::explain::trace_plan(&*expr);
68        result
69    }
70}
71
72impl ColumnKnowledge {
73    /// Harvest per-column knowledge.
74    ///
75    /// `knowledge_stack` is a pre-allocated vector but is expected not to contain any elements.
76    fn harvest(
77        &self,
78        expr: &mut MirRelationExpr,
79        knowledge: &mut BTreeMap<mz_expr::Id, Vec<DatumKnowledge>>,
80        knowledge_stack: &mut Vec<DatumKnowledge>,
81    ) -> Result<Vec<DatumKnowledge>, TransformError> {
82        self.checked_recur(|_| {
83            let result = match expr {
84                MirRelationExpr::ArrangeBy { input, .. } => {
85                    self.harvest(input, knowledge, knowledge_stack)
86                }
87                MirRelationExpr::Get { id, typ, .. } => {
88                    Ok(knowledge.get(id).cloned().unwrap_or_else(|| {
89                        typ.column_types.iter().map(DatumKnowledge::from).collect()
90                    }))
91                }
92                MirRelationExpr::Constant { rows, typ } => {
93                    // TODO: handle multi-row cases with some constant columns.
94                    if let Ok([(row, _diff)]) = rows.as_deref() {
95                        let knowledge = std::iter::zip(row.iter(), typ.column_types.iter())
96                            .map(DatumKnowledge::from)
97                            .collect();
98                        Ok(knowledge)
99                    } else {
100                        Ok(typ.column_types.iter().map(DatumKnowledge::from).collect())
101                    }
102                }
103                MirRelationExpr::Let { id, value, body } => {
104                    let value_knowledge = self.harvest(value, knowledge, knowledge_stack)?;
105                    let prior_knowledge =
106                        knowledge.insert(mz_expr::Id::Local(id.clone()), value_knowledge);
107                    let body_knowledge = self.harvest(body, knowledge, knowledge_stack)?;
108                    knowledge.remove(&mz_expr::Id::Local(id.clone()));
109                    if let Some(prior_knowledge) = prior_knowledge {
110                        knowledge.insert(mz_expr::Id::Local(id.clone()), prior_knowledge);
111                    }
112                    Ok(body_knowledge)
113                }
114                MirRelationExpr::LetRec {
115                    ids,
116                    values,
117                    limits,
118                    body,
119                } => {
120                    // Set knowledge[i][j] = DatumKnowledge::bottom() for each
121                    // column j and CTE i. This corresponds to the normal
122                    // evaluation semantics where each recursive CTE is
123                    // initialized to the empty collection.
124                    for (id, value) in zip_eq(ids.iter(), values.iter()) {
125                        let id = mz_expr::Id::Local(id.clone());
126                        let knowledge_new = vec![DatumKnowledge::bottom(); value.arity()];
127                        let knowledge_old = knowledge.insert(id, knowledge_new);
128                        assert_none!(knowledge_old);
129                    }
130
131                    // Sum up the arity of all ids in the enclosing LetRec node.
132                    let let_rec_arity = ids.iter().fold(0, |acc, id| {
133                        let id = mz_expr::Id::Local(id.clone());
134                        acc + u64::cast_from(knowledge[&id].len())
135                    });
136
137                    // Sequentially union knowledge[i][j] with the result of
138                    // descending into a clone of values[i]. Repeat until one of
139                    // the following conditions is met:
140                    //
141                    // 1. The knowledge bindings have stabilized at a fixpoint.
142                    // 2. No fixpoint was found after `max_iterations`. If this
143                    //    is the case reset the knowledge vectors for all
144                    //    recursive CTEs to DatumKnowledge::top().
145                    // 3. We reach the user-specified recursion limit of any of the bindings.
146                    //    In this case, we also give up similarly to 2., because we don't want to
147                    //    complicate things with handling different limits per binding.
148                    let min_max_iter = LetRecLimit::min_max_iter(limits);
149                    let max_iterations = 100;
150                    let mut curr_iteration = 0;
151                    loop {
152                        // Check for conditions (2) and (3).
153                        if curr_iteration >= max_iterations
154                            || min_max_iter
155                                .map(|min_max_iter| curr_iteration >= min_max_iter)
156                                .unwrap_or(false)
157                        {
158                            if curr_iteration > 3 * let_rec_arity {
159                                soft_panic_or_log!(
160                                    "LetRec loop in ColumnKnowledge has not converged in 3 * |{}|",
161                                    let_rec_arity
162                                );
163                            }
164
165                            for (id, value) in zip_eq(ids.iter(), values.iter()) {
166                                let id = mz_expr::Id::Local(id.clone());
167                                let knowledge_new = vec![DatumKnowledge::top(); value.arity()];
168                                knowledge.insert(id, knowledge_new);
169                            }
170                            break;
171                        }
172
173                        // Check for condition (1).
174                        let mut change = false;
175                        for (id, mut value) in zip_eq(ids.iter(), values.iter().cloned()) {
176                            let id = mz_expr::Id::Local(id.clone());
177                            let value = &mut value;
178                            let next_knowledge = self.harvest(value, knowledge, knowledge_stack)?;
179                            let curr_knowledge = knowledge.get_mut(&id).unwrap();
180                            for (curr, next) in zip_eq(curr_knowledge.iter_mut(), next_knowledge) {
181                                let prev = curr.clone();
182                                curr.join_assign(&next);
183                                change |= prev != *curr;
184                            }
185                        }
186                        if !change {
187                            break;
188                        }
189
190                        curr_iteration += 1;
191                    }
192
193                    // Descend into the values with the inferred knowledge.
194                    for value in values.iter_mut() {
195                        self.harvest(value, knowledge, knowledge_stack)?;
196                    }
197
198                    // Descend and return the knowledge from the body.
199                    let body_knowledge = self.harvest(body, knowledge, knowledge_stack)?;
200
201                    // Remove shadowed bindings. This is good hygiene, as
202                    // otherwise with nested LetRec blocks the `loop { ... }`
203                    // above will carry inner LetRec IDs across outer LetRec
204                    // iterations. As a consequence, the "no shadowing"
205                    // assertion at the beginning of this block will fail at the
206                    // inner LetRec for the second outer LetRec iteration.
207                    for id in ids.iter() {
208                        let id = mz_expr::Id::Local(id.clone());
209                        knowledge.remove(&id);
210                    }
211
212                    Ok(body_knowledge)
213                }
214                MirRelationExpr::Project { input, outputs } => {
215                    let input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
216                    Ok(outputs
217                        .iter()
218                        .map(|i| input_knowledge[*i].clone())
219                        .collect())
220                }
221                MirRelationExpr::Map { input, scalars } => {
222                    let mut input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
223                    let mut column_types = input.typ().column_types;
224                    for scalar in scalars.iter_mut() {
225                        input_knowledge.push(optimize(
226                            scalar,
227                            &column_types,
228                            &input_knowledge[..],
229                            knowledge_stack,
230                        )?);
231                        column_types.push(scalar.typ(&column_types));
232                    }
233                    Ok(input_knowledge)
234                }
235                MirRelationExpr::FlatMap { input, func, exprs } => {
236                    let mut input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
237                    let input_typ = input.typ();
238                    for expr in exprs {
239                        optimize(
240                            expr,
241                            &input_typ.column_types,
242                            &input_knowledge[..],
243                            knowledge_stack,
244                        )?;
245                    }
246                    let func_typ = func.output_type();
247                    input_knowledge.extend(func_typ.column_types.iter().map(DatumKnowledge::from));
248                    Ok(input_knowledge)
249                }
250                MirRelationExpr::Filter { input, predicates } => {
251                    let mut input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
252                    let input_typ = input.typ();
253                    for predicate in predicates.iter_mut() {
254                        optimize(
255                            predicate,
256                            &input_typ.column_types,
257                            &input_knowledge[..],
258                            knowledge_stack,
259                        )?;
260                    }
261                    // If any predicate tests a column for equality, truth, or is_null, we learn stuff.
262                    for predicate in predicates.iter() {
263                        // Equality tests allow us to unify the column knowledge of each input.
264                        if let MirScalarExpr::CallBinary {
265                            func: mz_expr::BinaryFunc::Eq(_),
266                            expr1,
267                            expr2,
268                        } = predicate
269                        {
270                            // Collect knowledge about the inputs (for columns and literals).
271                            let mut knowledge = DatumKnowledge::top();
272                            if let MirScalarExpr::Column(c, _) = &**expr1 {
273                                knowledge.meet_assign(&input_knowledge[*c]);
274                            }
275                            if let MirScalarExpr::Column(c, _) = &**expr2 {
276                                knowledge.meet_assign(&input_knowledge[*c]);
277                            }
278
279                            // Absorb literal knowledge about columns.
280                            if let MirScalarExpr::Literal(..) = &**expr1 {
281                                knowledge.meet_assign(&DatumKnowledge::from(&**expr1));
282                            }
283                            if let MirScalarExpr::Literal(..) = &**expr2 {
284                                knowledge.meet_assign(&DatumKnowledge::from(&**expr2));
285                            }
286
287                            // Write back unified knowledge to each column.
288                            if let MirScalarExpr::Column(c, _) = &**expr1 {
289                                input_knowledge[*c].meet_assign(&knowledge);
290                            }
291                            if let MirScalarExpr::Column(c, _) = &**expr2 {
292                                input_knowledge[*c].meet_assign(&knowledge);
293                            }
294                        }
295                        if let MirScalarExpr::CallUnary {
296                            func: UnaryFunc::Not(func::Not),
297                            expr,
298                        } = predicate
299                        {
300                            if let MirScalarExpr::CallUnary {
301                                func: UnaryFunc::IsNull(func::IsNull),
302                                expr,
303                            } = &**expr
304                            {
305                                if let MirScalarExpr::Column(c, _) = &**expr {
306                                    input_knowledge[*c].meet_assign(&DatumKnowledge::any(false));
307                                }
308                            }
309                        }
310                    }
311
312                    Ok(input_knowledge)
313                }
314                MirRelationExpr::Join {
315                    inputs,
316                    equivalences,
317                    implementation,
318                    ..
319                } => {
320                    // Aggregate column knowledge from each input into one `Vec`.
321                    let mut knowledges = Vec::new();
322                    for input in inputs.iter_mut() {
323                        for mut knowledge in self.harvest(input, knowledge, knowledge_stack)? {
324                            // Do not propagate error literals beyond join inputs, since that may result
325                            // in them being propagated to other inputs of the join and evaluated when
326                            // they should not.
327                            if let DatumKnowledge::Lit { value: Err(_), .. } = knowledge {
328                                knowledge.join_assign(&DatumKnowledge::any(false));
329                            }
330                            knowledges.push(knowledge);
331                        }
332                    }
333
334                    // This only aggregates the column types of each input, not the
335                    // keys of the inputs. It is unnecessary to aggregate the keys
336                    // of the inputs since input keys are unnecessary for reducing
337                    // `MirScalarExpr`s.
338                    let folded_inputs_typ =
339                        inputs
340                            .iter()
341                            .fold(SqlRelationType::empty(), |mut typ, input| {
342                                typ.column_types.append(&mut input.typ().column_types);
343                                typ
344                            });
345
346                    for equivalence in equivalences.iter_mut() {
347                        let mut knowledge = DatumKnowledge::top();
348
349                        // We can produce composite knowledge for everything in the equivalence class.
350                        for expr in equivalence.iter_mut() {
351                            if !matches!(implementation, IndexedFilter(..)) {
352                                optimize(
353                                    expr,
354                                    &folded_inputs_typ.column_types,
355                                    &knowledges,
356                                    knowledge_stack,
357                                )?;
358                            }
359                            if let MirScalarExpr::Column(c, _) = expr {
360                                knowledge.meet_assign(&knowledges[*c]);
361                            }
362                            if let MirScalarExpr::Literal(..) = expr {
363                                knowledge.meet_assign(&DatumKnowledge::from(&*expr));
364                            }
365                        }
366                        for expr in equivalence.iter_mut() {
367                            if let MirScalarExpr::Column(c, _) = expr {
368                                knowledges[*c] = knowledge.clone();
369                            }
370                        }
371                    }
372
373                    Ok(knowledges)
374                }
375                MirRelationExpr::Reduce {
376                    input,
377                    group_key,
378                    aggregates,
379                    monotonic: _,
380                    expected_group_size: _,
381                } => {
382                    let input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
383                    let input_typ = input.typ();
384                    let mut output = group_key
385                        .iter_mut()
386                        .map(|k| {
387                            optimize(
388                                k,
389                                &input_typ.column_types,
390                                &input_knowledge[..],
391                                knowledge_stack,
392                            )
393                        })
394                        .collect::<Result<Vec<_>, _>>()?;
395                    for aggregate in aggregates.iter_mut() {
396                        use mz_expr::AggregateFunc;
397                        let knowledge = optimize(
398                            &mut aggregate.expr,
399                            &input_typ.column_types,
400                            &input_knowledge[..],
401                            knowledge_stack,
402                        )?;
403                        // This could be improved.
404                        let knowledge = match aggregate.func {
405                            AggregateFunc::MaxInt16
406                            | AggregateFunc::MaxInt32
407                            | AggregateFunc::MaxInt64
408                            | AggregateFunc::MaxUInt16
409                            | AggregateFunc::MaxUInt32
410                            | AggregateFunc::MaxUInt64
411                            | AggregateFunc::MaxMzTimestamp
412                            | AggregateFunc::MaxFloat32
413                            | AggregateFunc::MaxFloat64
414                            | AggregateFunc::MaxBool
415                            | AggregateFunc::MaxString
416                            | AggregateFunc::MaxDate
417                            | AggregateFunc::MaxTimestamp
418                            | AggregateFunc::MaxTimestampTz
419                            | AggregateFunc::MinInt16
420                            | AggregateFunc::MinInt32
421                            | AggregateFunc::MinInt64
422                            | AggregateFunc::MinUInt16
423                            | AggregateFunc::MinUInt32
424                            | AggregateFunc::MinUInt64
425                            | AggregateFunc::MinMzTimestamp
426                            | AggregateFunc::MinFloat32
427                            | AggregateFunc::MinFloat64
428                            | AggregateFunc::MinBool
429                            | AggregateFunc::MinString
430                            | AggregateFunc::MinDate
431                            | AggregateFunc::MinTimestamp
432                            | AggregateFunc::MinTimestampTz
433                            | AggregateFunc::Any
434                            | AggregateFunc::All => {
435                                // These methods propagate constant values exactly.
436                                knowledge
437                            }
438                            AggregateFunc::Count => DatumKnowledge::any(false),
439                            _ => {
440                                // The remaining aggregates are non-null if
441                                // their inputs are non-null. This is correct
442                                // because in Mir~ we reduce an empty collection
443                                // to an empty collection (in Hir~ the result
444                                // often is singleton null collection).
445                                DatumKnowledge::any(knowledge.nullable())
446                            }
447                        };
448                        output.push(knowledge);
449                    }
450                    Ok(output)
451                }
452                MirRelationExpr::TopK { input, limit, .. } => {
453                    let input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
454                    if let Some(limit) = limit.as_mut() {
455                        optimize(
456                            limit,
457                            &input.typ().column_types,
458                            &input_knowledge[..],
459                            knowledge_stack,
460                        )?;
461                    }
462                    Ok(input_knowledge)
463                }
464                MirRelationExpr::Negate { input } => {
465                    self.harvest(input, knowledge, knowledge_stack)
466                }
467                MirRelationExpr::Threshold { input } => {
468                    self.harvest(input, knowledge, knowledge_stack)
469                }
470                MirRelationExpr::Union { base, inputs } => {
471                    let mut know = self.harvest(base, knowledge, knowledge_stack)?;
472                    for input in inputs {
473                        know = know
474                            .into_iter()
475                            .zip_eq(self.harvest(input, knowledge, knowledge_stack)?)
476                            .map(|(mut k1, k2)| {
477                                k1.join_assign(&k2);
478                                k1
479                            })
480                            .collect();
481                    }
482                    Ok(know)
483                }
484            }?;
485
486            // println!("# Plan");
487            // println!("{}", expr.pretty());
488            // println!("# Knowledge");
489            // print_knowledge_vec(&result);
490            // println!("---");
491
492            Ok(result)
493        })
494    }
495}
496
497/// Information about a specific column.
498///
499/// The values should form a [complete lattice].
500///
501/// [complete lattice]: https://en.wikipedia.org/wiki/Complete_lattice
502#[derive(Clone, Debug, PartialEq, Eq)]
503enum DatumKnowledge {
504    // Any possible value, optionally known to be NOT NULL.
505    Any {
506        nullable: bool,
507    },
508    // A known literal value of a specific type.
509    Lit {
510        value: Result<mz_repr::Row, EvalError>,
511        typ: SqlScalarType,
512    },
513    // A value that cannot exist.
514    Nothing,
515}
516
517impl From<&MirScalarExpr> for DatumKnowledge {
518    fn from(expr: &MirScalarExpr) -> Self {
519        if let MirScalarExpr::Literal(l, t) = expr {
520            let value = l.clone();
521            let typ = t.scalar_type.clone();
522            Self::Lit { value, typ }
523        } else {
524            Self::top()
525        }
526    }
527}
528
529impl From<(Datum<'_>, &SqlColumnType)> for DatumKnowledge {
530    fn from((d, t): (Datum<'_>, &SqlColumnType)) -> Self {
531        let value = Ok(Row::pack_slice(std::slice::from_ref(&d)));
532        let typ = t.scalar_type.clone();
533        Self::Lit { value, typ }
534    }
535}
536
537impl From<&SqlColumnType> for DatumKnowledge {
538    fn from(typ: &SqlColumnType) -> Self {
539        let nullable = typ.nullable;
540        Self::Any { nullable }
541    }
542}
543
544impl DatumKnowledge {
545    /// The most general possible knowledge (the top of the complete lattice).
546    fn top() -> Self {
547        Self::Any { nullable: true }
548    }
549
550    /// The strictest possible knowledge (the bottom of the complete lattice).
551    #[allow(dead_code)]
552    fn bottom() -> Self {
553        Self::Nothing
554    }
555
556    /// Create a [`DatumKnowledge::Any`] instance with the given nullable flag.
557    fn any(nullable: bool) -> Self {
558        DatumKnowledge::Any { nullable }
559    }
560
561    /// Unions (weakens) the possible states of a column.
562    fn join_assign(&mut self, other: &Self) {
563        use DatumKnowledge::*;
564
565        // Each of the following `if` statements handles the cases marked with
566        // `x` of the (self x other) cross product (depicted as a rows x cols
567        // table where Self::bottom() is the UL and Self::top() the LR corner).
568        // Cases that are already handled are marked with `+` and cases that are
569        // yet to be handled marked with `-`.
570        //
571        // The order of handling (crossing out) of cases ensures that
572        // `*.clone()` is not called unless necessary.
573        //
574        // The final case after the ifs can assert that both sides are Lit
575        // variants.
576
577        // x - - - : Nothing
578        // x - - - : Lit { _, _ }
579        // x - - - : Any { false }
580        // x x x x : Any { true }
581        if crate::any![
582            matches!(self, Any { nullable: true }),
583            matches!(other, Nothing),
584        ] {
585            // Nothing to do.
586        }
587        // + x x x : Nothing
588        // + - - x : Lit { _, _ }
589        // + - - x : Any { false }
590        // + + + + : Any { true }
591        else if crate::any![
592            matches!(self, Nothing),
593            matches!(other, Any { nullable: true }),
594        ] {
595            *self = other.clone();
596        }
597        // + + + + : Nothing
598        // + - - + : Lit { _, _ }
599        // + x x + : Any { false }
600        // + + + + : Any { true }
601        else if matches!(self, Any { nullable: false }) {
602            if !other.nullable() {
603                // Nothing to do.
604            } else {
605                *self = Self::top() // other: Lit { null, _ }
606            }
607            // Nothing to do.
608        }
609        // + + + + : Nothing
610        // + - x + : Lit { _, _ }
611        // + + + + : Any { false }
612        // + + + + : Any { true }
613        else if matches!(other, Any { nullable: false }) {
614            if !self.nullable() {
615                *self = other.clone();
616            } else {
617                *self = Self::top() // other: Lit { null, _ }
618            }
619        }
620        // + + + + : Nothing
621        // + x + + : Lit { _, _ }
622        // + + + + : Any { false }
623        // + + + + : Any { true }
624        else {
625            let Lit {
626                value: s_val,
627                typ: s_typ,
628            } = self
629            else {
630                unreachable!();
631            };
632            let Lit {
633                value: o_val,
634                typ: o_typ,
635            } = other
636            else {
637                unreachable!();
638            };
639
640            if !s_typ.base_eq(o_typ) {
641                ::tracing::error!("Undefined join of non-equal base types {s_typ:?} != {o_typ:?}");
642                *self = Self::top();
643            } else if s_val != o_val {
644                let nullable = self.nullable() || other.nullable();
645                *self = Any { nullable }
646            } else if s_typ != o_typ {
647                // Same value but different concrete types - strip all modifiers!
648                // This is identical to what SqlColumnType::union is doing.
649                *s_typ = s_typ.without_modifiers();
650            } else {
651                // Value and type coincide - do nothing!
652            }
653        }
654    }
655
656    /// Intersects (strengthens) the possible states of a column.
657    fn meet_assign(&mut self, other: &Self) {
658        use DatumKnowledge::*;
659
660        // Each of the following `if` statements handles the cases marked with
661        // `x` of the (self x other) cross product (depicted as a rows x cols
662        // table where Self::bottom() is the UL and Self::top() the LR corner).
663        // Cases that are already handled are marked with `+` and cases that are
664        // yet to be handled marked with `-`.
665        //
666        // The order of handling (crossing out) of cases ensures that
667        // `*.clone()` is not called unless necessary.
668        //
669        // The final case after the ifs can assert that both sides are Lit
670        // variants.
671
672        // x x x x : Nothing
673        // - - - x : Lit { _, _ }
674        // - - - x : Any { false }
675        // - - - x : Any { true }
676        if crate::any![
677            matches!(self, Nothing),
678            matches!(other, Any { nullable: true }),
679        ] {
680            // Nothing to do.
681        }
682        // + + + + : Nothing
683        // x - - + : Lit { _, _ }
684        // x - - + : Any { false }
685        // x x x + : Any { true }
686        else if crate::any![
687            matches!(self, Any { nullable: true }),
688            matches!(other, Nothing),
689        ] {
690            *self = other.clone();
691        }
692        // + + + + : Nothing
693        // + - - + : Lit { _, _ }
694        // + x x + : Any { false }
695        // + + + + : Any { true }
696        else if matches!(self, Any { nullable: false }) {
697            match other {
698                Any { .. } => {
699                    // Nothing to do.
700                }
701                Lit { .. } => {
702                    if other.nullable() {
703                        *self = Nothing; // other: Lit { null, _ }
704                    } else {
705                        *self = other.clone();
706                    }
707                }
708                Nothing => unreachable!(),
709            }
710        }
711        // + + + + : Nothing
712        // + - x + : Lit { _, _ }
713        // + + + + : Any { false }
714        // + + + + : Any { true }
715        else if matches!(other, Any { nullable: false }) {
716            if self.nullable() {
717                *self = Nothing // self: Lit { null, _ }
718            }
719        }
720        // + + + + : Nothing
721        // + x + + : Lit { _, _ }
722        // + + + + : Any { false }
723        // + + + + : Any { true }
724        else {
725            let Lit {
726                value: s_val,
727                typ: s_typ,
728            } = self
729            else {
730                unreachable!();
731            };
732            let Lit {
733                value: o_val,
734                typ: o_typ,
735            } = other
736            else {
737                unreachable!();
738            };
739
740            if !s_typ.base_eq(o_typ) {
741                soft_panic_or_log!("Undefined meet of non-equal base types {s_typ:?} != {o_typ:?}");
742                *self = Self::top(); // this really should be Nothing
743            } else if s_val != o_val {
744                *self = Nothing;
745            } else if s_typ != o_typ {
746                // Same value but different concrete types - strip all
747                // modifiers! We should probably pick the more specific of the
748                // two types if they are ordered or return Nothing otherwise.
749                *s_typ = s_typ.without_modifiers();
750            } else {
751                // Value and type coincide - do nothing!
752            }
753        }
754    }
755
756    fn nullable(&self) -> bool {
757        match self {
758            DatumKnowledge::Any { nullable } => *nullable,
759            DatumKnowledge::Lit { value, .. } => match value {
760                Ok(value) => value.iter().next().unwrap().is_null(),
761                Err(_) => false,
762            },
763            DatumKnowledge::Nothing => false,
764        }
765    }
766}
767
768/// Attempts to optimize
769///
770/// `knowledge_stack` is a pre-allocated vector but is expected not to contain any elements.
771fn optimize(
772    expr: &mut MirScalarExpr,
773    column_types: &[SqlColumnType],
774    column_knowledge: &[DatumKnowledge],
775    knowledge_stack: &mut Vec<DatumKnowledge>,
776) -> Result<DatumKnowledge, TransformError> {
777    // Storage for `DatumKnowledge` being propagated up through the
778    // `MirScalarExpr`. When a node is visited, pop off as many `DatumKnowledge`
779    // as the number of children the node has, and then push the
780    // `DatumKnowledge` corresponding to the node back onto the stack.
781    // Post-order traversal means that if a node has `n` children, the top `n`
782    // `DatumKnowledge` in the stack are the `DatumKnowledge` corresponding to
783    // the children.
784    assert!(knowledge_stack.is_empty());
785    #[allow(deprecated)]
786    expr.visit_mut_pre_post(
787        &mut |e| {
788            if let MirScalarExpr::If { then, els, .. } = e {
789                Some(vec![then, els])
790            } else {
791                None
792            }
793        },
794        &mut |e| {
795            let result = match e {
796                MirScalarExpr::Column(index, _) => {
797                    let index = *index;
798                    if let DatumKnowledge::Lit { value, typ } = &column_knowledge[index] {
799                        let nullable = column_knowledge[index].nullable();
800                        *e = MirScalarExpr::Literal(value.clone(), typ.clone().nullable(nullable));
801                    }
802                    column_knowledge[index].clone()
803                }
804                MirScalarExpr::Literal(_, _) | MirScalarExpr::CallUnmaterializable(_) => {
805                    DatumKnowledge::from(&*e)
806                }
807                MirScalarExpr::CallUnary { func, expr: _ } => {
808                    let knowledge = knowledge_stack.pop().unwrap();
809                    if matches!(&knowledge, DatumKnowledge::Lit { .. }) {
810                        e.reduce(column_types);
811                    } else if func == &UnaryFunc::IsNull(func::IsNull) && !knowledge.nullable() {
812                        *e = MirScalarExpr::literal_false();
813                    };
814                    DatumKnowledge::from(&*e)
815                }
816                MirScalarExpr::CallBinary {
817                    func: _,
818                    expr1: _,
819                    expr2: _,
820                } => {
821                    let knowledge2 = knowledge_stack.pop().unwrap();
822                    let knowledge1 = knowledge_stack.pop().unwrap();
823                    if crate::any![
824                        matches!(knowledge1, DatumKnowledge::Lit { .. }),
825                        matches!(knowledge2, DatumKnowledge::Lit { .. }),
826                    ] {
827                        e.reduce(column_types);
828                    }
829                    DatumKnowledge::from(&*e)
830                }
831                MirScalarExpr::CallVariadic { func: _, exprs } => {
832                    // Drain the last `exprs.len()` knowledge, and reduce if any is `Lit`.
833                    assert!(knowledge_stack.len() >= exprs.len());
834                    if knowledge_stack
835                        .drain(knowledge_stack.len() - exprs.len()..)
836                        .any(|k| matches!(k, DatumKnowledge::Lit { .. }))
837                    {
838                        e.reduce(column_types);
839                    }
840                    DatumKnowledge::from(&*e)
841                }
842                MirScalarExpr::If {
843                    cond: _,
844                    then: _,
845                    els: _,
846                } => {
847                    // `cond` has been left un-optimized, as we should not remove the conditional
848                    // nature of the evaluation based on column knowledge: the resulting
849                    // expression could then move down past a filter or join that provided
850                    // the guarantees, and would become wrong.
851                    //
852                    // Instead, each of the branches have been optimized, and we
853                    // can union the states of their columns.
854                    let know2 = knowledge_stack.pop().unwrap();
855                    let mut know1 = knowledge_stack.pop().unwrap();
856                    know1.join_assign(&know2);
857                    know1
858                }
859            };
860            knowledge_stack.push(result);
861        },
862    )?;
863    let knowledge_datum = knowledge_stack.pop();
864    assert!(knowledge_stack.is_empty());
865    knowledge_datum.ok_or_else(|| {
866        TransformError::Internal(String::from("unexpectedly empty stack in optimize"))
867    })
868}
869
870#[allow(dead_code)] // keep debugging method around
871fn print_knowledge_map<'a>(
872    knowledge: &BTreeMap<mz_expr::Id, Vec<DatumKnowledge>>,
873    ids: impl Iterator<Item = &'a mz_expr::LocalId>,
874) {
875    for id in ids {
876        let id = mz_expr::Id::Local(id.clone());
877        for (i, k) in knowledge.get(&id).unwrap().iter().enumerate() {
878            println!("{id}.#{i}: {k:?}");
879        }
880    }
881    println!("");
882}
883
884#[allow(dead_code)] // keep debugging method around
885fn print_knowledge_vec(knowledge: &Vec<DatumKnowledge>) {
886    for (i, k) in knowledge.iter().enumerate() {
887        println!("#{i}: {k:?}");
888    }
889    println!("");
890}