Skip to main content

mz_transform/
column_knowledge.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Transformations based on pulling information about individual columns from sources.
11
12use std::collections::BTreeMap;
13
14use itertools::{Itertools, zip_eq};
15use mz_expr::JoinImplementation::IndexedFilter;
16use mz_expr::visit::Visit;
17use mz_expr::{
18    EvalError, LetRecLimit, MirRelationExpr, MirScalarExpr, RECURSION_LIMIT, UnaryFunc, func,
19};
20use mz_ore::cast::CastFrom;
21use mz_ore::stack::{CheckedRecursion, RecursionGuard};
22use mz_ore::{assert_none, soft_panic_or_log};
23use mz_repr::{Datum, ReprColumnType, ReprScalarType, Row};
24
25use crate::{TransformCtx, TransformError};
26
27/// Harvest and act upon per-column information.
28#[derive(Debug)]
29pub struct ColumnKnowledge {
30    recursion_guard: RecursionGuard,
31}
32
33impl Default for ColumnKnowledge {
34    fn default() -> ColumnKnowledge {
35        ColumnKnowledge {
36            recursion_guard: RecursionGuard::with_limit(RECURSION_LIMIT),
37        }
38    }
39}
40
41impl CheckedRecursion for ColumnKnowledge {
42    fn recursion_guard(&self) -> &RecursionGuard {
43        &self.recursion_guard
44    }
45}
46
47impl crate::Transform for ColumnKnowledge {
48    fn name(&self) -> &'static str {
49        "ColumnKnowledge"
50    }
51
52    /// Transforms an expression through accumulated knowledge.
53    #[mz_ore::instrument(
54        target = "optimizer",
55        level = "debug",
56        fields(path.segment = "column_knowledge")
57    )]
58    fn actually_perform_transform(
59        &self,
60        expr: &mut MirRelationExpr,
61        _: &mut TransformCtx,
62    ) -> Result<(), TransformError> {
63        let mut knowledge_stack = Vec::<DatumKnowledge>::new();
64        let result = self
65            .harvest(expr, &mut BTreeMap::new(), &mut knowledge_stack)
66            .map(|_| ());
67        mz_repr::explain::trace_plan(&*expr);
68        result
69    }
70}
71
72impl ColumnKnowledge {
73    /// Harvest per-column knowledge.
74    ///
75    /// `knowledge_stack` is a pre-allocated vector but is expected not to contain any elements.
76    fn harvest(
77        &self,
78        expr: &mut MirRelationExpr,
79        knowledge: &mut BTreeMap<mz_expr::Id, Vec<DatumKnowledge>>,
80        knowledge_stack: &mut Vec<DatumKnowledge>,
81    ) -> Result<Vec<DatumKnowledge>, TransformError> {
82        self.checked_recur(|_| {
83            let result = match expr {
84                MirRelationExpr::ArrangeBy { input, .. } => {
85                    self.harvest(input, knowledge, knowledge_stack)
86                }
87                MirRelationExpr::Get { id, typ, .. } => {
88                    Ok(knowledge.get(id).cloned().unwrap_or_else(|| {
89                        typ.column_types.iter().map(DatumKnowledge::from).collect()
90                    }))
91                }
92                MirRelationExpr::Constant { rows, typ } => {
93                    // TODO: handle multi-row cases with some constant columns.
94                    if let Ok([(row, _diff)]) = rows.as_deref() {
95                        let knowledge = std::iter::zip(row.iter(), typ.column_types.iter())
96                            .map(DatumKnowledge::from)
97                            .collect();
98                        Ok(knowledge)
99                    } else {
100                        Ok(typ.column_types.iter().map(DatumKnowledge::from).collect())
101                    }
102                }
103                MirRelationExpr::Let { id, value, body } => {
104                    let value_knowledge = self.harvest(value, knowledge, knowledge_stack)?;
105                    let prior_knowledge =
106                        knowledge.insert(mz_expr::Id::Local(id.clone()), value_knowledge);
107                    let body_knowledge = self.harvest(body, knowledge, knowledge_stack)?;
108                    knowledge.remove(&mz_expr::Id::Local(id.clone()));
109                    if let Some(prior_knowledge) = prior_knowledge {
110                        knowledge.insert(mz_expr::Id::Local(id.clone()), prior_knowledge);
111                    }
112                    Ok(body_knowledge)
113                }
114                MirRelationExpr::LetRec {
115                    ids,
116                    values,
117                    limits,
118                    body,
119                } => {
120                    // Set knowledge[i][j] = DatumKnowledge::bottom() for each
121                    // column j and CTE i. This corresponds to the normal
122                    // evaluation semantics where each recursive CTE is
123                    // initialized to the empty collection.
124                    for (id, value) in zip_eq(ids.iter(), values.iter()) {
125                        let id = mz_expr::Id::Local(id.clone());
126                        let knowledge_new = vec![DatumKnowledge::bottom(); value.arity()];
127                        let knowledge_old = knowledge.insert(id, knowledge_new);
128                        assert_none!(knowledge_old);
129                    }
130
131                    // Sum up the arity of all ids in the enclosing LetRec node.
132                    let let_rec_arity = ids.iter().fold(0, |acc, id| {
133                        let id = mz_expr::Id::Local(id.clone());
134                        acc + u64::cast_from(knowledge[&id].len())
135                    });
136
137                    // Sequentially union knowledge[i][j] with the result of
138                    // descending into a clone of values[i]. Repeat until one of
139                    // the following conditions is met:
140                    //
141                    // 1. The knowledge bindings have stabilized at a fixpoint.
142                    // 2. No fixpoint was found after `max_iterations`. If this
143                    //    is the case reset the knowledge vectors for all
144                    //    recursive CTEs to DatumKnowledge::top().
145                    // 3. We reach the user-specified recursion limit of any of the bindings.
146                    //    In this case, we also give up similarly to 2., because we don't want to
147                    //    complicate things with handling different limits per binding.
148                    let min_max_iter = LetRecLimit::min_max_iter(limits);
149                    let max_iterations = 100;
150                    let mut curr_iteration = 0;
151                    loop {
152                        // Check for conditions (2) and (3).
153                        if curr_iteration >= max_iterations
154                            || min_max_iter
155                                .map(|min_max_iter| curr_iteration >= min_max_iter)
156                                .unwrap_or(false)
157                        {
158                            if curr_iteration > 3 * let_rec_arity {
159                                soft_panic_or_log!(
160                                    "LetRec loop in ColumnKnowledge has not converged in 3 * |{}|",
161                                    let_rec_arity
162                                );
163                            }
164
165                            for (id, value) in zip_eq(ids.iter(), values.iter()) {
166                                let id = mz_expr::Id::Local(id.clone());
167                                let knowledge_new = vec![DatumKnowledge::top(); value.arity()];
168                                knowledge.insert(id, knowledge_new);
169                            }
170                            break;
171                        }
172
173                        // Check for condition (1).
174                        let mut change = false;
175                        for (id, mut value) in zip_eq(ids.iter(), values.iter().cloned()) {
176                            let id = mz_expr::Id::Local(id.clone());
177                            let value = &mut value;
178                            let next_knowledge = self.harvest(value, knowledge, knowledge_stack)?;
179                            let curr_knowledge = knowledge.get_mut(&id).unwrap();
180                            for (curr, next) in zip_eq(curr_knowledge.iter_mut(), next_knowledge) {
181                                let prev = curr.clone();
182                                curr.join_assign(&next);
183                                change |= prev != *curr;
184                            }
185                        }
186                        if !change {
187                            break;
188                        }
189
190                        curr_iteration += 1;
191                    }
192
193                    // Descend into the values with the inferred knowledge.
194                    for value in values.iter_mut() {
195                        self.harvest(value, knowledge, knowledge_stack)?;
196                    }
197
198                    // Descend and return the knowledge from the body.
199                    let body_knowledge = self.harvest(body, knowledge, knowledge_stack)?;
200
201                    // Remove shadowed bindings. This is good hygiene, as
202                    // otherwise with nested LetRec blocks the `loop { ... }`
203                    // above will carry inner LetRec IDs across outer LetRec
204                    // iterations. As a consequence, the "no shadowing"
205                    // assertion at the beginning of this block will fail at the
206                    // inner LetRec for the second outer LetRec iteration.
207                    for id in ids.iter() {
208                        let id = mz_expr::Id::Local(id.clone());
209                        knowledge.remove(&id);
210                    }
211
212                    Ok(body_knowledge)
213                }
214                MirRelationExpr::Project { input, outputs } => {
215                    let input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
216                    Ok(outputs
217                        .iter()
218                        .map(|i| input_knowledge[*i].clone())
219                        .collect())
220                }
221                MirRelationExpr::Map { input, scalars } => {
222                    let mut input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
223                    let mut column_types: Vec<ReprColumnType> = input.typ().column_types;
224                    for scalar in scalars.iter_mut() {
225                        input_knowledge.push(optimize(
226                            scalar,
227                            &column_types,
228                            &input_knowledge[..],
229                            knowledge_stack,
230                        )?);
231                        column_types.push(scalar.typ(&column_types));
232                    }
233                    Ok(input_knowledge)
234                }
235                MirRelationExpr::FlatMap { input, func, exprs } => {
236                    let mut input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
237                    let input_col_types: Vec<ReprColumnType> = input.typ().column_types;
238                    for expr in exprs {
239                        optimize(
240                            expr,
241                            &input_col_types,
242                            &input_knowledge[..],
243                            knowledge_stack,
244                        )?;
245                    }
246                    let func_typ = func.output_type();
247                    input_knowledge.extend(func_typ.column_types.iter().map(DatumKnowledge::from));
248                    Ok(input_knowledge)
249                }
250                MirRelationExpr::Filter { input, predicates } => {
251                    let mut input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
252                    let input_col_types: Vec<ReprColumnType> = input.typ().column_types;
253                    for predicate in predicates.iter_mut() {
254                        optimize(
255                            predicate,
256                            &input_col_types,
257                            &input_knowledge[..],
258                            knowledge_stack,
259                        )?;
260                    }
261                    // If any predicate tests a column for equality, truth, or is_null, we learn stuff.
262                    for predicate in predicates.iter() {
263                        // Equality tests allow us to unify the column knowledge of each input.
264                        if let MirScalarExpr::CallBinary {
265                            func: mz_expr::BinaryFunc::Eq(_),
266                            expr1,
267                            expr2,
268                        } = predicate
269                        {
270                            // Collect knowledge about the inputs (for columns and literals).
271                            let mut knowledge = DatumKnowledge::top();
272                            if let MirScalarExpr::Column(c, _) = &**expr1 {
273                                knowledge.meet_assign(&input_knowledge[*c]);
274                            }
275                            if let MirScalarExpr::Column(c, _) = &**expr2 {
276                                knowledge.meet_assign(&input_knowledge[*c]);
277                            }
278
279                            // Absorb literal knowledge about columns.
280                            if let MirScalarExpr::Literal(..) = &**expr1 {
281                                knowledge.meet_assign(&DatumKnowledge::from(&**expr1));
282                            }
283                            if let MirScalarExpr::Literal(..) = &**expr2 {
284                                knowledge.meet_assign(&DatumKnowledge::from(&**expr2));
285                            }
286
287                            // Write back unified knowledge to each column.
288                            if let MirScalarExpr::Column(c, _) = &**expr1 {
289                                input_knowledge[*c].meet_assign(&knowledge);
290                            }
291                            if let MirScalarExpr::Column(c, _) = &**expr2 {
292                                input_knowledge[*c].meet_assign(&knowledge);
293                            }
294                        }
295                        if let MirScalarExpr::CallUnary {
296                            func: UnaryFunc::Not(func::Not),
297                            expr,
298                        } = predicate
299                        {
300                            if let MirScalarExpr::CallUnary {
301                                func: UnaryFunc::IsNull(func::IsNull),
302                                expr,
303                            } = &**expr
304                            {
305                                if let MirScalarExpr::Column(c, _) = &**expr {
306                                    input_knowledge[*c].meet_assign(&DatumKnowledge::any(false));
307                                }
308                            }
309                        }
310                    }
311
312                    Ok(input_knowledge)
313                }
314                MirRelationExpr::Join {
315                    inputs,
316                    equivalences,
317                    implementation,
318                    ..
319                } => {
320                    // Aggregate column knowledge from each input into one `Vec`.
321                    let mut knowledges = Vec::new();
322                    for input in inputs.iter_mut() {
323                        for mut knowledge in self.harvest(input, knowledge, knowledge_stack)? {
324                            // Do not propagate error literals beyond join inputs, since that may result
325                            // in them being propagated to other inputs of the join and evaluated when
326                            // they should not.
327                            if let DatumKnowledge::Lit { value: Err(_), .. } = knowledge {
328                                knowledge.join_assign(&DatumKnowledge::any(false));
329                            }
330                            knowledges.push(knowledge);
331                        }
332                    }
333
334                    // This only aggregates the column types of each input, not the
335                    // keys of the inputs. It is unnecessary to aggregate the keys
336                    // of the inputs since input keys are unnecessary for reducing
337                    // `MirScalarExpr`s.
338                    let folded_input_col_types: Vec<ReprColumnType> = inputs
339                        .iter()
340                        .flat_map(|input| input.typ().column_types)
341                        .collect();
342
343                    for equivalence in equivalences.iter_mut() {
344                        let mut knowledge = DatumKnowledge::top();
345
346                        // We can produce composite knowledge for everything in the equivalence class.
347                        for expr in equivalence.iter_mut() {
348                            if !matches!(implementation, IndexedFilter(..)) {
349                                optimize(
350                                    expr,
351                                    &folded_input_col_types,
352                                    &knowledges,
353                                    knowledge_stack,
354                                )?;
355                            }
356                            if let MirScalarExpr::Column(c, _) = expr {
357                                knowledge.meet_assign(&knowledges[*c]);
358                            }
359                            if let MirScalarExpr::Literal(..) = expr {
360                                knowledge.meet_assign(&DatumKnowledge::from(&*expr));
361                            }
362                        }
363                        for expr in equivalence.iter_mut() {
364                            if let MirScalarExpr::Column(c, _) = expr {
365                                knowledges[*c] = knowledge.clone();
366                            }
367                        }
368                    }
369
370                    Ok(knowledges)
371                }
372                MirRelationExpr::Reduce {
373                    input,
374                    group_key,
375                    aggregates,
376                    monotonic: _,
377                    expected_group_size: _,
378                } => {
379                    let input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
380                    let input_col_types: Vec<ReprColumnType> = input.typ().column_types;
381                    let mut output = group_key
382                        .iter_mut()
383                        .map(|k| {
384                            optimize(k, &input_col_types, &input_knowledge[..], knowledge_stack)
385                        })
386                        .collect::<Result<Vec<_>, _>>()?;
387                    for aggregate in aggregates.iter_mut() {
388                        use mz_expr::AggregateFunc;
389                        let knowledge = optimize(
390                            &mut aggregate.expr,
391                            &input_col_types,
392                            &input_knowledge[..],
393                            knowledge_stack,
394                        )?;
395                        // This could be improved.
396                        let knowledge = match aggregate.func {
397                            AggregateFunc::MaxInt16
398                            | AggregateFunc::MaxInt32
399                            | AggregateFunc::MaxInt64
400                            | AggregateFunc::MaxUInt16
401                            | AggregateFunc::MaxUInt32
402                            | AggregateFunc::MaxUInt64
403                            | AggregateFunc::MaxMzTimestamp
404                            | AggregateFunc::MaxFloat32
405                            | AggregateFunc::MaxFloat64
406                            | AggregateFunc::MaxBool
407                            | AggregateFunc::MaxString
408                            | AggregateFunc::MaxDate
409                            | AggregateFunc::MaxTimestamp
410                            | AggregateFunc::MaxTimestampTz
411                            | AggregateFunc::MinInt16
412                            | AggregateFunc::MinInt32
413                            | AggregateFunc::MinInt64
414                            | AggregateFunc::MinUInt16
415                            | AggregateFunc::MinUInt32
416                            | AggregateFunc::MinUInt64
417                            | AggregateFunc::MinMzTimestamp
418                            | AggregateFunc::MinFloat32
419                            | AggregateFunc::MinFloat64
420                            | AggregateFunc::MinBool
421                            | AggregateFunc::MinString
422                            | AggregateFunc::MinDate
423                            | AggregateFunc::MinTimestamp
424                            | AggregateFunc::MinTimestampTz
425                            | AggregateFunc::Any
426                            | AggregateFunc::All => {
427                                // These methods propagate constant values exactly.
428                                knowledge
429                            }
430                            AggregateFunc::Count => DatumKnowledge::any(false),
431                            _ => {
432                                // The remaining aggregates are non-null if
433                                // their inputs are non-null. This is correct
434                                // because in Mir~ we reduce an empty collection
435                                // to an empty collection (in Hir~ the result
436                                // often is singleton null collection).
437                                DatumKnowledge::any(knowledge.nullable())
438                            }
439                        };
440                        output.push(knowledge);
441                    }
442                    Ok(output)
443                }
444                MirRelationExpr::TopK { input, limit, .. } => {
445                    let input_knowledge = self.harvest(input, knowledge, knowledge_stack)?;
446                    if let Some(limit) = limit.as_mut() {
447                        let input_col_types: Vec<ReprColumnType> = input.typ().column_types;
448                        optimize(
449                            limit,
450                            &input_col_types,
451                            &input_knowledge[..],
452                            knowledge_stack,
453                        )?;
454                    }
455                    Ok(input_knowledge)
456                }
457                MirRelationExpr::Negate { input } => {
458                    self.harvest(input, knowledge, knowledge_stack)
459                }
460                MirRelationExpr::Threshold { input } => {
461                    self.harvest(input, knowledge, knowledge_stack)
462                }
463                MirRelationExpr::Union { base, inputs } => {
464                    let mut know = self.harvest(base, knowledge, knowledge_stack)?;
465                    for input in inputs {
466                        know = know
467                            .into_iter()
468                            .zip_eq(self.harvest(input, knowledge, knowledge_stack)?)
469                            .map(|(mut k1, k2)| {
470                                k1.join_assign(&k2);
471                                k1
472                            })
473                            .collect();
474                    }
475                    Ok(know)
476                }
477            }?;
478
479            // println!("# Plan");
480            // println!("{}", expr.pretty());
481            // println!("# Knowledge");
482            // print_knowledge_vec(&result);
483            // println!("---");
484
485            Ok(result)
486        })
487    }
488}
489
490/// Information about a specific column.
491///
492/// The values should form a [complete lattice].
493///
494/// [complete lattice]: https://en.wikipedia.org/wiki/Complete_lattice
495#[derive(Clone, Debug, PartialEq, Eq)]
496enum DatumKnowledge {
497    // Any possible value, optionally known to be NOT NULL.
498    Any {
499        nullable: bool,
500    },
501    // A known literal value of a specific type.
502    Lit {
503        value: Result<mz_repr::Row, EvalError>,
504        typ: ReprScalarType,
505    },
506    // A value that cannot exist.
507    Nothing,
508}
509
510impl From<&MirScalarExpr> for DatumKnowledge {
511    fn from(expr: &MirScalarExpr) -> Self {
512        if let MirScalarExpr::Literal(l, t) = expr {
513            let value = l.clone();
514            let typ = t.scalar_type.clone();
515            Self::Lit { value, typ }
516        } else {
517            Self::top()
518        }
519    }
520}
521
522impl From<(Datum<'_>, &ReprColumnType)> for DatumKnowledge {
523    fn from((d, t): (Datum<'_>, &ReprColumnType)) -> Self {
524        let value = Ok(Row::pack_slice(std::slice::from_ref(&d)));
525        let typ = t.scalar_type.clone();
526        Self::Lit { value, typ }
527    }
528}
529impl From<&ReprColumnType> for DatumKnowledge {
530    fn from(typ: &ReprColumnType) -> Self {
531        let nullable = typ.nullable;
532        Self::Any { nullable }
533    }
534}
535
536impl DatumKnowledge {
537    /// The most general possible knowledge (the top of the complete lattice).
538    fn top() -> Self {
539        Self::Any { nullable: true }
540    }
541
542    /// The strictest possible knowledge (the bottom of the complete lattice).
543    #[allow(dead_code)]
544    fn bottom() -> Self {
545        Self::Nothing
546    }
547
548    /// Create a [`DatumKnowledge::Any`] instance with the given nullable flag.
549    fn any(nullable: bool) -> Self {
550        DatumKnowledge::Any { nullable }
551    }
552
553    /// Unions (weakens) the possible states of a column.
554    fn join_assign(&mut self, other: &Self) {
555        use DatumKnowledge::*;
556
557        // Each of the following `if` statements handles the cases marked with
558        // `x` of the (self x other) cross product (depicted as a rows x cols
559        // table where Self::bottom() is the UL and Self::top() the LR corner).
560        // Cases that are already handled are marked with `+` and cases that are
561        // yet to be handled marked with `-`.
562        //
563        // The order of handling (crossing out) of cases ensures that
564        // `*.clone()` is not called unless necessary.
565        //
566        // The final case after the ifs can assert that both sides are Lit
567        // variants.
568
569        // x - - - : Nothing
570        // x - - - : Lit { _, _ }
571        // x - - - : Any { false }
572        // x x x x : Any { true }
573        if crate::any![
574            matches!(self, Any { nullable: true }),
575            matches!(other, Nothing),
576        ] {
577            // Nothing to do.
578        }
579        // + x x x : Nothing
580        // + - - x : Lit { _, _ }
581        // + - - x : Any { false }
582        // + + + + : Any { true }
583        else if crate::any![
584            matches!(self, Nothing),
585            matches!(other, Any { nullable: true }),
586        ] {
587            *self = other.clone();
588        }
589        // + + + + : Nothing
590        // + - - + : Lit { _, _ }
591        // + x x + : Any { false }
592        // + + + + : Any { true }
593        else if matches!(self, Any { nullable: false }) {
594            if !other.nullable() {
595                // Nothing to do.
596            } else {
597                *self = Self::top() // other: Lit { null, _ }
598            }
599            // Nothing to do.
600        }
601        // + + + + : Nothing
602        // + - x + : Lit { _, _ }
603        // + + + + : Any { false }
604        // + + + + : Any { true }
605        else if matches!(other, Any { nullable: false }) {
606            if !self.nullable() {
607                *self = other.clone();
608            } else {
609                *self = Self::top() // other: Lit { null, _ }
610            }
611        }
612        // + + + + : Nothing
613        // + x + + : Lit { _, _ }
614        // + + + + : Any { false }
615        // + + + + : Any { true }
616        else {
617            let Lit {
618                value: s_val,
619                typ: s_typ,
620            } = self
621            else {
622                unreachable!();
623            };
624            let Lit {
625                value: o_val,
626                typ: o_typ,
627            } = other
628            else {
629                unreachable!();
630            };
631
632            if s_typ != o_typ {
633                soft_panic_or_log!("Undefined join of non-equal repr types {s_typ:?}, {o_typ:?}");
634                *self = Self::top();
635            } else if s_val != o_val {
636                let nullable = self.nullable() || other.nullable();
637                *self = Any { nullable }
638            } else {
639                // Value and type coincide - do nothing!
640            }
641        }
642    }
643
644    /// Intersects (strengthens) the possible states of a column.
645    fn meet_assign(&mut self, other: &Self) {
646        use DatumKnowledge::*;
647
648        // Each of the following `if` statements handles the cases marked with
649        // `x` of the (self x other) cross product (depicted as a rows x cols
650        // table where Self::bottom() is the UL and Self::top() the LR corner).
651        // Cases that are already handled are marked with `+` and cases that are
652        // yet to be handled marked with `-`.
653        //
654        // The order of handling (crossing out) of cases ensures that
655        // `*.clone()` is not called unless necessary.
656        //
657        // The final case after the ifs can assert that both sides are Lit
658        // variants.
659
660        // x x x x : Nothing
661        // - - - x : Lit { _, _ }
662        // - - - x : Any { false }
663        // - - - x : Any { true }
664        if crate::any![
665            matches!(self, Nothing),
666            matches!(other, Any { nullable: true }),
667        ] {
668            // Nothing to do.
669        }
670        // + + + + : Nothing
671        // x - - + : Lit { _, _ }
672        // x - - + : Any { false }
673        // x x x + : Any { true }
674        else if crate::any![
675            matches!(self, Any { nullable: true }),
676            matches!(other, Nothing),
677        ] {
678            *self = other.clone();
679        }
680        // + + + + : Nothing
681        // + - - + : Lit { _, _ }
682        // + x x + : Any { false }
683        // + + + + : Any { true }
684        else if matches!(self, Any { nullable: false }) {
685            match other {
686                Any { .. } => {
687                    // Nothing to do.
688                }
689                Lit { .. } => {
690                    if other.nullable() {
691                        *self = Nothing; // other: Lit { null, _ }
692                    } else {
693                        *self = other.clone();
694                    }
695                }
696                Nothing => unreachable!(),
697            }
698        }
699        // + + + + : Nothing
700        // + - x + : Lit { _, _ }
701        // + + + + : Any { false }
702        // + + + + : Any { true }
703        else if matches!(other, Any { nullable: false }) {
704            if self.nullable() {
705                *self = Nothing // self: Lit { null, _ }
706            }
707        }
708        // + + + + : Nothing
709        // + x + + : Lit { _, _ }
710        // + + + + : Any { false }
711        // + + + + : Any { true }
712        else {
713            let Lit {
714                value: s_val,
715                typ: s_typ,
716            } = self
717            else {
718                unreachable!();
719            };
720            let Lit {
721                value: o_val,
722                typ: o_typ,
723            } = other
724            else {
725                unreachable!();
726            };
727
728            if s_typ != o_typ {
729                soft_panic_or_log!("Undefined meet of non-equal repr types {s_typ:?}, {o_typ:?}");
730                *self = Self::top(); // this really should be Nothing
731            } else if s_val != o_val {
732                *self = Nothing;
733            } else {
734                // Value and type coincide - do nothing!
735            }
736        }
737    }
738
739    fn nullable(&self) -> bool {
740        match self {
741            DatumKnowledge::Any { nullable } => *nullable,
742            DatumKnowledge::Lit { value, .. } => match value {
743                Ok(value) => value.iter().next().unwrap().is_null(),
744                Err(_) => false,
745            },
746            DatumKnowledge::Nothing => false,
747        }
748    }
749}
750
751/// Attempts to optimize
752///
753/// `knowledge_stack` is a pre-allocated vector but is expected not to contain any elements.
754fn optimize(
755    expr: &mut MirScalarExpr,
756    column_types: &[ReprColumnType],
757    column_knowledge: &[DatumKnowledge],
758    knowledge_stack: &mut Vec<DatumKnowledge>,
759) -> Result<DatumKnowledge, TransformError> {
760    // Storage for `DatumKnowledge` being propagated up through the
761    // `MirScalarExpr`. When a node is visited, pop off as many `DatumKnowledge`
762    // as the number of children the node has, and then push the
763    // `DatumKnowledge` corresponding to the node back onto the stack.
764    // Post-order traversal means that if a node has `n` children, the top `n`
765    // `DatumKnowledge` in the stack are the `DatumKnowledge` corresponding to
766    // the children.
767    assert!(knowledge_stack.is_empty());
768    #[allow(deprecated)]
769    expr.visit_mut_pre_post(
770        &mut |e| {
771            if let MirScalarExpr::If { then, els, .. } = e {
772                Some(vec![then, els])
773            } else {
774                None
775            }
776        },
777        &mut |e| {
778            let result = match e {
779                MirScalarExpr::Column(index, _) => {
780                    let index = *index;
781                    if let DatumKnowledge::Lit { value, typ } = &column_knowledge[index] {
782                        *e = MirScalarExpr::Literal(
783                            value.clone(),
784                            typ.clone().nullable(column_knowledge[index].nullable()),
785                        );
786                    }
787                    column_knowledge[index].clone()
788                }
789                MirScalarExpr::Literal(_, _) | MirScalarExpr::CallUnmaterializable(_) => {
790                    DatumKnowledge::from(&*e)
791                }
792                MirScalarExpr::CallUnary { func, expr: _ } => {
793                    let knowledge = knowledge_stack.pop().unwrap();
794                    if matches!(&knowledge, DatumKnowledge::Lit { .. }) {
795                        e.reduce(column_types);
796                    } else if func == &UnaryFunc::IsNull(func::IsNull) && !knowledge.nullable() {
797                        *e = MirScalarExpr::literal_false();
798                    };
799                    DatumKnowledge::from(&*e)
800                }
801                MirScalarExpr::CallBinary {
802                    func: _,
803                    expr1: _,
804                    expr2: _,
805                } => {
806                    let knowledge2 = knowledge_stack.pop().unwrap();
807                    let knowledge1 = knowledge_stack.pop().unwrap();
808                    if crate::any![
809                        matches!(knowledge1, DatumKnowledge::Lit { .. }),
810                        matches!(knowledge2, DatumKnowledge::Lit { .. }),
811                    ] {
812                        e.reduce(column_types);
813                    }
814                    DatumKnowledge::from(&*e)
815                }
816                MirScalarExpr::CallVariadic { func: _, exprs } => {
817                    // Drain the last `exprs.len()` knowledge, and reduce if any is `Lit`.
818                    assert!(knowledge_stack.len() >= exprs.len());
819                    if knowledge_stack
820                        .drain(knowledge_stack.len() - exprs.len()..)
821                        .any(|k| matches!(k, DatumKnowledge::Lit { .. }))
822                    {
823                        e.reduce(column_types);
824                    }
825                    DatumKnowledge::from(&*e)
826                }
827                MirScalarExpr::If {
828                    cond: _,
829                    then: _,
830                    els: _,
831                } => {
832                    // `cond` has been left un-optimized, as we should not remove the conditional
833                    // nature of the evaluation based on column knowledge: the resulting
834                    // expression could then move down past a filter or join that provided
835                    // the guarantees, and would become wrong.
836                    //
837                    // Instead, each of the branches have been optimized, and we
838                    // can union the states of their columns.
839                    let know2 = knowledge_stack.pop().unwrap();
840                    let mut know1 = knowledge_stack.pop().unwrap();
841                    know1.join_assign(&know2);
842                    know1
843                }
844            };
845            knowledge_stack.push(result);
846        },
847    )?;
848    let knowledge_datum = knowledge_stack.pop();
849    assert!(knowledge_stack.is_empty());
850    knowledge_datum.ok_or_else(|| {
851        TransformError::Internal(String::from("unexpectedly empty stack in optimize"))
852    })
853}
854
855#[allow(dead_code)] // keep debugging method around
856fn print_knowledge_map<'a>(
857    knowledge: &BTreeMap<mz_expr::Id, Vec<DatumKnowledge>>,
858    ids: impl Iterator<Item = &'a mz_expr::LocalId>,
859) {
860    for id in ids {
861        let id = mz_expr::Id::Local(id.clone());
862        for (i, k) in knowledge.get(&id).unwrap().iter().enumerate() {
863            println!("{id}.#{i}: {k:?}");
864        }
865    }
866    println!("");
867}
868
869#[allow(dead_code)] // keep debugging method around
870fn print_knowledge_vec(knowledge: &Vec<DatumKnowledge>) {
871    for (i, k) in knowledge.iter().enumerate() {
872        println!("#{i}: {k:?}");
873    }
874    println!("");
875}