mz_expr/
interpret.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10use std::collections::BTreeMap;
11use std::fmt::Debug;
12
13use mz_repr::{ColumnType, Datum, RelationType, Row, RowArena, ScalarType};
14
15use crate::{
16    BinaryFunc, EvalError, MapFilterProject, MfpPlan, MirScalarExpr, UnaryFunc,
17    UnmaterializableFunc, VariadicFunc,
18};
19
20/// An inclusive range of non-null datum values.
21#[derive(Clone, Eq, PartialEq, Debug)]
22enum Values<'a> {
23    /// This range contains no values.
24    Empty,
25    /// An inclusive range. Invariant: the first element is always <= the second.
26    // TODO: a variant for small sets of data would avoid losing precision here.
27    Within(Datum<'a>, Datum<'a>),
28    /// Constraints on structured fields, useful for recursive structures like maps.
29    /// Fields that are not present in the map default to Values::All.
30    // TODO: consider using this variant, or similar, for Datum::List.
31    Nested(BTreeMap<Datum<'a>, ResultSpec<'a>>),
32    /// This range might contain any value. Since we're overapproximating, this is often used
33    /// as a "safe fallback" when we can't determine the right boundaries for a range.
34    All,
35}
36
37impl<'a> Values<'a> {
38    fn just(a: Datum<'a>) -> Values<'a> {
39        match a {
40            Datum::Map(datum_map) => Values::Nested(
41                datum_map
42                    .iter()
43                    .map(|(key, val)| (key.into(), ResultSpec::value(val)))
44                    .collect(),
45            ),
46            other => Self::Within(other, other),
47        }
48    }
49
50    fn union(self, other: Values<'a>) -> Values<'a> {
51        match (self, other) {
52            (Values::Empty, r) => r,
53            (r, Values::Empty) => r,
54            (Values::Within(a0, a1), Values::Within(b0, b1)) => {
55                Values::Within(a0.min(b0), a1.max(b1))
56            }
57            (Values::Nested(mut a), Values::Nested(mut b)) => {
58                a.retain(|datum, values| {
59                    if let Some(other_values) = b.remove(datum) {
60                        *values = values.clone().union(other_values);
61                    }
62                    *values != ResultSpec::anything()
63                });
64                if a.is_empty() {
65                    Values::All
66                } else {
67                    Values::Nested(a)
68                }
69            }
70            _ => Values::All,
71        }
72    }
73
74    fn intersect(self, other: Values<'a>) -> Values<'a> {
75        match (self, other) {
76            (Values::Empty, _) => Values::Empty,
77            (_, Values::Empty) => Values::Empty,
78            (Values::Within(a0, a1), Values::Within(b0, b1)) => {
79                let min = a0.max(b0);
80                let max = a1.min(b1);
81                if min <= max {
82                    Values::Within(min, max)
83                } else {
84                    Values::Empty
85                }
86            }
87            (Values::Nested(mut a), Values::Nested(b)) => {
88                for (datum, other_spec) in b {
89                    let spec = a.entry(datum).or_insert_with(ResultSpec::anything);
90                    *spec = spec.clone().intersect(other_spec);
91                }
92                Values::Nested(a)
93            }
94            (Values::All, v) => v,
95            (v, Values::All) => v,
96            (Values::Nested(_), Values::Within(_, _)) => Values::Empty,
97            (Values::Within(_, _), Values::Nested(_)) => Values::Empty,
98        }
99    }
100
101    fn may_contain(&self, value: Datum<'a>) -> bool {
102        match self {
103            Values::Empty => false,
104            Values::Within(min, max) => *min <= value && value <= *max,
105            Values::All => true,
106            Values::Nested(field_map) => match value {
107                Datum::Map(datum_map) => {
108                    datum_map
109                        .iter()
110                        .all(|(key, val)| match field_map.get(&key.into()) {
111                            None => true,
112                            Some(nested) => nested.may_contain(val),
113                        })
114                }
115                _ => false,
116            },
117        }
118    }
119}
120
121/// An approximation of the set of values an expression might have, including whether or not it
122/// might be null or an error value. This is generally an _overapproximation_, in the sense that
123/// [ResultSpec::may_contain] may return true even if the argument is not included in the set.
124/// (However, it should never return false when the value _is_ included!)
125#[derive(Debug, Clone, Eq, PartialEq)]
126pub struct ResultSpec<'a> {
127    /// True if the expression may evaluate to [Datum::Null].
128    nullable: bool,
129    /// True if the expression may evaluate to an error.
130    fallible: bool,
131    /// The range of possible (non-null) values that the expression may evaluate to.
132    values: Values<'a>,
133}
134
135impl<'a> ResultSpec<'a> {
136    /// No results match this spec. (For example, an empty table.)
137    pub fn nothing() -> Self {
138        ResultSpec {
139            nullable: false,
140            fallible: false,
141            values: Values::Empty,
142        }
143    }
144
145    /// Every result matches this spec.
146    pub fn anything() -> Self {
147        ResultSpec {
148            nullable: true,
149            fallible: true,
150            values: Values::All,
151        }
152    }
153
154    /// Every result matches this spec.
155    pub fn any_infallible() -> Self {
156        ResultSpec {
157            nullable: true,
158            fallible: false,
159            values: Values::All,
160        }
161    }
162
163    /// A spec that only matches null.
164    pub fn null() -> Self {
165        ResultSpec {
166            nullable: true,
167            ..Self::nothing()
168        }
169    }
170
171    /// A spec that only matches error values.
172    pub fn fails() -> Self {
173        ResultSpec {
174            fallible: true,
175            ..Self::nothing()
176        }
177    }
178
179    /// A spec that matches all values of a given type.
180    pub fn has_type(col: &ColumnType, fallible: bool) -> ResultSpec<'a> {
181        let values = match &col.scalar_type {
182            ScalarType::Bool => Values::Within(Datum::False, Datum::True),
183            // TODO: add bounds for other bounded types, like integers
184            _ => Values::All,
185        };
186        ResultSpec {
187            nullable: col.nullable,
188            fallible,
189            values,
190        }
191    }
192
193    /// A spec that only matches the given value.
194    pub fn value(value: Datum<'a>) -> ResultSpec<'a> {
195        match value {
196            Datum::Null => Self::null(),
197            nonnull => ResultSpec {
198                values: Values::just(nonnull),
199                ..Self::nothing()
200            },
201        }
202    }
203
204    /// A spec that matches values between the given (non-null) min and max.
205    pub fn value_between(min: Datum<'a>, max: Datum<'a>) -> ResultSpec<'a> {
206        assert!(!min.is_null());
207        assert!(!max.is_null());
208        if min <= max {
209            ResultSpec {
210                values: Values::Within(min, max),
211                ..ResultSpec::nothing()
212            }
213        } else {
214            ResultSpec::nothing()
215        }
216    }
217
218    /// A spec that matches any non-null value.
219    pub fn value_all() -> ResultSpec<'a> {
220        ResultSpec {
221            values: Values::All,
222            ..ResultSpec::nothing()
223        }
224    }
225
226    /// A spec that matches Datum::Maps of the given type.
227    pub fn map_spec(map: BTreeMap<Datum<'a>, ResultSpec<'a>>) -> ResultSpec<'a> {
228        ResultSpec {
229            values: Values::Nested(map),
230            ..ResultSpec::nothing()
231        }
232    }
233
234    /// Given two specs, returns a new spec that matches anything that either original spec would match.
235    pub fn union(self, other: ResultSpec<'a>) -> ResultSpec<'a> {
236        ResultSpec {
237            nullable: self.nullable || other.nullable,
238            fallible: self.fallible || other.fallible,
239            values: self.values.union(other.values),
240        }
241    }
242
243    /// Given two specs, returns a new spec that only matches things that both original specs would match.
244    pub fn intersect(self, other: ResultSpec<'a>) -> ResultSpec<'a> {
245        ResultSpec {
246            nullable: self.nullable && other.nullable,
247            fallible: self.fallible && other.fallible,
248            values: self.values.intersect(other.values),
249        }
250    }
251
252    /// Check if a particular value matches the spec.
253    pub fn may_contain(&self, value: Datum<'a>) -> bool {
254        if value == Datum::Null {
255            return self.nullable;
256        }
257
258        self.values.may_contain(value)
259    }
260
261    /// Check if an error value matches the spec.
262    pub fn may_fail(&self) -> bool {
263        self.fallible
264    }
265
266    /// This method "maps" a function across the `ResultSpec`.
267    ///
268    /// As mentioned above, `ResultSpec` represents an approximate set of results.
269    /// If we actually stored each result in the set, `flat_map` could be implemented by passing
270    /// each result to the function one-by-one and unioning the resulting sets. This is possible
271    /// when our values set is empty or contains a single datum, but when it contains a range,
272    /// we can't enumerate all possible values of the set. We handle this by:
273    /// - tracking whether the function is monotone, in which case we can map the range by just
274    ///   mapping the endpoints;
275    /// - using a safe default when we can't infer a tighter bound on the set, eg. [Self::anything].
276    fn flat_map(
277        &self,
278        is_monotone: bool,
279        mut result_map: impl FnMut(Result<Datum<'a>, EvalError>) -> ResultSpec<'a>,
280    ) -> ResultSpec<'a> {
281        let null_spec = if self.nullable {
282            result_map(Ok(Datum::Null))
283        } else {
284            ResultSpec::nothing()
285        };
286
287        let error_spec = if self.fallible {
288            // Since we only care about whether / not an error is possible, and not the specific
289            // error, create an arbitrary error here.
290            // NOTE! This assumes that functions do not discriminate on the type of the error.
291            let map_err = result_map(Err(EvalError::Internal("".into())));
292            let raise_err = ResultSpec::fails();
293            // SQL has a very loose notion of evaluation order: https://www.postgresql.org/docs/current/sql-expressions.html#SYNTAX-EXPRESS-EVAL
294            // Here, we account for the possibility that the expression is evaluated strictly,
295            // raising the error, or that it's evaluated lazily by the result_map function
296            // (which may return a non-error result even when given an error as input).
297            raise_err.union(map_err)
298        } else {
299            ResultSpec::nothing()
300        };
301
302        let values_spec = match self.values {
303            Values::Empty => ResultSpec::nothing(),
304            // If this range contains a single datum, just call the function.
305            Values::Within(min, max) if min == max => result_map(Ok(min)),
306            // If this is a range of booleans, we know all the values... just try them.
307            Values::Within(Datum::False, Datum::True) => {
308                result_map(Ok(Datum::False)).union(result_map(Ok(Datum::True)))
309            }
310            // Otherwise, if our function is monotonic, we can try mapping the input
311            // range to an output range.
312            Values::Within(min, max) if is_monotone => {
313                let min_result = result_map(Ok(min));
314                let max_result = result_map(Ok(max));
315                match (min_result, max_result) {
316                    // If both endpoints give us a range, the result is a union of those ranges.
317                    (
318                        ResultSpec {
319                            nullable: false,
320                            fallible: false,
321                            values: a_values,
322                        },
323                        ResultSpec {
324                            nullable: false,
325                            fallible: false,
326                            values: b_values,
327                        },
328                    ) => ResultSpec {
329                        nullable: false,
330                        fallible: false,
331                        values: a_values.union(b_values),
332                    },
333                    // If both endpoints are null, we assume the whole range maps to null.
334                    (
335                        ResultSpec {
336                            nullable: true,
337                            fallible: false,
338                            values: Values::Empty,
339                        },
340                        ResultSpec {
341                            nullable: true,
342                            fallible: false,
343                            values: Values::Empty,
344                        },
345                    ) => ResultSpec::null(),
346                    // Otherwise we can't assume anything about the output.
347                    _ => ResultSpec::anything(),
348                }
349            }
350            // TODO: we could return a narrower result for eg. `Values::Nested` with all-`Within` fields.
351            Values::Within(_, _) | Values::Nested(_) | Values::All => ResultSpec::anything(),
352        };
353
354        null_spec.union(error_spec).union(values_spec)
355    }
356}
357
358/// [Abstract interpretation](https://en.wikipedia.org/wiki/Abstract_interpretation) for
359/// [MirScalarExpr].
360///
361/// [MirScalarExpr::eval] implements a "concrete interpreter" for the expression type: given an
362/// expression and specific column values as input, it returns a specific value for the output.
363/// This could be reimplemented using this trait... but it's most useful for "abstract"
364/// interpretations of the expression, where we generalize about sets of possible inputs and outputs.
365/// See [Trace] and [ColumnSpecs] for how this can be useful in practice.
366pub trait Interpreter {
367    type Summary: Clone + Debug + Sized;
368
369    /// A column of the input row.
370    fn column(&self, id: usize) -> Self::Summary;
371
372    /// A literal value.
373    /// (Stored as a row, because we can't own a Datum.)
374    fn literal(&self, result: &Result<Row, EvalError>, col_type: &ColumnType) -> Self::Summary;
375
376    /// A call to an unmaterializable function.
377    ///
378    /// These functions cannot be evaluated by `MirScalarExpr::eval`. They must
379    /// be transformed away by a higher layer.
380    fn unmaterializable(&self, func: &UnmaterializableFunc) -> Self::Summary;
381
382    /// A function call that takes one expression as an argument.
383    fn unary(&self, func: &UnaryFunc, expr: Self::Summary) -> Self::Summary;
384
385    /// A function call that takes two expressions as arguments.
386    fn binary(&self, func: &BinaryFunc, left: Self::Summary, right: Self::Summary)
387    -> Self::Summary;
388
389    /// A function call that takes an arbitrary number of arguments.
390    fn variadic(&self, func: &VariadicFunc, exprs: Vec<Self::Summary>) -> Self::Summary;
391
392    /// Conditionally evaluated expressions.
393    fn cond(&self, cond: Self::Summary, then: Self::Summary, els: Self::Summary) -> Self::Summary;
394
395    /// Evaluate an entire expression, by delegating to the fine-grained methods on [Interpreter].
396    fn expr(&self, expr: &MirScalarExpr) -> Self::Summary {
397        match expr {
398            MirScalarExpr::Column(id) => self.column(*id),
399            MirScalarExpr::Literal(value, col_type) => self.literal(value, col_type),
400            MirScalarExpr::CallUnmaterializable(func) => self.unmaterializable(func),
401            MirScalarExpr::CallUnary { func, expr } => {
402                let expr_range = self.expr(expr);
403                self.unary(func, expr_range)
404            }
405            MirScalarExpr::CallBinary { func, expr1, expr2 } => {
406                let expr1_range = self.expr(expr1);
407                let expr2_range = self.expr(expr2);
408                self.binary(func, expr1_range, expr2_range)
409            }
410            MirScalarExpr::CallVariadic { func, exprs } => {
411                let exprs: Vec<_> = exprs.into_iter().map(|e| self.expr(e)).collect();
412                self.variadic(func, exprs)
413            }
414            MirScalarExpr::If { cond, then, els } => {
415                let cond_range = self.expr(cond);
416                let then_range = self.expr(then);
417                let els_range = self.expr(els);
418                self.cond(cond_range, then_range, els_range)
419            }
420        }
421    }
422
423    /// Specifically, this evaluates the map and filters stages of an MFP: summarize each of the
424    /// map expressions, then `and` together all of the filters.
425    fn mfp_filter(&self, mfp: &MapFilterProject) -> Self::Summary {
426        let mfp_eval = MfpEval::new(self, mfp.input_arity, &mfp.expressions);
427        // NB: self should not be used after this point!
428        let predicates = mfp
429            .predicates
430            .iter()
431            .map(|(_, e)| mfp_eval.expr(e))
432            .collect();
433        mfp_eval.variadic(&VariadicFunc::And, predicates)
434    }
435
436    /// Similar to [Self::mfp_filter], but includes the additional temporal filters that have been
437    /// broken out.
438    fn mfp_plan_filter(&self, plan: &MfpPlan) -> Self::Summary {
439        let mfp_eval = MfpEval::new(self, plan.mfp.input_arity, &plan.mfp.expressions);
440        // NB: self should not be used after this point!
441        let mut results: Vec<_> = plan
442            .mfp
443            .predicates
444            .iter()
445            .map(|(_, e)| mfp_eval.expr(e))
446            .collect();
447        let mz_now = mfp_eval.unmaterializable(&UnmaterializableFunc::MzNow);
448        for bound in &plan.lower_bounds {
449            let bound_range = mfp_eval.expr(bound);
450            let result = mfp_eval.binary(&BinaryFunc::Lte, bound_range, mz_now.clone());
451            results.push(result);
452        }
453        for bound in &plan.upper_bounds {
454            let bound_range = mfp_eval.expr(bound);
455            let result = mfp_eval.binary(&BinaryFunc::Gte, bound_range, mz_now.clone());
456            results.push(result);
457        }
458        self.variadic(&VariadicFunc::And, results)
459    }
460}
461
462/// Wrap another interpreter, but tack a few extra columns on at the end. An internal implementation
463/// detail of `eval_mfp` and `eval_mfp_plan`.
464pub(crate) struct MfpEval<'a, E: Interpreter + ?Sized> {
465    evaluator: &'a E,
466    input_arity: usize,
467    expressions: Vec<E::Summary>,
468}
469
470impl<'a, E: Interpreter + ?Sized> MfpEval<'a, E> {
471    pub(crate) fn new(evaluator: &'a E, input_arity: usize, expressions: &[MirScalarExpr]) -> Self {
472        let mut mfp_eval = MfpEval {
473            evaluator,
474            input_arity,
475            expressions: vec![],
476        };
477        for expr in expressions {
478            let result = mfp_eval.expr(expr);
479            mfp_eval.expressions.push(result);
480        }
481        mfp_eval
482    }
483}
484
485impl<'a, E: Interpreter + ?Sized> Interpreter for MfpEval<'a, E> {
486    type Summary = E::Summary;
487
488    fn column(&self, id: usize) -> Self::Summary {
489        if id < self.input_arity {
490            self.evaluator.column(id)
491        } else {
492            self.expressions[id - self.input_arity].clone()
493        }
494    }
495
496    fn literal(&self, result: &Result<Row, EvalError>, col_type: &ColumnType) -> Self::Summary {
497        self.evaluator.literal(result, col_type)
498    }
499
500    fn unmaterializable(&self, func: &UnmaterializableFunc) -> Self::Summary {
501        self.evaluator.unmaterializable(func)
502    }
503
504    fn unary(&self, func: &UnaryFunc, expr: Self::Summary) -> Self::Summary {
505        self.evaluator.unary(func, expr)
506    }
507
508    fn binary(
509        &self,
510        func: &BinaryFunc,
511        left: Self::Summary,
512        right: Self::Summary,
513    ) -> Self::Summary {
514        self.evaluator.binary(func, left, right)
515    }
516
517    fn variadic(&self, func: &VariadicFunc, exprs: Vec<Self::Summary>) -> Self::Summary {
518        self.evaluator.variadic(func, exprs)
519    }
520
521    fn cond(&self, cond: Self::Summary, then: Self::Summary, els: Self::Summary) -> Self::Summary {
522        self.evaluator.cond(cond, then, els)
523    }
524}
525
526/// A unary function we've added special-case handling for; including:
527/// - A three-argument function, taking and returning [ResultSpec]s. This
528///   overrides the default function-handling logic entirely.
529/// - Metadata on whether / not this function is pushdownable. See [Trace].
530struct SpecialUnary {
531    map_fn: for<'a, 'b> fn(&'b ColumnSpecs<'a>, ResultSpec<'a>) -> ResultSpec<'a>,
532    pushdownable: bool,
533}
534
535impl SpecialUnary {
536    /// Returns the special-case handling for a particular function, if it exists.
537    fn for_func(func: &UnaryFunc) -> Option<SpecialUnary> {
538        /// Eager in the same sense as `func.rs` uses the term; this assumes that
539        /// nulls and errors propagate up, and we only need to define the behaviour
540        /// on values.
541        fn eagerly<'b>(
542            spec: ResultSpec<'b>,
543            value_fn: impl FnOnce(Values<'b>) -> ResultSpec<'b>,
544        ) -> ResultSpec<'b> {
545            let result = match spec.values {
546                Values::Empty => ResultSpec::nothing(),
547                other => value_fn(other),
548            };
549            ResultSpec {
550                fallible: spec.fallible || result.fallible,
551                nullable: spec.nullable || result.nullable,
552                values: result.values,
553            }
554        }
555        match func {
556            UnaryFunc::TryParseMonotonicIso8601Timestamp(_) => Some(SpecialUnary {
557                map_fn: |specs, range| {
558                    let expr = MirScalarExpr::CallUnary {
559                        func: UnaryFunc::TryParseMonotonicIso8601Timestamp(
560                            crate::func::TryParseMonotonicIso8601Timestamp,
561                        ),
562                        expr: Box::new(MirScalarExpr::Column(0)),
563                    };
564                    let eval = |d| specs.eval_result(expr.eval(&[d], specs.arena));
565
566                    eagerly(range, |values| {
567                        match values {
568                            Values::Within(a, b) if a == b => eval(a),
569                            Values::Within(a, b) => {
570                                let spec = eval(a).union(eval(b));
571                                let values_spec = if spec.nullable {
572                                    // At least one of the endpoints of the range wasn't a valid
573                                    // timestamp. We can't compute a precise range in this case.
574                                    // If we used the general is_monotone handling, that code would
575                                    // incorrectly assume the whole range mapped to null if each
576                                    // endpoint did.
577                                    ResultSpec::value_all()
578                                } else {
579                                    spec
580                                };
581                                // A range of strings will always contain strings that don't parse
582                                // as timestamps - so unlike the general case, we'll assume null
583                                // is present in every range of output values.
584                                values_spec.union(ResultSpec::null())
585                            }
586                            // Otherwise, assume the worst: this function may return either a valid
587                            // value or null.
588                            _ => ResultSpec::any_infallible(),
589                        }
590                    })
591                },
592                pushdownable: true,
593            }),
594            _ => None,
595        }
596    }
597}
598
599/// A binary function we've added special-case handling for; including:
600/// - A two-argument function, taking and returning [ResultSpec]s. This overrides the
601///   default function-handling logic entirely.
602/// - Metadata on whether / not this function is pushdownable. See [Trace].
603struct SpecialBinary {
604    map_fn: for<'a> fn(ResultSpec<'a>, ResultSpec<'a>) -> ResultSpec<'a>,
605    pushdownable: (bool, bool),
606}
607
608impl SpecialBinary {
609    /// Returns the special-case handling for a particular function, if it exists.
610    fn for_func(func: &BinaryFunc) -> Option<SpecialBinary> {
611        /// Eager in the same sense as `func.rs` uses the term; this assumes that
612        /// nulls and errors propagate up, and we only need to define the behaviour
613        /// on values.
614        fn eagerly<'b>(
615            left: ResultSpec<'b>,
616            right: ResultSpec<'b>,
617            value_fn: impl FnOnce(Values<'b>, Values<'b>) -> ResultSpec<'b>,
618        ) -> ResultSpec<'b> {
619            let result = match (left.values, right.values) {
620                (Values::Empty, _) | (_, Values::Empty) => ResultSpec::nothing(),
621                (l, r) => value_fn(l, r),
622            };
623            ResultSpec {
624                fallible: left.fallible || right.fallible || result.fallible,
625                nullable: left.nullable || right.nullable || result.nullable,
626                values: result.values,
627            }
628        }
629
630        fn jsonb_get_string<'b>(
631            left: ResultSpec<'b>,
632            right: ResultSpec<'b>,
633            stringify: bool,
634        ) -> ResultSpec<'b> {
635            eagerly(left, right, |left, right| {
636                let nested_spec = match (left, right) {
637                    (Values::Nested(mut map_spec), Values::Within(key, key2)) if key == key2 => {
638                        map_spec.remove(&key)
639                    }
640                    _ => None,
641                };
642
643                if let Some(field_spec) = nested_spec {
644                    if stringify {
645                        // We only preserve value-range information when stringification
646                        // is a noop. (Common in real queries.)
647                        let values = match field_spec.values {
648                            Values::Empty => Values::Empty,
649                            Values::Within(min @ Datum::String(_), max @ Datum::String(_)) => {
650                                Values::Within(min, max)
651                            }
652                            Values::Within(_, _) | Values::Nested(_) | Values::All => Values::All,
653                        };
654                        ResultSpec {
655                            values,
656                            ..field_spec
657                        }
658                    } else {
659                        field_spec
660                    }
661                } else {
662                    // The implementation of `jsonb_get_string` always returns
663                    // `Ok(...)`. Morally, everything has a string
664                    // representation, and the worst you can get is a NULL,
665                    // which maps to a NULL.
666                    ResultSpec::any_infallible()
667                }
668            })
669        }
670
671        fn eq<'b>(left: ResultSpec<'b>, right: ResultSpec<'b>) -> ResultSpec<'b> {
672            eagerly(left, right, |left, right| {
673                // `eq` might return true if there's any overlap between the range of its two arguments...
674                let maybe_true = match left.clone().intersect(right.clone()) {
675                    Values::Empty => ResultSpec::nothing(),
676                    _ => ResultSpec::value(Datum::True),
677                };
678
679                // ...and may return false if the union contains at least two distinct values.
680                // Note that the `Empty` case is handled by `eagerly` above.
681                let maybe_false = match left.union(right) {
682                    Values::Within(a, b) if a == b => ResultSpec::nothing(),
683                    _ => ResultSpec::value(Datum::False),
684                };
685
686                maybe_true.union(maybe_false)
687            })
688        }
689
690        match func {
691            BinaryFunc::JsonbGetString => Some(SpecialBinary {
692                map_fn: |l, r| jsonb_get_string(l, r, false),
693                pushdownable: (true, false),
694            }),
695            BinaryFunc::JsonbGetStringStringify => Some(SpecialBinary {
696                map_fn: |l, r| jsonb_get_string(l, r, true),
697                pushdownable: (true, false),
698            }),
699            BinaryFunc::Eq => Some(SpecialBinary {
700                map_fn: eq,
701                pushdownable: (true, true),
702            }),
703            _ => None,
704        }
705    }
706}
707
708#[derive(Clone, Debug)]
709pub struct ColumnSpec<'a> {
710    pub col_type: ColumnType,
711    pub range: ResultSpec<'a>,
712}
713
714/// An interpreter that:
715/// - stores both the type and the range of possible values for every column and
716///   unmaterializable function. (See the `push_` methods.)
717/// - given an expression (or MFP, etc.), returns the range of possible results that evaluating that
718///   expression might have. (See the `eval_` methods.)
719#[derive(Clone, Debug)]
720pub struct ColumnSpecs<'a> {
721    pub relation: &'a RelationType,
722    pub columns: Vec<ResultSpec<'a>>,
723    pub unmaterializables: BTreeMap<UnmaterializableFunc, ResultSpec<'a>>,
724    pub arena: &'a RowArena,
725}
726
727impl<'a> ColumnSpecs<'a> {
728    // Interpreting a variadic function can lead to exponential blowup: there are up to 4 possibly-
729    // interesting values for each argument (error, null, range bounds...) and in the worst case
730    // we may need to test every combination. We mitigate that here in two ways:
731    // - Adding a linear-time optimization for associative functions like AND, OR, COALESCE.
732    // - Limiting the number of arguments we'll pass on to eval. If this limit is crossed, we'll
733    //   return our default / safe overapproximation instead.
734    const MAX_EVAL_ARGS: usize = 6;
735
736    /// Create a new, empty set of column specs. (Initially, the only assumption we make about the
737    /// data in the column is that it matches the type.)
738    pub fn new(relation: &'a RelationType, arena: &'a RowArena) -> Self {
739        let columns = relation
740            .column_types
741            .iter()
742            .map(|ct| ResultSpec::has_type(ct, false))
743            .collect();
744        ColumnSpecs {
745            relation,
746            columns,
747            unmaterializables: Default::default(),
748            arena,
749        }
750    }
751
752    /// Restrict the set of possible values in a given column. (By intersecting it with the existing
753    /// spec.)
754    pub fn push_column(&mut self, id: usize, update: ResultSpec<'a>) {
755        let range = self.columns.get_mut(id).expect("valid column id");
756        *range = range.clone().intersect(update);
757    }
758
759    /// Restrict the set of possible values a given unmaterializable func might return. (By
760    /// intersecting it with the existing spec.)
761    pub fn push_unmaterializable(&mut self, func: UnmaterializableFunc, update: ResultSpec<'a>) {
762        let range = self
763            .unmaterializables
764            .entry(func.clone())
765            .or_insert_with(|| ResultSpec::has_type(&func.output_type(), true));
766        *range = range.clone().intersect(update);
767    }
768
769    fn eval_result<'b, E>(&self, result: Result<Datum<'b>, E>) -> ResultSpec<'a> {
770        match result {
771            Ok(Datum::Null) => ResultSpec {
772                nullable: true,
773                ..ResultSpec::nothing()
774            },
775            Ok(d) => ResultSpec {
776                values: Values::just(self.arena.make_datum(|packer| packer.push(d))),
777                ..ResultSpec::nothing()
778            },
779            Err(_) => ResultSpec {
780                fallible: true,
781                ..ResultSpec::nothing()
782            },
783        }
784    }
785
786    fn set_literal(expr: &mut MirScalarExpr, update: Result<Datum, EvalError>) {
787        match expr {
788            MirScalarExpr::Literal(literal, col_type) => match update {
789                Err(error) => *literal = Err(error),
790                Ok(datum) => {
791                    assert!(
792                        datum.is_instance_of(col_type),
793                        "{datum:?} must be an instance of {col_type:?}"
794                    );
795                    match literal {
796                        // Reuse the allocation if we can
797                        Ok(row) => row.packer().push(datum),
798                        literal => *literal = Ok(Row::pack_slice(&[datum])),
799                    }
800                }
801            },
802            _ => panic!("not a literal"),
803        }
804    }
805
806    fn set_argument(expr: &mut MirScalarExpr, arg: usize, value: Result<Datum, EvalError>) {
807        match (expr, arg) {
808            (MirScalarExpr::CallUnary { expr, .. }, 0) => Self::set_literal(expr, value),
809            (MirScalarExpr::CallBinary { expr1, .. }, 0) => Self::set_literal(expr1, value),
810            (MirScalarExpr::CallBinary { expr2, .. }, 1) => Self::set_literal(expr2, value),
811            (MirScalarExpr::CallVariadic { exprs, .. }, n) if n < exprs.len() => {
812                Self::set_literal(&mut exprs[n], value)
813            }
814            _ => panic!("illegal argument for expression"),
815        }
816    }
817
818    /// A literal with the given type and a trivial default value. Callers should ensure that
819    /// [Self::set_literal] is called on the resulting expression to give it a meaningful value
820    /// before evaluating.
821    fn placeholder(col_type: ColumnType) -> MirScalarExpr {
822        MirScalarExpr::Literal(Err(EvalError::Internal("".into())), col_type)
823    }
824}
825
826impl<'a> Interpreter for ColumnSpecs<'a> {
827    type Summary = ColumnSpec<'a>;
828
829    fn column(&self, id: usize) -> Self::Summary {
830        let col_type = self.relation.column_types[id].clone();
831        let range = self.columns[id].clone();
832        ColumnSpec { col_type, range }
833    }
834
835    fn literal(&self, result: &Result<Row, EvalError>, col_type: &ColumnType) -> Self::Summary {
836        let col_type = col_type.clone();
837        let range = self.eval_result(result.as_ref().map(|row| {
838            self.arena
839                .make_datum(|packer| packer.push(row.unpack_first()))
840        }));
841        ColumnSpec { col_type, range }
842    }
843
844    fn unmaterializable(&self, func: &UnmaterializableFunc) -> Self::Summary {
845        let col_type = func.output_type();
846        let range = self
847            .unmaterializables
848            .get(func)
849            .cloned()
850            .unwrap_or_else(|| ResultSpec::has_type(&func.output_type(), true));
851        ColumnSpec { col_type, range }
852    }
853
854    fn unary(&self, func: &UnaryFunc, summary: Self::Summary) -> Self::Summary {
855        let fallible = func.could_error() || summary.range.fallible;
856        let mapped_spec = if let Some(special) = SpecialUnary::for_func(func) {
857            (special.map_fn)(self, summary.range)
858        } else {
859            let is_monotone = func.is_monotone();
860            let mut expr = MirScalarExpr::CallUnary {
861                func: func.clone(),
862                expr: Box::new(Self::placeholder(summary.col_type.clone())),
863            };
864            summary.range.flat_map(is_monotone, |datum| {
865                Self::set_argument(&mut expr, 0, datum);
866                self.eval_result(expr.eval(&[], self.arena))
867            })
868        };
869
870        let col_type = func.output_type(summary.col_type);
871
872        let range = mapped_spec.intersect(ResultSpec::has_type(&col_type, fallible));
873        ColumnSpec { col_type, range }
874    }
875
876    fn binary(
877        &self,
878        func: &BinaryFunc,
879        left: Self::Summary,
880        right: Self::Summary,
881    ) -> Self::Summary {
882        let (left_monotonic, right_monotonic) = func.is_monotone();
883        let fallible = func.could_error() || left.range.fallible || right.range.fallible;
884
885        let mapped_spec = if let Some(special) = SpecialBinary::for_func(func) {
886            (special.map_fn)(left.range, right.range)
887        } else {
888            let mut expr = MirScalarExpr::CallBinary {
889                func: func.clone(),
890                expr1: Box::new(Self::placeholder(left.col_type.clone())),
891                expr2: Box::new(Self::placeholder(right.col_type.clone())),
892            };
893            left.range.flat_map(left_monotonic, |left_result| {
894                Self::set_argument(&mut expr, 0, left_result);
895                right.range.flat_map(right_monotonic, |right_result| {
896                    Self::set_argument(&mut expr, 1, right_result);
897                    self.eval_result(expr.eval(&[], self.arena))
898                })
899            })
900        };
901
902        let col_type = func.output_type(left.col_type, right.col_type);
903
904        let range = mapped_spec.intersect(ResultSpec::has_type(&col_type, fallible));
905        ColumnSpec { col_type, range }
906    }
907
908    fn variadic(&self, func: &VariadicFunc, args: Vec<Self::Summary>) -> Self::Summary {
909        let fallible = func.could_error() || args.iter().any(|s| s.range.fallible);
910        if func.is_associative() && args.len() > 2 {
911            // To avoid a combinatorial explosion, evaluate large variadic calls as a series of
912            // smaller ones, since associativity guarantees we'll get compatible results.
913            return args
914                .into_iter()
915                .reduce(|a, b| self.variadic(func, vec![a, b]))
916                .expect("reducing over a non-empty argument list");
917        }
918
919        let mapped_spec = if args.len() >= Self::MAX_EVAL_ARGS {
920            ResultSpec::anything()
921        } else {
922            fn eval_loop<'a>(
923                is_monotonic: bool,
924                expr: &mut MirScalarExpr,
925                args: &[ColumnSpec<'a>],
926                index: usize,
927                datum_map: &mut impl FnMut(&MirScalarExpr) -> ResultSpec<'a>,
928            ) -> ResultSpec<'a> {
929                if index >= args.len() {
930                    datum_map(expr)
931                } else {
932                    args[index].range.flat_map(is_monotonic, |datum| {
933                        ColumnSpecs::set_argument(expr, index, datum);
934                        eval_loop(is_monotonic, expr, args, index + 1, datum_map)
935                    })
936                }
937            }
938
939            let mut fn_expr = MirScalarExpr::CallVariadic {
940                func: func.clone(),
941                exprs: args
942                    .iter()
943                    .map(|spec| Self::placeholder(spec.col_type.clone()))
944                    .collect(),
945            };
946            eval_loop(func.is_monotone(), &mut fn_expr, &args, 0, &mut |expr| {
947                self.eval_result(expr.eval(&[], self.arena))
948            })
949        };
950
951        let col_types = args.into_iter().map(|spec| spec.col_type).collect();
952        let col_type = func.output_type(col_types);
953
954        let range = mapped_spec.intersect(ResultSpec::has_type(&col_type, fallible));
955
956        ColumnSpec { col_type, range }
957    }
958
959    fn cond(&self, cond: Self::Summary, then: Self::Summary, els: Self::Summary) -> Self::Summary {
960        let col_type = ColumnType {
961            scalar_type: then.col_type.scalar_type,
962            nullable: then.col_type.nullable || els.col_type.nullable,
963        };
964
965        let range = cond
966            .range
967            .flat_map(true, |datum| match datum {
968                Ok(Datum::True) => then.range.clone(),
969                Ok(Datum::False) => els.range.clone(),
970                _ => ResultSpec::fails(),
971            })
972            .intersect(ResultSpec::has_type(&col_type, true));
973
974        ColumnSpec { col_type, range }
975    }
976}
977
978/// An interpreter that returns whether or not a particular expression is "pushdownable".
979/// Broadly speaking, an expression is pushdownable if the result of evaluating the expression
980/// depends on the range of possible column values in a way that `ColumnSpecs` is able to reason about.
981///
982/// In practice, we internally need to distinguish between expressions that are trivially predicable
983/// (because they're constant) and expressions that depend on the column ranges themselves.
984/// See the [TraceSummary] variants for those distinctions, and [TraceSummary::pushdownable] for
985/// the overall assessment.
986#[derive(Debug)]
987pub struct Trace;
988
989/// A summary type for the [Trace] interpreter.
990///
991/// The ordering of this type is meaningful: the "smaller" the summary, the more information we have
992/// about the possible values of the expression. This means we can eg. use `max` in the
993/// interpreter below to find the summary for a function-call expression based on the summaries
994/// of its arguments.
995#[derive(Copy, Clone, Debug, PartialOrd, PartialEq, Ord, Eq)]
996pub enum TraceSummary {
997    /// The expression is constant: we can evaluate it without any runtime information.
998    /// This corresponds to a `ResultSpec` of a single value.
999    Constant,
1000    /// The expression depends on runtime information, but in "predictable" way... ie. if we know
1001    /// the range of possible values for all columns and unmaterializable functions, we can
1002    /// predict the possible values of the output.
1003    /// This corresponds to a `ResultSpec` of a perhaps range of values.
1004    Dynamic,
1005    /// The expression depends on runtime information in an unpredictable way.
1006    /// This corresponds to a `ResultSpec::value_all()` or something similarly vague.
1007    Unknown,
1008}
1009
1010impl TraceSummary {
1011    /// We say that a function is "pushdownable" for a particular
1012    /// argument if `ColumnSpecs` can determine the spec of the function's output given the input spec for
1013    /// that argument. (In practice, this is true when either the function is monotone in that argument
1014    /// or it's been special-cased in the interpreter.)
1015    fn apply_fn(self, pushdownable: bool) -> Self {
1016        match self {
1017            TraceSummary::Constant => TraceSummary::Constant,
1018            TraceSummary::Dynamic => match pushdownable {
1019                true => TraceSummary::Dynamic,
1020                false => TraceSummary::Unknown,
1021            },
1022            TraceSummary::Unknown => TraceSummary::Unknown,
1023        }
1024    }
1025
1026    /// We say that an expression is "pushdownable" if it's either constant or dynamic.
1027    pub fn pushdownable(self) -> bool {
1028        match self {
1029            TraceSummary::Constant | TraceSummary::Dynamic => true,
1030            TraceSummary::Unknown => false,
1031        }
1032    }
1033}
1034
1035impl Interpreter for Trace {
1036    type Summary = TraceSummary;
1037
1038    fn column(&self, _id: usize) -> Self::Summary {
1039        TraceSummary::Dynamic
1040    }
1041
1042    fn literal(&self, _result: &Result<Row, EvalError>, _col_type: &ColumnType) -> Self::Summary {
1043        TraceSummary::Constant
1044    }
1045
1046    fn unmaterializable(&self, _func: &UnmaterializableFunc) -> Self::Summary {
1047        TraceSummary::Dynamic
1048    }
1049
1050    fn unary(&self, func: &UnaryFunc, expr: Self::Summary) -> Self::Summary {
1051        let pushdownable = match SpecialUnary::for_func(func) {
1052            None => func.is_monotone(),
1053            Some(special) => special.pushdownable,
1054        };
1055        expr.apply_fn(pushdownable)
1056    }
1057
1058    fn binary(
1059        &self,
1060        func: &BinaryFunc,
1061        left: Self::Summary,
1062        right: Self::Summary,
1063    ) -> Self::Summary {
1064        let (left_pushdownable, right_pushdownable) = match SpecialBinary::for_func(func) {
1065            None => func.is_monotone(),
1066            Some(special) => special.pushdownable,
1067        };
1068        left.apply_fn(left_pushdownable)
1069            .max(right.apply_fn(right_pushdownable))
1070    }
1071
1072    fn variadic(&self, func: &VariadicFunc, exprs: Vec<Self::Summary>) -> Self::Summary {
1073        if !func.is_associative() && exprs.len() >= ColumnSpecs::MAX_EVAL_ARGS {
1074            // We can't efficiently evaluate functions with very large argument lists;
1075            // see the comment on ColumnSpecs::MAX_EVAL_ARGS for details.
1076            return TraceSummary::Unknown;
1077        }
1078
1079        let pushdownable_fn = func.is_monotone();
1080        exprs
1081            .into_iter()
1082            .map(|pushdownable_arg| pushdownable_arg.apply_fn(pushdownable_fn))
1083            .max()
1084            .unwrap_or(TraceSummary::Constant)
1085    }
1086
1087    fn cond(&self, cond: Self::Summary, then: Self::Summary, els: Self::Summary) -> Self::Summary {
1088        // We don't actually need to be able to predict the condition precisely to predict the output,
1089        // since we can union the ranges of the two branches for a conservative estimate.
1090        let cond = cond.min(TraceSummary::Dynamic);
1091        cond.max(then).max(els)
1092    }
1093}
1094
1095#[cfg(test)]
1096mod tests {
1097    use itertools::Itertools;
1098    use mz_repr::adt::datetime::DateTimeUnits;
1099    use mz_repr::{Datum, PropDatum, RowArena, ScalarType};
1100    use proptest::prelude::*;
1101    use proptest::sample::{Index, select};
1102
1103    use crate::func::*;
1104    use crate::{BinaryFunc, MirScalarExpr, UnaryFunc};
1105
1106    use super::*;
1107
1108    #[derive(Debug)]
1109    struct ExpressionData {
1110        relation_type: RelationType,
1111        specs: Vec<ResultSpec<'static>>,
1112        rows: Vec<Row>,
1113        expr: MirScalarExpr,
1114    }
1115
1116    // Currently there's no good way to check whether a particular function accepts a particular
1117    // type as argument, which means we need to list everything out explicitly here. Restrict our interest
1118    // to a reasonable number of functions, to keep things tractable
1119    // TODO: replace this with function-level info once it's available.
1120    const NUM_TYPE: ScalarType = ScalarType::Numeric { max_scale: None };
1121    static SCALAR_TYPES: &[ScalarType] = &[
1122        ScalarType::Bool,
1123        ScalarType::Jsonb,
1124        NUM_TYPE,
1125        ScalarType::Date,
1126        ScalarType::Timestamp { precision: None },
1127        ScalarType::MzTimestamp,
1128        ScalarType::String,
1129    ];
1130
1131    const INTERESTING_UNARY_FUNCS: &[UnaryFunc] = {
1132        &[
1133            UnaryFunc::CastNumericToMzTimestamp(CastNumericToMzTimestamp),
1134            UnaryFunc::NegNumeric(NegNumeric),
1135            UnaryFunc::CastJsonbToNumeric(CastJsonbToNumeric(None)),
1136            UnaryFunc::CastJsonbToBool(CastJsonbToBool),
1137            UnaryFunc::CastJsonbToString(CastJsonbToString),
1138            UnaryFunc::DateTruncTimestamp(DateTruncTimestamp(DateTimeUnits::Epoch)),
1139            UnaryFunc::ExtractTimestamp(ExtractTimestamp(DateTimeUnits::Epoch)),
1140            UnaryFunc::ExtractDate(ExtractDate(DateTimeUnits::Epoch)),
1141            UnaryFunc::Not(Not),
1142            UnaryFunc::IsNull(IsNull),
1143            UnaryFunc::IsFalse(IsFalse),
1144            UnaryFunc::TryParseMonotonicIso8601Timestamp(TryParseMonotonicIso8601Timestamp),
1145        ]
1146    };
1147
1148    fn unary_typecheck(func: &UnaryFunc, arg: &ColumnType) -> bool {
1149        use UnaryFunc::*;
1150        match func {
1151            CastNumericToMzTimestamp(_) | NegNumeric(_) => arg.scalar_type.base_eq(&NUM_TYPE),
1152            CastJsonbToNumeric(_) | CastJsonbToBool(_) | CastJsonbToString(_) => {
1153                arg.scalar_type.base_eq(&ScalarType::Jsonb)
1154            }
1155            ExtractTimestamp(_) | DateTruncTimestamp(_) => arg
1156                .scalar_type
1157                .base_eq(&ScalarType::Timestamp { precision: None }),
1158            ExtractDate(_) => arg.scalar_type.base_eq(&ScalarType::Date),
1159            Not(_) => arg.scalar_type.base_eq(&ScalarType::Bool),
1160            IsNull(_) => true,
1161            TryParseMonotonicIso8601Timestamp(_) => arg.scalar_type.base_eq(&ScalarType::String),
1162            _ => false,
1163        }
1164    }
1165
1166    const INTERESTING_BINARY_FUNCS: &[BinaryFunc] = {
1167        use BinaryFunc::*;
1168        &[
1169            AddTimestampInterval,
1170            AddNumeric,
1171            SubNumeric,
1172            MulNumeric,
1173            DivNumeric,
1174            Eq,
1175            Lt,
1176            Gt,
1177            Lte,
1178            Gte,
1179            DateTruncTimestamp,
1180            JsonbGetString,
1181            JsonbGetStringStringify,
1182        ]
1183    };
1184
1185    fn binary_typecheck(func: &BinaryFunc, arg0: &ColumnType, arg1: &ColumnType) -> bool {
1186        use BinaryFunc::*;
1187        match func {
1188            AddTimestampInterval => {
1189                arg0.scalar_type
1190                    .base_eq(&ScalarType::Timestamp { precision: None })
1191                    && arg1.scalar_type.base_eq(&ScalarType::Interval)
1192            }
1193            AddNumeric | SubNumeric | MulNumeric | DivNumeric => {
1194                arg0.scalar_type.base_eq(&NUM_TYPE) && arg1.scalar_type.base_eq(&NUM_TYPE)
1195            }
1196            Eq | Lt | Gt | Lte | Gte => arg0.scalar_type.base_eq(&arg1.scalar_type),
1197            DateTruncTimestamp => {
1198                arg0.scalar_type.base_eq(&ScalarType::String)
1199                    && arg1
1200                        .scalar_type
1201                        .base_eq(&ScalarType::Timestamp { precision: None })
1202            }
1203            JsonbGetString | JsonbGetStringStringify => {
1204                arg0.scalar_type.base_eq(&ScalarType::Jsonb)
1205                    && arg1.scalar_type.base_eq(&ScalarType::String)
1206            }
1207            _ => false,
1208        }
1209    }
1210
1211    const INTERESTING_VARIADIC_FUNCS: &[VariadicFunc] = {
1212        use VariadicFunc::*;
1213        &[Coalesce, Greatest, Least, And, Or, Concat, ConcatWs]
1214    };
1215
1216    fn variadic_typecheck(func: &VariadicFunc, args: &[ColumnType]) -> bool {
1217        use VariadicFunc::*;
1218        fn all_eq<'a>(iter: impl IntoIterator<Item = &'a ColumnType>, other: &ScalarType) -> bool {
1219            iter.into_iter().all(|t| t.scalar_type.base_eq(other))
1220        }
1221        match func {
1222            Coalesce | Greatest | Least => match args {
1223                [] => true,
1224                [first, rest @ ..] => all_eq(rest, &first.scalar_type),
1225            },
1226            And | Or => all_eq(args, &ScalarType::Bool),
1227            Concat => all_eq(args, &ScalarType::String),
1228            ConcatWs => args.len() > 1 && all_eq(args, &ScalarType::String),
1229            _ => false,
1230        }
1231    }
1232
1233    fn gen_datums_for_type(typ: &ColumnType) -> BoxedStrategy<Datum<'static>> {
1234        let mut values: Vec<Datum<'static>> = typ.scalar_type.interesting_datums().collect();
1235        if typ.nullable {
1236            values.push(Datum::Null)
1237        }
1238        select(values).boxed()
1239    }
1240
1241    fn gen_column() -> impl Strategy<Value = (ColumnType, Datum<'static>, ResultSpec<'static>)> {
1242        let col_type = (select(SCALAR_TYPES), any::<bool>())
1243            .prop_map(|(t, b)| t.nullable(b))
1244            .prop_filter("need at least one value", |c| {
1245                c.scalar_type.interesting_datums().count() > 0
1246            });
1247
1248        let result_spec = select(vec![
1249            ResultSpec::nothing(),
1250            ResultSpec::null(),
1251            ResultSpec::anything(),
1252            ResultSpec::value_all(),
1253        ]);
1254
1255        (col_type, result_spec).prop_flat_map(|(col, result_spec)| {
1256            gen_datums_for_type(&col).prop_map(move |datum| {
1257                let result_spec = result_spec.clone().union(ResultSpec::value(datum));
1258                (col.clone(), datum, result_spec)
1259            })
1260        })
1261    }
1262
1263    fn gen_expr_for_relation(
1264        relation: &RelationType,
1265    ) -> BoxedStrategy<(MirScalarExpr, ColumnType)> {
1266        let column_gen = {
1267            let column_types = relation.column_types.clone();
1268            any::<Index>()
1269                .prop_map(move |idx| {
1270                    let id = idx.index(column_types.len());
1271                    (MirScalarExpr::Column(id), column_types[id].clone())
1272                })
1273                .boxed()
1274        };
1275
1276        let literal_gen = (select(SCALAR_TYPES), any::<bool>())
1277            .prop_map(|(s, b)| s.nullable(b))
1278            .prop_flat_map(|ct| {
1279                let error_gen = any::<EvalError>().prop_map(Err).boxed();
1280                let value_gen = gen_datums_for_type(&ct)
1281                    .prop_map(move |datum| Ok(Row::pack_slice(&[datum])))
1282                    .boxed();
1283                error_gen.prop_union(value_gen).prop_map(move |result| {
1284                    (MirScalarExpr::Literal(result, ct.clone()), ct.clone())
1285                })
1286            })
1287            .boxed();
1288
1289        column_gen
1290            .prop_union(literal_gen)
1291            .prop_recursive(4, 64, 8, |self_gen| {
1292                let unary_gen = (select(INTERESTING_UNARY_FUNCS), self_gen.clone())
1293                    .prop_filter_map("unary func", |(func, (expr_in, type_in))| {
1294                        if !unary_typecheck(&func, &type_in) {
1295                            return None;
1296                        }
1297                        let type_out = func.output_type(type_in);
1298                        let expr_out = MirScalarExpr::CallUnary {
1299                            func,
1300                            expr: Box::new(expr_in),
1301                        };
1302                        Some((expr_out, type_out))
1303                    })
1304                    .boxed();
1305                let binary_gen = (
1306                    select(INTERESTING_BINARY_FUNCS),
1307                    self_gen.clone(),
1308                    self_gen.clone(),
1309                )
1310                    .prop_filter_map(
1311                        "binary func",
1312                        |(func, (expr_left, type_left), (expr_right, type_right))| {
1313                            if !binary_typecheck(&func, &type_left, &type_right) {
1314                                return None;
1315                            }
1316                            let type_out = func.output_type(type_left, type_right);
1317                            let expr_out = MirScalarExpr::CallBinary {
1318                                func,
1319                                expr1: Box::new(expr_left),
1320                                expr2: Box::new(expr_right),
1321                            };
1322                            Some((expr_out, type_out))
1323                        },
1324                    )
1325                    .boxed();
1326                let variadic_gen = (
1327                    select(INTERESTING_VARIADIC_FUNCS),
1328                    prop::collection::vec(self_gen, 1..4),
1329                )
1330                    .prop_filter_map("variadic func", |(func, exprs)| {
1331                        let (exprs_in, type_in): (_, Vec<_>) = exprs.into_iter().unzip();
1332                        if !variadic_typecheck(&func, &type_in) {
1333                            return None;
1334                        }
1335                        let type_out = func.output_type(type_in);
1336                        let expr_out = MirScalarExpr::CallVariadic {
1337                            func,
1338                            exprs: exprs_in,
1339                        };
1340                        Some((expr_out, type_out))
1341                    })
1342                    .boxed();
1343
1344                unary_gen
1345                    .prop_union(binary_gen)
1346                    .boxed()
1347                    .prop_union(variadic_gen)
1348            })
1349            .boxed()
1350    }
1351
1352    fn gen_expr_data() -> impl Strategy<Value = ExpressionData> {
1353        let columns = prop::collection::vec(gen_column(), 1..10);
1354        columns.prop_flat_map(|data| {
1355            let (columns, datums, specs): (Vec<_>, Vec<_>, Vec<_>) = data.into_iter().multiunzip();
1356            let relation = RelationType::new(columns);
1357            let row = Row::pack_slice(&datums);
1358            gen_expr_for_relation(&relation).prop_map(move |(expr, _)| ExpressionData {
1359                relation_type: relation.clone(),
1360                specs: specs.clone(),
1361                rows: vec![row.clone()],
1362                expr,
1363            })
1364        })
1365    }
1366
1367    #[mz_ore::test]
1368    #[cfg_attr(miri, ignore)] // error: unsupported operation: can't call foreign function `decContextDefault` on OS `linux`
1369    fn test_trivial_spec_matches() {
1370        fn check(datum: PropDatum) -> Result<(), TestCaseError> {
1371            let datum: Datum = (&datum).into();
1372            let spec = if datum.is_null() {
1373                ResultSpec::null()
1374            } else {
1375                ResultSpec::value(datum)
1376            };
1377            assert!(spec.may_contain(datum));
1378            Ok(())
1379        }
1380
1381        proptest!(|(datum in mz_repr::arb_datum())| {
1382            check(datum)?;
1383        });
1384
1385        assert!(ResultSpec::fails().may_fail());
1386    }
1387
1388    #[mz_ore::test]
1389    #[cfg_attr(miri, ignore)] // error: unsupported operation: can't call foreign function `decContextDefault` on OS `linux`
1390    fn test_equivalence() {
1391        fn check(data: ExpressionData) -> Result<(), TestCaseError> {
1392            let ExpressionData {
1393                relation_type,
1394                specs,
1395                rows,
1396                expr,
1397            } = data;
1398
1399            // We want to ensure that the spec we get when evaluating an expression using
1400            // `ColumnSpecs` always contains the _actual_ value of that column when evaluated with
1401            // eval. (This is an important correctness property of abstract interpretation.)
1402            let arena = RowArena::new();
1403            let mut interpreter = ColumnSpecs::new(&relation_type, &arena);
1404            for (id, spec) in specs.into_iter().enumerate() {
1405                interpreter.push_column(id, spec);
1406            }
1407
1408            let spec = interpreter.expr(&expr);
1409
1410            for row in &rows {
1411                let datums: Vec<_> = row.iter().collect();
1412                let eval_result = expr.eval(&datums, &arena);
1413                match eval_result {
1414                    Ok(value) => {
1415                        assert!(spec.range.may_contain(value))
1416                    }
1417                    Err(_) => {
1418                        assert!(spec.range.may_fail());
1419                    }
1420                }
1421            }
1422
1423            Ok(())
1424        }
1425
1426        proptest!(|(data in gen_expr_data())| {
1427            check(data)?;
1428        });
1429    }
1430
1431    #[mz_ore::test]
1432    fn test_mfp() {
1433        // Regression test for https://github.com/MaterializeInc/database-issues/issues/5736
1434        use MirScalarExpr::*;
1435
1436        let mfp = MapFilterProject {
1437            expressions: vec![],
1438            predicates: vec![
1439                // Always fails on the known input range
1440                (
1441                    1,
1442                    CallUnary {
1443                        func: UnaryFunc::IsNull(IsNull),
1444                        expr: Box::new(CallBinary {
1445                            func: BinaryFunc::MulInt32,
1446                            expr1: Box::new(Column(0)),
1447                            expr2: Box::new(Column(0)),
1448                        }),
1449                    },
1450                ),
1451                // Always returns false on the known input range
1452                (
1453                    1,
1454                    CallBinary {
1455                        func: BinaryFunc::Eq,
1456                        expr1: Box::new(Column(0)),
1457                        expr2: Box::new(Literal(
1458                            Ok(Row::pack_slice(&[Datum::Int32(1727694505)])),
1459                            ScalarType::Int32.nullable(false),
1460                        )),
1461                    },
1462                ),
1463            ],
1464            projection: vec![],
1465            input_arity: 1,
1466        };
1467
1468        let relation = RelationType::new(vec![ScalarType::Int32.nullable(true)]);
1469        let arena = RowArena::new();
1470        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1471        interpreter.push_column(0, ResultSpec::value(Datum::Int32(-1294725158)));
1472        let spec = interpreter.mfp_filter(&mfp);
1473        assert!(spec.range.may_fail());
1474    }
1475
1476    #[mz_ore::test]
1477    fn test_concat() {
1478        let expr = MirScalarExpr::CallVariadic {
1479            func: VariadicFunc::Concat,
1480            exprs: vec![
1481                MirScalarExpr::Column(0),
1482                MirScalarExpr::literal_ok(Datum::String("a"), ScalarType::String),
1483                MirScalarExpr::literal_ok(Datum::String("b"), ScalarType::String),
1484            ],
1485        };
1486
1487        let relation = RelationType::new(vec![ScalarType::String.nullable(false)]);
1488        let arena = RowArena::new();
1489        let interpreter = ColumnSpecs::new(&relation, &arena);
1490        let spec = interpreter.expr(&expr);
1491        assert!(spec.range.may_contain(Datum::String("blab")));
1492    }
1493
1494    #[mz_ore::test]
1495    fn test_eval_range() {
1496        // Example inspired by the tumbling windows temporal filter in the docs
1497        let period_ms = MirScalarExpr::Literal(
1498            Ok(Row::pack_slice(&[Datum::Int64(10)])),
1499            ScalarType::Int64.nullable(false),
1500        );
1501        let expr = MirScalarExpr::CallBinary {
1502            func: BinaryFunc::Gte,
1503            expr1: Box::new(MirScalarExpr::CallUnmaterializable(
1504                UnmaterializableFunc::MzNow,
1505            )),
1506            expr2: Box::new(MirScalarExpr::CallUnary {
1507                func: UnaryFunc::CastInt64ToMzTimestamp(CastInt64ToMzTimestamp),
1508                expr: Box::new(MirScalarExpr::CallBinary {
1509                    func: BinaryFunc::MulInt64,
1510                    expr1: Box::new(period_ms.clone()),
1511                    expr2: Box::new(MirScalarExpr::CallBinary {
1512                        func: BinaryFunc::DivInt64,
1513                        expr1: Box::new(MirScalarExpr::Column(0)),
1514                        expr2: Box::new(period_ms),
1515                    }),
1516                }),
1517            }),
1518        };
1519        let relation = RelationType::new(vec![ScalarType::Int64.nullable(false)]);
1520
1521        {
1522            // Non-overlapping windows
1523            let arena = RowArena::new();
1524            let mut interpreter = ColumnSpecs::new(&relation, &arena);
1525            interpreter.push_unmaterializable(
1526                UnmaterializableFunc::MzNow,
1527                ResultSpec::value_between(
1528                    Datum::MzTimestamp(10.into()),
1529                    Datum::MzTimestamp(20.into()),
1530                ),
1531            );
1532            interpreter.push_column(0, ResultSpec::value_between(30i64.into(), 40i64.into()));
1533
1534            let range_out = interpreter.expr(&expr).range;
1535            assert!(range_out.may_contain(Datum::False));
1536            assert!(!range_out.may_contain(Datum::True));
1537            assert!(!range_out.may_contain(Datum::Null));
1538            assert!(!range_out.may_fail());
1539        }
1540
1541        {
1542            // Overlapping windows
1543            let arena = RowArena::new();
1544            let mut interpreter = ColumnSpecs::new(&relation, &arena);
1545            interpreter.push_unmaterializable(
1546                UnmaterializableFunc::MzNow,
1547                ResultSpec::value_between(
1548                    Datum::MzTimestamp(10.into()),
1549                    Datum::MzTimestamp(35.into()),
1550                ),
1551            );
1552            interpreter.push_column(0, ResultSpec::value_between(30i64.into(), 40i64.into()));
1553
1554            let range_out = interpreter.expr(&expr).range;
1555            assert!(range_out.may_contain(Datum::False));
1556            assert!(range_out.may_contain(Datum::True));
1557            assert!(!range_out.may_contain(Datum::Null));
1558            assert!(!range_out.may_fail());
1559        }
1560    }
1561
1562    #[mz_ore::test]
1563    #[cfg_attr(miri, ignore)] // error: unsupported operation: can't call foreign function `decNumberFromInt32` on OS `linux`
1564    fn test_jsonb() {
1565        let arena = RowArena::new();
1566
1567        let expr = MirScalarExpr::CallUnary {
1568            func: UnaryFunc::CastJsonbToNumeric(CastJsonbToNumeric(None)),
1569            expr: Box::new(MirScalarExpr::CallBinary {
1570                func: BinaryFunc::JsonbGetString,
1571                expr1: Box::new(MirScalarExpr::Column(0)),
1572                expr2: Box::new(MirScalarExpr::Literal(
1573                    Ok(Row::pack_slice(&["ts".into()])),
1574                    ScalarType::String.nullable(false),
1575                )),
1576            }),
1577        };
1578
1579        let relation = RelationType::new(vec![ScalarType::Jsonb.nullable(true)]);
1580        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1581        interpreter.push_column(
1582            0,
1583            ResultSpec::map_spec(
1584                [(
1585                    "ts".into(),
1586                    ResultSpec::value_between(
1587                        Datum::Numeric(100.into()),
1588                        Datum::Numeric(300.into()),
1589                    ),
1590                )]
1591                .into_iter()
1592                .collect(),
1593            ),
1594        );
1595
1596        let range_out = interpreter.expr(&expr).range;
1597        assert!(!range_out.may_contain(Datum::Numeric(0.into())));
1598        assert!(range_out.may_contain(Datum::Numeric(200.into())));
1599        assert!(!range_out.may_contain(Datum::Numeric(400.into())));
1600    }
1601
1602    #[mz_ore::test]
1603    fn test_like() {
1604        let arena = RowArena::new();
1605
1606        let expr = MirScalarExpr::CallUnary {
1607            func: UnaryFunc::IsLikeMatch(IsLikeMatch(
1608                crate::like_pattern::compile("%whatever%", true).unwrap(),
1609            )),
1610            expr: Box::new(MirScalarExpr::Column(0)),
1611        };
1612
1613        let relation = RelationType::new(vec![ScalarType::String.nullable(true)]);
1614        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1615        interpreter.push_column(
1616            0,
1617            ResultSpec::value_between(Datum::String("aardvark"), Datum::String("zebra")),
1618        );
1619
1620        let range_out = interpreter.expr(&expr).range;
1621        assert!(
1622            !range_out.fallible,
1623            "like function should not error on non-error input"
1624        );
1625        assert!(range_out.may_contain(Datum::True));
1626        assert!(range_out.may_contain(Datum::False));
1627        assert!(range_out.may_contain(Datum::Null));
1628    }
1629
1630    #[mz_ore::test]
1631    fn test_try_parse_monotonic_iso8601_timestamp() {
1632        use chrono::NaiveDateTime;
1633
1634        let arena = RowArena::new();
1635
1636        let expr = MirScalarExpr::CallUnary {
1637            func: UnaryFunc::TryParseMonotonicIso8601Timestamp(TryParseMonotonicIso8601Timestamp),
1638            expr: Box::new(MirScalarExpr::Column(0)),
1639        };
1640
1641        let relation = RelationType::new(vec![ScalarType::String.nullable(true)]);
1642        // Test the case where we have full timestamps as bounds.
1643        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1644        interpreter.push_column(
1645            0,
1646            ResultSpec::value_between(
1647                Datum::String("2024-01-11T00:00:00.000Z"),
1648                Datum::String("2024-01-11T20:00:00.000Z"),
1649            ),
1650        );
1651
1652        let timestamp = |ts| {
1653            Datum::Timestamp(
1654                NaiveDateTime::parse_from_str(ts, "%Y-%m-%dT%H:%M:%S")
1655                    .unwrap()
1656                    .try_into()
1657                    .unwrap(),
1658            )
1659        };
1660
1661        let range_out = interpreter.expr(&expr).range;
1662        assert!(!range_out.fallible);
1663        assert!(range_out.nullable);
1664        assert!(!range_out.may_contain(timestamp("2024-01-10T10:00:00")));
1665        assert!(range_out.may_contain(timestamp("2024-01-11T10:00:00")));
1666        assert!(!range_out.may_contain(timestamp("2024-01-12T10:00:00")));
1667
1668        // Test the case where we have truncated / useless bounds.
1669        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1670        interpreter.push_column(
1671            0,
1672            ResultSpec::value_between(Datum::String("2024-01-1"), Datum::String("2024-01-2")),
1673        );
1674
1675        let range_out = interpreter.expr(&expr).range;
1676        assert!(!range_out.fallible);
1677        assert!(range_out.nullable);
1678        assert!(range_out.may_contain(timestamp("2024-01-10T10:00:00")));
1679        assert!(range_out.may_contain(timestamp("2024-01-11T10:00:00")));
1680        assert!(range_out.may_contain(timestamp("2024-01-12T10:00:00")));
1681
1682        // Test the case where only one bound is truncated
1683        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1684        interpreter.push_column(
1685            0,
1686            ResultSpec::value_between(
1687                Datum::String("2024-01-1"),
1688                Datum::String("2024-01-12T10:00:00"),
1689            )
1690            .union(ResultSpec::null()),
1691        );
1692
1693        let range_out = interpreter.expr(&expr).range;
1694        assert!(!range_out.fallible);
1695        assert!(range_out.nullable);
1696        assert!(range_out.may_contain(timestamp("2024-01-10T10:00:00")));
1697        assert!(range_out.may_contain(timestamp("2024-01-11T10:00:00")));
1698        assert!(range_out.may_contain(timestamp("2024-01-12T10:00:00")));
1699
1700        // Test the case where the upper and lower bound are identical
1701        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1702        interpreter.push_column(
1703            0,
1704            ResultSpec::value_between(
1705                Datum::String("2024-01-11T10:00:00.000Z"),
1706                Datum::String("2024-01-11T10:00:00.000Z"),
1707            ),
1708        );
1709
1710        let range_out = interpreter.expr(&expr).range;
1711        assert!(!range_out.fallible);
1712        assert!(!range_out.nullable);
1713        assert!(!range_out.may_contain(timestamp("2024-01-10T10:00:00")));
1714        assert!(range_out.may_contain(timestamp("2024-01-11T10:00:00")));
1715        assert!(!range_out.may_contain(timestamp("2024-01-12T10:00:00")));
1716    }
1717
1718    #[mz_ore::test]
1719    fn test_inequality() {
1720        let arena = RowArena::new();
1721
1722        let expr = MirScalarExpr::CallBinary {
1723            func: BinaryFunc::Gte,
1724            expr1: Box::new(MirScalarExpr::Column(0)),
1725            expr2: Box::new(MirScalarExpr::CallUnmaterializable(
1726                UnmaterializableFunc::MzNow,
1727            )),
1728        };
1729
1730        let relation = RelationType::new(vec![ScalarType::MzTimestamp.nullable(true)]);
1731        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1732        interpreter.push_column(
1733            0,
1734            ResultSpec::value_between(
1735                Datum::MzTimestamp(1704736444949u64.into()),
1736                Datum::MzTimestamp(1704736444949u64.into()),
1737            )
1738            .union(ResultSpec::null()),
1739        );
1740        interpreter.push_unmaterializable(
1741            UnmaterializableFunc::MzNow,
1742            ResultSpec::value_between(
1743                Datum::MzTimestamp(1704738791000u64.into()),
1744                Datum::MzTimestamp(18446744073709551615u64.into()),
1745            ),
1746        );
1747
1748        let range_out = interpreter.expr(&expr).range;
1749        assert!(
1750            !range_out.fallible,
1751            "<= function should not error on non-error input"
1752        );
1753        assert!(!range_out.may_contain(Datum::True));
1754        assert!(range_out.may_contain(Datum::False));
1755        assert!(range_out.may_contain(Datum::Null));
1756    }
1757
1758    #[mz_ore::test]
1759    fn test_trace() {
1760        use super::Trace;
1761
1762        let expr = MirScalarExpr::CallBinary {
1763            func: BinaryFunc::Gte,
1764            expr1: Box::new(MirScalarExpr::Column(0)),
1765            expr2: Box::new(MirScalarExpr::CallBinary {
1766                func: BinaryFunc::AddInt64,
1767                expr1: Box::new(MirScalarExpr::Column(1)),
1768                expr2: Box::new(MirScalarExpr::CallUnary {
1769                    func: UnaryFunc::NegInt64(NegInt64),
1770                    expr: Box::new(MirScalarExpr::Column(3)),
1771                }),
1772            }),
1773        };
1774        let summary = Trace.expr(&expr);
1775        assert!(summary.pushdownable());
1776    }
1777}