mz_expr/
interpret.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10use std::collections::BTreeMap;
11use std::fmt::Debug;
12
13use mz_repr::{Datum, Row, RowArena, SqlColumnType, SqlRelationType, SqlScalarType};
14
15use crate::{
16    BinaryFunc, EvalError, MapFilterProject, MfpPlan, MirScalarExpr, UnaryFunc,
17    UnmaterializableFunc, VariadicFunc, func,
18};
19
20/// An inclusive range of non-null datum values.
21#[derive(Clone, Eq, PartialEq, Debug)]
22enum Values<'a> {
23    /// This range contains no values.
24    Empty,
25    /// An inclusive range. Invariant: the first element is always <= the second.
26    // TODO: a variant for small sets of data would avoid losing precision here.
27    Within(Datum<'a>, Datum<'a>),
28    /// Constraints on structured fields, useful for recursive structures like maps.
29    /// Fields that are not present in the map default to Values::All.
30    // TODO: consider using this variant, or similar, for Datum::List.
31    Nested(BTreeMap<Datum<'a>, ResultSpec<'a>>),
32    /// This range might contain any value. Since we're overapproximating, this is often used
33    /// as a "safe fallback" when we can't determine the right boundaries for a range.
34    All,
35}
36
37impl<'a> Values<'a> {
38    fn just(a: Datum<'a>) -> Values<'a> {
39        match a {
40            Datum::Map(datum_map) => Values::Nested(
41                datum_map
42                    .iter()
43                    .map(|(key, val)| (key.into(), ResultSpec::value(val)))
44                    .collect(),
45            ),
46            other => Self::Within(other, other),
47        }
48    }
49
50    fn union(self, other: Values<'a>) -> Values<'a> {
51        match (self, other) {
52            (Values::Empty, r) => r,
53            (r, Values::Empty) => r,
54            (Values::Within(a0, a1), Values::Within(b0, b1)) => {
55                Values::Within(a0.min(b0), a1.max(b1))
56            }
57            (Values::Nested(mut a), Values::Nested(mut b)) => {
58                a.retain(|datum, values| {
59                    if let Some(other_values) = b.remove(datum) {
60                        *values = values.clone().union(other_values);
61                    }
62                    *values != ResultSpec::anything()
63                });
64                if a.is_empty() {
65                    Values::All
66                } else {
67                    Values::Nested(a)
68                }
69            }
70            _ => Values::All,
71        }
72    }
73
74    fn intersect(self, other: Values<'a>) -> Values<'a> {
75        match (self, other) {
76            (Values::Empty, _) => Values::Empty,
77            (_, Values::Empty) => Values::Empty,
78            (Values::Within(a0, a1), Values::Within(b0, b1)) => {
79                let min = a0.max(b0);
80                let max = a1.min(b1);
81                if min <= max {
82                    Values::Within(min, max)
83                } else {
84                    Values::Empty
85                }
86            }
87            (Values::Nested(mut a), Values::Nested(b)) => {
88                for (datum, other_spec) in b {
89                    let spec = a.entry(datum).or_insert_with(ResultSpec::anything);
90                    *spec = spec.clone().intersect(other_spec);
91                }
92                Values::Nested(a)
93            }
94            (Values::All, v) => v,
95            (v, Values::All) => v,
96            (Values::Nested(_), Values::Within(_, _)) => Values::Empty,
97            (Values::Within(_, _), Values::Nested(_)) => Values::Empty,
98        }
99    }
100
101    fn may_contain(&self, value: Datum<'a>) -> bool {
102        match self {
103            Values::Empty => false,
104            Values::Within(min, max) => *min <= value && value <= *max,
105            Values::All => true,
106            Values::Nested(field_map) => match value {
107                Datum::Map(datum_map) => {
108                    datum_map
109                        .iter()
110                        .all(|(key, val)| match field_map.get(&key.into()) {
111                            None => true,
112                            Some(nested) => nested.may_contain(val),
113                        })
114                }
115                _ => false,
116            },
117        }
118    }
119}
120
121/// An approximation of the set of values an expression might have, including whether or not it
122/// might be null or an error value. This is generally an _overapproximation_, in the sense that
123/// [ResultSpec::may_contain] may return true even if the argument is not included in the set.
124/// (However, it should never return false when the value _is_ included!)
125#[derive(Debug, Clone, Eq, PartialEq)]
126pub struct ResultSpec<'a> {
127    /// True if the expression may evaluate to [Datum::Null].
128    nullable: bool,
129    /// True if the expression may evaluate to an error.
130    fallible: bool,
131    /// The range of possible (non-null) values that the expression may evaluate to.
132    values: Values<'a>,
133}
134
135impl<'a> ResultSpec<'a> {
136    /// No results match this spec. (For example, an empty table.)
137    pub fn nothing() -> Self {
138        ResultSpec {
139            nullable: false,
140            fallible: false,
141            values: Values::Empty,
142        }
143    }
144
145    /// Every result matches this spec.
146    pub fn anything() -> Self {
147        ResultSpec {
148            nullable: true,
149            fallible: true,
150            values: Values::All,
151        }
152    }
153
154    /// Every result matches this spec.
155    pub fn any_infallible() -> Self {
156        ResultSpec {
157            nullable: true,
158            fallible: false,
159            values: Values::All,
160        }
161    }
162
163    /// A spec that only matches null.
164    pub fn null() -> Self {
165        ResultSpec {
166            nullable: true,
167            ..Self::nothing()
168        }
169    }
170
171    /// A spec that only matches error values.
172    pub fn fails() -> Self {
173        ResultSpec {
174            fallible: true,
175            ..Self::nothing()
176        }
177    }
178
179    /// A spec that matches all values of a given type.
180    pub fn has_type(col: &SqlColumnType, fallible: bool) -> ResultSpec<'a> {
181        let values = match &col.scalar_type {
182            SqlScalarType::Bool => Values::Within(Datum::False, Datum::True),
183            // TODO: add bounds for other bounded types, like integers
184            _ => Values::All,
185        };
186        ResultSpec {
187            nullable: col.nullable,
188            fallible,
189            values,
190        }
191    }
192
193    /// A spec that only matches the given value.
194    pub fn value(value: Datum<'a>) -> ResultSpec<'a> {
195        match value {
196            Datum::Null => Self::null(),
197            nonnull => ResultSpec {
198                values: Values::just(nonnull),
199                ..Self::nothing()
200            },
201        }
202    }
203
204    /// A spec that matches values between the given (non-null) min and max.
205    pub fn value_between(min: Datum<'a>, max: Datum<'a>) -> ResultSpec<'a> {
206        assert!(!min.is_null());
207        assert!(!max.is_null());
208        if min <= max {
209            ResultSpec {
210                values: Values::Within(min, max),
211                ..ResultSpec::nothing()
212            }
213        } else {
214            ResultSpec::nothing()
215        }
216    }
217
218    /// A spec that matches any non-null value.
219    pub fn value_all() -> ResultSpec<'a> {
220        ResultSpec {
221            values: Values::All,
222            ..ResultSpec::nothing()
223        }
224    }
225
226    /// A spec that matches Datum::Maps of the given type.
227    pub fn map_spec(map: BTreeMap<Datum<'a>, ResultSpec<'a>>) -> ResultSpec<'a> {
228        ResultSpec {
229            values: Values::Nested(map),
230            ..ResultSpec::nothing()
231        }
232    }
233
234    /// Given two specs, returns a new spec that matches anything that either original spec would match.
235    pub fn union(self, other: ResultSpec<'a>) -> ResultSpec<'a> {
236        ResultSpec {
237            nullable: self.nullable || other.nullable,
238            fallible: self.fallible || other.fallible,
239            values: self.values.union(other.values),
240        }
241    }
242
243    /// Given two specs, returns a new spec that only matches things that both original specs would match.
244    pub fn intersect(self, other: ResultSpec<'a>) -> ResultSpec<'a> {
245        ResultSpec {
246            nullable: self.nullable && other.nullable,
247            fallible: self.fallible && other.fallible,
248            values: self.values.intersect(other.values),
249        }
250    }
251
252    /// Check if a particular value matches the spec.
253    pub fn may_contain(&self, value: Datum<'a>) -> bool {
254        if value == Datum::Null {
255            return self.nullable;
256        }
257
258        self.values.may_contain(value)
259    }
260
261    /// Check if an error value matches the spec.
262    pub fn may_fail(&self) -> bool {
263        self.fallible
264    }
265
266    /// This method "maps" a function across the `ResultSpec`.
267    ///
268    /// As mentioned above, `ResultSpec` represents an approximate set of results.
269    /// If we actually stored each result in the set, `flat_map` could be implemented by passing
270    /// each result to the function one-by-one and unioning the resulting sets. This is possible
271    /// when our values set is empty or contains a single datum, but when it contains a range,
272    /// we can't enumerate all possible values of the set. We handle this by:
273    /// - tracking whether the function is monotone, in which case we can map the range by just
274    ///   mapping the endpoints;
275    /// - using a safe default when we can't infer a tighter bound on the set, eg. [Self::anything].
276    fn flat_map(
277        &self,
278        is_monotone: bool,
279        mut result_map: impl FnMut(Result<Datum<'a>, EvalError>) -> ResultSpec<'a>,
280    ) -> ResultSpec<'a> {
281        let null_spec = if self.nullable {
282            result_map(Ok(Datum::Null))
283        } else {
284            ResultSpec::nothing()
285        };
286
287        let error_spec = if self.fallible {
288            // Since we only care about whether / not an error is possible, and not the specific
289            // error, create an arbitrary error here.
290            // NOTE! This assumes that functions do not discriminate on the type of the error.
291            let map_err = result_map(Err(EvalError::Internal("".into())));
292            let raise_err = ResultSpec::fails();
293            // SQL has a very loose notion of evaluation order: https://www.postgresql.org/docs/current/sql-expressions.html#SYNTAX-EXPRESS-EVAL
294            // Here, we account for the possibility that the expression is evaluated strictly,
295            // raising the error, or that it's evaluated lazily by the result_map function
296            // (which may return a non-error result even when given an error as input).
297            raise_err.union(map_err)
298        } else {
299            ResultSpec::nothing()
300        };
301
302        let values_spec = match self.values {
303            Values::Empty => ResultSpec::nothing(),
304            // If this range contains a single datum, just call the function.
305            Values::Within(min, max) if min == max => result_map(Ok(min)),
306            // If this is a range of booleans, we know all the values... just try them.
307            Values::Within(Datum::False, Datum::True) => {
308                result_map(Ok(Datum::False)).union(result_map(Ok(Datum::True)))
309            }
310            // Otherwise, if our function is monotonic, we can try mapping the input
311            // range to an output range.
312            Values::Within(min, max) if is_monotone => {
313                let min_result = result_map(Ok(min));
314                let max_result = result_map(Ok(max));
315                match (min_result, max_result) {
316                    // If both endpoints give us a range, the result is a union of those ranges.
317                    (
318                        ResultSpec {
319                            nullable: false,
320                            fallible: false,
321                            values: a_values,
322                        },
323                        ResultSpec {
324                            nullable: false,
325                            fallible: false,
326                            values: b_values,
327                        },
328                    ) => ResultSpec {
329                        nullable: false,
330                        fallible: false,
331                        values: a_values.union(b_values),
332                    },
333                    // If both endpoints are null, we assume the whole range maps to null.
334                    (
335                        ResultSpec {
336                            nullable: true,
337                            fallible: false,
338                            values: Values::Empty,
339                        },
340                        ResultSpec {
341                            nullable: true,
342                            fallible: false,
343                            values: Values::Empty,
344                        },
345                    ) => ResultSpec::null(),
346                    // Otherwise we can't assume anything about the output.
347                    _ => ResultSpec::anything(),
348                }
349            }
350            // TODO: we could return a narrower result for eg. `Values::Nested` with all-`Within` fields.
351            Values::Within(_, _) | Values::Nested(_) | Values::All => ResultSpec::anything(),
352        };
353
354        null_spec.union(error_spec).union(values_spec)
355    }
356}
357
358/// [Abstract interpretation](https://en.wikipedia.org/wiki/Abstract_interpretation) for
359/// [MirScalarExpr].
360///
361/// [MirScalarExpr::eval] implements a "concrete interpreter" for the expression type: given an
362/// expression and specific column values as input, it returns a specific value for the output.
363/// This could be reimplemented using this trait... but it's most useful for "abstract"
364/// interpretations of the expression, where we generalize about sets of possible inputs and outputs.
365/// See [Trace] and [ColumnSpecs] for how this can be useful in practice.
366pub trait Interpreter {
367    type Summary: Clone + Debug + Sized;
368
369    /// A column of the input row.
370    fn column(&self, id: usize) -> Self::Summary;
371
372    /// A literal value.
373    /// (Stored as a row, because we can't own a Datum.)
374    fn literal(&self, result: &Result<Row, EvalError>, col_type: &SqlColumnType) -> Self::Summary;
375
376    /// A call to an unmaterializable function.
377    ///
378    /// These functions cannot be evaluated by `MirScalarExpr::eval`. They must
379    /// be transformed away by a higher layer.
380    fn unmaterializable(&self, func: &UnmaterializableFunc) -> Self::Summary;
381
382    /// A function call that takes one expression as an argument.
383    fn unary(&self, func: &UnaryFunc, expr: Self::Summary) -> Self::Summary;
384
385    /// A function call that takes two expressions as arguments.
386    fn binary(&self, func: &BinaryFunc, left: Self::Summary, right: Self::Summary)
387    -> Self::Summary;
388
389    /// A function call that takes an arbitrary number of arguments.
390    fn variadic(&self, func: &VariadicFunc, exprs: Vec<Self::Summary>) -> Self::Summary;
391
392    /// Conditionally evaluated expressions.
393    fn cond(&self, cond: Self::Summary, then: Self::Summary, els: Self::Summary) -> Self::Summary;
394
395    /// Evaluate an entire expression, by delegating to the fine-grained methods on [Interpreter].
396    fn expr(&self, expr: &MirScalarExpr) -> Self::Summary {
397        match expr {
398            MirScalarExpr::Column(id, _name) => self.column(*id),
399            MirScalarExpr::Literal(value, col_type) => self.literal(value, col_type),
400            MirScalarExpr::CallUnmaterializable(func) => self.unmaterializable(func),
401            MirScalarExpr::CallUnary { func, expr } => {
402                let expr_range = self.expr(expr);
403                self.unary(func, expr_range)
404            }
405            MirScalarExpr::CallBinary { func, expr1, expr2 } => {
406                let expr1_range = self.expr(expr1);
407                let expr2_range = self.expr(expr2);
408                self.binary(func, expr1_range, expr2_range)
409            }
410            MirScalarExpr::CallVariadic { func, exprs } => {
411                let exprs: Vec<_> = exprs.into_iter().map(|e| self.expr(e)).collect();
412                self.variadic(func, exprs)
413            }
414            MirScalarExpr::If { cond, then, els } => {
415                let cond_range = self.expr(cond);
416                let then_range = self.expr(then);
417                let els_range = self.expr(els);
418                self.cond(cond_range, then_range, els_range)
419            }
420        }
421    }
422
423    /// Specifically, this evaluates the map and filters stages of an MFP: summarize each of the
424    /// map expressions, then `and` together all of the filters.
425    fn mfp_filter(&self, mfp: &MapFilterProject) -> Self::Summary {
426        let mfp_eval = MfpEval::new(self, mfp.input_arity, &mfp.expressions);
427        // NB: self should not be used after this point!
428        let predicates = mfp
429            .predicates
430            .iter()
431            .map(|(_, e)| mfp_eval.expr(e))
432            .collect();
433        mfp_eval.variadic(&VariadicFunc::And, predicates)
434    }
435
436    /// Similar to [Self::mfp_filter], but includes the additional temporal filters that have been
437    /// broken out.
438    fn mfp_plan_filter(&self, plan: &MfpPlan) -> Self::Summary {
439        let mfp_eval = MfpEval::new(self, plan.mfp.input_arity, &plan.mfp.expressions);
440        // NB: self should not be used after this point!
441        let mut results: Vec<_> = plan
442            .mfp
443            .predicates
444            .iter()
445            .map(|(_, e)| mfp_eval.expr(e))
446            .collect();
447        let mz_now = mfp_eval.unmaterializable(&UnmaterializableFunc::MzNow);
448        for bound in &plan.lower_bounds {
449            let bound_range = mfp_eval.expr(bound);
450            let result = mfp_eval.binary(&BinaryFunc::Lte(func::Lte), bound_range, mz_now.clone());
451            results.push(result);
452        }
453        for bound in &plan.upper_bounds {
454            let bound_range = mfp_eval.expr(bound);
455            let result = mfp_eval.binary(&BinaryFunc::Gte(func::Gte), bound_range, mz_now.clone());
456            results.push(result);
457        }
458        self.variadic(&VariadicFunc::And, results)
459    }
460}
461
462/// Wrap another interpreter, but tack a few extra columns on at the end. An internal implementation
463/// detail of `eval_mfp` and `eval_mfp_plan`.
464pub(crate) struct MfpEval<'a, E: Interpreter + ?Sized> {
465    evaluator: &'a E,
466    input_arity: usize,
467    expressions: Vec<E::Summary>,
468}
469
470impl<'a, E: Interpreter + ?Sized> MfpEval<'a, E> {
471    pub(crate) fn new(evaluator: &'a E, input_arity: usize, expressions: &[MirScalarExpr]) -> Self {
472        let mut mfp_eval = MfpEval {
473            evaluator,
474            input_arity,
475            expressions: vec![],
476        };
477        for expr in expressions {
478            let result = mfp_eval.expr(expr);
479            mfp_eval.expressions.push(result);
480        }
481        mfp_eval
482    }
483}
484
485impl<'a, E: Interpreter + ?Sized> Interpreter for MfpEval<'a, E> {
486    type Summary = E::Summary;
487
488    fn column(&self, id: usize) -> Self::Summary {
489        if id < self.input_arity {
490            self.evaluator.column(id)
491        } else {
492            self.expressions[id - self.input_arity].clone()
493        }
494    }
495
496    fn literal(&self, result: &Result<Row, EvalError>, col_type: &SqlColumnType) -> Self::Summary {
497        self.evaluator.literal(result, col_type)
498    }
499
500    fn unmaterializable(&self, func: &UnmaterializableFunc) -> Self::Summary {
501        self.evaluator.unmaterializable(func)
502    }
503
504    fn unary(&self, func: &UnaryFunc, expr: Self::Summary) -> Self::Summary {
505        self.evaluator.unary(func, expr)
506    }
507
508    fn binary(
509        &self,
510        func: &BinaryFunc,
511        left: Self::Summary,
512        right: Self::Summary,
513    ) -> Self::Summary {
514        self.evaluator.binary(func, left, right)
515    }
516
517    fn variadic(&self, func: &VariadicFunc, exprs: Vec<Self::Summary>) -> Self::Summary {
518        self.evaluator.variadic(func, exprs)
519    }
520
521    fn cond(&self, cond: Self::Summary, then: Self::Summary, els: Self::Summary) -> Self::Summary {
522        self.evaluator.cond(cond, then, els)
523    }
524}
525
526/// A unary function we've added special-case handling for; including:
527/// - A three-argument function, taking and returning [ResultSpec]s. This
528///   overrides the default function-handling logic entirely.
529/// - Metadata on whether / not this function is pushdownable. See [Trace].
530struct SpecialUnary {
531    map_fn: for<'a, 'b> fn(&'b ColumnSpecs<'a>, ResultSpec<'a>) -> ResultSpec<'a>,
532    pushdownable: bool,
533}
534
535impl SpecialUnary {
536    /// Returns the special-case handling for a particular function, if it exists.
537    fn for_func(func: &UnaryFunc) -> Option<SpecialUnary> {
538        /// Eager in the same sense as `func.rs` uses the term; this assumes that
539        /// nulls and errors propagate up, and we only need to define the behaviour
540        /// on values.
541        fn eagerly<'b>(
542            spec: ResultSpec<'b>,
543            value_fn: impl FnOnce(Values<'b>) -> ResultSpec<'b>,
544        ) -> ResultSpec<'b> {
545            let result = match spec.values {
546                Values::Empty => ResultSpec::nothing(),
547                other => value_fn(other),
548            };
549            ResultSpec {
550                fallible: spec.fallible || result.fallible,
551                nullable: spec.nullable || result.nullable,
552                values: result.values,
553            }
554        }
555        match func {
556            UnaryFunc::TryParseMonotonicIso8601Timestamp(_) => Some(SpecialUnary {
557                map_fn: |specs, range| {
558                    let expr = MirScalarExpr::CallUnary {
559                        func: UnaryFunc::TryParseMonotonicIso8601Timestamp(
560                            crate::func::TryParseMonotonicIso8601Timestamp,
561                        ),
562                        expr: Box::new(MirScalarExpr::column(0)),
563                    };
564                    let eval = |d| specs.eval_result(expr.eval(&[d], specs.arena));
565
566                    eagerly(range, |values| {
567                        match values {
568                            Values::Within(a, b) if a == b => eval(a),
569                            Values::Within(a, b) => {
570                                let spec = eval(a).union(eval(b));
571                                let values_spec = if spec.nullable {
572                                    // At least one of the endpoints of the range wasn't a valid
573                                    // timestamp. We can't compute a precise range in this case.
574                                    // If we used the general is_monotone handling, that code would
575                                    // incorrectly assume the whole range mapped to null if each
576                                    // endpoint did.
577                                    ResultSpec::value_all()
578                                } else {
579                                    spec
580                                };
581                                // A range of strings will always contain strings that don't parse
582                                // as timestamps - so unlike the general case, we'll assume null
583                                // is present in every range of output values.
584                                values_spec.union(ResultSpec::null())
585                            }
586                            // Otherwise, assume the worst: this function may return either a valid
587                            // value or null.
588                            _ => ResultSpec::any_infallible(),
589                        }
590                    })
591                },
592                pushdownable: true,
593            }),
594            _ => None,
595        }
596    }
597}
598
599/// A binary function we've added special-case handling for; including:
600/// - A two-argument function, taking and returning [ResultSpec]s. This overrides the
601///   default function-handling logic entirely.
602/// - Metadata on whether / not this function is pushdownable. See [Trace].
603struct SpecialBinary {
604    map_fn: for<'a> fn(ResultSpec<'a>, ResultSpec<'a>) -> ResultSpec<'a>,
605    pushdownable: (bool, bool),
606}
607
608impl SpecialBinary {
609    /// Returns the special-case handling for a particular function, if it exists.
610    fn for_func(func: &BinaryFunc) -> Option<SpecialBinary> {
611        /// Eager in the same sense as `func.rs` uses the term; this assumes that
612        /// nulls and errors propagate up, and we only need to define the behaviour
613        /// on values.
614        fn eagerly<'b>(
615            left: ResultSpec<'b>,
616            right: ResultSpec<'b>,
617            value_fn: impl FnOnce(Values<'b>, Values<'b>) -> ResultSpec<'b>,
618        ) -> ResultSpec<'b> {
619            let result = match (left.values, right.values) {
620                (Values::Empty, _) | (_, Values::Empty) => ResultSpec::nothing(),
621                (l, r) => value_fn(l, r),
622            };
623            ResultSpec {
624                fallible: left.fallible || right.fallible || result.fallible,
625                nullable: left.nullable || right.nullable || result.nullable,
626                values: result.values,
627            }
628        }
629
630        fn jsonb_get_string<'b>(
631            left: ResultSpec<'b>,
632            right: ResultSpec<'b>,
633            stringify: bool,
634        ) -> ResultSpec<'b> {
635            eagerly(left, right, |left, right| {
636                let nested_spec = match (left, right) {
637                    (Values::Nested(mut map_spec), Values::Within(key, key2)) if key == key2 => {
638                        map_spec.remove(&key)
639                    }
640                    _ => None,
641                };
642
643                if let Some(field_spec) = nested_spec {
644                    if stringify {
645                        // We only preserve value-range information when stringification
646                        // is a noop. (Common in real queries.)
647                        let values = match field_spec.values {
648                            Values::Empty => Values::Empty,
649                            Values::Within(min @ Datum::String(_), max @ Datum::String(_)) => {
650                                Values::Within(min, max)
651                            }
652                            Values::Within(_, _) | Values::Nested(_) | Values::All => Values::All,
653                        };
654                        ResultSpec {
655                            values,
656                            ..field_spec
657                        }
658                    } else {
659                        field_spec
660                    }
661                } else {
662                    // The implementation of `jsonb_get_string` always returns
663                    // `Ok(...)`. Morally, everything has a string
664                    // representation, and the worst you can get is a NULL,
665                    // which maps to a NULL.
666                    ResultSpec::any_infallible()
667                }
668            })
669        }
670
671        fn eq<'b>(left: ResultSpec<'b>, right: ResultSpec<'b>) -> ResultSpec<'b> {
672            eagerly(left, right, |left, right| {
673                // `eq` might return true if there's any overlap between the range of its two arguments...
674                let maybe_true = match left.clone().intersect(right.clone()) {
675                    Values::Empty => ResultSpec::nothing(),
676                    _ => ResultSpec::value(Datum::True),
677                };
678
679                // ...and may return false if the union contains at least two distinct values.
680                // Note that the `Empty` case is handled by `eagerly` above.
681                let maybe_false = match left.union(right) {
682                    Values::Within(a, b) if a == b => ResultSpec::nothing(),
683                    _ => ResultSpec::value(Datum::False),
684                };
685
686                maybe_true.union(maybe_false)
687            })
688        }
689
690        match func {
691            BinaryFunc::JsonbGetString => Some(SpecialBinary {
692                map_fn: |l, r| jsonb_get_string(l, r, false),
693                pushdownable: (true, false),
694            }),
695            BinaryFunc::JsonbGetStringStringify => Some(SpecialBinary {
696                map_fn: |l, r| jsonb_get_string(l, r, true),
697                pushdownable: (true, false),
698            }),
699            BinaryFunc::Eq(_) => Some(SpecialBinary {
700                map_fn: eq,
701                pushdownable: (true, true),
702            }),
703            _ => None,
704        }
705    }
706}
707
708#[derive(Clone, Debug)]
709pub struct ColumnSpec<'a> {
710    pub col_type: SqlColumnType,
711    pub range: ResultSpec<'a>,
712}
713
714/// An interpreter that:
715/// - stores both the type and the range of possible values for every column and
716///   unmaterializable function. (See the `push_` methods.)
717/// - given an expression (or MFP, etc.), returns the range of possible results that evaluating that
718///   expression might have. (See the `eval_` methods.)
719#[derive(Clone, Debug)]
720pub struct ColumnSpecs<'a> {
721    pub relation: &'a SqlRelationType,
722    pub columns: Vec<ResultSpec<'a>>,
723    pub unmaterializables: BTreeMap<UnmaterializableFunc, ResultSpec<'a>>,
724    pub arena: &'a RowArena,
725}
726
727impl<'a> ColumnSpecs<'a> {
728    // Interpreting a variadic function can lead to exponential blowup: there are up to 4 possibly-
729    // interesting values for each argument (error, null, range bounds...) and in the worst case
730    // we may need to test every combination. We mitigate that here in two ways:
731    // - Adding a linear-time optimization for associative functions like AND, OR, COALESCE.
732    // - Limiting the number of arguments we'll pass on to eval. If this limit is crossed, we'll
733    //   return our default / safe overapproximation instead.
734    const MAX_EVAL_ARGS: usize = 6;
735
736    /// Create a new, empty set of column specs. (Initially, the only assumption we make about the
737    /// data in the column is that it matches the type.)
738    pub fn new(relation: &'a SqlRelationType, arena: &'a RowArena) -> Self {
739        let columns = relation
740            .column_types
741            .iter()
742            .map(|ct| ResultSpec::has_type(ct, false))
743            .collect();
744        ColumnSpecs {
745            relation,
746            columns,
747            unmaterializables: Default::default(),
748            arena,
749        }
750    }
751
752    /// Restrict the set of possible values in a given column. (By intersecting it with the existing
753    /// spec.)
754    pub fn push_column(&mut self, id: usize, update: ResultSpec<'a>) {
755        let range = self.columns.get_mut(id).expect("valid column id");
756        *range = range.clone().intersect(update);
757    }
758
759    /// Restrict the set of possible values a given unmaterializable func might return. (By
760    /// intersecting it with the existing spec.)
761    pub fn push_unmaterializable(&mut self, func: UnmaterializableFunc, update: ResultSpec<'a>) {
762        let range = self
763            .unmaterializables
764            .entry(func.clone())
765            .or_insert_with(|| ResultSpec::has_type(&func.output_type(), true));
766        *range = range.clone().intersect(update);
767    }
768
769    fn eval_result<'b, E>(&self, result: Result<Datum<'b>, E>) -> ResultSpec<'a> {
770        match result {
771            Ok(Datum::Null) => ResultSpec {
772                nullable: true,
773                ..ResultSpec::nothing()
774            },
775            Ok(d) => ResultSpec {
776                values: Values::just(self.arena.make_datum(|packer| packer.push(d))),
777                ..ResultSpec::nothing()
778            },
779            Err(_) => ResultSpec {
780                fallible: true,
781                ..ResultSpec::nothing()
782            },
783        }
784    }
785
786    fn set_literal(expr: &mut MirScalarExpr, update: Result<Datum, EvalError>) {
787        match expr {
788            MirScalarExpr::Literal(literal, col_type) => match update {
789                Err(error) => *literal = Err(error),
790                Ok(datum) => {
791                    assert!(
792                        datum.is_instance_of_sql(col_type),
793                        "{datum:?} must be an instance of {col_type:?}"
794                    );
795                    match literal {
796                        // Reuse the allocation if we can
797                        Ok(row) => row.packer().push(datum),
798                        literal => *literal = Ok(Row::pack_slice(&[datum])),
799                    }
800                }
801            },
802            _ => panic!("not a literal"),
803        }
804    }
805
806    fn set_argument(expr: &mut MirScalarExpr, arg: usize, value: Result<Datum, EvalError>) {
807        match (expr, arg) {
808            (MirScalarExpr::CallUnary { expr, .. }, 0) => Self::set_literal(expr, value),
809            (MirScalarExpr::CallBinary { expr1, .. }, 0) => Self::set_literal(expr1, value),
810            (MirScalarExpr::CallBinary { expr2, .. }, 1) => Self::set_literal(expr2, value),
811            (MirScalarExpr::CallVariadic { exprs, .. }, n) if n < exprs.len() => {
812                Self::set_literal(&mut exprs[n], value)
813            }
814            _ => panic!("illegal argument for expression"),
815        }
816    }
817
818    /// A literal with the given type and a trivial default value. Callers should ensure that
819    /// [Self::set_literal] is called on the resulting expression to give it a meaningful value
820    /// before evaluating.
821    fn placeholder(col_type: SqlColumnType) -> MirScalarExpr {
822        MirScalarExpr::Literal(Err(EvalError::Internal("".into())), col_type)
823    }
824}
825
826impl<'a> Interpreter for ColumnSpecs<'a> {
827    type Summary = ColumnSpec<'a>;
828
829    fn column(&self, id: usize) -> Self::Summary {
830        let col_type = self.relation.column_types[id].clone();
831        let range = self.columns[id].clone();
832        ColumnSpec { col_type, range }
833    }
834
835    fn literal(&self, result: &Result<Row, EvalError>, col_type: &SqlColumnType) -> Self::Summary {
836        let col_type = col_type.clone();
837        let range = self.eval_result(result.as_ref().map(|row| {
838            self.arena
839                .make_datum(|packer| packer.push(row.unpack_first()))
840        }));
841        ColumnSpec { col_type, range }
842    }
843
844    fn unmaterializable(&self, func: &UnmaterializableFunc) -> Self::Summary {
845        let col_type = func.output_type();
846        let range = self
847            .unmaterializables
848            .get(func)
849            .cloned()
850            .unwrap_or_else(|| ResultSpec::has_type(&func.output_type(), true));
851        ColumnSpec { col_type, range }
852    }
853
854    fn unary(&self, func: &UnaryFunc, summary: Self::Summary) -> Self::Summary {
855        let fallible = func.could_error() || summary.range.fallible;
856        let mapped_spec = if let Some(special) = SpecialUnary::for_func(func) {
857            (special.map_fn)(self, summary.range)
858        } else {
859            let is_monotone = func.is_monotone();
860            let mut expr = MirScalarExpr::CallUnary {
861                func: func.clone(),
862                expr: Box::new(Self::placeholder(summary.col_type.clone())),
863            };
864            summary.range.flat_map(is_monotone, |datum| {
865                Self::set_argument(&mut expr, 0, datum);
866                self.eval_result(expr.eval(&[], self.arena))
867            })
868        };
869
870        let col_type = func.output_type(summary.col_type);
871
872        let range = mapped_spec.intersect(ResultSpec::has_type(&col_type, fallible));
873        ColumnSpec { col_type, range }
874    }
875
876    fn binary(
877        &self,
878        func: &BinaryFunc,
879        left: Self::Summary,
880        right: Self::Summary,
881    ) -> Self::Summary {
882        let (left_monotonic, right_monotonic) = func.is_monotone();
883        let fallible = func.could_error() || left.range.fallible || right.range.fallible;
884
885        let mapped_spec = if let Some(special) = SpecialBinary::for_func(func) {
886            (special.map_fn)(left.range, right.range)
887        } else {
888            let mut expr = MirScalarExpr::CallBinary {
889                func: func.clone(),
890                expr1: Box::new(Self::placeholder(left.col_type.clone())),
891                expr2: Box::new(Self::placeholder(right.col_type.clone())),
892            };
893            left.range.flat_map(left_monotonic, |left_result| {
894                Self::set_argument(&mut expr, 0, left_result);
895                right.range.flat_map(right_monotonic, |right_result| {
896                    Self::set_argument(&mut expr, 1, right_result);
897                    self.eval_result(expr.eval(&[], self.arena))
898                })
899            })
900        };
901
902        let col_type = func.output_type(left.col_type, right.col_type);
903
904        let range = mapped_spec.intersect(ResultSpec::has_type(&col_type, fallible));
905        ColumnSpec { col_type, range }
906    }
907
908    fn variadic(&self, func: &VariadicFunc, args: Vec<Self::Summary>) -> Self::Summary {
909        let fallible = func.could_error() || args.iter().any(|s| s.range.fallible);
910        if func.is_associative() && args.len() > 2 {
911            // To avoid a combinatorial explosion, evaluate large variadic calls as a series of
912            // smaller ones, since associativity guarantees we'll get compatible results.
913            return args
914                .into_iter()
915                .reduce(|a, b| self.variadic(func, vec![a, b]))
916                .expect("reducing over a non-empty argument list");
917        }
918
919        let mapped_spec = if args.len() >= Self::MAX_EVAL_ARGS {
920            ResultSpec::anything()
921        } else {
922            fn eval_loop<'a>(
923                is_monotonic: bool,
924                expr: &mut MirScalarExpr,
925                args: &[ColumnSpec<'a>],
926                index: usize,
927                datum_map: &mut impl FnMut(&MirScalarExpr) -> ResultSpec<'a>,
928            ) -> ResultSpec<'a> {
929                if index >= args.len() {
930                    datum_map(expr)
931                } else {
932                    args[index].range.flat_map(is_monotonic, |datum| {
933                        ColumnSpecs::set_argument(expr, index, datum);
934                        eval_loop(is_monotonic, expr, args, index + 1, datum_map)
935                    })
936                }
937            }
938
939            let mut fn_expr = MirScalarExpr::CallVariadic {
940                func: func.clone(),
941                exprs: args
942                    .iter()
943                    .map(|spec| Self::placeholder(spec.col_type.clone()))
944                    .collect(),
945            };
946            eval_loop(func.is_monotone(), &mut fn_expr, &args, 0, &mut |expr| {
947                self.eval_result(expr.eval(&[], self.arena))
948            })
949        };
950
951        let col_types = args.into_iter().map(|spec| spec.col_type).collect();
952        let col_type = func.output_type(col_types);
953
954        let range = mapped_spec.intersect(ResultSpec::has_type(&col_type, fallible));
955
956        ColumnSpec { col_type, range }
957    }
958
959    fn cond(&self, cond: Self::Summary, then: Self::Summary, els: Self::Summary) -> Self::Summary {
960        let col_type = SqlColumnType {
961            scalar_type: then.col_type.scalar_type,
962            nullable: then.col_type.nullable || els.col_type.nullable,
963        };
964
965        let range = cond
966            .range
967            .flat_map(true, |datum| match datum {
968                Ok(Datum::True) => then.range.clone(),
969                Ok(Datum::False) => els.range.clone(),
970                _ => ResultSpec::fails(),
971            })
972            .intersect(ResultSpec::has_type(&col_type, true));
973
974        ColumnSpec { col_type, range }
975    }
976}
977
978/// An interpreter that returns whether or not a particular expression is "pushdownable".
979/// Broadly speaking, an expression is pushdownable if the result of evaluating the expression
980/// depends on the range of possible column values in a way that `ColumnSpecs` is able to reason about.
981///
982/// In practice, we internally need to distinguish between expressions that are trivially predicable
983/// (because they're constant) and expressions that depend on the column ranges themselves.
984/// See the [TraceSummary] variants for those distinctions, and [TraceSummary::pushdownable] for
985/// the overall assessment.
986#[derive(Debug)]
987pub struct Trace;
988
989/// A summary type for the [Trace] interpreter.
990///
991/// The ordering of this type is meaningful: the "smaller" the summary, the more information we have
992/// about the possible values of the expression. This means we can eg. use `max` in the
993/// interpreter below to find the summary for a function-call expression based on the summaries
994/// of its arguments.
995#[derive(Copy, Clone, Debug, PartialOrd, PartialEq, Ord, Eq)]
996pub enum TraceSummary {
997    /// The expression is constant: we can evaluate it without any runtime information.
998    /// This corresponds to a `ResultSpec` of a single value.
999    Constant,
1000    /// The expression depends on runtime information, but in "predictable" way... ie. if we know
1001    /// the range of possible values for all columns and unmaterializable functions, we can
1002    /// predict the possible values of the output.
1003    /// This corresponds to a `ResultSpec` of a perhaps range of values.
1004    Dynamic,
1005    /// The expression depends on runtime information in an unpredictable way.
1006    /// This corresponds to a `ResultSpec::value_all()` or something similarly vague.
1007    Unknown,
1008}
1009
1010impl TraceSummary {
1011    /// We say that a function is "pushdownable" for a particular
1012    /// argument if `ColumnSpecs` can determine the spec of the function's output given the input spec for
1013    /// that argument. (In practice, this is true when either the function is monotone in that argument
1014    /// or it's been special-cased in the interpreter.)
1015    fn apply_fn(self, pushdownable: bool) -> Self {
1016        match self {
1017            TraceSummary::Constant => TraceSummary::Constant,
1018            TraceSummary::Dynamic => match pushdownable {
1019                true => TraceSummary::Dynamic,
1020                false => TraceSummary::Unknown,
1021            },
1022            TraceSummary::Unknown => TraceSummary::Unknown,
1023        }
1024    }
1025
1026    /// We say that an expression is "pushdownable" if it's either constant or dynamic.
1027    pub fn pushdownable(self) -> bool {
1028        match self {
1029            TraceSummary::Constant | TraceSummary::Dynamic => true,
1030            TraceSummary::Unknown => false,
1031        }
1032    }
1033}
1034
1035impl Interpreter for Trace {
1036    type Summary = TraceSummary;
1037
1038    fn column(&self, _id: usize) -> Self::Summary {
1039        TraceSummary::Dynamic
1040    }
1041
1042    fn literal(
1043        &self,
1044        _result: &Result<Row, EvalError>,
1045        _col_type: &SqlColumnType,
1046    ) -> Self::Summary {
1047        TraceSummary::Constant
1048    }
1049
1050    fn unmaterializable(&self, _func: &UnmaterializableFunc) -> Self::Summary {
1051        TraceSummary::Dynamic
1052    }
1053
1054    fn unary(&self, func: &UnaryFunc, expr: Self::Summary) -> Self::Summary {
1055        let pushdownable = match SpecialUnary::for_func(func) {
1056            None => func.is_monotone(),
1057            Some(special) => special.pushdownable,
1058        };
1059        expr.apply_fn(pushdownable)
1060    }
1061
1062    fn binary(
1063        &self,
1064        func: &BinaryFunc,
1065        left: Self::Summary,
1066        right: Self::Summary,
1067    ) -> Self::Summary {
1068        let (left_pushdownable, right_pushdownable) = match SpecialBinary::for_func(func) {
1069            None => func.is_monotone(),
1070            Some(special) => special.pushdownable,
1071        };
1072        left.apply_fn(left_pushdownable)
1073            .max(right.apply_fn(right_pushdownable))
1074    }
1075
1076    fn variadic(&self, func: &VariadicFunc, exprs: Vec<Self::Summary>) -> Self::Summary {
1077        if !func.is_associative() && exprs.len() >= ColumnSpecs::MAX_EVAL_ARGS {
1078            // We can't efficiently evaluate functions with very large argument lists;
1079            // see the comment on ColumnSpecs::MAX_EVAL_ARGS for details.
1080            return TraceSummary::Unknown;
1081        }
1082
1083        let pushdownable_fn = func.is_monotone();
1084        exprs
1085            .into_iter()
1086            .map(|pushdownable_arg| pushdownable_arg.apply_fn(pushdownable_fn))
1087            .max()
1088            .unwrap_or(TraceSummary::Constant)
1089    }
1090
1091    fn cond(&self, cond: Self::Summary, then: Self::Summary, els: Self::Summary) -> Self::Summary {
1092        // We don't actually need to be able to predict the condition precisely to predict the output,
1093        // since we can union the ranges of the two branches for a conservative estimate.
1094        let cond = cond.min(TraceSummary::Dynamic);
1095        cond.max(then).max(els)
1096    }
1097}
1098
1099#[cfg(test)]
1100mod tests {
1101    use itertools::Itertools;
1102    use mz_repr::adt::datetime::DateTimeUnits;
1103    use mz_repr::{Datum, PropDatum, RowArena, SqlScalarType};
1104    use proptest::prelude::*;
1105    use proptest::sample::{Index, select};
1106
1107    use crate::func::*;
1108    use crate::{BinaryFunc, MirScalarExpr, UnaryFunc};
1109
1110    use super::*;
1111
1112    #[derive(Debug)]
1113    struct ExpressionData {
1114        relation_type: SqlRelationType,
1115        specs: Vec<ResultSpec<'static>>,
1116        rows: Vec<Row>,
1117        expr: MirScalarExpr,
1118    }
1119
1120    // Currently there's no good way to check whether a particular function accepts a particular
1121    // type as argument, which means we need to list everything out explicitly here. Restrict our interest
1122    // to a reasonable number of functions, to keep things tractable
1123    // TODO: replace this with function-level info once it's available.
1124    const NUM_TYPE: SqlScalarType = SqlScalarType::Numeric { max_scale: None };
1125    static SCALAR_TYPES: &[SqlScalarType] = &[
1126        SqlScalarType::Bool,
1127        SqlScalarType::Jsonb,
1128        NUM_TYPE,
1129        SqlScalarType::Date,
1130        SqlScalarType::Timestamp { precision: None },
1131        SqlScalarType::MzTimestamp,
1132        SqlScalarType::String,
1133    ];
1134
1135    const INTERESTING_UNARY_FUNCS: &[UnaryFunc] = {
1136        &[
1137            UnaryFunc::CastNumericToMzTimestamp(CastNumericToMzTimestamp),
1138            UnaryFunc::NegNumeric(NegNumeric),
1139            UnaryFunc::CastJsonbToNumeric(CastJsonbToNumeric(None)),
1140            UnaryFunc::CastJsonbToBool(CastJsonbToBool),
1141            UnaryFunc::CastJsonbToString(CastJsonbToString),
1142            UnaryFunc::DateTruncTimestamp(DateTruncTimestamp(DateTimeUnits::Epoch)),
1143            UnaryFunc::ExtractTimestamp(ExtractTimestamp(DateTimeUnits::Epoch)),
1144            UnaryFunc::ExtractDate(ExtractDate(DateTimeUnits::Epoch)),
1145            UnaryFunc::Not(Not),
1146            UnaryFunc::IsNull(IsNull),
1147            UnaryFunc::IsFalse(IsFalse),
1148            UnaryFunc::TryParseMonotonicIso8601Timestamp(TryParseMonotonicIso8601Timestamp),
1149        ]
1150    };
1151
1152    fn unary_typecheck(func: &UnaryFunc, arg: &SqlColumnType) -> bool {
1153        use UnaryFunc::*;
1154        match func {
1155            CastNumericToMzTimestamp(_) | NegNumeric(_) => arg.scalar_type.base_eq(&NUM_TYPE),
1156            CastJsonbToNumeric(_) | CastJsonbToBool(_) | CastJsonbToString(_) => {
1157                arg.scalar_type.base_eq(&SqlScalarType::Jsonb)
1158            }
1159            ExtractTimestamp(_) | DateTruncTimestamp(_) => arg
1160                .scalar_type
1161                .base_eq(&SqlScalarType::Timestamp { precision: None }),
1162            ExtractDate(_) => arg.scalar_type.base_eq(&SqlScalarType::Date),
1163            Not(_) => arg.scalar_type.base_eq(&SqlScalarType::Bool),
1164            IsNull(_) => true,
1165            TryParseMonotonicIso8601Timestamp(_) => arg.scalar_type.base_eq(&SqlScalarType::String),
1166            _ => false,
1167        }
1168    }
1169
1170    fn interesting_binary_funcs() -> Vec<BinaryFunc> {
1171        vec![
1172            AddTimestampInterval.into(),
1173            AddNumeric.into(),
1174            SubNumeric.into(),
1175            MulNumeric.into(),
1176            DivNumeric.into(),
1177            Eq.into(),
1178            Lt.into(),
1179            Gt.into(),
1180            Lte.into(),
1181            Gte.into(),
1182            DateTruncUnitsTimestamp.into(),
1183            BinaryFunc::JsonbGetString,
1184            BinaryFunc::JsonbGetStringStringify,
1185        ]
1186    }
1187
1188    fn binary_typecheck(func: &BinaryFunc, arg0: &SqlColumnType, arg1: &SqlColumnType) -> bool {
1189        use BinaryFunc::*;
1190        match func {
1191            AddTimestampInterval(_) => {
1192                arg0.scalar_type
1193                    .base_eq(&SqlScalarType::Timestamp { precision: None })
1194                    && arg1.scalar_type.base_eq(&SqlScalarType::Interval)
1195            }
1196            AddNumeric(_) | SubNumeric(_) | MulNumeric(_) | DivNumeric(_) => {
1197                arg0.scalar_type.base_eq(&NUM_TYPE) && arg1.scalar_type.base_eq(&NUM_TYPE)
1198            }
1199            Eq(_) | Lt(_) | Gt(_) | Lte(_) | Gte(_) => arg0.scalar_type.base_eq(&arg1.scalar_type),
1200            DateTruncTimestamp(_) => {
1201                arg0.scalar_type.base_eq(&SqlScalarType::String)
1202                    && arg1
1203                        .scalar_type
1204                        .base_eq(&SqlScalarType::Timestamp { precision: None })
1205            }
1206            JsonbGetString | JsonbGetStringStringify => {
1207                arg0.scalar_type.base_eq(&SqlScalarType::Jsonb)
1208                    && arg1.scalar_type.base_eq(&SqlScalarType::String)
1209            }
1210            _ => false,
1211        }
1212    }
1213
1214    const INTERESTING_VARIADIC_FUNCS: &[VariadicFunc] = {
1215        use VariadicFunc::*;
1216        &[Coalesce, Greatest, Least, And, Or, Concat, ConcatWs]
1217    };
1218
1219    fn variadic_typecheck(func: &VariadicFunc, args: &[SqlColumnType]) -> bool {
1220        use VariadicFunc::*;
1221        fn all_eq<'a>(
1222            iter: impl IntoIterator<Item = &'a SqlColumnType>,
1223            other: &SqlScalarType,
1224        ) -> bool {
1225            iter.into_iter().all(|t| t.scalar_type.base_eq(other))
1226        }
1227        match func {
1228            Coalesce | Greatest | Least => match args {
1229                [] => true,
1230                [first, rest @ ..] => all_eq(rest, &first.scalar_type),
1231            },
1232            And | Or => all_eq(args, &SqlScalarType::Bool),
1233            Concat => all_eq(args, &SqlScalarType::String),
1234            ConcatWs => args.len() > 1 && all_eq(args, &SqlScalarType::String),
1235            _ => false,
1236        }
1237    }
1238
1239    fn gen_datums_for_type(typ: &SqlColumnType) -> BoxedStrategy<Datum<'static>> {
1240        let mut values: Vec<Datum<'static>> = typ.scalar_type.interesting_datums().collect();
1241        if typ.nullable {
1242            values.push(Datum::Null)
1243        }
1244        select(values).boxed()
1245    }
1246
1247    fn gen_column() -> impl Strategy<Value = (SqlColumnType, Datum<'static>, ResultSpec<'static>)> {
1248        let col_type = (select(SCALAR_TYPES), any::<bool>())
1249            .prop_map(|(t, b)| t.nullable(b))
1250            .prop_filter("need at least one value", |c| {
1251                c.scalar_type.interesting_datums().count() > 0
1252            });
1253
1254        let result_spec = select(vec![
1255            ResultSpec::nothing(),
1256            ResultSpec::null(),
1257            ResultSpec::anything(),
1258            ResultSpec::value_all(),
1259        ]);
1260
1261        (col_type, result_spec).prop_flat_map(|(col, result_spec)| {
1262            gen_datums_for_type(&col).prop_map(move |datum| {
1263                let result_spec = result_spec.clone().union(ResultSpec::value(datum));
1264                (col.clone(), datum, result_spec)
1265            })
1266        })
1267    }
1268
1269    fn gen_expr_for_relation(
1270        relation: &SqlRelationType,
1271    ) -> BoxedStrategy<(MirScalarExpr, SqlColumnType)> {
1272        let column_gen = {
1273            let column_types = relation.column_types.clone();
1274            any::<Index>()
1275                .prop_map(move |idx| {
1276                    let id = idx.index(column_types.len());
1277                    (MirScalarExpr::column(id), column_types[id].clone())
1278                })
1279                .boxed()
1280        };
1281
1282        let literal_gen = (select(SCALAR_TYPES), any::<bool>())
1283            .prop_map(|(s, b)| s.nullable(b))
1284            .prop_flat_map(|ct| {
1285                let error_gen = any::<EvalError>().prop_map(Err).boxed();
1286                let value_gen = gen_datums_for_type(&ct)
1287                    .prop_map(move |datum| Ok(Row::pack_slice(&[datum])))
1288                    .boxed();
1289                error_gen.prop_union(value_gen).prop_map(move |result| {
1290                    (MirScalarExpr::Literal(result, ct.clone()), ct.clone())
1291                })
1292            })
1293            .boxed();
1294
1295        column_gen
1296            .prop_union(literal_gen)
1297            .prop_recursive(4, 64, 8, |self_gen| {
1298                let unary_gen = (select(INTERESTING_UNARY_FUNCS), self_gen.clone())
1299                    .prop_filter_map("unary func", |(func, (expr_in, type_in))| {
1300                        if !unary_typecheck(&func, &type_in) {
1301                            return None;
1302                        }
1303                        let type_out = func.output_type(type_in);
1304                        let expr_out = MirScalarExpr::CallUnary {
1305                            func,
1306                            expr: Box::new(expr_in),
1307                        };
1308                        Some((expr_out, type_out))
1309                    })
1310                    .boxed();
1311                let binary_gen = (
1312                    select(interesting_binary_funcs()),
1313                    self_gen.clone(),
1314                    self_gen.clone(),
1315                )
1316                    .prop_filter_map(
1317                        "binary func",
1318                        |(func, (expr_left, type_left), (expr_right, type_right))| {
1319                            if !binary_typecheck(&func, &type_left, &type_right) {
1320                                return None;
1321                            }
1322                            let type_out = func.output_type(type_left, type_right);
1323                            let expr_out = MirScalarExpr::CallBinary {
1324                                func,
1325                                expr1: Box::new(expr_left),
1326                                expr2: Box::new(expr_right),
1327                            };
1328                            Some((expr_out, type_out))
1329                        },
1330                    )
1331                    .boxed();
1332                let variadic_gen = (
1333                    select(INTERESTING_VARIADIC_FUNCS),
1334                    prop::collection::vec(self_gen, 1..4),
1335                )
1336                    .prop_filter_map("variadic func", |(func, exprs)| {
1337                        let (exprs_in, type_in): (_, Vec<_>) = exprs.into_iter().unzip();
1338                        if !variadic_typecheck(&func, &type_in) {
1339                            return None;
1340                        }
1341                        let type_out = func.output_type(type_in);
1342                        let expr_out = MirScalarExpr::CallVariadic {
1343                            func,
1344                            exprs: exprs_in,
1345                        };
1346                        Some((expr_out, type_out))
1347                    })
1348                    .boxed();
1349
1350                unary_gen
1351                    .prop_union(binary_gen)
1352                    .boxed()
1353                    .prop_union(variadic_gen)
1354            })
1355            .boxed()
1356    }
1357
1358    fn gen_expr_data() -> impl Strategy<Value = ExpressionData> {
1359        let columns = prop::collection::vec(gen_column(), 1..10);
1360        columns.prop_flat_map(|data| {
1361            let (columns, datums, specs): (Vec<_>, Vec<_>, Vec<_>) = data.into_iter().multiunzip();
1362            let relation = SqlRelationType::new(columns);
1363            let row = Row::pack_slice(&datums);
1364            gen_expr_for_relation(&relation).prop_map(move |(expr, _)| ExpressionData {
1365                relation_type: relation.clone(),
1366                specs: specs.clone(),
1367                rows: vec![row.clone()],
1368                expr,
1369            })
1370        })
1371    }
1372
1373    #[mz_ore::test]
1374    #[cfg_attr(miri, ignore)] // error: unsupported operation: can't call foreign function `decContextDefault` on OS `linux`
1375    fn test_trivial_spec_matches() {
1376        fn check(datum: PropDatum) -> Result<(), TestCaseError> {
1377            let datum: Datum = (&datum).into();
1378            let spec = if datum.is_null() {
1379                ResultSpec::null()
1380            } else {
1381                ResultSpec::value(datum)
1382            };
1383            assert!(spec.may_contain(datum));
1384            Ok(())
1385        }
1386
1387        proptest!(|(datum in mz_repr::arb_datum())| {
1388            check(datum)?;
1389        });
1390
1391        assert!(ResultSpec::fails().may_fail());
1392    }
1393
1394    #[mz_ore::test]
1395    #[cfg_attr(miri, ignore)] // error: unsupported operation: can't call foreign function `decContextDefault` on OS `linux`
1396    fn test_equivalence() {
1397        fn check(data: ExpressionData) -> Result<(), TestCaseError> {
1398            let ExpressionData {
1399                relation_type,
1400                specs,
1401                rows,
1402                expr,
1403            } = data;
1404
1405            // We want to ensure that the spec we get when evaluating an expression using
1406            // `ColumnSpecs` always contains the _actual_ value of that column when evaluated with
1407            // eval. (This is an important correctness property of abstract interpretation.)
1408            let arena = RowArena::new();
1409            let mut interpreter = ColumnSpecs::new(&relation_type, &arena);
1410            for (id, spec) in specs.into_iter().enumerate() {
1411                interpreter.push_column(id, spec);
1412            }
1413
1414            let spec = interpreter.expr(&expr);
1415
1416            for row in &rows {
1417                let datums: Vec<_> = row.iter().collect();
1418                let eval_result = expr.eval(&datums, &arena);
1419                match eval_result {
1420                    Ok(value) => {
1421                        assert!(spec.range.may_contain(value))
1422                    }
1423                    Err(_) => {
1424                        assert!(spec.range.may_fail());
1425                    }
1426                }
1427            }
1428
1429            Ok(())
1430        }
1431
1432        proptest!(|(data in gen_expr_data())| {
1433            check(data)?;
1434        });
1435    }
1436
1437    #[mz_ore::test]
1438    fn test_mfp() {
1439        // Regression test for https://github.com/MaterializeInc/database-issues/issues/5736
1440        use MirScalarExpr::*;
1441
1442        let mfp = MapFilterProject {
1443            expressions: vec![],
1444            predicates: vec![
1445                // Always fails on the known input range
1446                (
1447                    1,
1448                    CallUnary {
1449                        func: UnaryFunc::IsNull(IsNull),
1450                        expr: Box::new(CallBinary {
1451                            func: MulInt32.into(),
1452                            expr1: Box::new(MirScalarExpr::column(0)),
1453                            expr2: Box::new(MirScalarExpr::column(0)),
1454                        }),
1455                    },
1456                ),
1457                // Always returns false on the known input range
1458                (
1459                    1,
1460                    CallBinary {
1461                        func: Eq.into(),
1462                        expr1: Box::new(MirScalarExpr::column(0)),
1463                        expr2: Box::new(Literal(
1464                            Ok(Row::pack_slice(&[Datum::Int32(1727694505)])),
1465                            SqlScalarType::Int32.nullable(false),
1466                        )),
1467                    },
1468                ),
1469            ],
1470            projection: vec![],
1471            input_arity: 1,
1472        };
1473
1474        let relation = SqlRelationType::new(vec![SqlScalarType::Int32.nullable(true)]);
1475        let arena = RowArena::new();
1476        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1477        interpreter.push_column(0, ResultSpec::value(Datum::Int32(-1294725158)));
1478        let spec = interpreter.mfp_filter(&mfp);
1479        assert!(spec.range.may_fail());
1480    }
1481
1482    #[mz_ore::test]
1483    fn test_concat() {
1484        let expr = MirScalarExpr::CallVariadic {
1485            func: VariadicFunc::Concat,
1486            exprs: vec![
1487                MirScalarExpr::column(0),
1488                MirScalarExpr::literal_ok(Datum::String("a"), SqlScalarType::String),
1489                MirScalarExpr::literal_ok(Datum::String("b"), SqlScalarType::String),
1490            ],
1491        };
1492
1493        let relation = SqlRelationType::new(vec![SqlScalarType::String.nullable(false)]);
1494        let arena = RowArena::new();
1495        let interpreter = ColumnSpecs::new(&relation, &arena);
1496        let spec = interpreter.expr(&expr);
1497        assert!(spec.range.may_contain(Datum::String("blab")));
1498    }
1499
1500    #[mz_ore::test]
1501    fn test_eval_range() {
1502        // Example inspired by the tumbling windows temporal filter in the docs
1503        let period_ms = MirScalarExpr::Literal(
1504            Ok(Row::pack_slice(&[Datum::Int64(10)])),
1505            SqlScalarType::Int64.nullable(false),
1506        );
1507        let expr = MirScalarExpr::CallBinary {
1508            func: Gte.into(),
1509            expr1: Box::new(MirScalarExpr::CallUnmaterializable(
1510                UnmaterializableFunc::MzNow,
1511            )),
1512            expr2: Box::new(MirScalarExpr::CallUnary {
1513                func: UnaryFunc::CastInt64ToMzTimestamp(CastInt64ToMzTimestamp),
1514                expr: Box::new(MirScalarExpr::CallBinary {
1515                    func: MulInt64.into(),
1516                    expr1: Box::new(period_ms.clone()),
1517                    expr2: Box::new(MirScalarExpr::CallBinary {
1518                        func: DivInt64.into(),
1519                        expr1: Box::new(MirScalarExpr::column(0)),
1520                        expr2: Box::new(period_ms),
1521                    }),
1522                }),
1523            }),
1524        };
1525        let relation = SqlRelationType::new(vec![SqlScalarType::Int64.nullable(false)]);
1526
1527        {
1528            // Non-overlapping windows
1529            let arena = RowArena::new();
1530            let mut interpreter = ColumnSpecs::new(&relation, &arena);
1531            interpreter.push_unmaterializable(
1532                UnmaterializableFunc::MzNow,
1533                ResultSpec::value_between(
1534                    Datum::MzTimestamp(10.into()),
1535                    Datum::MzTimestamp(20.into()),
1536                ),
1537            );
1538            interpreter.push_column(0, ResultSpec::value_between(30i64.into(), 40i64.into()));
1539
1540            let range_out = interpreter.expr(&expr).range;
1541            assert!(range_out.may_contain(Datum::False));
1542            assert!(!range_out.may_contain(Datum::True));
1543            assert!(!range_out.may_contain(Datum::Null));
1544            assert!(!range_out.may_fail());
1545        }
1546
1547        {
1548            // Overlapping windows
1549            let arena = RowArena::new();
1550            let mut interpreter = ColumnSpecs::new(&relation, &arena);
1551            interpreter.push_unmaterializable(
1552                UnmaterializableFunc::MzNow,
1553                ResultSpec::value_between(
1554                    Datum::MzTimestamp(10.into()),
1555                    Datum::MzTimestamp(35.into()),
1556                ),
1557            );
1558            interpreter.push_column(0, ResultSpec::value_between(30i64.into(), 40i64.into()));
1559
1560            let range_out = interpreter.expr(&expr).range;
1561            assert!(range_out.may_contain(Datum::False));
1562            assert!(range_out.may_contain(Datum::True));
1563            assert!(!range_out.may_contain(Datum::Null));
1564            assert!(!range_out.may_fail());
1565        }
1566    }
1567
1568    #[mz_ore::test]
1569    #[cfg_attr(miri, ignore)] // error: unsupported operation: can't call foreign function `decNumberFromInt32` on OS `linux`
1570    fn test_jsonb() {
1571        let arena = RowArena::new();
1572
1573        let expr = MirScalarExpr::CallUnary {
1574            func: UnaryFunc::CastJsonbToNumeric(CastJsonbToNumeric(None)),
1575            expr: Box::new(MirScalarExpr::CallBinary {
1576                func: BinaryFunc::JsonbGetString,
1577                expr1: Box::new(MirScalarExpr::column(0)),
1578                expr2: Box::new(MirScalarExpr::Literal(
1579                    Ok(Row::pack_slice(&["ts".into()])),
1580                    SqlScalarType::String.nullable(false),
1581                )),
1582            }),
1583        };
1584
1585        let relation = SqlRelationType::new(vec![SqlScalarType::Jsonb.nullable(true)]);
1586        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1587        interpreter.push_column(
1588            0,
1589            ResultSpec::map_spec(
1590                [(
1591                    "ts".into(),
1592                    ResultSpec::value_between(
1593                        Datum::Numeric(100.into()),
1594                        Datum::Numeric(300.into()),
1595                    ),
1596                )]
1597                .into_iter()
1598                .collect(),
1599            ),
1600        );
1601
1602        let range_out = interpreter.expr(&expr).range;
1603        assert!(!range_out.may_contain(Datum::Numeric(0.into())));
1604        assert!(range_out.may_contain(Datum::Numeric(200.into())));
1605        assert!(!range_out.may_contain(Datum::Numeric(400.into())));
1606    }
1607
1608    #[mz_ore::test]
1609    fn test_like() {
1610        let arena = RowArena::new();
1611
1612        let expr = MirScalarExpr::CallUnary {
1613            func: UnaryFunc::IsLikeMatch(IsLikeMatch(
1614                crate::like_pattern::compile("%whatever%", true).unwrap(),
1615            )),
1616            expr: Box::new(MirScalarExpr::column(0)),
1617        };
1618
1619        let relation = SqlRelationType::new(vec![SqlScalarType::String.nullable(true)]);
1620        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1621        interpreter.push_column(
1622            0,
1623            ResultSpec::value_between(Datum::String("aardvark"), Datum::String("zebra")),
1624        );
1625
1626        let range_out = interpreter.expr(&expr).range;
1627        assert!(
1628            !range_out.fallible,
1629            "like function should not error on non-error input"
1630        );
1631        assert!(range_out.may_contain(Datum::True));
1632        assert!(range_out.may_contain(Datum::False));
1633        assert!(range_out.may_contain(Datum::Null));
1634    }
1635
1636    #[mz_ore::test]
1637    fn test_try_parse_monotonic_iso8601_timestamp() {
1638        use chrono::NaiveDateTime;
1639
1640        let arena = RowArena::new();
1641
1642        let expr = MirScalarExpr::CallUnary {
1643            func: UnaryFunc::TryParseMonotonicIso8601Timestamp(TryParseMonotonicIso8601Timestamp),
1644            expr: Box::new(MirScalarExpr::column(0)),
1645        };
1646
1647        let relation = SqlRelationType::new(vec![SqlScalarType::String.nullable(true)]);
1648        // Test the case where we have full timestamps as bounds.
1649        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1650        interpreter.push_column(
1651            0,
1652            ResultSpec::value_between(
1653                Datum::String("2024-01-11T00:00:00.000Z"),
1654                Datum::String("2024-01-11T20:00:00.000Z"),
1655            ),
1656        );
1657
1658        let timestamp = |ts| {
1659            Datum::Timestamp(
1660                NaiveDateTime::parse_from_str(ts, "%Y-%m-%dT%H:%M:%S")
1661                    .unwrap()
1662                    .try_into()
1663                    .unwrap(),
1664            )
1665        };
1666
1667        let range_out = interpreter.expr(&expr).range;
1668        assert!(!range_out.fallible);
1669        assert!(range_out.nullable);
1670        assert!(!range_out.may_contain(timestamp("2024-01-10T10:00:00")));
1671        assert!(range_out.may_contain(timestamp("2024-01-11T10:00:00")));
1672        assert!(!range_out.may_contain(timestamp("2024-01-12T10:00:00")));
1673
1674        // Test the case where we have truncated / useless bounds.
1675        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1676        interpreter.push_column(
1677            0,
1678            ResultSpec::value_between(Datum::String("2024-01-1"), Datum::String("2024-01-2")),
1679        );
1680
1681        let range_out = interpreter.expr(&expr).range;
1682        assert!(!range_out.fallible);
1683        assert!(range_out.nullable);
1684        assert!(range_out.may_contain(timestamp("2024-01-10T10:00:00")));
1685        assert!(range_out.may_contain(timestamp("2024-01-11T10:00:00")));
1686        assert!(range_out.may_contain(timestamp("2024-01-12T10:00:00")));
1687
1688        // Test the case where only one bound is truncated
1689        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1690        interpreter.push_column(
1691            0,
1692            ResultSpec::value_between(
1693                Datum::String("2024-01-1"),
1694                Datum::String("2024-01-12T10:00:00"),
1695            )
1696            .union(ResultSpec::null()),
1697        );
1698
1699        let range_out = interpreter.expr(&expr).range;
1700        assert!(!range_out.fallible);
1701        assert!(range_out.nullable);
1702        assert!(range_out.may_contain(timestamp("2024-01-10T10:00:00")));
1703        assert!(range_out.may_contain(timestamp("2024-01-11T10:00:00")));
1704        assert!(range_out.may_contain(timestamp("2024-01-12T10:00:00")));
1705
1706        // Test the case where the upper and lower bound are identical
1707        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1708        interpreter.push_column(
1709            0,
1710            ResultSpec::value_between(
1711                Datum::String("2024-01-11T10:00:00.000Z"),
1712                Datum::String("2024-01-11T10:00:00.000Z"),
1713            ),
1714        );
1715
1716        let range_out = interpreter.expr(&expr).range;
1717        assert!(!range_out.fallible);
1718        assert!(!range_out.nullable);
1719        assert!(!range_out.may_contain(timestamp("2024-01-10T10:00:00")));
1720        assert!(range_out.may_contain(timestamp("2024-01-11T10:00:00")));
1721        assert!(!range_out.may_contain(timestamp("2024-01-12T10:00:00")));
1722    }
1723
1724    #[mz_ore::test]
1725    fn test_inequality() {
1726        let arena = RowArena::new();
1727
1728        let expr = MirScalarExpr::column(0).call_binary(
1729            MirScalarExpr::CallUnmaterializable(UnmaterializableFunc::MzNow),
1730            Gte,
1731        );
1732
1733        let relation = SqlRelationType::new(vec![SqlScalarType::MzTimestamp.nullable(true)]);
1734        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1735        interpreter.push_column(
1736            0,
1737            ResultSpec::value_between(
1738                Datum::MzTimestamp(1704736444949u64.into()),
1739                Datum::MzTimestamp(1704736444949u64.into()),
1740            )
1741            .union(ResultSpec::null()),
1742        );
1743        interpreter.push_unmaterializable(
1744            UnmaterializableFunc::MzNow,
1745            ResultSpec::value_between(
1746                Datum::MzTimestamp(1704738791000u64.into()),
1747                Datum::MzTimestamp(18446744073709551615u64.into()),
1748            ),
1749        );
1750
1751        let range_out = interpreter.expr(&expr).range;
1752        assert!(
1753            !range_out.fallible,
1754            "<= function should not error on non-error input"
1755        );
1756        assert!(!range_out.may_contain(Datum::True));
1757        assert!(range_out.may_contain(Datum::False));
1758        assert!(range_out.may_contain(Datum::Null));
1759    }
1760
1761    #[mz_ore::test]
1762    fn test_trace() {
1763        use super::Trace;
1764
1765        let expr = MirScalarExpr::column(0).call_binary(
1766            MirScalarExpr::column(1)
1767                .call_binary(MirScalarExpr::column(3).call_unary(NegInt64), AddInt64),
1768            Gte,
1769        );
1770        let summary = Trace.expr(&expr);
1771        assert!(summary.pushdownable());
1772    }
1773}