Skip to main content

mz_expr/
interpret.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10use std::collections::BTreeMap;
11use std::fmt::Debug;
12
13use mz_repr::{Datum, ReprColumnType, ReprRelationType, ReprScalarType, Row, RowArena};
14
15use crate::scalar::func::variadic::And;
16use crate::{
17    BinaryFunc, Eval, EvalError, MapFilterProject, MfpPlan, MirScalarExpr, UnaryFunc,
18    UnmaterializableFunc, VariadicFunc, func,
19};
20/// An inclusive range of non-null datum values.
21#[derive(Clone, Eq, PartialEq, Debug)]
22enum Values<'a> {
23    /// This range contains no values.
24    Empty,
25    /// An inclusive range. Invariant: the first element is always <= the second.
26    // TODO: a variant for small sets of data would avoid losing precision here.
27    Within(Datum<'a>, Datum<'a>),
28    /// Constraints on structured fields, useful for recursive structures like maps.
29    /// Fields that are not present in the map default to Values::All.
30    // TODO: consider using this variant, or similar, for Datum::List.
31    Nested(BTreeMap<Datum<'a>, ResultSpec<'a>>),
32    /// This range might contain any value. Since we're overapproximating, this is often used
33    /// as a "safe fallback" when we can't determine the right boundaries for a range.
34    All,
35}
36
37impl<'a> Values<'a> {
38    fn just(a: Datum<'a>) -> Values<'a> {
39        match a {
40            Datum::Map(datum_map) => Values::Nested(
41                datum_map
42                    .iter()
43                    .map(|(key, val)| (key.into(), ResultSpec::value(val)))
44                    .collect(),
45            ),
46            other => Self::Within(other, other),
47        }
48    }
49
50    fn union(self, other: Values<'a>) -> Values<'a> {
51        match (self, other) {
52            (Values::Empty, r) => r,
53            (r, Values::Empty) => r,
54            (Values::Within(a0, a1), Values::Within(b0, b1)) => {
55                Values::Within(a0.min(b0), a1.max(b1))
56            }
57            (Values::Nested(a), Values::Nested(mut b)) => {
58                // `Nested(map)` treats keys missing from `map` as fully unconstrained, so a
59                // key present in only one side of the union must be treated as `anything`
60                // on the other side. Because `x ∪ anything = anything`, such keys drop
61                // out of the merged map (the Nested default is already "anything").
62                let mut merged = BTreeMap::new();
63                for (key, a_spec) in a {
64                    if let Some(b_spec) = b.remove(&key) {
65                        let unioned = a_spec.union(b_spec);
66                        if unioned != ResultSpec::anything() {
67                            merged.insert(key, unioned);
68                        }
69                    }
70                }
71                if merged.is_empty() {
72                    Values::All
73                } else {
74                    Values::Nested(merged)
75                }
76            }
77            _ => Values::All,
78        }
79    }
80
81    fn intersect(self, other: Values<'a>) -> Values<'a> {
82        match (self, other) {
83            (Values::Empty, _) => Values::Empty,
84            (_, Values::Empty) => Values::Empty,
85            (Values::Within(a0, a1), Values::Within(b0, b1)) => {
86                let min = a0.max(b0);
87                let max = a1.min(b1);
88                if min <= max {
89                    Values::Within(min, max)
90                } else {
91                    Values::Empty
92                }
93            }
94            (Values::Nested(mut a), Values::Nested(b)) => {
95                for (datum, other_spec) in b {
96                    let spec = a.entry(datum).or_insert_with(ResultSpec::anything);
97                    *spec = spec.clone().intersect(other_spec);
98                }
99                Values::Nested(a)
100            }
101            (Values::All, v) => v,
102            (v, Values::All) => v,
103            (Values::Nested(_), Values::Within(_, _)) => Values::Empty,
104            (Values::Within(_, _), Values::Nested(_)) => Values::Empty,
105        }
106    }
107
108    fn may_contain(&self, value: Datum<'a>) -> bool {
109        match self {
110            Values::Empty => false,
111            Values::Within(min, max) => *min <= value && value <= *max,
112            Values::All => true,
113            Values::Nested(field_map) => match value {
114                Datum::Map(datum_map) => {
115                    datum_map
116                        .iter()
117                        .all(|(key, val)| match field_map.get(&key.into()) {
118                            None => true,
119                            Some(nested) => nested.may_contain(val),
120                        })
121                }
122                _ => false,
123            },
124        }
125    }
126}
127
128/// An approximation of the set of values an expression might have, including whether or not it
129/// might be null or an error value. This is generally an _overapproximation_, in the sense that
130/// [ResultSpec::may_contain] may return true even if the argument is not included in the set.
131/// (However, it should never return false when the value _is_ included!)
132#[derive(Debug, Clone, Eq, PartialEq)]
133pub struct ResultSpec<'a> {
134    /// True if the expression may evaluate to [Datum::Null].
135    nullable: bool,
136    /// True if the expression may evaluate to an error.
137    fallible: bool,
138    /// The range of possible (non-null) values that the expression may evaluate to.
139    values: Values<'a>,
140}
141
142impl<'a> ResultSpec<'a> {
143    /// No results match this spec. (For example, an empty table.)
144    pub fn nothing() -> Self {
145        ResultSpec {
146            nullable: false,
147            fallible: false,
148            values: Values::Empty,
149        }
150    }
151
152    /// Every result matches this spec.
153    pub fn anything() -> Self {
154        ResultSpec {
155            nullable: true,
156            fallible: true,
157            values: Values::All,
158        }
159    }
160
161    /// Every result matches this spec.
162    pub fn any_infallible() -> Self {
163        ResultSpec {
164            nullable: true,
165            fallible: false,
166            values: Values::All,
167        }
168    }
169
170    /// A spec that only matches null.
171    pub fn null() -> Self {
172        ResultSpec {
173            nullable: true,
174            ..Self::nothing()
175        }
176    }
177
178    /// A spec that only matches error values.
179    pub fn fails() -> Self {
180        ResultSpec {
181            fallible: true,
182            ..Self::nothing()
183        }
184    }
185
186    /// A spec that matches all values of a given type.
187    pub fn has_type(col: &ReprColumnType, fallible: bool) -> ResultSpec<'a> {
188        let values = match &col.scalar_type {
189            ReprScalarType::Bool => Values::Within(Datum::False, Datum::True),
190            // TODO: add bounds for other bounded types, like integers
191            _ => Values::All,
192        };
193        ResultSpec {
194            nullable: col.nullable,
195            fallible,
196            values,
197        }
198    }
199
200    /// A spec that only matches the given value.
201    pub fn value(value: Datum<'a>) -> ResultSpec<'a> {
202        match value {
203            Datum::Null => Self::null(),
204            nonnull => ResultSpec {
205                values: Values::just(nonnull),
206                ..Self::nothing()
207            },
208        }
209    }
210
211    /// A spec that matches values between the given (non-null) min and max.
212    pub fn value_between(min: Datum<'a>, max: Datum<'a>) -> ResultSpec<'a> {
213        assert!(!min.is_null());
214        assert!(!max.is_null());
215        if min <= max {
216            ResultSpec {
217                values: Values::Within(min, max),
218                ..ResultSpec::nothing()
219            }
220        } else {
221            ResultSpec::nothing()
222        }
223    }
224
225    /// A spec that matches any non-null value.
226    pub fn value_all() -> ResultSpec<'a> {
227        ResultSpec {
228            values: Values::All,
229            ..ResultSpec::nothing()
230        }
231    }
232
233    /// A spec that matches Datum::Maps of the given type.
234    pub fn map_spec(map: BTreeMap<Datum<'a>, ResultSpec<'a>>) -> ResultSpec<'a> {
235        ResultSpec {
236            values: Values::Nested(map),
237            ..ResultSpec::nothing()
238        }
239    }
240
241    /// Given two specs, returns a new spec that matches anything that either original spec would match.
242    pub fn union(self, other: ResultSpec<'a>) -> ResultSpec<'a> {
243        ResultSpec {
244            nullable: self.nullable || other.nullable,
245            fallible: self.fallible || other.fallible,
246            values: self.values.union(other.values),
247        }
248    }
249
250    /// Given two specs, returns a new spec that only matches things that both original specs would match.
251    pub fn intersect(self, other: ResultSpec<'a>) -> ResultSpec<'a> {
252        ResultSpec {
253            nullable: self.nullable && other.nullable,
254            fallible: self.fallible && other.fallible,
255            values: self.values.intersect(other.values),
256        }
257    }
258
259    /// Check if a particular value matches the spec.
260    pub fn may_contain(&self, value: Datum<'a>) -> bool {
261        if value == Datum::Null {
262            return self.nullable;
263        }
264
265        self.values.may_contain(value)
266    }
267
268    /// Check if an error value matches the spec.
269    pub fn may_fail(&self) -> bool {
270        self.fallible
271    }
272
273    /// This method "maps" a function across the `ResultSpec`.
274    ///
275    /// As mentioned above, `ResultSpec` represents an approximate set of results.
276    /// If we actually stored each result in the set, `flat_map` could be implemented by passing
277    /// each result to the function one-by-one and unioning the resulting sets. This is possible
278    /// when our values set is empty or contains a single datum, but when it contains a range,
279    /// we can't enumerate all possible values of the set. We handle this by:
280    /// - tracking whether the function is monotone, in which case we can map the range by just
281    ///   mapping the endpoints;
282    /// - using a safe default when we can't infer a tighter bound on the set, eg. [Self::anything].
283    fn flat_map(
284        &self,
285        is_monotone: bool,
286        mut result_map: impl FnMut(Result<Datum<'a>, EvalError>) -> ResultSpec<'a>,
287    ) -> ResultSpec<'a> {
288        let null_spec = if self.nullable {
289            result_map(Ok(Datum::Null))
290        } else {
291            ResultSpec::nothing()
292        };
293
294        let error_spec = if self.fallible {
295            // Since we only care about whether / not an error is possible, and not the specific
296            // error, create an arbitrary error here.
297            // NOTE! This assumes that functions do not discriminate on the type of the error.
298            let map_err = result_map(Err(EvalError::Internal("".into())));
299            let raise_err = ResultSpec::fails();
300            // SQL has a very loose notion of evaluation order: https://www.postgresql.org/docs/current/sql-expressions.html#SYNTAX-EXPRESS-EVAL
301            // Here, we account for the possibility that the expression is evaluated strictly,
302            // raising the error, or that it's evaluated lazily by the result_map function
303            // (which may return a non-error result even when given an error as input).
304            raise_err.union(map_err)
305        } else {
306            ResultSpec::nothing()
307        };
308
309        let values_spec = match self.values {
310            Values::Empty => ResultSpec::nothing(),
311            // If this range contains a single datum, just call the function.
312            Values::Within(min, max) if min == max => result_map(Ok(min)),
313            // If this is a range of booleans, we know all the values... just try them.
314            Values::Within(Datum::False, Datum::True) => {
315                result_map(Ok(Datum::False)).union(result_map(Ok(Datum::True)))
316            }
317            // Otherwise, if our function is monotonic, we can try mapping the input
318            // range to an output range.
319            Values::Within(min, max) if is_monotone => {
320                let min_result = result_map(Ok(min));
321                let max_result = result_map(Ok(max));
322                match (min_result, max_result) {
323                    // If both endpoints give us a range, the result is a union of those ranges.
324                    (
325                        ResultSpec {
326                            nullable: false,
327                            fallible: false,
328                            values: a_values,
329                        },
330                        ResultSpec {
331                            nullable: false,
332                            fallible: false,
333                            values: b_values,
334                        },
335                    ) => ResultSpec {
336                        nullable: false,
337                        fallible: false,
338                        values: a_values.union(b_values),
339                    },
340                    // If both endpoints are null, we assume the whole range maps to null.
341                    (
342                        ResultSpec {
343                            nullable: true,
344                            fallible: false,
345                            values: Values::Empty,
346                        },
347                        ResultSpec {
348                            nullable: true,
349                            fallible: false,
350                            values: Values::Empty,
351                        },
352                    ) => ResultSpec::null(),
353                    // Otherwise we can't assume anything about the output.
354                    _ => ResultSpec::anything(),
355                }
356            }
357            // TODO: we could return a narrower result for eg. `Values::Nested` with all-`Within` fields.
358            Values::Within(_, _) | Values::Nested(_) | Values::All => ResultSpec::anything(),
359        };
360
361        null_spec.union(error_spec).union(values_spec)
362    }
363}
364
365/// [Abstract interpretation](https://en.wikipedia.org/wiki/Abstract_interpretation) for
366/// [MirScalarExpr].
367///
368/// [MirScalarExpr::eval] implements a "concrete interpreter" for the expression type: given an
369/// expression and specific column values as input, it returns a specific value for the output.
370/// This could be reimplemented using this trait... but it's most useful for "abstract"
371/// interpretations of the expression, where we generalize about sets of possible inputs and outputs.
372/// See [Trace] and [ColumnSpecs] for how this can be useful in practice.
373pub trait Interpreter {
374    type Summary: Clone + Debug + Sized;
375
376    /// A column of the input row.
377    fn column(&self, id: usize) -> Self::Summary;
378
379    /// A literal value.
380    /// (Stored as a row, because we can't own a Datum.)
381    fn literal(&self, result: &Result<Row, EvalError>, col_type: &ReprColumnType) -> Self::Summary;
382    /// A call to an unmaterializable function.
383    ///
384    /// These functions cannot be evaluated by `MirScalarExpr::eval`. They must
385    /// be transformed away by a higher layer.
386    fn unmaterializable(&self, func: &UnmaterializableFunc) -> Self::Summary;
387
388    /// A function call that takes one expression as an argument.
389    fn unary(&self, func: &UnaryFunc, expr: Self::Summary) -> Self::Summary;
390
391    /// A function call that takes two expressions as arguments.
392    fn binary(&self, func: &BinaryFunc, left: Self::Summary, right: Self::Summary)
393    -> Self::Summary;
394
395    /// A function call that takes an arbitrary number of arguments.
396    fn variadic(&self, func: &VariadicFunc, exprs: Vec<Self::Summary>) -> Self::Summary;
397
398    /// Conditionally evaluated expressions.
399    fn cond(&self, cond: Self::Summary, then: Self::Summary, els: Self::Summary) -> Self::Summary;
400
401    /// Evaluate an entire expression, by delegating to the fine-grained methods on [Interpreter].
402    fn expr(&self, expr: &MirScalarExpr) -> Self::Summary {
403        match expr {
404            MirScalarExpr::Column(id, _name) => self.column(*id),
405            MirScalarExpr::Literal(value, col_type) => self.literal(value, col_type),
406            MirScalarExpr::CallUnmaterializable(func) => self.unmaterializable(func),
407            MirScalarExpr::CallUnary { func, expr } => {
408                let expr_range = self.expr(expr);
409                self.unary(func, expr_range)
410            }
411            MirScalarExpr::CallBinary { func, expr1, expr2 } => {
412                let expr1_range = self.expr(expr1);
413                let expr2_range = self.expr(expr2);
414                self.binary(func, expr1_range, expr2_range)
415            }
416            MirScalarExpr::CallVariadic { func, exprs } => {
417                let exprs: Vec<_> = exprs.into_iter().map(|e| self.expr(e)).collect();
418                self.variadic(func, exprs)
419            }
420            MirScalarExpr::If { cond, then, els } => {
421                let cond_range = self.expr(cond);
422                let then_range = self.expr(then);
423                let els_range = self.expr(els);
424                self.cond(cond_range, then_range, els_range)
425            }
426        }
427    }
428
429    /// Specifically, this evaluates the map and filters stages of an MFP: summarize each of the
430    /// map expressions, then `and` together all of the filters.
431    fn mfp_filter(&self, mfp: &MapFilterProject) -> Self::Summary {
432        let mfp_eval = MfpEval::new(self, mfp.input_arity, &mfp.expressions);
433        // NB: self should not be used after this point!
434        let predicates = mfp
435            .predicates
436            .iter()
437            .map(|(_, e)| mfp_eval.expr(e))
438            .collect();
439        mfp_eval.variadic(&And.into(), predicates)
440    }
441
442    /// Similar to [Self::mfp_filter], but includes the additional temporal filters that have been
443    /// broken out.
444    fn mfp_plan_filter(&self, plan: &MfpPlan) -> Self::Summary {
445        let mfp_eval = MfpEval::new(self, plan.mfp.input_arity, &plan.mfp.expressions);
446        // NB: self should not be used after this point!
447        let mut results: Vec<_> = plan
448            .mfp
449            .predicates
450            .iter()
451            .map(|(_, e)| mfp_eval.expr(e))
452            .collect();
453        let mz_now = mfp_eval.unmaterializable(&UnmaterializableFunc::MzNow);
454        for bound in &plan.lower_bounds {
455            let bound_range = mfp_eval.expr(bound);
456            let result = mfp_eval.binary(&BinaryFunc::Lte(func::Lte), bound_range, mz_now.clone());
457            results.push(result);
458        }
459        for bound in &plan.upper_bounds {
460            let bound_range = mfp_eval.expr(bound);
461            let result = mfp_eval.binary(&BinaryFunc::Gte(func::Gte), bound_range, mz_now.clone());
462            results.push(result);
463        }
464        self.variadic(&And.into(), results)
465    }
466}
467
468/// Wrap another interpreter, but tack a few extra columns on at the end. An internal implementation
469/// detail of `eval_mfp` and `eval_mfp_plan`.
470pub(crate) struct MfpEval<'a, E: Interpreter + ?Sized> {
471    evaluator: &'a E,
472    input_arity: usize,
473    expressions: Vec<E::Summary>,
474}
475
476impl<'a, E: Interpreter + ?Sized> MfpEval<'a, E> {
477    pub(crate) fn new(evaluator: &'a E, input_arity: usize, expressions: &[MirScalarExpr]) -> Self {
478        let mut mfp_eval = MfpEval {
479            evaluator,
480            input_arity,
481            expressions: vec![],
482        };
483        for expr in expressions {
484            let result = mfp_eval.expr(expr);
485            mfp_eval.expressions.push(result);
486        }
487        mfp_eval
488    }
489}
490
491impl<'a, E: Interpreter + ?Sized> Interpreter for MfpEval<'a, E> {
492    type Summary = E::Summary;
493
494    fn column(&self, id: usize) -> Self::Summary {
495        if id < self.input_arity {
496            self.evaluator.column(id)
497        } else {
498            self.expressions[id - self.input_arity].clone()
499        }
500    }
501
502    fn literal(&self, result: &Result<Row, EvalError>, col_type: &ReprColumnType) -> Self::Summary {
503        self.evaluator.literal(result, col_type)
504    }
505
506    fn unmaterializable(&self, func: &UnmaterializableFunc) -> Self::Summary {
507        self.evaluator.unmaterializable(func)
508    }
509
510    fn unary(&self, func: &UnaryFunc, expr: Self::Summary) -> Self::Summary {
511        self.evaluator.unary(func, expr)
512    }
513
514    fn binary(
515        &self,
516        func: &BinaryFunc,
517        left: Self::Summary,
518        right: Self::Summary,
519    ) -> Self::Summary {
520        self.evaluator.binary(func, left, right)
521    }
522
523    fn variadic(&self, func: &VariadicFunc, exprs: Vec<Self::Summary>) -> Self::Summary {
524        self.evaluator.variadic(func, exprs)
525    }
526
527    fn cond(&self, cond: Self::Summary, then: Self::Summary, els: Self::Summary) -> Self::Summary {
528        self.evaluator.cond(cond, then, els)
529    }
530}
531
532/// A unary function we've added special-case handling for; including:
533/// - A three-argument function, taking and returning [ResultSpec]s. This
534///   overrides the default function-handling logic entirely.
535/// - Metadata on whether / not this function is pushdownable. See [Trace].
536struct SpecialUnary {
537    map_fn: for<'a, 'b> fn(&'b ColumnSpecs<'a>, ResultSpec<'a>) -> ResultSpec<'a>,
538    pushdownable: bool,
539}
540
541impl SpecialUnary {
542    /// Returns the special-case handling for a particular function, if it exists.
543    fn for_func(func: &UnaryFunc) -> Option<SpecialUnary> {
544        /// Eager in the same sense as `func.rs` uses the term; this assumes that
545        /// nulls and errors propagate up, and we only need to define the behaviour
546        /// on values.
547        fn eagerly<'b>(
548            spec: ResultSpec<'b>,
549            value_fn: impl FnOnce(Values<'b>) -> ResultSpec<'b>,
550        ) -> ResultSpec<'b> {
551            let result = match spec.values {
552                Values::Empty => ResultSpec::nothing(),
553                other => value_fn(other),
554            };
555            ResultSpec {
556                fallible: spec.fallible || result.fallible,
557                nullable: spec.nullable || result.nullable,
558                values: result.values,
559            }
560        }
561        match func {
562            UnaryFunc::TryParseMonotonicIso8601Timestamp(_) => Some(SpecialUnary {
563                map_fn: |specs, range| {
564                    let expr = MirScalarExpr::CallUnary {
565                        func: UnaryFunc::TryParseMonotonicIso8601Timestamp(
566                            crate::func::TryParseMonotonicIso8601Timestamp,
567                        ),
568                        expr: Box::new(MirScalarExpr::column(0)),
569                    };
570                    let eval = |d| specs.eval_result(expr.eval(&[d], specs.arena));
571
572                    eagerly(range, |values| {
573                        match values {
574                            Values::Within(a, b) if a == b => eval(a),
575                            Values::Within(a, b) => {
576                                let spec = eval(a).union(eval(b));
577                                let values_spec = if spec.nullable {
578                                    // At least one of the endpoints of the range wasn't a valid
579                                    // timestamp. We can't compute a precise range in this case.
580                                    // If we used the general is_monotone handling, that code would
581                                    // incorrectly assume the whole range mapped to null if each
582                                    // endpoint did.
583                                    ResultSpec::value_all()
584                                } else {
585                                    spec
586                                };
587                                // A range of strings will always contain strings that don't parse
588                                // as timestamps - so unlike the general case, we'll assume null
589                                // is present in every range of output values.
590                                values_spec.union(ResultSpec::null())
591                            }
592                            // Otherwise, assume the worst: this function may return either a valid
593                            // value or null.
594                            _ => ResultSpec::any_infallible(),
595                        }
596                    })
597                },
598                pushdownable: true,
599            }),
600            _ => None,
601        }
602    }
603}
604
605/// A binary function we've added special-case handling for; including:
606/// - A two-argument function, taking and returning [ResultSpec]s. This overrides the
607///   default function-handling logic entirely.
608/// - Metadata on whether / not this function is pushdownable. See [Trace].
609struct SpecialBinary {
610    map_fn: for<'a> fn(ResultSpec<'a>, ResultSpec<'a>) -> ResultSpec<'a>,
611    pushdownable: (bool, bool),
612}
613
614impl SpecialBinary {
615    /// Returns the special-case handling for a particular function, if it exists.
616    fn for_func(func: &BinaryFunc) -> Option<SpecialBinary> {
617        /// Eager in the same sense as `func.rs` uses the term; this assumes that
618        /// nulls and errors propagate up, and we only need to define the behaviour
619        /// on values.
620        fn eagerly<'b>(
621            left: ResultSpec<'b>,
622            right: ResultSpec<'b>,
623            value_fn: impl FnOnce(Values<'b>, Values<'b>) -> ResultSpec<'b>,
624        ) -> ResultSpec<'b> {
625            let result = match (left.values, right.values) {
626                (Values::Empty, _) | (_, Values::Empty) => ResultSpec::nothing(),
627                (l, r) => value_fn(l, r),
628            };
629            ResultSpec {
630                fallible: left.fallible || right.fallible || result.fallible,
631                nullable: left.nullable || right.nullable || result.nullable,
632                values: result.values,
633            }
634        }
635
636        fn jsonb_get_string<'b>(
637            left: ResultSpec<'b>,
638            right: ResultSpec<'b>,
639            stringify: bool,
640        ) -> ResultSpec<'b> {
641            eagerly(left, right, |left, right| {
642                let nested_spec = match (left, right) {
643                    (Values::Nested(mut map_spec), Values::Within(key, key2)) if key == key2 => {
644                        map_spec.remove(&key)
645                    }
646                    _ => None,
647                };
648
649                if let Some(field_spec) = nested_spec {
650                    if stringify {
651                        // We only preserve value-range information when stringification
652                        // is a noop. (Common in real queries.)
653                        let values = match field_spec.values {
654                            Values::Empty => Values::Empty,
655                            Values::Within(min @ Datum::String(_), max @ Datum::String(_)) => {
656                                Values::Within(min, max)
657                            }
658                            Values::Within(_, _) | Values::Nested(_) | Values::All => Values::All,
659                        };
660                        ResultSpec {
661                            values,
662                            ..field_spec
663                        }
664                    } else {
665                        field_spec
666                    }
667                } else {
668                    // The implementation of `jsonb_get_string` always returns
669                    // `Ok(...)`. Morally, everything has a string
670                    // representation, and the worst you can get is a NULL,
671                    // which maps to a NULL.
672                    ResultSpec::any_infallible()
673                }
674            })
675        }
676
677        fn eq<'b>(left: ResultSpec<'b>, right: ResultSpec<'b>) -> ResultSpec<'b> {
678            eagerly(left, right, |left, right| {
679                // `eq` might return true if there's any overlap between the range of its two arguments...
680                let maybe_true = match left.clone().intersect(right.clone()) {
681                    Values::Empty => ResultSpec::nothing(),
682                    _ => ResultSpec::value(Datum::True),
683                };
684
685                // ...and may return false if the union contains at least two distinct values.
686                // Note that the `Empty` case is handled by `eagerly` above.
687                let maybe_false = match left.union(right) {
688                    Values::Within(a, b) if a == b => ResultSpec::nothing(),
689                    _ => ResultSpec::value(Datum::False),
690                };
691
692                maybe_true.union(maybe_false)
693            })
694        }
695
696        match func {
697            BinaryFunc::JsonbGetString(_) => Some(SpecialBinary {
698                map_fn: |l, r| jsonb_get_string(l, r, false),
699                pushdownable: (true, false),
700            }),
701            BinaryFunc::JsonbGetStringStringify(_) => Some(SpecialBinary {
702                map_fn: |l, r| jsonb_get_string(l, r, true),
703                pushdownable: (true, false),
704            }),
705            BinaryFunc::Eq(_) => Some(SpecialBinary {
706                map_fn: eq,
707                pushdownable: (true, true),
708            }),
709            _ => None,
710        }
711    }
712}
713
714#[derive(Clone, Debug)]
715pub struct ColumnSpec<'a> {
716    pub col_type: ReprColumnType,
717    pub range: ResultSpec<'a>,
718}
719
720/// An interpreter that:
721/// - stores both the type and the range of possible values for every column and
722///   unmaterializable function. (See the `push_` methods.)
723/// - given an expression (or MFP, etc.), returns the range of possible results that evaluating that
724///   expression might have. (See the `eval_` methods.)
725#[derive(Clone, Debug)]
726pub struct ColumnSpecs<'a> {
727    pub relation: &'a ReprRelationType,
728    pub columns: Vec<ResultSpec<'a>>,
729    pub unmaterializables: BTreeMap<UnmaterializableFunc, ResultSpec<'a>>,
730    pub arena: &'a RowArena,
731}
732
733impl<'a> ColumnSpecs<'a> {
734    // Interpreting a variadic function can lead to exponential blowup: there are up to 4 possibly-
735    // interesting values for each argument (error, null, range bounds...) and in the worst case
736    // we may need to test every combination. We mitigate that here in two ways:
737    // - Adding a linear-time optimization for associative functions like AND, OR, COALESCE.
738    // - Limiting the number of arguments we'll pass on to eval. If this limit is crossed, we'll
739    //   return our default / safe overapproximation instead.
740    const MAX_EVAL_ARGS: usize = 6;
741
742    /// Create a new, empty set of column specs. (Initially, the only assumption we make about the
743    /// data in the column is that it matches the type.)
744    pub fn new(relation: &'a ReprRelationType, arena: &'a RowArena) -> Self {
745        let columns = relation
746            .column_types
747            .iter()
748            .map(|ct| ResultSpec::has_type(ct, false))
749            .collect();
750        ColumnSpecs {
751            relation,
752            columns,
753            unmaterializables: Default::default(),
754            arena,
755        }
756    }
757
758    /// Restrict the set of possible values in a given column. (By intersecting it with the existing
759    /// spec.)
760    pub fn push_column(&mut self, id: usize, update: ResultSpec<'a>) {
761        let range = self.columns.get_mut(id).expect("valid column id");
762        *range = range.clone().intersect(update);
763    }
764
765    /// Restrict the set of possible values a given unmaterializable func might return. (By
766    /// intersecting it with the existing spec.)
767    pub fn push_unmaterializable(&mut self, func: UnmaterializableFunc, update: ResultSpec<'a>) {
768        let range = self
769            .unmaterializables
770            .entry(func.clone())
771            .or_insert_with(|| ResultSpec::has_type(&func.output_type(), true));
772        *range = range.clone().intersect(update);
773    }
774
775    fn eval_result<'b, E>(&self, result: Result<Datum<'b>, E>) -> ResultSpec<'a> {
776        match result {
777            Ok(Datum::Null) => ResultSpec {
778                nullable: true,
779                ..ResultSpec::nothing()
780            },
781            Ok(d) => ResultSpec {
782                values: Values::just(self.arena.make_datum(|packer| packer.push(d))),
783                ..ResultSpec::nothing()
784            },
785            Err(_) => ResultSpec {
786                fallible: true,
787                ..ResultSpec::nothing()
788            },
789        }
790    }
791
792    fn set_literal(expr: &mut MirScalarExpr, update: Result<Datum, EvalError>) {
793        match expr {
794            MirScalarExpr::Literal(literal, col_type) => match update {
795                Err(error) => *literal = Err(error),
796                Ok(datum) => {
797                    assert!(
798                        datum.is_instance_of(col_type),
799                        "{datum:?} must be an instance of {col_type:?}"
800                    );
801                    match literal {
802                        // Reuse the allocation if we can
803                        Ok(row) => row.packer().push(datum),
804                        literal => *literal = Ok(Row::pack_slice(&[datum])),
805                    }
806                }
807            },
808            _ => panic!("not a literal"),
809        }
810    }
811
812    fn set_argument(expr: &mut MirScalarExpr, arg: usize, value: Result<Datum, EvalError>) {
813        match (expr, arg) {
814            (MirScalarExpr::CallUnary { expr, .. }, 0) => Self::set_literal(expr, value),
815            (MirScalarExpr::CallBinary { expr1, .. }, 0) => Self::set_literal(expr1, value),
816            (MirScalarExpr::CallBinary { expr2, .. }, 1) => Self::set_literal(expr2, value),
817            (MirScalarExpr::CallVariadic { exprs, .. }, n) if n < exprs.len() => {
818                Self::set_literal(&mut exprs[n], value)
819            }
820            _ => panic!("illegal argument for expression"),
821        }
822    }
823
824    /// A literal with the given type and a trivial default value. Callers should ensure that
825    /// [Self::set_literal] is called on the resulting expression to give it a meaningful value
826    /// before evaluating.
827    fn placeholder(col_type: ReprColumnType) -> MirScalarExpr {
828        MirScalarExpr::Literal(Err(EvalError::Internal("".into())), col_type)
829    }
830}
831
832impl<'a> Interpreter for ColumnSpecs<'a> {
833    type Summary = ColumnSpec<'a>;
834
835    fn column(&self, id: usize) -> Self::Summary {
836        let col_type = self.relation.column_types[id].clone();
837        let range = self.columns[id].clone();
838        ColumnSpec { col_type, range }
839    }
840
841    fn literal(&self, result: &Result<Row, EvalError>, col_type: &ReprColumnType) -> Self::Summary {
842        let col_type = col_type.clone();
843        let range = self.eval_result(result.as_ref().map(|row| {
844            self.arena
845                .make_datum(|packer| packer.push(row.unpack_first()))
846        }));
847        ColumnSpec { col_type, range }
848    }
849
850    fn unmaterializable(&self, func: &UnmaterializableFunc) -> Self::Summary {
851        let col_type = func.output_type();
852        let range = self
853            .unmaterializables
854            .get(func)
855            .cloned()
856            .unwrap_or_else(|| ResultSpec::has_type(&func.output_type(), true));
857        ColumnSpec { col_type, range }
858    }
859
860    fn unary(&self, func: &UnaryFunc, summary: Self::Summary) -> Self::Summary {
861        let fallible = func.could_error() || summary.range.fallible;
862        let mapped_spec = if let Some(special) = SpecialUnary::for_func(func) {
863            (special.map_fn)(self, summary.range)
864        } else {
865            let is_monotone = func.is_monotone();
866            let mut expr = MirScalarExpr::CallUnary {
867                func: func.clone(),
868                expr: Box::new(Self::placeholder(summary.col_type.clone())),
869            };
870            summary.range.flat_map(is_monotone, |datum| {
871                Self::set_argument(&mut expr, 0, datum);
872                self.eval_result(expr.eval(&[], self.arena))
873            })
874        };
875
876        let col_type = func.output_type(summary.col_type);
877
878        let range = mapped_spec.intersect(ResultSpec::has_type(&col_type, fallible));
879        ColumnSpec { col_type, range }
880    }
881
882    fn binary(
883        &self,
884        func: &BinaryFunc,
885        left: Self::Summary,
886        right: Self::Summary,
887    ) -> Self::Summary {
888        let (left_monotonic, right_monotonic) = func.is_monotone();
889        let fallible = func.could_error() || left.range.fallible || right.range.fallible;
890
891        let mapped_spec = if let Some(special) = SpecialBinary::for_func(func) {
892            (special.map_fn)(left.range, right.range)
893        } else {
894            let mut expr = MirScalarExpr::CallBinary {
895                func: func.clone(),
896                expr1: Box::new(Self::placeholder(left.col_type.clone())),
897                expr2: Box::new(Self::placeholder(right.col_type.clone())),
898            };
899            left.range.flat_map(left_monotonic, |left_result| {
900                Self::set_argument(&mut expr, 0, left_result);
901                right.range.flat_map(right_monotonic, |right_result| {
902                    Self::set_argument(&mut expr, 1, right_result);
903                    self.eval_result(expr.eval(&[], self.arena))
904                })
905            })
906        };
907
908        let col_type = func.output_type(&[left.col_type, right.col_type]);
909
910        let range = mapped_spec.intersect(ResultSpec::has_type(&col_type, fallible));
911        ColumnSpec { col_type, range }
912    }
913
914    fn variadic(&self, func: &VariadicFunc, args: Vec<Self::Summary>) -> Self::Summary {
915        let fallible = func.could_error() || args.iter().any(|s| s.range.fallible);
916        if func.is_associative() && args.len() > 2 {
917            // To avoid a combinatorial explosion, evaluate large variadic calls as a series of
918            // smaller ones, since associativity guarantees we'll get compatible results.
919            return args
920                .into_iter()
921                .reduce(|a, b| self.variadic(func, vec![a, b]))
922                .expect("reducing over a non-empty argument list");
923        }
924
925        let mapped_spec = if args.len() >= Self::MAX_EVAL_ARGS {
926            ResultSpec::anything()
927        } else {
928            fn eval_loop<'a>(
929                is_monotonic: bool,
930                expr: &mut MirScalarExpr,
931                args: &[ColumnSpec<'a>],
932                index: usize,
933                datum_map: &mut impl FnMut(&MirScalarExpr) -> ResultSpec<'a>,
934            ) -> ResultSpec<'a> {
935                if index >= args.len() {
936                    datum_map(expr)
937                } else {
938                    args[index].range.flat_map(is_monotonic, |datum| {
939                        ColumnSpecs::set_argument(expr, index, datum);
940                        eval_loop(is_monotonic, expr, args, index + 1, datum_map)
941                    })
942                }
943            }
944
945            let mut fn_expr = MirScalarExpr::CallVariadic {
946                func: func.clone(),
947                exprs: args
948                    .iter()
949                    .map(|spec| Self::placeholder(spec.col_type.clone()))
950                    .collect(),
951            };
952            eval_loop(func.is_monotone(), &mut fn_expr, &args, 0, &mut |expr| {
953                self.eval_result(expr.eval(&[], self.arena))
954            })
955        };
956
957        let col_types = args.into_iter().map(|spec| spec.col_type).collect();
958        let col_type = func.output_type(col_types);
959
960        let range = mapped_spec.intersect(ResultSpec::has_type(&col_type, fallible));
961
962        ColumnSpec { col_type, range }
963    }
964
965    fn cond(&self, cond: Self::Summary, then: Self::Summary, els: Self::Summary) -> Self::Summary {
966        let col_type = then
967            .col_type
968            .union(&els.col_type)
969            .expect("failed type union for cond during abstract interpretation");
970
971        let range = cond
972            .range
973            .flat_map(true, |datum| match datum {
974                Ok(Datum::True) => then.range.clone(),
975                Ok(Datum::False) => els.range.clone(),
976                _ => ResultSpec::fails(),
977            })
978            .intersect(ResultSpec::has_type(&col_type, true));
979
980        ColumnSpec { col_type, range }
981    }
982
983    /// Override the default implementations of [Self::mfp_filter] and
984    /// [Self::mfp_plan_filter] so that the fallibility of MFP expressions
985    /// surfaces in the result, even when the expression's result column isn't
986    /// referenced by a predicate or temporal bound.
987    ///
988    /// The runtime MFP evaluator runs every expression once all the preceding
989    /// predicates pass (see [`crate::SafeMfpPlan::evaluate_inner`]), so an
990    /// expression that errors on the actual data will turn the whole row into
991    /// an `Err` — even if no predicate or bound mentions that expression. The
992    /// default `mfp_filter` / `mfp_plan_filter` only AND together the
993    /// predicates and bounds, so the AND result misses the expression's
994    /// `fallible` flag and persist filter pushdown can wrongly discard a part
995    /// that actually produces error rows. See database-issues#9656.
996    fn mfp_filter(&self, mfp: &MapFilterProject) -> Self::Summary {
997        let mfp_eval = MfpEval::new(self, mfp.input_arity, &mfp.expressions);
998        let predicates = mfp
999            .predicates
1000            .iter()
1001            .map(|(_, e)| mfp_eval.expr(e))
1002            .collect();
1003        let mut result = self.variadic(&And.into(), predicates);
1004        if mfp_eval.expressions.iter().any(|s| s.range.fallible) {
1005            result.range.fallible = true;
1006        }
1007        result
1008    }
1009
1010    fn mfp_plan_filter(&self, plan: &MfpPlan) -> Self::Summary {
1011        let mfp_eval = MfpEval::new(self, plan.mfp.input_arity, &plan.mfp.expressions);
1012        let mut results: Vec<_> = plan
1013            .mfp
1014            .predicates
1015            .iter()
1016            .map(|(_, e)| mfp_eval.expr(e))
1017            .collect();
1018        let mz_now = mfp_eval.unmaterializable(&UnmaterializableFunc::MzNow);
1019        for bound in &plan.lower_bounds {
1020            let bound_range = mfp_eval.expr(bound);
1021            let result = mfp_eval.binary(&BinaryFunc::Lte(func::Lte), bound_range, mz_now.clone());
1022            results.push(result);
1023        }
1024        for bound in &plan.upper_bounds {
1025            let bound_range = mfp_eval.expr(bound);
1026            let result = mfp_eval.binary(&BinaryFunc::Gte(func::Gte), bound_range, mz_now.clone());
1027            results.push(result);
1028        }
1029        let mut result = self.variadic(&And.into(), results);
1030        if mfp_eval.expressions.iter().any(|s| s.range.fallible) {
1031            result.range.fallible = true;
1032        }
1033        result
1034    }
1035}
1036
1037/// An interpreter that returns whether or not a particular expression is "pushdownable".
1038/// Broadly speaking, an expression is pushdownable if the result of evaluating the expression
1039/// depends on the range of possible column values in a way that `ColumnSpecs` is able to reason about.
1040///
1041/// In practice, we internally need to distinguish between expressions that are trivially predicable
1042/// (because they're constant) and expressions that depend on the column ranges themselves.
1043/// See the [TraceSummary] variants for those distinctions, and [TraceSummary::pushdownable] for
1044/// the overall assessment.
1045#[derive(Debug)]
1046pub struct Trace;
1047
1048/// A summary type for the [Trace] interpreter.
1049///
1050/// The ordering of this type is meaningful: the "smaller" the summary, the more information we have
1051/// about the possible values of the expression. This means we can eg. use `max` in the
1052/// interpreter below to find the summary for a function-call expression based on the summaries
1053/// of its arguments.
1054#[derive(Copy, Clone, Debug, PartialOrd, PartialEq, Ord, Eq)]
1055pub enum TraceSummary {
1056    /// The expression is constant: we can evaluate it without any runtime information.
1057    /// This corresponds to a `ResultSpec` of a single value.
1058    Constant,
1059    /// The expression depends on runtime information, but in "predictable" way... ie. if we know
1060    /// the range of possible values for all columns and unmaterializable functions, we can
1061    /// predict the possible values of the output.
1062    /// This corresponds to a `ResultSpec` of a perhaps range of values.
1063    Dynamic,
1064    /// The expression depends on runtime information in an unpredictable way.
1065    /// This corresponds to a `ResultSpec::value_all()` or something similarly vague.
1066    Unknown,
1067}
1068
1069impl TraceSummary {
1070    /// We say that a function is "pushdownable" for a particular
1071    /// argument if `ColumnSpecs` can determine the spec of the function's output given the input spec for
1072    /// that argument. (In practice, this is true when either the function is monotone in that argument
1073    /// or it's been special-cased in the interpreter.)
1074    fn apply_fn(self, pushdownable: bool) -> Self {
1075        match self {
1076            TraceSummary::Constant => TraceSummary::Constant,
1077            TraceSummary::Dynamic => match pushdownable {
1078                true => TraceSummary::Dynamic,
1079                false => TraceSummary::Unknown,
1080            },
1081            TraceSummary::Unknown => TraceSummary::Unknown,
1082        }
1083    }
1084
1085    /// We say that an expression is "pushdownable" if it's either constant or dynamic.
1086    pub fn pushdownable(self) -> bool {
1087        match self {
1088            TraceSummary::Constant | TraceSummary::Dynamic => true,
1089            TraceSummary::Unknown => false,
1090        }
1091    }
1092}
1093
1094impl Interpreter for Trace {
1095    type Summary = TraceSummary;
1096
1097    fn column(&self, _id: usize) -> Self::Summary {
1098        TraceSummary::Dynamic
1099    }
1100
1101    fn literal(
1102        &self,
1103        _result: &Result<Row, EvalError>,
1104        _col_type: &ReprColumnType,
1105    ) -> Self::Summary {
1106        TraceSummary::Constant
1107    }
1108
1109    fn unmaterializable(&self, _func: &UnmaterializableFunc) -> Self::Summary {
1110        TraceSummary::Dynamic
1111    }
1112
1113    fn unary(&self, func: &UnaryFunc, expr: Self::Summary) -> Self::Summary {
1114        let pushdownable = match SpecialUnary::for_func(func) {
1115            None => func.is_monotone(),
1116            Some(special) => special.pushdownable,
1117        };
1118        expr.apply_fn(pushdownable)
1119    }
1120
1121    fn binary(
1122        &self,
1123        func: &BinaryFunc,
1124        left: Self::Summary,
1125        right: Self::Summary,
1126    ) -> Self::Summary {
1127        let (left_pushdownable, right_pushdownable) = match SpecialBinary::for_func(func) {
1128            None => func.is_monotone(),
1129            Some(special) => special.pushdownable,
1130        };
1131        left.apply_fn(left_pushdownable)
1132            .max(right.apply_fn(right_pushdownable))
1133    }
1134
1135    fn variadic(&self, func: &VariadicFunc, exprs: Vec<Self::Summary>) -> Self::Summary {
1136        if !func.is_associative() && exprs.len() >= ColumnSpecs::MAX_EVAL_ARGS {
1137            // We can't efficiently evaluate functions with very large argument lists;
1138            // see the comment on ColumnSpecs::MAX_EVAL_ARGS for details.
1139            return TraceSummary::Unknown;
1140        }
1141
1142        let pushdownable_fn = func.is_monotone();
1143        exprs
1144            .into_iter()
1145            .map(|pushdownable_arg| pushdownable_arg.apply_fn(pushdownable_fn))
1146            .max()
1147            .unwrap_or(TraceSummary::Constant)
1148    }
1149
1150    fn cond(&self, cond: Self::Summary, then: Self::Summary, els: Self::Summary) -> Self::Summary {
1151        // We don't actually need to be able to predict the condition precisely to predict the output,
1152        // since we can union the ranges of the two branches for a conservative estimate.
1153        let cond = cond.min(TraceSummary::Dynamic);
1154        cond.max(then).max(els)
1155    }
1156}
1157
1158#[cfg(test)]
1159mod tests {
1160    use itertools::Itertools;
1161    use mz_repr::adt::datetime::DateTimeUnits;
1162    use mz_repr::{Datum, PropDatum, RowArena, SqlScalarType};
1163    use proptest::prelude::*;
1164    use proptest::sample::{Index, select};
1165
1166    use crate::func::*;
1167    use crate::scalar::func::variadic::Concat;
1168    use crate::{BinaryFunc, MirScalarExpr, UnaryFunc};
1169
1170    use super::*;
1171
1172    #[derive(Debug)]
1173    struct ExpressionData {
1174        relation_type: ReprRelationType,
1175        specs: Vec<ResultSpec<'static>>,
1176        rows: Vec<Row>,
1177        expr: MirScalarExpr,
1178    }
1179
1180    // Currently there's no good way to check whether a particular function accepts a particular
1181    // type as argument, which means we need to list everything out explicitly here. Restrict our interest
1182    // to a reasonable number of functions, to keep things tractable
1183    // TODO: replace this with function-level info once it's available.
1184    const NUM_TYPE: ReprScalarType = ReprScalarType::Numeric;
1185    static SCALAR_TYPES: &[ReprScalarType] = &[
1186        ReprScalarType::Bool,
1187        ReprScalarType::Jsonb,
1188        NUM_TYPE,
1189        ReprScalarType::Date,
1190        ReprScalarType::Timestamp,
1191        ReprScalarType::MzTimestamp,
1192        ReprScalarType::String,
1193    ];
1194
1195    const INTERESTING_UNARY_FUNCS: &[UnaryFunc] = {
1196        &[
1197            UnaryFunc::CastNumericToMzTimestamp(CastNumericToMzTimestamp),
1198            UnaryFunc::NegNumeric(NegNumeric),
1199            UnaryFunc::CastJsonbToNumeric(CastJsonbToNumeric(None)),
1200            UnaryFunc::CastJsonbToBool(CastJsonbToBool),
1201            UnaryFunc::CastJsonbToString(CastJsonbToString),
1202            UnaryFunc::DateTruncTimestamp(DateTruncTimestamp(DateTimeUnits::Epoch)),
1203            UnaryFunc::ExtractTimestamp(ExtractTimestamp(DateTimeUnits::Epoch)),
1204            UnaryFunc::ExtractDate(ExtractDate(DateTimeUnits::Epoch)),
1205            UnaryFunc::Not(Not),
1206            UnaryFunc::IsNull(IsNull),
1207            UnaryFunc::IsFalse(IsFalse),
1208            UnaryFunc::TryParseMonotonicIso8601Timestamp(TryParseMonotonicIso8601Timestamp),
1209        ]
1210    };
1211
1212    fn unary_typecheck(func: &UnaryFunc, arg: &ReprColumnType) -> bool {
1213        use UnaryFunc::*;
1214        match func {
1215            CastNumericToMzTimestamp(_) | NegNumeric(_) => arg.scalar_type == NUM_TYPE,
1216            CastJsonbToNumeric(_) | CastJsonbToBool(_) | CastJsonbToString(_) => {
1217                arg.scalar_type == ReprScalarType::Jsonb
1218            }
1219            ExtractTimestamp(_) | DateTruncTimestamp(_) => {
1220                arg.scalar_type == ReprScalarType::Timestamp
1221            }
1222            ExtractDate(_) => arg.scalar_type == ReprScalarType::Date,
1223            Not(_) => arg.scalar_type == ReprScalarType::Bool,
1224            IsNull(_) => true,
1225            TryParseMonotonicIso8601Timestamp(_) => arg.scalar_type == ReprScalarType::String,
1226            _ => false,
1227        }
1228    }
1229
1230    fn interesting_binary_funcs() -> Vec<BinaryFunc> {
1231        vec![
1232            AddTimestampInterval.into(),
1233            AddNumeric.into(),
1234            SubNumeric.into(),
1235            MulNumeric.into(),
1236            DivNumeric.into(),
1237            Eq.into(),
1238            Lt.into(),
1239            Gt.into(),
1240            Lte.into(),
1241            Gte.into(),
1242            DateTruncUnitsTimestamp.into(),
1243            JsonbGetString.into(),
1244            JsonbGetStringStringify.into(),
1245        ]
1246    }
1247
1248    fn binary_typecheck(func: &BinaryFunc, arg0: &ReprColumnType, arg1: &ReprColumnType) -> bool {
1249        use BinaryFunc::*;
1250        match func {
1251            AddTimestampInterval(_) => {
1252                arg0.scalar_type == ReprScalarType::Timestamp
1253                    && arg1.scalar_type == ReprScalarType::Interval
1254            }
1255            AddNumeric(_) | SubNumeric(_) | MulNumeric(_) | DivNumeric(_) => {
1256                arg0.scalar_type == NUM_TYPE && arg1.scalar_type == NUM_TYPE
1257            }
1258            Eq(_) | Lt(_) | Gt(_) | Lte(_) | Gte(_) => arg0.scalar_type == arg1.scalar_type,
1259            DateTruncTimestamp(_) => {
1260                arg0.scalar_type == ReprScalarType::String
1261                    && arg1.scalar_type == ReprScalarType::Timestamp
1262            }
1263            JsonbGetString(_) | JsonbGetStringStringify(_) => {
1264                arg0.scalar_type == ReprScalarType::Jsonb
1265                    && arg1.scalar_type == ReprScalarType::String
1266            }
1267            _ => false,
1268        }
1269    }
1270
1271    const INTERESTING_VARIADIC_FUNCS: &[VariadicFunc] = {
1272        use crate::scalar::func::variadic as v;
1273        use VariadicFunc::*;
1274        &[
1275            Coalesce(v::Coalesce),
1276            Greatest(v::Greatest),
1277            Least(v::Least),
1278            And(v::And),
1279            Or(v::Or),
1280            Concat(v::Concat),
1281            ConcatWs(v::ConcatWs),
1282        ]
1283    };
1284
1285    fn variadic_typecheck(func: &VariadicFunc, args: &[ReprColumnType]) -> bool {
1286        use VariadicFunc::*;
1287        fn all_eq<'a>(
1288            iter: impl IntoIterator<Item = &'a ReprColumnType>,
1289            other: &ReprScalarType,
1290        ) -> bool {
1291            iter.into_iter().all(|t| t.scalar_type == *other)
1292        }
1293        match func {
1294            Coalesce(_) | Greatest(_) | Least(_) => match args {
1295                [] => true,
1296                [first, rest @ ..] => all_eq(rest, &first.scalar_type),
1297            },
1298            And(_) | Or(_) => all_eq(args, &ReprScalarType::Bool),
1299            Concat(_) => all_eq(args, &ReprScalarType::String),
1300            ConcatWs(_) => args.len() > 1 && all_eq(args, &ReprScalarType::String),
1301            _ => false,
1302        }
1303    }
1304
1305    fn gen_datums_for_type(typ: &ReprColumnType) -> BoxedStrategy<Datum<'static>> {
1306        let mut values: Vec<Datum<'static>> = SqlScalarType::from_repr(&typ.scalar_type)
1307            .interesting_datums()
1308            .collect();
1309        if typ.nullable {
1310            values.push(Datum::Null)
1311        }
1312        select(values).boxed()
1313    }
1314
1315    fn gen_column() -> impl Strategy<Value = (ReprColumnType, Datum<'static>, ResultSpec<'static>)>
1316    {
1317        let col_type = (select(SCALAR_TYPES), any::<bool>())
1318            .prop_map(|(t, b)| t.nullable(b))
1319            .prop_filter("need at least one value", |c| {
1320                SqlScalarType::from_repr(&c.scalar_type)
1321                    .interesting_datums()
1322                    .count()
1323                    > 0
1324            });
1325
1326        let result_spec = select(vec![
1327            ResultSpec::nothing(),
1328            ResultSpec::null(),
1329            ResultSpec::anything(),
1330            ResultSpec::value_all(),
1331        ]);
1332
1333        (col_type, result_spec).prop_flat_map(|(col, result_spec)| {
1334            gen_datums_for_type(&col).prop_map(move |datum| {
1335                let result_spec = result_spec.clone().union(ResultSpec::value(datum));
1336                (col.clone(), datum, result_spec)
1337            })
1338        })
1339    }
1340
1341    fn gen_expr_for_relation(
1342        relation: &ReprRelationType,
1343    ) -> BoxedStrategy<(MirScalarExpr, ReprColumnType)> {
1344        let column_gen = {
1345            let column_types = relation.column_types.clone();
1346            any::<Index>()
1347                .prop_map(move |idx| {
1348                    let id = idx.index(column_types.len());
1349                    (MirScalarExpr::column(id), column_types[id].clone())
1350                })
1351                .boxed()
1352        };
1353
1354        let literal_gen = (select(SCALAR_TYPES), any::<bool>())
1355            .prop_map(|(s, b)| s.nullable(b))
1356            .prop_flat_map(|ct| {
1357                let error_gen = any::<EvalError>().prop_map(Err).boxed();
1358                let value_gen = gen_datums_for_type(&ct)
1359                    .prop_map(move |datum| Ok(Row::pack_slice(&[datum])))
1360                    .boxed();
1361                error_gen.prop_union(value_gen).prop_map(move |result| {
1362                    (MirScalarExpr::Literal(result, ct.clone()), ct.clone())
1363                })
1364            })
1365            .boxed();
1366
1367        column_gen
1368            .prop_union(literal_gen)
1369            .prop_recursive(4, 64, 8, |self_gen| {
1370                let unary_gen = (select(INTERESTING_UNARY_FUNCS), self_gen.clone())
1371                    .prop_filter_map("unary func", |(func, (expr_in, type_in))| {
1372                        if !unary_typecheck(&func, &type_in) {
1373                            return None;
1374                        }
1375                        let type_out = func.output_type(type_in);
1376                        let expr_out = MirScalarExpr::CallUnary {
1377                            func,
1378                            expr: Box::new(expr_in),
1379                        };
1380                        Some((expr_out, type_out))
1381                    })
1382                    .boxed();
1383                let binary_gen = (
1384                    select(interesting_binary_funcs()),
1385                    self_gen.clone(),
1386                    self_gen.clone(),
1387                )
1388                    .prop_filter_map(
1389                        "binary func",
1390                        |(func, (expr_left, type_left), (expr_right, type_right))| {
1391                            if !binary_typecheck(&func, &type_left, &type_right) {
1392                                return None;
1393                            }
1394                            let type_out = func.output_type(&[type_left, type_right]);
1395                            let expr_out = MirScalarExpr::CallBinary {
1396                                func,
1397                                expr1: Box::new(expr_left),
1398                                expr2: Box::new(expr_right),
1399                            };
1400                            Some((expr_out, type_out))
1401                        },
1402                    )
1403                    .boxed();
1404                let variadic_gen = (
1405                    select(INTERESTING_VARIADIC_FUNCS),
1406                    prop::collection::vec(self_gen, 1..4),
1407                )
1408                    .prop_filter_map("variadic func", |(func, exprs)| {
1409                        let (exprs_in, type_in): (_, Vec<_>) = exprs.into_iter().unzip();
1410                        if !variadic_typecheck(&func, &type_in) {
1411                            return None;
1412                        }
1413                        let type_out = func.output_type(type_in);
1414                        let expr_out = MirScalarExpr::CallVariadic {
1415                            func,
1416                            exprs: exprs_in,
1417                        };
1418                        Some((expr_out, type_out))
1419                    })
1420                    .boxed();
1421
1422                unary_gen
1423                    .prop_union(binary_gen)
1424                    .boxed()
1425                    .prop_union(variadic_gen)
1426            })
1427            .boxed()
1428    }
1429
1430    fn gen_expr_data() -> impl Strategy<Value = ExpressionData> {
1431        let columns = prop::collection::vec(gen_column(), 1..10);
1432        columns.prop_flat_map(|data| {
1433            let (columns, datums, specs): (Vec<_>, Vec<_>, Vec<_>) = data.into_iter().multiunzip();
1434            let relation = ReprRelationType::new(columns);
1435            let row = Row::pack_slice(&datums);
1436            gen_expr_for_relation(&relation).prop_map(move |(expr, _)| ExpressionData {
1437                relation_type: relation.clone(),
1438                specs: specs.clone(),
1439                rows: vec![row.clone()],
1440                expr,
1441            })
1442        })
1443    }
1444
1445    #[mz_ore::test]
1446    #[cfg_attr(miri, ignore)] // error: unsupported operation: can't call foreign function `decContextDefault` on OS `linux`
1447    fn test_trivial_spec_matches() {
1448        fn check(datum: PropDatum) -> Result<(), TestCaseError> {
1449            let datum: Datum = (&datum).into();
1450            let spec = if datum.is_null() {
1451                ResultSpec::null()
1452            } else {
1453                ResultSpec::value(datum)
1454            };
1455            assert!(spec.may_contain(datum));
1456            Ok(())
1457        }
1458
1459        proptest!(|(datum in mz_repr::arb_datum(true))| {
1460            check(datum)?;
1461        });
1462
1463        assert!(ResultSpec::fails().may_fail());
1464    }
1465
1466    #[mz_ore::test]
1467    #[cfg_attr(miri, ignore)] // error: unsupported operation: can't call foreign function `decContextDefault` on OS `linux`
1468    fn test_equivalence() {
1469        fn check(data: ExpressionData) -> Result<(), TestCaseError> {
1470            let ExpressionData {
1471                relation_type,
1472                specs,
1473                rows,
1474                expr,
1475            } = data;
1476
1477            // We want to ensure that the spec we get when evaluating an expression using
1478            // `ColumnSpecs` always contains the _actual_ value of that column when evaluated with
1479            // eval. (This is an important correctness property of abstract interpretation.)
1480            let arena = RowArena::new();
1481            let mut interpreter = ColumnSpecs::new(&relation_type, &arena);
1482            for (id, spec) in specs.into_iter().enumerate() {
1483                interpreter.push_column(id, spec);
1484            }
1485
1486            let spec = interpreter.expr(&expr);
1487
1488            for row in &rows {
1489                let datums: Vec<_> = row.iter().collect();
1490                let eval_result = expr.eval(&datums, &arena);
1491                match eval_result {
1492                    Ok(value) => {
1493                        assert!(spec.range.may_contain(value))
1494                    }
1495                    Err(_) => {
1496                        assert!(spec.range.may_fail());
1497                    }
1498                }
1499            }
1500
1501            Ok(())
1502        }
1503
1504        proptest!(|(data in gen_expr_data())| {
1505            check(data)?;
1506        });
1507    }
1508
1509    /// Regression test for database-issues#9656.
1510    ///
1511    /// The interpreter must surface the fallibility of MFP expressions that
1512    /// aren't referenced by any predicate or temporal bound. The runtime MFP
1513    /// evaluator runs every expression once predicates pass, so an expression
1514    /// that errors on the actual data makes the whole row an `Err` — and
1515    /// `filter_result` must keep the part to emit that error.
1516    #[mz_ore::test]
1517    #[cfg_attr(miri, ignore)]
1518    fn test_mfp_unreferenced_fallible_expression() {
1519        use crate::scalar::func::CastStringToUuid;
1520
1521        // MFP: one expression that always errors on the input range, and one
1522        // predicate that always passes. The expression's result column is
1523        // *not* referenced by the predicate, so the default interpreter
1524        // implementation would AND together just `True` and miss the
1525        // fallibility.
1526        let mfp = MapFilterProject {
1527            expressions: vec![MirScalarExpr::CallUnary {
1528                func: UnaryFunc::CastStringToUuid(CastStringToUuid),
1529                expr: Box::new(MirScalarExpr::column(0)),
1530            }],
1531            predicates: vec![(
1532                1,
1533                MirScalarExpr::literal_ok(Datum::True, ReprScalarType::Bool),
1534            )],
1535            projection: vec![0, 1],
1536            input_arity: 1,
1537        };
1538
1539        let relation = ReprRelationType::new(vec![ReprScalarType::String.nullable(false)]);
1540        let arena = RowArena::new();
1541        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1542        // "not-a-uuid" is in the stats range and definitely doesn't parse as a UUID.
1543        interpreter.push_column(
1544            0,
1545            ResultSpec::value_between(Datum::String("not-a-uuid"), Datum::String("not-a-uuid")),
1546        );
1547        let spec = interpreter.mfp_filter(&mfp);
1548        assert!(
1549            spec.range.may_fail(),
1550            "an MFP expression that errors on the stats range must propagate \
1551             fallibility, otherwise persist filter pushdown can wrongly discard \
1552             a part that produces error rows",
1553        );
1554    }
1555
1556    /// Proptest companion to [`test_mfp_unreferenced_fallible_expression`]:
1557    /// directly verifies the fallibility claim of [`ColumnSpecs::mfp_filter`]
1558    /// against the runtime MFP semantics. For a random expression placed in
1559    /// `MapFilterProject::expressions` (i.e. as an unreferenced Map step), if
1560    /// evaluating the expression on a row drawn from the stats range produces
1561    /// an error at runtime, then the interpreter's summary must report
1562    /// `may_fail()`. Without the `expressions.any(|s| s.range.fallible)` patch
1563    /// in `mfp_filter`, the AND over an empty predicate list collapses to
1564    /// `True` and the runtime error is wrongly ruled out.
1565    #[mz_ore::test]
1566    #[cfg_attr(miri, ignore)]
1567    fn test_mfp_filter_fallibility_equivalence() {
1568        fn check(data: ExpressionData) -> Result<(), TestCaseError> {
1569            let ExpressionData {
1570                relation_type,
1571                specs,
1572                rows,
1573                expr,
1574            } = data;
1575
1576            let input_arity = relation_type.column_types.len();
1577            let mfp = MapFilterProject {
1578                expressions: vec![expr.clone()],
1579                predicates: vec![],
1580                projection: (0..input_arity).collect(),
1581                input_arity,
1582            };
1583
1584            let arena = RowArena::new();
1585            let mut interpreter = ColumnSpecs::new(&relation_type, &arena);
1586            for (id, spec) in specs.into_iter().enumerate() {
1587                interpreter.push_column(id, spec);
1588            }
1589            let summary = interpreter.mfp_filter(&mfp);
1590
1591            for row in &rows {
1592                let datums: Vec<_> = row.iter().collect();
1593                if expr.eval(&datums, &arena).is_err() {
1594                    prop_assert!(
1595                        summary.range.may_fail(),
1596                        "mfp_filter must surface the fallibility of an \
1597                         unreferenced MFP expression: row {:?} errored at \
1598                         runtime but the interpreter ruled out errors",
1599                        row,
1600                    );
1601                }
1602            }
1603            Ok(())
1604        }
1605
1606        proptest!(|(data in gen_expr_data())| {
1607            check(data)?;
1608        });
1609    }
1610
1611    #[mz_ore::test]
1612    fn test_mfp() {
1613        // Regression test for https://github.com/MaterializeInc/database-issues/issues/5736
1614        use MirScalarExpr::*;
1615
1616        let mfp = MapFilterProject {
1617            expressions: vec![],
1618            predicates: vec![
1619                // Always fails on the known input range
1620                (
1621                    1,
1622                    CallUnary {
1623                        func: UnaryFunc::IsNull(IsNull),
1624                        expr: Box::new(CallBinary {
1625                            func: MulInt32.into(),
1626                            expr1: Box::new(MirScalarExpr::column(0)),
1627                            expr2: Box::new(MirScalarExpr::column(0)),
1628                        }),
1629                    },
1630                ),
1631                // Always returns false on the known input range
1632                (
1633                    1,
1634                    CallBinary {
1635                        func: Eq.into(),
1636                        expr1: Box::new(MirScalarExpr::column(0)),
1637                        expr2: Box::new(MirScalarExpr::literal_ok(
1638                            Datum::Int32(1727694505),
1639                            ReprScalarType::Int32,
1640                        )),
1641                    },
1642                ),
1643            ],
1644            projection: vec![],
1645            input_arity: 1,
1646        };
1647
1648        let relation = ReprRelationType::new(vec![ReprScalarType::Int32.nullable(true)]);
1649        let arena = RowArena::new();
1650        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1651        interpreter.push_column(0, ResultSpec::value(Datum::Int32(-1294725158)));
1652        let spec = interpreter.mfp_filter(&mfp);
1653        assert!(spec.range.may_fail());
1654    }
1655
1656    #[mz_ore::test]
1657    fn test_concat() {
1658        let expr = MirScalarExpr::call_variadic(
1659            Concat,
1660            vec![
1661                MirScalarExpr::column(0),
1662                MirScalarExpr::literal_ok(Datum::String("a"), ReprScalarType::String),
1663                MirScalarExpr::literal_ok(Datum::String("b"), ReprScalarType::String),
1664            ],
1665        );
1666
1667        let relation = ReprRelationType::new(vec![ReprScalarType::String.nullable(false)]);
1668        let arena = RowArena::new();
1669        let interpreter = ColumnSpecs::new(&relation, &arena);
1670        let spec = interpreter.expr(&expr);
1671        assert!(spec.range.may_contain(Datum::String("blab")));
1672    }
1673
1674    #[mz_ore::test]
1675    fn test_eval_range() {
1676        // Example inspired by the tumbling windows temporal filter in the docs
1677        let period_ms = MirScalarExpr::literal_ok(Datum::Int64(10), ReprScalarType::Int64);
1678        let expr = MirScalarExpr::CallBinary {
1679            func: Gte.into(),
1680            expr1: Box::new(MirScalarExpr::CallUnmaterializable(
1681                UnmaterializableFunc::MzNow,
1682            )),
1683            expr2: Box::new(MirScalarExpr::CallUnary {
1684                func: UnaryFunc::CastInt64ToMzTimestamp(CastInt64ToMzTimestamp),
1685                expr: Box::new(MirScalarExpr::CallBinary {
1686                    func: MulInt64.into(),
1687                    expr1: Box::new(period_ms.clone()),
1688                    expr2: Box::new(MirScalarExpr::CallBinary {
1689                        func: DivInt64.into(),
1690                        expr1: Box::new(MirScalarExpr::column(0)),
1691                        expr2: Box::new(period_ms),
1692                    }),
1693                }),
1694            }),
1695        };
1696        let relation = ReprRelationType::new(vec![ReprScalarType::Int64.nullable(false)]);
1697
1698        {
1699            // Non-overlapping windows
1700            let arena = RowArena::new();
1701            let mut interpreter = ColumnSpecs::new(&relation, &arena);
1702            interpreter.push_unmaterializable(
1703                UnmaterializableFunc::MzNow,
1704                ResultSpec::value_between(
1705                    Datum::MzTimestamp(10.into()),
1706                    Datum::MzTimestamp(20.into()),
1707                ),
1708            );
1709            interpreter.push_column(0, ResultSpec::value_between(30i64.into(), 40i64.into()));
1710
1711            let range_out = interpreter.expr(&expr).range;
1712            assert!(range_out.may_contain(Datum::False));
1713            assert!(!range_out.may_contain(Datum::True));
1714            assert!(!range_out.may_contain(Datum::Null));
1715            assert!(!range_out.may_fail());
1716        }
1717
1718        {
1719            // Overlapping windows
1720            let arena = RowArena::new();
1721            let mut interpreter = ColumnSpecs::new(&relation, &arena);
1722            interpreter.push_unmaterializable(
1723                UnmaterializableFunc::MzNow,
1724                ResultSpec::value_between(
1725                    Datum::MzTimestamp(10.into()),
1726                    Datum::MzTimestamp(35.into()),
1727                ),
1728            );
1729            interpreter.push_column(0, ResultSpec::value_between(30i64.into(), 40i64.into()));
1730
1731            let range_out = interpreter.expr(&expr).range;
1732            assert!(range_out.may_contain(Datum::False));
1733            assert!(range_out.may_contain(Datum::True));
1734            assert!(!range_out.may_contain(Datum::Null));
1735            assert!(!range_out.may_fail());
1736        }
1737    }
1738
1739    #[mz_ore::test]
1740    #[cfg_attr(miri, ignore)] // error: unsupported operation: can't call foreign function `decNumberFromInt32` on OS `linux`
1741    fn test_jsonb() {
1742        let arena = RowArena::new();
1743
1744        let expr = MirScalarExpr::column(0)
1745            .call_binary(
1746                MirScalarExpr::literal_ok(Datum::from("ts"), ReprScalarType::String),
1747                JsonbGetString,
1748            )
1749            .call_unary(CastJsonbToNumeric(None));
1750
1751        let relation = ReprRelationType::new(vec![ReprScalarType::Jsonb.nullable(true)]);
1752        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1753        interpreter.push_column(
1754            0,
1755            ResultSpec::map_spec(
1756                [(
1757                    "ts".into(),
1758                    ResultSpec::value_between(
1759                        Datum::Numeric(100.into()),
1760                        Datum::Numeric(300.into()),
1761                    ),
1762                )]
1763                .into_iter()
1764                .collect(),
1765            ),
1766        );
1767
1768        let range_out = interpreter.expr(&expr).range;
1769        assert!(!range_out.may_contain(Datum::Numeric(0.into())));
1770        assert!(range_out.may_contain(Datum::Numeric(200.into())));
1771        assert!(!range_out.may_contain(Datum::Numeric(400.into())));
1772    }
1773
1774    #[mz_ore::test]
1775    fn test_nested_union_partial_overlap() {
1776        // `Nested(map)` constrains a key only when the key is present in `map`; absent
1777        // keys mean "anything". So the union of two Nested specs must drop any key
1778        // that's missing from one side, because `x ∪ anything = anything`. Only keys
1779        // present in *both* sides survive (with their per-key specs unioned).
1780        let a = ResultSpec::map_spec(
1781            [
1782                ("x".into(), ResultSpec::value(Datum::String("a"))),
1783                ("y".into(), ResultSpec::value(Datum::String("b"))),
1784                ("c".into(), ResultSpec::value(Datum::String("c"))),
1785            ]
1786            .into_iter()
1787            .collect(),
1788        );
1789        let b = ResultSpec::map_spec(
1790            [
1791                ("x".into(), ResultSpec::value(Datum::String("a2"))),
1792                ("y".into(), ResultSpec::value(Datum::String("b2"))),
1793                ("z".into(), ResultSpec::value(Datum::String("z"))),
1794            ]
1795            .into_iter()
1796            .collect(),
1797        );
1798
1799        let unioned = a.union(b);
1800
1801        // Push the unioned spec through `->> <key>`: keys only in one side must
1802        // admit NULL (the other side is unconstrained, so the field could be absent
1803        // there); shared keys must include both observed values.
1804        let arena = RowArena::new();
1805        let relation = ReprRelationType::new(vec![ReprScalarType::Jsonb.nullable(false)]);
1806
1807        // Key only in `a`: the union must admit NULL.
1808        {
1809            let mut interpreter = ColumnSpecs::new(&relation, &arena);
1810            interpreter.push_column(0, unioned.clone());
1811            let expr = MirScalarExpr::column(0).call_binary(
1812                MirScalarExpr::literal_ok(Datum::from("c"), ReprScalarType::String),
1813                JsonbGetStringStringify,
1814            );
1815            assert!(interpreter.expr(&expr).range.may_contain(Datum::Null));
1816        }
1817
1818        // Key only in `b`: symmetric.
1819        {
1820            let mut interpreter = ColumnSpecs::new(&relation, &arena);
1821            interpreter.push_column(0, unioned.clone());
1822            let expr = MirScalarExpr::column(0).call_binary(
1823                MirScalarExpr::literal_ok(Datum::from("z"), ReprScalarType::String),
1824                JsonbGetStringStringify,
1825            );
1826            assert!(interpreter.expr(&expr).range.may_contain(Datum::Null));
1827        }
1828
1829        // Key in both: result must include both observed values.
1830        {
1831            let mut interpreter = ColumnSpecs::new(&relation, &arena);
1832            interpreter.push_column(0, unioned);
1833            let expr = MirScalarExpr::column(0).call_binary(
1834                MirScalarExpr::literal_ok(Datum::from("x"), ReprScalarType::String),
1835                JsonbGetStringStringify,
1836            );
1837            let x_range = interpreter.expr(&expr).range;
1838            assert!(x_range.may_contain(Datum::String("a")));
1839            assert!(x_range.may_contain(Datum::String("a2")));
1840        }
1841    }
1842
1843    #[mz_ore::test]
1844    #[cfg_attr(miri, ignore)] // unsupported foreign call in numeric decoding
1845    fn test_case_over_jsonb_columns() {
1846        // Regression test for PER-6: when CASE picks between two JSON columns whose
1847        // observed keys are disjoint, filter pushdown must not prune parts where
1848        // accessing a key only present in one branch might yield NULL in the other.
1849        let arena = RowArena::new();
1850
1851        // `(CASE WHEN col0 THEN col1 ELSE col2 END) ->> 'y' IS NULL`
1852        let expr = MirScalarExpr::If {
1853            cond: Box::new(MirScalarExpr::column(0)),
1854            then: Box::new(MirScalarExpr::column(1)),
1855            els: Box::new(MirScalarExpr::column(2)),
1856        }
1857        .call_binary(
1858            MirScalarExpr::literal_ok(Datum::from("y"), ReprScalarType::String),
1859            JsonbGetStringStringify,
1860        )
1861        .call_unary(UnaryFunc::IsNull(IsNull));
1862
1863        let relation = ReprRelationType::new(vec![
1864            ReprScalarType::Bool.nullable(false),
1865            ReprScalarType::Jsonb.nullable(false),
1866            ReprScalarType::Jsonb.nullable(false),
1867        ]);
1868        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1869        interpreter.push_column(0, ResultSpec::value_between(Datum::False, Datum::True));
1870        interpreter.push_column(
1871            1,
1872            ResultSpec::map_spec(
1873                [("x".into(), ResultSpec::value(Datum::String("a")))]
1874                    .into_iter()
1875                    .collect(),
1876            ),
1877        );
1878        interpreter.push_column(
1879            2,
1880            ResultSpec::map_spec(
1881                [("y".into(), ResultSpec::value(Datum::String("b")))]
1882                    .into_iter()
1883                    .collect(),
1884            ),
1885        );
1886
1887        let range_out = interpreter.expr(&expr).range;
1888        // When the CASE selects column 1, "y" is absent and `->> 'y'` yields NULL, so
1889        // `IS NULL` is True. The filter must not prune a part that could match.
1890        assert!(range_out.may_contain(Datum::True));
1891    }
1892
1893    #[mz_ore::test]
1894    fn test_like() {
1895        let arena = RowArena::new();
1896
1897        let expr = MirScalarExpr::CallUnary {
1898            func: UnaryFunc::IsLikeMatch(IsLikeMatch(
1899                crate::like_pattern::compile("%whatever%", true).unwrap(),
1900            )),
1901            expr: Box::new(MirScalarExpr::column(0)),
1902        };
1903
1904        let relation = ReprRelationType::new(vec![ReprScalarType::String.nullable(true)]);
1905        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1906        interpreter.push_column(
1907            0,
1908            ResultSpec::value_between(Datum::String("aardvark"), Datum::String("zebra")),
1909        );
1910
1911        let range_out = interpreter.expr(&expr).range;
1912        assert!(
1913            !range_out.fallible,
1914            "like function should not error on non-error input"
1915        );
1916        assert!(range_out.may_contain(Datum::True));
1917        assert!(range_out.may_contain(Datum::False));
1918        assert!(range_out.may_contain(Datum::Null));
1919    }
1920
1921    #[mz_ore::test]
1922    fn test_try_parse_monotonic_iso8601_timestamp() {
1923        use chrono::NaiveDateTime;
1924
1925        let arena = RowArena::new();
1926
1927        let expr = MirScalarExpr::CallUnary {
1928            func: UnaryFunc::TryParseMonotonicIso8601Timestamp(TryParseMonotonicIso8601Timestamp),
1929            expr: Box::new(MirScalarExpr::column(0)),
1930        };
1931
1932        let relation = ReprRelationType::new(vec![ReprScalarType::String.nullable(true)]);
1933        // Test the case where we have full timestamps as bounds.
1934        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1935        interpreter.push_column(
1936            0,
1937            ResultSpec::value_between(
1938                Datum::String("2024-01-11T00:00:00.000Z"),
1939                Datum::String("2024-01-11T20:00:00.000Z"),
1940            ),
1941        );
1942
1943        let timestamp = |ts| {
1944            Datum::Timestamp(
1945                NaiveDateTime::parse_from_str(ts, "%Y-%m-%dT%H:%M:%S")
1946                    .unwrap()
1947                    .try_into()
1948                    .unwrap(),
1949            )
1950        };
1951
1952        let range_out = interpreter.expr(&expr).range;
1953        assert!(!range_out.fallible);
1954        assert!(range_out.nullable);
1955        assert!(!range_out.may_contain(timestamp("2024-01-10T10:00:00")));
1956        assert!(range_out.may_contain(timestamp("2024-01-11T10:00:00")));
1957        assert!(!range_out.may_contain(timestamp("2024-01-12T10:00:00")));
1958
1959        // Test the case where we have truncated / useless bounds.
1960        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1961        interpreter.push_column(
1962            0,
1963            ResultSpec::value_between(Datum::String("2024-01-1"), Datum::String("2024-01-2")),
1964        );
1965
1966        let range_out = interpreter.expr(&expr).range;
1967        assert!(!range_out.fallible);
1968        assert!(range_out.nullable);
1969        assert!(range_out.may_contain(timestamp("2024-01-10T10:00:00")));
1970        assert!(range_out.may_contain(timestamp("2024-01-11T10:00:00")));
1971        assert!(range_out.may_contain(timestamp("2024-01-12T10:00:00")));
1972
1973        // Test the case where only one bound is truncated
1974        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1975        interpreter.push_column(
1976            0,
1977            ResultSpec::value_between(
1978                Datum::String("2024-01-1"),
1979                Datum::String("2024-01-12T10:00:00"),
1980            )
1981            .union(ResultSpec::null()),
1982        );
1983
1984        let range_out = interpreter.expr(&expr).range;
1985        assert!(!range_out.fallible);
1986        assert!(range_out.nullable);
1987        assert!(range_out.may_contain(timestamp("2024-01-10T10:00:00")));
1988        assert!(range_out.may_contain(timestamp("2024-01-11T10:00:00")));
1989        assert!(range_out.may_contain(timestamp("2024-01-12T10:00:00")));
1990
1991        // Test the case where the upper and lower bound are identical
1992        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1993        interpreter.push_column(
1994            0,
1995            ResultSpec::value_between(
1996                Datum::String("2024-01-11T10:00:00.000Z"),
1997                Datum::String("2024-01-11T10:00:00.000Z"),
1998            ),
1999        );
2000
2001        let range_out = interpreter.expr(&expr).range;
2002        assert!(!range_out.fallible);
2003        assert!(!range_out.nullable);
2004        assert!(!range_out.may_contain(timestamp("2024-01-10T10:00:00")));
2005        assert!(range_out.may_contain(timestamp("2024-01-11T10:00:00")));
2006        assert!(!range_out.may_contain(timestamp("2024-01-12T10:00:00")));
2007    }
2008
2009    #[mz_ore::test]
2010    fn test_inequality() {
2011        let arena = RowArena::new();
2012
2013        let expr = MirScalarExpr::column(0).call_binary(
2014            MirScalarExpr::CallUnmaterializable(UnmaterializableFunc::MzNow),
2015            Gte,
2016        );
2017
2018        let relation = ReprRelationType::new(vec![ReprScalarType::MzTimestamp.nullable(true)]);
2019        let mut interpreter = ColumnSpecs::new(&relation, &arena);
2020        interpreter.push_column(
2021            0,
2022            ResultSpec::value_between(
2023                Datum::MzTimestamp(1704736444949u64.into()),
2024                Datum::MzTimestamp(1704736444949u64.into()),
2025            )
2026            .union(ResultSpec::null()),
2027        );
2028        interpreter.push_unmaterializable(
2029            UnmaterializableFunc::MzNow,
2030            ResultSpec::value_between(
2031                Datum::MzTimestamp(1704738791000u64.into()),
2032                Datum::MzTimestamp(18446744073709551615u64.into()),
2033            ),
2034        );
2035
2036        let range_out = interpreter.expr(&expr).range;
2037        assert!(
2038            !range_out.fallible,
2039            "<= function should not error on non-error input"
2040        );
2041        assert!(!range_out.may_contain(Datum::True));
2042        assert!(range_out.may_contain(Datum::False));
2043        assert!(range_out.may_contain(Datum::Null));
2044    }
2045
2046    /// Regression test for database-issues#9656.
2047    ///
2048    /// Adding an `Interval` to a `Timestamp` is non-monotone in the interval
2049    /// argument: the lex order of intervals (months, days, micros) does not
2050    /// respect calendar-month arithmetic with day-clamping. The interpreter
2051    /// must therefore not assume monotonicity, otherwise persist filter
2052    /// pushdown can incorrectly conclude that a part has no matching rows.
2053    #[mz_ore::test]
2054    #[cfg_attr(miri, ignore)]
2055    fn test_add_timestamp_interval_non_monotone() {
2056        use chrono::NaiveDateTime;
2057        use mz_repr::adt::interval::Interval;
2058        use mz_repr::adt::timestamp::CheckedTimestamp;
2059        use mz_repr::{Datum, Row};
2060
2061        let arena = RowArena::new();
2062
2063        // The setup: a timestamp literal `t = 2024-01-31 00:00:00`, and an
2064        // interval column whose stats-range spans
2065        // `[{0 months, 31 days, 0 us}, {1 month, 0 days, 0 us}]`. In lex order,
2066        // the 31-day interval is the lower bound and the 1-month interval is
2067        // the upper bound. The function values at the endpoints are:
2068        //   t + {0,31,0} = 2024-03-02
2069        //   t + {1, 0,0} = 2024-02-29
2070        // But an *interior* interval like {0, 60, 0} maps to 2024-03-31, which
2071        // lies far outside `[Feb 29, Mar 2]`. Under the (incorrect) monotone
2072        // assumption, the interpreter would conclude the output is in that
2073        // narrow window, and rule out predicates like `>= 2024-03-15`.
2074        let ts_lit = |s: &str| {
2075            let mut row = Row::default();
2076            row.packer().push(Datum::Timestamp(
2077                CheckedTimestamp::from_timestamplike(
2078                    NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S").unwrap(),
2079                )
2080                .unwrap(),
2081            ));
2082            MirScalarExpr::Literal(Ok(row), ReprScalarType::Timestamp.nullable(false))
2083        };
2084        let interval = |months: i32, days: i32, micros: i64| {
2085            Datum::Interval(Interval {
2086                months,
2087                days,
2088                micros,
2089            })
2090        };
2091
2092        // Expression: `(timestamp_lit + interval_col) >= 2024-03-15`.
2093        let expr = ts_lit("2024-01-31T00:00:00")
2094            .call_binary(MirScalarExpr::column(0), AddTimestampInterval)
2095            .call_binary(ts_lit("2024-03-15T00:00:00"), Gte);
2096
2097        let relation = ReprRelationType::new(vec![ReprScalarType::Interval.nullable(false)]);
2098        let mut interpreter = ColumnSpecs::new(&relation, &arena);
2099        interpreter.push_column(
2100            0,
2101            ResultSpec::value_between(interval(0, 31, 0), interval(1, 0, 0)),
2102        );
2103
2104        let range_out = interpreter.expr(&expr).range;
2105        // The actual data may include e.g. `{0, 60, 0}` → 2024-03-31, which
2106        // satisfies `>= 2024-03-15`. The interpreter must admit `True` so that
2107        // filter pushdown does not skip the part. Under the buggy
2108        // `(true, true)` annotation, the output range would be
2109        // `[Feb 29, Mar 2]`, all of which is `< Mar 15`, and the interpreter
2110        // would (wrongly) admit only `False`.
2111        assert!(
2112            range_out.may_contain(Datum::True),
2113            "interpreter incorrectly ruled out matching rows; \
2114             add_timestamp_interval is not monotone in the interval argument",
2115        );
2116    }
2117
2118    /// Regression test for `date_bin_timestamp`, which is non-monotone in the
2119    /// `stride` argument: a larger stride can bin a source timestamp to an
2120    /// *earlier* result than a smaller stride, because the bin alignment to
2121    /// the unix epoch depends on the stride magnitude rather than on lex order.
2122    #[mz_ore::test]
2123    #[cfg_attr(miri, ignore)]
2124    fn test_date_bin_timestamp_non_monotone() {
2125        use chrono::NaiveDateTime;
2126        use mz_repr::adt::interval::Interval;
2127        use mz_repr::adt::timestamp::CheckedTimestamp;
2128        use mz_repr::{Datum, Row};
2129
2130        let arena = RowArena::new();
2131
2132        let ts_lit = |s: &str| {
2133            let mut row = Row::default();
2134            row.packer().push(Datum::Timestamp(
2135                CheckedTimestamp::from_timestamplike(
2136                    NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S").unwrap(),
2137                )
2138                .unwrap(),
2139            ));
2140            MirScalarExpr::Literal(Ok(row), ReprScalarType::Timestamp.nullable(false))
2141        };
2142        let interval = |months: i32, days: i32, micros: i64| {
2143            Datum::Interval(Interval {
2144                months,
2145                days,
2146                micros,
2147            })
2148        };
2149
2150        // Expression: `date_bin(stride_col, 2024-01-01 12:00:00) > 2024-01-01 06:00:00`.
2151        // stride_col ranges over `[1 day, 2 days]`.
2152        //
2153        // Endpoint evaluations:
2154        //   1 day stride → bins to 2024-01-01 00:00:00
2155        //   2 day stride → bins to 2023-12-31 00:00:00
2156        //
2157        // Interior strides produce results *outside* that endpoint box. For
2158        // example, a 1.5-day stride (i.e. `{0 months, 1 day, 12 h micros}`,
2159        // which sorts between the two endpoints in lex order) bins
2160        // 2024-01-01 12:00:00 to exactly 2024-01-01 12:00:00 — well above the
2161        // endpoint maximum of 2024-01-01 00:00:00. With the buggy
2162        // `(true, true)` annotation, the interpreter narrows the output to
2163        // `[Dec 31 00:00, Jan 1 00:00]`, both of which are `<= Jan 1 06:00`,
2164        // so the predicate is wrongly proved `False`. With the non-monotone
2165        // fix the output is `anything()`, so `True` is correctly admitted.
2166        let expr = MirScalarExpr::column(0)
2167            .call_binary(ts_lit("2024-01-01T12:00:00"), DateBinTimestamp)
2168            .call_binary(ts_lit("2024-01-01T06:00:00"), Gt);
2169
2170        let relation = ReprRelationType::new(vec![ReprScalarType::Interval.nullable(false)]);
2171        let mut interpreter = ColumnSpecs::new(&relation, &arena);
2172        interpreter.push_column(
2173            0,
2174            ResultSpec::value_between(interval(0, 1, 0), interval(0, 2, 0)),
2175        );
2176
2177        let range_out = interpreter.expr(&expr).range;
2178        assert!(
2179            range_out.may_contain(Datum::True),
2180            "date_bin is not monotone in the stride argument; \
2181             interior strides can produce outputs outside the endpoint-bounded \
2182             box, so the interpreter must admit True for `>`-style predicates",
2183        );
2184    }
2185
2186    #[mz_ore::test]
2187    fn test_trace() {
2188        use super::Trace;
2189
2190        let expr = MirScalarExpr::column(0).call_binary(
2191            MirScalarExpr::column(1)
2192                .call_binary(MirScalarExpr::column(3).call_unary(NegInt64), AddInt64),
2193            Gte,
2194        );
2195        let summary = Trace.expr(&expr);
2196        assert!(summary.pushdownable());
2197    }
2198}