mz_expr/
interpret.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10use std::collections::BTreeMap;
11use std::fmt::Debug;
12
13use mz_repr::{Datum, ReprColumnType, ReprRelationType, ReprScalarType, Row, RowArena};
14
15use crate::scalar::func::variadic::And;
16use crate::{
17    BinaryFunc, Eval, EvalError, MapFilterProject, MfpPlan, MirScalarExpr, UnaryFunc,
18    UnmaterializableFunc, VariadicFunc, func,
19};
20/// An inclusive range of non-null datum values.
21#[derive(Clone, Eq, PartialEq, Debug)]
22enum Values<'a> {
23    /// This range contains no values.
24    Empty,
25    /// An inclusive range. Invariant: the first element is always <= the second.
26    // TODO: a variant for small sets of data would avoid losing precision here.
27    Within(Datum<'a>, Datum<'a>),
28    /// Constraints on structured fields, useful for recursive structures like maps.
29    /// Fields that are not present in the map default to Values::All.
30    // TODO: consider using this variant, or similar, for Datum::List.
31    Nested(BTreeMap<Datum<'a>, ResultSpec<'a>>),
32    /// This range might contain any value. Since we're overapproximating, this is often used
33    /// as a "safe fallback" when we can't determine the right boundaries for a range.
34    All,
35}
36
37impl<'a> Values<'a> {
38    fn just(a: Datum<'a>) -> Values<'a> {
39        match a {
40            Datum::Map(datum_map) => Values::Nested(
41                datum_map
42                    .iter()
43                    .map(|(key, val)| (key.into(), ResultSpec::value(val)))
44                    .collect(),
45            ),
46            other => Self::Within(other, other),
47        }
48    }
49
50    fn union(self, other: Values<'a>) -> Values<'a> {
51        match (self, other) {
52            (Values::Empty, r) => r,
53            (r, Values::Empty) => r,
54            (Values::Within(a0, a1), Values::Within(b0, b1)) => {
55                Values::Within(a0.min(b0), a1.max(b1))
56            }
57            (Values::Nested(a), Values::Nested(mut b)) => {
58                // `Nested(map)` treats keys missing from `map` as fully unconstrained, so a
59                // key present in only one side of the union must be treated as `anything`
60                // on the other side. Because `x ∪ anything = anything`, such keys drop
61                // out of the merged map (the Nested default is already "anything").
62                let mut merged = BTreeMap::new();
63                for (key, a_spec) in a {
64                    if let Some(b_spec) = b.remove(&key) {
65                        let unioned = a_spec.union(b_spec);
66                        if unioned != ResultSpec::anything() {
67                            merged.insert(key, unioned);
68                        }
69                    }
70                }
71                if merged.is_empty() {
72                    Values::All
73                } else {
74                    Values::Nested(merged)
75                }
76            }
77            _ => Values::All,
78        }
79    }
80
81    fn intersect(self, other: Values<'a>) -> Values<'a> {
82        match (self, other) {
83            (Values::Empty, _) => Values::Empty,
84            (_, Values::Empty) => Values::Empty,
85            (Values::Within(a0, a1), Values::Within(b0, b1)) => {
86                let min = a0.max(b0);
87                let max = a1.min(b1);
88                if min <= max {
89                    Values::Within(min, max)
90                } else {
91                    Values::Empty
92                }
93            }
94            (Values::Nested(mut a), Values::Nested(b)) => {
95                for (datum, other_spec) in b {
96                    let spec = a.entry(datum).or_insert_with(ResultSpec::anything);
97                    *spec = spec.clone().intersect(other_spec);
98                }
99                Values::Nested(a)
100            }
101            (Values::All, v) => v,
102            (v, Values::All) => v,
103            (Values::Nested(_), Values::Within(_, _)) => Values::Empty,
104            (Values::Within(_, _), Values::Nested(_)) => Values::Empty,
105        }
106    }
107
108    fn may_contain(&self, value: Datum<'a>) -> bool {
109        match self {
110            Values::Empty => false,
111            Values::Within(min, max) => *min <= value && value <= *max,
112            Values::All => true,
113            Values::Nested(field_map) => match value {
114                Datum::Map(datum_map) => {
115                    datum_map
116                        .iter()
117                        .all(|(key, val)| match field_map.get(&key.into()) {
118                            None => true,
119                            Some(nested) => nested.may_contain(val),
120                        })
121                }
122                _ => false,
123            },
124        }
125    }
126
127    /// Returns the sole datum in this value set, if it is known to be a single
128    /// value. Returns `None` otherwise (for empty sets, ranges with distinct
129    /// endpoints, structured constraints, and the unconstrained set).
130    ///
131    /// Prefer this over pattern-matching on [Values::Within] directly when you
132    /// only need the "single known value" case: it's robust against future
133    /// variants of [Values] (e.g. a small-set representation) automatically
134    /// degrading to "not a single value" rather than silently mis-matching.
135    fn as_single(&self) -> Option<Datum<'a>> {
136        match self {
137            Values::Within(a, b) if a == b => Some(*a),
138            _ => None,
139        }
140    }
141}
142
143/// An approximation of the set of values an expression might have, including whether or not it
144/// might be null or an error value. This is generally an _overapproximation_, in the sense that
145/// [ResultSpec::may_contain] may return true even if the argument is not included in the set.
146/// (However, it should never return false when the value _is_ included!)
147#[derive(Debug, Clone, Eq, PartialEq)]
148pub struct ResultSpec<'a> {
149    /// True if the expression may evaluate to [Datum::Null].
150    nullable: bool,
151    /// True if the expression may evaluate to an error.
152    fallible: bool,
153    /// The range of possible (non-null) values that the expression may evaluate to.
154    values: Values<'a>,
155}
156
157impl<'a> ResultSpec<'a> {
158    /// No results match this spec. (For example, an empty table.)
159    pub fn nothing() -> Self {
160        ResultSpec {
161            nullable: false,
162            fallible: false,
163            values: Values::Empty,
164        }
165    }
166
167    /// Every result matches this spec.
168    pub fn anything() -> Self {
169        ResultSpec {
170            nullable: true,
171            fallible: true,
172            values: Values::All,
173        }
174    }
175
176    /// Every result matches this spec.
177    pub fn any_infallible() -> Self {
178        ResultSpec {
179            nullable: true,
180            fallible: false,
181            values: Values::All,
182        }
183    }
184
185    /// A spec that only matches null.
186    pub fn null() -> Self {
187        ResultSpec {
188            nullable: true,
189            ..Self::nothing()
190        }
191    }
192
193    /// A spec that only matches error values.
194    pub fn fails() -> Self {
195        ResultSpec {
196            fallible: true,
197            ..Self::nothing()
198        }
199    }
200
201    /// A spec that matches all values of a given type.
202    pub fn has_type(col: &ReprColumnType, fallible: bool) -> ResultSpec<'a> {
203        let values = match &col.scalar_type {
204            ReprScalarType::Bool => Values::Within(Datum::False, Datum::True),
205            // TODO: add bounds for other bounded types, like integers
206            _ => Values::All,
207        };
208        ResultSpec {
209            nullable: col.nullable,
210            fallible,
211            values,
212        }
213    }
214
215    /// A spec that only matches the given value.
216    pub fn value(value: Datum<'a>) -> ResultSpec<'a> {
217        match value {
218            Datum::Null => Self::null(),
219            nonnull => ResultSpec {
220                values: Values::just(nonnull),
221                ..Self::nothing()
222            },
223        }
224    }
225
226    /// A spec that matches values between the given (non-null) min and max.
227    pub fn value_between(min: Datum<'a>, max: Datum<'a>) -> ResultSpec<'a> {
228        assert!(!min.is_null());
229        assert!(!max.is_null());
230        if min <= max {
231            ResultSpec {
232                values: Values::Within(min, max),
233                ..ResultSpec::nothing()
234            }
235        } else {
236            ResultSpec::nothing()
237        }
238    }
239
240    /// A spec that matches any non-null value.
241    pub fn value_all() -> ResultSpec<'a> {
242        ResultSpec {
243            values: Values::All,
244            ..ResultSpec::nothing()
245        }
246    }
247
248    /// A spec that matches Datum::Maps of the given type.
249    pub fn map_spec(map: BTreeMap<Datum<'a>, ResultSpec<'a>>) -> ResultSpec<'a> {
250        ResultSpec {
251            values: Values::Nested(map),
252            ..ResultSpec::nothing()
253        }
254    }
255
256    /// Given two specs, returns a new spec that matches anything that either original spec would match.
257    pub fn union(self, other: ResultSpec<'a>) -> ResultSpec<'a> {
258        ResultSpec {
259            nullable: self.nullable || other.nullable,
260            fallible: self.fallible || other.fallible,
261            values: self.values.union(other.values),
262        }
263    }
264
265    /// Given two specs, returns a new spec that only matches things that both original specs would match.
266    pub fn intersect(self, other: ResultSpec<'a>) -> ResultSpec<'a> {
267        ResultSpec {
268            nullable: self.nullable && other.nullable,
269            fallible: self.fallible && other.fallible,
270            values: self.values.intersect(other.values),
271        }
272    }
273
274    /// Check if a particular value matches the spec.
275    pub fn may_contain(&self, value: Datum<'a>) -> bool {
276        if value == Datum::Null {
277            return self.nullable;
278        }
279
280        self.values.may_contain(value)
281    }
282
283    /// Check if an error value matches the spec.
284    pub fn may_fail(&self) -> bool {
285        self.fallible
286    }
287
288    /// This method "maps" a function across the `ResultSpec`.
289    ///
290    /// As mentioned above, `ResultSpec` represents an approximate set of results.
291    /// If we actually stored each result in the set, `flat_map` could be implemented by passing
292    /// each result to the function one-by-one and unioning the resulting sets. This is possible
293    /// when our values set is empty or contains a single datum, but when it contains a range,
294    /// we can't enumerate all possible values of the set. We handle this by:
295    /// - tracking whether the function is monotone, in which case we can map the range by just
296    ///   mapping the endpoints;
297    /// - using a safe default when we can't infer a tighter bound on the set, eg. [Self::anything].
298    fn flat_map(
299        &self,
300        is_monotone: bool,
301        mut result_map: impl FnMut(Result<Datum<'a>, EvalError>) -> ResultSpec<'a>,
302    ) -> ResultSpec<'a> {
303        let null_spec = if self.nullable {
304            result_map(Ok(Datum::Null))
305        } else {
306            ResultSpec::nothing()
307        };
308
309        let error_spec = if self.fallible {
310            // Since we only care about whether / not an error is possible, and not the specific
311            // error, create an arbitrary error here.
312            // NOTE! This assumes that functions do not discriminate on the type of the error.
313            let map_err = result_map(Err(EvalError::Internal("".into())));
314            let raise_err = ResultSpec::fails();
315            // SQL has a very loose notion of evaluation order: https://www.postgresql.org/docs/current/sql-expressions.html#SYNTAX-EXPRESS-EVAL
316            // Here, we account for the possibility that the expression is evaluated strictly,
317            // raising the error, or that it's evaluated lazily by the result_map function
318            // (which may return a non-error result even when given an error as input).
319            raise_err.union(map_err)
320        } else {
321            ResultSpec::nothing()
322        };
323
324        let values_spec = match self.values {
325            Values::Empty => ResultSpec::nothing(),
326            // If this range contains a single datum, just call the function.
327            Values::Within(min, max) if min == max => result_map(Ok(min)),
328            // If this is a range of booleans, we know all the values... just try them.
329            Values::Within(Datum::False, Datum::True) => {
330                result_map(Ok(Datum::False)).union(result_map(Ok(Datum::True)))
331            }
332            // Otherwise, if our function is monotonic, we can try mapping the input
333            // range to an output range.
334            Values::Within(min, max) if is_monotone => {
335                let min_result = result_map(Ok(min));
336                let max_result = result_map(Ok(max));
337                match (min_result, max_result) {
338                    // If both endpoints give us a range, the result is a union of those ranges.
339                    (
340                        ResultSpec {
341                            nullable: false,
342                            fallible: false,
343                            values: a_values,
344                        },
345                        ResultSpec {
346                            nullable: false,
347                            fallible: false,
348                            values: b_values,
349                        },
350                    ) => ResultSpec {
351                        nullable: false,
352                        fallible: false,
353                        values: a_values.union(b_values),
354                    },
355                    // If both endpoints are null, we assume the whole range maps to null.
356                    (
357                        ResultSpec {
358                            nullable: true,
359                            fallible: false,
360                            values: Values::Empty,
361                        },
362                        ResultSpec {
363                            nullable: true,
364                            fallible: false,
365                            values: Values::Empty,
366                        },
367                    ) => ResultSpec::null(),
368                    // Otherwise we can't assume anything about the output.
369                    _ => ResultSpec::anything(),
370                }
371            }
372            // TODO: we could return a narrower result for eg. `Values::Nested` with all-`Within` fields.
373            Values::Within(_, _) | Values::Nested(_) | Values::All => ResultSpec::anything(),
374        };
375
376        null_spec.union(error_spec).union(values_spec)
377    }
378}
379
380/// [Abstract interpretation](https://en.wikipedia.org/wiki/Abstract_interpretation) for
381/// [MirScalarExpr].
382///
383/// [MirScalarExpr::eval] implements a "concrete interpreter" for the expression type: given an
384/// expression and specific column values as input, it returns a specific value for the output.
385/// This could be reimplemented using this trait... but it's most useful for "abstract"
386/// interpretations of the expression, where we generalize about sets of possible inputs and outputs.
387/// See [Trace] and [ColumnSpecs] for how this can be useful in practice.
388pub trait Interpreter {
389    type Summary: Clone + Debug + Sized;
390
391    /// A column of the input row.
392    fn column(&self, id: usize) -> Self::Summary;
393
394    /// A literal value.
395    /// (Stored as a row, because we can't own a Datum.)
396    fn literal(&self, result: &Result<Row, EvalError>, col_type: &ReprColumnType) -> Self::Summary;
397    /// A call to an unmaterializable function.
398    ///
399    /// These functions cannot be evaluated by `MirScalarExpr::eval`. They must
400    /// be transformed away by a higher layer.
401    fn unmaterializable(&self, func: &UnmaterializableFunc) -> Self::Summary;
402
403    /// A function call that takes one expression as an argument.
404    fn unary(&self, func: &UnaryFunc, expr: Self::Summary) -> Self::Summary;
405
406    /// A function call that takes two expressions as arguments.
407    fn binary(&self, func: &BinaryFunc, left: Self::Summary, right: Self::Summary)
408    -> Self::Summary;
409
410    /// A function call that takes an arbitrary number of arguments.
411    fn variadic(&self, func: &VariadicFunc, exprs: Vec<Self::Summary>) -> Self::Summary;
412
413    /// Conditionally evaluated expressions.
414    fn cond(&self, cond: Self::Summary, then: Self::Summary, els: Self::Summary) -> Self::Summary;
415
416    /// Evaluate an entire expression, by delegating to the fine-grained methods on [Interpreter].
417    fn expr(&self, expr: &MirScalarExpr) -> Self::Summary {
418        match expr {
419            MirScalarExpr::Column(id, _name) => self.column(*id),
420            MirScalarExpr::Literal(value, col_type) => self.literal(value, col_type),
421            MirScalarExpr::CallUnmaterializable(func) => self.unmaterializable(func),
422            MirScalarExpr::CallUnary { func, expr } => {
423                let expr_range = self.expr(expr);
424                self.unary(func, expr_range)
425            }
426            MirScalarExpr::CallBinary { func, expr1, expr2 } => {
427                let expr1_range = self.expr(expr1);
428                let expr2_range = self.expr(expr2);
429                self.binary(func, expr1_range, expr2_range)
430            }
431            MirScalarExpr::CallVariadic { func, exprs } => {
432                let exprs: Vec<_> = exprs.into_iter().map(|e| self.expr(e)).collect();
433                self.variadic(func, exprs)
434            }
435            MirScalarExpr::If { cond, then, els } => {
436                let cond_range = self.expr(cond);
437                let then_range = self.expr(then);
438                let els_range = self.expr(els);
439                self.cond(cond_range, then_range, els_range)
440            }
441        }
442    }
443
444    /// Specifically, this evaluates the map and filters stages of an MFP: summarize each of the
445    /// map expressions, then `and` together all of the filters.
446    fn mfp_filter(&self, mfp: &MapFilterProject) -> Self::Summary {
447        let mfp_eval = MfpEval::new(self, mfp.input_arity, &mfp.expressions);
448        // NB: self should not be used after this point!
449        let predicates = mfp
450            .predicates
451            .iter()
452            .map(|(_, e)| mfp_eval.expr(e))
453            .collect();
454        mfp_eval.variadic(&And.into(), predicates)
455    }
456
457    /// Similar to [Self::mfp_filter], but includes the additional temporal filters that have been
458    /// broken out.
459    fn mfp_plan_filter(&self, plan: &MfpPlan) -> Self::Summary {
460        let mfp_eval = MfpEval::new(self, plan.mfp.input_arity, &plan.mfp.expressions);
461        // NB: self should not be used after this point!
462        let mut results: Vec<_> = plan
463            .mfp
464            .predicates
465            .iter()
466            .map(|(_, e)| mfp_eval.expr(e))
467            .collect();
468        let mz_now = mfp_eval.unmaterializable(&UnmaterializableFunc::MzNow);
469        for bound in &plan.lower_bounds {
470            let bound_range = mfp_eval.expr(bound);
471            let result = mfp_eval.binary(&BinaryFunc::Lte(func::Lte), bound_range, mz_now.clone());
472            results.push(result);
473        }
474        for bound in &plan.upper_bounds {
475            let bound_range = mfp_eval.expr(bound);
476            let result = mfp_eval.binary(&BinaryFunc::Gte(func::Gte), bound_range, mz_now.clone());
477            results.push(result);
478        }
479        self.variadic(&And.into(), results)
480    }
481}
482
483/// Wrap another interpreter, but tack a few extra columns on at the end. An internal implementation
484/// detail of `eval_mfp` and `eval_mfp_plan`.
485pub(crate) struct MfpEval<'a, E: Interpreter + ?Sized> {
486    evaluator: &'a E,
487    input_arity: usize,
488    expressions: Vec<E::Summary>,
489}
490
491impl<'a, E: Interpreter + ?Sized> MfpEval<'a, E> {
492    pub(crate) fn new(evaluator: &'a E, input_arity: usize, expressions: &[MirScalarExpr]) -> Self {
493        let mut mfp_eval = MfpEval {
494            evaluator,
495            input_arity,
496            expressions: vec![],
497        };
498        for expr in expressions {
499            let result = mfp_eval.expr(expr);
500            mfp_eval.expressions.push(result);
501        }
502        mfp_eval
503    }
504}
505
506impl<'a, E: Interpreter + ?Sized> Interpreter for MfpEval<'a, E> {
507    type Summary = E::Summary;
508
509    fn column(&self, id: usize) -> Self::Summary {
510        if id < self.input_arity {
511            self.evaluator.column(id)
512        } else {
513            self.expressions[id - self.input_arity].clone()
514        }
515    }
516
517    fn literal(&self, result: &Result<Row, EvalError>, col_type: &ReprColumnType) -> Self::Summary {
518        self.evaluator.literal(result, col_type)
519    }
520
521    fn unmaterializable(&self, func: &UnmaterializableFunc) -> Self::Summary {
522        self.evaluator.unmaterializable(func)
523    }
524
525    fn unary(&self, func: &UnaryFunc, expr: Self::Summary) -> Self::Summary {
526        self.evaluator.unary(func, expr)
527    }
528
529    fn binary(
530        &self,
531        func: &BinaryFunc,
532        left: Self::Summary,
533        right: Self::Summary,
534    ) -> Self::Summary {
535        self.evaluator.binary(func, left, right)
536    }
537
538    fn variadic(&self, func: &VariadicFunc, exprs: Vec<Self::Summary>) -> Self::Summary {
539        self.evaluator.variadic(func, exprs)
540    }
541
542    fn cond(&self, cond: Self::Summary, then: Self::Summary, els: Self::Summary) -> Self::Summary {
543        self.evaluator.cond(cond, then, els)
544    }
545}
546
547/// A unary function we've added special-case handling for; including:
548/// - A three-argument function, taking and returning [ResultSpec]s. This
549///   overrides the default function-handling logic entirely.
550/// - Metadata on whether / not this function is pushdownable. See [Trace].
551struct SpecialUnary {
552    map_fn: for<'a, 'b> fn(&'b ColumnSpecs<'a>, ResultSpec<'a>) -> ResultSpec<'a>,
553    pushdownable: bool,
554}
555
556impl SpecialUnary {
557    /// Returns the special-case handling for a particular function, if it exists.
558    fn for_func(func: &UnaryFunc) -> Option<SpecialUnary> {
559        /// Eager in the same sense as `func.rs` uses the term; this assumes that
560        /// nulls and errors propagate up, and we only need to define the behaviour
561        /// on values.
562        fn eagerly<'b>(
563            spec: ResultSpec<'b>,
564            value_fn: impl FnOnce(Values<'b>) -> ResultSpec<'b>,
565        ) -> ResultSpec<'b> {
566            let result = match spec.values {
567                Values::Empty => ResultSpec::nothing(),
568                other => value_fn(other),
569            };
570            ResultSpec {
571                fallible: spec.fallible || result.fallible,
572                nullable: spec.nullable || result.nullable,
573                values: result.values,
574            }
575        }
576        match func {
577            UnaryFunc::TryParseMonotonicIso8601Timestamp(_) => Some(SpecialUnary {
578                map_fn: |specs, range| {
579                    let expr = MirScalarExpr::CallUnary {
580                        func: UnaryFunc::TryParseMonotonicIso8601Timestamp(
581                            crate::func::TryParseMonotonicIso8601Timestamp,
582                        ),
583                        expr: Box::new(MirScalarExpr::column(0)),
584                    };
585                    let eval = |d| specs.eval_result(expr.eval(&[d], specs.arena));
586
587                    eagerly(range, |values| {
588                        match values {
589                            Values::Within(a, b) if a == b => eval(a),
590                            Values::Within(a, b) => {
591                                let spec = eval(a).union(eval(b));
592                                let values_spec = if spec.nullable {
593                                    // At least one of the endpoints of the range wasn't a valid
594                                    // timestamp. We can't compute a precise range in this case.
595                                    // If we used the general is_monotone handling, that code would
596                                    // incorrectly assume the whole range mapped to null if each
597                                    // endpoint did.
598                                    ResultSpec::value_all()
599                                } else {
600                                    spec
601                                };
602                                // A range of strings will always contain strings that don't parse
603                                // as timestamps - so unlike the general case, we'll assume null
604                                // is present in every range of output values.
605                                values_spec.union(ResultSpec::null())
606                            }
607                            // Otherwise, assume the worst: this function may return either a valid
608                            // value or null.
609                            _ => ResultSpec::any_infallible(),
610                        }
611                    })
612                },
613                pushdownable: true,
614            }),
615            _ => None,
616        }
617    }
618}
619
620/// The abstract-domain counterpart of a [BinaryFunc]: a binary function
621/// we've added special-case handling for; including:
622/// - Either a complete override of [ResultSpec] computation, or a way to
623///   compute monotonicity dynamically from the input specs.
624/// - Metadata on whether / not this function is pushdownable. See [Trace].
625///
626/// Note: today a function can have *either* a handler override *or* a
627/// dynamic-monotonicity verdict, but not both. If a future function wants
628/// both, promote [AbstractFuncHandler] from an enum to a struct with two
629/// optional fields.
630struct AbstractFunc {
631    handler: AbstractFuncHandler,
632    /// `(left, right)`: per-argument pushdownability hint consumed by
633    /// [Trace]. `true` for an argument means the function preserves enough
634    /// structure that, with sufficient information about that argument's
635    /// range, the output spec can be predicted — i.e. the predicate is a
636    /// pushdown candidate when that argument is constant or a tight range.
637    pushdownable: (bool, bool),
638}
639
640/// How an [AbstractFunc] computes the output [ResultSpec].
641enum AbstractFuncHandler {
642    /// Completely override the spec computation; the default flat-map machinery
643    /// is bypassed.
644    Override(for<'a> fn(ResultSpec<'a>, ResultSpec<'a>) -> ResultSpec<'a>),
645    /// Use the default flat-map machinery, but with a monotonicity verdict that
646    /// depends on the input specs. This lets us claim monotonicity for cases
647    /// the static `LazyBinaryFunc::is_monotone` annotation can't safely claim:
648    /// for instance, `t + INTERVAL '1' day` is monotone in `t`, but `t + i`
649    /// generally isn't (the calendar-month / day-clamping arithmetic in
650    /// `add_timestamp_interval` is non-monotone when `i.months != 0`).
651    DynamicMonotone(fn(&ResultSpec<'_>, &ResultSpec<'_>) -> (bool, bool)),
652}
653
654impl AbstractFunc {
655    /// Returns the special-case handling for a particular function, if it exists.
656    fn for_func(func: &BinaryFunc) -> Option<AbstractFunc> {
657        /// Eager in the same sense as `func.rs` uses the term; this assumes that
658        /// nulls and errors propagate up, and we only need to define the behaviour
659        /// on values.
660        fn eagerly<'b>(
661            left: ResultSpec<'b>,
662            right: ResultSpec<'b>,
663            value_fn: impl FnOnce(Values<'b>, Values<'b>) -> ResultSpec<'b>,
664        ) -> ResultSpec<'b> {
665            let result = match (left.values, right.values) {
666                (Values::Empty, _) | (_, Values::Empty) => ResultSpec::nothing(),
667                (l, r) => value_fn(l, r),
668            };
669            ResultSpec {
670                fallible: left.fallible || right.fallible || result.fallible,
671                nullable: left.nullable || right.nullable || result.nullable,
672                values: result.values,
673            }
674        }
675
676        fn jsonb_get_string<'b>(
677            left: ResultSpec<'b>,
678            right: ResultSpec<'b>,
679            stringify: bool,
680        ) -> ResultSpec<'b> {
681            eagerly(left, right, |left, right| {
682                let nested_spec = match (left, right) {
683                    (Values::Nested(mut map_spec), Values::Within(key, key2)) if key == key2 => {
684                        map_spec.remove(&key)
685                    }
686                    _ => None,
687                };
688
689                if let Some(field_spec) = nested_spec {
690                    if stringify {
691                        // We only preserve value-range information when stringification
692                        // is a noop. (Common in real queries.)
693                        let values = match field_spec.values {
694                            Values::Empty => Values::Empty,
695                            Values::Within(min @ Datum::String(_), max @ Datum::String(_)) => {
696                                Values::Within(min, max)
697                            }
698                            Values::Within(_, _) | Values::Nested(_) | Values::All => Values::All,
699                        };
700                        ResultSpec {
701                            values,
702                            ..field_spec
703                        }
704                    } else {
705                        field_spec
706                    }
707                } else {
708                    // The implementation of `jsonb_get_string` always returns
709                    // `Ok(...)`. Morally, everything has a string
710                    // representation, and the worst you can get is a NULL,
711                    // which maps to a NULL.
712                    ResultSpec::any_infallible()
713                }
714            })
715        }
716
717        fn eq<'b>(left: ResultSpec<'b>, right: ResultSpec<'b>) -> ResultSpec<'b> {
718            eagerly(left, right, |left, right| {
719                // `eq` might return true if there's any overlap between the range of its two arguments...
720                let maybe_true = match left.clone().intersect(right.clone()) {
721                    Values::Empty => ResultSpec::nothing(),
722                    _ => ResultSpec::value(Datum::True),
723                };
724
725                // ...and may return false if the union contains at least two distinct values.
726                // Note that the `Empty` case is handled by `eagerly` above.
727                let maybe_false = match left.union(right) {
728                    Values::Within(a, b) if a == b => ResultSpec::nothing(),
729                    _ => ResultSpec::value(Datum::False),
730                };
731
732                maybe_true.union(maybe_false)
733            })
734        }
735
736        /// `add_timestamp_interval` and friends do calendar-month arithmetic
737        /// with day-clamping, which is non-monotone in either argument when
738        /// `interval.months != 0`. But when `interval.months == 0` the
739        /// operation reduces to adding a fixed number of microseconds, which
740        /// *is* monotone in both arguments. The static `is_monotone`
741        /// annotation has to pick the conservative answer; this dynamic check
742        /// recovers filter pushdown for the common case of literal
743        /// `INTERVAL '<N>' day`-style predicates.
744        fn timestamp_plus_interval_monotone(
745            _left: &ResultSpec<'_>,
746            right: &ResultSpec<'_>,
747        ) -> (bool, bool) {
748            let months_zero = matches!(
749                right.values.as_single(),
750                Some(Datum::Interval(i)) if i.months == 0,
751            );
752            (months_zero, months_zero)
753        }
754
755        match func {
756            BinaryFunc::JsonbGetString(_) => Some(AbstractFunc {
757                handler: AbstractFuncHandler::Override(|l, r| jsonb_get_string(l, r, false)),
758                pushdownable: (true, false),
759            }),
760            BinaryFunc::JsonbGetStringStringify(_) => Some(AbstractFunc {
761                handler: AbstractFuncHandler::Override(|l, r| jsonb_get_string(l, r, true)),
762                pushdownable: (true, false),
763            }),
764            BinaryFunc::Eq(_) => Some(AbstractFunc {
765                handler: AbstractFuncHandler::Override(eq),
766                pushdownable: (true, true),
767            }),
768            BinaryFunc::AddTimestampInterval(_)
769            | BinaryFunc::AddTimestampTzInterval(_)
770            | BinaryFunc::SubTimestampInterval(_)
771            | BinaryFunc::SubTimestampTzInterval(_) => Some(AbstractFunc {
772                handler: AbstractFuncHandler::DynamicMonotone(timestamp_plus_interval_monotone),
773                // For [Trace]: we *might* be pushdownable in the first argument
774                // (we are when the interval is a literal with no months). The
775                // interval argument is reported as non-pushdownable so that
776                // `t_col +/- col_interval` doesn't get routed through pushdown
777                // for no benefit; if both sides are constants the predicate
778                // collapses anyway.
779                pushdownable: (true, false),
780            }),
781            _ => None,
782        }
783    }
784}
785
786#[derive(Clone, Debug)]
787pub struct ColumnSpec<'a> {
788    pub col_type: ReprColumnType,
789    pub range: ResultSpec<'a>,
790}
791
792/// An interpreter that:
793/// - stores both the type and the range of possible values for every column and
794///   unmaterializable function. (See the `push_` methods.)
795/// - given an expression (or MFP, etc.), returns the range of possible results that evaluating that
796///   expression might have. (See the `eval_` methods.)
797#[derive(Clone, Debug)]
798pub struct ColumnSpecs<'a> {
799    pub relation: &'a ReprRelationType,
800    pub columns: Vec<ResultSpec<'a>>,
801    pub unmaterializables: BTreeMap<UnmaterializableFunc, ResultSpec<'a>>,
802    pub arena: &'a RowArena,
803}
804
805impl<'a> ColumnSpecs<'a> {
806    // Interpreting a variadic function can lead to exponential blowup: there are up to 4 possibly-
807    // interesting values for each argument (error, null, range bounds...) and in the worst case
808    // we may need to test every combination. We mitigate that here in two ways:
809    // - Adding a linear-time optimization for associative functions like AND, OR, COALESCE.
810    // - Limiting the number of arguments we'll pass on to eval. If this limit is crossed, we'll
811    //   return our default / safe overapproximation instead.
812    const MAX_EVAL_ARGS: usize = 6;
813
814    /// Create a new, empty set of column specs. (Initially, the only assumption we make about the
815    /// data in the column is that it matches the type.)
816    pub fn new(relation: &'a ReprRelationType, arena: &'a RowArena) -> Self {
817        let columns = relation
818            .column_types
819            .iter()
820            .map(|ct| ResultSpec::has_type(ct, false))
821            .collect();
822        ColumnSpecs {
823            relation,
824            columns,
825            unmaterializables: Default::default(),
826            arena,
827        }
828    }
829
830    /// Restrict the set of possible values in a given column. (By intersecting it with the existing
831    /// spec.)
832    pub fn push_column(&mut self, id: usize, update: ResultSpec<'a>) {
833        let range = self.columns.get_mut(id).expect("valid column id");
834        *range = range.clone().intersect(update);
835    }
836
837    /// Restrict the set of possible values a given unmaterializable func might return. (By
838    /// intersecting it with the existing spec.)
839    pub fn push_unmaterializable(&mut self, func: UnmaterializableFunc, update: ResultSpec<'a>) {
840        let range = self
841            .unmaterializables
842            .entry(func.clone())
843            .or_insert_with(|| ResultSpec::has_type(&func.output_type(), true));
844        *range = range.clone().intersect(update);
845    }
846
847    fn eval_result<'b, E>(&self, result: Result<Datum<'b>, E>) -> ResultSpec<'a> {
848        match result {
849            Ok(Datum::Null) => ResultSpec {
850                nullable: true,
851                ..ResultSpec::nothing()
852            },
853            Ok(d) => ResultSpec {
854                values: Values::just(self.arena.make_datum(|packer| packer.push(d))),
855                ..ResultSpec::nothing()
856            },
857            Err(_) => ResultSpec {
858                fallible: true,
859                ..ResultSpec::nothing()
860            },
861        }
862    }
863
864    fn set_literal(expr: &mut MirScalarExpr, update: Result<Datum, EvalError>) {
865        match expr {
866            MirScalarExpr::Literal(literal, col_type) => match update {
867                Err(error) => *literal = Err(error),
868                Ok(datum) => {
869                    assert!(
870                        datum.is_instance_of(col_type),
871                        "{datum:?} must be an instance of {col_type:?}"
872                    );
873                    match literal {
874                        // Reuse the allocation if we can
875                        Ok(row) => row.packer().push(datum),
876                        literal => *literal = Ok(Row::pack_slice(&[datum])),
877                    }
878                }
879            },
880            _ => panic!("not a literal"),
881        }
882    }
883
884    fn set_argument(expr: &mut MirScalarExpr, arg: usize, value: Result<Datum, EvalError>) {
885        match (expr, arg) {
886            (MirScalarExpr::CallUnary { expr, .. }, 0) => Self::set_literal(expr, value),
887            (MirScalarExpr::CallBinary { expr1, .. }, 0) => Self::set_literal(expr1, value),
888            (MirScalarExpr::CallBinary { expr2, .. }, 1) => Self::set_literal(expr2, value),
889            (MirScalarExpr::CallVariadic { exprs, .. }, n) if n < exprs.len() => {
890                Self::set_literal(&mut exprs[n], value)
891            }
892            _ => panic!("illegal argument for expression"),
893        }
894    }
895
896    /// A literal with the given type and a trivial default value. Callers should ensure that
897    /// [Self::set_literal] is called on the resulting expression to give it a meaningful value
898    /// before evaluating.
899    fn placeholder(col_type: ReprColumnType) -> MirScalarExpr {
900        MirScalarExpr::Literal(Err(EvalError::Internal("".into())), col_type)
901    }
902}
903
904impl<'a> Interpreter for ColumnSpecs<'a> {
905    type Summary = ColumnSpec<'a>;
906
907    fn column(&self, id: usize) -> Self::Summary {
908        let col_type = self.relation.column_types[id].clone();
909        let range = self.columns[id].clone();
910        ColumnSpec { col_type, range }
911    }
912
913    fn literal(&self, result: &Result<Row, EvalError>, col_type: &ReprColumnType) -> Self::Summary {
914        let col_type = col_type.clone();
915        let range = self.eval_result(result.as_ref().map(|row| {
916            self.arena
917                .make_datum(|packer| packer.push(row.unpack_first()))
918        }));
919        ColumnSpec { col_type, range }
920    }
921
922    fn unmaterializable(&self, func: &UnmaterializableFunc) -> Self::Summary {
923        let col_type = func.output_type();
924        let range = self
925            .unmaterializables
926            .get(func)
927            .cloned()
928            .unwrap_or_else(|| ResultSpec::has_type(&func.output_type(), true));
929        ColumnSpec { col_type, range }
930    }
931
932    fn unary(&self, func: &UnaryFunc, summary: Self::Summary) -> Self::Summary {
933        let fallible = func.could_error() || summary.range.fallible;
934        let mapped_spec = if let Some(special) = SpecialUnary::for_func(func) {
935            (special.map_fn)(self, summary.range)
936        } else {
937            let is_monotone = func.is_monotone();
938            let mut expr = MirScalarExpr::CallUnary {
939                func: func.clone(),
940                expr: Box::new(Self::placeholder(summary.col_type.clone())),
941            };
942            summary.range.flat_map(is_monotone, |datum| {
943                Self::set_argument(&mut expr, 0, datum);
944                self.eval_result(expr.eval(&[], self.arena))
945            })
946        };
947
948        let col_type = func.output_type(summary.col_type);
949
950        let range = mapped_spec.intersect(ResultSpec::has_type(&col_type, fallible));
951        ColumnSpec { col_type, range }
952    }
953
954    fn binary(
955        &self,
956        func: &BinaryFunc,
957        left: Self::Summary,
958        right: Self::Summary,
959    ) -> Self::Summary {
960        let fallible = func.could_error() || left.range.fallible || right.range.fallible;
961
962        let special = AbstractFunc::for_func(func);
963        let (left_monotonic, right_monotonic) = match &special {
964            Some(AbstractFunc {
965                handler: AbstractFuncHandler::DynamicMonotone(monotone_fn),
966                ..
967            }) => monotone_fn(&left.range, &right.range),
968            _ => func.is_monotone(),
969        };
970
971        let mapped_spec = match special {
972            Some(AbstractFunc {
973                handler: AbstractFuncHandler::Override(f),
974                ..
975            }) => f(left.range, right.range),
976            _ => {
977                let mut expr = MirScalarExpr::CallBinary {
978                    func: func.clone(),
979                    expr1: Box::new(Self::placeholder(left.col_type.clone())),
980                    expr2: Box::new(Self::placeholder(right.col_type.clone())),
981                };
982                left.range.flat_map(left_monotonic, |left_result| {
983                    Self::set_argument(&mut expr, 0, left_result);
984                    right.range.flat_map(right_monotonic, |right_result| {
985                        Self::set_argument(&mut expr, 1, right_result);
986                        self.eval_result(expr.eval(&[], self.arena))
987                    })
988                })
989            }
990        };
991
992        let col_type = func.output_type(&[left.col_type, right.col_type]);
993
994        let range = mapped_spec.intersect(ResultSpec::has_type(&col_type, fallible));
995        ColumnSpec { col_type, range }
996    }
997
998    fn variadic(&self, func: &VariadicFunc, args: Vec<Self::Summary>) -> Self::Summary {
999        let fallible = func.could_error() || args.iter().any(|s| s.range.fallible);
1000        if func.is_associative() && args.len() > 2 {
1001            // To avoid a combinatorial explosion, evaluate large variadic calls as a series of
1002            // smaller ones, since associativity guarantees we'll get compatible results.
1003            return args
1004                .into_iter()
1005                .reduce(|a, b| self.variadic(func, vec![a, b]))
1006                .expect("reducing over a non-empty argument list");
1007        }
1008
1009        let mapped_spec = if args.len() >= Self::MAX_EVAL_ARGS {
1010            ResultSpec::anything()
1011        } else {
1012            fn eval_loop<'a>(
1013                is_monotonic: bool,
1014                expr: &mut MirScalarExpr,
1015                args: &[ColumnSpec<'a>],
1016                index: usize,
1017                datum_map: &mut impl FnMut(&MirScalarExpr) -> ResultSpec<'a>,
1018            ) -> ResultSpec<'a> {
1019                if index >= args.len() {
1020                    datum_map(expr)
1021                } else {
1022                    args[index].range.flat_map(is_monotonic, |datum| {
1023                        ColumnSpecs::set_argument(expr, index, datum);
1024                        eval_loop(is_monotonic, expr, args, index + 1, datum_map)
1025                    })
1026                }
1027            }
1028
1029            let mut fn_expr = MirScalarExpr::CallVariadic {
1030                func: func.clone(),
1031                exprs: args
1032                    .iter()
1033                    .map(|spec| Self::placeholder(spec.col_type.clone()))
1034                    .collect(),
1035            };
1036            eval_loop(func.is_monotone(), &mut fn_expr, &args, 0, &mut |expr| {
1037                self.eval_result(expr.eval(&[], self.arena))
1038            })
1039        };
1040
1041        let col_types = args.into_iter().map(|spec| spec.col_type).collect();
1042        let col_type = func.output_type(col_types);
1043
1044        let range = mapped_spec.intersect(ResultSpec::has_type(&col_type, fallible));
1045
1046        ColumnSpec { col_type, range }
1047    }
1048
1049    fn cond(&self, cond: Self::Summary, then: Self::Summary, els: Self::Summary) -> Self::Summary {
1050        let col_type = then
1051            .col_type
1052            .union(&els.col_type)
1053            .expect("failed type union for cond during abstract interpretation");
1054
1055        let range = cond
1056            .range
1057            .flat_map(true, |datum| match datum {
1058                Ok(Datum::True) => then.range.clone(),
1059                Ok(Datum::False) => els.range.clone(),
1060                _ => ResultSpec::fails(),
1061            })
1062            .intersect(ResultSpec::has_type(&col_type, true));
1063
1064        ColumnSpec { col_type, range }
1065    }
1066
1067    /// Override the default implementations of [Self::mfp_filter] and
1068    /// [Self::mfp_plan_filter] so that the fallibility of MFP expressions
1069    /// surfaces in the result, even when the expression's result column isn't
1070    /// referenced by a predicate or temporal bound.
1071    ///
1072    /// The runtime MFP evaluator runs every expression once all the preceding
1073    /// predicates pass (see [`crate::SafeMfpPlan::evaluate_inner`]), so an
1074    /// expression that errors on the actual data will turn the whole row into
1075    /// an `Err` — even if no predicate or bound mentions that expression. The
1076    /// default `mfp_filter` / `mfp_plan_filter` only AND together the
1077    /// predicates and bounds, so the AND result misses the expression's
1078    /// `fallible` flag and persist filter pushdown can wrongly discard a part
1079    /// that actually produces error rows. See database-issues#9656.
1080    fn mfp_filter(&self, mfp: &MapFilterProject) -> Self::Summary {
1081        let mfp_eval = MfpEval::new(self, mfp.input_arity, &mfp.expressions);
1082        let predicates = mfp
1083            .predicates
1084            .iter()
1085            .map(|(_, e)| mfp_eval.expr(e))
1086            .collect();
1087        let mut result = self.variadic(&And.into(), predicates);
1088        if mfp_eval.expressions.iter().any(|s| s.range.fallible) {
1089            result.range.fallible = true;
1090        }
1091        result
1092    }
1093
1094    fn mfp_plan_filter(&self, plan: &MfpPlan) -> Self::Summary {
1095        let mfp_eval = MfpEval::new(self, plan.mfp.input_arity, &plan.mfp.expressions);
1096        let mut results: Vec<_> = plan
1097            .mfp
1098            .predicates
1099            .iter()
1100            .map(|(_, e)| mfp_eval.expr(e))
1101            .collect();
1102        let mz_now = mfp_eval.unmaterializable(&UnmaterializableFunc::MzNow);
1103        for bound in &plan.lower_bounds {
1104            let bound_range = mfp_eval.expr(bound);
1105            let result = mfp_eval.binary(&BinaryFunc::Lte(func::Lte), bound_range, mz_now.clone());
1106            results.push(result);
1107        }
1108        for bound in &plan.upper_bounds {
1109            let bound_range = mfp_eval.expr(bound);
1110            let result = mfp_eval.binary(&BinaryFunc::Gte(func::Gte), bound_range, mz_now.clone());
1111            results.push(result);
1112        }
1113        let mut result = self.variadic(&And.into(), results);
1114        if mfp_eval.expressions.iter().any(|s| s.range.fallible) {
1115            result.range.fallible = true;
1116        }
1117        result
1118    }
1119}
1120
1121/// An interpreter that returns whether or not a particular expression is "pushdownable".
1122/// Broadly speaking, an expression is pushdownable if the result of evaluating the expression
1123/// depends on the range of possible column values in a way that `ColumnSpecs` is able to reason about.
1124///
1125/// In practice, we internally need to distinguish between expressions that are trivially predicable
1126/// (because they're constant) and expressions that depend on the column ranges themselves.
1127/// See the [TraceSummary] variants for those distinctions, and [TraceSummary::pushdownable] for
1128/// the overall assessment.
1129#[derive(Debug)]
1130pub struct Trace;
1131
1132/// A summary type for the [Trace] interpreter.
1133///
1134/// The ordering of this type is meaningful: the "smaller" the summary, the more information we have
1135/// about the possible values of the expression. This means we can eg. use `max` in the
1136/// interpreter below to find the summary for a function-call expression based on the summaries
1137/// of its arguments.
1138#[derive(Copy, Clone, Debug, PartialOrd, PartialEq, Ord, Eq)]
1139pub enum TraceSummary {
1140    /// The expression is constant: we can evaluate it without any runtime information.
1141    /// This corresponds to a `ResultSpec` of a single value.
1142    Constant,
1143    /// The expression depends on runtime information, but in "predictable" way... ie. if we know
1144    /// the range of possible values for all columns and unmaterializable functions, we can
1145    /// predict the possible values of the output.
1146    /// This corresponds to a `ResultSpec` of a perhaps range of values.
1147    Dynamic,
1148    /// The expression depends on runtime information in an unpredictable way.
1149    /// This corresponds to a `ResultSpec::value_all()` or something similarly vague.
1150    Unknown,
1151}
1152
1153impl TraceSummary {
1154    /// We say that a function is "pushdownable" for a particular
1155    /// argument if `ColumnSpecs` can determine the spec of the function's output given the input spec for
1156    /// that argument. (In practice, this is true when either the function is monotone in that argument
1157    /// or it's been special-cased in the interpreter.)
1158    fn apply_fn(self, pushdownable: bool) -> Self {
1159        match self {
1160            TraceSummary::Constant => TraceSummary::Constant,
1161            TraceSummary::Dynamic => match pushdownable {
1162                true => TraceSummary::Dynamic,
1163                false => TraceSummary::Unknown,
1164            },
1165            TraceSummary::Unknown => TraceSummary::Unknown,
1166        }
1167    }
1168
1169    /// We say that an expression is "pushdownable" if it's either constant or dynamic.
1170    pub fn pushdownable(self) -> bool {
1171        match self {
1172            TraceSummary::Constant | TraceSummary::Dynamic => true,
1173            TraceSummary::Unknown => false,
1174        }
1175    }
1176}
1177
1178impl Interpreter for Trace {
1179    type Summary = TraceSummary;
1180
1181    fn column(&self, _id: usize) -> Self::Summary {
1182        TraceSummary::Dynamic
1183    }
1184
1185    fn literal(
1186        &self,
1187        _result: &Result<Row, EvalError>,
1188        _col_type: &ReprColumnType,
1189    ) -> Self::Summary {
1190        TraceSummary::Constant
1191    }
1192
1193    fn unmaterializable(&self, _func: &UnmaterializableFunc) -> Self::Summary {
1194        TraceSummary::Dynamic
1195    }
1196
1197    fn unary(&self, func: &UnaryFunc, expr: Self::Summary) -> Self::Summary {
1198        let pushdownable = match SpecialUnary::for_func(func) {
1199            None => func.is_monotone(),
1200            Some(special) => special.pushdownable,
1201        };
1202        expr.apply_fn(pushdownable)
1203    }
1204
1205    fn binary(
1206        &self,
1207        func: &BinaryFunc,
1208        left: Self::Summary,
1209        right: Self::Summary,
1210    ) -> Self::Summary {
1211        let (left_pushdownable, right_pushdownable) = match AbstractFunc::for_func(func) {
1212            None => func.is_monotone(),
1213            Some(special) => special.pushdownable,
1214        };
1215        left.apply_fn(left_pushdownable)
1216            .max(right.apply_fn(right_pushdownable))
1217    }
1218
1219    fn variadic(&self, func: &VariadicFunc, exprs: Vec<Self::Summary>) -> Self::Summary {
1220        if !func.is_associative() && exprs.len() >= ColumnSpecs::MAX_EVAL_ARGS {
1221            // We can't efficiently evaluate functions with very large argument lists;
1222            // see the comment on ColumnSpecs::MAX_EVAL_ARGS for details.
1223            return TraceSummary::Unknown;
1224        }
1225
1226        let pushdownable_fn = func.is_monotone();
1227        exprs
1228            .into_iter()
1229            .map(|pushdownable_arg| pushdownable_arg.apply_fn(pushdownable_fn))
1230            .max()
1231            .unwrap_or(TraceSummary::Constant)
1232    }
1233
1234    fn cond(&self, cond: Self::Summary, then: Self::Summary, els: Self::Summary) -> Self::Summary {
1235        // We don't actually need to be able to predict the condition precisely to predict the output,
1236        // since we can union the ranges of the two branches for a conservative estimate.
1237        let cond = cond.min(TraceSummary::Dynamic);
1238        cond.max(then).max(els)
1239    }
1240}
1241
1242#[cfg(test)]
1243mod tests {
1244    use itertools::Itertools;
1245    use mz_repr::adt::datetime::DateTimeUnits;
1246    use mz_repr::{Datum, PropDatum, RowArena, SqlScalarType};
1247    use proptest::prelude::*;
1248    use proptest::sample::{Index, select};
1249
1250    use crate::func::*;
1251    use crate::scalar::func::variadic::Concat;
1252    use crate::{BinaryFunc, MirScalarExpr, UnaryFunc};
1253
1254    use super::*;
1255
1256    #[derive(Debug)]
1257    struct ExpressionData {
1258        relation_type: ReprRelationType,
1259        specs: Vec<ResultSpec<'static>>,
1260        rows: Vec<Row>,
1261        expr: MirScalarExpr,
1262    }
1263
1264    // Currently there's no good way to check whether a particular function accepts a particular
1265    // type as argument, which means we need to list everything out explicitly here. Restrict our interest
1266    // to a reasonable number of functions, to keep things tractable
1267    // TODO: replace this with function-level info once it's available.
1268    const NUM_TYPE: ReprScalarType = ReprScalarType::Numeric;
1269    static SCALAR_TYPES: &[ReprScalarType] = &[
1270        ReprScalarType::Bool,
1271        ReprScalarType::Jsonb,
1272        NUM_TYPE,
1273        ReprScalarType::Date,
1274        ReprScalarType::Timestamp,
1275        ReprScalarType::MzTimestamp,
1276        ReprScalarType::String,
1277    ];
1278
1279    const INTERESTING_UNARY_FUNCS: &[UnaryFunc] = {
1280        &[
1281            UnaryFunc::CastNumericToMzTimestamp(CastNumericToMzTimestamp),
1282            UnaryFunc::NegNumeric(NegNumeric),
1283            UnaryFunc::CastJsonbToNumeric(CastJsonbToNumeric(None)),
1284            UnaryFunc::CastJsonbToBool(CastJsonbToBool),
1285            UnaryFunc::CastJsonbToString(CastJsonbToString),
1286            UnaryFunc::DateTruncTimestamp(DateTruncTimestamp(DateTimeUnits::Epoch)),
1287            UnaryFunc::ExtractTimestamp(ExtractTimestamp(DateTimeUnits::Epoch)),
1288            UnaryFunc::ExtractDate(ExtractDate(DateTimeUnits::Epoch)),
1289            UnaryFunc::Not(Not),
1290            UnaryFunc::IsNull(IsNull),
1291            UnaryFunc::IsFalse(IsFalse),
1292            UnaryFunc::TryParseMonotonicIso8601Timestamp(TryParseMonotonicIso8601Timestamp),
1293        ]
1294    };
1295
1296    fn unary_typecheck(func: &UnaryFunc, arg: &ReprColumnType) -> bool {
1297        use UnaryFunc::*;
1298        match func {
1299            CastNumericToMzTimestamp(_) | NegNumeric(_) => arg.scalar_type == NUM_TYPE,
1300            CastJsonbToNumeric(_) | CastJsonbToBool(_) | CastJsonbToString(_) => {
1301                arg.scalar_type == ReprScalarType::Jsonb
1302            }
1303            ExtractTimestamp(_) | DateTruncTimestamp(_) => {
1304                arg.scalar_type == ReprScalarType::Timestamp
1305            }
1306            ExtractDate(_) => arg.scalar_type == ReprScalarType::Date,
1307            Not(_) => arg.scalar_type == ReprScalarType::Bool,
1308            IsNull(_) => true,
1309            TryParseMonotonicIso8601Timestamp(_) => arg.scalar_type == ReprScalarType::String,
1310            _ => false,
1311        }
1312    }
1313
1314    fn interesting_binary_funcs() -> Vec<BinaryFunc> {
1315        vec![
1316            AddTimestampInterval.into(),
1317            AddNumeric.into(),
1318            SubNumeric.into(),
1319            MulNumeric.into(),
1320            DivNumeric.into(),
1321            Eq.into(),
1322            Lt.into(),
1323            Gt.into(),
1324            Lte.into(),
1325            Gte.into(),
1326            DateTruncUnitsTimestamp.into(),
1327            JsonbGetString.into(),
1328            JsonbGetStringStringify.into(),
1329        ]
1330    }
1331
1332    fn binary_typecheck(func: &BinaryFunc, arg0: &ReprColumnType, arg1: &ReprColumnType) -> bool {
1333        use BinaryFunc::*;
1334        match func {
1335            AddTimestampInterval(_) => {
1336                arg0.scalar_type == ReprScalarType::Timestamp
1337                    && arg1.scalar_type == ReprScalarType::Interval
1338            }
1339            AddNumeric(_) | SubNumeric(_) | MulNumeric(_) | DivNumeric(_) => {
1340                arg0.scalar_type == NUM_TYPE && arg1.scalar_type == NUM_TYPE
1341            }
1342            Eq(_) | Lt(_) | Gt(_) | Lte(_) | Gte(_) => arg0.scalar_type == arg1.scalar_type,
1343            DateTruncTimestamp(_) => {
1344                arg0.scalar_type == ReprScalarType::String
1345                    && arg1.scalar_type == ReprScalarType::Timestamp
1346            }
1347            JsonbGetString(_) | JsonbGetStringStringify(_) => {
1348                arg0.scalar_type == ReprScalarType::Jsonb
1349                    && arg1.scalar_type == ReprScalarType::String
1350            }
1351            _ => false,
1352        }
1353    }
1354
1355    const INTERESTING_VARIADIC_FUNCS: &[VariadicFunc] = {
1356        use crate::scalar::func::variadic as v;
1357        use VariadicFunc::*;
1358        &[
1359            Coalesce(v::Coalesce),
1360            Greatest(v::Greatest),
1361            Least(v::Least),
1362            And(v::And),
1363            Or(v::Or),
1364            Concat(v::Concat),
1365            ConcatWs(v::ConcatWs),
1366        ]
1367    };
1368
1369    fn variadic_typecheck(func: &VariadicFunc, args: &[ReprColumnType]) -> bool {
1370        use VariadicFunc::*;
1371        fn all_eq<'a>(
1372            iter: impl IntoIterator<Item = &'a ReprColumnType>,
1373            other: &ReprScalarType,
1374        ) -> bool {
1375            iter.into_iter().all(|t| t.scalar_type == *other)
1376        }
1377        match func {
1378            Coalesce(_) | Greatest(_) | Least(_) => match args {
1379                [] => true,
1380                [first, rest @ ..] => all_eq(rest, &first.scalar_type),
1381            },
1382            And(_) | Or(_) => all_eq(args, &ReprScalarType::Bool),
1383            Concat(_) => all_eq(args, &ReprScalarType::String),
1384            ConcatWs(_) => args.len() > 1 && all_eq(args, &ReprScalarType::String),
1385            _ => false,
1386        }
1387    }
1388
1389    fn gen_datums_for_type(typ: &ReprColumnType) -> BoxedStrategy<Datum<'static>> {
1390        let mut values: Vec<Datum<'static>> = SqlScalarType::from_repr(&typ.scalar_type)
1391            .interesting_datums()
1392            .collect();
1393        if typ.nullable {
1394            values.push(Datum::Null)
1395        }
1396        select(values).boxed()
1397    }
1398
1399    fn gen_column() -> impl Strategy<Value = (ReprColumnType, Datum<'static>, ResultSpec<'static>)>
1400    {
1401        let col_type = (select(SCALAR_TYPES), any::<bool>())
1402            .prop_map(|(t, b)| t.nullable(b))
1403            .prop_filter("need at least one value", |c| {
1404                SqlScalarType::from_repr(&c.scalar_type)
1405                    .interesting_datums()
1406                    .count()
1407                    > 0
1408            });
1409
1410        let result_spec = select(vec![
1411            ResultSpec::nothing(),
1412            ResultSpec::null(),
1413            ResultSpec::anything(),
1414            ResultSpec::value_all(),
1415        ]);
1416
1417        (col_type, result_spec).prop_flat_map(|(col, result_spec)| {
1418            gen_datums_for_type(&col).prop_map(move |datum| {
1419                let result_spec = result_spec.clone().union(ResultSpec::value(datum));
1420                (col.clone(), datum, result_spec)
1421            })
1422        })
1423    }
1424
1425    fn gen_expr_for_relation(
1426        relation: &ReprRelationType,
1427    ) -> BoxedStrategy<(MirScalarExpr, ReprColumnType)> {
1428        let column_gen = {
1429            let column_types = relation.column_types.clone();
1430            any::<Index>()
1431                .prop_map(move |idx| {
1432                    let id = idx.index(column_types.len());
1433                    (MirScalarExpr::column(id), column_types[id].clone())
1434                })
1435                .boxed()
1436        };
1437
1438        let literal_gen = (select(SCALAR_TYPES), any::<bool>())
1439            .prop_map(|(s, b)| s.nullable(b))
1440            .prop_flat_map(|ct| {
1441                let error_gen = any::<EvalError>().prop_map(Err).boxed();
1442                let value_gen = gen_datums_for_type(&ct)
1443                    .prop_map(move |datum| Ok(Row::pack_slice(&[datum])))
1444                    .boxed();
1445                error_gen.prop_union(value_gen).prop_map(move |result| {
1446                    (MirScalarExpr::Literal(result, ct.clone()), ct.clone())
1447                })
1448            })
1449            .boxed();
1450
1451        column_gen
1452            .prop_union(literal_gen)
1453            .prop_recursive(4, 64, 8, |self_gen| {
1454                let unary_gen = (select(INTERESTING_UNARY_FUNCS), self_gen.clone())
1455                    .prop_filter_map("unary func", |(func, (expr_in, type_in))| {
1456                        if !unary_typecheck(&func, &type_in) {
1457                            return None;
1458                        }
1459                        let type_out = func.output_type(type_in);
1460                        let expr_out = MirScalarExpr::CallUnary {
1461                            func,
1462                            expr: Box::new(expr_in),
1463                        };
1464                        Some((expr_out, type_out))
1465                    })
1466                    .boxed();
1467                let binary_gen = (
1468                    select(interesting_binary_funcs()),
1469                    self_gen.clone(),
1470                    self_gen.clone(),
1471                )
1472                    .prop_filter_map(
1473                        "binary func",
1474                        |(func, (expr_left, type_left), (expr_right, type_right))| {
1475                            if !binary_typecheck(&func, &type_left, &type_right) {
1476                                return None;
1477                            }
1478                            let type_out = func.output_type(&[type_left, type_right]);
1479                            let expr_out = MirScalarExpr::CallBinary {
1480                                func,
1481                                expr1: Box::new(expr_left),
1482                                expr2: Box::new(expr_right),
1483                            };
1484                            Some((expr_out, type_out))
1485                        },
1486                    )
1487                    .boxed();
1488                let variadic_gen = (
1489                    select(INTERESTING_VARIADIC_FUNCS),
1490                    prop::collection::vec(self_gen, 1..4),
1491                )
1492                    .prop_filter_map("variadic func", |(func, exprs)| {
1493                        let (exprs_in, type_in): (_, Vec<_>) = exprs.into_iter().unzip();
1494                        if !variadic_typecheck(&func, &type_in) {
1495                            return None;
1496                        }
1497                        let type_out = func.output_type(type_in);
1498                        let expr_out = MirScalarExpr::CallVariadic {
1499                            func,
1500                            exprs: exprs_in,
1501                        };
1502                        Some((expr_out, type_out))
1503                    })
1504                    .boxed();
1505
1506                unary_gen
1507                    .prop_union(binary_gen)
1508                    .boxed()
1509                    .prop_union(variadic_gen)
1510            })
1511            .boxed()
1512    }
1513
1514    fn gen_expr_data() -> impl Strategy<Value = ExpressionData> {
1515        let columns = prop::collection::vec(gen_column(), 1..10);
1516        columns.prop_flat_map(|data| {
1517            let (columns, datums, specs): (Vec<_>, Vec<_>, Vec<_>) = data.into_iter().multiunzip();
1518            let relation = ReprRelationType::new(columns);
1519            let row = Row::pack_slice(&datums);
1520            gen_expr_for_relation(&relation).prop_map(move |(expr, _)| ExpressionData {
1521                relation_type: relation.clone(),
1522                specs: specs.clone(),
1523                rows: vec![row.clone()],
1524                expr,
1525            })
1526        })
1527    }
1528
1529    #[mz_ore::test]
1530    #[cfg_attr(miri, ignore)] // error: unsupported operation: can't call foreign function `decContextDefault` on OS `linux`
1531    fn test_trivial_spec_matches() {
1532        fn check(datum: PropDatum) -> Result<(), TestCaseError> {
1533            let datum: Datum = (&datum).into();
1534            let spec = if datum.is_null() {
1535                ResultSpec::null()
1536            } else {
1537                ResultSpec::value(datum)
1538            };
1539            assert!(spec.may_contain(datum));
1540            Ok(())
1541        }
1542
1543        proptest!(|(datum in mz_repr::arb_datum(true))| {
1544            check(datum)?;
1545        });
1546
1547        assert!(ResultSpec::fails().may_fail());
1548    }
1549
1550    #[mz_ore::test]
1551    #[cfg_attr(miri, ignore)] // error: unsupported operation: can't call foreign function `decContextDefault` on OS `linux`
1552    fn test_equivalence() {
1553        fn check(data: ExpressionData) -> Result<(), TestCaseError> {
1554            let ExpressionData {
1555                relation_type,
1556                specs,
1557                rows,
1558                expr,
1559            } = data;
1560
1561            // We want to ensure that the spec we get when evaluating an expression using
1562            // `ColumnSpecs` always contains the _actual_ value of that column when evaluated with
1563            // eval. (This is an important correctness property of abstract interpretation.)
1564            let arena = RowArena::new();
1565            let mut interpreter = ColumnSpecs::new(&relation_type, &arena);
1566            for (id, spec) in specs.into_iter().enumerate() {
1567                interpreter.push_column(id, spec);
1568            }
1569
1570            let spec = interpreter.expr(&expr);
1571
1572            for row in &rows {
1573                let datums: Vec<_> = row.iter().collect();
1574                let eval_result = expr.eval(&datums, &arena);
1575                match eval_result {
1576                    Ok(value) => {
1577                        assert!(spec.range.may_contain(value))
1578                    }
1579                    Err(_) => {
1580                        assert!(spec.range.may_fail());
1581                    }
1582                }
1583            }
1584
1585            Ok(())
1586        }
1587
1588        proptest!(|(data in gen_expr_data())| {
1589            check(data)?;
1590        });
1591    }
1592
1593    /// Regression test for database-issues#9656.
1594    ///
1595    /// The interpreter must surface the fallibility of MFP expressions that
1596    /// aren't referenced by any predicate or temporal bound. The runtime MFP
1597    /// evaluator runs every expression once predicates pass, so an expression
1598    /// that errors on the actual data makes the whole row an `Err` — and
1599    /// `filter_result` must keep the part to emit that error.
1600    #[mz_ore::test]
1601    #[cfg_attr(miri, ignore)]
1602    fn test_mfp_unreferenced_fallible_expression() {
1603        use crate::scalar::func::CastStringToUuid;
1604
1605        // MFP: one expression that always errors on the input range, and one
1606        // predicate that always passes. The expression's result column is
1607        // *not* referenced by the predicate, so the default interpreter
1608        // implementation would AND together just `True` and miss the
1609        // fallibility.
1610        let mfp = MapFilterProject {
1611            expressions: vec![MirScalarExpr::CallUnary {
1612                func: UnaryFunc::CastStringToUuid(CastStringToUuid),
1613                expr: Box::new(MirScalarExpr::column(0)),
1614            }],
1615            predicates: vec![(
1616                1,
1617                MirScalarExpr::literal_ok(Datum::True, ReprScalarType::Bool),
1618            )],
1619            projection: vec![0, 1],
1620            input_arity: 1,
1621        };
1622
1623        let relation = ReprRelationType::new(vec![ReprScalarType::String.nullable(false)]);
1624        let arena = RowArena::new();
1625        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1626        // "not-a-uuid" is in the stats range and definitely doesn't parse as a UUID.
1627        interpreter.push_column(
1628            0,
1629            ResultSpec::value_between(Datum::String("not-a-uuid"), Datum::String("not-a-uuid")),
1630        );
1631        let spec = interpreter.mfp_filter(&mfp);
1632        assert!(
1633            spec.range.may_fail(),
1634            "an MFP expression that errors on the stats range must propagate \
1635             fallibility, otherwise persist filter pushdown can wrongly discard \
1636             a part that produces error rows",
1637        );
1638    }
1639
1640    /// Proptest companion to [`test_mfp_unreferenced_fallible_expression`]:
1641    /// directly verifies the fallibility claim of [`ColumnSpecs::mfp_filter`]
1642    /// against the runtime MFP semantics. For a random expression placed in
1643    /// `MapFilterProject::expressions` (i.e. as an unreferenced Map step), if
1644    /// evaluating the expression on a row drawn from the stats range produces
1645    /// an error at runtime, then the interpreter's summary must report
1646    /// `may_fail()`. Without the `expressions.any(|s| s.range.fallible)` patch
1647    /// in `mfp_filter`, the AND over an empty predicate list collapses to
1648    /// `True` and the runtime error is wrongly ruled out.
1649    #[mz_ore::test]
1650    #[cfg_attr(miri, ignore)]
1651    fn test_mfp_filter_fallibility_equivalence() {
1652        fn check(data: ExpressionData) -> Result<(), TestCaseError> {
1653            let ExpressionData {
1654                relation_type,
1655                specs,
1656                rows,
1657                expr,
1658            } = data;
1659
1660            let input_arity = relation_type.column_types.len();
1661            let mfp = MapFilterProject {
1662                expressions: vec![expr.clone()],
1663                predicates: vec![],
1664                projection: (0..input_arity).collect(),
1665                input_arity,
1666            };
1667
1668            let arena = RowArena::new();
1669            let mut interpreter = ColumnSpecs::new(&relation_type, &arena);
1670            for (id, spec) in specs.into_iter().enumerate() {
1671                interpreter.push_column(id, spec);
1672            }
1673            let summary = interpreter.mfp_filter(&mfp);
1674
1675            for row in &rows {
1676                let datums: Vec<_> = row.iter().collect();
1677                if expr.eval(&datums, &arena).is_err() {
1678                    prop_assert!(
1679                        summary.range.may_fail(),
1680                        "mfp_filter must surface the fallibility of an \
1681                         unreferenced MFP expression: row {:?} errored at \
1682                         runtime but the interpreter ruled out errors",
1683                        row,
1684                    );
1685                }
1686            }
1687            Ok(())
1688        }
1689
1690        proptest!(|(data in gen_expr_data())| {
1691            check(data)?;
1692        });
1693    }
1694
1695    #[mz_ore::test]
1696    fn test_mfp() {
1697        // Regression test for https://github.com/MaterializeInc/database-issues/issues/5736
1698        use MirScalarExpr::*;
1699
1700        let mfp = MapFilterProject {
1701            expressions: vec![],
1702            predicates: vec![
1703                // Always fails on the known input range
1704                (
1705                    1,
1706                    CallUnary {
1707                        func: UnaryFunc::IsNull(IsNull),
1708                        expr: Box::new(CallBinary {
1709                            func: MulInt32.into(),
1710                            expr1: Box::new(MirScalarExpr::column(0)),
1711                            expr2: Box::new(MirScalarExpr::column(0)),
1712                        }),
1713                    },
1714                ),
1715                // Always returns false on the known input range
1716                (
1717                    1,
1718                    CallBinary {
1719                        func: Eq.into(),
1720                        expr1: Box::new(MirScalarExpr::column(0)),
1721                        expr2: Box::new(MirScalarExpr::literal_ok(
1722                            Datum::Int32(1727694505),
1723                            ReprScalarType::Int32,
1724                        )),
1725                    },
1726                ),
1727            ],
1728            projection: vec![],
1729            input_arity: 1,
1730        };
1731
1732        let relation = ReprRelationType::new(vec![ReprScalarType::Int32.nullable(true)]);
1733        let arena = RowArena::new();
1734        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1735        interpreter.push_column(0, ResultSpec::value(Datum::Int32(-1294725158)));
1736        let spec = interpreter.mfp_filter(&mfp);
1737        assert!(spec.range.may_fail());
1738    }
1739
1740    #[mz_ore::test]
1741    fn test_concat() {
1742        let expr = MirScalarExpr::call_variadic(
1743            Concat,
1744            vec![
1745                MirScalarExpr::column(0),
1746                MirScalarExpr::literal_ok(Datum::String("a"), ReprScalarType::String),
1747                MirScalarExpr::literal_ok(Datum::String("b"), ReprScalarType::String),
1748            ],
1749        );
1750
1751        let relation = ReprRelationType::new(vec![ReprScalarType::String.nullable(false)]);
1752        let arena = RowArena::new();
1753        let interpreter = ColumnSpecs::new(&relation, &arena);
1754        let spec = interpreter.expr(&expr);
1755        assert!(spec.range.may_contain(Datum::String("blab")));
1756    }
1757
1758    #[mz_ore::test]
1759    fn test_eval_range() {
1760        // Example inspired by the tumbling windows temporal filter in the docs
1761        let period_ms = MirScalarExpr::literal_ok(Datum::Int64(10), ReprScalarType::Int64);
1762        let expr = MirScalarExpr::CallBinary {
1763            func: Gte.into(),
1764            expr1: Box::new(MirScalarExpr::CallUnmaterializable(
1765                UnmaterializableFunc::MzNow,
1766            )),
1767            expr2: Box::new(MirScalarExpr::CallUnary {
1768                func: UnaryFunc::CastInt64ToMzTimestamp(CastInt64ToMzTimestamp),
1769                expr: Box::new(MirScalarExpr::CallBinary {
1770                    func: MulInt64.into(),
1771                    expr1: Box::new(period_ms.clone()),
1772                    expr2: Box::new(MirScalarExpr::CallBinary {
1773                        func: DivInt64.into(),
1774                        expr1: Box::new(MirScalarExpr::column(0)),
1775                        expr2: Box::new(period_ms),
1776                    }),
1777                }),
1778            }),
1779        };
1780        let relation = ReprRelationType::new(vec![ReprScalarType::Int64.nullable(false)]);
1781
1782        {
1783            // Non-overlapping windows
1784            let arena = RowArena::new();
1785            let mut interpreter = ColumnSpecs::new(&relation, &arena);
1786            interpreter.push_unmaterializable(
1787                UnmaterializableFunc::MzNow,
1788                ResultSpec::value_between(
1789                    Datum::MzTimestamp(10.into()),
1790                    Datum::MzTimestamp(20.into()),
1791                ),
1792            );
1793            interpreter.push_column(0, ResultSpec::value_between(30i64.into(), 40i64.into()));
1794
1795            let range_out = interpreter.expr(&expr).range;
1796            assert!(range_out.may_contain(Datum::False));
1797            assert!(!range_out.may_contain(Datum::True));
1798            assert!(!range_out.may_contain(Datum::Null));
1799            assert!(!range_out.may_fail());
1800        }
1801
1802        {
1803            // Overlapping windows
1804            let arena = RowArena::new();
1805            let mut interpreter = ColumnSpecs::new(&relation, &arena);
1806            interpreter.push_unmaterializable(
1807                UnmaterializableFunc::MzNow,
1808                ResultSpec::value_between(
1809                    Datum::MzTimestamp(10.into()),
1810                    Datum::MzTimestamp(35.into()),
1811                ),
1812            );
1813            interpreter.push_column(0, ResultSpec::value_between(30i64.into(), 40i64.into()));
1814
1815            let range_out = interpreter.expr(&expr).range;
1816            assert!(range_out.may_contain(Datum::False));
1817            assert!(range_out.may_contain(Datum::True));
1818            assert!(!range_out.may_contain(Datum::Null));
1819            assert!(!range_out.may_fail());
1820        }
1821    }
1822
1823    #[mz_ore::test]
1824    #[cfg_attr(miri, ignore)] // error: unsupported operation: can't call foreign function `decNumberFromInt32` on OS `linux`
1825    fn test_jsonb() {
1826        let arena = RowArena::new();
1827
1828        let expr = MirScalarExpr::column(0)
1829            .call_binary(
1830                MirScalarExpr::literal_ok(Datum::from("ts"), ReprScalarType::String),
1831                JsonbGetString,
1832            )
1833            .call_unary(CastJsonbToNumeric(None));
1834
1835        let relation = ReprRelationType::new(vec![ReprScalarType::Jsonb.nullable(true)]);
1836        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1837        interpreter.push_column(
1838            0,
1839            ResultSpec::map_spec(
1840                [(
1841                    "ts".into(),
1842                    ResultSpec::value_between(
1843                        Datum::Numeric(100.into()),
1844                        Datum::Numeric(300.into()),
1845                    ),
1846                )]
1847                .into_iter()
1848                .collect(),
1849            ),
1850        );
1851
1852        let range_out = interpreter.expr(&expr).range;
1853        assert!(!range_out.may_contain(Datum::Numeric(0.into())));
1854        assert!(range_out.may_contain(Datum::Numeric(200.into())));
1855        assert!(!range_out.may_contain(Datum::Numeric(400.into())));
1856    }
1857
1858    #[mz_ore::test]
1859    fn test_nested_union_partial_overlap() {
1860        // `Nested(map)` constrains a key only when the key is present in `map`; absent
1861        // keys mean "anything". So the union of two Nested specs must drop any key
1862        // that's missing from one side, because `x ∪ anything = anything`. Only keys
1863        // present in *both* sides survive (with their per-key specs unioned).
1864        let a = ResultSpec::map_spec(
1865            [
1866                ("x".into(), ResultSpec::value(Datum::String("a"))),
1867                ("y".into(), ResultSpec::value(Datum::String("b"))),
1868                ("c".into(), ResultSpec::value(Datum::String("c"))),
1869            ]
1870            .into_iter()
1871            .collect(),
1872        );
1873        let b = ResultSpec::map_spec(
1874            [
1875                ("x".into(), ResultSpec::value(Datum::String("a2"))),
1876                ("y".into(), ResultSpec::value(Datum::String("b2"))),
1877                ("z".into(), ResultSpec::value(Datum::String("z"))),
1878            ]
1879            .into_iter()
1880            .collect(),
1881        );
1882
1883        let unioned = a.union(b);
1884
1885        // Push the unioned spec through `->> <key>`: keys only in one side must
1886        // admit NULL (the other side is unconstrained, so the field could be absent
1887        // there); shared keys must include both observed values.
1888        let arena = RowArena::new();
1889        let relation = ReprRelationType::new(vec![ReprScalarType::Jsonb.nullable(false)]);
1890
1891        // Key only in `a`: the union must admit NULL.
1892        {
1893            let mut interpreter = ColumnSpecs::new(&relation, &arena);
1894            interpreter.push_column(0, unioned.clone());
1895            let expr = MirScalarExpr::column(0).call_binary(
1896                MirScalarExpr::literal_ok(Datum::from("c"), ReprScalarType::String),
1897                JsonbGetStringStringify,
1898            );
1899            assert!(interpreter.expr(&expr).range.may_contain(Datum::Null));
1900        }
1901
1902        // Key only in `b`: symmetric.
1903        {
1904            let mut interpreter = ColumnSpecs::new(&relation, &arena);
1905            interpreter.push_column(0, unioned.clone());
1906            let expr = MirScalarExpr::column(0).call_binary(
1907                MirScalarExpr::literal_ok(Datum::from("z"), ReprScalarType::String),
1908                JsonbGetStringStringify,
1909            );
1910            assert!(interpreter.expr(&expr).range.may_contain(Datum::Null));
1911        }
1912
1913        // Key in both: result must include both observed values.
1914        {
1915            let mut interpreter = ColumnSpecs::new(&relation, &arena);
1916            interpreter.push_column(0, unioned);
1917            let expr = MirScalarExpr::column(0).call_binary(
1918                MirScalarExpr::literal_ok(Datum::from("x"), ReprScalarType::String),
1919                JsonbGetStringStringify,
1920            );
1921            let x_range = interpreter.expr(&expr).range;
1922            assert!(x_range.may_contain(Datum::String("a")));
1923            assert!(x_range.may_contain(Datum::String("a2")));
1924        }
1925    }
1926
1927    #[mz_ore::test]
1928    #[cfg_attr(miri, ignore)] // unsupported foreign call in numeric decoding
1929    fn test_case_over_jsonb_columns() {
1930        // Regression test for PER-6: when CASE picks between two JSON columns whose
1931        // observed keys are disjoint, filter pushdown must not prune parts where
1932        // accessing a key only present in one branch might yield NULL in the other.
1933        let arena = RowArena::new();
1934
1935        // `(CASE WHEN col0 THEN col1 ELSE col2 END) ->> 'y' IS NULL`
1936        let expr = MirScalarExpr::If {
1937            cond: Box::new(MirScalarExpr::column(0)),
1938            then: Box::new(MirScalarExpr::column(1)),
1939            els: Box::new(MirScalarExpr::column(2)),
1940        }
1941        .call_binary(
1942            MirScalarExpr::literal_ok(Datum::from("y"), ReprScalarType::String),
1943            JsonbGetStringStringify,
1944        )
1945        .call_unary(UnaryFunc::IsNull(IsNull));
1946
1947        let relation = ReprRelationType::new(vec![
1948            ReprScalarType::Bool.nullable(false),
1949            ReprScalarType::Jsonb.nullable(false),
1950            ReprScalarType::Jsonb.nullable(false),
1951        ]);
1952        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1953        interpreter.push_column(0, ResultSpec::value_between(Datum::False, Datum::True));
1954        interpreter.push_column(
1955            1,
1956            ResultSpec::map_spec(
1957                [("x".into(), ResultSpec::value(Datum::String("a")))]
1958                    .into_iter()
1959                    .collect(),
1960            ),
1961        );
1962        interpreter.push_column(
1963            2,
1964            ResultSpec::map_spec(
1965                [("y".into(), ResultSpec::value(Datum::String("b")))]
1966                    .into_iter()
1967                    .collect(),
1968            ),
1969        );
1970
1971        let range_out = interpreter.expr(&expr).range;
1972        // When the CASE selects column 1, "y" is absent and `->> 'y'` yields NULL, so
1973        // `IS NULL` is True. The filter must not prune a part that could match.
1974        assert!(range_out.may_contain(Datum::True));
1975    }
1976
1977    #[mz_ore::test]
1978    fn test_like() {
1979        let arena = RowArena::new();
1980
1981        let expr = MirScalarExpr::CallUnary {
1982            func: UnaryFunc::IsLikeMatch(IsLikeMatch(
1983                crate::like_pattern::compile("%whatever%", true).unwrap(),
1984            )),
1985            expr: Box::new(MirScalarExpr::column(0)),
1986        };
1987
1988        let relation = ReprRelationType::new(vec![ReprScalarType::String.nullable(true)]);
1989        let mut interpreter = ColumnSpecs::new(&relation, &arena);
1990        interpreter.push_column(
1991            0,
1992            ResultSpec::value_between(Datum::String("aardvark"), Datum::String("zebra")),
1993        );
1994
1995        let range_out = interpreter.expr(&expr).range;
1996        assert!(
1997            !range_out.fallible,
1998            "like function should not error on non-error input"
1999        );
2000        assert!(range_out.may_contain(Datum::True));
2001        assert!(range_out.may_contain(Datum::False));
2002        assert!(range_out.may_contain(Datum::Null));
2003    }
2004
2005    #[mz_ore::test]
2006    fn test_try_parse_monotonic_iso8601_timestamp() {
2007        use chrono::NaiveDateTime;
2008
2009        let arena = RowArena::new();
2010
2011        let expr = MirScalarExpr::CallUnary {
2012            func: UnaryFunc::TryParseMonotonicIso8601Timestamp(TryParseMonotonicIso8601Timestamp),
2013            expr: Box::new(MirScalarExpr::column(0)),
2014        };
2015
2016        let relation = ReprRelationType::new(vec![ReprScalarType::String.nullable(true)]);
2017        // Test the case where we have full timestamps as bounds.
2018        let mut interpreter = ColumnSpecs::new(&relation, &arena);
2019        interpreter.push_column(
2020            0,
2021            ResultSpec::value_between(
2022                Datum::String("2024-01-11T00:00:00.000Z"),
2023                Datum::String("2024-01-11T20:00:00.000Z"),
2024            ),
2025        );
2026
2027        let timestamp = |ts| {
2028            Datum::Timestamp(
2029                NaiveDateTime::parse_from_str(ts, "%Y-%m-%dT%H:%M:%S")
2030                    .unwrap()
2031                    .try_into()
2032                    .unwrap(),
2033            )
2034        };
2035
2036        let range_out = interpreter.expr(&expr).range;
2037        assert!(!range_out.fallible);
2038        assert!(range_out.nullable);
2039        assert!(!range_out.may_contain(timestamp("2024-01-10T10:00:00")));
2040        assert!(range_out.may_contain(timestamp("2024-01-11T10:00:00")));
2041        assert!(!range_out.may_contain(timestamp("2024-01-12T10:00:00")));
2042
2043        // Test the case where we have truncated / useless bounds.
2044        let mut interpreter = ColumnSpecs::new(&relation, &arena);
2045        interpreter.push_column(
2046            0,
2047            ResultSpec::value_between(Datum::String("2024-01-1"), Datum::String("2024-01-2")),
2048        );
2049
2050        let range_out = interpreter.expr(&expr).range;
2051        assert!(!range_out.fallible);
2052        assert!(range_out.nullable);
2053        assert!(range_out.may_contain(timestamp("2024-01-10T10:00:00")));
2054        assert!(range_out.may_contain(timestamp("2024-01-11T10:00:00")));
2055        assert!(range_out.may_contain(timestamp("2024-01-12T10:00:00")));
2056
2057        // Test the case where only one bound is truncated
2058        let mut interpreter = ColumnSpecs::new(&relation, &arena);
2059        interpreter.push_column(
2060            0,
2061            ResultSpec::value_between(
2062                Datum::String("2024-01-1"),
2063                Datum::String("2024-01-12T10:00:00"),
2064            )
2065            .union(ResultSpec::null()),
2066        );
2067
2068        let range_out = interpreter.expr(&expr).range;
2069        assert!(!range_out.fallible);
2070        assert!(range_out.nullable);
2071        assert!(range_out.may_contain(timestamp("2024-01-10T10:00:00")));
2072        assert!(range_out.may_contain(timestamp("2024-01-11T10:00:00")));
2073        assert!(range_out.may_contain(timestamp("2024-01-12T10:00:00")));
2074
2075        // Test the case where the upper and lower bound are identical
2076        let mut interpreter = ColumnSpecs::new(&relation, &arena);
2077        interpreter.push_column(
2078            0,
2079            ResultSpec::value_between(
2080                Datum::String("2024-01-11T10:00:00.000Z"),
2081                Datum::String("2024-01-11T10:00:00.000Z"),
2082            ),
2083        );
2084
2085        let range_out = interpreter.expr(&expr).range;
2086        assert!(!range_out.fallible);
2087        assert!(!range_out.nullable);
2088        assert!(!range_out.may_contain(timestamp("2024-01-10T10:00:00")));
2089        assert!(range_out.may_contain(timestamp("2024-01-11T10:00:00")));
2090        assert!(!range_out.may_contain(timestamp("2024-01-12T10:00:00")));
2091    }
2092
2093    #[mz_ore::test]
2094    fn test_inequality() {
2095        let arena = RowArena::new();
2096
2097        let expr = MirScalarExpr::column(0).call_binary(
2098            MirScalarExpr::CallUnmaterializable(UnmaterializableFunc::MzNow),
2099            Gte,
2100        );
2101
2102        let relation = ReprRelationType::new(vec![ReprScalarType::MzTimestamp.nullable(true)]);
2103        let mut interpreter = ColumnSpecs::new(&relation, &arena);
2104        interpreter.push_column(
2105            0,
2106            ResultSpec::value_between(
2107                Datum::MzTimestamp(1704736444949u64.into()),
2108                Datum::MzTimestamp(1704736444949u64.into()),
2109            )
2110            .union(ResultSpec::null()),
2111        );
2112        interpreter.push_unmaterializable(
2113            UnmaterializableFunc::MzNow,
2114            ResultSpec::value_between(
2115                Datum::MzTimestamp(1704738791000u64.into()),
2116                Datum::MzTimestamp(18446744073709551615u64.into()),
2117            ),
2118        );
2119
2120        let range_out = interpreter.expr(&expr).range;
2121        assert!(
2122            !range_out.fallible,
2123            "<= function should not error on non-error input"
2124        );
2125        assert!(!range_out.may_contain(Datum::True));
2126        assert!(range_out.may_contain(Datum::False));
2127        assert!(range_out.may_contain(Datum::Null));
2128    }
2129
2130    /// Regression test for database-issues#9656.
2131    ///
2132    /// Adding an `Interval` to a `Timestamp` is non-monotone in the interval
2133    /// argument: the lex order of intervals (months, days, micros) does not
2134    /// respect calendar-month arithmetic with day-clamping. The interpreter
2135    /// must therefore not assume monotonicity, otherwise persist filter
2136    /// pushdown can incorrectly conclude that a part has no matching rows.
2137    #[mz_ore::test]
2138    #[cfg_attr(miri, ignore)]
2139    fn test_add_timestamp_interval_non_monotone() {
2140        use chrono::NaiveDateTime;
2141        use mz_repr::adt::interval::Interval;
2142        use mz_repr::adt::timestamp::CheckedTimestamp;
2143        use mz_repr::{Datum, Row};
2144
2145        let arena = RowArena::new();
2146
2147        // The setup: a timestamp literal `t = 2024-01-31 00:00:00`, and an
2148        // interval column whose stats-range spans
2149        // `[{0 months, 31 days, 0 us}, {1 month, 0 days, 0 us}]`. In lex order,
2150        // the 31-day interval is the lower bound and the 1-month interval is
2151        // the upper bound. The function values at the endpoints are:
2152        //   t + {0,31,0} = 2024-03-02
2153        //   t + {1, 0,0} = 2024-02-29
2154        // But an *interior* interval like {0, 60, 0} maps to 2024-03-31, which
2155        // lies far outside `[Feb 29, Mar 2]`. Under the (incorrect) monotone
2156        // assumption, the interpreter would conclude the output is in that
2157        // narrow window, and rule out predicates like `>= 2024-03-15`.
2158        let ts_lit = |s: &str| {
2159            let mut row = Row::default();
2160            row.packer().push(Datum::Timestamp(
2161                CheckedTimestamp::from_timestamplike(
2162                    NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S").unwrap(),
2163                )
2164                .unwrap(),
2165            ));
2166            MirScalarExpr::Literal(Ok(row), ReprScalarType::Timestamp.nullable(false))
2167        };
2168        let interval = |months: i32, days: i32, micros: i64| {
2169            Datum::Interval(Interval {
2170                months,
2171                days,
2172                micros,
2173            })
2174        };
2175
2176        // Expression: `(timestamp_lit + interval_col) >= 2024-03-15`.
2177        let expr = ts_lit("2024-01-31T00:00:00")
2178            .call_binary(MirScalarExpr::column(0), AddTimestampInterval)
2179            .call_binary(ts_lit("2024-03-15T00:00:00"), Gte);
2180
2181        let relation = ReprRelationType::new(vec![ReprScalarType::Interval.nullable(false)]);
2182        let mut interpreter = ColumnSpecs::new(&relation, &arena);
2183        interpreter.push_column(
2184            0,
2185            ResultSpec::value_between(interval(0, 31, 0), interval(1, 0, 0)),
2186        );
2187
2188        let range_out = interpreter.expr(&expr).range;
2189        // The actual data may include e.g. `{0, 60, 0}` → 2024-03-31, which
2190        // satisfies `>= 2024-03-15`. The interpreter must admit `True` so that
2191        // filter pushdown does not skip the part. Under the buggy
2192        // `(true, true)` annotation, the output range would be
2193        // `[Feb 29, Mar 2]`, all of which is `< Mar 15`, and the interpreter
2194        // would (wrongly) admit only `False`.
2195        assert!(
2196            range_out.may_contain(Datum::True),
2197            "interpreter incorrectly ruled out matching rows; \
2198             add_timestamp_interval is not monotone in the interval argument",
2199        );
2200    }
2201
2202    /// Companion test to `test_add_timestamp_interval_non_monotone`: when the
2203    /// interval argument is a literal with `months == 0`, the function reduces
2204    /// to a pure linear shift in microseconds and *is* monotone in the
2205    /// timestamp. The dynamic-monotonicity handler in `AbstractFunc` should
2206    /// recover the tight output range in that case, so that filter pushdown
2207    /// can still narrow predicates like `t - INTERVAL '1' day < literal`.
2208    #[mz_ore::test]
2209    #[cfg_attr(miri, ignore)]
2210    fn test_timestamp_plus_interval_dynamic_monotone() {
2211        use chrono::NaiveDateTime;
2212        use mz_repr::adt::interval::Interval;
2213        use mz_repr::adt::timestamp::CheckedTimestamp;
2214        use mz_repr::{Datum, Row};
2215
2216        let arena = RowArena::new();
2217
2218        let ts = |s: &str| {
2219            Datum::Timestamp(
2220                CheckedTimestamp::from_timestamplike(
2221                    NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S").unwrap(),
2222                )
2223                .unwrap(),
2224            )
2225        };
2226        let interval_lit = |months: i32, days: i32, micros: i64| {
2227            let mut row = Row::default();
2228            row.packer().push(Datum::Interval(Interval {
2229                months,
2230                days,
2231                micros,
2232            }));
2233            MirScalarExpr::Literal(Ok(row), ReprScalarType::Interval.nullable(false))
2234        };
2235
2236        let relation = ReprRelationType::new(vec![ReprScalarType::Timestamp.nullable(false)]);
2237
2238        // (a) `t_col - INTERVAL '1' day < 2024-01-15`, with `t_col` ranging
2239        // over `[2024-01-15, 2024-01-20]`. With the days-only interval, the
2240        // subtraction is monotone, so endpoints alone determine the output:
2241        // [2024-01-14, 2024-01-19]. Only `2024-01-14` satisfies `< 2024-01-15`,
2242        // so both True and False are reachable.
2243        {
2244            let expr = MirScalarExpr::column(0)
2245                .call_binary(interval_lit(0, 1, 0), SubTimestampInterval)
2246                .call_binary(
2247                    MirScalarExpr::Literal(
2248                        Ok({
2249                            let mut r = Row::default();
2250                            r.packer().push(ts("2024-01-15T00:00:00"));
2251                            r
2252                        }),
2253                        ReprScalarType::Timestamp.nullable(false),
2254                    ),
2255                    Lt,
2256                );
2257            let mut interpreter = ColumnSpecs::new(&relation, &arena);
2258            interpreter.push_column(
2259                0,
2260                ResultSpec::value_between(ts("2024-01-15T00:00:00"), ts("2024-01-20T00:00:00")),
2261            );
2262            let range_out = interpreter.expr(&expr).range;
2263            assert!(
2264                range_out.may_contain(Datum::True),
2265                "day-only interval should preserve tight bounds",
2266            );
2267            assert!(
2268                range_out.may_contain(Datum::False),
2269                "day-only interval should preserve tight bounds",
2270            );
2271        }
2272
2273        // (b) Same predicate, but with `t_col` strictly *after* the literal:
2274        // `[2024-01-17, 2024-01-20]`. Output of `t - 1 day`:
2275        // `[2024-01-16, 2024-01-19]`, none of which is `< 2024-01-15`. The
2276        // interpreter must rule out `True`.
2277        {
2278            let expr = MirScalarExpr::column(0)
2279                .call_binary(interval_lit(0, 1, 0), SubTimestampInterval)
2280                .call_binary(
2281                    MirScalarExpr::Literal(
2282                        Ok({
2283                            let mut r = Row::default();
2284                            r.packer().push(ts("2024-01-15T00:00:00"));
2285                            r
2286                        }),
2287                        ReprScalarType::Timestamp.nullable(false),
2288                    ),
2289                    Lt,
2290                );
2291            let mut interpreter = ColumnSpecs::new(&relation, &arena);
2292            interpreter.push_column(
2293                0,
2294                ResultSpec::value_between(ts("2024-01-17T00:00:00"), ts("2024-01-20T00:00:00")),
2295            );
2296            let range_out = interpreter.expr(&expr).range;
2297            assert!(
2298                !range_out.may_contain(Datum::True),
2299                "day-only interval should narrow out impossible matches",
2300            );
2301        }
2302
2303        // (c) With a *month*-bearing literal interval, the operation is no
2304        // longer monotone (day-clamping), so the dynamic-monotonicity handler
2305        // must fall back to `anything()` — the interpreter cannot rule out
2306        // either outcome even when the column range is narrow.
2307        {
2308            let expr = MirScalarExpr::column(0)
2309                .call_binary(interval_lit(1, 0, 0), SubTimestampInterval)
2310                .call_binary(
2311                    MirScalarExpr::Literal(
2312                        Ok({
2313                            let mut r = Row::default();
2314                            r.packer().push(ts("2024-01-15T00:00:00"));
2315                            r
2316                        }),
2317                        ReprScalarType::Timestamp.nullable(false),
2318                    ),
2319                    Lt,
2320                );
2321            let mut interpreter = ColumnSpecs::new(&relation, &arena);
2322            interpreter.push_column(
2323                0,
2324                ResultSpec::value_between(ts("2024-01-17T00:00:00"), ts("2024-01-20T00:00:00")),
2325            );
2326            let range_out = interpreter.expr(&expr).range;
2327            assert!(
2328                range_out.may_contain(Datum::True),
2329                "month-bearing interval must conservatively admit True",
2330            );
2331            assert!(
2332                range_out.may_contain(Datum::False),
2333                "month-bearing interval must conservatively admit False",
2334            );
2335        }
2336    }
2337
2338    /// Proptest companion to [`test_timestamp_plus_interval_dynamic_monotone`]:
2339    /// the dynamic-monotonicity handler in [`AbstractFunc`] claims that
2340    /// `add_timestamp_interval(t, i)` is monotone in `t` whenever `i.months == 0`
2341    /// (the only case it actually claims monotonicity for at runtime: the
2342    /// matches above require the right argument to be a single value with
2343    /// `months == 0`). This proptest verifies that claim directly against the
2344    /// function impl by sampling random timestamps and zero-month intervals
2345    /// and checking that input ordering is preserved in the output.
2346    #[mz_ore::test]
2347    #[cfg_attr(miri, ignore)]
2348    fn proptest_timestamp_plus_interval_monotone_when_months_zero() {
2349        use mz_repr::adt::interval::Interval;
2350        use mz_repr::{Datum, RowArena, SqlScalarType, arb_datum_for_scalar};
2351        use proptest::prelude::*;
2352
2353        let timestamp_strat = || arb_datum_for_scalar(SqlScalarType::Timestamp { precision: None });
2354        // Lex order on `Interval` does *not* match total-microseconds order when
2355        // both days and micros vary independently (e.g. `{0, 0, 86_400_000_001}`
2356        // is lex-less than `{0, 1, 0}` but evaluates to a strictly larger
2357        // timestamp), so we only claim monotonicity for *fixed* zero-month
2358        // intervals — which is exactly what the DynamicMonotone handler does.
2359        // The proptest accordingly varies `t` with `i` held constant.
2360        let zero_month_interval_strat =
2361            (any::<i32>(), any::<i64>()).prop_map(|(days, micros)| Interval {
2362                months: 0,
2363                days,
2364                micros,
2365            });
2366
2367        let expr = MirScalarExpr::CallBinary {
2368            func: AddTimestampInterval.into(),
2369            expr1: Box::new(MirScalarExpr::column(0)),
2370            expr2: Box::new(MirScalarExpr::column(1)),
2371        };
2372        let arena = RowArena::new();
2373
2374        proptest!(|(
2375            t1 in timestamp_strat(),
2376            t2 in timestamp_strat(),
2377            i in zero_month_interval_strat,
2378        )| {
2379            let t1 = match t1 { PropDatum::Timestamp(t) => t, _ => unreachable!() };
2380            let t2 = match t2 { PropDatum::Timestamp(t) => t, _ => unreachable!() };
2381            let i = Datum::Interval(i);
2382            let r1 = expr.eval(&[Datum::Timestamp(t1), i], &arena);
2383            let r2 = expr.eval(&[Datum::Timestamp(t2), i], &arena);
2384            // Only compare when both calls succeed; the monotonicity claim
2385            // applies only within the success domain.
2386            if let (Ok(Datum::Timestamp(r1)), Ok(Datum::Timestamp(r2))) = (r1, r2) {
2387                prop_assert_eq!(t1.cmp(&t2), r1.cmp(&r2));
2388            }
2389        });
2390    }
2391
2392    /// Regression test for `date_bin_timestamp`, which is non-monotone in the
2393    /// `stride` argument: a larger stride can bin a source timestamp to an
2394    /// *earlier* result than a smaller stride, because the bin alignment to
2395    /// the unix epoch depends on the stride magnitude rather than on lex order.
2396    #[mz_ore::test]
2397    #[cfg_attr(miri, ignore)]
2398    fn test_date_bin_timestamp_non_monotone() {
2399        use chrono::NaiveDateTime;
2400        use mz_repr::adt::interval::Interval;
2401        use mz_repr::adt::timestamp::CheckedTimestamp;
2402        use mz_repr::{Datum, Row};
2403
2404        let arena = RowArena::new();
2405
2406        let ts_lit = |s: &str| {
2407            let mut row = Row::default();
2408            row.packer().push(Datum::Timestamp(
2409                CheckedTimestamp::from_timestamplike(
2410                    NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S").unwrap(),
2411                )
2412                .unwrap(),
2413            ));
2414            MirScalarExpr::Literal(Ok(row), ReprScalarType::Timestamp.nullable(false))
2415        };
2416        let interval = |months: i32, days: i32, micros: i64| {
2417            Datum::Interval(Interval {
2418                months,
2419                days,
2420                micros,
2421            })
2422        };
2423
2424        // Expression: `date_bin(stride_col, 2024-01-01 12:00:00) > 2024-01-01 06:00:00`.
2425        // stride_col ranges over `[1 day, 2 days]`.
2426        //
2427        // Endpoint evaluations:
2428        //   1 day stride → bins to 2024-01-01 00:00:00
2429        //   2 day stride → bins to 2023-12-31 00:00:00
2430        //
2431        // Interior strides produce results *outside* that endpoint box. For
2432        // example, a 1.5-day stride (i.e. `{0 months, 1 day, 12 h micros}`,
2433        // which sorts between the two endpoints in lex order) bins
2434        // 2024-01-01 12:00:00 to exactly 2024-01-01 12:00:00 — well above the
2435        // endpoint maximum of 2024-01-01 00:00:00. With the buggy
2436        // `(true, true)` annotation, the interpreter narrows the output to
2437        // `[Dec 31 00:00, Jan 1 00:00]`, both of which are `<= Jan 1 06:00`,
2438        // so the predicate is wrongly proved `False`. With the non-monotone
2439        // fix the output is `anything()`, so `True` is correctly admitted.
2440        let expr = MirScalarExpr::column(0)
2441            .call_binary(ts_lit("2024-01-01T12:00:00"), DateBinTimestamp)
2442            .call_binary(ts_lit("2024-01-01T06:00:00"), Gt);
2443
2444        let relation = ReprRelationType::new(vec![ReprScalarType::Interval.nullable(false)]);
2445        let mut interpreter = ColumnSpecs::new(&relation, &arena);
2446        interpreter.push_column(
2447            0,
2448            ResultSpec::value_between(interval(0, 1, 0), interval(0, 2, 0)),
2449        );
2450
2451        let range_out = interpreter.expr(&expr).range;
2452        assert!(
2453            range_out.may_contain(Datum::True),
2454            "date_bin is not monotone in the stride argument; \
2455             interior strides can produce outputs outside the endpoint-bounded \
2456             box, so the interpreter must admit True for `>`-style predicates",
2457        );
2458    }
2459
2460    #[mz_ore::test]
2461    fn test_trace() {
2462        use super::Trace;
2463
2464        let expr = MirScalarExpr::column(0).call_binary(
2465            MirScalarExpr::column(1)
2466                .call_binary(MirScalarExpr::column(3).call_unary(NegInt64), AddInt64),
2467            Gte,
2468        );
2469        let summary = Trace.expr(&expr);
2470        assert!(summary.pushdownable());
2471    }
2472}
mz_expr/interpret.rs

mz_expr/
interpret.rs