mz_compute_types/plan/
threshold.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Threshold planning logic.
11//!
12//! The threshold operator produces only rows with a positive cardinality, for example required to
13//! provide SQL except and intersect semantics.
14//!
15//! We build a plan ([ThresholdPlan]) encapsulating all decisions and requirements on the specific
16//! threshold implementation. The idea is to decouple the logic deciding which plan to select from
17//! the actual implementation of each variant available.
18//!
19//! Currently, we provide two variants:
20//! * The [BasicThresholdPlan] maintains all its outputs as an arrangement. It is beneficial if the
21//!     threshold is the final operation, or a downstream operators expects arranged inputs.
22//! * The [RetractionsThresholdPlan] maintains retractions, i.e. rows that are not in the output. It
23//!     is beneficial to use this operator if the number of retractions is expected to be small, and
24//!     if a potential downstream operator does not expect its input to be arranged.
25
26use mz_expr::{MirScalarExpr, permutation_for_arrangement};
27use serde::{Deserialize, Serialize};
28
29use crate::plan::AvailableCollections;
30
31/// A plan describing how to compute a threshold operation.
32#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd)]
33pub enum ThresholdPlan {
34    /// Basic threshold maintains all positive inputs.
35    Basic(BasicThresholdPlan),
36}
37
38impl ThresholdPlan {
39    /// Reports all keys of produced arrangements, with optionally
40    /// given types describing the rows that would be in the raw
41    /// form of the collection.
42    ///
43    /// This is likely either an empty vector, for no arrangement,
44    /// or a singleton vector containing the list of expressions
45    /// that key a single arrangement.
46    pub fn keys(&self) -> AvailableCollections {
47        match self {
48            ThresholdPlan::Basic(plan) => {
49                AvailableCollections::new_arranged(vec![plan.ensure_arrangement.clone()])
50            }
51        }
52    }
53}
54
55/// A plan to maintain all inputs with positive counts.
56#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd)]
57pub struct BasicThresholdPlan {
58    /// Description of how the input has been arranged, and how to arrange the output
59    pub ensure_arrangement: (Vec<MirScalarExpr>, Vec<usize>, Vec<usize>),
60}
61
62/// A plan to maintain all inputs with negative counts, which are subtracted from the output
63/// in order to maintain an equivalent collection compared to [BasicThresholdPlan].
64#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd)]
65pub struct RetractionsThresholdPlan {
66    /// Description of how the input has been arranged
67    pub ensure_arrangement: (Vec<MirScalarExpr>, Vec<usize>, Vec<usize>),
68}
69
70impl ThresholdPlan {
71    /// Construct the plan from the number of columns (`arity`).
72    ///
73    /// Also returns the arrangement and thinning required for the input.
74    pub fn create_from(arity: usize) -> (Self, (Vec<MirScalarExpr>, Vec<usize>, Vec<usize>)) {
75        // Arrange the input by all columns in order.
76        let mut all_columns = Vec::new();
77        for column in 0..arity {
78            all_columns.push(mz_expr::MirScalarExpr::column(column));
79        }
80        let (permutation, thinning) = permutation_for_arrangement(&all_columns, arity);
81        let ensure_arrangement = (all_columns, permutation, thinning);
82        let plan = ThresholdPlan::Basic(BasicThresholdPlan {
83            ensure_arrangement: ensure_arrangement.clone(),
84        });
85        (plan, ensure_arrangement)
86    }
87}