mz_compute_types/plan/threshold.rs
1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Threshold planning logic.
11//!
12//! The threshold operator produces only rows with a positive cardinality, for example required to
13//! provide SQL except and intersect semantics.
14//!
15//! We build a plan ([ThresholdPlan]) encapsulating all decisions and requirements on the specific
16//! threshold implementation. The idea is to decouple the logic deciding which plan to select from
17//! the actual implementation of each variant available.
18//!
19//! Currently, we provide two variants:
20//! * The [BasicThresholdPlan] maintains all its outputs as an arrangement. It is beneficial if the
21//! threshold is the final operation, or a downstream operators expects arranged inputs.
22//! * The [RetractionsThresholdPlan] maintains retractions, i.e. rows that are not in the output. It
23//! is beneficial to use this operator if the number of retractions is expected to be small, and
24//! if a potential downstream operator does not expect its input to be arranged.
25
26use mz_expr::{MirScalarExpr, permutation_for_arrangement};
27use serde::{Deserialize, Serialize};
28
29use crate::plan::AvailableCollections;
30
31/// A plan describing how to compute a threshold operation.
32#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd)]
33pub enum ThresholdPlan {
34 /// Basic threshold maintains all positive inputs.
35 Basic(BasicThresholdPlan),
36}
37
38impl ThresholdPlan {
39 /// Reports all keys of produced arrangements, with optionally
40 /// given types describing the rows that would be in the raw
41 /// form of the collection.
42 ///
43 /// This is likely either an empty vector, for no arrangement,
44 /// or a singleton vector containing the list of expressions
45 /// that key a single arrangement.
46 pub fn keys(&self) -> AvailableCollections {
47 match self {
48 ThresholdPlan::Basic(plan) => {
49 AvailableCollections::new_arranged(vec![plan.ensure_arrangement.clone()])
50 }
51 }
52 }
53}
54
55/// A plan to maintain all inputs with positive counts.
56#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd)]
57pub struct BasicThresholdPlan {
58 /// Description of how the input has been arranged, and how to arrange the output
59 pub ensure_arrangement: (Vec<MirScalarExpr>, Vec<usize>, Vec<usize>),
60}
61
62/// A plan to maintain all inputs with negative counts, which are subtracted from the output
63/// in order to maintain an equivalent collection compared to [BasicThresholdPlan].
64#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd)]
65pub struct RetractionsThresholdPlan {
66 /// Description of how the input has been arranged
67 pub ensure_arrangement: (Vec<MirScalarExpr>, Vec<usize>, Vec<usize>),
68}
69
70impl ThresholdPlan {
71 /// Construct the plan from the number of columns (`arity`).
72 ///
73 /// Also returns the arrangement and thinning required for the input.
74 pub fn create_from(arity: usize) -> (Self, (Vec<MirScalarExpr>, Vec<usize>, Vec<usize>)) {
75 // Arrange the input by all columns in order.
76 let mut all_columns = Vec::new();
77 for column in 0..arity {
78 all_columns.push(mz_expr::MirScalarExpr::column(column));
79 }
80 let (permutation, thinning) = permutation_for_arrangement(&all_columns, arity);
81 let ensure_arrangement = (all_columns, permutation, thinning);
82 let plan = ThresholdPlan::Basic(BasicThresholdPlan {
83 ensure_arrangement: ensure_arrangement.clone(),
84 });
85 (plan, ensure_arrangement)
86 }
87}