Skip to main content

mz_compute/render/
threshold.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Threshold execution logic.
11//!
12//! Consult [ThresholdPlan] documentation for details.
13
14use differential_dataflow::Data;
15use differential_dataflow::operators::arrange::{Arranged, TraceAgent};
16use differential_dataflow::trace::implementations::BatchContainer;
17use differential_dataflow::trace::implementations::merge_batcher::container::InternalMerge;
18use differential_dataflow::trace::{Builder, Trace, TraceReader};
19use mz_compute_types::plan::threshold::{BasicThresholdPlan, ThresholdPlan};
20use mz_expr::MirScalarExpr;
21use mz_repr::Diff;
22use timely::Container;
23use timely::container::PushInto;
24
25use crate::extensions::arrange::{ArrangementSize, KeyCollection, MzArrange};
26use crate::extensions::reduce::{ClearContainer, MzReduce};
27use crate::render::RenderTimestamp;
28use crate::render::context::{ArrangementFlavor, CollectionBundle, Context};
29use crate::row_spine::RowRowBuilder;
30use crate::typedefs::{ErrBatcher, ErrBuilder, MzData, MzTimestamp};
31
32/// Shared function to compute an arrangement of values matching `logic`.
33fn threshold_arrangement<'scope, Ts, T1, Bu2, T2, L>(
34    arrangement: Arranged<'scope, T1>,
35    name: &str,
36    logic: L,
37) -> Arranged<'scope, TraceAgent<T2>>
38where
39    Ts: MzTimestamp,
40    T1: TraceReader<
41            KeyContainer: BatchContainer<Owned: MzData + Data>,
42            ValOwn: MzData + Data,
43            Time = Ts,
44            Diff = Diff,
45        > + Clone
46        + 'static,
47    Bu2: Builder<
48            Time = Ts,
49            Input: Container
50                       + InternalMerge
51                       + ClearContainer
52                       + PushInto<(
53                (<T1::KeyContainer as BatchContainer>::Owned, T1::ValOwn),
54                Ts,
55                Diff,
56            )>,
57            Output = T2::Batch,
58        >,
59    T2: for<'a> Trace<
60            Key<'a> = T1::Key<'a>,
61            Val<'a> = T1::Val<'a>,
62            KeyContainer: BatchContainer<Owned = <T1::KeyContainer as BatchContainer>::Owned>,
63            ValOwn = T1::ValOwn,
64            Time = Ts,
65            Diff = Diff,
66        > + 'static,
67    L: Fn(&Diff) -> bool + 'static,
68    Arranged<'scope, TraceAgent<T2>>: ArrangementSize,
69{
70    arrangement.mz_reduce_abelian::<_, Bu2, T2>(name, move |_key, s, t| {
71        for (record, count) in s.iter() {
72            if logic(count) {
73                t.push((T1::owned_val(*record), *count));
74            }
75        }
76    })
77}
78
79/// Build a dataflow to threshold the input data.
80///
81/// This implementation maintains rows in the output, i.e. all rows that have a count greater than
82/// zero. It returns a [CollectionBundle] populated from a local arrangement.
83pub fn build_threshold_basic<'scope, T: RenderTimestamp>(
84    input: CollectionBundle<'scope, T>,
85    key: Vec<MirScalarExpr>,
86) -> CollectionBundle<'scope, T> {
87    let arrangement = input
88        .arrangement(&key)
89        .expect("Arrangement ensured to exist");
90    match arrangement {
91        ArrangementFlavor::Local(oks, errs) => {
92            let oks = threshold_arrangement::<_, _, RowRowBuilder<_, _>, _, _>(
93                oks,
94                "Threshold local",
95                |count| count.is_positive(),
96            );
97            CollectionBundle::from_expressions(key, ArrangementFlavor::Local(oks, errs))
98        }
99        ArrangementFlavor::Trace(_, oks, errs) => {
100            let oks = threshold_arrangement::<_, _, RowRowBuilder<_, _>, _, _>(
101                oks,
102                "Threshold trace",
103                |count| count.is_positive(),
104            );
105            let errs: KeyCollection<_, _, _> = errs.as_collection(|k, _| k.clone()).into();
106            let errs = errs
107                .mz_arrange::<ErrBatcher<_, _>, ErrBuilder<_, _>, _>("Arrange threshold basic err");
108            CollectionBundle::from_expressions(key, ArrangementFlavor::Local(oks, errs))
109        }
110    }
111}
112
113impl<'scope, T: RenderTimestamp> Context<'scope, T> {
114    pub(crate) fn render_threshold(
115        &self,
116        input: CollectionBundle<'scope, T>,
117        threshold_plan: ThresholdPlan,
118    ) -> CollectionBundle<'scope, T> {
119        match threshold_plan {
120            ThresholdPlan::Basic(BasicThresholdPlan {
121                ensure_arrangement: (key, _, _),
122            }) => {
123                // We do not need to apply the permutation here,
124                // since threshold doesn't inspect the values, but only
125                // their counts.
126                build_threshold_basic(input, key)
127            }
128        }
129    }
130}