differential_dataflow/operators/
consolidate.rs

//! Aggregates the weights of equal records into at most one record.
//!
//! As differential dataflow streams are unordered and taken to be the accumulation of all records,
//! no semantic change happens via `consolidate`. However, there is a practical difference between
//! a collection that aggregates down to zero records, and one that actually has no records. The
//! underlying system can more clearly see that no work must be done in the latter case, and we can
//! drop out of, e.g., iterative computations.
8
9use timely::dataflow::Scope;
10
11use crate::{IntoOwned, Collection, ExchangeData, Hashable};
12use crate::consolidation::ConsolidatingContainerBuilder;
13use crate::difference::Semigroup;
14
15use crate::Data;
16use crate::lattice::Lattice;
17use crate::trace::{Batcher, Builder};
18
19/// Methods which require data be arrangeable.
20impl<G, D, R> Collection<G, D, R>
21where
22    G: Scope,
23    G::Timestamp: Data+Lattice,
24    D: ExchangeData+Hashable,
25    R: Semigroup+ExchangeData,
26{
27    /// Aggregates the weights of equal records into at most one record.
28    ///
29    /// This method uses the type `D`'s `hashed()` method to partition the data. The data are
30    /// accumulated in place, each held back until their timestamp has completed.
31    ///
32    /// # Examples
33    ///
34    /// ```
35    /// use differential_dataflow::input::Input;
36    ///
37    /// ::timely::example(|scope| {
38    ///
39    ///     let x = scope.new_collection_from(1 .. 10u32).1;
40    ///
41    ///     x.negate()
42    ///      .concat(&x)
43    ///      .consolidate() // <-- ensures cancellation occurs
44    ///      .assert_empty();
45    /// });
46    /// ```
47    pub fn consolidate(&self) -> Self {
48        use crate::trace::implementations::{KeyBatcher, KeyBuilder, KeySpine};
49        self.consolidate_named::<KeyBatcher<_, _, _>,KeyBuilder<_,_,_>, KeySpine<_,_,_>>("Consolidate")
50    }
51
52    /// As `consolidate` but with the ability to name the operator and specify the trace type.
53    pub fn consolidate_named<Ba, Bu, Tr>(&self, name: &str) -> Self
54    where
55        Ba: Batcher<Input=Vec<((D,()),G::Timestamp,R)>, Time=G::Timestamp> + 'static,
56        Tr: crate::trace::Trace<Time=G::Timestamp,Diff=R>+'static,
57        for<'a> Tr::Key<'a>: IntoOwned<'a, Owned = D>,
58        Tr::Batch: crate::trace::Batch,
59        Bu: Builder<Time=Tr::Time, Input=Ba::Output, Output=Tr::Batch>,
60    {
61        use crate::operators::arrange::arrangement::Arrange;
62        self.map(|k| (k, ()))
63            .arrange_named::<Ba, Bu, Tr>(name)
64            .as_collection(|d, _| d.into_owned())
65    }
66
67    /// Aggregates the weights of equal records.
68    ///
69    /// Unlike `consolidate`, this method does not exchange data and does not
70    /// ensure that at most one copy of each `(data, time)` pair exists in the
71    /// results. Instead, it acts on each batch of data and collapses equivalent
72    /// `(data, time)` pairs found therein, suppressing any that accumulate to
73    /// zero.
74    ///
75    /// # Examples
76    ///
77    /// ```
78    /// use differential_dataflow::input::Input;
79    ///
80    /// ::timely::example(|scope| {
81    ///
82    ///     let x = scope.new_collection_from(1 .. 10u32).1;
83    ///
84    ///     // nothing to assert, as no particular guarantees.
85    ///     x.negate()
86    ///      .concat(&x)
87    ///      .consolidate_stream();
88    /// });
89    /// ```
90    pub fn consolidate_stream(&self) -> Self {
91
92        use timely::dataflow::channels::pact::Pipeline;
93        use timely::dataflow::operators::Operator;
94        use crate::collection::AsCollection;
95
96        self.inner
97            .unary::<ConsolidatingContainerBuilder<_>, _, _, _>(Pipeline, "ConsolidateStream", |_cap, _info| {
98
99                move |input, output| {
100                    input.for_each(|time, data| {
101                        output.session_with_builder(&time).give_iterator(data.drain(..));
102                    })
103                }
104            })
105            .as_collection()
106    }
107}