Skip to main content

timely/dataflow/operators/core/
partition.rs

1//! Partition a stream of records into multiple streams.
2use std::collections::BTreeMap;
3
4use crate::container::{DrainContainer, PushInto};
5use crate::dataflow::channels::pact::Pipeline;
6use crate::dataflow::operators::generic::builder_rc::OperatorBuilder;
7use crate::dataflow::{Scope, Stream};
8use crate::{Container, ContainerBuilder};
9
10/// Partition a stream of records into multiple streams.
11pub trait Partition<G: Scope, C: DrainContainer> {
12    /// Produces `parts` output streams, containing records produced and assigned by `route`.
13    ///
14    /// # Examples
15    /// ```
16    /// use timely::dataflow::operators::{ToStream, Inspect};
17    /// use timely::dataflow::operators::core::Partition;
18    /// use timely_container::CapacityContainerBuilder;
19    ///
20    /// timely::example(|scope| {
21    ///     let streams = (0..10).to_stream(scope)
22    ///                          .container::<Vec<_>>()
23    ///                          .partition::<CapacityContainerBuilder<Vec<_>>, _, _>(3, |x| (x % 3, x));
24    ///
25    ///     for (idx, stream) in streams.into_iter().enumerate() {
26    ///         stream
27    ///             .inspect(move |x| println!("seen {idx}: {x:?}"));
28    ///     }
29    /// });
30    /// ```
31    fn partition<CB, D2, F>(self, parts: u64, route: F) -> Vec<Stream<G, CB::Container>>
32    where
33        CB: ContainerBuilder + PushInto<D2>,
34        F: FnMut(C::Item<'_>) -> (u64, D2) + 'static;
35}
36
37impl<G: Scope, C: Container + DrainContainer> Partition<G, C> for Stream<G, C> {
38    fn partition<CB, D2, F>(self, parts: u64, mut route: F) -> Vec<Stream<G, CB::Container>>
39    where
40        CB: ContainerBuilder + PushInto<D2>,
41        F: FnMut(C::Item<'_>) -> (u64, D2) + 'static,
42    {
43        let mut builder = OperatorBuilder::new("Partition".to_owned(), self.scope());
44        builder.set_notify(false);
45
46        let mut input = builder.new_input(self, Pipeline);
47        let mut outputs = Vec::with_capacity(parts as usize);
48        let mut streams = Vec::with_capacity(parts as usize);
49
50        let mut c_build = CB::default();
51
52        for _ in 0..parts {
53            let (output, stream) = builder.new_output::<CB::Container>();
54            outputs.push(output);
55            streams.push(stream);
56        }
57
58        builder.build(move |_| {
59            move |_frontiers| {
60                let mut handles = outputs.iter_mut().map(|o| o.activate()).collect::<Vec<_>>();
61                let mut targets = BTreeMap::<u64,Vec<_>>::default();
62                input.for_each_time(|time, data| {
63                    // Sort data by intended output.
64                    for datum in data.flat_map(|d| d.drain()) {
65                        let (part, datum) = route(datum);
66                        targets.entry(part).or_default().push(datum);
67                    }
68                    // Form each intended output into a container and ship.
69                    while let Some((part, data)) = targets.pop_first() {
70                        for datum in data.into_iter() {
71                            c_build.push_into(datum);
72                            while let Some(container) = c_build.extract() {
73                                handles[part as usize].give(&time, container);
74                            }
75                        }
76                        while let Some(container) = c_build.finish() {
77                            handles[part as usize].give(&time, container);
78                        }
79                    }
80                });
81            }
82        });
83
84        streams
85    }
86}