timely/dataflow/operators/core/
partition.rs

1//! Partition a stream of records into multiple streams.
2
3use crate::container::{DrainContainer, ContainerBuilder, PushInto};
4use crate::dataflow::channels::pact::Pipeline;
5use crate::dataflow::operators::generic::builder_rc::OperatorBuilder;
6use crate::dataflow::operators::InputCapability;
7use crate::dataflow::{Scope, StreamCore};
8use crate::Container;
9
/// Partition a stream of records into multiple streams.
///
/// `G` is the scope the stream lives in and `C` is the container type carried by the
/// input stream; records are obtained by draining containers of type `C`.
pub trait Partition<G: Scope, C: DrainContainer> {
    /// Produces `parts` output streams, containing records produced and assigned by `route`.
    ///
    /// For every record drained from the input, `route` returns a pair `(index, record)`:
    /// `record` is pushed into the output stream at position `index` of the returned vector.
    ///
    /// # Type parameters
    ///
    /// * `CB`: the container builder used for every output stream; it must accept
    ///   records of type `D2`.
    /// * `D2`: the type of the records produced by `route`.
    /// * `F`: the routing function.
    ///
    /// # Returns
    ///
    /// A vector of `parts` streams whose containers are of type `CB::Container`.
    ///
    /// # Panics
    ///
    /// The implementation for [`StreamCore`] in this module uses the index returned by
    /// `route` to index a vector holding one entry per output, so an index that is not
    /// smaller than `parts` panics when the operator processes that record.
    ///
    /// # Examples
    /// ```
    /// use timely::dataflow::operators::ToStream;
    /// use timely::dataflow::operators::core::{Partition, Inspect};
    /// use timely_container::CapacityContainerBuilder;
    ///
    /// timely::example(|scope| {
    ///     let streams = (0..10).to_stream(scope)
    ///                          .partition::<CapacityContainerBuilder<Vec<_>>, _, _>(3, |x| (x % 3, x));
    ///
    ///     for (idx, stream) in streams.into_iter().enumerate() {
    ///         stream
    ///             .inspect(move |x| println!("seen {idx}: {x:?}"));
    ///     }
    /// });
    /// ```
    fn partition<CB, D2, F>(&self, parts: u64, route: F) -> Vec<StreamCore<G, CB::Container>>
    where
        CB: ContainerBuilder + PushInto<D2>,
        F: FnMut(C::Item<'_>) -> (u64, D2) + 'static;
}
35
/// Implements [`Partition`] for streams whose containers can be drained.
///
/// The operator built here has a single input and `parts` outputs. Each record drained
/// from the input is passed to `route` and given to the output that `route` selects.
impl<G: Scope, C: Container + DrainContainer> Partition<G, C> for StreamCore<G, C> {
    fn partition<CB, D2, F>(&self, parts: u64, mut route: F) -> Vec<StreamCore<G, CB::Container>>
    where
        CB: ContainerBuilder + PushInto<D2>,
        F: FnMut(C::Item<'_>) -> (u64, D2) + 'static,
    {
        let mut builder = OperatorBuilder::new("Partition".to_owned(), self.scope());
        // NOTE(review): presumably opts this operator out of frontier notifications, which
        // matches the closure below ignoring its `_frontiers` argument; confirm against
        // `OperatorBuilder::set_notify`.
        builder.set_notify(false);

        let mut input = builder.new_input(self, Pipeline);
        // One output handle and one stream per partition, kept in the same order so that
        // index `i` in `outputs` feeds the stream at index `i` of the returned vector.
        let mut outputs = Vec::with_capacity(parts as usize);
        let mut streams = Vec::with_capacity(parts as usize);

        for _ in 0..parts {
            let (output, stream) = builder.new_output::<CB>();
            outputs.push(output);
            streams.push(stream);
        }

        builder.build(move |_| {
            // Staging buffer for input batches. It is owned by the inner closure and fully
            // drained on every invocation, so it is always empty between invocations.
            let mut todo = vec![];
            move |_frontiers| {
                // Activate every output for the duration of this invocation.
                let mut handles = outputs.iter_mut().map(|o| o.activate()).collect::<Vec<_>>();

                // The capability associated with each session in `sessions`.
                // `None` until the first batch of this invocation has been seen.
                let mut sessions_cap: Option<InputCapability<G::Timestamp>> = None;
                // One slot per output: `(open session, handle not yet used for this time)`.
                let mut sessions = vec![];

                // Pull every available batch, moving its contents out of the input buffer
                // (`mem::take` leaves a default container behind).
                while let Some((cap, data)) = input.next() {
                    todo.push((cap, std::mem::take(data)));
                }
                // NOTE(review): presumably this orders batches by timestamp so that batches
                // sharing a time become adjacent and can reuse sessions; confirm against
                // the `Ord` implementation of `InputCapability`.
                todo.sort_unstable_by(|a, b| a.0.cmp(&b.0));

                for (cap, mut data) in todo.drain(..) {
                    // First batch, or a batch whose time differs from the current sessions:
                    // start over with no open sessions. Replacing `sessions` drops the ones
                    // opened for the previous time before `sessions_cap` is overwritten.
                    // When the time is unchanged, `cap` is dropped and the capability that
                    // is already stored keeps being used.
                    if sessions_cap.as_ref().map_or(true, |s_cap| s_cap.time() != cap.time()) {
                        sessions = handles.iter_mut().map(|h| (None, Some(h))).collect();
                        sessions_cap = Some(cap);
                    }
                    for datum in data.drain() {
                        let (part, datum2) = route(datum);

                        // Indexing panics if `route` returned an index that is not smaller
                        // than the number of outputs.
                        let session = match sessions[part as usize] {
                            // A session for this output is already open at the current time.
                            (Some(ref mut s), _) => s,
                            // Open the session lazily on the first record routed to this output.
                            // Slots start as `(None, Some(handle))` and become `(Some(session), None)`
                            // here, so the handle is still present whenever this arm is reached
                            // and the `unwrap` cannot fail.
                            (ref mut session_slot, ref mut handle) => {
                                let handle = handle.take().unwrap();
                                let session = handle.session_with_builder(sessions_cap.as_ref().unwrap());
                                session_slot.insert(session)
                            }
                        };
                        session.give(datum2);
                    }
                }
            }
        });

        streams
    }
}
93}