Skip to main content

timely/dataflow/operators/core/
unordered_input.rs

1//! Create new `Stream`s connected to external inputs.
2
3use std::rc::Rc;
4use std::cell::RefCell;
5
6use crate::ContainerBuilder;
7
8use crate::scheduling::{Schedule, ActivateOnDrop};
9
10use crate::progress::{Operate, operate::SharedProgress, Timestamp};
11use crate::progress::Source;
12use crate::progress::ChangeBatch;
13use crate::progress::operate::Connectivity;
14use crate::dataflow::channels::pushers::{Counter, Output, Tee};
15use crate::dataflow::operators::generic::{OutputBuilder, OutputBuilderSession};
16use crate::dataflow::operators::{ActivateCapability, Capability};
17use crate::dataflow::{Scope, Stream};
18
19use crate::scheduling::Activations;
20
21/// Create a new `Stream` and `Handle` through which to supply input.
22pub trait UnorderedInput<G: Scope> {
23    /// Create a new capability-based [Stream] and [UnorderedHandle] through which to supply input. This
24    /// input supports multiple open epochs (timestamps) at the same time.
25    ///
26    /// The `new_unordered_input_core` method returns `((HandleCore, Capability), Stream)` where the `Stream` can be used
27    /// immediately for timely dataflow construction, `HandleCore` and `Capability` are later used to introduce
28    /// data into the timely dataflow computation.
29    ///
30    /// The `Capability` returned is for the default value of the timestamp type in use. The
31    /// capability can be dropped to inform the system that the input has advanced beyond the
32    /// capability's timestamp. To retain the ability to send, a new capability at a later timestamp
33    /// should be obtained first, via the `delayed` function for `Capability`.
34    ///
35    /// To communicate the end-of-input drop all available capabilities.
36    ///
37    /// # Examples
38    ///
39    /// ```
40    /// use std::sync::{Arc, Mutex};
41    ///
42    /// use timely::*;
43    /// use timely::dataflow::operators::{capture::Extract, Capture};
44    /// use timely::dataflow::operators::core::{UnorderedInput};
45    ///
46    /// // get send and recv endpoints, wrap send to share
47    /// let (send, recv) = ::std::sync::mpsc::channel();
48    /// let send = Arc::new(Mutex::new(send));
49    ///
50    /// timely::execute(Config::thread(), move |worker| {
51    ///
52    ///     // this is only to validate the output.
53    ///     let send = send.lock().unwrap().clone();
54    ///
55    ///     // create and capture the unordered input.
56    ///     let (mut input, mut cap) = worker.dataflow::<usize,_,_>(|scope| {
57    ///         let (input, stream) = scope.new_unordered_input();
58    ///         stream
59    ///             .container::<Vec<_>>()
60    ///             .capture_into(send);
61    ///         input
62    ///     });
63    ///
64    ///     // feed values 0..10 at times 0..10.
65    ///     for round in 0..10 {
66    ///         input.activate().session(&cap).give(round);
67    ///         cap = cap.delayed(&(round + 1));
68    ///         worker.step();
69    ///     }
70    /// }).unwrap();
71    ///
72    /// let extract = recv.extract();
73    /// for i in 0..10 {
74    ///     assert_eq!(extract[i], (i, vec![i]));
75    /// }
76    /// ```
77    fn new_unordered_input<CB: ContainerBuilder>(&mut self) -> ((UnorderedHandle<G::Timestamp, CB>, ActivateCapability<G::Timestamp>), Stream<G, CB::Container>);
78}
79
80impl<G: Scope> UnorderedInput<G> for G {
81    fn new_unordered_input<CB: ContainerBuilder>(&mut self) -> ((UnorderedHandle<G::Timestamp, CB>, ActivateCapability<G::Timestamp>), Stream<G, CB::Container>) {
82
83        let (output, registrar) = Tee::<G::Timestamp, CB::Container>::new();
84        let internal = Rc::new(RefCell::new(ChangeBatch::new()));
85        // let produced = Rc::new(RefCell::new(ChangeBatch::new()));
86        let cap = Capability::new(G::Timestamp::minimum(), Rc::clone(&internal));
87        let counter = Counter::new(output);
88        let produced = Rc::clone(counter.produced());
89        let counter = Output::new(counter, Rc::clone(&internal), 0);
90        let peers = self.peers();
91
92        let index = self.allocate_operator_index();
93        let address = self.addr_for_child(index);
94
95        let cap = ActivateCapability::new(cap, Rc::clone(&address), self.activations());
96
97        let helper = UnorderedHandle::new(counter, Rc::clone(&address), self.activations());
98
99        self.add_operator_with_index(Box::new(UnorderedOperator {
100            name: "UnorderedInput".to_owned(),
101            address,
102            shared_progress: Rc::new(RefCell::new(SharedProgress::new(0, 1))),
103            internal,
104            produced,
105            peers,
106        }), index);
107
108        ((helper, cap), Stream::new(Source::new(index, 0), registrar, self.clone()))
109    }
110}
111
112struct UnorderedOperator<T:Timestamp> {
113    name: String,
114    address: Rc<[usize]>,
115    shared_progress: Rc<RefCell<SharedProgress<T>>>,
116    internal:   Rc<RefCell<ChangeBatch<T>>>,
117    produced:   Rc<RefCell<ChangeBatch<T>>>,
118    peers:     usize,
119}
120
121impl<T:Timestamp> Schedule for UnorderedOperator<T> {
122    fn name(&self) -> &str { &self.name }
123    fn path(&self) -> &[usize] { &self.address[..] }
124    fn schedule(&mut self) -> bool {
125        let shared_progress = &mut *self.shared_progress.borrow_mut();
126        self.internal.borrow_mut().drain_into(&mut shared_progress.internals[0]);
127        self.produced.borrow_mut().drain_into(&mut shared_progress.produceds[0]);
128        false
129    }
130}
131
132impl<T:Timestamp> Operate<T> for UnorderedOperator<T> {
133    fn inputs(&self) -> usize { 0 }
134    fn outputs(&self) -> usize { 1 }
135
136    fn get_internal_summary(&mut self) -> (Connectivity<<T as Timestamp>::Summary>, Rc<RefCell<SharedProgress<T>>>) {
137        let mut borrow = self.internal.borrow_mut();
138        for (time, count) in borrow.drain() {
139            self.shared_progress.borrow_mut().internals[0].update(time, count * (self.peers as i64));
140        }
141        (Vec::new(), Rc::clone(&self.shared_progress))
142    }
143
144    fn notify_me(&self) -> bool { false }
145}
146
147/// A handle to an input [Stream], used to introduce data to a timely dataflow computation.
148pub struct UnorderedHandle<T: Timestamp, CB: ContainerBuilder> {
149    output: OutputBuilder<T, CB>,
150    address: Rc<[usize]>,
151    activations: Rc<RefCell<Activations>>,
152}
153
154impl<T: Timestamp, CB: ContainerBuilder> UnorderedHandle<T, CB> {
155    fn new(output: Output<T, CB::Container>, address: Rc<[usize]>, activations: Rc<RefCell<Activations>>) -> Self {
156        Self {
157            output: OutputBuilder::from(output),
158            address,
159            activations,
160        }
161    }
162
163    /// Allocates a new automatically flushing session based on the supplied capability.
164    #[inline]
165    pub fn activate(&mut self) -> ActivateOnDrop<OutputBuilderSession<'_, T, CB>> {
166        ActivateOnDrop::new(self.output.activate(), Rc::clone(&self.address), Rc::clone(&self.activations))
167    }
168}