Trait differential_dataflow::operators::join::Join
source · pub trait Join<G: Scope, K: Data, V: Data, R: Semigroup> {
// Required methods
fn join_map<V2, R2, D, L>(
&self,
other: &Collection<G, (K, V2), R2>,
logic: L,
) -> Collection<G, D, <R as Multiply<R2>>::Output>
where K: ExchangeData,
V2: ExchangeData,
R2: ExchangeData + Semigroup,
R: Multiply<R2>,
<R as Multiply<R2>>::Output: Semigroup + 'static,
D: Data,
L: FnMut(&K, &V, &V2) -> D + 'static;
fn semijoin<R2>(
&self,
other: &Collection<G, K, R2>,
) -> Collection<G, (K, V), <R as Multiply<R2>>::Output>
where K: ExchangeData,
R2: ExchangeData + Semigroup,
R: Multiply<R2>,
<R as Multiply<R2>>::Output: Semigroup + 'static;
fn antijoin<R2>(
&self,
other: &Collection<G, K, R2>,
) -> Collection<G, (K, V), R>
where K: ExchangeData,
R2: ExchangeData + Semigroup,
R: Multiply<R2, Output = R> + Abelian + 'static;
// Provided method
fn join<V2, R2>(
&self,
other: &Collection<G, (K, V2), R2>,
) -> Collection<G, (K, (V, V2)), <R as Multiply<R2>>::Output>
where K: ExchangeData,
V2: ExchangeData,
R2: ExchangeData + Semigroup,
R: Multiply<R2>,
<R as Multiply<R2>>::Output: Semigroup + 'static { ... }
}
Expand description
Join implementations for `(key,val)` data.
Required Methods§
sourcefn join_map<V2, R2, D, L>(
&self,
other: &Collection<G, (K, V2), R2>,
logic: L,
) -> Collection<G, D, <R as Multiply<R2>>::Output>where
K: ExchangeData,
V2: ExchangeData,
R2: ExchangeData + Semigroup,
R: Multiply<R2>,
<R as Multiply<R2>>::Output: Semigroup + 'static,
D: Data,
L: FnMut(&K, &V, &V2) -> D + 'static,
fn join_map<V2, R2, D, L>(
&self,
other: &Collection<G, (K, V2), R2>,
logic: L,
) -> Collection<G, D, <R as Multiply<R2>>::Output>where
K: ExchangeData,
V2: ExchangeData,
R2: ExchangeData + Semigroup,
R: Multiply<R2>,
<R as Multiply<R2>>::Output: Semigroup + 'static,
D: Data,
L: FnMut(&K, &V, &V2) -> D + 'static,
Matches pairs `(key,val1)` and `(key,val2)` based on `key` and then applies a function.
§Examples
use differential_dataflow::input::Input;
use differential_dataflow::operators::Join;
::timely::example(|scope| {
let x = scope.new_collection_from(vec![(0, 1), (1, 3)]).1;
let y = scope.new_collection_from(vec![(0, 'a'), (1, 'b')]).1;
let z = scope.new_collection_from(vec![(1, 'a'), (3, 'b')]).1;
x.join_map(&y, |_key, &a, &b| (a,b))
.assert_eq(&z);
});
sourcefn semijoin<R2>(
&self,
other: &Collection<G, K, R2>,
) -> Collection<G, (K, V), <R as Multiply<R2>>::Output>where
K: ExchangeData,
R2: ExchangeData + Semigroup,
R: Multiply<R2>,
<R as Multiply<R2>>::Output: Semigroup + 'static,
fn semijoin<R2>(
&self,
other: &Collection<G, K, R2>,
) -> Collection<G, (K, V), <R as Multiply<R2>>::Output>where
K: ExchangeData,
R2: ExchangeData + Semigroup,
R: Multiply<R2>,
<R as Multiply<R2>>::Output: Semigroup + 'static,
Matches pairs `(key, val)` and `key` based on `key`, producing the former with frequencies multiplied.
When the second collection contains frequencies that are either zero or one, this is the more traditional relational semijoin. When the second collection may contain multiplicities, this operation may scale up the counts of the records in the first input.
§Examples
use differential_dataflow::input::Input;
use differential_dataflow::operators::Join;
::timely::example(|scope| {
let x = scope.new_collection_from(vec![(0, 1), (1, 3)]).1;
let y = scope.new_collection_from(vec![0, 2]).1;
let z = scope.new_collection_from(vec![(0, 1)]).1;
x.semijoin(&y)
.assert_eq(&z);
});
sourcefn antijoin<R2>(&self, other: &Collection<G, K, R2>) -> Collection<G, (K, V), R>where
K: ExchangeData,
R2: ExchangeData + Semigroup,
R: Multiply<R2, Output = R> + Abelian + 'static,
fn antijoin<R2>(&self, other: &Collection<G, K, R2>) -> Collection<G, (K, V), R>where
K: ExchangeData,
R2: ExchangeData + Semigroup,
R: Multiply<R2, Output = R> + Abelian + 'static,
Subtracts the semijoin with `other` from `self`.
In the case that `other` has multiplicities zero or one this results in a relational antijoin, in which we discard input records whose key is present in `other`. If the multiplicities could be other than zero or one, the semantic interpretation of this operator is less clear.
In almost all cases, you should ensure that `other` has multiplicities that are zero or one, perhaps by using the `distinct` operator.
§Examples
use differential_dataflow::input::Input;
use differential_dataflow::operators::Join;
::timely::example(|scope| {
let x = scope.new_collection_from(vec![(0, 1), (1, 3)]).1;
let y = scope.new_collection_from(vec![0, 2]).1;
let z = scope.new_collection_from(vec![(1, 3)]).1;
x.antijoin(&y)
.assert_eq(&z);
});
Provided Methods§
sourcefn join<V2, R2>(
&self,
other: &Collection<G, (K, V2), R2>,
) -> Collection<G, (K, (V, V2)), <R as Multiply<R2>>::Output>where
K: ExchangeData,
V2: ExchangeData,
R2: ExchangeData + Semigroup,
R: Multiply<R2>,
<R as Multiply<R2>>::Output: Semigroup + 'static,
fn join<V2, R2>(
&self,
other: &Collection<G, (K, V2), R2>,
) -> Collection<G, (K, (V, V2)), <R as Multiply<R2>>::Output>where
K: ExchangeData,
V2: ExchangeData,
R2: ExchangeData + Semigroup,
R: Multiply<R2>,
<R as Multiply<R2>>::Output: Semigroup + 'static,
Matches pairs `(key,val1)` and `(key,val2)` based on `key` and yields pairs `(key, (val1, val2))`.
The `join_map` method may be more convenient for non-trivial processing pipelines.
§Examples
use differential_dataflow::input::Input;
use differential_dataflow::operators::Join;
::timely::example(|scope| {
let x = scope.new_collection_from(vec![(0, 1), (1, 3)]).1;
let y = scope.new_collection_from(vec![(0, 'a'), (1, 'b')]).1;
let z = scope.new_collection_from(vec![(0, (1, 'a')), (1, (3, 'b'))]).1;
x.join(&y)
.assert_eq(&z);
});