mz_compute/logging/
reachability.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Logging dataflows for events generated by timely dataflow.
11
12use std::collections::BTreeMap;
13use std::convert::TryInto;
14use std::rc::Rc;
15use std::time::Duration;
16
17use columnar::Index;
18use mz_compute_client::logging::LoggingConfig;
19use mz_ore::cast::CastFrom;
20use mz_repr::{Datum, Diff, Row, Timestamp};
21use mz_timely_util::columnar::builder::ColumnBuilder;
22use mz_timely_util::columnar::{Col2ValBatcher, Column, columnar_exchange};
23use mz_timely_util::replay::MzReplay;
24use timely::dataflow::Scope;
25use timely::dataflow::channels::pact::{ExchangeCore, Pipeline};
26use timely::dataflow::operators::Operator;
27use timely::dataflow::operators::generic::operator::empty;
28
29use crate::extensions::arrange::MzArrangeCore;
30use crate::logging::initialize::ReachabilityEvent;
31use crate::logging::{EventQueue, LogCollection, LogVariant, TimelyLog, consolidate_and_pack};
32use crate::row_spine::RowRowBuilder;
33use crate::typedefs::RowRowSpine;
34
35/// The return type of [`construct`].
36pub(super) struct Return {
37    /// Collections to export.
38    pub collections: BTreeMap<LogVariant, LogCollection>,
39}
40
41/// Constructs the logging dataflow fragment for reachability logs.
42///
43/// Params
44/// * `scope`: The Timely scope hosting the log analysis dataflow.
45/// * `config`: Logging configuration
46/// * `event_queue`: The source to read log events from.
47pub(super) fn construct<G: Scope<Timestamp = Timestamp>>(
48    mut scope: G,
49    config: &LoggingConfig,
50    event_queue: EventQueue<Column<(Duration, ReachabilityEvent)>, 3>,
51) -> Return {
52    let collections = scope.scoped("timely reachability logging", move |scope| {
53        let enable_logging = config.enable_logging;
54        let interval_ms = std::cmp::max(1, config.interval.as_millis());
55        type UpdatesKey = (bool, usize, usize, usize, Timestamp);
56
57        type CB = ColumnBuilder<((UpdatesKey, ()), Timestamp, Diff)>;
58        let (logs, token) = if enable_logging {
59            event_queue.links.mz_replay(
60                scope,
61                "reachability logs",
62                config.interval,
63                event_queue.activator,
64            )
65        } else {
66            let token: Rc<dyn std::any::Any> = Rc::new(Box::new(()));
67            (empty(scope), token)
68        };
69        let logs = logs.unary::<CB, _, _, _>(Pipeline, "FlatMapReachability", move |_,_| move |input, output| {
70            input.for_each_time(|time, data| {
71                output.session_with_builder(&time)
72                    .give_iterator(data.flat_map(|d|
73                        d.borrow().into_index_iter().flat_map(move |(time, (operator_id, massaged))| {
74                            let time_ms = ((time.as_millis() / interval_ms) + 1) * interval_ms;
75                            let time_ms: Timestamp = time_ms.try_into().expect("must fit");
76                            massaged.into_iter().map(move |(source, port, update_type, ts, diff)| {
77                                let datum = (update_type, operator_id, source, port, ts);
78                                ((datum, ()), time_ms, diff)
79                            })
80                        }
81                    )));
82            });
83        });
84
85        // Restrict results by those logs that are meant to be active.
86        let logs_active = [LogVariant::Timely(TimelyLog::Reachability)];
87        let worker_id = scope.index();
88
89        let updates = consolidate_and_pack::<_, Col2ValBatcher<UpdatesKey, _, _, _>, ColumnBuilder<_>, _, _>(
90            &logs,
91            TimelyLog::Reachability,
92            move |data, packer, session| {
93                for ((datum, ()), time, diff) in data.iter() {
94                    let (update_type, operator_id, source, port, ts) = datum;
95                    let update_type = if *update_type { "source" } else { "target" };
96                    let data = packer.pack_slice(&[
97                        Datum::UInt64(u64::cast_from(*operator_id)),
98                        Datum::UInt64(u64::cast_from(worker_id)),
99                        Datum::UInt64(u64::cast_from(*source)),
100                        Datum::UInt64(u64::cast_from(*port)),
101                        Datum::String(update_type),
102                        Datum::from(*ts),
103                    ]);
104                    session.give((data, time, diff));
105                }
106            }
107        );
108
109        let mut result = BTreeMap::new();
110        for variant in logs_active {
111            if config.index_logs.contains_key(&variant) {
112                let trace = updates
113                    .mz_arrange_core::<_, Col2ValBatcher<_, _, _, _>, RowRowBuilder<_, _>, RowRowSpine<_, _>>(
114                        ExchangeCore::<ColumnBuilder<_>, _>::new_core(columnar_exchange::<Row, Row, Timestamp, Diff>),
115                        &format!("Arrange {variant:?}"),
116                    )
117                    .trace;
118                let collection = LogCollection {
119                    trace,
120                    token: Rc::clone(&token),
121                };
122                result.insert(variant, collection);
123            }
124        }
125        result
126    });
127
128    Return { collections }
129}