mz_timely_util/
panic.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License in the LICENSE file at the
6// root of this repository, or online at
7//
8//     http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16use std::panic;
17
18use mz_ore::halt;
19
20/// Intercepts expected [`timely::communication`] panics and downgrades them to
21/// [`halt`]s.
22///
23/// Because processes in a timely cluster are shared fate, once one process in
24/// the cluster crashes, the other processes in the cluster are expected to
25/// panic with communication errors. This function sniffs out these
26/// communication errors and downgrades them to halts, to keep the attention on
27/// the process that crashed first.
28pub fn halt_on_timely_communication_panic() {
29    let old_hook = panic::take_hook();
30    panic::set_hook(Box::new(move |panic_info| {
31        // We have to sniff out expected panics based on their message because
32        // Rust does not have good support for panicking with structured
33        // payloads.
34        match panic_info.payload().downcast_ref::<String>() {
35            Some(e) if e.starts_with("timely communication error:") => {
36                halt!("{}", e);
37            }
38            _ => old_hook(panic_info),
39        }
40    }))
41}