mz_timely_util/panic.rs
1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License in the LICENSE file at the
6// root of this repository, or online at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16use std::panic;
17
18use mz_ore::halt;
19
20/// Intercepts expected [`timely::communication`] panics and downgrades them to
21/// [`halt`]s.
22///
23/// Because processes in a timely cluster are shared fate, once one process in
24/// the cluster crashes, the other processes in the cluster are expected to
25/// panic with communication errors. This function sniffs out these
26/// communication errors and downgrades them to halts, to keep the attention on
27/// the process that crashed first.
28pub fn halt_on_timely_communication_panic() {
29 let old_hook = panic::take_hook();
30 panic::set_hook(Box::new(move |panic_info| {
31 // We have to sniff out expected panics based on their message because
32 // Rust does not have good support for panicking with structured
33 // payloads.
34 match panic_info.payload().downcast_ref::<String>() {
35 Some(e) if e.starts_with("timely communication error:") => {
36 halt!("{}", e);
37 }
38 _ => old_hook(panic_info),
39 }
40 }))
41}