mz_persist_client/internal/
restore.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
//! See documentation on [restore_blob].
11
12use crate::ShardId;
13use crate::internal::encoding::UntypedState;
14use crate::internal::paths::BlobKey;
15use crate::internal::state::{BatchPart, RunPart, State};
16use crate::internal::state_diff::{StateDiff, StateFieldValDiff};
17use crate::internal::state_versions::StateVersions;
18use crate::metrics::Metrics;
19use anyhow::anyhow;
20use mz_persist::location::Blob;
21use timely::Container;
22use tracing::info;
23
24/// Attempt to restore all the blobs referenced by the current state in consensus.
25/// Returns a list of blobs that were not possible to restore.
26pub(crate) async fn restore_blob(
27    versions: &StateVersions,
28    blob: &dyn Blob,
29    build_version: &semver::Version,
30    shard_id: ShardId,
31    metrics: &Metrics,
32) -> anyhow::Result<Vec<BlobKey>> {
33    let diffs = versions.fetch_all_live_diffs(&shard_id).await;
34    let Some(first_live_seqno) = diffs.0.first().map(|d| d.seqno) else {
35        info!("No diffs for shard {shard_id}.");
36        return Ok(vec![]);
37    };
38
39    fn after<A>(diff: StateFieldValDiff<A>) -> Option<A> {
40        match diff {
41            StateFieldValDiff::Insert(a) => Some(a),
42            StateFieldValDiff::Update(_, a) => Some(a),
43            StateFieldValDiff::Delete(_) => None,
44        }
45    }
46
47    let mut not_restored = vec![];
48    let mut check_restored = |key: &BlobKey, result: Result<(), _>| {
49        if result.is_err() {
50            not_restored.push(key.clone());
51        }
52    };
53
54    for diff in diffs.0 {
55        let mut diff: StateDiff<u64> = StateDiff::decode(build_version, diff.data);
56        let mut part_queue = vec![];
57
58        for rollup in std::mem::take(&mut diff.rollups) {
59            // We never actually reference rollups from before the first live diff.
60            if rollup.key < first_live_seqno {
61                continue;
62            }
63            let Some(value) = after(rollup.val) else {
64                continue;
65            };
66            let key = value.key.complete(&shard_id);
67            let rollup_result = blob.restore(&key).await;
68            let rollup_restored = rollup_result.is_ok();
69            check_restored(&key, rollup_result);
70
71            // Elsewhere, we restore any state referenced in live diffs... but we also
72            // need to restore everything referenced in that first rollup.
73            // If restoring the rollup failed, let's
74            // keep going to try and recover the rest of the blobs before bailing out.
75            if rollup.key != first_live_seqno || !rollup_restored {
76                continue;
77            }
78            let rollup_bytes = blob
79                .get(&key)
80                .await?
81                .ok_or_else(|| anyhow!("fetching just-restored rollup"))?;
82            let rollup_state: State<u64> =
83                UntypedState::decode(build_version, rollup_bytes).check_ts_codec(&shard_id)?;
84            for (seqno, rollup) in &rollup_state.collections.rollups {
85                // We never actually reference rollups from before the first live diff.
86                if *seqno < first_live_seqno {
87                    continue;
88                }
89                let key = rollup.key.complete(&shard_id);
90                check_restored(&key, blob.restore(&key).await);
91            }
92            for batch in rollup_state.collections.trace.batches() {
93                part_queue.extend(batch.parts.iter().cloned());
94            }
95        }
96        for diff in diff.referenced_batches() {
97            if let Some(after) = after(diff) {
98                part_queue.extend(after.parts.iter().cloned())
99            }
100        }
101        while let Some(part) = part_queue.pop() {
102            match part {
103                RunPart::Single(BatchPart::Inline { .. }) => {}
104                RunPart::Single(BatchPart::Hollow(part)) => {
105                    let key = part.key.complete(&shard_id);
106                    check_restored(&key, blob.restore(&key).await);
107                }
108                RunPart::Many(runs) => {
109                    let key = runs.key.complete(&shard_id);
110                    check_restored(&key, blob.restore(&key).await);
111                    let runs = runs
112                        .get(shard_id, blob, metrics)
113                        .await
114                        .ok_or_else(|| anyhow!("fetching just-restored run"))?;
115                    part_queue.extend(runs.parts);
116                }
117            }
118        }
119    }
120    Ok(not_restored)
121}