1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
// Copyright Materialize, Inc. and contributors. All rights reserved.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0.

//! Generated protobuf code and companion impls.

#![allow(missing_docs)]

include!(concat!(env!("OUT_DIR"), "/mod.rs"));

use std::io::Read;

use bytes::BufMut;
use md5::{Digest, Md5};
use prost::Message;

use crate::error::Error;
use crate::gen::persist::ProtoMeta;

impl ProtoMeta {
    /// A versioning for how we store the protobuf serialization.
    ///
    /// Protobuf handles most of our backward and forward compatibility, but we
    /// don't just store the raw protobuf message serialization. This version
    /// number determines exactly what that format is.
    ///
    /// Once we commit to backward compatibility, this should only change if we
    /// decide to e.g. switch from protobuf to some other encoding entirely.
    ///
    /// All versions less than this were developmental. If encountered, it's
    /// safe to delete all data in blob storage. If a greater version is seen,
    /// then some major change has happened and this code has no idea what is
    /// going on and should refuse to touch it.
    ///
    /// The following is an EBNF-ish spec for the format:
    ///
    /// ```none
    /// encoding = 9u8 v9_encoding
    /// v9_encoding = proto_meta md5_checksum
    /// proto_meta = u8* (the protobuf serialization of ProtoMeta)
    /// md5_checksum = u8{16} (the 16-byte md5 digest of proto_meta)
    /// ```
    // TODO: Once this gets bumped to 8, we can clean up:
    // - The TODO in Blob::Cache.check_meta_build_version.
    // NOTE(review): the value below is already past 8, so the cleanup above
    // looks overdue; confirm and then drop this TODO.
    pub const ENCODING_VERSION: u8 = 9;

    /// The [Self::ENCODING_VERSION] of this previously encoded ProtoMeta.
    ///
    /// Only the first byte of `buf` is inspected; the rest of the encoding is
    /// not validated here.
    ///
    /// Returns an error if the input is malformed (i.e. `buf` is empty and so
    /// has no version byte).
    pub fn encoded_version(buf: &[u8]) -> Result<u8, Error> {
        buf.first()
            .copied()
            .ok_or_else(|| Error::from("missing encoding version"))
    }

    // NB: This len is intentionally hardcoded (not derived from the md5 crate)
    // so that a change to the crate can't break us. The compiler statically
    // checks that they match because this const is used in the return type.
    // (MD5 is not going to change, so this is all a moot point, but still
    // better to be defensive.)
    const CHECKSUM_LEN: usize = 16;

    /// Computes the MD5 digest of `buf` as a fixed-size byte array.
    fn md5_checksum(buf: &[u8]) -> [u8; Self::CHECKSUM_LEN] {
        let mut h = Md5::new();
        h.update(buf);
        h.finalize().into()
    }
}

impl persist_types::Codec for ProtoMeta {
    /// The name identifying this codec.
    fn codec_name() -> String {
        String::from("protobuf+md5[ProtoMeta]")
    }

    /// Appends the encoding of `self` to `buf`: one version byte, then the
    /// protobuf serialization, then the MD5 checksum of that serialization.
    fn encode<B>(&self, buf: &mut B)
    where
        B: BufMut,
    {
        // TODO: Move checksum to be a field on the proto instead. We can encode
        // the proto, checksum'ing as we go, and then manually append it onto
        // the end.
        //
        // TODO: Regardless of the above TODO, compute the checksum as we go and
        // avoid this temp Vec.
        let serialized = self.encode_to_vec();
        let digest = Self::md5_checksum(&serialized);

        buf.put_u8(Self::ENCODING_VERSION);
        buf.put_slice(&serialized);
        buf.put_slice(&digest);
    }

    /// Decodes a value previously written by `encode`.
    ///
    /// Fails with a descriptive message if the version byte is absent or
    /// unsupported, if there are too few bytes to hold a checksum, if the
    /// checksum does not match the payload, or if the payload is not a valid
    /// protobuf message.
    fn decode<'a>(buf: &'a [u8]) -> Result<Self, String> {
        // Reading from a `&[u8]` advances it, so after this `rest` starts just
        // past the version byte.
        let mut rest = buf;
        let mut header = [0u8; 1];
        if rest.read_exact(&mut header).is_err() {
            return Err("missing version".into());
        }
        let [version] = header;
        if version != Self::ENCODING_VERSION {
            return Err(format!("unsupported version: {}", version));
        }

        // The checksum occupies the trailing CHECKSUM_LEN bytes; everything
        // before it is the protobuf payload.
        let payload_len = match rest.len().checked_sub(Self::CHECKSUM_LEN) {
            Some(len) => len,
            None => return Err("missing/incomplete checksum".into()),
        };
        let (payload, expected) = rest.split_at(payload_len);
        let actual = Self::md5_checksum(payload);
        if expected != &actual[..] {
            return Err("checksum mismatch".into());
        }

        <Self as Message>::decode(payload).map_err(|err| err.to_string())
    }
}

#[cfg(test)]
mod tests {
    use persist_types::Codec;

    use super::*;

    /// Corrupting either the payload or the trailing checksum must be reported
    /// as a checksum mismatch.
    #[test]
    fn checksum() {
        // Use a non-default message so that the serialized payload is
        // non-empty. Otherwise the byte right after the version could be the
        // first checksum byte and the "data mutated" case below would not
        // actually be touching data.
        let meta = ProtoMeta {
            seqno: 7,
            ..Default::default()
        };
        let mut encoded = Vec::new();
        Codec::encode(&meta, &mut encoded);
        assert!(encoded.len() > 1 + ProtoMeta::CHECKSUM_LEN);

        // Data has been mutated. (Index 0 is the version, index 1 is the first
        // payload byte.) Wrapping add so a 0xFF byte can't overflow-panic.
        let mut bad_data = encoded.clone();
        bad_data[1] = bad_data[1].wrapping_add(1);
        assert_eq!(
            <ProtoMeta as Codec>::decode(&bad_data),
            Err("checksum mismatch".into())
        );

        // Checksum has been mutated.
        let mut bad_checksum = encoded.clone();
        let last = bad_checksum.last_mut().unwrap();
        *last = last.wrapping_add(1);
        assert_eq!(
            <ProtoMeta as Codec>::decode(&bad_checksum),
            Err("checksum mismatch".into())
        );

        // Intact checksum matches.
        assert_eq!(<ProtoMeta as Codec>::decode(&encoded), Ok(meta));
    }

    /// Truncated encodings must be rejected with an error rather than
    /// panicking or decoding successfully.
    #[test]
    fn decode_errors() {
        // This is not a test of protobuf's roundtrip-ability, so don't
        // bother too much with the test data.
        let meta = ProtoMeta {
            seqno: 7,
            ..Default::default()
        };
        let mut encoded = Vec::new();
        Codec::encode(&meta, &mut encoded);

        // Sanity check that we don't just always return errors.
        assert_eq!(<ProtoMeta as Codec>::decode(&encoded), Ok(meta));

        // Every prefix that's missing at least one byte should error, not panic
        // or succeed. The range is exclusive of `encoded.len()`, so this covers
        // every truncation from the empty input up to "missing only the final
        // byte".
        for i in 0..encoded.len() {
            assert!(<ProtoMeta as Codec>::decode(&encoded[..i]).is_err());
        }
    }
}