#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ProtoMeta {
    /// For data written by some vX.Y.Z of Materialize, we'll support reading it
    /// back in by later versions (backward compatibility, so users can upgrade)
    /// and earlier versions (forward compatibility, so users can roll back an
    /// upgrade). The specific policy is yet to be determined, but each of these
    /// is likely to be bounded, especially forward compatibility.
    ///
    /// For us to reason about this (e.g. to prevent startup if a binary is
    /// pointed at data it can't handle), we store the version that wrote data
    /// alongside the data itself.
    #[prost(string, tag="1")]
    pub version: ::prost::alloc::string::String,
    #[prost(uint64, tag="2")]
    pub seqno: u64,
    #[prost(map="uint64, message", tag="3")]
    pub id_mapping: ::std::collections::HashMap<u64, ProtoStreamRegistration>,
    #[prost(map="uint64, message", tag="4")]
    pub graveyard: ::std::collections::HashMap<u64, ProtoStreamRegistration>,
    #[prost(map="uint64, message", tag="5")]
    pub arrangements: ::std::collections::HashMap<u64, ProtoArrangement>,
}
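// Hypothetical sketch (not part of the prost-generated output): one way a
// binary could use the `version` field above to refuse startup when pointed
// at data it can't handle. The "X.Y.Z" parsing helper and the exact
// compatibility policy shown here are assumptions, not the actual policy.
#[allow(dead_code)]
fn parse_semver(v: &str) -> Option<(u64, u64, u64)> {
    let mut parts = v.trim_start_matches('v').splitn(3, '.');
    Some((
        parts.next()?.parse().ok()?,
        parts.next()?.parse().ok()?,
        parts.next()?.parse().ok()?,
    ))
}

#[allow(dead_code)]
fn check_can_read(meta: &ProtoMeta, build_version: &str) -> Result<(), String> {
    let data = parse_semver(&meta.version)
        .ok_or_else(|| format!("unparseable data version: {}", meta.version))?;
    let build = parse_semver(build_version)
        .ok_or_else(|| format!("unparseable build version: {}", build_version))?;
    // Example policy only: read data written by this or any earlier version,
    // but refuse data written by a strictly newer version.
    if data > build {
        return Err(format!(
            "data written by {} is newer than this binary ({})",
            meta.version, build_version
        ));
    }
    Ok(())
}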
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ProtoStreamRegistration {
    #[prost(string, tag="1")]
    pub name: ::prost::alloc::string::String,
    #[prost(string, tag="2")]
    pub key_codec_name: ::prost::alloc::string::String,
    #[prost(string, tag="3")]
    pub val_codec_name: ::prost::alloc::string::String,
}
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ProtoArrangement {
    #[prost(message, optional, tag="1")]
    pub since: ::core::option::Option<ProtoU64Antichain>,
    #[prost(message, optional, tag="2")]
    pub seal: ::core::option::Option<ProtoU64Antichain>,
    #[prost(message, repeated, tag="3")]
    pub unsealed_batches: ::prost::alloc::vec::Vec<ProtoUnsealedBatchMeta>,
    #[prost(message, repeated, tag="4")]
    pub trace_batches: ::prost::alloc::vec::Vec<ProtoTraceBatchMeta>,
}
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ProtoUnsealedBatchMeta {
    #[prost(string, tag="1")]
    pub key: ::prost::alloc::string::String,
    #[prost(enumeration="ProtoBatchFormat", tag="7")]
    pub format: i32,
    #[prost(uint64, tag="2")]
    pub seqno_lower: u64,
    #[prost(uint64, tag="3")]
    pub seqno_upper: u64,
    #[prost(uint64, tag="4")]
    pub ts_lower: u64,
    #[prost(uint64, tag="5")]
    pub ts_upper: u64,
    #[prost(uint64, tag="6")]
    pub size_bytes: u64,
}
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ProtoTraceBatchMeta {
    #[prost(string, repeated, tag="1")]
    pub keys: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
    #[prost(enumeration="ProtoBatchFormat", tag="5")]
    pub format: i32,
    #[prost(message, optional, tag="2")]
    pub desc: ::core::option::Option<ProtoU64Description>,
    #[prost(uint64, tag="3")]
    pub size_bytes: u64,
    #[prost(uint64, tag="4")]
    pub level: u64,
}
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ProtoU64Antichain {
    #[prost(uint64, repeated, tag="1")]
    pub elements: ::prost::alloc::vec::Vec<u64>,
}
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ProtoU64Description {
    #[prost(message, optional, tag="1")]
    pub lower: ::core::option::Option<ProtoU64Antichain>,
    #[prost(message, optional, tag="2")]
    pub upper: ::core::option::Option<ProtoU64Antichain>,
    #[prost(message, optional, tag="3")]
    pub since: ::core::option::Option<ProtoU64Antichain>,
}
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ProtoBatchInline {
    #[prost(oneof="proto_batch_inline::BatchType", tags="1, 2")]
    pub batch_type: ::core::option::Option<proto_batch_inline::BatchType>,
}
/// Nested message and enum types in `ProtoBatchInline`.
pub mod proto_batch_inline {
    #[derive(Clone, PartialEq, ::prost::Oneof)]
    pub enum BatchType {
        #[prost(message, tag="1")]
        Unsealed(super::ProtoUnsealedBatchInline),
        #[prost(message, tag="2")]
        Trace(super::ProtoTraceBatchPartInline),
    }
}
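// Hypothetical sketch (not part of the prost-generated output): constructing
// the inline metadata for an unsealed batch through the generated oneof. The
// field values here are made up for illustration.
#[allow(dead_code)]
fn example_unsealed_inline(seqno_lower: u64, seqno_upper: u64) -> ProtoBatchInline {
    ProtoBatchInline {
        batch_type: Some(proto_batch_inline::BatchType::Unsealed(
            ProtoUnsealedBatchInline {
                format: ProtoBatchFormat::ParquetKvtd as i32,
                seqno_lower,
                seqno_upper,
            },
        )),
    }
}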
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ProtoUnsealedBatchInline {
    #[prost(enumeration="ProtoBatchFormat", tag="7")]
    pub format: i32,
    #[prost(uint64, tag="2")]
    pub seqno_lower: u64,
    #[prost(uint64, tag="3")]
    pub seqno_upper: u64,
}
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ProtoTraceBatchPartInline {
    #[prost(enumeration="ProtoBatchFormat", tag="5")]
    pub format: i32,
    /// TraceBatchParts can contain partial data for a given trace batch, and so
    /// this desc means only that the records contained in this part have to
    /// abide by the constraints in the description. There may be other parts
    /// for the same trace batch with the same description. However, there will
    /// be only one trace batch part with the same description and index.
    #[prost(message, optional, tag="2")]
    pub desc: ::core::option::Option<ProtoU64Description>,
    #[prost(uint64, tag="6")]
    pub index: u64,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
#[repr(i32)]
pub enum ProtoBatchFormat {
    Unknown = 0,
    /// Arrow, which we'd use for the local blob cache if we use it, gets a
    /// structure like `[(K, V, T, D)]` so that we could mmap it and use it
    /// directly as our in-mem batches (which have this structure).
    ArrowKvtd = 1,
    /// We have more flexibility with Parquet. Initially we'll start with the
    /// same `[(K, V, T, D)]` as our in-mem batches. Another option would be
    /// something like `[(K, [(V, [(T, D)])])]`, which would only store each
    /// key's and each val's data once (this is similar to the
    /// \[differential_dataflow::trace::layers::Trie\] structure of
    /// \[differential_dataflow::trace::implementations::ord::OrdValBatch\]).
    ///
    /// Which is better probably comes down to how much duplication we expect of
    /// keys and vals in a batch as well as how big the batches are (the trie
    /// structure introduces more columns, so has some amount of overhead).
    ///
    /// For unsealed batches, we have a better chance of duplicates than trace,
    /// but we probably don't want to pay the cost of converting between the
    /// in-mem `[(K, V, T, D)]` representation and anything else (to keep the hot
    /// path clean). Unsealed batches are also likely to be our smallest. For
    /// this reason, they'll probably always stay as ParquetKvtd.
    ///
    /// For trace batches, we consolidate them before writing them out, so we're
    /// guaranteed to get nothing from the V level of the trie. For duplicate
    /// keys, we'll probably get a good amount of benefit from column specific
    /// compression, and I'd like to exhaust that direction first before dealing
    /// with a trie-like column structure.
    ParquetKvtd = 2,
}
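// Hypothetical sketch (not part of the prost-generated output): recovering the
// enum from the raw i32 stored on batch metadata. `from_i32` is the helper the
// prost version targeted by this file generates for `Enumeration` derives
// (newer prost versions expose `try_from` instead); the error handling here is
// an assumption.
#[allow(dead_code)]
fn unsealed_batch_format(meta: &ProtoUnsealedBatchMeta) -> Result<ProtoBatchFormat, String> {
    match ProtoBatchFormat::from_i32(meta.format) {
        Some(ProtoBatchFormat::Unknown) | None => {
            Err(format!("unknown batch format: {}", meta.format))
        }
        Some(format) => Ok(format),
    }
}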