#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ProtoU64Antichain {
    #[prost(uint64, repeated, tag = "1")]
    pub elements: ::prost::alloc::vec::Vec<u64>,
}
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ProtoU64Description {
    #[prost(message, optional, tag = "1")]
    pub lower: ::core::option::Option<ProtoU64Antichain>,
    #[prost(message, optional, tag = "2")]
    pub upper: ::core::option::Option<ProtoU64Antichain>,
    #[prost(message, optional, tag = "3")]
    pub since: ::core::option::Option<ProtoU64Antichain>,
}
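// Hand-written usage sketch (not part of the prost-generated output): how a
// description for a hypothetical batch covering times [3, 7) with a since of 5
// might be assembled. The single-element antichains are an assumption that
// holds for a totally ordered time type like u64.
#[cfg(test)]
mod proto_u64_description_example {
    use super::*;

    #[test]
    fn build_description() {
        let desc = ProtoU64Description {
            lower: Some(ProtoU64Antichain { elements: vec![3] }),
            upper: Some(ProtoU64Antichain { elements: vec![7] }),
            since: Some(ProtoU64Antichain { elements: vec![5] }),
        };
        assert_eq!(desc.upper.as_ref().unwrap().elements, vec![7]);
    }
}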
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ProtoBatchPartInline {
    #[prost(enumeration = "ProtoBatchFormat", tag = "1")]
    pub format: i32,
    /// TraceBatchParts can contain partial data for a given trace batch, so
    /// this desc means only that the records contained in this part must abide
    /// by the constraints in the description. There may be other parts for the
    /// same trace batch with the same description. However, there will be only
    /// one trace batch part with the same description and index.
    #[prost(message, optional, tag = "2")]
    pub desc: ::core::option::Option<ProtoU64Description>,
    #[prost(uint64, tag = "3")]
    pub index: u64,
}
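// Hand-written sketch (not generated): prost stores enum fields as i32, so a
// part's format is written as `ProtoBatchFormat::ArrowKvtd as i32`. The index
// distinguishes this part from other parts of the same trace batch; the empty
// default description here is purely illustrative.
#[cfg(test)]
mod proto_batch_part_inline_example {
    use super::*;

    #[test]
    fn build_part() {
        let part = ProtoBatchPartInline {
            format: ProtoBatchFormat::ArrowKvtd as i32,
            desc: Some(ProtoU64Description::default()),
            index: 0,
        };
        assert_eq!(part.format, ProtoBatchFormat::ArrowKvtd as i32);
    }
}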
#[derive(serde::Serialize)]
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ProtoColumnarRecords {
    #[prost(uint64, tag = "1")]
    pub len: u64,
    #[prost(int32, repeated, tag = "2")]
    pub key_offsets: ::prost::alloc::vec::Vec<i32>,
    #[prost(bytes = "bytes", tag = "3")]
    pub key_data: ::prost::bytes::Bytes,
    #[prost(int32, repeated, tag = "4")]
    pub val_offsets: ::prost::alloc::vec::Vec<i32>,
    #[prost(bytes = "bytes", tag = "5")]
    pub val_data: ::prost::bytes::Bytes,
    #[prost(int64, repeated, tag = "6")]
    pub timestamps: ::prost::alloc::vec::Vec<i64>,
    #[prost(int64, repeated, tag = "7")]
    pub diffs: ::prost::alloc::vec::Vec<i64>,
}
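// Hand-written sketch (not generated): a wire round trip of a tiny columnar
// batch through prost. The offsets layout shown here (arrow-style, with
// `len + 1` entries delimiting each record's slice of the data buffer) is an
// assumption about how the columnar encoding is consumed, not something the
// proto itself enforces.
#[cfg(test)]
mod proto_columnar_records_example {
    use prost::Message;

    use super::*;

    #[test]
    fn encode_decode_roundtrip() {
        let records = ProtoColumnarRecords {
            len: 1,
            key_offsets: vec![0, 3],
            key_data: ::prost::bytes::Bytes::from_static(b"key"),
            val_offsets: vec![0, 3],
            val_data: ::prost::bytes::Bytes::from_static(b"val"),
            timestamps: vec![1],
            diffs: vec![1],
        };
        let encoded = records.encode_to_vec();
        let decoded = ProtoColumnarRecords::decode(&encoded[..]).expect("valid proto");
        assert_eq!(records, decoded);
    }
}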
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
#[repr(i32)]
pub enum ProtoBatchFormat {
    Unknown = 0,
    /// Arrow, which we'd use for the local blob cache if we add one, gets a
    /// structure like `[(K, V, T, D)]` so that we could mmap it and use it
    /// directly as our in-mem batches (which have this structure).
    ArrowKvtd = 1,
    /// We have more flexibility with Parquet. Initially we'll start with the
    /// same `[(K, V, T, D)]` as our in-mem batches. Another option would be
    /// something like `[(K, [(V, [(T, D)])])]`, which would only store each
    /// key's and each val's data once (this is similar to the
    /// \[differential_dataflow::trace::layers::Trie\] structure of
    /// \[differential_dataflow::trace::implementations::ord::OrdValBatch\]).
    ///
    /// Which is better probably comes down to how much duplication we expect of
    /// keys and vals in a batch as well as how big the batches are (the trie
    /// structure introduces more columns, so has some amount of overhead).
    ///
    /// For unsealed batches, we have a better chance of duplicates than for
    /// trace batches, but we probably don't want to pay the cost of converting
    /// between the in-mem `[(K, V, T, D)]` representation and anything else (to
    /// keep the hot path clean). Unsealed batches are also likely to be our
    /// smallest. For these reasons, they'll probably always stay as ParquetKvtd.
    ///
    /// For trace batches, we consolidate them before writing them out, so we're
    /// guaranteed to get nothing from the V level of the trie. For duplicate
    /// keys, we'll probably get a good amount of benefit from column specific
    /// compression, and I'd like to exhaust that direction first before dealing
    /// with a trie-like column structure.
    ParquetKvtd = 2,
}
impl ProtoBatchFormat {
    /// String value of the enum field names used in the ProtoBuf definition.
    ///
    /// The values are not transformed in any way and thus are considered stable
    /// (if the ProtoBuf definition does not change) and safe for programmatic use.
    pub fn as_str_name(&self) -> &'static str {
        match self {
            ProtoBatchFormat::Unknown => "Unknown",
            ProtoBatchFormat::ArrowKvtd => "ArrowKVTD",
            ProtoBatchFormat::ParquetKvtd => "ParquetKvtd",
        }
    }
    /// Creates an enum from field names used in the ProtoBuf definition.
    pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
        match value {
            "Unknown" => Some(Self::Unknown),
            "ArrowKVTD" => Some(Self::ArrowKvtd),
            "ParquetKvtd" => Some(Self::ParquetKvtd),
            _ => None,
        }
    }
}
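// Hand-written sketch (not generated): the generated string helpers round-trip
// through the value names used in the ProtoBuf definition, so they can be used
// for human-readable logging or for parsing a format name back into the enum.
#[cfg(test)]
mod proto_batch_format_example {
    use super::*;

    #[test]
    fn str_name_roundtrip() {
        let format = ProtoBatchFormat::ParquetKvtd;
        let name = format.as_str_name();
        assert_eq!(name, "ParquetKvtd");
        assert_eq!(ProtoBatchFormat::from_str_name(name), Some(format));
    }
}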