Skip to main content

mz_avro/
encode.rs

1// Copyright 2018 Flavien Raynaud.
2// Copyright Materialize, Inc. and contributors. All rights reserved.
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License in the LICENSE file at the
7// root of this repository, or online at
8//
9//     http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16//
17// This file is derived from the avro-rs project, available at
18// https://github.com/flavray/avro-rs. It was incorporated
19// directly into Materialize on March 3, 2020.
20//
21// The original source code is subject to the terms of the MIT license, a copy
22// of which can be found in the LICENSE file at the root of this repository.
23
24use crate::schema::{Schema, SchemaNode, SchemaPiece};
25use crate::types::{DecimalValue, Value};
26use crate::util::{zig_i32, zig_i64};
27
28/// Encode a `Value` into avro format.
29///
30/// **NOTE** This will not perform schema validation. The value is assumed to
31/// be valid with regards to the schema. Schema are needed only to guide the
32/// encoding for complex type values.
33pub fn encode(value: &Value, schema: &Schema, buffer: &mut Vec<u8>) {
34    encode_ref(value, schema.top_node(), buffer)
35}
36
37fn encode_bytes<B: AsRef<[u8]> + ?Sized>(s: &B, buffer: &mut Vec<u8>) {
38    let bytes = s.as_ref();
39    encode(
40        &Value::Long(bytes.len() as i64),
41        &Schema {
42            named: vec![],
43            indices: Default::default(),
44            top: SchemaPiece::Long.into(),
45        },
46        buffer,
47    );
48    buffer.extend_from_slice(bytes);
49}
50
51fn encode_long(i: i64, buffer: &mut Vec<u8>) {
52    zig_i64(i, buffer)
53}
54
55fn encode_int(i: i32, buffer: &mut Vec<u8>) {
56    zig_i32(i, buffer)
57}
58
59/// Encode a `Value` into avro format.
60///
61/// **NOTE** This will not perform schema validation. The value is assumed to
62/// be valid with regards to the schema. Schema are needed only to guide the
63/// encoding for complex type values.
64pub fn encode_ref(value: &Value, schema: SchemaNode, buffer: &mut Vec<u8>) {
65    match value {
66        Value::Null => (),
67        Value::Boolean(b) => buffer.push(if *b { 1u8 } else { 0u8 }),
68        Value::Int(i) => encode_int(*i, buffer),
69        Value::Long(i) => encode_long(*i, buffer),
70        Value::Float(x) => buffer.extend_from_slice(&x.to_le_bytes()),
71        Value::Date(d) => encode_int(*d, buffer),
72        Value::Timestamp(d) => {
73            let mult = match schema.inner {
74                SchemaPiece::TimestampMilli => 1_000,
75                SchemaPiece::TimestampMicro => 1_000_000,
76                other => panic!("Invalid schema for timestamp: {:?}", other),
77            };
78            let ts_seconds = d
79                .and_utc()
80                .timestamp()
81                .checked_mul(mult)
82                .expect("All chrono dates can be converted to timestamps");
83            let sub_part: i64 = if mult == 1_000 {
84                d.and_utc().timestamp_subsec_millis().into()
85            } else {
86                d.and_utc().timestamp_subsec_micros().into()
87            };
88            let ts = ts_seconds + sub_part;
89            encode_long(ts, buffer)
90        }
91        Value::Double(x) => buffer.extend_from_slice(&x.to_le_bytes()),
92        Value::Decimal(DecimalValue { unscaled, .. }) => match schema.name {
93            None => encode_bytes(unscaled, buffer),
94            Some(_) => buffer.extend(unscaled),
95        },
96        Value::Bytes(bytes) => encode_bytes(bytes, buffer),
97        Value::String(s) => match schema.inner {
98            SchemaPiece::String => {
99                encode_bytes(s, buffer);
100            }
101            SchemaPiece::Enum { symbols, .. } => {
102                if let Some(index) = symbols.iter().position(|item| item == s) {
103                    encode_int(index as i32, buffer);
104                }
105            }
106            _ => (),
107        },
108        Value::Fixed(_, bytes) => buffer.extend(bytes),
109        Value::Enum(i, _) => encode_int(*i as i32, buffer),
110        Value::Union { index, inner, .. } => {
111            if let SchemaPiece::Union(schema_inner) = schema.inner {
112                let schema_inner = &schema_inner.variants()[*index];
113                encode_long(*index as i64, buffer);
114                encode_ref(&*inner, schema.step(schema_inner), buffer);
115            }
116        }
117        Value::Array(items) => {
118            if let SchemaPiece::Array(inner) = schema.inner {
119                if !items.is_empty() {
120                    encode_long(items.len() as i64, buffer);
121                    for item in items.iter() {
122                        encode_ref(item, schema.step(&**inner), buffer);
123                    }
124                }
125                buffer.push(0u8);
126            }
127        }
128        Value::Map(items) => {
129            if let SchemaPiece::Map(inner) = schema.inner {
130                if !items.is_empty() {
131                    encode_long(items.len() as i64, buffer);
132                    for (key, value) in items {
133                        encode_bytes(key, buffer);
134                        encode_ref(value, schema.step(&**inner), buffer);
135                    }
136                }
137                buffer.push(0u8);
138            }
139        }
140        Value::Record(fields) => {
141            if let SchemaPiece::Record {
142                fields: inner_fields,
143                ..
144            } = schema.inner
145            {
146                for (i, &(_, ref value)) in fields.iter().enumerate() {
147                    encode_ref(value, schema.step(&inner_fields[i].schema), buffer);
148                }
149            }
150        }
151        Value::Json(j) => {
152            encode_bytes(&j.to_string(), buffer);
153        }
154        Value::Uuid(u) => {
155            let u_str = u.to_string();
156            encode_bytes(&u_str, buffer);
157        }
158    }
159}
160
161pub fn encode_to_vec(value: &Value, schema: &Schema) -> Vec<u8> {
162    let mut buffer = Vec::new();
163    encode(value, schema, &mut buffer);
164    buffer
165}
166
#[cfg(test)]
mod tests {
    use std::collections::BTreeMap;

    use super::*;

    /// An empty array encodes to just the terminating zero byte.
    #[mz_ore::test]
    fn test_encode_empty_array() {
        let schema: Schema = r#"{"type": "array", "items": "int"}"#.parse().unwrap();
        let value = Value::Array(Vec::<Value>::new());

        let mut encoded = Vec::new();
        encode(&value, &schema, &mut encoded);

        assert_eq!(vec![0u8], encoded);
    }

    /// An empty map encodes to just the terminating zero byte.
    #[mz_ore::test]
    fn test_encode_empty_map() {
        let schema: Schema = r#"{"type": "map", "values": "int"}"#.parse().unwrap();
        let value = Value::Map(BTreeMap::<String, Value>::new());

        let mut encoded = Vec::new();
        encode(&value, &schema, &mut encoded);

        assert_eq!(vec![0u8], encoded);
    }
}