iceberg/spec/
datatypes.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18/*!
19 * Data Types
20 */
21use std::collections::HashMap;
22use std::convert::identity;
23use std::fmt;
24use std::ops::Index;
25use std::sync::{Arc, OnceLock};
26
27use ::serde::de::{MapAccess, Visitor};
28use serde::de::{Error, IntoDeserializer};
29use serde::{Deserialize, Deserializer, Serialize, Serializer, de};
30use serde_json::Value as JsonValue;
31
32use super::values::Literal;
33use crate::ensure_data_valid;
34use crate::error::Result;
35use crate::spec::PrimitiveLiteral;
36use crate::spec::datatypes::_decimal::{MAX_PRECISION, REQUIRED_LENGTH};
37
/// Field name for list type.
pub const LIST_FIELD_NAME: &str = "element";
/// Field name for map type's key.
pub const MAP_KEY_FIELD_NAME: &str = "key";
/// Field name for map type's value.
pub const MAP_VALUE_FIELD_NAME: &str = "value";

/// Largest decimal byte width accepted by `Type::decimal_max_precision`;
/// also the length of the `MAX_PRECISION` lookup table.
pub(crate) const MAX_DECIMAL_BYTES: u32 = 24;
/// Largest decimal precision accepted by `Type::decimal` and
/// `Type::decimal_required_bytes`; also the length of the `REQUIRED_LENGTH` table.
pub(crate) const MAX_DECIMAL_PRECISION: u32 = 38;
47
48mod _decimal {
49    use once_cell::sync::Lazy;
50
51    use crate::spec::{MAX_DECIMAL_BYTES, MAX_DECIMAL_PRECISION};
52
53    // Max precision of bytes, starts from 1
54    pub(super) static MAX_PRECISION: Lazy<[u32; MAX_DECIMAL_BYTES as usize]> = Lazy::new(|| {
55        let mut ret: [u32; 24] = [0; 24];
56        for (i, prec) in ret.iter_mut().enumerate() {
57            *prec = 2f64.powi((8 * (i + 1) - 1) as i32).log10().floor() as u32;
58        }
59
60        ret
61    });
62
63    //  Required bytes of precision, starts from 1
64    pub(super) static REQUIRED_LENGTH: Lazy<[u32; MAX_DECIMAL_PRECISION as usize]> =
65        Lazy::new(|| {
66            let mut ret: [u32; MAX_DECIMAL_PRECISION as usize] =
67                [0; MAX_DECIMAL_PRECISION as usize];
68
69            for (i, required_len) in ret.iter_mut().enumerate() {
70                for j in 0..MAX_PRECISION.len() {
71                    if MAX_PRECISION[j] >= ((i + 1) as u32) {
72                        *required_len = (j + 1) as u32;
73                        break;
74                    }
75                }
76            }
77
78            ret
79        });
80}
81
82#[derive(Debug, PartialEq, Eq, Clone)]
83/// All data types are either primitives or nested types, which are maps, lists, or structs.
84pub enum Type {
85    /// Primitive types
86    Primitive(PrimitiveType),
87    /// Struct type
88    Struct(StructType),
89    /// List type.
90    List(ListType),
91    /// Map type
92    Map(MapType),
93}
94
95impl fmt::Display for Type {
96    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
97        match self {
98            Type::Primitive(primitive) => write!(f, "{}", primitive),
99            Type::Struct(s) => write!(f, "{}", s),
100            Type::List(_) => write!(f, "list"),
101            Type::Map(_) => write!(f, "map"),
102        }
103    }
104}
105
106impl Type {
107    /// Whether the type is primitive type.
108    #[inline(always)]
109    pub fn is_primitive(&self) -> bool {
110        matches!(self, Type::Primitive(_))
111    }
112
113    /// Whether the type is struct type.
114    #[inline(always)]
115    pub fn is_struct(&self) -> bool {
116        matches!(self, Type::Struct(_))
117    }
118
119    /// Whether the type is nested type.
120    #[inline(always)]
121    pub fn is_nested(&self) -> bool {
122        matches!(self, Type::Struct(_) | Type::List(_) | Type::Map(_))
123    }
124
125    /// Convert Type to reference of PrimitiveType
126    pub fn as_primitive_type(&self) -> Option<&PrimitiveType> {
127        if let Type::Primitive(primitive_type) = self {
128            Some(primitive_type)
129        } else {
130            None
131        }
132    }
133
134    /// Convert Type to StructType
135    pub fn to_struct_type(self) -> Option<StructType> {
136        if let Type::Struct(struct_type) = self {
137            Some(struct_type)
138        } else {
139            None
140        }
141    }
142
143    /// Return max precision for decimal given [`num_bytes`] bytes.
144    #[inline(always)]
145    pub fn decimal_max_precision(num_bytes: u32) -> Result<u32> {
146        ensure_data_valid!(
147            num_bytes > 0 && num_bytes <= MAX_DECIMAL_BYTES,
148            "Decimal length larger than {MAX_DECIMAL_BYTES} is not supported: {num_bytes}",
149        );
150        Ok(MAX_PRECISION[num_bytes as usize - 1])
151    }
152
153    /// Returns minimum bytes required for decimal with [`precision`].
154    #[inline(always)]
155    pub fn decimal_required_bytes(precision: u32) -> Result<u32> {
156        ensure_data_valid!(
157            precision > 0 && precision <= MAX_DECIMAL_PRECISION,
158            "Decimals with precision larger than {MAX_DECIMAL_PRECISION} are not supported: {precision}",
159        );
160        Ok(REQUIRED_LENGTH[precision as usize - 1])
161    }
162
163    /// Creates  decimal type.
164    #[inline(always)]
165    pub fn decimal(precision: u32, scale: u32) -> Result<Self> {
166        ensure_data_valid!(
167            precision > 0 && precision <= MAX_DECIMAL_PRECISION,
168            "Decimals with precision larger than {MAX_DECIMAL_PRECISION} are not supported: {precision}",
169        );
170        Ok(Type::Primitive(PrimitiveType::Decimal { precision, scale }))
171    }
172
173    /// Check if it's float or double type.
174    #[inline(always)]
175    pub fn is_floating_type(&self) -> bool {
176        matches!(
177            self,
178            Type::Primitive(PrimitiveType::Float) | Type::Primitive(PrimitiveType::Double)
179        )
180    }
181}
182
183impl From<PrimitiveType> for Type {
184    fn from(value: PrimitiveType) -> Self {
185        Self::Primitive(value)
186    }
187}
188
189impl From<StructType> for Type {
190    fn from(value: StructType) -> Self {
191        Type::Struct(value)
192    }
193}
194
195impl From<ListType> for Type {
196    fn from(value: ListType) -> Self {
197        Type::List(value)
198    }
199}
200
201impl From<MapType> for Type {
202    fn from(value: MapType) -> Self {
203        Type::Map(value)
204    }
205}
206
207/// Primitive data types
208#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone, Hash)]
209#[serde(rename_all = "lowercase", remote = "Self")]
210pub enum PrimitiveType {
211    /// True or False
212    Boolean,
213    /// 32-bit signed integer
214    Int,
215    /// 64-bit signed integer
216    Long,
217    /// 32-bit IEEE 754 floating point.
218    Float,
219    /// 64-bit IEEE 754 floating point.
220    Double,
221    /// Fixed point decimal
222    Decimal {
223        /// Precision, must be 38 or less
224        precision: u32,
225        /// Scale
226        scale: u32,
227    },
228    /// Calendar date without timezone or time.
229    Date,
230    /// Time of day in microsecond precision, without date or timezone.
231    Time,
232    /// Timestamp in microsecond precision, without timezone
233    Timestamp,
234    /// Timestamp in microsecond precision, with timezone
235    Timestamptz,
236    /// Timestamp in nanosecond precision, without timezone
237    #[serde(rename = "timestamp_ns")]
238    TimestampNs,
239    /// Timestamp in nanosecond precision with timezone
240    #[serde(rename = "timestamptz_ns")]
241    TimestamptzNs,
242    /// Arbitrary-length character sequences encoded in utf-8
243    String,
244    /// Universally Unique Identifiers, should use 16-byte fixed
245    Uuid,
246    /// Fixed length byte array
247    Fixed(u64),
248    /// Arbitrary-length byte array.
249    Binary,
250}
251
252impl PrimitiveType {
253    /// Check whether literal is compatible with the type.
254    pub fn compatible(&self, literal: &PrimitiveLiteral) -> bool {
255        matches!(
256            (self, literal),
257            (PrimitiveType::Boolean, PrimitiveLiteral::Boolean(_))
258                | (PrimitiveType::Int, PrimitiveLiteral::Int(_))
259                | (PrimitiveType::Long, PrimitiveLiteral::Long(_))
260                | (PrimitiveType::Float, PrimitiveLiteral::Float(_))
261                | (PrimitiveType::Double, PrimitiveLiteral::Double(_))
262                | (PrimitiveType::Decimal { .. }, PrimitiveLiteral::Int128(_))
263                | (PrimitiveType::Date, PrimitiveLiteral::Int(_))
264                | (PrimitiveType::Time, PrimitiveLiteral::Long(_))
265                | (PrimitiveType::Timestamp, PrimitiveLiteral::Long(_))
266                | (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(_))
267                | (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(_))
268                | (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(_))
269                | (PrimitiveType::String, PrimitiveLiteral::String(_))
270                | (PrimitiveType::Uuid, PrimitiveLiteral::UInt128(_))
271                | (PrimitiveType::Fixed(_), PrimitiveLiteral::Binary(_))
272                | (PrimitiveType::Binary, PrimitiveLiteral::Binary(_))
273        )
274    }
275}
276
277impl Serialize for Type {
278    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
279    where S: Serializer {
280        let type_serde = _serde::SerdeType::from(self);
281        type_serde.serialize(serializer)
282    }
283}
284
285impl<'de> Deserialize<'de> for Type {
286    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
287    where D: Deserializer<'de> {
288        let type_serde = _serde::SerdeType::deserialize(deserializer)?;
289        Ok(Type::from(type_serde))
290    }
291}
292
293impl<'de> Deserialize<'de> for PrimitiveType {
294    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
295    where D: Deserializer<'de> {
296        let s = String::deserialize(deserializer)?;
297        if s.starts_with("decimal") {
298            deserialize_decimal(s.into_deserializer())
299        } else if s.starts_with("fixed") {
300            deserialize_fixed(s.into_deserializer())
301        } else {
302            PrimitiveType::deserialize(s.into_deserializer())
303        }
304    }
305}
306
307impl Serialize for PrimitiveType {
308    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
309    where S: Serializer {
310        match self {
311            PrimitiveType::Decimal { precision, scale } => {
312                serialize_decimal(precision, scale, serializer)
313            }
314            PrimitiveType::Fixed(l) => serialize_fixed(l, serializer),
315            _ => PrimitiveType::serialize(self, serializer),
316        }
317    }
318}
319
320fn deserialize_decimal<'de, D>(deserializer: D) -> std::result::Result<PrimitiveType, D::Error>
321where D: Deserializer<'de> {
322    let s = String::deserialize(deserializer)?;
323    let (precision, scale) = s
324        .trim_start_matches(r"decimal(")
325        .trim_end_matches(')')
326        .split_once(',')
327        .ok_or_else(|| D::Error::custom("Decimal requires precision and scale: {s}"))?;
328
329    Ok(PrimitiveType::Decimal {
330        precision: precision.trim().parse().map_err(D::Error::custom)?,
331        scale: scale.trim().parse().map_err(D::Error::custom)?,
332    })
333}
334
335fn serialize_decimal<S>(
336    precision: &u32,
337    scale: &u32,
338    serializer: S,
339) -> std::result::Result<S::Ok, S::Error>
340where
341    S: Serializer,
342{
343    serializer.serialize_str(&format!("decimal({precision},{scale})"))
344}
345
346fn deserialize_fixed<'de, D>(deserializer: D) -> std::result::Result<PrimitiveType, D::Error>
347where D: Deserializer<'de> {
348    let fixed = String::deserialize(deserializer)?
349        .trim_start_matches(r"fixed[")
350        .trim_end_matches(']')
351        .to_owned();
352
353    fixed
354        .parse()
355        .map(PrimitiveType::Fixed)
356        .map_err(D::Error::custom)
357}
358
359fn serialize_fixed<S>(value: &u64, serializer: S) -> std::result::Result<S::Ok, S::Error>
360where S: Serializer {
361    serializer.serialize_str(&format!("fixed[{value}]"))
362}
363
364impl fmt::Display for PrimitiveType {
365    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
366        match self {
367            PrimitiveType::Boolean => write!(f, "boolean"),
368            PrimitiveType::Int => write!(f, "int"),
369            PrimitiveType::Long => write!(f, "long"),
370            PrimitiveType::Float => write!(f, "float"),
371            PrimitiveType::Double => write!(f, "double"),
372            PrimitiveType::Decimal { precision, scale } => {
373                write!(f, "decimal({},{})", precision, scale)
374            }
375            PrimitiveType::Date => write!(f, "date"),
376            PrimitiveType::Time => write!(f, "time"),
377            PrimitiveType::Timestamp => write!(f, "timestamp"),
378            PrimitiveType::Timestamptz => write!(f, "timestamptz"),
379            PrimitiveType::TimestampNs => write!(f, "timestamp_ns"),
380            PrimitiveType::TimestamptzNs => write!(f, "timestamptz_ns"),
381            PrimitiveType::String => write!(f, "string"),
382            PrimitiveType::Uuid => write!(f, "uuid"),
383            PrimitiveType::Fixed(size) => write!(f, "fixed({})", size),
384            PrimitiveType::Binary => write!(f, "binary"),
385        }
386    }
387}
388
389/// DataType for a specific struct
390#[derive(Debug, Serialize, Clone, Default)]
391#[serde(rename = "struct", tag = "type")]
392pub struct StructType {
393    /// Struct fields
394    fields: Vec<NestedFieldRef>,
395    /// Lookup for index by field id
396    #[serde(skip_serializing)]
397    id_lookup: OnceLock<HashMap<i32, usize>>,
398    #[serde(skip_serializing)]
399    name_lookup: OnceLock<HashMap<String, usize>>,
400}
401
402impl<'de> Deserialize<'de> for StructType {
403    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
404    where D: Deserializer<'de> {
405        #[derive(Deserialize)]
406        #[serde(field_identifier, rename_all = "lowercase")]
407        enum Field {
408            Type,
409            Fields,
410        }
411
412        struct StructTypeVisitor;
413
414        impl<'de> Visitor<'de> for StructTypeVisitor {
415            type Value = StructType;
416
417            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
418                formatter.write_str("struct")
419            }
420
421            fn visit_map<V>(self, mut map: V) -> std::result::Result<StructType, V::Error>
422            where V: MapAccess<'de> {
423                let mut fields = None;
424                while let Some(key) = map.next_key()? {
425                    match key {
426                        Field::Type => (),
427                        Field::Fields => {
428                            if fields.is_some() {
429                                return Err(serde::de::Error::duplicate_field("fields"));
430                            }
431                            fields = Some(map.next_value()?);
432                        }
433                    }
434                }
435                let fields: Vec<NestedFieldRef> =
436                    fields.ok_or_else(|| de::Error::missing_field("fields"))?;
437
438                Ok(StructType::new(fields))
439            }
440        }
441
442        const FIELDS: &[&str] = &["type", "fields"];
443        deserializer.deserialize_struct("struct", FIELDS, StructTypeVisitor)
444    }
445}
446
447impl StructType {
448    /// Creates a struct type with the given fields.
449    pub fn new(fields: Vec<NestedFieldRef>) -> Self {
450        Self {
451            fields,
452            id_lookup: OnceLock::new(),
453            name_lookup: OnceLock::new(),
454        }
455    }
456
457    /// Get struct field with certain id
458    pub fn field_by_id(&self, id: i32) -> Option<&NestedFieldRef> {
459        self.field_id_to_index(id).map(|idx| &self.fields[idx])
460    }
461
462    fn field_id_to_index(&self, field_id: i32) -> Option<usize> {
463        self.id_lookup
464            .get_or_init(|| {
465                HashMap::from_iter(self.fields.iter().enumerate().map(|(i, x)| (x.id, i)))
466            })
467            .get(&field_id)
468            .copied()
469    }
470
471    /// Get struct field with certain field name
472    pub fn field_by_name(&self, name: &str) -> Option<&NestedFieldRef> {
473        self.field_name_to_index(name).map(|idx| &self.fields[idx])
474    }
475
476    fn field_name_to_index(&self, name: &str) -> Option<usize> {
477        self.name_lookup
478            .get_or_init(|| {
479                HashMap::from_iter(
480                    self.fields
481                        .iter()
482                        .enumerate()
483                        .map(|(i, x)| (x.name.clone(), i)),
484                )
485            })
486            .get(name)
487            .copied()
488    }
489
490    /// Get fields.
491    pub fn fields(&self) -> &[NestedFieldRef] {
492        &self.fields
493    }
494}
495
496impl PartialEq for StructType {
497    fn eq(&self, other: &Self) -> bool {
498        self.fields == other.fields
499    }
500}
501
502impl Eq for StructType {}
503
504impl Index<usize> for StructType {
505    type Output = NestedField;
506
507    fn index(&self, index: usize) -> &Self::Output {
508        &self.fields[index]
509    }
510}
511
512impl fmt::Display for StructType {
513    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
514        write!(f, "struct<")?;
515        for field in &self.fields {
516            write!(f, "{}", field.field_type)?;
517        }
518        write!(f, ">")
519    }
520}
521
522#[derive(Debug, PartialEq, Serialize, Deserialize, Eq, Clone)]
523#[serde(from = "SerdeNestedField", into = "SerdeNestedField")]
524/// A struct is a tuple of typed values. Each field in the tuple is named and has an integer id that is unique in the table schema.
525/// Each field can be either optional or required, meaning that values can (or cannot) be null. Fields may be any type.
526/// Fields may have an optional comment or doc string. Fields can have default values.
527pub struct NestedField {
528    /// Id unique in table schema
529    pub id: i32,
530    /// Field Name
531    pub name: String,
532    /// Optional or required
533    pub required: bool,
534    /// Datatype
535    pub field_type: Box<Type>,
536    /// Fields may have an optional comment or doc string.
537    pub doc: Option<String>,
538    /// Used to populate the field’s value for all records that were written before the field was added to the schema
539    pub initial_default: Option<Literal>,
540    /// Used to populate the field’s value for any records written after the field was added to the schema, if the writer does not supply the field’s value
541    pub write_default: Option<Literal>,
542}
543
544#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
545#[serde(rename_all = "kebab-case")]
546struct SerdeNestedField {
547    pub id: i32,
548    pub name: String,
549    pub required: bool,
550    #[serde(rename = "type")]
551    pub field_type: Box<Type>,
552    #[serde(skip_serializing_if = "Option::is_none")]
553    pub doc: Option<String>,
554    #[serde(skip_serializing_if = "Option::is_none")]
555    pub initial_default: Option<JsonValue>,
556    #[serde(skip_serializing_if = "Option::is_none")]
557    pub write_default: Option<JsonValue>,
558}
559
560impl From<SerdeNestedField> for NestedField {
561    fn from(value: SerdeNestedField) -> Self {
562        NestedField {
563            id: value.id,
564            name: value.name,
565            required: value.required,
566            initial_default: value.initial_default.and_then(|x| {
567                Literal::try_from_json(x, &value.field_type)
568                    .ok()
569                    .and_then(identity)
570            }),
571            write_default: value.write_default.and_then(|x| {
572                Literal::try_from_json(x, &value.field_type)
573                    .ok()
574                    .and_then(identity)
575            }),
576            field_type: value.field_type,
577            doc: value.doc,
578        }
579    }
580}
581
582impl From<NestedField> for SerdeNestedField {
583    fn from(value: NestedField) -> Self {
584        let initial_default = value.initial_default.map(|x| x.try_into_json(&value.field_type).expect("We should have checked this in NestedField::with_initial_default, it can't be converted to json value"));
585        let write_default = value.write_default.map(|x| x.try_into_json(&value.field_type).expect("We should have checked this in NestedField::with_write_default, it can't be converted to json value"));
586        SerdeNestedField {
587            id: value.id,
588            name: value.name,
589            required: value.required,
590            field_type: value.field_type,
591            doc: value.doc,
592            initial_default,
593            write_default,
594        }
595    }
596}
597
/// Shared, reference-counted handle to a [`NestedField`].
pub type NestedFieldRef = Arc<NestedField>;
600
601impl NestedField {
602    /// Construct a new field.
603    pub fn new(id: i32, name: impl ToString, field_type: Type, required: bool) -> Self {
604        Self {
605            id,
606            name: name.to_string(),
607            required,
608            field_type: Box::new(field_type),
609            doc: None,
610            initial_default: None,
611            write_default: None,
612        }
613    }
614
615    /// Construct a required field.
616    pub fn required(id: i32, name: impl ToString, field_type: Type) -> Self {
617        Self::new(id, name, field_type, true)
618    }
619
620    /// Construct an optional field.
621    pub fn optional(id: i32, name: impl ToString, field_type: Type) -> Self {
622        Self::new(id, name, field_type, false)
623    }
624
625    /// Construct list type's element field.
626    pub fn list_element(id: i32, field_type: Type, required: bool) -> Self {
627        Self::new(id, LIST_FIELD_NAME, field_type, required)
628    }
629
630    /// Construct map type's key field.
631    pub fn map_key_element(id: i32, field_type: Type) -> Self {
632        Self::required(id, MAP_KEY_FIELD_NAME, field_type)
633    }
634
635    /// Construct map type's value field.
636    pub fn map_value_element(id: i32, field_type: Type, required: bool) -> Self {
637        Self::new(id, MAP_VALUE_FIELD_NAME, field_type, required)
638    }
639
640    /// Set the field's doc.
641    pub fn with_doc(mut self, doc: impl ToString) -> Self {
642        self.doc = Some(doc.to_string());
643        self
644    }
645
646    /// Set the field's initial default value.
647    pub fn with_initial_default(mut self, value: Literal) -> Self {
648        self.initial_default = Some(value);
649        self
650    }
651
652    /// Set the field's initial default value.
653    pub fn with_write_default(mut self, value: Literal) -> Self {
654        self.write_default = Some(value);
655        self
656    }
657
658    /// Set the id of the field.
659    pub(crate) fn with_id(mut self, id: i32) -> Self {
660        self.id = id;
661        self
662    }
663}
664
665impl fmt::Display for NestedField {
666    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
667        write!(f, "{}: ", self.id)?;
668        write!(f, "{}: ", self.name)?;
669        if self.required {
670            write!(f, "required ")?;
671        } else {
672            write!(f, "optional ")?;
673        }
674        write!(f, "{} ", self.field_type)?;
675        if let Some(doc) = &self.doc {
676            write!(f, "{}", doc)?;
677        }
678        Ok(())
679    }
680}
681
682#[derive(Debug, PartialEq, Eq, Clone)]
683/// A list is a collection of values with some element type. The element field has an integer id that is unique in the table schema.
684/// Elements can be either optional or required. Element types may be any type.
685pub struct ListType {
686    /// Element field of list type.
687    pub element_field: NestedFieldRef,
688}
689
690impl ListType {
691    /// Construct a list type with the given element field.
692    pub fn new(element_field: NestedFieldRef) -> Self {
693        Self { element_field }
694    }
695}
696
697/// Module for type serialization/deserialization.
698pub(super) mod _serde {
699    use std::borrow::Cow;
700
701    use serde_derive::{Deserialize, Serialize};
702
703    use crate::spec::datatypes::Type::Map;
704    use crate::spec::datatypes::{
705        ListType, MapType, NestedField, NestedFieldRef, PrimitiveType, StructType, Type,
706    };
707
708    /// List type for serialization and deserialization
709    #[derive(Serialize, Deserialize)]
710    #[serde(untagged)]
711    pub(super) enum SerdeType<'a> {
712        #[serde(rename_all = "kebab-case")]
713        List {
714            r#type: String,
715            element_id: i32,
716            element_required: bool,
717            element: Cow<'a, Type>,
718        },
719        Struct {
720            r#type: String,
721            fields: Cow<'a, [NestedFieldRef]>,
722        },
723        #[serde(rename_all = "kebab-case")]
724        Map {
725            r#type: String,
726            key_id: i32,
727            key: Cow<'a, Type>,
728            value_id: i32,
729            value_required: bool,
730            value: Cow<'a, Type>,
731        },
732        Primitive(PrimitiveType),
733    }
734
735    impl From<SerdeType<'_>> for Type {
736        fn from(value: SerdeType) -> Self {
737            match value {
738                SerdeType::List {
739                    r#type: _,
740                    element_id,
741                    element_required,
742                    element,
743                } => Self::List(ListType {
744                    element_field: NestedField::list_element(
745                        element_id,
746                        element.into_owned(),
747                        element_required,
748                    )
749                    .into(),
750                }),
751                SerdeType::Map {
752                    r#type: _,
753                    key_id,
754                    key,
755                    value_id,
756                    value_required,
757                    value,
758                } => Map(MapType {
759                    key_field: NestedField::map_key_element(key_id, key.into_owned()).into(),
760                    value_field: NestedField::map_value_element(
761                        value_id,
762                        value.into_owned(),
763                        value_required,
764                    )
765                    .into(),
766                }),
767                SerdeType::Struct { r#type: _, fields } => {
768                    Self::Struct(StructType::new(fields.into_owned()))
769                }
770                SerdeType::Primitive(p) => Self::Primitive(p),
771            }
772        }
773    }
774
775    impl<'a> From<&'a Type> for SerdeType<'a> {
776        fn from(value: &'a Type) -> Self {
777            match value {
778                Type::List(list) => SerdeType::List {
779                    r#type: "list".to_string(),
780                    element_id: list.element_field.id,
781                    element_required: list.element_field.required,
782                    element: Cow::Borrowed(&list.element_field.field_type),
783                },
784                Type::Map(map) => SerdeType::Map {
785                    r#type: "map".to_string(),
786                    key_id: map.key_field.id,
787                    key: Cow::Borrowed(&map.key_field.field_type),
788                    value_id: map.value_field.id,
789                    value_required: map.value_field.required,
790                    value: Cow::Borrowed(&map.value_field.field_type),
791                },
792                Type::Struct(s) => SerdeType::Struct {
793                    r#type: "struct".to_string(),
794                    fields: Cow::Borrowed(&s.fields),
795                },
796                Type::Primitive(p) => SerdeType::Primitive(p.clone()),
797            }
798        }
799    }
800}
801
802#[derive(Debug, PartialEq, Eq, Clone)]
803/// A map is a collection of key-value pairs with a key type and a value type.
804/// Both the key field and value field each have an integer id that is unique in the table schema.
805/// Map keys are required and map values can be either optional or required.
806/// Both map keys and map values may be any type, including nested types.
807pub struct MapType {
808    /// Field for key.
809    pub key_field: NestedFieldRef,
810    /// Field for value.
811    pub value_field: NestedFieldRef,
812}
813
814impl MapType {
815    /// Construct a map type with the given key and value fields.
816    pub fn new(key_field: NestedFieldRef, value_field: NestedFieldRef) -> Self {
817        Self {
818            key_field,
819            value_field,
820        }
821    }
822}
823
824#[cfg(test)]
825mod tests {
826    use pretty_assertions::assert_eq;
827    use uuid::Uuid;
828
829    use super::*;
830    use crate::spec::values::PrimitiveLiteral;
831
832    fn check_type_serde(json: &str, expected_type: Type) {
833        let desered_type: Type = serde_json::from_str(json).unwrap();
834        assert_eq!(desered_type, expected_type);
835
836        let sered_json = serde_json::to_string(&expected_type).unwrap();
837        let parsed_json_value = serde_json::from_str::<serde_json::Value>(&sered_json).unwrap();
838        let raw_json_value = serde_json::from_str::<serde_json::Value>(json).unwrap();
839
840        assert_eq!(parsed_json_value, raw_json_value);
841    }
842
/// Runs `check_type_serde` on a struct schema that declares one required
/// field for each primitive type name listed in the JSON below.
#[test]
fn primitive_type_serde() {
    let record = r#"
    {
        "type": "struct",
        "fields": [
            {"id": 1, "name": "bool_field", "required": true, "type": "boolean"},
            {"id": 2, "name": "int_field", "required": true, "type": "int"},
            {"id": 3, "name": "long_field", "required": true, "type": "long"},
            {"id": 4, "name": "float_field", "required": true, "type": "float"},
            {"id": 5, "name": "double_field", "required": true, "type": "double"},
            {"id": 6, "name": "decimal_field", "required": true, "type": "decimal(9,2)"},
            {"id": 7, "name": "date_field", "required": true, "type": "date"},
            {"id": 8, "name": "time_field", "required": true, "type": "time"},
            {"id": 9, "name": "timestamp_field", "required": true, "type": "timestamp"},
            {"id": 10, "name": "timestamptz_field", "required": true, "type": "timestamptz"},
            {"id": 11, "name": "timestamp_ns_field", "required": true, "type": "timestamp_ns"},
            {"id": 12, "name": "timestamptz_ns_field", "required": true, "type": "timestamptz_ns"},
            {"id": 13, "name": "uuid_field", "required": true, "type": "uuid"},
            {"id": 14, "name": "fixed_field", "required": true, "type": "fixed[10]"},
            {"id": 15, "name": "binary_field", "required": true, "type": "binary"},
            {"id": 16, "name": "string_field", "required": true, "type": "string"}
        ]
    }
    "#;

    // Every entry is a required field wrapping a primitive type, so one
    // constructor covers all sixteen of them.
    let field = |id, name: &str, primitive: PrimitiveType| {
        NestedField::required(id, name, Type::Primitive(primitive))
    };

    let fields = vec![
        field(1, "bool_field", PrimitiveType::Boolean).into(),
        field(2, "int_field", PrimitiveType::Int).into(),
        field(3, "long_field", PrimitiveType::Long).into(),
        field(4, "float_field", PrimitiveType::Float).into(),
        field(5, "double_field", PrimitiveType::Double).into(),
        field(
            6,
            "decimal_field",
            PrimitiveType::Decimal {
                precision: 9,
                scale: 2,
            },
        )
        .into(),
        field(7, "date_field", PrimitiveType::Date).into(),
        field(8, "time_field", PrimitiveType::Time).into(),
        field(9, "timestamp_field", PrimitiveType::Timestamp).into(),
        field(10, "timestamptz_field", PrimitiveType::Timestamptz).into(),
        field(11, "timestamp_ns_field", PrimitiveType::TimestampNs).into(),
        field(12, "timestamptz_ns_field", PrimitiveType::TimestamptzNs).into(),
        field(13, "uuid_field", PrimitiveType::Uuid).into(),
        field(14, "fixed_field", PrimitiveType::Fixed(10)).into(),
        field(15, "binary_field", PrimitiveType::Binary).into(),
        field(16, "string_field", PrimitiveType::String).into(),
    ];

    // The lookup caches are left unset on the expected value.
    let expected = Type::Struct(StructType {
        fields,
        id_lookup: OnceLock::default(),
        name_lookup: OnceLock::default(),
    });

    check_type_serde(record, expected)
}
950
/// Runs `check_type_serde` on a two-field struct whose required `uuid` field
/// carries both an `initial-default` and a `write-default`.
#[test]
fn struct_type() {
    let record = r#"
        {
            "type": "struct",
            "fields": [
                {
                    "id": 1,
                    "name": "id",
                    "required": true,
                    "type": "uuid",
                    "initial-default": "0db3e2a8-9d1d-42b9-aa7b-74ebe558dceb",
                    "write-default": "ec5911be-b0a7-458c-8438-c9a3e53cffae"
                }, {
                    "id": 2,
                    "name": "data",
                    "required": false,
                    "type": "int"
                }
            ]
        }
        "#;

    // The expected defaults are `UInt128` literals built from the parsed
    // UUID's `as_u128()` value.
    let uuid_literal = |text: &str| {
        let parsed = Uuid::parse_str(text).unwrap();
        Literal::Primitive(PrimitiveLiteral::UInt128(parsed.as_u128()))
    };

    let id_field = NestedField::required(1, "id", Type::Primitive(PrimitiveType::Uuid))
        .with_initial_default(uuid_literal("0db3e2a8-9d1d-42b9-aa7b-74ebe558dceb"))
        .with_write_default(uuid_literal("ec5911be-b0a7-458c-8438-c9a3e53cffae"));
    let data_field = NestedField::optional(2, "data", Type::Primitive(PrimitiveType::Int));

    // Unlike the other struct tests, the lookup tables are filled in by hand here.
    let id_lookup = HashMap::from([(1, 0), (2, 1)]);
    let name_lookup = HashMap::from([("id".to_string(), 0), ("data".to_string(), 1)]);

    let expected = Type::Struct(StructType {
        fields: vec![id_field.into(), data_field.into()],
        id_lookup: id_lookup.into(),
        name_lookup: name_lookup.into(),
    });

    check_type_serde(record, expected)
}
997
/// Runs `check_type_serde` on a struct whose third field, `address`, is itself
/// a struct with three primitive fields.
#[test]
fn test_deeply_nested_struct() {
    let record = r#"
{
  "type": "struct",
  "fields": [
    {
      "id": 1,
      "name": "id",
      "required": true,
      "type": "uuid",
      "initial-default": "0db3e2a8-9d1d-42b9-aa7b-74ebe558dceb",
      "write-default": "ec5911be-b0a7-458c-8438-c9a3e53cffae"
    },
    {
      "id": 2,
      "name": "data",
      "required": false,
      "type": "int"
    },
    {
      "id": 3,
      "name": "address",
      "required": true,
      "type": {
        "type": "struct",
        "fields": [
          {
            "id": 4,
            "name": "street",
            "required": true,
            "type": "string"
          },
          {
            "id": 5,
            "name": "province",
            "required": false,
            "type": "string"
          },
          {
            "id": 6,
            "name": "zip",
            "required": true,
            "type": "int"
          }
        ]
      }
    }
  ]
}
"#;

    // The expected defaults are `UInt128` literals built from the parsed
    // UUID's `as_u128()` value.
    let uuid_literal = |text: &str| {
        let parsed = Uuid::parse_str(text).unwrap();
        Literal::Primitive(PrimitiveLiteral::UInt128(parsed.as_u128()))
    };

    // Inner struct, built first so the outer field list stays flat.
    let address = Type::Struct(StructType::new(vec![
        NestedField::required(4, "street", Type::Primitive(PrimitiveType::String)).into(),
        NestedField::optional(5, "province", Type::Primitive(PrimitiveType::String)).into(),
        NestedField::required(6, "zip", Type::Primitive(PrimitiveType::Int)).into(),
    ]));

    let top_level_fields = vec![
        NestedField::required(1, "id", Type::Primitive(PrimitiveType::Uuid))
            .with_initial_default(uuid_literal("0db3e2a8-9d1d-42b9-aa7b-74ebe558dceb"))
            .with_write_default(uuid_literal("ec5911be-b0a7-458c-8438-c9a3e53cffae"))
            .into(),
        NestedField::optional(2, "data", Type::Primitive(PrimitiveType::Int)).into(),
        NestedField::required(3, "address", address).into(),
    ];

    let struct_type = Type::Struct(StructType::new(top_level_fields));

    check_type_serde(record, struct_type)
}
1080
/// Runs `check_type_serde` on a list type with a required `string` element.
#[test]
fn list() {
    let record = r#"
        {
            "type": "list",
            "element-id": 3,
            "element-required": true,
            "element": "string"
        }
        "#;

    let element_type = Type::Primitive(PrimitiveType::String);
    let expected = Type::List(ListType {
        element_field: NestedField::list_element(3, element_type, true).into(),
    });

    check_type_serde(record, expected);
}
1104
/// Runs `check_type_serde` on a map type from `string` keys to optional
/// `double` values.
#[test]
fn map() {
    let record = r#"
        {
            "type": "map",
            "key-id": 4,
            "key": "string",
            "value-id": 5,
            "value-required": false,
            "value": "double"
        }
        "#;

    let key = NestedField::map_key_element(4, Type::Primitive(PrimitiveType::String));
    let value = NestedField::map_value_element(5, Type::Primitive(PrimitiveType::Double), false);

    let expected = Type::Map(MapType {
        key_field: key.into(),
        value_field: value.into(),
    });

    check_type_serde(record, expected);
}
1132
/// Runs `check_type_serde` on a map type from `int` keys to optional `string`
/// values.
#[test]
fn map_int() {
    let record = r#"
        {
            "type": "map",
            "key-id": 4,
            "key": "int",
            "value-id": 5,
            "value-required": false,
            "value": "string"
        }
        "#;

    let key = NestedField::map_key_element(4, Type::Primitive(PrimitiveType::Int));
    let value = NestedField::map_value_element(5, Type::Primitive(PrimitiveType::String), false);

    let expected = Type::Map(MapType {
        key_field: key.into(),
        value_field: value.into(),
    });

    check_type_serde(record, expected);
}
1160
/// Checks the decimal sizing helpers on `Type`.
///
/// The table holds the value expected from `Type::decimal_max_precision` for
/// byte widths 1 through 24, in order. Two spot checks of
/// `Type::decimal_required_bytes` follow.
#[test]
fn test_decimal_precision() {
    let expected_max_precision = [
        2, 4, 6, 9, 11, 14, 16, 18, 21, 23, 26, 28, 31, 33, 35, 38, 40, 43, 45, 47, 50, 52, 55,
        57,
    ];
    // Byte widths are 1-based. Iterating the width itself (rather than a
    // zero-based index plus one) makes the failure message name the same
    // value that was passed to the function under test.
    for (num_bytes, max_precision) in (1u32..).zip(expected_max_precision) {
        assert_eq!(
            max_precision,
            Type::decimal_max_precision(num_bytes).unwrap(),
            "Failed to calculate max precision for {num_bytes} byte(s)"
        );
    }

    assert_eq!(5, Type::decimal_required_bytes(10).unwrap());
    assert_eq!(16, Type::decimal_required_bytes(38).unwrap());
}
1178
/// Asserts that `PrimitiveType::compatible` accepts each listed
/// `(PrimitiveType, PrimitiveLiteral)` pair.
///
/// The UUID literal comes from `Uuid::new_v4()`, so its value differs on every
/// run.
#[test]
fn test_primitive_type_compatible() {
    let pairs = vec![
        (PrimitiveType::Boolean, PrimitiveLiteral::Boolean(true)),
        (PrimitiveType::Int, PrimitiveLiteral::Int(1)),
        (PrimitiveType::Long, PrimitiveLiteral::Long(1)),
        (PrimitiveType::Float, PrimitiveLiteral::Float(1.0.into())),
        (PrimitiveType::Double, PrimitiveLiteral::Double(1.0.into())),
        (
            PrimitiveType::Decimal {
                precision: 9,
                scale: 2,
            },
            PrimitiveLiteral::Int128(1),
        ),
        (PrimitiveType::Date, PrimitiveLiteral::Int(1)),
        (PrimitiveType::Time, PrimitiveLiteral::Long(1)),
        (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(1)),
        (PrimitiveType::Timestamp, PrimitiveLiteral::Long(1)),
        (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(1)),
        (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(1)),
        (
            PrimitiveType::Uuid,
            PrimitiveLiteral::UInt128(Uuid::new_v4().as_u128()),
        ),
        (PrimitiveType::Fixed(8), PrimitiveLiteral::Binary(vec![1])),
        (PrimitiveType::Binary, PrimitiveLiteral::Binary(vec![1])),
    ];
    for (ty, literal) in pairs {
        // Name the offending pair: a bare `assert!` in a loop would only
        // report the expression text, not which entry failed.
        assert!(
            ty.compatible(&literal),
            "{ty:?} should be compatible with {literal:?}"
        );
    }
}
1211}