iceberg/spec/
datatypes.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18/*!
19 * Data Types
20 */
21use std::collections::HashMap;
22use std::convert::identity;
23use std::fmt;
24use std::ops::Index;
25use std::sync::{Arc, OnceLock};
26
27use ::serde::de::{MapAccess, Visitor};
28use serde::de::{Error, IntoDeserializer};
29use serde::{Deserialize, Deserializer, Serialize, Serializer, de};
30use serde_json::Value as JsonValue;
31
32use super::values::Literal;
33use crate::ensure_data_valid;
34use crate::error::Result;
35use crate::spec::PrimitiveLiteral;
36use crate::spec::datatypes::_decimal::{MAX_PRECISION, REQUIRED_LENGTH};
37
/// Field name for list type.
///
/// Used by `NestedField::list_element` as the name of the element field.
pub const LIST_FIELD_NAME: &str = "element";
/// Field name for map type's key.
///
/// Used by `NestedField::map_key_element` as the name of the key field.
pub const MAP_KEY_FIELD_NAME: &str = "key";
/// Field name for map type's value.
///
/// Used by `NestedField::map_value_element` as the name of the value field.
pub const MAP_VALUE_FIELD_NAME: &str = "value";

/// Largest decimal byte width accepted by `Type::decimal_max_precision`; it is
/// also the number of entries in the `_decimal::MAX_PRECISION` lookup table.
pub(crate) const MAX_DECIMAL_BYTES: u32 = 24;
/// Largest decimal precision accepted by `Type::decimal` and
/// `Type::decimal_required_bytes`; it is also the number of entries in the
/// `_decimal::REQUIRED_LENGTH` lookup table.
pub(crate) const MAX_DECIMAL_PRECISION: u32 = 38;
47
48mod _decimal {
49    use once_cell::sync::Lazy;
50
51    use crate::spec::{MAX_DECIMAL_BYTES, MAX_DECIMAL_PRECISION};
52
53    // Max precision of bytes, starts from 1
54    pub(super) static MAX_PRECISION: Lazy<[u32; MAX_DECIMAL_BYTES as usize]> = Lazy::new(|| {
55        let mut ret: [u32; 24] = [0; 24];
56        for (i, prec) in ret.iter_mut().enumerate() {
57            *prec = 2f64.powi((8 * (i + 1) - 1) as i32).log10().floor() as u32;
58        }
59
60        ret
61    });
62
63    //  Required bytes of precision, starts from 1
64    pub(super) static REQUIRED_LENGTH: Lazy<[u32; MAX_DECIMAL_PRECISION as usize]> =
65        Lazy::new(|| {
66            let mut ret: [u32; MAX_DECIMAL_PRECISION as usize] =
67                [0; MAX_DECIMAL_PRECISION as usize];
68
69            for (i, required_len) in ret.iter_mut().enumerate() {
70                for j in 0..MAX_PRECISION.len() {
71                    if MAX_PRECISION[j] >= ((i + 1) as u32) {
72                        *required_len = (j + 1) as u32;
73                        break;
74                    }
75                }
76            }
77
78            ret
79        });
80}
81
#[derive(Debug, PartialEq, Eq, Clone)]
/// All data types are either primitives or nested types, which are maps, lists, or structs.
///
/// Serialization and deserialization go through `_serde::SerdeType` (see the
/// `Serialize`/`Deserialize` impls for this type).
pub enum Type {
    /// Primitive (non-nested) type, see [`PrimitiveType`].
    Primitive(PrimitiveType),
    /// Struct type, see [`StructType`].
    Struct(StructType),
    /// List type, see [`ListType`].
    List(ListType),
    /// Map type, see [`MapType`].
    Map(MapType),
}
94
95impl fmt::Display for Type {
96    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
97        match self {
98            Type::Primitive(primitive) => write!(f, "{primitive}"),
99            Type::Struct(s) => write!(f, "{s}"),
100            Type::List(_) => write!(f, "list"),
101            Type::Map(_) => write!(f, "map"),
102        }
103    }
104}
105
106impl Type {
107    /// Whether the type is primitive type.
108    #[inline(always)]
109    pub fn is_primitive(&self) -> bool {
110        matches!(self, Type::Primitive(_))
111    }
112
113    /// Whether the type is struct type.
114    #[inline(always)]
115    pub fn is_struct(&self) -> bool {
116        matches!(self, Type::Struct(_))
117    }
118
119    /// Whether the type is nested type.
120    #[inline(always)]
121    pub fn is_nested(&self) -> bool {
122        matches!(self, Type::Struct(_) | Type::List(_) | Type::Map(_))
123    }
124
125    /// Convert Type to reference of PrimitiveType
126    pub fn as_primitive_type(&self) -> Option<&PrimitiveType> {
127        if let Type::Primitive(primitive_type) = self {
128            Some(primitive_type)
129        } else {
130            None
131        }
132    }
133
134    /// Convert Type to StructType
135    pub fn to_struct_type(self) -> Option<StructType> {
136        if let Type::Struct(struct_type) = self {
137            Some(struct_type)
138        } else {
139            None
140        }
141    }
142
143    /// Return max precision for decimal given [`num_bytes`] bytes.
144    #[inline(always)]
145    pub fn decimal_max_precision(num_bytes: u32) -> Result<u32> {
146        ensure_data_valid!(
147            num_bytes > 0 && num_bytes <= MAX_DECIMAL_BYTES,
148            "Decimal length larger than {MAX_DECIMAL_BYTES} is not supported: {num_bytes}",
149        );
150        Ok(MAX_PRECISION[num_bytes as usize - 1])
151    }
152
153    /// Returns minimum bytes required for decimal with [`precision`].
154    #[inline(always)]
155    pub fn decimal_required_bytes(precision: u32) -> Result<u32> {
156        ensure_data_valid!(
157            precision > 0 && precision <= MAX_DECIMAL_PRECISION,
158            "Decimals with precision larger than {MAX_DECIMAL_PRECISION} are not supported: {precision}",
159        );
160        Ok(REQUIRED_LENGTH[precision as usize - 1])
161    }
162
163    /// Creates  decimal type.
164    #[inline(always)]
165    pub fn decimal(precision: u32, scale: u32) -> Result<Self> {
166        ensure_data_valid!(
167            precision > 0 && precision <= MAX_DECIMAL_PRECISION,
168            "Decimals with precision larger than {MAX_DECIMAL_PRECISION} are not supported: {precision}",
169        );
170        Ok(Type::Primitive(PrimitiveType::Decimal { precision, scale }))
171    }
172
173    /// Check if it's float or double type.
174    #[inline(always)]
175    pub fn is_floating_type(&self) -> bool {
176        matches!(
177            self,
178            Type::Primitive(PrimitiveType::Float) | Type::Primitive(PrimitiveType::Double)
179        )
180    }
181}
182
183impl From<PrimitiveType> for Type {
184    fn from(value: PrimitiveType) -> Self {
185        Self::Primitive(value)
186    }
187}
188
189impl From<StructType> for Type {
190    fn from(value: StructType) -> Self {
191        Type::Struct(value)
192    }
193}
194
195impl From<ListType> for Type {
196    fn from(value: ListType) -> Self {
197        Type::List(value)
198    }
199}
200
201impl From<MapType> for Type {
202    fn from(value: MapType) -> Self {
203        Type::Map(value)
204    }
205}
206
/// Primitive data types
///
/// The derived serde code is declared with `remote = "Self"`. The hand-written
/// `Serialize`/`Deserialize` impls for this type handle `Decimal` and `Fixed`
/// themselves (as `decimal(P,S)` and `fixed[L]` strings) and delegate every
/// other variant to the derived code, which uses the lowercase variant names
/// unless a variant is explicitly renamed.
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone, Hash)]
#[serde(rename_all = "lowercase", remote = "Self")]
pub enum PrimitiveType {
    /// True or False
    Boolean,
    /// 32-bit signed integer
    Int,
    /// 64-bit signed integer
    Long,
    /// 32-bit IEEE 754 floating point.
    Float,
    /// 64-bit IEEE 754 floating point.
    Double,
    /// Fixed point decimal
    Decimal {
        /// Precision, must be 38 or less
        precision: u32,
        /// Scale
        scale: u32,
    },
    /// Calendar date without timezone or time.
    Date,
    /// Time of day in microsecond precision, without date or timezone.
    Time,
    /// Timestamp in microsecond precision, without timezone
    Timestamp,
    /// Timestamp in microsecond precision, with timezone
    Timestamptz,
    /// Timestamp in nanosecond precision, without timezone
    // Explicit rename: `rename_all = "lowercase"` would not insert the underscore.
    #[serde(rename = "timestamp_ns")]
    TimestampNs,
    /// Timestamp in nanosecond precision with timezone
    // Explicit rename: `rename_all = "lowercase"` would not insert the underscore.
    #[serde(rename = "timestamptz_ns")]
    TimestamptzNs,
    /// Arbitrary-length character sequences encoded in utf-8
    String,
    /// Universally Unique Identifiers, should use 16-byte fixed
    Uuid,
    /// Fixed length byte array; the payload is the length `L` written in `fixed[L]`.
    Fixed(u64),
    /// Arbitrary-length byte array.
    Binary,
}
251
252impl PrimitiveType {
253    /// Check whether literal is compatible with the type.
254    pub fn compatible(&self, literal: &PrimitiveLiteral) -> bool {
255        matches!(
256            (self, literal),
257            (PrimitiveType::Boolean, PrimitiveLiteral::Boolean(_))
258                | (PrimitiveType::Int, PrimitiveLiteral::Int(_))
259                | (PrimitiveType::Long, PrimitiveLiteral::Long(_))
260                | (PrimitiveType::Float, PrimitiveLiteral::Float(_))
261                | (PrimitiveType::Double, PrimitiveLiteral::Double(_))
262                | (PrimitiveType::Decimal { .. }, PrimitiveLiteral::Int128(_))
263                | (PrimitiveType::Date, PrimitiveLiteral::Int(_))
264                | (PrimitiveType::Time, PrimitiveLiteral::Long(_))
265                | (PrimitiveType::Timestamp, PrimitiveLiteral::Long(_))
266                | (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(_))
267                | (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(_))
268                | (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(_))
269                | (PrimitiveType::String, PrimitiveLiteral::String(_))
270                | (PrimitiveType::Uuid, PrimitiveLiteral::UInt128(_))
271                | (PrimitiveType::Fixed(_), PrimitiveLiteral::Binary(_))
272                | (PrimitiveType::Binary, PrimitiveLiteral::Binary(_))
273        )
274    }
275}
276
277impl Serialize for Type {
278    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
279    where S: Serializer {
280        let type_serde = _serde::SerdeType::from(self);
281        type_serde.serialize(serializer)
282    }
283}
284
285impl<'de> Deserialize<'de> for Type {
286    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
287    where D: Deserializer<'de> {
288        let type_serde = _serde::SerdeType::deserialize(deserializer)?;
289        Ok(Type::from(type_serde))
290    }
291}
292
293impl<'de> Deserialize<'de> for PrimitiveType {
294    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
295    where D: Deserializer<'de> {
296        let s = String::deserialize(deserializer)?;
297        if s.starts_with("decimal") {
298            deserialize_decimal(s.into_deserializer())
299        } else if s.starts_with("fixed") {
300            deserialize_fixed(s.into_deserializer())
301        } else {
302            PrimitiveType::deserialize(s.into_deserializer())
303        }
304    }
305}
306
307impl Serialize for PrimitiveType {
308    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
309    where S: Serializer {
310        match self {
311            PrimitiveType::Decimal { precision, scale } => {
312                serialize_decimal(precision, scale, serializer)
313            }
314            PrimitiveType::Fixed(l) => serialize_fixed(l, serializer),
315            _ => PrimitiveType::serialize(self, serializer),
316        }
317    }
318}
319
320fn deserialize_decimal<'de, D>(deserializer: D) -> std::result::Result<PrimitiveType, D::Error>
321where D: Deserializer<'de> {
322    let s = String::deserialize(deserializer)?;
323    let (precision, scale) = s
324        .trim_start_matches(r"decimal(")
325        .trim_end_matches(')')
326        .split_once(',')
327        .ok_or_else(|| D::Error::custom("Decimal requires precision and scale: {s}"))?;
328
329    Ok(PrimitiveType::Decimal {
330        precision: precision.trim().parse().map_err(D::Error::custom)?,
331        scale: scale.trim().parse().map_err(D::Error::custom)?,
332    })
333}
334
335fn serialize_decimal<S>(
336    precision: &u32,
337    scale: &u32,
338    serializer: S,
339) -> std::result::Result<S::Ok, S::Error>
340where
341    S: Serializer,
342{
343    serializer.serialize_str(&format!("decimal({precision},{scale})"))
344}
345
346fn deserialize_fixed<'de, D>(deserializer: D) -> std::result::Result<PrimitiveType, D::Error>
347where D: Deserializer<'de> {
348    let fixed = String::deserialize(deserializer)?
349        .trim_start_matches(r"fixed[")
350        .trim_end_matches(']')
351        .to_owned();
352
353    fixed
354        .parse()
355        .map(PrimitiveType::Fixed)
356        .map_err(D::Error::custom)
357}
358
359fn serialize_fixed<S>(value: &u64, serializer: S) -> std::result::Result<S::Ok, S::Error>
360where S: Serializer {
361    serializer.serialize_str(&format!("fixed[{value}]"))
362}
363
364impl fmt::Display for PrimitiveType {
365    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
366        match self {
367            PrimitiveType::Boolean => write!(f, "boolean"),
368            PrimitiveType::Int => write!(f, "int"),
369            PrimitiveType::Long => write!(f, "long"),
370            PrimitiveType::Float => write!(f, "float"),
371            PrimitiveType::Double => write!(f, "double"),
372            PrimitiveType::Decimal { precision, scale } => {
373                write!(f, "decimal({precision},{scale})")
374            }
375            PrimitiveType::Date => write!(f, "date"),
376            PrimitiveType::Time => write!(f, "time"),
377            PrimitiveType::Timestamp => write!(f, "timestamp"),
378            PrimitiveType::Timestamptz => write!(f, "timestamptz"),
379            PrimitiveType::TimestampNs => write!(f, "timestamp_ns"),
380            PrimitiveType::TimestamptzNs => write!(f, "timestamptz_ns"),
381            PrimitiveType::String => write!(f, "string"),
382            PrimitiveType::Uuid => write!(f, "uuid"),
383            PrimitiveType::Fixed(size) => write!(f, "fixed({size})"),
384            PrimitiveType::Binary => write!(f, "binary"),
385        }
386    }
387}
388
/// DataType for a specific struct
///
/// Equality compares `fields` only (see the manual `PartialEq` impl); the two
/// lookup caches are skipped during serialization and are filled lazily on
/// the first lookup by id or by name.
#[derive(Debug, Serialize, Clone, Default)]
#[serde(rename = "struct", tag = "type")]
pub struct StructType {
    /// Struct fields
    fields: Vec<NestedFieldRef>,
    /// Lookup for index by field id
    #[serde(skip_serializing)]
    id_lookup: OnceLock<HashMap<i32, usize>>,
    /// Lookup for index by field name
    #[serde(skip_serializing)]
    name_lookup: OnceLock<HashMap<String, usize>>,
}
401
402impl<'de> Deserialize<'de> for StructType {
403    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
404    where D: Deserializer<'de> {
405        #[derive(Deserialize)]
406        #[serde(field_identifier, rename_all = "lowercase")]
407        enum Field {
408            Type,
409            Fields,
410        }
411
412        struct StructTypeVisitor;
413
414        impl<'de> Visitor<'de> for StructTypeVisitor {
415            type Value = StructType;
416
417            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
418                formatter.write_str("struct")
419            }
420
421            fn visit_map<V>(self, mut map: V) -> std::result::Result<StructType, V::Error>
422            where V: MapAccess<'de> {
423                let mut fields = None;
424                while let Some(key) = map.next_key()? {
425                    match key {
426                        Field::Type => {
427                            let type_val: String = map.next_value()?;
428                            if type_val != "struct" {
429                                return Err(serde::de::Error::custom(format!(
430                                    "expected type 'struct', got '{type_val}'"
431                                )));
432                            }
433                        }
434                        Field::Fields => {
435                            if fields.is_some() {
436                                return Err(serde::de::Error::duplicate_field("fields"));
437                            }
438                            fields = Some(map.next_value()?);
439                        }
440                    }
441                }
442                let fields: Vec<NestedFieldRef> =
443                    fields.ok_or_else(|| de::Error::missing_field("fields"))?;
444
445                Ok(StructType::new(fields))
446            }
447        }
448
449        const FIELDS: &[&str] = &["type", "fields"];
450        deserializer.deserialize_struct("struct", FIELDS, StructTypeVisitor)
451    }
452}
453
454impl StructType {
455    /// Creates a struct type with the given fields.
456    pub fn new(fields: Vec<NestedFieldRef>) -> Self {
457        Self {
458            fields,
459            id_lookup: OnceLock::new(),
460            name_lookup: OnceLock::new(),
461        }
462    }
463
464    /// Get struct field with certain id
465    pub fn field_by_id(&self, id: i32) -> Option<&NestedFieldRef> {
466        self.field_id_to_index(id).map(|idx| &self.fields[idx])
467    }
468
469    fn field_id_to_index(&self, field_id: i32) -> Option<usize> {
470        self.id_lookup
471            .get_or_init(|| {
472                HashMap::from_iter(self.fields.iter().enumerate().map(|(i, x)| (x.id, i)))
473            })
474            .get(&field_id)
475            .copied()
476    }
477
478    /// Get struct field with certain field name
479    pub fn field_by_name(&self, name: &str) -> Option<&NestedFieldRef> {
480        self.field_name_to_index(name).map(|idx| &self.fields[idx])
481    }
482
483    fn field_name_to_index(&self, name: &str) -> Option<usize> {
484        self.name_lookup
485            .get_or_init(|| {
486                HashMap::from_iter(
487                    self.fields
488                        .iter()
489                        .enumerate()
490                        .map(|(i, x)| (x.name.clone(), i)),
491                )
492            })
493            .get(name)
494            .copied()
495    }
496
497    /// Get fields.
498    pub fn fields(&self) -> &[NestedFieldRef] {
499        &self.fields
500    }
501}
502
503impl PartialEq for StructType {
504    fn eq(&self, other: &Self) -> bool {
505        self.fields == other.fields
506    }
507}
508
509impl Eq for StructType {}
510
511impl Index<usize> for StructType {
512    type Output = NestedField;
513
514    fn index(&self, index: usize) -> &Self::Output {
515        &self.fields[index]
516    }
517}
518
519impl fmt::Display for StructType {
520    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
521        write!(f, "struct<")?;
522        for field in &self.fields {
523            write!(f, "{}", field.field_type)?;
524        }
525        write!(f, ">")
526    }
527}
528
#[derive(Debug, PartialEq, Serialize, Deserialize, Eq, Clone)]
#[serde(from = "SerdeNestedField", into = "SerdeNestedField")]
/// A single named, typed field of a struct (also used for list elements and map keys/values).
///
/// A struct is a tuple of typed values. Each field in the tuple is named and has an integer id that is unique in the table schema.
/// Each field can be either optional or required, meaning that values can (or cannot) be null. Fields may be any type.
/// Fields may have an optional comment or doc string. Fields can have default values.
///
/// Serialization and deserialization go through `SerdeNestedField`.
pub struct NestedField {
    /// Id unique in table schema
    pub id: i32,
    /// Field Name
    pub name: String,
    /// Optional or required
    pub required: bool,
    /// Datatype
    pub field_type: Box<Type>,
    /// Fields may have an optional comment or doc string.
    pub doc: Option<String>,
    /// Used to populate the field's value for all records that were written before the field was added to the schema
    pub initial_default: Option<Literal>,
    /// Used to populate the field's value for any records written after the field was added to the schema, if the writer does not supply the field's value
    pub write_default: Option<Literal>,
}
550
/// Serde-facing mirror of [`NestedField`].
///
/// Keys are written in kebab-case (e.g. `initial-default`), the data type is
/// stored under the key `type`, and both defaults are kept as raw JSON values.
/// `doc` and the two defaults are omitted from the output when they are `None`.
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
#[serde(rename_all = "kebab-case")]
struct SerdeNestedField {
    /// Field id.
    pub id: i32,
    /// Field name.
    pub name: String,
    /// Whether the field is required.
    pub required: bool,
    /// Data type of the field, serialized under the key `type`.
    #[serde(rename = "type")]
    pub field_type: Box<Type>,
    /// Optional doc string.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub doc: Option<String>,
    /// Initial default as a raw JSON value.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub initial_default: Option<JsonValue>,
    /// Write default as a raw JSON value.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub write_default: Option<JsonValue>,
}
566
567impl From<SerdeNestedField> for NestedField {
568    fn from(value: SerdeNestedField) -> Self {
569        NestedField {
570            id: value.id,
571            name: value.name,
572            required: value.required,
573            initial_default: value.initial_default.and_then(|x| {
574                Literal::try_from_json(x, &value.field_type)
575                    .ok()
576                    .and_then(identity)
577            }),
578            write_default: value.write_default.and_then(|x| {
579                Literal::try_from_json(x, &value.field_type)
580                    .ok()
581                    .and_then(identity)
582            }),
583            field_type: value.field_type,
584            doc: value.doc,
585        }
586    }
587}
588
589impl From<NestedField> for SerdeNestedField {
590    fn from(value: NestedField) -> Self {
591        let initial_default = value.initial_default.map(|x| x.try_into_json(&value.field_type).expect("We should have checked this in NestedField::with_initial_default, it can't be converted to json value"));
592        let write_default = value.write_default.map(|x| x.try_into_json(&value.field_type).expect("We should have checked this in NestedField::with_write_default, it can't be converted to json value"));
593        SerdeNestedField {
594            id: value.id,
595            name: value.name,
596            required: value.required,
597            field_type: value.field_type,
598            doc: value.doc,
599            initial_default,
600            write_default,
601        }
602    }
603}
604
/// Reference to nested field.
///
/// An [`Arc`]-shared [`NestedField`]; this is the form stored by struct, list
/// and map types.
pub type NestedFieldRef = Arc<NestedField>;
607
608impl NestedField {
609    /// Construct a new field.
610    pub fn new(id: i32, name: impl ToString, field_type: Type, required: bool) -> Self {
611        Self {
612            id,
613            name: name.to_string(),
614            required,
615            field_type: Box::new(field_type),
616            doc: None,
617            initial_default: None,
618            write_default: None,
619        }
620    }
621
622    /// Construct a required field.
623    pub fn required(id: i32, name: impl ToString, field_type: Type) -> Self {
624        Self::new(id, name, field_type, true)
625    }
626
627    /// Construct an optional field.
628    pub fn optional(id: i32, name: impl ToString, field_type: Type) -> Self {
629        Self::new(id, name, field_type, false)
630    }
631
632    /// Construct list type's element field.
633    pub fn list_element(id: i32, field_type: Type, required: bool) -> Self {
634        Self::new(id, LIST_FIELD_NAME, field_type, required)
635    }
636
637    /// Construct map type's key field.
638    pub fn map_key_element(id: i32, field_type: Type) -> Self {
639        Self::required(id, MAP_KEY_FIELD_NAME, field_type)
640    }
641
642    /// Construct map type's value field.
643    pub fn map_value_element(id: i32, field_type: Type, required: bool) -> Self {
644        Self::new(id, MAP_VALUE_FIELD_NAME, field_type, required)
645    }
646
647    /// Set the field's doc.
648    pub fn with_doc(mut self, doc: impl ToString) -> Self {
649        self.doc = Some(doc.to_string());
650        self
651    }
652
653    /// Set the field's initial default value.
654    pub fn with_initial_default(mut self, value: Literal) -> Self {
655        self.initial_default = Some(value);
656        self
657    }
658
659    /// Set the field's initial default value.
660    pub fn with_write_default(mut self, value: Literal) -> Self {
661        self.write_default = Some(value);
662        self
663    }
664
665    /// Set the id of the field.
666    pub(crate) fn with_id(mut self, id: i32) -> Self {
667        self.id = id;
668        self
669    }
670}
671
672impl fmt::Display for NestedField {
673    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
674        write!(f, "{}: ", self.id)?;
675        write!(f, "{}: ", self.name)?;
676        if self.required {
677            write!(f, "required ")?;
678        } else {
679            write!(f, "optional ")?;
680        }
681        write!(f, "{} ", self.field_type)?;
682        if let Some(doc) = &self.doc {
683            write!(f, "{doc}")?;
684        }
685        Ok(())
686    }
687}
688
689#[derive(Debug, PartialEq, Eq, Clone)]
690/// A list is a collection of values with some element type. The element field has an integer id that is unique in the table schema.
691/// Elements can be either optional or required. Element types may be any type.
692pub struct ListType {
693    /// Element field of list type.
694    pub element_field: NestedFieldRef,
695}
696
697impl ListType {
698    /// Construct a list type with the given element field.
699    pub fn new(element_field: NestedFieldRef) -> Self {
700        Self { element_field }
701    }
702}
703
704/// Module for type serialization/deserialization.
705pub(super) mod _serde {
706    use std::borrow::Cow;
707
708    use serde_derive::{Deserialize, Serialize};
709
710    use crate::spec::datatypes::Type::Map;
711    use crate::spec::datatypes::{
712        ListType, MapType, NestedField, NestedFieldRef, PrimitiveType, StructType, Type,
713    };
714
715    /// List type for serialization and deserialization
716    #[derive(Serialize, Deserialize)]
717    #[serde(untagged)]
718    pub(super) enum SerdeType<'a> {
719        #[serde(rename_all = "kebab-case")]
720        List {
721            r#type: String,
722            element_id: i32,
723            element_required: bool,
724            element: Cow<'a, Type>,
725        },
726        Struct {
727            r#type: String,
728            fields: Cow<'a, [NestedFieldRef]>,
729        },
730        #[serde(rename_all = "kebab-case")]
731        Map {
732            r#type: String,
733            key_id: i32,
734            key: Cow<'a, Type>,
735            value_id: i32,
736            value_required: bool,
737            value: Cow<'a, Type>,
738        },
739        Primitive(PrimitiveType),
740    }
741
742    impl From<SerdeType<'_>> for Type {
743        fn from(value: SerdeType) -> Self {
744            match value {
745                SerdeType::List {
746                    r#type: _,
747                    element_id,
748                    element_required,
749                    element,
750                } => Self::List(ListType {
751                    element_field: NestedField::list_element(
752                        element_id,
753                        element.into_owned(),
754                        element_required,
755                    )
756                    .into(),
757                }),
758                SerdeType::Map {
759                    r#type: _,
760                    key_id,
761                    key,
762                    value_id,
763                    value_required,
764                    value,
765                } => Map(MapType {
766                    key_field: NestedField::map_key_element(key_id, key.into_owned()).into(),
767                    value_field: NestedField::map_value_element(
768                        value_id,
769                        value.into_owned(),
770                        value_required,
771                    )
772                    .into(),
773                }),
774                SerdeType::Struct { r#type: _, fields } => {
775                    Self::Struct(StructType::new(fields.into_owned()))
776                }
777                SerdeType::Primitive(p) => Self::Primitive(p),
778            }
779        }
780    }
781
782    impl<'a> From<&'a Type> for SerdeType<'a> {
783        fn from(value: &'a Type) -> Self {
784            match value {
785                Type::List(list) => SerdeType::List {
786                    r#type: "list".to_string(),
787                    element_id: list.element_field.id,
788                    element_required: list.element_field.required,
789                    element: Cow::Borrowed(&list.element_field.field_type),
790                },
791                Type::Map(map) => SerdeType::Map {
792                    r#type: "map".to_string(),
793                    key_id: map.key_field.id,
794                    key: Cow::Borrowed(&map.key_field.field_type),
795                    value_id: map.value_field.id,
796                    value_required: map.value_field.required,
797                    value: Cow::Borrowed(&map.value_field.field_type),
798                },
799                Type::Struct(s) => SerdeType::Struct {
800                    r#type: "struct".to_string(),
801                    fields: Cow::Borrowed(&s.fields),
802                },
803                Type::Primitive(p) => SerdeType::Primitive(p.clone()),
804            }
805        }
806    }
807}
808
809#[derive(Debug, PartialEq, Eq, Clone)]
810/// A map is a collection of key-value pairs with a key type and a value type.
811/// Both the key field and value field each have an integer id that is unique in the table schema.
812/// Map keys are required and map values can be either optional or required.
813/// Both map keys and map values may be any type, including nested types.
814pub struct MapType {
815    /// Field for key.
816    pub key_field: NestedFieldRef,
817    /// Field for value.
818    pub value_field: NestedFieldRef,
819}
820
821impl MapType {
822    /// Construct a map type with the given key and value fields.
823    pub fn new(key_field: NestedFieldRef, value_field: NestedFieldRef) -> Self {
824        Self {
825            key_field,
826            value_field,
827        }
828    }
829}
830
831#[cfg(test)]
832mod tests {
833    use pretty_assertions::assert_eq;
834    use uuid::Uuid;
835
836    use super::*;
837    use crate::spec::values::PrimitiveLiteral;
838
839    fn check_type_serde(json: &str, expected_type: Type) {
840        let desered_type: Type = serde_json::from_str(json).unwrap();
841        assert_eq!(desered_type, expected_type);
842
843        let sered_json = serde_json::to_string(&expected_type).unwrap();
844        let parsed_json_value = serde_json::from_str::<serde_json::Value>(&sered_json).unwrap();
845        let raw_json_value = serde_json::from_str::<serde_json::Value>(json).unwrap();
846
847        assert_eq!(parsed_json_value, raw_json_value);
848    }
849
850    #[test]
851    fn primitive_type_serde() {
852        let record = r#"
853    {
854        "type": "struct",
855        "fields": [
856            {"id": 1, "name": "bool_field", "required": true, "type": "boolean"},
857            {"id": 2, "name": "int_field", "required": true, "type": "int"},
858            {"id": 3, "name": "long_field", "required": true, "type": "long"},
859            {"id": 4, "name": "float_field", "required": true, "type": "float"},
860            {"id": 5, "name": "double_field", "required": true, "type": "double"},
861            {"id": 6, "name": "decimal_field", "required": true, "type": "decimal(9,2)"},
862            {"id": 7, "name": "date_field", "required": true, "type": "date"},
863            {"id": 8, "name": "time_field", "required": true, "type": "time"},
864            {"id": 9, "name": "timestamp_field", "required": true, "type": "timestamp"},
865            {"id": 10, "name": "timestamptz_field", "required": true, "type": "timestamptz"},
866            {"id": 11, "name": "timestamp_ns_field", "required": true, "type": "timestamp_ns"},
867            {"id": 12, "name": "timestamptz_ns_field", "required": true, "type": "timestamptz_ns"},
868            {"id": 13, "name": "uuid_field", "required": true, "type": "uuid"},
869            {"id": 14, "name": "fixed_field", "required": true, "type": "fixed[10]"},
870            {"id": 15, "name": "binary_field", "required": true, "type": "binary"},
871            {"id": 16, "name": "string_field", "required": true, "type": "string"}
872        ]
873    }
874    "#;
875
876        check_type_serde(
877            record,
878            Type::Struct(StructType {
879                fields: vec![
880                    NestedField::required(1, "bool_field", Type::Primitive(PrimitiveType::Boolean))
881                        .into(),
882                    NestedField::required(2, "int_field", Type::Primitive(PrimitiveType::Int))
883                        .into(),
884                    NestedField::required(3, "long_field", Type::Primitive(PrimitiveType::Long))
885                        .into(),
886                    NestedField::required(4, "float_field", Type::Primitive(PrimitiveType::Float))
887                        .into(),
888                    NestedField::required(
889                        5,
890                        "double_field",
891                        Type::Primitive(PrimitiveType::Double),
892                    )
893                    .into(),
894                    NestedField::required(
895                        6,
896                        "decimal_field",
897                        Type::Primitive(PrimitiveType::Decimal {
898                            precision: 9,
899                            scale: 2,
900                        }),
901                    )
902                    .into(),
903                    NestedField::required(7, "date_field", Type::Primitive(PrimitiveType::Date))
904                        .into(),
905                    NestedField::required(8, "time_field", Type::Primitive(PrimitiveType::Time))
906                        .into(),
907                    NestedField::required(
908                        9,
909                        "timestamp_field",
910                        Type::Primitive(PrimitiveType::Timestamp),
911                    )
912                    .into(),
913                    NestedField::required(
914                        10,
915                        "timestamptz_field",
916                        Type::Primitive(PrimitiveType::Timestamptz),
917                    )
918                    .into(),
919                    NestedField::required(
920                        11,
921                        "timestamp_ns_field",
922                        Type::Primitive(PrimitiveType::TimestampNs),
923                    )
924                    .into(),
925                    NestedField::required(
926                        12,
927                        "timestamptz_ns_field",
928                        Type::Primitive(PrimitiveType::TimestamptzNs),
929                    )
930                    .into(),
931                    NestedField::required(13, "uuid_field", Type::Primitive(PrimitiveType::Uuid))
932                        .into(),
933                    NestedField::required(
934                        14,
935                        "fixed_field",
936                        Type::Primitive(PrimitiveType::Fixed(10)),
937                    )
938                    .into(),
939                    NestedField::required(
940                        15,
941                        "binary_field",
942                        Type::Primitive(PrimitiveType::Binary),
943                    )
944                    .into(),
945                    NestedField::required(
946                        16,
947                        "string_field",
948                        Type::Primitive(PrimitiveType::String),
949                    )
950                    .into(),
951                ],
952                id_lookup: OnceLock::default(),
953                name_lookup: OnceLock::default(),
954            }),
955        )
956    }
957
958    #[test]
959    fn struct_type() {
960        let record = r#"
961        {
962            "type": "struct",
963            "fields": [
964                {
965                    "id": 1,
966                    "name": "id",
967                    "required": true,
968                    "type": "uuid",
969                    "initial-default": "0db3e2a8-9d1d-42b9-aa7b-74ebe558dceb",
970                    "write-default": "ec5911be-b0a7-458c-8438-c9a3e53cffae"
971                }, {
972                    "id": 2,
973                    "name": "data",
974                    "required": false,
975                    "type": "int"
976                }
977            ]
978        }
979        "#;
980
981        check_type_serde(
982            record,
983            Type::Struct(StructType {
984                fields: vec![
985                    NestedField::required(1, "id", Type::Primitive(PrimitiveType::Uuid))
986                        .with_initial_default(Literal::Primitive(PrimitiveLiteral::UInt128(
987                            Uuid::parse_str("0db3e2a8-9d1d-42b9-aa7b-74ebe558dceb")
988                                .unwrap()
989                                .as_u128(),
990                        )))
991                        .with_write_default(Literal::Primitive(PrimitiveLiteral::UInt128(
992                            Uuid::parse_str("ec5911be-b0a7-458c-8438-c9a3e53cffae")
993                                .unwrap()
994                                .as_u128(),
995                        )))
996                        .into(),
997                    NestedField::optional(2, "data", Type::Primitive(PrimitiveType::Int)).into(),
998                ],
999                id_lookup: HashMap::from([(1, 0), (2, 1)]).into(),
1000                name_lookup: HashMap::from([("id".to_string(), 0), ("data".to_string(), 1)]).into(),
1001            }),
1002        )
1003    }
1004
1005    #[test]
1006    fn test_deeply_nested_struct() {
1007        let record = r#"
1008{
1009  "type": "struct",
1010  "fields": [
1011    {
1012      "id": 1,
1013      "name": "id",
1014      "required": true,
1015      "type": "uuid",
1016      "initial-default": "0db3e2a8-9d1d-42b9-aa7b-74ebe558dceb",
1017      "write-default": "ec5911be-b0a7-458c-8438-c9a3e53cffae"
1018    },
1019    {
1020      "id": 2,
1021      "name": "data",
1022      "required": false,
1023      "type": "int"
1024    },
1025    {
1026      "id": 3,
1027      "name": "address",
1028      "required": true,
1029      "type": {
1030        "type": "struct",
1031        "fields": [
1032          {
1033            "id": 4,
1034            "name": "street",
1035            "required": true,
1036            "type": "string"
1037          },
1038          {
1039            "id": 5,
1040            "name": "province",
1041            "required": false,
1042            "type": "string"
1043          },
1044          {
1045            "id": 6,
1046            "name": "zip",
1047            "required": true,
1048            "type": "int"
1049          }
1050        ]
1051      }
1052    }
1053  ]
1054}
1055"#;
1056
1057        let struct_type = Type::Struct(StructType::new(vec![
1058            NestedField::required(1, "id", Type::Primitive(PrimitiveType::Uuid))
1059                .with_initial_default(Literal::Primitive(PrimitiveLiteral::UInt128(
1060                    Uuid::parse_str("0db3e2a8-9d1d-42b9-aa7b-74ebe558dceb")
1061                        .unwrap()
1062                        .as_u128(),
1063                )))
1064                .with_write_default(Literal::Primitive(PrimitiveLiteral::UInt128(
1065                    Uuid::parse_str("ec5911be-b0a7-458c-8438-c9a3e53cffae")
1066                        .unwrap()
1067                        .as_u128(),
1068                )))
1069                .into(),
1070            NestedField::optional(2, "data", Type::Primitive(PrimitiveType::Int)).into(),
1071            NestedField::required(
1072                3,
1073                "address",
1074                Type::Struct(StructType::new(vec![
1075                    NestedField::required(4, "street", Type::Primitive(PrimitiveType::String))
1076                        .into(),
1077                    NestedField::optional(5, "province", Type::Primitive(PrimitiveType::String))
1078                        .into(),
1079                    NestedField::required(6, "zip", Type::Primitive(PrimitiveType::Int)).into(),
1080                ])),
1081            )
1082            .into(),
1083        ]));
1084
1085        check_type_serde(record, struct_type)
1086    }
1087
1088    #[test]
1089    fn list() {
1090        let record = r#"
1091        {
1092            "type": "list",
1093            "element-id": 3,
1094            "element-required": true,
1095            "element": "string"
1096        }
1097        "#;
1098
1099        check_type_serde(
1100            record,
1101            Type::List(ListType {
1102                element_field: NestedField::list_element(
1103                    3,
1104                    Type::Primitive(PrimitiveType::String),
1105                    true,
1106                )
1107                .into(),
1108            }),
1109        );
1110    }
1111
1112    #[test]
1113    fn map() {
1114        let record = r#"
1115        {
1116            "type": "map",
1117            "key-id": 4,
1118            "key": "string",
1119            "value-id": 5,
1120            "value-required": false,
1121            "value": "double"
1122        }
1123        "#;
1124
1125        check_type_serde(
1126            record,
1127            Type::Map(MapType {
1128                key_field: NestedField::map_key_element(4, Type::Primitive(PrimitiveType::String))
1129                    .into(),
1130                value_field: NestedField::map_value_element(
1131                    5,
1132                    Type::Primitive(PrimitiveType::Double),
1133                    false,
1134                )
1135                .into(),
1136            }),
1137        );
1138    }
1139
1140    #[test]
1141    fn map_int() {
1142        let record = r#"
1143        {
1144            "type": "map",
1145            "key-id": 4,
1146            "key": "int",
1147            "value-id": 5,
1148            "value-required": false,
1149            "value": "string"
1150        }
1151        "#;
1152
1153        check_type_serde(
1154            record,
1155            Type::Map(MapType {
1156                key_field: NestedField::map_key_element(4, Type::Primitive(PrimitiveType::Int))
1157                    .into(),
1158                value_field: NestedField::map_value_element(
1159                    5,
1160                    Type::Primitive(PrimitiveType::String),
1161                    false,
1162                )
1163                .into(),
1164            }),
1165        );
1166    }
1167
1168    #[test]
1169    fn test_decimal_precision() {
1170        let expected_max_precision = [
1171            2, 4, 6, 9, 11, 14, 16, 18, 21, 23, 26, 28, 31, 33, 35, 38, 40, 43, 45, 47, 50, 52, 55,
1172            57,
1173        ];
1174        for (i, max_precision) in expected_max_precision.iter().enumerate() {
1175            assert_eq!(
1176                *max_precision,
1177                Type::decimal_max_precision(i as u32 + 1).unwrap(),
1178                "Failed calculate max precision for {i}"
1179            );
1180        }
1181
1182        assert_eq!(5, Type::decimal_required_bytes(10).unwrap());
1183        assert_eq!(16, Type::decimal_required_bytes(38).unwrap());
1184    }
1185
1186    #[test]
1187    fn test_primitive_type_compatible() {
1188        let pairs = vec![
1189            (PrimitiveType::Boolean, PrimitiveLiteral::Boolean(true)),
1190            (PrimitiveType::Int, PrimitiveLiteral::Int(1)),
1191            (PrimitiveType::Long, PrimitiveLiteral::Long(1)),
1192            (PrimitiveType::Float, PrimitiveLiteral::Float(1.0.into())),
1193            (PrimitiveType::Double, PrimitiveLiteral::Double(1.0.into())),
1194            (
1195                PrimitiveType::Decimal {
1196                    precision: 9,
1197                    scale: 2,
1198                },
1199                PrimitiveLiteral::Int128(1),
1200            ),
1201            (PrimitiveType::Date, PrimitiveLiteral::Int(1)),
1202            (PrimitiveType::Time, PrimitiveLiteral::Long(1)),
1203            (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(1)),
1204            (PrimitiveType::Timestamp, PrimitiveLiteral::Long(1)),
1205            (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(1)),
1206            (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(1)),
1207            (
1208                PrimitiveType::Uuid,
1209                PrimitiveLiteral::UInt128(Uuid::new_v4().as_u128()),
1210            ),
1211            (PrimitiveType::Fixed(8), PrimitiveLiteral::Binary(vec![1])),
1212            (PrimitiveType::Binary, PrimitiveLiteral::Binary(vec![1])),
1213        ];
1214        for (ty, literal) in pairs {
1215            assert!(ty.compatible(&literal));
1216        }
1217    }
1218
1219    #[test]
1220    fn struct_type_with_type_field() {
1221        // Test that StructType properly deserializes JSON with "type":"struct" field
1222        // This was previously broken because the deserializer wasn't consuming the type field value
1223        let json = r#"
1224        {
1225            "type": "struct",
1226            "fields": [
1227                {"id": 1, "name": "field1", "required": true, "type": "string"}
1228            ]
1229        }
1230        "#;
1231
1232        let struct_type: StructType = serde_json::from_str(json)
1233            .expect("Should successfully deserialize StructType with type field");
1234
1235        assert_eq!(struct_type.fields().len(), 1);
1236        assert_eq!(struct_type.fields()[0].name, "field1");
1237    }
1238
1239    #[test]
1240    fn struct_type_rejects_wrong_type() {
1241        // Test that StructType validation rejects incorrect type field values
1242        let json = r#"
1243        {
1244            "type": "list",
1245            "fields": [
1246                {"id": 1, "name": "field1", "required": true, "type": "string"}
1247            ]
1248        }
1249        "#;
1250
1251        let result = serde_json::from_str::<StructType>(json);
1252        assert!(
1253            result.is_err(),
1254            "Should reject StructType with wrong type field"
1255        );
1256        assert!(
1257            result
1258                .unwrap_err()
1259                .to_string()
1260                .contains("expected type 'struct'")
1261        );
1262    }
1263}