iceberg/spec/schema/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! This module defines schema in iceberg.
19
20use std::collections::{HashMap, HashSet};
21use std::fmt::{Display, Formatter};
22use std::sync::Arc;
23
24mod utils;
25mod visitor;
26pub use self::visitor::*;
27pub(super) mod _serde;
28mod id_reassigner;
29mod index;
30mod prune_columns;
31use bimap::BiHashMap;
32use itertools::{Itertools, zip_eq};
33use serde::{Deserialize, Serialize};
34
35use self::_serde::SchemaEnum;
36use self::id_reassigner::ReassignFieldIds;
37use self::index::{IndexByName, index_by_id, index_parents};
38pub use self::prune_columns::prune_columns;
39use super::NestedField;
40use crate::error::Result;
41use crate::expr::accessor::StructAccessor;
42use crate::spec::datatypes::{
43    LIST_FIELD_NAME, ListType, MAP_KEY_FIELD_NAME, MAP_VALUE_FIELD_NAME, MapType, NestedFieldRef,
44    PrimitiveType, StructType, Type,
45};
46use crate::{Error, ErrorKind, ensure_data_valid};
47
/// Type alias for schema id (a plain `i32`).
pub type SchemaId = i32;
/// Shared, reference-counted handle to a [`Schema`].
pub type SchemaRef = Arc<Schema>;
/// Default schema id; this is the id a fresh [`Schema::builder`] starts with.
pub const DEFAULT_SCHEMA_ID: SchemaId = 0;
54
55/// Defines schema in iceberg.
56#[derive(Debug, Serialize, Deserialize, Clone)]
57#[serde(try_from = "SchemaEnum", into = "SchemaEnum")]
58pub struct Schema {
59    r#struct: StructType,
60    schema_id: SchemaId,
61    highest_field_id: i32,
62    identifier_field_ids: HashSet<i32>,
63
64    alias_to_id: BiHashMap<String, i32>,
65    id_to_field: HashMap<i32, NestedFieldRef>,
66
67    name_to_id: HashMap<String, i32>,
68    lowercase_name_to_id: HashMap<String, i32>,
69    id_to_name: HashMap<i32, String>,
70
71    field_id_to_accessor: HashMap<i32, Arc<StructAccessor>>,
72}
73
74impl PartialEq for Schema {
75    fn eq(&self, other: &Self) -> bool {
76        self.r#struct == other.r#struct
77            && self.schema_id == other.schema_id
78            && self.identifier_field_ids == other.identifier_field_ids
79    }
80}
81
82impl Eq for Schema {}
83
/// Schema builder.
///
/// Collects the pieces of a [`Schema`]; the lookup indexes are computed and the
/// identifier fields validated when `build` is called.
#[derive(Debug)]
pub struct SchemaBuilder {
    // Id the built schema will carry.
    schema_id: i32,
    // Top-level fields, in the order they were added.
    fields: Vec<NestedFieldRef>,
    // Alias <-> field id mapping handed to the built schema.
    alias_to_id: BiHashMap<String, i32>,
    // Ids of the identifier fields; validated in `build`.
    identifier_field_ids: HashSet<i32>,
    // When set, all field ids are reassigned on build, starting from this id.
    reassign_field_ids_from: Option<i32>,
}
93
94impl SchemaBuilder {
95    /// Add fields to schema builder.
96    pub fn with_fields(mut self, fields: impl IntoIterator<Item = NestedFieldRef>) -> Self {
97        self.fields.extend(fields);
98        self
99    }
100
101    /// Reassign all field-ids (including nested) on build.
102    /// Reassignment starts from the field-id specified in `start_from` (inclusive).
103    ///
104    /// All specified aliases and identifier fields will be updated to the new field-ids.
105    pub(crate) fn with_reassigned_field_ids(mut self, start_from: u32) -> Self {
106        self.reassign_field_ids_from = Some(start_from.try_into().unwrap_or(i32::MAX));
107        self
108    }
109
110    /// Set schema id.
111    pub fn with_schema_id(mut self, schema_id: i32) -> Self {
112        self.schema_id = schema_id;
113        self
114    }
115
116    /// Set identifier field ids.
117    pub fn with_identifier_field_ids(mut self, ids: impl IntoIterator<Item = i32>) -> Self {
118        self.identifier_field_ids.extend(ids);
119        self
120    }
121
122    /// Set alias to filed id mapping.
123    pub fn with_alias(mut self, alias_to_id: BiHashMap<String, i32>) -> Self {
124        self.alias_to_id = alias_to_id;
125        self
126    }
127
128    /// Builds the schema.
129    pub fn build(self) -> Result<Schema> {
130        let field_id_to_accessor = self.build_accessors();
131
132        let r#struct = StructType::new(self.fields);
133        let id_to_field = index_by_id(&r#struct)?;
134
135        Self::validate_identifier_ids(
136            &r#struct,
137            &id_to_field,
138            self.identifier_field_ids.iter().copied(),
139        )?;
140
141        let (name_to_id, id_to_name) = {
142            let mut index = IndexByName::default();
143            visit_struct(&r#struct, &mut index)?;
144            index.indexes()
145        };
146
147        let lowercase_name_to_id = name_to_id
148            .iter()
149            .map(|(k, v)| (k.to_lowercase(), *v))
150            .collect();
151
152        let highest_field_id = id_to_field.keys().max().cloned().unwrap_or(0);
153
154        let mut schema = Schema {
155            r#struct,
156            schema_id: self.schema_id,
157            highest_field_id,
158            identifier_field_ids: self.identifier_field_ids,
159            alias_to_id: self.alias_to_id,
160            id_to_field,
161
162            name_to_id,
163            lowercase_name_to_id,
164            id_to_name,
165
166            field_id_to_accessor,
167        };
168
169        if let Some(start_from) = self.reassign_field_ids_from {
170            let mut id_reassigner = ReassignFieldIds::new(start_from);
171            let new_fields = id_reassigner.reassign_field_ids(schema.r#struct.fields().to_vec())?;
172            let new_identifier_field_ids =
173                id_reassigner.apply_to_identifier_fields(schema.identifier_field_ids)?;
174            let new_alias_to_id = id_reassigner.apply_to_aliases(schema.alias_to_id.clone())?;
175
176            schema = Schema::builder()
177                .with_schema_id(schema.schema_id)
178                .with_fields(new_fields)
179                .with_identifier_field_ids(new_identifier_field_ids)
180                .with_alias(new_alias_to_id)
181                .build()?;
182        }
183
184        Ok(schema)
185    }
186
187    fn build_accessors(&self) -> HashMap<i32, Arc<StructAccessor>> {
188        let mut map = HashMap::new();
189
190        for (pos, field) in self.fields.iter().enumerate() {
191            match field.field_type.as_ref() {
192                Type::Primitive(prim_type) => {
193                    // add an accessor for this field
194                    let accessor = Arc::new(StructAccessor::new(pos, prim_type.clone()));
195                    map.insert(field.id, accessor.clone());
196                }
197
198                Type::Struct(nested) => {
199                    // add accessors for nested fields
200                    for (field_id, accessor) in Self::build_accessors_nested(nested.fields()) {
201                        let new_accessor = Arc::new(StructAccessor::wrap(pos, accessor));
202                        map.insert(field_id, new_accessor.clone());
203                    }
204                }
205                _ => {
206                    // Accessors don't get built for Map or List types
207                }
208            }
209        }
210
211        map
212    }
213
214    fn build_accessors_nested(fields: &[NestedFieldRef]) -> Vec<(i32, Box<StructAccessor>)> {
215        let mut results = vec![];
216        for (pos, field) in fields.iter().enumerate() {
217            match field.field_type.as_ref() {
218                Type::Primitive(prim_type) => {
219                    let accessor = Box::new(StructAccessor::new(pos, prim_type.clone()));
220                    results.push((field.id, accessor));
221                }
222                Type::Struct(nested) => {
223                    let nested_accessors = Self::build_accessors_nested(nested.fields());
224
225                    let wrapped_nested_accessors =
226                        nested_accessors.into_iter().map(|(id, accessor)| {
227                            let new_accessor = Box::new(StructAccessor::wrap(pos, accessor));
228                            (id, new_accessor.clone())
229                        });
230
231                    results.extend(wrapped_nested_accessors);
232                }
233                _ => {
234                    // Accessors don't get built for Map or List types
235                }
236            }
237        }
238
239        results
240    }
241
242    /// According to [the spec](https://iceberg.apache.org/spec/#identifier-fields), the identifier fields
243    /// must meet the following requirements:
244    /// - Float, double, and optional fields cannot be used as identifier fields.
245    /// - Identifier fields may be nested in structs but cannot be nested within maps or lists.
246    /// - A nested field cannot be used as an identifier field if it is nested in an optional struct, to avoid null values in identifiers.
247    fn validate_identifier_ids(
248        r#struct: &StructType,
249        id_to_field: &HashMap<i32, NestedFieldRef>,
250        identifier_field_ids: impl Iterator<Item = i32>,
251    ) -> Result<()> {
252        let id_to_parent = index_parents(r#struct)?;
253        for identifier_field_id in identifier_field_ids {
254            let field = id_to_field.get(&identifier_field_id).ok_or_else(|| {
255                Error::new(
256                    ErrorKind::DataInvalid,
257                    format!(
258                        "Cannot add identifier field {identifier_field_id}: field does not exist"
259                    ),
260                )
261            })?;
262            ensure_data_valid!(
263                field.required,
264                "Cannot add identifier field: {} is an optional field",
265                field.name
266            );
267            if let Type::Primitive(p) = field.field_type.as_ref() {
268                ensure_data_valid!(
269                    !matches!(p, PrimitiveType::Double | PrimitiveType::Float),
270                    "Cannot add identifier field {}: cannot be a float or double type",
271                    field.name
272                );
273            } else {
274                return Err(Error::new(
275                    ErrorKind::DataInvalid,
276                    format!(
277                        "Cannot add field {} as an identifier field: not a primitive type field",
278                        field.name
279                    ),
280                ));
281            }
282
283            let mut cur_field_id = identifier_field_id;
284            while let Some(parent) = id_to_parent.get(&cur_field_id) {
285                let parent_field = id_to_field
286                    .get(parent)
287                    .expect("Field id should not disappear.");
288                ensure_data_valid!(
289                    parent_field.field_type.is_struct(),
290                    "Cannot add field {} as an identifier field: must not be nested in {:?}",
291                    field.name,
292                    parent_field
293                );
294                ensure_data_valid!(
295                    parent_field.required,
296                    "Cannot add field {} as an identifier field: must not be nested in an optional field {}",
297                    field.name,
298                    parent_field
299                );
300                cur_field_id = *parent;
301            }
302        }
303
304        Ok(())
305    }
306}
307
308impl Schema {
309    /// Create a schema builder.
310    pub fn builder() -> SchemaBuilder {
311        SchemaBuilder {
312            schema_id: DEFAULT_SCHEMA_ID,
313            fields: vec![],
314            identifier_field_ids: HashSet::default(),
315            alias_to_id: BiHashMap::default(),
316            reassign_field_ids_from: None,
317        }
318    }
319
320    /// Create a new schema builder from a schema.
321    pub fn into_builder(self) -> SchemaBuilder {
322        SchemaBuilder {
323            schema_id: self.schema_id,
324            fields: self.r#struct.fields().to_vec(),
325            alias_to_id: self.alias_to_id,
326            identifier_field_ids: self.identifier_field_ids,
327            reassign_field_ids_from: None,
328        }
329    }
330
331    /// Get field by field id.
332    pub fn field_by_id(&self, field_id: i32) -> Option<&NestedFieldRef> {
333        self.id_to_field.get(&field_id)
334    }
335
336    /// Get field by field name.
337    ///
338    /// Both full name and short name could work here.
339    pub fn field_by_name(&self, field_name: &str) -> Option<&NestedFieldRef> {
340        self.name_to_id
341            .get(field_name)
342            .and_then(|id| self.field_by_id(*id))
343    }
344
345    /// Get field by field name, but in case-insensitive way.
346    ///
347    /// Both full name and short name could work here.
348    pub fn field_by_name_case_insensitive(&self, field_name: &str) -> Option<&NestedFieldRef> {
349        self.lowercase_name_to_id
350            .get(&field_name.to_lowercase())
351            .and_then(|id| self.field_by_id(*id))
352    }
353
354    /// Get field by alias.
355    pub fn field_by_alias(&self, alias: &str) -> Option<&NestedFieldRef> {
356        self.alias_to_id
357            .get_by_left(alias)
358            .and_then(|id| self.field_by_id(*id))
359    }
360
361    /// Returns [`highest_field_id`].
362    #[inline]
363    pub fn highest_field_id(&self) -> i32 {
364        self.highest_field_id
365    }
366
367    /// Returns [`schema_id`].
368    #[inline]
369    pub fn schema_id(&self) -> SchemaId {
370        self.schema_id
371    }
372
373    /// Returns [`r#struct`].
374    #[inline]
375    pub fn as_struct(&self) -> &StructType {
376        &self.r#struct
377    }
378
379    /// Returns [`identifier_field_ids`].
380    #[inline]
381    pub fn identifier_field_ids(&self) -> impl ExactSizeIterator<Item = i32> + '_ {
382        self.identifier_field_ids.iter().copied()
383    }
384
385    /// Get field id by full name.
386    pub fn field_id_by_name(&self, name: &str) -> Option<i32> {
387        self.name_to_id.get(name).copied()
388    }
389
390    /// Get full name by field id.
391    pub fn name_by_field_id(&self, field_id: i32) -> Option<&str> {
392        self.id_to_name.get(&field_id).map(String::as_str)
393    }
394
395    /// Get an accessor for retrieving data in a struct
396    pub fn accessor_by_field_id(&self, field_id: i32) -> Option<Arc<StructAccessor>> {
397        self.field_id_to_accessor.get(&field_id).cloned()
398    }
399
400    /// Check if this schema is identical to another schema semantically - excluding schema id.
401    pub(crate) fn is_same_schema(&self, other: &SchemaRef) -> bool {
402        self.as_struct().eq(other.as_struct())
403            && self.identifier_field_ids().eq(other.identifier_field_ids())
404    }
405
406    /// Change the schema id of this schema.
407    // This is redundant with the `with_schema_id` method on the builder, but useful
408    // as it is infallible in contrast to the builder `build()` method.
409    pub(crate) fn with_schema_id(self, schema_id: SchemaId) -> Self {
410        Self { schema_id, ..self }
411    }
412
413    /// Return A HashMap matching field ids to field names.
414    pub fn field_id_to_name_map(&self) -> &HashMap<i32, String> {
415        &self.id_to_name
416    }
417
418    /// Return a hashmap matching field ids to nested fields.
419    pub fn field_id_to_fields(&self) -> &HashMap<i32, NestedFieldRef> {
420        &self.id_to_field
421    }
422}
423
424impl Display for Schema {
425    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
426        writeln!(f, "table {{")?;
427        for field in self.as_struct().fields() {
428            writeln!(f, "  {}", field)?;
429        }
430        writeln!(f, "}}")
431    }
432}
433
434#[cfg(test)]
435mod tests {
436    use std::collections::HashMap;
437
438    use bimap::BiHashMap;
439
440    use crate::spec::datatypes::Type::{List, Map, Primitive, Struct};
441    use crate::spec::datatypes::{
442        ListType, MapType, NestedField, NestedFieldRef, PrimitiveType, StructType, Type,
443    };
444    use crate::spec::schema::Schema;
445    use crate::spec::values::Map as MapValue;
446    use crate::spec::{Datum, Literal};
447
448    #[test]
449    fn test_construct_schema() {
450        let field1: NestedFieldRef =
451            NestedField::required(1, "f1", Type::Primitive(PrimitiveType::Boolean)).into();
452        let field2: NestedFieldRef =
453            NestedField::optional(2, "f2", Type::Primitive(PrimitiveType::Int)).into();
454
455        let schema = Schema::builder()
456            .with_fields(vec![field1.clone()])
457            .with_fields(vec![field2.clone()])
458            .with_schema_id(3)
459            .build()
460            .unwrap();
461
462        assert_eq!(3, schema.schema_id());
463        assert_eq!(2, schema.highest_field_id());
464        assert_eq!(Some(&field1), schema.field_by_id(1));
465        assert_eq!(Some(&field2), schema.field_by_id(2));
466        assert_eq!(None, schema.field_by_id(3));
467    }
468
469    pub fn table_schema_simple<'a>() -> (Schema, &'a str) {
470        let schema = Schema::builder()
471            .with_schema_id(1)
472            .with_identifier_field_ids(vec![2])
473            .with_fields(vec![
474                NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(),
475                NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
476                NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
477            ])
478            .build()
479            .unwrap();
480        let record = r#"{
481            "type":"struct",
482            "schema-id":1,
483            "fields":[
484                {
485                    "id":1,
486                    "name":"foo",
487                    "required":false,
488                    "type":"string"
489                },
490                {
491                    "id":2,
492                    "name":"bar",
493                    "required":true,
494                    "type":"int"
495                },
496                {
497                    "id":3,
498                    "name":"baz",
499                    "required":false,
500                    "type":"boolean"
501                }
502            ],
503            "identifier-field-ids":[2]
504        }"#;
505        (schema, record)
506    }
507
508    pub fn table_schema_nested() -> Schema {
509        Schema::builder()
510            .with_schema_id(1)
511            .with_identifier_field_ids(vec![2])
512            .with_fields(vec![
513                NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(),
514                NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
515                NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
516                NestedField::required(
517                    4,
518                    "qux",
519                    Type::List(ListType {
520                        element_field: NestedField::list_element(
521                            5,
522                            Type::Primitive(PrimitiveType::String),
523                            true,
524                        )
525                        .into(),
526                    }),
527                )
528                .into(),
529                NestedField::required(
530                    6,
531                    "quux",
532                    Type::Map(MapType {
533                        key_field: NestedField::map_key_element(
534                            7,
535                            Type::Primitive(PrimitiveType::String),
536                        )
537                        .into(),
538                        value_field: NestedField::map_value_element(
539                            8,
540                            Type::Map(MapType {
541                                key_field: NestedField::map_key_element(
542                                    9,
543                                    Type::Primitive(PrimitiveType::String),
544                                )
545                                .into(),
546                                value_field: NestedField::map_value_element(
547                                    10,
548                                    Type::Primitive(PrimitiveType::Int),
549                                    true,
550                                )
551                                .into(),
552                            }),
553                            true,
554                        )
555                        .into(),
556                    }),
557                )
558                .into(),
559                NestedField::required(
560                    11,
561                    "location",
562                    Type::List(ListType {
563                        element_field: NestedField::list_element(
564                            12,
565                            Type::Struct(StructType::new(vec![
566                                NestedField::optional(
567                                    13,
568                                    "latitude",
569                                    Type::Primitive(PrimitiveType::Float),
570                                )
571                                .into(),
572                                NestedField::optional(
573                                    14,
574                                    "longitude",
575                                    Type::Primitive(PrimitiveType::Float),
576                                )
577                                .into(),
578                            ])),
579                            true,
580                        )
581                        .into(),
582                    }),
583                )
584                .into(),
585                NestedField::optional(
586                    15,
587                    "person",
588                    Type::Struct(StructType::new(vec![
589                        NestedField::optional(16, "name", Type::Primitive(PrimitiveType::String))
590                            .into(),
591                        NestedField::required(17, "age", Type::Primitive(PrimitiveType::Int))
592                            .into(),
593                    ])),
594                )
595                .into(),
596            ])
597            .build()
598            .unwrap()
599    }
600
601    #[test]
602    fn test_schema_display() {
603        let expected_str = "
604table {
605  1: foo: optional string\x20
606  2: bar: required int\x20
607  3: baz: optional boolean\x20
608}
609";
610
611        assert_eq!(expected_str, format!("\n{}", table_schema_simple().0));
612    }
613
614    #[test]
615    fn test_schema_build_failed_on_duplicate_names() {
616        let ret = Schema::builder()
617            .with_schema_id(1)
618            .with_identifier_field_ids(vec![1])
619            .with_fields(vec![
620                NestedField::required(1, "foo", Primitive(PrimitiveType::String)).into(),
621                NestedField::required(2, "bar", Primitive(PrimitiveType::Int)).into(),
622                NestedField::optional(3, "baz", Primitive(PrimitiveType::Boolean)).into(),
623                NestedField::optional(4, "baz", Primitive(PrimitiveType::Boolean)).into(),
624            ])
625            .build();
626
627        assert!(
628            ret.unwrap_err()
629                .message()
630                .contains("Invalid schema: multiple fields for name baz")
631        );
632    }
633
634    #[test]
635    fn test_schema_into_builder() {
636        let original_schema = table_schema_nested();
637        let builder = original_schema.clone().into_builder();
638        let schema = builder.build().unwrap();
639
640        assert_eq!(original_schema, schema);
641    }
642
643    #[test]
644    fn test_schema_index_by_name() {
645        let expected_name_to_id = HashMap::from(
646            [
647                ("foo", 1),
648                ("bar", 2),
649                ("baz", 3),
650                ("qux", 4),
651                ("qux.element", 5),
652                ("quux", 6),
653                ("quux.key", 7),
654                ("quux.value", 8),
655                ("quux.value.key", 9),
656                ("quux.value.value", 10),
657                ("location", 11),
658                ("location.element", 12),
659                ("location.element.latitude", 13),
660                ("location.element.longitude", 14),
661                ("location.latitude", 13),
662                ("location.longitude", 14),
663                ("person", 15),
664                ("person.name", 16),
665                ("person.age", 17),
666            ]
667            .map(|e| (e.0.to_string(), e.1)),
668        );
669
670        let schema = table_schema_nested();
671        assert_eq!(&expected_name_to_id, &schema.name_to_id);
672    }
673
674    #[test]
675    fn test_schema_index_by_name_case_insensitive() {
676        let expected_name_to_id = HashMap::from(
677            [
678                ("fOo", 1),
679                ("Bar", 2),
680                ("BAz", 3),
681                ("quX", 4),
682                ("quX.ELEment", 5),
683                ("qUUx", 6),
684                ("QUUX.KEY", 7),
685                ("QUUX.Value", 8),
686                ("qUUX.VALUE.Key", 9),
687                ("qUux.VaLue.Value", 10),
688                ("lOCAtION", 11),
689                ("LOCAtioN.ELeMENt", 12),
690                ("LoCATion.element.LATitude", 13),
691                ("locatION.ElemeNT.LONgitude", 14),
692                ("LOCAtiON.LATITUDE", 13),
693                ("LOCATION.LONGITUDE", 14),
694                ("PERSon", 15),
695                ("PERSON.Name", 16),
696                ("peRSON.AGe", 17),
697            ]
698            .map(|e| (e.0.to_string(), e.1)),
699        );
700
701        let schema = table_schema_nested();
702        for (name, id) in expected_name_to_id {
703            assert_eq!(
704                Some(id),
705                schema.field_by_name_case_insensitive(&name).map(|f| f.id)
706            );
707        }
708    }
709
710    #[test]
711    fn test_schema_find_column_name() {
712        let expected_column_name = HashMap::from([
713            (1, "foo"),
714            (2, "bar"),
715            (3, "baz"),
716            (4, "qux"),
717            (5, "qux.element"),
718            (6, "quux"),
719            (7, "quux.key"),
720            (8, "quux.value"),
721            (9, "quux.value.key"),
722            (10, "quux.value.value"),
723            (11, "location"),
724            (12, "location.element"),
725            (13, "location.element.latitude"),
726            (14, "location.element.longitude"),
727        ]);
728
729        let schema = table_schema_nested();
730        for (id, name) in expected_column_name {
731            assert_eq!(
732                Some(name),
733                schema.name_by_field_id(id),
734                "Column name for field id {} not match.",
735                id
736            );
737        }
738    }
739
740    #[test]
741    fn test_schema_find_column_name_not_found() {
742        let schema = table_schema_nested();
743
744        assert!(schema.name_by_field_id(99).is_none());
745    }
746
747    #[test]
748    fn test_schema_find_column_name_by_id_simple() {
749        let expected_id_to_name = HashMap::from([(1, "foo"), (2, "bar"), (3, "baz")]);
750
751        let schema = table_schema_simple().0;
752
753        for (id, name) in expected_id_to_name {
754            assert_eq!(
755                Some(name),
756                schema.name_by_field_id(id),
757                "Column name for field id {} not match.",
758                id
759            );
760        }
761    }
762
763    #[test]
764    fn test_schema_find_simple() {
765        let schema = table_schema_simple().0;
766
767        assert_eq!(
768            Some(schema.r#struct.fields()[0].clone()),
769            schema.field_by_id(1).cloned()
770        );
771        assert_eq!(
772            Some(schema.r#struct.fields()[1].clone()),
773            schema.field_by_id(2).cloned()
774        );
775        assert_eq!(
776            Some(schema.r#struct.fields()[2].clone()),
777            schema.field_by_id(3).cloned()
778        );
779
780        assert!(schema.field_by_id(4).is_none());
781        assert!(schema.field_by_name("non exist").is_none());
782    }
783
784    #[test]
785    fn test_schema_find_nested() {
786        let expected_id_to_field: HashMap<i32, NestedField> = HashMap::from([
787            (
788                1,
789                NestedField::optional(1, "foo", Primitive(PrimitiveType::String)),
790            ),
791            (
792                2,
793                NestedField::required(2, "bar", Primitive(PrimitiveType::Int)),
794            ),
795            (
796                3,
797                NestedField::optional(3, "baz", Primitive(PrimitiveType::Boolean)),
798            ),
799            (
800                4,
801                NestedField::required(
802                    4,
803                    "qux",
804                    Type::List(ListType {
805                        element_field: NestedField::list_element(
806                            5,
807                            Type::Primitive(PrimitiveType::String),
808                            true,
809                        )
810                        .into(),
811                    }),
812                ),
813            ),
814            (
815                5,
816                NestedField::required(5, "element", Primitive(PrimitiveType::String)),
817            ),
818            (
819                6,
820                NestedField::required(
821                    6,
822                    "quux",
823                    Map(MapType {
824                        key_field: NestedField::map_key_element(
825                            7,
826                            Primitive(PrimitiveType::String),
827                        )
828                        .into(),
829                        value_field: NestedField::map_value_element(
830                            8,
831                            Map(MapType {
832                                key_field: NestedField::map_key_element(
833                                    9,
834                                    Primitive(PrimitiveType::String),
835                                )
836                                .into(),
837                                value_field: NestedField::map_value_element(
838                                    10,
839                                    Primitive(PrimitiveType::Int),
840                                    true,
841                                )
842                                .into(),
843                            }),
844                            true,
845                        )
846                        .into(),
847                    }),
848                ),
849            ),
850            (
851                7,
852                NestedField::required(7, "key", Primitive(PrimitiveType::String)),
853            ),
854            (
855                8,
856                NestedField::required(
857                    8,
858                    "value",
859                    Map(MapType {
860                        key_field: NestedField::map_key_element(
861                            9,
862                            Primitive(PrimitiveType::String),
863                        )
864                        .into(),
865                        value_field: NestedField::map_value_element(
866                            10,
867                            Primitive(PrimitiveType::Int),
868                            true,
869                        )
870                        .into(),
871                    }),
872                ),
873            ),
874            (
875                9,
876                NestedField::required(9, "key", Primitive(PrimitiveType::String)),
877            ),
878            (
879                10,
880                NestedField::required(10, "value", Primitive(PrimitiveType::Int)),
881            ),
882            (
883                11,
884                NestedField::required(
885                    11,
886                    "location",
887                    List(ListType {
888                        element_field: NestedField::list_element(
889                            12,
890                            Struct(StructType::new(vec![
891                                NestedField::optional(
892                                    13,
893                                    "latitude",
894                                    Primitive(PrimitiveType::Float),
895                                )
896                                .into(),
897                                NestedField::optional(
898                                    14,
899                                    "longitude",
900                                    Primitive(PrimitiveType::Float),
901                                )
902                                .into(),
903                            ])),
904                            true,
905                        )
906                        .into(),
907                    }),
908                ),
909            ),
910            (
911                12,
912                NestedField::list_element(
913                    12,
914                    Struct(StructType::new(vec![
915                        NestedField::optional(13, "latitude", Primitive(PrimitiveType::Float))
916                            .into(),
917                        NestedField::optional(14, "longitude", Primitive(PrimitiveType::Float))
918                            .into(),
919                    ])),
920                    true,
921                ),
922            ),
923            (
924                13,
925                NestedField::optional(13, "latitude", Primitive(PrimitiveType::Float)),
926            ),
927            (
928                14,
929                NestedField::optional(14, "longitude", Primitive(PrimitiveType::Float)),
930            ),
931            (
932                15,
933                NestedField::optional(
934                    15,
935                    "person",
936                    Type::Struct(StructType::new(vec![
937                        NestedField::optional(16, "name", Type::Primitive(PrimitiveType::String))
938                            .into(),
939                        NestedField::required(17, "age", Type::Primitive(PrimitiveType::Int))
940                            .into(),
941                    ])),
942                ),
943            ),
944            (
945                16,
946                NestedField::optional(16, "name", Type::Primitive(PrimitiveType::String)),
947            ),
948            (
949                17,
950                NestedField::required(17, "age", Type::Primitive(PrimitiveType::Int)),
951            ),
952        ]);
953
954        let schema = table_schema_nested();
955        for (id, field) in expected_id_to_field {
956            assert_eq!(
957                Some(&field),
958                schema.field_by_id(id).map(|f| f.as_ref()),
959                "Field for {} not match.",
960                id
961            );
962        }
963    }
964
965    #[test]
966    fn test_build_accessors() {
967        let schema = table_schema_nested();
968
969        let test_struct = crate::spec::Struct::from_iter(vec![
970            Some(Literal::string("foo value")),
971            Some(Literal::int(1002)),
972            Some(Literal::bool(true)),
973            Some(Literal::List(vec![
974                Some(Literal::string("qux item 1")),
975                Some(Literal::string("qux item 2")),
976            ])),
977            Some(Literal::Map(MapValue::from([(
978                Literal::string("quux key 1"),
979                Some(Literal::Map(MapValue::from([(
980                    Literal::string("quux nested key 1"),
981                    Some(Literal::int(1000)),
982                )]))),
983            )]))),
984            Some(Literal::List(vec![Some(Literal::Struct(
985                crate::spec::Struct::from_iter(vec![
986                    Some(Literal::float(52.509_09)),
987                    Some(Literal::float(-1.885_249)),
988                ]),
989            ))])),
990            Some(Literal::Struct(crate::spec::Struct::from_iter(vec![
991                Some(Literal::string("Testy McTest")),
992                Some(Literal::int(33)),
993            ]))),
994        ]);
995
996        assert_eq!(
997            schema
998                .accessor_by_field_id(1)
999                .unwrap()
1000                .get(&test_struct)
1001                .unwrap(),
1002            Some(Datum::string("foo value"))
1003        );
1004        assert_eq!(
1005            schema
1006                .accessor_by_field_id(2)
1007                .unwrap()
1008                .get(&test_struct)
1009                .unwrap(),
1010            Some(Datum::int(1002))
1011        );
1012        assert_eq!(
1013            schema
1014                .accessor_by_field_id(3)
1015                .unwrap()
1016                .get(&test_struct)
1017                .unwrap(),
1018            Some(Datum::bool(true))
1019        );
1020        assert_eq!(
1021            schema
1022                .accessor_by_field_id(16)
1023                .unwrap()
1024                .get(&test_struct)
1025                .unwrap(),
1026            Some(Datum::string("Testy McTest"))
1027        );
1028        assert_eq!(
1029            schema
1030                .accessor_by_field_id(17)
1031                .unwrap()
1032                .get(&test_struct)
1033                .unwrap(),
1034            Some(Datum::int(33))
1035        );
1036    }
1037
1038    #[test]
1039    fn test_highest_field_id() {
1040        let schema = table_schema_nested();
1041        assert_eq!(17, schema.highest_field_id());
1042
1043        let schema = table_schema_simple().0;
1044        assert_eq!(3, schema.highest_field_id());
1045    }
1046
1047    #[test]
1048    fn test_highest_field_id_no_fields() {
1049        let schema = Schema::builder().with_schema_id(1).build().unwrap();
1050        assert_eq!(0, schema.highest_field_id());
1051    }
1052
1053    #[test]
1054    fn test_field_ids_must_be_unique() {
1055        let reassigned_schema = Schema::builder()
1056            .with_schema_id(1)
1057            .with_identifier_field_ids(vec![5])
1058            .with_alias(BiHashMap::from_iter(vec![("bar_alias".to_string(), 3)]))
1059            .with_fields(vec![
1060                NestedField::required(5, "foo", Type::Primitive(PrimitiveType::String)).into(),
1061                NestedField::optional(3, "bar", Type::Primitive(PrimitiveType::Int)).into(),
1062                NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
1063            ])
1064            .build()
1065            .unwrap_err();
1066
1067        assert!(reassigned_schema.message().contains("'field.id' 3"));
1068    }
1069
1070    #[test]
1071    fn test_reassign_ids_empty_schema() {
1072        let schema = Schema::builder().with_schema_id(1).build().unwrap();
1073        let reassigned_schema = schema
1074            .clone()
1075            .into_builder()
1076            .with_reassigned_field_ids(0)
1077            .build()
1078            .unwrap();
1079
1080        assert_eq!(schema, reassigned_schema);
1081        assert_eq!(schema.highest_field_id(), 0);
1082    }
1083
1084    #[test]
1085    fn test_identifier_field_ids() {
1086        // field in map
1087        assert!(
1088            Schema::builder()
1089                .with_schema_id(1)
1090                .with_identifier_field_ids(vec![2])
1091                .with_fields(vec![
1092                    NestedField::required(
1093                        1,
1094                        "Map",
1095                        Type::Map(MapType::new(
1096                            NestedField::map_key_element(2, Type::Primitive(PrimitiveType::String))
1097                                .into(),
1098                            NestedField::map_value_element(
1099                                3,
1100                                Type::Primitive(PrimitiveType::Boolean),
1101                                true,
1102                            )
1103                            .into(),
1104                        )),
1105                    )
1106                    .into()
1107                ])
1108                .build()
1109                .is_err()
1110        );
1111        assert!(
1112            Schema::builder()
1113                .with_schema_id(1)
1114                .with_identifier_field_ids(vec![3])
1115                .with_fields(vec![
1116                    NestedField::required(
1117                        1,
1118                        "Map",
1119                        Type::Map(MapType::new(
1120                            NestedField::map_key_element(2, Type::Primitive(PrimitiveType::String))
1121                                .into(),
1122                            NestedField::map_value_element(
1123                                3,
1124                                Type::Primitive(PrimitiveType::Boolean),
1125                                true,
1126                            )
1127                            .into(),
1128                        )),
1129                    )
1130                    .into()
1131                ])
1132                .build()
1133                .is_err()
1134        );
1135
1136        // field in list
1137        assert!(
1138            Schema::builder()
1139                .with_schema_id(1)
1140                .with_identifier_field_ids(vec![2])
1141                .with_fields(vec![
1142                    NestedField::required(
1143                        1,
1144                        "List",
1145                        Type::List(ListType::new(
1146                            NestedField::list_element(
1147                                2,
1148                                Type::Primitive(PrimitiveType::String),
1149                                true
1150                            )
1151                            .into(),
1152                        )),
1153                    )
1154                    .into()
1155                ])
1156                .build()
1157                .is_err()
1158        );
1159
1160        // field in optional struct
1161        assert!(
1162            Schema::builder()
1163                .with_schema_id(1)
1164                .with_identifier_field_ids(vec![2])
1165                .with_fields(vec![
1166                    NestedField::optional(
1167                        1,
1168                        "Struct",
1169                        Type::Struct(StructType::new(vec![
1170                            NestedField::required(
1171                                2,
1172                                "name",
1173                                Type::Primitive(PrimitiveType::String)
1174                            )
1175                            .into(),
1176                            NestedField::optional(3, "age", Type::Primitive(PrimitiveType::Int))
1177                                .into(),
1178                        ])),
1179                    )
1180                    .into()
1181                ])
1182                .build()
1183                .is_err()
1184        );
1185
1186        // float and double
1187        assert!(
1188            Schema::builder()
1189                .with_schema_id(1)
1190                .with_identifier_field_ids(vec![1])
1191                .with_fields(vec![
1192                    NestedField::required(1, "Float", Type::Primitive(PrimitiveType::Float),)
1193                        .into()
1194                ])
1195                .build()
1196                .is_err()
1197        );
1198        assert!(
1199            Schema::builder()
1200                .with_schema_id(1)
1201                .with_identifier_field_ids(vec![1])
1202                .with_fields(vec![
1203                    NestedField::required(1, "Double", Type::Primitive(PrimitiveType::Double),)
1204                        .into()
1205                ])
1206                .build()
1207                .is_err()
1208        );
1209
1210        // optional field
1211        assert!(
1212            Schema::builder()
1213                .with_schema_id(1)
1214                .with_identifier_field_ids(vec![1])
1215                .with_fields(vec![
1216                    NestedField::required(1, "Required", Type::Primitive(PrimitiveType::String),)
1217                        .into()
1218                ])
1219                .build()
1220                .is_ok()
1221        );
1222        assert!(
1223            Schema::builder()
1224                .with_schema_id(1)
1225                .with_identifier_field_ids(vec![1])
1226                .with_fields(vec![
1227                    NestedField::optional(1, "Optional", Type::Primitive(PrimitiveType::String),)
1228                        .into()
1229                ])
1230                .build()
1231                .is_err()
1232        );
1233    }
1234}