1use std::collections::{HashMap, HashSet};
21use std::fmt::{Display, Formatter};
22use std::sync::Arc;
23
24mod utils;
25mod visitor;
26pub use self::visitor::*;
27pub(super) mod _serde;
28mod id_reassigner;
29mod index;
30mod prune_columns;
31use bimap::BiHashMap;
32use itertools::{Itertools, zip_eq};
33use serde::{Deserialize, Serialize};
34
35use self::_serde::SchemaEnum;
36use self::id_reassigner::ReassignFieldIds;
37use self::index::{IndexByName, index_by_id, index_parents};
38pub use self::prune_columns::prune_columns;
39use super::NestedField;
40use crate::error::Result;
41use crate::expr::accessor::StructAccessor;
42use crate::spec::datatypes::{
43 LIST_FIELD_NAME, ListType, MAP_KEY_FIELD_NAME, MAP_VALUE_FIELD_NAME, MapType, NestedFieldRef,
44 PrimitiveType, StructType, Type,
45};
46use crate::{Error, ErrorKind, ensure_data_valid};
47
/// Identifier of a schema, stored as a 32-bit signed integer.
pub type SchemaId = i32;
/// Shared, reference-counted handle to a [`Schema`].
pub type SchemaRef = Arc<Schema>;
/// Schema id that [`Schema::builder`] starts with when none is set explicitly.
pub const DEFAULT_SCHEMA_ID: SchemaId = 0;
54
/// A table schema: the top-level struct type together with lookup indexes derived from it.
///
/// Values are assembled by `SchemaBuilder::build`. Serialization and deserialization are
/// routed through `SchemaEnum` via the serde attributes below.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(try_from = "SchemaEnum", into = "SchemaEnum")]
pub struct Schema {
    /// Top-level struct holding the schema's fields.
    r#struct: StructType,
    /// Identifier of this schema.
    schema_id: SchemaId,
    /// Largest field id found in the schema; 0 when the schema has no fields.
    highest_field_id: i32,
    /// Field ids registered as identifier fields (checked by the builder before construction).
    identifier_field_ids: HashSet<i32>,

    /// Bidirectional mapping between aliases and field ids.
    alias_to_id: BiHashMap<String, i32>,
    /// Fields of the schema keyed by field id, as produced by `index_by_id`.
    id_to_field: HashMap<i32, NestedFieldRef>,

    /// Field name to field id, as produced by `IndexByName`.
    name_to_id: HashMap<String, i32>,
    /// Same mapping as `name_to_id`, with every name lower-cased.
    lowercase_name_to_id: HashMap<String, i32>,
    /// Field id to field name, as produced by `IndexByName`.
    id_to_name: HashMap<i32, String>,

    /// Accessors keyed by field id; built only for primitive fields that are top-level or
    /// nested inside structs.
    field_id_to_accessor: HashMap<i32, Arc<StructAccessor>>,
}
73
74impl PartialEq for Schema {
75 fn eq(&self, other: &Self) -> bool {
76 self.r#struct == other.r#struct
77 && self.schema_id == other.schema_id
78 && self.identifier_field_ids == other.identifier_field_ids
79 }
80}
81
82impl Eq for Schema {}
83
/// Builder that collects the pieces of a [`Schema`] and validates them in `build`.
#[derive(Debug)]
pub struct SchemaBuilder {
    /// Schema id the built schema will carry.
    schema_id: i32,
    /// Top-level fields, in the order they were added.
    fields: Vec<NestedFieldRef>,
    /// Bidirectional mapping between aliases and field ids.
    alias_to_id: BiHashMap<String, i32>,
    /// Field ids to register as identifier fields.
    identifier_field_ids: HashSet<i32>,
    /// When set, `build` reassigns all field ids starting from this value.
    reassign_field_ids_from: Option<i32>,
}
93
94impl SchemaBuilder {
95 pub fn with_fields(mut self, fields: impl IntoIterator<Item = NestedFieldRef>) -> Self {
97 self.fields.extend(fields);
98 self
99 }
100
101 pub(crate) fn with_reassigned_field_ids(mut self, start_from: u32) -> Self {
106 self.reassign_field_ids_from = Some(start_from.try_into().unwrap_or(i32::MAX));
107 self
108 }
109
110 pub fn with_schema_id(mut self, schema_id: i32) -> Self {
112 self.schema_id = schema_id;
113 self
114 }
115
116 pub fn with_identifier_field_ids(mut self, ids: impl IntoIterator<Item = i32>) -> Self {
118 self.identifier_field_ids.extend(ids);
119 self
120 }
121
122 pub fn with_alias(mut self, alias_to_id: BiHashMap<String, i32>) -> Self {
124 self.alias_to_id = alias_to_id;
125 self
126 }
127
128 pub fn build(self) -> Result<Schema> {
130 let field_id_to_accessor = self.build_accessors();
131
132 let r#struct = StructType::new(self.fields);
133 let id_to_field = index_by_id(&r#struct)?;
134
135 Self::validate_identifier_ids(
136 &r#struct,
137 &id_to_field,
138 self.identifier_field_ids.iter().copied(),
139 )?;
140
141 let (name_to_id, id_to_name) = {
142 let mut index = IndexByName::default();
143 visit_struct(&r#struct, &mut index)?;
144 index.indexes()
145 };
146
147 let lowercase_name_to_id = name_to_id
148 .iter()
149 .map(|(k, v)| (k.to_lowercase(), *v))
150 .collect();
151
152 let highest_field_id = id_to_field.keys().max().cloned().unwrap_or(0);
153
154 let mut schema = Schema {
155 r#struct,
156 schema_id: self.schema_id,
157 highest_field_id,
158 identifier_field_ids: self.identifier_field_ids,
159 alias_to_id: self.alias_to_id,
160 id_to_field,
161
162 name_to_id,
163 lowercase_name_to_id,
164 id_to_name,
165
166 field_id_to_accessor,
167 };
168
169 if let Some(start_from) = self.reassign_field_ids_from {
170 let mut id_reassigner = ReassignFieldIds::new(start_from);
171 let new_fields = id_reassigner.reassign_field_ids(schema.r#struct.fields().to_vec())?;
172 let new_identifier_field_ids =
173 id_reassigner.apply_to_identifier_fields(schema.identifier_field_ids)?;
174 let new_alias_to_id = id_reassigner.apply_to_aliases(schema.alias_to_id.clone())?;
175
176 schema = Schema::builder()
177 .with_schema_id(schema.schema_id)
178 .with_fields(new_fields)
179 .with_identifier_field_ids(new_identifier_field_ids)
180 .with_alias(new_alias_to_id)
181 .build()?;
182 }
183
184 Ok(schema)
185 }
186
187 fn build_accessors(&self) -> HashMap<i32, Arc<StructAccessor>> {
188 let mut map = HashMap::new();
189
190 for (pos, field) in self.fields.iter().enumerate() {
191 match field.field_type.as_ref() {
192 Type::Primitive(prim_type) => {
193 let accessor = Arc::new(StructAccessor::new(pos, prim_type.clone()));
195 map.insert(field.id, accessor.clone());
196 }
197
198 Type::Struct(nested) => {
199 for (field_id, accessor) in Self::build_accessors_nested(nested.fields()) {
201 let new_accessor = Arc::new(StructAccessor::wrap(pos, accessor));
202 map.insert(field_id, new_accessor.clone());
203 }
204 }
205 _ => {
206 }
208 }
209 }
210
211 map
212 }
213
214 fn build_accessors_nested(fields: &[NestedFieldRef]) -> Vec<(i32, Box<StructAccessor>)> {
215 let mut results = vec![];
216 for (pos, field) in fields.iter().enumerate() {
217 match field.field_type.as_ref() {
218 Type::Primitive(prim_type) => {
219 let accessor = Box::new(StructAccessor::new(pos, prim_type.clone()));
220 results.push((field.id, accessor));
221 }
222 Type::Struct(nested) => {
223 let nested_accessors = Self::build_accessors_nested(nested.fields());
224
225 let wrapped_nested_accessors =
226 nested_accessors.into_iter().map(|(id, accessor)| {
227 let new_accessor = Box::new(StructAccessor::wrap(pos, accessor));
228 (id, new_accessor.clone())
229 });
230
231 results.extend(wrapped_nested_accessors);
232 }
233 _ => {
234 }
236 }
237 }
238
239 results
240 }
241
242 fn validate_identifier_ids(
248 r#struct: &StructType,
249 id_to_field: &HashMap<i32, NestedFieldRef>,
250 identifier_field_ids: impl Iterator<Item = i32>,
251 ) -> Result<()> {
252 let id_to_parent = index_parents(r#struct)?;
253 for identifier_field_id in identifier_field_ids {
254 let field = id_to_field.get(&identifier_field_id).ok_or_else(|| {
255 Error::new(
256 ErrorKind::DataInvalid,
257 format!(
258 "Cannot add identifier field {identifier_field_id}: field does not exist"
259 ),
260 )
261 })?;
262 ensure_data_valid!(
263 field.required,
264 "Cannot add identifier field: {} is an optional field",
265 field.name
266 );
267 if let Type::Primitive(p) = field.field_type.as_ref() {
268 ensure_data_valid!(
269 !matches!(p, PrimitiveType::Double | PrimitiveType::Float),
270 "Cannot add identifier field {}: cannot be a float or double type",
271 field.name
272 );
273 } else {
274 return Err(Error::new(
275 ErrorKind::DataInvalid,
276 format!(
277 "Cannot add field {} as an identifier field: not a primitive type field",
278 field.name
279 ),
280 ));
281 }
282
283 let mut cur_field_id = identifier_field_id;
284 while let Some(parent) = id_to_parent.get(&cur_field_id) {
285 let parent_field = id_to_field
286 .get(parent)
287 .expect("Field id should not disappear.");
288 ensure_data_valid!(
289 parent_field.field_type.is_struct(),
290 "Cannot add field {} as an identifier field: must not be nested in {:?}",
291 field.name,
292 parent_field
293 );
294 ensure_data_valid!(
295 parent_field.required,
296 "Cannot add field {} as an identifier field: must not be nested in an optional field {}",
297 field.name,
298 parent_field
299 );
300 cur_field_id = *parent;
301 }
302 }
303
304 Ok(())
305 }
306}
307
308impl Schema {
309 pub fn builder() -> SchemaBuilder {
311 SchemaBuilder {
312 schema_id: DEFAULT_SCHEMA_ID,
313 fields: vec![],
314 identifier_field_ids: HashSet::default(),
315 alias_to_id: BiHashMap::default(),
316 reassign_field_ids_from: None,
317 }
318 }
319
320 pub fn into_builder(self) -> SchemaBuilder {
322 SchemaBuilder {
323 schema_id: self.schema_id,
324 fields: self.r#struct.fields().to_vec(),
325 alias_to_id: self.alias_to_id,
326 identifier_field_ids: self.identifier_field_ids,
327 reassign_field_ids_from: None,
328 }
329 }
330
331 pub fn field_by_id(&self, field_id: i32) -> Option<&NestedFieldRef> {
333 self.id_to_field.get(&field_id)
334 }
335
336 pub fn field_by_name(&self, field_name: &str) -> Option<&NestedFieldRef> {
340 self.name_to_id
341 .get(field_name)
342 .and_then(|id| self.field_by_id(*id))
343 }
344
345 pub fn field_by_name_case_insensitive(&self, field_name: &str) -> Option<&NestedFieldRef> {
349 self.lowercase_name_to_id
350 .get(&field_name.to_lowercase())
351 .and_then(|id| self.field_by_id(*id))
352 }
353
354 pub fn field_by_alias(&self, alias: &str) -> Option<&NestedFieldRef> {
356 self.alias_to_id
357 .get_by_left(alias)
358 .and_then(|id| self.field_by_id(*id))
359 }
360
361 #[inline]
363 pub fn highest_field_id(&self) -> i32 {
364 self.highest_field_id
365 }
366
367 #[inline]
369 pub fn schema_id(&self) -> SchemaId {
370 self.schema_id
371 }
372
373 #[inline]
375 pub fn as_struct(&self) -> &StructType {
376 &self.r#struct
377 }
378
379 #[inline]
381 pub fn identifier_field_ids(&self) -> impl ExactSizeIterator<Item = i32> + '_ {
382 self.identifier_field_ids.iter().copied()
383 }
384
385 pub fn field_id_by_name(&self, name: &str) -> Option<i32> {
387 self.name_to_id.get(name).copied()
388 }
389
390 pub fn name_by_field_id(&self, field_id: i32) -> Option<&str> {
392 self.id_to_name.get(&field_id).map(String::as_str)
393 }
394
395 pub fn accessor_by_field_id(&self, field_id: i32) -> Option<Arc<StructAccessor>> {
397 self.field_id_to_accessor.get(&field_id).cloned()
398 }
399
400 pub(crate) fn is_same_schema(&self, other: &SchemaRef) -> bool {
402 self.as_struct().eq(other.as_struct())
403 && self.identifier_field_ids().eq(other.identifier_field_ids())
404 }
405
406 pub(crate) fn with_schema_id(self, schema_id: SchemaId) -> Self {
410 Self { schema_id, ..self }
411 }
412
413 pub fn field_id_to_name_map(&self) -> &HashMap<i32, String> {
415 &self.id_to_name
416 }
417
418 pub fn field_id_to_fields(&self) -> &HashMap<i32, NestedFieldRef> {
420 &self.id_to_field
421 }
422}
423
424impl Display for Schema {
425 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
426 writeln!(f, "table {{")?;
427 for field in self.as_struct().fields() {
428 writeln!(f, " {}", field)?;
429 }
430 writeln!(f, "}}")
431 }
432}
433
434#[cfg(test)]
435mod tests {
436 use std::collections::HashMap;
437
438 use bimap::BiHashMap;
439
440 use crate::spec::datatypes::Type::{List, Map, Primitive, Struct};
441 use crate::spec::datatypes::{
442 ListType, MapType, NestedField, NestedFieldRef, PrimitiveType, StructType, Type,
443 };
444 use crate::spec::schema::Schema;
445 use crate::spec::values::Map as MapValue;
446 use crate::spec::{Datum, Literal};
447
/// Builds a two-field schema and checks its id, highest field id and id lookups.
#[test]
fn test_construct_schema() {
    let bool_field: NestedFieldRef =
        NestedField::required(1, "f1", Type::Primitive(PrimitiveType::Boolean)).into();
    let int_field: NestedFieldRef =
        NestedField::optional(2, "f2", Type::Primitive(PrimitiveType::Int)).into();

    let builder = Schema::builder()
        .with_fields(vec![bool_field.clone()])
        .with_fields(vec![int_field.clone()])
        .with_schema_id(3);
    let schema = builder.build().unwrap();

    assert_eq!(3, schema.schema_id());
    assert_eq!(2, schema.highest_field_id());
    assert_eq!(Some(&bool_field), schema.field_by_id(1));
    assert_eq!(Some(&int_field), schema.field_by_id(2));
    assert_eq!(None, schema.field_by_id(3));
}
468
/// Test fixture: a flat three-field schema (id 1, identifier field 2) paired with a JSON
/// document describing the same schema.
pub fn table_schema_simple<'a>() -> (Schema, &'a str) {
    let schema = Schema::builder()
        .with_schema_id(1)
        .with_identifier_field_ids(vec![2])
        .with_fields(vec![
            NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(),
            NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
            NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
        ])
        .build()
        .unwrap();
    let record = r#"{
 "type":"struct",
 "schema-id":1,
 "fields":[
 {
 "id":1,
 "name":"foo",
 "required":false,
 "type":"string"
 },
 {
 "id":2,
 "name":"bar",
 "required":true,
 "type":"int"
 },
 {
 "id":3,
 "name":"baz",
 "required":false,
 "type":"boolean"
 }
 ],
 "identifier-field-ids":[2]
 }"#;
    (schema, record)
}
507
/// Test fixture: a schema (id 1, identifier field 2) with primitives, a list, a map of
/// maps, a list of structs and a struct, using field ids 1 through 17.
pub fn table_schema_nested() -> Schema {
    Schema::builder()
        .with_schema_id(1)
        .with_identifier_field_ids(vec![2])
        .with_fields(vec![
            NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(),
            NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
            NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
            // List of strings.
            NestedField::required(
                4,
                "qux",
                Type::List(ListType {
                    element_field: NestedField::list_element(
                        5,
                        Type::Primitive(PrimitiveType::String),
                        true,
                    )
                    .into(),
                }),
            )
            .into(),
            // Map from string to a map from string to int.
            NestedField::required(
                6,
                "quux",
                Type::Map(MapType {
                    key_field: NestedField::map_key_element(
                        7,
                        Type::Primitive(PrimitiveType::String),
                    )
                    .into(),
                    value_field: NestedField::map_value_element(
                        8,
                        Type::Map(MapType {
                            key_field: NestedField::map_key_element(
                                9,
                                Type::Primitive(PrimitiveType::String),
                            )
                            .into(),
                            value_field: NestedField::map_value_element(
                                10,
                                Type::Primitive(PrimitiveType::Int),
                                true,
                            )
                            .into(),
                        }),
                        true,
                    )
                    .into(),
                }),
            )
            .into(),
            // List of structs with two optional float fields.
            NestedField::required(
                11,
                "location",
                Type::List(ListType {
                    element_field: NestedField::list_element(
                        12,
                        Type::Struct(StructType::new(vec![
                            NestedField::optional(
                                13,
                                "latitude",
                                Type::Primitive(PrimitiveType::Float),
                            )
                            .into(),
                            NestedField::optional(
                                14,
                                "longitude",
                                Type::Primitive(PrimitiveType::Float),
                            )
                            .into(),
                        ])),
                        true,
                    )
                    .into(),
                }),
            )
            .into(),
            // Optional struct with a name and an age.
            NestedField::optional(
                15,
                "person",
                Type::Struct(StructType::new(vec![
                    NestedField::optional(16, "name", Type::Primitive(PrimitiveType::String))
                        .into(),
                    NestedField::required(17, "age", Type::Primitive(PrimitiveType::Int))
                        .into(),
                ])),
            )
            .into(),
        ])
        .build()
        .unwrap()
}
600
/// Checks the `Display` rendering of the simple fixture schema.
#[test]
fn test_schema_display() {
    let expected_str = "
table {
 1: foo: optional string\x20
 2: bar: required int\x20
 3: baz: optional boolean\x20
}
";

    let (schema, _) = table_schema_simple();
    let rendered = format!("\n{}", schema);
    assert_eq!(expected_str, rendered);
}
613
/// Building a schema in which two fields share the name `baz` must fail with the expected
/// message.
#[test]
fn test_schema_build_failed_on_duplicate_names() {
    let fields = vec![
        NestedField::required(1, "foo", Primitive(PrimitiveType::String)).into(),
        NestedField::required(2, "bar", Primitive(PrimitiveType::Int)).into(),
        NestedField::optional(3, "baz", Primitive(PrimitiveType::Boolean)).into(),
        NestedField::optional(4, "baz", Primitive(PrimitiveType::Boolean)).into(),
    ];

    let build_result = Schema::builder()
        .with_schema_id(1)
        .with_identifier_field_ids(vec![1])
        .with_fields(fields)
        .build();

    let error = build_result.unwrap_err();
    assert!(
        error
            .message()
            .contains("Invalid schema: multiple fields for name baz")
    );
}
633
/// Round-trips the nested fixture through `into_builder` + `build` and expects an equal
/// schema.
#[test]
fn test_schema_into_builder() {
    let original_schema = table_schema_nested();

    let rebuilt = original_schema.clone().into_builder().build().unwrap();

    assert_eq!(original_schema, rebuilt);
}
642
/// Checks the full name -> id index of the nested fixture, including the dotted names of
/// nested fields and the shortened names that omit `element` for the list of structs.
#[test]
fn test_schema_index_by_name() {
    let expected_name_to_id = HashMap::from(
        [
            ("foo", 1),
            ("bar", 2),
            ("baz", 3),
            ("qux", 4),
            ("qux.element", 5),
            ("quux", 6),
            ("quux.key", 7),
            ("quux.value", 8),
            ("quux.value.key", 9),
            ("quux.value.value", 10),
            ("location", 11),
            ("location.element", 12),
            ("location.element.latitude", 13),
            ("location.element.longitude", 14),
            ("location.latitude", 13),
            ("location.longitude", 14),
            ("person", 15),
            ("person.name", 16),
            ("person.age", 17),
        ]
        .map(|e| (e.0.to_string(), e.1)),
    );

    let schema = table_schema_nested();
    assert_eq!(&expected_name_to_id, &schema.name_to_id);
}
673
/// Looks up every field of the nested fixture through mixed-case spellings of its name and
/// expects the case-insensitive lookup to resolve the right id.
#[test]
fn test_schema_index_by_name_case_insensitive() {
    let expected_name_to_id = HashMap::from(
        [
            ("fOo", 1),
            ("Bar", 2),
            ("BAz", 3),
            ("quX", 4),
            ("quX.ELEment", 5),
            ("qUUx", 6),
            ("QUUX.KEY", 7),
            ("QUUX.Value", 8),
            ("qUUX.VALUE.Key", 9),
            ("qUux.VaLue.Value", 10),
            ("lOCAtION", 11),
            ("LOCAtioN.ELeMENt", 12),
            ("LoCATion.element.LATitude", 13),
            ("locatION.ElemeNT.LONgitude", 14),
            ("LOCAtiON.LATITUDE", 13),
            ("LOCATION.LONGITUDE", 14),
            ("PERSon", 15),
            ("PERSON.Name", 16),
            ("peRSON.AGe", 17),
        ]
        .map(|e| (e.0.to_string(), e.1)),
    );

    let schema = table_schema_nested();
    for (name, id) in expected_name_to_id {
        assert_eq!(
            Some(id),
            schema.field_by_name_case_insensitive(&name).map(|f| f.id)
        );
    }
}
709
/// Checks `name_by_field_id` for field ids 1 through 14 of the nested fixture.
#[test]
fn test_schema_find_column_name() {
    let expected_column_name = HashMap::from([
        (1, "foo"),
        (2, "bar"),
        (3, "baz"),
        (4, "qux"),
        (5, "qux.element"),
        (6, "quux"),
        (7, "quux.key"),
        (8, "quux.value"),
        (9, "quux.value.key"),
        (10, "quux.value.value"),
        (11, "location"),
        (12, "location.element"),
        (13, "location.element.latitude"),
        (14, "location.element.longitude"),
    ]);

    let schema = table_schema_nested();
    for (id, name) in expected_column_name {
        assert_eq!(
            Some(name),
            schema.name_by_field_id(id),
            "Column name for field id {} not match.",
            id
        );
    }
}
739
/// An id that is not part of the nested fixture has no column name.
#[test]
fn test_schema_find_column_name_not_found() {
    let missing_id = 99;
    let schema = table_schema_nested();

    assert_eq!(None, schema.name_by_field_id(missing_id));
}
746
/// Checks `name_by_field_id` for every field of the simple fixture.
#[test]
fn test_schema_find_column_name_by_id_simple() {
    let (schema, _) = table_schema_simple();

    for (id, name) in [(1, "foo"), (2, "bar"), (3, "baz")] {
        let actual = schema.name_by_field_id(id);
        assert_eq!(
            Some(name),
            actual,
            "Column name for field id {} not match.",
            id
        );
    }
}
762
/// Field ids 1..=3 of the simple fixture resolve to the top-level fields in order; an
/// unknown id and an unknown name resolve to nothing.
#[test]
fn test_schema_find_simple() {
    let (schema, _) = table_schema_simple();

    for (position, id) in [(0, 1), (1, 2), (2, 3)] {
        assert_eq!(
            Some(schema.r#struct.fields()[position].clone()),
            schema.field_by_id(id).cloned()
        );
    }

    assert!(schema.field_by_id(4).is_none());
    assert!(schema.field_by_name("non exist").is_none());
}
783
/// Checks that `field_by_id` returns the expected field for every id (1 through 17) of the
/// nested fixture, including list elements, map keys/values and struct members.
#[test]
fn test_schema_find_nested() {
    let expected_id_to_field: HashMap<i32, NestedField> = HashMap::from([
        (
            1,
            NestedField::optional(1, "foo", Primitive(PrimitiveType::String)),
        ),
        (
            2,
            NestedField::required(2, "bar", Primitive(PrimitiveType::Int)),
        ),
        (
            3,
            NestedField::optional(3, "baz", Primitive(PrimitiveType::Boolean)),
        ),
        (
            4,
            NestedField::required(
                4,
                "qux",
                Type::List(ListType {
                    element_field: NestedField::list_element(
                        5,
                        Type::Primitive(PrimitiveType::String),
                        true,
                    )
                    .into(),
                }),
            ),
        ),
        (
            5,
            NestedField::required(5, "element", Primitive(PrimitiveType::String)),
        ),
        (
            6,
            NestedField::required(
                6,
                "quux",
                Map(MapType {
                    key_field: NestedField::map_key_element(
                        7,
                        Primitive(PrimitiveType::String),
                    )
                    .into(),
                    value_field: NestedField::map_value_element(
                        8,
                        Map(MapType {
                            key_field: NestedField::map_key_element(
                                9,
                                Primitive(PrimitiveType::String),
                            )
                            .into(),
                            value_field: NestedField::map_value_element(
                                10,
                                Primitive(PrimitiveType::Int),
                                true,
                            )
                            .into(),
                        }),
                        true,
                    )
                    .into(),
                }),
            ),
        ),
        (
            7,
            NestedField::required(7, "key", Primitive(PrimitiveType::String)),
        ),
        (
            8,
            NestedField::required(
                8,
                "value",
                Map(MapType {
                    key_field: NestedField::map_key_element(
                        9,
                        Primitive(PrimitiveType::String),
                    )
                    .into(),
                    value_field: NestedField::map_value_element(
                        10,
                        Primitive(PrimitiveType::Int),
                        true,
                    )
                    .into(),
                }),
            ),
        ),
        (
            9,
            NestedField::required(9, "key", Primitive(PrimitiveType::String)),
        ),
        (
            10,
            NestedField::required(10, "value", Primitive(PrimitiveType::Int)),
        ),
        (
            11,
            NestedField::required(
                11,
                "location",
                List(ListType {
                    element_field: NestedField::list_element(
                        12,
                        Struct(StructType::new(vec![
                            NestedField::optional(
                                13,
                                "latitude",
                                Primitive(PrimitiveType::Float),
                            )
                            .into(),
                            NestedField::optional(
                                14,
                                "longitude",
                                Primitive(PrimitiveType::Float),
                            )
                            .into(),
                        ])),
                        true,
                    )
                    .into(),
                }),
            ),
        ),
        (
            12,
            NestedField::list_element(
                12,
                Struct(StructType::new(vec![
                    NestedField::optional(13, "latitude", Primitive(PrimitiveType::Float))
                        .into(),
                    NestedField::optional(14, "longitude", Primitive(PrimitiveType::Float))
                        .into(),
                ])),
                true,
            ),
        ),
        (
            13,
            NestedField::optional(13, "latitude", Primitive(PrimitiveType::Float)),
        ),
        (
            14,
            NestedField::optional(14, "longitude", Primitive(PrimitiveType::Float)),
        ),
        (
            15,
            NestedField::optional(
                15,
                "person",
                Type::Struct(StructType::new(vec![
                    NestedField::optional(16, "name", Type::Primitive(PrimitiveType::String))
                        .into(),
                    NestedField::required(17, "age", Type::Primitive(PrimitiveType::Int))
                        .into(),
                ])),
            ),
        ),
        (
            16,
            NestedField::optional(16, "name", Type::Primitive(PrimitiveType::String)),
        ),
        (
            17,
            NestedField::required(17, "age", Type::Primitive(PrimitiveType::Int)),
        ),
    ]);

    let schema = table_schema_nested();
    for (id, field) in expected_id_to_field {
        assert_eq!(
            Some(&field),
            schema.field_by_id(id).map(|f| f.as_ref()),
            "Field for {} not match.",
            id
        );
    }
}
964
/// Builds a struct value laid out like the nested fixture and checks that the accessors
/// for the top-level primitives (ids 1-3) and for the members of the `person` struct
/// (ids 16-17) extract the expected values.
#[test]
fn test_build_accessors() {
    let schema = table_schema_nested();

    // One value per top-level field of the fixture, in field order.
    let test_struct = crate::spec::Struct::from_iter(vec![
        Some(Literal::string("foo value")),
        Some(Literal::int(1002)),
        Some(Literal::bool(true)),
        Some(Literal::List(vec![
            Some(Literal::string("qux item 1")),
            Some(Literal::string("qux item 2")),
        ])),
        Some(Literal::Map(MapValue::from([(
            Literal::string("quux key 1"),
            Some(Literal::Map(MapValue::from([(
                Literal::string("quux nested key 1"),
                Some(Literal::int(1000)),
            )]))),
        )]))),
        Some(Literal::List(vec![Some(Literal::Struct(
            crate::spec::Struct::from_iter(vec![
                Some(Literal::float(52.509_09)),
                Some(Literal::float(-1.885_249)),
            ]),
        ))])),
        Some(Literal::Struct(crate::spec::Struct::from_iter(vec![
            Some(Literal::string("Testy McTest")),
            Some(Literal::int(33)),
        ]))),
    ]);

    assert_eq!(
        schema
            .accessor_by_field_id(1)
            .unwrap()
            .get(&test_struct)
            .unwrap(),
        Some(Datum::string("foo value"))
    );
    assert_eq!(
        schema
            .accessor_by_field_id(2)
            .unwrap()
            .get(&test_struct)
            .unwrap(),
        Some(Datum::int(1002))
    );
    assert_eq!(
        schema
            .accessor_by_field_id(3)
            .unwrap()
            .get(&test_struct)
            .unwrap(),
        Some(Datum::bool(true))
    );
    // Fields nested inside the `person` struct.
    assert_eq!(
        schema
            .accessor_by_field_id(16)
            .unwrap()
            .get(&test_struct)
            .unwrap(),
        Some(Datum::string("Testy McTest"))
    );
    assert_eq!(
        schema
            .accessor_by_field_id(17)
            .unwrap()
            .get(&test_struct)
            .unwrap(),
        Some(Datum::int(33))
    );
}
1037
/// The highest field id is 17 for the nested fixture and 3 for the simple one.
#[test]
fn test_highest_field_id() {
    let nested = table_schema_nested();
    assert_eq!(17, nested.highest_field_id());

    let (simple, _) = table_schema_simple();
    assert_eq!(3, simple.highest_field_id());
}
1046
/// A schema without fields reports 0 as its highest field id.
#[test]
fn test_highest_field_id_no_fields() {
    let empty_schema = Schema::builder().with_schema_id(1).build().unwrap();

    assert_eq!(0, empty_schema.highest_field_id());
}
1052
/// Building a schema in which two fields share id 3 must fail, and the error must mention
/// the duplicated id.
#[test]
fn test_field_ids_must_be_unique() {
    let fields = vec![
        NestedField::required(5, "foo", Type::Primitive(PrimitiveType::String)).into(),
        NestedField::optional(3, "bar", Type::Primitive(PrimitiveType::Int)).into(),
        NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
    ];

    let build_error = Schema::builder()
        .with_schema_id(1)
        .with_identifier_field_ids(vec![5])
        .with_alias(BiHashMap::from_iter(vec![("bar_alias".to_string(), 3)]))
        .with_fields(fields)
        .build()
        .unwrap_err();

    assert!(build_error.message().contains("'field.id' 3"));
}
1069
/// Reassigning field ids on an empty schema yields an equal schema whose highest field id
/// stays 0.
#[test]
fn test_reassign_ids_empty_schema() {
    let schema = Schema::builder().with_schema_id(1).build().unwrap();

    let builder = schema.clone().into_builder().with_reassigned_field_ids(0);
    let reassigned_schema = builder.build().unwrap();

    assert_eq!(schema, reassigned_schema);
    assert_eq!(schema.highest_field_id(), 0);
}
1083
/// Exercises the identifier-field validation rules of `SchemaBuilder::build`: fields
/// inside maps or lists, fields nested in an optional struct, float/double fields and
/// optional fields are rejected; a required top-level string field is accepted.
#[test]
fn test_identifier_field_ids() {
    // Identifier field is a map key: rejected.
    assert!(
        Schema::builder()
            .with_schema_id(1)
            .with_identifier_field_ids(vec![2])
            .with_fields(vec![
                NestedField::required(
                    1,
                    "Map",
                    Type::Map(MapType::new(
                        NestedField::map_key_element(2, Type::Primitive(PrimitiveType::String))
                            .into(),
                        NestedField::map_value_element(
                            3,
                            Type::Primitive(PrimitiveType::Boolean),
                            true,
                        )
                        .into(),
                    )),
                )
                .into()
            ])
            .build()
            .is_err()
    );
    // Identifier field is a map value: rejected.
    assert!(
        Schema::builder()
            .with_schema_id(1)
            .with_identifier_field_ids(vec![3])
            .with_fields(vec![
                NestedField::required(
                    1,
                    "Map",
                    Type::Map(MapType::new(
                        NestedField::map_key_element(2, Type::Primitive(PrimitiveType::String))
                            .into(),
                        NestedField::map_value_element(
                            3,
                            Type::Primitive(PrimitiveType::Boolean),
                            true,
                        )
                        .into(),
                    )),
                )
                .into()
            ])
            .build()
            .is_err()
    );

    // Identifier field is a list element: rejected.
    assert!(
        Schema::builder()
            .with_schema_id(1)
            .with_identifier_field_ids(vec![2])
            .with_fields(vec![
                NestedField::required(
                    1,
                    "List",
                    Type::List(ListType::new(
                        NestedField::list_element(
                            2,
                            Type::Primitive(PrimitiveType::String),
                            true
                        )
                        .into(),
                    )),
                )
                .into()
            ])
            .build()
            .is_err()
    );

    // Identifier field is nested in an optional struct: rejected.
    assert!(
        Schema::builder()
            .with_schema_id(1)
            .with_identifier_field_ids(vec![2])
            .with_fields(vec![
                NestedField::optional(
                    1,
                    "Struct",
                    Type::Struct(StructType::new(vec![
                        NestedField::required(
                            2,
                            "name",
                            Type::Primitive(PrimitiveType::String)
                        )
                        .into(),
                        NestedField::optional(3, "age", Type::Primitive(PrimitiveType::Int))
                            .into(),
                    ])),
                )
                .into()
            ])
            .build()
            .is_err()
    );

    // Identifier field is a float or a double: rejected.
    assert!(
        Schema::builder()
            .with_schema_id(1)
            .with_identifier_field_ids(vec![1])
            .with_fields(vec![
                NestedField::required(1, "Float", Type::Primitive(PrimitiveType::Float),)
                    .into()
            ])
            .build()
            .is_err()
    );
    assert!(
        Schema::builder()
            .with_schema_id(1)
            .with_identifier_field_ids(vec![1])
            .with_fields(vec![
                NestedField::required(1, "Double", Type::Primitive(PrimitiveType::Double),)
                    .into()
            ])
            .build()
            .is_err()
    );

    // A required string field is accepted; the same field declared optional is rejected.
    assert!(
        Schema::builder()
            .with_schema_id(1)
            .with_identifier_field_ids(vec![1])
            .with_fields(vec![
                NestedField::required(1, "Required", Type::Primitive(PrimitiveType::String),)
                    .into()
            ])
            .build()
            .is_ok()
    );
    assert!(
        Schema::builder()
            .with_schema_id(1)
            .with_identifier_field_ids(vec![1])
            .with_fields(vec![
                NestedField::optional(1, "Optional", Type::Primitive(PrimitiveType::String),)
                    .into()
            ])
            .build()
            .is_err()
    );
}
1234}