arrow_array/builder/
struct_builder.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::builder::*;
19use crate::types::Int32Type;
20use crate::StructArray;
21use arrow_buffer::NullBufferBuilder;
22use arrow_schema::{DataType, Fields, IntervalUnit, SchemaBuilder, TimeUnit};
23use std::sync::Arc;
24
/// Builder for [`StructArray`]
///
/// Note that callers should make sure that methods of all the child field builders are
/// properly called to maintain the consistency of the data structure.
///
/// # Nested collection builders
///
/// Handling arrays with complex layouts, such as `List<Struct<List<Struct>>>`, can be
/// challenging in Rust because of its strong typing system. Reaching a collection builder
/// ([`ListBuilder`], [`LargeListBuilder`], or [`MapBuilder`]) that was constructed through
/// [`make_builder`] takes more than one step, because [`StructBuilder::from_fields`]
/// builds its children recursively.
///
/// [`StructBuilder::from_fields`] invokes [`make_builder`] for each field, and
/// [`make_builder`] returns a `Box<dyn ArrayBuilder>`. A collection builder created this
/// way therefore wraps a `Box<dyn ArrayBuilder>` as its values builder. To obtain it, first
/// call [`StructBuilder::field_builder`] with the collection type parameterized over
/// `Box<dyn ArrayBuilder>`, then downcast the result of `values()` to the desired
/// builder type.
///
/// For example, when working with [`ListBuilder`], you would first call
/// `StructBuilder::field_builder::<ListBuilder<Box<dyn ArrayBuilder>>>` and then downcast
/// the `Box<dyn ArrayBuilder>` returned by `values()` to the specific [`StructBuilder`]
/// you need.
///
/// For a practical example see the code below:
///
/// ```rust
///    use arrow_array::builder::{ArrayBuilder, ListBuilder, StringBuilder, StructBuilder};
///    use arrow_schema::{DataType, Field, Fields};
///    use std::sync::Arc;
///
///    // This is an example column that has a List<Struct<List<Struct>>> layout
///    let mut example_col = ListBuilder::new(StructBuilder::from_fields(
///        vec![Field::new(
///            "value_list",
///            DataType::List(Arc::new(Field::new(
///                "item",
///                DataType::Struct(Fields::from(vec![
///                    Field::new("key", DataType::Utf8, true),
///                    Field::new("value", DataType::Utf8, true),
///                ])), // In this example we are trying to get to this builder and insert key/value pairs
///                true,
///            ))),
///            true,
///        )],
///        0,
///    ));
///
///   // We can obtain the StructBuilder without issues, because example_col was created with StructBuilder
///   let col_struct_builder: &mut StructBuilder = example_col.values();
///
///   // We can't obtain the ListBuilder<StructBuilder> with the expected generic types, because under the hood
///   // the StructBuilder was returned as a Box<dyn ArrayBuilder> and passed as such to the ListBuilder constructor
///
///   // This panics at runtime (`field_builder` returns `None`), even though we know that the
///   // builder is a ListBuilder<StructBuilder>.
///   // let sb = col_struct_builder
///   //     .field_builder::<ListBuilder<StructBuilder>>(0)
///   //     .as_mut()
///   //     .unwrap();
///
///   // To keep in line with Rust's strong typing, we fetch a ListBuilder<Box<dyn ArrayBuilder>> from the column StructBuilder first...
///   let mut list_builder_option =
///       col_struct_builder.field_builder::<ListBuilder<Box<dyn ArrayBuilder>>>(0);
///
///   let list_builder = list_builder_option.as_mut().unwrap();
///
///   // ... and then downcast the key/value pair values to a StructBuilder
///   let struct_builder = list_builder
///       .values()
///       .as_any_mut()
///       .downcast_mut::<StructBuilder>()
///       .unwrap();
///
///   // We can now append values to the StructBuilder
///   let key_builder = struct_builder.field_builder::<StringBuilder>(0).unwrap();
///   key_builder.append_value("my key");
///
///   let value_builder = struct_builder.field_builder::<StringBuilder>(1).unwrap();
///   value_builder.append_value("my value");
///
///   // Every level of nesting needs its own `append` call
///   struct_builder.append(true);
///   list_builder.append(true);
///   col_struct_builder.append(true);
///   example_col.append(true);
///
///   let array = example_col.finish();
///
///   println!("My array: {:?}", array);
/// ```
///
pub struct StructBuilder {
    /// Fields of the struct being built.
    fields: Fields,
    /// Child builders; `validate_content` requires exactly one per entry of `fields`.
    field_builders: Vec<Box<dyn ArrayBuilder>>,
    /// Validity of each struct slot; its length is what `len()` reports.
    null_buffer_builder: NullBufferBuilder,
}
109
110impl std::fmt::Debug for StructBuilder {
111    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
112        f.debug_struct("StructBuilder")
113            .field("fields", &self.fields)
114            .field("bitmap_builder", &self.null_buffer_builder)
115            .field("len", &self.len())
116            .finish()
117    }
118}
119
120impl ArrayBuilder for StructBuilder {
121    /// Returns the number of array slots in the builder.
122    ///
123    /// Note that this always return the first child field builder's length, and it is
124    /// the caller's responsibility to maintain the consistency that all the child field
125    /// builder should have the equal number of elements.
126    fn len(&self) -> usize {
127        self.null_buffer_builder.len()
128    }
129
130    /// Builds the array.
131    fn finish(&mut self) -> ArrayRef {
132        Arc::new(self.finish())
133    }
134
135    /// Builds the array without resetting the builder.
136    fn finish_cloned(&self) -> ArrayRef {
137        Arc::new(self.finish_cloned())
138    }
139
140    /// Returns the builder as a non-mutable `Any` reference.
141    ///
142    /// This is most useful when one wants to call non-mutable APIs on a specific builder
143    /// type. In this case, one can first cast this into a `Any`, and then use
144    /// `downcast_ref` to get a reference on the specific builder.
145    fn as_any(&self) -> &dyn Any {
146        self
147    }
148
149    /// Returns the builder as a mutable `Any` reference.
150    ///
151    /// This is most useful when one wants to call mutable APIs on a specific builder
152    /// type. In this case, one can first cast this into a `Any`, and then use
153    /// `downcast_mut` to get a reference on the specific builder.
154    fn as_any_mut(&mut self) -> &mut dyn Any {
155        self
156    }
157
158    /// Returns the boxed builder as a box of `Any`.
159    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
160        self
161    }
162}
163
164/// Returns a builder with capacity for `capacity` elements of datatype
165/// `DataType`.
166///
167/// This function is useful to construct arrays from an arbitrary vectors with
168/// known/expected schema.
169///
170/// See comments on [StructBuilder] for retrieving collection builders built by
171/// make_builder.
172pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilder> {
173    use crate::builder::*;
174    match datatype {
175        DataType::Null => Box::new(NullBuilder::new()),
176        DataType::Boolean => Box::new(BooleanBuilder::with_capacity(capacity)),
177        DataType::Int8 => Box::new(Int8Builder::with_capacity(capacity)),
178        DataType::Int16 => Box::new(Int16Builder::with_capacity(capacity)),
179        DataType::Int32 => Box::new(Int32Builder::with_capacity(capacity)),
180        DataType::Int64 => Box::new(Int64Builder::with_capacity(capacity)),
181        DataType::UInt8 => Box::new(UInt8Builder::with_capacity(capacity)),
182        DataType::UInt16 => Box::new(UInt16Builder::with_capacity(capacity)),
183        DataType::UInt32 => Box::new(UInt32Builder::with_capacity(capacity)),
184        DataType::UInt64 => Box::new(UInt64Builder::with_capacity(capacity)),
185        DataType::Float16 => Box::new(Float16Builder::with_capacity(capacity)),
186        DataType::Float32 => Box::new(Float32Builder::with_capacity(capacity)),
187        DataType::Float64 => Box::new(Float64Builder::with_capacity(capacity)),
188        DataType::Binary => Box::new(BinaryBuilder::with_capacity(capacity, 1024)),
189        DataType::LargeBinary => Box::new(LargeBinaryBuilder::with_capacity(capacity, 1024)),
190        DataType::FixedSizeBinary(len) => {
191            Box::new(FixedSizeBinaryBuilder::with_capacity(capacity, *len))
192        }
193        DataType::Decimal128(p, s) => Box::new(
194            Decimal128Builder::with_capacity(capacity).with_data_type(DataType::Decimal128(*p, *s)),
195        ),
196        DataType::Decimal256(p, s) => Box::new(
197            Decimal256Builder::with_capacity(capacity).with_data_type(DataType::Decimal256(*p, *s)),
198        ),
199        DataType::Utf8 => Box::new(StringBuilder::with_capacity(capacity, 1024)),
200        DataType::LargeUtf8 => Box::new(LargeStringBuilder::with_capacity(capacity, 1024)),
201        DataType::Date32 => Box::new(Date32Builder::with_capacity(capacity)),
202        DataType::Date64 => Box::new(Date64Builder::with_capacity(capacity)),
203        DataType::Time32(TimeUnit::Second) => {
204            Box::new(Time32SecondBuilder::with_capacity(capacity))
205        }
206        DataType::Time32(TimeUnit::Millisecond) => {
207            Box::new(Time32MillisecondBuilder::with_capacity(capacity))
208        }
209        DataType::Time64(TimeUnit::Microsecond) => {
210            Box::new(Time64MicrosecondBuilder::with_capacity(capacity))
211        }
212        DataType::Time64(TimeUnit::Nanosecond) => {
213            Box::new(Time64NanosecondBuilder::with_capacity(capacity))
214        }
215        DataType::Timestamp(TimeUnit::Second, tz) => Box::new(
216            TimestampSecondBuilder::with_capacity(capacity)
217                .with_data_type(DataType::Timestamp(TimeUnit::Second, tz.clone())),
218        ),
219        DataType::Timestamp(TimeUnit::Millisecond, tz) => Box::new(
220            TimestampMillisecondBuilder::with_capacity(capacity)
221                .with_data_type(DataType::Timestamp(TimeUnit::Millisecond, tz.clone())),
222        ),
223        DataType::Timestamp(TimeUnit::Microsecond, tz) => Box::new(
224            TimestampMicrosecondBuilder::with_capacity(capacity)
225                .with_data_type(DataType::Timestamp(TimeUnit::Microsecond, tz.clone())),
226        ),
227        DataType::Timestamp(TimeUnit::Nanosecond, tz) => Box::new(
228            TimestampNanosecondBuilder::with_capacity(capacity)
229                .with_data_type(DataType::Timestamp(TimeUnit::Nanosecond, tz.clone())),
230        ),
231        DataType::Interval(IntervalUnit::YearMonth) => {
232            Box::new(IntervalYearMonthBuilder::with_capacity(capacity))
233        }
234        DataType::Interval(IntervalUnit::DayTime) => {
235            Box::new(IntervalDayTimeBuilder::with_capacity(capacity))
236        }
237        DataType::Interval(IntervalUnit::MonthDayNano) => {
238            Box::new(IntervalMonthDayNanoBuilder::with_capacity(capacity))
239        }
240        DataType::Duration(TimeUnit::Second) => {
241            Box::new(DurationSecondBuilder::with_capacity(capacity))
242        }
243        DataType::Duration(TimeUnit::Millisecond) => {
244            Box::new(DurationMillisecondBuilder::with_capacity(capacity))
245        }
246        DataType::Duration(TimeUnit::Microsecond) => {
247            Box::new(DurationMicrosecondBuilder::with_capacity(capacity))
248        }
249        DataType::Duration(TimeUnit::Nanosecond) => {
250            Box::new(DurationNanosecondBuilder::with_capacity(capacity))
251        }
252        DataType::List(field) => {
253            let builder = make_builder(field.data_type(), capacity);
254            Box::new(ListBuilder::with_capacity(builder, capacity).with_field(field.clone()))
255        }
256        DataType::LargeList(field) => {
257            let builder = make_builder(field.data_type(), capacity);
258            Box::new(LargeListBuilder::with_capacity(builder, capacity).with_field(field.clone()))
259        }
260        DataType::FixedSizeList(field, size) => {
261            let size = *size;
262            let values_builder_capacity = {
263                let size: usize = size.try_into().unwrap();
264                capacity * size
265            };
266            let builder = make_builder(field.data_type(), values_builder_capacity);
267            Box::new(
268                FixedSizeListBuilder::with_capacity(builder, size, capacity)
269                    .with_field(field.clone()),
270            )
271        }
272        DataType::Map(field, _) => match field.data_type() {
273            DataType::Struct(fields) => {
274                let map_field_names = MapFieldNames {
275                    key: fields[0].name().clone(),
276                    value: fields[1].name().clone(),
277                    entry: field.name().clone(),
278                };
279                let key_builder = make_builder(fields[0].data_type(), capacity);
280                let value_builder = make_builder(fields[1].data_type(), capacity);
281                Box::new(
282                    MapBuilder::with_capacity(
283                        Some(map_field_names),
284                        key_builder,
285                        value_builder,
286                        capacity,
287                    )
288                    .with_values_field(fields[1].clone()),
289                )
290            }
291            t => panic!("The field of Map data type {t:?} should has a child Struct field"),
292        },
293        DataType::Struct(fields) => Box::new(StructBuilder::from_fields(fields.clone(), capacity)),
294        DataType::Dictionary(key_type, value_type) if **key_type == DataType::Int32 => {
295            match &**value_type {
296                DataType::Utf8 => {
297                    let dict_builder: StringDictionaryBuilder<Int32Type> =
298                        StringDictionaryBuilder::with_capacity(capacity, 256, 1024);
299                    Box::new(dict_builder)
300                }
301                DataType::LargeUtf8 => {
302                    let dict_builder: LargeStringDictionaryBuilder<Int32Type> =
303                        LargeStringDictionaryBuilder::with_capacity(capacity, 256, 1024);
304                    Box::new(dict_builder)
305                }
306                DataType::Binary => {
307                    let dict_builder: BinaryDictionaryBuilder<Int32Type> =
308                        BinaryDictionaryBuilder::with_capacity(capacity, 256, 1024);
309                    Box::new(dict_builder)
310                }
311                DataType::LargeBinary => {
312                    let dict_builder: LargeBinaryDictionaryBuilder<Int32Type> =
313                        LargeBinaryDictionaryBuilder::with_capacity(capacity, 256, 1024);
314                    Box::new(dict_builder)
315                }
316                t => panic!("Unsupported dictionary value type {t:?} is not currently supported"),
317            }
318        }
319        t => panic!("Data type {t:?} is not currently supported"),
320    }
321}
322
323impl StructBuilder {
324    /// Creates a new `StructBuilder`
325    pub fn new(fields: impl Into<Fields>, field_builders: Vec<Box<dyn ArrayBuilder>>) -> Self {
326        Self {
327            field_builders,
328            fields: fields.into(),
329            null_buffer_builder: NullBufferBuilder::new(0),
330        }
331    }
332
333    /// Creates a new `StructBuilder` from [`Fields`] and `capacity`
334    pub fn from_fields(fields: impl Into<Fields>, capacity: usize) -> Self {
335        let fields = fields.into();
336        let mut builders = Vec::with_capacity(fields.len());
337        for field in &fields {
338            builders.push(make_builder(field.data_type(), capacity));
339        }
340        Self::new(fields, builders)
341    }
342
343    /// Returns a mutable reference to the child field builder at index `i`.
344    /// Result will be `None` if the input type `T` provided doesn't match the actual
345    /// field builder's type.
346    pub fn field_builder<T: ArrayBuilder>(&mut self, i: usize) -> Option<&mut T> {
347        self.field_builders[i].as_any_mut().downcast_mut::<T>()
348    }
349
350    /// Returns the number of fields for the struct this builder is building.
351    pub fn num_fields(&self) -> usize {
352        self.field_builders.len()
353    }
354
355    /// Appends an element (either null or non-null) to the struct. The actual elements
356    /// should be appended for each child sub-array in a consistent way.
357    #[inline]
358    pub fn append(&mut self, is_valid: bool) {
359        self.null_buffer_builder.append(is_valid);
360    }
361
362    /// Appends a null element to the struct.
363    #[inline]
364    pub fn append_null(&mut self) {
365        self.append(false)
366    }
367
368    /// Builds the `StructArray` and reset this builder.
369    pub fn finish(&mut self) -> StructArray {
370        self.validate_content();
371        if self.fields.is_empty() {
372            return StructArray::new_empty_fields(self.len(), self.null_buffer_builder.finish());
373        }
374
375        let arrays = self.field_builders.iter_mut().map(|f| f.finish()).collect();
376        let nulls = self.null_buffer_builder.finish();
377        StructArray::new(self.fields.clone(), arrays, nulls)
378    }
379
380    /// Builds the `StructArray` without resetting the builder.
381    pub fn finish_cloned(&self) -> StructArray {
382        self.validate_content();
383
384        if self.fields.is_empty() {
385            return StructArray::new_empty_fields(
386                self.len(),
387                self.null_buffer_builder.finish_cloned(),
388            );
389        }
390
391        let arrays = self
392            .field_builders
393            .iter()
394            .map(|f| f.finish_cloned())
395            .collect();
396
397        let nulls = self.null_buffer_builder.finish_cloned();
398
399        StructArray::new(self.fields.clone(), arrays, nulls)
400    }
401
402    /// Constructs and validates contents in the builder to ensure that
403    /// - fields and field_builders are of equal length
404    /// - the number of items in individual field_builders are equal to self.len()
405    fn validate_content(&self) {
406        if self.fields.len() != self.field_builders.len() {
407            panic!("Number of fields is not equal to the number of field_builders.");
408        }
409        self.field_builders.iter().enumerate().for_each(|(idx, x)| {
410            if x.len() != self.len() {
411                let builder = SchemaBuilder::from(&self.fields);
412                let schema = builder.finish();
413
414                panic!("{}", format!(
415                    "StructBuilder ({:?}) and field_builder with index {} ({:?}) are of unequal lengths: ({} != {}).",
416                    schema,
417                    idx,
418                    self.fields[idx].data_type(),
419                    self.len(),
420                    x.len()
421                ));
422            }
423        });
424    }
425
426    /// Returns the current null buffer as a slice
427    pub fn validity_slice(&self) -> Option<&[u8]> {
428        self.null_buffer_builder.as_slice()
429    }
430}
431
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_buffer::Buffer;
    use arrow_data::ArrayData;
    use arrow_schema::Field;

    use crate::array::Array;

    /// Boxes a concrete builder as the trait object `StructBuilder::new` expects.
    fn boxed<B: ArrayBuilder + 'static>(builder: B) -> Box<dyn ArrayBuilder> {
        Box::new(builder)
    }

    /// The non-nullable `f1: Int32`, `f2: Boolean` field pair used by several tests.
    fn int_bool_fields() -> Vec<Field> {
        vec![
            Field::new("f1", DataType::Int32, false),
            Field::new("f2", DataType::Boolean, false),
        ]
    }

    /// A struct builder over `int_bool_fields` with fresh, empty child builders.
    fn int_bool_struct_builder() -> StructBuilder {
        let children = vec![boxed(Int32Builder::new()), boxed(BooleanBuilder::new())];
        StructBuilder::new(int_bool_fields(), children)
    }

    /// Appends `ints` and `bools` to the two children and one valid slot per row.
    fn append_valid_rows(builder: &mut StructBuilder, ints: &[i32], bools: &[bool]) {
        let int_child = builder.field_builder::<Int32Builder>(0).unwrap();
        int_child.append_slice(ints);
        let bool_child = builder.field_builder::<BooleanBuilder>(1).unwrap();
        bool_child.append_slice(bools);

        // Append slot values - all are valid.
        for _ in 0..ints.len() {
            builder.append(true);
        }
    }

    #[test]
    fn test_struct_array_builder() {
        let fields = vec![
            Field::new("f1", DataType::Utf8, true),
            Field::new("f2", DataType::Int32, true),
        ];
        let children = vec![boxed(StringBuilder::new()), boxed(Int32Builder::new())];

        let mut builder = StructBuilder::new(fields, children);
        assert_eq!(2, builder.num_fields());

        let names = builder
            .field_builder::<StringBuilder>(0)
            .expect("builder at field 0 should be string builder");
        names.append_value("joe");
        names.append_null();
        names.append_null();
        names.append_value("mark");

        let numbers = builder
            .field_builder::<Int32Builder>(1)
            .expect("builder at field 1 should be int builder");
        numbers.append_value(1);
        numbers.append_value(2);
        numbers.append_null();
        numbers.append_value(4);

        builder.append(true);
        builder.append(true);
        builder.append_null();
        builder.append(true);

        let struct_data = builder.finish().into_data();

        assert_eq!(4, struct_data.len());
        assert_eq!(1, struct_data.null_count());
        assert_eq!(&[11_u8], struct_data.nulls().unwrap().validity());

        let expected_string_data = ArrayData::builder(DataType::Utf8)
            .len(4)
            .null_bit_buffer(Some(Buffer::from(&[9_u8])))
            .add_buffer(Buffer::from_slice_ref([0, 3, 3, 3, 7]))
            .add_buffer(Buffer::from_slice_ref(b"joemark"))
            .build()
            .unwrap();

        let expected_int_data = ArrayData::builder(DataType::Int32)
            .len(4)
            .null_bit_buffer(Some(Buffer::from_slice_ref([11_u8])))
            .add_buffer(Buffer::from_slice_ref([1, 2, 0, 4]))
            .build()
            .unwrap();

        let children = struct_data.child_data();
        assert_eq!(expected_string_data, children[0]);
        assert_eq!(expected_int_data, children[1]);
    }

    #[test]
    fn test_struct_array_builder_finish() {
        let mut builder = int_bool_struct_builder();
        append_valid_rows(
            &mut builder,
            &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            &[
                false, true, false, true, false, true, false, true, false, true,
            ],
        );
        assert_eq!(10, builder.len());

        // Finishing hands out the rows and leaves the builder empty.
        let first = builder.finish();
        assert_eq!(10, first.len());
        assert_eq!(0, builder.len());

        append_valid_rows(
            &mut builder,
            &[1, 3, 5, 7, 9],
            &[false, true, false, true, false],
        );
        assert_eq!(5, builder.len());

        let second = builder.finish();
        assert_eq!(5, second.len());
        assert_eq!(0, builder.len());
    }

    #[test]
    fn test_build_fixed_size_list() {
        const LIST_LENGTH: i32 = 4;
        let list_type = DataType::new_fixed_size_list(DataType::Int32, LIST_LENGTH, false);
        let mut erased = make_builder(&list_type, 10);
        let downcast = erased
            .as_any_mut()
            .downcast_mut::<FixedSizeListBuilder<Box<dyn ArrayBuilder>>>();
        if let Some(list_builder) = downcast {
            assert_eq!(list_builder.value_length(), LIST_LENGTH);
            let values = list_builder.values().as_any_mut();
            assert!(values.downcast_mut::<Int32Builder>().is_some());
        } else {
            panic!("expected FixedSizeListBuilder, got a different builder type");
        }
    }

    #[test]
    fn test_struct_array_builder_finish_cloned() {
        let mut builder = int_bool_struct_builder();
        append_valid_rows(
            &mut builder,
            &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            &[
                false, true, false, true, false, true, false, true, false, true,
            ],
        );
        assert_eq!(10, builder.len());

        // A cloned finish must leave the builder's contents in place.
        let snapshot = builder.finish_cloned();
        assert_eq!(10, snapshot.len());
        assert_eq!(10, builder.len());

        append_valid_rows(
            &mut builder,
            &[1, 3, 5, 7, 9],
            &[false, true, false, true, false],
        );
        assert_eq!(15, builder.len());

        let drained = builder.finish();
        assert_eq!(15, drained.len());
        assert_eq!(0, builder.len());
    }

    #[test]
    fn test_struct_array_builder_from_schema() {
        let nested = DataType::Struct(
            vec![
                Field::new("g1", DataType::Int32, false),
                Field::new("g2", DataType::Boolean, false),
            ]
            .into(),
        );
        let fields = vec![
            Field::new("f1", DataType::Float32, false),
            Field::new("f2", DataType::Utf8, false),
            Field::new("f3", nested, false),
        ];

        let mut builder = StructBuilder::from_fields(fields, 5);
        assert_eq!(3, builder.num_fields());
        assert!(builder.field_builder::<Float32Builder>(0).is_some());
        assert!(builder.field_builder::<StringBuilder>(1).is_some());
        assert!(builder.field_builder::<StructBuilder>(2).is_some());
    }

    #[test]
    fn test_datatype_properties() {
        let decimal_field = Field::new("f1", DataType::Decimal128(1, 2), false);
        let timestamp_field = Field::new(
            "f2",
            DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".into())),
            false,
        );
        let fields = Fields::from(vec![decimal_field, timestamp_field]);

        let mut builder = StructBuilder::from_fields(fields.clone(), 1);
        let decimals = builder.field_builder::<Decimal128Builder>(0).unwrap();
        decimals.append_value(1);
        let timestamps = builder
            .field_builder::<TimestampMillisecondBuilder>(1)
            .unwrap();
        timestamps.append_value(1);
        builder.append(true);
        let array = builder.finish();

        // Precision/scale and time zone must survive the round trip through the builders.
        assert_eq!(array.data_type(), &DataType::Struct(fields.clone()));
        assert_eq!(array.column(0).data_type(), fields[0].data_type());
        assert_eq!(array.column(1).data_type(), fields[1].data_type());
    }

    #[test]
    fn test_struct_array_builder_from_dictionary_type() {
        let dict_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
        let dict_field = Field::new("f1", dict_type, false);
        let expected_dtype = DataType::Struct(vec![dict_field.clone()].into());
        let expected_child_dtype = dict_field.data_type();

        let mut struct_builder = StructBuilder::from_fields(vec![dict_field.clone()], 5);
        let dict_builder = struct_builder
            .field_builder::<StringDictionaryBuilder<Int32Type>>(0)
            .expect("Builder should be StringDictionaryBuilder");
        dict_builder.append_value("dict string");
        struct_builder.append(true);
        let array = struct_builder.finish();

        assert_eq!(array.data_type(), &expected_dtype);
        let column = array.column(0);
        assert_eq!(column.data_type(), expected_child_dtype);
        assert_eq!(column.len(), 1);
    }

    #[test]
    #[should_panic(expected = "Data type Dictionary(Int16, Utf8) is not currently supported")]
    fn test_struct_array_builder_from_schema_unsupported_type() {
        let int16_keyed_dict =
            DataType::Dictionary(Box::new(DataType::Int16), Box::new(DataType::Utf8));
        let fields = vec![
            Field::new("f1", DataType::Int16, false),
            Field::new("f2", int16_keyed_dict, false),
        ];

        let _ = StructBuilder::from_fields(fields, 5);
    }

    #[test]
    #[should_panic(expected = "Unsupported dictionary value type Int32 is not currently supported")]
    fn test_struct_array_builder_from_dict_with_unsupported_value_type() {
        let int32_valued_dict =
            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Int32));
        let fields = vec![Field::new("f1", int32_valued_dict, false)];

        let _ = StructBuilder::from_fields(fields, 5);
    }

    #[test]
    fn test_struct_array_builder_field_builder_type_mismatch() {
        let fields = vec![Field::new("f1", DataType::Int32, false)];
        let children = vec![boxed(Int32Builder::with_capacity(10))];

        let mut builder = StructBuilder::new(fields, children);
        // Asking for the wrong concrete type yields `None`.
        assert!(builder.field_builder::<BinaryBuilder>(0).is_none());
    }

    #[test]
    #[should_panic(
        expected = "StructBuilder (Schema { fields: [Field { name: \"f1\", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"f2\", data_type: Boolean, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }], metadata: {} }) and field_builder with index 1 (Boolean) are of unequal lengths: (2 != 1)."
    )]
    fn test_struct_array_builder_unequal_field_builders_lengths() {
        let mut two_ints = Int32Builder::with_capacity(10);
        let mut one_bool = BooleanBuilder::new();

        two_ints.append_value(1);
        two_ints.append_value(2);
        one_bool.append_value(true);

        let children = vec![boxed(two_ints), boxed(one_bool)];

        let mut builder = StructBuilder::new(int_bool_fields(), children);
        builder.append(true);
        builder.append(true);
        builder.finish();
    }

    #[test]
    #[should_panic(expected = "Number of fields is not equal to the number of field_builders.")]
    fn test_struct_array_builder_unequal_field_field_builders() {
        // Two fields, but only a single child builder.
        let children = vec![boxed(Int32Builder::with_capacity(10))];

        let mut builder = StructBuilder::new(int_bool_fields(), children);
        builder.finish();
    }

    #[test]
    #[should_panic(
        expected = "Incorrect datatype for StructArray field \\\"timestamp\\\", expected Timestamp(Nanosecond, Some(\\\"UTC\\\")) got Timestamp(Nanosecond, None)"
    )]
    fn test_struct_array_mismatch_builder() {
        let utc_nanos = DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".to_owned().into()));
        let fields = vec![Field::new("timestamp", utc_nanos, false)];

        // The child builder carries no time zone, unlike the declared field.
        let children = vec![boxed(TimestampNanosecondBuilder::new())];

        let mut sa = StructBuilder::new(fields, children);
        sa.finish();
    }

    #[test]
    fn test_empty() {
        let mut builder = StructBuilder::new(Fields::empty(), vec![]);
        builder.append(true);
        builder.append(false);

        let cloned = builder.finish_cloned();
        let finished = builder.finish();
        assert_eq!(cloned, finished);
        assert_eq!(cloned.len(), 2);
        assert_eq!(cloned.null_count(), 1);
        assert!(cloned.is_valid(0));
        assert!(cloned.is_null(1));
    }
}