1use crate::builder::{ArrayBuilder, BufferBuilder};
19use crate::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
20use arrow_buffer::NullBufferBuilder;
21use arrow_buffer::{Buffer, OffsetBuffer};
22use arrow_schema::{Field, FieldRef};
23use std::any::Any;
24use std::sync::Arc;
25
26#[derive(Debug)]
90pub struct GenericListBuilder<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> {
91 offsets_builder: BufferBuilder<OffsetSize>,
92 null_buffer_builder: NullBufferBuilder,
93 values_builder: T,
94 field: Option<FieldRef>,
95}
96
97impl<O: OffsetSizeTrait, T: ArrayBuilder + Default> Default for GenericListBuilder<O, T> {
98 fn default() -> Self {
99 Self::new(T::default())
100 }
101}
102
103impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> GenericListBuilder<OffsetSize, T> {
104 pub fn new(values_builder: T) -> Self {
106 let capacity = values_builder.len();
107 Self::with_capacity(values_builder, capacity)
108 }
109
110 pub fn with_capacity(values_builder: T, capacity: usize) -> Self {
113 let mut offsets_builder = BufferBuilder::<OffsetSize>::new(capacity + 1);
114 offsets_builder.append(OffsetSize::zero());
115 Self {
116 offsets_builder,
117 null_buffer_builder: NullBufferBuilder::new(capacity),
118 values_builder,
119 field: None,
120 }
121 }
122
123 pub fn with_field(self, field: impl Into<FieldRef>) -> Self {
130 Self {
131 field: Some(field.into()),
132 ..self
133 }
134 }
135}
136
137impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> ArrayBuilder
138 for GenericListBuilder<OffsetSize, T>
139where
140 T: 'static,
141{
142 fn as_any(&self) -> &dyn Any {
144 self
145 }
146
147 fn as_any_mut(&mut self) -> &mut dyn Any {
149 self
150 }
151
152 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
154 self
155 }
156
157 fn len(&self) -> usize {
159 self.null_buffer_builder.len()
160 }
161
162 fn finish(&mut self) -> ArrayRef {
164 Arc::new(self.finish())
165 }
166
167 fn finish_cloned(&self) -> ArrayRef {
169 Arc::new(self.finish_cloned())
170 }
171}
172
173impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> GenericListBuilder<OffsetSize, T>
174where
175 T: 'static,
176{
177 pub fn values(&mut self) -> &mut T {
182 &mut self.values_builder
183 }
184
185 pub fn values_ref(&self) -> &T {
187 &self.values_builder
188 }
189
190 #[inline]
196 pub fn append(&mut self, is_valid: bool) {
197 self.offsets_builder.append(self.next_offset());
198 self.null_buffer_builder.append(is_valid);
199 }
200
201 #[inline]
207 fn next_offset(&self) -> OffsetSize {
208 OffsetSize::from_usize(self.values_builder.len()).unwrap()
209 }
210
211 #[inline]
258 pub fn append_value<I, V>(&mut self, i: I)
259 where
260 T: Extend<Option<V>>,
261 I: IntoIterator<Item = Option<V>>,
262 {
263 self.extend(std::iter::once(Some(i)))
264 }
265
266 #[inline]
270 pub fn append_null(&mut self) {
271 self.offsets_builder.append(self.next_offset());
272 self.null_buffer_builder.append_null();
273 }
274
275 #[inline]
279 pub fn append_option<I, V>(&mut self, i: Option<I>)
280 where
281 T: Extend<Option<V>>,
282 I: IntoIterator<Item = Option<V>>,
283 {
284 match i {
285 Some(i) => self.append_value(i),
286 None => self.append_null(),
287 }
288 }
289
290 pub fn finish(&mut self) -> GenericListArray<OffsetSize> {
292 let values = self.values_builder.finish();
293 let nulls = self.null_buffer_builder.finish();
294
295 let offsets = self.offsets_builder.finish();
296 let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
298 self.offsets_builder.append(OffsetSize::zero());
299
300 let field = match &self.field {
301 Some(f) => f.clone(),
302 None => Arc::new(Field::new("item", values.data_type().clone(), true)),
303 };
304
305 GenericListArray::new(field, offsets, values, nulls)
306 }
307
308 pub fn finish_cloned(&self) -> GenericListArray<OffsetSize> {
310 let values = self.values_builder.finish_cloned();
311 let nulls = self.null_buffer_builder.finish_cloned();
312
313 let offsets = Buffer::from_slice_ref(self.offsets_builder.as_slice());
314 let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
316
317 let field = match &self.field {
318 Some(f) => f.clone(),
319 None => Arc::new(Field::new("item", values.data_type().clone(), true)),
320 };
321
322 GenericListArray::new(field, offsets, values, nulls)
323 }
324
325 pub fn offsets_slice(&self) -> &[OffsetSize] {
327 self.offsets_builder.as_slice()
328 }
329
330 pub fn validity_slice(&self) -> Option<&[u8]> {
332 self.null_buffer_builder.as_slice()
333 }
334}
335
336impl<O, B, V, E> Extend<Option<V>> for GenericListBuilder<O, B>
337where
338 O: OffsetSizeTrait,
339 B: ArrayBuilder + Extend<E>,
340 V: IntoIterator<Item = E>,
341{
342 #[inline]
343 fn extend<T: IntoIterator<Item = Option<V>>>(&mut self, iter: T) {
344 for v in iter {
345 match v {
346 Some(elements) => {
347 self.values_builder.extend(elements);
348 self.append(true);
349 }
350 None => self.append(false),
351 }
352 }
353 }
354}
355
#[cfg(test)]
mod tests {
    use super::*;
    use crate::builder::{make_builder, Int32Builder, ListBuilder};
    use crate::cast::AsArray;
    use crate::types::Int32Type;
    use crate::Int32Array;
    use arrow_schema::DataType;

    /// Builds `[[0, 1, 2], [3, 4, 5], [6, 7]]` and checks values, offsets and validity.
    fn _test_generic_list_array_builder<O: OffsetSizeTrait>() {
        let values_builder = Int32Builder::with_capacity(10);
        let mut builder = GenericListBuilder::<O, _>::new(values_builder);

        builder.values().append_value(0);
        builder.values().append_value(1);
        builder.values().append_value(2);
        builder.append(true);
        builder.values().append_value(3);
        builder.values().append_value(4);
        builder.values().append_value(5);
        builder.append(true);
        builder.values().append_value(6);
        builder.values().append_value(7);
        builder.append(true);
        let list_array = builder.finish();

        let list_values = list_array.values().as_primitive::<Int32Type>();
        assert_eq!(list_values.values(), &[0, 1, 2, 3, 4, 5, 6, 7]);
        assert_eq!(list_array.value_offsets(), [0, 3, 6, 8].map(O::usize_as));
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(3, list_array.len());
        assert_eq!(0, list_array.null_count());
        assert_eq!(O::from_usize(6).unwrap(), list_array.value_offsets()[2]);
        assert_eq!(O::from_usize(2).unwrap(), list_array.value_length(2));
        for i in 0..3 {
            assert!(list_array.is_valid(i));
            assert!(!list_array.is_null(i));
        }
    }

    #[test]
    fn test_list_array_builder() {
        _test_generic_list_array_builder::<i32>()
    }

    #[test]
    fn test_large_list_array_builder() {
        _test_generic_list_array_builder::<i64>()
    }

    /// Builds `[[0, 1, 2], null, [3, null, 5], [6, 7]]` and checks the null bookkeeping.
    fn _test_generic_list_array_builder_nulls<O: OffsetSizeTrait>() {
        let values_builder = Int32Builder::with_capacity(10);
        let mut builder = GenericListBuilder::<O, _>::new(values_builder);

        builder.values().append_value(0);
        builder.values().append_value(1);
        builder.values().append_value(2);
        builder.append(true);
        builder.append(false);
        builder.values().append_value(3);
        builder.values().append_null();
        builder.values().append_value(5);
        builder.append(true);
        builder.values().append_value(6);
        builder.values().append_value(7);
        builder.append(true);

        let list_array = builder.finish();

        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(4, list_array.len());
        assert_eq!(1, list_array.null_count());
        assert_eq!(O::from_usize(3).unwrap(), list_array.value_offsets()[2]);
        assert_eq!(O::from_usize(3).unwrap(), list_array.value_length(2));
    }

    #[test]
    fn test_list_array_builder_nulls() {
        _test_generic_list_array_builder_nulls::<i32>()
    }

    #[test]
    fn test_large_list_array_builder_nulls() {
        _test_generic_list_array_builder_nulls::<i64>()
    }

    #[test]
    fn test_list_array_builder_finish() {
        let values_builder = Int32Array::builder(5);
        let mut builder = ListBuilder::new(values_builder);

        builder.values().append_slice(&[1, 2, 3]);
        builder.append(true);
        builder.values().append_slice(&[4, 5, 6]);
        builder.append(true);

        // `finish` must hand out the array and leave the builder empty.
        let mut arr = builder.finish();
        assert_eq!(2, arr.len());
        assert!(builder.is_empty());

        builder.values().append_slice(&[7, 8, 9]);
        builder.append(true);
        arr = builder.finish();
        assert_eq!(1, arr.len());
        assert!(builder.is_empty());
    }

    #[test]
    fn test_list_array_builder_finish_cloned() {
        let values_builder = Int32Array::builder(5);
        let mut builder = ListBuilder::new(values_builder);

        builder.values().append_slice(&[1, 2, 3]);
        builder.append(true);
        builder.values().append_slice(&[4, 5, 6]);
        builder.append(true);

        // `finish_cloned` must not drain the builder.
        let mut arr = builder.finish_cloned();
        assert_eq!(2, arr.len());
        assert!(!builder.is_empty());

        builder.values().append_slice(&[7, 8, 9]);
        builder.append(true);
        arr = builder.finish();
        assert_eq!(3, arr.len());
        assert!(builder.is_empty());
    }

    #[test]
    fn test_list_list_array_builder() {
        let primitive_builder = Int32Builder::with_capacity(10);
        let values_builder = ListBuilder::new(primitive_builder);
        let mut builder = ListBuilder::new(values_builder);

        // [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]]
        builder.values().values().append_value(1);
        builder.values().values().append_value(2);
        builder.values().append(true);
        builder.values().values().append_value(3);
        builder.values().values().append_value(4);
        builder.values().append(true);
        builder.append(true);

        builder.values().values().append_value(5);
        builder.values().values().append_value(6);
        builder.values().values().append_value(7);
        builder.values().append(true);
        builder.values().append(false);
        builder.values().values().append_value(8);
        builder.values().append(true);
        builder.append(true);

        builder.append(false);

        builder.values().values().append_value(9);
        builder.values().values().append_value(10);
        builder.values().append(true);
        builder.append(true);

        let l1 = builder.finish();

        assert_eq!(4, l1.len());
        assert_eq!(1, l1.null_count());

        assert_eq!(l1.value_offsets(), &[0, 2, 5, 5, 6]);
        let l2 = l1.values().as_list::<i32>();

        assert_eq!(6, l2.len());
        assert_eq!(1, l2.null_count());
        assert_eq!(l2.value_offsets(), &[0, 2, 4, 7, 7, 8, 10]);

        let i1 = l2.values().as_primitive::<Int32Type>();
        assert_eq!(10, i1.len());
        assert_eq!(0, i1.null_count());
        assert_eq!(i1.values(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
    }

    #[test]
    fn test_extend() {
        let mut builder = ListBuilder::new(Int32Builder::new());
        builder.extend([
            Some(vec![Some(1), Some(2), Some(7), None]),
            Some(vec![]),
            Some(vec![Some(4), Some(5)]),
            None,
        ]);

        let array = builder.finish();
        assert_eq!(array.value_offsets(), [0, 4, 4, 6, 6]);
        assert_eq!(array.null_count(), 1);
        assert_eq!(array.logical_null_count(), 1);
        assert!(array.is_null(3));
        let elements = array.values().as_primitive::<Int32Type>();
        assert_eq!(elements.values(), &[1, 2, 7, 0, 4, 5]);
        assert_eq!(elements.null_count(), 1);
        assert_eq!(elements.logical_null_count(), 1);
        assert!(elements.is_null(3));
    }

    #[test]
    fn test_boxed_primitive_array_builder() {
        let values_builder = make_builder(&DataType::Int32, 5);
        let mut builder = ListBuilder::new(values_builder);

        builder
            .values()
            .as_any_mut()
            .downcast_mut::<Int32Builder>()
            .expect("should be an Int32Builder")
            .append_slice(&[1, 2, 3]);
        builder.append(true);

        builder
            .values()
            .as_any_mut()
            .downcast_mut::<Int32Builder>()
            .expect("should be an Int32Builder")
            .append_slice(&[4, 5, 6]);
        builder.append(true);

        let arr = builder.finish();
        assert_eq!(2, arr.len());

        let elements = arr.values().as_primitive::<Int32Type>();
        assert_eq!(elements.values(), &[1, 2, 3, 4, 5, 6]);
    }

    #[test]
    fn test_boxed_list_list_array_builder() {
        let values_builder = make_builder(
            &DataType::List(Arc::new(Field::new("item", DataType::Int32, true))),
            10,
        );
        test_boxed_generic_list_generic_list_array_builder::<i32>(values_builder);
    }

    #[test]
    fn test_boxed_large_list_large_list_array_builder() {
        let values_builder = make_builder(
            &DataType::LargeList(Arc::new(Field::new("item", DataType::Int32, true))),
            10,
        );
        test_boxed_generic_list_generic_list_array_builder::<i64>(values_builder);
    }

    /// Downcasts the boxed child of `builder` to the nested (Large)ListBuilder.
    fn nested_list_builder<O: OffsetSizeTrait>(
        builder: &mut GenericListBuilder<O, Box<dyn ArrayBuilder>>,
    ) -> &mut GenericListBuilder<O, Box<dyn ArrayBuilder>> {
        builder
            .values()
            .as_any_mut()
            .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>()
            .expect("should be an (Large)ListBuilder")
    }

    /// Downcasts the innermost boxed child (two levels down) to the `Int32Builder`.
    fn nested_int_builder<O: OffsetSizeTrait>(
        builder: &mut GenericListBuilder<O, Box<dyn ArrayBuilder>>,
    ) -> &mut Int32Builder {
        nested_list_builder(builder)
            .values()
            .as_any_mut()
            .downcast_mut::<Int32Builder>()
            .expect("should be an Int32Builder")
    }

    /// Same data as `test_list_list_array_builder`, but driven entirely through
    /// type-erased `Box<dyn ArrayBuilder>` children.
    fn test_boxed_generic_list_generic_list_array_builder<O: OffsetSizeTrait + PartialEq>(
        values_builder: Box<dyn ArrayBuilder>,
    ) {
        let mut builder: GenericListBuilder<O, Box<dyn ArrayBuilder>> =
            GenericListBuilder::<O, Box<dyn ArrayBuilder>>::new(values_builder);

        // [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]]
        nested_int_builder(&mut builder).append_value(1);
        nested_int_builder(&mut builder).append_value(2);
        nested_list_builder(&mut builder).append(true);
        nested_int_builder(&mut builder).append_value(3);
        nested_int_builder(&mut builder).append_value(4);
        nested_list_builder(&mut builder).append(true);
        builder.append(true);

        nested_int_builder(&mut builder).append_value(5);
        nested_int_builder(&mut builder).append_value(6);
        nested_int_builder(&mut builder).append_value(7);
        nested_list_builder(&mut builder).append(true);
        nested_list_builder(&mut builder).append(false);
        nested_int_builder(&mut builder).append_value(8);
        nested_list_builder(&mut builder).append(true);
        builder.append(true);

        builder.append(false);

        nested_int_builder(&mut builder).append_value(9);
        nested_int_builder(&mut builder).append_value(10);
        nested_list_builder(&mut builder).append(true);
        builder.append(true);

        let l1 = builder.finish();

        assert_eq!(4, l1.len());
        assert_eq!(1, l1.null_count());

        assert_eq!(l1.value_offsets(), &[0, 2, 5, 5, 6].map(O::usize_as));
        let l2 = l1.values().as_list::<O>();

        assert_eq!(6, l2.len());
        assert_eq!(1, l2.null_count());
        assert_eq!(l2.value_offsets(), &[0, 2, 4, 7, 7, 8, 10].map(O::usize_as));

        let i1 = l2.values().as_primitive::<Int32Type>();
        assert_eq!(10, i1.len());
        assert_eq!(0, i1.null_count());
        assert_eq!(i1.values(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
    }

    #[test]
    fn test_with_field() {
        let field = Arc::new(Field::new("bar", DataType::Int32, false));
        let mut builder = ListBuilder::new(Int32Builder::new()).with_field(field.clone());
        builder.append_value([Some(1), Some(2), Some(3)]);
        // A null list slot adds no child values, so the non-nullable child field still holds.
        builder.append_null();
        builder.append_value([Some(4)]);
        let array = builder.finish();
        assert_eq!(array.len(), 3);
        assert_eq!(array.data_type(), &DataType::List(field.clone()));

        // The custom field must survive `finish` and apply to the next array too.
        builder.append_value([Some(4), Some(5)]);
        let array = builder.finish();
        assert_eq!(array.data_type(), &DataType::List(field));
        assert_eq!(array.len(), 1);
    }

    #[test]
    #[should_panic(expected = "Non-nullable field of ListArray \\\"item\\\" cannot contain nulls")]
    fn test_checks_nullability() {
        let field = Arc::new(Field::new("item", DataType::Int32, false));
        let mut builder = ListBuilder::new(Int32Builder::new()).with_field(field.clone());
        builder.append_value([Some(1), None]);
        builder.finish();
    }

    #[test]
    #[should_panic(expected = "ListArray expected data type Int64 got Int32")]
    fn test_checks_data_type() {
        let field = Arc::new(Field::new("item", DataType::Int64, false));
        let mut builder = ListBuilder::new(Int32Builder::new()).with_field(field.clone());
        builder.append_value([Some(1)]);
        builder.finish();
    }
}