1use crate::array::{get_offsets, make_array, print_long_array};
19use crate::builder::{GenericListBuilder, PrimitiveBuilder};
20use crate::{
21 iterator::GenericListArrayIter, new_empty_array, Array, ArrayAccessor, ArrayRef,
22 ArrowPrimitiveType, FixedSizeListArray,
23};
24use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
25use arrow_data::{ArrayData, ArrayDataBuilder};
26use arrow_schema::{ArrowError, DataType, FieldRef};
27use num::Integer;
28use std::any::Any;
29use std::sync::Arc;
30
/// Integer type that can be used as the offset type of a [`GenericListArray`].
///
/// Implemented in this file for `i32` and `i64`.
pub trait OffsetSizeTrait: ArrowNativeType + std::ops::AddAssign + Integer {
    /// `true` when this offset type selects the "large" list variant
    /// (`DataType::LargeList` rather than `DataType::List`).
    const IS_LARGE: bool;
    /// Text placed in front of `ListArray` when naming the array in
    /// messages and debug output.
    const PREFIX: &'static str;
}
46
// `i32` offsets: the non-large list variant, with an empty name prefix.
impl OffsetSizeTrait for i32 {
    const IS_LARGE: bool = false;
    const PREFIX: &'static str = "";
}
51
// `i64` offsets: the large list variant, named with the `Large` prefix.
impl OffsetSizeTrait for i64 {
    const IS_LARGE: bool = true;
    const PREFIX: &'static str = "Large";
}
56
/// A list array whose offsets are stored as `OffsetSize`.
///
/// Element `i` is the slice of `values` covering
/// `value_offsets[i]..value_offsets[i + 1]`.
pub struct GenericListArray<OffsetSize: OffsetSizeTrait> {
    /// Data type of the array itself (a list type wrapping the child field).
    data_type: DataType,
    /// Optional null buffer for the list slots.
    nulls: Option<NullBuffer>,
    /// Child array that the offsets index into.
    values: ArrayRef,
    /// Offsets into `values`; holds one more entry than the array has elements.
    value_offsets: OffsetBuffer<OffsetSize>,
}
133
134impl<OffsetSize: OffsetSizeTrait> Clone for GenericListArray<OffsetSize> {
135 fn clone(&self) -> Self {
136 Self {
137 data_type: self.data_type.clone(),
138 nulls: self.nulls.clone(),
139 values: self.values.clone(),
140 value_offsets: self.value_offsets.clone(),
141 }
142 }
143}
144
145impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
    /// Function that wraps a child field in the [`DataType`] matching this
    /// offset size: `DataType::LargeList` when `OffsetSize::IS_LARGE` is
    /// `true`, otherwise `DataType::List`.
    pub const DATA_TYPE_CONSTRUCTOR: fn(FieldRef) -> DataType = if OffsetSize::IS_LARGE {
        DataType::LargeList
    } else {
        DataType::List
    };
154
155 pub fn try_new(
166 field: FieldRef,
167 offsets: OffsetBuffer<OffsetSize>,
168 values: ArrayRef,
169 nulls: Option<NullBuffer>,
170 ) -> Result<Self, ArrowError> {
171 let len = offsets.len() - 1; let end_offset = offsets.last().unwrap().as_usize();
173 if end_offset > values.len() {
176 return Err(ArrowError::InvalidArgumentError(format!(
177 "Max offset of {end_offset} exceeds length of values {}",
178 values.len()
179 )));
180 }
181
182 if let Some(n) = nulls.as_ref() {
183 if n.len() != len {
184 return Err(ArrowError::InvalidArgumentError(format!(
185 "Incorrect length of null buffer for {}ListArray, expected {len} got {}",
186 OffsetSize::PREFIX,
187 n.len(),
188 )));
189 }
190 }
191 if !field.is_nullable() && values.is_nullable() {
192 return Err(ArrowError::InvalidArgumentError(format!(
193 "Non-nullable field of {}ListArray {:?} cannot contain nulls",
194 OffsetSize::PREFIX,
195 field.name()
196 )));
197 }
198
199 if field.data_type() != values.data_type() {
200 return Err(ArrowError::InvalidArgumentError(format!(
201 "{}ListArray expected data type {} got {} for {:?}",
202 OffsetSize::PREFIX,
203 field.data_type(),
204 values.data_type(),
205 field.name()
206 )));
207 }
208
209 Ok(Self {
210 data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
211 nulls,
212 values,
213 value_offsets: offsets,
214 })
215 }
216
    /// Builds a list array from its parts by calling [`Self::try_new`].
    ///
    /// # Panics
    ///
    /// Panics if [`Self::try_new`] returns an error.
    pub fn new(
        field: FieldRef,
        offsets: OffsetBuffer<OffsetSize>,
        values: ArrayRef,
        nulls: Option<NullBuffer>,
    ) -> Self {
        Self::try_new(field, offsets, values, nulls).unwrap()
    }
230
231 pub fn new_null(field: FieldRef, len: usize) -> Self {
233 let values = new_empty_array(field.data_type());
234 Self {
235 data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
236 nulls: Some(NullBuffer::new_null(len)),
237 value_offsets: OffsetBuffer::new_zeroed(len),
238 values,
239 }
240 }
241
242 pub fn into_parts(
244 self,
245 ) -> (
246 FieldRef,
247 OffsetBuffer<OffsetSize>,
248 ArrayRef,
249 Option<NullBuffer>,
250 ) {
251 let f = match self.data_type {
252 DataType::List(f) | DataType::LargeList(f) => f,
253 _ => unreachable!(),
254 };
255 (f, self.value_offsets, self.values, self.nulls)
256 }
257
    /// Returns a reference to the offset buffer of this array.
    #[inline]
    pub fn offsets(&self) -> &OffsetBuffer<OffsetSize> {
        &self.value_offsets
    }
266
    /// Returns a reference to the child values array.
    #[inline]
    pub fn values(&self) -> &ArrayRef {
        &self.values
    }
272
    /// Returns a clone of the child values array's data type.
    pub fn value_type(&self) -> DataType {
        self.values.data_type().clone()
    }
277
    /// Returns the `i`-th element as a slice of the child array, reading the
    /// offsets with `get_unchecked`.
    ///
    /// # Safety
    ///
    /// Both `i` and `i + 1` are used as unchecked indexes into the offsets
    /// slice, so the caller must guarantee `i < self.len()`.
    pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
        let end = self.value_offsets().get_unchecked(i + 1).as_usize();
        let start = self.value_offsets().get_unchecked(i).as_usize();
        self.values.slice(start, end - start)
    }
286
    /// Returns the `i`-th element as a slice of the child array.
    ///
    /// # Panics
    ///
    /// Panics if `i + 1` is not a valid index into the offsets slice.
    pub fn value(&self, i: usize) -> ArrayRef {
        // `i + 1` is indexed first, so an out-of-range `i` panics with a
        // message that reports index `i + 1`.
        let end = self.value_offsets()[i + 1].as_usize();
        let start = self.value_offsets()[i].as_usize();
        self.values.slice(start, end - start)
    }
293
    /// Returns the offsets as a plain slice.
    #[inline]
    pub fn value_offsets(&self) -> &[OffsetSize] {
        &self.value_offsets
    }
299
    /// Returns the length of the `i`-th element, computed as the difference
    /// between offsets `i + 1` and `i`.
    ///
    /// # Panics
    ///
    /// Panics if `i + 1` is not a valid index into the offsets slice.
    #[inline]
    pub fn value_length(&self, i: usize) -> OffsetSize {
        let offsets = self.value_offsets();
        offsets[i + 1] - offsets[i]
    }
306
    /// Constructs a [`GenericListArrayIter`] borrowing this array.
    pub fn iter<'a>(&'a self) -> GenericListArrayIter<'a, OffsetSize> {
        GenericListArrayIter::<'a, OffsetSize>::new(self)
    }
311
312 #[inline]
313 fn get_type(data_type: &DataType) -> Option<&DataType> {
314 match (OffsetSize::IS_LARGE, data_type) {
315 (true, DataType::LargeList(child)) | (false, DataType::List(child)) => {
316 Some(child.data_type())
317 }
318 _ => None,
319 }
320 }
321
322 pub fn slice(&self, offset: usize, length: usize) -> Self {
324 Self {
325 data_type: self.data_type.clone(),
326 nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
327 values: self.values.clone(),
328 value_offsets: self.value_offsets.slice(offset, length),
329 }
330 }
331
332 pub fn from_iter_primitive<T, P, I>(iter: I) -> Self
348 where
349 T: ArrowPrimitiveType,
350 P: IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>,
351 I: IntoIterator<Item = Option<P>>,
352 {
353 let iter = iter.into_iter();
354 let size_hint = iter.size_hint().0;
355 let mut builder =
356 GenericListBuilder::with_capacity(PrimitiveBuilder::<T>::new(), size_hint);
357
358 for i in iter {
359 match i {
360 Some(p) => {
361 for t in p {
362 builder.values().append_option(t);
363 }
364 builder.append(true);
365 }
366 None => builder.append(false),
367 }
368 }
369 builder.finish()
370 }
371}
372
impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListArray<OffsetSize> {
    /// Converts `data` through `try_new_from_array_data`.
    ///
    /// # Panics
    ///
    /// Panics with the `expect` message below if that conversion returns an
    /// error.
    fn from(data: ArrayData) -> Self {
        Self::try_new_from_array_data(data)
            .expect("Expected infallible creation of GenericListArray from ArrayDataRef failed")
    }
}
379
380impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>> for ArrayData {
381 fn from(array: GenericListArray<OffsetSize>) -> Self {
382 let len = array.len();
383 let builder = ArrayDataBuilder::new(array.data_type)
384 .len(len)
385 .nulls(array.nulls)
386 .buffers(vec![array.value_offsets.into_inner().into_inner()])
387 .child_data(vec![array.values.to_data()]);
388
389 unsafe { builder.build_unchecked() }
390 }
391}
392
393impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<OffsetSize> {
394 fn from(value: FixedSizeListArray) -> Self {
395 let (field, size) = match value.data_type() {
396 DataType::FixedSizeList(f, size) => (f, *size as usize),
397 _ => unreachable!(),
398 };
399
400 let offsets = OffsetBuffer::from_lengths(std::iter::repeat(size).take(value.len()));
401
402 Self {
403 data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
404 nulls: value.nulls().cloned(),
405 values: value.values().clone(),
406 value_offsets: offsets,
407 }
408 }
409}
410
411impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
412 fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
413 if data.buffers().len() != 1 {
414 return Err(ArrowError::InvalidArgumentError(format!(
415 "ListArray data should contain a single buffer only (value offsets), had {}",
416 data.buffers().len()
417 )));
418 }
419
420 if data.child_data().len() != 1 {
421 return Err(ArrowError::InvalidArgumentError(format!(
422 "ListArray should contain a single child array (values array), had {}",
423 data.child_data().len()
424 )));
425 }
426
427 let values = data.child_data()[0].clone();
428
429 if let Some(child_data_type) = Self::get_type(data.data_type()) {
430 if values.data_type() != child_data_type {
431 return Err(ArrowError::InvalidArgumentError(format!(
432 "[Large]ListArray's child datatype {:?} does not \
433 correspond to the List's datatype {:?}",
434 values.data_type(),
435 child_data_type
436 )));
437 }
438 } else {
439 return Err(ArrowError::InvalidArgumentError(format!(
440 "[Large]ListArray's datatype must be [Large]ListArray(). It is {:?}",
441 data.data_type()
442 )));
443 }
444
445 let values = make_array(values);
446 let value_offsets = unsafe { get_offsets(&data) };
449
450 Ok(Self {
451 data_type: data.data_type().clone(),
452 nulls: data.nulls().cloned(),
453 values,
454 value_offsets,
455 })
456 }
457}
458
459impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
460 fn as_any(&self) -> &dyn Any {
461 self
462 }
463
464 fn to_data(&self) -> ArrayData {
465 self.clone().into()
466 }
467
468 fn into_data(self) -> ArrayData {
469 self.into()
470 }
471
472 fn data_type(&self) -> &DataType {
473 &self.data_type
474 }
475
476 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
477 Arc::new(self.slice(offset, length))
478 }
479
480 fn len(&self) -> usize {
481 self.value_offsets.len() - 1
482 }
483
484 fn is_empty(&self) -> bool {
485 self.value_offsets.len() <= 1
486 }
487
488 fn offset(&self) -> usize {
489 0
490 }
491
492 fn nulls(&self) -> Option<&NullBuffer> {
493 self.nulls.as_ref()
494 }
495
496 fn logical_null_count(&self) -> usize {
497 self.null_count()
499 }
500
501 fn get_buffer_memory_size(&self) -> usize {
502 let mut size = self.values.get_buffer_memory_size();
503 size += self.value_offsets.inner().inner().capacity();
504 if let Some(n) = self.nulls.as_ref() {
505 size += n.buffer().capacity();
506 }
507 size
508 }
509
510 fn get_array_memory_size(&self) -> usize {
511 let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size();
512 size += self.value_offsets.inner().inner().capacity();
513 if let Some(n) = self.nulls.as_ref() {
514 size += n.buffer().capacity();
515 }
516 size
517 }
518}
519
impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListArray<OffsetSize> {
    type Item = ArrayRef;

    /// Delegates to the inherent `GenericListArray::value`.
    fn value(&self, index: usize) -> Self::Item {
        GenericListArray::value(self, index)
    }

    /// Delegates to the bounds-checked inherent `GenericListArray::value`.
    ///
    /// NOTE(review): this does not call the inherent `value_unchecked`, so an
    /// out-of-range `index` panics instead of being undefined behavior.
    /// Confirm whether that is intentional before changing it.
    unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
        GenericListArray::value(self, index)
    }
}
531
532impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListArray<OffsetSize> {
533 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
534 let prefix = OffsetSize::PREFIX;
535
536 write!(f, "{prefix}ListArray\n[\n")?;
537 print_long_array(self, f, |array, index, f| {
538 std::fmt::Debug::fmt(&array.value(index), f)
539 })?;
540 write!(f, "]")
541 }
542}
543
/// A [`GenericListArray`] with `i32` offsets.
pub type ListArray = GenericListArray<i32>;
548
/// A [`GenericListArray`] with `i64` offsets.
pub type LargeListArray = GenericListArray<i64>;
553
554#[cfg(test)]
555mod tests {
556 use super::*;
557 use crate::builder::{FixedSizeListBuilder, Int32Builder, ListBuilder, UnionBuilder};
558 use crate::cast::AsArray;
559 use crate::types::Int32Type;
560 use crate::{Int32Array, Int64Array};
561 use arrow_buffer::{bit_util, Buffer, ScalarBuffer};
562 use arrow_schema::Field;
563
564 fn create_from_buffers() -> ListArray {
565 let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
567 let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 8]));
568 let field = Arc::new(Field::new("item", DataType::Int32, true));
569 ListArray::new(field, offsets, Arc::new(values), None)
570 }
571
572 #[test]
573 fn test_from_iter_primitive() {
574 let data = vec![
575 Some(vec![Some(0), Some(1), Some(2)]),
576 Some(vec![Some(3), Some(4), Some(5)]),
577 Some(vec![Some(6), Some(7)]),
578 ];
579 let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
580
581 let another = create_from_buffers();
582 assert_eq!(list_array, another)
583 }
584
585 #[test]
586 fn test_empty_list_array() {
587 let value_data = ArrayData::builder(DataType::Int32)
589 .len(0)
590 .add_buffer(Buffer::from([]))
591 .build()
592 .unwrap();
593
594 let value_offsets = Buffer::from([]);
596
597 let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
599 let list_data = ArrayData::builder(list_data_type)
600 .len(0)
601 .add_buffer(value_offsets)
602 .add_child_data(value_data)
603 .build()
604 .unwrap();
605
606 let list_array = ListArray::from(list_data);
607 assert_eq!(list_array.len(), 0)
608 }
609
    /// Builds a `ListArray` from `ArrayData` and checks its accessors, first
    /// for the full array and then for the same buffers with an offset of 1.
    #[test]
    fn test_list_array() {
        // Child values 0..=7.
        let value_data = ArrayData::builder(DataType::Int32)
            .len(8)
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
            .build()
            .unwrap();

        // Offsets describing the nested array [[0, 1, 2], [3, 4, 5], [6, 7]].
        let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]);

        let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
        let list_data = ArrayData::builder(list_data_type.clone())
            .len(3)
            .add_buffer(value_offsets.clone())
            .add_child_data(value_data.clone())
            .build()
            .unwrap();
        let list_array = ListArray::from(list_data);

        let values = list_array.values();
        assert_eq!(value_data, values.to_data());
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(3, list_array.len());
        assert_eq!(0, list_array.null_count());
        assert_eq!(6, list_array.value_offsets()[2]);
        assert_eq!(2, list_array.value_length(2));
        assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
        assert_eq!(
            0,
            unsafe { list_array.value_unchecked(0) }
                .as_primitive::<Int32Type>()
                .value(0)
        );
        for i in 0..3 {
            assert!(list_array.is_valid(i));
            assert!(!list_array.is_null(i));
        }

        // Same buffers, now with an `ArrayData` offset of 1 and a length of 2:
        // the first visible element is [3, 4, 5].
        let list_data = ArrayData::builder(list_data_type)
            .len(2)
            .offset(1)
            .add_buffer(value_offsets)
            .add_child_data(value_data.clone())
            .build()
            .unwrap();
        let list_array = ListArray::from(list_data);

        let values = list_array.values();
        assert_eq!(value_data, values.to_data());
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(2, list_array.len());
        assert_eq!(0, list_array.null_count());
        assert_eq!(6, list_array.value_offsets()[1]);
        assert_eq!(2, list_array.value_length(1));
        assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
        assert_eq!(
            3,
            unsafe { list_array.value_unchecked(0) }
                .as_primitive::<Int32Type>()
                .value(0)
        );
    }
678
    /// Builds a `LargeListArray` (`i64` offsets) from `ArrayData` and checks
    /// its accessors, first for the full array and then for the same buffers
    /// with an offset of 1.
    #[test]
    fn test_large_list_array() {
        // Child values 0..=7.
        let value_data = ArrayData::builder(DataType::Int32)
            .len(8)
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
            .build()
            .unwrap();

        // Offsets describing the nested array [[0, 1, 2], [3, 4, 5], [6, 7]].
        let value_offsets = Buffer::from_slice_ref([0i64, 3, 6, 8]);

        let list_data_type = DataType::new_large_list(DataType::Int32, false);
        let list_data = ArrayData::builder(list_data_type.clone())
            .len(3)
            .add_buffer(value_offsets.clone())
            .add_child_data(value_data.clone())
            .build()
            .unwrap();
        let list_array = LargeListArray::from(list_data);

        let values = list_array.values();
        assert_eq!(value_data, values.to_data());
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(3, list_array.len());
        assert_eq!(0, list_array.null_count());
        assert_eq!(6, list_array.value_offsets()[2]);
        assert_eq!(2, list_array.value_length(2));
        assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
        assert_eq!(
            0,
            unsafe { list_array.value_unchecked(0) }
                .as_primitive::<Int32Type>()
                .value(0)
        );
        for i in 0..3 {
            assert!(list_array.is_valid(i));
            assert!(!list_array.is_null(i));
        }

        // Same buffers, now with an `ArrayData` offset of 1 and a length of 2:
        // the first visible element is [3, 4, 5].
        let list_data = ArrayData::builder(list_data_type)
            .len(2)
            .offset(1)
            .add_buffer(value_offsets)
            .add_child_data(value_data.clone())
            .build()
            .unwrap();
        let list_array = LargeListArray::from(list_data);

        let values = list_array.values();
        assert_eq!(value_data, values.to_data());
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(2, list_array.len());
        assert_eq!(0, list_array.null_count());
        assert_eq!(6, list_array.value_offsets()[1]);
        assert_eq!(2, list_array.value_length(1));
        assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
        assert_eq!(
            3,
            unsafe { list_array.value_unchecked(0) }
                .as_primitive::<Int32Type>()
                .value(0)
        );
    }
747
    /// Slices a nine-element `ListArray` containing nulls and checks length,
    /// null count, validity and offsets of the slice.
    #[test]
    fn test_list_array_slice() {
        // Child values 0..=9.
        let value_data = ArrayData::builder(DataType::Int32)
            .len(10)
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
            .build()
            .unwrap();

        // Offsets for the nested array
        // [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]].
        let value_offsets = Buffer::from_slice_ref([0, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
        // Validity bitmap: a set bit marks a valid slot (0, 3, 4, 6 and 8).
        let mut null_bits: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut null_bits, 0);
        bit_util::set_bit(&mut null_bits, 3);
        bit_util::set_bit(&mut null_bits, 4);
        bit_util::set_bit(&mut null_bits, 6);
        bit_util::set_bit(&mut null_bits, 8);

        let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
        let list_data = ArrayData::builder(list_data_type)
            .len(9)
            .add_buffer(value_offsets)
            .add_child_data(value_data.clone())
            .null_bit_buffer(Some(Buffer::from(null_bits)))
            .build()
            .unwrap();
        let list_array = ListArray::from(list_data);

        let values = list_array.values();
        assert_eq!(value_data, values.to_data());
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(9, list_array.len());
        assert_eq!(4, list_array.null_count());
        assert_eq!(2, list_array.value_offsets()[3]);
        assert_eq!(2, list_array.value_length(3));

        // Slots 1..7 of the original array.
        let sliced_array = list_array.slice(1, 6);
        assert_eq!(6, sliced_array.len());
        assert_eq!(3, sliced_array.null_count());

        // Validity of slot `i` in the slice must match bit `1 + i` of the
        // original bitmap.
        for i in 0..sliced_array.len() {
            if bit_util::get_bit(&null_bits, 1 + i) {
                assert!(sliced_array.is_valid(i));
            } else {
                assert!(sliced_array.is_null(i));
            }
        }

        // Offsets and lengths are read relative to the start of the slice.
        let sliced_list_array = sliced_array.as_any().downcast_ref::<ListArray>().unwrap();
        assert_eq!(2, sliced_list_array.value_offsets()[2]);
        assert_eq!(2, sliced_list_array.value_length(2));
        assert_eq!(4, sliced_list_array.value_offsets()[3]);
        assert_eq!(2, sliced_list_array.value_length(3));
        assert_eq!(6, sliced_list_array.value_offsets()[5]);
        assert_eq!(3, sliced_list_array.value_length(5));
    }
808
    /// Slices a nine-element `LargeListArray` containing nulls and checks
    /// length, null count, validity and offsets of the slice.
    #[test]
    fn test_large_list_array_slice() {
        // Child values 0..=9.
        let value_data = ArrayData::builder(DataType::Int32)
            .len(10)
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
            .build()
            .unwrap();

        // Offsets for the nested array
        // [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]].
        let value_offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
        // Validity bitmap: a set bit marks a valid slot (0, 3, 4, 6 and 8).
        let mut null_bits: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut null_bits, 0);
        bit_util::set_bit(&mut null_bits, 3);
        bit_util::set_bit(&mut null_bits, 4);
        bit_util::set_bit(&mut null_bits, 6);
        bit_util::set_bit(&mut null_bits, 8);

        let list_data_type = DataType::new_large_list(DataType::Int32, false);
        let list_data = ArrayData::builder(list_data_type)
            .len(9)
            .add_buffer(value_offsets)
            .add_child_data(value_data.clone())
            .null_bit_buffer(Some(Buffer::from(null_bits)))
            .build()
            .unwrap();
        let list_array = LargeListArray::from(list_data);

        let values = list_array.values();
        assert_eq!(value_data, values.to_data());
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(9, list_array.len());
        assert_eq!(4, list_array.null_count());
        assert_eq!(2, list_array.value_offsets()[3]);
        assert_eq!(2, list_array.value_length(3));

        // Slots 1..7 of the original array.
        let sliced_array = list_array.slice(1, 6);
        assert_eq!(6, sliced_array.len());
        assert_eq!(3, sliced_array.null_count());

        // Validity of slot `i` in the slice must match bit `1 + i` of the
        // original bitmap.
        for i in 0..sliced_array.len() {
            if bit_util::get_bit(&null_bits, 1 + i) {
                assert!(sliced_array.is_valid(i));
            } else {
                assert!(sliced_array.is_null(i));
            }
        }

        // Offsets and lengths are read relative to the start of the slice.
        let sliced_list_array = sliced_array
            .as_any()
            .downcast_ref::<LargeListArray>()
            .unwrap();
        assert_eq!(2, sliced_list_array.value_offsets()[2]);
        assert_eq!(2, sliced_list_array.value_length(2));
        assert_eq!(4, sliced_list_array.value_offsets()[3]);
        assert_eq!(2, sliced_list_array.value_length(3));
        assert_eq!(6, sliced_list_array.value_offsets()[5]);
        assert_eq!(3, sliced_list_array.value_length(5));
    }
872
    /// `value(10)` on a nine-element array must panic. The expected message
    /// reports index 11 because `value` indexes the offsets at `i + 1` first,
    /// and the offsets slice has 10 entries.
    #[test]
    #[should_panic(expected = "index out of bounds: the len is 10 but the index is 11")]
    fn test_list_array_index_out_of_bound() {
        // Child values 0..=9.
        let value_data = ArrayData::builder(DataType::Int32)
            .len(10)
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
            .build()
            .unwrap();

        // Offsets for the nested array
        // [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]].
        let value_offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
        // Validity bitmap: a set bit marks a valid slot (0, 3, 4, 6 and 8).
        let mut null_bits: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut null_bits, 0);
        bit_util::set_bit(&mut null_bits, 3);
        bit_util::set_bit(&mut null_bits, 4);
        bit_util::set_bit(&mut null_bits, 6);
        bit_util::set_bit(&mut null_bits, 8);

        let list_data_type = DataType::new_large_list(DataType::Int32, false);
        let list_data = ArrayData::builder(list_data_type)
            .len(9)
            .add_buffer(value_offsets)
            .add_child_data(value_data)
            .null_bit_buffer(Some(Buffer::from(null_bits)))
            .build()
            .unwrap();
        let list_array = LargeListArray::from(list_data);
        assert_eq!(9, list_array.len());

        list_array.value(10);
    }
    /// Converting `ArrayData` that carries no offsets buffer must panic with
    /// the single-buffer error message.
    ///
    /// The data is assembled with `build_unchecked`, so the malformed layout
    /// only surfaces in the `ListArray` conversion. Compiled out when the
    /// `force_validate` feature is enabled.
    #[test]
    #[should_panic(expected = "ListArray data should contain a single buffer only (value offsets)")]
    #[cfg(not(feature = "force_validate"))]
    fn test_list_array_invalid_buffer_len() {
        let value_data = unsafe {
            ArrayData::builder(DataType::Int32)
                .len(8)
                .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
                .build_unchecked()
        };
        let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
        // No buffer is added here on purpose.
        let list_data = unsafe {
            ArrayData::builder(list_data_type)
                .len(3)
                .add_child_data(value_data)
                .build_unchecked()
        };
        drop(ListArray::from(list_data));
    }
929
    /// Converting `ArrayData` that carries no child array must panic with the
    /// single-child error message.
    ///
    /// The data is assembled with `build_unchecked`, so the malformed layout
    /// only surfaces in the `ListArray` conversion. Compiled out when the
    /// `force_validate` feature is enabled.
    #[test]
    #[should_panic(expected = "ListArray should contain a single child array (values array)")]
    #[cfg(not(feature = "force_validate"))]
    fn test_list_array_invalid_child_array_len() {
        let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]);
        let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
        // No child data is added here on purpose.
        let list_data = unsafe {
            ArrayData::builder(list_data_type)
                .len(3)
                .add_buffer(value_offsets)
                .build_unchecked()
        };
        drop(ListArray::from(list_data));
    }
946
    /// Converting the data of a `List` array (from `ListBuilder`) into a
    /// `LargeListArray` must panic with the data-type mismatch message.
    #[test]
    #[should_panic(expected = "[Large]ListArray's datatype must be [Large]ListArray(). It is List")]
    fn test_from_array_data_validation() {
        let mut builder = ListBuilder::new(Int32Builder::new());
        builder.values().append_value(1);
        builder.append(true);
        let array = builder.finish();
        let _ = LargeListArray::from(array.into_data());
    }
956
957 #[test]
958 fn test_list_array_offsets_need_not_start_at_zero() {
959 let value_data = ArrayData::builder(DataType::Int32)
960 .len(8)
961 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
962 .build()
963 .unwrap();
964
965 let value_offsets = Buffer::from_slice_ref([2, 2, 5, 7]);
966
967 let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
968 let list_data = ArrayData::builder(list_data_type)
969 .len(3)
970 .add_buffer(value_offsets)
971 .add_child_data(value_data)
972 .build()
973 .unwrap();
974
975 let list_array = ListArray::from(list_data);
976 assert_eq!(list_array.value_length(0), 0);
977 assert_eq!(list_array.value_length(1), 3);
978 assert_eq!(list_array.value_length(2), 2);
979 }
980
    /// Building an `Int32Array` from a sliced buffer must panic with the
    /// alignment message. Compiled out when `force_validate` is enabled.
    #[test]
    #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
    #[cfg(not(feature = "force_validate"))]
    fn test_primitive_array_alignment() {
        let buf = Buffer::from_slice_ref([0_u64]);
        // NOTE(review): presumably `slice(1)` advances the buffer by one byte,
        // which breaks `i32` alignment. Confirm against `Buffer::slice`.
        let buf2 = buf.slice(1);
        let array_data = unsafe {
            ArrayData::builder(DataType::Int32)
                .add_buffer(buf2)
                .build_unchecked()
        };
        drop(Int32Array::from(array_data));
    }
996
    /// Building a `ListArray` whose offsets buffer is a sliced buffer must
    /// panic with the alignment message. Compiled out when `force_validate`
    /// is enabled.
    #[test]
    #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
    #[cfg(not(feature = "force_validate"))]
    fn test_list_array_alignment() {
        let buf = Buffer::from_slice_ref([0_u64]);
        // NOTE(review): presumably `slice(1)` advances the buffer by one byte,
        // which breaks offset alignment. Confirm against `Buffer::slice`.
        let buf2 = buf.slice(1);

        let values: [i32; 8] = [0; 8];
        let value_data = unsafe {
            ArrayData::builder(DataType::Int32)
                .add_buffer(Buffer::from_slice_ref(values))
                .build_unchecked()
        };

        let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
        // The sliced buffer is used as the offsets buffer.
        let list_data = unsafe {
            ArrayData::builder(list_data_type)
                .add_buffer(buf2)
                .add_child_data(value_data)
                .build_unchecked()
        };
        drop(ListArray::from(list_data));
    }
1022
    /// Checks `==` between list arrays built from nested option vectors, for
    /// both `ListArray` and `LargeListArray`.
    #[test]
    fn list_array_equality() {
        // Builds both sides with `from_iter_primitive`, once with `i32`
        // offsets and once with `i64` offsets, and compares the outcome of
        // `==` against `should_equal`.
        fn do_comparison(
            lhs_data: Vec<Option<Vec<Option<i32>>>>,
            rhs_data: Vec<Option<Vec<Option<i32>>>>,
            should_equal: bool,
        ) {
            let lhs = ListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data.clone());
            let rhs = ListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data.clone());
            assert_eq!(lhs == rhs, should_equal);

            let lhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data);
            let rhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data);
            assert_eq!(lhs == rhs, should_equal);
        }

        // Identical contents, including a null list and a null item.
        do_comparison(
            vec![
                Some(vec![Some(0), Some(1), Some(2)]),
                None,
                Some(vec![Some(3), None, Some(5)]),
                Some(vec![Some(6), Some(7)]),
            ],
            vec![
                Some(vec![Some(0), Some(1), Some(2)]),
                None,
                Some(vec![Some(3), None, Some(5)]),
                Some(vec![Some(6), Some(7)]),
            ],
            true,
        );

        // First element is null on one side and a valid list on the other.
        do_comparison(
            vec![
                None,
                None,
                Some(vec![Some(3), None, Some(5)]),
                Some(vec![Some(6), Some(7)]),
            ],
            vec![
                Some(vec![Some(0), Some(1), Some(2)]),
                None,
                Some(vec![Some(3), None, Some(5)]),
                Some(vec![Some(6), Some(7)]),
            ],
            false,
        );

        // Same validity, but the last list holds different values.
        do_comparison(
            vec![
                None,
                None,
                Some(vec![Some(3), None, Some(5)]),
                Some(vec![Some(6), Some(7)]),
            ],
            vec![
                None,
                None,
                Some(vec![Some(3), None, Some(5)]),
                Some(vec![Some(0), Some(0)]),
            ],
            false,
        );

        // A single differing value after two null lists.
        do_comparison(
            vec![None, None, Some(vec![Some(1)])],
            vec![None, None, Some(vec![Some(2)])],
            false,
        );
    }
1094
1095 #[test]
1096 fn test_empty_offsets() {
1097 let f = Arc::new(Field::new("element", DataType::Int32, true));
1098 let string = ListArray::from(
1099 ArrayData::builder(DataType::List(f.clone()))
1100 .buffers(vec![Buffer::from(&[])])
1101 .add_child_data(ArrayData::new_empty(&DataType::Int32))
1102 .build()
1103 .unwrap(),
1104 );
1105 assert_eq!(string.value_offsets(), &[0]);
1106 let string = LargeListArray::from(
1107 ArrayData::builder(DataType::LargeList(f))
1108 .buffers(vec![Buffer::from(&[])])
1109 .add_child_data(ArrayData::new_empty(&DataType::Int32))
1110 .build()
1111 .unwrap(),
1112 );
1113 assert_eq!(string.len(), 0);
1114 assert_eq!(string.value_offsets(), &[0]);
1115 }
1116
    /// Exercises the success path of `new` and each validation error of
    /// `try_new`, comparing the exact error strings.
    #[test]
    fn test_try_new() {
        let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
        let values = Int32Array::new(vec![1, 2, 3, 4, 5].into(), None);
        let values = Arc::new(values) as ArrayRef;

        // Valid without a null buffer.
        let field = Arc::new(Field::new("element", DataType::Int32, false));
        ListArray::new(field.clone(), offsets.clone(), values.clone(), None);

        // Valid with a null buffer of matching length (3).
        let nulls = NullBuffer::new_null(3);
        ListArray::new(field.clone(), offsets, values.clone(), Some(nulls));

        // Five offsets describe four elements, so a null buffer of length 3
        // is rejected.
        let nulls = NullBuffer::new_null(3);
        let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
        let err = LargeListArray::try_new(field, offsets.clone(), values.clone(), Some(nulls))
            .unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: Incorrect length of null buffer for LargeListArray, expected 4 got 3"
        );

        // Field declares Int64 while the values are Int32.
        let field = Arc::new(Field::new("element", DataType::Int64, false));
        let err = LargeListArray::try_new(field.clone(), offsets.clone(), values.clone(), None)
            .unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: LargeListArray expected data type Int64 got Int32 for \"element\""
        );

        // Values carrying a null buffer are rejected for a non-nullable field.
        let nulls = NullBuffer::new_null(7);
        let values = Int64Array::new(vec![0; 7].into(), Some(nulls));
        let values = Arc::new(values);

        let err =
            LargeListArray::try_new(field, offsets.clone(), values.clone(), None).unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: Non-nullable field of LargeListArray \"element\" cannot contain nulls"
        );

        // The same values are accepted once the field is nullable.
        let field = Arc::new(Field::new("element", DataType::Int64, true));
        LargeListArray::new(field.clone(), offsets.clone(), values, None);

        // The last offset (5) exceeds the number of values (2).
        let values = Int64Array::new(vec![0; 2].into(), None);
        let err = LargeListArray::try_new(field, offsets, Arc::new(values), None).unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: Max offset of 5 exceeds length of values 2"
        );
    }
1171
1172 #[test]
1173 fn test_from_fixed_size_list() {
1174 let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);
1175 builder.values().append_slice(&[1, 2, 3]);
1176 builder.append(true);
1177 builder.values().append_slice(&[0, 0, 0]);
1178 builder.append(false);
1179 builder.values().append_slice(&[4, 5, 6]);
1180 builder.append(true);
1181 let list: ListArray = builder.finish().into();
1182
1183 let values: Vec<_> = list
1184 .iter()
1185 .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1186 .collect();
1187 assert_eq!(values, vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])])
1188 }
1189
1190 #[test]
1191 fn test_nullable_union() {
1192 let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
1193 let mut builder = UnionBuilder::new_dense();
1194 builder.append::<Int32Type>("a", 1).unwrap();
1195 builder.append::<Int32Type>("b", 2).unwrap();
1196 builder.append::<Int32Type>("b", 3).unwrap();
1197 builder.append::<Int32Type>("a", 4).unwrap();
1198 builder.append::<Int32Type>("a", 5).unwrap();
1199 let values = builder.build().unwrap();
1200 let field = Arc::new(Field::new("element", values.data_type().clone(), false));
1201 ListArray::new(field.clone(), offsets, Arc::new(values), None);
1202 }
1203}