1mod binary_array;
21
22use crate::types::*;
23use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer, ScalarBuffer};
24use arrow_data::ArrayData;
25use arrow_schema::{DataType, IntervalUnit, TimeUnit};
26use std::any::Any;
27use std::sync::Arc;
28
29pub use binary_array::*;
30
31mod boolean_array;
32pub use boolean_array::*;
33
34mod byte_array;
35pub use byte_array::*;
36
37mod dictionary_array;
38pub use dictionary_array::*;
39
40mod fixed_size_binary_array;
41pub use fixed_size_binary_array::*;
42
43mod fixed_size_list_array;
44pub use fixed_size_list_array::*;
45
46mod list_array;
47pub use list_array::*;
48
49mod map_array;
50pub use map_array::*;
51
52mod null_array;
53pub use null_array::*;
54
55mod primitive_array;
56pub use primitive_array::*;
57
58mod string_array;
59pub use string_array::*;
60
61mod struct_array;
62pub use struct_array::*;
63
64mod union_array;
65pub use union_array::*;
66
67mod run_array;
68
69pub use run_array::*;
70
71mod byte_view_array;
72
73pub use byte_view_array::*;
74
75mod list_view_array;
76
77pub use list_view_array::*;
78
79use crate::iterator::ArrayIter;
80
81pub trait Array: std::fmt::Debug + Send + Sync {
83 fn as_any(&self) -> &dyn Any;
106
107 fn to_data(&self) -> ArrayData;
109
110 fn into_data(self) -> ArrayData;
114
115 fn data_type(&self) -> &DataType;
128
129 fn slice(&self, offset: usize, length: usize) -> ArrayRef;
143
144 fn len(&self) -> usize;
156
157 fn is_empty(&self) -> bool;
169
170 fn offset(&self) -> usize;
186
187 fn nulls(&self) -> Option<&NullBuffer>;
200
201 fn logical_nulls(&self) -> Option<NullBuffer> {
220 self.nulls().cloned()
221 }
222
223 fn is_null(&self, index: usize) -> bool {
245 self.nulls().map(|n| n.is_null(index)).unwrap_or_default()
246 }
247
248 fn is_valid(&self, index: usize) -> bool {
262 !self.is_null(index)
263 }
264
265 fn null_count(&self) -> usize {
281 self.nulls().map(|n| n.null_count()).unwrap_or_default()
282 }
283
284 fn logical_null_count(&self) -> usize {
301 self.logical_nulls()
302 .map(|n| n.null_count())
303 .unwrap_or_default()
304 }
305
306 fn is_nullable(&self) -> bool {
320 self.null_count() != 0
322 }
323
324 fn get_buffer_memory_size(&self) -> usize;
329
330 fn get_array_memory_size(&self) -> usize;
334}
335
/// A reference-counted ([`Arc`]) handle to a dynamically typed [`Array`].
pub type ArrayRef = Arc<dyn Array>;
338
339impl Array for ArrayRef {
341 fn as_any(&self) -> &dyn Any {
342 self.as_ref().as_any()
343 }
344
345 fn to_data(&self) -> ArrayData {
346 self.as_ref().to_data()
347 }
348
349 fn into_data(self) -> ArrayData {
350 self.to_data()
351 }
352
353 fn data_type(&self) -> &DataType {
354 self.as_ref().data_type()
355 }
356
357 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
358 self.as_ref().slice(offset, length)
359 }
360
361 fn len(&self) -> usize {
362 self.as_ref().len()
363 }
364
365 fn is_empty(&self) -> bool {
366 self.as_ref().is_empty()
367 }
368
369 fn offset(&self) -> usize {
370 self.as_ref().offset()
371 }
372
373 fn nulls(&self) -> Option<&NullBuffer> {
374 self.as_ref().nulls()
375 }
376
377 fn logical_nulls(&self) -> Option<NullBuffer> {
378 self.as_ref().logical_nulls()
379 }
380
381 fn is_null(&self, index: usize) -> bool {
382 self.as_ref().is_null(index)
383 }
384
385 fn is_valid(&self, index: usize) -> bool {
386 self.as_ref().is_valid(index)
387 }
388
389 fn null_count(&self) -> usize {
390 self.as_ref().null_count()
391 }
392
393 fn logical_null_count(&self) -> usize {
394 self.as_ref().logical_null_count()
395 }
396
397 fn is_nullable(&self) -> bool {
398 self.as_ref().is_nullable()
399 }
400
401 fn get_buffer_memory_size(&self) -> usize {
402 self.as_ref().get_buffer_memory_size()
403 }
404
405 fn get_array_memory_size(&self) -> usize {
406 self.as_ref().get_array_memory_size()
407 }
408}
409
410impl<T: Array> Array for &T {
411 fn as_any(&self) -> &dyn Any {
412 T::as_any(self)
413 }
414
415 fn to_data(&self) -> ArrayData {
416 T::to_data(self)
417 }
418
419 fn into_data(self) -> ArrayData {
420 self.to_data()
421 }
422
423 fn data_type(&self) -> &DataType {
424 T::data_type(self)
425 }
426
427 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
428 T::slice(self, offset, length)
429 }
430
431 fn len(&self) -> usize {
432 T::len(self)
433 }
434
435 fn is_empty(&self) -> bool {
436 T::is_empty(self)
437 }
438
439 fn offset(&self) -> usize {
440 T::offset(self)
441 }
442
443 fn nulls(&self) -> Option<&NullBuffer> {
444 T::nulls(self)
445 }
446
447 fn logical_nulls(&self) -> Option<NullBuffer> {
448 T::logical_nulls(self)
449 }
450
451 fn is_null(&self, index: usize) -> bool {
452 T::is_null(self, index)
453 }
454
455 fn is_valid(&self, index: usize) -> bool {
456 T::is_valid(self, index)
457 }
458
459 fn null_count(&self) -> usize {
460 T::null_count(self)
461 }
462
463 fn logical_null_count(&self) -> usize {
464 T::logical_null_count(self)
465 }
466
467 fn is_nullable(&self) -> bool {
468 T::is_nullable(self)
469 }
470
471 fn get_buffer_memory_size(&self) -> usize {
472 T::get_buffer_memory_size(self)
473 }
474
475 fn get_array_memory_size(&self) -> usize {
476 T::get_array_memory_size(self)
477 }
478}
479
/// An [`Array`] whose elements can be read individually by index.
pub trait ArrayAccessor: Array {
    /// The type produced when a single element is read.
    type Item: Send + Sync;

    /// Returns the element at `index`.
    ///
    /// NOTE(review): presumably panics when `index` is out of bounds — confirm against implementors.
    fn value(&self, index: usize) -> Self::Item;

    /// Returns the element at `index`; the `unsafe` counterpart of [`Self::value`].
    ///
    /// # Safety
    ///
    /// NOTE(review): the contract is not visible here; presumably the caller must
    /// guarantee that `index` is within bounds — confirm against implementors.
    unsafe fn value_unchecked(&self, index: usize) -> Self::Item;
}
574
/// Abstraction over array types whose [`ArrayAccessor::Item`] is `&'a str`.
///
/// Within this file it is implemented for `&'a GenericStringArray<O>` and
/// `&'a StringViewArray`.
pub trait StringArrayType<'a>: ArrayAccessor<Item = &'a str> + Sized {
    /// Reports whether the array is ASCII.
    ///
    /// NOTE(review): presumably true when every string value is ASCII — confirm
    /// against the forwarded implementations.
    fn is_ascii(&self) -> bool;

    /// Returns an [`ArrayIter`] over this array.
    fn iter(&self) -> ArrayIter<Self>;
}
589
/// Implements [`StringArrayType`] for references to [`GenericStringArray`] by
/// forwarding to the functions of the same name on `GenericStringArray<O>`.
impl<'a, O: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray<O> {
    fn is_ascii(&self) -> bool {
        // Named through the explicit type path rather than `self.is_ascii()`
        // (presumably so this trait method cannot be selected instead — confirm).
        GenericStringArray::<O>::is_ascii(self)
    }

    fn iter(&self) -> ArrayIter<Self> {
        // Same explicit-path forwarding as `is_ascii`.
        GenericStringArray::<O>::iter(self)
    }
}
/// Implements [`StringArrayType`] for references to [`StringViewArray`] by
/// forwarding to the functions of the same name on `StringViewArray`.
impl<'a> StringArrayType<'a> for &'a StringViewArray {
    fn is_ascii(&self) -> bool {
        // Named through the explicit type path rather than `self.is_ascii()`
        // (presumably so this trait method cannot be selected instead — confirm).
        StringViewArray::is_ascii(self)
    }

    fn iter(&self) -> ArrayIter<Self> {
        // Same explicit-path forwarding as `is_ascii`.
        StringViewArray::iter(self)
    }
}
608
609impl PartialEq for dyn Array + '_ {
610 fn eq(&self, other: &Self) -> bool {
611 self.to_data().eq(&other.to_data())
612 }
613}
614
615impl<T: Array> PartialEq<T> for dyn Array + '_ {
616 fn eq(&self, other: &T) -> bool {
617 self.to_data().eq(&other.to_data())
618 }
619}
620
621impl PartialEq for NullArray {
622 fn eq(&self, other: &NullArray) -> bool {
623 self.to_data().eq(&other.to_data())
624 }
625}
626
627impl<T: ArrowPrimitiveType> PartialEq for PrimitiveArray<T> {
628 fn eq(&self, other: &PrimitiveArray<T>) -> bool {
629 self.to_data().eq(&other.to_data())
630 }
631}
632
633impl<K: ArrowDictionaryKeyType> PartialEq for DictionaryArray<K> {
634 fn eq(&self, other: &Self) -> bool {
635 self.to_data().eq(&other.to_data())
636 }
637}
638
639impl PartialEq for BooleanArray {
640 fn eq(&self, other: &BooleanArray) -> bool {
641 self.to_data().eq(&other.to_data())
642 }
643}
644
645impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericStringArray<OffsetSize> {
646 fn eq(&self, other: &Self) -> bool {
647 self.to_data().eq(&other.to_data())
648 }
649}
650
651impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericBinaryArray<OffsetSize> {
652 fn eq(&self, other: &Self) -> bool {
653 self.to_data().eq(&other.to_data())
654 }
655}
656
657impl PartialEq for FixedSizeBinaryArray {
658 fn eq(&self, other: &Self) -> bool {
659 self.to_data().eq(&other.to_data())
660 }
661}
662
663impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListArray<OffsetSize> {
664 fn eq(&self, other: &Self) -> bool {
665 self.to_data().eq(&other.to_data())
666 }
667}
668
669impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListViewArray<OffsetSize> {
670 fn eq(&self, other: &Self) -> bool {
671 self.to_data().eq(&other.to_data())
672 }
673}
674
675impl PartialEq for MapArray {
676 fn eq(&self, other: &Self) -> bool {
677 self.to_data().eq(&other.to_data())
678 }
679}
680
681impl PartialEq for FixedSizeListArray {
682 fn eq(&self, other: &Self) -> bool {
683 self.to_data().eq(&other.to_data())
684 }
685}
686
687impl PartialEq for StructArray {
688 fn eq(&self, other: &Self) -> bool {
689 self.to_data().eq(&other.to_data())
690 }
691}
692
693impl<T: ByteViewType + ?Sized> PartialEq for GenericByteViewArray<T> {
694 fn eq(&self, other: &Self) -> bool {
695 self.to_data().eq(&other.to_data())
696 }
697}
698
699pub fn make_array(data: ArrayData) -> ArrayRef {
702 match data.data_type() {
703 DataType::Boolean => Arc::new(BooleanArray::from(data)) as ArrayRef,
704 DataType::Int8 => Arc::new(Int8Array::from(data)) as ArrayRef,
705 DataType::Int16 => Arc::new(Int16Array::from(data)) as ArrayRef,
706 DataType::Int32 => Arc::new(Int32Array::from(data)) as ArrayRef,
707 DataType::Int64 => Arc::new(Int64Array::from(data)) as ArrayRef,
708 DataType::UInt8 => Arc::new(UInt8Array::from(data)) as ArrayRef,
709 DataType::UInt16 => Arc::new(UInt16Array::from(data)) as ArrayRef,
710 DataType::UInt32 => Arc::new(UInt32Array::from(data)) as ArrayRef,
711 DataType::UInt64 => Arc::new(UInt64Array::from(data)) as ArrayRef,
712 DataType::Float16 => Arc::new(Float16Array::from(data)) as ArrayRef,
713 DataType::Float32 => Arc::new(Float32Array::from(data)) as ArrayRef,
714 DataType::Float64 => Arc::new(Float64Array::from(data)) as ArrayRef,
715 DataType::Date32 => Arc::new(Date32Array::from(data)) as ArrayRef,
716 DataType::Date64 => Arc::new(Date64Array::from(data)) as ArrayRef,
717 DataType::Time32(TimeUnit::Second) => Arc::new(Time32SecondArray::from(data)) as ArrayRef,
718 DataType::Time32(TimeUnit::Millisecond) => {
719 Arc::new(Time32MillisecondArray::from(data)) as ArrayRef
720 }
721 DataType::Time64(TimeUnit::Microsecond) => {
722 Arc::new(Time64MicrosecondArray::from(data)) as ArrayRef
723 }
724 DataType::Time64(TimeUnit::Nanosecond) => {
725 Arc::new(Time64NanosecondArray::from(data)) as ArrayRef
726 }
727 DataType::Timestamp(TimeUnit::Second, _) => {
728 Arc::new(TimestampSecondArray::from(data)) as ArrayRef
729 }
730 DataType::Timestamp(TimeUnit::Millisecond, _) => {
731 Arc::new(TimestampMillisecondArray::from(data)) as ArrayRef
732 }
733 DataType::Timestamp(TimeUnit::Microsecond, _) => {
734 Arc::new(TimestampMicrosecondArray::from(data)) as ArrayRef
735 }
736 DataType::Timestamp(TimeUnit::Nanosecond, _) => {
737 Arc::new(TimestampNanosecondArray::from(data)) as ArrayRef
738 }
739 DataType::Interval(IntervalUnit::YearMonth) => {
740 Arc::new(IntervalYearMonthArray::from(data)) as ArrayRef
741 }
742 DataType::Interval(IntervalUnit::DayTime) => {
743 Arc::new(IntervalDayTimeArray::from(data)) as ArrayRef
744 }
745 DataType::Interval(IntervalUnit::MonthDayNano) => {
746 Arc::new(IntervalMonthDayNanoArray::from(data)) as ArrayRef
747 }
748 DataType::Duration(TimeUnit::Second) => {
749 Arc::new(DurationSecondArray::from(data)) as ArrayRef
750 }
751 DataType::Duration(TimeUnit::Millisecond) => {
752 Arc::new(DurationMillisecondArray::from(data)) as ArrayRef
753 }
754 DataType::Duration(TimeUnit::Microsecond) => {
755 Arc::new(DurationMicrosecondArray::from(data)) as ArrayRef
756 }
757 DataType::Duration(TimeUnit::Nanosecond) => {
758 Arc::new(DurationNanosecondArray::from(data)) as ArrayRef
759 }
760 DataType::Binary => Arc::new(BinaryArray::from(data)) as ArrayRef,
761 DataType::LargeBinary => Arc::new(LargeBinaryArray::from(data)) as ArrayRef,
762 DataType::FixedSizeBinary(_) => Arc::new(FixedSizeBinaryArray::from(data)) as ArrayRef,
763 DataType::BinaryView => Arc::new(BinaryViewArray::from(data)) as ArrayRef,
764 DataType::Utf8 => Arc::new(StringArray::from(data)) as ArrayRef,
765 DataType::LargeUtf8 => Arc::new(LargeStringArray::from(data)) as ArrayRef,
766 DataType::Utf8View => Arc::new(StringViewArray::from(data)) as ArrayRef,
767 DataType::List(_) => Arc::new(ListArray::from(data)) as ArrayRef,
768 DataType::LargeList(_) => Arc::new(LargeListArray::from(data)) as ArrayRef,
769 DataType::ListView(_) => Arc::new(ListViewArray::from(data)) as ArrayRef,
770 DataType::LargeListView(_) => Arc::new(LargeListViewArray::from(data)) as ArrayRef,
771 DataType::Struct(_) => Arc::new(StructArray::from(data)) as ArrayRef,
772 DataType::Map(_, _) => Arc::new(MapArray::from(data)) as ArrayRef,
773 DataType::Union(_, _) => Arc::new(UnionArray::from(data)) as ArrayRef,
774 DataType::FixedSizeList(_, _) => Arc::new(FixedSizeListArray::from(data)) as ArrayRef,
775 DataType::Dictionary(ref key_type, _) => match key_type.as_ref() {
776 DataType::Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)) as ArrayRef,
777 DataType::Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)) as ArrayRef,
778 DataType::Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)) as ArrayRef,
779 DataType::Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)) as ArrayRef,
780 DataType::UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)) as ArrayRef,
781 DataType::UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)) as ArrayRef,
782 DataType::UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)) as ArrayRef,
783 DataType::UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)) as ArrayRef,
784 dt => panic!("Unexpected dictionary key type {dt:?}"),
785 },
786 DataType::RunEndEncoded(ref run_ends_type, _) => match run_ends_type.data_type() {
787 DataType::Int16 => Arc::new(RunArray::<Int16Type>::from(data)) as ArrayRef,
788 DataType::Int32 => Arc::new(RunArray::<Int32Type>::from(data)) as ArrayRef,
789 DataType::Int64 => Arc::new(RunArray::<Int64Type>::from(data)) as ArrayRef,
790 dt => panic!("Unexpected data type for run_ends array {dt:?}"),
791 },
792 DataType::Null => Arc::new(NullArray::from(data)) as ArrayRef,
793 DataType::Decimal128(_, _) => Arc::new(Decimal128Array::from(data)) as ArrayRef,
794 DataType::Decimal256(_, _) => Arc::new(Decimal256Array::from(data)) as ArrayRef,
795 dt => panic!("Unexpected data type {dt:?}"),
796 }
797}
798
799pub fn new_empty_array(data_type: &DataType) -> ArrayRef {
812 let data = ArrayData::new_empty(data_type);
813 make_array(data)
814}
815
816pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
830 make_array(ArrayData::new_null(data_type, length))
831}
832
833unsafe fn get_offsets<O: ArrowNativeType>(data: &ArrayData) -> OffsetBuffer<O> {
839 match data.is_empty() && data.buffers()[0].is_empty() {
840 true => OffsetBuffer::new_empty(),
841 false => {
842 let buffer =
843 ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len() + 1);
844 unsafe { OffsetBuffer::new_unchecked(buffer) }
847 }
848 }
849}
850
851fn print_long_array<A, F>(array: &A, f: &mut std::fmt::Formatter, print_item: F) -> std::fmt::Result
853where
854 A: Array,
855 F: Fn(&A, usize, &mut std::fmt::Formatter) -> std::fmt::Result,
856{
857 let head = std::cmp::min(10, array.len());
858
859 for i in 0..head {
860 if array.is_null(i) {
861 writeln!(f, " null,")?;
862 } else {
863 write!(f, " ")?;
864 print_item(array, i, f)?;
865 writeln!(f, ",")?;
866 }
867 }
868 if array.len() > 10 {
869 if array.len() > 20 {
870 writeln!(f, " ...{} elements...,", array.len() - 20)?;
871 }
872
873 let tail = std::cmp::max(head, array.len() - 10);
874
875 for i in tail..array.len() {
876 if array.is_null(i) {
877 writeln!(f, " null,")?;
878 } else {
879 write!(f, " ")?;
880 print_item(array, i, f)?;
881 writeln!(f, ",")?;
882 }
883 }
884 }
885 Ok(())
886}
887
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cast::{as_union_array, downcast_array};
    use crate::downcast_run_array;
    use arrow_buffer::MutableBuffer;
    use arrow_schema::{Field, Fields, UnionFields, UnionMode};

    /// An empty `Int32` array downcasts to `Int32Array` and holds no values.
    #[test]
    fn test_empty_primitive() {
        let empty = new_empty_array(&DataType::Int32);
        let ints = empty.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(ints.len(), 0);
        let no_values: &[i32] = &[];
        assert_eq!(ints.values(), no_values);
    }

    /// An empty `Utf8` array still exposes a first offset of zero.
    #[test]
    fn test_empty_variable_sized() {
        let empty = new_empty_array(&DataType::Utf8);
        let strings = empty.as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(strings.len(), 0);
        assert_eq!(strings.value_offsets()[0], 0i32);
    }

    /// An empty list array still exposes a first offset of zero.
    #[test]
    fn test_empty_list_primitive() {
        let item = Field::new("item", DataType::Int32, false);
        let empty = new_empty_array(&DataType::List(Arc::new(item)));
        let lists = empty.as_any().downcast_ref::<ListArray>().unwrap();
        assert_eq!(lists.len(), 0);
        assert_eq!(lists.value_offsets()[0], 0i32);
    }

    /// Every slot of a null boolean array is null.
    #[test]
    fn test_null_boolean() {
        let nulls = new_null_array(&DataType::Boolean, 9);
        let bools = nulls.as_any().downcast_ref::<BooleanArray>().unwrap();
        assert_eq!(bools.len(), 9);
        assert!((0..9).all(|i| bools.is_null(i)));
    }

    /// Every slot of a null `Int32` array is null.
    #[test]
    fn test_null_primitive() {
        let nulls = new_null_array(&DataType::Int32, 9);
        let ints = nulls.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(ints.len(), 9);
        assert!((0..9).all(|i| ints.is_null(i)));
    }

    /// A null struct array has full-length children, is null everywhere and can be sliced.
    #[test]
    fn test_null_struct() {
        let fields = vec![Field::new("data", DataType::Int64, false)];
        let struct_type = DataType::Struct(fields.into());
        let nulls = new_null_array(&struct_type, 9);

        let structs = nulls.as_any().downcast_ref::<StructArray>().unwrap();
        assert_eq!(structs.len(), 9);
        assert_eq!(structs.column(0).len(), 9);
        assert!((0..9).all(|i| structs.is_null(i)));

        // Slicing the result must not panic.
        structs.slice(0, 5);
    }

    /// A null `Utf8` array is null everywhere and its last offset is zero.
    #[test]
    fn test_null_variable_sized() {
        let nulls = new_null_array(&DataType::Utf8, 9);
        let strings = nulls.as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(strings.len(), 9);
        assert_eq!(strings.value_offsets()[9], 0i32);
        assert!((0..9).all(|i| strings.is_null(i)));
    }

    /// A null list array is null everywhere and its last offset is zero.
    #[test]
    fn test_null_list_primitive() {
        let item = Field::new("item", DataType::Int32, true);
        let nulls = new_null_array(&DataType::List(Arc::new(item)), 9);
        let lists = nulls.as_any().downcast_ref::<ListArray>().unwrap();
        assert_eq!(lists.len(), 9);
        assert_eq!(lists.value_offsets()[9], 0i32);
        assert!((0..9).all(|i| lists.is_null(i)));
    }

    /// A null map array is null everywhere and its last offset is zero.
    #[test]
    fn test_null_map() {
        let entry = Field::new(
            "entry",
            DataType::Struct(Fields::from(vec![
                Field::new("key", DataType::Utf8, false),
                Field::new("value", DataType::Int32, true),
            ])),
            false,
        );
        let data_type = DataType::Map(Arc::new(entry), false);

        let nulls = new_null_array(&data_type, 9);
        let maps = nulls.as_any().downcast_ref::<MapArray>().unwrap();
        assert_eq!(maps.len(), 9);
        assert_eq!(maps.value_offsets()[9], 0i32);
        assert!((0..9).all(|i| maps.is_null(i)));
    }

    /// A null dictionary array equals a dictionary collected from nine `None`s,
    /// including the length of the first buffer.
    #[test]
    fn test_null_dictionary() {
        let values: Vec<Option<&str>> = vec![None; 9];

        let collected: DictionaryArray<Int8Type> = values.into_iter().collect();
        let collected = Arc::new(collected) as ArrayRef;

        let null_array = new_null_array(collected.data_type(), 9);
        assert_eq!(&collected, &null_array);
        assert_eq!(
            collected.to_data().buffers()[0].len(),
            null_array.to_data().buffers()[0].len()
        );
    }

    /// Null union arrays, in both modes, report no physical nulls but four logical
    /// nulls, and every value is a single null element.
    #[test]
    fn test_null_union() {
        for mode in [UnionMode::Sparse, UnionMode::Dense] {
            let fields = UnionFields::new(
                vec![2, 1],
                vec![
                    Field::new("foo", DataType::Int32, true),
                    Field::new("bar", DataType::Int64, true),
                ],
            );
            let data_type = DataType::Union(fields, mode);
            let nulls = new_null_array(&data_type, 4);

            let union = as_union_array(nulls.as_ref());
            assert_eq!(union.len(), 4);
            assert_eq!(union.null_count(), 0);
            assert_eq!(union.logical_null_count(), 4);

            for slot in 0..4 {
                let value = union.value(slot);
                assert_eq!(value.len(), 1);
                assert_eq!(value.null_count(), 1);
                assert_eq!(value.logical_null_count(), 1);
                assert!(value.is_null(0))
            }

            union.to_data().validate_full().unwrap();
        }
    }

    /// Null run-end encoded arrays consist of a single run of one null value for
    /// each supported run-end type.
    #[test]
    #[allow(unused_parens)]
    fn test_null_runs() {
        for run_end_type in [DataType::Int16, DataType::Int32, DataType::Int64] {
            let run_ends = Field::new("run_ends", run_end_type, false);
            let values = Field::new("values", DataType::Utf8, true);
            let data_type = DataType::RunEndEncoded(Arc::new(run_ends), Arc::new(values));

            let array = new_null_array(&data_type, 4);
            let array = array.as_ref();

            downcast_run_array! {
                array => {
                    assert_eq!(array.len(), 4);
                    assert_eq!(array.null_count(), 0);
                    assert_eq!(array.logical_null_count(), 4);
                    assert_eq!(array.values().len(), 1);
                    assert_eq!(array.values().null_count(), 1);
                    assert_eq!(array.run_ends().len(), 4);
                    assert_eq!(array.run_ends().values(), &[4]);

                    let idx = array.get_physical_indices(&[0, 1, 2, 3]).unwrap();
                    assert_eq!(idx, &[0,0,0,0]);
                }
                d => unreachable!("{d}")
            }
        }
    }

    /// Null fixed-size binary arrays are null everywhere for several value sizes.
    #[test]
    fn test_null_fixed_size_binary() {
        for size in [1, 2, 7] {
            let nulls = new_null_array(&DataType::FixedSizeBinary(size), 6);
            let binary = nulls
                .as_ref()
                .as_any()
                .downcast_ref::<FixedSizeBinaryArray>()
                .unwrap();

            assert_eq!(binary.len(), 6);
            assert_eq!(binary.null_count(), 6);
            assert_eq!(binary.logical_null_count(), 6);
            assert!(binary.iter().all(|value| value.is_none()));
        }
    }

    /// A `NullArray` reports zero buffer memory and an array size of one `usize`.
    #[test]
    fn test_memory_size_null() {
        let null_arr = NullArray::new(32);

        assert_eq!(0, null_arr.get_buffer_memory_size());
        assert_eq!(
            std::mem::size_of::<usize>(),
            null_arr.get_array_memory_size()
        );
    }

    /// 128 `i64` values add exactly `128 * size_of::<i64>()` bytes over an empty array.
    #[test]
    fn test_memory_size_primitive() {
        let arr = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
        let empty = PrimitiveArray::<Int64Type>::from(ArrayData::new_empty(arr.data_type()));

        let growth = arr.get_array_memory_size() - empty.get_array_memory_size();
        assert_eq!(growth, 128 * std::mem::size_of::<i64>());
    }

    /// Slices report the same memory size as the array they were taken from.
    #[test]
    fn test_memory_size_primitive_sliced() {
        let arr = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
        let first_half = arr.slice(0, 64);
        let second_half = arr.slice(64, 64);

        let full_size = arr.get_array_memory_size();
        assert_eq!(first_half.get_array_memory_size(), full_size);
        assert_eq!(second_half.get_array_memory_size(), full_size);
    }

    /// With a null bitmap, 128 values add the value bytes plus 64 bytes over an
    /// empty array that also carries a bitmap.
    #[test]
    fn test_memory_size_primitive_nullable() {
        let arr: PrimitiveArray<Int64Type> = (0..128)
            .map(|i| if i % 20 == 0 { Some(i) } else { None })
            .collect();

        let empty_data = ArrayData::builder(arr.data_type().clone())
            .add_buffer(MutableBuffer::new(0).into())
            .null_bit_buffer(Some(MutableBuffer::new_null(0).into()))
            .build()
            .unwrap();
        let empty_with_bitmap = PrimitiveArray::<Int64Type>::from(empty_data);

        // The empty array accounts only for the struct itself.
        assert_eq!(
            std::mem::size_of::<PrimitiveArray<Int64Type>>(),
            empty_with_bitmap.get_array_memory_size()
        );

        let growth = arr.get_array_memory_size() - empty_with_bitmap.get_array_memory_size();
        assert_eq!(growth, 128 * std::mem::size_of::<i64>() + 64);
    }

    /// A dictionary array's memory growth is the sum of its keys' and values' growth.
    #[test]
    fn test_memory_size_dictionary() {
        let values = PrimitiveArray::<Int64Type>::from_iter_values(0..16);
        let keys = PrimitiveArray::<Int16Type>::from_iter_values(
            (0..256).map(|i| (i % values.len()) as i16),
        );

        let dict_data_type = DataType::Dictionary(
            Box::new(keys.data_type().clone()),
            Box::new(values.data_type().clone()),
        );
        let dict_data = keys
            .into_data()
            .into_builder()
            .data_type(dict_data_type)
            .child_data(vec![values.into_data()])
            .build()
            .unwrap();

        let empty_type =
            DataType::Dictionary(Box::new(DataType::Int16), Box::new(DataType::Int64));
        let empty_data = ArrayData::new_empty(&empty_type);

        let arr = DictionaryArray::<Int16Type>::from(dict_data);
        let empty = DictionaryArray::<Int16Type>::from(empty_data);

        let expected_keys_size = 256 * std::mem::size_of::<i16>();
        let keys_growth =
            arr.keys().get_array_memory_size() - empty.keys().get_array_memory_size();
        assert_eq!(keys_growth, expected_keys_size);

        let expected_values_size = 16 * std::mem::size_of::<i64>();
        let values_growth =
            arr.values().get_array_memory_size() - empty.values().get_array_memory_size();
        assert_eq!(values_growth, expected_values_size);

        let total_growth = arr.get_array_memory_size() - empty.get_array_memory_size();
        assert_eq!(total_growth, expected_keys_size + expected_values_size);
    }

    /// Stand-in for a kernel that accepts any array as `&dyn Array`.
    fn compute_my_thing(arr: &dyn Array) -> bool {
        !arr.is_empty()
    }

    /// Concrete arrays, `ArrayRef`s and `&dyn Array` can all be passed as `&dyn Array`.
    #[test]
    fn test_array_ref_as_array() {
        let arr: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();

        // A concrete array coerces directly.
        assert!(compute_my_thing(&arr));

        let arr: ArrayRef = Arc::new(arr);
        // Both `&ArrayRef` and the `&dyn Array` behind it are accepted.
        assert!(compute_my_thing(&arr));
        assert!(compute_my_thing(arr.as_ref()));
    }

    /// `downcast_array` recovers a concrete array equal to the one that was boxed.
    #[test]
    fn test_downcast_array() {
        let original: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();

        let boxed: ArrayRef = Arc::new(original);
        let recovered: Int32Array = downcast_array(&boxed);

        let expected: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
        assert_eq!(recovered, expected);
    }
}