1#![allow(clippy::enum_clike_unportable_variant)]
18
19use crate::{make_array, Array, ArrayRef};
20use arrow_buffer::bit_chunk_iterator::{BitChunkIterator, BitChunks};
21use arrow_buffer::buffer::NullBuffer;
22use arrow_buffer::{BooleanBuffer, MutableBuffer, ScalarBuffer};
23use arrow_data::{ArrayData, ArrayDataBuilder};
24use arrow_schema::{ArrowError, DataType, UnionFields, UnionMode};
25use std::any::Any;
28use std::collections::HashSet;
29use std::sync::Arc;
30
/// An array whose elements are each taken from one of several child arrays.
///
/// Every element carries a type id selecting the child it belongs to. When an
/// offsets buffer is present (dense mode) the element's position inside that
/// child is read from the offsets buffer; without one (sparse mode) the element
/// sits at the same index in the child as in the union.
#[derive(Clone)]
pub struct UnionArray {
    /// The array's data type; the rest of this file treats anything other than
    /// `DataType::Union` as unreachable.
    data_type: DataType,
    /// One type id per element, selecting the child array holding the value.
    type_ids: ScalarBuffer<i8>,
    /// Per-element offsets into the selected child; `Some` for dense unions,
    /// `None` for sparse unions.
    offsets: Option<ScalarBuffer<i32>>,
    /// Child arrays indexed by type id; `None` for ids that no field declares.
    fields: Vec<Option<ArrayRef>>,
}
129
130impl UnionArray {
131 pub unsafe fn new_unchecked(
150 fields: UnionFields,
151 type_ids: ScalarBuffer<i8>,
152 offsets: Option<ScalarBuffer<i32>>,
153 children: Vec<ArrayRef>,
154 ) -> Self {
155 let mode = if offsets.is_some() {
156 UnionMode::Dense
157 } else {
158 UnionMode::Sparse
159 };
160
161 let len = type_ids.len();
162 let builder = ArrayData::builder(DataType::Union(fields, mode))
163 .add_buffer(type_ids.into_inner())
164 .child_data(children.into_iter().map(Array::into_data).collect())
165 .len(len);
166
167 let data = match offsets {
168 Some(offsets) => builder.add_buffer(offsets.into_inner()).build_unchecked(),
169 None => builder.build_unchecked(),
170 };
171 Self::from(data)
172 }
173
174 pub fn try_new(
178 fields: UnionFields,
179 type_ids: ScalarBuffer<i8>,
180 offsets: Option<ScalarBuffer<i32>>,
181 children: Vec<ArrayRef>,
182 ) -> Result<Self, ArrowError> {
183 if fields.len() != children.len() {
185 return Err(ArrowError::InvalidArgumentError(
186 "Union fields length must match child arrays length".to_string(),
187 ));
188 }
189
190 if let Some(offsets) = &offsets {
191 if offsets.len() != type_ids.len() {
193 return Err(ArrowError::InvalidArgumentError(
194 "Type Ids and Offsets lengths must match".to_string(),
195 ));
196 }
197 } else {
198 for child in &children {
200 if child.len() != type_ids.len() {
201 return Err(ArrowError::InvalidArgumentError(
202 "Sparse union child arrays must be equal in length to the length of the union".to_string(),
203 ));
204 }
205 }
206 }
207
208 let max_id = fields.iter().map(|(i, _)| i).max().unwrap_or_default() as usize;
210 let mut array_lens = vec![i32::MIN; max_id + 1];
211 for (cd, (field_id, _)) in children.iter().zip(fields.iter()) {
212 array_lens[field_id as usize] = cd.len() as i32;
213 }
214
215 for id in &type_ids {
217 match array_lens.get(*id as usize) {
218 Some(x) if *x != i32::MIN => {}
219 _ => {
220 return Err(ArrowError::InvalidArgumentError(
221 "Type Ids values must match one of the field type ids".to_owned(),
222 ))
223 }
224 }
225 }
226
227 if let Some(offsets) = &offsets {
229 let mut iter = type_ids.iter().zip(offsets.iter());
230 if iter.any(|(type_id, &offset)| offset < 0 || offset >= array_lens[*type_id as usize])
231 {
232 return Err(ArrowError::InvalidArgumentError(
233 "Offsets must be positive and within the length of the Array".to_owned(),
234 ));
235 }
236 }
237
238 let union_array = unsafe { Self::new_unchecked(fields, type_ids, offsets, children) };
241 Ok(union_array)
242 }
243
244 pub fn child(&self, type_id: i8) -> &ArrayRef {
251 assert!((type_id as usize) < self.fields.len());
252 let boxed = &self.fields[type_id as usize];
253 boxed.as_ref().expect("invalid type id")
254 }
255
256 pub fn type_id(&self, index: usize) -> i8 {
262 assert!(index < self.type_ids.len());
263 self.type_ids[index]
264 }
265
    /// Returns the buffer holding one type id per element of this array.
    pub fn type_ids(&self) -> &ScalarBuffer<i8> {
        &self.type_ids
    }
270
    /// Returns the offsets buffer, or `None` when this array stores no offsets
    /// (which is the case for sparse unions).
    pub fn offsets(&self) -> Option<&ScalarBuffer<i32>> {
        self.offsets.as_ref()
    }
275
276 pub fn value_offset(&self, index: usize) -> usize {
282 assert!(index < self.len());
283 match &self.offsets {
284 Some(offsets) => offsets[index] as usize,
285 None => self.offset() + index,
286 }
287 }
288
289 pub fn value(&self, i: usize) -> ArrayRef {
293 let type_id = self.type_id(i);
294 let value_offset = self.value_offset(i);
295 let child = self.child(type_id);
296 child.slice(value_offset, 1)
297 }
298
299 pub fn type_names(&self) -> Vec<&str> {
301 match self.data_type() {
302 DataType::Union(fields, _) => fields
303 .iter()
304 .map(|(_, f)| f.name().as_str())
305 .collect::<Vec<&str>>(),
306 _ => unreachable!("Union array's data type is not a union!"),
307 }
308 }
309
310 fn is_dense(&self) -> bool {
312 match self.data_type() {
313 DataType::Union(_, mode) => mode == &UnionMode::Dense,
314 _ => unreachable!("Union array's data type is not a union!"),
315 }
316 }
317
318 pub fn slice(&self, offset: usize, length: usize) -> Self {
320 let (offsets, fields) = match self.offsets.as_ref() {
321 Some(offsets) => (Some(offsets.slice(offset, length)), self.fields.clone()),
323 None => {
325 let fields = self
326 .fields
327 .iter()
328 .map(|x| x.as_ref().map(|x| x.slice(offset, length)))
329 .collect();
330 (None, fields)
331 }
332 };
333
334 Self {
335 data_type: self.data_type.clone(),
336 type_ids: self.type_ids.slice(offset, length),
337 offsets,
338 fields,
339 }
340 }
341
342 #[allow(clippy::type_complexity)]
370 pub fn into_parts(
371 self,
372 ) -> (
373 UnionFields,
374 ScalarBuffer<i8>,
375 Option<ScalarBuffer<i32>>,
376 Vec<ArrayRef>,
377 ) {
378 let Self {
379 data_type,
380 type_ids,
381 offsets,
382 mut fields,
383 } = self;
384 match data_type {
385 DataType::Union(union_fields, _) => {
386 let children = union_fields
387 .iter()
388 .map(|(type_id, _)| fields[type_id as usize].take().unwrap())
389 .collect();
390 (union_fields, type_ids, offsets, children)
391 }
392 _ => unreachable!(),
393 }
394 }
395
396 fn mask_sparse_skip_without_nulls(&self, nulls: Vec<(i8, NullBuffer)>) -> BooleanBuffer {
398 let fold = |(with_nulls_selected, union_nulls), (is_field, field_nulls)| {
404 (
405 with_nulls_selected | is_field,
406 union_nulls | (is_field & field_nulls),
407 )
408 };
409
410 self.mask_sparse_helper(
411 nulls,
412 |type_ids_chunk_array, nulls_masks_iters| {
413 let (with_nulls_selected, union_nulls) = nulls_masks_iters
414 .iter_mut()
415 .map(|(field_type_id, field_nulls)| {
416 let field_nulls = field_nulls.next().unwrap();
417 let is_field = selection_mask(type_ids_chunk_array, *field_type_id);
418
419 (is_field, field_nulls)
420 })
421 .fold((0, 0), fold);
422
423 let without_nulls_selected = !with_nulls_selected;
425
426 without_nulls_selected | union_nulls
429 },
430 |type_ids_remainder, bit_chunks| {
431 let (with_nulls_selected, union_nulls) = bit_chunks
432 .iter()
433 .map(|(field_type_id, field_bit_chunks)| {
434 let field_nulls = field_bit_chunks.remainder_bits();
435 let is_field = selection_mask(type_ids_remainder, *field_type_id);
436
437 (is_field, field_nulls)
438 })
439 .fold((0, 0), fold);
440
441 let without_nulls_selected = !with_nulls_selected;
442
443 without_nulls_selected | union_nulls
444 },
445 )
446 }
447
448 fn mask_sparse_skip_fully_null(&self, mut nulls: Vec<(i8, NullBuffer)>) -> BooleanBuffer {
450 let fields = match self.data_type() {
451 DataType::Union(fields, _) => fields,
452 _ => unreachable!("Union array's data type is not a union!"),
453 };
454
455 let type_ids = fields.iter().map(|(id, _)| id).collect::<HashSet<_>>();
456 let with_nulls = nulls.iter().map(|(id, _)| *id).collect::<HashSet<_>>();
457
458 let without_nulls_ids = type_ids
459 .difference(&with_nulls)
460 .copied()
461 .collect::<Vec<_>>();
462
463 nulls.retain(|(_, nulls)| nulls.null_count() < nulls.len());
464
465 self.mask_sparse_helper(
470 nulls,
471 |type_ids_chunk_array, nulls_masks_iters| {
472 let union_nulls = nulls_masks_iters.iter_mut().fold(
473 0,
474 |union_nulls, (field_type_id, nulls_iter)| {
475 let field_nulls = nulls_iter.next().unwrap();
476
477 if field_nulls == 0 {
478 union_nulls
479 } else {
480 let is_field = selection_mask(type_ids_chunk_array, *field_type_id);
481
482 union_nulls | (is_field & field_nulls)
483 }
484 },
485 );
486
487 let without_nulls_selected =
489 without_nulls_selected(type_ids_chunk_array, &without_nulls_ids);
490
491 union_nulls | without_nulls_selected
494 },
495 |type_ids_remainder, bit_chunks| {
496 let union_nulls =
497 bit_chunks
498 .iter()
499 .fold(0, |union_nulls, (field_type_id, field_bit_chunks)| {
500 let is_field = selection_mask(type_ids_remainder, *field_type_id);
501 let field_nulls = field_bit_chunks.remainder_bits();
502
503 union_nulls | is_field & field_nulls
504 });
505
506 union_nulls | without_nulls_selected(type_ids_remainder, &without_nulls_ids)
507 },
508 )
509 }
510
511 fn mask_sparse_all_with_nulls_skip_one(&self, nulls: Vec<(i8, NullBuffer)>) -> BooleanBuffer {
513 self.mask_sparse_helper(
520 nulls,
521 |type_ids_chunk_array, nulls_masks_iters| {
522 let (is_not_first, union_nulls) = nulls_masks_iters[1..] .iter_mut()
524 .fold(
525 (0, 0),
526 |(is_not_first, union_nulls), (field_type_id, nulls_iter)| {
527 let field_nulls = nulls_iter.next().unwrap();
528 let is_field = selection_mask(type_ids_chunk_array, *field_type_id);
529
530 (
531 is_not_first | is_field,
532 union_nulls | (is_field & field_nulls),
533 )
534 },
535 );
536
537 let is_first = !is_not_first;
538 let first_nulls = nulls_masks_iters[0].1.next().unwrap();
539
540 (is_first & first_nulls) | union_nulls
541 },
542 |type_ids_remainder, bit_chunks| {
543 bit_chunks
544 .iter()
545 .fold(0, |union_nulls, (field_type_id, field_bit_chunks)| {
546 let field_nulls = field_bit_chunks.remainder_bits();
547 let is_field = selection_mask(type_ids_remainder, *field_type_id);
550
551 union_nulls | (is_field & field_nulls)
552 })
553 },
554 )
555 }
556
    /// Shared driver of the mask-based strategies for sparse unions.
    ///
    /// Walks the type ids in chunks of 64 and, for each full chunk, calls
    /// `mask_chunk` with the chunk and one bit-chunk iterator per entry of
    /// `nulls`; the returned `u64` holds the validity bits of those 64 elements.
    /// If fewer than 64 type ids are left over, `mask_remainder` is called once
    /// with them and with the `BitChunks` of every entry, and its result is
    /// appended as the last word.
    ///
    /// `mask_chunk` is expected to advance every iterator it is given exactly
    /// once per call, so that the iterators stay aligned with the type-id chunks.
    fn mask_sparse_helper(
        &self,
        nulls: Vec<(i8, NullBuffer)>,
        mut mask_chunk: impl FnMut(&[i8; 64], &mut [(i8, BitChunkIterator)]) -> u64,
        mask_remainder: impl FnOnce(&[i8], &[(i8, BitChunks)]) -> u64,
    ) -> BooleanBuffer {
        // Views over each null buffer in 64-bit chunks, tagged with the type id.
        let bit_chunks = nulls
            .iter()
            .map(|(type_id, nulls)| (*type_id, nulls.inner().bit_chunks()))
            .collect::<Vec<_>>();

        // One iterator per null buffer, advanced in lockstep with the type-id chunks.
        let mut nulls_masks_iter = bit_chunks
            .iter()
            .map(|(type_id, bit_chunks)| (*type_id, bit_chunks.iter()))
            .collect::<Vec<_>>();

        let chunks_exact = self.type_ids.chunks_exact(64);
        let remainder = chunks_exact.remainder();

        let chunks = chunks_exact.map(|type_ids_chunk| {
            // Cannot fail: `chunks_exact(64)` only yields slices of length 64.
            let type_ids_chunk_array = <&[i8; 64]>::try_from(type_ids_chunk).unwrap();

            mask_chunk(type_ids_chunk_array, &mut nulls_masks_iter)
        });

        // SAFETY: `chunks` is a `ChunksExact` iterator wrapped in `map`, which
        // reports its exact length, as `from_trusted_len_iter` requires.
        let mut buffer = unsafe { MutableBuffer::from_trusted_len_iter(chunks) };

        if !remainder.is_empty() {
            buffer.push(mask_remainder(remainder, &bit_chunks));
        }

        BooleanBuffer::new(buffer.into(), 0, self.type_ids.len())
    }
594
    /// Computes the validity bits of the union one element at a time, by
    /// reading the validity bit of the child selected by each element.
    ///
    /// `nulls` pairs a type id with that child's null buffer. For dense unions
    /// the bit is read at the element's offset, for sparse unions at the
    /// element's own index.
    fn gather_nulls(&self, nulls: Vec<(i8, NullBuffer)>) -> BooleanBuffer {
        let one_null = NullBuffer::new_null(1);
        let one_valid = NullBuffer::new_valid(1);

        // The unsafe code below depends on this table.
        // Each slot holds a null buffer and an index mask. Type ids without an
        // entry in `nulls` map to a one-element all-valid buffer with a zero mask,
        // so `index & mask` always reads its bit 0. Real buffers use `Mask::Max`,
        // which leaves the index unchanged. This avoids a branch per element.
        // The table has 256 slots so that `type_id as u8 as usize` is always in
        // bounds.
        let mut logical_nulls_array = [(&one_valid, Mask::Zero); 256];

        for (type_id, nulls) in &nulls {
            if nulls.null_count() == nulls.len() {
                // Fully-null child: a one-element null buffer with a zero mask
                // gives the same answer without touching the real buffer.
                logical_nulls_array[*type_id as u8 as usize] = (&one_null, Mask::Zero);
            } else {
                logical_nulls_array[*type_id as u8 as usize] = (nulls, Mask::Max);
            }
        }

        match &self.offsets {
            Some(offsets) => {
                // Makes the unchecked read of `offsets` below in bounds.
                assert_eq!(self.type_ids.len(), offsets.len());

                BooleanBuffer::collect_bool(self.type_ids.len(), |i| unsafe {
                    // SAFETY: relies on `collect_bool` only passing indices below
                    // `self.type_ids.len()`.
                    let type_id = *self.type_ids.get_unchecked(i);
                    // SAFETY: `offsets` has the same length as `type_ids` (asserted above).
                    let offset = *offsets.get_unchecked(i);

                    let (nulls, offset_mask) = &logical_nulls_array[type_id as u8 as usize];

                    // SAFETY: with `Mask::Zero` the index is 0 and the buffer has
                    // length 1. With `Mask::Max` this relies on the offset being in
                    // bounds of the child (validated by `try_new`; callers of
                    // `new_unchecked` must guarantee it) and on the null buffer
                    // being as long as its array.
                    nulls
                        .inner()
                        .value_unchecked(offset as usize & *offset_mask as usize)
                })
            }
            None => {
                BooleanBuffer::collect_bool(self.type_ids.len(), |index| unsafe {
                    // SAFETY: relies on `collect_bool` only passing indices below
                    // `self.type_ids.len()`.
                    let type_id = *self.type_ids.get_unchecked(index);

                    let (nulls, index_mask) = &logical_nulls_array[type_id as u8 as usize];

                    // SAFETY: with `Mask::Zero` the index is 0 and the buffer has
                    // length 1. With `Mask::Max` this relies on every sparse child
                    // being as long as the union (validated by `try_new`) and on the
                    // null buffer being as long as its array.
                    nulls.inner().value_unchecked(index & *index_mask as usize)
                })
            }
        }
    }
656}
657
658impl From<ArrayData> for UnionArray {
659 fn from(data: ArrayData) -> Self {
660 let (fields, mode) = match data.data_type() {
661 DataType::Union(fields, mode) => (fields, *mode),
662 d => panic!("UnionArray expected ArrayData with type Union got {d}"),
663 };
664 let (type_ids, offsets) = match mode {
665 UnionMode::Sparse => (
666 ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len()),
667 None,
668 ),
669 UnionMode::Dense => (
670 ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len()),
671 Some(ScalarBuffer::new(
672 data.buffers()[1].clone(),
673 data.offset(),
674 data.len(),
675 )),
676 ),
677 };
678
679 let max_id = fields.iter().map(|(i, _)| i).max().unwrap_or_default() as usize;
680 let mut boxed_fields = vec![None; max_id + 1];
681 for (cd, (field_id, _)) in data.child_data().iter().zip(fields.iter()) {
682 boxed_fields[field_id as usize] = Some(make_array(cd.clone()));
683 }
684 Self {
685 data_type: data.data_type().clone(),
686 type_ids,
687 offsets,
688 fields: boxed_fields,
689 }
690 }
691}
692
693impl From<UnionArray> for ArrayData {
694 fn from(array: UnionArray) -> Self {
695 let len = array.len();
696 let f = match &array.data_type {
697 DataType::Union(f, _) => f,
698 _ => unreachable!(),
699 };
700 let buffers = match array.offsets {
701 Some(o) => vec![array.type_ids.into_inner(), o.into_inner()],
702 None => vec![array.type_ids.into_inner()],
703 };
704
705 let child = f
706 .iter()
707 .map(|(i, _)| array.fields[i as usize].as_ref().unwrap().to_data())
708 .collect();
709
710 let builder = ArrayDataBuilder::new(array.data_type)
711 .len(len)
712 .buffers(buffers)
713 .child_data(child);
714 unsafe { builder.build_unchecked() }
715 }
716}
717
718impl Array for UnionArray {
719 fn as_any(&self) -> &dyn Any {
720 self
721 }
722
723 fn to_data(&self) -> ArrayData {
724 self.clone().into()
725 }
726
727 fn into_data(self) -> ArrayData {
728 self.into()
729 }
730
731 fn data_type(&self) -> &DataType {
732 &self.data_type
733 }
734
735 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
736 Arc::new(self.slice(offset, length))
737 }
738
739 fn len(&self) -> usize {
740 self.type_ids.len()
741 }
742
743 fn is_empty(&self) -> bool {
744 self.type_ids.is_empty()
745 }
746
747 fn offset(&self) -> usize {
748 0
749 }
750
751 fn nulls(&self) -> Option<&NullBuffer> {
752 None
753 }
754
755 fn logical_nulls(&self) -> Option<NullBuffer> {
756 let fields = match self.data_type() {
757 DataType::Union(fields, _) => fields,
758 _ => unreachable!(),
759 };
760
761 if fields.len() <= 1 {
762 return self
763 .fields
764 .iter()
765 .flatten()
766 .map(Array::logical_nulls)
767 .next()
768 .flatten();
769 }
770
771 let logical_nulls = fields
772 .iter()
773 .filter_map(|(type_id, _)| Some((type_id, self.child(type_id).logical_nulls()?)))
774 .filter(|(_, nulls)| nulls.null_count() > 0)
775 .collect::<Vec<_>>();
776
777 if logical_nulls.is_empty() {
778 return None;
779 }
780
781 let fully_null_count = logical_nulls
782 .iter()
783 .filter(|(_, nulls)| nulls.null_count() == nulls.len())
784 .count();
785
786 if fully_null_count == fields.len() {
787 if let Some((_, exactly_sized)) = logical_nulls
788 .iter()
789 .find(|(_, nulls)| nulls.len() == self.len())
790 {
791 return Some(exactly_sized.clone());
792 }
793
794 if let Some((_, bigger)) = logical_nulls
795 .iter()
796 .find(|(_, nulls)| nulls.len() > self.len())
797 {
798 return Some(bigger.slice(0, self.len()));
799 }
800
801 return Some(NullBuffer::new_null(self.len()));
802 }
803
804 let boolean_buffer = match &self.offsets {
805 Some(_) => self.gather_nulls(logical_nulls),
806 None => {
807 let gather_relative_cost = if cfg!(target_feature = "avx2") {
815 10
816 } else if cfg!(target_feature = "sse4.1") {
817 3
818 } else if cfg!(target_arch = "x86") || cfg!(target_arch = "x86_64") {
819 2
821 } else {
822 0
826 };
827
828 let strategies = [
829 (SparseStrategy::Gather, gather_relative_cost, true),
830 (
831 SparseStrategy::MaskAllFieldsWithNullsSkipOne,
832 fields.len() - 1,
833 fields.len() == logical_nulls.len(),
834 ),
835 (
836 SparseStrategy::MaskSkipWithoutNulls,
837 logical_nulls.len(),
838 true,
839 ),
840 (
841 SparseStrategy::MaskSkipFullyNull,
842 fields.len() - fully_null_count,
843 true,
844 ),
845 ];
846
847 let (strategy, _, _) = strategies
848 .iter()
849 .filter(|(_, _, applicable)| *applicable)
850 .min_by_key(|(_, cost, _)| cost)
851 .unwrap();
852
853 match strategy {
854 SparseStrategy::Gather => self.gather_nulls(logical_nulls),
855 SparseStrategy::MaskAllFieldsWithNullsSkipOne => {
856 self.mask_sparse_all_with_nulls_skip_one(logical_nulls)
857 }
858 SparseStrategy::MaskSkipWithoutNulls => {
859 self.mask_sparse_skip_without_nulls(logical_nulls)
860 }
861 SparseStrategy::MaskSkipFullyNull => {
862 self.mask_sparse_skip_fully_null(logical_nulls)
863 }
864 }
865 }
866 };
867
868 let null_buffer = NullBuffer::from(boolean_buffer);
869
870 if null_buffer.null_count() > 0 {
871 Some(null_buffer)
872 } else {
873 None
874 }
875 }
876
877 fn is_nullable(&self) -> bool {
878 self.fields
879 .iter()
880 .flatten()
881 .any(|field| field.is_nullable())
882 }
883
884 fn get_buffer_memory_size(&self) -> usize {
885 let mut sum = self.type_ids.inner().capacity();
886 if let Some(o) = self.offsets.as_ref() {
887 sum += o.inner().capacity()
888 }
889 self.fields
890 .iter()
891 .flat_map(|x| x.as_ref().map(|x| x.get_buffer_memory_size()))
892 .sum::<usize>()
893 + sum
894 }
895
896 fn get_array_memory_size(&self) -> usize {
897 let mut sum = self.type_ids.inner().capacity();
898 if let Some(o) = self.offsets.as_ref() {
899 sum += o.inner().capacity()
900 }
901 std::mem::size_of::<Self>()
902 + self
903 .fields
904 .iter()
905 .flat_map(|x| x.as_ref().map(|x| x.get_array_memory_size()))
906 .sum::<usize>()
907 + sum
908 }
909}
910
911impl std::fmt::Debug for UnionArray {
912 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
913 let header = if self.is_dense() {
914 "UnionArray(Dense)\n["
915 } else {
916 "UnionArray(Sparse)\n["
917 };
918 writeln!(f, "{header}")?;
919
920 writeln!(f, "-- type id buffer:")?;
921 writeln!(f, "{:?}", self.type_ids)?;
922
923 if let Some(offsets) = &self.offsets {
924 writeln!(f, "-- offsets buffer:")?;
925 writeln!(f, "{:?}", offsets)?;
926 }
927
928 let fields = match self.data_type() {
929 DataType::Union(fields, _) => fields,
930 _ => unreachable!(),
931 };
932
933 for (type_id, field) in fields.iter() {
934 let child = self.child(type_id);
935 writeln!(
936 f,
937 "-- child {}: \"{}\" ({:?})",
938 type_id,
939 field.name(),
940 field.data_type()
941 )?;
942 std::fmt::Debug::fmt(child, f)?;
943 writeln!(f)?;
944 }
945 writeln!(f, "]")
946 }
947}
948
/// The ways the logical nulls of a sparse union can be computed.
///
/// The variants starting with `Mask` process the type ids in chunks of 64
/// and compute selection masks for some of the fields; they differ in which
/// fields need a selection mask.
enum SparseStrategy {
    /// Read, element by element, the validity bit of the selected child.
    Gather,
    /// Every field has nulls: skip the selection mask of one field by negating
    /// the combined masks of all the others.
    MaskAllFieldsWithNullsSkipOne,
    /// Skip the selection masks of the fields that have no nulls.
    MaskSkipWithoutNulls,
    /// Skip the selection masks of the fields that are entirely null.
    MaskSkipFullyNull,
}
963
/// An index mask that is AND-ed with an index before a lookup: `Zero` forces
/// the index to 0, `Max` leaves it unchanged.
#[derive(Copy, Clone)]
#[repr(usize)]
enum Mask {
    /// `index & 0 == 0`: always read position 0.
    Zero = 0,
    // `usize::MAX` depends on the target's pointer width, which the lint flags.
    // That is intended here: the mask must have every bit of a `usize` set.
    #[allow(clippy::enum_clike_unportable_variant)]
    /// `index & usize::MAX == index`: read the position unchanged.
    Max = usize::MAX,
}
972
/// Packs into a `u64` which positions of `type_ids_chunk` hold `type_id`:
/// bit `i` of the result is set when `type_ids_chunk[i] == type_id`.
///
/// The chunk is expected to hold at most 64 type ids, one per bit of the result.
fn selection_mask(type_ids_chunk: &[i8], type_id: i8) -> u64 {
    let mut packed = 0u64;
    for (bit_idx, &candidate) in type_ids_chunk.iter().enumerate() {
        let bit = u64::from(candidate == type_id);
        packed |= bit << bit_idx;
    }
    packed
}
982
983fn without_nulls_selected(type_ids_chunk: &[i8], without_nulls_ids: &[i8]) -> u64 {
985 without_nulls_ids
986 .iter()
987 .fold(0, |fully_valid_selected, field_type_id| {
988 fully_valid_selected | selection_mask(type_ids_chunk, *field_type_id)
989 })
990}
991
992#[cfg(test)]
993mod tests {
994 use super::*;
995 use std::collections::HashSet;
996
997 use crate::array::Int8Type;
998 use crate::builder::UnionBuilder;
999 use crate::cast::AsArray;
1000 use crate::types::{Float32Type, Float64Type, Int32Type, Int64Type};
1001 use crate::{Float64Array, Int32Array, Int64Array, StringArray};
1002 use crate::{Int8Array, RecordBatch};
1003 use arrow_buffer::Buffer;
1004 use arrow_schema::{Field, Schema};
1005
1006 #[test]
1007 fn test_dense_i32() {
1008 let mut builder = UnionBuilder::new_dense();
1009 builder.append::<Int32Type>("a", 1).unwrap();
1010 builder.append::<Int32Type>("b", 2).unwrap();
1011 builder.append::<Int32Type>("c", 3).unwrap();
1012 builder.append::<Int32Type>("a", 4).unwrap();
1013 builder.append::<Int32Type>("c", 5).unwrap();
1014 builder.append::<Int32Type>("a", 6).unwrap();
1015 builder.append::<Int32Type>("b", 7).unwrap();
1016 let union = builder.build().unwrap();
1017
1018 let expected_type_ids = vec![0_i8, 1, 2, 0, 2, 0, 1];
1019 let expected_offsets = vec![0_i32, 0, 0, 1, 1, 2, 1];
1020 let expected_array_values = [1_i32, 2, 3, 4, 5, 6, 7];
1021
1022 assert_eq!(*union.type_ids(), expected_type_ids);
1024 for (i, id) in expected_type_ids.iter().enumerate() {
1025 assert_eq!(id, &union.type_id(i));
1026 }
1027
1028 assert_eq!(*union.offsets().unwrap(), expected_offsets);
1030 for (i, id) in expected_offsets.iter().enumerate() {
1031 assert_eq!(union.value_offset(i), *id as usize);
1032 }
1033
1034 assert_eq!(
1036 *union.child(0).as_primitive::<Int32Type>().values(),
1037 [1_i32, 4, 6]
1038 );
1039 assert_eq!(
1040 *union.child(1).as_primitive::<Int32Type>().values(),
1041 [2_i32, 7]
1042 );
1043 assert_eq!(
1044 *union.child(2).as_primitive::<Int32Type>().values(),
1045 [3_i32, 5]
1046 );
1047
1048 assert_eq!(expected_array_values.len(), union.len());
1049 for (i, expected_value) in expected_array_values.iter().enumerate() {
1050 assert!(!union.is_null(i));
1051 let slot = union.value(i);
1052 let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
1053 assert_eq!(slot.len(), 1);
1054 let value = slot.value(0);
1055 assert_eq!(expected_value, &value);
1056 }
1057 }
1058
1059 #[test]
1060 #[cfg_attr(miri, ignore)]
1061 fn test_dense_i32_large() {
1062 let mut builder = UnionBuilder::new_dense();
1063
1064 let expected_type_ids = vec![0_i8; 1024];
1065 let expected_offsets: Vec<_> = (0..1024).collect();
1066 let expected_array_values: Vec<_> = (1..=1024).collect();
1067
1068 expected_array_values
1069 .iter()
1070 .for_each(|v| builder.append::<Int32Type>("a", *v).unwrap());
1071
1072 let union = builder.build().unwrap();
1073
1074 assert_eq!(*union.type_ids(), expected_type_ids);
1076 for (i, id) in expected_type_ids.iter().enumerate() {
1077 assert_eq!(id, &union.type_id(i));
1078 }
1079
1080 assert_eq!(*union.offsets().unwrap(), expected_offsets);
1082 for (i, id) in expected_offsets.iter().enumerate() {
1083 assert_eq!(union.value_offset(i), *id as usize);
1084 }
1085
1086 for (i, expected_value) in expected_array_values.iter().enumerate() {
1087 assert!(!union.is_null(i));
1088 let slot = union.value(i);
1089 let slot = slot.as_primitive::<Int32Type>();
1090 assert_eq!(slot.len(), 1);
1091 let value = slot.value(0);
1092 assert_eq!(expected_value, &value);
1093 }
1094 }
1095
1096 #[test]
1097 fn test_dense_mixed() {
1098 let mut builder = UnionBuilder::new_dense();
1099 builder.append::<Int32Type>("a", 1).unwrap();
1100 builder.append::<Int64Type>("c", 3).unwrap();
1101 builder.append::<Int32Type>("a", 4).unwrap();
1102 builder.append::<Int64Type>("c", 5).unwrap();
1103 builder.append::<Int32Type>("a", 6).unwrap();
1104 let union = builder.build().unwrap();
1105
1106 assert_eq!(5, union.len());
1107 for i in 0..union.len() {
1108 let slot = union.value(i);
1109 assert!(!union.is_null(i));
1110 match i {
1111 0 => {
1112 let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
1113 assert_eq!(slot.len(), 1);
1114 let value = slot.value(0);
1115 assert_eq!(1_i32, value);
1116 }
1117 1 => {
1118 let slot = slot.as_any().downcast_ref::<Int64Array>().unwrap();
1119 assert_eq!(slot.len(), 1);
1120 let value = slot.value(0);
1121 assert_eq!(3_i64, value);
1122 }
1123 2 => {
1124 let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
1125 assert_eq!(slot.len(), 1);
1126 let value = slot.value(0);
1127 assert_eq!(4_i32, value);
1128 }
1129 3 => {
1130 let slot = slot.as_any().downcast_ref::<Int64Array>().unwrap();
1131 assert_eq!(slot.len(), 1);
1132 let value = slot.value(0);
1133 assert_eq!(5_i64, value);
1134 }
1135 4 => {
1136 let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
1137 assert_eq!(slot.len(), 1);
1138 let value = slot.value(0);
1139 assert_eq!(6_i32, value);
1140 }
1141 _ => unreachable!(),
1142 }
1143 }
1144 }
1145
1146 #[test]
1147 fn test_dense_mixed_with_nulls() {
1148 let mut builder = UnionBuilder::new_dense();
1149 builder.append::<Int32Type>("a", 1).unwrap();
1150 builder.append::<Int64Type>("c", 3).unwrap();
1151 builder.append::<Int32Type>("a", 10).unwrap();
1152 builder.append_null::<Int32Type>("a").unwrap();
1153 builder.append::<Int32Type>("a", 6).unwrap();
1154 let union = builder.build().unwrap();
1155
1156 assert_eq!(5, union.len());
1157 for i in 0..union.len() {
1158 let slot = union.value(i);
1159 match i {
1160 0 => {
1161 let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
1162 assert!(!slot.is_null(0));
1163 assert_eq!(slot.len(), 1);
1164 let value = slot.value(0);
1165 assert_eq!(1_i32, value);
1166 }
1167 1 => {
1168 let slot = slot.as_any().downcast_ref::<Int64Array>().unwrap();
1169 assert!(!slot.is_null(0));
1170 assert_eq!(slot.len(), 1);
1171 let value = slot.value(0);
1172 assert_eq!(3_i64, value);
1173 }
1174 2 => {
1175 let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
1176 assert!(!slot.is_null(0));
1177 assert_eq!(slot.len(), 1);
1178 let value = slot.value(0);
1179 assert_eq!(10_i32, value);
1180 }
1181 3 => assert!(slot.is_null(0)),
1182 4 => {
1183 let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
1184 assert!(!slot.is_null(0));
1185 assert_eq!(slot.len(), 1);
1186 let value = slot.value(0);
1187 assert_eq!(6_i32, value);
1188 }
1189 _ => unreachable!(),
1190 }
1191 }
1192 }
1193
1194 #[test]
1195 fn test_dense_mixed_with_nulls_and_offset() {
1196 let mut builder = UnionBuilder::new_dense();
1197 builder.append::<Int32Type>("a", 1).unwrap();
1198 builder.append::<Int64Type>("c", 3).unwrap();
1199 builder.append::<Int32Type>("a", 10).unwrap();
1200 builder.append_null::<Int32Type>("a").unwrap();
1201 builder.append::<Int32Type>("a", 6).unwrap();
1202 let union = builder.build().unwrap();
1203
1204 let slice = union.slice(2, 3);
1205 let new_union = slice.as_any().downcast_ref::<UnionArray>().unwrap();
1206
1207 assert_eq!(3, new_union.len());
1208 for i in 0..new_union.len() {
1209 let slot = new_union.value(i);
1210 match i {
1211 0 => {
1212 let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
1213 assert!(!slot.is_null(0));
1214 assert_eq!(slot.len(), 1);
1215 let value = slot.value(0);
1216 assert_eq!(10_i32, value);
1217 }
1218 1 => assert!(slot.is_null(0)),
1219 2 => {
1220 let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
1221 assert!(!slot.is_null(0));
1222 assert_eq!(slot.len(), 1);
1223 let value = slot.value(0);
1224 assert_eq!(6_i32, value);
1225 }
1226 _ => unreachable!(),
1227 }
1228 }
1229 }
1230
1231 #[test]
1232 fn test_dense_mixed_with_str() {
1233 let string_array = StringArray::from(vec!["foo", "bar", "baz"]);
1234 let int_array = Int32Array::from(vec![5, 6]);
1235 let float_array = Float64Array::from(vec![10.0]);
1236
1237 let type_ids = [1, 0, 0, 2, 0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1238 let offsets = [0, 0, 1, 0, 2, 1]
1239 .into_iter()
1240 .collect::<ScalarBuffer<i32>>();
1241
1242 let fields = [
1243 (0, Arc::new(Field::new("A", DataType::Utf8, false))),
1244 (1, Arc::new(Field::new("B", DataType::Int32, false))),
1245 (2, Arc::new(Field::new("C", DataType::Float64, false))),
1246 ]
1247 .into_iter()
1248 .collect::<UnionFields>();
1249 let children = [
1250 Arc::new(string_array) as Arc<dyn Array>,
1251 Arc::new(int_array),
1252 Arc::new(float_array),
1253 ]
1254 .into_iter()
1255 .collect();
1256 let array =
1257 UnionArray::try_new(fields, type_ids.clone(), Some(offsets.clone()), children).unwrap();
1258
1259 assert_eq!(*array.type_ids(), type_ids);
1261 for (i, id) in type_ids.iter().enumerate() {
1262 assert_eq!(id, &array.type_id(i));
1263 }
1264
1265 assert_eq!(*array.offsets().unwrap(), offsets);
1267 for (i, id) in offsets.iter().enumerate() {
1268 assert_eq!(*id as usize, array.value_offset(i));
1269 }
1270
1271 assert_eq!(6, array.len());
1273
1274 let slot = array.value(0);
1275 let value = slot.as_any().downcast_ref::<Int32Array>().unwrap().value(0);
1276 assert_eq!(5, value);
1277
1278 let slot = array.value(1);
1279 let value = slot
1280 .as_any()
1281 .downcast_ref::<StringArray>()
1282 .unwrap()
1283 .value(0);
1284 assert_eq!("foo", value);
1285
1286 let slot = array.value(2);
1287 let value = slot
1288 .as_any()
1289 .downcast_ref::<StringArray>()
1290 .unwrap()
1291 .value(0);
1292 assert_eq!("bar", value);
1293
1294 let slot = array.value(3);
1295 let value = slot
1296 .as_any()
1297 .downcast_ref::<Float64Array>()
1298 .unwrap()
1299 .value(0);
1300 assert_eq!(10.0, value);
1301
1302 let slot = array.value(4);
1303 let value = slot
1304 .as_any()
1305 .downcast_ref::<StringArray>()
1306 .unwrap()
1307 .value(0);
1308 assert_eq!("baz", value);
1309
1310 let slot = array.value(5);
1311 let value = slot.as_any().downcast_ref::<Int32Array>().unwrap().value(0);
1312 assert_eq!(6, value);
1313 }
1314
1315 #[test]
1316 fn test_sparse_i32() {
1317 let mut builder = UnionBuilder::new_sparse();
1318 builder.append::<Int32Type>("a", 1).unwrap();
1319 builder.append::<Int32Type>("b", 2).unwrap();
1320 builder.append::<Int32Type>("c", 3).unwrap();
1321 builder.append::<Int32Type>("a", 4).unwrap();
1322 builder.append::<Int32Type>("c", 5).unwrap();
1323 builder.append::<Int32Type>("a", 6).unwrap();
1324 builder.append::<Int32Type>("b", 7).unwrap();
1325 let union = builder.build().unwrap();
1326
1327 let expected_type_ids = vec![0_i8, 1, 2, 0, 2, 0, 1];
1328 let expected_array_values = [1_i32, 2, 3, 4, 5, 6, 7];
1329
1330 assert_eq!(*union.type_ids(), expected_type_ids);
1332 for (i, id) in expected_type_ids.iter().enumerate() {
1333 assert_eq!(id, &union.type_id(i));
1334 }
1335
1336 assert!(union.offsets().is_none());
1338
1339 assert_eq!(
1341 *union.child(0).as_primitive::<Int32Type>().values(),
1342 [1_i32, 0, 0, 4, 0, 6, 0],
1343 );
1344 assert_eq!(
1345 *union.child(1).as_primitive::<Int32Type>().values(),
1346 [0_i32, 2_i32, 0, 0, 0, 0, 7]
1347 );
1348 assert_eq!(
1349 *union.child(2).as_primitive::<Int32Type>().values(),
1350 [0_i32, 0, 3_i32, 0, 5, 0, 0]
1351 );
1352
1353 assert_eq!(expected_array_values.len(), union.len());
1354 for (i, expected_value) in expected_array_values.iter().enumerate() {
1355 assert!(!union.is_null(i));
1356 let slot = union.value(i);
1357 let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
1358 assert_eq!(slot.len(), 1);
1359 let value = slot.value(0);
1360 assert_eq!(expected_value, &value);
1361 }
1362 }
1363
/// Sparse union alternating between an `Int32` child ("a") and a `Float64`
/// child ("c"): checks the type ids, the absence of offsets and every slot value.
#[test]
fn test_sparse_mixed() {
    let mut builder = UnionBuilder::new_sparse();
    builder.append::<Int32Type>("a", 1).unwrap();
    builder.append::<Float64Type>("c", 3.0).unwrap();
    builder.append::<Int32Type>("a", 4).unwrap();
    builder.append::<Float64Type>("c", 5.0).unwrap();
    builder.append::<Int32Type>("a", 6).unwrap();
    let union = builder.build().unwrap();

    // "a" slots are expected to carry type id 0, "c" slots type id 1.
    let expected_type_ids = vec![0_i8, 1, 0, 1, 0];

    // Check the buffer as a whole, then the per-slot accessor.
    assert_eq!(*union.type_ids(), expected_type_ids);
    for (idx, expected_id) in expected_type_ids.iter().copied().enumerate() {
        assert_eq!(expected_id, union.type_id(idx));
    }

    // A sparse union carries no offsets buffer.
    assert!(union.offsets().is_none());

    // Checks that slot `idx` is a valid, single-element Int32 array holding `expected`.
    let expect_int = |idx: usize, expected: i32| {
        let slot = union.value(idx);
        assert!(!union.is_null(idx));
        let ints = slot.as_primitive::<Int32Type>();
        assert_eq!(ints.len(), 1);
        assert_eq!(expected, ints.value(0));
    };
    // Same as above for Float64 slots.
    let expect_float = |idx: usize, expected: f64| {
        let slot = union.value(idx);
        assert!(!union.is_null(idx));
        let floats = slot.as_primitive::<Float64Type>();
        assert_eq!(floats.len(), 1);
        assert_eq!(floats.value(0), expected);
    };

    for idx in 0..union.len() {
        match idx {
            0 => expect_int(idx, 1),
            1 => expect_float(idx, 3.0),
            2 => expect_int(idx, 4),
            3 => expect_float(idx, 5.0),
            4 => expect_int(idx, 6),
            // Only five values were appended.
            _ => unreachable!(),
        }
    }
}
1423
/// Sparse union containing a null appended to the `Int32` child: the null
/// must surface when the slot is read back through `value`.
#[test]
fn test_sparse_mixed_with_nulls() {
    /// What a given slot of the union is expected to hold.
    enum Expected {
        Int(i32),
        Float(f64),
        Null,
    }

    let mut builder = UnionBuilder::new_sparse();
    builder.append::<Int32Type>("a", 1).unwrap();
    builder.append_null::<Int32Type>("a").unwrap();
    builder.append::<Float64Type>("c", 3.0).unwrap();
    builder.append::<Int32Type>("a", 4).unwrap();
    let union = builder.build().unwrap();

    let expected_type_ids = vec![0_i8, 0, 1, 0];

    assert_eq!(*union.type_ids(), expected_type_ids);
    for (idx, expected_id) in expected_type_ids.iter().copied().enumerate() {
        assert_eq!(expected_id, union.type_id(idx));
    }

    // A sparse union carries no offsets buffer.
    assert!(union.offsets().is_none());

    let expected_slots = [
        Expected::Int(1),
        Expected::Null,
        Expected::Float(3.0),
        Expected::Int(4),
    ];

    for idx in 0..union.len() {
        let slot = union.value(idx);
        match expected_slots.get(idx) {
            Some(Expected::Int(want)) => {
                let ints = slot.as_primitive::<Int32Type>();
                assert!(!ints.is_null(0));
                assert_eq!(ints.len(), 1);
                assert_eq!(*want, ints.value(0));
            }
            Some(Expected::Float(want)) => {
                let floats = slot.as_primitive::<Float64Type>();
                assert!(!floats.is_null(0));
                assert_eq!(floats.len(), 1);
                assert_eq!(floats.value(0), *want);
            }
            Some(Expected::Null) => assert!(slot.is_null(0)),
            // Only four values were appended.
            None => unreachable!(),
        }
    }
}
1473
/// Slices a sparse union that contains nulls in both children and checks that
/// the slots of the slice still resolve to the right child values.
#[test]
fn test_sparse_mixed_with_nulls_and_offset() {
    let mut builder = UnionBuilder::new_sparse();
    builder.append::<Int32Type>("a", 1).unwrap();
    builder.append_null::<Int32Type>("a").unwrap();
    builder.append::<Float64Type>("c", 3.0).unwrap();
    builder.append_null::<Float64Type>("c").unwrap();
    builder.append::<Int32Type>("a", 4).unwrap();
    let union = builder.build().unwrap();

    // Skip the first slot and keep the remaining four.
    let sliced = union.slice(1, 4);
    let new_union = sliced.as_any().downcast_ref::<UnionArray>().unwrap();

    assert_eq!(4, new_union.len());
    for idx in 0..new_union.len() {
        let slot = new_union.value(idx);
        match idx {
            // The two nulls (one per child) land on slots 0 and 2 of the slice.
            0 | 2 => assert!(slot.is_null(0)),
            1 => {
                let floats = slot.as_primitive::<Float64Type>();
                assert!(!floats.is_null(0));
                assert_eq!(floats.len(), 1);
                assert_eq!(floats.value(0), 3_f64);
            }
            3 => {
                let ints = slot.as_primitive::<Int32Type>();
                assert!(!ints.is_null(0));
                assert_eq!(ints.len(), 1);
                assert_eq!(4_i32, ints.value(0));
            }
            _ => unreachable!(),
        }
    }
}
1511
1512 fn test_union_validity(union_array: &UnionArray) {
1513 assert_eq!(union_array.null_count(), 0);
1514
1515 for i in 0..union_array.len() {
1516 assert!(!union_array.is_null(i));
1517 assert!(union_array.is_valid(i));
1518 }
1519 }
1520
/// Runs the validity checks against a sparse and a dense union that are fed
/// the same values, including nulls appended to each child.
#[test]
fn test_union_array_validity() {
    for mut builder in [UnionBuilder::new_sparse(), UnionBuilder::new_dense()] {
        builder.append::<Int32Type>("a", 1).unwrap();
        builder.append_null::<Int32Type>("a").unwrap();
        builder.append::<Float64Type>("c", 3.0).unwrap();
        builder.append_null::<Float64Type>("c").unwrap();
        builder.append::<Int32Type>("a", 4).unwrap();
        let union = builder.build().unwrap();

        // The union itself is expected to report no nulls for either layout.
        test_union_validity(&union);
    }
}
1543
/// Appending a value of a different type to an existing column name must be
/// rejected with an error naming both types.
#[test]
fn test_type_check() {
    let mut builder = UnionBuilder::new_sparse();
    builder.append::<Float32Type>("a", 1.0).unwrap();

    // Column "a" already holds Float32, so an Int32 append must fail.
    let message = builder.append::<Int32Type>("a", 1).unwrap_err().to_string();
    let expected =
        "Attempt to write col \"a\" with type Int32 doesn't match existing type Float32";
    assert!(message.contains(expected), "{}", message);
}
1557
/// Wraps a union in a `RecordBatch`, slices the batch and checks that the
/// sliced union column exposes the expected type ids and values, for both the
/// sparse and the dense layout.
#[test]
fn slice_union_array() {
    /// Fills `builder` with: 1 ("a"), null ("a"), 3.0 ("c"), null ("c"), 4 ("a").
    fn create_union(mut builder: UnionBuilder) -> UnionArray {
        builder.append::<Int32Type>("a", 1).unwrap();
        builder.append_null::<Int32Type>("a").unwrap();
        builder.append::<Float64Type>("c", 3.0).unwrap();
        builder.append_null::<Float64Type>("c").unwrap();
        builder.append::<Int32Type>("a", 4).unwrap();
        builder.build().unwrap()
    }

    /// Builds a single-column record batch whose only column is `union`.
    fn create_batch(union: UnionArray) -> RecordBatch {
        let field = Field::new("struct_array", union.data_type().clone(), true);
        let schema = Arc::new(Schema::new(vec![field]));

        RecordBatch::try_new(schema, vec![Arc::new(union)]).unwrap()
    }

    /// Checks the three slots of the sliced batch: null Int32, 3.0, null Float64.
    fn test_slice_union(record_batch_slice: RecordBatch) {
        let union_slice = record_batch_slice
            .column(0)
            .as_any()
            .downcast_ref::<UnionArray>()
            .unwrap();

        for (idx, expected_id) in [0_i8, 1, 1].into_iter().enumerate() {
            assert_eq!(union_slice.type_id(idx), expected_id);
        }

        let first = union_slice.value(0);
        let ints = first.as_primitive::<Int32Type>();
        assert_eq!(ints.len(), 1);
        assert!(ints.is_null(0));

        let second = union_slice.value(1);
        let floats = second.as_primitive::<Float64Type>();
        assert_eq!(floats.len(), 1);
        assert!(floats.is_valid(0));
        assert_eq!(floats.value(0), 3.0);

        let third = union_slice.value(2);
        let floats = third.as_primitive::<Float64Type>();
        assert_eq!(floats.len(), 1);
        assert!(floats.is_null(0));
    }

    // Sparse first, then dense: both must behave the same after slicing.
    for builder in [UnionBuilder::new_sparse(), UnionBuilder::new_dense()] {
        let record_batch = create_batch(create_union(builder));
        // Keep slots 1, 2 and 3 of the original five.
        test_slice_union(record_batch.slice(1, 3));
    }
}
1622
/// Dense union whose type ids (8, 4, 9) are not the child positions: every
/// slot must be routed to the child registered under its type id.
#[test]
fn test_custom_type_ids() {
    let data_type = DataType::Union(
        UnionFields::new(
            vec![8, 4, 9],
            vec![
                Field::new("strings", DataType::Utf8, false),
                Field::new("integers", DataType::Int32, false),
                Field::new("floats", DataType::Float64, false),
            ],
        ),
        UnionMode::Dense,
    );

    let strings = StringArray::from(vec!["foo", "bar", "baz"]);
    let ints = Int32Array::from(vec![5, 6, 4]);
    let floats = Float64Array::from(vec![10.0]);

    // Per slot: the type id selects the child, the offset selects the value in it.
    let type_ids = Buffer::from_vec(vec![4_i8, 8, 4, 8, 9, 4, 8]);
    let value_offsets = Buffer::from_vec(vec![0_i32, 0, 1, 1, 0, 2, 2]);

    let data = ArrayData::builder(data_type)
        .len(7)
        .buffers(vec![type_ids, value_offsets])
        .child_data(vec![
            strings.into_data(),
            ints.into_data(),
            floats.into_data(),
        ])
        .build()
        .unwrap();

    let array = UnionArray::from(data);

    // Each checker asserts the slot's data type, its length of one and its value.
    let expect_int = |idx: usize, want: i32| {
        let v = array.value(idx);
        assert_eq!(v.data_type(), &DataType::Int32);
        assert_eq!(v.len(), 1);
        assert_eq!(v.as_primitive::<Int32Type>().value(0), want);
    };
    let expect_str = |idx: usize, want: &str| {
        let v = array.value(idx);
        assert_eq!(v.data_type(), &DataType::Utf8);
        assert_eq!(v.len(), 1);
        assert_eq!(v.as_string::<i32>().value(0), want);
    };
    let expect_float = |idx: usize, want: f64| {
        let v = array.value(idx);
        assert_eq!(v.data_type(), &DataType::Float64);
        assert_eq!(v.len(), 1);
        assert_eq!(v.as_primitive::<Float64Type>().value(0), want);
    };

    expect_int(0, 5);
    expect_str(1, "foo");
    expect_int(2, 6);
    expect_str(3, "bar");
    expect_float(4, 10.0);
    expect_int(5, 4);
    expect_str(6, "baz");
}
1692
/// Decomposes a dense and a sparse union with `into_parts` and verifies that
/// the parts can be fed back into `UnionArray::try_new`.
#[test]
fn into_parts() {
    // Dense layout.
    let mut builder = UnionBuilder::new_dense();
    builder.append::<Int32Type>("a", 1).unwrap();
    builder.append::<Int8Type>("b", 2).unwrap();
    builder.append::<Int32Type>("a", 3).unwrap();
    let dense_union = builder.build().unwrap();

    let expected_fields = [
        Arc::new(Field::new("a", DataType::Int32, false)),
        Arc::new(Field::new("b", DataType::Int8, false)),
    ];
    let (fields, type_ids, offsets, children) = dense_union.into_parts();
    let actual_fields: Vec<_> = fields.iter().map(|(_, field)| field).collect();
    assert_eq!(actual_fields, expected_fields.iter().collect::<Vec<_>>());
    assert_eq!(type_ids, [0, 1, 0]);
    // A dense union must hand back its offsets.
    assert!(offsets.is_some());
    assert_eq!(offsets.as_ref().unwrap(), &[0, 0, 1]);

    let rebuilt = UnionArray::try_new(fields, type_ids, offsets, children);
    assert!(rebuilt.is_ok());
    assert_eq!(rebuilt.unwrap().len(), 3);

    // Sparse layout.
    let mut builder = UnionBuilder::new_sparse();
    builder.append::<Int32Type>("a", 1).unwrap();
    builder.append::<Int8Type>("b", 2).unwrap();
    builder.append::<Int32Type>("a", 3).unwrap();
    let sparse_union = builder.build().unwrap();

    let (fields, type_ids, offsets, children) = sparse_union.into_parts();
    assert_eq!(type_ids, [0, 1, 0]);
    // A sparse union has no offsets to hand back.
    assert!(offsets.is_none());

    let rebuilt = UnionArray::try_new(fields, type_ids, offsets, children);
    assert!(rebuilt.is_ok());
    assert_eq!(rebuilt.unwrap().len(), 3);
}
1735
/// `into_parts` on a dense union with custom type ids (8, 4, 9): the returned
/// type ids must cover exactly the declared ids and the parts must rebuild
/// into a union of the same length.
#[test]
fn into_parts_custom_type_ids() {
    let set_field_type_ids: [i8; 3] = [8, 4, 9];
    let data_type = DataType::Union(
        UnionFields::new(
            set_field_type_ids,
            [
                Field::new("strings", DataType::Utf8, false),
                Field::new("integers", DataType::Int32, false),
                Field::new("floats", DataType::Float64, false),
            ],
        ),
        UnionMode::Dense,
    );

    let strings = StringArray::from(vec!["foo", "bar", "baz"]);
    let ints = Int32Array::from(vec![5, 6, 4]);
    let floats = Float64Array::from(vec![10.0]);

    let type_id_buffer = Buffer::from_vec(vec![4_i8, 8, 4, 8, 9, 4, 8]);
    let offset_buffer = Buffer::from_vec(vec![0_i32, 0, 1, 1, 0, 2, 2]);

    let data = ArrayData::builder(data_type)
        .len(7)
        .buffers(vec![type_id_buffer, offset_buffer])
        .child_data(vec![
            strings.into_data(),
            ints.into_data(),
            floats.into_data(),
        ])
        .build()
        .unwrap();
    let array = UnionArray::from(data);

    let (fields, type_ids, offsets, children) = array.into_parts();

    // Compare as sets: every declared id is used and no other id appears.
    let used_ids: HashSet<_> = type_ids.iter().collect();
    let declared_ids: HashSet<_> = set_field_type_ids.iter().collect();
    assert_eq!(used_ids, declared_ids);

    let rebuilt = UnionArray::try_new(fields, type_ids, offsets, children);
    assert!(rebuilt.is_ok());
    let array = rebuilt.unwrap();
    assert_eq!(array.len(), 7);
}
1777
/// Exercises the argument validation of `UnionArray::try_new`, checking the
/// exact error message produced for each kind of invalid input.
#[test]
fn test_invalid() {
    let fields = UnionFields::new(
        [3, 2],
        [
            Field::new("a", DataType::Utf8, false),
            Field::new("b", DataType::Utf8, false),
        ],
    );
    let children = vec![
        Arc::new(StringArray::from_iter_values(["a", "b"])) as _,
        Arc::new(StringArray::from_iter_values(["c", "d"])) as _,
    ];

    // Sparse: three type ids, but the children only hold two values each.
    let message = UnionArray::try_new(fields.clone(), vec![3, 3, 2].into(), None, children.clone())
        .unwrap_err()
        .to_string();
    assert_eq!(
        message,
        "Invalid argument error: Sparse union child arrays must be equal in length to the length of the union"
    );

    // Type id 1 is not declared by `fields`.
    let message = UnionArray::try_new(fields.clone(), vec![1, 2].into(), None, children.clone())
        .unwrap_err()
        .to_string();
    assert_eq!(
        message,
        "Invalid argument error: Type Ids values must match one of the field type ids"
    );

    // Neither is type id 7.
    let message = UnionArray::try_new(fields.clone(), vec![7, 2].into(), None, children)
        .unwrap_err()
        .to_string();
    assert_eq!(
        message,
        "Invalid argument error: Type Ids values must match one of the field type ids"
    );

    // Dense cases: the second child holds a single value.
    let children = vec![
        Arc::new(StringArray::from_iter_values(["a", "b"])) as _,
        Arc::new(StringArray::from_iter_values(["c"])) as _,
    ];
    let type_ids = ScalarBuffer::from(vec![3_i8, 3, 2]);

    // Offsets that stay inside each child are accepted.
    UnionArray::try_new(
        fields.clone(),
        type_ids.clone(),
        Some(vec![0, 1, 0].into()),
        children.clone(),
    )
    .unwrap();

    // Offset 1 points past the end of the single-element second child.
    let message = UnionArray::try_new(
        fields.clone(),
        type_ids.clone(),
        Some(vec![0, 1, 1].into()),
        children.clone(),
    )
    .unwrap_err()
    .to_string();
    assert_eq!(
        message,
        "Invalid argument error: Offsets must be positive and within the length of the Array"
    );

    // Two offsets for three type ids.
    let message = UnionArray::try_new(
        fields.clone(),
        type_ids.clone(),
        Some(vec![0, 1].into()),
        children,
    )
    .unwrap_err()
    .to_string();
    assert_eq!(
        message,
        "Invalid argument error: Type Ids and Offsets lengths must match"
    );

    // Two declared fields but no child arrays at all.
    let message = UnionArray::try_new(fields.clone(), type_ids, None, vec![])
        .unwrap_err()
        .to_string();
    assert_eq!(
        message,
        "Invalid argument error: Union fields length must match child arrays length"
    );
}
1848
/// Checks `logical_nulls` on unions whose children are either entirely valid
/// or entirely null, for an empty union, sparse unions and a dense union.
#[test]
fn test_logical_nulls_fast_paths() {
    // A union without fields and without slots reports no logical nulls.
    let empty = UnionArray::try_new(UnionFields::empty(), vec![].into(), None, vec![]).unwrap();
    assert_eq!(empty.logical_nulls(), None);

    // Fields declared non-nullable, children fully valid.
    let fields = UnionFields::new(
        [1, 3],
        [
            Field::new("a", DataType::Int8, false),
            Field::new("b", DataType::Int8, false),
        ],
    );
    let non_nullable = UnionArray::try_new(
        fields,
        vec![1].into(),
        None,
        vec![
            Arc::new(Int8Array::from_value(5, 1)),
            Arc::new(Int8Array::from_value(5, 1)),
        ],
    )
    .unwrap();
    assert_eq!(non_nullable.logical_nulls(), None);

    let nullable_fields = UnionFields::new(
        [1, 3],
        [
            Field::new("a", DataType::Int8, true),
            Field::new("b", DataType::Int8, true),
        ],
    );

    // Fields declared nullable, but the children contain no nulls.
    let all_valid = UnionArray::try_new(
        nullable_fields.clone(),
        vec![1, 1].into(),
        None,
        vec![
            Arc::new(Int8Array::from_value(-5, 2)),
            Arc::new(Int8Array::from_value(-5, 2)),
        ],
    )
    .unwrap();
    assert_eq!(all_valid.logical_nulls(), None);

    // Both expectations below are a two-slot, all-null buffer.
    let two_nulls = Some(NullBuffer::new_null(2));

    // Sparse union whose children are entirely null.
    let sparse_all_null = UnionArray::try_new(
        nullable_fields.clone(),
        vec![1, 1].into(),
        None,
        vec![
            Arc::new(Int8Array::new_null(2)),
            Arc::new(Int8Array::new_null(2)),
        ],
    )
    .unwrap();
    assert_eq!(sparse_all_null.logical_nulls(), two_nulls);

    // Dense union whose children are entirely null; the children are longer
    // (3) than the union (2), and the result must have the union's length.
    let dense_all_null = UnionArray::try_new(
        nullable_fields,
        vec![1, 1].into(),
        Some(vec![0, 1].into()),
        vec![
            Arc::new(Int8Array::new_null(3)),
            Arc::new(Int8Array::new_null(3)),
        ],
    )
    .unwrap();
    assert_eq!(dense_all_null.logical_nulls(), two_nulls);
}
1924
/// Dense union over a fully valid child, a partially null child and a fully
/// null child: the logical nulls must follow the child value each slot's
/// offset points at.
#[test]
fn test_dense_union_logical_nulls_gather() {
    let children = vec![
        // Fully valid.
        Arc::new(Int32Array::from(vec![1, 2])) as Arc<dyn Array>,
        // Valid at offset 0, null at offset 1.
        Arc::new(Float64Array::from(vec![Some(3.2), None])),
        // Fully null.
        Arc::new(StringArray::new_null(1)),
    ];

    let type_ids = ScalarBuffer::<i8>::from_iter([1, 1, 3, 3, 4, 4]);
    let offsets = ScalarBuffer::<i32>::from_iter([0, 1, 0, 1, 0, 0]);

    let array = UnionArray::try_new(union_fields(), type_ids, Some(offsets), children).unwrap();

    let expected = NullBuffer::from(vec![true, true, true, false, false, false]);
    assert_eq!(Some(expected), array.logical_nulls());
}
1949
/// Sparse union with one fully null child and one partially null child,
/// checked on a four-slot input and on a 160-slot input.
#[test]
fn test_sparse_union_logical_nulls_mask_all_nulls_skip_one() {
    let fields: UnionFields = [
        (1, Arc::new(Field::new("A", DataType::Int32, true))),
        (3, Arc::new(Field::new("B", DataType::Float64, true))),
    ]
    .into_iter()
    .collect();

    // Small case: only slot 2 selects a non-null value (3.2 in the float child).
    let small_children = vec![
        Arc::new(Int32Array::new_null(4)) as Arc<dyn Array>,
        Arc::new(Float64Array::from(vec![None, None, Some(3.2), None])),
    ];
    let small_type_ids = [1, 1, 3, 3].into_iter().collect::<ScalarBuffer<i8>>();

    let small = UnionArray::try_new(fields.clone(), small_type_ids, None, small_children).unwrap();

    let expected = NullBuffer::from(vec![false, false, true, false]);
    assert_eq!(Some(expected), small.logical_nulls());

    // Large case: the same pattern repeated over 2 * 64 + 32 slots.
    let len = 2 * 64 + 32;

    let null_ints = Int32Array::new_null(len);
    let alternating_floats =
        Float64Array::from_iter([Some(3.2), None].into_iter().cycle().take(len));
    let large_type_ids = ScalarBuffer::from_iter([1, 1, 3, 3].into_iter().cycle().take(len));

    let large = UnionArray::try_new(
        fields,
        large_type_ids,
        None,
        vec![Arc::new(null_ints), Arc::new(alternating_floats)],
    )
    .unwrap();

    let actual = large.logical_nulls();

    let expected =
        NullBuffer::from_iter([false, false, true, false].into_iter().cycle().take(len));
    assert_eq!(large.len(), len);
    assert_eq!(Some(expected), actual);
}
1996
/// Sparse union with two fully valid children and a third child that holds
/// nulls, checked on a six-slot input and on a 160-slot input.
#[test]
fn test_sparse_union_logical_mask_mixed_nulls_skip_fully_valid() {
    // Small case: the string child is entirely null.
    let small_children = vec![
        Arc::new(Int32Array::from_value(2, 6)) as Arc<dyn Array>,
        Arc::new(Float64Array::from_value(4.2, 6)),
        Arc::new(StringArray::new_null(6)),
    ];
    let small_type_ids = [1, 1, 3, 3, 4, 4].into_iter().collect::<ScalarBuffer<i8>>();

    let small = UnionArray::try_new(union_fields(), small_type_ids, None, small_children).unwrap();

    let expected = NullBuffer::from(vec![true, true, true, true, false, false]);
    assert_eq!(Some(expected), small.logical_nulls());

    // Large case: the string child alternates between null and "a".
    let len = 2 * 64 + 32;

    let large_children = vec![
        Arc::new(Int32Array::from_value(2, len)) as Arc<dyn Array>,
        Arc::new(Float64Array::from_value(4.2, len)),
        Arc::new(StringArray::from_iter(
            [None, Some("a")].into_iter().cycle().take(len),
        )),
    ];
    let large_type_ids =
        ScalarBuffer::from_iter([1, 1, 3, 3, 4, 4].into_iter().cycle().take(len));

    let large = UnionArray::try_new(union_fields(), large_type_ids, None, large_children).unwrap();

    let actual = large.logical_nulls();

    let expected = NullBuffer::from_iter(
        [true, true, true, true, false, true]
            .into_iter()
            .cycle()
            .take(len),
    );
    assert_eq!(large.len(), len);
    assert_eq!(Some(expected), actual);
}
2045
/// Sparse union with two fully null children (Int32, Utf8) and one fully valid
/// child (Float64): only slots selecting the float child are non-null.
/// Checked with 6 slots and with 2 * 64 + 32 slots.
#[test]
fn test_sparse_union_logical_mask_mixed_nulls_skip_fully_null() {
    for len in [6, 2 * 64 + 32] {
        let children = vec![
            Arc::new(Int32Array::new_null(len)) as Arc<dyn Array>,
            Arc::new(Float64Array::from_value(4.2, len)),
            Arc::new(StringArray::new_null(len)),
        ];
        // Type ids repeat in pairs: int, int, float, float, string, string.
        let type_ids = ScalarBuffer::from_iter([1, 1, 3, 3, 4, 4].into_iter().cycle().take(len));

        let array = UnionArray::try_new(union_fields(), type_ids, None, children).unwrap();

        let actual = array.logical_nulls();

        let expected = NullBuffer::from_iter(
            [false, false, true, true, false, false]
                .into_iter()
                .cycle()
                .take(len),
        );
        assert_eq!(array.len(), len);
        assert_eq!(Some(expected), actual);
    }
}
2094
/// Sparse union with 50 fields (odd type ids starting at 1) whose children
/// cycle through a fully valid, a partially null and a fully null array.
#[test]
fn test_sparse_union_logical_nulls_gather() {
    let n_fields = 50;

    let non_null = Int32Array::from_value(2, 4);
    let mixed = Int32Array::from(vec![None, None, Some(1), None]);
    let fully_null = Int32Array::new_null(4);

    // Type ids 1, 3, 5, ... each paired with a nullable Int32 field.
    let fields: UnionFields = (1..)
        .step_by(2)
        .take(n_fields)
        .map(|id| {
            let field = Field::new(format!("f{id}"), DataType::Int32, true);
            (id, Arc::new(field))
        })
        .collect();

    // Children repeat in the order: non_null, mixed, fully_null.
    let children: Vec<ArrayRef> = [
        Arc::new(non_null) as ArrayRef,
        Arc::new(mixed),
        Arc::new(fully_null),
    ]
    .into_iter()
    .cycle()
    .take(n_fields)
    .collect();

    let array = UnionArray::try_new(fields, vec![1, 3, 3, 5].into(), None, children).unwrap();

    let actual = array.logical_nulls();

    // Slot 0 -> non_null, slots 1 and 2 -> mixed, slot 3 -> fully_null.
    let expected = NullBuffer::from(vec![true, false, true, false]);

    assert_eq!(Some(expected), actual);
}
2134
2135 fn union_fields() -> UnionFields {
2136 [
2137 (1, Arc::new(Field::new("A", DataType::Int32, true))),
2138 (3, Arc::new(Field::new("B", DataType::Float64, true))),
2139 (4, Arc::new(Field::new("C", DataType::Utf8, true))),
2140 ]
2141 .into_iter()
2142 .collect()
2143 }
2144
/// `is_nullable` must be false only when neither child contains a null.
#[test]
fn test_is_nullable() {
    // (int child has nulls, float child has nulls, expected `is_nullable`)
    let cases = [
        (false, false, false),
        (true, false, true),
        (false, true, true),
        (true, true, true),
    ];

    for (int_nullable, float_nullable, expected) in cases {
        assert_eq!(
            create_union_array(int_nullable, float_nullable).is_nullable(),
            expected
        );
    }
}
2152
2153 fn create_union_array(int_nullable: bool, float_nullable: bool) -> UnionArray {
2160 let int_array = if int_nullable {
2161 Int32Array::from(vec![Some(1), None, Some(3)])
2162 } else {
2163 Int32Array::from(vec![1, 2, 3])
2164 };
2165 let float_array = if float_nullable {
2166 Float64Array::from(vec![Some(3.2), None, Some(4.2)])
2167 } else {
2168 Float64Array::from(vec![3.2, 4.2, 5.2])
2169 };
2170 let type_ids = [0, 1, 0].into_iter().collect::<ScalarBuffer<i8>>();
2171 let offsets = [0, 0, 0].into_iter().collect::<ScalarBuffer<i32>>();
2172 let union_fields = [
2173 (0, Arc::new(Field::new("A", DataType::Int32, true))),
2174 (1, Arc::new(Field::new("B", DataType::Float64, true))),
2175 ]
2176 .into_iter()
2177 .collect::<UnionFields>();
2178
2179 let children = vec![Arc::new(int_array) as Arc<dyn Array>, Arc::new(float_array)];
2180
2181 UnionArray::try_new(union_fields, type_ids, Some(offsets), children).unwrap()
2182 }
2183}