1use crate::array::print_long_array;
19use crate::{make_array, new_null_array, Array, ArrayRef, RecordBatch};
20use arrow_buffer::{BooleanBuffer, Buffer, NullBuffer};
21use arrow_data::{ArrayData, ArrayDataBuilder};
22use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields};
23use std::sync::Arc;
24use std::{any::Any, ops::Index};
25
26#[derive(Clone)]
77pub struct StructArray {
78 len: usize,
79 data_type: DataType,
80 nulls: Option<NullBuffer>,
81 fields: Vec<ArrayRef>,
82}
83
84impl StructArray {
85 pub fn new(fields: Fields, arrays: Vec<ArrayRef>, nulls: Option<NullBuffer>) -> Self {
91 Self::try_new(fields, arrays, nulls).unwrap()
92 }
93
94 pub fn try_new(
106 fields: Fields,
107 arrays: Vec<ArrayRef>,
108 nulls: Option<NullBuffer>,
109 ) -> Result<Self, ArrowError> {
110 if fields.len() != arrays.len() {
111 return Err(ArrowError::InvalidArgumentError(format!(
112 "Incorrect number of arrays for StructArray fields, expected {} got {}",
113 fields.len(),
114 arrays.len()
115 )));
116 }
117 let len = arrays.first().map(|x| x.len()).unwrap_or_default();
118
119 if let Some(n) = nulls.as_ref() {
120 if n.len() != len {
121 return Err(ArrowError::InvalidArgumentError(format!(
122 "Incorrect number of nulls for StructArray, expected {len} got {}",
123 n.len(),
124 )));
125 }
126 }
127
128 for (f, a) in fields.iter().zip(&arrays) {
129 if f.data_type() != a.data_type() {
130 return Err(ArrowError::InvalidArgumentError(format!(
131 "Incorrect datatype for StructArray field {:?}, expected {} got {}",
132 f.name(),
133 f.data_type(),
134 a.data_type()
135 )));
136 }
137
138 if a.len() != len {
139 return Err(ArrowError::InvalidArgumentError(format!(
140 "Incorrect array length for StructArray field {:?}, expected {} got {}",
141 f.name(),
142 len,
143 a.len()
144 )));
145 }
146
147 if !f.is_nullable() {
148 if let Some(a) = a.logical_nulls() {
149 if !nulls.as_ref().map(|n| n.contains(&a)).unwrap_or_default() {
150 return Err(ArrowError::InvalidArgumentError(format!(
151 "Found unmasked nulls for non-nullable StructArray field {:?}",
152 f.name()
153 )));
154 }
155 }
156 }
157 }
158
159 Ok(Self {
160 len,
161 data_type: DataType::Struct(fields),
162 nulls: nulls.filter(|n| n.null_count() > 0),
163 fields: arrays,
164 })
165 }
166
167 pub fn new_null(fields: Fields, len: usize) -> Self {
169 let arrays = fields
170 .iter()
171 .map(|f| new_null_array(f.data_type(), len))
172 .collect();
173
174 Self {
175 len,
176 data_type: DataType::Struct(fields),
177 nulls: Some(NullBuffer::new_null(len)),
178 fields: arrays,
179 }
180 }
181
182 pub unsafe fn new_unchecked(
188 fields: Fields,
189 arrays: Vec<ArrayRef>,
190 nulls: Option<NullBuffer>,
191 ) -> Self {
192 let len = arrays.first().map(|x| x.len()).unwrap_or_default();
193 Self {
194 len,
195 data_type: DataType::Struct(fields),
196 nulls,
197 fields: arrays,
198 }
199 }
200
201 pub fn new_empty_fields(len: usize, nulls: Option<NullBuffer>) -> Self {
207 if let Some(n) = &nulls {
208 assert_eq!(len, n.len())
209 }
210 Self {
211 len,
212 data_type: DataType::Struct(Fields::empty()),
213 fields: vec![],
214 nulls,
215 }
216 }
217
218 pub fn into_parts(self) -> (Fields, Vec<ArrayRef>, Option<NullBuffer>) {
220 let f = match self.data_type {
221 DataType::Struct(f) => f,
222 _ => unreachable!(),
223 };
224 (f, self.fields, self.nulls)
225 }
226
227 pub fn column(&self, pos: usize) -> &ArrayRef {
229 &self.fields[pos]
230 }
231
232 pub fn num_columns(&self) -> usize {
234 self.fields.len()
235 }
236
237 pub fn columns(&self) -> &[ArrayRef] {
239 &self.fields
240 }
241
242 #[deprecated(note = "Use columns().to_vec()")]
244 pub fn columns_ref(&self) -> Vec<ArrayRef> {
245 self.columns().to_vec()
246 }
247
248 pub fn column_names(&self) -> Vec<&str> {
250 match self.data_type() {
251 DataType::Struct(fields) => fields
252 .iter()
253 .map(|f| f.name().as_str())
254 .collect::<Vec<&str>>(),
255 _ => unreachable!("Struct array's data type is not struct!"),
256 }
257 }
258
259 pub fn fields(&self) -> &Fields {
261 match self.data_type() {
262 DataType::Struct(f) => f,
263 _ => unreachable!(),
264 }
265 }
266
267 pub fn column_by_name(&self, column_name: &str) -> Option<&ArrayRef> {
273 self.column_names()
274 .iter()
275 .position(|c| c == &column_name)
276 .map(|pos| self.column(pos))
277 }
278
279 pub fn slice(&self, offset: usize, len: usize) -> Self {
281 assert!(
282 offset.saturating_add(len) <= self.len,
283 "the length + offset of the sliced StructArray cannot exceed the existing length"
284 );
285
286 let fields = self.fields.iter().map(|a| a.slice(offset, len)).collect();
287
288 Self {
289 len,
290 data_type: self.data_type.clone(),
291 nulls: self.nulls.as_ref().map(|n| n.slice(offset, len)),
292 fields,
293 }
294 }
295}
296
297impl From<ArrayData> for StructArray {
298 fn from(data: ArrayData) -> Self {
299 let fields = data
300 .child_data()
301 .iter()
302 .map(|cd| make_array(cd.clone()))
303 .collect();
304
305 Self {
306 len: data.len(),
307 data_type: data.data_type().clone(),
308 nulls: data.nulls().cloned(),
309 fields,
310 }
311 }
312}
313
314impl From<StructArray> for ArrayData {
315 fn from(array: StructArray) -> Self {
316 let builder = ArrayDataBuilder::new(array.data_type)
317 .len(array.len)
318 .nulls(array.nulls)
319 .child_data(array.fields.iter().map(|x| x.to_data()).collect());
320
321 unsafe { builder.build_unchecked() }
322 }
323}
324
325impl TryFrom<Vec<(&str, ArrayRef)>> for StructArray {
326 type Error = ArrowError;
327
328 fn try_from(values: Vec<(&str, ArrayRef)>) -> Result<Self, ArrowError> {
330 let (fields, arrays): (Vec<_>, _) = values
331 .into_iter()
332 .map(|(name, array)| {
333 (
334 Field::new(name, array.data_type().clone(), array.is_nullable()),
335 array,
336 )
337 })
338 .unzip();
339
340 StructArray::try_new(fields.into(), arrays, None)
341 }
342}
343
344impl Array for StructArray {
345 fn as_any(&self) -> &dyn Any {
346 self
347 }
348
349 fn to_data(&self) -> ArrayData {
350 self.clone().into()
351 }
352
353 fn into_data(self) -> ArrayData {
354 self.into()
355 }
356
357 fn data_type(&self) -> &DataType {
358 &self.data_type
359 }
360
361 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
362 Arc::new(self.slice(offset, length))
363 }
364
365 fn len(&self) -> usize {
366 self.len
367 }
368
369 fn is_empty(&self) -> bool {
370 self.len == 0
371 }
372
373 fn offset(&self) -> usize {
374 0
375 }
376
377 fn nulls(&self) -> Option<&NullBuffer> {
378 self.nulls.as_ref()
379 }
380
381 fn logical_null_count(&self) -> usize {
382 self.null_count()
384 }
385
386 fn get_buffer_memory_size(&self) -> usize {
387 let mut size = self.fields.iter().map(|a| a.get_buffer_memory_size()).sum();
388 if let Some(n) = self.nulls.as_ref() {
389 size += n.buffer().capacity();
390 }
391 size
392 }
393
394 fn get_array_memory_size(&self) -> usize {
395 let mut size = self.fields.iter().map(|a| a.get_array_memory_size()).sum();
396 size += std::mem::size_of::<Self>();
397 if let Some(n) = self.nulls.as_ref() {
398 size += n.buffer().capacity();
399 }
400 size
401 }
402}
403
404impl From<Vec<(FieldRef, ArrayRef)>> for StructArray {
405 fn from(v: Vec<(FieldRef, ArrayRef)>) -> Self {
406 let (fields, arrays): (Vec<_>, _) = v.into_iter().unzip();
407 StructArray::new(fields.into(), arrays, None)
408 }
409}
410
411impl std::fmt::Debug for StructArray {
412 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
413 writeln!(f, "StructArray")?;
414 writeln!(f, "-- validity: ")?;
415 writeln!(f, "[")?;
416 print_long_array(self, f, |_array, _index, f| write!(f, "valid"))?;
417 writeln!(f, "]\n[")?;
418 for (child_index, name) in self.column_names().iter().enumerate() {
419 let column = self.column(child_index);
420 writeln!(
421 f,
422 "-- child {}: \"{}\" ({:?})",
423 child_index,
424 name,
425 column.data_type()
426 )?;
427 std::fmt::Debug::fmt(column, f)?;
428 writeln!(f)?;
429 }
430 write!(f, "]")
431 }
432}
433
434impl From<(Vec<(FieldRef, ArrayRef)>, Buffer)> for StructArray {
435 fn from(pair: (Vec<(FieldRef, ArrayRef)>, Buffer)) -> Self {
436 let len = pair.0.first().map(|x| x.1.len()).unwrap_or_default();
437 let (fields, arrays): (Vec<_>, Vec<_>) = pair.0.into_iter().unzip();
438 let nulls = NullBuffer::new(BooleanBuffer::new(pair.1, 0, len));
439 Self::new(fields.into(), arrays, Some(nulls))
440 }
441}
442
443impl From<RecordBatch> for StructArray {
444 fn from(value: RecordBatch) -> Self {
445 Self {
446 len: value.num_rows(),
447 data_type: DataType::Struct(value.schema().fields().clone()),
448 nulls: None,
449 fields: value.columns().to_vec(),
450 }
451 }
452}
453
454impl Index<&str> for StructArray {
455 type Output = ArrayRef;
456
457 fn index(&self, name: &str) -> &Self::Output {
467 self.column_by_name(name).unwrap()
468 }
469}
470
#[cfg(test)]
mod tests {
    use super::*;

    use crate::{BooleanArray, Float32Array, Float64Array, Int32Array, Int64Array, StringArray};
    use arrow_buffer::ToByteSlice;

    /// A struct built from `ArrayData` exposes its children unchanged.
    #[test]
    fn test_struct_array_builder() {
        let bools = BooleanArray::from(vec![false, false, true, true]);
        let ints = Int64Array::from(vec![42, 28, 19, 31]);

        let schema = vec![
            Field::new("a", DataType::Boolean, false),
            Field::new("b", DataType::Int64, false),
        ];
        let data = ArrayData::builder(DataType::Struct(schema.into()))
            .len(4)
            .add_child_data(bools.to_data())
            .add_child_data(ints.to_data())
            .build()
            .unwrap();
        let array = StructArray::from(data);

        assert_eq!(array.column(0).as_ref(), &bools);
        assert_eq!(array.column(1).as_ref(), &ints);
    }

    /// Conversion from `(field, array)` pairs keeps children, length and nulls.
    #[test]
    fn test_struct_array_from() {
        let bools = Arc::new(BooleanArray::from(vec![false, false, true, true]));
        let ints = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));

        let columns = vec![
            (
                Arc::new(Field::new("b", DataType::Boolean, false)),
                bools.clone() as ArrayRef,
            ),
            (
                Arc::new(Field::new("c", DataType::Int32, false)),
                ints.clone() as ArrayRef,
            ),
        ];
        let array = StructArray::from(columns);

        assert_eq!(array.column(0).as_ref(), bools.as_ref());
        assert_eq!(array.column(1).as_ref(), ints.as_ref());
        assert_eq!(4, array.len());
        assert_eq!(0, array.null_count());
        assert_eq!(0, array.offset());
    }

    /// Children can be looked up with `array["name"]`.
    #[test]
    fn test_struct_array_index_access() {
        let bools = Arc::new(BooleanArray::from(vec![false, false, true, true]));
        let ints = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));

        let columns = vec![
            (
                Arc::new(Field::new("b", DataType::Boolean, false)),
                bools.clone() as ArrayRef,
            ),
            (
                Arc::new(Field::new("c", DataType::Int32, false)),
                ints.clone() as ArrayRef,
            ),
        ];
        let array = StructArray::from(columns);

        assert_eq!(array["b"].as_ref(), bools.as_ref());
        assert_eq!(array["c"].as_ref(), ints.as_ref());
    }

    /// `try_from` on named arrays keeps the child data, including child nulls.
    #[test]
    fn test_struct_array_from_vec() {
        let names: ArrayRef = Arc::new(StringArray::from(vec![
            Some("joe"),
            None,
            None,
            Some("mark"),
        ]));
        let numbers: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)]));

        let columns = vec![("f1", names.clone()), ("f2", numbers.clone())];
        let data = StructArray::try_from(columns).unwrap().into_data();
        assert_eq!(4, data.len());
        assert_eq!(0, data.null_count());

        let expected_names = ArrayData::builder(DataType::Utf8)
            .len(4)
            .null_bit_buffer(Some(Buffer::from(&[9_u8])))
            .add_buffer(Buffer::from([0, 3, 3, 3, 7].to_byte_slice()))
            .add_buffer(Buffer::from(b"joemark"))
            .build()
            .unwrap();

        let expected_numbers = ArrayData::builder(DataType::Int32)
            .len(4)
            .null_bit_buffer(Some(Buffer::from(&[11_u8])))
            .add_buffer(Buffer::from([1, 2, 0, 4].to_byte_slice()))
            .build()
            .unwrap();

        let children = data.child_data();
        assert_eq!(expected_names, children[0]);
        assert_eq!(expected_numbers, children[1]);
    }

    /// Children of different lengths are rejected with a descriptive error.
    #[test]
    fn test_struct_array_from_vec_error() {
        let short: ArrayRef = Arc::new(StringArray::from(vec![
            Some("joe"),
            None,
            None,
        ]));
        let long: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)]));

        let result = StructArray::try_from(vec![("f1", short.clone()), ("f2", long.clone())]);
        let message = result.unwrap_err().to_string();

        assert_eq!(
            message,
            "Invalid argument error: Incorrect array length for StructArray field \"f2\", expected 3 got 4"
        )
    }

    /// A single child whose type differs from its field makes construction panic.
    #[test]
    #[should_panic(
        expected = "Incorrect datatype for StructArray field \\\"b\\\", expected Int16 got Boolean"
    )]
    fn test_struct_array_from_mismatched_types_single() {
        let columns = vec![(
            Arc::new(Field::new("b", DataType::Int16, false)),
            Arc::new(BooleanArray::from(vec![false, false, true, true])) as Arc<dyn Array>,
        )];
        drop(StructArray::from(columns));
    }

    /// With several mismatching children the first one is the one reported.
    #[test]
    #[should_panic(
        expected = "Incorrect datatype for StructArray field \\\"b\\\", expected Int16 got Boolean"
    )]
    fn test_struct_array_from_mismatched_types_multiple() {
        let columns = vec![
            (
                Arc::new(Field::new("b", DataType::Int16, false)),
                Arc::new(BooleanArray::from(vec![false, false, true, true])) as Arc<dyn Array>,
            ),
            (
                Arc::new(Field::new("c", DataType::Utf8, false)),
                Arc::new(Int32Array::from(vec![42, 28, 19, 31])),
            ),
        ];
        drop(StructArray::from(columns));
    }

    /// Slicing a struct slices the validity and every child consistently.
    #[test]
    fn test_struct_array_slice() {
        let bool_data = ArrayData::builder(DataType::Boolean)
            .len(5)
            .add_buffer(Buffer::from([0b00010000]))
            .null_bit_buffer(Some(Buffer::from([0b00010001])))
            .build()
            .unwrap();
        let int32_data = ArrayData::builder(DataType::Int32)
            .len(5)
            .add_buffer(Buffer::from([0, 28, 42, 0, 0].to_byte_slice()))
            .null_bit_buffer(Some(Buffer::from([0b00000110])))
            .build()
            .unwrap();

        let schema = vec![
            Field::new("a", DataType::Boolean, true),
            Field::new("b", DataType::Int32, true),
        ];
        let data = ArrayData::builder(DataType::Struct(schema.into()))
            .len(5)
            .add_child_data(bool_data.clone())
            .add_child_data(int32_data.clone())
            .null_bit_buffer(Some(Buffer::from([0b00010111])))
            .build()
            .unwrap();
        let array = StructArray::from(data);

        // The struct itself: only row 3 is null.
        assert_eq!(5, array.len());
        assert_eq!(1, array.null_count());
        assert!(array.is_valid(0));
        assert!(array.is_valid(1));
        assert!(array.is_valid(2));
        assert!(array.is_null(3));
        assert!(array.is_valid(4));
        assert_eq!(bool_data, array.column(0).to_data());
        assert_eq!(int32_data, array.column(1).to_data());

        // First child before slicing.
        let first = array.column(0);
        let first = first.as_any().downcast_ref::<BooleanArray>().unwrap();
        assert_eq!(5, first.len());
        assert_eq!(3, first.null_count());
        assert!(first.is_valid(0));
        assert!(!first.value(0));
        assert!(first.is_null(1));
        assert!(first.is_null(2));
        assert!(first.is_null(3));
        assert!(first.is_valid(4));
        assert!(first.value(4));

        // Second child before slicing.
        let second = array.column(1);
        let second = second.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(5, second.len());
        assert_eq!(3, second.null_count());
        assert!(second.is_null(0));
        assert!(second.is_valid(1));
        assert_eq!(28, second.value(1));
        assert!(second.is_valid(2));
        assert_eq!(42, second.value(2));
        assert!(second.is_null(3));
        assert!(second.is_null(4));

        // Rows 2..5 of the struct.
        let sliced = array.slice(2, 3);
        let sliced = sliced.as_any().downcast_ref::<StructArray>().unwrap();
        assert_eq!(3, sliced.len());
        assert_eq!(1, sliced.null_count());
        assert!(sliced.is_valid(0));
        assert!(sliced.is_null(1));
        assert!(sliced.is_valid(2));

        let sliced_first = sliced.column(0);
        let sliced_first = sliced_first
            .as_any()
            .downcast_ref::<BooleanArray>()
            .unwrap();
        assert_eq!(3, sliced_first.len());
        assert!(sliced_first.is_null(0));
        assert!(sliced_first.is_null(1));
        assert!(sliced_first.is_valid(2));
        assert!(sliced_first.value(2));

        let sliced_second = sliced.column(1);
        let sliced_second = sliced_second
            .as_any()
            .downcast_ref::<Int32Array>()
            .unwrap();
        assert_eq!(3, sliced_second.len());
        assert!(sliced_second.is_valid(0));
        assert_eq!(42, sliced_second.value(0));
        assert!(sliced_second.is_null(1));
        assert!(sliced_second.is_null(2));
    }

    /// Children of different lengths make the infallible conversion panic.
    #[test]
    #[should_panic(
        expected = "Incorrect array length for StructArray field \\\"c\\\", expected 1 got 2"
    )]
    fn test_invalid_struct_child_array_lengths() {
        let columns = vec![
            (
                Arc::new(Field::new("b", DataType::Float32, false)),
                Arc::new(Float32Array::from(vec![1.1])) as Arc<dyn Array>,
            ),
            (
                Arc::new(Field::new("c", DataType::Float64, false)),
                Arc::new(Float64Array::from(vec![2.2, 3.3])),
            ),
        ];
        drop(StructArray::from(columns));
    }

    /// An empty list of columns yields an empty struct array.
    #[test]
    fn test_struct_array_from_empty() {
        let empty = StructArray::from(vec![]);
        assert!(empty.is_empty())
    }

    /// Nulls in the child of a non-nullable field must be masked by the parent.
    #[test]
    #[should_panic(expected = "Found unmasked nulls for non-nullable StructArray field \\\"c\\\"")]
    fn test_struct_array_from_mismatched_nullability() {
        let columns = vec![(
            Arc::new(Field::new("c", DataType::Int32, false)),
            Arc::new(Int32Array::from(vec![Some(42), None, Some(19)])) as ArrayRef,
        )];
        drop(StructArray::from(columns));
    }

    /// Debug output of a 30-row struct in which every second row is null.
    #[test]
    fn test_struct_array_fmt_debug() {
        let fields: Fields = vec![Arc::new(Field::new("c", DataType::Int32, true))].into();
        let values = Arc::new(Int32Array::from((0..30).collect::<Vec<_>>())) as ArrayRef;
        let validity = NullBuffer::new(BooleanBuffer::from(
            (0..30).map(|i| i % 2 == 0).collect::<Vec<_>>(),
        ));

        let arr: StructArray = StructArray::new(fields, vec![values], Some(validity));
        let rendered = format!("{arr:?}");
        assert_eq!(rendered, "StructArray\n-- validity: \n[\n valid,\n null,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n ...10 elements...,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n]\n[\n-- child 0: \"c\" (Int32)\nPrimitiveArray<Int32>\n[\n 0,\n 1,\n 2,\n 3,\n 4,\n 5,\n 6,\n 7,\n 8,\n 9,\n ...10 elements...,\n 20,\n 21,\n 22,\n 23,\n 24,\n 25,\n 26,\n 27,\n 28,\n 29,\n]\n]")
    }
}