arrow_array/array/
list_view_array.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow_buffer::{NullBuffer, ScalarBuffer};
19use arrow_data::{ArrayData, ArrayDataBuilder};
20use arrow_schema::{ArrowError, DataType, FieldRef};
21use std::any::Any;
22use std::ops::Add;
23use std::sync::Arc;
24
25use crate::array::{make_array, print_long_array};
26use crate::iterator::GenericListViewArrayIter;
27use crate::{new_empty_array, Array, ArrayAccessor, ArrayRef, FixedSizeListArray, OffsetSizeTrait};
28
/// A [`GenericListViewArray`] of variable size lists, storing offsets and sizes as `i32`.
pub type ListViewArray = GenericListViewArray<i32>;
31
/// A [`GenericListViewArray`] of variable size lists, storing offsets and sizes as `i64`.
pub type LargeListViewArray = GenericListViewArray<i64>;
34
/// An array of variable size lists laid out as described in
/// [Variable-size List Layout: ListView Layout].
///
/// Different from [`crate::GenericListArray`] as it stores both an offset and length
/// meaning that take / filter operations can be implemented without copying the underlying data.
///
/// [Variable-size List Layout: ListView Layout]: https://arrow.apache.org/docs/format/Columnar.html#listview-layout
#[derive(Clone)]
pub struct GenericListViewArray<OffsetSize: OffsetSizeTrait> {
    /// Data type reported by this array (`ListView` or `LargeListView` wrapping the child field).
    data_type: DataType,
    /// Optional validity information, one entry per list.
    nulls: Option<NullBuffer>,
    /// Child array that the individual lists are sliced out of.
    values: ArrayRef,
    /// Start position of each list within `values`.
    value_offsets: ScalarBuffer<OffsetSize>,
    /// Number of child elements of each list.
    value_sizes: ScalarBuffer<OffsetSize>,
}
48
49impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
50    /// The data type constructor of listview array.
51    /// The input is the schema of the child array and
52    /// the output is the [`DataType`], ListView or LargeListView.
53    pub const DATA_TYPE_CONSTRUCTOR: fn(FieldRef) -> DataType = if OffsetSize::IS_LARGE {
54        DataType::LargeListView
55    } else {
56        DataType::ListView
57    };
58
59    /// Create a new [`GenericListViewArray`] from the provided parts
60    ///
61    /// # Errors
62    ///
63    /// Errors if
64    ///
65    /// * `offsets.len() != sizes.len()`
66    /// * `offsets.len() != nulls.len()`
67    /// * `offsets[i] > values.len()`
68    /// * `!field.is_nullable() && values.is_nullable()`
69    /// * `field.data_type() != values.data_type()`
70    /// * `0 <= offsets[i] <= length of the child array`
71    /// * `0 <= offsets[i] + size[i] <= length of the child array`
72    pub fn try_new(
73        field: FieldRef,
74        offsets: ScalarBuffer<OffsetSize>,
75        sizes: ScalarBuffer<OffsetSize>,
76        values: ArrayRef,
77        nulls: Option<NullBuffer>,
78    ) -> Result<Self, ArrowError> {
79        let len = offsets.len();
80        if let Some(n) = nulls.as_ref() {
81            if n.len() != len {
82                return Err(ArrowError::InvalidArgumentError(format!(
83                    "Incorrect length of null buffer for {}ListViewArray, expected {len} got {}",
84                    OffsetSize::PREFIX,
85                    n.len(),
86                )));
87            }
88        }
89        if len != sizes.len() {
90            return Err(ArrowError::InvalidArgumentError(format!(
91                "Length of offsets buffer and sizes buffer must be equal for {}ListViewArray, got {len} and {}",
92                OffsetSize::PREFIX, sizes.len()
93            )));
94        }
95
96        for (offset, size) in offsets.iter().zip(sizes.iter()) {
97            let offset = offset.as_usize();
98            let size = size.as_usize();
99            if offset.checked_add(size).ok_or_else(|| {
100                ArrowError::InvalidArgumentError(format!(
101                    "Overflow in offset + size for {}ListViewArray",
102                    OffsetSize::PREFIX
103                ))
104            })? > values.len()
105            {
106                return Err(ArrowError::InvalidArgumentError(format!(
107                    "Offset + size for {}ListViewArray must be within the bounds of the child array, got offset: {offset}, size: {size}, child array length: {}",
108                    OffsetSize::PREFIX,
109                    values.len()
110                )));
111            }
112        }
113
114        if !field.is_nullable() && values.is_nullable() {
115            return Err(ArrowError::InvalidArgumentError(format!(
116                "Non-nullable field of {}ListViewArray {:?} cannot contain nulls",
117                OffsetSize::PREFIX,
118                field.name()
119            )));
120        }
121
122        if field.data_type() != values.data_type() {
123            return Err(ArrowError::InvalidArgumentError(format!(
124                "{}ListViewArray expected data type {} got {} for {:?}",
125                OffsetSize::PREFIX,
126                field.data_type(),
127                values.data_type(),
128                field.name()
129            )));
130        }
131
132        Ok(Self {
133            data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
134            nulls,
135            values,
136            value_offsets: offsets,
137            value_sizes: sizes,
138        })
139    }
140
141    /// Create a new [`GenericListViewArray`] from the provided parts
142    ///
143    /// # Panics
144    ///
145    /// Panics if [`Self::try_new`] returns an error
146    pub fn new(
147        field: FieldRef,
148        offsets: ScalarBuffer<OffsetSize>,
149        sizes: ScalarBuffer<OffsetSize>,
150        values: ArrayRef,
151        nulls: Option<NullBuffer>,
152    ) -> Self {
153        Self::try_new(field, offsets, sizes, values, nulls).unwrap()
154    }
155
156    /// Create a new [`GenericListViewArray`] of length `len` where all values are null
157    pub fn new_null(field: FieldRef, len: usize) -> Self {
158        let values = new_empty_array(field.data_type());
159        Self {
160            data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
161            nulls: Some(NullBuffer::new_null(len)),
162            value_offsets: ScalarBuffer::from(vec![]),
163            value_sizes: ScalarBuffer::from(vec![]),
164            values,
165        }
166    }
167
168    /// Deconstruct this array into its constituent parts
169    pub fn into_parts(
170        self,
171    ) -> (
172        FieldRef,
173        ScalarBuffer<OffsetSize>,
174        ScalarBuffer<OffsetSize>,
175        ArrayRef,
176        Option<NullBuffer>,
177    ) {
178        let f = match self.data_type {
179            DataType::ListView(f) | DataType::LargeListView(f) => f,
180            _ => unreachable!(),
181        };
182        (
183            f,
184            self.value_offsets,
185            self.value_sizes,
186            self.values,
187            self.nulls,
188        )
189    }
190
191    /// Returns a reference to the offsets of this list
192    ///
193    /// Unlike [`Self::value_offsets`] this returns the [`ScalarBuffer`]
194    /// allowing for zero-copy cloning
195    #[inline]
196    pub fn offsets(&self) -> &ScalarBuffer<OffsetSize> {
197        &self.value_offsets
198    }
199
200    /// Returns a reference to the values of this list
201    #[inline]
202    pub fn values(&self) -> &ArrayRef {
203        &self.values
204    }
205
206    /// Returns a reference to the sizes of this list
207    ///
208    /// Unlike [`Self::value_sizes`] this returns the [`ScalarBuffer`]
209    /// allowing for zero-copy cloning
210    #[inline]
211    pub fn sizes(&self) -> &ScalarBuffer<OffsetSize> {
212        &self.value_sizes
213    }
214
215    /// Returns a clone of the value type of this list.
216    pub fn value_type(&self) -> DataType {
217        self.values.data_type().clone()
218    }
219
220    /// Returns ith value of this list view array.
221    /// # Safety
222    /// Caller must ensure that the index is within the array bounds
223    pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
224        let offset = self.value_offsets().get_unchecked(i).as_usize();
225        let length = self.value_sizes().get_unchecked(i).as_usize();
226        self.values.slice(offset, length)
227    }
228
229    /// Returns ith value of this list view array.
230    /// # Panics
231    /// Panics if the index is out of bounds
232    pub fn value(&self, i: usize) -> ArrayRef {
233        let offset = self.value_offsets()[i].as_usize();
234        let length = self.value_sizes()[i].as_usize();
235        self.values.slice(offset, length)
236    }
237
238    /// Returns the offset values in the offsets buffer
239    #[inline]
240    pub fn value_offsets(&self) -> &[OffsetSize] {
241        &self.value_offsets
242    }
243
244    /// Returns the sizes values in the offsets buffer
245    #[inline]
246    pub fn value_sizes(&self) -> &[OffsetSize] {
247        &self.value_sizes
248    }
249
250    /// Returns the size for value at index `i`.
251    #[inline]
252    pub fn value_size(&self, i: usize) -> OffsetSize {
253        self.value_sizes[i]
254    }
255
256    /// Returns the offset for value at index `i`.
257    pub fn value_offset(&self, i: usize) -> OffsetSize {
258        self.value_offsets[i]
259    }
260
261    /// Constructs a new iterator
262    pub fn iter(&self) -> GenericListViewArrayIter<'_, OffsetSize> {
263        GenericListViewArrayIter::<'_, OffsetSize>::new(self)
264    }
265
266    #[inline]
267    fn get_type(data_type: &DataType) -> Option<&DataType> {
268        match (OffsetSize::IS_LARGE, data_type) {
269            (true, DataType::LargeListView(child)) | (false, DataType::ListView(child)) => {
270                Some(child.data_type())
271            }
272            _ => None,
273        }
274    }
275
276    /// Returns a zero-copy slice of this array with the indicated offset and length.
277    pub fn slice(&self, offset: usize, length: usize) -> Self {
278        Self {
279            data_type: self.data_type.clone(),
280            nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
281            values: self.values.clone(),
282            value_offsets: self.value_offsets.slice(offset, length),
283            value_sizes: self.value_sizes.slice(offset, length),
284        }
285    }
286}
287
288impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListViewArray<OffsetSize> {
289    type Item = ArrayRef;
290
291    fn value(&self, index: usize) -> Self::Item {
292        GenericListViewArray::value(self, index)
293    }
294
295    unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
296        GenericListViewArray::value_unchecked(self, index)
297    }
298}
299
300impl<OffsetSize: OffsetSizeTrait> Array for GenericListViewArray<OffsetSize> {
301    fn as_any(&self) -> &dyn Any {
302        self
303    }
304
305    fn to_data(&self) -> ArrayData {
306        self.clone().into()
307    }
308
309    fn into_data(self) -> ArrayData {
310        self.into()
311    }
312
313    fn data_type(&self) -> &DataType {
314        &self.data_type
315    }
316
317    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
318        Arc::new(self.slice(offset, length))
319    }
320
321    fn len(&self) -> usize {
322        self.sizes().len()
323    }
324
325    fn is_empty(&self) -> bool {
326        self.value_sizes.is_empty()
327    }
328
329    fn offset(&self) -> usize {
330        0
331    }
332
333    fn nulls(&self) -> Option<&NullBuffer> {
334        self.nulls.as_ref()
335    }
336
337    fn logical_null_count(&self) -> usize {
338        // More efficient that the default implementation
339        self.null_count()
340    }
341
342    fn get_buffer_memory_size(&self) -> usize {
343        let mut size = self.values.get_buffer_memory_size();
344        size += self.value_offsets.inner().capacity();
345        size += self.value_sizes.inner().capacity();
346        if let Some(n) = self.nulls.as_ref() {
347            size += n.buffer().capacity();
348        }
349        size
350    }
351
352    fn get_array_memory_size(&self) -> usize {
353        let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size();
354        size += self.value_offsets.inner().capacity();
355        size += self.value_sizes.inner().capacity();
356        if let Some(n) = self.nulls.as_ref() {
357            size += n.buffer().capacity();
358        }
359        size
360    }
361}
362
363impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListViewArray<OffsetSize> {
364    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
365        let prefix = OffsetSize::PREFIX;
366        write!(f, "{prefix}ListViewArray\n[\n")?;
367        print_long_array(self, f, |array, index, f| {
368            std::fmt::Debug::fmt(&array.value(index), f)
369        })?;
370        write!(f, "]")
371    }
372}
373
374impl<OffsetSize: OffsetSizeTrait> From<GenericListViewArray<OffsetSize>> for ArrayData {
375    fn from(array: GenericListViewArray<OffsetSize>) -> Self {
376        let len = array.len();
377        let builder = ArrayDataBuilder::new(array.data_type)
378            .len(len)
379            .nulls(array.nulls)
380            .buffers(vec![
381                array.value_offsets.into_inner(),
382                array.value_sizes.into_inner(),
383            ])
384            .child_data(vec![array.values.to_data()]);
385
386        unsafe { builder.build_unchecked() }
387    }
388}
389
390impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListViewArray<OffsetSize> {
391    fn from(data: ArrayData) -> Self {
392        Self::try_new_from_array_data(data)
393            .expect("Expected infallible creation of GenericListViewArray from ArrayDataRef failed")
394    }
395}
396
397impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListViewArray<OffsetSize> {
398    fn from(value: FixedSizeListArray) -> Self {
399        let (field, size) = match value.data_type() {
400            DataType::FixedSizeList(f, size) => (f, *size as usize),
401            _ => unreachable!(),
402        };
403        let mut acc = 0_usize;
404        let iter = std::iter::repeat(size).take(value.len());
405        let mut sizes = Vec::with_capacity(iter.size_hint().0);
406        let mut offsets = Vec::with_capacity(iter.size_hint().0);
407
408        for size in iter {
409            offsets.push(OffsetSize::usize_as(acc));
410            acc = acc.add(size);
411            sizes.push(OffsetSize::usize_as(size));
412        }
413        let sizes = ScalarBuffer::from(sizes);
414        let offsets = ScalarBuffer::from(offsets);
415        Self {
416            data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
417            nulls: value.nulls().cloned(),
418            values: value.values().clone(),
419            value_offsets: offsets,
420            value_sizes: sizes,
421        }
422    }
423}
424
425impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
426    fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
427        if data.buffers().len() != 2 {
428            return Err(ArrowError::InvalidArgumentError(format!(
429                "ListViewArray data should contain two buffers (value offsets & value sizes), had {}",
430                data.buffers().len()
431            )));
432        }
433
434        if data.child_data().len() != 1 {
435            return Err(ArrowError::InvalidArgumentError(format!(
436                "ListViewArray should contain a single child array (values array), had {}",
437                data.child_data().len()
438            )));
439        }
440
441        let values = data.child_data()[0].clone();
442
443        if let Some(child_data_type) = Self::get_type(data.data_type()) {
444            if values.data_type() != child_data_type {
445                return Err(ArrowError::InvalidArgumentError(format!(
446                    "{}ListViewArray's child datatype {:?} does not \
447                             correspond to the List's datatype {:?}",
448                    OffsetSize::PREFIX,
449                    values.data_type(),
450                    child_data_type
451                )));
452            }
453        } else {
454            return Err(ArrowError::InvalidArgumentError(format!(
455                "{}ListViewArray's datatype must be {}ListViewArray(). It is {:?}",
456                OffsetSize::PREFIX,
457                OffsetSize::PREFIX,
458                data.data_type()
459            )));
460        }
461
462        let values = make_array(values);
463        // ArrayData is valid, and verified type above
464        let value_offsets = ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len());
465        let value_sizes = ScalarBuffer::new(data.buffers()[1].clone(), data.offset(), data.len());
466
467        Ok(Self {
468            data_type: data.data_type().clone(),
469            nulls: data.nulls().cloned(),
470            values,
471            value_offsets,
472            value_sizes,
473        })
474    }
475}
476
477#[cfg(test)]
478mod tests {
479    use arrow_buffer::{bit_util, BooleanBuffer, Buffer, ScalarBuffer};
480    use arrow_schema::Field;
481
482    use crate::builder::{FixedSizeListBuilder, Int32Builder};
483    use crate::cast::AsArray;
484    use crate::types::Int32Type;
485    use crate::{Int32Array, Int64Array};
486
487    use super::*;
488
489    #[test]
490    fn test_empty_list_view_array() {
491        // Construct an empty value array
492        let vec: Vec<i32> = vec![];
493        let field = Arc::new(Field::new("item", DataType::Int32, true));
494        let sizes = ScalarBuffer::from(vec![]);
495        let offsets = ScalarBuffer::from(vec![]);
496        let values = Int32Array::from(vec);
497        let list_array = LargeListViewArray::new(field, offsets, sizes, Arc::new(values), None);
498
499        assert_eq!(list_array.len(), 0)
500    }
501
502    #[test]
503    fn test_list_view_array() {
504        // Construct a value array
505        let value_data = ArrayData::builder(DataType::Int32)
506            .len(8)
507            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
508            .build()
509            .unwrap();
510
511        let field = Arc::new(Field::new("item", DataType::Int32, true));
512        let sizes = ScalarBuffer::from(vec![3i32, 3, 2]);
513        let offsets = ScalarBuffer::from(vec![0i32, 3, 6]);
514        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
515        let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), None);
516
517        let values = list_array.values();
518        assert_eq!(value_data, values.to_data());
519        assert_eq!(DataType::Int32, list_array.value_type());
520        assert_eq!(3, list_array.len());
521        assert_eq!(0, list_array.null_count());
522        assert_eq!(6, list_array.value_offsets()[2]);
523        assert_eq!(2, list_array.value_sizes()[2]);
524        assert_eq!(2, list_array.value_size(2));
525        assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
526        assert_eq!(
527            0,
528            unsafe { list_array.value_unchecked(0) }
529                .as_primitive::<Int32Type>()
530                .value(0)
531        );
532        for i in 0..3 {
533            assert!(list_array.is_valid(i));
534            assert!(!list_array.is_null(i));
535        }
536    }
537
538    #[test]
539    fn test_large_list_view_array() {
540        // Construct a value array
541        let value_data = ArrayData::builder(DataType::Int32)
542            .len(8)
543            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
544            .build()
545            .unwrap();
546
547        let field = Arc::new(Field::new("item", DataType::Int32, true));
548        let sizes = ScalarBuffer::from(vec![3i64, 3, 2]);
549        let offsets = ScalarBuffer::from(vec![0i64, 3, 6]);
550        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
551        let list_array = LargeListViewArray::new(field, offsets, sizes, Arc::new(values), None);
552
553        let values = list_array.values();
554        assert_eq!(value_data, values.to_data());
555        assert_eq!(DataType::Int32, list_array.value_type());
556        assert_eq!(3, list_array.len());
557        assert_eq!(0, list_array.null_count());
558        assert_eq!(6, list_array.value_offsets()[2]);
559        assert_eq!(2, list_array.value_sizes()[2]);
560        assert_eq!(2, list_array.value_size(2));
561        assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
562        assert_eq!(
563            0,
564            unsafe { list_array.value_unchecked(0) }
565                .as_primitive::<Int32Type>()
566                .value(0)
567        );
568        for i in 0..3 {
569            assert!(list_array.is_valid(i));
570            assert!(!list_array.is_null(i));
571        }
572    }
573
574    #[test]
575    fn test_list_view_array_slice() {
576        // Construct a value array
577        let value_data = ArrayData::builder(DataType::Int32)
578            .len(10)
579            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
580            .build()
581            .unwrap();
582
583        // 01011001 00000001
584        let mut null_bits: [u8; 2] = [0; 2];
585        bit_util::set_bit(&mut null_bits, 0);
586        bit_util::set_bit(&mut null_bits, 3);
587        bit_util::set_bit(&mut null_bits, 4);
588        bit_util::set_bit(&mut null_bits, 6);
589        bit_util::set_bit(&mut null_bits, 8);
590        let buffer = BooleanBuffer::new(Buffer::from(null_bits), 0, 9);
591        let null_buffer = NullBuffer::new(buffer);
592
593        let field = Arc::new(Field::new("item", DataType::Int32, true));
594        let sizes = ScalarBuffer::from(vec![2, 0, 0, 2, 2, 0, 3, 0, 1]);
595        let offsets = ScalarBuffer::from(vec![0, 2, 2, 2, 4, 6, 6, 9, 9]);
596        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
597        let list_array =
598            ListViewArray::new(field, offsets, sizes, Arc::new(values), Some(null_buffer));
599
600        let values = list_array.values();
601        assert_eq!(value_data, values.to_data());
602        assert_eq!(DataType::Int32, list_array.value_type());
603        assert_eq!(9, list_array.len());
604        assert_eq!(4, list_array.null_count());
605        assert_eq!(2, list_array.value_offsets()[3]);
606        assert_eq!(2, list_array.value_sizes()[3]);
607        assert_eq!(2, list_array.value_size(3));
608
609        let sliced_array = list_array.slice(1, 6);
610        assert_eq!(6, sliced_array.len());
611        assert_eq!(3, sliced_array.null_count());
612
613        for i in 0..sliced_array.len() {
614            if bit_util::get_bit(&null_bits, 1 + i) {
615                assert!(sliced_array.is_valid(i));
616            } else {
617                assert!(sliced_array.is_null(i));
618            }
619        }
620
621        // Check offset and length for each non-null value.
622        let sliced_list_array = sliced_array
623            .as_any()
624            .downcast_ref::<ListViewArray>()
625            .unwrap();
626        assert_eq!(2, sliced_list_array.value_offsets()[2]);
627        assert_eq!(2, sliced_list_array.value_sizes()[2]);
628        assert_eq!(2, sliced_list_array.value_size(2));
629
630        assert_eq!(4, sliced_list_array.value_offsets()[3]);
631        assert_eq!(2, sliced_list_array.value_sizes()[3]);
632        assert_eq!(2, sliced_list_array.value_size(3));
633
634        assert_eq!(6, sliced_list_array.value_offsets()[5]);
635        assert_eq!(3, sliced_list_array.value_sizes()[5]);
636        assert_eq!(3, sliced_list_array.value_size(5));
637    }
638
639    #[test]
640    fn test_large_list_view_array_slice() {
641        // Construct a value array
642        let value_data = ArrayData::builder(DataType::Int32)
643            .len(10)
644            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
645            .build()
646            .unwrap();
647
648        // 01011001 00000001
649        let mut null_bits: [u8; 2] = [0; 2];
650        bit_util::set_bit(&mut null_bits, 0);
651        bit_util::set_bit(&mut null_bits, 3);
652        bit_util::set_bit(&mut null_bits, 4);
653        bit_util::set_bit(&mut null_bits, 6);
654        bit_util::set_bit(&mut null_bits, 8);
655        let buffer = BooleanBuffer::new(Buffer::from(null_bits), 0, 9);
656        let null_buffer = NullBuffer::new(buffer);
657
658        // Construct a large list view array from the above two
659        let field = Arc::new(Field::new("item", DataType::Int32, true));
660        let sizes = ScalarBuffer::from(vec![2i64, 0, 0, 2, 2, 0, 3, 0, 1]);
661        let offsets = ScalarBuffer::from(vec![0i64, 2, 2, 2, 4, 6, 6, 9, 9]);
662        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
663        let list_array =
664            LargeListViewArray::new(field, offsets, sizes, Arc::new(values), Some(null_buffer));
665
666        let values = list_array.values();
667        assert_eq!(value_data, values.to_data());
668        assert_eq!(DataType::Int32, list_array.value_type());
669        assert_eq!(9, list_array.len());
670        assert_eq!(4, list_array.null_count());
671        assert_eq!(2, list_array.value_offsets()[3]);
672        assert_eq!(2, list_array.value_sizes()[3]);
673        assert_eq!(2, list_array.value_size(3));
674
675        let sliced_array = list_array.slice(1, 6);
676        assert_eq!(6, sliced_array.len());
677        assert_eq!(3, sliced_array.null_count());
678
679        for i in 0..sliced_array.len() {
680            if bit_util::get_bit(&null_bits, 1 + i) {
681                assert!(sliced_array.is_valid(i));
682            } else {
683                assert!(sliced_array.is_null(i));
684            }
685        }
686
687        // Check offset and length for each non-null value.
688        let sliced_list_array = sliced_array
689            .as_any()
690            .downcast_ref::<LargeListViewArray>()
691            .unwrap();
692        assert_eq!(2, sliced_list_array.value_offsets()[2]);
693        assert_eq!(2, sliced_list_array.value_size(2));
694        assert_eq!(2, sliced_list_array.value_sizes()[2]);
695
696        assert_eq!(4, sliced_list_array.value_offsets()[3]);
697        assert_eq!(2, sliced_list_array.value_size(3));
698        assert_eq!(2, sliced_list_array.value_sizes()[3]);
699
700        assert_eq!(6, sliced_list_array.value_offsets()[5]);
701        assert_eq!(3, sliced_list_array.value_size(5));
702        assert_eq!(2, sliced_list_array.value_sizes()[3]);
703    }
704
705    #[test]
706    #[should_panic(expected = "index out of bounds: the len is 9 but the index is 10")]
707    fn test_list_view_array_index_out_of_bound() {
708        // 01011001 00000001
709        let mut null_bits: [u8; 2] = [0; 2];
710        bit_util::set_bit(&mut null_bits, 0);
711        bit_util::set_bit(&mut null_bits, 3);
712        bit_util::set_bit(&mut null_bits, 4);
713        bit_util::set_bit(&mut null_bits, 6);
714        bit_util::set_bit(&mut null_bits, 8);
715        let buffer = BooleanBuffer::new(Buffer::from(null_bits), 0, 9);
716        let null_buffer = NullBuffer::new(buffer);
717
718        // Construct a buffer for value offsets, for the nested array:
719        //  [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
720        // Construct a list array from the above two
721        let field = Arc::new(Field::new("item", DataType::Int32, true));
722        let sizes = ScalarBuffer::from(vec![2i32, 0, 0, 2, 2, 0, 3, 0, 1]);
723        let offsets = ScalarBuffer::from(vec![0i32, 2, 2, 2, 4, 6, 6, 9, 9]);
724        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
725        let list_array =
726            ListViewArray::new(field, offsets, sizes, Arc::new(values), Some(null_buffer));
727
728        assert_eq!(9, list_array.len());
729        list_array.value(10);
730    }
731    #[test]
732    #[should_panic(
733        expected = "ListViewArray data should contain two buffers (value offsets & value sizes), had 0"
734    )]
735    #[cfg(not(feature = "force_validate"))]
736    fn test_list_view_array_invalid_buffer_len() {
737        let value_data = unsafe {
738            ArrayData::builder(DataType::Int32)
739                .len(8)
740                .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
741                .build_unchecked()
742        };
743        let list_data_type =
744            DataType::ListView(Arc::new(Field::new("item", DataType::Int32, false)));
745        let list_data = unsafe {
746            ArrayData::builder(list_data_type)
747                .len(3)
748                .add_child_data(value_data)
749                .build_unchecked()
750        };
751        drop(ListViewArray::from(list_data));
752    }
753
754    #[test]
755    #[should_panic(
756        expected = "ListViewArray data should contain two buffers (value offsets & value sizes), had 1"
757    )]
758    #[cfg(not(feature = "force_validate"))]
759    fn test_list_view_array_invalid_child_array_len() {
760        let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]);
761        let list_data_type =
762            DataType::ListView(Arc::new(Field::new("item", DataType::Int32, false)));
763        let list_data = unsafe {
764            ArrayData::builder(list_data_type)
765                .len(3)
766                .add_buffer(value_offsets)
767                .build_unchecked()
768        };
769        drop(ListViewArray::from(list_data));
770    }
771
772    #[test]
773    fn test_list_view_array_offsets_need_not_start_at_zero() {
774        let field = Arc::new(Field::new("item", DataType::Int32, true));
775        let sizes = ScalarBuffer::from(vec![0i32, 0, 3]);
776        let offsets = ScalarBuffer::from(vec![2i32, 2, 5]);
777        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
778        let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), None);
779
780        assert_eq!(list_array.value_size(0), 0);
781        assert_eq!(list_array.value_size(1), 0);
782        assert_eq!(list_array.value_size(2), 3);
783    }
784
785    #[test]
786    #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
787    #[cfg(not(feature = "force_validate"))]
788    fn test_list_view_array_alignment() {
789        let offset_buf = Buffer::from_slice_ref([0_u64]);
790        let offset_buf2 = offset_buf.slice(1);
791
792        let size_buf = Buffer::from_slice_ref([0_u64]);
793        let size_buf2 = size_buf.slice(1);
794
795        let values: [i32; 8] = [0; 8];
796        let value_data = unsafe {
797            ArrayData::builder(DataType::Int32)
798                .add_buffer(Buffer::from_slice_ref(values))
799                .build_unchecked()
800        };
801
802        let list_data_type =
803            DataType::ListView(Arc::new(Field::new("item", DataType::Int32, false)));
804        let list_data = unsafe {
805            ArrayData::builder(list_data_type)
806                .add_buffer(offset_buf2)
807                .add_buffer(size_buf2)
808                .add_child_data(value_data)
809                .build_unchecked()
810        };
811        drop(ListViewArray::from(list_data));
812    }
813
814    #[test]
815    fn test_empty_offsets() {
816        let f = Arc::new(Field::new("element", DataType::Int32, true));
817        let string = ListViewArray::from(
818            ArrayData::builder(DataType::ListView(f.clone()))
819                .buffers(vec![Buffer::from(&[]), Buffer::from(&[])])
820                .add_child_data(ArrayData::new_empty(&DataType::Int32))
821                .build()
822                .unwrap(),
823        );
824        assert_eq!(string.value_offsets(), &[]);
825        assert_eq!(string.value_sizes(), &[]);
826
827        let string = LargeListViewArray::from(
828            ArrayData::builder(DataType::LargeListView(f))
829                .buffers(vec![Buffer::from(&[]), Buffer::from(&[])])
830                .add_child_data(ArrayData::new_empty(&DataType::Int32))
831                .build()
832                .unwrap(),
833        );
834        assert_eq!(string.len(), 0);
835        assert_eq!(string.value_offsets(), &[]);
836        assert_eq!(string.value_sizes(), &[]);
837    }
838
839    #[test]
840    fn test_try_new() {
841        let offsets = ScalarBuffer::from(vec![0, 1, 4, 5]);
842        let sizes = ScalarBuffer::from(vec![1, 3, 1, 0]);
843        let values = Int32Array::new(vec![1, 2, 3, 4, 5].into(), None);
844        let values = Arc::new(values) as ArrayRef;
845
846        let field = Arc::new(Field::new("element", DataType::Int32, false));
847        ListViewArray::new(
848            field.clone(),
849            offsets.clone(),
850            sizes.clone(),
851            values.clone(),
852            None,
853        );
854
855        let nulls = NullBuffer::new_null(4);
856        ListViewArray::new(
857            field.clone(),
858            offsets,
859            sizes.clone(),
860            values.clone(),
861            Some(nulls),
862        );
863
864        let nulls = NullBuffer::new_null(4);
865        let offsets = ScalarBuffer::from(vec![0, 1, 2, 3, 4]);
866        let sizes = ScalarBuffer::from(vec![1, 1, 1, 1, 0]);
867        let err = LargeListViewArray::try_new(
868            field,
869            offsets.clone(),
870            sizes.clone(),
871            values.clone(),
872            Some(nulls),
873        )
874        .unwrap_err();
875
876        assert_eq!(
877            err.to_string(),
878            "Invalid argument error: Incorrect length of null buffer for LargeListViewArray, expected 5 got 4"
879        );
880
881        let field = Arc::new(Field::new("element", DataType::Int64, false));
882        let err = LargeListViewArray::try_new(
883            field.clone(),
884            offsets.clone(),
885            sizes.clone(),
886            values.clone(),
887            None,
888        )
889        .unwrap_err();
890
891        assert_eq!(
892            err.to_string(),
893            "Invalid argument error: LargeListViewArray expected data type Int64 got Int32 for \"element\""
894        );
895
896        let nulls = NullBuffer::new_null(7);
897        let values = Int64Array::new(vec![0; 7].into(), Some(nulls));
898        let values = Arc::new(values);
899
900        let err = LargeListViewArray::try_new(
901            field,
902            offsets.clone(),
903            sizes.clone(),
904            values.clone(),
905            None,
906        )
907        .unwrap_err();
908
909        assert_eq!(
910            err.to_string(),
911            "Invalid argument error: Non-nullable field of LargeListViewArray \"element\" cannot contain nulls"
912        );
913    }
914
915    #[test]
916    fn test_from_fixed_size_list() {
917        let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);
918        builder.values().append_slice(&[1, 2, 3]);
919        builder.append(true);
920        builder.values().append_slice(&[0, 0, 0]);
921        builder.append(false);
922        builder.values().append_slice(&[4, 5, 6]);
923        builder.append(true);
924        let list: ListViewArray = builder.finish().into();
925        let values: Vec<_> = list
926            .iter()
927            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
928            .collect();
929        assert_eq!(values, vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])]);
930        let offsets = list.value_offsets();
931        assert_eq!(offsets, &[0, 3, 6]);
932        let sizes = list.value_sizes();
933        assert_eq!(sizes, &[3, 3, 3]);
934    }
935
936    #[test]
937    fn test_list_view_array_overlap_lists() {
938        let value_data = unsafe {
939            ArrayData::builder(DataType::Int32)
940                .len(8)
941                .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
942                .build_unchecked()
943        };
944        let list_data_type =
945            DataType::ListView(Arc::new(Field::new("item", DataType::Int32, false)));
946        let list_data = unsafe {
947            ArrayData::builder(list_data_type)
948                .len(2)
949                .add_buffer(Buffer::from_slice_ref([0, 3])) // offsets
950                .add_buffer(Buffer::from_slice_ref([5, 5])) // sizes
951                .add_child_data(value_data)
952                .build_unchecked()
953        };
954        let array = ListViewArray::from(list_data);
955
956        assert_eq!(array.len(), 2);
957        assert_eq!(array.value_size(0), 5);
958        assert_eq!(array.value_size(1), 5);
959
960        let values: Vec<_> = array
961            .iter()
962            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
963            .collect();
964        assert_eq!(
965            values,
966            vec![Some(vec![0, 1, 2, 3, 4]), Some(vec![3, 4, 5, 6, 7])]
967        );
968    }
969
970    #[test]
971    fn test_list_view_array_incomplete_offsets() {
972        let value_data = unsafe {
973            ArrayData::builder(DataType::Int32)
974                .len(50)
975                .add_buffer(Buffer::from_slice_ref((0..50).collect::<Vec<i32>>()))
976                .build_unchecked()
977        };
978        let list_data_type =
979            DataType::ListView(Arc::new(Field::new("item", DataType::Int32, false)));
980        let list_data = unsafe {
981            ArrayData::builder(list_data_type)
982                .len(3)
983                .add_buffer(Buffer::from_slice_ref([0, 5, 10])) // offsets
984                .add_buffer(Buffer::from_slice_ref([0, 5, 10])) // sizes
985                .add_child_data(value_data)
986                .build_unchecked()
987        };
988        let array = ListViewArray::from(list_data);
989
990        assert_eq!(array.len(), 3);
991        assert_eq!(array.value_size(0), 0);
992        assert_eq!(array.value_size(1), 5);
993        assert_eq!(array.value_size(2), 10);
994
995        let values: Vec<_> = array
996            .iter()
997            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
998            .collect();
999        assert_eq!(
1000            values,
1001            vec![
1002                Some(vec![]),
1003                Some(vec![5, 6, 7, 8, 9]),
1004                Some(vec![10, 11, 12, 13, 14, 15, 16, 17, 18, 19])
1005            ]
1006        );
1007    }
1008
1009    #[test]
1010    fn test_list_view_array_empty_lists() {
1011        let value_data = unsafe {
1012            ArrayData::builder(DataType::Int32)
1013                .len(0)
1014                .add_buffer(Buffer::from_slice_ref::<i32, &[_; 0]>(&[]))
1015                .build_unchecked()
1016        };
1017        let list_data_type =
1018            DataType::ListView(Arc::new(Field::new("item", DataType::Int32, false)));
1019        let list_data = unsafe {
1020            ArrayData::builder(list_data_type)
1021                .len(3)
1022                .add_buffer(Buffer::from_slice_ref([0, 0, 0])) // offsets
1023                .add_buffer(Buffer::from_slice_ref([0, 0, 0])) // sizes
1024                .add_child_data(value_data)
1025                .build_unchecked()
1026        };
1027        let array = ListViewArray::from(list_data);
1028
1029        assert_eq!(array.len(), 3);
1030        assert_eq!(array.value_size(0), 0);
1031        assert_eq!(array.value_size(1), 0);
1032        assert_eq!(array.value_size(2), 0);
1033
1034        let values: Vec<_> = array
1035            .iter()
1036            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1037            .collect();
1038        assert_eq!(values, vec![Some(vec![]), Some(vec![]), Some(vec![])]);
1039    }
1040}