arrow_cast/cast/
list.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::cast::*;
19
20/// Helper function that takes a primitive array and casts to a (generic) list array.
21pub(crate) fn cast_values_to_list<O: OffsetSizeTrait>(
22    array: &dyn Array,
23    to: &FieldRef,
24    cast_options: &CastOptions,
25) -> Result<ArrayRef, ArrowError> {
26    let values = cast_with_options(array, to.data_type(), cast_options)?;
27    let offsets = OffsetBuffer::from_lengths(std::iter::repeat(1).take(values.len()));
28    let list = GenericListArray::<O>::new(to.clone(), offsets, values, None);
29    Ok(Arc::new(list))
30}
31
32/// Helper function that takes a primitive array and casts to a fixed size list array.
33pub(crate) fn cast_values_to_fixed_size_list(
34    array: &dyn Array,
35    to: &FieldRef,
36    size: i32,
37    cast_options: &CastOptions,
38) -> Result<ArrayRef, ArrowError> {
39    let values = cast_with_options(array, to.data_type(), cast_options)?;
40    let list = FixedSizeListArray::new(to.clone(), size, values, None);
41    Ok(Arc::new(list))
42}
43
44pub(crate) fn cast_single_element_fixed_size_list_to_values(
45    array: &dyn Array,
46    to: &DataType,
47    cast_options: &CastOptions,
48) -> Result<ArrayRef, ArrowError> {
49    let values = array.as_fixed_size_list().values();
50    cast_with_options(values, to, cast_options)
51}
52
53pub(crate) fn cast_fixed_size_list_to_list<OffsetSize>(
54    array: &dyn Array,
55) -> Result<ArrayRef, ArrowError>
56where
57    OffsetSize: OffsetSizeTrait,
58{
59    let fixed_size_list: &FixedSizeListArray = array.as_fixed_size_list();
60    let list: GenericListArray<OffsetSize> = fixed_size_list.clone().into();
61    Ok(Arc::new(list))
62}
63
64pub(crate) fn cast_list_to_fixed_size_list<OffsetSize>(
65    array: &GenericListArray<OffsetSize>,
66    field: &FieldRef,
67    size: i32,
68    cast_options: &CastOptions,
69) -> Result<ArrayRef, ArrowError>
70where
71    OffsetSize: OffsetSizeTrait,
72{
73    let cap = array.len() * size as usize;
74
75    // Whether the resulting array may contain null lists
76    let nullable = cast_options.safe || array.null_count() != 0;
77    let mut nulls = nullable.then(|| {
78        let mut buffer = BooleanBufferBuilder::new(array.len());
79        match array.nulls() {
80            Some(n) => buffer.append_buffer(n.inner()),
81            None => buffer.append_n(array.len(), true),
82        }
83        buffer
84    });
85
86    // Nulls in FixedSizeListArray take up space and so we must pad the values
87    let values = array.values().to_data();
88    let mut mutable = MutableArrayData::new(vec![&values], nullable, cap);
89    // The end position in values of the last incorrectly-sized list slice
90    let mut last_pos = 0;
91    for (idx, w) in array.offsets().windows(2).enumerate() {
92        let start_pos = w[0].as_usize();
93        let end_pos = w[1].as_usize();
94        let len = end_pos - start_pos;
95
96        if len != size as usize {
97            if cast_options.safe || array.is_null(idx) {
98                if last_pos != start_pos {
99                    // Extend with valid slices
100                    mutable.extend(0, last_pos, start_pos);
101                }
102                // Pad this slice with nulls
103                mutable.extend_nulls(size as _);
104                nulls.as_mut().unwrap().set_bit(idx, false);
105                // Set last_pos to the end of this slice's values
106                last_pos = end_pos
107            } else {
108                return Err(ArrowError::CastError(format!(
109                    "Cannot cast to FixedSizeList({size}): value at index {idx} has length {len}",
110                )));
111            }
112        }
113    }
114
115    let values = match last_pos {
116        0 => array.values().slice(0, cap), // All slices were the correct length
117        _ => {
118            if mutable.len() != cap {
119                // Remaining slices were all correct length
120                let remaining = cap - mutable.len();
121                mutable.extend(0, last_pos, last_pos + remaining)
122            }
123            make_array(mutable.freeze())
124        }
125    };
126
127    // Cast the inner values if necessary
128    let values = cast_with_options(values.as_ref(), field.data_type(), cast_options)?;
129
130    // Construct the FixedSizeListArray
131    let nulls = nulls.map(|mut x| x.finish().into());
132    let array = FixedSizeListArray::new(field.clone(), size, values, nulls);
133    Ok(Arc::new(array))
134}
135
136/// Helper function that takes an Generic list container and casts the inner datatype.
137pub(crate) fn cast_list_values<O: OffsetSizeTrait>(
138    array: &dyn Array,
139    to: &FieldRef,
140    cast_options: &CastOptions,
141) -> Result<ArrayRef, ArrowError> {
142    let list = array.as_list::<O>();
143    let values = cast_with_options(list.values(), to.data_type(), cast_options)?;
144    Ok(Arc::new(GenericListArray::<O>::new(
145        to.clone(),
146        list.offsets().clone(),
147        values,
148        list.nulls().cloned(),
149    )))
150}
151
152/// Cast the container type of List/Largelist array along with the inner datatype
153pub(crate) fn cast_list<I: OffsetSizeTrait, O: OffsetSizeTrait>(
154    array: &dyn Array,
155    field: &FieldRef,
156    cast_options: &CastOptions,
157) -> Result<ArrayRef, ArrowError> {
158    let list = array.as_list::<I>();
159    let values = list.values();
160    let offsets = list.offsets();
161    let nulls = list.nulls().cloned();
162
163    if !O::IS_LARGE && values.len() > i32::MAX as usize {
164        return Err(ArrowError::ComputeError(
165            "LargeList too large to cast to List".into(),
166        ));
167    }
168
169    // Recursively cast values
170    let values = cast_with_options(values, field.data_type(), cast_options)?;
171    let offsets: Vec<_> = offsets.iter().map(|x| O::usize_as(x.as_usize())).collect();
172
173    // Safety: valid offsets and checked for overflow
174    let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
175
176    Ok(Arc::new(GenericListArray::<O>::new(
177        field.clone(),
178        offsets,
179        values,
180        nulls,
181    )))
182}