arrow_array/
cast.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Defines helper functions for downcasting [`dyn Array`](Array) to concrete types
19
20use crate::array::*;
21use crate::types::*;
22use arrow_data::ArrayData;
23
24/// Re-export symbols needed for downcast macros
25///
26/// Name follows `serde` convention
27#[doc(hidden)]
28pub mod __private {
29    pub use arrow_schema::{DataType, IntervalUnit, TimeUnit};
30}
31
/// Repeats the provided pattern once per comma separated expression
///
/// A single expression yields the bare pattern. Two or more expressions yield a
/// flat tuple pattern holding one copy of the pattern per expression, which is
/// the shape of the `match ($($data_type),+)` scrutinee built by the downcast
/// macros. The expressions themselves are only counted, never evaluated.
#[doc(hidden)]
#[macro_export]
macro_rules! repeat_pat {
    ($e:pat, $v_:expr) => {
        $e
    };
    ($e:pat, $($v_:expr),+) => {
        // Expand every expression to one copy of the pattern so that three or
        // more expressions give `(p, p, p)` instead of a nested `(p, (p, p))`,
        // which could never match a flat tuple scrutinee
        ($($crate::repeat_pat!($e, $v_)),+)
    };
}
43
/// Dispatches on one or more expressions that evaluate to an integer [`DataType`]
///
/// When every expression evaluates to the same integer [`DataType`], the macro `m` is invoked
/// with the matching integer [`ArrowPrimitiveType`], followed by any additional arguments.
/// Every other combination is handled by the fallback match arms listed after the dispatch clause.
///
/// ```
/// # use arrow_array::{downcast_primitive, ArrowPrimitiveType, downcast_integer};
/// # use arrow_schema::DataType;
///
/// macro_rules! dictionary_key_size_helper {
///   ($t:ty, $o:ty) => {
///       std::mem::size_of::<<$t as ArrowPrimitiveType>::Native>() as $o
///   };
/// }
///
/// fn dictionary_key_size(t: &DataType) -> u8 {
///     match t {
///         DataType::Dictionary(k, _) => downcast_integer! {
///             k.as_ref() => (dictionary_key_size_helper, u8),
///             _ => unreachable!(),
///         },
///         _ => u8::MAX,
///     }
/// }
///
/// assert_eq!(dictionary_key_size(&DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8))), 4);
/// assert_eq!(dictionary_key_size(&DataType::Dictionary(Box::new(DataType::Int64), Box::new(DataType::Utf8))), 8);
/// assert_eq!(dictionary_key_size(&DataType::Dictionary(Box::new(DataType::UInt16), Box::new(DataType::Utf8))), 2);
/// ```
///
/// [`DataType`]: arrow_schema::DataType
#[macro_export]
macro_rules! downcast_integer {
    ($($dt:expr),+ => ($mac:path $(, $extra:tt)*), $($pat:pat => $rhs:expr $(,)*)*) => {
        match ($($dt),+) {
            $crate::repeat_pat!($crate::cast::__private::DataType::Int8, $($dt),+) => {
                $mac!($crate::types::Int8Type $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Int16, $($dt),+) => {
                $mac!($crate::types::Int16Type $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Int32, $($dt),+) => {
                $mac!($crate::types::Int32Type $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Int64, $($dt),+) => {
                $mac!($crate::types::Int64Type $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::UInt8, $($dt),+) => {
                $mac!($crate::types::UInt8Type $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::UInt16, $($dt),+) => {
                $mac!($crate::types::UInt16Type $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::UInt32, $($dt),+) => {
                $mac!($crate::types::UInt32Type $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::UInt64, $($dt),+) => {
                $mac!($crate::types::UInt64Type $(, $extra)*)
            }
            // Caller-supplied arms come last so they only see non-integer inputs
            $($pat => $rhs,)*
        }
    };
}
105
/// Downcasts one or more arrays to an integer [`PrimitiveArray`] based on their [`DataType`]
///
/// When the arrays share an integer data type, each identifier is rebound to the concrete
/// [`PrimitiveArray`] and the provided expression is evaluated. Any other data type is handled
/// by the match arms that follow the expression. Several arrays can be dispatched together by
/// writing the identifiers as a parenthesised, comma separated list.
///
/// ```
/// # use arrow_array::{Array, downcast_integer_array, cast::as_string_array};
/// # use arrow_schema::DataType;
///
/// fn print_integer(array: &dyn Array) {
///     downcast_integer_array!(
///         array => {
///             for v in array {
///                 println!("{:?}", v);
///             }
///         }
///         DataType::Utf8 => {
///             for v in as_string_array(array) {
///                 println!("{:?}", v);
///             }
///         }
///         t => println!("Unsupported datatype {}", t)
///     )
/// }
/// ```
///
/// [`DataType`]: arrow_schema::DataType
#[macro_export]
macro_rules! downcast_integer_array {
    // `ident => expr,` form: wrap the expression in a block and re-dispatch
    ($arr:ident => $body:expr, $($pat:pat => $rhs:expr $(,)*)*) => {
        $crate::downcast_integer_array!($arr => {$body} $($pat => $rhs)*)
    };
    // `(a, b) => expr,` form: wrap the expression in a block and re-dispatch
    (($($arr:ident),+) => $body:expr, $($pat:pat => $rhs:expr $(,)*)*) => {
        $crate::downcast_integer_array!($($arr),+ => {$body} $($pat => $rhs)*)
    };
    // `a, b => { .. }` form: normalise to the parenthesised form below
    ($($arr:ident),+ => $body:block $($pat:pat => $rhs:expr $(,)*)*) => {
        $crate::downcast_integer_array!(($($arr),+) => $body $($pat => $rhs)*)
    };
    // Canonical form: dispatch on the data type of every array
    (($($arr:ident),+) => $body:block $($pat:pat => $rhs:expr $(,)*)*) => {
        $crate::downcast_integer!{
            $($arr.data_type()),+ => ($crate::downcast_primitive_array_helper, $($arr),+, $body),
            $($pat => $rhs,)*
        }
    };
}
149
/// Dispatches on one or more expressions that evaluate to a run end index [`DataType`]
///
/// When every expression evaluates to the same `Int16`, `Int32` or `Int64` [`DataType`], the
/// macro `m` is invoked with the matching integer [`RunEndIndexType`], followed by any
/// additional arguments. Everything else is handled by the trailing fallback match arms.
///
/// ```
/// # use std::sync::Arc;
/// # use arrow_array::{downcast_primitive, ArrowPrimitiveType, downcast_run_end_index};
/// # use arrow_schema::{DataType, Field};
///
/// macro_rules! run_end_size_helper {
///   ($t:ty, $o:ty) => {
///       std::mem::size_of::<<$t as ArrowPrimitiveType>::Native>() as $o
///   };
/// }
///
/// fn run_end_index_size(t: &DataType) -> u8 {
///     match t {
///         DataType::RunEndEncoded(k, _) => downcast_run_end_index! {
///             k.data_type() => (run_end_size_helper, u8),
///             _ => unreachable!(),
///         },
///         _ => u8::MAX,
///     }
/// }
///
/// assert_eq!(run_end_index_size(&DataType::RunEndEncoded(Arc::new(Field::new("a", DataType::Int32, false)), Arc::new(Field::new("b", DataType::Utf8, true)))), 4);
/// assert_eq!(run_end_index_size(&DataType::RunEndEncoded(Arc::new(Field::new("a", DataType::Int64, false)), Arc::new(Field::new("b", DataType::Utf8, true)))), 8);
/// assert_eq!(run_end_index_size(&DataType::RunEndEncoded(Arc::new(Field::new("a", DataType::Int16, false)), Arc::new(Field::new("b", DataType::Utf8, true)))), 2);
/// ```
///
/// [`DataType`]: arrow_schema::DataType
#[macro_export]
macro_rules! downcast_run_end_index {
    ($($dt:expr),+ => ($mac:path $(, $extra:tt)*), $($pat:pat => $rhs:expr $(,)*)*) => {
        match ($($dt),+) {
            $crate::repeat_pat!($crate::cast::__private::DataType::Int16, $($dt),+) => {
                $mac!($crate::types::Int16Type $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Int32, $($dt),+) => {
                $mac!($crate::types::Int32Type $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Int64, $($dt),+) => {
                $mac!($crate::types::Int64Type $(, $extra)*)
            }
            // Caller-supplied arms handle every other data type
            $($pat => $rhs,)*
        }
    };
}
197
/// Dispatches on one or more expressions that evaluate to a temporal [`DataType`]
///
/// When every expression evaluates to the same time, date or timestamp [`DataType`], the macro
/// `m` is invoked with the matching [`ArrowPrimitiveType`], followed by any additional
/// arguments. Timestamps are matched on their unit only. Everything else is handled by the
/// trailing fallback match arms.
///
/// ```
/// # use arrow_array::{downcast_temporal, ArrowPrimitiveType};
/// # use arrow_schema::DataType;
///
/// macro_rules! temporal_size_helper {
///   ($t:ty, $o:ty) => {
///       std::mem::size_of::<<$t as ArrowPrimitiveType>::Native>() as $o
///   };
/// }
///
/// fn temporal_size(t: &DataType) -> u8 {
///     downcast_temporal! {
///         t => (temporal_size_helper, u8),
///         _ => u8::MAX
///     }
/// }
///
/// assert_eq!(temporal_size(&DataType::Date32), 4);
/// assert_eq!(temporal_size(&DataType::Date64), 8);
/// ```
///
/// [`DataType`]: arrow_schema::DataType
#[macro_export]
macro_rules! downcast_temporal {
    ($($dt:expr),+ => ($mac:path $(, $extra:tt)*), $($pat:pat => $rhs:expr $(,)*)*) => {
        match ($($dt),+) {
            $crate::repeat_pat!($crate::cast::__private::DataType::Time32($crate::cast::__private::TimeUnit::Second), $($dt),+) => {
                $mac!($crate::types::Time32SecondType $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Time32($crate::cast::__private::TimeUnit::Millisecond), $($dt),+) => {
                $mac!($crate::types::Time32MillisecondType $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Time64($crate::cast::__private::TimeUnit::Microsecond), $($dt),+) => {
                $mac!($crate::types::Time64MicrosecondType $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Time64($crate::cast::__private::TimeUnit::Nanosecond), $($dt),+) => {
                $mac!($crate::types::Time64NanosecondType $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Date32, $($dt),+) => {
                $mac!($crate::types::Date32Type $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Date64, $($dt),+) => {
                $mac!($crate::types::Date64Type $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Timestamp($crate::cast::__private::TimeUnit::Second, _), $($dt),+) => {
                $mac!($crate::types::TimestampSecondType $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Timestamp($crate::cast::__private::TimeUnit::Millisecond, _), $($dt),+) => {
                $mac!($crate::types::TimestampMillisecondType $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Timestamp($crate::cast::__private::TimeUnit::Microsecond, _), $($dt),+) => {
                $mac!($crate::types::TimestampMicrosecondType $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Timestamp($crate::cast::__private::TimeUnit::Nanosecond, _), $($dt),+) => {
                $mac!($crate::types::TimestampNanosecondType $(, $extra)*)
            }
            // Caller-supplied arms handle every non-temporal data type
            $($pat => $rhs,)*
        }
    };
}
261
/// Downcasts one or more arrays to a temporal [`PrimitiveArray`] based on their [`DataType`]
///
/// When the arrays share a temporal data type, each identifier is rebound to the concrete
/// [`PrimitiveArray`] and the provided expression is evaluated. Any other data type is handled
/// by the match arms that follow the expression.
///
/// ```
/// # use arrow_array::{Array, downcast_temporal_array, cast::as_string_array};
/// # use arrow_schema::DataType;
///
/// fn print_temporal(array: &dyn Array) {
///     downcast_temporal_array!(
///         array => {
///             for v in array {
///                 println!("{:?}", v);
///             }
///         }
///         DataType::Utf8 => {
///             for v in as_string_array(array) {
///                 println!("{:?}", v);
///             }
///         }
///         t => println!("Unsupported datatype {}", t)
///     )
/// }
/// ```
///
/// [`DataType`]: arrow_schema::DataType
#[macro_export]
macro_rules! downcast_temporal_array {
    // `ident => expr,` form: wrap the expression in a block and re-dispatch
    ($arr:ident => $body:expr, $($pat:pat => $rhs:expr $(,)*)*) => {
        $crate::downcast_temporal_array!($arr => {$body} $($pat => $rhs)*)
    };
    // `(a, b) => expr,` form: wrap the expression in a block and re-dispatch
    (($($arr:ident),+) => $body:expr, $($pat:pat => $rhs:expr $(,)*)*) => {
        $crate::downcast_temporal_array!($($arr),+ => {$body} $($pat => $rhs)*)
    };
    // `a, b => { .. }` form: normalise to the parenthesised form below
    ($($arr:ident),+ => $body:block $($pat:pat => $rhs:expr $(,)*)*) => {
        $crate::downcast_temporal_array!(($($arr),+) => $body $($pat => $rhs)*)
    };
    // Canonical form: dispatch on the data type of every array
    (($($arr:ident),+) => $body:block $($pat:pat => $rhs:expr $(,)*)*) => {
        $crate::downcast_temporal!{
            $($arr.data_type()),+ => ($crate::downcast_primitive_array_helper, $($arr),+, $body),
            $($pat => $rhs,)*
        }
    };
}
305
/// Dispatches on one or more expressions that evaluate to a primitive [`DataType`]
///
/// When every expression evaluates to the same primitive [`DataType`], the macro `m` is invoked
/// with the matching [`ArrowPrimitiveType`], followed by any additional arguments. Integer,
/// floating point, decimal, interval and duration types are tried first, then the temporal
/// types, and finally the caller's fallback match arms.
///
/// ```
/// # use arrow_array::{downcast_primitive, ArrowPrimitiveType};
/// # use arrow_schema::DataType;
///
/// macro_rules! primitive_size_helper {
///   ($t:ty, $o:ty) => {
///       std::mem::size_of::<<$t as ArrowPrimitiveType>::Native>() as $o
///   };
/// }
///
/// fn primitive_size(t: &DataType) -> u8 {
///     downcast_primitive! {
///         t => (primitive_size_helper, u8),
///         _ => u8::MAX
///     }
/// }
///
/// assert_eq!(primitive_size(&DataType::Int32), 4);
/// assert_eq!(primitive_size(&DataType::Int64), 8);
/// assert_eq!(primitive_size(&DataType::Float16), 2);
/// assert_eq!(primitive_size(&DataType::Decimal128(38, 10)), 16);
/// assert_eq!(primitive_size(&DataType::Decimal256(76, 20)), 32);
/// ```
///
/// [`DataType`]: arrow_schema::DataType
#[macro_export]
macro_rules! downcast_primitive {
    ($($dt:expr),+ => ($mac:path $(, $extra:tt)*), $($pat:pat => $rhs:expr $(,)*)*) => {
        // The integer dispatch is the outer `match`; the remaining primitive
        // types are passed to it as fallback arms
        $crate::downcast_integer! {
            $($dt),+ => ($mac $(, $extra)*),
            $crate::repeat_pat!($crate::cast::__private::DataType::Float16, $($dt),+) => {
                $mac!($crate::types::Float16Type $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Float32, $($dt),+) => {
                $mac!($crate::types::Float32Type $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Float64, $($dt),+) => {
                $mac!($crate::types::Float64Type $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Decimal128(_, _), $($dt),+) => {
                $mac!($crate::types::Decimal128Type $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Decimal256(_, _), $($dt),+) => {
                $mac!($crate::types::Decimal256Type $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Interval($crate::cast::__private::IntervalUnit::YearMonth), $($dt),+) => {
                $mac!($crate::types::IntervalYearMonthType $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Interval($crate::cast::__private::IntervalUnit::DayTime), $($dt),+) => {
                $mac!($crate::types::IntervalDayTimeType $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Interval($crate::cast::__private::IntervalUnit::MonthDayNano), $($dt),+) => {
                $mac!($crate::types::IntervalMonthDayNanoType $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Duration($crate::cast::__private::TimeUnit::Second), $($dt),+) => {
                $mac!($crate::types::DurationSecondType $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Duration($crate::cast::__private::TimeUnit::Millisecond), $($dt),+) => {
                $mac!($crate::types::DurationMillisecondType $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Duration($crate::cast::__private::TimeUnit::Microsecond), $($dt),+) => {
                $mac!($crate::types::DurationMicrosecondType $(, $extra)*)
            }
            $crate::repeat_pat!($crate::cast::__private::DataType::Duration($crate::cast::__private::TimeUnit::Nanosecond), $($dt),+) => {
                $mac!($crate::types::DurationNanosecondType $(, $extra)*)
            }
            // Anything left is re-dispatched as a temporal type; note that the
            // data type expressions are evaluated again by this nested `match`
            _ => {
                $crate::downcast_temporal! {
                    $($dt),+ => ($mac $(, $extra)*),
                    $($pat => $rhs,)*
                }
            }
        }
    };
}
384
// Rebinds each identifier to the result of `as_primitive_array::<$ty>` and
// then evaluates the block with those shadowing bindings in scope
#[macro_export]
#[doc(hidden)]
macro_rules! downcast_primitive_array_helper {
    ($ty:ty, $($arr:ident),+, $body:block) => {{
        $(
            let $arr = $crate::cast::as_primitive_array::<$ty>($arr);
        )+
        $body
    }};
}
393
/// Downcasts one or more arrays to a [`PrimitiveArray`] based on their [`DataType`]
///
/// When the arrays share a primitive data type, each identifier is rebound to the concrete
/// [`PrimitiveArray`] and the provided expression is evaluated. Any other data type is handled
/// by the match arms that follow the expression.
///
/// ```
/// # use arrow_array::{Array, downcast_primitive_array, cast::as_string_array};
/// # use arrow_schema::DataType;
///
/// fn print_primitive(array: &dyn Array) {
///     downcast_primitive_array!(
///         array => {
///             for v in array {
///                 println!("{:?}", v);
///             }
///         }
///         DataType::Utf8 => {
///             for v in as_string_array(array) {
///                 println!("{:?}", v);
///             }
///         }
///         t => println!("Unsupported datatype {}", t)
///     )
/// }
/// ```
///
/// [`DataType`]: arrow_schema::DataType
#[macro_export]
macro_rules! downcast_primitive_array {
    // `ident => expr,` form: wrap the expression in a block and re-dispatch
    ($arr:ident => $body:expr, $($pat:pat => $rhs:expr $(,)*)*) => {
        $crate::downcast_primitive_array!($arr => {$body} $($pat => $rhs)*)
    };
    // `(a, b) => expr,` form: wrap the expression in a block and re-dispatch
    (($($arr:ident),+) => $body:expr, $($pat:pat => $rhs:expr $(,)*)*) => {
        $crate::downcast_primitive_array!($($arr),+ => {$body} $($pat => $rhs)*)
    };
    // `a, b => { .. }` form: normalise to the parenthesised form below
    ($($arr:ident),+ => $body:block $($pat:pat => $rhs:expr $(,)*)*) => {
        $crate::downcast_primitive_array!(($($arr),+) => $body $($pat => $rhs)*)
    };
    // Canonical form: dispatch on the data type of every array
    (($($arr:ident),+) => $body:block $($pat:pat => $rhs:expr $(,)*)*) => {
        $crate::downcast_primitive!{
            $($arr.data_type()),+ => ($crate::downcast_primitive_array_helper, $($arr),+, $body),
            $($pat => $rhs,)*
        }
    };
}
437
438/// Force downcast of an [`Array`], such as an [`ArrayRef`], to
439/// [`PrimitiveArray<T>`], panic'ing on failure.
440///
441/// # Example
442///
443/// ```
444/// # use std::sync::Arc;
445/// # use arrow_array::{ArrayRef, Int32Array};
446/// # use arrow_array::cast::as_primitive_array;
447/// # use arrow_array::types::Int32Type;
448///
449/// let arr: ArrayRef = Arc::new(Int32Array::from(vec![Some(1)]));
450///
451/// // Downcast an `ArrayRef` to Int32Array / PrimitiveArray<Int32>:
452/// let primitive_array: &Int32Array = as_primitive_array(&arr);
453///
454/// // Equivalently:
455/// let primitive_array = as_primitive_array::<Int32Type>(&arr);
456///
457/// // This is the equivalent of:
458/// let primitive_array = arr
459///     .as_any()
460///     .downcast_ref::<Int32Array>()
461///     .unwrap();
462/// ```
463pub fn as_primitive_array<T>(arr: &dyn Array) -> &PrimitiveArray<T>
464where
465    T: ArrowPrimitiveType,
466{
467    arr.as_any()
468        .downcast_ref::<PrimitiveArray<T>>()
469        .expect("Unable to downcast to primitive array")
470}
471
// Rebinds each identifier to the result of `as_dictionary_array::<$ty>` and
// then evaluates the block with those shadowing bindings in scope
#[macro_export]
#[doc(hidden)]
macro_rules! downcast_dictionary_array_helper {
    ($ty:ty, $($arr:ident),+, $body:block) => {{
        $(
            let $arr = $crate::cast::as_dictionary_array::<$ty>($arr);
        )+
        $body
    }};
}
480
/// Downcast an [`Array`] to a [`DictionaryArray`] based on its [`DataType`], accepts
/// a number of subsequent patterns to match the data type
///
/// When the array's data type is a dictionary, the identifier is rebound to the
/// [`DictionaryArray`] with the matching key type and the provided expression is evaluated.
/// Any other data type is handled by the match arms that follow the expression.
///
/// ```
/// # use arrow_array::{Array, StringArray, downcast_dictionary_array, cast::as_string_array};
/// # use arrow_schema::DataType;
///
/// fn print_strings(array: &dyn Array) {
///     downcast_dictionary_array!(
///         array => match array.values().data_type() {
///             DataType::Utf8 => {
///                 for v in array.downcast_dict::<StringArray>().unwrap() {
///                     println!("{:?}", v);
///                 }
///             }
///             t => println!("Unsupported dictionary value type {}", t),
///         },
///         DataType::Utf8 => {
///             for v in as_string_array(array) {
///                 println!("{:?}", v);
///             }
///         }
///         t => println!("Unsupported datatype {}", t)
///     )
/// }
/// ```
///
/// [`DataType`]: arrow_schema::DataType
#[macro_export]
macro_rules! downcast_dictionary_array {
    // Expression form: wrap the expression in a block and re-dispatch. The
    // recursive call goes through `$crate` so that it still resolves when the
    // caller invoked this macro by path without importing it.
    ($values:ident => $e:expr, $($p:pat => $fallback:expr $(,)*)*) => {
        $crate::downcast_dictionary_array!($values => {$e} $($p => $fallback)*)
    };

    ($values:ident => $e:block $($p:pat => $fallback:expr $(,)*)*) => {
        match $values.data_type() {
            $crate::cast::__private::DataType::Dictionary(k, _) => {
                // Dispatch on the key type; a non-integer key is treated as a bug
                $crate::downcast_integer! {
                    k.as_ref() => ($crate::downcast_dictionary_array_helper, $values, $e),
                    k => unreachable!("unsupported dictionary key type: {}", k)
                }
            }
            $($p => $fallback,)*
        }
    }
}
527
528/// Force downcast of an [`Array`], such as an [`ArrayRef`] to
529/// [`DictionaryArray<T>`], panic'ing on failure.
530///
531/// # Example
532///
533/// ```
534/// # use arrow_array::{ArrayRef, DictionaryArray};
535/// # use arrow_array::cast::as_dictionary_array;
536/// # use arrow_array::types::Int32Type;
537///
538/// let arr: DictionaryArray<Int32Type> = vec![Some("foo")].into_iter().collect();
539/// let arr: ArrayRef = std::sync::Arc::new(arr);
540/// let dict_array: &DictionaryArray<Int32Type> = as_dictionary_array::<Int32Type>(&arr);
541/// ```
542pub fn as_dictionary_array<T>(arr: &dyn Array) -> &DictionaryArray<T>
543where
544    T: ArrowDictionaryKeyType,
545{
546    arr.as_any()
547        .downcast_ref::<DictionaryArray<T>>()
548        .expect("Unable to downcast to dictionary array")
549}
550
551/// Force downcast of an [`Array`], such as an [`ArrayRef`] to
552/// [`RunArray<T>`], panic'ing on failure.
553///
554/// # Example
555///
556/// ```
557/// # use arrow_array::{ArrayRef, RunArray};
558/// # use arrow_array::cast::as_run_array;
559/// # use arrow_array::types::Int32Type;
560///
561/// let arr: RunArray<Int32Type> = vec![Some("foo")].into_iter().collect();
562/// let arr: ArrayRef = std::sync::Arc::new(arr);
563/// let run_array: &RunArray<Int32Type> = as_run_array::<Int32Type>(&arr);
564/// ```
565pub fn as_run_array<T>(arr: &dyn Array) -> &RunArray<T>
566where
567    T: RunEndIndexType,
568{
569    arr.as_any()
570        .downcast_ref::<RunArray<T>>()
571        .expect("Unable to downcast to run array")
572}
573
// Rebinds each identifier to the result of `as_run_array::<$ty>` and then
// evaluates the block with those shadowing bindings in scope
#[macro_export]
#[doc(hidden)]
macro_rules! downcast_run_array_helper {
    ($ty:ty, $($arr:ident),+, $body:block) => {{
        $(
            let $arr = $crate::cast::as_run_array::<$ty>($arr);
        )+
        $body
    }};
}
582
/// Downcast an [`Array`] to a [`RunArray`] based on its [`DataType`], accepts
/// a number of subsequent patterns to match the data type
///
/// When the array's data type is run end encoded, the identifier is rebound to the
/// [`RunArray`] with the matching run end index type and the provided expression is evaluated.
/// Any other data type is handled by the match arms that follow the expression.
///
/// ```
/// # use arrow_array::{Array, StringArray, downcast_run_array, cast::as_string_array};
/// # use arrow_schema::DataType;
///
/// fn print_strings(array: &dyn Array) {
///     downcast_run_array!(
///         array => match array.values().data_type() {
///             DataType::Utf8 => {
///                 for v in array.downcast::<StringArray>().unwrap() {
///                     println!("{:?}", v);
///                 }
///             }
///             t => println!("Unsupported run array value type {}", t),
///         },
///         DataType::Utf8 => {
///             for v in as_string_array(array) {
///                 println!("{:?}", v);
///             }
///         }
///         t => println!("Unsupported datatype {}", t)
///     )
/// }
/// ```
///
/// [`DataType`]: arrow_schema::DataType
#[macro_export]
macro_rules! downcast_run_array {
    // Expression form: wrap the expression in a block and re-dispatch. The
    // recursive call goes through `$crate` so that it still resolves when the
    // caller invoked this macro by path without importing it.
    ($values:ident => $e:expr, $($p:pat => $fallback:expr $(,)*)*) => {
        $crate::downcast_run_array!($values => {$e} $($p => $fallback)*)
    };

    ($values:ident => $e:block $($p:pat => $fallback:expr $(,)*)*) => {
        match $values.data_type() {
            $crate::cast::__private::DataType::RunEndEncoded(k, _) => {
                // Dispatch on the run end field's type; any other type is treated as a bug
                $crate::downcast_run_end_index! {
                    k.data_type() => ($crate::downcast_run_array_helper, $values, $e),
                    k => unreachable!("unsupported run end index type: {}", k)
                }
            }
            $($p => $fallback,)*
        }
    }
}
629
630/// Force downcast of an [`Array`], such as an [`ArrayRef`] to
631/// [`GenericListArray<T>`], panicking on failure.
632pub fn as_generic_list_array<S: OffsetSizeTrait>(arr: &dyn Array) -> &GenericListArray<S> {
633    arr.as_any()
634        .downcast_ref::<GenericListArray<S>>()
635        .expect("Unable to downcast to list array")
636}
637
638/// Force downcast of an [`Array`], such as an [`ArrayRef`] to
639/// [`ListArray`], panicking on failure.
640#[inline]
641pub fn as_list_array(arr: &dyn Array) -> &ListArray {
642    as_generic_list_array::<i32>(arr)
643}
644
645/// Force downcast of an [`Array`], such as an [`ArrayRef`] to
646/// [`FixedSizeListArray`], panicking on failure.
647#[inline]
648pub fn as_fixed_size_list_array(arr: &dyn Array) -> &FixedSizeListArray {
649    arr.as_any()
650        .downcast_ref::<FixedSizeListArray>()
651        .expect("Unable to downcast to fixed size list array")
652}
653
654/// Force downcast of an [`Array`], such as an [`ArrayRef`] to
655/// [`LargeListArray`], panicking on failure.
656#[inline]
657pub fn as_large_list_array(arr: &dyn Array) -> &LargeListArray {
658    as_generic_list_array::<i64>(arr)
659}
660
661/// Force downcast of an [`Array`], such as an [`ArrayRef`] to
662/// [`GenericBinaryArray<S>`], panicking on failure.
663#[inline]
664pub fn as_generic_binary_array<S: OffsetSizeTrait>(arr: &dyn Array) -> &GenericBinaryArray<S> {
665    arr.as_any()
666        .downcast_ref::<GenericBinaryArray<S>>()
667        .expect("Unable to downcast to binary array")
668}
669
670/// Force downcast of an [`Array`], such as an [`ArrayRef`] to
671/// [`StringArray`], panicking on failure.
672///
673/// # Example
674///
675/// ```
676/// # use std::sync::Arc;
677/// # use arrow_array::cast::as_string_array;
678/// # use arrow_array::{ArrayRef, StringArray};
679///
680/// let arr: ArrayRef = Arc::new(StringArray::from_iter(vec![Some("foo")]));
681/// let string_array = as_string_array(&arr);
682/// ```
683pub fn as_string_array(arr: &dyn Array) -> &StringArray {
684    arr.as_any()
685        .downcast_ref::<StringArray>()
686        .expect("Unable to downcast to StringArray")
687}
688
689/// Force downcast of an [`Array`], such as an [`ArrayRef`] to
690/// [`BooleanArray`], panicking on failure.
691///
692/// # Example
693///
694/// ```
695/// # use std::sync::Arc;
696/// # use arrow_array::{ArrayRef, BooleanArray};
697/// # use arrow_array::cast::as_boolean_array;
698///
699/// let arr: ArrayRef = Arc::new(BooleanArray::from_iter(vec![Some(true)]));
700/// let boolean_array = as_boolean_array(&arr);
701/// ```
702pub fn as_boolean_array(arr: &dyn Array) -> &BooleanArray {
703    arr.as_any()
704        .downcast_ref::<BooleanArray>()
705        .expect("Unable to downcast to BooleanArray")
706}
707
708macro_rules! array_downcast_fn {
709    ($name: ident, $arrty: ty, $arrty_str:expr) => {
710        #[doc = "Force downcast of an [`Array`], such as an [`ArrayRef`] to "]
711        #[doc = $arrty_str]
712        pub fn $name(arr: &dyn Array) -> &$arrty {
713            arr.as_any().downcast_ref::<$arrty>().expect(concat!(
714                "Unable to downcast to typed array through ",
715                stringify!($name)
716            ))
717        }
718    };
719
720    // use recursive macro to generate dynamic doc string for a given array type
721    ($name: ident, $arrty: ty) => {
722        array_downcast_fn!(
723            $name,
724            $arrty,
725            concat!("[`", stringify!($arrty), "`], panicking on failure.")
726        );
727    };
728}
729
730array_downcast_fn!(as_largestring_array, LargeStringArray);
731array_downcast_fn!(as_null_array, NullArray);
732array_downcast_fn!(as_struct_array, StructArray);
733array_downcast_fn!(as_union_array, UnionArray);
734array_downcast_fn!(as_map_array, MapArray);
735
736/// Downcasts a `dyn Array` to a concrete type
737///
738/// ```
739/// # use arrow_array::{BooleanArray, Int32Array, RecordBatch, StringArray};
740/// # use arrow_array::cast::downcast_array;
741/// struct ConcreteBatch {
742///     col1: Int32Array,
743///     col2: BooleanArray,
744///     col3: StringArray,
745/// }
746///
747/// impl ConcreteBatch {
748///     fn new(batch: &RecordBatch) -> Self {
749///         Self {
750///             col1: downcast_array(batch.column(0).as_ref()),
751///             col2: downcast_array(batch.column(1).as_ref()),
752///             col3: downcast_array(batch.column(2).as_ref()),
753///         }
754///     }
755/// }
756/// ```
757///
758/// # Panics
759///
760/// Panics if array is not of the correct data type
761pub fn downcast_array<T>(array: &dyn Array) -> T
762where
763    T: From<ArrayData>,
764{
765    T::from(array.to_data())
766}
767
// This module is not `pub`, so `Sealed` is reachable only through the
// `private::Sealed` path used inside this file
mod private {
    /// Marker trait required as a supertrait by `AsArray`
    pub trait Sealed {}
}
771
772/// An extension trait for `dyn Array` that provides ergonomic downcasting
773///
774/// ```
775/// # use std::sync::Arc;
776/// # use arrow_array::{ArrayRef, Int32Array};
777/// # use arrow_array::cast::AsArray;
778/// # use arrow_array::types::Int32Type;
779/// let col = Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef;
780/// assert_eq!(col.as_primitive::<Int32Type>().values(), &[1, 2, 3]);
781/// ```
782pub trait AsArray: private::Sealed {
783    /// Downcast this to a [`BooleanArray`] returning `None` if not possible
784    fn as_boolean_opt(&self) -> Option<&BooleanArray>;
785
786    /// Downcast this to a [`BooleanArray`] panicking if not possible
787    fn as_boolean(&self) -> &BooleanArray {
788        self.as_boolean_opt().expect("boolean array")
789    }
790
791    /// Downcast this to a [`PrimitiveArray`] returning `None` if not possible
792    fn as_primitive_opt<T: ArrowPrimitiveType>(&self) -> Option<&PrimitiveArray<T>>;
793
794    /// Downcast this to a [`PrimitiveArray`] panicking if not possible
795    fn as_primitive<T: ArrowPrimitiveType>(&self) -> &PrimitiveArray<T> {
796        self.as_primitive_opt().expect("primitive array")
797    }
798
799    /// Downcast this to a [`GenericByteArray`] returning `None` if not possible
800    fn as_bytes_opt<T: ByteArrayType>(&self) -> Option<&GenericByteArray<T>>;
801
802    /// Downcast this to a [`GenericByteArray`] panicking if not possible
803    fn as_bytes<T: ByteArrayType>(&self) -> &GenericByteArray<T> {
804        self.as_bytes_opt().expect("byte array")
805    }
806
807    /// Downcast this to a [`GenericStringArray`] returning `None` if not possible
808    fn as_string_opt<O: OffsetSizeTrait>(&self) -> Option<&GenericStringArray<O>> {
809        self.as_bytes_opt()
810    }
811
812    /// Downcast this to a [`GenericStringArray`] panicking if not possible
813    fn as_string<O: OffsetSizeTrait>(&self) -> &GenericStringArray<O> {
814        self.as_bytes_opt().expect("string array")
815    }
816
817    /// Downcast this to a [`GenericBinaryArray`] returning `None` if not possible
818    fn as_binary_opt<O: OffsetSizeTrait>(&self) -> Option<&GenericBinaryArray<O>> {
819        self.as_bytes_opt()
820    }
821
822    /// Downcast this to a [`GenericBinaryArray`] panicking if not possible
823    fn as_binary<O: OffsetSizeTrait>(&self) -> &GenericBinaryArray<O> {
824        self.as_bytes_opt().expect("binary array")
825    }
826
827    /// Downcast this to a [`StringViewArray`] returning `None` if not possible
828    fn as_string_view_opt(&self) -> Option<&StringViewArray> {
829        self.as_byte_view_opt()
830    }
831
832    /// Downcast this to a [`StringViewArray`] panicking if not possible
833    fn as_string_view(&self) -> &StringViewArray {
834        self.as_byte_view_opt().expect("string view array")
835    }
836
837    /// Downcast this to a [`BinaryViewArray`] returning `None` if not possible
838    fn as_binary_view_opt(&self) -> Option<&BinaryViewArray> {
839        self.as_byte_view_opt()
840    }
841
842    /// Downcast this to a [`BinaryViewArray`] panicking if not possible
843    fn as_binary_view(&self) -> &BinaryViewArray {
844        self.as_byte_view_opt().expect("binary view array")
845    }
846
847    /// Downcast this to a [`GenericByteViewArray`] returning `None` if not possible
848    fn as_byte_view_opt<T: ByteViewType>(&self) -> Option<&GenericByteViewArray<T>>;
849
850    /// Downcast this to a [`GenericByteViewArray`] panicking if not possible
851    fn as_byte_view<T: ByteViewType>(&self) -> &GenericByteViewArray<T> {
852        self.as_byte_view_opt().expect("byte view array")
853    }
854
855    /// Downcast this to a [`StructArray`] returning `None` if not possible
856    fn as_struct_opt(&self) -> Option<&StructArray>;
857
858    /// Downcast this to a [`StructArray`] panicking if not possible
859    fn as_struct(&self) -> &StructArray {
860        self.as_struct_opt().expect("struct array")
861    }
862
863    /// Downcast this to a [`UnionArray`] returning `None` if not possible
864    fn as_union_opt(&self) -> Option<&UnionArray>;
865
866    /// Downcast this to a [`UnionArray`] panicking if not possible
867    fn as_union(&self) -> &UnionArray {
868        self.as_union_opt().expect("union array")
869    }
870
871    /// Downcast this to a [`GenericListArray`] returning `None` if not possible
872    fn as_list_opt<O: OffsetSizeTrait>(&self) -> Option<&GenericListArray<O>>;
873
874    /// Downcast this to a [`GenericListArray`] panicking if not possible
875    fn as_list<O: OffsetSizeTrait>(&self) -> &GenericListArray<O> {
876        self.as_list_opt().expect("list array")
877    }
878
879    /// Downcast this to a [`GenericListViewArray`] returning `None` if not possible
880    fn as_list_view_opt<O: OffsetSizeTrait>(&self) -> Option<&GenericListViewArray<O>>;
881
882    /// Downcast this to a [`GenericListViewArray`] panicking if not possible
883    fn as_list_view<O: OffsetSizeTrait>(&self) -> &GenericListViewArray<O> {
884        self.as_list_view_opt().expect("list view array")
885    }
886
887    /// Downcast this to a [`FixedSizeBinaryArray`] returning `None` if not possible
888    fn as_fixed_size_binary_opt(&self) -> Option<&FixedSizeBinaryArray>;
889
890    /// Downcast this to a [`FixedSizeBinaryArray`] panicking if not possible
891    fn as_fixed_size_binary(&self) -> &FixedSizeBinaryArray {
892        self.as_fixed_size_binary_opt()
893            .expect("fixed size binary array")
894    }
895
896    /// Downcast this to a [`FixedSizeListArray`] returning `None` if not possible
897    fn as_fixed_size_list_opt(&self) -> Option<&FixedSizeListArray>;
898
899    /// Downcast this to a [`FixedSizeListArray`] panicking if not possible
900    fn as_fixed_size_list(&self) -> &FixedSizeListArray {
901        self.as_fixed_size_list_opt()
902            .expect("fixed size list array")
903    }
904
905    /// Downcast this to a [`MapArray`] returning `None` if not possible
906    fn as_map_opt(&self) -> Option<&MapArray>;
907
908    /// Downcast this to a [`MapArray`] panicking if not possible
909    fn as_map(&self) -> &MapArray {
910        self.as_map_opt().expect("map array")
911    }
912
913    /// Downcast this to a [`DictionaryArray`] returning `None` if not possible
914    fn as_dictionary_opt<K: ArrowDictionaryKeyType>(&self) -> Option<&DictionaryArray<K>>;
915
916    /// Downcast this to a [`DictionaryArray`] panicking if not possible
917    fn as_dictionary<K: ArrowDictionaryKeyType>(&self) -> &DictionaryArray<K> {
918        self.as_dictionary_opt().expect("dictionary array")
919    }
920
921    /// Downcasts this to a [`AnyDictionaryArray`] returning `None` if not possible
922    fn as_any_dictionary_opt(&self) -> Option<&dyn AnyDictionaryArray>;
923
924    /// Downcasts this to a [`AnyDictionaryArray`] panicking if not possible
925    fn as_any_dictionary(&self) -> &dyn AnyDictionaryArray {
926        self.as_any_dictionary_opt().expect("any dictionary array")
927    }
928}
929
930impl private::Sealed for dyn Array + '_ {}
931impl AsArray for dyn Array + '_ {
932    fn as_boolean_opt(&self) -> Option<&BooleanArray> {
933        self.as_any().downcast_ref()
934    }
935
936    fn as_primitive_opt<T: ArrowPrimitiveType>(&self) -> Option<&PrimitiveArray<T>> {
937        self.as_any().downcast_ref()
938    }
939
940    fn as_bytes_opt<T: ByteArrayType>(&self) -> Option<&GenericByteArray<T>> {
941        self.as_any().downcast_ref()
942    }
943
944    fn as_byte_view_opt<T: ByteViewType>(&self) -> Option<&GenericByteViewArray<T>> {
945        self.as_any().downcast_ref()
946    }
947
948    fn as_struct_opt(&self) -> Option<&StructArray> {
949        self.as_any().downcast_ref()
950    }
951
952    fn as_union_opt(&self) -> Option<&UnionArray> {
953        self.as_any().downcast_ref()
954    }
955
956    fn as_list_opt<O: OffsetSizeTrait>(&self) -> Option<&GenericListArray<O>> {
957        self.as_any().downcast_ref()
958    }
959
960    fn as_list_view_opt<O: OffsetSizeTrait>(&self) -> Option<&GenericListViewArray<O>> {
961        self.as_any().downcast_ref()
962    }
963
964    fn as_fixed_size_binary_opt(&self) -> Option<&FixedSizeBinaryArray> {
965        self.as_any().downcast_ref()
966    }
967
968    fn as_fixed_size_list_opt(&self) -> Option<&FixedSizeListArray> {
969        self.as_any().downcast_ref()
970    }
971
972    fn as_map_opt(&self) -> Option<&MapArray> {
973        self.as_any().downcast_ref()
974    }
975
976    fn as_dictionary_opt<K: ArrowDictionaryKeyType>(&self) -> Option<&DictionaryArray<K>> {
977        self.as_any().downcast_ref()
978    }
979
980    fn as_any_dictionary_opt(&self) -> Option<&dyn AnyDictionaryArray> {
981        let array = self;
982        downcast_dictionary_array! {
983            array => Some(array),
984            _ => None
985        }
986    }
987}
988
989impl private::Sealed for ArrayRef {}
990impl AsArray for ArrayRef {
991    fn as_boolean_opt(&self) -> Option<&BooleanArray> {
992        self.as_ref().as_boolean_opt()
993    }
994
995    fn as_primitive_opt<T: ArrowPrimitiveType>(&self) -> Option<&PrimitiveArray<T>> {
996        self.as_ref().as_primitive_opt()
997    }
998
999    fn as_bytes_opt<T: ByteArrayType>(&self) -> Option<&GenericByteArray<T>> {
1000        self.as_ref().as_bytes_opt()
1001    }
1002
1003    fn as_byte_view_opt<T: ByteViewType>(&self) -> Option<&GenericByteViewArray<T>> {
1004        self.as_ref().as_byte_view_opt()
1005    }
1006
1007    fn as_struct_opt(&self) -> Option<&StructArray> {
1008        self.as_ref().as_struct_opt()
1009    }
1010
1011    fn as_union_opt(&self) -> Option<&UnionArray> {
1012        self.as_any().downcast_ref()
1013    }
1014
1015    fn as_list_opt<O: OffsetSizeTrait>(&self) -> Option<&GenericListArray<O>> {
1016        self.as_ref().as_list_opt()
1017    }
1018
1019    fn as_list_view_opt<O: OffsetSizeTrait>(&self) -> Option<&GenericListViewArray<O>> {
1020        self.as_ref().as_list_view_opt()
1021    }
1022
1023    fn as_fixed_size_binary_opt(&self) -> Option<&FixedSizeBinaryArray> {
1024        self.as_ref().as_fixed_size_binary_opt()
1025    }
1026
1027    fn as_fixed_size_list_opt(&self) -> Option<&FixedSizeListArray> {
1028        self.as_ref().as_fixed_size_list_opt()
1029    }
1030
1031    fn as_map_opt(&self) -> Option<&MapArray> {
1032        self.as_any().downcast_ref()
1033    }
1034
1035    fn as_dictionary_opt<K: ArrowDictionaryKeyType>(&self) -> Option<&DictionaryArray<K>> {
1036        self.as_ref().as_dictionary_opt()
1037    }
1038
1039    fn as_any_dictionary_opt(&self) -> Option<&dyn AnyDictionaryArray> {
1040        self.as_ref().as_any_dictionary_opt()
1041    }
1042}
1043
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_buffer::i256;
    use arrow_schema::DataType;
    use std::sync::Arc;

    /// `as_primitive_array` accepts a concrete array as well as an `ArrayRef`
    #[test]
    fn test_as_primitive_array_ref() {
        let ints = Int32Array::from(vec![1, 2, 3]);
        assert!(!as_primitive_array::<Int32Type>(&ints).is_empty());

        // the same downcast must succeed once the array sits behind an Arc
        let shared: ArrayRef = Arc::new(ints);
        assert!(!as_primitive_array::<Int32Type>(&shared).is_empty());
    }

    /// `as_string_array` accepts a concrete array as well as an `ArrayRef`
    #[test]
    fn test_as_string_array_ref() {
        let strings = StringArray::from(vec!["foo", "bar"]);
        assert!(!as_string_array(&strings).is_empty());

        // the same downcast must succeed once the array sits behind an Arc
        let shared: ArrayRef = Arc::new(strings);
        assert!(!as_string_array(&shared).is_empty());
    }

    /// A `Decimal128Array` can be downcast with `Decimal128Type`
    #[test]
    fn test_decimal128array() {
        let decimals = Decimal128Array::from_iter_values([1, 2, 4, 5]);
        assert!(!as_primitive_array::<Decimal128Type>(&decimals).is_empty());
    }

    /// A `Decimal256Array` can be downcast with `Decimal256Type`
    #[test]
    fn test_decimal256array() {
        let wide = [1, 2, 4, 5].map(i256::from_i128);
        let decimals = Decimal256Array::from_iter_values(wide);
        assert!(!as_primitive_array::<Decimal256Type>(&decimals).is_empty());
    }

    /// An `Int32` array must be handled by the integer arm of the macro
    #[test]
    fn downcast_integer_array_should_match_only_integers() {
        let ints: ArrayRef = Arc::new(Int32Array::new_null(1));
        let ints_ref = &ints;
        downcast_integer_array!(
            ints_ref => {
                assert_eq!(ints_ref.null_count(), 1);
            },
            _ => panic!("unexpected data type")
        );
    }

    /// A `Float32` array must skip the integer arm and reach its own fallback arm
    #[test]
    fn downcast_integer_array_should_not_match_primitive_that_are_not_integers() {
        let floats: ArrayRef = Arc::new(Float32Array::new_null(1));
        let floats_ref = &floats;
        downcast_integer_array!(
            floats_ref => {
                panic!("unexpected data type {}", floats_ref.data_type())
            },
            DataType::Float32 => {
                assert_eq!(floats_ref.null_count(), 1);
            },
            _ => panic!("unexpected data type")
        );
    }

    /// A `Utf8` array must skip the integer arm and reach its own fallback arm
    #[test]
    fn downcast_integer_array_should_not_match_non_primitive() {
        let strings: ArrayRef = Arc::new(StringArray::new_null(1));
        let strings_ref = &strings;
        downcast_integer_array!(
            strings_ref => {
                panic!("unexpected data type {}", strings_ref.data_type())
            },
            DataType::Utf8 => {
                assert_eq!(strings_ref.null_count(), 1);
            },
            _ => panic!("unexpected data type")
        );
    }
}