arrow_arith/
aggregate.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Defines aggregations over Arrow arrays.
19
20use arrow_array::cast::*;
21use arrow_array::iterator::ArrayIter;
22use arrow_array::*;
23use arrow_buffer::{ArrowNativeType, NullBuffer};
24use arrow_data::bit_iterator::try_for_each_valid_idx;
25use arrow_schema::*;
26use std::borrow::BorrowMut;
27use std::cmp::{self, Ordering};
28use std::ops::{BitAnd, BitOr, BitXor};
29use types::ByteViewType;
30
31/// An accumulator for primitive numeric values.
32trait NumericAccumulator<T: ArrowNativeTypeOp>: Copy + Default {
33    /// Accumulate a non-null value.
34    fn accumulate(&mut self, value: T);
35    /// Accumulate a nullable values.
36    /// If `valid` is false the `value` should not affect the accumulator state.
37    fn accumulate_nullable(&mut self, value: T, valid: bool);
38    /// Merge another accumulator into this accumulator
39    fn merge(&mut self, other: Self);
40    /// Return the aggregated value.
41    fn finish(&mut self) -> T;
42}
43
44/// Helper for branchlessly selecting either `a` or `b` based on the boolean `m`.
45/// After verifying the generated assembly this can be a simple `if`.
46#[inline(always)]
47fn select<T: Copy>(m: bool, a: T, b: T) -> T {
48    if m { a } else { b }
49}
50
51#[derive(Clone, Copy)]
52struct SumAccumulator<T: ArrowNativeTypeOp> {
53    sum: T,
54}
55
56impl<T: ArrowNativeTypeOp> Default for SumAccumulator<T> {
57    fn default() -> Self {
58        Self { sum: T::ZERO }
59    }
60}
61
62impl<T: ArrowNativeTypeOp> NumericAccumulator<T> for SumAccumulator<T> {
63    fn accumulate(&mut self, value: T) {
64        self.sum = self.sum.add_wrapping(value);
65    }
66
67    fn accumulate_nullable(&mut self, value: T, valid: bool) {
68        let sum = self.sum;
69        self.sum = select(valid, sum.add_wrapping(value), sum)
70    }
71
72    fn merge(&mut self, other: Self) {
73        self.sum = self.sum.add_wrapping(other.sum);
74    }
75
76    fn finish(&mut self) -> T {
77        self.sum
78    }
79}
80
81#[derive(Clone, Copy)]
82struct MinAccumulator<T: ArrowNativeTypeOp> {
83    min: T,
84}
85
86impl<T: ArrowNativeTypeOp> Default for MinAccumulator<T> {
87    fn default() -> Self {
88        Self {
89            min: T::MAX_TOTAL_ORDER,
90        }
91    }
92}
93
94impl<T: ArrowNativeTypeOp> NumericAccumulator<T> for MinAccumulator<T> {
95    fn accumulate(&mut self, value: T) {
96        let min = self.min;
97        self.min = select(value.is_lt(min), value, min);
98    }
99
100    fn accumulate_nullable(&mut self, value: T, valid: bool) {
101        let min = self.min;
102        let is_lt = valid & value.is_lt(min);
103        self.min = select(is_lt, value, min);
104    }
105
106    fn merge(&mut self, other: Self) {
107        self.accumulate(other.min)
108    }
109
110    fn finish(&mut self) -> T {
111        self.min
112    }
113}
114
115#[derive(Clone, Copy)]
116struct MaxAccumulator<T: ArrowNativeTypeOp> {
117    max: T,
118}
119
120impl<T: ArrowNativeTypeOp> Default for MaxAccumulator<T> {
121    fn default() -> Self {
122        Self {
123            max: T::MIN_TOTAL_ORDER,
124        }
125    }
126}
127
128impl<T: ArrowNativeTypeOp> NumericAccumulator<T> for MaxAccumulator<T> {
129    fn accumulate(&mut self, value: T) {
130        let max = self.max;
131        self.max = select(value.is_gt(max), value, max);
132    }
133
134    fn accumulate_nullable(&mut self, value: T, valid: bool) {
135        let max = self.max;
136        let is_gt = value.is_gt(max) & valid;
137        self.max = select(is_gt, value, max);
138    }
139
140    fn merge(&mut self, other: Self) {
141        self.accumulate(other.max)
142    }
143
144    fn finish(&mut self) -> T {
145        self.max
146    }
147}
148
149fn reduce_accumulators<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
150    mut acc: [A; LANES],
151) -> A {
152    assert!(LANES > 0 && LANES.is_power_of_two());
153    let mut len = LANES;
154
155    // attempt at tree reduction, unfortunately llvm does not fully recognize this pattern,
156    // but the generated code is still a little faster than purely sequential reduction for floats.
157    while len >= 2 {
158        let mid = len / 2;
159        let (h, t) = acc[..len].split_at_mut(mid);
160
161        for i in 0..mid {
162            h[i].merge(t[i]);
163        }
164        len /= 2;
165    }
166    acc[0]
167}
168
169#[inline(always)]
170fn aggregate_nonnull_chunk<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
171    acc: &mut [A; LANES],
172    values: &[T; LANES],
173) {
174    for i in 0..LANES {
175        acc[i].accumulate(values[i]);
176    }
177}
178
179#[inline(always)]
180fn aggregate_nullable_chunk<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
181    acc: &mut [A; LANES],
182    values: &[T; LANES],
183    validity: u64,
184) {
185    let mut bit = 1;
186    for i in 0..LANES {
187        acc[i].accumulate_nullable(values[i], (validity & bit) != 0);
188        bit <<= 1;
189    }
190}
191
192fn aggregate_nonnull_simple<T: ArrowNativeTypeOp, A: NumericAccumulator<T>>(values: &[T]) -> T {
193    values
194        .iter()
195        .copied()
196        .fold(A::default(), |mut a, b| {
197            a.accumulate(b);
198            a
199        })
200        .finish()
201}
202
203#[inline(never)]
204fn aggregate_nonnull_lanes<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
205    values: &[T],
206) -> T {
207    // aggregating into multiple independent accumulators allows the compiler to use vector registers
208    // with a single accumulator the compiler would not be allowed to reorder floating point addition
209    let mut acc = [A::default(); LANES];
210    let mut chunks = values.chunks_exact(LANES);
211    chunks.borrow_mut().for_each(|chunk| {
212        aggregate_nonnull_chunk(&mut acc, chunk[..LANES].try_into().unwrap());
213    });
214
215    let remainder = chunks.remainder();
216    for i in 0..remainder.len() {
217        acc[i].accumulate(remainder[i]);
218    }
219
220    reduce_accumulators(acc).finish()
221}
222
223#[inline(never)]
224fn aggregate_nullable_lanes<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
225    values: &[T],
226    validity: &NullBuffer,
227) -> T {
228    assert!(LANES > 0 && 64 % LANES == 0);
229    assert_eq!(values.len(), validity.len());
230
231    // aggregating into multiple independent accumulators allows the compiler to use vector registers
232    let mut acc = [A::default(); LANES];
233    // we process 64 bits of validity at a time
234    let mut values_chunks = values.chunks_exact(64);
235    let validity_chunks = validity.inner().bit_chunks();
236    let mut validity_chunks_iter = validity_chunks.iter();
237
238    values_chunks.borrow_mut().for_each(|chunk| {
239        // Safety: we asserted that values and validity have the same length and trust the iterator impl
240        let mut validity = unsafe { validity_chunks_iter.next().unwrap_unchecked() };
241        // chunk further based on the number of vector lanes
242        chunk.chunks_exact(LANES).for_each(|chunk| {
243            aggregate_nullable_chunk(&mut acc, chunk[..LANES].try_into().unwrap(), validity);
244            validity >>= LANES;
245        });
246    });
247
248    let remainder = values_chunks.remainder();
249    if !remainder.is_empty() {
250        let mut validity = validity_chunks.remainder_bits();
251
252        let mut remainder_chunks = remainder.chunks_exact(LANES);
253        remainder_chunks.borrow_mut().for_each(|chunk| {
254            aggregate_nullable_chunk(&mut acc, chunk[..LANES].try_into().unwrap(), validity);
255            validity >>= LANES;
256        });
257
258        let remainder = remainder_chunks.remainder();
259        if !remainder.is_empty() {
260            let mut bit = 1;
261            for i in 0..remainder.len() {
262                acc[i].accumulate_nullable(remainder[i], (validity & bit) != 0);
263                bit <<= 1;
264            }
265        }
266    }
267
268    reduce_accumulators(acc).finish()
269}
270
271/// The preferred vector size in bytes for the target platform.
272/// Note that the avx512 target feature is still unstable and this also means it is not detected on stable rust.
273const PREFERRED_VECTOR_SIZE: usize =
274    if cfg!(all(target_arch = "x86_64", target_feature = "avx512f")) {
275        64
276    } else if cfg!(all(target_arch = "x86_64", target_feature = "avx")) {
277        32
278    } else {
279        16
280    };
281
282/// non-nullable aggregation requires fewer temporary registers so we can use more of them for accumulators
283const PREFERRED_VECTOR_SIZE_NON_NULL: usize = PREFERRED_VECTOR_SIZE * 2;
284
285/// Generic aggregation for any primitive type.
286/// Returns None if there are no non-null values in `array`.
287fn aggregate<T: ArrowNativeTypeOp, P: ArrowPrimitiveType<Native = T>, A: NumericAccumulator<T>>(
288    array: &PrimitiveArray<P>,
289) -> Option<T> {
290    let null_count = array.null_count();
291    if null_count == array.len() {
292        return None;
293    }
294    let values = array.values().as_ref();
295    match array.nulls() {
296        Some(nulls) if null_count > 0 => {
297            // const generics depending on a generic type parameter are not supported
298            // so we have to match and call aggregate with the corresponding constant
299            match PREFERRED_VECTOR_SIZE / std::mem::size_of::<T>() {
300                64 => Some(aggregate_nullable_lanes::<T, A, 64>(values, nulls)),
301                32 => Some(aggregate_nullable_lanes::<T, A, 32>(values, nulls)),
302                16 => Some(aggregate_nullable_lanes::<T, A, 16>(values, nulls)),
303                8 => Some(aggregate_nullable_lanes::<T, A, 8>(values, nulls)),
304                4 => Some(aggregate_nullable_lanes::<T, A, 4>(values, nulls)),
305                2 => Some(aggregate_nullable_lanes::<T, A, 2>(values, nulls)),
306                _ => Some(aggregate_nullable_lanes::<T, A, 1>(values, nulls)),
307            }
308        }
309        _ => {
310            let is_float = matches!(
311                array.data_type(),
312                DataType::Float16 | DataType::Float32 | DataType::Float64
313            );
314            if is_float {
315                match PREFERRED_VECTOR_SIZE_NON_NULL / std::mem::size_of::<T>() {
316                    64 => Some(aggregate_nonnull_lanes::<T, A, 64>(values)),
317                    32 => Some(aggregate_nonnull_lanes::<T, A, 32>(values)),
318                    16 => Some(aggregate_nonnull_lanes::<T, A, 16>(values)),
319                    8 => Some(aggregate_nonnull_lanes::<T, A, 8>(values)),
320                    4 => Some(aggregate_nonnull_lanes::<T, A, 4>(values)),
321                    2 => Some(aggregate_nonnull_lanes::<T, A, 2>(values)),
322                    _ => Some(aggregate_nonnull_simple::<T, A>(values)),
323                }
324            } else {
325                // for non-null integers its better to not chunk ourselves and instead
326                // let llvm fully handle loop unrolling and vectorization
327                Some(aggregate_nonnull_simple::<T, A>(values))
328            }
329        }
330    }
331}
332
333/// Returns the minimum value in the boolean array.
334///
335/// ```
336/// # use arrow_array::BooleanArray;
337/// # use arrow_arith::aggregate::min_boolean;
338///
339/// let a = BooleanArray::from(vec![Some(true), None, Some(false)]);
340/// assert_eq!(min_boolean(&a), Some(false))
341/// ```
342pub fn min_boolean(array: &BooleanArray) -> Option<bool> {
343    // short circuit if all nulls / zero length array
344    if array.null_count() == array.len() {
345        return None;
346    }
347
348    // Note the min bool is false (0), so short circuit as soon as we see it
349    match array.nulls() {
350        None => {
351            let bit_chunks = array.values().bit_chunks();
352            if bit_chunks.iter().any(|x| {
353                // u64::MAX has all bits set, so if the value is not that, then there is a false
354                x != u64::MAX
355            }) {
356                return Some(false);
357            }
358            // If the remainder bits are not all set, then there is a false
359            if bit_chunks.remainder_bits().count_ones() as usize != bit_chunks.remainder_len() {
360                Some(false)
361            } else {
362                Some(true)
363            }
364        }
365        Some(nulls) => {
366            let validity_chunks = nulls.inner().bit_chunks();
367            let value_chunks = array.values().bit_chunks();
368
369            if value_chunks
370                .iter()
371                .zip(validity_chunks.iter())
372                .any(|(value, validity)| {
373                    // We are looking for a false value, but because applying the validity mask
374                    // can create a false for a true value (e.g. value: true, validity: false), we instead invert the value, so that we have to look for a true.
375                    (!value & validity) != 0
376                })
377            {
378                return Some(false);
379            }
380
381            // Same trick as above: Instead of looking for a false, we invert the value bits and look for a true
382            if (!value_chunks.remainder_bits() & validity_chunks.remainder_bits()) != 0 {
383                Some(false)
384            } else {
385                Some(true)
386            }
387        }
388    }
389}
390
391/// Returns the maximum value in the boolean array
392///
393/// ```
394/// # use arrow_array::BooleanArray;
395/// # use arrow_arith::aggregate::max_boolean;
396///
397/// let a = BooleanArray::from(vec![Some(true), None, Some(false)]);
398/// assert_eq!(max_boolean(&a), Some(true))
399/// ```
400pub fn max_boolean(array: &BooleanArray) -> Option<bool> {
401    // short circuit if all nulls / zero length array
402    if array.null_count() == array.len() {
403        return None;
404    }
405
406    // Note the max bool is true (1), so short circuit as soon as we see it
407    match array.nulls() {
408        None => array
409            .values()
410            .bit_chunks()
411            .iter_padded()
412            // We found a true if any bit is set
413            .map(|x| x != 0)
414            .find(|b| *b)
415            .or(Some(false)),
416        Some(nulls) => {
417            let validity_chunks = nulls.inner().bit_chunks().iter_padded();
418            let value_chunks = array.values().bit_chunks().iter_padded();
419            value_chunks
420                .zip(validity_chunks)
421                // We found a true if the value bit is 1, AND the validity bit is 1 for any bits in the chunk
422                .map(|(value_bits, validity_bits)| (value_bits & validity_bits) != 0)
423                .find(|b| *b)
424                .or(Some(false))
425        }
426    }
427}
428
429/// Helper to compute min/max of [`ArrayAccessor`].
430fn min_max_helper<T, A: ArrayAccessor<Item = T>, F>(array: A, cmp: F) -> Option<T>
431where
432    F: Fn(&T, &T) -> bool,
433{
434    let null_count = array.null_count();
435    if null_count == array.len() {
436        None
437    } else if null_count == 0 {
438        // JUSTIFICATION
439        //  Benefit:  ~8% speedup
440        //  Soundness: `i` is always within the array bounds
441        (0..array.len())
442            .map(|i| unsafe { array.value_unchecked(i) })
443            .reduce(|acc, item| if cmp(&acc, &item) { item } else { acc })
444    } else {
445        let nulls = array.nulls().unwrap();
446        unsafe {
447            let idx = nulls.valid_indices().reduce(|acc_idx, idx| {
448                let acc = array.value_unchecked(acc_idx);
449                let item = array.value_unchecked(idx);
450                if cmp(&acc, &item) { idx } else { acc_idx }
451            });
452            idx.map(|idx| array.value_unchecked(idx))
453        }
454    }
455}
456
457/// Helper to compute min/max of [`GenericByteViewArray<T>`].
458/// The specialized min/max leverages the inlined values to compare the byte views.
459/// `swap_cond` is the condition to swap current min/max with the new value.
460/// For example, `Ordering::Greater` for max and `Ordering::Less` for min.
461fn min_max_view_helper<T: ByteViewType>(
462    array: &GenericByteViewArray<T>,
463    swap_cond: cmp::Ordering,
464) -> Option<&T::Native> {
465    let null_count = array.null_count();
466    if null_count == array.len() {
467        None
468    } else if null_count == 0 {
469        let target_idx = (0..array.len()).reduce(|acc, item| {
470            // SAFETY:  array's length is correct so item is within bounds
471            let cmp = unsafe { GenericByteViewArray::compare_unchecked(array, item, array, acc) };
472            if cmp == swap_cond { item } else { acc }
473        });
474        // SAFETY: idx came from valid range `0..array.len()`
475        unsafe { target_idx.map(|idx| array.value_unchecked(idx)) }
476    } else {
477        let nulls = array.nulls().unwrap();
478
479        let target_idx = nulls.valid_indices().reduce(|acc_idx, idx| {
480            let cmp =
481                unsafe { GenericByteViewArray::compare_unchecked(array, idx, array, acc_idx) };
482            if cmp == swap_cond { idx } else { acc_idx }
483        });
484
485        // SAFETY: idx came from valid range `0..array.len()`
486        unsafe { target_idx.map(|idx| array.value_unchecked(idx)) }
487    }
488}
489
490/// Returns the maximum value in the binary array, according to the natural order.
491pub fn max_binary<T: OffsetSizeTrait>(array: &GenericBinaryArray<T>) -> Option<&[u8]> {
492    min_max_helper::<&[u8], _, _>(array, |a, b| *a < *b)
493}
494
495/// Returns the maximum value in the binary view array, according to the natural order.
496pub fn max_binary_view(array: &BinaryViewArray) -> Option<&[u8]> {
497    min_max_view_helper(array, Ordering::Greater)
498}
499
500/// Returns the maximum value in the fixed size binary array, according to the natural order.
501pub fn max_fixed_size_binary(array: &FixedSizeBinaryArray) -> Option<&[u8]> {
502    min_max_helper::<&[u8], _, _>(array, |a, b| *a < *b)
503}
504
505/// Returns the minimum value in the binary array, according to the natural order.
506pub fn min_binary<T: OffsetSizeTrait>(array: &GenericBinaryArray<T>) -> Option<&[u8]> {
507    min_max_helper::<&[u8], _, _>(array, |a, b| *a > *b)
508}
509
510/// Returns the minimum value in the binary view array, according to the natural order.
511pub fn min_binary_view(array: &BinaryViewArray) -> Option<&[u8]> {
512    min_max_view_helper(array, Ordering::Less)
513}
514
515/// Returns the minimum value in the fixed size binary array, according to the natural order.
516pub fn min_fixed_size_binary(array: &FixedSizeBinaryArray) -> Option<&[u8]> {
517    min_max_helper::<&[u8], _, _>(array, |a, b| *a > *b)
518}
519
520/// Returns the maximum value in the string array, according to the natural order.
521pub fn max_string<T: OffsetSizeTrait>(array: &GenericStringArray<T>) -> Option<&str> {
522    min_max_helper::<&str, _, _>(array, |a, b| *a < *b)
523}
524
525/// Returns the maximum value in the string view array, according to the natural order.
526pub fn max_string_view(array: &StringViewArray) -> Option<&str> {
527    min_max_view_helper(array, Ordering::Greater)
528}
529
530/// Returns the minimum value in the string array, according to the natural order.
531pub fn min_string<T: OffsetSizeTrait>(array: &GenericStringArray<T>) -> Option<&str> {
532    min_max_helper::<&str, _, _>(array, |a, b| *a > *b)
533}
534
535/// Returns the minimum value in the string view array, according to the natural order.
536pub fn min_string_view(array: &StringViewArray) -> Option<&str> {
537    min_max_view_helper(array, Ordering::Less)
538}
539
540/// Returns the sum of values in the array.
541///
542/// This doesn't detect overflow. Once overflowing, the result will wrap around.
543/// For an overflow-checking variant, use `sum_array_checked` instead.
544pub fn sum_array<T, A: ArrayAccessor<Item = T::Native>>(array: A) -> Option<T::Native>
545where
546    T: ArrowNumericType,
547    T::Native: ArrowNativeTypeOp,
548{
549    match array.data_type() {
550        DataType::Dictionary(_, _) => {
551            let null_count = array.null_count();
552
553            if null_count == array.len() {
554                return None;
555            }
556
557            let iter = ArrayIter::new(array);
558            let sum = iter
559                .into_iter()
560                .fold(T::default_value(), |accumulator, value| {
561                    if let Some(value) = value {
562                        accumulator.add_wrapping(value)
563                    } else {
564                        accumulator
565                    }
566                });
567
568            Some(sum)
569        }
570        _ => sum::<T>(as_primitive_array(&array)),
571    }
572}
573
574/// Returns the sum of values in the array.
575///
576/// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant,
577/// use `sum_array` instead.
578pub fn sum_array_checked<T, A: ArrayAccessor<Item = T::Native>>(
579    array: A,
580) -> Result<Option<T::Native>, ArrowError>
581where
582    T: ArrowNumericType,
583    T::Native: ArrowNativeTypeOp,
584{
585    match array.data_type() {
586        DataType::Dictionary(_, _) => {
587            let null_count = array.null_count();
588
589            if null_count == array.len() {
590                return Ok(None);
591            }
592
593            let iter = ArrayIter::new(array);
594            let sum = iter
595                .into_iter()
596                .try_fold(T::default_value(), |accumulator, value| {
597                    if let Some(value) = value {
598                        accumulator.add_checked(value)
599                    } else {
600                        Ok(accumulator)
601                    }
602                })?;
603
604            Ok(Some(sum))
605        }
606        _ => sum_checked::<T>(as_primitive_array(&array)),
607    }
608}
609
610/// Returns the min of values in the array of `ArrowNumericType` type, or dictionary
611/// array with value of `ArrowNumericType` type.
612pub fn min_array<T, A: ArrayAccessor<Item = T::Native>>(array: A) -> Option<T::Native>
613where
614    T: ArrowNumericType,
615    T::Native: ArrowNativeType,
616{
617    min_max_array_helper::<T, A, _, _>(array, |a, b| a.is_gt(*b), min)
618}
619
620/// Returns the max of values in the array of `ArrowNumericType` type, or dictionary
621/// array with value of `ArrowNumericType` type.
622pub fn max_array<T, A: ArrayAccessor<Item = T::Native>>(array: A) -> Option<T::Native>
623where
624    T: ArrowNumericType,
625    T::Native: ArrowNativeTypeOp,
626{
627    min_max_array_helper::<T, A, _, _>(array, |a, b| a.is_lt(*b), max)
628}
629
630fn min_max_array_helper<T, A: ArrayAccessor<Item = T::Native>, F, M>(
631    array: A,
632    cmp: F,
633    m: M,
634) -> Option<T::Native>
635where
636    T: ArrowNumericType,
637    F: Fn(&T::Native, &T::Native) -> bool,
638    M: Fn(&PrimitiveArray<T>) -> Option<T::Native>,
639{
640    match array.data_type() {
641        DataType::Dictionary(_, _) => min_max_helper::<T::Native, _, _>(array, cmp),
642        _ => m(as_primitive_array(&array)),
643    }
644}
645
646macro_rules! bit_operation {
647    ($NAME:ident, $OP:ident, $NATIVE:ident, $DEFAULT:expr, $DOC:expr) => {
648        #[doc = $DOC]
649        ///
650        /// Returns `None` if the array is empty or only contains null values.
651        pub fn $NAME<T>(array: &PrimitiveArray<T>) -> Option<T::Native>
652        where
653            T: ArrowNumericType,
654            T::Native: $NATIVE<Output = T::Native> + ArrowNativeTypeOp,
655        {
656            let default;
657            if $DEFAULT == -1 {
658                default = T::Native::ONE.neg_wrapping();
659            } else {
660                default = T::default_value();
661            }
662
663            let null_count = array.null_count();
664
665            if null_count == array.len() {
666                return None;
667            }
668
669            let data: &[T::Native] = array.values();
670
671            match array.nulls() {
672                None => {
673                    let result = data
674                        .iter()
675                        .fold(default, |accumulator, value| accumulator.$OP(*value));
676
677                    Some(result)
678                }
679                Some(nulls) => {
680                    let mut result = default;
681                    let data_chunks = data.chunks_exact(64);
682                    let remainder = data_chunks.remainder();
683
684                    let bit_chunks = nulls.inner().bit_chunks();
685                    data_chunks
686                        .zip(bit_chunks.iter())
687                        .for_each(|(chunk, mask)| {
688                            // index_mask has value 1 << i in the loop
689                            let mut index_mask = 1;
690                            chunk.iter().for_each(|value| {
691                                if (mask & index_mask) != 0 {
692                                    result = result.$OP(*value);
693                                }
694                                index_mask <<= 1;
695                            });
696                        });
697
698                    let remainder_bits = bit_chunks.remainder_bits();
699
700                    remainder.iter().enumerate().for_each(|(i, value)| {
701                        if remainder_bits & (1 << i) != 0 {
702                            result = result.$OP(*value);
703                        }
704                    });
705
706                    Some(result)
707                }
708            }
709        }
710    };
711}
712
713bit_operation!(
714    bit_and,
715    bitand,
716    BitAnd,
717    -1,
718    "Returns the bitwise and of all non-null input values."
719);
720bit_operation!(
721    bit_or,
722    bitor,
723    BitOr,
724    0,
725    "Returns the bitwise or of all non-null input values."
726);
727bit_operation!(
728    bit_xor,
729    bitxor,
730    BitXor,
731    0,
732    "Returns the bitwise xor of all non-null input values."
733);
734
735/// Returns true if all non-null input values are true, otherwise false.
736///
737/// Returns `None` if the array is empty or only contains null values.
738pub fn bool_and(array: &BooleanArray) -> Option<bool> {
739    min_boolean(array)
740}
741
742/// Returns true if any non-null input value is true, otherwise false.
743///
744/// Returns `None` if the array is empty or only contains null values.
745pub fn bool_or(array: &BooleanArray) -> Option<bool> {
746    max_boolean(array)
747}
748
749/// Returns the sum of values in the primitive array.
750///
751/// Returns `Ok(None)` if the array is empty or only contains null values.
752///
753/// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant,
754/// use `sum` instead.
755pub fn sum_checked<T>(array: &PrimitiveArray<T>) -> Result<Option<T::Native>, ArrowError>
756where
757    T: ArrowNumericType,
758    T::Native: ArrowNativeTypeOp,
759{
760    let null_count = array.null_count();
761
762    if null_count == array.len() {
763        return Ok(None);
764    }
765
766    let data: &[T::Native] = array.values();
767
768    match array.nulls() {
769        None => {
770            let sum = data
771                .iter()
772                .try_fold(T::default_value(), |accumulator, value| {
773                    accumulator.add_checked(*value)
774                })?;
775
776            Ok(Some(sum))
777        }
778        Some(nulls) => {
779            let mut sum = T::default_value();
780
781            try_for_each_valid_idx(
782                nulls.len(),
783                nulls.offset(),
784                nulls.null_count(),
785                Some(nulls.validity()),
786                |idx| {
787                    unsafe { sum = sum.add_checked(array.value_unchecked(idx))? };
788                    Ok::<_, ArrowError>(())
789                },
790            )?;
791
792            Ok(Some(sum))
793        }
794    }
795}
796
797/// Returns the sum of values in the primitive array.
798///
799/// Returns `None` if the array is empty or only contains null values.
800///
801/// This doesn't detect overflow in release mode by default. Once overflowing, the result will
802/// wrap around. For an overflow-checking variant, use `sum_checked` instead.
803pub fn sum<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
804where
805    T::Native: ArrowNativeTypeOp,
806{
807    aggregate::<T::Native, T, SumAccumulator<T::Native>>(array)
808}
809
810/// Returns the minimum value in the array, according to the natural order.
811/// For floating point arrays any NaN values are considered to be greater than any other non-null value
812pub fn min<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
813where
814    T::Native: PartialOrd,
815{
816    aggregate::<T::Native, T, MinAccumulator<T::Native>>(array)
817}
818
819/// Returns the maximum value in the array, according to the natural order.
820/// For floating point arrays any NaN values are considered to be greater than any other non-null value
821pub fn max<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
822where
823    T::Native: PartialOrd,
824{
825    aggregate::<T::Native, T, MaxAccumulator<T::Native>>(array)
826}
827
828#[cfg(test)]
829mod tests {
830    use super::*;
831    use arrow_array::types::*;
832    use builder::BooleanBuilder;
833    use std::sync::Arc;
834
835    #[test]
836    fn test_primitive_array_sum() {
837        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
838        assert_eq!(15, sum(&a).unwrap());
839    }
840
841    #[test]
842    fn test_primitive_array_float_sum() {
843        let a = Float64Array::from(vec![1.1, 2.2, 3.3, 4.4, 5.5]);
844        assert_eq!(16.5, sum(&a).unwrap());
845    }
846
847    #[test]
848    fn test_primitive_array_sum_with_nulls() {
849        let a = Int32Array::from(vec![None, Some(2), Some(3), None, Some(5)]);
850        assert_eq!(10, sum(&a).unwrap());
851    }
852
853    #[test]
854    fn test_primitive_array_sum_all_nulls() {
855        let a = Int32Array::from(vec![None, None, None]);
856        assert_eq!(None, sum(&a));
857    }
858
859    #[test]
860    fn test_primitive_array_sum_large_float_64() {
861        let c = Float64Array::new((1..=100).map(|x| x as f64).collect(), None);
862        assert_eq!(Some((1..=100).sum::<i64>() as f64), sum(&c));
863
864        // create an array that actually has non-zero values at the invalid indices
865        let validity = NullBuffer::new((1..=100).map(|x| x % 3 == 0).collect());
866        let c = Float64Array::new((1..=100).map(|x| x as f64).collect(), Some(validity));
867
868        assert_eq!(
869            Some((1..=100).filter(|i| i % 3 == 0).sum::<i64>() as f64),
870            sum(&c)
871        );
872    }
873
874    #[test]
875    fn test_primitive_array_sum_large_float_32() {
876        let c = Float32Array::new((1..=100).map(|x| x as f32).collect(), None);
877        assert_eq!(Some((1..=100).sum::<i64>() as f32), sum(&c));
878
879        // create an array that actually has non-zero values at the invalid indices
880        let validity = NullBuffer::new((1..=100).map(|x| x % 3 == 0).collect());
881        let c = Float32Array::new((1..=100).map(|x| x as f32).collect(), Some(validity));
882
883        assert_eq!(
884            Some((1..=100).filter(|i| i % 3 == 0).sum::<i64>() as f32),
885            sum(&c)
886        );
887    }
888
889    #[test]
890    fn test_primitive_array_sum_large_64() {
891        let c = Int64Array::new((1..=100).collect(), None);
892        assert_eq!(Some((1..=100).sum()), sum(&c));
893
894        // create an array that actually has non-zero values at the invalid indices
895        let validity = NullBuffer::new((1..=100).map(|x| x % 3 == 0).collect());
896        let c = Int64Array::new((1..=100).collect(), Some(validity));
897
898        assert_eq!(Some((1..=100).filter(|i| i % 3 == 0).sum()), sum(&c));
899    }
900
901    #[test]
902    fn test_primitive_array_sum_large_32() {
903        let c = Int32Array::new((1..=100).collect(), None);
904        assert_eq!(Some((1..=100).sum()), sum(&c));
905
906        // create an array that actually has non-zero values at the invalid indices
907        let validity = NullBuffer::new((1..=100).map(|x| x % 3 == 0).collect());
908        let c = Int32Array::new((1..=100).collect(), Some(validity));
909        assert_eq!(Some((1..=100).filter(|i| i % 3 == 0).sum()), sum(&c));
910    }
911
912    #[test]
913    fn test_primitive_array_sum_large_16() {
914        let c = Int16Array::new((1..=100).collect(), None);
915        assert_eq!(Some((1..=100).sum()), sum(&c));
916
917        // create an array that actually has non-zero values at the invalid indices
918        let validity = NullBuffer::new((1..=100).map(|x| x % 3 == 0).collect());
919        let c = Int16Array::new((1..=100).collect(), Some(validity));
920        assert_eq!(Some((1..=100).filter(|i| i % 3 == 0).sum()), sum(&c));
921    }
922
923    #[test]
924    fn test_primitive_array_sum_large_8() {
925        let c = UInt8Array::new((1..=100).collect(), None);
926        assert_eq!(
927            Some((1..=100).fold(0_u8, |a, x| a.wrapping_add(x))),
928            sum(&c)
929        );
930
931        // create an array that actually has non-zero values at the invalid indices
932        let validity = NullBuffer::new((1..=100).map(|x| x % 3 == 0).collect());
933        let c = UInt8Array::new((1..=100).collect(), Some(validity));
934        assert_eq!(
935            Some(
936                (1..=100)
937                    .filter(|i| i % 3 == 0)
938                    .fold(0_u8, |a, x| a.wrapping_add(x))
939            ),
940            sum(&c)
941        );
942    }
943
944    #[test]
945    fn test_primitive_array_bit_and() {
946        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
947        assert_eq!(0, bit_and(&a).unwrap());
948    }
949
950    #[test]
951    fn test_primitive_array_bit_and_with_nulls() {
952        let a = Int32Array::from(vec![None, Some(2), Some(3), None, None]);
953        assert_eq!(2, bit_and(&a).unwrap());
954    }
955
956    #[test]
957    fn test_primitive_array_bit_and_all_nulls() {
958        let a = Int32Array::from(vec![None, None, None]);
959        assert_eq!(None, bit_and(&a));
960    }
961
962    #[test]
963    fn test_primitive_array_bit_or() {
964        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
965        assert_eq!(7, bit_or(&a).unwrap());
966    }
967
968    #[test]
969    fn test_primitive_array_bit_or_with_nulls() {
970        let a = Int32Array::from(vec![None, Some(2), Some(3), None, Some(5)]);
971        assert_eq!(7, bit_or(&a).unwrap());
972    }
973
974    #[test]
975    fn test_primitive_array_bit_or_all_nulls() {
976        let a = Int32Array::from(vec![None, None, None]);
977        assert_eq!(None, bit_or(&a));
978    }
979
980    #[test]
981    fn test_primitive_array_bit_xor() {
982        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
983        assert_eq!(1, bit_xor(&a).unwrap());
984    }
985
986    #[test]
987    fn test_primitive_array_bit_xor_with_nulls() {
988        let a = Int32Array::from(vec![None, Some(2), Some(3), None, Some(5)]);
989        assert_eq!(4, bit_xor(&a).unwrap());
990    }
991
992    #[test]
993    fn test_primitive_array_bit_xor_all_nulls() {
994        let a = Int32Array::from(vec![None, None, None]);
995        assert_eq!(None, bit_xor(&a));
996    }
997
998    #[test]
999    fn test_primitive_array_bool_and() {
1000        let a = BooleanArray::from(vec![true, false, true, false, true]);
1001        assert!(!bool_and(&a).unwrap());
1002    }
1003
1004    #[test]
1005    fn test_primitive_array_bool_and_with_nulls() {
1006        let a = BooleanArray::from(vec![None, Some(true), Some(true), None, Some(true)]);
1007        assert!(bool_and(&a).unwrap());
1008    }
1009
1010    #[test]
1011    fn test_primitive_array_bool_and_all_nulls() {
1012        let a = BooleanArray::from(vec![None, None, None]);
1013        assert_eq!(None, bool_and(&a));
1014    }
1015
1016    #[test]
1017    fn test_primitive_array_bool_or() {
1018        let a = BooleanArray::from(vec![true, false, true, false, true]);
1019        assert!(bool_or(&a).unwrap());
1020    }
1021
1022    #[test]
1023    fn test_primitive_array_bool_or_with_nulls() {
1024        let a = BooleanArray::from(vec![None, Some(false), Some(false), None, Some(false)]);
1025        assert!(!bool_or(&a).unwrap());
1026    }
1027
1028    #[test]
1029    fn test_primitive_array_bool_or_all_nulls() {
1030        let a = BooleanArray::from(vec![None, None, None]);
1031        assert_eq!(None, bool_or(&a));
1032    }
1033
1034    #[test]
1035    fn test_primitive_array_min_max() {
1036        let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
1037        assert_eq!(5, min(&a).unwrap());
1038        assert_eq!(9, max(&a).unwrap());
1039    }
1040
1041    #[test]
1042    fn test_primitive_array_min_max_with_nulls() {
1043        let a = Int32Array::from(vec![Some(5), None, None, Some(8), Some(9)]);
1044        assert_eq!(5, min(&a).unwrap());
1045        assert_eq!(9, max(&a).unwrap());
1046    }
1047
1048    #[test]
1049    fn test_primitive_min_max_1() {
1050        let a = Int32Array::from(vec![None, None, Some(5), Some(2)]);
1051        assert_eq!(Some(2), min(&a));
1052        assert_eq!(Some(5), max(&a));
1053    }
1054
1055    #[test]
1056    fn test_primitive_min_max_float_large_nonnull_array() {
1057        let a: Float64Array = (0..256).map(|i| Some((i + 1) as f64)).collect();
1058        // min/max are on boundaries of chunked data
1059        assert_eq!(Some(1.0), min(&a));
1060        assert_eq!(Some(256.0), max(&a));
1061
1062        // max is last value in remainder after chunking
1063        let a: Float64Array = (0..255).map(|i| Some((i + 1) as f64)).collect();
1064        assert_eq!(Some(255.0), max(&a));
1065
1066        // max is first value in remainder after chunking
1067        let a: Float64Array = (0..257).map(|i| Some((i + 1) as f64)).collect();
1068        assert_eq!(Some(257.0), max(&a));
1069    }
1070
1071    #[test]
1072    fn test_primitive_min_max_float_large_nullable_array() {
1073        let a: Float64Array = (0..256)
1074            .map(|i| {
1075                if (i + 1) % 3 == 0 {
1076                    None
1077                } else {
1078                    Some((i + 1) as f64)
1079                }
1080            })
1081            .collect();
1082        // min/max are on boundaries of chunked data
1083        assert_eq!(Some(1.0), min(&a));
1084        assert_eq!(Some(256.0), max(&a));
1085
1086        let a: Float64Array = (0..256)
1087            .map(|i| {
1088                if i == 0 || i == 255 {
1089                    None
1090                } else {
1091                    Some((i + 1) as f64)
1092                }
1093            })
1094            .collect();
1095        // boundaries of chunked data are null
1096        assert_eq!(Some(2.0), min(&a));
1097        assert_eq!(Some(255.0), max(&a));
1098
1099        let a: Float64Array = (0..256)
1100            .map(|i| if i != 100 { None } else { Some((i) as f64) })
1101            .collect();
1102        // a single non-null value somewhere in the middle
1103        assert_eq!(Some(100.0), min(&a));
1104        assert_eq!(Some(100.0), max(&a));
1105
1106        // max is last value in remainder after chunking
1107        let a: Float64Array = (0..255).map(|i| Some((i + 1) as f64)).collect();
1108        assert_eq!(Some(255.0), max(&a));
1109
1110        // max is first value in remainder after chunking
1111        let a: Float64Array = (0..257).map(|i| Some((i + 1) as f64)).collect();
1112        assert_eq!(Some(257.0), max(&a));
1113    }
1114
1115    #[test]
1116    fn test_primitive_min_max_float_edge_cases() {
1117        let a: Float64Array = (0..100).map(|_| Some(f64::NEG_INFINITY)).collect();
1118        assert_eq!(Some(f64::NEG_INFINITY), min(&a));
1119        assert_eq!(Some(f64::NEG_INFINITY), max(&a));
1120
1121        let a: Float64Array = (0..100).map(|_| Some(f64::MIN)).collect();
1122        assert_eq!(Some(f64::MIN), min(&a));
1123        assert_eq!(Some(f64::MIN), max(&a));
1124
1125        let a: Float64Array = (0..100).map(|_| Some(f64::MAX)).collect();
1126        assert_eq!(Some(f64::MAX), min(&a));
1127        assert_eq!(Some(f64::MAX), max(&a));
1128
1129        let a: Float64Array = (0..100).map(|_| Some(f64::INFINITY)).collect();
1130        assert_eq!(Some(f64::INFINITY), min(&a));
1131        assert_eq!(Some(f64::INFINITY), max(&a));
1132    }
1133
1134    #[test]
1135    fn test_primitive_min_max_float_all_nans_non_null() {
1136        let a: Float64Array = (0..100).map(|_| Some(f64::NAN)).collect();
1137        assert!(max(&a).unwrap().is_nan());
1138        assert!(min(&a).unwrap().is_nan());
1139    }
1140
1141    #[test]
1142    fn test_primitive_min_max_float_negative_nan() {
1143        let a: Float64Array =
1144            Float64Array::from(vec![f64::NEG_INFINITY, f64::NAN, f64::INFINITY, -f64::NAN]);
1145        let max = max(&a).unwrap();
1146        let min = min(&a).unwrap();
1147        assert!(max.is_nan());
1148        assert!(max.is_sign_positive());
1149
1150        assert!(min.is_nan());
1151        assert!(min.is_sign_negative());
1152    }
1153
1154    #[test]
1155    fn test_primitive_min_max_float_first_nan_nonnull() {
1156        let a: Float64Array = (0..100)
1157            .map(|i| {
1158                if i == 0 {
1159                    Some(f64::NAN)
1160                } else {
1161                    Some(i as f64)
1162                }
1163            })
1164            .collect();
1165        assert_eq!(Some(1.0), min(&a));
1166        assert!(max(&a).unwrap().is_nan());
1167    }
1168
1169    #[test]
1170    fn test_primitive_min_max_float_last_nan_nonnull() {
1171        let a: Float64Array = (0..100)
1172            .map(|i| {
1173                if i == 99 {
1174                    Some(f64::NAN)
1175                } else {
1176                    Some((i + 1) as f64)
1177                }
1178            })
1179            .collect();
1180        assert_eq!(Some(1.0), min(&a));
1181        assert!(max(&a).unwrap().is_nan());
1182    }
1183
1184    #[test]
1185    fn test_primitive_min_max_float_first_nan_nullable() {
1186        let a: Float64Array = (0..100)
1187            .map(|i| {
1188                if i == 0 {
1189                    Some(f64::NAN)
1190                } else if i % 2 == 0 {
1191                    None
1192                } else {
1193                    Some(i as f64)
1194                }
1195            })
1196            .collect();
1197        assert_eq!(Some(1.0), min(&a));
1198        assert!(max(&a).unwrap().is_nan());
1199    }
1200
1201    #[test]
1202    fn test_primitive_min_max_float_last_nan_nullable() {
1203        let a: Float64Array = (0..100)
1204            .map(|i| {
1205                if i == 99 {
1206                    Some(f64::NAN)
1207                } else if i % 2 == 0 {
1208                    None
1209                } else {
1210                    Some(i as f64)
1211                }
1212            })
1213            .collect();
1214        assert_eq!(Some(1.0), min(&a));
1215        assert!(max(&a).unwrap().is_nan());
1216    }
1217
1218    #[test]
1219    fn test_primitive_min_max_float_inf_and_nans() {
1220        let a: Float64Array = (0..100)
1221            .map(|i| {
1222                let x = match i % 10 {
1223                    0 => f64::NEG_INFINITY,
1224                    1 => f64::MIN,
1225                    2 => f64::MAX,
1226                    4 => f64::INFINITY,
1227                    5 => f64::NAN,
1228                    _ => i as f64,
1229                };
1230                Some(x)
1231            })
1232            .collect();
1233        assert_eq!(Some(f64::NEG_INFINITY), min(&a));
1234        assert!(max(&a).unwrap().is_nan());
1235    }
1236
1237    fn pad_inputs_and_test_fixed_size_binary(
1238        input: Vec<Option<&[u8]>>,
1239        expected_min: Option<&[u8]>,
1240        expected_max: Option<&[u8]>,
1241    ) {
1242        fn pad_slice(slice: &[u8], len: usize) -> Vec<u8> {
1243            let mut padded = vec![0; len];
1244            padded[..slice.len()].copy_from_slice(slice);
1245            padded
1246        }
1247
1248        let max_len = input
1249            .iter()
1250            .filter_map(|x| x.as_ref().map(|b| b.len()))
1251            .max()
1252            .unwrap_or(0);
1253        let padded_input = input
1254            .iter()
1255            .map(|x| x.as_ref().map(|b| pad_slice(b, max_len)));
1256        let input_arr =
1257            FixedSizeBinaryArray::try_from_sparse_iter_with_size(padded_input, max_len as i32)
1258                .unwrap();
1259        let padded_expected_min = expected_min.map(|b| pad_slice(b, max_len));
1260        let padded_expected_max = expected_max.map(|b| pad_slice(b, max_len));
1261
1262        assert_eq!(
1263            padded_expected_min.as_deref(),
1264            min_fixed_size_binary(&input_arr)
1265        );
1266        assert_eq!(
1267            padded_expected_max.as_deref(),
1268            max_fixed_size_binary(&input_arr)
1269        );
1270    }
1271
1272    macro_rules! test_binary {
1273        ($NAME:ident, $ARRAY:expr, $EXPECTED_MIN:expr, $EXPECTED_MAX: expr) => {
1274            #[test]
1275            fn $NAME() {
1276                let binary = BinaryArray::from($ARRAY);
1277                assert_eq!($EXPECTED_MIN, min_binary(&binary));
1278                assert_eq!($EXPECTED_MAX, max_binary(&binary));
1279
1280                let large_binary = LargeBinaryArray::from($ARRAY);
1281                assert_eq!($EXPECTED_MIN, min_binary(&large_binary));
1282                assert_eq!($EXPECTED_MAX, max_binary(&large_binary));
1283
1284                let binary_view = BinaryViewArray::from($ARRAY);
1285                assert_eq!($EXPECTED_MIN, min_binary_view(&binary_view));
1286                assert_eq!($EXPECTED_MAX, max_binary_view(&binary_view));
1287
1288                pad_inputs_and_test_fixed_size_binary($ARRAY, $EXPECTED_MIN, $EXPECTED_MAX);
1289            }
1290        };
1291    }
1292
1293    test_binary!(
1294        test_binary_min_max_with_nulls,
1295        vec![
1296            Some("b01234567890123".as_bytes()), // long bytes
1297            None,
1298            None,
1299            Some(b"a"),
1300            Some(b"c"),
1301            Some(b"abcdedfg0123456"),
1302        ],
1303        Some("a".as_bytes()),
1304        Some("c".as_bytes())
1305    );
1306
1307    test_binary!(
1308        test_binary_min_max_no_null,
1309        vec![
1310            Some("b".as_bytes()),
1311            Some(b"abcdefghijklmnopqrst"), // long bytes
1312            Some(b"c"),
1313            Some(b"b01234567890123"), // long bytes for view types
1314        ],
1315        Some("abcdefghijklmnopqrst".as_bytes()),
1316        Some("c".as_bytes())
1317    );
1318
1319    test_binary!(test_binary_min_max_all_nulls, vec![None, None], None, None);
1320
1321    test_binary!(
1322        test_binary_min_max_1,
1323        vec![
1324            None,
1325            Some("b01234567890123435".as_bytes()), // long bytes for view types
1326            None,
1327            Some(b"b0123xxxxxxxxxxx"),
1328            Some(b"a")
1329        ],
1330        Some("a".as_bytes()),
1331        Some("b0123xxxxxxxxxxx".as_bytes())
1332    );
1333
1334    macro_rules! test_string {
1335        ($NAME:ident, $ARRAY:expr, $EXPECTED_MIN:expr, $EXPECTED_MAX: expr) => {
1336            #[test]
1337            fn $NAME() {
1338                let string = StringArray::from($ARRAY);
1339                assert_eq!($EXPECTED_MIN, min_string(&string));
1340                assert_eq!($EXPECTED_MAX, max_string(&string));
1341
1342                let large_string = LargeStringArray::from($ARRAY);
1343                assert_eq!($EXPECTED_MIN, min_string(&large_string));
1344                assert_eq!($EXPECTED_MAX, max_string(&large_string));
1345
1346                let string_view = StringViewArray::from($ARRAY);
1347                assert_eq!($EXPECTED_MIN, min_string_view(&string_view));
1348                assert_eq!($EXPECTED_MAX, max_string_view(&string_view));
1349            }
1350        };
1351    }
1352
1353    test_string!(
1354        test_string_min_max_with_nulls,
1355        vec![
1356            Some("b012345678901234"), // long bytes for view types
1357            None,
1358            None,
1359            Some("a"),
1360            Some("c"),
1361            Some("b0123xxxxxxxxxxx")
1362        ],
1363        Some("a"),
1364        Some("c")
1365    );
1366
1367    test_string!(
1368        test_string_min_max_no_null,
1369        vec![
1370            Some("b"),
1371            Some("b012345678901234"), // long bytes for view types
1372            Some("a"),
1373            Some("b012xxxxxxxxxxxx")
1374        ],
1375        Some("a"),
1376        Some("b012xxxxxxxxxxxx")
1377    );
1378
1379    test_string!(
1380        test_string_min_max_all_nulls,
1381        Vec::<Option<&str>>::from_iter([None, None]),
1382        None,
1383        None
1384    );
1385
1386    test_string!(
1387        test_string_min_max_1,
1388        vec![
1389            None,
1390            Some("c12345678901234"), // long bytes for view types
1391            None,
1392            Some("b"),
1393            Some("c1234xxxxxxxxxx")
1394        ],
1395        Some("b"),
1396        Some("c1234xxxxxxxxxx")
1397    );
1398
1399    test_string!(
1400        test_string_min_max_empty,
1401        Vec::<Option<&str>>::new(),
1402        None,
1403        None
1404    );
1405
1406    #[test]
1407    fn test_boolean_min_max_empty() {
1408        let a = BooleanArray::from(vec![] as Vec<Option<bool>>);
1409        assert_eq!(None, min_boolean(&a));
1410        assert_eq!(None, max_boolean(&a));
1411    }
1412
1413    #[test]
1414    fn test_boolean_min_max_all_null() {
1415        let a = BooleanArray::from(vec![None, None]);
1416        assert_eq!(None, min_boolean(&a));
1417        assert_eq!(None, max_boolean(&a));
1418    }
1419
1420    #[test]
1421    fn test_boolean_min_max_no_null() {
1422        let a = BooleanArray::from(vec![Some(true), Some(false), Some(true)]);
1423        assert_eq!(Some(false), min_boolean(&a));
1424        assert_eq!(Some(true), max_boolean(&a));
1425    }
1426
1427    #[test]
1428    fn test_boolean_min_max() {
1429        let a = BooleanArray::from(vec![Some(true), Some(true), None, Some(false), None]);
1430        assert_eq!(Some(false), min_boolean(&a));
1431        assert_eq!(Some(true), max_boolean(&a));
1432
1433        let a = BooleanArray::from(vec![None, Some(true), None, Some(false), None]);
1434        assert_eq!(Some(false), min_boolean(&a));
1435        assert_eq!(Some(true), max_boolean(&a));
1436
1437        let a = BooleanArray::from(vec![Some(false), Some(true), None, Some(false), None]);
1438        assert_eq!(Some(false), min_boolean(&a));
1439        assert_eq!(Some(true), max_boolean(&a));
1440
1441        let a = BooleanArray::from(vec![Some(true), None]);
1442        assert_eq!(Some(true), min_boolean(&a));
1443        assert_eq!(Some(true), max_boolean(&a));
1444
1445        let a = BooleanArray::from(vec![Some(false), None]);
1446        assert_eq!(Some(false), min_boolean(&a));
1447        assert_eq!(Some(false), max_boolean(&a));
1448
1449        let a = BooleanArray::from(vec![Some(true)]);
1450        assert_eq!(Some(true), min_boolean(&a));
1451        assert_eq!(Some(true), max_boolean(&a));
1452
1453        let a = BooleanArray::from(vec![Some(false)]);
1454        assert_eq!(Some(false), min_boolean(&a));
1455        assert_eq!(Some(false), max_boolean(&a));
1456    }
1457
1458    #[test]
1459    fn test_boolean_min_max_smaller() {
1460        let a = BooleanArray::from(vec![Some(false)]);
1461        assert_eq!(Some(false), min_boolean(&a));
1462        assert_eq!(Some(false), max_boolean(&a));
1463
1464        let a = BooleanArray::from(vec![None, Some(false)]);
1465        assert_eq!(Some(false), min_boolean(&a));
1466        assert_eq!(Some(false), max_boolean(&a));
1467
1468        let a = BooleanArray::from(vec![None, Some(true)]);
1469        assert_eq!(Some(true), min_boolean(&a));
1470        assert_eq!(Some(true), max_boolean(&a));
1471
1472        let a = BooleanArray::from(vec![Some(true)]);
1473        assert_eq!(Some(true), min_boolean(&a));
1474        assert_eq!(Some(true), max_boolean(&a));
1475    }
1476
1477    #[test]
1478    fn test_boolean_min_max_64_true_64_false() {
1479        let mut no_nulls = BooleanBuilder::new();
1480        no_nulls.append_slice(&[true; 64]);
1481        no_nulls.append_slice(&[false; 64]);
1482        let no_nulls = no_nulls.finish();
1483
1484        assert_eq!(Some(false), min_boolean(&no_nulls));
1485        assert_eq!(Some(true), max_boolean(&no_nulls));
1486
1487        let mut with_nulls = BooleanBuilder::new();
1488        with_nulls.append_slice(&[true; 31]);
1489        with_nulls.append_null();
1490        with_nulls.append_slice(&[true; 32]);
1491        with_nulls.append_slice(&[false; 1]);
1492        with_nulls.append_nulls(63);
1493        let with_nulls = with_nulls.finish();
1494
1495        assert_eq!(Some(false), min_boolean(&with_nulls));
1496        assert_eq!(Some(true), max_boolean(&with_nulls));
1497    }
1498
1499    #[test]
1500    fn test_boolean_min_max_64_false_64_true() {
1501        let mut no_nulls = BooleanBuilder::new();
1502        no_nulls.append_slice(&[false; 64]);
1503        no_nulls.append_slice(&[true; 64]);
1504        let no_nulls = no_nulls.finish();
1505
1506        assert_eq!(Some(false), min_boolean(&no_nulls));
1507        assert_eq!(Some(true), max_boolean(&no_nulls));
1508
1509        let mut with_nulls = BooleanBuilder::new();
1510        with_nulls.append_slice(&[false; 31]);
1511        with_nulls.append_null();
1512        with_nulls.append_slice(&[false; 32]);
1513        with_nulls.append_slice(&[true; 1]);
1514        with_nulls.append_nulls(63);
1515        let with_nulls = with_nulls.finish();
1516
1517        assert_eq!(Some(false), min_boolean(&with_nulls));
1518        assert_eq!(Some(true), max_boolean(&with_nulls));
1519    }
1520
1521    #[test]
1522    fn test_boolean_min_max_96_true() {
1523        let mut no_nulls = BooleanBuilder::new();
1524        no_nulls.append_slice(&[true; 96]);
1525        let no_nulls = no_nulls.finish();
1526
1527        assert_eq!(Some(true), min_boolean(&no_nulls));
1528        assert_eq!(Some(true), max_boolean(&no_nulls));
1529
1530        let mut with_nulls = BooleanBuilder::new();
1531        with_nulls.append_slice(&[true; 31]);
1532        with_nulls.append_null();
1533        with_nulls.append_slice(&[true; 32]);
1534        with_nulls.append_slice(&[true; 31]);
1535        with_nulls.append_null();
1536        let with_nulls = with_nulls.finish();
1537
1538        assert_eq!(Some(true), min_boolean(&with_nulls));
1539        assert_eq!(Some(true), max_boolean(&with_nulls));
1540    }
1541
1542    #[test]
1543    fn test_boolean_min_max_96_false() {
1544        let mut no_nulls = BooleanBuilder::new();
1545        no_nulls.append_slice(&[false; 96]);
1546        let no_nulls = no_nulls.finish();
1547
1548        assert_eq!(Some(false), min_boolean(&no_nulls));
1549        assert_eq!(Some(false), max_boolean(&no_nulls));
1550
1551        let mut with_nulls = BooleanBuilder::new();
1552        with_nulls.append_slice(&[false; 31]);
1553        with_nulls.append_null();
1554        with_nulls.append_slice(&[false; 32]);
1555        with_nulls.append_slice(&[false; 31]);
1556        with_nulls.append_null();
1557        let with_nulls = with_nulls.finish();
1558
1559        assert_eq!(Some(false), min_boolean(&with_nulls));
1560        assert_eq!(Some(false), max_boolean(&with_nulls));
1561    }
1562
1563    #[test]
1564    fn test_sum_dyn() {
1565        let values = Int8Array::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
1566        let values = Arc::new(values) as ArrayRef;
1567        let keys = Int8Array::from_iter_values([2_i8, 3, 4]);
1568
1569        let dict_array = DictionaryArray::new(keys, values.clone());
1570        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1571        assert_eq!(39, sum_array::<Int8Type, _>(array).unwrap());
1572
1573        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
1574        assert_eq!(15, sum_array::<Int32Type, _>(&a).unwrap());
1575
1576        let keys = Int8Array::from(vec![Some(2_i8), None, Some(4)]);
1577        let dict_array = DictionaryArray::new(keys, values.clone());
1578        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1579        assert_eq!(26, sum_array::<Int8Type, _>(array).unwrap());
1580
1581        let keys = Int8Array::from(vec![None, None, None]);
1582        let dict_array = DictionaryArray::new(keys, values.clone());
1583        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1584        assert!(sum_array::<Int8Type, _>(array).is_none());
1585    }
1586
1587    #[test]
1588    fn test_max_min_dyn() {
1589        let values = Int8Array::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
1590        let keys = Int8Array::from_iter_values([2_i8, 3, 4]);
1591        let values = Arc::new(values) as ArrayRef;
1592
1593        let dict_array = DictionaryArray::new(keys, values.clone());
1594        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1595        assert_eq!(14, max_array::<Int8Type, _>(array).unwrap());
1596
1597        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1598        assert_eq!(12, min_array::<Int8Type, _>(array).unwrap());
1599
1600        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
1601        assert_eq!(5, max_array::<Int32Type, _>(&a).unwrap());
1602        assert_eq!(1, min_array::<Int32Type, _>(&a).unwrap());
1603
1604        let keys = Int8Array::from(vec![Some(2_i8), None, Some(7)]);
1605        let dict_array = DictionaryArray::new(keys, values.clone());
1606        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1607        assert_eq!(17, max_array::<Int8Type, _>(array).unwrap());
1608        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1609        assert_eq!(12, min_array::<Int8Type, _>(array).unwrap());
1610
1611        let keys = Int8Array::from(vec![None, None, None]);
1612        let dict_array = DictionaryArray::new(keys, values.clone());
1613        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1614        assert!(max_array::<Int8Type, _>(array).is_none());
1615        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1616        assert!(min_array::<Int8Type, _>(array).is_none());
1617    }
1618
1619    #[test]
1620    fn test_max_min_dyn_nan() {
1621        let values = Float32Array::from(vec![5.0_f32, 2.0_f32, f32::NAN]);
1622        let keys = Int8Array::from_iter_values([0_i8, 1, 2]);
1623
1624        let dict_array = DictionaryArray::new(keys, Arc::new(values));
1625        let array = dict_array.downcast_dict::<Float32Array>().unwrap();
1626        assert!(max_array::<Float32Type, _>(array).unwrap().is_nan());
1627
1628        let array = dict_array.downcast_dict::<Float32Array>().unwrap();
1629        assert_eq!(2.0_f32, min_array::<Float32Type, _>(array).unwrap());
1630    }
1631
1632    #[test]
1633    fn test_min_max_sliced_primitive() {
1634        let expected = Some(4.0);
1635        let input: Float64Array = vec![None, Some(4.0)].into_iter().collect();
1636        let actual = min(&input);
1637        assert_eq!(actual, expected);
1638        let actual = max(&input);
1639        assert_eq!(actual, expected);
1640
1641        let sliced_input: Float64Array = vec![None, None, None, None, None, Some(4.0)]
1642            .into_iter()
1643            .collect();
1644        let sliced_input = sliced_input.slice(4, 2);
1645
1646        assert_eq!(&sliced_input, &input);
1647
1648        let actual = min(&sliced_input);
1649        assert_eq!(actual, expected);
1650        let actual = max(&sliced_input);
1651        assert_eq!(actual, expected);
1652    }
1653
1654    #[test]
1655    fn test_min_max_sliced_boolean() {
1656        let expected = Some(true);
1657        let input: BooleanArray = vec![None, Some(true)].into_iter().collect();
1658        let actual = min_boolean(&input);
1659        assert_eq!(actual, expected);
1660        let actual = max_boolean(&input);
1661        assert_eq!(actual, expected);
1662
1663        let sliced_input: BooleanArray = vec![None, None, None, None, None, Some(true)]
1664            .into_iter()
1665            .collect();
1666        let sliced_input = sliced_input.slice(4, 2);
1667
1668        assert_eq!(sliced_input, input);
1669
1670        let actual = min_boolean(&sliced_input);
1671        assert_eq!(actual, expected);
1672        let actual = max_boolean(&sliced_input);
1673        assert_eq!(actual, expected);
1674    }
1675
1676    #[test]
1677    fn test_min_max_sliced_string() {
1678        let expected = Some("foo");
1679        let input: StringArray = vec![None, Some("foo")].into_iter().collect();
1680        let actual = min_string(&input);
1681        assert_eq!(actual, expected);
1682        let actual = max_string(&input);
1683        assert_eq!(actual, expected);
1684
1685        let sliced_input: StringArray = vec![None, None, None, None, None, Some("foo")]
1686            .into_iter()
1687            .collect();
1688        let sliced_input = sliced_input.slice(4, 2);
1689
1690        assert_eq!(&sliced_input, &input);
1691
1692        let actual = min_string(&sliced_input);
1693        assert_eq!(actual, expected);
1694        let actual = max_string(&sliced_input);
1695        assert_eq!(actual, expected);
1696    }
1697
1698    #[test]
1699    fn test_min_max_sliced_binary() {
1700        let expected: Option<&[u8]> = Some(&[5]);
1701        let input: BinaryArray = vec![None, Some(&[5])].into_iter().collect();
1702        let actual = min_binary(&input);
1703        assert_eq!(actual, expected);
1704        let actual = max_binary(&input);
1705        assert_eq!(actual, expected);
1706
1707        let sliced_input: BinaryArray = vec![None, None, None, None, None, Some(&[5])]
1708            .into_iter()
1709            .collect();
1710        let sliced_input = sliced_input.slice(4, 2);
1711
1712        assert_eq!(&sliced_input, &input);
1713
1714        let actual = min_binary(&sliced_input);
1715        assert_eq!(actual, expected);
1716        let actual = max_binary(&sliced_input);
1717        assert_eq!(actual, expected);
1718    }
1719
1720    #[test]
1721    fn test_sum_overflow() {
1722        let a = Int32Array::from(vec![i32::MAX, 1]);
1723
1724        assert_eq!(sum(&a).unwrap(), -2147483648);
1725        assert_eq!(sum_array::<Int32Type, _>(&a).unwrap(), -2147483648);
1726    }
1727
1728    #[test]
1729    fn test_sum_checked_overflow() {
1730        let a = Int32Array::from(vec![i32::MAX, 1]);
1731
1732        sum_checked(&a).expect_err("overflow should be detected");
1733        sum_array_checked::<Int32Type, _>(&a).expect_err("overflow should be detected");
1734    }
1735}
arrow_arith/aggregate.rs

arrow_arith/
aggregate.rs