arrow_array/array/
boolean_array.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
17
18use crate::array::print_long_array;
19use crate::builder::BooleanBuilder;
20use crate::iterator::BooleanIter;
21use crate::{Array, ArrayAccessor, ArrayRef, Scalar};
22use arrow_buffer::{bit_util, BooleanBuffer, Buffer, MutableBuffer, NullBuffer};
23use arrow_data::{ArrayData, ArrayDataBuilder};
24use arrow_schema::DataType;
25use std::any::Any;
26use std::sync::Arc;
27
28/// An array of [boolean values](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout)
29///
30/// # Example: From a Vec
31///
32/// ```
33/// # use arrow_array::{Array, BooleanArray};
34/// let arr: BooleanArray = vec![true, true, false].into();
35/// ```
36///
37/// # Example: From an optional Vec
38///
39/// ```
40/// # use arrow_array::{Array, BooleanArray};
41/// let arr: BooleanArray = vec![Some(true), None, Some(false)].into();
42/// ```
43///
44/// # Example: From an iterator
45///
46/// ```
47/// # use arrow_array::{Array, BooleanArray};
48/// let arr: BooleanArray = (0..5).map(|x| (x % 2 == 0).then(|| x % 3 == 0)).collect();
49/// let values: Vec<_> = arr.iter().collect();
50/// assert_eq!(&values, &[Some(true), None, Some(false), None, Some(false)])
51/// ```
52///
53/// # Example: Using Builder
54///
55/// ```
56/// # use arrow_array::Array;
57/// # use arrow_array::builder::BooleanBuilder;
58/// let mut builder = BooleanBuilder::new();
59/// builder.append_value(true);
60/// builder.append_null();
61/// builder.append_value(false);
62/// let array = builder.finish();
63/// let values: Vec<_> = array.iter().collect();
64/// assert_eq!(&values, &[Some(true), None, Some(false)])
65/// ```
66///
67#[derive(Clone)]
68pub struct BooleanArray {
69    values: BooleanBuffer,
70    nulls: Option<NullBuffer>,
71}
72
73impl std::fmt::Debug for BooleanArray {
74    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
75        write!(f, "BooleanArray\n[\n")?;
76        print_long_array(self, f, |array, index, f| {
77            std::fmt::Debug::fmt(&array.value(index), f)
78        })?;
79        write!(f, "]")
80    }
81}
82
83impl BooleanArray {
84    /// Create a new [`BooleanArray`] from the provided values and nulls
85    ///
86    /// # Panics
87    ///
88    /// Panics if `values.len() != nulls.len()`
89    pub fn new(values: BooleanBuffer, nulls: Option<NullBuffer>) -> Self {
90        if let Some(n) = nulls.as_ref() {
91            assert_eq!(values.len(), n.len());
92        }
93        Self { values, nulls }
94    }
95
96    /// Create a new [`BooleanArray`] with length `len` consisting only of nulls
97    pub fn new_null(len: usize) -> Self {
98        Self {
99            values: BooleanBuffer::new_unset(len),
100            nulls: Some(NullBuffer::new_null(len)),
101        }
102    }
103
104    /// Create a new [`Scalar`] from `value`
105    pub fn new_scalar(value: bool) -> Scalar<Self> {
106        let values = match value {
107            true => BooleanBuffer::new_set(1),
108            false => BooleanBuffer::new_unset(1),
109        };
110        Scalar::new(Self::new(values, None))
111    }
112
113    /// Create a new [`BooleanArray`] from a [`Buffer`] specified by `offset` and `len`, the `offset` and `len` in bits
114    /// Logically convert each bit in [`Buffer`] to boolean and use it to build [`BooleanArray`].
115    /// using this method will make the following points self-evident:
116    /// * there is no `null` in the constructed [`BooleanArray`];
117    /// * without considering `buffer.into()`, this method is efficient because there is no need to perform pack and unpack operations on boolean;
118    pub fn new_from_packed(buffer: impl Into<Buffer>, offset: usize, len: usize) -> Self {
119        BooleanBuffer::new(buffer.into(), offset, len).into()
120    }
121
122    /// Create a new [`BooleanArray`] from `&[u8]`
123    /// This method uses `new_from_packed` and constructs a [`Buffer`] using `value`, and offset is set to 0 and len is set to `value.len() * 8`
124    /// using this method will make the following points self-evident:
125    /// * there is no `null` in the constructed [`BooleanArray`];
126    /// * the length of the constructed [`BooleanArray`] is always a multiple of 8;
127    pub fn new_from_u8(value: &[u8]) -> Self {
128        BooleanBuffer::new(Buffer::from(value), 0, value.len() * 8).into()
129    }
130
131    /// Returns the length of this array.
132    pub fn len(&self) -> usize {
133        self.values.len()
134    }
135
136    /// Returns whether this array is empty.
137    pub fn is_empty(&self) -> bool {
138        self.values.is_empty()
139    }
140
141    /// Returns a zero-copy slice of this array with the indicated offset and length.
142    pub fn slice(&self, offset: usize, length: usize) -> Self {
143        Self {
144            values: self.values.slice(offset, length),
145            nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
146        }
147    }
148
149    /// Returns a new boolean array builder
150    pub fn builder(capacity: usize) -> BooleanBuilder {
151        BooleanBuilder::with_capacity(capacity)
152    }
153
154    /// Returns the underlying [`BooleanBuffer`] holding all the values of this array
155    pub fn values(&self) -> &BooleanBuffer {
156        &self.values
157    }
158
159    /// Returns the number of non null, true values within this array
160    pub fn true_count(&self) -> usize {
161        match self.nulls() {
162            Some(nulls) => {
163                let null_chunks = nulls.inner().bit_chunks().iter_padded();
164                let value_chunks = self.values().bit_chunks().iter_padded();
165                null_chunks
166                    .zip(value_chunks)
167                    .map(|(a, b)| (a & b).count_ones() as usize)
168                    .sum()
169            }
170            None => self.values().count_set_bits(),
171        }
172    }
173
174    /// Returns the number of non null, false values within this array
175    pub fn false_count(&self) -> usize {
176        self.len() - self.null_count() - self.true_count()
177    }
178
179    /// Returns the boolean value at index `i`.
180    ///
181    /// # Safety
182    /// This doesn't check bounds, the caller must ensure that index < self.len()
183    pub unsafe fn value_unchecked(&self, i: usize) -> bool {
184        self.values.value_unchecked(i)
185    }
186
187    /// Returns the boolean value at index `i`.
188    /// # Panics
189    /// Panics if index `i` is out of bounds
190    pub fn value(&self, i: usize) -> bool {
191        assert!(
192            i < self.len(),
193            "Trying to access an element at index {} from a BooleanArray of length {}",
194            i,
195            self.len()
196        );
197        // Safety:
198        // `i < self.len()
199        unsafe { self.value_unchecked(i) }
200    }
201
202    /// Returns an iterator that returns the values of `array.value(i)` for an iterator with each element `i`
203    pub fn take_iter<'a>(
204        &'a self,
205        indexes: impl Iterator<Item = Option<usize>> + 'a,
206    ) -> impl Iterator<Item = Option<bool>> + 'a {
207        indexes.map(|opt_index| opt_index.map(|index| self.value(index)))
208    }
209
210    /// Returns an iterator that returns the values of `array.value(i)` for an iterator with each element `i`
211    /// # Safety
212    ///
213    /// caller must ensure that the offsets in the iterator are less than the array len()
214    pub unsafe fn take_iter_unchecked<'a>(
215        &'a self,
216        indexes: impl Iterator<Item = Option<usize>> + 'a,
217    ) -> impl Iterator<Item = Option<bool>> + 'a {
218        indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index)))
219    }
220
221    /// Create a [`BooleanArray`] by evaluating the operation for
222    /// each element of the provided array
223    ///
224    /// ```
225    /// # use arrow_array::{BooleanArray, Int32Array};
226    ///
227    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
228    /// let r = BooleanArray::from_unary(&array, |x| x > 2);
229    /// assert_eq!(&r, &BooleanArray::from(vec![false, false, true, true, true]));
230    /// ```
231    pub fn from_unary<T: ArrayAccessor, F>(left: T, mut op: F) -> Self
232    where
233        F: FnMut(T::Item) -> bool,
234    {
235        let nulls = left.logical_nulls();
236        let values = BooleanBuffer::collect_bool(left.len(), |i| unsafe {
237            // SAFETY: i in range 0..len
238            op(left.value_unchecked(i))
239        });
240        Self::new(values, nulls)
241    }
242
243    /// Create a [`BooleanArray`] by evaluating the binary operation for
244    /// each element of the provided arrays
245    ///
246    /// ```
247    /// # use arrow_array::{BooleanArray, Int32Array};
248    ///
249    /// let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
250    /// let b = Int32Array::from(vec![1, 2, 0, 2, 5]);
251    /// let r = BooleanArray::from_binary(&a, &b, |a, b| a == b);
252    /// assert_eq!(&r, &BooleanArray::from(vec![true, true, false, false, true]));
253    /// ```
254    ///
255    /// # Panics
256    ///
257    /// This function panics if left and right are not the same length
258    ///
259    pub fn from_binary<T: ArrayAccessor, S: ArrayAccessor, F>(left: T, right: S, mut op: F) -> Self
260    where
261        F: FnMut(T::Item, S::Item) -> bool,
262    {
263        assert_eq!(left.len(), right.len());
264
265        let nulls = NullBuffer::union(
266            left.logical_nulls().as_ref(),
267            right.logical_nulls().as_ref(),
268        );
269        let values = BooleanBuffer::collect_bool(left.len(), |i| unsafe {
270            // SAFETY: i in range 0..len
271            op(left.value_unchecked(i), right.value_unchecked(i))
272        });
273        Self::new(values, nulls)
274    }
275
276    /// Deconstruct this array into its constituent parts
277    pub fn into_parts(self) -> (BooleanBuffer, Option<NullBuffer>) {
278        (self.values, self.nulls)
279    }
280}
281
282impl Array for BooleanArray {
283    fn as_any(&self) -> &dyn Any {
284        self
285    }
286
287    fn to_data(&self) -> ArrayData {
288        self.clone().into()
289    }
290
291    fn into_data(self) -> ArrayData {
292        self.into()
293    }
294
295    fn data_type(&self) -> &DataType {
296        &DataType::Boolean
297    }
298
299    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
300        Arc::new(self.slice(offset, length))
301    }
302
303    fn len(&self) -> usize {
304        self.values.len()
305    }
306
307    fn is_empty(&self) -> bool {
308        self.values.is_empty()
309    }
310
311    fn offset(&self) -> usize {
312        self.values.offset()
313    }
314
315    fn nulls(&self) -> Option<&NullBuffer> {
316        self.nulls.as_ref()
317    }
318
319    fn logical_null_count(&self) -> usize {
320        self.null_count()
321    }
322
323    fn get_buffer_memory_size(&self) -> usize {
324        let mut sum = self.values.inner().capacity();
325        if let Some(x) = &self.nulls {
326            sum += x.buffer().capacity()
327        }
328        sum
329    }
330
331    fn get_array_memory_size(&self) -> usize {
332        std::mem::size_of::<Self>() + self.get_buffer_memory_size()
333    }
334}
335
336impl ArrayAccessor for &BooleanArray {
337    type Item = bool;
338
339    fn value(&self, index: usize) -> Self::Item {
340        BooleanArray::value(self, index)
341    }
342
343    unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
344        BooleanArray::value_unchecked(self, index)
345    }
346}
347
348impl From<Vec<bool>> for BooleanArray {
349    fn from(data: Vec<bool>) -> Self {
350        let mut mut_buf = MutableBuffer::new_null(data.len());
351        {
352            let mut_slice = mut_buf.as_slice_mut();
353            for (i, b) in data.iter().enumerate() {
354                if *b {
355                    bit_util::set_bit(mut_slice, i);
356                }
357            }
358        }
359        let array_data = ArrayData::builder(DataType::Boolean)
360            .len(data.len())
361            .add_buffer(mut_buf.into());
362
363        let array_data = unsafe { array_data.build_unchecked() };
364        BooleanArray::from(array_data)
365    }
366}
367
368impl From<Vec<Option<bool>>> for BooleanArray {
369    fn from(data: Vec<Option<bool>>) -> Self {
370        data.iter().collect()
371    }
372}
373
374impl From<ArrayData> for BooleanArray {
375    fn from(data: ArrayData) -> Self {
376        assert_eq!(
377            data.data_type(),
378            &DataType::Boolean,
379            "BooleanArray expected ArrayData with type {} got {}",
380            DataType::Boolean,
381            data.data_type()
382        );
383        assert_eq!(
384            data.buffers().len(),
385            1,
386            "BooleanArray data should contain a single buffer only (values buffer)"
387        );
388        let values = BooleanBuffer::new(data.buffers()[0].clone(), data.offset(), data.len());
389
390        Self {
391            values,
392            nulls: data.nulls().cloned(),
393        }
394    }
395}
396
397impl From<BooleanArray> for ArrayData {
398    fn from(array: BooleanArray) -> Self {
399        let builder = ArrayDataBuilder::new(DataType::Boolean)
400            .len(array.values.len())
401            .offset(array.values.offset())
402            .nulls(array.nulls)
403            .buffers(vec![array.values.into_inner()]);
404
405        unsafe { builder.build_unchecked() }
406    }
407}
408
409impl<'a> IntoIterator for &'a BooleanArray {
410    type Item = Option<bool>;
411    type IntoIter = BooleanIter<'a>;
412
413    fn into_iter(self) -> Self::IntoIter {
414        BooleanIter::<'a>::new(self)
415    }
416}
417
418impl<'a> BooleanArray {
419    /// constructs a new iterator
420    pub fn iter(&'a self) -> BooleanIter<'a> {
421        BooleanIter::<'a>::new(self)
422    }
423}
424
425impl<Ptr: std::borrow::Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray {
426    fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
427        let iter = iter.into_iter();
428        let (_, data_len) = iter.size_hint();
429        let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound.
430
431        let num_bytes = bit_util::ceil(data_len, 8);
432        let mut null_builder = MutableBuffer::from_len_zeroed(num_bytes);
433        let mut val_builder = MutableBuffer::from_len_zeroed(num_bytes);
434
435        let data = val_builder.as_slice_mut();
436
437        let null_slice = null_builder.as_slice_mut();
438        iter.enumerate().for_each(|(i, item)| {
439            if let Some(a) = item.borrow() {
440                bit_util::set_bit(null_slice, i);
441                if *a {
442                    bit_util::set_bit(data, i);
443                }
444            }
445        });
446
447        let data = unsafe {
448            ArrayData::new_unchecked(
449                DataType::Boolean,
450                data_len,
451                None,
452                Some(null_builder.into()),
453                0,
454                vec![val_builder.into()],
455                vec![],
456            )
457        };
458        BooleanArray::from(data)
459    }
460}
461
462impl From<BooleanBuffer> for BooleanArray {
463    fn from(values: BooleanBuffer) -> Self {
464        Self {
465            values,
466            nulls: None,
467        }
468    }
469}
470
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_buffer::Buffer;
    use rand::{thread_rng, Rng};

    /// Debug output of an array without nulls.
    #[test]
    fn test_boolean_fmt_debug() {
        let arr = BooleanArray::from(vec![true, false, false]);
        assert_eq!(
            "BooleanArray\n[\n  true,\n  false,\n  false,\n]",
            format!("{arr:?}")
        );
    }

    /// Debug output renders null elements as `null`.
    #[test]
    fn test_boolean_with_null_fmt_debug() {
        let mut builder = BooleanArray::builder(3);
        builder.append_value(true);
        builder.append_null();
        builder.append_value(false);
        let arr = builder.finish();
        assert_eq!(
            "BooleanArray\n[\n  true,\n  null,\n  false,\n]",
            format!("{arr:?}")
        );
    }

    /// `From<Vec<bool>>` packs values into bits and produces no nulls.
    #[test]
    fn test_boolean_array_from_vec() {
        // 0b0000_1010: bits 1 and 3 set
        let buf = Buffer::from([10_u8]);
        let arr = BooleanArray::from(vec![false, true, false, true]);
        assert_eq!(&buf, arr.values().inner());
        assert_eq!(4, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for i in 0..4 {
            assert!(!arr.is_null(i));
            assert!(arr.is_valid(i));
            assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {i}")
        }
    }

    /// `From<Vec<Option<bool>>>` records `None` as null.
    #[test]
    fn test_boolean_array_from_vec_option() {
        // 0b0000_1010: bits 1 and 3 set, the null slot at index 2 stays unset
        let buf = Buffer::from([10_u8]);
        let arr = BooleanArray::from(vec![Some(false), Some(true), None, Some(true)]);
        assert_eq!(&buf, arr.values().inner());
        assert_eq!(4, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(1, arr.null_count());
        for i in 0..4 {
            if i == 2 {
                assert!(arr.is_null(i));
                assert!(!arr.is_valid(i));
            } else {
                assert!(!arr.is_null(i));
                assert!(arr.is_valid(i));
                assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {i}")
            }
        }
    }

    /// `new_from_packed` interprets every bit of the input as one element.
    #[test]
    fn test_boolean_array_from_packed() {
        let v = [1_u8, 2_u8, 3_u8];
        let arr = BooleanArray::new_from_packed(v, 0, 24);
        assert_eq!(24, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        assert!(arr.nulls.is_none());
        for i in 0..24 {
            assert!(!arr.is_null(i));
            assert!(arr.is_valid(i));
            assert_eq!(
                i == 0 || i == 9 || i == 16 || i == 17,
                arr.value(i),
                "failed at {i}"
            )
        }
    }

    /// `new_from_u8` interprets every bit of the byte slice as one element.
    #[test]
    fn test_boolean_array_from_slice_u8() {
        let v: Vec<u8> = vec![1, 2, 3];
        let slice = &v[..];
        let arr = BooleanArray::new_from_u8(slice);
        assert_eq!(24, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        assert!(arr.nulls().is_none());
        for i in 0..24 {
            assert!(!arr.is_null(i));
            assert!(arr.is_valid(i));
            assert_eq!(
                i == 0 || i == 9 || i == 16 || i == 17,
                arr.value(i),
                "failed at {i}"
            )
        }
    }

    /// Collecting an iterator of only `Some` values yields no null buffer.
    #[test]
    fn test_boolean_array_from_iter() {
        let v = vec![Some(false), Some(true), Some(false), Some(true)];
        let arr = v.into_iter().collect::<BooleanArray>();
        assert_eq!(4, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        assert!(arr.nulls().is_none());
        // Cover all four elements, including the last one at index 3.
        for i in 0..4 {
            assert!(!arr.is_null(i));
            assert!(arr.is_valid(i));
            assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {i}")
        }
    }

    /// Collecting an iterator containing `None` yields a null buffer.
    #[test]
    fn test_boolean_array_from_nullable_iter() {
        let v = vec![Some(true), None, Some(false), None];
        let arr = v.into_iter().collect::<BooleanArray>();
        assert_eq!(4, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(2, arr.null_count());
        assert!(arr.nulls().is_some());

        assert!(arr.is_valid(0));
        assert!(arr.is_null(1));
        assert!(arr.is_valid(2));
        assert!(arr.is_null(3));

        assert!(arr.value(0));
        assert!(!arr.value(2));
    }

    #[test]
    fn test_boolean_array_builder() {
        // Test building a boolean array with ArrayData builder and offset
        // 00011011
        let buf = Buffer::from([27_u8]);
        let buf2 = buf.clone();
        let data = ArrayData::builder(DataType::Boolean)
            .len(5)
            .offset(2)
            .add_buffer(buf)
            .build()
            .unwrap();
        let arr = BooleanArray::from(data);
        assert_eq!(&buf2, arr.values().inner());
        assert_eq!(5, arr.len());
        assert_eq!(2, arr.offset());
        assert_eq!(0, arr.null_count());
        // Bits 2..7 of 0b0001_1011, least significant bit first.
        let expected = [false, true, true, false, false];
        for (i, want) in expected.iter().enumerate() {
            assert_eq!(*want, arr.value(i), "failed at {i}");
        }
    }

    #[test]
    #[should_panic(
        expected = "Trying to access an element at index 4 from a BooleanArray of length 3"
    )]
    fn test_boolean_array_get_value_index_out_of_bound() {
        let v = vec![Some(true), None, Some(false)];
        let array = v.into_iter().collect::<BooleanArray>();

        array.value(4);
    }

    #[test]
    #[should_panic(expected = "BooleanArray data should contain a single buffer only \
                               (values buffer)")]
    // Different error messages, so skip for now
    // https://github.com/apache/arrow-rs/issues/1545
    #[cfg(not(feature = "force_validate"))]
    fn test_boolean_array_invalid_buffer_len() {
        let data = unsafe {
            ArrayData::builder(DataType::Boolean)
                .len(5)
                .build_unchecked()
        };
        drop(BooleanArray::from(data));
    }

    #[test]
    #[should_panic(expected = "BooleanArray expected ArrayData with type Boolean got Int32")]
    fn test_from_array_data_validation() {
        let _ = BooleanArray::from(ArrayData::new_empty(&DataType::Int32));
    }

    #[test]
    #[cfg_attr(miri, ignore)] // Takes too long
    fn test_true_false_count() {
        let mut rng = thread_rng();

        for _ in 0..10 {
            // No nulls
            let d: Vec<_> = (0..2000).map(|_| rng.gen_bool(0.5)).collect();
            let b = BooleanArray::from(d.clone());

            let expected_true = d.iter().filter(|x| **x).count();
            assert_eq!(b.true_count(), expected_true);
            assert_eq!(b.false_count(), d.len() - expected_true);

            // With nulls
            let d: Vec<_> = (0..2000)
                .map(|_| rng.gen_bool(0.5).then(|| rng.gen_bool(0.5)))
                .collect();
            let b = BooleanArray::from(d.clone());

            let expected_true = d.iter().filter(|x| matches!(x, Some(true))).count();
            assert_eq!(b.true_count(), expected_true);

            let expected_false = d.iter().filter(|x| matches!(x, Some(false))).count();
            assert_eq!(b.false_count(), expected_false);
        }
    }

    /// `into_parts` hands back the underlying values and null buffers.
    #[test]
    fn test_into_parts() {
        let boolean_array = [Some(true), None, Some(false)]
            .into_iter()
            .collect::<BooleanArray>();
        let (values, nulls) = boolean_array.into_parts();
        assert_eq!(values.values(), &[0b0000_0001]);
        assert!(nulls.is_some());
        assert_eq!(nulls.unwrap().buffer().as_slice(), &[0b0000_0101]);

        let boolean_array =
            BooleanArray::from(vec![false, false, false, false, false, false, false, true]);
        let (values, nulls) = boolean_array.into_parts();
        assert_eq!(values.values(), &[0b1000_0000]);
        assert!(nulls.is_none());
    }
}
704}