1use crate::array::print_long_array;
19use crate::builder::BooleanBuilder;
20use crate::iterator::BooleanIter;
21use crate::{Array, ArrayAccessor, ArrayRef, Scalar};
22use arrow_buffer::{bit_util, BooleanBuffer, Buffer, MutableBuffer, NullBuffer};
23use arrow_data::{ArrayData, ArrayDataBuilder};
24use arrow_schema::DataType;
25use std::any::Any;
26use std::sync::Arc;
27
28#[derive(Clone)]
68pub struct BooleanArray {
69 values: BooleanBuffer,
70 nulls: Option<NullBuffer>,
71}
72
73impl std::fmt::Debug for BooleanArray {
74 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
75 write!(f, "BooleanArray\n[\n")?;
76 print_long_array(self, f, |array, index, f| {
77 std::fmt::Debug::fmt(&array.value(index), f)
78 })?;
79 write!(f, "]")
80 }
81}
82
83impl BooleanArray {
84 pub fn new(values: BooleanBuffer, nulls: Option<NullBuffer>) -> Self {
90 if let Some(n) = nulls.as_ref() {
91 assert_eq!(values.len(), n.len());
92 }
93 Self { values, nulls }
94 }
95
96 pub fn new_null(len: usize) -> Self {
98 Self {
99 values: BooleanBuffer::new_unset(len),
100 nulls: Some(NullBuffer::new_null(len)),
101 }
102 }
103
104 pub fn new_scalar(value: bool) -> Scalar<Self> {
106 let values = match value {
107 true => BooleanBuffer::new_set(1),
108 false => BooleanBuffer::new_unset(1),
109 };
110 Scalar::new(Self::new(values, None))
111 }
112
113 pub fn new_from_packed(buffer: impl Into<Buffer>, offset: usize, len: usize) -> Self {
119 BooleanBuffer::new(buffer.into(), offset, len).into()
120 }
121
122 pub fn new_from_u8(value: &[u8]) -> Self {
128 BooleanBuffer::new(Buffer::from(value), 0, value.len() * 8).into()
129 }
130
131 pub fn len(&self) -> usize {
133 self.values.len()
134 }
135
136 pub fn is_empty(&self) -> bool {
138 self.values.is_empty()
139 }
140
141 pub fn slice(&self, offset: usize, length: usize) -> Self {
143 Self {
144 values: self.values.slice(offset, length),
145 nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
146 }
147 }
148
149 pub fn builder(capacity: usize) -> BooleanBuilder {
151 BooleanBuilder::with_capacity(capacity)
152 }
153
154 pub fn values(&self) -> &BooleanBuffer {
156 &self.values
157 }
158
159 pub fn true_count(&self) -> usize {
161 match self.nulls() {
162 Some(nulls) => {
163 let null_chunks = nulls.inner().bit_chunks().iter_padded();
164 let value_chunks = self.values().bit_chunks().iter_padded();
165 null_chunks
166 .zip(value_chunks)
167 .map(|(a, b)| (a & b).count_ones() as usize)
168 .sum()
169 }
170 None => self.values().count_set_bits(),
171 }
172 }
173
174 pub fn false_count(&self) -> usize {
176 self.len() - self.null_count() - self.true_count()
177 }
178
179 pub unsafe fn value_unchecked(&self, i: usize) -> bool {
184 self.values.value_unchecked(i)
185 }
186
187 pub fn value(&self, i: usize) -> bool {
191 assert!(
192 i < self.len(),
193 "Trying to access an element at index {} from a BooleanArray of length {}",
194 i,
195 self.len()
196 );
197 unsafe { self.value_unchecked(i) }
200 }
201
202 pub fn take_iter<'a>(
204 &'a self,
205 indexes: impl Iterator<Item = Option<usize>> + 'a,
206 ) -> impl Iterator<Item = Option<bool>> + 'a {
207 indexes.map(|opt_index| opt_index.map(|index| self.value(index)))
208 }
209
210 pub unsafe fn take_iter_unchecked<'a>(
215 &'a self,
216 indexes: impl Iterator<Item = Option<usize>> + 'a,
217 ) -> impl Iterator<Item = Option<bool>> + 'a {
218 indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index)))
219 }
220
221 pub fn from_unary<T: ArrayAccessor, F>(left: T, mut op: F) -> Self
232 where
233 F: FnMut(T::Item) -> bool,
234 {
235 let nulls = left.logical_nulls();
236 let values = BooleanBuffer::collect_bool(left.len(), |i| unsafe {
237 op(left.value_unchecked(i))
239 });
240 Self::new(values, nulls)
241 }
242
243 pub fn from_binary<T: ArrayAccessor, S: ArrayAccessor, F>(left: T, right: S, mut op: F) -> Self
260 where
261 F: FnMut(T::Item, S::Item) -> bool,
262 {
263 assert_eq!(left.len(), right.len());
264
265 let nulls = NullBuffer::union(
266 left.logical_nulls().as_ref(),
267 right.logical_nulls().as_ref(),
268 );
269 let values = BooleanBuffer::collect_bool(left.len(), |i| unsafe {
270 op(left.value_unchecked(i), right.value_unchecked(i))
272 });
273 Self::new(values, nulls)
274 }
275
276 pub fn into_parts(self) -> (BooleanBuffer, Option<NullBuffer>) {
278 (self.values, self.nulls)
279 }
280}
281
282impl Array for BooleanArray {
283 fn as_any(&self) -> &dyn Any {
284 self
285 }
286
287 fn to_data(&self) -> ArrayData {
288 self.clone().into()
289 }
290
291 fn into_data(self) -> ArrayData {
292 self.into()
293 }
294
295 fn data_type(&self) -> &DataType {
296 &DataType::Boolean
297 }
298
299 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
300 Arc::new(self.slice(offset, length))
301 }
302
303 fn len(&self) -> usize {
304 self.values.len()
305 }
306
307 fn is_empty(&self) -> bool {
308 self.values.is_empty()
309 }
310
311 fn offset(&self) -> usize {
312 self.values.offset()
313 }
314
315 fn nulls(&self) -> Option<&NullBuffer> {
316 self.nulls.as_ref()
317 }
318
319 fn logical_null_count(&self) -> usize {
320 self.null_count()
321 }
322
323 fn get_buffer_memory_size(&self) -> usize {
324 let mut sum = self.values.inner().capacity();
325 if let Some(x) = &self.nulls {
326 sum += x.buffer().capacity()
327 }
328 sum
329 }
330
331 fn get_array_memory_size(&self) -> usize {
332 std::mem::size_of::<Self>() + self.get_buffer_memory_size()
333 }
334}
335
336impl ArrayAccessor for &BooleanArray {
337 type Item = bool;
338
339 fn value(&self, index: usize) -> Self::Item {
340 BooleanArray::value(self, index)
341 }
342
343 unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
344 BooleanArray::value_unchecked(self, index)
345 }
346}
347
348impl From<Vec<bool>> for BooleanArray {
349 fn from(data: Vec<bool>) -> Self {
350 let mut mut_buf = MutableBuffer::new_null(data.len());
351 {
352 let mut_slice = mut_buf.as_slice_mut();
353 for (i, b) in data.iter().enumerate() {
354 if *b {
355 bit_util::set_bit(mut_slice, i);
356 }
357 }
358 }
359 let array_data = ArrayData::builder(DataType::Boolean)
360 .len(data.len())
361 .add_buffer(mut_buf.into());
362
363 let array_data = unsafe { array_data.build_unchecked() };
364 BooleanArray::from(array_data)
365 }
366}
367
368impl From<Vec<Option<bool>>> for BooleanArray {
369 fn from(data: Vec<Option<bool>>) -> Self {
370 data.iter().collect()
371 }
372}
373
374impl From<ArrayData> for BooleanArray {
375 fn from(data: ArrayData) -> Self {
376 assert_eq!(
377 data.data_type(),
378 &DataType::Boolean,
379 "BooleanArray expected ArrayData with type {} got {}",
380 DataType::Boolean,
381 data.data_type()
382 );
383 assert_eq!(
384 data.buffers().len(),
385 1,
386 "BooleanArray data should contain a single buffer only (values buffer)"
387 );
388 let values = BooleanBuffer::new(data.buffers()[0].clone(), data.offset(), data.len());
389
390 Self {
391 values,
392 nulls: data.nulls().cloned(),
393 }
394 }
395}
396
397impl From<BooleanArray> for ArrayData {
398 fn from(array: BooleanArray) -> Self {
399 let builder = ArrayDataBuilder::new(DataType::Boolean)
400 .len(array.values.len())
401 .offset(array.values.offset())
402 .nulls(array.nulls)
403 .buffers(vec![array.values.into_inner()]);
404
405 unsafe { builder.build_unchecked() }
406 }
407}
408
409impl<'a> IntoIterator for &'a BooleanArray {
410 type Item = Option<bool>;
411 type IntoIter = BooleanIter<'a>;
412
413 fn into_iter(self) -> Self::IntoIter {
414 BooleanIter::<'a>::new(self)
415 }
416}
417
418impl<'a> BooleanArray {
419 pub fn iter(&'a self) -> BooleanIter<'a> {
421 BooleanIter::<'a>::new(self)
422 }
423}
424
425impl<Ptr: std::borrow::Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray {
426 fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
427 let iter = iter.into_iter();
428 let (_, data_len) = iter.size_hint();
429 let data_len = data_len.expect("Iterator must be sized"); let num_bytes = bit_util::ceil(data_len, 8);
432 let mut null_builder = MutableBuffer::from_len_zeroed(num_bytes);
433 let mut val_builder = MutableBuffer::from_len_zeroed(num_bytes);
434
435 let data = val_builder.as_slice_mut();
436
437 let null_slice = null_builder.as_slice_mut();
438 iter.enumerate().for_each(|(i, item)| {
439 if let Some(a) = item.borrow() {
440 bit_util::set_bit(null_slice, i);
441 if *a {
442 bit_util::set_bit(data, i);
443 }
444 }
445 });
446
447 let data = unsafe {
448 ArrayData::new_unchecked(
449 DataType::Boolean,
450 data_len,
451 None,
452 Some(null_builder.into()),
453 0,
454 vec![val_builder.into()],
455 vec![],
456 )
457 };
458 BooleanArray::from(data)
459 }
460}
461
462impl From<BooleanBuffer> for BooleanArray {
463 fn from(values: BooleanBuffer) -> Self {
464 Self {
465 values,
466 nulls: None,
467 }
468 }
469}
470
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_buffer::Buffer;
    use rand::{thread_rng, Rng};

    /// `Debug` output for an array without nulls.
    #[test]
    fn test_boolean_fmt_debug() {
        let arr = BooleanArray::from(vec![true, false, false]);
        assert_eq!(
            "BooleanArray\n[\n  true,\n  false,\n  false,\n]",
            format!("{arr:?}")
        );
    }

    /// `Debug` output for an array containing a null.
    #[test]
    fn test_boolean_with_null_fmt_debug() {
        let mut builder = BooleanArray::builder(3);
        builder.append_value(true);
        builder.append_null();
        builder.append_value(false);
        let arr = builder.finish();
        assert_eq!(
            "BooleanArray\n[\n  true,\n  null,\n  false,\n]",
            format!("{arr:?}")
        );
    }

    /// `From<Vec<bool>>` packs the values into a single byte (0b1010 == 10).
    #[test]
    fn test_boolean_array_from_vec() {
        let buf = Buffer::from([10_u8]);
        let arr = BooleanArray::from(vec![false, true, false, true]);
        assert_eq!(&buf, arr.values().inner());
        assert_eq!(4, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for i in 0..4 {
            assert!(!arr.is_null(i));
            assert!(arr.is_valid(i));
            assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {i}")
        }
    }

    /// `From<Vec<Option<bool>>>` records the `None` entry as a null.
    #[test]
    fn test_boolean_array_from_vec_option() {
        let buf = Buffer::from([10_u8]);
        let arr = BooleanArray::from(vec![Some(false), Some(true), None, Some(true)]);
        assert_eq!(&buf, arr.values().inner());
        assert_eq!(4, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(1, arr.null_count());
        for i in 0..4 {
            if i == 2 {
                assert!(arr.is_null(i));
                assert!(!arr.is_valid(i));
            } else {
                assert!(!arr.is_null(i));
                assert!(arr.is_valid(i));
                assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {i}")
            }
        }
    }

    /// `new_from_packed` reads the bits of the given bytes.
    #[test]
    fn test_boolean_array_from_packed() {
        let v = [1_u8, 2_u8, 3_u8];
        let arr = BooleanArray::new_from_packed(v, 0, 24);
        assert_eq!(24, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        assert!(arr.nulls.is_none());
        for i in 0..24 {
            assert!(!arr.is_null(i));
            assert!(arr.is_valid(i));
            assert_eq!(
                i == 0 || i == 9 || i == 16 || i == 17,
                arr.value(i),
                "failed at {i}"
            )
        }
    }

    /// `new_from_u8` yields eight elements per input byte.
    #[test]
    fn test_boolean_array_from_slice_u8() {
        let v: Vec<u8> = vec![1, 2, 3];
        let slice = &v[..];
        let arr = BooleanArray::new_from_u8(slice);
        assert_eq!(24, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        assert!(arr.nulls().is_none());
        for i in 0..24 {
            assert!(!arr.is_null(i));
            assert!(arr.is_valid(i));
            assert_eq!(
                i == 0 || i == 9 || i == 16 || i == 17,
                arr.value(i),
                "failed at {i}"
            )
        }
    }

    /// Collecting an iterator of `Some` values produces no null buffer.
    #[test]
    fn test_boolean_array_from_iter() {
        let v = vec![Some(false), Some(true), Some(false), Some(true)];
        let arr = v.into_iter().collect::<BooleanArray>();
        assert_eq!(4, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        assert!(arr.nulls().is_none());
        // Check every element, including the last one (index 3).
        for i in 0..4 {
            assert!(!arr.is_null(i));
            assert!(arr.is_valid(i));
            assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {i}")
        }
    }

    /// Collecting an iterator with `None` entries records them as nulls.
    #[test]
    fn test_boolean_array_from_nullable_iter() {
        let v = vec![Some(true), None, Some(false), None];
        let arr = v.into_iter().collect::<BooleanArray>();
        assert_eq!(4, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(2, arr.null_count());
        assert!(arr.nulls().is_some());

        assert!(arr.is_valid(0));
        assert!(arr.is_null(1));
        assert!(arr.is_valid(2));
        assert!(arr.is_null(3));

        assert!(arr.value(0));
        assert!(!arr.value(2));
    }

    /// Building from `ArrayData` with a non-zero offset.
    #[test]
    fn test_boolean_array_builder() {
        // 27 == 0b0001_1011; with offset 2 the first three elements are
        // false, true, true.
        let buf = Buffer::from([27_u8]);
        let buf2 = buf.clone();
        let data = ArrayData::builder(DataType::Boolean)
            .len(5)
            .offset(2)
            .add_buffer(buf)
            .build()
            .unwrap();
        let arr = BooleanArray::from(data);
        assert_eq!(&buf2, arr.values().inner());
        assert_eq!(5, arr.len());
        assert_eq!(2, arr.offset());
        assert_eq!(0, arr.null_count());
        for i in 0..3 {
            assert_eq!(i != 0, arr.value(i), "failed at {i}");
        }
    }

    /// `value` panics with a descriptive message on an out-of-bounds index.
    #[test]
    #[should_panic(
        expected = "Trying to access an element at index 4 from a BooleanArray of length 3"
    )]
    fn test_boolean_array_get_value_index_out_of_bound() {
        let v = vec![Some(true), None, Some(false)];
        let array = v.into_iter().collect::<BooleanArray>();

        array.value(4);
    }

    /// `From<ArrayData>` rejects data that carries no values buffer.
    #[test]
    #[should_panic(
        expected = "BooleanArray data should contain a single buffer only (values buffer)"
    )]
    #[cfg(not(feature = "force_validate"))]
    fn test_boolean_array_invalid_buffer_len() {
        let data = unsafe {
            ArrayData::builder(DataType::Boolean)
                .len(5)
                .build_unchecked()
        };
        drop(BooleanArray::from(data));
    }

    /// `From<ArrayData>` rejects data of a non-boolean type.
    #[test]
    #[should_panic(expected = "BooleanArray expected ArrayData with type Boolean got Int32")]
    fn test_from_array_data_validation() {
        let _ = BooleanArray::from(ArrayData::new_empty(&DataType::Int32));
    }

    /// `true_count` / `false_count` agree with a naive count on random data,
    /// both without and with nulls.
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_true_false_count() {
        let mut rng = thread_rng();

        for _ in 0..10 {
            // Without nulls.
            let d: Vec<_> = (0..2000).map(|_| rng.gen_bool(0.5)).collect();
            let b = BooleanArray::from(d.clone());

            let expected_true = d.iter().filter(|x| **x).count();
            assert_eq!(b.true_count(), expected_true);
            assert_eq!(b.false_count(), d.len() - expected_true);

            // With nulls.
            let d: Vec<_> = (0..2000)
                .map(|_| rng.gen_bool(0.5).then(|| rng.gen_bool(0.5)))
                .collect();
            let b = BooleanArray::from(d.clone());

            let expected_true = d.iter().filter(|x| matches!(x, Some(true))).count();
            assert_eq!(b.true_count(), expected_true);

            let expected_false = d.iter().filter(|x| matches!(x, Some(false))).count();
            assert_eq!(b.false_count(), expected_false);
        }
    }

    /// `into_parts` hands back the values buffer and the optional null buffer.
    #[test]
    fn test_into_parts() {
        let boolean_array = [Some(true), None, Some(false)]
            .into_iter()
            .collect::<BooleanArray>();
        let (values, nulls) = boolean_array.into_parts();
        assert_eq!(values.values(), &[0b0000_0001]);
        assert!(nulls.is_some());
        assert_eq!(nulls.unwrap().buffer().as_slice(), &[0b0000_0101]);

        let boolean_array =
            BooleanArray::from(vec![false, false, false, false, false, false, false, true]);
        let (values, nulls) = boolean_array.into_parts();
        assert_eq!(values.values(), &[0b1000_0000]);
        assert!(nulls.is_none());
    }
}