1use crate::types::{ByteArrayType, GenericBinaryType};
19use crate::{Array, GenericByteArray, GenericListArray, GenericStringArray, OffsetSizeTrait};
20use arrow_data::ArrayData;
21use arrow_schema::DataType;
22
23pub type GenericBinaryArray<OffsetSize> = GenericByteArray<GenericBinaryType<OffsetSize>>;
25
26impl<OffsetSize: OffsetSizeTrait> GenericBinaryArray<OffsetSize> {
27 #[deprecated(note = "please use `Self::DATA_TYPE` instead")]
29 pub const fn get_data_type() -> DataType {
30 Self::DATA_TYPE
31 }
32
33 pub fn from_vec(v: Vec<&[u8]>) -> Self {
37 Self::from_iter_values(v)
38 }
39
40 pub fn from_opt_vec(v: Vec<Option<&[u8]>>) -> Self {
42 v.into_iter().collect()
43 }
44
45 fn from_list(v: GenericListArray<OffsetSize>) -> Self {
46 let v = v.into_data();
47 assert_eq!(
48 v.child_data().len(),
49 1,
50 "BinaryArray can only be created from list array of u8 values \
51 (i.e. List<PrimitiveArray<u8>>)."
52 );
53 let child_data = &v.child_data()[0];
54
55 assert_eq!(
56 child_data.child_data().len(),
57 0,
58 "BinaryArray can only be created from list array of u8 values \
59 (i.e. List<PrimitiveArray<u8>>)."
60 );
61 assert_eq!(
62 child_data.data_type(),
63 &DataType::UInt8,
64 "BinaryArray can only be created from List<u8> arrays, mismatched data types."
65 );
66 assert_eq!(
67 child_data.null_count(),
68 0,
69 "The child array cannot contain null values."
70 );
71
72 let builder = ArrayData::builder(Self::DATA_TYPE)
73 .len(v.len())
74 .offset(v.offset())
75 .add_buffer(v.buffers()[0].clone())
76 .add_buffer(child_data.buffers()[0].slice(child_data.offset()))
77 .nulls(v.nulls().cloned());
78
79 let data = unsafe { builder.build_unchecked() };
80 Self::from(data)
81 }
82
83 pub fn take_iter<'a>(
85 &'a self,
86 indexes: impl Iterator<Item = Option<usize>> + 'a,
87 ) -> impl Iterator<Item = Option<&'a [u8]>> {
88 indexes.map(|opt_index| opt_index.map(|index| self.value(index)))
89 }
90
91 pub unsafe fn take_iter_unchecked<'a>(
96 &'a self,
97 indexes: impl Iterator<Item = Option<usize>> + 'a,
98 ) -> impl Iterator<Item = Option<&'a [u8]>> {
99 indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index)))
100 }
101}
102
103impl<OffsetSize: OffsetSizeTrait> From<Vec<Option<&[u8]>>> for GenericBinaryArray<OffsetSize> {
104 fn from(v: Vec<Option<&[u8]>>) -> Self {
105 Self::from_opt_vec(v)
106 }
107}
108
109impl<OffsetSize: OffsetSizeTrait> From<Vec<&[u8]>> for GenericBinaryArray<OffsetSize> {
110 fn from(v: Vec<&[u8]>) -> Self {
111 Self::from_iter_values(v)
112 }
113}
114
115impl<T: OffsetSizeTrait> From<GenericListArray<T>> for GenericBinaryArray<T> {
116 fn from(v: GenericListArray<T>) -> Self {
117 Self::from_list(v)
118 }
119}
120
121impl<OffsetSize: OffsetSizeTrait> From<GenericStringArray<OffsetSize>>
122 for GenericBinaryArray<OffsetSize>
123{
124 fn from(value: GenericStringArray<OffsetSize>) -> Self {
125 let builder = value
126 .into_data()
127 .into_builder()
128 .data_type(GenericBinaryType::<OffsetSize>::DATA_TYPE);
129
130 Self::from(unsafe { builder.build_unchecked() })
133 }
134}
135
/// A [`GenericBinaryArray`] whose offsets are `i32`.
pub type BinaryArray = GenericBinaryArray<i32>;
177
/// A [`GenericBinaryArray`] whose offsets are `i64`.
pub type LargeBinaryArray = GenericBinaryArray<i64>;
217
218#[cfg(test)]
219mod tests {
220 use super::*;
221 use crate::{ListArray, StringArray};
222 use arrow_buffer::Buffer;
223 use arrow_schema::Field;
224 use std::sync::Arc;
225
#[test]
fn test_binary_array() {
    // "hello", "" and "parquet" stored back to back in one value buffer.
    let bytes = *b"helloparquet";
    let offsets = [0_i32, 5, 5, 12];

    let data = ArrayData::builder(DataType::Binary)
        .len(3)
        .add_buffer(Buffer::from_slice_ref(offsets))
        .add_buffer(Buffer::from_slice_ref(bytes))
        .build()
        .unwrap();
    let array = BinaryArray::from(data);

    assert_eq!(3, array.len());
    assert_eq!(0, array.null_count());

    // Every slot must be readable through both accessors and be non-null.
    let expected: [&[u8]; 3] = [b"hello", b"", b"parquet"];
    for (i, want) in expected.iter().enumerate() {
        assert_eq!(*want, array.value(i));
        assert_eq!(*want, unsafe { array.value_unchecked(i) });
        assert!(array.is_valid(i));
        assert!(!array.is_null(i));
    }

    assert_eq!(5, array.value_offsets()[2]);
    assert_eq!(7, array.value_length(2));
}
263
#[test]
fn test_binary_array_with_offsets() {
    let values: [u8; 12] = [
        b'h', b'e', b'l', b'l', b'o', b'p', b'a', b'r', b'q', b'u', b'e', b't',
    ];
    let offsets: [i32; 4] = [0, 5, 5, 12];

    // An array offset of 1 with length 2 exposes only the last two slots
    // ("" and "parquet") of the three described by `offsets`.
    let array_data = ArrayData::builder(DataType::Binary)
        .len(2)
        .offset(1)
        .add_buffer(Buffer::from_slice_ref(offsets))
        .add_buffer(Buffer::from_slice_ref(values))
        .build()
        .unwrap();
    let binary_array = BinaryArray::from(array_data);
    assert_eq!(
        [b'p', b'a', b'r', b'q', b'u', b'e', b't'],
        binary_array.value(1)
    );
    // Mirrors the `LargeBinaryArray` variant of this test: the unchecked
    // accessor must honour the array offset as well.
    assert_eq!([b'p', b'a', b'r', b'q', b'u', b'e', b't'], unsafe {
        binary_array.value_unchecked(1)
    });
    assert_eq!(5, binary_array.value_offsets()[0]);
    assert_eq!(0, binary_array.value_length(0));
    assert_eq!(5, binary_array.value_offsets()[1]);
    assert_eq!(7, binary_array.value_length(1));
}
289
#[test]
fn test_large_binary_array() {
    // "hello", "" and "parquet" stored back to back, addressed by i64 offsets.
    let bytes = *b"helloparquet";
    let offsets = [0_i64, 5, 5, 12];

    let data = ArrayData::builder(DataType::LargeBinary)
        .len(3)
        .add_buffer(Buffer::from_slice_ref(offsets))
        .add_buffer(Buffer::from_slice_ref(bytes))
        .build()
        .unwrap();
    let array = LargeBinaryArray::from(data);

    assert_eq!(3, array.len());
    assert_eq!(0, array.null_count());

    // Every slot must be readable through both accessors and be non-null.
    let expected: [&[u8]; 3] = [b"hello", b"", b"parquet"];
    for (i, want) in expected.iter().enumerate() {
        assert_eq!(*want, array.value(i));
        assert_eq!(*want, unsafe { array.value_unchecked(i) });
        assert!(array.is_valid(i));
        assert!(!array.is_null(i));
    }

    assert_eq!(5, array.value_offsets()[2]);
    assert_eq!(7, array.value_length(2));
}
327
#[test]
fn test_large_binary_array_with_offsets() {
    let bytes = *b"helloparquet";
    let offsets = [0_i64, 5, 5, 12];

    // An array offset of 1 with length 2 exposes only the last two slots
    // ("" and "parquet") of the three described by `offsets`.
    let data = ArrayData::builder(DataType::LargeBinary)
        .len(2)
        .offset(1)
        .add_buffer(Buffer::from_slice_ref(offsets))
        .add_buffer(Buffer::from_slice_ref(bytes))
        .build()
        .unwrap();
    let array = LargeBinaryArray::from(data);

    let parquet: &[u8] = b"parquet";
    assert_eq!(parquet, array.value(1));
    assert_eq!(parquet, unsafe { array.value_unchecked(1) });

    let visible_offsets = array.value_offsets();
    assert_eq!(5, visible_offsets[0]);
    assert_eq!(0, array.value_length(0));
    assert_eq!(5, visible_offsets[1]);
    assert_eq!(7, array.value_length(1));
}
356
357 fn _test_generic_binary_array_from_list_array<O: OffsetSizeTrait>() {
358 let values = b"helloparquet";
359 let child_data = ArrayData::builder(DataType::UInt8)
360 .len(12)
361 .add_buffer(Buffer::from(&values[..]))
362 .build()
363 .unwrap();
364 let offsets = [0, 5, 5, 12].map(|n| O::from_usize(n).unwrap());
365
366 let array_data1 = ArrayData::builder(GenericBinaryArray::<O>::DATA_TYPE)
368 .len(3)
369 .add_buffer(Buffer::from_slice_ref(offsets))
370 .add_buffer(Buffer::from_slice_ref(values))
371 .build()
372 .unwrap();
373 let binary_array1 = GenericBinaryArray::<O>::from(array_data1);
374
375 let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new(
376 "item",
377 DataType::UInt8,
378 false,
379 )));
380
381 let array_data2 = ArrayData::builder(data_type)
382 .len(3)
383 .add_buffer(Buffer::from_slice_ref(offsets))
384 .add_child_data(child_data)
385 .build()
386 .unwrap();
387 let list_array = GenericListArray::<O>::from(array_data2);
388 let binary_array2 = GenericBinaryArray::<O>::from(list_array);
389
390 assert_eq!(binary_array1.len(), binary_array2.len());
391 assert_eq!(binary_array1.null_count(), binary_array2.null_count());
392 assert_eq!(binary_array1.value_offsets(), binary_array2.value_offsets());
393 for i in 0..binary_array1.len() {
394 assert_eq!(binary_array1.value(i), binary_array2.value(i));
395 assert_eq!(binary_array1.value(i), unsafe {
396 binary_array2.value_unchecked(i)
397 });
398 assert_eq!(binary_array1.value_length(i), binary_array2.value_length(i));
399 }
400 }
401
// Runs the list-array conversion check with `i32` offsets.
#[test]
fn test_binary_array_from_list_array() {
    _test_generic_binary_array_from_list_array::<i32>();
}
406
// Runs the list-array conversion check with `i64` offsets.
#[test]
fn test_large_binary_array_from_list_array() {
    _test_generic_binary_array_from_list_array::<i64>();
}
411
412 fn _test_generic_binary_array_from_list_array_with_offset<O: OffsetSizeTrait>() {
413 let values = b"HelloArrowAndParquet";
414 let child_data = ArrayData::builder(DataType::UInt8)
416 .len(15)
417 .offset(5)
418 .add_buffer(Buffer::from(&values[..]))
419 .build()
420 .unwrap();
421
422 let offsets = [0, 5, 8, 15].map(|n| O::from_usize(n).unwrap());
423 let null_buffer = Buffer::from_slice_ref([0b101]);
424 let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new(
425 "item",
426 DataType::UInt8,
427 false,
428 )));
429
430 let array_data = ArrayData::builder(data_type)
432 .len(2)
433 .offset(1)
434 .add_buffer(Buffer::from_slice_ref(offsets))
435 .null_bit_buffer(Some(null_buffer))
436 .add_child_data(child_data)
437 .build()
438 .unwrap();
439 let list_array = GenericListArray::<O>::from(array_data);
440 let binary_array = GenericBinaryArray::<O>::from(list_array);
441
442 assert_eq!(2, binary_array.len());
443 assert_eq!(1, binary_array.null_count());
444 assert!(binary_array.is_null(0));
445 assert!(binary_array.is_valid(1));
446 assert_eq!(b"Parquet", binary_array.value(1));
447 }
448
// Runs the offset-aware list-array conversion check with `i32` offsets.
#[test]
fn test_binary_array_from_list_array_with_offset() {
    _test_generic_binary_array_from_list_array_with_offset::<i32>();
}
453
// Runs the offset-aware list-array conversion check with `i64` offsets.
#[test]
fn test_large_binary_array_from_list_array_with_offset() {
    _test_generic_binary_array_from_list_array_with_offset::<i64>();
}
458
459 fn _test_generic_binary_array_from_list_array_with_child_nulls_failed<O: OffsetSizeTrait>() {
460 let values = b"HelloArrow";
461 let child_data = ArrayData::builder(DataType::UInt8)
462 .len(10)
463 .add_buffer(Buffer::from(&values[..]))
464 .null_bit_buffer(Some(Buffer::from_slice_ref([0b1010101010])))
465 .build()
466 .unwrap();
467
468 let offsets = [0, 5, 10].map(|n| O::from_usize(n).unwrap());
469 let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new(
470 "item",
471 DataType::UInt8,
472 true,
473 )));
474
475 let array_data = ArrayData::builder(data_type)
477 .len(2)
478 .add_buffer(Buffer::from_slice_ref(offsets))
479 .add_child_data(child_data)
480 .build()
481 .unwrap();
482 let list_array = GenericListArray::<O>::from(array_data);
483 drop(GenericBinaryArray::<O>::from(list_array));
484 }
485
// Converting a list whose child contains nulls must panic (`i32` offsets).
#[test]
#[should_panic(expected = "The child array cannot contain null values.")]
fn test_binary_array_from_list_array_with_child_nulls_failed() {
    _test_generic_binary_array_from_list_array_with_child_nulls_failed::<i32>();
}
491
// Converting a list whose child contains nulls must panic (`i64` offsets).
#[test]
#[should_panic(expected = "The child array cannot contain null values.")]
fn test_large_binary_array_from_list_array_with_child_nulls_failed() {
    _test_generic_binary_array_from_list_array_with_child_nulls_failed::<i64>();
}
497
498 fn test_generic_binary_array_from_opt_vec<T: OffsetSizeTrait>() {
499 let values: Vec<Option<&[u8]>> =
500 vec![Some(b"one"), Some(b"two"), None, Some(b""), Some(b"three")];
501 let array = GenericBinaryArray::<T>::from_opt_vec(values);
502 assert_eq!(array.len(), 5);
503 assert_eq!(array.value(0), b"one");
504 assert_eq!(array.value(1), b"two");
505 assert_eq!(array.value(3), b"");
506 assert_eq!(array.value(4), b"three");
507 assert!(!array.is_null(0));
508 assert!(!array.is_null(1));
509 assert!(array.is_null(2));
510 assert!(!array.is_null(3));
511 assert!(!array.is_null(4));
512 }
513
// Runs the `from_opt_vec` check with `i64` offsets.
#[test]
fn test_large_binary_array_from_opt_vec() {
    test_generic_binary_array_from_opt_vec::<i64>()
}
518
// Runs the `from_opt_vec` check with `i32` offsets.
#[test]
fn test_binary_array_from_opt_vec() {
    test_generic_binary_array_from_opt_vec::<i32>()
}
523
524 #[test]
525 fn test_binary_array_from_unbound_iter() {
526 let value_iter = (0..)
528 .scan(0usize, |pos, i| {
529 if *pos < 10 {
530 *pos += 1;
531 Some(Some(format!("value {i}")))
532 } else {
533 None
535 }
536 })
537 .take(100);
539
540 let (_, upper_size_bound) = value_iter.size_hint();
541 assert_eq!(upper_size_bound, Some(100));
543 let binary_array: BinaryArray = value_iter.collect();
544 assert_eq!(binary_array.len(), 10);
546 }
547
#[test]
#[should_panic(
    expected = "BinaryArray can only be created from List<u8> arrays, mismatched data types."
)]
fn test_binary_array_from_incorrect_list_array() {
    // A list of `UInt32` items is not a valid source for a binary array.
    let items: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
    let child = ArrayData::builder(DataType::UInt32)
        .len(12)
        .add_buffer(Buffer::from_slice_ref(items))
        .build()
        .unwrap();

    let item_field = Arc::new(Field::new("item", DataType::UInt32, false));
    let list_offsets = [0_i32, 5, 5, 12];
    let list_data = ArrayData::builder(DataType::List(item_field))
        .len(3)
        .add_buffer(Buffer::from_slice_ref(list_offsets))
        .add_child_data(child)
        .build()
        .unwrap();
    let list = ListArray::from(list_data);

    // The conversion is expected to panic on the child data type check.
    drop(BinaryArray::from(list));
}
571
#[test]
#[should_panic(
    expected = "Trying to access an element at index 4 from a BinaryArray of length 3"
)]
fn test_binary_array_get_value_index_out_of_bound() {
    // Three slots: "hello", "" and "parquet".
    let bytes = *b"helloparquet";
    let offsets = [0_i32, 5, 5, 12];
    let data = ArrayData::builder(DataType::Binary)
        .len(3)
        .add_buffer(Buffer::from_slice_ref(offsets))
        .add_buffer(Buffer::from_slice_ref(bytes))
        .build()
        .unwrap();
    let array = BinaryArray::from(data);

    // Index 4 is past the end of the 3-element array, so `value` must panic.
    let _ = array.value(4);
}
588
#[test]
#[should_panic(expected = "LargeBinaryArray expects DataType::LargeBinary")]
fn test_binary_array_validation() {
    // Data produced by a `BinaryArray` must be rejected by `LargeBinaryArray`.
    let binary_data = BinaryArray::from_iter_values([&[1, 2]]).into_data();
    let _ = LargeBinaryArray::from(binary_data);
}
595
#[test]
fn test_binary_array_all_null() {
    // A single-element array whose only slot is null must still validate.
    let only_null: Vec<Option<&[u8]>> = vec![None];
    let array_data = BinaryArray::from(only_null).into_data();
    let validation = array_data.validate_full();
    validation.expect("All null array has valid array data");
}
605
#[test]
fn test_large_binary_array_all_null() {
    // A single-element array whose only slot is null must still validate.
    let only_null: Vec<Option<&[u8]>> = vec![None];
    let array_data = LargeBinaryArray::from(only_null).into_data();
    let validation = array_data.validate_full();
    validation.expect("All null array has valid array data");
}
615
#[test]
fn test_empty_offsets() {
    // Array data built from completely empty buffers must yield an empty array
    // whose offsets still read as `[0]`, for both offset widths.
    let binary = BinaryArray::from(
        ArrayData::builder(DataType::Binary)
            .buffers(vec![Buffer::from(&[]), Buffer::from(&[])])
            .build()
            .unwrap(),
    );
    // Same length check as the `LargeBinary` half below.
    assert_eq!(binary.len(), 0);
    assert_eq!(binary.value_offsets(), &[0]);

    let large_binary = LargeBinaryArray::from(
        ArrayData::builder(DataType::LargeBinary)
            .buffers(vec![Buffer::from(&[]), Buffer::from(&[])])
            .build()
            .unwrap(),
    );
    assert_eq!(large_binary.len(), 0);
    assert_eq!(large_binary.value_offsets(), &[0]);
}
634
#[test]
fn test_to_from_string() {
    // String -> binary -> string must round-trip to an equal array.
    let strings = StringArray::from_iter_values(["a", "b", "c", "d"]);
    let binary = BinaryArray::from(strings.clone());
    let round_tripped = StringArray::from(binary);
    assert_eq!(strings, round_tripped);
}
643}