1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
//! Functionality to mmap in-memory data regions.
use std::sync::Arc;

use crate::{
    array::{FromFfi, PrimitiveArray},
    types::NativeType,
};

use super::{ArrowArray, InternalArrowArray};

/// State attached to an exported [`ArrowArray`] through its `private_data` pointer.
///
/// An instance is boxed by `create_array` and reclaimed by the `release` callback.
// `data` is never read after construction; it is stored only so that it is dropped
// together with this struct, hence the `dead_code` allowance.
#[allow(dead_code)]
struct PrivateData<T> {
    // the owner of the pointers' regions
    data: T,
    // buffer pointers exposed through `ArrowArray::buffers` (null for an absent buffer)
    buffers_ptr: Box<[*const std::os::raw::c_void]>,
    // heap-allocated children (from `Box::into_raw`), exposed through `ArrowArray::children`
    children_ptr: Box<[*mut ArrowArray]>,
    // heap-allocated dictionary (from `Box::into_raw`), if any
    dictionary_ptr: Option<*mut ArrowArray>,
}

/// Builds an [`ArrowArray`] whose buffers are raw pointers into memory owned by `data`.
///
/// * `data` - owner of the regions the buffer pointers refer to; it is kept alive inside the
///   array's private data until the array is released.
/// * `num_rows` - exported as the array's `length`.
/// * `null_count` - exported as the array's `null_count`.
/// * `buffers` - one entry per buffer; `None` is exported as a null pointer.
/// * `children` - child arrays; each one is moved to the heap and owned by the result.
/// * `dictionary` - optional dictionary array; moved to the heap and owned by the result.
///
/// The returned array has `offset` 0 and uses `release` as its release callback.
///
/// # Safety
///
/// The pointers yielded by `buffers` are exported as-is, without any validation. The caller
/// must ensure they remain valid for as long as the returned array is alive.
pub(crate) unsafe fn create_array<
    T: AsRef<[u8]>,
    I: Iterator<Item = Option<*const u8>>,
    II: Iterator<Item = ArrowArray>,
>(
    data: Arc<T>,
    num_rows: usize,
    null_count: usize,
    buffers: I,
    children: II,
    dictionary: Option<ArrowArray>,
) -> ArrowArray {
    // absent buffers are represented by null pointers
    let buffers_ptr: Box<[*const std::os::raw::c_void]> = buffers
        .map(|buffer| {
            buffer.map_or(std::ptr::null(), |ptr| ptr as *const std::os::raw::c_void)
        })
        .collect();

    // children and dictionary are moved to the heap; `release` takes them back
    let children_ptr: Box<[*mut ArrowArray]> = children
        .map(|child| Box::into_raw(Box::new(child)))
        .collect();
    let dictionary_ptr = dictionary.map(|dict| Box::into_raw(Box::new(dict)));

    let n_buffers = buffers_ptr.len() as i64;
    let n_children = children_ptr.len() as i64;

    let mut private_data = Box::new(PrivateData::<Arc<T>> {
        data,
        buffers_ptr,
        children_ptr,
        dictionary_ptr,
    });

    // The boxed slices live on the heap, so these pointers stay valid after
    // `private_data` itself is turned into a raw pointer below.
    let buffers = private_data.buffers_ptr.as_mut_ptr();
    let children = private_data.children_ptr.as_mut_ptr();
    let dictionary = match private_data.dictionary_ptr {
        Some(ptr) => ptr,
        None => std::ptr::null_mut(),
    };

    ArrowArray {
        length: num_rows as i64,
        null_count: null_count as i64,
        // IPC files are by definition not offset
        offset: 0,
        n_buffers,
        n_children,
        buffers,
        children,
        dictionary,
        release: Some(release::<Arc<T>>),
        private_data: Box::into_raw(private_data) as *mut ::std::os::raw::c_void,
    }
}

/// Release callback installed on the [`ArrowArray`]s built by `create_array`.
///
/// Takes back ownership of the boxed [`PrivateData`] stored in `private_data`, frees the
/// boxed children and dictionary it points to, and finally marks the array as released by
/// clearing its `release` field. A null `array` is ignored.
///
/// # Safety
///
/// `array`, when non-null, must point to an [`ArrowArray`] whose `private_data` is a
/// `Box<PrivateData<T>>` turned into a raw pointer, and which has not been released yet.
unsafe extern "C" fn release<T>(array: *mut ArrowArray) {
    let array = match array.as_mut() {
        Some(array) => array,
        None => return,
    };

    // re-box the private data so that it is freed when it goes out of scope
    let private = Box::from_raw(array.private_data as *mut PrivateData<T>);

    // children and dictionary were handed out via `Box::into_raw`; re-box each to drop it
    for &child in private.children_ptr.iter() {
        drop(Box::from_raw(child));
    }
    if let Some(dictionary) = private.dictionary_ptr {
        drop(Box::from_raw(dictionary));
    }

    array.release = None;
}

/// Creates a [`PrimitiveArray`] without nulls that points directly at the values of `slice`.
///
/// No values are copied: the array's values buffer is the memory of `slice` itself. This is
/// useful to apply arrow kernels to existing slices without paying for a memcopy.
///
/// # Safety
///
/// The returned [`PrimitiveArray`] carries no lifetime, yet it refers to the memory of
/// `slice`. The caller must guarantee that the returned array does _not_ outlive `slice`.
///
/// # Panics
///
/// Panics if the conversion of the intermediate FFI array into a [`PrimitiveArray`] fails.
pub unsafe fn slice<T: NativeType>(slice: &[T]) -> PrimitiveArray<T> {
    let bytes: &[u8] = bytemuck::cast_slice(slice);

    // first buffer is the validity (absent: there are no nulls), second one the values
    let buffers = [None, Some(bytes.as_ptr())];

    // safety: relies on this function's own contract, i.e. the caller guarantees that
    // the array is not used beyond the lifetime of `slice`
    let exported = create_array(
        Arc::new(bytes),
        slice.len(),
        0,
        buffers.into_iter(),
        [].into_iter(),
        None,
    );
    let exported = InternalArrowArray::new(exported, T::PRIMITIVE.into());

    // safety: we just created a valid array
    unsafe { PrimitiveArray::<T>::try_from_ffi(exported) }.unwrap()
}