arrow_buffer/buffer/
scalar.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::alloc::Deallocation;
19use crate::buffer::Buffer;
20use crate::native::ArrowNativeType;
21use crate::{BufferBuilder, MutableBuffer, OffsetBuffer};
22use std::fmt::Formatter;
23use std::marker::PhantomData;
24use std::ops::Deref;
25
/// A strongly-typed [`Buffer`] supporting zero-copy cloning and slicing
///
/// The easiest way to think about `ScalarBuffer<T>` is as being equivalent to an
/// `Arc<Vec<T>>`, with the following differences:
///
/// - slicing and cloning are O(1).
/// - it supports externally allocated memory
///
/// ```
/// # use arrow_buffer::ScalarBuffer;
/// // Zero-copy conversion from Vec
/// let buffer = ScalarBuffer::from(vec![1, 2, 3]);
/// assert_eq!(&buffer, &[1, 2, 3]);
///
/// // Zero-copy slicing
/// let sliced = buffer.slice(1, 2);
/// assert_eq!(&sliced, &[2, 3]);
/// ```
#[derive(Clone)]
pub struct ScalarBuffer<T: ArrowNativeType> {
    /// Underlying data buffer
    buffer: Buffer,
    /// Zero-sized marker tying this buffer to the element type `T`;
    /// the values themselves are stored as bytes in `buffer`
    phantom: PhantomData<T>,
}
50
51impl<T: ArrowNativeType> std::fmt::Debug for ScalarBuffer<T> {
52    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
53        f.debug_tuple("ScalarBuffer").field(&self.as_ref()).finish()
54    }
55}
56
57impl<T: ArrowNativeType> ScalarBuffer<T> {
58    /// Create a new [`ScalarBuffer`] from a [`Buffer`], and an `offset`
59    /// and `length` in units of `T`
60    ///
61    /// # Panics
62    ///
63    /// This method will panic if
64    ///
65    /// * `offset` or `len` would result in overflow
66    /// * `buffer` is not aligned to a multiple of `std::mem::align_of::<T>`
67    /// * `bytes` is not large enough for the requested slice
68    pub fn new(buffer: Buffer, offset: usize, len: usize) -> Self {
69        let size = std::mem::size_of::<T>();
70        let byte_offset = offset.checked_mul(size).expect("offset overflow");
71        let byte_len = len.checked_mul(size).expect("length overflow");
72        buffer.slice_with_length(byte_offset, byte_len).into()
73    }
74
75    /// Returns a zero-copy slice of this buffer with length `len` and starting at `offset`
76    pub fn slice(&self, offset: usize, len: usize) -> Self {
77        Self::new(self.buffer.clone(), offset, len)
78    }
79
80    /// Returns the inner [`Buffer`]
81    pub fn inner(&self) -> &Buffer {
82        &self.buffer
83    }
84
85    /// Returns the inner [`Buffer`], consuming self
86    pub fn into_inner(self) -> Buffer {
87        self.buffer
88    }
89
90    /// Returns true if this [`ScalarBuffer`] is equal to `other`, using pointer comparisons
91    /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
92    /// return false when the arrays are logically equal
93    #[inline]
94    pub fn ptr_eq(&self, other: &Self) -> bool {
95        self.buffer.ptr_eq(&other.buffer)
96    }
97}
98
99impl<T: ArrowNativeType> Deref for ScalarBuffer<T> {
100    type Target = [T];
101
102    #[inline]
103    fn deref(&self) -> &Self::Target {
104        // SAFETY: Verified alignment in From<Buffer>
105        unsafe {
106            std::slice::from_raw_parts(
107                self.buffer.as_ptr() as *const T,
108                self.buffer.len() / std::mem::size_of::<T>(),
109            )
110        }
111    }
112}
113
114impl<T: ArrowNativeType> AsRef<[T]> for ScalarBuffer<T> {
115    #[inline]
116    fn as_ref(&self) -> &[T] {
117        self
118    }
119}
120
121impl<T: ArrowNativeType> From<MutableBuffer> for ScalarBuffer<T> {
122    fn from(value: MutableBuffer) -> Self {
123        Buffer::from(value).into()
124    }
125}
126
127impl<T: ArrowNativeType> From<Buffer> for ScalarBuffer<T> {
128    fn from(buffer: Buffer) -> Self {
129        let align = std::mem::align_of::<T>();
130        let is_aligned = buffer.as_ptr().align_offset(align) == 0;
131
132        match buffer.deallocation() {
133            Deallocation::Standard(_) => assert!(
134                is_aligned,
135                "Memory pointer is not aligned with the specified scalar type"
136            ),
137            Deallocation::Custom(_, _) =>
138                assert!(is_aligned, "Memory pointer from external source (e.g, FFI) is not aligned with the specified scalar type. Before importing buffer through FFI, please make sure the allocation is aligned."),
139        }
140
141        Self {
142            buffer,
143            phantom: Default::default(),
144        }
145    }
146}
147
148impl<T: ArrowNativeType> From<OffsetBuffer<T>> for ScalarBuffer<T> {
149    fn from(value: OffsetBuffer<T>) -> Self {
150        value.into_inner()
151    }
152}
153
154impl<T: ArrowNativeType> From<Vec<T>> for ScalarBuffer<T> {
155    fn from(value: Vec<T>) -> Self {
156        Self {
157            buffer: Buffer::from_vec(value),
158            phantom: Default::default(),
159        }
160    }
161}
162
163impl<T: ArrowNativeType> From<ScalarBuffer<T>> for Vec<T> {
164    fn from(value: ScalarBuffer<T>) -> Self {
165        value
166            .buffer
167            .into_vec()
168            .unwrap_or_else(|buffer| buffer.typed_data::<T>().into())
169    }
170}
171
172impl<T: ArrowNativeType> From<BufferBuilder<T>> for ScalarBuffer<T> {
173    fn from(mut value: BufferBuilder<T>) -> Self {
174        let len = value.len();
175        Self::new(value.finish(), 0, len)
176    }
177}
178
179impl<T: ArrowNativeType> FromIterator<T> for ScalarBuffer<T> {
180    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
181        iter.into_iter().collect::<Vec<_>>().into()
182    }
183}
184
185impl<'a, T: ArrowNativeType> IntoIterator for &'a ScalarBuffer<T> {
186    type Item = &'a T;
187    type IntoIter = std::slice::Iter<'a, T>;
188
189    fn into_iter(self) -> Self::IntoIter {
190        self.as_ref().iter()
191    }
192}
193
194impl<T: ArrowNativeType, S: AsRef<[T]> + ?Sized> PartialEq<S> for ScalarBuffer<T> {
195    fn eq(&self, other: &S) -> bool {
196        self.as_ref().eq(other.as_ref())
197    }
198}
199
200impl<T: ArrowNativeType, const N: usize> PartialEq<ScalarBuffer<T>> for [T; N] {
201    fn eq(&self, other: &ScalarBuffer<T>) -> bool {
202        self.as_ref().eq(other.as_ref())
203    }
204}
205
206impl<T: ArrowNativeType> PartialEq<ScalarBuffer<T>> for [T] {
207    fn eq(&self, other: &ScalarBuffer<T>) -> bool {
208        self.as_ref().eq(other.as_ref())
209    }
210}
211
212impl<T: ArrowNativeType> PartialEq<ScalarBuffer<T>> for Vec<T> {
213    fn eq(&self, other: &ScalarBuffer<T>) -> bool {
214        self.as_slice().eq(other.as_ref())
215    }
216}
217
/// Unit tests for [`ScalarBuffer`]: construction, bounds/overflow/alignment
/// panics, `Debug` output, and conversions to and from other containers.
#[cfg(test)]
mod tests {
    use std::{ptr::NonNull, sync::Arc};

    use super::*;

    /// Constructing typed views at various element offsets and lengths,
    /// including empty views in the middle and at the very end of the buffer.
    #[test]
    fn test_basic() {
        let expected = [0_i32, 1, 2];
        let buffer = Buffer::from_iter(expected.iter().cloned());
        let typed = ScalarBuffer::<i32>::new(buffer.clone(), 0, 3);
        assert_eq!(*typed, expected);

        let typed = ScalarBuffer::<i32>::new(buffer.clone(), 1, 2);
        assert_eq!(*typed, expected[1..]);

        let typed = ScalarBuffer::<i32>::new(buffer.clone(), 1, 0);
        assert!(typed.is_empty());

        // An offset equal to the element count is accepted when the length is zero
        let typed = ScalarBuffer::<i32>::new(buffer, 3, 0);
        assert!(typed.is_empty());
    }

    /// `Debug` renders as a tuple struct wrapping the list of values.
    #[test]
    fn test_debug() {
        let buffer = ScalarBuffer::from(vec![1, 2, 3]);
        assert_eq!(format!("{buffer:?}"), "ScalarBuffer([1, 2, 3])");
    }

    /// A buffer whose data pointer is not aligned for `i32` must be rejected.
    #[test]
    #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
    fn test_unaligned() {
        let expected = [0_i32, 1, 2];
        let buffer = Buffer::from_iter(expected.iter().cloned());
        // Presumably `Buffer::slice` takes a byte offset, so this shifts the
        // start by a single byte and breaks `i32` alignment
        let buffer = buffer.slice(1);
        ScalarBuffer::<i32>::new(buffer, 0, 2);
    }

    /// Offset 1 plus length 3 reaches past the 3 available elements.
    #[test]
    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
    fn test_length_out_of_bounds() {
        let buffer = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(buffer, 1, 3);
    }

    /// An offset past the end is rejected even when the requested length is zero.
    #[test]
    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
    fn test_offset_out_of_bounds() {
        let buffer = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(buffer, 4, 0);
    }

    /// Despite its name, this exercises the *offset* overflow check: the
    /// offset is `usize::MAX`, so converting it to a byte count overflows
    /// before the length is examined.
    #[test]
    #[should_panic(expected = "offset overflow")]
    fn test_length_overflow() {
        let buffer = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(buffer, usize::MAX, 1);
    }

    /// `usize::MAX / 4 + 1` is the smallest offset whose multiplication by
    /// the 4-byte width of `i32` overflows `usize`.
    #[test]
    #[should_panic(expected = "offset overflow")]
    fn test_start_overflow() {
        let buffer = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(buffer, usize::MAX / 4 + 1, 0);
    }

    /// Same boundary value as `test_start_overflow`, applied to the length.
    #[test]
    #[should_panic(expected = "length overflow")]
    fn test_end_overflow() {
        let buffer = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(buffer, 0, usize::MAX / 4 + 1);
    }

    /// A `BufferBuilder` converts into a `ScalarBuffer` holding the same values.
    #[test]
    fn convert_from_buffer_builder() {
        let input = vec![1, 2, 3, 4];
        let buffer_builder = BufferBuilder::from(input.clone());
        let scalar_buffer = ScalarBuffer::from(buffer_builder);
        assert_eq!(scalar_buffer.as_ref(), input);
    }

    /// `Vec::from(ScalarBuffer)` reuses the allocation when it can and copies
    /// otherwise; pointer comparisons are used to tell the two cases apart.
    #[test]
    fn into_vec() {
        let input = vec![1u8, 2, 3, 4];

        // No copy
        let input_buffer = Buffer::from_vec(input.clone());
        let input_ptr = input_buffer.as_ptr();
        let input_len = input_buffer.len();
        let scalar_buffer = ScalarBuffer::<u8>::new(input_buffer, 0, input_len);
        let vec = Vec::from(scalar_buffer);
        assert_eq!(vec.as_slice(), input.as_slice());
        assert_eq!(vec.as_ptr(), input_ptr);

        // Custom allocation - makes a copy
        let mut input_clone = input.clone();
        let input_ptr = NonNull::new(input_clone.as_mut_ptr()).unwrap();
        let dealloc = Arc::new(());
        let buffer =
            unsafe { Buffer::from_custom_allocation(input_ptr, input_clone.len(), dealloc as _) };
        let scalar_buffer = ScalarBuffer::<u8>::new(buffer, 0, input.len());
        let vec = Vec::from(scalar_buffer);
        assert_eq!(vec, input.as_slice());
        assert_ne!(vec.as_ptr(), input_ptr.as_ptr());

        // Offset - makes a copy
        let input_buffer = Buffer::from_vec(input.clone());
        let input_ptr = input_buffer.as_ptr();
        let input_len = input_buffer.len();
        let scalar_buffer = ScalarBuffer::<u8>::new(input_buffer, 1, input_len - 1);
        let vec = Vec::from(scalar_buffer);
        assert_eq!(vec.as_slice(), &input[1..]);
        assert_ne!(vec.as_ptr(), input_ptr);

        // Inner buffer Arc ref count != 0 - makes a copy
        // NOTE(review): `buffer` has a single owner here, so the copy is
        // presumably due to how `from_slice_ref` allocates rather than to
        // sharing - confirm and reword the comment above if so. Also, this
        // assertion only shows the result differs from `input`'s own
        // allocation, which `from_slice_ref` may already have copied from.
        let buffer = Buffer::from_slice_ref(input.as_slice());
        let scalar_buffer = ScalarBuffer::<u8>::new(buffer, 0, input.len());
        let vec = Vec::from(scalar_buffer);
        assert_eq!(vec, input.as_slice());
        assert_ne!(vec.as_ptr(), input.as_ptr());
    }
}