arrow_buffer/buffer/
mutable.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::alloc::{handle_alloc_error, Layout};
19use std::mem;
20use std::ptr::NonNull;
21
22use crate::alloc::{Deallocation, ALIGNMENT};
23use crate::{
24    bytes::Bytes,
25    native::{ArrowNativeType, ToByteSlice},
26    util::bit_util,
27};
28
29use super::Buffer;
30
31/// A [`MutableBuffer`] is Arrow's interface to build a [`Buffer`] out of items or slices of items.
32///
33/// [`Buffer`]s created from [`MutableBuffer`] (via `into`) are guaranteed to have its pointer aligned
34/// along cache lines and in multiple of 64 bytes.
35///
36/// Use [MutableBuffer::push] to insert an item, [MutableBuffer::extend_from_slice]
37/// to insert many items, and `into` to convert it to [`Buffer`].
38///
39/// For a safe, strongly typed API consider using [`Vec`] and [`ScalarBuffer`](crate::ScalarBuffer)
40///
41/// Note: this may be deprecated in a future release ([#1176](https://github.com/apache/arrow-rs/issues/1176))
42///
43/// # Example
44///
45/// ```
46/// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
47/// let mut buffer = MutableBuffer::new(0);
48/// buffer.push(256u32);
49/// buffer.extend_from_slice(&[1u32]);
50/// let buffer: Buffer = buffer.into();
51/// assert_eq!(buffer.as_slice(), &[0u8, 1, 0, 0, 1, 0, 0, 0])
52/// ```
53#[derive(Debug)]
54pub struct MutableBuffer {
55    // dangling iff capacity = 0
56    data: NonNull<u8>,
57    // invariant: len <= capacity
58    len: usize,
59    layout: Layout,
60}
61
62impl MutableBuffer {
63    /// Allocate a new [MutableBuffer] with initial capacity to be at least `capacity`.
64    ///
65    /// See [`MutableBuffer::with_capacity`].
66    #[inline]
67    pub fn new(capacity: usize) -> Self {
68        Self::with_capacity(capacity)
69    }
70
71    /// Allocate a new [MutableBuffer] with initial capacity to be at least `capacity`.
72    ///
73    /// # Panics
74    ///
75    /// If `capacity`, when rounded up to the nearest multiple of [`ALIGNMENT`], is greater
76    /// then `isize::MAX`, then this function will panic.
77    #[inline]
78    pub fn with_capacity(capacity: usize) -> Self {
79        let capacity = bit_util::round_upto_multiple_of_64(capacity);
80        let layout = Layout::from_size_align(capacity, ALIGNMENT)
81            .expect("failed to create layout for MutableBuffer");
82        let data = match layout.size() {
83            0 => dangling_ptr(),
84            _ => {
85                // Safety: Verified size != 0
86                let raw_ptr = unsafe { std::alloc::alloc(layout) };
87                NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
88            }
89        };
90        Self {
91            data,
92            len: 0,
93            layout,
94        }
95    }
96
97    /// Allocates a new [MutableBuffer] with `len` and capacity to be at least `len` where
98    /// all bytes are guaranteed to be `0u8`.
99    /// # Example
100    /// ```
101    /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
102    /// let mut buffer = MutableBuffer::from_len_zeroed(127);
103    /// assert_eq!(buffer.len(), 127);
104    /// assert!(buffer.capacity() >= 127);
105    /// let data = buffer.as_slice_mut();
106    /// assert_eq!(data[126], 0u8);
107    /// ```
108    pub fn from_len_zeroed(len: usize) -> Self {
109        let layout = Layout::from_size_align(len, ALIGNMENT).unwrap();
110        let data = match layout.size() {
111            0 => dangling_ptr(),
112            _ => {
113                // Safety: Verified size != 0
114                let raw_ptr = unsafe { std::alloc::alloc_zeroed(layout) };
115                NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
116            }
117        };
118        Self { data, len, layout }
119    }
120
121    /// Create a [`MutableBuffer`] from the provided [`Vec`] without copying
122    #[inline]
123    #[deprecated(note = "Use From<Vec<T>>")]
124    pub fn from_vec<T: ArrowNativeType>(vec: Vec<T>) -> Self {
125        Self::from(vec)
126    }
127
128    /// Allocates a new [MutableBuffer] from given `Bytes`.
129    pub(crate) fn from_bytes(bytes: Bytes) -> Result<Self, Bytes> {
130        let layout = match bytes.deallocation() {
131            Deallocation::Standard(layout) => *layout,
132            _ => return Err(bytes),
133        };
134
135        let len = bytes.len();
136        let data = bytes.ptr();
137        mem::forget(bytes);
138
139        Ok(Self { data, len, layout })
140    }
141
142    /// creates a new [MutableBuffer] with capacity and length capable of holding `len` bits.
143    /// This is useful to create a buffer for packed bitmaps.
144    pub fn new_null(len: usize) -> Self {
145        let num_bytes = bit_util::ceil(len, 8);
146        MutableBuffer::from_len_zeroed(num_bytes)
147    }
148
149    /// Set the bits in the range of `[0, end)` to 0 (if `val` is false), or 1 (if `val`
150    /// is true). Also extend the length of this buffer to be `end`.
151    ///
152    /// This is useful when one wants to clear (or set) the bits and then manipulate
153    /// the buffer directly (e.g., modifying the buffer by holding a mutable reference
154    /// from `data_mut()`).
155    pub fn with_bitset(mut self, end: usize, val: bool) -> Self {
156        assert!(end <= self.layout.size());
157        let v = if val { 255 } else { 0 };
158        unsafe {
159            std::ptr::write_bytes(self.data.as_ptr(), v, end);
160            self.len = end;
161        }
162        self
163    }
164
165    /// Ensure that `count` bytes from `start` contain zero bits
166    ///
167    /// This is used to initialize the bits in a buffer, however, it has no impact on the
168    /// `len` of the buffer and so can be used to initialize the memory region from
169    /// `len` to `capacity`.
170    pub fn set_null_bits(&mut self, start: usize, count: usize) {
171        assert!(
172            start.saturating_add(count) <= self.layout.size(),
173            "range start index {start} and count {count} out of bounds for \
174            buffer of length {}",
175            self.layout.size(),
176        );
177
178        // Safety: `self.data[start..][..count]` is in-bounds and well-aligned for `u8`
179        unsafe {
180            std::ptr::write_bytes(self.data.as_ptr().add(start), 0, count);
181        }
182    }
183
184    /// Ensures that this buffer has at least `self.len + additional` bytes. This re-allocates iff
185    /// `self.len + additional > capacity`.
186    /// # Example
187    /// ```
188    /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
189    /// let mut buffer = MutableBuffer::new(0);
190    /// buffer.reserve(253); // allocates for the first time
191    /// (0..253u8).for_each(|i| buffer.push(i)); // no reallocation
192    /// let buffer: Buffer = buffer.into();
193    /// assert_eq!(buffer.len(), 253);
194    /// ```
195    // For performance reasons, this must be inlined so that the `if` is executed inside the caller, and not as an extra call that just
196    // exits.
197    #[inline(always)]
198    pub fn reserve(&mut self, additional: usize) {
199        let required_cap = self.len + additional;
200        if required_cap > self.layout.size() {
201            let new_capacity = bit_util::round_upto_multiple_of_64(required_cap);
202            let new_capacity = std::cmp::max(new_capacity, self.layout.size() * 2);
203            self.reallocate(new_capacity)
204        }
205    }
206
207    #[cold]
208    fn reallocate(&mut self, capacity: usize) {
209        let new_layout = Layout::from_size_align(capacity, self.layout.align()).unwrap();
210        if new_layout.size() == 0 {
211            if self.layout.size() != 0 {
212                // Safety: data was allocated with layout
213                unsafe { std::alloc::dealloc(self.as_mut_ptr(), self.layout) };
214                self.layout = new_layout
215            }
216            return;
217        }
218
219        let data = match self.layout.size() {
220            // Safety: new_layout is not empty
221            0 => unsafe { std::alloc::alloc(new_layout) },
222            // Safety: verified new layout is valid and not empty
223            _ => unsafe { std::alloc::realloc(self.as_mut_ptr(), self.layout, capacity) },
224        };
225        self.data = NonNull::new(data).unwrap_or_else(|| handle_alloc_error(new_layout));
226        self.layout = new_layout;
227    }
228
229    /// Truncates this buffer to `len` bytes
230    ///
231    /// If `len` is greater than the buffer's current length, this has no effect
232    #[inline(always)]
233    pub fn truncate(&mut self, len: usize) {
234        if len > self.len {
235            return;
236        }
237        self.len = len;
238    }
239
240    /// Resizes the buffer, either truncating its contents (with no change in capacity), or
241    /// growing it (potentially reallocating it) and writing `value` in the newly available bytes.
242    /// # Example
243    /// ```
244    /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
245    /// let mut buffer = MutableBuffer::new(0);
246    /// buffer.resize(253, 2); // allocates for the first time
247    /// assert_eq!(buffer.as_slice()[252], 2u8);
248    /// ```
249    // For performance reasons, this must be inlined so that the `if` is executed inside the caller, and not as an extra call that just
250    // exits.
251    #[inline(always)]
252    pub fn resize(&mut self, new_len: usize, value: u8) {
253        if new_len > self.len {
254            let diff = new_len - self.len;
255            self.reserve(diff);
256            // write the value
257            unsafe { self.data.as_ptr().add(self.len).write_bytes(value, diff) };
258        }
259        // this truncates the buffer when new_len < self.len
260        self.len = new_len;
261    }
262
263    /// Shrinks the capacity of the buffer as much as possible.
264    /// The new capacity will aligned to the nearest 64 bit alignment.
265    ///
266    /// # Example
267    /// ```
268    /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
269    /// // 2 cache lines
270    /// let mut buffer = MutableBuffer::new(128);
271    /// assert_eq!(buffer.capacity(), 128);
272    /// buffer.push(1);
273    /// buffer.push(2);
274    ///
275    /// buffer.shrink_to_fit();
276    /// assert!(buffer.capacity() >= 64 && buffer.capacity() < 128);
277    /// ```
278    pub fn shrink_to_fit(&mut self) {
279        let new_capacity = bit_util::round_upto_multiple_of_64(self.len);
280        if new_capacity < self.layout.size() {
281            self.reallocate(new_capacity)
282        }
283    }
284
285    /// Returns whether this buffer is empty or not.
286    #[inline]
287    pub const fn is_empty(&self) -> bool {
288        self.len == 0
289    }
290
291    /// Returns the length (the number of bytes written) in this buffer.
292    /// The invariant `buffer.len() <= buffer.capacity()` is always upheld.
293    #[inline]
294    pub const fn len(&self) -> usize {
295        self.len
296    }
297
298    /// Returns the total capacity in this buffer.
299    /// The invariant `buffer.len() <= buffer.capacity()` is always upheld.
300    #[inline]
301    pub const fn capacity(&self) -> usize {
302        self.layout.size()
303    }
304
305    /// Clear all existing data from this buffer.
306    pub fn clear(&mut self) {
307        self.len = 0
308    }
309
310    /// Returns the data stored in this buffer as a slice.
311    pub fn as_slice(&self) -> &[u8] {
312        self
313    }
314
315    /// Returns the data stored in this buffer as a mutable slice.
316    pub fn as_slice_mut(&mut self) -> &mut [u8] {
317        self
318    }
319
320    /// Returns a raw pointer to this buffer's internal memory
321    /// This pointer is guaranteed to be aligned along cache-lines.
322    #[inline]
323    pub const fn as_ptr(&self) -> *const u8 {
324        self.data.as_ptr()
325    }
326
327    /// Returns a mutable raw pointer to this buffer's internal memory
328    /// This pointer is guaranteed to be aligned along cache-lines.
329    #[inline]
330    pub fn as_mut_ptr(&mut self) -> *mut u8 {
331        self.data.as_ptr()
332    }
333
334    #[deprecated(
335        since = "2.0.0",
336        note = "This method is deprecated in favour of `into` from the trait `Into`."
337    )]
338    /// Freezes this buffer and return an immutable version of it.
339    pub fn freeze(self) -> Buffer {
340        self.into_buffer()
341    }
342
343    #[inline]
344    pub(super) fn into_buffer(self) -> Buffer {
345        let bytes = unsafe { Bytes::new(self.data, self.len, Deallocation::Standard(self.layout)) };
346        std::mem::forget(self);
347        Buffer::from_bytes(bytes)
348    }
349
350    /// View this buffer as a mutable slice of a specific type.
351    ///
352    /// # Panics
353    ///
354    /// This function panics if the underlying buffer is not aligned
355    /// correctly for type `T`.
356    pub fn typed_data_mut<T: ArrowNativeType>(&mut self) -> &mut [T] {
357        // SAFETY
358        // ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect
359        // implementation outside this crate, and this method checks alignment
360        let (prefix, offsets, suffix) = unsafe { self.as_slice_mut().align_to_mut::<T>() };
361        assert!(prefix.is_empty() && suffix.is_empty());
362        offsets
363    }
364
365    /// View buffer as a immutable slice of a specific type.
366    ///
367    /// # Panics
368    ///
369    /// This function panics if the underlying buffer is not aligned
370    /// correctly for type `T`.
371    pub fn typed_data<T: ArrowNativeType>(&self) -> &[T] {
372        // SAFETY
373        // ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect
374        // implementation outside this crate, and this method checks alignment
375        let (prefix, offsets, suffix) = unsafe { self.as_slice().align_to::<T>() };
376        assert!(prefix.is_empty() && suffix.is_empty());
377        offsets
378    }
379
380    /// Extends this buffer from a slice of items that can be represented in bytes, increasing its capacity if needed.
381    /// # Example
382    /// ```
383    /// # use arrow_buffer::buffer::MutableBuffer;
384    /// let mut buffer = MutableBuffer::new(0);
385    /// buffer.extend_from_slice(&[2u32, 0]);
386    /// assert_eq!(buffer.len(), 8) // u32 has 4 bytes
387    /// ```
388    #[inline]
389    pub fn extend_from_slice<T: ArrowNativeType>(&mut self, items: &[T]) {
390        let additional = mem::size_of_val(items);
391        self.reserve(additional);
392        unsafe {
393            // this assumes that `[ToByteSlice]` can be copied directly
394            // without calling `to_byte_slice` for each element,
395            // which is correct for all ArrowNativeType implementations.
396            let src = items.as_ptr() as *const u8;
397            let dst = self.data.as_ptr().add(self.len);
398            std::ptr::copy_nonoverlapping(src, dst, additional)
399        }
400        self.len += additional;
401    }
402
403    /// Extends the buffer with a new item, increasing its capacity if needed.
404    /// # Example
405    /// ```
406    /// # use arrow_buffer::buffer::MutableBuffer;
407    /// let mut buffer = MutableBuffer::new(0);
408    /// buffer.push(256u32);
409    /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
410    /// ```
411    #[inline]
412    pub fn push<T: ToByteSlice>(&mut self, item: T) {
413        let additional = std::mem::size_of::<T>();
414        self.reserve(additional);
415        unsafe {
416            let src = item.to_byte_slice().as_ptr();
417            let dst = self.data.as_ptr().add(self.len);
418            std::ptr::copy_nonoverlapping(src, dst, additional);
419        }
420        self.len += additional;
421    }
422
423    /// Extends the buffer with a new item, without checking for sufficient capacity
424    /// # Safety
425    /// Caller must ensure that the capacity()-len()>=`size_of<T>`()
426    #[inline]
427    pub unsafe fn push_unchecked<T: ToByteSlice>(&mut self, item: T) {
428        let additional = std::mem::size_of::<T>();
429        let src = item.to_byte_slice().as_ptr();
430        let dst = self.data.as_ptr().add(self.len);
431        std::ptr::copy_nonoverlapping(src, dst, additional);
432        self.len += additional;
433    }
434
435    /// Extends the buffer by `additional` bytes equal to `0u8`, incrementing its capacity if needed.
436    #[inline]
437    pub fn extend_zeros(&mut self, additional: usize) {
438        self.resize(self.len + additional, 0);
439    }
440
441    /// # Safety
442    /// The caller must ensure that the buffer was properly initialized up to `len`.
443    #[inline]
444    pub unsafe fn set_len(&mut self, len: usize) {
445        assert!(len <= self.capacity());
446        self.len = len;
447    }
448
449    /// Invokes `f` with values `0..len` collecting the boolean results into a new `MutableBuffer`
450    ///
451    /// This is similar to `from_trusted_len_iter_bool`, however, can be significantly faster
452    /// as it eliminates the conditional `Iterator::next`
453    #[inline]
454    pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, mut f: F) -> Self {
455        let mut buffer = Self::new(bit_util::ceil(len, 64) * 8);
456
457        let chunks = len / 64;
458        let remainder = len % 64;
459        for chunk in 0..chunks {
460            let mut packed = 0;
461            for bit_idx in 0..64 {
462                let i = bit_idx + chunk * 64;
463                packed |= (f(i) as u64) << bit_idx;
464            }
465
466            // SAFETY: Already allocated sufficient capacity
467            unsafe { buffer.push_unchecked(packed) }
468        }
469
470        if remainder != 0 {
471            let mut packed = 0;
472            for bit_idx in 0..remainder {
473                let i = bit_idx + chunks * 64;
474                packed |= (f(i) as u64) << bit_idx;
475            }
476
477            // SAFETY: Already allocated sufficient capacity
478            unsafe { buffer.push_unchecked(packed) }
479        }
480
481        buffer.truncate(bit_util::ceil(len, 8));
482        buffer
483    }
484}
485
486#[inline]
487fn dangling_ptr() -> NonNull<u8> {
488    // SAFETY: ALIGNMENT is a non-zero usize which is then casted
489    // to a *mut T. Therefore, `ptr` is not null and the conditions for
490    // calling new_unchecked() are respected.
491    #[cfg(miri)]
492    {
493        // Since miri implies a nightly rust version we can use the unstable strict_provenance feature
494        unsafe { NonNull::new_unchecked(std::ptr::without_provenance_mut(ALIGNMENT)) }
495    }
496    #[cfg(not(miri))]
497    {
498        unsafe { NonNull::new_unchecked(ALIGNMENT as *mut u8) }
499    }
500}
501
502impl<A: ArrowNativeType> Extend<A> for MutableBuffer {
503    #[inline]
504    fn extend<T: IntoIterator<Item = A>>(&mut self, iter: T) {
505        let iterator = iter.into_iter();
506        self.extend_from_iter(iterator)
507    }
508}
509
510impl<T: ArrowNativeType> From<Vec<T>> for MutableBuffer {
511    fn from(value: Vec<T>) -> Self {
512        // Safety
513        // Vec::as_ptr guaranteed to not be null and ArrowNativeType are trivially transmutable
514        let data = unsafe { NonNull::new_unchecked(value.as_ptr() as _) };
515        let len = value.len() * mem::size_of::<T>();
516        // Safety
517        // Vec guaranteed to have a valid layout matching that of `Layout::array`
518        // This is based on `RawVec::current_memory`
519        let layout = unsafe { Layout::array::<T>(value.capacity()).unwrap_unchecked() };
520        mem::forget(value);
521        Self { data, len, layout }
522    }
523}
524
525impl MutableBuffer {
526    #[inline]
527    pub(super) fn extend_from_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
528        &mut self,
529        mut iterator: I,
530    ) {
531        let item_size = std::mem::size_of::<T>();
532        let (lower, _) = iterator.size_hint();
533        let additional = lower * item_size;
534        self.reserve(additional);
535
536        // this is necessary because of https://github.com/rust-lang/rust/issues/32155
537        let mut len = SetLenOnDrop::new(&mut self.len);
538        let mut dst = unsafe { self.data.as_ptr().add(len.local_len) };
539        let capacity = self.layout.size();
540
541        while len.local_len + item_size <= capacity {
542            if let Some(item) = iterator.next() {
543                unsafe {
544                    let src = item.to_byte_slice().as_ptr();
545                    std::ptr::copy_nonoverlapping(src, dst, item_size);
546                    dst = dst.add(item_size);
547                }
548                len.local_len += item_size;
549            } else {
550                break;
551            }
552        }
553        drop(len);
554
555        iterator.for_each(|item| self.push(item));
556    }
557
558    /// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted (upper) length.
559    /// Prefer this to `collect` whenever possible, as it is faster ~60% faster.
560    /// # Example
561    /// ```
562    /// # use arrow_buffer::buffer::MutableBuffer;
563    /// let v = vec![1u32];
564    /// let iter = v.iter().map(|x| x * 2);
565    /// let buffer = unsafe { MutableBuffer::from_trusted_len_iter(iter) };
566    /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
567    /// ```
568    /// # Safety
569    /// This method assumes that the iterator's size is correct and is undefined behavior
570    /// to use it on an iterator that reports an incorrect length.
571    // This implementation is required for two reasons:
572    // 1. there is no trait `TrustedLen` in stable rust and therefore
573    //    we can't specialize `extend` for `TrustedLen` like `Vec` does.
574    // 2. `from_trusted_len_iter` is faster.
575    #[inline]
576    pub unsafe fn from_trusted_len_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
577        iterator: I,
578    ) -> Self {
579        let item_size = std::mem::size_of::<T>();
580        let (_, upper) = iterator.size_hint();
581        let upper = upper.expect("from_trusted_len_iter requires an upper limit");
582        let len = upper * item_size;
583
584        let mut buffer = MutableBuffer::new(len);
585
586        let mut dst = buffer.data.as_ptr();
587        for item in iterator {
588            // note how there is no reserve here (compared with `extend_from_iter`)
589            let src = item.to_byte_slice().as_ptr();
590            std::ptr::copy_nonoverlapping(src, dst, item_size);
591            dst = dst.add(item_size);
592        }
593        assert_eq!(
594            dst.offset_from(buffer.data.as_ptr()) as usize,
595            len,
596            "Trusted iterator length was not accurately reported"
597        );
598        buffer.len = len;
599        buffer
600    }
601
602    /// Creates a [`MutableBuffer`] from a boolean [`Iterator`] with a trusted (upper) length.
603    /// # use arrow_buffer::buffer::MutableBuffer;
604    /// # Example
605    /// ```
606    /// # use arrow_buffer::buffer::MutableBuffer;
607    /// let v = vec![false, true, false];
608    /// let iter = v.iter().map(|x| *x || true);
609    /// let buffer = unsafe { MutableBuffer::from_trusted_len_iter_bool(iter) };
610    /// assert_eq!(buffer.len(), 1) // 3 booleans have 1 byte
611    /// ```
612    /// # Safety
613    /// This method assumes that the iterator's size is correct and is undefined behavior
614    /// to use it on an iterator that reports an incorrect length.
615    // This implementation is required for two reasons:
616    // 1. there is no trait `TrustedLen` in stable rust and therefore
617    //    we can't specialize `extend` for `TrustedLen` like `Vec` does.
618    // 2. `from_trusted_len_iter_bool` is faster.
619    #[inline]
620    pub unsafe fn from_trusted_len_iter_bool<I: Iterator<Item = bool>>(mut iterator: I) -> Self {
621        let (_, upper) = iterator.size_hint();
622        let len = upper.expect("from_trusted_len_iter requires an upper limit");
623
624        Self::collect_bool(len, |_| iterator.next().unwrap())
625    }
626
627    /// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted (upper) length or errors
628    /// if any of the items of the iterator is an error.
629    /// Prefer this to `collect` whenever possible, as it is faster ~60% faster.
630    /// # Safety
631    /// This method assumes that the iterator's size is correct and is undefined behavior
632    /// to use it on an iterator that reports an incorrect length.
633    #[inline]
634    pub unsafe fn try_from_trusted_len_iter<
635        E,
636        T: ArrowNativeType,
637        I: Iterator<Item = Result<T, E>>,
638    >(
639        iterator: I,
640    ) -> Result<Self, E> {
641        let item_size = std::mem::size_of::<T>();
642        let (_, upper) = iterator.size_hint();
643        let upper = upper.expect("try_from_trusted_len_iter requires an upper limit");
644        let len = upper * item_size;
645
646        let mut buffer = MutableBuffer::new(len);
647
648        let mut dst = buffer.data.as_ptr();
649        for item in iterator {
650            let item = item?;
651            // note how there is no reserve here (compared with `extend_from_iter`)
652            let src = item.to_byte_slice().as_ptr();
653            std::ptr::copy_nonoverlapping(src, dst, item_size);
654            dst = dst.add(item_size);
655        }
656        // try_from_trusted_len_iter is instantiated a lot, so we extract part of it into a less
657        // generic method to reduce compile time
658        unsafe fn finalize_buffer(dst: *mut u8, buffer: &mut MutableBuffer, len: usize) {
659            assert_eq!(
660                dst.offset_from(buffer.data.as_ptr()) as usize,
661                len,
662                "Trusted iterator length was not accurately reported"
663            );
664            buffer.len = len;
665        }
666        finalize_buffer(dst, &mut buffer, len);
667        Ok(buffer)
668    }
669}
670
671impl Default for MutableBuffer {
672    fn default() -> Self {
673        Self::with_capacity(0)
674    }
675}
676
677impl std::ops::Deref for MutableBuffer {
678    type Target = [u8];
679
680    fn deref(&self) -> &[u8] {
681        unsafe { std::slice::from_raw_parts(self.as_ptr(), self.len) }
682    }
683}
684
685impl std::ops::DerefMut for MutableBuffer {
686    fn deref_mut(&mut self) -> &mut [u8] {
687        unsafe { std::slice::from_raw_parts_mut(self.as_mut_ptr(), self.len) }
688    }
689}
690
691impl Drop for MutableBuffer {
692    fn drop(&mut self) {
693        if self.layout.size() != 0 {
694            // Safety: data was allocated with standard allocator with given layout
695            unsafe { std::alloc::dealloc(self.data.as_ptr() as _, self.layout) };
696        }
697    }
698}
699
700impl PartialEq for MutableBuffer {
701    fn eq(&self, other: &MutableBuffer) -> bool {
702        if self.len != other.len {
703            return false;
704        }
705        if self.layout != other.layout {
706            return false;
707        }
708        self.as_slice() == other.as_slice()
709    }
710}
711
712unsafe impl Sync for MutableBuffer {}
713unsafe impl Send for MutableBuffer {}
714
715struct SetLenOnDrop<'a> {
716    len: &'a mut usize,
717    local_len: usize,
718}
719
720impl<'a> SetLenOnDrop<'a> {
721    #[inline]
722    fn new(len: &'a mut usize) -> Self {
723        SetLenOnDrop {
724            local_len: *len,
725            len,
726        }
727    }
728}
729
730impl Drop for SetLenOnDrop<'_> {
731    #[inline]
732    fn drop(&mut self) {
733        *self.len = self.local_len;
734    }
735}
736
737/// Creating a `MutableBuffer` instance by setting bits according to the boolean values
738impl std::iter::FromIterator<bool> for MutableBuffer {
739    fn from_iter<I>(iter: I) -> Self
740    where
741        I: IntoIterator<Item = bool>,
742    {
743        let mut iterator = iter.into_iter();
744        let mut result = {
745            let byte_capacity: usize = iterator.size_hint().0.saturating_add(7) / 8;
746            MutableBuffer::new(byte_capacity)
747        };
748
749        loop {
750            let mut exhausted = false;
751            let mut byte_accum: u8 = 0;
752            let mut mask: u8 = 1;
753
754            //collect (up to) 8 bits into a byte
755            while mask != 0 {
756                if let Some(value) = iterator.next() {
757                    byte_accum |= match value {
758                        true => mask,
759                        false => 0,
760                    };
761                    mask <<= 1;
762                } else {
763                    exhausted = true;
764                    break;
765                }
766            }
767
768            // break if the iterator was exhausted before it provided a bool for this byte
769            if exhausted && mask == 1 {
770                break;
771            }
772
773            //ensure we have capacity to write the byte
774            if result.len() == result.capacity() {
775                //no capacity for new byte, allocate 1 byte more (plus however many more the iterator advertises)
776                let additional_byte_capacity = 1usize.saturating_add(
777                    iterator.size_hint().0.saturating_add(7) / 8, //convert bit count to byte count, rounding up
778                );
779                result.reserve(additional_byte_capacity)
780            }
781
782            // Soundness: capacity was allocated above
783            unsafe { result.push_unchecked(byte_accum) };
784            if exhausted {
785                break;
786            }
787        }
788        result
789    }
790}
791
792impl<T: ArrowNativeType> std::iter::FromIterator<T> for MutableBuffer {
793    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
794        let mut buffer = Self::default();
795        buffer.extend_from_iter(iter.into_iter());
796        buffer
797    }
798}
799
800#[cfg(test)]
801mod tests {
802    use super::*;
803
804    #[test]
805    fn test_mutable_new() {
806        let buf = MutableBuffer::new(63);
807        assert_eq!(64, buf.capacity());
808        assert_eq!(0, buf.len());
809        assert!(buf.is_empty());
810    }
811
812    #[test]
813    fn test_mutable_default() {
814        let buf = MutableBuffer::default();
815        assert_eq!(0, buf.capacity());
816        assert_eq!(0, buf.len());
817        assert!(buf.is_empty());
818
819        let mut buf = MutableBuffer::default();
820        buf.extend_from_slice(b"hello");
821        assert_eq!(5, buf.len());
822        assert_eq!(b"hello", buf.as_slice());
823    }
824
825    #[test]
826    fn test_mutable_extend_from_slice() {
827        let mut buf = MutableBuffer::new(100);
828        buf.extend_from_slice(b"hello");
829        assert_eq!(5, buf.len());
830        assert_eq!(b"hello", buf.as_slice());
831
832        buf.extend_from_slice(b" world");
833        assert_eq!(11, buf.len());
834        assert_eq!(b"hello world", buf.as_slice());
835
836        buf.clear();
837        assert_eq!(0, buf.len());
838        buf.extend_from_slice(b"hello arrow");
839        assert_eq!(11, buf.len());
840        assert_eq!(b"hello arrow", buf.as_slice());
841    }
842
843    #[test]
844    fn mutable_extend_from_iter() {
845        let mut buf = MutableBuffer::new(0);
846        buf.extend(vec![1u32, 2]);
847        assert_eq!(8, buf.len());
848        assert_eq!(&[1u8, 0, 0, 0, 2, 0, 0, 0], buf.as_slice());
849
850        buf.extend(vec![3u32, 4]);
851        assert_eq!(16, buf.len());
852        assert_eq!(
853            &[1u8, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0],
854            buf.as_slice()
855        );
856    }
857
858    #[test]
859    fn mutable_extend_from_iter_unaligned_u64() {
860        let mut buf = MutableBuffer::new(16);
861        buf.push(1_u8);
862        buf.extend([1_u64]);
863        assert_eq!(9, buf.len());
864        assert_eq!(&[1u8, 1u8, 0, 0, 0, 0, 0, 0, 0], buf.as_slice());
865    }
866
867    #[test]
868    fn mutable_extend_from_slice_unaligned_u64() {
869        let mut buf = MutableBuffer::new(16);
870        buf.extend_from_slice(&[1_u8]);
871        buf.extend_from_slice(&[1_u64]);
872        assert_eq!(9, buf.len());
873        assert_eq!(&[1u8, 1u8, 0, 0, 0, 0, 0, 0, 0], buf.as_slice());
874    }
875
876    #[test]
877    fn mutable_push_unaligned_u64() {
878        let mut buf = MutableBuffer::new(16);
879        buf.push(1_u8);
880        buf.push(1_u64);
881        assert_eq!(9, buf.len());
882        assert_eq!(&[1u8, 1u8, 0, 0, 0, 0, 0, 0, 0], buf.as_slice());
883    }
884
885    #[test]
886    fn mutable_push_unchecked_unaligned_u64() {
887        let mut buf = MutableBuffer::new(16);
888        unsafe {
889            buf.push_unchecked(1_u8);
890            buf.push_unchecked(1_u64);
891        }
892        assert_eq!(9, buf.len());
893        assert_eq!(&[1u8, 1u8, 0, 0, 0, 0, 0, 0, 0], buf.as_slice());
894    }
895
896    #[test]
897    fn test_from_trusted_len_iter() {
898        let iter = vec![1u32, 2].into_iter();
899        let buf = unsafe { MutableBuffer::from_trusted_len_iter(iter) };
900        assert_eq!(8, buf.len());
901        assert_eq!(&[1u8, 0, 0, 0, 2, 0, 0, 0], buf.as_slice());
902    }
903
904    #[test]
905    fn test_mutable_reserve() {
906        let mut buf = MutableBuffer::new(1);
907        assert_eq!(64, buf.capacity());
908
909        // Reserving a smaller capacity should have no effect.
910        buf.reserve(10);
911        assert_eq!(64, buf.capacity());
912
913        buf.reserve(80);
914        assert_eq!(128, buf.capacity());
915
916        buf.reserve(129);
917        assert_eq!(256, buf.capacity());
918    }
919
920    #[test]
921    fn test_mutable_resize() {
922        let mut buf = MutableBuffer::new(1);
923        assert_eq!(64, buf.capacity());
924        assert_eq!(0, buf.len());
925
926        buf.resize(20, 0);
927        assert_eq!(64, buf.capacity());
928        assert_eq!(20, buf.len());
929
930        buf.resize(10, 0);
931        assert_eq!(64, buf.capacity());
932        assert_eq!(10, buf.len());
933
934        buf.resize(100, 0);
935        assert_eq!(128, buf.capacity());
936        assert_eq!(100, buf.len());
937
938        buf.resize(30, 0);
939        assert_eq!(128, buf.capacity());
940        assert_eq!(30, buf.len());
941
942        buf.resize(0, 0);
943        assert_eq!(128, buf.capacity());
944        assert_eq!(0, buf.len());
945    }
946
947    #[test]
948    fn test_mutable_into() {
949        let mut buf = MutableBuffer::new(1);
950        buf.extend_from_slice(b"aaaa bbbb cccc dddd");
951        assert_eq!(19, buf.len());
952        assert_eq!(64, buf.capacity());
953        assert_eq!(b"aaaa bbbb cccc dddd", buf.as_slice());
954
955        let immutable_buf: Buffer = buf.into();
956        assert_eq!(19, immutable_buf.len());
957        assert_eq!(64, immutable_buf.capacity());
958        assert_eq!(b"aaaa bbbb cccc dddd", immutable_buf.as_slice());
959    }
960
961    #[test]
962    fn test_mutable_equal() {
963        let mut buf = MutableBuffer::new(1);
964        let mut buf2 = MutableBuffer::new(1);
965
966        buf.extend_from_slice(&[0xaa]);
967        buf2.extend_from_slice(&[0xaa, 0xbb]);
968        assert!(buf != buf2);
969
970        buf.extend_from_slice(&[0xbb]);
971        assert_eq!(buf, buf2);
972
973        buf2.reserve(65);
974        assert!(buf != buf2);
975    }
976
977    #[test]
978    fn test_mutable_shrink_to_fit() {
979        let mut buffer = MutableBuffer::new(128);
980        assert_eq!(buffer.capacity(), 128);
981        buffer.push(1);
982        buffer.push(2);
983
984        buffer.shrink_to_fit();
985        assert!(buffer.capacity() >= 64 && buffer.capacity() < 128);
986    }
987
988    #[test]
989    fn test_mutable_set_null_bits() {
990        let mut buffer = MutableBuffer::new(8).with_bitset(8, true);
991
992        for i in 0..=buffer.capacity() {
993            buffer.set_null_bits(i, 0);
994            assert_eq!(buffer[..8], [255; 8][..]);
995        }
996
997        buffer.set_null_bits(1, 4);
998        assert_eq!(buffer[..8], [255, 0, 0, 0, 0, 255, 255, 255][..]);
999    }
1000
1001    #[test]
1002    #[should_panic = "out of bounds for buffer of length"]
1003    fn test_mutable_set_null_bits_oob() {
1004        let mut buffer = MutableBuffer::new(64);
1005        buffer.set_null_bits(1, buffer.capacity());
1006    }
1007
1008    #[test]
1009    #[should_panic = "out of bounds for buffer of length"]
1010    fn test_mutable_set_null_bits_oob_by_overflow() {
1011        let mut buffer = MutableBuffer::new(0);
1012        buffer.set_null_bits(1, usize::MAX);
1013    }
1014
1015    #[test]
1016    fn from_iter() {
1017        let buffer = [1u16, 2, 3, 4].into_iter().collect::<MutableBuffer>();
1018        assert_eq!(buffer.len(), 4 * mem::size_of::<u16>());
1019        assert_eq!(buffer.as_slice(), &[1, 0, 2, 0, 3, 0, 4, 0]);
1020    }
1021
1022    #[test]
1023    #[should_panic(expected = "failed to create layout for MutableBuffer: LayoutError")]
1024    fn test_with_capacity_panics_above_max_capacity() {
1025        let max_capacity = isize::MAX as usize - (isize::MAX as usize % ALIGNMENT);
1026        let _ = MutableBuffer::with_capacity(max_capacity + 1);
1027    }
1028}