arrow_buffer/buffer/
offset.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::buffer::ScalarBuffer;
19use crate::{ArrowNativeType, MutableBuffer, OffsetBufferBuilder};
20use std::ops::Deref;
21
22/// A non-empty buffer of monotonically increasing, positive integers.
23///
24/// [`OffsetBuffer`] are used to represent ranges of offsets. An
25/// `OffsetBuffer` of `N+1` items contains `N` such ranges. The start
26/// offset for element `i` is `offsets[i]` and the end offset is
27/// `offsets[i+1]`. Equal offsets represent an empty range.
28///
29/// # Example
30///
31/// This example shows how 5 distinct ranges, are represented using a
32/// 6 entry `OffsetBuffer`. The first entry `(0, 3)` represents the
33/// three offsets `0, 1, 2`. The entry `(3,3)` represent no offsets
34/// (e.g. an empty list).
35///
36/// ```text
37///   ┌───────┐                ┌───┐
38///   │ (0,3) │                │ 0 │
39///   ├───────┤                ├───┤
40///   │ (3,3) │                │ 3 │
41///   ├───────┤                ├───┤
42///   │ (3,4) │                │ 3 │
43///   ├───────┤                ├───┤
44///   │ (4,5) │                │ 4 │
45///   ├───────┤                ├───┤
46///   │ (5,7) │                │ 5 │
47///   └───────┘                ├───┤
48///                            │ 7 │
49///                            └───┘
50///
51///                        Offsets Buffer
52///    Logical
53///    Offsets
54///
55///  (offsets[i],
56///   offsets[i+1])
57/// ```
58#[derive(Debug, Clone)]
59pub struct OffsetBuffer<O: ArrowNativeType>(ScalarBuffer<O>);
60
61impl<O: ArrowNativeType> OffsetBuffer<O> {
62    /// Create a new [`OffsetBuffer`] from the provided [`ScalarBuffer`]
63    ///
64    /// # Panics
65    ///
66    /// Panics if `buffer` is not a non-empty buffer containing
67    /// monotonically increasing values greater than or equal to zero
68    pub fn new(buffer: ScalarBuffer<O>) -> Self {
69        assert!(!buffer.is_empty(), "offsets cannot be empty");
70        assert!(
71            buffer[0] >= O::usize_as(0),
72            "offsets must be greater than 0"
73        );
74        assert!(
75            buffer.windows(2).all(|w| w[0] <= w[1]),
76            "offsets must be monotonically increasing"
77        );
78        Self(buffer)
79    }
80
81    /// Create a new [`OffsetBuffer`] from the provided [`ScalarBuffer`]
82    ///
83    /// # Safety
84    ///
85    /// `buffer` must be a non-empty buffer containing monotonically increasing
86    /// values greater than or equal to zero
87    pub unsafe fn new_unchecked(buffer: ScalarBuffer<O>) -> Self {
88        Self(buffer)
89    }
90
91    /// Create a new [`OffsetBuffer`] containing a single 0 value
92    pub fn new_empty() -> Self {
93        let buffer = MutableBuffer::from_len_zeroed(std::mem::size_of::<O>());
94        Self(buffer.into_buffer().into())
95    }
96
97    /// Create a new [`OffsetBuffer`] containing `len + 1` `0` values
98    pub fn new_zeroed(len: usize) -> Self {
99        let len_bytes = len
100            .checked_add(1)
101            .and_then(|o| o.checked_mul(std::mem::size_of::<O>()))
102            .expect("overflow");
103        let buffer = MutableBuffer::from_len_zeroed(len_bytes);
104        Self(buffer.into_buffer().into())
105    }
106
107    /// Create a new [`OffsetBuffer`] from the iterator of slice lengths
108    ///
109    /// ```
110    /// # use arrow_buffer::OffsetBuffer;
111    /// let offsets = OffsetBuffer::<i32>::from_lengths([1, 3, 5]);
112    /// assert_eq!(offsets.as_ref(), &[0, 1, 4, 9]);
113    /// ```
114    ///
115    /// # Panics
116    ///
117    /// Panics on overflow
118    pub fn from_lengths<I>(lengths: I) -> Self
119    where
120        I: IntoIterator<Item = usize>,
121    {
122        let iter = lengths.into_iter();
123        let mut out = Vec::with_capacity(iter.size_hint().0 + 1);
124        out.push(O::usize_as(0));
125
126        let mut acc = 0_usize;
127        for length in iter {
128            acc = acc.checked_add(length).expect("usize overflow");
129            out.push(O::usize_as(acc))
130        }
131        // Check for overflow
132        O::from_usize(acc).expect("offset overflow");
133        Self(out.into())
134    }
135
136    /// Returns the inner [`ScalarBuffer`]
137    pub fn inner(&self) -> &ScalarBuffer<O> {
138        &self.0
139    }
140
141    /// Returns the inner [`ScalarBuffer`], consuming self
142    pub fn into_inner(self) -> ScalarBuffer<O> {
143        self.0
144    }
145
146    /// Returns a zero-copy slice of this buffer with length `len` and starting at `offset`
147    pub fn slice(&self, offset: usize, len: usize) -> Self {
148        Self(self.0.slice(offset, len.saturating_add(1)))
149    }
150
151    /// Returns true if this [`OffsetBuffer`] is equal to `other`, using pointer comparisons
152    /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
153    /// return false when the arrays are logically equal
154    #[inline]
155    pub fn ptr_eq(&self, other: &Self) -> bool {
156        self.0.ptr_eq(&other.0)
157    }
158}
159
160impl<T: ArrowNativeType> Deref for OffsetBuffer<T> {
161    type Target = [T];
162
163    #[inline]
164    fn deref(&self) -> &Self::Target {
165        &self.0
166    }
167}
168
169impl<T: ArrowNativeType> AsRef<[T]> for OffsetBuffer<T> {
170    #[inline]
171    fn as_ref(&self) -> &[T] {
172        self
173    }
174}
175
176impl<O: ArrowNativeType> From<OffsetBufferBuilder<O>> for OffsetBuffer<O> {
177    fn from(value: OffsetBufferBuilder<O>) -> Self {
178        value.finish()
179    }
180}
181
182#[cfg(test)]
183mod tests {
184    use super::*;
185
186    #[test]
187    #[should_panic(expected = "offsets cannot be empty")]
188    fn empty_offsets() {
189        OffsetBuffer::new(Vec::<i32>::new().into());
190    }
191
192    #[test]
193    #[should_panic(expected = "offsets must be greater than 0")]
194    fn negative_offsets() {
195        OffsetBuffer::new(vec![-1, 0, 1].into());
196    }
197
198    #[test]
199    fn offsets() {
200        OffsetBuffer::new(vec![0, 1, 2, 3].into());
201
202        let offsets = OffsetBuffer::<i32>::new_zeroed(3);
203        assert_eq!(offsets.as_ref(), &[0; 4]);
204
205        let offsets = OffsetBuffer::<i32>::new_zeroed(0);
206        assert_eq!(offsets.as_ref(), &[0; 1]);
207    }
208
209    #[test]
210    #[should_panic(expected = "overflow")]
211    fn offsets_new_zeroed_overflow() {
212        OffsetBuffer::<i32>::new_zeroed(usize::MAX);
213    }
214
215    #[test]
216    #[should_panic(expected = "offsets must be monotonically increasing")]
217    fn non_monotonic_offsets() {
218        OffsetBuffer::new(vec![1, 2, 0].into());
219    }
220
221    #[test]
222    fn from_lengths() {
223        let buffer = OffsetBuffer::<i32>::from_lengths([2, 6, 3, 7, 2]);
224        assert_eq!(buffer.as_ref(), &[0, 2, 8, 11, 18, 20]);
225
226        let half_max = i32::MAX / 2;
227        let buffer = OffsetBuffer::<i32>::from_lengths([half_max as usize, half_max as usize]);
228        assert_eq!(buffer.as_ref(), &[0, half_max, half_max * 2]);
229    }
230
231    #[test]
232    #[should_panic(expected = "offset overflow")]
233    fn from_lengths_offset_overflow() {
234        OffsetBuffer::<i32>::from_lengths([i32::MAX as usize, 1]);
235    }
236
237    #[test]
238    #[should_panic(expected = "usize overflow")]
239    fn from_lengths_usize_overflow() {
240        OffsetBuffer::<i32>::from_lengths([usize::MAX, 1]);
241    }
242}