arrow_buffer/buffer/offset.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::buffer::ScalarBuffer;
19use crate::{ArrowNativeType, MutableBuffer, OffsetBufferBuilder};
20use std::ops::Deref;
21
22/// A non-empty buffer of monotonically increasing, positive integers.
23///
24/// [`OffsetBuffer`] are used to represent ranges of offsets. An
25/// `OffsetBuffer` of `N+1` items contains `N` such ranges. The start
26/// offset for element `i` is `offsets[i]` and the end offset is
27/// `offsets[i+1]`. Equal offsets represent an empty range.
28///
29/// # Example
30///
31/// This example shows how 5 distinct ranges, are represented using a
32/// 6 entry `OffsetBuffer`. The first entry `(0, 3)` represents the
33/// three offsets `0, 1, 2`. The entry `(3,3)` represent no offsets
34/// (e.g. an empty list).
35///
36/// ```text
37/// ┌───────┐ ┌───┐
38/// │ (0,3) │ │ 0 │
39/// ├───────┤ ├───┤
40/// │ (3,3) │ │ 3 │
41/// ├───────┤ ├───┤
42/// │ (3,4) │ │ 3 │
43/// ├───────┤ ├───┤
44/// │ (4,5) │ │ 4 │
45/// ├───────┤ ├───┤
46/// │ (5,7) │ │ 5 │
47/// └───────┘ ├───┤
48/// │ 7 │
49/// └───┘
50///
51/// Offsets Buffer
52/// Logical
53/// Offsets
54///
55/// (offsets[i],
56/// offsets[i+1])
57/// ```
58#[derive(Debug, Clone)]
59pub struct OffsetBuffer<O: ArrowNativeType>(ScalarBuffer<O>);
60
61impl<O: ArrowNativeType> OffsetBuffer<O> {
62 /// Create a new [`OffsetBuffer`] from the provided [`ScalarBuffer`]
63 ///
64 /// # Panics
65 ///
66 /// Panics if `buffer` is not a non-empty buffer containing
67 /// monotonically increasing values greater than or equal to zero
68 pub fn new(buffer: ScalarBuffer<O>) -> Self {
69 assert!(!buffer.is_empty(), "offsets cannot be empty");
70 assert!(
71 buffer[0] >= O::usize_as(0),
72 "offsets must be greater than 0"
73 );
74 assert!(
75 buffer.windows(2).all(|w| w[0] <= w[1]),
76 "offsets must be monotonically increasing"
77 );
78 Self(buffer)
79 }
80
81 /// Create a new [`OffsetBuffer`] from the provided [`ScalarBuffer`]
82 ///
83 /// # Safety
84 ///
85 /// `buffer` must be a non-empty buffer containing monotonically increasing
86 /// values greater than or equal to zero
87 pub unsafe fn new_unchecked(buffer: ScalarBuffer<O>) -> Self {
88 Self(buffer)
89 }
90
91 /// Create a new [`OffsetBuffer`] containing a single 0 value
92 pub fn new_empty() -> Self {
93 let buffer = MutableBuffer::from_len_zeroed(std::mem::size_of::<O>());
94 Self(buffer.into_buffer().into())
95 }
96
97 /// Create a new [`OffsetBuffer`] containing `len + 1` `0` values
98 pub fn new_zeroed(len: usize) -> Self {
99 let len_bytes = len
100 .checked_add(1)
101 .and_then(|o| o.checked_mul(std::mem::size_of::<O>()))
102 .expect("overflow");
103 let buffer = MutableBuffer::from_len_zeroed(len_bytes);
104 Self(buffer.into_buffer().into())
105 }
106
107 /// Create a new [`OffsetBuffer`] from the iterator of slice lengths
108 ///
109 /// ```
110 /// # use arrow_buffer::OffsetBuffer;
111 /// let offsets = OffsetBuffer::<i32>::from_lengths([1, 3, 5]);
112 /// assert_eq!(offsets.as_ref(), &[0, 1, 4, 9]);
113 /// ```
114 ///
115 /// # Panics
116 ///
117 /// Panics on overflow
118 pub fn from_lengths<I>(lengths: I) -> Self
119 where
120 I: IntoIterator<Item = usize>,
121 {
122 let iter = lengths.into_iter();
123 let mut out = Vec::with_capacity(iter.size_hint().0 + 1);
124 out.push(O::usize_as(0));
125
126 let mut acc = 0_usize;
127 for length in iter {
128 acc = acc.checked_add(length).expect("usize overflow");
129 out.push(O::usize_as(acc))
130 }
131 // Check for overflow
132 O::from_usize(acc).expect("offset overflow");
133 Self(out.into())
134 }
135
136 /// Returns the inner [`ScalarBuffer`]
137 pub fn inner(&self) -> &ScalarBuffer<O> {
138 &self.0
139 }
140
141 /// Returns the inner [`ScalarBuffer`], consuming self
142 pub fn into_inner(self) -> ScalarBuffer<O> {
143 self.0
144 }
145
146 /// Returns a zero-copy slice of this buffer with length `len` and starting at `offset`
147 pub fn slice(&self, offset: usize, len: usize) -> Self {
148 Self(self.0.slice(offset, len.saturating_add(1)))
149 }
150
151 /// Returns true if this [`OffsetBuffer`] is equal to `other`, using pointer comparisons
152 /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
153 /// return false when the arrays are logically equal
154 #[inline]
155 pub fn ptr_eq(&self, other: &Self) -> bool {
156 self.0.ptr_eq(&other.0)
157 }
158}
159
160impl<T: ArrowNativeType> Deref for OffsetBuffer<T> {
161 type Target = [T];
162
163 #[inline]
164 fn deref(&self) -> &Self::Target {
165 &self.0
166 }
167}
168
169impl<T: ArrowNativeType> AsRef<[T]> for OffsetBuffer<T> {
170 #[inline]
171 fn as_ref(&self) -> &[T] {
172 self
173 }
174}
175
176impl<O: ArrowNativeType> From<OffsetBufferBuilder<O>> for OffsetBuffer<O> {
177 fn from(value: OffsetBufferBuilder<O>) -> Self {
178 value.finish()
179 }
180}
181
182#[cfg(test)]
183mod tests {
184 use super::*;
185
186 #[test]
187 #[should_panic(expected = "offsets cannot be empty")]
188 fn empty_offsets() {
189 OffsetBuffer::new(Vec::<i32>::new().into());
190 }
191
192 #[test]
193 #[should_panic(expected = "offsets must be greater than 0")]
194 fn negative_offsets() {
195 OffsetBuffer::new(vec![-1, 0, 1].into());
196 }
197
198 #[test]
199 fn offsets() {
200 OffsetBuffer::new(vec![0, 1, 2, 3].into());
201
202 let offsets = OffsetBuffer::<i32>::new_zeroed(3);
203 assert_eq!(offsets.as_ref(), &[0; 4]);
204
205 let offsets = OffsetBuffer::<i32>::new_zeroed(0);
206 assert_eq!(offsets.as_ref(), &[0; 1]);
207 }
208
209 #[test]
210 #[should_panic(expected = "overflow")]
211 fn offsets_new_zeroed_overflow() {
212 OffsetBuffer::<i32>::new_zeroed(usize::MAX);
213 }
214
215 #[test]
216 #[should_panic(expected = "offsets must be monotonically increasing")]
217 fn non_monotonic_offsets() {
218 OffsetBuffer::new(vec![1, 2, 0].into());
219 }
220
221 #[test]
222 fn from_lengths() {
223 let buffer = OffsetBuffer::<i32>::from_lengths([2, 6, 3, 7, 2]);
224 assert_eq!(buffer.as_ref(), &[0, 2, 8, 11, 18, 20]);
225
226 let half_max = i32::MAX / 2;
227 let buffer = OffsetBuffer::<i32>::from_lengths([half_max as usize, half_max as usize]);
228 assert_eq!(buffer.as_ref(), &[0, half_max, half_max * 2]);
229 }
230
231 #[test]
232 #[should_panic(expected = "offset overflow")]
233 fn from_lengths_offset_overflow() {
234 OffsetBuffer::<i32>::from_lengths([i32::MAX as usize, 1]);
235 }
236
237 #[test]
238 #[should_panic(expected = "usize overflow")]
239 fn from_lengths_usize_overflow() {
240 OffsetBuffer::<i32>::from_lengths([usize::MAX, 1]);
241 }
242}