arrow_buffer/builder/
boolean.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::{bit_mask, bit_util, BooleanBuffer, Buffer, MutableBuffer};
19use std::ops::Range;
20
21/// Builder for [`BooleanBuffer`]
22#[derive(Debug)]
23pub struct BooleanBufferBuilder {
24    buffer: MutableBuffer,
25    len: usize,
26}
27
28impl BooleanBufferBuilder {
29    /// Creates a new `BooleanBufferBuilder`
30    #[inline]
31    pub fn new(capacity: usize) -> Self {
32        let byte_capacity = bit_util::ceil(capacity, 8);
33        let buffer = MutableBuffer::new(byte_capacity);
34        Self { buffer, len: 0 }
35    }
36
37    /// Creates a new `BooleanBufferBuilder` from [`MutableBuffer`] of `len`
38    pub fn new_from_buffer(buffer: MutableBuffer, len: usize) -> Self {
39        assert!(len <= buffer.len() * 8);
40        let mut s = Self {
41            len: buffer.len() * 8,
42            buffer,
43        };
44        s.truncate(len);
45        s
46    }
47
48    /// Returns the length of the buffer
49    #[inline]
50    pub fn len(&self) -> usize {
51        self.len
52    }
53
54    /// Sets a bit in the buffer at `index`
55    #[inline]
56    pub fn set_bit(&mut self, index: usize, v: bool) {
57        if v {
58            bit_util::set_bit(self.buffer.as_mut(), index);
59        } else {
60            bit_util::unset_bit(self.buffer.as_mut(), index);
61        }
62    }
63
64    /// Gets a bit in the buffer at `index`
65    #[inline]
66    pub fn get_bit(&self, index: usize) -> bool {
67        bit_util::get_bit(self.buffer.as_slice(), index)
68    }
69
70    /// Returns true if empty
71    #[inline]
72    pub fn is_empty(&self) -> bool {
73        self.len == 0
74    }
75
76    /// Returns the capacity of the buffer
77    #[inline]
78    pub fn capacity(&self) -> usize {
79        self.buffer.capacity() * 8
80    }
81
82    /// Advances the buffer by `additional` bits
83    #[inline]
84    pub fn advance(&mut self, additional: usize) {
85        let new_len = self.len + additional;
86        let new_len_bytes = bit_util::ceil(new_len, 8);
87        if new_len_bytes > self.buffer.len() {
88            self.buffer.resize(new_len_bytes, 0);
89        }
90        self.len = new_len;
91    }
92
93    /// Truncates the builder to the given length
94    ///
95    /// If `len` is greater than the buffer's current length, this has no effect
96    #[inline]
97    pub fn truncate(&mut self, len: usize) {
98        if len > self.len {
99            return;
100        }
101
102        let new_len_bytes = bit_util::ceil(len, 8);
103        self.buffer.truncate(new_len_bytes);
104        self.len = len;
105
106        let remainder = self.len % 8;
107        if remainder != 0 {
108            let mask = (1_u8 << remainder).wrapping_sub(1);
109            *self.buffer.as_mut().last_mut().unwrap() &= mask;
110        }
111    }
112
113    /// Reserve space to at least `additional` new bits.
114    /// Capacity will be `>= self.len() + additional`.
115    /// New bytes are uninitialized and reading them is undefined behavior.
116    #[inline]
117    pub fn reserve(&mut self, additional: usize) {
118        let capacity = self.len + additional;
119        if capacity > self.capacity() {
120            // convert differential to bytes
121            let additional = bit_util::ceil(capacity, 8) - self.buffer.len();
122            self.buffer.reserve(additional);
123        }
124    }
125
126    /// Resizes the buffer, either truncating its contents (with no change in capacity), or
127    /// growing it (potentially reallocating it) and writing `false` in the newly available bits.
128    #[inline]
129    pub fn resize(&mut self, len: usize) {
130        match len.checked_sub(self.len) {
131            Some(delta) => self.advance(delta),
132            None => self.truncate(len),
133        }
134    }
135
136    /// Appends a boolean `v` into the buffer
137    #[inline]
138    pub fn append(&mut self, v: bool) {
139        self.advance(1);
140        if v {
141            unsafe { bit_util::set_bit_raw(self.buffer.as_mut_ptr(), self.len - 1) };
142        }
143    }
144
145    /// Appends n `additional` bits of value `v` into the buffer
146    #[inline]
147    pub fn append_n(&mut self, additional: usize, v: bool) {
148        match v {
149            true => {
150                let new_len = self.len + additional;
151                let new_len_bytes = bit_util::ceil(new_len, 8);
152                let cur_remainder = self.len % 8;
153                let new_remainder = new_len % 8;
154
155                if cur_remainder != 0 {
156                    // Pad last byte with 1s
157                    *self.buffer.as_slice_mut().last_mut().unwrap() |= !((1 << cur_remainder) - 1)
158                }
159                self.buffer.resize(new_len_bytes, 0xFF);
160                if new_remainder != 0 {
161                    // Clear remaining bits
162                    *self.buffer.as_slice_mut().last_mut().unwrap() &= (1 << new_remainder) - 1
163                }
164                self.len = new_len;
165            }
166            false => self.advance(additional),
167        }
168    }
169
170    /// Appends a slice of booleans into the buffer
171    #[inline]
172    pub fn append_slice(&mut self, slice: &[bool]) {
173        let additional = slice.len();
174        self.advance(additional);
175
176        let offset = self.len() - additional;
177        for (i, v) in slice.iter().enumerate() {
178            if *v {
179                unsafe { bit_util::set_bit_raw(self.buffer.as_mut_ptr(), offset + i) }
180            }
181        }
182    }
183
184    /// Append `range` bits from `to_set`
185    ///
186    /// `to_set` is a slice of bits packed LSB-first into `[u8]`
187    ///
188    /// # Panics
189    ///
190    /// Panics if `to_set` does not contain `ceil(range.end / 8)` bytes
191    pub fn append_packed_range(&mut self, range: Range<usize>, to_set: &[u8]) {
192        let offset_write = self.len;
193        let len = range.end - range.start;
194        self.advance(len);
195        bit_mask::set_bits(
196            self.buffer.as_slice_mut(),
197            to_set,
198            offset_write,
199            range.start,
200            len,
201        );
202    }
203
204    /// Append [`BooleanBuffer`] to this [`BooleanBufferBuilder`]
205    pub fn append_buffer(&mut self, buffer: &BooleanBuffer) {
206        let range = buffer.offset()..buffer.offset() + buffer.len();
207        self.append_packed_range(range, buffer.values())
208    }
209
210    /// Returns the packed bits
211    pub fn as_slice(&self) -> &[u8] {
212        self.buffer.as_slice()
213    }
214
215    /// Returns the packed bits
216    pub fn as_slice_mut(&mut self) -> &mut [u8] {
217        self.buffer.as_slice_mut()
218    }
219
220    /// Creates a [`BooleanBuffer`]
221    #[inline]
222    pub fn finish(&mut self) -> BooleanBuffer {
223        let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));
224        let len = std::mem::replace(&mut self.len, 0);
225        BooleanBuffer::new(buf.into(), 0, len)
226    }
227
228    /// Builds the [BooleanBuffer] without resetting the builder.
229    pub fn finish_cloned(&self) -> BooleanBuffer {
230        BooleanBuffer::new(Buffer::from_slice_ref(self.as_slice()), 0, self.len)
231    }
232}
233
234impl From<BooleanBufferBuilder> for Buffer {
235    #[inline]
236    fn from(builder: BooleanBufferBuilder) -> Self {
237        builder.buffer.into()
238    }
239}
240
241impl From<BooleanBufferBuilder> for BooleanBuffer {
242    #[inline]
243    fn from(builder: BooleanBufferBuilder) -> Self {
244        BooleanBuffer::new(builder.buffer.into(), 0, builder.len)
245    }
246}
247
248#[cfg(test)]
249mod tests {
250    use super::*;
251
252    #[test]
253    fn test_boolean_buffer_builder_write_bytes() {
254        let mut b = BooleanBufferBuilder::new(4);
255        b.append(false);
256        b.append(true);
257        b.append(false);
258        b.append(true);
259        assert_eq!(4, b.len());
260        assert_eq!(512, b.capacity());
261        let buffer = b.finish();
262        assert_eq!(4, buffer.len());
263
264        // Overallocate capacity
265        let mut b = BooleanBufferBuilder::new(8);
266        b.append_slice(&[false, true, false, true]);
267        assert_eq!(4, b.len());
268        assert_eq!(512, b.capacity());
269        let buffer = b.finish();
270        assert_eq!(4, buffer.len());
271    }
272
273    #[test]
274    fn test_boolean_buffer_builder_unset_first_bit() {
275        let mut buffer = BooleanBufferBuilder::new(4);
276        buffer.append(true);
277        buffer.append(true);
278        buffer.append(false);
279        buffer.append(true);
280        buffer.set_bit(0, false);
281        assert_eq!(buffer.len(), 4);
282        assert_eq!(buffer.finish().values(), &[0b1010_u8]);
283    }
284
285    #[test]
286    fn test_boolean_buffer_builder_unset_last_bit() {
287        let mut buffer = BooleanBufferBuilder::new(4);
288        buffer.append(true);
289        buffer.append(true);
290        buffer.append(false);
291        buffer.append(true);
292        buffer.set_bit(3, false);
293        assert_eq!(buffer.len(), 4);
294        assert_eq!(buffer.finish().values(), &[0b0011_u8]);
295    }
296
297    #[test]
298    fn test_boolean_buffer_builder_unset_an_inner_bit() {
299        let mut buffer = BooleanBufferBuilder::new(5);
300        buffer.append(true);
301        buffer.append(true);
302        buffer.append(false);
303        buffer.append(true);
304        buffer.set_bit(1, false);
305        assert_eq!(buffer.len(), 4);
306        assert_eq!(buffer.finish().values(), &[0b1001_u8]);
307    }
308
309    #[test]
310    fn test_boolean_buffer_builder_unset_several_bits() {
311        let mut buffer = BooleanBufferBuilder::new(5);
312        buffer.append(true);
313        buffer.append(true);
314        buffer.append(true);
315        buffer.append(false);
316        buffer.append(true);
317        buffer.set_bit(1, false);
318        buffer.set_bit(2, false);
319        assert_eq!(buffer.len(), 5);
320        assert_eq!(buffer.finish().values(), &[0b10001_u8]);
321    }
322
323    #[test]
324    fn test_boolean_buffer_builder_unset_several_bits_bigger_than_one_byte() {
325        let mut buffer = BooleanBufferBuilder::new(16);
326        buffer.append_n(10, true);
327        buffer.set_bit(0, false);
328        buffer.set_bit(3, false);
329        buffer.set_bit(9, false);
330        assert_eq!(buffer.len(), 10);
331        assert_eq!(buffer.finish().values(), &[0b11110110_u8, 0b01_u8]);
332    }
333
334    #[test]
335    fn test_boolean_buffer_builder_flip_several_bits_bigger_than_one_byte() {
336        let mut buffer = BooleanBufferBuilder::new(16);
337        buffer.append_n(5, true);
338        buffer.append_n(5, false);
339        buffer.append_n(5, true);
340        buffer.set_bit(0, false);
341        buffer.set_bit(3, false);
342        buffer.set_bit(9, false);
343        buffer.set_bit(6, true);
344        buffer.set_bit(14, true);
345        buffer.set_bit(13, false);
346        assert_eq!(buffer.len(), 15);
347        assert_eq!(buffer.finish().values(), &[0b01010110_u8, 0b1011100_u8]);
348    }
349
350    #[test]
351    fn test_bool_buffer_builder_get_first_bit() {
352        let mut buffer = BooleanBufferBuilder::new(16);
353        buffer.append_n(8, true);
354        buffer.append_n(8, false);
355        assert!(buffer.get_bit(0));
356    }
357
358    #[test]
359    fn test_bool_buffer_builder_get_first_bit_not_requires_mutability() {
360        let buffer = {
361            let mut buffer = BooleanBufferBuilder::new(16);
362            buffer.append_n(8, true);
363            buffer
364        };
365
366        assert!(buffer.get_bit(0));
367    }
368
369    #[test]
370    fn test_bool_buffer_builder_get_last_bit() {
371        let mut buffer = BooleanBufferBuilder::new(16);
372        buffer.append_n(8, true);
373        buffer.append_n(8, false);
374        assert!(!buffer.get_bit(15));
375    }
376
377    #[test]
378    fn test_bool_buffer_builder_get_an_inner_bit() {
379        let mut buffer = BooleanBufferBuilder::new(16);
380        buffer.append_n(4, false);
381        buffer.append_n(8, true);
382        buffer.append_n(4, false);
383        assert!(buffer.get_bit(11));
384    }
385
386    #[test]
387    fn test_bool_buffer_fuzz() {
388        use rand::prelude::*;
389
390        let mut buffer = BooleanBufferBuilder::new(12);
391        let mut all_bools = vec![];
392        let mut rng = rand::thread_rng();
393
394        let src_len = 32;
395        let (src, compacted_src) = {
396            let src: Vec<_> = std::iter::from_fn(|| Some(rng.next_u32() & 1 == 0))
397                .take(src_len)
398                .collect();
399
400            let mut compacted_src = BooleanBufferBuilder::new(src_len);
401            compacted_src.append_slice(&src);
402            (src, compacted_src.finish())
403        };
404
405        for _ in 0..100 {
406            let a = rng.next_u32() as usize % src_len;
407            let b = rng.next_u32() as usize % src_len;
408
409            let start = a.min(b);
410            let end = a.max(b);
411
412            buffer.append_packed_range(start..end, compacted_src.values());
413            all_bools.extend_from_slice(&src[start..end]);
414        }
415
416        let mut compacted = BooleanBufferBuilder::new(all_bools.len());
417        compacted.append_slice(&all_bools);
418
419        assert_eq!(buffer.finish(), compacted.finish())
420    }
421
422    #[test]
423    fn test_boolean_array_builder_resize() {
424        let mut builder = BooleanBufferBuilder::new(20);
425        builder.append_n(4, true);
426        builder.append_n(7, false);
427        builder.append_n(2, true);
428        builder.resize(20);
429
430        assert_eq!(builder.len(), 20);
431        assert_eq!(builder.as_slice(), &[0b00001111, 0b00011000, 0b00000000]);
432
433        builder.resize(5);
434        assert_eq!(builder.len(), 5);
435        assert_eq!(builder.as_slice(), &[0b00001111]);
436
437        builder.append_n(4, true);
438        assert_eq!(builder.len(), 9);
439        assert_eq!(builder.as_slice(), &[0b11101111, 0b00000001]);
440    }
441
442    #[test]
443    fn test_truncate() {
444        let b = MutableBuffer::from_iter([true, true, true, true]);
445        let mut builder = BooleanBufferBuilder::new_from_buffer(b, 2);
446        builder.advance(2);
447        let finished = builder.finish();
448        assert_eq!(finished.values(), &[0b00000011]);
449
450        let mut builder = BooleanBufferBuilder::new(10);
451        builder.append_n(5, true);
452        builder.resize(3);
453        builder.advance(2);
454        let finished = builder.finish();
455        assert_eq!(finished.values(), &[0b00000111]);
456
457        let mut builder = BooleanBufferBuilder::new(10);
458        builder.append_n(16, true);
459        assert_eq!(builder.as_slice(), &[0xFF, 0xFF]);
460        builder.truncate(20);
461        assert_eq!(builder.as_slice(), &[0xFF, 0xFF]);
462        builder.truncate(14);
463        assert_eq!(builder.as_slice(), &[0xFF, 0b00111111]);
464        builder.append(false);
465        builder.append(true);
466        assert_eq!(builder.as_slice(), &[0xFF, 0b10111111]);
467        builder.append_packed_range(0..3, &[0xFF]);
468        assert_eq!(builder.as_slice(), &[0xFF, 0b10111111, 0b00000111]);
469        builder.truncate(17);
470        assert_eq!(builder.as_slice(), &[0xFF, 0b10111111, 0b00000001]);
471        builder.append_packed_range(0..2, &[2]);
472        assert_eq!(builder.as_slice(), &[0xFF, 0b10111111, 0b0000101]);
473        builder.truncate(8);
474        assert_eq!(builder.as_slice(), &[0xFF]);
475        builder.resize(14);
476        assert_eq!(builder.as_slice(), &[0xFF, 0x00]);
477        builder.truncate(0);
478        assert_eq!(builder.as_slice(), &[]);
479    }
480
481    #[test]
482    fn test_boolean_builder_increases_buffer_len() {
483        // 00000010 01001000
484        let buf = Buffer::from([72_u8, 2_u8]);
485        let mut builder = BooleanBufferBuilder::new(8);
486
487        for i in 0..16 {
488            if i == 3 || i == 6 || i == 9 {
489                builder.append(true);
490            } else {
491                builder.append(false);
492            }
493        }
494        let buf2 = builder.finish();
495
496        assert_eq!(buf.len(), buf2.inner().len());
497        assert_eq!(buf.as_slice(), buf2.values());
498    }
499}