compact_bytes/
fixed.rs

use std::fmt;
use std::hash::{Hash, Hasher};
use std::mem::ManuallyDrop;
use std::ops::Deref;
use std::ops::DerefMut;
use std::ptr::NonNull;

use crate::{HeapBytesFixed, InlineBytes15, INLINE_MASK};

/// [`CompactBytesSlice`] inlines up to 15 bytes on the stack; if more than that is required, we
/// spill to the heap. The heap representation is not reference counted like `bytes::Bytes`.
///
/// Whereas a [`CompactBytes`] is like a `Vec<u8>`, a [`CompactBytesSlice`] is like a `Box<[u8]>`.
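///
/// # Examples
///
/// ```
/// use compact_bytes::CompactBytesSlice;
///
/// // Up to 15 bytes are stored inline, on the stack.
/// let small = CompactBytesSlice::new(b"hello");
/// assert!(!small.spilled());
///
/// // Anything larger spills to a single, exactly sized heap allocation.
/// let large = CompactBytesSlice::new(b"hello world, from the heap!");
/// assert!(large.spilled());
/// ```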
pub union CompactBytesSlice {
    heap: ManuallyDrop<HeapBytesFixed>,
    inline: InlineBytes15,
}

static_assertions::assert_eq_align!(
    InlineBytes15,
    HeapBytesFixed,
    CompactBytesSlice,
    Box<[u8]>,
    usize
);
static_assertions::assert_eq_size!(InlineBytes15, HeapBytesFixed, CompactBytesSlice, Box<[u8]>);
static_assertions::const_assert_eq!(std::mem::size_of::<CompactBytesSlice>(), 16);

// SAFETY: It is safe to Send a `CompactBytesSlice` to other threads because it
// owns all of its data.
unsafe impl Send for CompactBytesSlice {}

// SAFETY: It is safe to share references to a `CompactBytesSlice` between
// threads because it does not support interior mutability, or any other way to
// introduce races.
unsafe impl Sync for CompactBytesSlice {}

impl CompactBytesSlice {
    /// The maximum number of bytes that a [`CompactBytesSlice`] can store inline.
    pub const MAX_INLINE: usize = InlineBytes15::CAPACITY;

    // Note: Unlike `CompactBytes` we do not have a minimum heap allocation size
    // because our heap allocations are always exactly `len` bytes.

    /// The maximum number of bytes that a [`CompactBytesSlice`] can store on the heap.
    pub const MAX_HEAP: usize = usize::MAX >> 1;

    /// Creates a new [`CompactBytesSlice`] from the provided slice. Stores the
    /// bytes inline if small enough.
    ///
    /// # Examples
    ///
    /// ```
    /// use compact_bytes::CompactBytesSlice;
    ///
    /// let inline = CompactBytesSlice::new(&[1, 2, 3, 4]);
    /// assert!(!inline.spilled());
    /// assert_eq!(inline.len(), 4);
    ///
    /// let heap = CompactBytesSlice::new(b"I am a bytes type that will get stored on the heap");
    /// assert!(heap.spilled());
    /// assert_eq!(heap.len(), 50);
    /// ```
    #[inline]
    pub fn new(slice: &[u8]) -> Self {
        if slice.len() <= Self::MAX_INLINE {
            // SAFETY: We just checked that slice length is less than or equal to MAX_INLINE.
            let inline = unsafe { InlineBytes15::new(slice) };
            CompactBytesSlice { inline }
        } else {
            let heap = ManuallyDrop::new(HeapBytesFixed::new(slice));
            CompactBytesSlice { heap }
        }
    }

    /// Creates a new empty [`CompactBytesSlice`] with a capacity of
    /// [`CompactBytesSlice::MAX_INLINE`]. This function can be called in
    /// `const` contexts.
    ///
    /// # Examples
    ///
    /// ```
    /// use compact_bytes::CompactBytesSlice;
    ///
    /// let min = CompactBytesSlice::empty();
    /// assert_eq!(min.capacity(), CompactBytesSlice::MAX_INLINE);
    /// ```
    #[inline]
    pub const fn empty() -> Self {
        let inline = InlineBytes15::empty();
        CompactBytesSlice { inline }
    }

    /// Creates a new [`CompactBytesSlice`] using the provided pointer and length.
    ///
    /// # Safety
    ///
    /// * The caller must guarantee that the provided pointer is non-null and properly aligned,
    ///   and that the backing allocation was made by the same allocator that will eventually be
    ///   used to free the returned [`CompactBytesSlice`].
    /// * `length` needs to be the capacity that the pointer was allocated with.
    /// * `length` needs to be less than or equal to [`CompactBytesSlice::MAX_HEAP`].
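    ///
    /// # Examples
    ///
    /// ```
    /// use compact_bytes::CompactBytesSlice;
    ///
    /// // A round-trip through `Box<[u8]>`, mirroring the `From<Box<[u8]>>` impl below:
    /// // the global allocator made the allocation and `len` is exactly its capacity.
    /// let boxed: Box<[u8]> = vec![1, 2, 3, 4].into_boxed_slice();
    /// let len = boxed.len();
    /// let ptr = Box::into_raw(boxed) as *mut u8;
    ///
    /// // SAFETY: `ptr` is non-null, properly aligned, and was allocated with exactly
    /// // `len` bytes by the global allocator.
    /// let bytes = unsafe { CompactBytesSlice::from_raw_parts(ptr, len) };
    /// assert_eq!(bytes.as_slice(), &[1, 2, 3, 4]);
    /// ```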
    #[inline]
    pub unsafe fn from_raw_parts(ptr: *mut u8, length: usize) -> Self {
        let heap = HeapBytesFixed {
            ptr: NonNull::new_unchecked(ptr),
            len: length,
        };
        let heap = ManuallyDrop::new(heap);
        CompactBytesSlice { heap }
    }

    /// Returns the contents of the [`CompactBytesSlice`] as a byte slice.
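    ///
    /// # Examples
    ///
    /// ```
    /// use compact_bytes::CompactBytesSlice;
    ///
    /// let bytes = CompactBytesSlice::new(b"abc");
    /// assert_eq!(bytes.as_slice(), b"abc");
    /// ```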
    #[inline]
    pub fn as_slice(&self) -> &[u8] {
        let pointer = self.as_ptr();
        let length = self.len();

        unsafe { core::slice::from_raw_parts(pointer, length) }
    }

    /// Returns the contents of the [`CompactBytesSlice`] as a mutable byte slice.
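    ///
    /// # Examples
    ///
    /// ```
    /// use compact_bytes::CompactBytesSlice;
    ///
    /// let mut bytes = CompactBytesSlice::new(&[1, 2, 3]);
    /// bytes.as_mut_slice()[0] = 42;
    /// assert_eq!(bytes.as_slice(), &[42, 2, 3]);
    /// ```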
    #[inline]
    pub fn as_mut_slice(&mut self) -> &mut [u8] {
        let pointer = self.as_mut_ptr();
        let length = self.len();

        unsafe { core::slice::from_raw_parts_mut(pointer, length) }
    }

    /// Returns the length of the [`CompactBytesSlice`].
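    ///
    /// # Examples
    ///
    /// ```
    /// use compact_bytes::CompactBytesSlice;
    ///
    /// let bytes = CompactBytesSlice::new(b"hello");
    /// assert_eq!(bytes.len(), 5);
    /// ```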
    #[inline(always)]
    pub fn len(&self) -> usize {
        // SAFETY: `InlineBytes15` and `HeapBytesFixed` have the same size and alignment.
        //
        // Note: This code is very carefully written so we can benefit from branchless
        // instructions.
        //
        // Note: If the `CompactBytesSlice` is inline then `heap_length` is garbage, but
        // reading it is still sound because both union variants are the same size, and
        // we only use it after checking `spilled()`.
        let (mut length, heap_length) = unsafe { (self.inline.len(), self.heap.len) };
        if self.spilled() {
            length = heap_length;
        }

        length
    }

    /// Returns if the [`CompactBytesSlice`] is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns the capacity of the [`CompactBytesSlice`].
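    ///
    /// # Examples
    ///
    /// ```
    /// use compact_bytes::CompactBytesSlice;
    ///
    /// // Inline values always report the full inline capacity.
    /// let inline = CompactBytesSlice::new(b"abc");
    /// assert_eq!(inline.capacity(), CompactBytesSlice::MAX_INLINE);
    ///
    /// // Heap allocations are exactly sized, so capacity equals length.
    /// let heap = CompactBytesSlice::new(&[42u8; 20]);
    /// assert_eq!(heap.capacity(), 20);
    /// ```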
    #[inline(always)]
    pub fn capacity(&self) -> usize {
        // SAFETY: `InlineBytes15` and `HeapBytesFixed` share the same size and alignment.
        //
        // Note: This code is very carefully written so we can benefit from branchless
        // instructions.
        let (mut capacity, heap_capacity) = unsafe { (Self::MAX_INLINE, self.heap.len) };
        if self.spilled() {
            capacity = heap_capacity;
        }
        capacity
    }

    /// Consumes the [`CompactBytesSlice`], returning a `Box<[u8]>`.
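    ///
    /// # Examples
    ///
    /// ```
    /// use compact_bytes::CompactBytesSlice;
    ///
    /// let bytes = CompactBytesSlice::new(b"hello");
    /// let boxed = bytes.into_boxed_slice();
    /// assert_eq!(&boxed[..], b"hello");
    /// assert_eq!(boxed.len(), 5);
    /// ```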
    #[inline]
    pub fn into_boxed_slice(self) -> Box<[u8]> {
        if self.spilled() {
            // SAFETY: `InlineBytes15` and `HeapBytesFixed` have the same size and alignment.
            // Above we checked that the current `CompactBytesSlice` is heap allocated.
            let heap = unsafe { &self.heap };

            let slice = unsafe { core::slice::from_raw_parts_mut(heap.ptr.as_ptr(), heap.len) };
            let boxed = unsafe { Box::from_raw(slice) };
            std::mem::forget(self);

            boxed
        } else {
            // SAFETY: `InlineBytes15` and `HeapBytesFixed` have the same size and alignment.
            // Above we checked that the current `CompactBytesSlice` is stored inline.
            let inline = unsafe { &self.inline };
            // Only copy the initialized prefix of the buffer, not all `MAX_INLINE` bytes.
            inline.buffer[..inline.len()].into()
        }
    }

    /// Returns if the [`CompactBytesSlice`] has spilled to the heap.
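    ///
    /// # Examples
    ///
    /// ```
    /// use compact_bytes::CompactBytesSlice;
    ///
    /// let inline = CompactBytesSlice::new(b"small");
    /// assert!(!inline.spilled());
    ///
    /// let heap = CompactBytesSlice::new(b"this is too large to be stored inline");
    /// assert!(heap.spilled());
    /// ```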
    #[inline(always)]
    pub fn spilled(&self) -> bool {
        // We store whether or not a `CompactBytesSlice` has spilled to the
        // heap using the most significant bit of the last byte: it is set for
        // the inline variant and clear for the heap variant.
        //
        // SAFETY: `InlineBytes15` and `HeapBytesFixed` have the same size and alignment.
        unsafe { self.inline.data < INLINE_MASK }
    }

    #[inline(always)]
    fn as_ptr(&self) -> *const u8 {
        // SAFETY: `InlineBytes15` and `HeapBytesFixed` have the same size and alignment.
        //
        // Note: This code is very carefully written so we can benefit from branchless
        // instructions.
        let mut pointer = self as *const Self as *const u8;
        if self.spilled() {
            pointer = unsafe { self.heap.ptr }.as_ptr()
        }
        pointer
    }

    #[inline(always)]
    fn as_mut_ptr(&mut self) -> *mut u8 {
        // SAFETY: `InlineBytes15` and `HeapBytesFixed` have the same size and alignment.
        //
        // Note: This code is very carefully written so we can benefit from branchless
        // instructions.
        let mut pointer = self as *mut Self as *mut u8;
        if self.spilled() {
            pointer = unsafe { self.heap.ptr }.as_ptr()
        }
        pointer
    }
}

impl Default for CompactBytesSlice {
    #[inline]
    fn default() -> Self {
        CompactBytesSlice::new(&[])
    }
}

impl Deref for CompactBytesSlice {
    type Target = [u8];

    #[inline]
    fn deref(&self) -> &[u8] {
        self.as_slice()
    }
}

impl DerefMut for CompactBytesSlice {
    #[inline]
    fn deref_mut(&mut self) -> &mut [u8] {
        self.as_mut_slice()
    }
}

impl AsRef<[u8]> for CompactBytesSlice {
    #[inline]
    fn as_ref(&self) -> &[u8] {
        self.as_slice()
    }
}

impl<T: AsRef<[u8]>> PartialEq<T> for CompactBytesSlice {
    #[inline]
    fn eq(&self, other: &T) -> bool {
        self.as_slice() == other.as_ref()
    }
}

impl Eq for CompactBytesSlice {}

impl Hash for CompactBytesSlice {
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.as_slice().hash(state)
    }
}

impl fmt::Debug for CompactBytesSlice {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{:?}", self.as_slice())
    }
}

impl Drop for CompactBytesSlice {
    #[inline]
    fn drop(&mut self) {
        // Note: we hint to the compiler that dropping a heap variant is cold to improve the
        // performance of dropping the inline variant.
        #[cold]
        fn outlined_drop(this: &mut CompactBytesSlice) {
            let heap = unsafe { &mut this.heap };
            heap.dealloc();
        }

        if self.spilled() {
            outlined_drop(self);
        }
    }
}

impl Clone for CompactBytesSlice {
    #[inline]
    fn clone(&self) -> Self {
        // Note: we hint to the compiler that cloning a heap variant is cold to improve the
        // performance of cloning the inline variant.
        #[cold]
        fn outlined_clone(this: &CompactBytesSlice) -> CompactBytesSlice {
            CompactBytesSlice::new(this.as_slice())
        }

        if self.spilled() {
            outlined_clone(self)
        } else {
            // SAFETY: We just checked that `self` is the inline variant.
            let inline = unsafe { &self.inline };
            CompactBytesSlice { inline: *inline }
        }
    }
}

impl From<Box<[u8]>> for CompactBytesSlice {
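    /// Converts a `Box<[u8]>` into a [`CompactBytesSlice`] in constant time; for a
    /// non-empty box the existing allocation is re-used rather than copied.
    ///
    /// ```
    /// use compact_bytes::CompactBytesSlice;
    ///
    /// let boxed: Box<[u8]> = Box::from(*b"I am large enough to live on the heap");
    /// let bytes = CompactBytesSlice::from(boxed);
    /// assert!(bytes.spilled());
    /// assert_eq!(bytes.as_slice(), b"I am large enough to live on the heap");
    /// ```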
    #[inline]
    fn from(value: Box<[u8]>) -> Self {
        if value.is_empty() {
            return CompactBytesSlice::empty();
        }

        // "Leak" the Box so dropping the value does not de-allocate the backing buffer.
        let slice = Box::leak(value);
        // Use the returned pointer and length to construct a CompactBytesSlice in constant time.
        let (ptr, len) = (slice.as_mut_ptr(), slice.len());
        // SAFETY: We returned early above if the `Box` was empty, so we know this pointer
        // is not null.
        let ptr = unsafe { NonNull::new_unchecked(ptr) };

        let heap = HeapBytesFixed { ptr, len };
        CompactBytesSlice {
            heap: ManuallyDrop::new(heap),
        }
    }
}

#[cfg(test)]
mod test {
    use proptest::prelude::*;
    use test_case::test_case;
    use test_strategy::proptest;

    use super::{CompactBytesSlice, HeapBytesFixed};

    #[test]
    fn test_empty() {
        let obj = const { CompactBytesSlice::empty() };
        assert_eq!(obj.as_slice(), [0u8; 0].as_slice());
        assert!(obj.is_empty());
        assert!(!obj.spilled());
    }

    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_discriminant() {
        let mut buf = vec![0u8; 32];
        let heap = HeapBytesFixed {
            ptr: unsafe { std::ptr::NonNull::new_unchecked(buf.as_mut_ptr()) },
            len: usize::MAX >> 1,
        };
        let repr = CompactBytesSlice {
            heap: std::mem::ManuallyDrop::new(heap),
        };
        assert!(repr.spilled());
        // mem::forget the repr since its underlying buffer is shared.
        std::mem::forget(repr);

        let bad_heap = HeapBytesFixed {
            ptr: unsafe { std::ptr::NonNull::new_unchecked(buf.as_mut_ptr()) },
            len: usize::MAX,
        };
        let repr = CompactBytesSlice {
            heap: std::mem::ManuallyDrop::new(bad_heap),
        };
        // This will identify as inline since the MSB is 1.
        assert!(!repr.spilled());
        // mem::forget the repr since its underlying buffer is shared.
        std::mem::forget(repr);
    }

    #[test_case(&[], 0 ; "empty")]
    #[test_case(b"hello world", 11 ; "short")]
    #[test_case(b"15 bytes inline", 15 ; "max_inline")]
    #[test_case(b"16 bytes spills!", 16 ; "first_spill")]
    #[test_case(b"i am very large and will spill to the heap", 42 ; "heap")]
    fn smoketest_row(slice: &[u8], expected_len: usize) {
        let repr = CompactBytesSlice::new(slice);

        assert_eq!(repr.len(), expected_len);
        assert_eq!(repr.as_slice(), slice);
        assert_eq!(repr.spilled(), expected_len > CompactBytesSlice::MAX_INLINE);
    }

    #[test_case(&[] ; "empty")]
    #[test_case(b"smol" ; "inline")]
    #[test_case(b"large large large large large large" ; "heap")]
    fn smoketest_clone(initial: &[u8]) {
        let repr_a = CompactBytesSlice::new(initial);
        let repr_b = repr_a.clone();

        assert_eq!(repr_a.len(), repr_b.len());
        assert_eq!(repr_a.capacity(), repr_b.capacity());
        assert_eq!(repr_a.as_slice(), repr_b.as_slice());
    }

    #[test_case(Box::from([]) ; "empty")]
    #[test_case(Box::from([0, 1, 2, 3, 4]) ; "inline")]
    #[test_case(b"I am long and will be on the heap, yada yada yada".to_vec().into_boxed_slice() ; "heap")]
    fn smoketest_from_box_slice(initial: Box<[u8]>) {
        let control = initial.clone();
        let pointer = initial.as_ptr();
        let repr = CompactBytesSlice::from(initial);

        assert_eq!(control.len(), repr.len());
        assert_eq!(&control[..], &repr[..]);

        // We do not eagerly inline, except if the Box is empty.
        assert_eq!(repr.spilled(), !control.is_empty());
        // The allocation of the Box should get re-used.
        assert_eq!(repr.as_ptr() == pointer, !control.is_empty());
    }

    #[test]
    fn test_cloning_inlines() {
        let data = Box::from([42u8]);
        let c = CompactBytesSlice::from(data);

        assert_eq!(c.as_slice(), &[42]);
        assert_eq!(c.capacity(), 1);
        assert!(c.spilled());

        let clone = c.clone();
        assert_eq!(clone.as_slice(), &[42]);
        assert_eq!(clone.capacity(), CompactBytesSlice::MAX_INLINE);
        assert!(!clone.spilled());
    }

    #[proptest]
    #[cfg_attr(miri, ignore)]
    fn proptest_row(initial: Vec<u8>) {
        let repr = CompactBytesSlice::new(&initial);

        prop_assert_eq!(repr.as_slice(), initial.as_slice());
        prop_assert_eq!(repr.len(), initial.len());
    }

    #[proptest]
    #[cfg_attr(miri, ignore)]
    fn proptest_clone(initial: Vec<u8>) {
        let repr_a = CompactBytesSlice::new(&initial);
        let repr_b = repr_a.clone();

        assert_eq!(repr_a.len(), repr_b.len());
        assert_eq!(repr_a.capacity(), repr_b.capacity());
        assert_eq!(repr_a.as_slice(), repr_b.as_slice());
    }

    #[proptest]
    #[cfg_attr(miri, ignore)]
    fn proptest_from_box_slice(initial: Box<[u8]>) {
        let control = initial.clone();
        let pointer = initial.as_ptr();
        let repr = CompactBytesSlice::from(initial);

        assert_eq!(control.len(), repr.len());
        assert_eq!(&control[..], &repr[..]);

        // We do not eagerly inline, except if the Box is empty.
        assert_eq!(repr.spilled(), !control.is_empty());
        // The allocation of the Box should get re-used.
        assert_eq!(repr.as_ptr() == pointer, !control.is_empty());
    }
}