compact_bytes/
lib.rs

1//! "Small string optimization" for a bytes.
2
3use std::alloc;
4use std::fmt;
5use std::hash::{Hash, Hasher};
6use std::mem::ManuallyDrop;
7use std::ops::Deref;
8use std::ops::DerefMut;
9use std::ptr::NonNull;
10
11const INLINE_MASK: u8 = 0b1000_0000;
12
13/// [`CompactBytes`] inlines up to 23 bytes on the stack; if more than that is required, we spill to
14/// the heap. The heap representation is not reference counted like `bytes::Bytes`; it's just an
15/// owned blob of bytes.
16///
17/// # Why?
18///
19/// ### 1. Why do we want to do this?
20///
21/// Performance. A `Vec<u8>` is already 24 bytes on the stack, and then another allocation on the
22/// heap. If we can avoid the heap allocation altogether it saves memory and improves runtime
23/// performance.
24///
25/// ### 2. Why did we write our own implementation?
26///
27/// At the time of writing (October 2023), there isn't anything else in the Rust ecosystem that
28/// provides what we need. There is `smallvec` (which we used to use) but it's not space efficient.
29/// A `SmallVec<[u8; 24]>` required 32 bytes on the stack, so we were wasting 8 bytes! There are
30/// other small vector crates (e.g. `arrayvec` or `tinyvec`) but they have their own limitations.
31/// There are also a number of small string optimizations in the Rust ecosystem, but none of them
32/// work for us for various other reasons.
33///
34/// # How does this work?
35///
36/// A [`CompactBytes`] is 24 bytes on the stack (same as `Vec<u8>`) but it has two modes:
37///
38/// 1. Heap   `[ ptr<8> | len<8> | cap<8> ]`
39/// 2. Inline `[   buffer<23>   | len <1> ]`
40///
41/// We use the most significant bit of the last byte to indicate which mode we're in.
42///
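/// A brief illustration of the size and the two modes (a sketch; sizes shown are for a 64-bit
/// target):
///
/// ```
/// use compact_bytes::CompactBytes;
///
/// // Same size on the stack as a `Vec<u8>`.
/// assert_eq!(std::mem::size_of::<CompactBytes>(), 24);
///
/// // 23 bytes or fewer stay inline; anything larger spills to the heap.
/// assert!(!CompactBytes::new(&[7u8; 23]).spilled());
/// assert!(CompactBytes::new(&[7u8; 24]).spilled());
/// ```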
43pub union CompactBytes {
44    heap: ManuallyDrop<HeapBytes>,
45    inline: InlineBytes,
46}
47
48// SAFETY: It is safe to Send a `CompactBytes` to other threads because it owns all of its data.
49unsafe impl Send for CompactBytes {}
50
51// SAFETY: It is safe to share references of `CompactBytes` between threads because it does not
52// support any kind of interior mutability, or other way to introduce races.
53unsafe impl Sync for CompactBytes {}
54
55static_assertions::assert_eq_align!(InlineBytes, HeapBytes, CompactBytes, Vec<u8>, usize);
56static_assertions::assert_eq_size!(InlineBytes, HeapBytes, CompactBytes, Vec<u8>);
57
58static_assertions::const_assert_eq!(std::mem::size_of::<CompactBytes>(), 24);
59
60impl CompactBytes {
61    /// The maximum number of bytes that a [`CompactBytes`] can store inline.
62    pub const MAX_INLINE: usize = 23;
63
64    /// The minimum number of bytes that a [`CompactBytes`] will allocate on the heap.
65    pub const MIN_HEAP: usize = std::mem::size_of::<usize>() * 2;
66    /// The maximum number of bytes that a [`CompactBytes`] can store on the heap.
67    pub const MAX_HEAP: usize = usize::MAX >> 1;
68
69    /// Creates a new [`CompactBytes`] from the provided slice. Stores the bytes inline if small
70    /// enough.
71    ///
72    /// # Examples
73    ///
74    /// ```
75    /// use compact_bytes::CompactBytes;
76    ///
77    /// let inline = CompactBytes::new(&[1, 2, 3, 4]);
78    /// assert!(!inline.spilled());
79    /// assert_eq!(inline.len(), 4);
80    ///
81    /// let heap = CompactBytes::new(b"I am a bytes type that will get stored on the heap");
82    /// assert!(heap.spilled());
83    /// assert_eq!(heap.len(), 50);
84    /// ```
85    #[inline]
86    pub fn new(slice: &[u8]) -> Self {
87        if slice.len() <= Self::MAX_INLINE {
88            // SAFETY: We just checked that slice length is less than or equal to MAX_INLINE.
89            let inline = unsafe { InlineBytes::new(slice) };
90            CompactBytes { inline }
91        } else {
92            let heap = ManuallyDrop::new(HeapBytes::new(slice));
93            CompactBytes { heap }
94        }
95    }
96
97    /// Creates a new [`CompactBytes`] with the specified capacity, but with a minimum of
98    /// [`CompactBytes::MAX_INLINE`].
99    ///
100    /// # Examples
101    ///
102    /// ```
103    /// use compact_bytes::CompactBytes;
104    ///
105    /// let min = CompactBytes::with_capacity(4);
106    /// assert_eq!(min.capacity(), CompactBytes::MAX_INLINE);
107    /// ```
108    #[inline]
109    pub fn with_capacity(capacity: usize) -> Self {
110        if capacity <= Self::MAX_INLINE {
111            let inline = InlineBytes::empty();
112            CompactBytes { inline }
113        } else {
114            let heap = ManuallyDrop::new(HeapBytes::with_capacity(capacity));
115            CompactBytes { heap }
116        }
117    }
118
119    /// Creates a new [`CompactBytes`] using the provided pointer, length, and capacity.
120    ///
121    /// # Safety
122    ///
123    /// * The caller must guarantee that the provided pointer is non-null and properly aligned, and that
124    ///   the backing allocation was made by the same allocator that will eventually be used to free the
125    ///   returned [`CompactBytes`].
126    /// * `length` needs to be less than or equal to `capacity`.
127    /// * `capacity` needs to be the capacity that the pointer was allocated with.
128    /// * `capacity` needs to be less than or equal to [`CompactBytes::MAX_HEAP`].
129    ///
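    /// # Examples
    ///
    /// One possible round trip through [`CompactBytes::into_vec`]; this is only a sketch, and it
    /// relies on the fact that both types allocate their heap buffers with the global allocator.
    ///
    /// ```
    /// use compact_bytes::CompactBytes;
    ///
    /// let bytes = b"I am long enough that I will be stored on the heap";
    /// let mut vec = std::mem::ManuallyDrop::new(CompactBytes::new(bytes).into_vec());
    /// let (ptr, len, cap) = (vec.as_mut_ptr(), vec.len(), vec.capacity());
    ///
    /// // SAFETY: the buffer was allocated by `CompactBytes` itself, and `len` and `cap` come
    /// // straight from the `Vec` that owned it.
    /// let round_tripped = unsafe { CompactBytes::from_raw_parts(ptr, len, cap) };
    /// assert_eq!(round_tripped.as_slice(), bytes);
    /// ```
    ///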
130    #[inline]
131    pub unsafe fn from_raw_parts(ptr: *mut u8, length: usize, capacity: usize) -> Self {
132        let heap = HeapBytes {
133            ptr: NonNull::new_unchecked(ptr),
134            len: length,
135            cap: capacity,
136        };
137        let heap = ManuallyDrop::new(heap);
138        CompactBytes { heap }
139    }
140
141    /// Returns the contents of the [`CompactBytes`] as a byte slice.
142    #[inline]
143    pub fn as_slice(&self) -> &[u8] {
144        let pointer = self.as_ptr();
145        let length = self.len();
146
147        unsafe { core::slice::from_raw_parts(pointer, length) }
148    }
149
150    /// Returns the contents of the [`CompactBytes`] as a mutable byte slice.
151    #[inline]
152    pub fn as_mut_slice(&mut self) -> &mut [u8] {
153        let pointer = self.as_mut_ptr();
154        let length = self.len();
155
156        unsafe { core::slice::from_raw_parts_mut(pointer, length) }
157    }
158
159    /// Returns the length of the [`CompactBytes`].
160    #[inline(always)]
161    pub fn len(&self) -> usize {
162        // SAFETY: `InlineBytes` and `HeapBytes` share the same size and alignment. Before
163        // returning this value we check whether it's valid or not.
164        //
165        // Note: This code is very carefully written so we can benefit from branchless
166        // instructions.
167        let (mut length, heap_length) = unsafe { (self.inline.len(), self.heap.len) };
168        if self.spilled() {
169            length = heap_length;
170        }
171
172        length
173    }
174
175    /// Returns if the [`CompactBytes`] is empty.
176    #[inline]
177    pub fn is_empty(&self) -> bool {
178        self.len() == 0
179    }
180
181    /// Returns the capacity of the [`CompactBytes`].
182    #[inline(always)]
183    pub fn capacity(&self) -> usize {
184        // SAFETY: `InlineBytes` and `HeapBytes` share the same size and alignment. Before
185        // returning this value we check whether it's valid or not.
186        //
187        // Note: This code is very carefully written so we can benefit from branchless
188        // instructions.
189        let (mut capacity, heap_capacity) = unsafe { (Self::MAX_INLINE, self.heap.cap) };
190        if self.spilled() {
191            capacity = heap_capacity;
192        }
193        capacity
194    }
195
196    /// Appends an additional byte to the [`CompactBytes`], resizing if necessary.
197    ///
198    /// Note: You should almost never call this in a loop; instead use
199    /// [`CompactBytes::extend_from_slice`].
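    ///
    /// # Examples
    ///
    /// Appending a single byte:
    ///
    /// ```
    /// use compact_bytes::CompactBytes;
    ///
    /// let mut b = CompactBytes::new(&[1, 2, 3]);
    /// b.push(4);
    /// assert_eq!(b.as_slice(), &[1, 2, 3, 4]);
    /// ```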
200    #[inline]
201    pub fn push(&mut self, byte: u8) {
202        self.extend_from_slice(&[byte]);
203    }
204
205    /// Extends the [`CompactBytes`] with bytes from `slice`, resizing if necessary.
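    ///
    /// # Examples
    ///
    /// ```
    /// use compact_bytes::CompactBytes;
    ///
    /// let mut b = CompactBytes::new(b"foo");
    /// b.extend_from_slice(b"bar");
    /// assert_eq!(b.as_slice(), b"foobar");
    /// ```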
206    #[inline(always)]
207    pub fn extend_from_slice(&mut self, slice: &[u8]) {
208        // Reserve at least enough space to fit slice.
209        self.reserve(slice.len());
210
211        let (ptr, len, cap) = self.as_mut_triple();
212        // SAFETY: `len` is less than or equal to `cap`, so the result stays within the original
213        // allocation. This addition does not overflow `isize`, nor does it rely on any wrapping logic.
214        let push_ptr = unsafe { ptr.add(len) };
215
216        debug_assert!((cap - len) >= slice.len(), "failed to reserve enough space");
217
218        // Safety:
219        //
220        // * src is valid for a read of len bytes, since len comes from src.
221        // * dst is valid for writes of len bytes, since we just reserved extra space.
222        // * src and dst are both properly aligned.
223        // * src and dst do not overlap because we have a unique reference to dst.
224        //
225        unsafe { std::ptr::copy_nonoverlapping(slice.as_ptr(), push_ptr, slice.len()) };
226
227        // SAFETY: We just wrote an additional len bytes, so we know this length is valid.
228        unsafe { self.set_len(len + slice.len()) };
229    }
230
231    /// Truncates this [`CompactBytes`], removing all contents without affecting the capacity.
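    ///
    /// # Examples
    ///
    /// Clearing keeps any existing allocation around for re-use:
    ///
    /// ```
    /// use compact_bytes::CompactBytes;
    ///
    /// let mut b = CompactBytes::new(b"I am a long collection of bytes that lives on the heap");
    /// let capacity = b.capacity();
    ///
    /// b.clear();
    /// assert!(b.is_empty());
    /// assert_eq!(b.capacity(), capacity);
    /// ```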
232    #[inline]
233    pub fn clear(&mut self) {
234        self.truncate(0);
235    }
236
237    /// Truncates this [`CompactBytes`] to the specified length without affecting the capacity. Has
238    /// no effect if `new_len` is greater than the current length.
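    ///
    /// # Examples
    ///
    /// ```
    /// use compact_bytes::CompactBytes;
    ///
    /// let mut b = CompactBytes::new(&[1, 2, 3, 4]);
    /// b.truncate(2);
    /// assert_eq!(b.as_slice(), &[1, 2]);
    ///
    /// // Truncating to a length greater than the current one is a no-op.
    /// b.truncate(100);
    /// assert_eq!(b.len(), 2);
    /// ```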
239    #[inline]
240    pub fn truncate(&mut self, new_len: usize) {
241        if new_len >= self.len() {
242            return;
243        }
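        // SAFETY: `new_len` is less than the current length, so it is within capacity and the
        // bytes up to `new_len` are already initialized.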
244        unsafe { self.set_len(new_len) }
245    }
246
247    /// Reserves at least `additional` bytes for this [`CompactBytes`], possibly re-allocating if
248    /// there is not enough remaining capacity.
249    ///
250    /// # Examples
251    ///
252    /// ```
253    /// use compact_bytes::CompactBytes;
254    ///
255    /// let mut b = CompactBytes::new(b"foo");
256    /// b.reserve(100);
257    ///
258    /// assert_eq!(b.capacity(), 103);
259    /// ```
260    #[inline]
261    pub fn reserve(&mut self, additional: usize) {
262        let len = self.len();
263        let needed_capacity = len
264            .checked_add(additional)
265            .expect("attempt to reserve more than usize::MAX");
266
267        // Already have enough space, nothing to do!
268        if self.capacity() >= needed_capacity {
269            return;
270        }
271
272        // Note: We move the actual re-allocation code path into its own function
273        // so the common case of calling `reserve(...)` when we already have
274        // enough capacity can be inlined by LLVM.
275        realloc(self, len, additional);
276
277        #[cold]
278        fn realloc(this: &mut CompactBytes, len: usize, additional: usize) {
279            // Note: Here we are making a distinct choice to _not_ eagerly inline.
280            //
281            // `CompactBytes`s can get re-used, e.g. calling `CompactBytes::clear`, at which point it's
282            // possible we could have a length of 0 and `additional` would be less than
283            // `MAX_INLINE`. Some implementations might opt to drop the existing heap allocation, but
284            // if a `CompactBytes` is being re-used it's likely we'll need the full original capacity,
285            // thus we do not eagerly inline.
286
287            if !this.spilled() {
288                let heap = HeapBytes::with_additional(this.as_slice(), additional);
289                *this = CompactBytes {
290                    heap: ManuallyDrop::new(heap),
291                };
292            } else {
293                // SAFETY: `InlineBytes` and `HeapBytes` have the same size and alignment. We also
294                // checked above that the current `CompactBytes` is heap allocated.
295                let heap_row = unsafe { &mut this.heap };
296
297                let amortized_capacity = HeapBytes::amortized_growth(len, additional);
298
299                // First attempt to resize the existing allocation; if that fails, create a new one.
300                if heap_row.realloc(amortized_capacity).is_err() {
301                    let heap = HeapBytes::with_additional(this.as_slice(), additional);
302                    let heap = ManuallyDrop::new(heap);
303                    *this = CompactBytes { heap };
304                }
305            }
306        }
307    }
308
309    /// Consumes the [`CompactBytes`], returning a `Vec<u8>`.
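    ///
    /// # Examples
    ///
    /// ```
    /// use compact_bytes::CompactBytes;
    ///
    /// let b = CompactBytes::new(&[1, 2, 3]);
    /// let v: Vec<u8> = b.into_vec();
    /// assert_eq!(v, vec![1, 2, 3]);
    /// ```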
310    #[inline]
311    pub fn into_vec(self) -> Vec<u8> {
312        if self.spilled() {
313            // SAFETY: `InlineBytes` and `HeapBytes` have the same size and alignment. We also
314            // checked above that the current `CompactBytes` is heap allocated.
315            let heap = unsafe { &self.heap };
316            let vec = unsafe { Vec::from_raw_parts(heap.ptr.as_ptr(), heap.len, heap.cap) };
317            std::mem::forget(self);
318
319            vec
320        } else {
321            self.as_slice().to_vec()
322        }
323    }
324
325    /// Returns if the [`CompactBytes`] has spilled to the heap.
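    ///
    /// # Examples
    ///
    /// ```
    /// use compact_bytes::CompactBytes;
    ///
    /// assert!(!CompactBytes::new(b"inline").spilled());
    /// assert!(CompactBytes::new(b"I am a much longer slice of bytes and will spill").spilled());
    /// ```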
326    #[inline(always)]
327    pub fn spilled(&self) -> bool {
328        // SAFETY: `InlineBytes` and `HeapBytes` have the same size and alignment, so reading the last
329        // byte of either representation is always valid; it is where the discriminant bit lives.
330        unsafe { self.inline.data < INLINE_MASK }
331    }
332
333    /// Forces the length of [`CompactBytes`] to `new_len`.
334    ///
335    /// # Safety
336    /// * `new_len` must be less than or equal to capacity.
337    /// * The bytes at `old_len..new_len` must be initialized.
338    ///
339    #[inline]
340    unsafe fn set_len(&mut self, new_len: usize) {
341        if self.spilled() {
342            self.heap.set_len(new_len);
343        } else {
344            self.inline.set_len(new_len);
345        }
346    }
347
348    #[inline(always)]
349    fn as_ptr(&self) -> *const u8 {
350        // SAFETY: `InlineBytes` and `HeapBytes` share the same size and alignment. Before
351        // returning this value we check whether it's valid or not.
352        //
353        // Note: This code is very carefully written so we can benefit from branchless
354        // instructions.
355        let mut pointer = self as *const Self as *const u8;
356        if self.spilled() {
357            pointer = unsafe { self.heap.ptr }.as_ptr()
358        }
359        pointer
360    }
361
362    #[inline(always)]
363    fn as_mut_ptr(&mut self) -> *mut u8 {
364        // SAFETY: `InlineBytes` and `HeapBytes` share the same size and alignment. Before
365        // returning this value we check whether it's valid or not.
366        //
367        // Note: This code is very carefully written so we can benefit from branchless
368        // instructions.
369        let mut pointer = self as *mut Self as *mut u8;
370        if self.spilled() {
371            pointer = unsafe { self.heap.ptr }.as_ptr()
372        }
373        pointer
374    }
375
376    #[inline(always)]
377    fn as_mut_triple(&mut self) -> (*mut u8, usize, usize) {
378        let ptr = self.as_mut_ptr();
379        let len = self.len();
380        let cap = self.capacity();
381
382        (ptr, len, cap)
383    }
384}
385
386impl Default for CompactBytes {
387    #[inline]
388    fn default() -> Self {
389        CompactBytes::new(&[])
390    }
391}
392
393impl Deref for CompactBytes {
394    type Target = [u8];
395
396    #[inline]
397    fn deref(&self) -> &[u8] {
398        self.as_slice()
399    }
400}
401
402impl DerefMut for CompactBytes {
403    #[inline]
404    fn deref_mut(&mut self) -> &mut [u8] {
405        self.as_mut_slice()
406    }
407}
408
409impl AsRef<[u8]> for CompactBytes {
410    #[inline]
411    fn as_ref(&self) -> &[u8] {
412        self.as_slice()
413    }
414}
415
416impl<T: AsRef<[u8]>> PartialEq<T> for CompactBytes {
417    #[inline]
418    fn eq(&self, other: &T) -> bool {
419        self.as_slice() == other.as_ref()
420    }
421}
422
423impl Eq for CompactBytes {}
424
425impl Hash for CompactBytes {
426    fn hash<H: Hasher>(&self, state: &mut H) {
427        self.as_slice().hash(state)
428    }
429}
430
431impl fmt::Debug for CompactBytes {
432    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
433        write!(f, "{:?}", self.as_slice())
434    }
435}
436
437impl Drop for CompactBytes {
438    #[inline]
439    fn drop(&mut self) {
440        // Note: we hint to the compiler that dropping a heap variant is cold to improve the
441        // performance of dropping the inline variant.
442        #[cold]
443        fn outlined_drop(this: &mut CompactBytes) {
444            let heap = unsafe { &mut this.heap };
445            heap.dealloc();
446        }
447
448        if self.spilled() {
449            outlined_drop(self);
450        }
451    }
452}
453
454impl Clone for CompactBytes {
455    #[inline]
456    fn clone(&self) -> Self {
457        // Note: we hint to the compiler that cloning a heap variant is cold to improve the
458        // performance of cloning the inline variant.
459        #[cold]
460        fn outlined_clone(this: &CompactBytes) -> CompactBytes {
461            CompactBytes::new(this.as_slice())
462        }
463
464        if self.spilled() {
465            outlined_clone(self)
466        } else {
467            let inline = unsafe { &self.inline };
468            CompactBytes { inline: *inline }
469        }
470    }
471
472    #[inline]
473    fn clone_from(&mut self, source: &Self) {
474        self.clear();
475        self.extend_from_slice(source.as_slice());
476    }
477}
478
479impl From<Vec<u8>> for CompactBytes {
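    /// Converting from a `Vec<u8>` runs in constant time: for a non-empty vector we take ownership
    /// of its existing heap allocation rather than copying it. A small sketch of that behavior:
    ///
    /// ```
    /// use compact_bytes::CompactBytes;
    ///
    /// let v = b"a vector that has already been allocated on the heap".to_vec();
    /// let ptr = v.as_ptr();
    ///
    /// let b = CompactBytes::from(v);
    /// assert!(b.spilled());
    /// assert_eq!(b.as_slice().as_ptr(), ptr);
    /// ```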
480    #[inline]
481    fn from(mut value: Vec<u8>) -> Self {
482        if value.is_empty() {
483            let inline = InlineBytes::empty();
484            return CompactBytes { inline };
485        }
486
487        // Deconstruct the Vec so we can convert to a `CompactBytes` in constant time.
488        let (ptr, len, cap) = (value.as_mut_ptr(), value.len(), value.capacity());
489        // SAFETY: We checked above, and returned early, if the `Vec` was empty, thus we know this
490        // pointer is not null.
491        let ptr = unsafe { NonNull::new_unchecked(ptr) };
492        // Forget the original Vec so its underlying buffer does not get dropped.
493        std::mem::forget(value);
494
495        let heap = HeapBytes { ptr, len, cap };
496        CompactBytes {
497            heap: ManuallyDrop::new(heap),
498        }
499    }
500}
501
502impl serde::Serialize for CompactBytes {
503    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
504        self.as_slice().serialize(serializer)
505    }
506}
507
508impl<'de> serde::Deserialize<'de> for CompactBytes {
509    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
510        deserialize_compact_bytes(deserializer)
511    }
512}
513
514fn deserialize_compact_bytes<'de: 'a, 'a, D: serde::Deserializer<'de>>(
515    deserializer: D,
516) -> Result<CompactBytes, D::Error> {
517    struct CompactBytesVisitor;
518
519    impl<'a> serde::de::Visitor<'a> for CompactBytesVisitor {
520        type Value = CompactBytes;
521
522        fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
523            formatter.write_str("bytes")
524        }
525
526        fn visit_seq<A: serde::de::SeqAccess<'a>>(
527            self,
528            mut seq: A,
529        ) -> Result<Self::Value, A::Error> {
530            let mut bytes = CompactBytes::default();
531            if let Some(capacity_hint) = seq.size_hint() {
532                bytes.reserve(capacity_hint);
533            }
534
535            while let Some(elem) = seq.next_element::<u8>()? {
536                bytes.push(elem)
537            }
538
539            Ok(bytes)
540        }
541
542        fn visit_borrowed_bytes<E: serde::de::Error>(self, v: &'a [u8]) -> Result<Self::Value, E> {
543            Ok(CompactBytes::new(v))
544        }
545
546        fn visit_bytes<E: serde::de::Error>(self, v: &[u8]) -> Result<Self::Value, E> {
547            Ok(CompactBytes::new(v))
548        }
549    }
550
551    deserializer.deserialize_bytes(CompactBytesVisitor)
552}
553
554#[repr(C, align(8))]
555#[derive(Copy, Clone)]
556struct InlineBytes {
557    buffer: [u8; CompactBytes::MAX_INLINE],
558    data: u8,
559}
560
561impl InlineBytes {
562    /// Create an [`InlineBytes`] from the provided slice.
563    ///
564    /// Safety:
565    /// * `slice` must have a length less than or equal to [`CompactBytes::MAX_INLINE`].
566    ///
567    #[inline]
568    pub unsafe fn new(slice: &[u8]) -> Self {
569        debug_assert!(slice.len() <= CompactBytes::MAX_INLINE);
570
571        let len = slice.len();
572        let mut buffer = [0u8; CompactBytes::MAX_INLINE];
573
574        // SAFETY: We know src and dst are both valid for len bytes, and they do not overlap.
575        unsafe {
576            buffer
577                .as_mut_ptr()
578                .copy_from_nonoverlapping(slice.as_ptr(), len)
579        };
580
581        let data = INLINE_MASK | (len as u8);
582
583        InlineBytes { buffer, data }
584    }
585
586    #[inline]
587    pub const fn empty() -> Self {
588        let buffer = [0u8; CompactBytes::MAX_INLINE];
589
590        // Even though the `| 0` below has no effect, we leave it for clarity.
591        #[allow(clippy::identity_op)]
592        let data = INLINE_MASK | 0;
593
594        InlineBytes { buffer, data }
595    }
596
597    pub fn len(&self) -> usize {
598        (self.data & !INLINE_MASK) as usize
599    }
600
601    /// Forces the length of [`InlineBytes`] to `new_len`.
602    ///
603    /// # Safety
604    /// * `new_len` must be less than or equal to [`CompactBytes::MAX_INLINE`].
605    /// * `new_len` must be less than or equal to capacity.
606    /// * The bytes at `old_len..new_len` must be initialized.
607    ///
608    unsafe fn set_len(&mut self, new_len: usize) {
609        debug_assert!(new_len <= CompactBytes::MAX_INLINE);
610        self.data = INLINE_MASK | (new_len as u8);
611    }
612}
613
614#[repr(C)]
615struct HeapBytes {
616    ptr: NonNull<u8>,
617    len: usize,
618    cap: usize,
619}
620
621impl HeapBytes {
622    #[inline]
623    pub fn new(slice: &[u8]) -> Self {
624        let len = slice.len();
625        let cap = len.max(CompactBytes::MIN_HEAP);
626
627        debug_assert!(cap <= CompactBytes::MAX_HEAP, "too large of allocation");
628        let ptr = Self::alloc_ptr(cap);
629
630        unsafe { ptr.as_ptr().copy_from_nonoverlapping(slice.as_ptr(), len) };
631
632        HeapBytes { ptr, len, cap }
633    }
634
635    pub fn with_capacity(capacity: usize) -> Self {
636        assert!(
637            capacity <= CompactBytes::MAX_HEAP,
638            "too large of allocation"
639        );
640
641        let len = 0;
642        let cap = capacity.max(CompactBytes::MIN_HEAP);
643        let ptr = Self::alloc_ptr(cap);
644
645        HeapBytes {
646            ptr,
647            len,
648            cap,
649        }
650    }
651
652    pub fn with_additional(slice: &[u8], additional: usize) -> Self {
653        let new_capacity = Self::amortized_growth(slice.len(), additional);
654        let mut row = Self::with_capacity(new_capacity);
655
656        debug_assert!(row.cap > slice.len());
657
658        // SAFETY: We know src and dst are both valid for len bytes, and they do not overlap.
659        unsafe {
660            std::ptr::copy_nonoverlapping(slice.as_ptr(), row.ptr.as_ptr(), slice.len());
661        };
662        // Set our length.
663        row.len = slice.len();
664
665        row
666    }
667
668    pub unsafe fn set_len(&mut self, len: usize) {
669        self.len = len;
670    }
671
672    pub fn realloc(&mut self, new_capacity: usize) -> Result<usize, ()> {
673        // Can't shrink the heap allocation to be less than length, because we'd lose data.
674        if new_capacity < self.len {
675            return Err(());
676        }
677        // Do not reallocate to 0 capacity.
678        if new_capacity == 0 {
679            return Err(());
680        }
681
682        // Always allocate at least `CompactBytes::MIN_HEAP` bytes.
683        let new_capacity = new_capacity.max(CompactBytes::MIN_HEAP);
684
685        // Already at the appropriate size!
686        if new_capacity == self.cap {
687            return Ok(new_capacity);
688        }
689
690        let cur_layout = Self::layout(self.cap);
691        let new_layout = Self::layout(new_capacity);
692
693        // Check for overflow.
694        let new_size = new_layout.size();
695        if new_size < new_capacity {
696            return Err(());
697        }
698
699        // SAFETY:
700        // * Our pointer was allocated via the same allocator.
701        // * We used the same layout for the previous allocation.
702        // * `new_size` is correct.
703        let raw_ptr = unsafe { alloc::realloc(self.ptr.as_ptr(), cur_layout, new_size) };
704        let ptr = NonNull::new(raw_ptr).ok_or(())?;
705
706        self.ptr = ptr;
707        self.cap = new_capacity;
708
709        Ok(new_capacity)
710    }
711
712    #[inline]
713    fn dealloc(&mut self) {
714        Self::dealloc_ptr(self.ptr, self.cap);
715    }
716
717    #[inline]
718    fn alloc_ptr(capacity: usize) -> NonNull<u8> {
719        let layout = Self::layout(capacity);
720        debug_assert!(layout.size() > 0);
721
722        // SAFETY: We ensure that the layout is not zero sized, by enforcing a minimum size.
723        let ptr = unsafe { alloc::alloc(layout) };
724
725        NonNull::new(ptr).expect("failed to allocate HeapBytes")
726    }
727
728    #[inline]
729    fn dealloc_ptr(ptr: NonNull<u8>, capacity: usize) {
730        let layout = Self::layout(capacity);
731
732        // SAFETY:
733        // * The pointer was allocated via this allocator.
734        // * We used the same layout when allocating.
735        unsafe { alloc::dealloc(ptr.as_ptr(), layout) };
736    }
737
738    #[inline(always)]
739    fn layout(capacity: usize) -> alloc::Layout {
740        debug_assert!(capacity > 0, "tried to allocate a HeapBytes with 0 capacity");
741        alloc::Layout::array::<u8>(capacity).expect("valid capacity")
742    }
743
744    /// [`HeapBytes`] grows at an amortized rate of 1.5x.
745    ///
746    /// Note: this is different from [`std::vec::Vec`], which grows at a rate of 2x. It's debated
747    /// which is better; for now we'll stick with a rate of 1.5x.
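    ///
    /// For example, with `cur_len = 20` and `additional = 4`: `required = 24` and
    /// `amortized = 20 * 3 / 2 = 30`, so we would grow to a capacity of 30.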
748    #[inline(always)]
749    pub fn amortized_growth(cur_len: usize, additional: usize) -> usize {
750        let required = cur_len.saturating_add(additional);
751        let amortized = cur_len.saturating_mul(3) / 2;
752        amortized.max(required)
753    }
754}
755
756impl Drop for HeapBytes {
757    fn drop(&mut self) {
758        self.dealloc()
759    }
760}
761
762#[cfg(test)]
763mod test {
764    use proptest::prelude::*;
765    use test_case::test_case;
766    use test_strategy::proptest;
767
768    use super::{CompactBytes, HeapBytes};
769
770    #[test]
771    fn test_bitcode() {
772        let obj = CompactBytes::new(b"hello world");
773        let encoded = bitcode::serialize(&obj).unwrap();
774        let decoded: CompactBytes = bitcode::deserialize(&encoded).unwrap();
775        assert_eq!(obj.as_slice(), decoded.as_slice());
776    }
777
778    #[test]
779    #[cfg_attr(miri, ignore)]
780    fn test_discriminant() {
781        let mut buf = vec![0u8; 32];
782        let heap = HeapBytes {
783            ptr: unsafe { std::ptr::NonNull::new_unchecked(buf.as_mut_ptr()) },
784            len: 0,
785            cap: usize::MAX >> 1,
786        };
787        let repr = CompactBytes {
788            heap: std::mem::ManuallyDrop::new(heap),
789        };
790        assert!(repr.spilled());
791        // mem::forget the repr since its underlying buffer is shared.
792        std::mem::forget(repr);
793
794        let bad_heap = HeapBytes {
795            ptr: unsafe { std::ptr::NonNull::new_unchecked(buf.as_mut_ptr()) },
796            len: 0,
797            cap: usize::MAX,
798        };
799        let repr = CompactBytes {
800            heap: std::mem::ManuallyDrop::new(bad_heap),
801        };
802        // This will identify as inline since the MSB is 1.
803        assert!(!repr.spilled());
804        // mem::forget the repr since its underlying buffer is shared.
805        std::mem::forget(repr);
806    }
807
808    #[test_case(&[], 0 ; "empty")]
809    #[test_case(b"hello world", 11 ; "short")]
810    #[test_case(b"can fit 23 bytes inline", 23 ; "max_inline")]
811    #[test_case(b"24 bytes and will spill!", 24 ; "first_spill")]
812    #[test_case(b"i am very large and will spill to the heap", 42 ; "heap")]
813    fn smoketest_row(slice: &[u8], expected_len: usize) {
814        let repr = CompactBytes::new(slice);
815
816        assert_eq!(repr.len(), expected_len);
817        assert_eq!(repr.as_slice(), slice);
818        assert_eq!(repr.spilled(), expected_len > CompactBytes::MAX_INLINE);
819    }
820
821    #[test_case(&[], &[] ; "empty_empty")]
822    #[test_case(&[], &[1, 2, 3, 4] ; "empty_inline")]
823    #[test_case(&[], b"once extended I will end up on the heap" ; "empty_heap")]
824    #[test_case(&[1, 2], &[3, 4] ; "inline_inline")]
825    #[test_case(&[1, 2, 3, 4], b"i am some more bytes, i will be on the heap, woohoo!" ; "inline_heap")]
826    #[test_case(b"this row will start on the heap because it's large", b"and this will keep it on the heap" ; "heap_heap")]
827    fn smoketest_extend(initial: &[u8], other: &[u8]) {
828        let mut repr = CompactBytes::new(initial);
829        repr.extend_from_slice(other);
830
831        let mut control = initial.to_vec();
832        control.extend_from_slice(other);
833
834        assert_eq!(repr.len(), control.len());
835        assert_eq!(repr.as_slice(), control.as_slice());
836    }
837
838    #[test_case(&[] ; "empty")]
839    #[test_case(b"i am smol" ; "inline")]
840    #[test_case(b"i am large and will end up on the heap" ; "heap")]
841    fn smoketest_clear(initial: &[u8]) {
842        let mut repr = CompactBytes::new(initial);
843        let capacity = repr.capacity();
844        assert_eq!(repr.as_slice(), initial);
845
846        repr.clear();
847
848        assert!(repr.as_slice().is_empty());
849        assert_eq!(repr.len(), 0);
850
851        // The capacity should not change after clearing.
852        assert_eq!(repr.capacity(), capacity);
853    }
854
855    #[test_case(&[] ; "empty")]
856    #[test_case(b"smol" ; "inline")]
857    #[test_case(b"large large large large large large" ; "heap")]
858    fn smoketest_clone(initial: &[u8]) {
859        let repr_a = CompactBytes::new(initial);
860        let repr_b = repr_a.clone();
861
862        assert_eq!(repr_a.len(), repr_b.len());
863        assert_eq!(repr_a.capacity(), repr_b.capacity());
864        assert_eq!(repr_a.as_slice(), repr_b.as_slice());
865    }
866
867    #[test_case(&[], &[], false ; "empty_empty")]
868    #[test_case(&[], b"hello", false ; "empty_inline")]
869    #[test_case(&[], b"I am long and will be on the heap", true ; "empty_heap")]
870    #[test_case(b"short", &[], false ; "inline_empty")]
871    #[test_case(b"hello", b"world", false ; "inline_inline")]
872    #[test_case(b"i am short", b"I am long and will be on the heap", true ; "inline_heap")]
873    fn smoketest_clone_from(a: &[u8], b: &[u8], should_reallocate: bool) {
874        let mut a = CompactBytes::new(a);
875        let a_capacity = a.capacity();
876        let a_pointer = a.as_slice().as_ptr();
877
878        let b = CompactBytes::new(b);
879
880        // If there is enough capacity in `a`, its buffer should get re-used.
881        a.clone_from(&b);
882
883        assert_eq!(a.capacity() != a_capacity, should_reallocate);
884        assert_eq!(a.as_slice().as_ptr() != a_pointer, should_reallocate);
885    }
886
887    #[test_case(vec![] ; "empty")]
888    #[test_case(vec![0, 1, 2, 3, 4] ; "inline")]
889    #[test_case(b"I am long and will be on the heap, yada yada yada".to_vec() ; "heap")]
890    fn smoketest_from_vec(initial: Vec<u8>) {
891        let control = initial.clone();
892        let pointer = initial.as_ptr();
893        let repr = CompactBytes::from(initial);
894
895        assert_eq!(control.len(), repr.len());
896        assert_eq!(control.as_slice(), repr.as_slice());
897
898        // We do not eagerly inline, except if the Vec is empty.
899        assert_eq!(repr.spilled(), !control.is_empty());
900        // The allocation of the Vec should get re-used.
901        assert_eq!(repr.as_ptr() == pointer, !control.is_empty());
902    }
903
904    #[test]
905    fn test_cloning_inlines() {
906        let mut c = CompactBytes::with_capacity(48);
907        c.push(42);
908
909        assert_eq!(c.as_slice(), &[42]);
910        assert_eq!(c.capacity(), 48);
911        assert!(c.spilled());
912
913        let clone = c.clone();
914        assert_eq!(clone.as_slice(), &[42]);
915        assert_eq!(clone.capacity(), CompactBytes::MAX_INLINE);
916        assert!(!clone.spilled());
917    }
918
919    #[test]
920    fn test_cloning_drops_excess_capacity() {
921        let mut c = CompactBytes::with_capacity(48);
922        c.extend_from_slice(&[42; 32]);
923
924        assert_eq!(c.as_slice(), &[42; 32]);
925        assert_eq!(c.capacity(), 48);
926        assert_eq!(c.len(), 32);
927        assert!(c.spilled());
928
929        let clone = c.clone();
930        assert_eq!(clone.as_slice(), &[42; 32]);
931        assert_eq!(clone.capacity(), 32);
932        assert_eq!(clone.capacity(), clone.len());
933        assert!(clone.spilled());
934    }
935
936    #[proptest]
937    #[cfg_attr(miri, ignore)]
938    fn proptest_row(initial: Vec<u8>) {
939        let repr = CompactBytes::new(&initial);
940
941        prop_assert_eq!(repr.as_slice(), initial.as_slice());
942        prop_assert_eq!(repr.len(), initial.len());
943    }
944
945    #[proptest]
946    #[cfg_attr(miri, ignore)]
947    fn proptest_extend(initial: Vec<u8>, other: Vec<u8>) {
948        let mut repr = CompactBytes::new(&initial);
949        repr.extend_from_slice(other.as_slice());
950
951        let mut control = initial;
952        control.extend_from_slice(other.as_slice());
953
954        prop_assert_eq!(repr.as_slice(), control.as_slice());
955        prop_assert_eq!(repr.len(), control.len());
956    }
957
958    #[proptest]
959    #[cfg_attr(miri, ignore)]
960    fn proptest_clear(initial: Vec<u8>) {
961        let mut repr = CompactBytes::new(&initial);
962        let capacity = repr.capacity();
963
964        repr.clear();
965        assert!(repr.as_slice().is_empty());
966        assert_eq!(repr.len(), 0);
967
968        // Capacity should not have changed after clear.
969        assert_eq!(repr.capacity(), capacity);
970    }
971
972    #[proptest]
973    #[cfg_attr(miri, ignore)]
974    fn proptest_clear_then_extend(initial: Vec<u8>, a: Vec<u8>) {
975        let mut repr = CompactBytes::new(&initial);
976        let capacity = repr.capacity();
977        let pointer = repr.as_slice().as_ptr();
978
979        repr.clear();
980        assert!(repr.as_slice().is_empty());
981        assert_eq!(repr.len(), 0);
982
983        // Capacity should not have changed after clear.
984        assert_eq!(repr.capacity(), capacity);
985
986        repr.extend_from_slice(&a);
987        assert_eq!(repr.as_slice(), &a);
988        assert_eq!(repr.len(), a.len());
989
990        // If we originally had capacity for the new extension, we should not re-allocate.
991        if a.len() < capacity {
992            assert_eq!(repr.capacity(), capacity);
993            assert_eq!(repr.as_slice().as_ptr(), pointer);
994        }
995    }
996
997    #[proptest]
998    #[cfg_attr(miri, ignore)]
999    fn proptest_clone(initial: Vec<u8>) {
1000        let repr_a = CompactBytes::new(&initial);
1001        let repr_b = repr_a.clone();
1002
1003        assert_eq!(repr_a.len(), repr_b.len());
1004        assert_eq!(repr_a.capacity(), repr_b.capacity());
1005        assert_eq!(repr_a.as_slice(), repr_b.as_slice());
1006    }
1007
1008    #[proptest]
1009    #[cfg_attr(miri, ignore)]
1010    fn proptest_from_vec(initial: Vec<u8>) {
1011        let control = initial.clone();
1012        let pointer = initial.as_ptr();
1013        let repr = CompactBytes::from(initial);
1014
1015        assert_eq!(control.len(), repr.len());
1016        assert_eq!(control.as_slice(), repr.as_slice());
1017
1018        // We do not eagerly inline, except if the Vec is empty.
1019        assert_eq!(repr.spilled(), !control.is_empty());
1020        // The allocation of the Vec should get re-used.
1021        assert_eq!(repr.as_ptr() == pointer, !control.is_empty());
1022    }
1023
1024    #[proptest]
1025    #[cfg_attr(miri, ignore)]
1026    fn proptest_serde(initial: Vec<u8>) {
1027        let repr = CompactBytes::new(&initial);
1028
1029        let (repr_json, ctrl_json) = match (
1030            serde_json::to_string(&repr),
1031            serde_json::to_string(&initial),
1032        ) {
1033            (Ok(r), Ok(c)) => (r, c),
1034            (Err(_), Err(_)) => return Ok(()),
1035            (r, c) => panic!("Got mismatched results when serializing {r:?}, {c:?}"),
1036        };
1037
1038        prop_assert_eq!(&repr_json, &ctrl_json);
1039
1040        let (repr_rnd_trip, ctrl_rnd_trip): (CompactBytes, Vec<u8>) = match (
1041            serde_json::from_str(&repr_json),
1042            serde_json::from_str(&ctrl_json),
1043        ) {
1044            (Ok(r), Ok(c)) => (r, c),
1045            (Err(_), Err(_)) => return Ok(()),
1046            (r, c) => panic!("Got mismatched results {r:?}, {c:?}"),
1047        };
1048
1049        prop_assert_eq!(&repr, &repr_rnd_trip);
1050        prop_assert_eq!(repr_rnd_trip, ctrl_rnd_trip);
1051    }
1052}