compact_bytes/
lib.rs

1//! "Small string optimization" for a bytes.
2
3use std::alloc;
4use std::ptr::NonNull;
5
6mod fixed;
7mod growable;
8
9pub use fixed::CompactBytesSlice;
10pub use growable::CompactBytes;
11
12const INLINE_MASK: u8 = 0b1000_0000;
13
14/// Fixed capacity inline buffer of bytes.
15#[repr(C, align(8))]
16#[derive(Copy, Clone)]
17struct InlineBytes<const CAPACITY: usize> {
18    buffer: [u8; CAPACITY],
19    data: u8,
20}
21
22/// Inline storage for [`CompactBytes`].
23type InlineBytes23 = InlineBytes<23>;
24static_assertions::assert_eq_size!(InlineBytes23, Vec<u8>);
25
26/// Inline storage for [`CompactBytesSlice`].
27type InlineBytes15 = InlineBytes<15>;
28static_assertions::assert_eq_size!(InlineBytes15, Box<[u8]>);
29
30impl<const CAPACITY: usize> InlineBytes<CAPACITY> {
31    /// The amount of bytes this [`InlineBytes`] can store inline.
32    const CAPACITY: usize = CAPACITY;
33
34    /// Create an [`InlineBytes`] from the provided slice.
35    ///
36    /// Safety:
37    ///
38    /// * `slice` must have a length less than or equal to `CAPACITY`.
39    ///
40    #[inline]
41    pub unsafe fn new(slice: &[u8]) -> Self {
42        debug_assert!(slice.len() <= CAPACITY);
43
44        let len = slice.len();
45        let mut buffer = [0u8; CAPACITY];
46
47        // SAFETY: We know src and dst are valid for len bytes, nor do they overlap.
48        unsafe {
49            buffer
50                .as_mut_ptr()
51                .copy_from_nonoverlapping(slice.as_ptr(), len)
52        };
53
54        let data = INLINE_MASK | (len as u8);
55
56        InlineBytes { buffer, data }
57    }
58
59    #[inline]
60    pub const fn empty() -> Self {
61        let buffer = [0u8; CAPACITY];
62
63        // Even though the below statement as no effect, we leave it for better understanding.
64        #[allow(clippy::identity_op)]
65        let data = INLINE_MASK | 0;
66
67        InlineBytes { buffer, data }
68    }
69
70    pub fn len(&self) -> usize {
71        (self.data & !INLINE_MASK) as usize
72    }
73
74    /// Forces the length of [`InlineBytes`] to `new_len`.
75    ///
76    /// # Safety
77    /// * `new_len` must be less than or equal to the generic `CAPACITY`.
78    /// * `new_len` must be less than or equal to capacity.
79    /// * The bytes at `old_len..new_len` must be initialized.
80    ///
81    unsafe fn set_len(&mut self, new_len: usize) {
82        debug_assert!(new_len <= CAPACITY);
83        self.data = INLINE_MASK | (new_len as u8);
84    }
85}
86
87/// A growable heap allocation of bytes.
88#[repr(C)]
89struct HeapBytesGrowable {
90    ptr: NonNull<u8>,
91    len: usize,
92    cap: usize,
93}
94static_assertions::assert_eq_size!(HeapBytesGrowable, Vec<u8>);
95
96impl HeapBytesGrowable {
97    /// The minimum allocation size for a [`HeapBytesGrowable`].
98    pub const MIN_ALLOCATION_SIZE: usize = std::mem::size_of::<usize>() * 2;
99    /// The maximum allocation size for a [`HeapBytesGrowable`].
100    pub const MAX_ALLOCATION_SIZE: usize = usize::MAX >> 1;
101
102    #[inline]
103    pub fn new(slice: &[u8]) -> Self {
104        let len = slice.len();
105        let cap = len.max(Self::MIN_ALLOCATION_SIZE);
106
107        debug_assert!(cap <= Self::MAX_ALLOCATION_SIZE, "too large of allocation");
108        let ptr = heap::alloc_ptr(cap);
109
110        unsafe { ptr.as_ptr().copy_from_nonoverlapping(slice.as_ptr(), len) };
111
112        HeapBytesGrowable { ptr, len, cap }
113    }
114
115    pub fn with_capacity(capacity: usize) -> Self {
116        assert!(
117            capacity <= Self::MAX_ALLOCATION_SIZE,
118            "too large of allocation"
119        );
120
121        let len = 0;
122        let cap = capacity.max(Self::MIN_ALLOCATION_SIZE);
123        let ptr = heap::alloc_ptr(cap);
124
125        HeapBytesGrowable { ptr, len, cap }
126    }
127
128    pub fn with_additional(slice: &[u8], additional: usize) -> Self {
129        let new_capacity = Self::amortized_growth(slice.len(), additional);
130        let mut row = Self::with_capacity(new_capacity);
131
132        debug_assert!(row.cap > slice.len());
133
134        // SAFETY: We know src and dst are both valid for len bytes, nor are they overlapping.
135        unsafe {
136            std::ptr::copy_nonoverlapping(slice.as_ptr(), row.ptr.as_ptr(), slice.len());
137        };
138        // Set our length.
139        row.len = slice.len();
140
141        row
142    }
143
144    pub unsafe fn set_len(&mut self, len: usize) {
145        self.len = len;
146    }
147
148    pub fn realloc(&mut self, new_capacity: usize) -> Result<usize, ()> {
149        // Can't shrink the heap allocation to be less than length, because we'd lose data.
150        if new_capacity < self.len {
151            return Err(());
152        }
153        // Do not reallocate to 0 capacity.
154        if new_capacity == 0 {
155            return Err(());
156        }
157
158        // Always allocate some minimum amount of bytes.
159        let new_capacity = new_capacity.max(Self::MIN_ALLOCATION_SIZE);
160
161        // Already at the appropriate size!
162        if new_capacity == self.cap {
163            return Ok(new_capacity);
164        }
165
166        let cur_layout = heap::layout(self.cap);
167        let new_layout = heap::layout(new_capacity);
168
169        // Check for overflow.
170        let new_size = new_layout.size();
171        if new_size < new_capacity {
172            return Err(());
173        }
174
175        // SAFETY:
176        // * Our pointer was allocated via the same allocator.
177        // * We used the same layout for the previous allocation.
178        // * `new_size` is correct.
179        let raw_ptr = unsafe { alloc::realloc(self.ptr.as_ptr(), cur_layout, new_size) };
180        let ptr = NonNull::new(raw_ptr).ok_or(())?;
181
182        self.ptr = ptr;
183        self.cap = new_capacity;
184
185        Ok(new_capacity)
186    }
187
188    #[inline]
189    fn dealloc(&mut self) {
190        heap::dealloc_ptr(self.ptr, self.cap);
191    }
192
193    /// [`HeapBytes`] grows at an amortized rates of 1.5x
194    ///
195    /// Note: this is different than [`std::vec::Vec`], which grows at a rate of 2x. It's debated
196    /// which is better, for now we'll stick with a rate of 1.5x
197    #[inline(always)]
198    pub fn amortized_growth(cur_len: usize, additional: usize) -> usize {
199        let required = cur_len.saturating_add(additional);
200        let amortized = cur_len.saturating_mul(3) / 2;
201        amortized.max(required)
202    }
203}
204
205impl Drop for HeapBytesGrowable {
206    fn drop(&mut self) {
207        self.dealloc()
208    }
209}
210
211/// A fixed size heap allocation of bytes.
212#[repr(C)]
213struct HeapBytesFixed {
214    ptr: NonNull<u8>,
215    len: usize,
216}
217static_assertions::assert_eq_size!(HeapBytesFixed, Box<[u8]>);
218
219impl HeapBytesFixed {
220    /// The maximum allocation size for a [`HeapBytesFixed`].
221    pub const MAX_ALLOCATION_SIZE: usize = usize::MAX >> 1;
222
223    #[inline]
224    pub fn new(slice: &[u8]) -> Self {
225        let len = slice.len();
226        debug_assert!(len <= Self::MAX_ALLOCATION_SIZE, "too large of allocation");
227
228        let ptr = heap::alloc_ptr(len);
229        unsafe { ptr.as_ptr().copy_from_nonoverlapping(slice.as_ptr(), len) };
230
231        HeapBytesFixed { ptr, len }
232    }
233
234    #[inline]
235    fn dealloc(&mut self) {
236        heap::dealloc_ptr(self.ptr, self.len);
237    }
238}
239
240mod heap {
241    use std::alloc;
242    use std::ptr::NonNull;
243
244    /// Create an allocation with [`layout`].
245    #[inline]
246    pub(crate) fn alloc_ptr(capacity: usize) -> NonNull<u8> {
247        let layout = layout(capacity);
248        debug_assert!(layout.size() > 0);
249
250        // SAFETY: We ensure that the layout is not zero sized, by enforcing a minimum size.
251        let ptr = unsafe { alloc::alloc(layout) };
252
253        NonNull::new(ptr).expect("failed to allocate HeapRow")
254    }
255
256    /// Drop an allocation that was created with [`layout`].
257    #[inline]
258    pub(crate) fn dealloc_ptr(ptr: NonNull<u8>, capacity: usize) {
259        let layout = layout(capacity);
260
261        // SAFETY:
262        // * The pointer was allocated via this allocator.
263        // * We used the same layout when allocating.
264        unsafe { alloc::dealloc(ptr.as_ptr(), layout) };
265    }
266
267    /// Returns the [`alloc::Layout`] for [`HeapBytesGrowable`] and [`HeapBytesSlice`].
268    ///
269    /// [`HeapBytesGrowable`]: super::HeapBytesGrowable
270    /// [`HeapBytesSlice`]: super::HeapBytesSlice
271    #[inline(always)]
272    pub(crate) fn layout(capacity: usize) -> alloc::Layout {
273        debug_assert!(capacity > 0, "tried to allocate a HeapRow with 0 capacity");
274        alloc::Layout::array::<u8>(capacity).expect("valid capacity")
275    }
276}