mz_ore/region.rs

// Copyright Materialize, Inc. and contributors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License in the LICENSE file at the
// root of this repository, or online at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Region-allocated data utilities.

use std::fmt::{Debug, Formatter};
use std::mem::ManuallyDrop;
use std::ops::{Deref, DerefMut};

/// Enables allocations through `new_auto` to use lgalloc. `new_mmap` always uses lgalloc.
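///
/// A minimal sketch of opting in at process start (assumes lgalloc itself has been
/// configured elsewhere; crate path assumed):
///
/// ```ignore
/// use std::sync::atomic::Ordering;
///
/// mz_ore::region::ENABLE_LGALLOC_REGION.store(true, Ordering::Relaxed);
/// ```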
pub static ENABLE_LGALLOC_REGION: std::sync::atomic::AtomicBool =
    std::sync::atomic::AtomicBool::new(false);

/// A region allocator which holds items at stable memory locations.
///
/// Items, once inserted, will not be moved, and their locations in memory
/// can be relied on by others until the region is cleared.
///
/// This type accepts owned data, rather than references, and does not
/// itself intend to implement `Region`. Rather, it is a useful building
/// block for other less-safe code that wants allocated data to remain at
/// fixed memory locations.
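///
/// # Example
///
/// A minimal sketch of the intended usage; `copy_slice` returns a mutable slice whose
/// address stays stable until `clear` (crate path assumed):
///
/// ```ignore
/// use mz_ore::region::LgAllocRegion;
///
/// let mut region = LgAllocRegion::<u64>::with_limit(1 << 20);
/// let copied = region.copy_slice(&[1, 2, 3]);
/// assert_eq!(copied, [1, 2, 3]);
/// // The data stays at its address until the region is cleared.
/// region.clear();
/// ```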
pub struct LgAllocRegion<T> {
    /// The active allocation into which we are writing.
    local: Region<T>,
    /// All previously active allocations.
    stash: Vec<Region<T>>,
    /// The maximum allocation size.
    limit: usize,
}

// Manually implement `Default` as `T` may not implement it.
impl<T> Default for LgAllocRegion<T> {
    fn default() -> Self {
        Self {
            local: Default::default(),
            stash: Vec::new(),
            limit: usize::MAX,
        }
    }
}

impl<T> Debug for LgAllocRegion<T> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("LgAllocRegion")
            .field("limit", &self.limit)
            .finish_non_exhaustive()
    }
}

impl<T> LgAllocRegion<T> {
    /// Construct a [`LgAllocRegion`] with an allocation size limit.
    pub fn with_limit(limit: usize) -> Self {
        Self {
            local: Default::default(),
            stash: Default::default(),
            limit,
        }
    }

    /// Clears the contents without dropping any elements.
    #[inline]
    pub fn clear(&mut self) {
        unsafe {
            // Unsafety justified in that setting the length to zero exposes
            // no invalid data.
            self.local.clear();
            // Release allocations in `stash` without dropping their elements.
            self.stash.clear()
        }
    }

    /// Copies an iterator of items into the region.
    #[inline]
    pub fn copy_iter<I>(&mut self, items: I) -> &mut [T]
    where
        I: Iterator<Item = T> + std::iter::ExactSizeIterator,
    {
        self.reserve(items.len());
        let initial_len = self.local.len();
        self.local.extend(items);
        &mut self.local[initial_len..]
    }

    /// Copies a slice of cloneable items into the region.
    #[inline]
    pub fn copy_slice(&mut self, items: &[T]) -> &mut [T]
    where
        T: Clone,
    {
        self.reserve(items.len());
        let initial_len = self.local.len();
        self.local.extend_from_slice(items);
        &mut self.local[initial_len..]
    }

    /// Ensures that there is space in `self.local` to copy at least `count` items.
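    ///
    /// Capacity grows in powers of two (capped at the configured limit, but always at
    /// least `count`), so repeated reservations amortize allocation cost. A minimal
    /// sketch (crate path assumed):
    ///
    /// ```ignore
    /// let mut region = mz_ore::region::LgAllocRegion::<u8>::default();
    /// region.reserve(100);
    /// // At least 100 items can now be copied in without a new allocation.
    /// ```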
    #[inline(always)]
    pub fn reserve(&mut self, count: usize) {
        #[cold]
        fn reserve_inner<T>(this: &mut LgAllocRegion<T>, count: usize) {
            // Increase allocated capacity in powers of two.
            // We could choose a different rule here if we wanted to be
            // more conservative with memory (e.g., page size allocations).
            let mut next_len = (this.local.capacity() + 1).next_power_of_two();
            next_len = std::cmp::min(next_len, this.limit);
            next_len = std::cmp::max(count, next_len);
            let new_local = Region::new_auto(next_len);
            if !this.local.is_empty() {
                this.stash.push(std::mem::take(&mut this.local));
            }
            this.local = new_local;
        }

        // Check whether `count` additional items fit into `self.local` without reallocation.
        // If not, stash `self.local` and increase the allocation.
        if count > self.local.capacity() - self.local.len() {
            reserve_inner(self, count);
        }
    }

    /// Allocates a new `Self` that can accept `count` items without reallocation.
    pub fn with_capacity(count: usize) -> Self {
        let mut region = Self::default();
        region.reserve(count);
        region
    }

    /// The number of items currently held in the region.
    pub fn len(&self) -> usize {
        self.local.len() + self.stash.iter().map(|r| r.len()).sum::<usize>()
    }

    /// Visit contained allocations to determine their size and capacity.
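    ///
    /// The callback receives `(size_in_bytes, capacity_in_bytes)` for each allocation.
    /// A minimal sketch that totals them (assuming an existing `region`):
    ///
    /// ```ignore
    /// let (mut size, mut capacity) = (0, 0);
    /// region.heap_size(|s, c| {
    ///     size += s;
    ///     capacity += c;
    /// });
    /// ```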
    #[inline]
    pub fn heap_size(&self, mut callback: impl FnMut(usize, usize)) {
        // Calculate heap size for local, stash, and stash entries.
        let size_of_t = std::mem::size_of::<T>();
        callback(
            self.local.len() * size_of_t,
            self.local.capacity() * size_of_t,
        );
        // The `stash` vector's own heap allocation holds `Region<T>` elements.
        callback(
            self.stash.len() * std::mem::size_of::<Region<T>>(),
            self.stash.capacity() * std::mem::size_of::<Region<T>>(),
        );
        for stash in &self.stash {
            callback(stash.len() * size_of_t, stash.capacity() * size_of_t);
        }
    }
}

/// An abstraction over different kinds of allocated regions.
///
/// # WARNING
///
/// The implementation does not drop its elements, but forgets them instead. Do not use it
/// where this is not intended, i.e., for anything other than `Copy` types or columnation
/// regions.
///
/// NOTE: We plan to deprecate this type soon. Users should switch to different types or the
/// raw `lgalloc` API instead.
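///
/// # Example
///
/// A minimal sketch with a `Copy` element type, for which the forget-instead-of-drop
/// behavior is harmless (crate path assumed):
///
/// ```ignore
/// use mz_ore::region::Region;
///
/// let mut region = Region::<u64>::new_heap(4);
/// region.extend_from_slice(&[1, 2, 3]);
/// assert_eq!(&region[..], &[1, 2, 3][..]);
/// ```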
#[derive(Debug)]
#[must_use]
pub enum Region<T> {
    /// A possibly empty heap-allocated region, represented as a vector.
    Heap(Vec<T>),
    /// A memory-mapped region, represented by a vector and its backing memory mapping.
    MMap(MMapRegion<T>),
}

/// Type encapsulating private data for memory-mapped regions.
pub struct MMapRegion<T> {
    /// Vector representation of the underlying memory. Must not be dropped.
    inner: ManuallyDrop<Vec<T>>,
    /// Opaque handle to lgalloc.
    handle: Option<lgalloc::Handle>,
}

impl<T> MMapRegion<T> {
    /// Clear the contents of this region without dropping elements.
    unsafe fn clear(&mut self) {
        unsafe { self.inner.set_len(0) };
    }
}

impl<T: Debug> Debug for MMapRegion<T> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("MMapRegion")
            .field("inner", &self.inner)
            .finish_non_exhaustive()
    }
}

impl<T> Deref for MMapRegion<T> {
    type Target = [T];

    fn deref(&self) -> &Self::Target {
        &self.inner
    }
}

impl<T> Default for Region<T> {
    #[inline]
    fn default() -> Self {
        Self::new_empty()
    }
}

impl<T> Region<T> {
    /// Create a new empty region.
    #[inline]
    pub fn new_empty() -> Region<T> {
        Region::Heap(Vec::new())
    }

    /// Create a new heap-allocated region of a specific capacity.
    #[inline]
    pub fn new_heap(capacity: usize) -> Region<T> {
        Region::Heap(Vec::with_capacity(capacity))
    }

    /// Create a new file-based mapped region of a specific capacity. The capacity of the
    /// returned region can be larger than requested to accommodate page sizes.
    ///
    /// # Errors
    ///
    /// Returns an error if the memory allocation fails.
    #[inline(always)]
    pub fn new_mmap(capacity: usize) -> Result<Region<T>, lgalloc::AllocError> {
        lgalloc::allocate(capacity).map(|(ptr, capacity, handle)| {
            // SAFETY: `ptr` points to suitable memory.
            // It is UB to call `from_raw_parts` with a pointer not allocated from the global
            // allocator, but we accept this here because we promise never to reallocate the vector.
            let inner =
                ManuallyDrop::new(unsafe { Vec::from_raw_parts(ptr.as_ptr(), 0, capacity) });
            let handle = Some(handle);
            Region::MMap(MMapRegion { inner, handle })
        })
    }

    /// Create a region depending on the capacity.
    ///
    /// The capacity of the returned region must be at least as large as the requested capacity,
    /// but can be larger if the implementation requires it.
    ///
    /// Returns a [`Region::MMap`] if possible, and falls back to [`Region::Heap`] otherwise.
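    ///
    /// A minimal sketch; whether this yields the `MMap` variant depends on
    /// [`ENABLE_LGALLOC_REGION`] and lgalloc's configuration (crate path assumed):
    ///
    /// ```ignore
    /// let region = mz_ore::region::Region::<u64>::new_auto(8);
    /// assert!(region.capacity() >= 8);
    /// ```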
    pub fn new_auto(capacity: usize) -> Region<T> {
        if ENABLE_LGALLOC_REGION.load(std::sync::atomic::Ordering::Relaxed) {
            match Region::new_mmap(capacity) {
                Ok(r) => return r,
                Err(lgalloc::AllocError::Disabled)
                | Err(lgalloc::AllocError::InvalidSizeClass(_)) => {}
                Err(e) => {
                    eprintln!("lgalloc error: {e}, falling back to heap");
                }
            }
        }
        // Fall through to the heap-backed variant.
        Region::new_heap(capacity)
    }

    /// Clears the contents of the region, without dropping its elements.
    ///
    /// # Safety
    ///
    /// Discards all contents. Elements are not dropped; the caller must ensure that
    /// skipping their destructors is sound.
277    #[inline]
278    pub unsafe fn clear(&mut self) {
279        match self {
280            Region::Heap(vec) => unsafe { vec.set_len(0) },
281            Region::MMap(inner) => unsafe { inner.clear() },
282        }
283    }
284
285    /// Returns the capacity of the underlying allocation.
286    #[inline]
287    #[must_use]
288    pub fn capacity(&self) -> usize {
289        match self {
290            Region::Heap(vec) => vec.capacity(),
291            Region::MMap(inner) => inner.inner.capacity(),
292        }
293    }
294
295    /// Returns the number of elements in the allocation.
296    #[inline]
297    #[must_use]
298    pub fn len(&self) -> usize {
299        match self {
300            Region::Heap(vec) => vec.len(),
301            Region::MMap(inner) => inner.len(),
302        }
303    }
304
305    /// Returns true if the region does not contain any elements.
306    #[inline]
307    #[must_use]
308    pub fn is_empty(&self) -> bool {
309        match self {
310            Region::Heap(vec) => vec.is_empty(),
311            Region::MMap(inner) => inner.is_empty(),
312        }
313    }
314
315    /// Dereference to the contained vector
316    #[inline]
317    #[must_use]
318    pub fn as_vec(&self) -> &Vec<T> {
319        match self {
320            Region::Heap(vec) => vec,
321            Region::MMap(inner) => &inner.inner,
322        }
323    }
324
    /// Extend the underlying region from the iterator.
    ///
    /// Panics if the region does not have sufficient capacity for all elements of the
    /// iterator; the region must never reallocate its inner vector representation.
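    ///
    /// A minimal sketch; capacity must be reserved up front because the region never
    /// reallocates (crate path assumed):
    ///
    /// ```ignore
    /// let mut region = mz_ore::region::Region::<u32>::new_heap(3);
    /// region.extend([1, 2, 3].into_iter());
    /// assert_eq!(region.len(), 3);
    /// ```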
    #[inline]
    pub fn extend<I: IntoIterator<Item = T> + ExactSizeIterator>(&mut self, iter: I) {
        assert!(self.capacity() - self.len() >= iter.len());
        // SAFETY: We just asserted that we have sufficient capacity.
        unsafe { self.as_mut_vec().extend(iter) };
    }

    /// Obtain a mutable reference to the inner vector representation.
    ///
    /// Unsafe because the caller has to make sure that the vector will not reallocate.
    /// Otherwise, the vector representation could try to reallocate the underlying memory
    /// using the global allocator, which would cause problems because the memory might not
    /// have originated from it. This is undefined behavior.
    ///
    /// Private because it is too dangerous to expose to the public.
    #[inline]
    unsafe fn as_mut_vec(&mut self) -> &mut Vec<T> {
        match self {
            Region::Heap(vec) => vec,
            Region::MMap(inner) => &mut inner.inner,
        }
    }
}

impl<T: bytemuck::AnyBitPattern> Region<T> {
    /// Create a new file-based mapped region of a specific capacity, initialized to 0. The
    /// capacity of the returned region can be larger than requested to accommodate page sizes.
    ///
    /// # Errors
    ///
    /// Returns an error if the memory allocation fails.
    #[inline(always)]
    pub fn new_mmap_zeroed(capacity: usize) -> Result<Self, lgalloc::AllocError> {
        let (ptr, capacity, handle) = lgalloc::allocate::<T>(capacity)?;
        // SAFETY: `allocate` returns a valid memory block, and `T: AnyBitPattern` means the
        // all-zeros bit pattern is a valid `T`.
        unsafe { ptr.as_ptr().write_bytes(0, capacity) }
        // SAFETY: `ptr` points to suitable memory.
        // It is UB to call `from_raw_parts` with a pointer not allocated from the global
        // allocator, but we accept this here because we promise never to reallocate the vector.
        let inner =
            ManuallyDrop::new(unsafe { Vec::from_raw_parts(ptr.as_ptr(), capacity, capacity) });
        let handle = Some(handle);
        Ok(Self::MMap(MMapRegion { inner, handle }))
    }

    /// Allocate a zeroed region on the heap.
    #[inline(always)]
    pub fn new_heap_zeroed(capacity: usize) -> Self {
        Self::Heap(vec![T::zeroed(); capacity])
    }

    /// Construct a new region with the specified capacity, initialized to 0.
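    ///
    /// A minimal sketch (every byte of the result is zero; crate path assumed):
    ///
    /// ```ignore
    /// let region = mz_ore::region::Region::<u8>::new_auto_zeroed(16);
    /// assert!(region.iter().all(|byte| *byte == 0));
    /// ```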
    pub fn new_auto_zeroed(capacity: usize) -> Self {
        if ENABLE_LGALLOC_REGION.load(std::sync::atomic::Ordering::Relaxed) {
            match Region::new_mmap_zeroed(capacity) {
                Ok(r) => return r,
                Err(lgalloc::AllocError::Disabled)
                | Err(lgalloc::AllocError::InvalidSizeClass(_)) => {}
                Err(e) => {
                    eprintln!("lgalloc error: {e}, falling back to heap");
                }
            }
        }
        // Fall through to the heap-backed variant.
        Self::new_heap_zeroed(capacity)
    }
}

impl<T: Clone> Region<T> {
    /// Extend the region from a slice.
    ///
    /// Panics if the region does not have sufficient capacity.
    #[inline]
    pub fn extend_from_slice(&mut self, slice: &[T]) {
        assert!(self.capacity() - self.len() >= slice.len());
        // SAFETY: We just asserted that we have enough capacity.
        unsafe { self.as_mut_vec() }.extend_from_slice(slice);
    }
}

impl<T> Deref for Region<T> {
    type Target = [T];

    #[inline]
    fn deref(&self) -> &Self::Target {
        self.as_vec()
    }
}

impl<T> DerefMut for Region<T> {
    #[inline]
    fn deref_mut(&mut self) -> &mut Self::Target {
        // SAFETY: We're dereferencing to `&mut [T]`, which does not allow reallocating the
        // underlying allocation, which makes it safe.
        unsafe { self.as_mut_vec().as_mut_slice() }
    }
}

impl<T> Drop for Region<T> {
    #[inline]
    fn drop(&mut self) {
        match self {
            Region::Heap(vec) => {
                // SAFETY: Don't drop the elements, drop the vec, in line with the documentation
                // of the `Region` type.
                unsafe { vec.set_len(0) }
            }
            Region::MMap(_) => {}
        }
    }
}

impl<T> Drop for MMapRegion<T> {
    fn drop(&mut self) {
        // Similar to dropping `Region`: release the allocation, but don't drop the `inner`
        // vector or its elements.
        lgalloc::deallocate(self.handle.take().unwrap());
    }
}