// mz_ore/region.rs
// Copyright Materialize, Inc. and contributors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License in the LICENSE file at the
// root of this repository, or online at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
15
16//! Region-allocated data utilities.
17
18use std::fmt::{Debug, Formatter};
19use std::mem::ManuallyDrop;
20use std::ops::{Deref, DerefMut};
21
/// Enable allocations through `new_auto` to use lgalloc. `new_mmap` will always use lgalloc.
///
/// Defaults to `false`. The `new_auto*` constructors read this flag with
/// `Ordering::Relaxed` on every call, so it may be toggled at runtime.
pub static ENABLE_LGALLOC_REGION: std::sync::atomic::AtomicBool =
    std::sync::atomic::AtomicBool::new(false);
25
/// A region allocator which holds items at stable memory locations.
///
/// Items once inserted will not be moved, and their locations in memory
/// can be relied on by others, until the region is cleared.
///
/// This type accepts owned data, rather than references, and does not
/// itself intend to implement `Region`. Rather, it is a useful building
/// block for other less-safe code that wants allocated data to remain at
/// fixed memory locations.
pub struct LgAllocRegion<T> {
    /// The active allocation into which we are writing.
    local: Region<T>,
    /// All previously active allocations. Retained (rather than freed) so
    /// that items already handed out keep their stable memory locations.
    stash: Vec<Region<T>>,
    /// The maximum allocation size, in items. Caps the power-of-two growth
    /// performed by `reserve`.
    limit: usize,
}
43
44// Manually implement `Default` as `T` may not implement it.
45impl<T> Default for LgAllocRegion<T> {
46 fn default() -> Self {
47 Self {
48 local: Default::default(),
49 stash: Vec::new(),
50 limit: usize::MAX,
51 }
52 }
53}
54
55impl<T> Debug for LgAllocRegion<T> {
56 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
57 f.debug_struct("LgAllocRegion")
58 .field("limit", &self.limit)
59 .finish_non_exhaustive()
60 }
61}
62
63impl<T> LgAllocRegion<T> {
64 /// Construct a [LgAllocRegion] with a allocation size limit.
65 pub fn with_limit(limit: usize) -> Self {
66 Self {
67 local: Default::default(),
68 stash: Default::default(),
69 limit,
70 }
71 }
72
73 /// Clears the contents without dropping any elements.
74 #[inline]
75 pub fn clear(&mut self) {
76 unsafe {
77 // Unsafety justified in that setting the length to zero exposes
78 // no invalid data.
79 self.local.clear();
80 // Release allocations in `stash` without dropping their elements.
81 self.stash.clear()
82 }
83 }
84 /// Copies an iterator of items into the region.
85 #[inline]
86 pub fn copy_iter<I>(&mut self, items: I) -> &mut [T]
87 where
88 I: Iterator<Item = T> + std::iter::ExactSizeIterator,
89 {
90 self.reserve(items.len());
91 let initial_len = self.local.len();
92 self.local.extend(items);
93 &mut self.local[initial_len..]
94 }
95 /// Copies a slice of cloneable items into the region.
96 #[inline]
97 pub fn copy_slice(&mut self, items: &[T]) -> &mut [T]
98 where
99 T: Clone,
100 {
101 self.reserve(items.len());
102 let initial_len = self.local.len();
103 self.local.extend_from_slice(items);
104 &mut self.local[initial_len..]
105 }
106
107 /// Ensures that there is space in `self.local` to copy at least `count` items.
108 #[inline(always)]
109 pub fn reserve(&mut self, count: usize) {
110 #[cold]
111 fn reserve_inner<T>(this: &mut LgAllocRegion<T>, count: usize) {
112 // Increase allocated capacity in powers of two.
113 // We could choose a different rule here if we wanted to be
114 // more conservative with memory (e.g. page size allocations).
115 let mut next_len = (this.local.capacity() + 1).next_power_of_two();
116 next_len = std::cmp::min(next_len, this.limit);
117 next_len = std::cmp::max(count, next_len);
118 let new_local = Region::new_auto(next_len);
119 if !this.local.is_empty() {
120 this.stash.push(std::mem::take(&mut this.local));
121 }
122 this.local = new_local;
123 }
124
125 // Check if `item` fits into `self.local` without reallocation.
126 // If not, stash `self.local` and increase the allocation.
127 if count > self.local.capacity() - self.local.len() {
128 reserve_inner(self, count);
129 }
130 }
131
132 /// Allocates a new `Self` that can accept `count` items without reallocation.
133 pub fn with_capacity(count: usize) -> Self {
134 let mut region = Self::default();
135 region.reserve(count);
136 region
137 }
138
139 /// The number of items current held in the region.
140 pub fn len(&self) -> usize {
141 self.local.len() + self.stash.iter().map(|r| r.len()).sum::<usize>()
142 }
143
144 /// Visit contained allocations to determine their size and capacity.
145 #[inline]
146 pub fn heap_size(&self, mut callback: impl FnMut(usize, usize)) {
147 // Calculate heap size for local, stash, and stash entries
148 let size_of_t = std::mem::size_of::<T>();
149 callback(
150 self.local.len() * size_of_t,
151 self.local.capacity() * size_of_t,
152 );
153 callback(
154 self.stash.len() * std::mem::size_of::<Vec<T>>(),
155 self.stash.capacity() * std::mem::size_of::<Vec<T>>(),
156 );
157 for stash in &self.stash {
158 callback(stash.len() * size_of_t, stash.capacity() * size_of_t);
159 }
160 }
161}
162
/// An abstraction over different kinds of allocated regions.
///
/// # WARNING
///
/// The implementation does not drop its elements, but forgets them instead. Do not use where
/// this is not intended, i.e., outside `Copy` types or columnation regions.
///
/// NOTE: We plan to deprecate this type soon. Users should switch to different types or the raw
/// `lgalloc` API instead.
#[derive(Debug)]
#[must_use]
pub enum Region<T> {
    /// A possibly empty heap-allocated region, represented as a vector.
    Heap(Vec<T>),
    /// A memory-mapped region, represented by a vector and its backing memory mapping.
    MMap(MMapRegion<T>),
}
180
/// Type encapsulating private data for memory-mapped regions.
pub struct MMapRegion<T> {
    /// Vector-representation of the underlying memory. Must not be dropped:
    /// its buffer belongs to lgalloc, not the global allocator.
    inner: ManuallyDrop<Vec<T>>,
    /// Opaque handle to lgalloc. `Some` from construction until consumed by `Drop`.
    handle: Option<lgalloc::Handle>,
}
188
impl<T> MMapRegion<T> {
    /// Clear the contents of this region without dropping elements.
    ///
    /// # Safety
    ///
    /// Elements are forgotten, not dropped; the caller must ensure this is
    /// acceptable (e.g., `T` is `Copy` or ownership is tracked elsewhere).
    unsafe fn clear(&mut self) {
        // Shrinking the length to zero never exposes invalid data.
        unsafe { self.inner.set_len(0) };
    }
}
195
196impl<T: Debug> Debug for MMapRegion<T> {
197 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
198 f.debug_struct("MMapRegion")
199 .field("inner", &self.inner)
200 .finish_non_exhaustive()
201 }
202}
203
204impl<T> Deref for MMapRegion<T> {
205 type Target = [T];
206
207 fn deref(&self) -> &Self::Target {
208 &self.inner
209 }
210}
211
212impl<T> Default for Region<T> {
213 #[inline]
214 fn default() -> Self {
215 Self::new_empty()
216 }
217}
218
impl<T> Region<T> {
    /// Create a new empty region backed by the heap.
    #[inline]
    pub fn new_empty() -> Region<T> {
        Region::Heap(Vec::new())
    }

    /// Create a new heap-allocated region of a specific capacity.
    #[inline]
    pub fn new_heap(capacity: usize) -> Region<T> {
        Region::Heap(Vec::with_capacity(capacity))
    }

    /// Create a new file-based mapped region of a specific capacity. The capacity of the
    /// returned region can be larger than requested to accommodate page sizes.
    ///
    /// # Errors
    ///
    /// Returns an error if the memory allocation fails.
    #[inline(always)]
    pub fn new_mmap(capacity: usize) -> Result<Region<T>, lgalloc::AllocError> {
        lgalloc::allocate(capacity).map(|(ptr, capacity, handle)| {
            // SAFETY: `ptr` points to suitable memory.
            // It is UB to call `from_raw_parts` with a pointer not allocated from the global
            // allocator, but we accept this here because we promise never to reallocate the vector.
            let inner =
                ManuallyDrop::new(unsafe { Vec::from_raw_parts(ptr.as_ptr(), 0, capacity) });
            let handle = Some(handle);
            Region::MMap(MMapRegion { inner, handle })
        })
    }

    /// Create a region depending on the capacity.
    ///
    /// The capacity of the returned region must be at least as large as the requested capacity,
    /// but can be larger if the implementation requires it.
    ///
    /// Returns a [`Region::MMap`] if possible, and falls back to [`Region::Heap`] otherwise.
    pub fn new_auto(capacity: usize) -> Region<T> {
        if ENABLE_LGALLOC_REGION.load(std::sync::atomic::Ordering::Relaxed) {
            match Region::new_mmap(capacity) {
                Ok(r) => return r,
                // Expected when lgalloc is disabled or the size class is
                // unsupported; fall back to the heap silently.
                Err(lgalloc::AllocError::Disabled)
                | Err(lgalloc::AllocError::InvalidSizeClass(_)) => {}
                Err(e) => {
                    eprintln!("lgalloc error: {e}, falling back to heap");
                }
            }
        }
        // Fall-through
        Region::new_heap(capacity)
    }

    /// Clears the contents of the region, without dropping its elements.
    ///
    /// # Safety
    ///
    /// Discards all contents. Elements are not dropped; the caller must ensure
    /// this is acceptable (e.g., `T` is `Copy`).
    #[inline]
    pub unsafe fn clear(&mut self) {
        match self {
            Region::Heap(vec) => unsafe { vec.set_len(0) },
            Region::MMap(inner) => unsafe { inner.clear() },
        }
    }

    /// Returns the capacity of the underlying allocation.
    #[inline]
    #[must_use]
    pub fn capacity(&self) -> usize {
        match self {
            Region::Heap(vec) => vec.capacity(),
            Region::MMap(inner) => inner.inner.capacity(),
        }
    }

    /// Returns the number of elements in the allocation.
    #[inline]
    #[must_use]
    pub fn len(&self) -> usize {
        match self {
            Region::Heap(vec) => vec.len(),
            Region::MMap(inner) => inner.len(),
        }
    }

    /// Returns true if the region does not contain any elements.
    #[inline]
    #[must_use]
    pub fn is_empty(&self) -> bool {
        match self {
            Region::Heap(vec) => vec.is_empty(),
            Region::MMap(inner) => inner.is_empty(),
        }
    }

    /// Dereference to the contained vector.
    #[inline]
    #[must_use]
    pub fn as_vec(&self) -> &Vec<T> {
        match self {
            Region::Heap(vec) => vec,
            Region::MMap(inner) => &inner.inner,
        }
    }

    /// Extend the underlying region from the iterator.
    ///
    /// Care must be taken to not re-allocate the inner vector representation.
    ///
    /// # Panics
    ///
    /// Panics if the remaining capacity is smaller than the iterator's length.
    #[inline]
    pub fn extend<I: IntoIterator<Item = T> + ExactSizeIterator>(&mut self, iter: I) {
        assert!(self.capacity() - self.len() >= iter.len());
        // SAFETY: We just asserted that we have sufficient capacity.
        unsafe { self.as_mut_vec().extend(iter) };
    }

    /// Obtain a mutable reference to the inner vector representation.
    ///
    /// Unsafe because the caller has to make sure that the vector will not reallocate.
    /// Otherwise, the vector representation could try to reallocate the underlying memory
    /// using the global allocator, which would cause problems because the memory might not
    /// have originated from it. This is undefined behavior.
    ///
    /// Private because it is too dangerous to expose to the public.
    #[inline]
    unsafe fn as_mut_vec(&mut self) -> &mut Vec<T> {
        match self {
            Region::Heap(vec) => vec,
            Region::MMap(inner) => &mut inner.inner,
        }
    }
}
351
impl<T: bytemuck::AnyBitPattern> Region<T> {
    /// Create a new file-based mapped region of a specific capacity, initialized to 0. The
    /// capacity of the returned region can be larger than requested to accommodate page sizes.
    ///
    /// The returned region has its length set to the full capacity (all elements zeroed),
    /// unlike [`Region::new_mmap`], which starts empty.
    ///
    /// # Errors
    ///
    /// Returns an error if the memory allocation fails.
    #[inline(always)]
    pub fn new_mmap_zeroed(capacity: usize) -> Result<Self, lgalloc::AllocError> {
        let (ptr, capacity, handle) = lgalloc::allocate::<T>(capacity)?;
        // SAFETY: `allocate` returns a valid memory block, and `T` supports a null-bit pattern.
        unsafe { ptr.as_ptr().write_bytes(0, capacity) }
        // SAFETY: `ptr` points to suitable memory.
        // It is UB to call `from_raw_parts` with a pointer not allocated from the global
        // allocator, but we accept this here because we promise never to reallocate the vector.
        let inner =
            ManuallyDrop::new(unsafe { Vec::from_raw_parts(ptr.as_ptr(), capacity, capacity) });
        let handle = Some(handle);
        Ok(Self::MMap(MMapRegion { inner, handle }))
    }

    /// Allocate a zeroed region on the heap.
    #[inline(always)]
    pub fn new_heap_zeroed(capacity: usize) -> Self {
        Self::Heap(vec![T::zeroed(); capacity])
    }

    /// Construct a new region with the specified capacity, initialized to 0.
    pub fn new_auto_zeroed(capacity: usize) -> Self {
        if ENABLE_LGALLOC_REGION.load(std::sync::atomic::Ordering::Relaxed) {
            match Region::new_mmap_zeroed(capacity) {
                Ok(r) => return r,
                // Expected when lgalloc is disabled or the size class is
                // unsupported; fall back to the heap silently.
                Err(lgalloc::AllocError::Disabled)
                | Err(lgalloc::AllocError::InvalidSizeClass(_)) => {}
                Err(e) => {
                    eprintln!("lgalloc error: {e}, falling back to heap");
                }
            }
        }
        // Fall-through
        Self::new_heap_zeroed(capacity)
    }
}
395
396impl<T: Clone> Region<T> {
397 /// Extend the region from a slice.
398 ///
399 /// Panics if the region does not have sufficient capacity.
400 #[inline]
401 pub fn extend_from_slice(&mut self, slice: &[T]) {
402 assert!(self.capacity() - self.len() >= slice.len());
403 // SAFETY: We just asserted that we have enough capacity.
404 unsafe { self.as_mut_vec() }.extend_from_slice(slice);
405 }
406}
407
408impl<T> Deref for Region<T> {
409 type Target = [T];
410
411 #[inline]
412 fn deref(&self) -> &Self::Target {
413 self.as_vec()
414 }
415}
416
417impl<T> DerefMut for Region<T> {
418 #[inline]
419 fn deref_mut(&mut self) -> &mut Self::Target {
420 // SAFETY: We're dereferencing to `&mut [T]`, which does not allow reallocating the
421 // underlying allocation, which makes it safe.
422 unsafe { self.as_mut_vec().as_mut_slice() }
423 }
424}
425
426impl<T> Drop for Region<T> {
427 #[inline]
428 fn drop(&mut self) {
429 match self {
430 Region::Heap(vec) => {
431 // SAFETY: Don't drop the elements, drop the vec, in line with the documentation
432 // of the `Region` type.
433 unsafe { vec.set_len(0) }
434 }
435 Region::MMap(_) => {}
436 }
437 }
438}
439
440impl<T> Drop for MMapRegion<T> {
441 fn drop(&mut self) {
442 // Similar to dropping Region: Drop the allocation, don't drop the `inner` vector.
443 lgalloc::deallocate(self.handle.take().unwrap());
444 }
445}