domain/utils/
dst.rs

1//! Working with dynamically sized types (DSTs).
2//!
3//! DSTs are types whose size is known at run-time instead of compile-time.
4//! The primary examples of this are slices and [`str`].  While Rust provides
5//! relatively good support for DSTs (e.g. they can be held by reference like
6//! any other type), it has some rough edges.  The standard library tries to
7//! paper over these with helpful functions and trait impls, but it does not
8//! account for custom DST types.  In particular, [`new::base`] introduces a
9//! large number of user-facing DSTs and needs to paper over the same rough
10//! edges for all of them.
11//!
12//! [`new::base`]: crate::new::base
13//!
//! ## Copying DSTs
15//!
//! Because DSTs cannot be held by value, they must be handled and manipulated
//! through an indirection (a reference or a smart pointer of some kind).
//! Copying a DST into a new container (e.g. [`Box`]) requires explicit
//! support from that container type.
20//!
21//! [`Box`]: https://doc.rust-lang.org/std/boxed/struct.Box.html
22//!
23//! This module introduces the [`UnsizedCopy`] trait (and a derive macro) that
24//! types like [`str`] implement.  Container types that can support copying
25//! DSTs implement [`UnsizedCopyFrom`].
26//
27// TODO: Example
28
29//----------- UnsizedCopy ----------------------------------------------------
30
/// An extension of [`Copy`] to dynamically sized types.
///
/// This is a generalization of [`Copy`].  It is intended to simplify working
/// with DSTs that support zero-copy parsing techniques (as these are built
/// from byte sequences, they are inherently trivial to copy).
///
/// # Usage
///
/// To copy a type, call [`UnsizedCopy::unsized_copy_into()`] on the DST being
/// copied, or call [`UnsizedCopyFrom::unsized_copy_from()`] on the container
/// type to copy into.  The two behave identically.
///
#[cfg_attr(
    feature = "bumpalo",
    doc = "The [`copy_to_bump()`] function is useful for copying data into [`bumpalo`]-based allocations."
)]
///
/// # Safety
///
/// A type `T` can implement `UnsizedCopy` if all of the following hold:
///
/// - It is an aggregate type (`struct`, `enum`, or `union`) and every field
///   implements [`UnsizedCopy`].
///
/// - `T::Alignment` has exactly the same alignment as `T`.
///
/// - `T::ptr_with_addr()` satisfies the documented invariants.
pub unsafe trait UnsizedCopy {
    /// Copy `self` into a new container.
    ///
    /// A new container of the specified type (which is usually inferred) is
    /// allocated, and the contents of `self` are copied into it.  This is a
    /// convenience method that calls [`unsized_copy_from()`].
    ///
    /// [`unsized_copy_from()`]: UnsizedCopyFrom::unsized_copy_from()
    #[inline]
    fn unsized_copy_into<T: UnsizedCopyFrom<Source = Self>>(&self) -> T {
        T::unsized_copy_from(self)
    }

    /// Copy `self` and return it by value.
    ///
    /// This offers equivalent functionality to the regular [`Copy`] trait,
    /// which is also why it has the same [`Sized`] bound.
    #[inline]
    fn copy(&self) -> Self
    where
        Self: Sized,
    {
        // The compiler can't tell that 'Self' is 'Copy', so we're just going
        // to copy it manually.  Hopefully this optimizes fine.

        // SAFETY: 'self' is a valid reference, and is thus safe for reads.
        //   The value returned is a bitwise duplicate of '*self'; this relies
        //   on the trait-level safety requirements, under which every field
        //   of an implementing type must itself implement 'UnsizedCopy'.
        unsafe { core::ptr::read(self) }
    }

    /// A type with the same alignment as `Self`.
    ///
    /// At the moment, Rust does not provide a way to determine the alignment
    /// of a dynamically sized type at compile-time.  This restriction exists
    /// because trait objects (which count as DSTs, but are not supported by
    /// [`UnsizedCopy`]) have an alignment determined by their implementation
    /// (which can vary at runtime).
    ///
    /// This associated type papers over this limitation, by simply requiring
    /// every implementation of [`UnsizedCopy`] to specify a type with the
    /// same alignment here.  This is used by internal plumbing code to know
    /// the alignment of `Self` at compile-time.
    ///
    /// ## Invariants
    ///
    /// The alignment of `Self::Alignment` must be the same as that of `Self`.
    type Alignment: Sized;

    /// Change the address of a pointer to `Self`.
    ///
    /// `Self` may be a DST, which means that references (and pointers) to it
    /// store metadata alongside the usual memory address.  For example, the
    /// metadata for a slice type is its length.  In order to construct a new
    /// instance of `Self` (as is done by copying), a new pointer must be
    /// created, and the appropriate metadata must be inserted.
    ///
    /// At the moment, Rust does not provide a way to examine this metadata
    /// for an arbitrary type.  This method papers over this limitation, and
    /// provides a way to copy the metadata from an existing pointer while
    /// changing the pointer address.
    ///
    /// The returned pointer is only as valid as `addr` is: this method does
    /// not read from or write to the memory at `addr`.
    ///
    /// # Implementing
    ///
    /// Most users will derive [`UnsizedCopy`] and so don't need to worry
    /// about this.  In any case, when Rust builds in support for extracting
    /// metadata, this function will gain a default implementation, and will
    /// eventually be deprecated.
    ///
    /// For manual implementations for unsized types:
    ///
    /// ```no_run
    /// # use domain::utils::dst::UnsizedCopy;
    /// #
    /// pub struct Foo {
    ///     a: i32,
    ///     b: [u8],
    /// }
    ///
    /// unsafe impl UnsizedCopy for Foo {
    ///     // We would like to write 'Alignment = Self' here, but we can't
    ///     // because 'Self' is not 'Sized'.  However, 'Self' is a 'struct'
    ///     // using 'repr(Rust)'; the following tuple (which implicitly also
    ///     // uses 'repr(Rust)') has the same alignment as it.
    ///     type Alignment = (i32, u8);
    ///
    ///     fn ptr_with_addr(&self, addr: *const ()) -> *const Self {
    ///         // Delegate to the same function on the last field.
    ///         //
    ///         // Rust knows that 'Self' has the same metadata as '[u8]',
    ///         // and so permits casting pointers between those types.
    ///         self.b.ptr_with_addr(addr) as *const Self
    ///     }
    /// }
    /// ```
    ///
    /// For manual implementations for sized types:
    ///
    /// ```no_run
    /// # use domain::utils::dst::UnsizedCopy;
    /// #
    /// pub struct Foo {
    ///     a: i32,
    ///     b: Option<f64>,
    /// }
    ///
    /// unsafe impl UnsizedCopy for Foo {
    ///     // Because 'Foo' is a sized type, we can use it here directly.
    ///     type Alignment = Self;
    ///
    ///     fn ptr_with_addr(&self, addr: *const ()) -> *const Self {
    ///         // Since 'Self' is 'Sized', there is no metadata.
    ///         addr.cast::<Self>()
    ///     }
    /// }
    /// ```
    ///
    /// # Invariants
    ///
    /// For the statement `let result = Self::ptr_with_addr(ptr, addr);`, the
    /// following always hold:
    ///
    /// - `result as usize == addr as usize`.
    /// - `core::ptr::metadata(result) == core::ptr::metadata(ptr)`.
    ///
    /// It is undefined behaviour for an implementation of [`UnsizedCopy`] to
    /// break these invariants.
    fn ptr_with_addr(&self, addr: *const ()) -> *const Self;
}
185
186/// Deriving [`UnsizedCopy`] automatically.
187///
188/// [`UnsizedCopy`] can be derived on any aggregate type.  `enum`s and
189/// `union`s are inherently [`Sized`] types, and [`UnsizedCopy`] will simply
190/// require every field to implement [`Copy`] on them.  For `struct`s, all but
191/// the last field need to implement [`Copy`]; the last field needs to
192/// implement [`UnsizedCopy`].
193///
194/// Here's a simple example:
195///
196/// ```no_run
197/// # use domain::utils::dst::UnsizedCopy;
198/// struct Foo<T: ?Sized> {
199///     a: u32,
200///     b: Bar<T>,
201/// }
202///
203/// # struct Bar<T: ?Sized> { data: T }
204///
205/// // The generated impl with 'derive(UnsizedCopy)':
206/// unsafe impl<T: ?Sized> UnsizedCopy for Foo<T>
207/// where
208///     u32: Copy,
209///     Bar<T>: UnsizedCopy,
210/// {
211///     // This type has the same alignment as 'Foo<T>'.
212///     type Alignment = (u32, <Bar<T> as UnsizedCopy>::Alignment);
213///
214///     fn ptr_with_addr(&self, addr: *const ()) -> *const Self {
215///         self.b.ptr_with_addr(addr) as *const Self
216///     }
217/// }
218/// ```
219pub use domain_macros::UnsizedCopy;
220
221macro_rules! impl_primitive_unsized_copy {
222    ($($type:ty),+) => {
223        $(unsafe impl UnsizedCopy for $type {
224            type Alignment = Self;
225
226            fn ptr_with_addr(&self, addr: *const ()) -> *const Self {
227                addr.cast::<Self>()
228            }
229        })+
230    };
231}
232
233impl_primitive_unsized_copy!((), bool, char);
234impl_primitive_unsized_copy!(u8, u16, u32, u64, u128, usize);
235impl_primitive_unsized_copy!(i8, i16, i32, i64, i128, isize);
236impl_primitive_unsized_copy!(f32, f64);
237
238unsafe impl<T: ?Sized> UnsizedCopy for &T {
239    type Alignment = Self;
240
241    fn ptr_with_addr(&self, addr: *const ()) -> *const Self {
242        addr.cast::<Self>()
243    }
244}
245
246unsafe impl UnsizedCopy for str {
247    // 'str' has no alignment.
248    type Alignment = u8;
249
250    fn ptr_with_addr(&self, addr: *const ()) -> *const Self {
251        // NOTE: The Rust Reference indicates that 'str' has the same layout
252        // as '[u8]' [1].  This is also the most natural layout for it.  Since
253        // there's no way to construct a '*const str' from raw parts, we will
254        // just construct a raw slice and transmute it.
255        //
256        // [1]: https://doc.rust-lang.org/reference/type-layout.html#str-layout
257
258        self.as_bytes().ptr_with_addr(addr) as *const Self
259    }
260}
261
262unsafe impl<T: UnsizedCopy> UnsizedCopy for [T] {
263    type Alignment = T;
264
265    fn ptr_with_addr(&self, addr: *const ()) -> *const Self {
266        core::ptr::slice_from_raw_parts(addr.cast::<T>(), self.len())
267    }
268}
269
270unsafe impl<T: UnsizedCopy, const N: usize> UnsizedCopy for [T; N] {
271    type Alignment = T;
272
273    fn ptr_with_addr(&self, addr: *const ()) -> *const Self {
274        addr.cast::<Self>()
275    }
276}
277
278macro_rules! impl_unsized_copy_tuple {
279    ($($type:ident),*; $last:ident) => {
280        unsafe impl<$($type: Copy,)* $last: ?Sized + UnsizedCopy>
281        UnsizedCopy for ($($type,)* $last,) {
282            type Alignment = ($($type,)* <$last>::Alignment,);
283
284            fn ptr_with_addr(&self, addr: *const ()) -> *const Self {
285                let (.., last) = self;
286                last.ptr_with_addr(addr) as *const Self
287            }
288        }
289    };
290}
291
292impl_unsized_copy_tuple!(; A);
293impl_unsized_copy_tuple!(A; B);
294impl_unsized_copy_tuple!(A, B; C);
295impl_unsized_copy_tuple!(A, B, C; D);
296impl_unsized_copy_tuple!(A, B, C, D; E);
297impl_unsized_copy_tuple!(A, B, C, D, E; F);
298impl_unsized_copy_tuple!(A, B, C, D, E, F; G);
299impl_unsized_copy_tuple!(A, B, C, D, E, F, G; H);
300impl_unsized_copy_tuple!(A, B, C, D, E, F, G, H; I);
301impl_unsized_copy_tuple!(A, B, C, D, E, F, G, H, I; J);
302impl_unsized_copy_tuple!(A, B, C, D, E, F, G, H, I, J; K);
303impl_unsized_copy_tuple!(A, B, C, D, E, F, G, H, I, J, K; L);
304
305//----------- UnsizedCopyFrom ------------------------------------------------
306
/// A container type that can be copied into.
///
/// This is the container-side counterpart of [`UnsizedCopy`]: a type
/// implementing it knows how to build a new instance of itself holding a
/// copy of a (possibly unsized) source value.  The container itself must be
/// [`Sized`], since it is returned by value.
pub trait UnsizedCopyFrom: Sized {
    /// The source type to copy from.
    ///
    /// This may be a dynamically sized type; it must implement
    /// [`UnsizedCopy`].
    type Source: ?Sized + UnsizedCopy;

    /// Create a new `Self` by copying the given value.
    ///
    /// The source is only borrowed; it is left untouched.
    fn unsized_copy_from(value: &Self::Source) -> Self;
}
315
#[cfg(feature = "std")]
impl<T: ?Sized + UnsizedCopy> UnsizedCopyFrom for std::boxed::Box<T> {
    type Source = T;

    /// Allocate a new `Box` and copy `value` into it byte-for-byte.
    ///
    /// Zero-sized values (e.g. an empty slice or `str`) are handled without
    /// touching the allocator.  If allocation fails, this diverges through
    /// `std::alloc::handle_alloc_error`.
    fn unsized_copy_from(value: &Self::Source) -> Self {
        use std::alloc;

        let layout = alloc::Layout::for_value(value);

        let dst: *mut u8 = if layout.size() == 0 {
            // The global allocator must never be asked for zero bytes: doing
            // so is undefined behaviour.  'Box' represents zero-sized values
            // with a non-null, suitably aligned pointer instead, so build
            // one whose address is the required alignment.
            core::ptr::null_mut::<u8>().wrapping_add(layout.align())
        } else {
            // SAFETY: 'layout' has a non-zero size.
            let dst = unsafe { alloc::alloc(layout) };
            if dst.is_null() {
                alloc::handle_alloc_error(layout);
            }

            let src = value as *const _ as *const u8;
            // SAFETY:
            // - 'src' comes from a valid reference to 'layout.size()' bytes.
            // - 'dst' is a fresh allocation of 'layout.size()' bytes.
            // - A fresh allocation cannot overlap an existing object.
            unsafe { core::ptr::copy_nonoverlapping(src, dst, layout.size()) };
            dst
        };

        // Re-attach the metadata of 'value' (e.g. the slice length) to the
        // address of the new storage.
        let ptr = value.ptr_with_addr(dst.cast::<()>()).cast_mut();

        // SAFETY: For non-zero sizes, 'ptr' refers to memory obtained from
        //   the global allocator with the layout of '*value', holding a
        //   bitwise copy of '*value'.  For zero sizes, 'ptr' is non-null and
        //   aligned, which is what 'Box' requires of zero-sized values.
        unsafe { std::boxed::Box::from_raw(ptr) }
    }
}
334
#[cfg(feature = "std")]
impl<T: ?Sized + UnsizedCopy> UnsizedCopyFrom for std::rc::Rc<T> {
    type Source = T;

    /// Allocate a new `Rc` and copy `value` into it byte-for-byte.
    fn unsized_copy_from(value: &Self::Source) -> Self {
        use core::mem::MaybeUninit;

        /// One alignment-sized chunk of raw storage.
        ///
        /// The zero-length array contributes the alignment of `A`, and the
        /// `u8` makes the type non-empty.  Under `repr(C)` the size is
        /// rounded up to the alignment, so this type is exactly
        /// `align_of::<A>()` bytes large (not one byte).
        #[derive(Copy, Clone)]
        #[repr(C)]
        struct AlignedChunk<A>([A; 0], u8);

        // TODO(1.82): Use 'Rc::new_uninit_slice()'.
        // 'impl FromIterator for Rc' describes performance characteristics.
        // For efficiency, the iterator should implement 'TrustedLen', which
        // is (currently) a nightly-only trait.  However, we can use the
        // existing 'std' types which happen to implement it.
        let size = core::mem::size_of_val(value);

        // The temporary slice must occupy exactly 'size' bytes, because
        // 'Rc::from_raw()' requires the original and the final pointee to
        // have the same size and alignment.  Each chunk is 'chunk' bytes, so
        // 'size / chunk' chunks are needed (allocating 'size' chunks would
        // over-allocate by a factor of the alignment).
        let chunk = core::mem::size_of::<AlignedChunk<T::Alignment>>();
        debug_assert_eq!(
            size % chunk,
            0,
            "the size of a value is a multiple of its alignment"
        );
        let rc: std::rc::Rc<[MaybeUninit<AlignedChunk<T::Alignment>>]> =
            (0..size / chunk).map(|_| MaybeUninit::uninit()).collect();

        let src = value as *const _ as *const u8;
        let dst = std::rc::Rc::into_raw(rc).cast_mut();
        // SAFETY: 'rc' was just constructed and has never been copied.  Thus,
        //   its contents can be mutated without violating any references.
        //   It holds exactly 'size' bytes, as does '*value'.
        unsafe { core::ptr::copy_nonoverlapping(src, dst.cast::<u8>(), size) };

        // Re-attach the metadata of 'value' to the address of the new data.
        let ptr = value.ptr_with_addr(dst.cast::<()>());

        // SAFETY: 'ptr' has the address returned by 'Rc::into_raw()' above.
        //   The allocation it came from has the same size ('size' bytes) and
        //   alignment (that of 'T::Alignment', i.e. of 'T') as '*value', and
        //   now holds a bitwise copy of '*value'.
        unsafe { std::rc::Rc::from_raw(ptr) }
    }
}
366
#[cfg(feature = "std")]
impl<T: ?Sized + UnsizedCopy> UnsizedCopyFrom for std::sync::Arc<T> {
    type Source = T;

    /// Allocate a new `Arc` and copy `value` into it byte-for-byte.
    fn unsized_copy_from(value: &Self::Source) -> Self {
        use core::mem::MaybeUninit;

        /// One alignment-sized chunk of raw storage.
        ///
        /// The zero-length array contributes the alignment of `A`, and the
        /// `u8` makes the type non-empty.  Under `repr(C)` the size is
        /// rounded up to the alignment, so this type is exactly
        /// `align_of::<A>()` bytes large (not one byte).
        #[derive(Copy, Clone)]
        #[repr(C)]
        struct AlignedChunk<A>([A; 0], u8);

        // TODO(1.82): Use 'Arc::new_uninit_slice()'.
        // 'impl FromIterator for Arc' describes performance characteristics.
        // For efficiency, the iterator should implement 'TrustedLen', which
        // is (currently) a nightly-only trait.  However, we can use the
        // existing 'std' types which happen to implement it.
        let size = core::mem::size_of_val(value);

        // The temporary slice must occupy exactly 'size' bytes, because
        // 'Arc::from_raw()' requires the original and the final pointee to
        // have the same size and alignment.  Each chunk is 'chunk' bytes, so
        // 'size / chunk' chunks are needed (allocating 'size' chunks would
        // over-allocate by a factor of the alignment).
        let chunk = core::mem::size_of::<AlignedChunk<T::Alignment>>();
        debug_assert_eq!(
            size % chunk,
            0,
            "the size of a value is a multiple of its alignment"
        );
        let arc: std::sync::Arc<[MaybeUninit<AlignedChunk<T::Alignment>>]> =
            (0..size / chunk).map(|_| MaybeUninit::uninit()).collect();

        let src = value as *const _ as *const u8;
        let dst = std::sync::Arc::into_raw(arc).cast_mut();
        // SAFETY: 'arc' was just constructed and has never been copied.  Thus,
        //   its contents can be mutated without violating any references.
        //   It holds exactly 'size' bytes, as does '*value'.
        unsafe { core::ptr::copy_nonoverlapping(src, dst.cast::<u8>(), size) };

        // Re-attach the metadata of 'value' to the address of the new data.
        let ptr = value.ptr_with_addr(dst.cast::<()>());

        // SAFETY: 'ptr' has the address returned by 'Arc::into_raw()' above.
        //   The allocation it came from has the same size ('size' bytes) and
        //   alignment (that of 'T::Alignment', i.e. of 'T') as '*value', and
        //   now holds a bitwise copy of '*value'.
        unsafe { std::sync::Arc::from_raw(ptr) }
    }
}
398
#[cfg(feature = "std")]
impl<T: UnsizedCopy> UnsizedCopyFrom for std::vec::Vec<T> {
    type Source = [T];

    /// Build a new `Vec` holding a bitwise copy of every element of `value`.
    fn unsized_copy_from(value: &Self::Source) -> Self {
        // 'impl From<&[T]> for Vec<T>' is not usable here, as it needs
        // 'T: Clone'.  Going through the 'Box' impl of 'UnsizedCopyFrom'
        // would also work, but the copy is simple enough to do by hand.

        let count = value.len();
        let mut copy = Self::with_capacity(count);
        let dst: *mut T = copy.as_mut_ptr();

        // SAFETY:
        // - 'value' is a valid slice of 'count' elements.
        // - 'copy' was allocated with room for at least 'count' elements,
        //   and is a fresh allocation that cannot overlap 'value'.
        // - Once the elements are copied, the first 'count' elements of
        //   'copy' are initialized, so the length may be set to 'count'.
        unsafe {
            core::ptr::copy_nonoverlapping(value.as_ptr(), dst, count);
            copy.set_len(count);
        }

        copy
    }
}
417
#[cfg(feature = "std")]
impl UnsizedCopyFrom for std::string::String {
    type Source = str;

    /// Build a new `String` holding a copy of `value`.
    fn unsized_copy_from(value: &Self::Source) -> Self {
        // The standard conversion from '&str' already does exactly this.
        Self::from(value)
    }
}
426
427//----------- copy_to_bump ---------------------------------------------------
428
/// Copy a value into a [`Bump`] allocator.
///
/// [`Bump`] can natively copy sized values and slices; this function extends
/// that to any [`UnsizedCopy`] value.  The returned reference points into
/// memory owned by `bump`, and so lives as long as the borrow of `bump`.
///
/// [`Bump`]: bumpalo::Bump
#[cfg(feature = "bumpalo")]
#[allow(clippy::mut_from_ref)] // using a memory allocator
pub fn copy_to_bump<'a, T: ?Sized + UnsizedCopy>(
    value: &T,
    bump: &'a bumpalo::Bump,
) -> &'a mut T {
    // Reserve storage with the exact size and alignment of '*value'.
    let layout = core::alloc::Layout::for_value(value);
    let dst: *mut u8 = bump.alloc_layout(layout).as_ptr();

    let src: *const u8 = (value as *const T).cast();
    // SAFETY:
    // - 'src' comes from a valid reference to 'layout.size()' bytes.
    // - 'dst' was just allocated with 'layout', so it has room for
    //   'layout.size()' bytes and cannot overlap 'value'.
    unsafe { core::ptr::copy_nonoverlapping(src, dst, layout.size()) };

    // Re-attach the metadata of 'value' to the address of the new storage.
    let copied = value.ptr_with_addr(dst.cast::<()>()).cast_mut();

    // SAFETY: 'copied' points to freshly allocated storage inside 'bump'
    //   with the layout of '*value', now holding a bitwise copy of it.  No
    //   other reference to that storage exists, and it lives for ''a'.
    unsafe { &mut *copied }
}