domain/utils/dst.rs
1//! Working with dynamically sized types (DSTs).
2//!
3//! DSTs are types whose size is known at run-time instead of compile-time.
4//! The primary examples of this are slices and [`str`]. While Rust provides
5//! relatively good support for DSTs (e.g. they can be held by reference like
6//! any other type), it has some rough edges. The standard library tries to
7//! paper over these with helpful functions and trait impls, but it does not
8//! account for custom DST types. In particular, [`new::base`] introduces a
9//! large number of user-facing DSTs and needs to paper over the same rough
10//! edges for all of them.
11//!
12//! [`new::base`]: crate::new::base
13//!
14//! ## Copying DSTs
15//!
16//! Because DSTs cannot be held by value, they must be handled and manipulated
17//! through an indirection (a reference or a smart pointer of some kind).
18//! Copying a DST into a new container (e.g. [`Box`]) requires explicit support
19//! from that container type.
20//!
21//! [`Box`]: https://doc.rust-lang.org/std/boxed/struct.Box.html
22//!
23//! This module introduces the [`UnsizedCopy`] trait (and a derive macro) that
24//! types like [`str`] implement. Container types that can support copying
25//! DSTs implement [`UnsizedCopyFrom`].
26//
27// TODO: Example
28
29//----------- UnsizedCopy ----------------------------------------------------
30
31/// An extension of [`Copy`] to dynamically sized types.
32///
33/// This is a generalization of [`Copy`]. It is intended to simplify working
34/// with DSTs that support zero-copy parsing techniques (as these are built
35/// from byte sequences, they are inherently trivial to copy).
36///
37/// # Usage
38///
39/// To copy a type, call [`UnsizedCopy::unsized_copy_into()`] on the DST being
40/// copied, or call [`UnsizedCopyFrom::unsized_copy_from()`] on the container
41/// type to copy into. The two function identically.
42///
43#[cfg_attr(
44 feature = "bumpalo",
45 doc = "The [`copy_to_bump()`] function is useful for copying data into [`bumpalo`]-based allocations."
46)]
47///
48/// # Safety
49///
50/// A type `T` can implement `UnsizedCopy` if all of the following hold:
51///
52/// - It is an aggregate type (`struct`, `enum`, or `union`) and every field
53/// implements [`UnsizedCopy`].
54///
55/// - `T::Alignment` has exactly the same alignment as `T`.
56///
57/// - `T::ptr_with_addr()` satisfies the documented invariants.
58pub unsafe trait UnsizedCopy {
59 /// Copy `self` into a new container.
60 ///
61 /// A new container of the specified type (which is usually inferred) is
62 /// allocated, and the contents of `self` are copied into it. This is a
63 /// convenience method that calls [`unsized_copy_from()`].
64 ///
65 /// [`unsized_copy_from()`]: UnsizedCopyFrom::unsized_copy_from().
66 #[inline]
67 fn unsized_copy_into<T: UnsizedCopyFrom<Source = Self>>(&self) -> T {
68 T::unsized_copy_from(self)
69 }
70
71 /// Copy `self` and return it by value.
72 ///
73 /// This offers equivalent functionality to the regular [`Copy`] trait,
74 /// which is also why it has the same [`Sized`] bound.
75 #[inline]
76 fn copy(&self) -> Self
77 where
78 Self: Sized,
79 {
80 // The compiler can't tell that 'Self' is 'Copy', so we're just going
81 // to copy it manually. Hopefully this optimizes fine.
82
83 // SAFETY: 'self' is a valid reference, and is thus safe for reads.
84 unsafe { core::ptr::read(self) }
85 }
86
87 /// A type with the same alignment as `Self`.
88 ///
89 /// At the moment, Rust does not provide a way to determine the alignment
90 /// of a dynamically sized type at compile-time. This restriction exists
91 /// because trait objects (which count as DSTs, but are not supported by
92 /// [`UnsizedCopy`]) have an alignment determined by their implementation
93 /// (which can vary at runtime).
94 ///
95 /// This associated type papers over this limitation, by simply requiring
96 /// every implementation of [`UnsizedCopy`] to specify a type with the
97 /// same alignment here. This is used by internal plumbing code to know
98 /// the alignment of `Self` at compile-time.
99 ///
100 /// ## Invariants
101 ///
102 /// The alignment of `Self::Alignment` must be the same as that of `Self`.
103 type Alignment: Sized;
104
105 /// Change the address of a pointer to `Self`.
106 ///
107 /// `Self` may be a DST, which means that references (and pointers) to it
108 /// store metadata alongside the usual memory address. For example, the
109 /// metadata for a slice type is its length. In order to construct a new
110 /// instance of `Self` (as is done by copying), a new pointer must be
111 /// created, and the appropriate metadata must be inserted.
112 ///
113 /// At the moment, Rust does not provide a way to examine this metadata
114 /// for an arbitrary type. This method papers over this limitation, and
115 /// provides a way to copy the metadata from an existing pointer while
116 /// changing the pointer address.
117 ///
118 /// # Implementing
119 ///
120 /// Most users will derive [`UnsizedCopy`] and so don't need to worry
121 /// about this. In any case, when Rust builds in support for extracting
122 /// metadata, this function will gain a default implementation, and will
123 /// eventually be deprecated.
124 ///
125 /// For manual implementations for unsized types:
126 ///
127 /// ```no_run
128 /// # use domain::utils::dst::UnsizedCopy;
129 /// #
130 /// pub struct Foo {
131 /// a: i32,
132 /// b: [u8],
133 /// }
134 ///
135 /// unsafe impl UnsizedCopy for Foo {
136 /// // We would like to write 'Alignment = Self' here, but we can't
137 /// // because 'Self' is not 'Sized'. However, 'Self' is a 'struct'
138 /// // using 'repr(Rust)'; the following tuple (which implicitly also
139 /// // uses 'repr(Rust)') has the same alignment as it.
140 /// type Alignment = (i32, u8);
141 ///
142 /// fn ptr_with_addr(&self, addr: *const ()) -> *const Self {
143 /// // Delegate to the same function on the last field.
144 /// //
145 /// // Rust knows that 'Self' has the same metadata as '[u8]',
146 /// // and so permits casting pointers between those types.
147 /// self.b.ptr_with_addr(addr) as *const Self
148 /// }
149 /// }
150 /// ```
151 ///
152 /// For manual implementations for sized types:
153 ///
154 /// ```no_run
155 /// # use domain::utils::dst::UnsizedCopy;
156 /// #
157 /// pub struct Foo {
158 /// a: i32,
159 /// b: Option<f64>,
160 /// }
161 ///
162 /// unsafe impl UnsizedCopy for Foo {
163 /// // Because 'Foo' is a sized type, we can use it here directly.
164 /// type Alignment = Self;
165 ///
166 /// fn ptr_with_addr(&self, addr: *const ()) -> *const Self {
167 /// // Since 'Self' is 'Sized', there is no metadata.
168 /// addr.cast::<Self>()
169 /// }
170 /// }
171 /// ```
172 ///
173 /// # Invariants
174 ///
175 /// For the statement `let result = Self::ptr_with_addr(ptr, addr);`, the
176 /// following always hold:
177 ///
178 /// - `result as usize == addr as usize`.
179 /// - `core::ptr::metadata(result) == core::ptr::metadata(ptr)`.
180 ///
181 /// It is undefined behaviour for an implementation of [`UnsizedCopy`] to
182 /// break these invariants.
183 fn ptr_with_addr(&self, addr: *const ()) -> *const Self;
184}
185
186/// Deriving [`UnsizedCopy`] automatically.
187///
188/// [`UnsizedCopy`] can be derived on any aggregate type. `enum`s and
189/// `union`s are inherently [`Sized`] types, and [`UnsizedCopy`] will simply
190/// require every field to implement [`Copy`] on them. For `struct`s, all but
191/// the last field need to implement [`Copy`]; the last field needs to
192/// implement [`UnsizedCopy`].
193///
194/// Here's a simple example:
195///
196/// ```no_run
197/// # use domain::utils::dst::UnsizedCopy;
198/// struct Foo<T: ?Sized> {
199/// a: u32,
200/// b: Bar<T>,
201/// }
202///
203/// # struct Bar<T: ?Sized> { data: T }
204///
205/// // The generated impl with 'derive(UnsizedCopy)':
206/// unsafe impl<T: ?Sized> UnsizedCopy for Foo<T>
207/// where
208/// u32: Copy,
209/// Bar<T>: UnsizedCopy,
210/// {
211/// // This type has the same alignment as 'Foo<T>'.
212/// type Alignment = (u32, <Bar<T> as UnsizedCopy>::Alignment);
213///
214/// fn ptr_with_addr(&self, addr: *const ()) -> *const Self {
215/// self.b.ptr_with_addr(addr) as *const Self
216/// }
217/// }
218/// ```
219pub use domain_macros::UnsizedCopy;
220
221macro_rules! impl_primitive_unsized_copy {
222 ($($type:ty),+) => {
223 $(unsafe impl UnsizedCopy for $type {
224 type Alignment = Self;
225
226 fn ptr_with_addr(&self, addr: *const ()) -> *const Self {
227 addr.cast::<Self>()
228 }
229 })+
230 };
231}
232
233impl_primitive_unsized_copy!((), bool, char);
234impl_primitive_unsized_copy!(u8, u16, u32, u64, u128, usize);
235impl_primitive_unsized_copy!(i8, i16, i32, i64, i128, isize);
236impl_primitive_unsized_copy!(f32, f64);
237
238unsafe impl<T: ?Sized> UnsizedCopy for &T {
239 type Alignment = Self;
240
241 fn ptr_with_addr(&self, addr: *const ()) -> *const Self {
242 addr.cast::<Self>()
243 }
244}
245
246unsafe impl UnsizedCopy for str {
247 // 'str' has no alignment.
248 type Alignment = u8;
249
250 fn ptr_with_addr(&self, addr: *const ()) -> *const Self {
251 // NOTE: The Rust Reference indicates that 'str' has the same layout
252 // as '[u8]' [1]. This is also the most natural layout for it. Since
253 // there's no way to construct a '*const str' from raw parts, we will
254 // just construct a raw slice and transmute it.
255 //
256 // [1]: https://doc.rust-lang.org/reference/type-layout.html#str-layout
257
258 self.as_bytes().ptr_with_addr(addr) as *const Self
259 }
260}
261
262unsafe impl<T: UnsizedCopy> UnsizedCopy for [T] {
263 type Alignment = T;
264
265 fn ptr_with_addr(&self, addr: *const ()) -> *const Self {
266 core::ptr::slice_from_raw_parts(addr.cast::<T>(), self.len())
267 }
268}
269
270unsafe impl<T: UnsizedCopy, const N: usize> UnsizedCopy for [T; N] {
271 type Alignment = T;
272
273 fn ptr_with_addr(&self, addr: *const ()) -> *const Self {
274 addr.cast::<Self>()
275 }
276}
277
278macro_rules! impl_unsized_copy_tuple {
279 ($($type:ident),*; $last:ident) => {
280 unsafe impl<$($type: Copy,)* $last: ?Sized + UnsizedCopy>
281 UnsizedCopy for ($($type,)* $last,) {
282 type Alignment = ($($type,)* <$last>::Alignment,);
283
284 fn ptr_with_addr(&self, addr: *const ()) -> *const Self {
285 let (.., last) = self;
286 last.ptr_with_addr(addr) as *const Self
287 }
288 }
289 };
290}
291
292impl_unsized_copy_tuple!(; A);
293impl_unsized_copy_tuple!(A; B);
294impl_unsized_copy_tuple!(A, B; C);
295impl_unsized_copy_tuple!(A, B, C; D);
296impl_unsized_copy_tuple!(A, B, C, D; E);
297impl_unsized_copy_tuple!(A, B, C, D, E; F);
298impl_unsized_copy_tuple!(A, B, C, D, E, F; G);
299impl_unsized_copy_tuple!(A, B, C, D, E, F, G; H);
300impl_unsized_copy_tuple!(A, B, C, D, E, F, G, H; I);
301impl_unsized_copy_tuple!(A, B, C, D, E, F, G, H, I; J);
302impl_unsized_copy_tuple!(A, B, C, D, E, F, G, H, I, J; K);
303impl_unsized_copy_tuple!(A, B, C, D, E, F, G, H, I, J, K; L);
304
305//----------- UnsizedCopyFrom ------------------------------------------------
306
307/// A container type that can be copied into.
308pub trait UnsizedCopyFrom: Sized {
309 /// The source type to copy from.
310 type Source: ?Sized + UnsizedCopy;
311
312 /// Create a new `Self` by copying the given value.
313 fn unsized_copy_from(value: &Self::Source) -> Self;
314}
315
#[cfg(feature = "std")]
impl<T: ?Sized + UnsizedCopy> UnsizedCopyFrom for std::boxed::Box<T> {
    type Source = T;

    /// Copy `value` bit-for-bit into a newly created [`Box`].
    ///
    /// Zero-sized values (e.g. an empty slice or `str`) do not touch the
    /// allocator; they are boxed behind a dangling, well-aligned pointer.
    ///
    /// [`Box`]: std::boxed::Box
    fn unsized_copy_from(value: &Self::Source) -> Self {
        use std::alloc;

        let layout = alloc::Layout::for_value(value);

        let dst: *mut u8 = if layout.size() == 0 {
            // Calling 'alloc()' with a zero-sized layout is undefined
            // behaviour.  For zero-sized values, 'Box::from_raw()' only
            // needs a non-null, sufficiently aligned pointer, which
            // 'T::Alignment' (same alignment as 'T') provides.
            core::ptr::NonNull::<T::Alignment>::dangling()
                .as_ptr()
                .cast::<u8>()
        } else {
            // SAFETY: 'layout' was just checked to have a non-zero size.
            let dst = unsafe { alloc::alloc(layout) };
            if dst.is_null() {
                alloc::handle_alloc_error(layout);
            }

            let src = value as *const T as *const u8;
            // SAFETY:
            // - 'src' is valid for reads of 'layout.size()' bytes, as it
            //   comes from a reference to a value with that layout.
            // - 'dst' is a fresh allocation of 'layout.size()' bytes, so it
            //   is valid for writes and cannot overlap with 'src'.
            unsafe { core::ptr::copy_nonoverlapping(src, dst, layout.size()) };
            dst
        };

        // Attach the metadata of 'value' to the new address.
        let ptr = value.ptr_with_addr(dst.cast::<()>()).cast_mut();

        // SAFETY: 'ptr' has the address of 'dst' and the metadata of 'value'
        // ('ptr_with_addr()' invariants).  'dst' is either a global-allocator
        // allocation with the layout of 'value', holding a bitwise copy of
        // it, or (for zero-sized values) non-null and aligned.
        unsafe { std::boxed::Box::from_raw(ptr) }
    }
}
334
#[cfg(feature = "std")]
impl<T: ?Sized + UnsizedCopy> UnsizedCopyFrom for std::rc::Rc<T> {
    type Source = T;

    /// Copy `value` bit-for-bit into a newly allocated [`Rc`].
    ///
    /// [`Rc`]: std::rc::Rc
    fn unsized_copy_from(value: &Self::Source) -> Self {
        use core::mem::{size_of, size_of_val, MaybeUninit};

        /// A storage unit with the alignment of `T`.
        ///
        /// The zero-length array forces the alignment; the [`u8`] makes the
        /// type non-zero-sized.  Because a type's size is a multiple of its
        /// alignment, this unit is `align_of::<T>()` bytes large (not 1).
        #[derive(Copy, Clone)]
        #[repr(C)]
        struct AlignedU8<T>([T; 0], u8);

        // TODO(1.82): Use 'Rc::new_uninit_slice()'.
        // 'impl FromIterator for Rc' describes performance characteristics.
        // For efficiency, the iterator should implement 'TrustedLen', which
        // is (currently) a nightly-only trait.  However, we can use the
        // existing 'std' types which happen to implement it.
        let size = size_of_val(value);

        // 'Rc::from_raw()' requires the slice data to have exactly the size
        // and alignment of 'value', so count storage units, not bytes.  The
        // size of a value is always a multiple of its alignment, so the
        // division is exact.
        let unit = size_of::<AlignedU8<T::Alignment>>();
        debug_assert_eq!(size % unit, 0, "size must be a multiple of alignment");
        let units = size / unit;

        let rc: std::rc::Rc<[MaybeUninit<AlignedU8<T::Alignment>>]> =
            (0..units).map(|_| MaybeUninit::uninit()).collect();

        let src = value as *const T as *const u8;
        let dst = std::rc::Rc::into_raw(rc).cast_mut();
        // SAFETY: 'rc' was just constructed and has never been copied.  Thus,
        // its contents can be mutated without violating any references.  It
        // holds 'units * unit == size' bytes, and 'src' is valid for reading
        // 'size' bytes.
        unsafe { core::ptr::copy_nonoverlapping(src, dst.cast::<u8>(), size) };

        let ptr = value.ptr_with_addr(dst.cast::<()>());
        // SAFETY: 'ptr' has the address returned by 'Rc::into_raw()' and the
        // metadata of 'value'.  The slice it came from has the same size and
        // alignment as 'value' and now holds a bitwise copy of it.
        unsafe { std::rc::Rc::from_raw(ptr) }
    }
}
366
#[cfg(feature = "std")]
impl<T: ?Sized + UnsizedCopy> UnsizedCopyFrom for std::sync::Arc<T> {
    type Source = T;

    /// Copy `value` bit-for-bit into a newly allocated [`Arc`].
    ///
    /// [`Arc`]: std::sync::Arc
    fn unsized_copy_from(value: &Self::Source) -> Self {
        use core::mem::{size_of, size_of_val, MaybeUninit};

        /// A storage unit with the alignment of `T`.
        ///
        /// The zero-length array forces the alignment; the [`u8`] makes the
        /// type non-zero-sized.  Because a type's size is a multiple of its
        /// alignment, this unit is `align_of::<T>()` bytes large (not 1).
        #[derive(Copy, Clone)]
        #[repr(C)]
        struct AlignedU8<T>([T; 0], u8);

        // TODO(1.82): Use 'Arc::new_uninit_slice()'.
        // 'impl FromIterator for Arc' describes performance characteristics.
        // For efficiency, the iterator should implement 'TrustedLen', which
        // is (currently) a nightly-only trait.  However, we can use the
        // existing 'std' types which happen to implement it.
        let size = size_of_val(value);

        // 'Arc::from_raw()' requires the slice data to have exactly the size
        // and alignment of 'value', so count storage units, not bytes.  The
        // size of a value is always a multiple of its alignment, so the
        // division is exact.
        let unit = size_of::<AlignedU8<T::Alignment>>();
        debug_assert_eq!(size % unit, 0, "size must be a multiple of alignment");
        let units = size / unit;

        let arc: std::sync::Arc<[MaybeUninit<AlignedU8<T::Alignment>>]> =
            (0..units).map(|_| MaybeUninit::uninit()).collect();

        let src = value as *const T as *const u8;
        let dst = std::sync::Arc::into_raw(arc).cast_mut();
        // SAFETY: 'arc' was just constructed and has never been copied.
        // Thus, its contents can be mutated without violating any
        // references.  It holds 'units * unit == size' bytes, and 'src' is
        // valid for reading 'size' bytes.
        unsafe { core::ptr::copy_nonoverlapping(src, dst.cast::<u8>(), size) };

        let ptr = value.ptr_with_addr(dst.cast::<()>());
        // SAFETY: 'ptr' has the address returned by 'Arc::into_raw()' and the
        // metadata of 'value'.  The slice it came from has the same size and
        // alignment as 'value' and now holds a bitwise copy of it.
        unsafe { std::sync::Arc::from_raw(ptr) }
    }
}
398
#[cfg(feature = "std")]
impl<T: UnsizedCopy> UnsizedCopyFrom for std::vec::Vec<T> {
    type Source = [T];

    fn unsized_copy_from(value: &Self::Source) -> Self {
        // 'impl From<&[T]> for Vec<T>' is not an option, since it needs
        // 'T: Clone'.  Going through the 'UnsizedCopyFrom' impl for 'Box'
        // would work, but copying by hand is probably better.

        let len = value.len();
        let mut copied = Self::with_capacity(len);

        // SAFETY: the buffer of 'copied' has room for at least 'len'
        // elements and, being freshly created, cannot overlap 'value'.
        // Once the elements have been copied in, the first 'len' elements
        // are initialized, so the length may be set to 'len'.
        unsafe {
            core::ptr::copy_nonoverlapping(
                value.as_ptr(),
                copied.as_mut_ptr(),
                len,
            );
            copied.set_len(len);
        }

        copied
    }
}
417
#[cfg(feature = "std")]
impl UnsizedCopyFrom for std::string::String {
    type Source = str;

    fn unsized_copy_from(value: &Self::Source) -> Self {
        // The standard conversion from '&str' already allocates and copies.
        Self::from(value)
    }
}
426
427//----------- copy_to_bump ---------------------------------------------------
428
/// Copy a value into a [`Bump`] allocator.
///
/// This works with [`UnsizedCopy`] values, which extends [`Bump`]'s native
/// functionality.
///
/// Space with the layout of `value` is requested from `bump`, the bytes of
/// `value` are copied into it, and a mutable reference to the copy (tied to
/// the lifetime of the `bump` borrow) is returned.
///
/// [`Bump`]: bumpalo::Bump
#[cfg(feature = "bumpalo")]
#[allow(clippy::mut_from_ref)] // using a memory allocator
pub fn copy_to_bump<'a, T: ?Sized + UnsizedCopy>(
    value: &T,
    bump: &'a bumpalo::Bump,
) -> &'a mut T {
    // 'core::alloc::Layout' is the same type as 'std::alloc::Layout'; naming
    // it through 'core' keeps this function independent of the 'std' feature.
    let layout = core::alloc::Layout::for_value(value);
    let dst = bump.alloc_layout(layout).as_ptr();
    let src = value as *const T as *const u8;

    // SAFETY:
    // - 'src' is valid for reads of 'layout.size()' bytes, as it comes from
    //   a reference to a value with that layout.
    // - 'dst' was just handed out by 'bump' for 'layout', so it is valid for
    //   writes of 'layout.size()' bytes and cannot overlap with 'src'.
    unsafe { core::ptr::copy_nonoverlapping(src, dst, layout.size()) };

    // Attach the metadata of 'value' to the new address.
    let ptr = value.ptr_with_addr(dst.cast::<()>()).cast_mut();

    // SAFETY: 'ptr' has the address of 'dst' and the metadata of 'value'
    // ('ptr_with_addr()' invariants).  The memory behind it belongs to 'bump',
    // was allocated for this call only, and holds a bitwise copy of 'value'.
    unsafe { &mut *ptr }
}