mz_ore/id_gen.rs

// Copyright Materialize, Inc. and contributors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License in the LICENSE file at the
// root of this repository, or online at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! ID generation utilities.

use hibitset::BitSet;
use std::borrow::Borrow;
use std::fmt;
use std::hash::Hash;
use std::marker::PhantomData;
use std::ops::{AddAssign, Sub};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Mutex};
use uuid::Uuid;

use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};

use crate::cast::CastFrom;

/// Manages the allocation of unique IDs.
#[derive(Debug, Clone)]
pub struct Gen<Id> {
    id: u64,
    phantom: PhantomData<Id>,
}

impl<Id> Default for Gen<Id> {
    fn default() -> Self {
        Self {
            id: 0,
            phantom: PhantomData,
        }
    }
}

impl<Id: From<u64>> Gen<Id> {
    /// Allocates a new identifier of type `Id` and advances the generator.
    pub fn allocate_id(&mut self) -> Id {
        let id = self.id;
        self.id += 1;
        id.into()
    }
}

/// A generator of 64-bit IDs.
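///
/// A minimal usage sketch: the default generator starts at zero and counts up.
///
/// ```
/// use mz_ore::id_gen::IdGen;
///
/// let mut gen = IdGen::default();
/// assert_eq!(gen.allocate_id(), 0);
/// assert_eq!(gen.allocate_id(), 1);
/// ```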
pub type IdGen = Gen<u64>;

/// Manages the allocation of unique IDs.
///
/// Atomic version of `Gen`, for sharing between threads.
#[derive(Debug)]
pub struct AtomicGen<Id> {
    id: AtomicU64,
    phantom: PhantomData<Id>,
}

impl<Id> Default for AtomicGen<Id> {
    fn default() -> Self {
        Self {
            id: AtomicU64::new(0),
            phantom: PhantomData,
        }
    }
}

impl<Id: From<u64> + Default> AtomicGen<Id> {
    /// Allocates a new identifier of type `Id` and advances the generator.
    pub fn allocate_id(&self) -> Id {
        // The only purpose of the atomic here is to ensure that every caller receives a
        // distinct ID. There are no requirements on the order in which IDs are produced,
        // and no other state is protected by this atomic, so `Relaxed` ordering is
        // sufficient.
        let id = self.id.fetch_add(1, Ordering::Relaxed);
        id.into()
    }
}

/// A generator of 64-bit IDs.
///
/// Atomic version of `IdGen`, for sharing between threads.
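///
/// A minimal usage sketch: unlike `IdGen`, allocation takes `&self`, so the generator
/// can be shared (e.g. behind an `Arc`) between threads.
///
/// ```
/// use mz_ore::id_gen::AtomicIdGen;
///
/// let gen = AtomicIdGen::default();
/// let a = gen.allocate_id();
/// let b = gen.allocate_id();
/// assert_ne!(a, b);
/// ```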
pub type AtomicIdGen = AtomicGen<u64>;

/// Common trait bounds for the ID types handed out by [`IdAllocator`].
pub trait IdGenerator:
    From<u8> + AddAssign + Sub + PartialOrd + Copy + Eq + Hash + Ord + serde::Serialize + fmt::Display
{
}

impl<T> IdGenerator for T where
    T: From<u8>
        + AddAssign
        + Sub
        + PartialOrd
        + Copy
        + Eq
        + Hash
        + Ord
        + serde::Serialize
        + fmt::Display
{
}

/// Manages allocation of numeric IDs.
#[derive(Debug, Clone)]
pub struct IdAllocator<A: IdAllocatorInner>(pub Arc<Mutex<A>>);

/// Common trait for id allocators.
pub trait IdAllocatorInner: std::fmt::Debug + Send {
    /// Name of the allocator.
    const NAME: &'static str;
    /// Construct an allocator with the given range. Returned ids will be OR'd with `mask`. `mask`
    /// must not have any bits that could be set by a number <= `max`.
    fn new(min: u32, max: u32, mask: u32) -> Self;
    /// Allocate a new id.
    fn alloc(&mut self) -> Option<u32>;
    /// Deallocate a used id, making it available for reuse.
    fn remove(&mut self, id: u32);
}

/// IdAllocator using a HiBitSet.
#[derive(Debug, Clone)]
pub struct IdAllocatorInnerBitSet {
    next: StdRng,
    min: u32,
    max: u32,
    mask: u32,
    used: BitSet,
}

impl IdAllocatorInner for IdAllocatorInnerBitSet {
    const NAME: &'static str = "hibitset";

    fn new(min: u32, max: u32, mask: u32) -> Self {
        let total = usize::cast_from(max - min);
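        // hibitset's `BitSet` can hold at most 64**4 (~16.7 million) bits, so the whole
        // rebased range must fit below that limit.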
        assert!(total < BitSet::BITS_PER_USIZE.pow(4));
        IdAllocatorInnerBitSet {
            next: StdRng::from_entropy(),
            min,
            max,
            mask,
            used: BitSet::new(),
        }
    }

    fn alloc(&mut self) -> Option<u32> {
        let range = self.min..=self.max;
        let init = self.next.gen_range(range);
        let mut next = init;
        loop {
            // Because hibitset has a hard maximum of 64**4 (~16 million), subtract the min in
            // case max is above that. This is safe because we already asserted above that
            // `max - min < 64**4`.
            let stored = next - self.min;
            if !self.used.add(stored) {
                assert!(
                    next & self.mask == 0,
                    "chosen ID must not intersect with mask:\n{:#034b}\n{:#034b}",
                    next,
                    self.mask
                );
                return Some(next | self.mask);
            }
            // The value is already in use; increment and try again, wrapping back to min once
            // we hit max.
            next = if next == self.max { self.min } else { next + 1 };
            // We fully wrapped around. BitSet doesn't have a rank or count method, so we can't
            // detect exhaustion any earlier than this.
            if next == init {
                return None;
            }
        }
    }

    fn remove(&mut self, id: u32) {
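        // Strip the mask bits and rebase by `min` to recover the index that `alloc`
        // stored in the bit set.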
        let id = (!self.mask) & id;
        let stored = id - self.min;
        self.used.remove(stored);
    }
}

impl<A: IdAllocatorInner> IdAllocator<A> {
    /// Creates a new `IdAllocator` that will assign IDs between `min` and
    /// `max`, both inclusive.
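    ///
    /// A minimal sketch of the `mask` behavior: every returned ID carries the mask bits.
    ///
    /// ```
    /// use mz_ore::id_gen::{IdAllocator, IdAllocatorInnerBitSet};
    ///
    /// // IDs are drawn from 1..=10 and tagged with bit 8 via the mask.
    /// let allocator = IdAllocator::<IdAllocatorInnerBitSet>::new(1, 10, 1 << 8);
    /// let id = allocator.alloc().expect("range not exhausted");
    /// assert_ne!(id.unhandled() & (1 << 8), 0);
    /// ```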
    pub fn new(min: u32, max: u32, mask: u32) -> IdAllocator<A> {
        assert!(min <= max);
        if mask != 0 && max > 0 {
            // mask_check is all 1s in any bit that could be set by a number <= max. Assert
            // that the mask doesn't share any bits with those.
            let mask_check = (1 << (max.ilog2() + 1)) - 1;
            assert_eq!(mask & mask_check, 0, "max and mask share bits");
        }
        let inner = A::new(min, max, mask);
        IdAllocator(Arc::new(Mutex::new(inner)))
    }

    /// Allocates a new ID randomly distributed between min and max.
    ///
    /// Returns `None` if the allocator is exhausted.
    ///
    /// The ID associated with the [`IdHandle`] will be freed when all of the
    /// outstanding [`IdHandle`]s have been dropped.
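    ///
    /// A minimal sketch:
    ///
    /// ```
    /// use mz_ore::id_gen::{IdAllocator, IdAllocatorInnerBitSet};
    ///
    /// let allocator = IdAllocator::<IdAllocatorInnerBitSet>::new(3, 5, 0);
    /// let id = allocator.alloc().expect("IDs available");
    /// assert!((3..=5).contains(&id.unhandled()));
    /// ```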
    pub fn alloc(&self) -> Option<IdHandle<u32, A>> {
        let inner = Arc::new(internal::IdHandleInner::new(self)?);
        Some(IdHandle::Dynamic(inner))
    }

    // Attempt to allocate a new ID. We want the ID to be randomly distributed in the range. To do
    // this, choose a random candidate. Check if it's already in the used set, and increment in a
    // loop until an unused slot is found. Ideally we could ask the used set what its next used or
    // unused id is after some given X, but that's not part of the API. This means that when the
    // range is large and the used set is near full, we will spend a lot of cycles looking for an
    // open slot. However, we limit the number of connections to the thousands, and connection IDs
    // have 20 bits (~1 million) of space, so it is not currently possible to enter that state.
    fn alloc_internal(&self) -> Option<u32> {
        let mut inner = self.0.lock().expect("lock poisoned");
        inner.alloc()
    }

    fn free_internal(&self, id: u32) {
        let mut inner = self.0.lock().expect("lock poisoned");
        inner.remove(id);
    }
}

/// A clone-able owned reference to an ID.
///
/// Once all of the [`IdHandle`]s referencing an ID have been dropped, the ID is freed for
/// later re-use.
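///
/// A minimal sketch: a single-slot allocator only returns the ID to the pool once every
/// clone of the handle has been dropped.
///
/// ```
/// use mz_ore::id_gen::{IdAllocator, IdAllocatorInnerBitSet};
///
/// let allocator = IdAllocator::<IdAllocatorInnerBitSet>::new(7, 7, 0);
/// let handle = allocator.alloc().unwrap();
/// let clone = handle.clone();
/// drop(handle);
/// assert!(allocator.alloc().is_none()); // `clone` still holds the ID.
/// drop(clone);
/// assert!(allocator.alloc().is_some()); // Now the ID is free again.
/// ```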
#[derive(Debug)]
pub enum IdHandle<T, A: IdAllocatorInner> {
    /// An ID "allocated" at compile time.
    ///
    /// Note: It is *entirely* up to the caller to make sure the provided ID is
    /// not used by a dynamic ID allocator.
    Static(T),
    /// An ID allocated at runtime; freed once all handles have been dropped.
    Dynamic(Arc<internal::IdHandleInner<T, A>>),
}

impl<T: Clone, A: IdAllocatorInner> Clone for IdHandle<T, A> {
    fn clone(&self) -> Self {
        match self {
            IdHandle::Static(t) => IdHandle::Static(t.clone()),
            IdHandle::Dynamic(handle) => IdHandle::Dynamic(Arc::clone(handle)),
        }
    }
}

impl<T: IdGenerator, A: IdAllocatorInner> IdHandle<T, A> {
    /// Returns the raw ID inside of this handle.
    ///
    /// Use with caution! It is easy for a raw ID to outlive the handle from
    /// which it came. You are responsible for ensuring that your use of the raw
    /// ID does not lead to ID reuse bugs.
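    ///
    /// A minimal sketch of the hazard: the raw value can outlive the handle that kept it
    /// reserved.
    ///
    /// ```
    /// use mz_ore::id_gen::{IdAllocator, IdAllocatorInnerBitSet};
    ///
    /// let allocator = IdAllocator::<IdAllocatorInnerBitSet>::new(0, 0, 0);
    /// let handle = allocator.alloc().unwrap();
    /// let raw = handle.unhandled();
    /// drop(handle);
    /// // `raw` still exists, but the ID has been freed and can be handed out again.
    /// let reused = allocator.alloc().unwrap();
    /// assert_eq!(raw, reused.unhandled());
    /// ```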
    pub fn unhandled(&self) -> T {
        *self.borrow()
    }
}

impl<T: IdGenerator, A: IdAllocatorInner> PartialEq for IdHandle<T, A> {
    fn eq(&self, other: &Self) -> bool {
        self.unhandled() == other.unhandled()
    }
}
impl<T: IdGenerator, A: IdAllocatorInner> Eq for IdHandle<T, A> {}

impl<T: IdGenerator, A: IdAllocatorInner> PartialOrd for IdHandle<T, A> {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl<T: IdGenerator, A: IdAllocatorInner> Ord for IdHandle<T, A> {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.unhandled().cmp(&other.unhandled())
    }
}

impl<T, A: IdAllocatorInner> Borrow<T> for IdHandle<T, A> {
    fn borrow(&self) -> &T {
        match self {
            IdHandle::Static(id) => id,
            IdHandle::Dynamic(inner) => &inner.id,
        }
    }
}

impl<T: IdGenerator, A: IdAllocatorInner> fmt::Display for IdHandle<T, A> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.unhandled().fmt(f)
    }
}

impl<T: IdGenerator, A: IdAllocatorInner> serde::Serialize for IdHandle<T, A> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        self.unhandled().serialize(serializer)
    }
}

mod internal {
    use std::sync::Arc;

    use crate::cast::CastFrom;
    use crate::id_gen::{IdAllocator, IdAllocatorInner};

    #[derive(Debug)]
    pub struct IdHandleInner<T, A: IdAllocatorInner> {
        /// A handle to the [`IdAllocator`] used to allocate the provided id.
        pub(super) allocator: IdAllocator<A>,
        /// The actual ID that was allocated.
        pub(super) id: T,
        stored: u32,
    }

    impl<T, A: IdAllocatorInner> IdHandleInner<T, A>
    where
        T: CastFrom<u32>,
    {
        pub fn new(allocator: &IdAllocator<A>) -> Option<Self> {
            let stored = allocator.alloc_internal()?;
            Some(IdHandleInner {
                allocator: IdAllocator(Arc::clone(&allocator.0)),
                id: T::cast_from(stored),
                stored,
            })
        }
    }

    impl<T, A: IdAllocatorInner> Drop for IdHandleInner<T, A> {
        fn drop(&mut self) {
            // Release our ID for later re-use.
            self.allocator.free_internal(self.stored);
        }
    }
}

/// Number of bits the org id is offset into a connection id.
pub const ORG_ID_OFFSET: usize = 19;

/// Max (inclusive) connection id that can be produced.
pub const MAX_ORG_ID: u32 = (1 << ORG_ID_OFFSET) - 1;

/// Extracts the lower 12 bits from an org id. These are later used as bits [30, 19] of a
/// connection id to help route cancellation requests.
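///
/// A minimal sketch of the round trip with [`conn_id_org_uuid`]:
///
/// ```
/// use mz_ore::id_gen::{conn_id_org_uuid, org_id_conn_bits};
/// use uuid::Uuid;
///
/// let uuid = Uuid::parse_str("9e37ec59-56f4-450a-acbd-18ff14f10ca8").unwrap();
/// let bits = org_id_conn_bits(&uuid);
/// // The last three hex digits of the UUID survive the round trip.
/// assert_eq!(conn_id_org_uuid(bits), "CA8");
/// ```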
pub fn org_id_conn_bits(uuid: &Uuid) -> u32 {
    let lower = uuid.as_u128();
    let lower = (lower & 0xFFF) << ORG_ID_OFFSET;
    let lower: u32 = lower.try_into().expect("must fit");
    lower
}

/// Returns the portion of the org's UUID present in connection id.
pub fn conn_id_org_uuid(conn_id: u32) -> String {
    const UPPER: [char; 16] = [
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
    ];

    // Extract the org id bits from the conn_id: the 12 bits starting at ORG_ID_OFFSET
    // (i.e., the upper bits, excluding the topmost).
    let orgid = usize::try_from((conn_id >> ORG_ID_OFFSET) & 0xFFF).expect("must cast");
    // Convert the bits into a 3 char uppercase hex string (later injected into the
    // resolver template).
    let mut dst = String::with_capacity(3);
    dst.push(UPPER[(orgid >> 8) & 0xf]);
    dst.push(UPPER[(orgid >> 4) & 0xf]);
    dst.push(UPPER[orgid & 0xf]);
    dst
}

/// Generate a random temporary ID.
///
/// Concretely, we generate a UUIDv4 and return the last 12 characters for maximum uniqueness.
///
/// Note: the reason we use the last 12 characters is that bits 6, 7, and 12-15 are all
/// hard-coded <https://www.rfc-editor.org/rfc/rfc4122#section-4.4>.
/// ```
/// use mz_ore::id_gen::temp_id;
///
/// let temp = temp_id();
/// assert_eq!(temp.len(), 12);
/// assert!(temp.is_ascii());
/// ```
pub fn temp_id() -> String {
    let temp_uuid = uuid::Uuid::new_v4().as_hyphenated().to_string();
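    // Walk the string backwards and take everything after the last '-', i.e. the final
    // 12 hex characters. They are collected in reverse order, which is fine for a
    // random ID.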
    temp_uuid.chars().rev().take_while(|c| *c != '-').collect()
}

#[cfg(test)]
mod tests {
    use std::collections::BTreeMap;

    use crate::assert_none;

    use super::*;

    #[crate::test]
    fn test_conn_org() {
        let uuid = Uuid::parse_str("9e37ec59-56f4-450a-acbd-18ff14f10ca8").unwrap();
        let lower = org_id_conn_bits(&uuid);
        let org_lower_uuid = conn_id_org_uuid(lower);
        assert_eq!(org_lower_uuid, "CA8");
    }

    #[crate::test]
    fn test_id_gen() {
        test_ad_allocator::<IdAllocatorInnerBitSet>();
    }

    // Test that masks and maxes that intersect panic.
    #[crate::test]
    #[should_panic]
    fn test_mask_intersect() {
        let env_lower = org_id_conn_bits(&uuid::Uuid::from_u128(u128::MAX));
        let ida = IdAllocator::<IdAllocatorInnerBitSet>::new(
            1 << ORG_ID_OFFSET,
            1 << ORG_ID_OFFSET,
            env_lower,
        );
        let id = ida.alloc().unwrap();
        assert_eq!(id.unhandled(), (0xfff << ORG_ID_OFFSET) | MAX_ORG_ID);
    }

    fn test_ad_allocator<A: IdAllocatorInner>() {
        test_id_alloc::<A>();
        test_static_id_sorting::<A>();
        test_id_reuse::<A>();
        test_display::<A>();
        test_map_lookup::<A>();
        test_serialization::<A>();
        test_mask::<A>();
        test_mask_envd::<A>();
    }

    fn test_mask<A: IdAllocatorInner>() {
        let ida = IdAllocator::<A>::new(1, 1, 0xfff << 20);
        let id = ida.alloc().unwrap();
        assert_eq!(id.unhandled(), (0xfff << 20) | 1);
    }

    // Test that the random conn id and the uuid, each with all bits set, don't intersect.
    fn test_mask_envd<A: IdAllocatorInner>() {
        let env_lower = org_id_conn_bits(&uuid::Uuid::from_u128(u128::MAX));
        let ida = IdAllocator::<A>::new(MAX_ORG_ID, MAX_ORG_ID, env_lower);
        let id = ida.alloc().unwrap();
        assert_eq!(id.unhandled(), (0xfff << ORG_ID_OFFSET) | MAX_ORG_ID);
    }

    fn test_id_alloc<A: IdAllocatorInner>() {
        let ida = IdAllocator::<A>::new(3, 5, 0);
        let id3 = ida.alloc().unwrap();
        let id4 = ida.alloc().unwrap();
        let id5 = ida.alloc().unwrap();
        assert_ne!(id3, id4);
        assert_ne!(id3, id5);
        assert_ne!(id4, id5);
        drop(id4);
        let _id4 = ida.alloc().unwrap();
        drop(id5);
        drop(id3);
        let _id5 = ida.alloc().unwrap();
        let _id3 = ida.alloc().unwrap();
        match ida.alloc() {
            Some(id) => panic!(
                "id allocator returned {}, not expected id exhaustion error",
                id
            ),
            None => (),
        }
    }

    fn test_static_id_sorting<A: IdAllocatorInner>() {
        let ida = IdAllocator::<A>::new(0, 0, 0);
        let id0 = ida.alloc().unwrap();
        let id1 = IdHandle::Static(1);
        assert!(id0 < id1);

        let ida = IdAllocator::<A>::new(1, 1, 0);
        let id0 = IdHandle::Static(0);
        let id1 = ida.alloc().unwrap();
        assert!(id0 < id1);
    }

    fn test_id_reuse<A: IdAllocatorInner>() {
        let allocator = IdAllocator::<A>::new(10, 11, 0);

        let id_a = allocator.alloc().unwrap();
        let a = id_a.unhandled();
        let id_a_clone = id_a.clone();
        // a should not get freed.
        drop(id_a);

        // There are only two slots, so trying to allocate 2 more should fail the second time.
        let _id_b = allocator.alloc().unwrap();
        assert_none!(allocator.alloc());

        // a should get freed since all outstanding references have been dropped.
        drop(id_a_clone);

        // We should re-use a.
        let id_c = allocator.alloc().unwrap();
        assert_eq!(id_c.unhandled(), a);
    }

    fn test_display<A: IdAllocatorInner>() {
        let allocator = IdAllocator::<A>::new(65_000, 65_000, 0);

        let id_a = allocator.alloc().unwrap();
        assert_eq!(id_a.unhandled(), 65_000);

        // An IdHandle should use the inner type's Display impl.
        let id_display = format!("{id_a}");
        let val_display = format!("{}", id_a.unhandled());

        assert_eq!(id_display, val_display);
    }

    fn test_map_lookup<A: IdAllocatorInner>() {
        let allocator = IdAllocator::<A>::new(99, 101, 0);

        let id_a = allocator.alloc().unwrap();
        let a = id_a.unhandled();

        let mut btree = BTreeMap::new();
        btree.insert(id_a, "hello world");

        // We should be able to lookup an IdHandle, based on just the value.
        let entry = btree.remove(&a).unwrap();
        assert_eq!(entry, "hello world");

        assert!(btree.is_empty());
    }

    fn test_serialization<A: IdAllocatorInner>() {
        let allocator = IdAllocator::<A>::new(42, 42, 0);

        let id_a = allocator.alloc().unwrap();
        assert_eq!(id_a.unhandled(), 42);

        // An IdHandle should serialize the same as the inner value.
        let id_json = serde_json::to_string(&id_a).unwrap();
        let val_json = serde_json::to_string(&id_a.unhandled()).unwrap();

        assert_eq!(id_json, val_json);
    }
557}