lz4_flex/block/
hashtable.rs

1#[allow(unused_imports)]
2use alloc::boxed::Box;
3
4/// The Hashtable trait used by the compression to store hashed bytes to their position.
5/// `val` can be maximum the size of the input in bytes.
6///
7/// `pos` can have a maximum value of u16::MAX or 65535
8/// If the hashtable is smaller it needs to reduce the pos to its space, e.g. by right
9/// shifting.
10///
11/// Duplication dictionary size.
12///
13/// Every four bytes is assigned an entry. When this number is lower, fewer entries exists, and
14/// thus collisions are more likely, hurting the compression ratio.
15
16/// hashes and right shifts to a maximum value of 16bit, 65535
17/// The right shift is done in order to not exceed, the hashtables capacity
18#[inline]
19fn hash(sequence: u32) -> u32 {
20    (sequence.wrapping_mul(2654435761_u32)) >> 16
21}
22
23/// hashes and right shifts to a maximum value of 16bit, 65535
24/// The right shift is done in order to not exceed, the hashtables capacity
25#[cfg(target_pointer_width = "64")]
26#[inline]
27fn hash5(sequence: usize) -> u32 {
28    let primebytes = if cfg!(target_endian = "little") {
29        889523592379_usize
30    } else {
31        11400714785074694791_usize
32    };
33    (((sequence << 24).wrapping_mul(primebytes)) >> 48) as u32
34}
35
36pub trait HashTable {
37    fn get_at(&self, pos: usize) -> usize;
38    fn put_at(&mut self, pos: usize, val: usize);
39    #[allow(dead_code)]
40    fn clear(&mut self);
41    #[inline]
42    #[cfg(target_pointer_width = "64")]
43    fn get_hash_at(input: &[u8], pos: usize) -> usize {
44        hash5(super::compress::get_batch_arch(input, pos)) as usize
45    }
46    #[inline]
47    #[cfg(target_pointer_width = "32")]
48    fn get_hash_at(input: &[u8], pos: usize) -> usize {
49        hash(super::compress::get_batch(input, pos)) as usize
50    }
51}
52
53const HASHTABLE_SIZE_4K: usize = 4 * 1024;
54const HASHTABLE_BIT_SHIFT_4K: usize = 4;
55
56#[derive(Debug)]
57#[repr(align(64))]
58pub struct HashTable4KU16 {
59    dict: Box<[u16; HASHTABLE_SIZE_4K]>,
60}
61impl HashTable4KU16 {
62    #[inline]
63    pub fn new() -> Self {
64        // This generates more efficient assembly in contrast to Box::new(slice), because of an
65        // optmized call alloc_zeroed, vs. alloc + memset
66        // try_into is optimized away
67        let dict = alloc::vec![0; HASHTABLE_SIZE_4K]
68            .into_boxed_slice()
69            .try_into()
70            .unwrap();
71        Self { dict }
72    }
73}
74impl HashTable for HashTable4KU16 {
75    #[inline]
76    fn get_at(&self, hash: usize) -> usize {
77        self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] as usize
78    }
79    #[inline]
80    fn put_at(&mut self, hash: usize, val: usize) {
81        self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] = val as u16;
82    }
83    #[inline]
84    fn clear(&mut self) {
85        self.dict.fill(0);
86    }
87    #[inline]
88    fn get_hash_at(input: &[u8], pos: usize) -> usize {
89        hash(super::get_batch(input, pos)) as usize
90    }
91}
92
93#[derive(Debug)]
94pub struct HashTable4K {
95    dict: Box<[u32; HASHTABLE_SIZE_4K]>,
96}
97impl HashTable4K {
98    #[inline]
99    pub fn new() -> Self {
100        let dict = alloc::vec![0; HASHTABLE_SIZE_4K]
101            .into_boxed_slice()
102            .try_into()
103            .unwrap();
104        Self { dict }
105    }
106
107    #[cold]
108    #[allow(dead_code)]
109    pub fn reposition(&mut self, offset: u32) {
110        for i in self.dict.iter_mut() {
111            *i = i.saturating_sub(offset);
112        }
113    }
114}
115impl HashTable for HashTable4K {
116    #[inline]
117    fn get_at(&self, hash: usize) -> usize {
118        self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] as usize
119    }
120    #[inline]
121    fn put_at(&mut self, hash: usize, val: usize) {
122        self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] = val as u32;
123    }
124    #[inline]
125    fn clear(&mut self) {
126        self.dict.fill(0);
127    }
128}
129
130const HASHTABLE_SIZE_8K: usize = 8 * 1024;
131const HASH_TABLE_BIT_SHIFT_8K: usize = 3;
132
133#[derive(Debug)]
134pub struct HashTable8K {
135    dict: Box<[u32; HASHTABLE_SIZE_8K]>,
136}
137#[allow(dead_code)]
138impl HashTable8K {
139    #[inline]
140    pub fn new() -> Self {
141        let dict = alloc::vec![0; HASHTABLE_SIZE_8K]
142            .into_boxed_slice()
143            .try_into()
144            .unwrap();
145
146        Self { dict }
147    }
148}
149impl HashTable for HashTable8K {
150    #[inline]
151    fn get_at(&self, hash: usize) -> usize {
152        self.dict[hash >> HASH_TABLE_BIT_SHIFT_8K] as usize
153    }
154    #[inline]
155    fn put_at(&mut self, hash: usize, val: usize) {
156        self.dict[hash >> HASH_TABLE_BIT_SHIFT_8K] = val as u32;
157    }
158    #[inline]
159    fn clear(&mut self) {
160        self.dict.fill(0);
161    }
162}