seahash/buffer.rs
//! A highly optimized version of SeaHash.

use std::slice;

use helper;
/// A SeaHash state.
#[derive(Clone)]
pub struct State {
    /// `a`
    a: u64,
    /// `b`
    b: u64,
    /// `c`
    c: u64,
    /// `d`
    d: u64,
    /// The number of written bytes.
    written: u64,
}

impl State {
    /// Create a new state vector with some initial values.
    pub fn new(a: u64, b: u64, c: u64, d: u64) -> State {
        State {
            a,
            b,
            c,
            d,
            written: 0,
        }
    }

    /// Hash a buffer with some seed.
    pub fn hash(buf: &[u8], (mut a, mut b, mut c, mut d): (u64, u64, u64, u64)) -> State {
        unsafe {
            // We use 4 different registers to store separate hash states, because this allows us
            // to update them separately, and consequently exploit ILP to update the states in
            // parallel.

            // The pointer to the current bytes.
            let mut ptr = buf.as_ptr();
            // The end of the "main segment", i.e. the longest prefix whose length is divisible
            // by 32.
            let end_ptr = buf.as_ptr().offset(buf.len() as isize & !0x1F);
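            // For example, a 100-byte buffer has a 96-byte main segment (three full 32-byte
            // blocks) and 4 excessive bytes.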

            while end_ptr > ptr {
                // Modern CPUs allow the pointer arithmetic to be done in place, hence not
                // introducing tmpvars.
                a ^= helper::read_u64(ptr);
                b ^= helper::read_u64(ptr.offset(8));
                c ^= helper::read_u64(ptr.offset(16));
                d ^= helper::read_u64(ptr.offset(24));

                // Increment the pointer.
                ptr = ptr.offset(32);

                // Diffuse the updated registers. We hope that each of these is executed in
                // parallel.
                a = helper::diffuse(a);
                b = helper::diffuse(b);
                c = helper::diffuse(c);
                d = helper::diffuse(d);
            }

            // Calculate the number of excessive bytes, i.e. `buf.len() % 32`. These are bytes
            // that could not be handled by the unrolled loop above.
            let mut excessive = buf.len() + buf.as_ptr() as usize - end_ptr as usize;
            // Handle the excessive bytes.
            match excessive {
                0 => {}
                1..=7 => {
                    // 1 or more excessive.

                    // Write the last excessive bytes (<8 bytes).
                    a ^= helper::read_int(slice::from_raw_parts(ptr, excessive));

                    // Diffuse.
                    a = helper::diffuse(a);
                }
                8 => {
                    // 8 bytes excessive.

                    // Mix in the partial block.
                    a ^= helper::read_u64(ptr);

                    // Diffuse.
                    a = helper::diffuse(a);
                }
                9..=15 => {
                    // More than 8 bytes excessive.

                    // Mix in the partial block.
                    a ^= helper::read_u64(ptr);

                    // Write the last excessive bytes (<8 bytes).
                    excessive -= 8;
                    b ^= helper::read_int(slice::from_raw_parts(ptr.offset(8), excessive));

                    // Diffuse.
                    a = helper::diffuse(a);
                    b = helper::diffuse(b);
                }
                16 => {
                    // 16 bytes excessive.

                    // Mix in the partial block.
                    a = helper::diffuse(a ^ helper::read_u64(ptr));
                    b = helper::diffuse(b ^ helper::read_u64(ptr.offset(8)));
                }
                17..=23 => {
                    // More than 16 bytes excessive.

                    // Mix in the partial block.
                    a ^= helper::read_u64(ptr);
                    b ^= helper::read_u64(ptr.offset(8));

                    // Write the last excessive bytes (<8 bytes).
                    excessive -= 16;
                    c ^= helper::read_int(slice::from_raw_parts(ptr.offset(16), excessive));

                    // Diffuse.
                    a = helper::diffuse(a);
                    b = helper::diffuse(b);
                    c = helper::diffuse(c);
                }
                24 => {
                    // 24 bytes excessive.

                    // Mix in the partial block.
                    a ^= helper::read_u64(ptr);
                    b ^= helper::read_u64(ptr.offset(8));
                    c ^= helper::read_u64(ptr.offset(16));

                    // Diffuse.
                    a = helper::diffuse(a);
                    b = helper::diffuse(b);
                    c = helper::diffuse(c);
                }
                _ => {
                    // More than 24 bytes excessive.

                    // Mix in the partial block.
                    a ^= helper::read_u64(ptr);
                    b ^= helper::read_u64(ptr.offset(8));
                    c ^= helper::read_u64(ptr.offset(16));

                    // Write the last excessive bytes (<8 bytes).
                    excessive -= 24;
                    d ^= helper::read_int(slice::from_raw_parts(ptr.offset(24), excessive));

                    // Diffuse.
                    a = helper::diffuse(a);
                    b = helper::diffuse(b);
                    c = helper::diffuse(c);
                    d = helper::diffuse(d);
                }
            }
        }

        State {
            a,
            b,
            c,
            d,
            written: buf.len() as u64,
        }
    }

    /// Write another 64-bit integer into the state.
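    ///
    /// # Examples
    ///
    /// A minimal sketch; it assumes `State` and `hash_seeded` are re-exported at the crate
    /// root, as in the published crate.
    ///
    /// ```
    /// use seahash::{hash_seeded, State};
    ///
    /// // Pushing the `u64` chunks of a buffer into a fresh state and finalizing is
    /// // equivalent to hashing that buffer directly with the same seed.
    /// let mut state = State::new(1, 2, 3, 4);
    /// state.push(0);
    /// assert_eq!(state.finalize(), hash_seeded(&[0; 8], 1, 2, 3, 4));
    /// ```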
    pub fn push(&mut self, x: u64) {
        // Mix `x` into `a`.
        let a = helper::diffuse(self.a ^ x);

        // Rotate around.
        //  _______________________
        // |                       v
        // a <---- b <---- c <---- d
        self.a = self.b;
        self.b = self.c;
        self.c = self.d;
        self.d = a;

        // Increase the written bytes counter.
        self.written += 8;
    }

    /// Remove the most recently written 64-bit integer from the state.
    ///
    /// Given the value of the most recently written u64 `last`, remove it from the state.
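    ///
    /// # Examples
    ///
    /// A minimal sketch; it assumes `State` is re-exported at the crate root, as in the
    /// published crate.
    ///
    /// ```
    /// use seahash::State;
    ///
    /// let mut state = State::new(1, 2, 3, 4);
    /// let before = state.clone().finalize();
    ///
    /// // `pop` exactly undoes the most recent `push`, restoring the previous state.
    /// state.push(0xdead);
    /// state.pop(0xdead);
    /// assert_eq!(state.finalize(), before);
    /// ```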
    pub fn pop(&mut self, last: u64) {
        // Un-mix `last` from `d`. Removes the recently written data.
        let d = helper::undiffuse(self.d) ^ last;

        // Rotate back.
        //  _______________________
        // v                       |
        // a ----> b ----> c ----> d
        self.d = self.c;
        self.c = self.b;
        self.b = self.a;
        self.a = d;

        // Decrease the written bytes counter.
        self.written -= 8;
    }

    /// Finalize the state.
    #[inline]
    pub fn finalize(self) -> u64 {
        let State {
            written,
            mut a,
            b,
            mut c,
            d,
        } = self;

        // XOR the states together. Even though XOR is commutative, the order of the lanes still
        // matters: the state vector's initial components are mutually distinct, so swapping even
        // and odd chunks would change the result, because the hash is sensitive to its initial
        // conditions.
        a ^= b;
        c ^= d;
        a ^= c;
        // XOR the number of written bytes in order to make the excessive bytes zero-sensitive
        // (without this, two excessive zeros would be equivalent to three excessive zeros). This
        // is known as length padding.
        a ^= written;

        // We diffuse to make the excessive bytes discrete (i.e. small changes shouldn't give
        // small changes in the output).
        helper::diffuse(a)
    }
}

/// Hash some buffer.
///
/// This is a highly optimized implementation of SeaHash. It implements numerous techniques to
/// improve performance:
///
/// - Register allocation: This goes to great lengths to make sure everything fits into
///   registers, so that minimal memory accesses are needed. This works quite successfully on
///   most CPUs, and the only time it reads from memory is when it fetches the data of the
///   buffer.
/// - Bulk reads: Like most other good hash functions, we read 8 bytes at a time. This obviously
///   improves performance a lot.
/// - Independent updates: We make sure that very few adjacent statements depend on each other.
///   This means that the CPU can almost always run the instructions in parallel.
/// - Loop unrolling: The hot loop is unrolled such that very few branches (one per 32 bytes)
///   are needed.
///
/// and more.
///
/// The seed of this hash function is prechosen.
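///
/// # Examples
///
/// A minimal sketch; it assumes this function is re-exported at the crate root as
/// `seahash::hash`, as in the published crate.
///
/// ```
/// // Hashing is deterministic: equal buffers always hash equally.
/// assert_eq!(seahash::hash(b"to be or not to be"), seahash::hash(b"to be or not to be"));
///
/// // Distinct buffers hash differently (with overwhelming probability).
/// assert_ne!(seahash::hash(b"to be"), seahash::hash(b"not to be"));
/// ```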
pub fn hash(buf: &[u8]) -> u64 {
    hash_seeded(
        buf,
        0x16f11fe89b0d677c,
        0xb480a793d8e6c86c,
        0x6fe2e5aaf078ebc9,
        0x14f994a4c5259381,
    )
}

/// Hash some buffer according to a chosen seed.
///
/// The keys are expected to be chosen from a uniform distribution. The keys should be mutually
/// distinct to avoid issues with collisions if the lanes are permuted.
///
/// This is not secure, as [the key can be extracted with a bit of computational
/// work](https://github.com/ticki/tfs/issues/5); as such, it is recommended to have a fallback
/// hash function (adaptive hashing) in the case of hash flooding. It can be considered unbroken
/// if the output is not known (i.e. no malicious party has access to the raw values of the keys,
/// only a permutation thereof); however, I absolutely do not recommend relying on this. If you
/// want to be strict, this should only be used as a layer of obfuscation, such that the fallback
/// (e.g. SipHash) is harder to trigger.
///
/// In the future, I might strengthen the security if possible while keeping backward
/// compatibility with the default initialization vector.
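///
/// # Examples
///
/// A minimal sketch; it assumes this function is re-exported at the crate root as
/// `seahash::hash_seeded`, as in the published crate.
///
/// ```
/// // Different seeds define independent hash functions, so the same buffer will (with
/// // overwhelming probability) hash to different values under them.
/// let x = seahash::hash_seeded(b"some data", 1, 2, 3, 4);
/// let y = seahash::hash_seeded(b"some data", 5, 6, 7, 8);
/// assert_ne!(x, y);
/// ```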
pub fn hash_seeded(buf: &[u8], a: u64, b: u64, c: u64, d: u64) -> u64 {
    State::hash(buf, (a, b, c, d)).finalize()
}

#[cfg(test)]
mod tests {
    use super::*;

    use reference;

    fn hash_match(a: &[u8]) {
        assert_eq!(hash(a), reference::hash(a));
        assert_eq!(
            hash_seeded(a, 1, 1, 1, 1),
            reference::hash_seeded(a, 1, 1, 1, 1)
        );
        assert_eq!(
            hash_seeded(a, 500, 2873, 2389, 9283),
            reference::hash_seeded(a, 500, 2873, 2389, 9283)
        );
        assert_eq!(
            hash_seeded(a, 238945723984, 872894734, 239478243, 28937498234),
            reference::hash_seeded(a, 238945723984, 872894734, 239478243, 28937498234)
        );
        assert_eq!(
            hash_seeded(a, !0, !0, !0, !0),
            reference::hash_seeded(a, !0, !0, !0, !0)
        );
        assert_eq!(
            hash_seeded(a, 0, 0, 0, 0),
            reference::hash_seeded(a, 0, 0, 0, 0)
        );
    }

    #[test]
    #[cfg_attr(miri, ignore)] // very slow to run on miri
    fn zero() {
        let arr = [0; 4096];
        for n in 0..4096 {
            hash_match(&arr[0..n]);
        }
    }

    #[test]
    fn seq() {
        let mut buf = [0; 4096];
        for i in 0..4096 {
            buf[i] = i as u8;
        }
        hash_match(&buf);
    }

    #[test]
    fn position_dependent() {
        let mut buf1 = [0; 4098];
        for i in 0..4098 {
            buf1[i] = i as u8;
        }
        let mut buf2 = [0; 4098];
        for i in 0..4098 {
            buf2[i] = i as u8 ^ 1;
        }

        assert!(hash(&buf1) != hash(&buf2));
    }

    #[test]
    fn shakespeare() {
        hash_match(b"to be or not to be");
        hash_match(b"love is a wonderful terrible thing");
    }

    #[test]
    fn zero_sensitive() {
        assert_ne!(hash(&[1, 2, 3, 4]), hash(&[1, 0, 2, 3, 4]));
        assert_ne!(hash(&[1, 2, 3, 4]), hash(&[1, 0, 0, 2, 3, 4]));
        assert_ne!(hash(&[1, 2, 3, 4]), hash(&[1, 2, 3, 4, 0]));
        assert_ne!(hash(&[1, 2, 3, 4]), hash(&[0, 1, 2, 3, 4]));
        assert_ne!(hash(&[0, 0, 0]), hash(&[0, 0, 0, 0, 0]));
    }

    #[test]
    fn not_equal() {
        assert_ne!(hash(b"to be or not to be "), hash(b"to be or not to be"));
        assert_ne!(hash(b"jkjke"), hash(b"jkjk"));
        assert_ne!(hash(b"ijkjke"), hash(b"ijkjk"));
        assert_ne!(hash(b"iijkjke"), hash(b"iijkjk"));
        assert_ne!(hash(b"iiijkjke"), hash(b"iiijkjk"));
        assert_ne!(hash(b"iiiijkjke"), hash(b"iiiijkjk"));
        assert_ne!(hash(b"iiiiijkjke"), hash(b"iiiiijkjk"));
        assert_ne!(hash(b"iiiiiijkjke"), hash(b"iiiiiijkjk"));
        assert_ne!(hash(b"iiiiiiijkjke"), hash(b"iiiiiiijkjk"));
        assert_ne!(hash(b"iiiiiiiijkjke"), hash(b"iiiiiiiijkjk"));
        assert_ne!(hash(b"ab"), hash(b"bb"));
    }

    #[test]
    fn push() {
        let mut state = State::new(1, 2, 3, 4);
        state.push(!0);
        state.push(0);

        assert_eq!(
            hash_seeded(
                &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0],
                1,
                2,
                3,
                4
            ),
            state.finalize()
        );
    }

    #[test]
    fn pop() {
        let mut state = State::new(1, 2, 3, 4);
        state.push(!0);
        state.push(0);
        state.pop(0);

        assert_eq!(
            hash_seeded(
                &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF],
                1,
                2,
                3,
                4
            ),
            state.finalize()
        );
    }
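
    #[test]
    fn finalize_fresh_state_matches_empty_buffer() {
        // A fresh state has nothing mixed in and `written == 0`, so finalizing it must agree
        // with hashing the empty buffer under the same seed.
        assert_eq!(State::new(1, 2, 3, 4).finalize(), hash_seeded(&[], 1, 2, 3, 4));
    }

    #[test]
    fn tail_branches() {
        // Exercise every excessive-byte branch of `State::hash` (0 through 31 tail bytes, plus
        // full blocks) with non-zero data, comparing against the reference implementation.
        let mut buf = [0; 64];
        for i in 0..64 {
            buf[i] = i as u8 + 1;
        }
        for n in 0..=64 {
            hash_match(&buf[..n]);
        }
    }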
}