seahash/reference.rs
1//! A slow, but clear reference implementation of SeaHash.
2//!
3//! # Specification
4//!
5//! The input buffer is padded with null bytes until the length is divisible by 8.
6//!
7//! We start out with state
8//!
9//! ```notest
10//! a = 0x16f11fe89b0d677c
11//! b = 0xb480a793d8e6c86c
12//! c = 0x6fe2e5aaf078ebc9
13//! d = 0x14f994a4c5259381
14//! ```
15//!
16//! If a seed is given, each of the initial state component are modularly multiplied by the seed.
17//!
18//! From the stream, we read one 64-bit block (in little-endian) at a time. This number, `n`,
19//! determines the new state by:
20//!
21//! ```notest
22//! a' = b
23//! b' = c
24//! c' = d
25//! d' = g(a ⊕ n)
26//! ```
27//!
28//! `g(x)` is defined as `g(x) = j(h(j(x)))` with `h(x) = (x ≫ 32) ≫ (x ≫ 60)` and `j(x) ≡ px (mod
29//! 2^64)` with `p = 0x7ed0e9fa0d94a33`.
30//!
31//! Let the final state be `(x, y, z, w)`. Then the final result is given by `H = g(x ⊕ y ⊕ z ⊕ w ⊕
32//! l)` where `l` is the number of bytes in the original buffer.
33
34use helper;
35
36/// Read an integer in little-endian.
37fn read_int(int: &[u8]) -> u64 {
38 debug_assert!(
39 int.len() <= 8,
40 "The buffer length of the integer must be less than or equal to \
41 the one of an u64."
42 );
43
44 // Start at 0.
45 let mut x = 0;
46 for &i in int.iter().rev() {
47 // Shift up a byte.
48 x <<= 8;
49 // Set the lower byte.
50 x |= i as u64;
51 }
52
53 x
54}
55
56/// A hash state.
57struct State {
58 /// The `a` substate.
59 a: u64,
60 /// The `b` substate.
61 b: u64,
62 /// The `c` substate.
63 c: u64,
64 /// The `d` substate.
65 d: u64,
66}
67
68impl State {
69 /// Write a 64-bit integer to the state.
70 fn write_u64(&mut self, x: u64) {
71 let mut a = self.a;
72
73 // Mix `x` into `a`.
74 a = helper::diffuse(a ^ x);
75
76 // Rotate around.
77 // _______________________
78 // | v
79 // a <---- b <---- c <---- d
80 self.a = self.b;
81 self.b = self.c;
82 self.c = self.d;
83 self.d = a;
84 }
85
86 /// Calculate the final hash.
87 fn finish(self, total: usize) -> u64 {
88 // Even though XORing is commutative, it doesn't matter, because the state vector's initial
89 // components are mutually distinct, and thus swapping even and odd chunks will affect the
90 // result, because it is sensitive to the initial condition. To add discreteness, we
91 // diffuse.
92 helper::diffuse(
93 self.a ^ self.b ^ self.c ^ self.d
94 // We XOR in the number of written bytes to make it zero-sensitive when excessive bytes
95 // are written (0u32.0u8 ≠ 0u16.0u8).
96 ^ total as u64,
97 )
98 }
99
100 /// Create a new state with some initial values (seed).
101 fn with_seeds(k1: u64, k2: u64, k3: u64, k4: u64) -> State {
102 State {
103 // These values are randomly generated.
104 a: k1,
105 b: k2,
106 c: k3,
107 d: k4,
108 }
109 }
110}
111
112/// A reference implementation of SeaHash.
113///
114/// This is bloody slow when compared to the optimized version. This is because SeaHash was
115/// specifically designed to take all sorts of hardware and software hacks into account to achieve
116/// maximal performance, but this makes code significantly less readable. As such, this version has
117/// only one goal: to make the algorithm readable and understandable.
118pub fn hash(buf: &[u8]) -> u64 {
119 hash_seeded(
120 buf,
121 0x16f11fe89b0d677c,
122 0xb480a793d8e6c86c,
123 0x6fe2e5aaf078ebc9,
124 0x14f994a4c5259381,
125 )
126}
127
128/// The seeded version of the reference implementation.
129pub fn hash_seeded(buf: &[u8], k1: u64, k2: u64, k3: u64, k4: u64) -> u64 {
130 // Initialize the state.
131 let mut state = State::with_seeds(k1, k2, k3, k4);
132
133 // Partition the rounded down buffer into chunks of 8 bytes, and iterate over them. The last
134 // block might not be 8 bytes long.
135 for int in buf.chunks(8) {
136 // Read the chunk into an integer and write into the state.
137 state.write_u64(read_int(int));
138 }
139
140 // Finish the hash state and return the final value.
141 state.finish(buf.len())
142}
143
144#[cfg(test)]
145mod tests {
146 use super::*;
147
148 #[test]
149 fn shakespear() {
150 assert_eq!(hash(b"to be or not to be"), 1988685042348123509);
151 }
152}