seahash/
reference.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
//! A slow, but clear reference implementation of SeaHash.
//!
//! # Specification
//!
//! The input buffer is padded with null bytes until the length is divisible by 8.
//!
//! We start out with state
//!
//! ```notest
//! a = 0x16f11fe89b0d677c
//! b = 0xb480a793d8e6c86c
//! c = 0x6fe2e5aaf078ebc9
//! d = 0x14f994a4c5259381
//! ```
//!
//! If a seed is given, each of the initial state component are modularly multiplied by the seed.
//!
//! From the stream, we read one 64-bit block (in little-endian) at a time.  This number, `n`,
//! determines the new state by:
//!
//! ```notest
//! a' = b
//! b' = c
//! c' = d
//! d' = g(a ⊕ n)
//! ```
//!
//! `g(x)` is defined as `g(x) = j(h(j(x)))` with `h(x) = (x ≫ 32) ≫ (x ≫ 60)` and `j(x) ≡ px (mod
//! 2^64)` with `p = 0x7ed0e9fa0d94a33`.
//!
//! Let the final state be `(x, y, z, w)`. Then the final result is given by `H = g(x ⊕ y ⊕ z ⊕ w ⊕
//! l)` where `l` is the number of bytes in the original buffer.

use helper;

/// Read an integer in little-endian.
fn read_int(int: &[u8]) -> u64 {
    debug_assert!(
        int.len() <= 8,
        "The buffer length of the integer must be less than or equal to \
                  the one of an u64."
    );

    // Start at 0.
    let mut x = 0;
    for &i in int.iter().rev() {
        // Shift up a byte.
        x <<= 8;
        // Set the lower byte.
        x |= i as u64;
    }

    x
}

/// A hash state.
struct State {
    /// The `a` substate.
    a: u64,
    /// The `b` substate.
    b: u64,
    /// The `c` substate.
    c: u64,
    /// The `d` substate.
    d: u64,
}

impl State {
    /// Write a 64-bit integer to the state.
    fn write_u64(&mut self, x: u64) {
        let mut a = self.a;

        // Mix `x` into `a`.
        a = helper::diffuse(a ^ x);

        //  Rotate around.
        //  _______________________
        // |                       v
        // a <---- b <---- c <---- d
        self.a = self.b;
        self.b = self.c;
        self.c = self.d;
        self.d = a;
    }

    /// Calculate the final hash.
    fn finish(self, total: usize) -> u64 {
        // Even though XORing is commutative, it doesn't matter, because the state vector's initial
        // components are mutually distinct, and thus swapping even and odd chunks will affect the
        // result, because it is sensitive to the initial condition. To add discreteness, we
        // diffuse.
        helper::diffuse(
            self.a ^ self.b ^ self.c ^ self.d
            // We XOR in the number of written bytes to make it zero-sensitive when excessive bytes
            // are written (0u32.0u8 ≠ 0u16.0u8).
            ^ total as u64,
        )
    }

    /// Create a new state with some initial values (seed).
    fn with_seeds(k1: u64, k2: u64, k3: u64, k4: u64) -> State {
        State {
            // These values are randomly generated.
            a: k1,
            b: k2,
            c: k3,
            d: k4,
        }
    }
}

/// A reference implementation of SeaHash.
///
/// This is bloody slow when compared to the optimized version. This is because SeaHash was
/// specifically designed to take all sorts of hardware and software hacks into account to achieve
/// maximal performance, but this makes code significantly less readable. As such, this version has
/// only one goal: to make the algorithm readable and understandable.
pub fn hash(buf: &[u8]) -> u64 {
    hash_seeded(
        buf,
        0x16f11fe89b0d677c,
        0xb480a793d8e6c86c,
        0x6fe2e5aaf078ebc9,
        0x14f994a4c5259381,
    )
}

/// The seeded version of the reference implementation.
pub fn hash_seeded(buf: &[u8], k1: u64, k2: u64, k3: u64, k4: u64) -> u64 {
    // Initialize the state.
    let mut state = State::with_seeds(k1, k2, k3, k4);

    // Partition the rounded down buffer into chunks of 8 bytes, and iterate over them. The last
    // block might not be 8 bytes long.
    for int in buf.chunks(8) {
        // Read the chunk into an integer and write into the state.
        state.write_u64(read_int(int));
    }

    // Finish the hash state and return the final value.
    state.finish(buf.len())
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn shakespear() {
        assert_eq!(hash(b"to be or not to be"), 1988685042348123509);
    }
}