httparse/simd/
swar.rs

1/// SWAR: SIMD Within A Register
2/// SIMD validator backend that validates register-sized chunks of data at a time.
3use crate::{is_header_name_token, is_header_value_token, is_uri_token, Bytes};
4
5// Adapt block-size to match native register size, i.e: 32bit => 4, 64bit => 8
6const BLOCK_SIZE: usize = core::mem::size_of::<usize>();
7type ByteBlock = [u8; BLOCK_SIZE];
8
9#[inline]
10pub fn match_uri_vectored(bytes: &mut Bytes) {
11    loop {
12        if let Some(bytes8) = bytes.peek_n::<ByteBlock>(BLOCK_SIZE) {
13            let n = match_uri_char_8_swar(bytes8);
14            // SAFETY: using peek_n to retrieve the bytes ensures that there are at least n more bytes
15            // in `bytes`, so calling `advance(n)` is safe.
16            unsafe {
17                bytes.advance(n);
18            }
19            if n == BLOCK_SIZE {
20                continue;
21            }
22        }
23        if let Some(b) = bytes.peek() {
24            if is_uri_token(b) {
25                // SAFETY: using peek to retrieve the byte ensures that there is at least 1 more byte
26                // in bytes, so calling advance is safe.
27                unsafe {
28                    bytes.advance(1);
29                }
30                continue;
31            }
32        }
33        break;
34    }
35}
36
37#[inline]
38pub fn match_header_value_vectored(bytes: &mut Bytes) {
39    loop {
40        if let Some(bytes8) = bytes.peek_n::<ByteBlock>(BLOCK_SIZE) {
41            let n = match_header_value_char_8_swar(bytes8);
42            // SAFETY: using peek_n to retrieve the bytes ensures that there are at least n more bytes
43            // in `bytes`, so calling `advance(n)` is safe.
44            unsafe {
45                bytes.advance(n);
46            }
47            if n == BLOCK_SIZE {
48                continue;
49            }
50        }
51        if let Some(b) = bytes.peek() {
52            if is_header_value_token(b) {
53                // SAFETY: using peek to retrieve the byte ensures that there is at least 1 more byte
54                // in bytes, so calling advance is safe.
55                unsafe {
56                    bytes.advance(1);
57                }
58                continue;
59            }
60        }
61        break;
62    }
63}
64
65#[inline]
66pub fn match_header_name_vectored(bytes: &mut Bytes) {
67    while let Some(block) = bytes.peek_n::<ByteBlock>(BLOCK_SIZE) {
68        let n = match_block(is_header_name_token, block);
69        // SAFETY: using peek_n to retrieve the bytes ensures that there are at least n more bytes
70        // in `bytes`, so calling `advance(n)` is safe.
71        unsafe {
72            bytes.advance(n);
73        }
74        if n != BLOCK_SIZE {
75            return;
76        }
77    }
78    // SAFETY: match_tail processes at most the remaining data in `bytes`. advances `bytes` to the
79    // end, but no further.
80    unsafe { bytes.advance(match_tail(is_header_name_token, bytes.as_ref())) };
81}
82
83// Matches "tail", i.e: when we have <BLOCK_SIZE bytes in the buffer, should be uncommon
84#[cold]
85#[inline]
86fn match_tail(f: impl Fn(u8) -> bool, bytes: &[u8]) -> usize {
87    for (i, &b) in bytes.iter().enumerate() {
88        if !f(b) {
89            return i;
90        }
91    }
92    bytes.len()
93}
94
95// Naive fallback block matcher
96#[inline(always)]
97fn match_block(f: impl Fn(u8) -> bool, block: ByteBlock) -> usize {
98    for (i, &b) in block.iter().enumerate() {
99        if !f(b) {
100            return i;
101        }
102    }
103    BLOCK_SIZE
104}
105
106// A const alternative to u64::from_ne_bytes to avoid bumping MSRV (1.36 => 1.44)
107// creates a u64 whose bytes are each equal to b
108const fn uniform_block(b: u8) -> usize {
109    (b as u64 *  0x01_01_01_01_01_01_01_01 /* [1_u8; 8] */) as usize
110}
111
112// A byte-wise range-check on an entire word/block,
113// ensuring all bytes in the word satisfy `33 <= (x != 127) <= 255`
114#[inline]
115fn match_uri_char_8_swar(block: ByteBlock) -> usize {
116    // 33 <= (x != 127) <= 255
117    const M: u8 = 0x21;
118    // uniform block full of exclamation mark (!) (33).
119    const BM: usize = uniform_block(M);
120    // uniform block full of 1.
121    const ONE: usize = uniform_block(0x01);
122    // uniform block full of DEL (127).
123    const DEL: usize = uniform_block(0x7f);
124    // uniform block full of 128.
125    const M128: usize = uniform_block(128);
126
127    let x = usize::from_ne_bytes(block); // Really just a transmute
128    let lt = x.wrapping_sub(BM) & !x; // <= m
129
130    let xor_del = x ^ DEL;
131    let eq_del = xor_del.wrapping_sub(ONE) & !xor_del; // == DEL
132
133    offsetnz((lt | eq_del) & M128)
134}
135
136// A byte-wise range-check on an entire word/block,
137// ensuring all bytes in the word satisfy `32 <= (x != 127) <= 255`
138#[inline]
139fn match_header_value_char_8_swar(block: ByteBlock) -> usize {
140    // 32 <= (x != 127) <= 255
141    const M: u8 = 0x20;
142    // uniform block full of exclamation mark (!) (33).
143    const BM: usize = uniform_block(M);
144    // uniform block full of 1.
145    const ONE: usize = uniform_block(0x01);
146    // uniform block full of DEL (127).
147    const DEL: usize = uniform_block(0x7f);
148    // uniform block full of 128.
149    const M128: usize = uniform_block(128);
150
151    let x = usize::from_ne_bytes(block); // Really just a transmute
152    let lt = x.wrapping_sub(BM) & !x; // <= m
153
154    let xor_del = x ^ DEL;
155    let eq_del = xor_del.wrapping_sub(ONE) & !xor_del; // == DEL
156
157    offsetnz((lt | eq_del) & M128)
158}
159
160/// Check block to find offset of first non-zero byte
161// NOTE: Curiously `block.trailing_zeros() >> 3` appears to be slower, maybe revisit
162#[inline]
163fn offsetnz(block: usize) -> usize {
164    // fast path optimistic case (common for long valid sequences)
165    if block == 0 {
166        return BLOCK_SIZE;
167    }
168
169    // perf: rust will unroll this loop
170    for (i, b) in block.to_ne_bytes().iter().copied().enumerate() {
171        if b != 0 {
172            return i;
173        }
174    }
175    unreachable!()
176}
177
178#[test]
179fn test_is_header_value_block() {
180    let is_header_value_block = |b| match_header_value_char_8_swar(b) == BLOCK_SIZE;
181
182    // 0..32 => false
183    for b in 0..32_u8 {
184        assert!(!is_header_value_block([b; BLOCK_SIZE]), "b={}", b);
185    }
186    // 32..=126 => true
187    for b in 32..=126_u8 {
188        assert!(is_header_value_block([b; BLOCK_SIZE]), "b={}", b);
189    }
190    // 127 => false
191    assert!(!is_header_value_block([b'\x7F'; BLOCK_SIZE]), "b={}", b'\x7F');
192    // 128..=255 => true
193    for b in 128..=255_u8 {
194        assert!(is_header_value_block([b; BLOCK_SIZE]), "b={}", b);
195    }
196
197
198    #[cfg(target_pointer_width = "64")]
199    {
200        // A few sanity checks on non-uniform bytes for safe-measure
201        assert!(!is_header_value_block(*b"foo.com\n"));
202        assert!(!is_header_value_block(*b"o.com\r\nU"));
203    }
204}
205
206#[test]
207fn test_is_uri_block() {
208    let is_uri_block = |b| match_uri_char_8_swar(b) == BLOCK_SIZE;
209
210    // 0..33 => false
211    for b in 0..33_u8 {
212        assert!(!is_uri_block([b; BLOCK_SIZE]), "b={}", b);
213    }
214    // 33..=126 => true
215    for b in 33..=126_u8 {
216        assert!(is_uri_block([b; BLOCK_SIZE]), "b={}", b);
217    }
218    // 127 => false
219    assert!(!is_uri_block([b'\x7F'; BLOCK_SIZE]), "b={}", b'\x7F');
220    // 128..=255 => true
221    for b in 128..=255_u8 {
222        assert!(is_uri_block([b; BLOCK_SIZE]), "b={}", b);
223    }
224}
225
226#[test]
227fn test_offsetnz() {
228    let seq = [0_u8; BLOCK_SIZE];
229    for i in 0..BLOCK_SIZE {
230        let mut seq = seq;
231        seq[i] = 1;
232        let x = usize::from_ne_bytes(seq);
233        assert_eq!(offsetnz(x), i);
234    }
235}