httparse/simd/
sse42.rs

1use crate::iter::Bytes;
2
3#[target_feature(enable = "sse4.2")]
4pub unsafe fn match_uri_vectored(bytes: &mut Bytes) {
5    while bytes.as_ref().len() >= 16 {
6        let advance = match_url_char_16_sse(bytes.as_ref());
7
8        bytes.advance(advance);
9
10        if advance != 16 {
11            return;
12        }
13    }
14    super::swar::match_uri_vectored(bytes);
15}
16
/// Returns how many of the first 16 bytes of `buf` form an unbroken run of
/// accepted URL bytes (`%x21-%x7e` and `%x80-%xff`), counted from the start.
///
/// The result is 16 when all sixteen bytes are accepted; otherwise it is the
/// index of the first rejected byte.
///
/// # Safety
///
/// `buf` must hold at least 16 bytes (only checked in debug builds), because
/// 16 bytes are loaded through a raw pointer. The CPU must support the SSE
/// intrinsics used here.
#[inline(always)]
unsafe fn match_url_char_16_sse(buf: &[u8]) -> usize {
    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;

    debug_assert!(buf.len() >= 16);

    // Load the 16-byte window straight from the slice start.
    let chunk = _mm_lddqu_si128(buf.as_ptr() as *const __m128i);

    // Accepted set is "everything >= 0x21 except DEL (0x7f)".
    let floor: __m128i = _mm_set1_epi8(0x21);
    let delete: __m128i = _mm_set1_epi8(0x7f);

    // Unsigned `chunk >= floor`: max(chunk, floor) equals chunk exactly in
    // the lanes where chunk is not below the floor.
    let raised = _mm_max_epu8(chunk, floor);
    let at_least_floor = _mm_cmpeq_epi8(raised, chunk);
    let is_delete = _mm_cmpeq_epi8(chunk, delete);

    // andnot(a, b) computes (!a) & b: at least the floor and not DEL.
    let accepted = _mm_andnot_si128(is_delete, at_least_floor);
    let mask = _mm_movemask_epi8(accepted) as u16;

    // Count the accepted lanes before the first rejected one.
    // TODO: use .trailing_ones() once MSRV >= 1.46
    (!mask).trailing_zeros() as usize
}
43
44#[target_feature(enable = "sse4.2")]
45pub unsafe fn match_header_value_vectored(bytes: &mut Bytes) {
46    while bytes.as_ref().len() >= 16 {
47        let advance = match_header_value_char_16_sse(bytes.as_ref());
48        bytes.advance(advance);
49
50       if advance != 16 {
51            return;
52       }
53    }
54    super::swar::match_header_value_vectored(bytes);
55}
56
/// Returns how many of the first 16 bytes of `buf` form an unbroken run of
/// accepted header-value bytes (`%x09`, `%x20-%x7e` and `%x80-%xff`),
/// counted from the start.
///
/// The result is 16 when all sixteen bytes are accepted; otherwise it is the
/// index of the first rejected byte.
///
/// # Safety
///
/// `buf` must hold at least 16 bytes (only checked in debug builds), because
/// 16 bytes are loaded through a raw pointer. The CPU must support the SSE
/// intrinsics used here.
#[inline(always)]
unsafe fn match_header_value_char_16_sse(buf: &[u8]) -> usize {
    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;

    debug_assert!(buf.len() >= 16);

    // Load the 16-byte window straight from the slice start.
    let chunk = _mm_lddqu_si128(buf.as_ptr() as *const __m128i);

    // Accepted set is "TAB, or anything >= 0x20, except DEL (0x7f)".
    let floor: __m128i = _mm_set1_epi8(0x20);
    let htab: __m128i = _mm_set1_epi8(0x09);
    let delete: __m128i = _mm_set1_epi8(0x7f);

    // Unsigned `chunk >= floor`: max(chunk, floor) equals chunk exactly in
    // the lanes where chunk is not below the floor.
    let raised = _mm_max_epu8(chunk, floor);
    let at_least_floor = _mm_cmpeq_epi8(raised, chunk);
    let is_htab = _mm_cmpeq_epi8(chunk, htab);
    let is_delete = _mm_cmpeq_epi8(chunk, delete);

    // andnot(a, b) computes (!a) & b: (>= floor or TAB) and not DEL.
    let candidates = _mm_or_si128(at_least_floor, is_htab);
    let accepted = _mm_andnot_si128(is_delete, candidates);
    let mask = _mm_movemask_epi8(accepted) as u16;

    // Count the accepted lanes before the first rejected one.
    // TODO: use .trailing_ones() once MSRV >= 1.46
    (!mask).trailing_zeros() as usize
}
85
/// Checks that the SSE4.2 URI matcher accepts or rejects every byte value
/// exactly as `crate::URI_MAP` says it should.
#[test]
fn sse_code_matches_uri_chars_table() {
    // Nothing is checked on CPUs that lack SSE4.2.
    if is_x86_feature_detected!("sse4.2") {
        // SAFETY: SSE4.2 support was confirmed at runtime just above, which
        // is what the `#[target_feature(enable = "sse4.2")]` matcher needs.
        unsafe {
            // Sanity check: the filler byte used by the probe is accepted.
            assert!(byte_is_allowed(b'_', match_uri_vectored));

            for (b, &allowed) in crate::URI_MAP.iter().enumerate() {
                let actual = byte_is_allowed(b as u8, match_uri_vectored);
                assert_eq!(
                    actual, allowed,
                    "byte_is_allowed({:?}) should be {:?}", b, allowed,
                );
            }
        }
    }
}
104
/// Checks that the SSE4.2 header-value matcher accepts or rejects every byte
/// value exactly as `crate::HEADER_VALUE_MAP` says it should.
#[test]
fn sse_code_matches_header_value_chars_table() {
    // Nothing is checked on CPUs that lack SSE4.2.
    if is_x86_feature_detected!("sse4.2") {
        // SAFETY: SSE4.2 support was confirmed at runtime just above, which
        // is what the `#[target_feature(enable = "sse4.2")]` matcher needs.
        unsafe {
            // Sanity check: the filler byte used by the probe is accepted.
            assert!(byte_is_allowed(b'_', match_header_value_vectored));

            for (b, &allowed) in crate::HEADER_VALUE_MAP.iter().enumerate() {
                let actual = byte_is_allowed(b as u8, match_header_value_vectored);
                assert_eq!(
                    actual, allowed,
                    "byte_is_allowed({:?}) should be {:?}", b, allowed,
                );
            }
        }
    }
}
123
/// Test helper: reports whether matcher `f` accepts `byte`.
///
/// `byte` is placed at index 10 of a 16-byte buffer otherwise filled with
/// `b'_'`, and `f` is run over it. A final position of 16 means the whole
/// buffer was consumed (accepted); a final position of 10 means `f` stopped
/// at the probe byte (rejected). Any other position is treated as impossible.
///
/// # Safety
///
/// `f` is invoked as-is, so whatever `f` requires of its caller must hold.
#[allow(clippy::missing_safety_doc)]
#[cfg(test)]
unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &mut Bytes<'_>)) -> bool {
    // Sixteen filler bytes with the probe byte dropped in at index 10.
    let mut block = [b'_'; 16];
    block[10] = byte;

    let mut cursor = Bytes::new(&block);
    f(&mut cursor);

    let stopped_at = cursor.pos();
    if stopped_at == 16 {
        true
    } else if stopped_at == 10 {
        false
    } else {
        unreachable!()
    }
}