httparse/simd/sse42.rs
use crate::iter::Bytes;

#[target_feature(enable = "sse4.2")]
pub unsafe fn match_uri_vectored(bytes: &mut Bytes) {
    // Consume the input 16 bytes at a time while a full SSE register's worth remains.
    while bytes.as_ref().len() >= 16 {
        let advance = match_url_char_16_sse(bytes.as_ref());

        bytes.advance(advance);

        // A partial advance means the block contained a disallowed byte: stop here.
        if advance != 16 {
            return;
        }
    }
    // Fewer than 16 bytes remain; finish with the SWAR (word-at-a-time) fallback.
    super::swar::match_uri_vectored(bytes);
}
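
// Illustrative usage sketch (a hypothetical test, not one of the table-driven tests
// below): after a runtime feature check, match_uri_vectored advances the cursor past
// every allowed URI byte and leaves it on the first disallowed one (here, the space).
#[test]
fn sse_match_uri_stops_at_first_disallowed_byte() {
    if !is_x86_feature_detected!("sse4.2") {
        return;
    }
    let mut bytes = Bytes::new(b"/path?query=1 HTTP/1.1");
    // SAFETY: sse4.2 support was verified above.
    unsafe { match_uri_vectored(&mut bytes) };
    // The space at index 13 is the first byte outside the allowed URI set.
    assert_eq!(bytes.pos(), 13);
}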

#[inline(always)]
#[allow(non_snake_case)]
unsafe fn match_url_char_16_sse(buf: &[u8]) -> usize {
    debug_assert!(buf.len() >= 16);

    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;

    let ptr = buf.as_ptr();

    // Allowed URI bytes are 0x21-0x7e plus 0x80-0xff: everything at or above '!'
    // except DEL (0x7f). The comparison below is unsigned, so high bytes pass the
    // LOW check as well.
    let DEL: __m128i = _mm_set1_epi8(0x7f);
    let LOW: __m128i = _mm_set1_epi8(0x21);

    // Load 16 bytes (unaligned) and build a per-byte mask of allowed characters.
    let dat = _mm_lddqu_si128(ptr as *const _);
    // low: 0xff where byte >= LOW (max(dat, LOW) == dat is an unsigned >= test).
    let low = _mm_cmpeq_epi8(_mm_max_epu8(dat, LOW), dat);
    // del: 0xff where byte == DEL.
    let del = _mm_cmpeq_epi8(dat, DEL);
    // allowed = low AND NOT del.
    let bit = _mm_andnot_si128(del, low);
    // One bit per byte, set for allowed bytes.
    let res = _mm_movemask_epi8(bit) as u16;

    // Number of leading allowed bytes: index of the first zero bit (16 if all match).
    (!res).trailing_zeros() as usize
}
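
// Illustrative cross-check (hypothetical test): the intrinsic sequence above is the
// vectorized form of this per-byte predicate. `_mm_max_epu8` compares unsigned, so
// bytes 0x80-0xff also satisfy the `>= 0x21` test.
#[test]
fn sse_uri_char_16_matches_scalar_predicate() {
    if !is_x86_feature_detected!("sse4.2") {
        return;
    }
    for b in 0..=255u8 {
        let buf = [b; 16];
        // SAFETY: sse4.2 support was verified above, and `buf` is exactly 16 bytes.
        let advance = unsafe { match_url_char_16_sse(&buf) };
        let allowed = b >= 0x21 && b != 0x7f;
        assert_eq!(advance == 16, allowed, "byte {:#04x}", b);
    }
}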

#[target_feature(enable = "sse4.2")]
pub unsafe fn match_header_value_vectored(bytes: &mut Bytes) {
    // Same structure as match_uri_vectored, but using the header-value byte class.
    while bytes.as_ref().len() >= 16 {
        let advance = match_header_value_char_16_sse(bytes.as_ref());
        bytes.advance(advance);

        if advance != 16 {
            return;
        }
    }
    // Fall back to SWAR for the remaining tail (fewer than 16 bytes).
    super::swar::match_header_value_vectored(bytes);
}

#[inline(always)]
#[allow(non_snake_case)]
unsafe fn match_header_value_char_16_sse(buf: &[u8]) -> usize {
    debug_assert!(buf.len() >= 16);

    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;

    let ptr = buf.as_ptr();

    // Allowed header-value bytes: HTAB (0x09), 0x20-0x7e, and 0x80-0xff;
    // DEL (0x7f) is excluded. The floor here is space (0x20), not '!'.
    let TAB: __m128i = _mm_set1_epi8(0x09);
    let DEL: __m128i = _mm_set1_epi8(0x7f);
    let LOW: __m128i = _mm_set1_epi8(0x20);

    let dat = _mm_lddqu_si128(ptr as *const _);
    // low: 0xff where byte >= LOW (unsigned compare via max).
    let low = _mm_cmpeq_epi8(_mm_max_epu8(dat, LOW), dat);
    // tab: 0xff where byte == HTAB.
    let tab = _mm_cmpeq_epi8(dat, TAB);
    // del: 0xff where byte == DEL.
    let del = _mm_cmpeq_epi8(dat, DEL);
    // allowed = (low OR tab) AND NOT del.
    let bit = _mm_andnot_si128(del, _mm_or_si128(low, tab));
    let res = _mm_movemask_epi8(bit) as u16;

    // Number of leading allowed bytes (16 when the whole block matches).
    (!res).trailing_zeros() as usize
}
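
// Illustrative usage sketch (hypothetical test): header-value matching accepts HTAB
// and every byte at or above 0x20 except DEL, so the cursor stops on the CR that
// terminates the value.
#[test]
fn sse_match_header_value_stops_at_cr() {
    if !is_x86_feature_detected!("sse4.2") {
        return;
    }
    let mut bytes = Bytes::new(b"keep-alive\r\nHost: a");
    // SAFETY: sse4.2 support was verified above.
    unsafe { match_header_value_vectored(&mut bytes) };
    // '\r' (0x0d) is below 0x20 and is not HTAB, so matching stops at index 10.
    assert_eq!(bytes.pos(), 10);
}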

#[test]
fn sse_code_matches_uri_chars_table() {
    if !is_x86_feature_detected!("sse4.2") {
        return;
    }

    #[allow(clippy::undocumented_unsafe_blocks)]
    unsafe {
        assert!(byte_is_allowed(b'_', match_uri_vectored));

        for (b, allowed) in crate::URI_MAP.iter().cloned().enumerate() {
            assert_eq!(
                byte_is_allowed(b as u8, match_uri_vectored), allowed,
                "byte_is_allowed({:?}) should be {:?}", b, allowed,
            );
        }
    }
}

#[test]
fn sse_code_matches_header_value_chars_table() {
    if !is_x86_feature_detected!("sse4.2") {
        return;
    }

    #[allow(clippy::undocumented_unsafe_blocks)]
    unsafe {
        assert!(byte_is_allowed(b'_', match_header_value_vectored));

        for (b, allowed) in crate::HEADER_VALUE_MAP.iter().cloned().enumerate() {
            assert_eq!(
                byte_is_allowed(b as u8, match_header_value_vectored), allowed,
                "byte_is_allowed({:?}) should be {:?}", b, allowed,
            );
        }
    }
}

#[allow(clippy::missing_safety_doc)]
#[cfg(test)]
unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &mut Bytes<'_>)) -> bool {
    // Probe slice: 16 allowed bytes with the byte under test placed at index 10.
    let slice = [
        b'_', b'_', b'_', b'_',
        b'_', b'_', b'_', b'_',
        b'_', b'_', byte, b'_',
        b'_', b'_', b'_', b'_',
    ];
    let mut bytes = Bytes::new(&slice);

    f(&mut bytes);

    // pos() == 16: the whole block matched, so `byte` is allowed;
    // pos() == 10: matching stopped on `byte`, so it is disallowed.
    match bytes.pos() {
        16 => true,
        10 => false,
        _ => unreachable!(),
    }
}