vsimd/
hex.rs

1use crate::alsw::{self, AlswLut};
2use crate::isa::{AVX2, NEON, SSSE3, WASM128};
3use crate::mask::{u8x16_highbit_any, u8x32_highbit_any};
4use crate::pod::POD;
5use crate::vector::{V128, V256, V64};
6use crate::{Scalable, SIMD128, SIMD256};
7
/// Uppercase hexadecimal alphabet; the byte at index `n` is the digit for nibble value `n`.
pub const UPPER_CHARSET: &[u8; 16] = b"0123456789ABCDEF";
/// Lowercase hexadecimal alphabet; the byte at index `n` is the digit for nibble value `n`.
pub const LOWER_CHARSET: &[u8; 16] = b"0123456789abcdef";
10
/// Converts a single ASCII hex digit into its numeric value (`0..=15`).
///
/// Accepts `0-9`, `a-f` and `A-F`; every other byte maps to the sentinel `0xff`.
const fn parse_hex(x: u8) -> u8 {
    if x.is_ascii_digit() {
        return x - b'0';
    }
    // Setting bit 5 folds `A..=F` onto `a..=f`; no other byte can land in that range.
    let folded = x | 0x20;
    if folded >= b'a' && folded <= b'f' {
        folded - b'a' + 10
    } else {
        0xff
    }
}
19
20#[inline(always)]
21#[must_use]
22pub const fn unhex(x: u8) -> u8 {
23    const UNHEX_TABLE: &[u8; 256] = &{
24        let mut arr = [0; 256];
25        let mut i = 0;
26        while i < 256 {
27            arr[i] = parse_hex(i as u8);
28            i += 1;
29        }
30        arr
31    };
32    UNHEX_TABLE[x as usize]
33}
34
35#[inline(always)]
36pub fn check_xn<S, V>(s: S, x: V) -> bool
37where
38    S: Scalable<V>,
39    V: POD,
40{
41    let x1 = s.u8xn_sub(x, s.u8xn_splat(0x30 + 0x80));
42    let x2 = s.u8xn_sub(s.and(x, s.u8xn_splat(0xdf)), s.u8xn_splat(0x41 + 0x80));
43    let m1 = s.i8xn_lt(x1, s.i8xn_splat(-118));
44    let m2 = s.i8xn_lt(x2, s.i8xn_splat(-122));
45    s.mask8xn_all(s.or(m1, m2))
46}
47
48pub const ENCODE_UPPER_LUT: V256 = V256::double_bytes(*UPPER_CHARSET);
49pub const ENCODE_LOWER_LUT: V256 = V256::double_bytes(*LOWER_CHARSET);
50
51#[inline(always)]
52pub fn encode_bytes16<S: SIMD256>(s: S, x: V128, lut: V256) -> V256 {
53    let x = s.u16x16_from_u8x16(x);
54    let hi = s.u16x16_shl::<8>(x);
55    let lo = s.u16x16_shr::<4>(x);
56    let values = s.v256_and(s.v256_or(hi, lo), s.u8x32_splat(0x0f));
57    s.u8x16x2_swizzle(lut, values)
58}
59
60#[inline(always)]
61pub fn encode_bytes32<S: SIMD256>(s: S, x: V256, lut: V256) -> (V256, V256) {
62    let m = s.u8x32_splat(0x0f);
63    let hi = s.v256_and(s.u16x16_shr::<4>(x), m);
64    let lo = s.v256_and(x, m);
65
66    let ac = s.u8x16x2_zip_lo(hi, lo);
67    let bd = s.u8x16x2_zip_hi(hi, lo);
68
69    let ab = s.v128x2_zip_lo(ac, bd);
70    let cd = s.v128x2_zip_hi(ac, bd);
71
72    let y1 = s.u8x16x2_swizzle(lut, ab);
73    let y2 = s.u8x16x2_swizzle(lut, cd);
74
75    (y1, y2)
76}
77
78struct HexAlsw;
79
80impl HexAlsw {
81    const fn decode(c: u8) -> u8 {
82        parse_hex(c)
83    }
84
85    const fn check_hash(i: u8) -> u8 {
86        match i {
87            0 => 1,
88            1..=6 => 1,
89            7..=9 => 6,
90            0xA..=0xF => 8,
91            _ => unreachable!(),
92        }
93    }
94
95    const fn decode_hash(i: u8) -> u8 {
96        Self::check_hash(i)
97    }
98}
99
100impl_alsw!(HexAlsw);
101
102const HEX_ALSW_CHECK: AlswLut<V128> = HexAlsw::check_lut();
103const HEX_ALSW_DECODE: AlswLut<V128> = HexAlsw::decode_lut();
104
105const HEX_ALSW_CHECK_X2: AlswLut<V256> = HexAlsw::check_lut().x2();
106const HEX_ALSW_DECODE_X2: AlswLut<V256> = HexAlsw::decode_lut().x2();
107
108const DECODE_UZP1: V256 = V256::double_bytes([
109    0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, //
110    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, //
111]);
112
113const DECODE_UZP2: V256 = V256::double_bytes([
114    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, //
115    0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, //
116]);
117
118#[inline(always)]
119fn merge_bits<S: Scalable<V>, V: POD>(s: S, x: V) -> V {
120    // x:  {0000hhhh|0000llll} xn
121
122    let x1 = s.u16xn_shl::<4>(x);
123    // x1: {hhhh0000|llll0000} xn
124
125    let x2 = s.u16xn_shr::<12>(x1);
126    // x2: {0000llll|00000000} xn
127
128    s.or(x1, x2)
129    //     {hhhhllll|????????} xn
130}
131
132#[inline(always)]
133fn decode16<S: SIMD128>(s: S, x: V128) -> (V128, V128) {
134    let (c1, c2) = alsw::decode_ascii_xn(s, x, HEX_ALSW_CHECK, HEX_ALSW_DECODE);
135    (merge_bits(s, c2), c1)
136}
137
138#[inline(always)]
139fn decode32<S: SIMD256>(s: S, x: V256) -> (V256, V256) {
140    let (c1, c2) = alsw::decode_ascii_xn(s, x, HEX_ALSW_CHECK_X2, HEX_ALSW_DECODE_X2);
141    (merge_bits(s, c2), c1)
142}
143
144#[allow(clippy::result_unit_err)]
145#[inline(always)]
146pub fn decode_ascii16<S: SIMD128>(s: S, x: V128) -> Result<V64, ()> {
147    let (y, is_invalid) = decode16(s, x);
148
149    let ans = if matches_isa!(S, SSSE3 | WASM128) {
150        const UZP1: V128 = DECODE_UZP1.to_v128x2().0;
151        s.u8x16_swizzle(y, UZP1).to_v64x2().0
152    } else if matches_isa!(S, NEON) {
153        let (a, b) = y.to_v64x2();
154        s.u8x8_unzip_even(a, b)
155    } else {
156        unreachable!()
157    };
158
159    if u8x16_highbit_any(s, is_invalid) {
160        Err(())
161    } else {
162        Ok(ans)
163    }
164}
165
166#[allow(clippy::result_unit_err)]
167#[inline(always)]
168pub fn decode_ascii32<S: SIMD256>(s: S, x: V256) -> Result<V128, ()> {
169    let (y, is_invalid) = decode32(s, x);
170
171    let ans = if matches_isa!(S, SSSE3 | WASM128) {
172        let (a, b) = s.u8x16x2_swizzle(y, DECODE_UZP1).to_v128x2();
173        s.u64x2_zip_lo(a, b)
174    } else if matches_isa!(S, NEON) {
175        let (a, b) = y.to_v128x2();
176        s.u8x16_unzip_even(a, b)
177    } else {
178        unreachable!()
179    };
180
181    if u8x32_highbit_any(s, is_invalid) {
182        Err(())
183    } else {
184        Ok(ans)
185    }
186}
187
188#[allow(clippy::result_unit_err)]
189#[inline(always)]
190pub fn decode_ascii32x2<S: SIMD256>(s: S, x: (V256, V256)) -> Result<V256, ()> {
191    let (y1, is_invalid1) = decode32(s, x.0);
192    let (y2, is_invalid2) = decode32(s, x.1);
193    let is_invalid = s.v256_or(is_invalid1, is_invalid2);
194
195    let ans = if matches_isa!(S, AVX2) {
196        let ab = s.u8x16x2_swizzle(y1, DECODE_UZP1);
197        let cd = s.u8x16x2_swizzle(y2, DECODE_UZP2);
198        let acbd = s.v256_or(ab, cd);
199        s.u64x4_permute::<0b_1101_1000>(acbd) // 0213
200    } else if matches_isa!(S, SSSE3 | WASM128) {
201        let ab = s.u8x16x2_swizzle(y1, DECODE_UZP1);
202        let cd = s.u8x16x2_swizzle(y2, DECODE_UZP1);
203        s.u64x4_unzip_even(ab, cd)
204    } else if matches_isa!(S, NEON) {
205        s.u8x32_unzip_even(y1, y2)
206    } else {
207        unreachable!()
208    };
209
210    if u8x32_highbit_any(s, is_invalid) {
211        Err(())
212    } else {
213        Ok(ans)
214    }
215}
216
217pub mod sse2 {
218    use crate::isa::SSE2;
219    use crate::vector::{V128, V64};
220    use crate::SIMD128;
221
222    #[inline(always)]
223    #[must_use]
224    pub fn decode_nibbles(s: SSE2, x: V128) -> (V128, V128) {
225        // http://0x80.pl/notesen/2022-01-17-validating-hex-parse.html
226        // Algorithm 3
227
228        let t1 = s.u8x16_add(x, s.u8x16_splat(0xff - b'9'));
229        let t2 = s.u8x16_sub_sat(t1, s.u8x16_splat(6));
230        let t3 = s.u8x16_sub(t2, s.u8x16_splat(0xf0));
231        let t4 = s.v128_and(x, s.u8x16_splat(0xdf));
232        let t5 = s.u8x16_sub(t4, s.u8x16_splat(0x41));
233        let t6 = s.u8x16_add_sat(t5, s.u8x16_splat(10));
234        let t7 = s.u8x16_min(t3, t6);
235        let t8 = s.u8x16_add_sat(t7, s.u8x16_splat(127 - 15));
236        (t7, t8)
237    }
238
239    #[inline(always)]
240    #[must_use]
241    pub fn merge_bits(s: SSE2, x: V128) -> V64 {
242        let lo = s.u16x8_shr::<8>(x);
243        let hi = s.u16x8_shl::<4>(x);
244        let t1 = s.v128_or(lo, hi);
245        let t2 = s.v128_and(t1, s.u16x8_splat(0x00ff));
246        let t3 = s.i16x8_packus(t2, s.v128_create_zero());
247        t3.to_v64x2().0
248    }
249
250    pub const LOWER_OFFSET: V128 = V128::from_bytes([0x27; 16]);
251    pub const UPPER_OFFSET: V128 = V128::from_bytes([0x07; 16]);
252
253    #[inline(always)]
254    #[must_use]
255    pub fn encode16(s: SSE2, x: V128, offset: V128) -> (V128, V128) {
256        let m = s.u8x16_splat(0x0f);
257        let hi = s.v128_and(s.u16x8_shr::<4>(x), m);
258        let lo = s.v128_and(x, m);
259
260        let c1 = s.u8x16_splat(0x30);
261        let h1 = s.u8x16_add(hi, c1);
262        let l1 = s.u8x16_add(lo, c1);
263
264        let c2 = s.u8x16_splat(0x39);
265        let h2 = s.v128_and(s.i8x16_lt(c2, h1), offset);
266        let l2 = s.v128_and(s.i8x16_lt(c2, l1), offset);
267
268        let h3 = s.u8x16_add(h1, h2);
269        let l3 = s.u8x16_add(l1, l2);
270
271        let y1 = s.u8x16_zip_lo(h3, l3);
272        let y2 = s.u8x16_zip_hi(h3, l3);
273
274        (y1, y2)
275    }
276}
277
#[cfg(test)]
mod algorithm {
    use super::*;

    /// Exhaustively checks that the two range-compare formulations of
    /// "is this byte a hex digit" agree with the plain pattern match.
    #[test]
    #[ignore]
    fn check() {
        /// Reference definition.
        fn by_pattern(c: u8) -> bool {
            matches!(c, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')
        }

        /// Unsigned range compares after subtracting the range start.
        fn by_unsigned_range(c: u8) -> bool {
            let digit = c.wrapping_sub(0x30);
            let letter = (c & 0xdf).wrapping_sub(0x41);
            digit < 10 || letter < 6
        }

        /// Signed variant with an extra `0x80` bias, as used by `check_xn`.
        fn by_signed_range(c: u8) -> bool {
            let digit = c.wrapping_sub(0x30 + 0x80);
            let letter = (c & 0xdf).wrapping_sub(0x41 + 0x80);
            ((digit as i8) < -118) || ((letter as i8) < -122)
        }

        for c in u8::MIN..=u8::MAX {
            let expected = by_pattern(c);
            assert_eq!(expected, by_unsigned_range(c));
            assert_eq!(expected, by_signed_range(c));
        }
    }

    /// Runs the self-checks provided for `HexAlsw`.
    #[test]
    #[ignore]
    fn hex_alsw() {
        HexAlsw::test_check();
        HexAlsw::test_decode();
    }
}