1use crate::alsw::{self, AlswLut};
2use crate::isa::{AVX2, NEON, SSSE3, WASM128};
3use crate::mask::{u8x16_highbit_any, u8x32_highbit_any};
4use crate::pod::POD;
5use crate::vector::{V128, V256, V64};
6use crate::{Scalable, SIMD128, SIMD256};
7
/// The sixteen hex digits in ascending value order, using uppercase `A`-`F` for 10..=15.
pub const UPPER_CHARSET: &[u8; 16] = b"0123456789ABCDEF";
/// The sixteen hex digits in ascending value order, using lowercase `a`-`f` for 10..=15.
pub const LOWER_CHARSET: &[u8; 16] = b"0123456789abcdef";
10
/// Converts a single ASCII hex digit into its numeric value.
///
/// Accepts `0-9`, `a-f` and `A-F` and yields `0..=15`. Every other byte maps to the
/// sentinel `0xff`, which can never be confused with a valid nibble.
const fn parse_hex(x: u8) -> u8 {
    if matches!(x, b'0'..=b'9') {
        return x - b'0';
    }
    if matches!(x, b'a'..=b'f') {
        return x - b'a' + 10;
    }
    if matches!(x, b'A'..=b'F') {
        return x - b'A' + 10;
    }
    // Not a hex digit.
    0xff
}
19
20#[inline(always)]
21#[must_use]
22pub const fn unhex(x: u8) -> u8 {
23 const UNHEX_TABLE: &[u8; 256] = &{
24 let mut arr = [0; 256];
25 let mut i = 0;
26 while i < 256 {
27 arr[i] = parse_hex(i as u8);
28 i += 1;
29 }
30 arr
31 };
32 UNHEX_TABLE[x as usize]
33}
34
35#[inline(always)]
36pub fn check_xn<S, V>(s: S, x: V) -> bool
37where
38 S: Scalable<V>,
39 V: POD,
40{
41 let x1 = s.u8xn_sub(x, s.u8xn_splat(0x30 + 0x80));
42 let x2 = s.u8xn_sub(s.and(x, s.u8xn_splat(0xdf)), s.u8xn_splat(0x41 + 0x80));
43 let m1 = s.i8xn_lt(x1, s.i8xn_splat(-118));
44 let m2 = s.i8xn_lt(x2, s.i8xn_splat(-122));
45 s.mask8xn_all(s.or(m1, m2))
46}
47
/// Nibble-to-character lookup table built from [`UPPER_CHARSET`], for the `encode_bytes*` functions.
///
/// NOTE(review): `V256::double_bytes` presumably repeats the 16 bytes in both 128-bit
/// halves of the vector — confirm against `crate::vector`.
pub const ENCODE_UPPER_LUT: V256 = V256::double_bytes(*UPPER_CHARSET);
/// Nibble-to-character lookup table built from [`LOWER_CHARSET`], for the `encode_bytes*` functions.
///
/// NOTE(review): `V256::double_bytes` presumably repeats the 16 bytes in both 128-bit
/// halves of the vector — confirm against `crate::vector`.
pub const ENCODE_LOWER_LUT: V256 = V256::double_bytes(*LOWER_CHARSET);
50
51#[inline(always)]
52pub fn encode_bytes16<S: SIMD256>(s: S, x: V128, lut: V256) -> V256 {
53 let x = s.u16x16_from_u8x16(x);
54 let hi = s.u16x16_shl::<8>(x);
55 let lo = s.u16x16_shr::<4>(x);
56 let values = s.v256_and(s.v256_or(hi, lo), s.u8x32_splat(0x0f));
57 s.u8x16x2_swizzle(lut, values)
58}
59
60#[inline(always)]
61pub fn encode_bytes32<S: SIMD256>(s: S, x: V256, lut: V256) -> (V256, V256) {
62 let m = s.u8x32_splat(0x0f);
63 let hi = s.v256_and(s.u16x16_shr::<4>(x), m);
64 let lo = s.v256_and(x, m);
65
66 let ac = s.u8x16x2_zip_lo(hi, lo);
67 let bd = s.u8x16x2_zip_hi(hi, lo);
68
69 let ab = s.v128x2_zip_lo(ac, bd);
70 let cd = s.v128x2_zip_hi(ac, bd);
71
72 let y1 = s.u8x16x2_swizzle(lut, ab);
73 let y2 = s.u8x16x2_swizzle(lut, cd);
74
75 (y1, y2)
76}
77
78struct HexAlsw;
79
80impl HexAlsw {
81 const fn decode(c: u8) -> u8 {
82 parse_hex(c)
83 }
84
85 const fn check_hash(i: u8) -> u8 {
86 match i {
87 0 => 1,
88 1..=6 => 1,
89 7..=9 => 6,
90 0xA..=0xF => 8,
91 _ => unreachable!(),
92 }
93 }
94
95 const fn decode_hash(i: u8) -> u8 {
96 Self::check_hash(i)
97 }
98}
99
// NOTE(review): `check_lut`, `decode_lut`, `test_check` and `test_decode`, which this
// file calls on `HexAlsw`, are presumably generated by this macro — confirm against
// the definition of `impl_alsw!`.
impl_alsw!(HexAlsw);

/// 128-bit check table for hex input, passed to `alsw::decode_ascii_xn` by `decode16`.
const HEX_ALSW_CHECK: AlswLut<V128> = HexAlsw::check_lut();
/// 128-bit decode table for hex input, passed to `alsw::decode_ascii_xn` by `decode16`.
const HEX_ALSW_DECODE: AlswLut<V128> = HexAlsw::decode_lut();

/// 256-bit form of the check table (`x2()` of the 128-bit one), used by `decode32`.
const HEX_ALSW_CHECK_X2: AlswLut<V256> = HexAlsw::check_lut().x2();
/// 256-bit form of the decode table (`x2()` of the 128-bit one), used by `decode32`.
const HEX_ALSW_DECODE_X2: AlswLut<V256> = HexAlsw::decode_lut().x2();
107
/// Swizzle indices that select the even-numbered bytes (0, 2, ..., 14) into the first
/// eight positions; the last eight positions hold `0x80`.
///
/// NOTE(review): `0x80` presumably makes the swizzle write zero to that position —
/// `decode_ascii32x2` ORs a `DECODE_UZP1` result with a `DECODE_UZP2` result, which only
/// works if the filler positions are zero. Confirm against the swizzle implementation.
const DECODE_UZP1: V256 = V256::double_bytes([
    0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, // even source bytes
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, // filler
]);

/// Mirror image of [`DECODE_UZP1`]: `0x80` in the first eight positions and the
/// even-numbered byte indices (0, 2, ..., 14) in the last eight.
const DECODE_UZP2: V256 = V256::double_bytes([
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, // filler
    0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, // even source bytes
]);
117
118#[inline(always)]
119fn merge_bits<S: Scalable<V>, V: POD>(s: S, x: V) -> V {
120 let x1 = s.u16xn_shl::<4>(x);
123 let x2 = s.u16xn_shr::<12>(x1);
126 s.or(x1, x2)
129 }
131
132#[inline(always)]
133fn decode16<S: SIMD128>(s: S, x: V128) -> (V128, V128) {
134 let (c1, c2) = alsw::decode_ascii_xn(s, x, HEX_ALSW_CHECK, HEX_ALSW_DECODE);
135 (merge_bits(s, c2), c1)
136}
137
138#[inline(always)]
139fn decode32<S: SIMD256>(s: S, x: V256) -> (V256, V256) {
140 let (c1, c2) = alsw::decode_ascii_xn(s, x, HEX_ALSW_CHECK_X2, HEX_ALSW_DECODE_X2);
141 (merge_bits(s, c2), c1)
142}
143
144#[allow(clippy::result_unit_err)]
145#[inline(always)]
146pub fn decode_ascii16<S: SIMD128>(s: S, x: V128) -> Result<V64, ()> {
147 let (y, is_invalid) = decode16(s, x);
148
149 let ans = if matches_isa!(S, SSSE3 | WASM128) {
150 const UZP1: V128 = DECODE_UZP1.to_v128x2().0;
151 s.u8x16_swizzle(y, UZP1).to_v64x2().0
152 } else if matches_isa!(S, NEON) {
153 let (a, b) = y.to_v64x2();
154 s.u8x8_unzip_even(a, b)
155 } else {
156 unreachable!()
157 };
158
159 if u8x16_highbit_any(s, is_invalid) {
160 Err(())
161 } else {
162 Ok(ans)
163 }
164}
165
166#[allow(clippy::result_unit_err)]
167#[inline(always)]
168pub fn decode_ascii32<S: SIMD256>(s: S, x: V256) -> Result<V128, ()> {
169 let (y, is_invalid) = decode32(s, x);
170
171 let ans = if matches_isa!(S, SSSE3 | WASM128) {
172 let (a, b) = s.u8x16x2_swizzle(y, DECODE_UZP1).to_v128x2();
173 s.u64x2_zip_lo(a, b)
174 } else if matches_isa!(S, NEON) {
175 let (a, b) = y.to_v128x2();
176 s.u8x16_unzip_even(a, b)
177 } else {
178 unreachable!()
179 };
180
181 if u8x32_highbit_any(s, is_invalid) {
182 Err(())
183 } else {
184 Ok(ans)
185 }
186}
187
188#[allow(clippy::result_unit_err)]
189#[inline(always)]
190pub fn decode_ascii32x2<S: SIMD256>(s: S, x: (V256, V256)) -> Result<V256, ()> {
191 let (y1, is_invalid1) = decode32(s, x.0);
192 let (y2, is_invalid2) = decode32(s, x.1);
193 let is_invalid = s.v256_or(is_invalid1, is_invalid2);
194
195 let ans = if matches_isa!(S, AVX2) {
196 let ab = s.u8x16x2_swizzle(y1, DECODE_UZP1);
197 let cd = s.u8x16x2_swizzle(y2, DECODE_UZP2);
198 let acbd = s.v256_or(ab, cd);
199 s.u64x4_permute::<0b_1101_1000>(acbd) } else if matches_isa!(S, SSSE3 | WASM128) {
201 let ab = s.u8x16x2_swizzle(y1, DECODE_UZP1);
202 let cd = s.u8x16x2_swizzle(y2, DECODE_UZP1);
203 s.u64x4_unzip_even(ab, cd)
204 } else if matches_isa!(S, NEON) {
205 s.u8x32_unzip_even(y1, y2)
206 } else {
207 unreachable!()
208 };
209
210 if u8x32_highbit_any(s, is_invalid) {
211 Err(())
212 } else {
213 Ok(ans)
214 }
215}
216
217pub mod sse2 {
218 use crate::isa::SSE2;
219 use crate::vector::{V128, V64};
220 use crate::SIMD128;
221
222 #[inline(always)]
223 #[must_use]
224 pub fn decode_nibbles(s: SSE2, x: V128) -> (V128, V128) {
225 let t1 = s.u8x16_add(x, s.u8x16_splat(0xff - b'9'));
229 let t2 = s.u8x16_sub_sat(t1, s.u8x16_splat(6));
230 let t3 = s.u8x16_sub(t2, s.u8x16_splat(0xf0));
231 let t4 = s.v128_and(x, s.u8x16_splat(0xdf));
232 let t5 = s.u8x16_sub(t4, s.u8x16_splat(0x41));
233 let t6 = s.u8x16_add_sat(t5, s.u8x16_splat(10));
234 let t7 = s.u8x16_min(t3, t6);
235 let t8 = s.u8x16_add_sat(t7, s.u8x16_splat(127 - 15));
236 (t7, t8)
237 }
238
239 #[inline(always)]
240 #[must_use]
241 pub fn merge_bits(s: SSE2, x: V128) -> V64 {
242 let lo = s.u16x8_shr::<8>(x);
243 let hi = s.u16x8_shl::<4>(x);
244 let t1 = s.v128_or(lo, hi);
245 let t2 = s.v128_and(t1, s.u16x8_splat(0x00ff));
246 let t3 = s.i16x8_packus(t2, s.v128_create_zero());
247 t3.to_v64x2().0
248 }
249
250 pub const LOWER_OFFSET: V128 = V128::from_bytes([0x27; 16]);
251 pub const UPPER_OFFSET: V128 = V128::from_bytes([0x07; 16]);
252
253 #[inline(always)]
254 #[must_use]
255 pub fn encode16(s: SSE2, x: V128, offset: V128) -> (V128, V128) {
256 let m = s.u8x16_splat(0x0f);
257 let hi = s.v128_and(s.u16x8_shr::<4>(x), m);
258 let lo = s.v128_and(x, m);
259
260 let c1 = s.u8x16_splat(0x30);
261 let h1 = s.u8x16_add(hi, c1);
262 let l1 = s.u8x16_add(lo, c1);
263
264 let c2 = s.u8x16_splat(0x39);
265 let h2 = s.v128_and(s.i8x16_lt(c2, h1), offset);
266 let l2 = s.v128_and(s.i8x16_lt(c2, l1), offset);
267
268 let h3 = s.u8x16_add(h1, h2);
269 let l3 = s.u8x16_add(l1, l2);
270
271 let y1 = s.u8x16_zip_lo(h3, l3);
272 let y2 = s.u8x16_zip_hi(h3, l3);
273
274 (y1, y2)
275 }
276}
277
#[cfg(test)]
mod algorithm {
    use super::*;

    /// Exhaustively checks that the three scalar formulations of "is `c` a hex digit?"
    /// agree on every byte value. The last one mirrors the arithmetic in `check_xn`.
    #[test]
    #[ignore]
    fn check() {
        // Reference: the accepted character ranges, spelled out.
        let by_ranges = |c: u8| -> bool { matches!(c, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F') };

        // Unsigned range check after subtracting the start of each range.
        let by_unsigned_compare = |c: u8| -> bool {
            let digit = c.wrapping_sub(0x30);
            let letter = (c & 0xdf).wrapping_sub(0x41);
            digit < 10 || letter < 6
        };

        // Same idea with an extra 0x80 bias so that a signed comparison can be used.
        let by_signed_compare = |c: u8| -> bool {
            let digit = c.wrapping_sub(0x30 + 0x80);
            let letter = (c & 0xdf).wrapping_sub(0x41 + 0x80);
            ((digit as i8) < -118) || ((letter as i8) < -122)
        };

        for c in u8::MIN..=u8::MAX {
            let expected = by_ranges(c);
            assert_eq!(expected, by_unsigned_compare(c));
            assert_eq!(expected, by_signed_compare(c));
        }
    }

    /// Runs the ALSW self-tests for the hex tables.
    #[test]
    #[ignore]
    fn hex_alsw() {
        HexAlsw::test_check();
        HexAlsw::test_decode();
    }
}
314}