crc32c/
hw_x86_64.rs
1use crate::hw_tables;
4use crate::util::{self, U64Le};
5use std::arch::x86_64 as simd;
6
7pub unsafe fn crc32c(crci: u32, buffer: &[u8]) -> u32 {
9 let mut crc0 = u64::from(!crci);
10
11 let (begin, middle, end) = util::split(buffer);
12
13 crc0 = crc_u8(crc0, begin);
15
16 let chunk_size = (hw_tables::LONG * 3) / 8;
22 let last_chunk = middle.len() / chunk_size * chunk_size;
23
24 let (middle_first, middle_last) = middle.split_at(last_chunk);
25
26 crc0 = crc_u64_parallel3(crc0, chunk_size, &hw_tables::LONG_TABLE, middle_first);
27
28 let chunk_size = (hw_tables::SHORT * 3) / 8;
30 let last_chunk = middle_last.len() / chunk_size * chunk_size;
31
32 let (middle_last_first, middle_last_last) = middle_last.split_at(last_chunk);
33
34 crc0 = crc_u64_parallel3(crc0, chunk_size, &hw_tables::SHORT_TABLE, middle_last_first);
35
36 crc0 = crc_u64(crc0, middle_last_last);
38
39 crc0 = crc_u8(crc0, end);
41
42 !(crc0 as u32)
43}
44
45#[inline]
46#[target_feature(enable = "sse4.2")]
47unsafe fn crc_u8_append(crc: u64, next: u8) -> u64 {
48 u64::from(self::simd::_mm_crc32_u8(crc as u32, next))
49}
50
51#[inline]
52#[target_feature(enable = "sse4.2")]
53unsafe fn crc_u64_append(crc: u64, next: u64) -> u64 {
54 self::simd::_mm_crc32_u64(crc, next)
55}
56
57#[inline]
58unsafe fn crc_u8(crc: u64, buffer: &[u8]) -> u64 {
59 buffer
60 .iter()
61 .fold(crc, |crc, &next| crc_u8_append(crc, next))
62}
63
64#[inline]
65unsafe fn crc_u64(crc: u64, buffer: &[U64Le]) -> u64 {
66 buffer
67 .iter()
68 .fold(crc, |crc, &next| crc_u64_append(crc, next.get()))
69}
70
71#[inline]
78unsafe fn crc_u64_parallel3(
79 crc: u64,
80 chunk_size: usize,
81 table: &hw_tables::CrcTable,
82 buffer: &[U64Le],
83) -> u64 {
84 buffer.chunks(chunk_size).fold(crc, |mut crc0, chunk| {
85 let mut crc1 = 0;
86 let mut crc2 = 0;
87
88 let block_size = chunk_size / 3;
90
91 let mut blocks = chunk.chunks(block_size);
92 let a = blocks.next().unwrap();
93 let b = blocks.next().unwrap();
94 let c = blocks.next().unwrap();
95
96 for i in 0..block_size {
97 crc0 = crc_u64_append(crc0, a[i].get());
98 crc1 = crc_u64_append(crc1, b[i].get());
99 crc2 = crc_u64_append(crc2, c[i].get());
100 }
101
102 crc0 = table.shift_u64(crc0) ^ crc1;
103 crc0 = table.shift_u64(crc0) ^ crc2;
104
105 crc0
106 })
107}