lz4_flex/frame/
header.rs

1use twox_hash::XxHash32;
2
3use super::Error;
4use std::{
5    fmt::Debug,
6    hash::Hasher,
7    io,
8    io::{Read, Write},
9};
10
// ---- FLG byte (bit layout: 7-6 version, 5 B.Indep, 4 B.Checksum, 3 C.Size,
// ---- 2 C.Checksum, 1 reserved, 0 DictID) ----

/// Reserved FLG bit (bit 1).
const FLG_RESERVED_MASK: u8 = 1 << 1;
/// The two version bits of the FLG byte (bits 7-6).
const FLG_VERSION_MASK: u8 = 0b11 << 6;
/// Value of the version bits for the supported format version (`01`).
const FLG_SUPPORTED_VERSION_BITS: u8 = 0b01 << 6;

/// FLG bit 5: blocks are independent.
const FLG_INDEPENDENT_BLOCKS: u8 = 1 << 5;
/// FLG bit 4: every block carries a checksum.
const FLG_BLOCK_CHECKSUMS: u8 = 1 << 4;
/// FLG bit 3: the header carries the content size.
const FLG_CONTENT_SIZE: u8 = 1 << 3;
/// FLG bit 2: the frame carries a content checksum.
const FLG_CONTENT_CHECKSUM: u8 = 1 << 2;
/// FLG bit 0: the header carries a dictionary id.
const FLG_DICTIONARY_ID: u8 = 1 << 0;

// ---- BD byte (bit layout: 7 reserved, 6-5-4 block max size, 3-0 reserved) ----

/// Shift that moves the block-size field of the BD byte down to bit 0.
const BD_BLOCK_SIZE_MASK_RSHIFT: u8 = 4;
/// The three block-size bits of the BD byte (bits 6-4).
const BD_BLOCK_SIZE_MASK: u8 = 0b111 << BD_BLOCK_SIZE_MASK_RSHIFT;
/// Every BD bit that is not part of the block-size field.
const BD_RESERVED_MASK: u8 = !BD_BLOCK_SIZE_MASK;

/// Highest bit of a block's size word; set when the block is stored uncompressed.
const BLOCK_UNCOMPRESSED_SIZE_BIT: u32 = 1 << 31;

/// Magic number that opens a regular LZ4 frame.
const LZ4F_MAGIC_NUMBER: u32 = 0x184D_2204;
/// Magic number that opens a legacy LZ4 frame.
pub(crate) const LZ4F_LEGACY_MAGIC_NUMBER: u32 = 0x184C_2102;
/// Magic numbers that open a skippable frame.
const LZ4F_SKIPPABLE_MAGIC_RANGE: std::ops::RangeInclusive<u32> = 0x184D_2A50..=0x184D_2A5F;

/// Size in bytes of a magic number.
pub(crate) const MAGIC_NUMBER_SIZE: usize = 4;
/// Smallest frame header: magic number + FLG + BD + header checksum.
pub(crate) const MIN_FRAME_INFO_SIZE: usize = MAGIC_NUMBER_SIZE + 3;
/// Largest frame header: the minimum plus content size (8) and dictionary id (4).
pub(crate) const MAX_FRAME_INFO_SIZE: usize = MIN_FRAME_INFO_SIZE + 8 + 4;
/// Size in bytes of the size word that precedes every block.
pub(crate) const BLOCK_INFO_SIZE: usize = 4;
35
/// Predefined block sizes to choose from when compressing data.
///
/// The numeric value of each variant is the block-size code stored in the
/// BD byte of the frame header.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub enum BlockSize {
    /// Detect a suitable block size from the size of the first write call.
    #[default]
    Auto = 0,
    /// 64KB block size.
    Max64KB = 4,
    /// 256KB block size.
    Max256KB = 5,
    /// 1MB block size.
    Max1MB = 6,
    /// 4MB block size.
    Max4MB = 7,
    /// 8MB block size.
    Max8MB = 8,
}

impl BlockSize {
    /// Pick a block size for a buffer of `buf_len` bytes.
    ///
    /// Buffers larger than 256KB get `Max4MB`, buffers larger than 64KB get
    /// `Max256KB`, and everything else gets `Max64KB`.
    pub(crate) fn from_buf_length(buf_len: usize) -> Self {
        if buf_len > BlockSize::Max256KB.get_size() {
            BlockSize::Max4MB
        } else if buf_len > BlockSize::Max64KB.get_size() {
            BlockSize::Max256KB
        } else {
            BlockSize::Max64KB
        }
    }

    /// Size of this block size in bytes.
    ///
    /// # Panics
    ///
    /// Panics for `BlockSize::Auto`, which has no size of its own.
    pub(crate) fn get_size(&self) -> usize {
        const KIB: usize = 1024;
        const MIB: usize = 1024 * KIB;

        match *self {
            BlockSize::Auto => unreachable!(),
            BlockSize::Max64KB => 64 * KIB,
            BlockSize::Max256KB => 256 * KIB,
            BlockSize::Max1MB => MIB,
            BlockSize::Max4MB => 4 * MIB,
            BlockSize::Max8MB => 8 * MIB,
        }
    }
}
79
/// The block modes that can be set on a [`FrameInfo`].
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub enum BlockMode {
    /// Each block is compressed on its own, without reference to other blocks.
    /// This is the default.
    #[default]
    Independent,
    /// A block may reference data from the blocks before it.
    ///
    /// Effective when the stream contains small blocks.
    Linked,
}
92
93// From: https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md
94//
95// General Structure of LZ4 Frame format
96// -------------------------------------
97//
98// | MagicNb | F. Descriptor | Block | (...) | EndMark | C. Checksum |
99// |:-------:|:-------------:| ----- | ----- | ------- | ----------- |
100// | 4 bytes |  3-15 bytes   |       |       | 4 bytes | 0-4 bytes   |
101//
102// Frame Descriptor
103// ----------------
104//
105// | FLG     | BD      | (Content Size) | (Dictionary ID) | HC      |
106// | ------- | ------- |:--------------:|:---------------:| ------- |
107// | 1 byte  | 1 byte  |  0 - 8 bytes   |   0 - 4 bytes   | 1 byte  |
108//
109// __FLG byte__
110//
111// |  BitNb  |  7-6  |   5   |    4     |  3   |    2     |    1     |   0  |
112// | ------- |-------|-------|----------|------|----------|----------|------|
113// |FieldName|Version|B.Indep|B.Checksum|C.Size|C.Checksum|*Reserved*|DictID|
114//
115// __BD byte__
116//
117// |  BitNb  |     7    |     6-5-4     |  3-2-1-0 |
118// | ------- | -------- | ------------- | -------- |
119// |FieldName|*Reserved*| Block MaxSize |*Reserved*|
120//
121// Data Blocks
122// -----------
123//
124// | Block Size |  data  | (Block Checksum) |
125// |:----------:| ------ |:----------------:|
126// |  4 bytes   |        |   0 - 4 bytes    |
127//
128#[derive(Debug, Default, Clone)]
129/// The metadata for de/compressing with lz4 frame format.
130pub struct FrameInfo {
131    /// If set, includes the total uncompressed size of data in the frame.
132    pub content_size: Option<u64>,
133    /// The identifier for the dictionary that must be used to correctly decode data.
134    /// The compressor and the decompressor must use exactly the same dictionary.
135    ///
136    /// Note that this is currently unsupported and for this reason it's not pub.
137    pub(crate) dict_id: Option<u32>,
138    /// The maximum uncompressed size of each data block.
139    pub block_size: BlockSize,
140    /// The block mode.
141    pub block_mode: BlockMode,
142    /// If set, includes a checksum for each data block in the frame.
143    pub block_checksums: bool,
144    /// If set, includes a content checksum to verify that the full frame contents have been
145    /// decoded correctly.
146    pub content_checksum: bool,
147    /// If set, use the legacy frame format
148    pub legacy_frame: bool,
149}
150
151impl FrameInfo {
152    /// Create a new `FrameInfo`.
153    pub fn new() -> Self {
154        Self::default()
155    }
156
157    /// Whether to include the total uncompressed size of data in the frame.
158    pub fn content_size(mut self, content_size: Option<u64>) -> Self {
159        self.content_size = content_size;
160        self
161    }
162
163    /// The maximum uncompressed size of each data block.
164    pub fn block_size(mut self, block_size: BlockSize) -> Self {
165        self.block_size = block_size;
166        self
167    }
168
169    /// The block mode.
170    pub fn block_mode(mut self, block_mode: BlockMode) -> Self {
171        self.block_mode = block_mode;
172        self
173    }
174
175    /// If set, includes a checksum for each data block in the frame.
176    pub fn block_checksums(mut self, block_checksums: bool) -> Self {
177        self.block_checksums = block_checksums;
178        self
179    }
180
181    /// If set, includes a content checksum to verify that the full frame contents have been
182    /// decoded correctly.
183    pub fn content_checksum(mut self, content_checksum: bool) -> Self {
184        self.content_checksum = content_checksum;
185        self
186    }
187
188    /// If set, use the legacy frame format.
189    pub fn legacy_frame(mut self, legacy_frame: bool) -> Self {
190        self.legacy_frame = legacy_frame;
191        self
192    }
193
194    pub(crate) fn read_size(input: &[u8]) -> Result<usize, Error> {
195        let mut required = MIN_FRAME_INFO_SIZE;
196        let magic_num = u32::from_le_bytes(input[0..4].try_into().unwrap());
197        if magic_num == LZ4F_LEGACY_MAGIC_NUMBER {
198            return Ok(MAGIC_NUMBER_SIZE);
199        }
200
201        if input.len() < required {
202            return Ok(required);
203        }
204
205        if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) {
206            return Ok(8);
207        }
208        if magic_num != LZ4F_MAGIC_NUMBER {
209            return Err(Error::WrongMagicNumber);
210        }
211
212        if input[4] & FLG_CONTENT_SIZE != 0 {
213            required += 8;
214        }
215        if input[4] & FLG_DICTIONARY_ID != 0 {
216            required += 4
217        }
218        Ok(required)
219    }
220
221    pub(crate) fn write_size(&self) -> usize {
222        let mut required = MIN_FRAME_INFO_SIZE;
223        if self.content_size.is_some() {
224            required += 8;
225        }
226        if self.dict_id.is_some() {
227            required += 4;
228        }
229        required
230    }
231
232    pub(crate) fn write(&self, output: &mut [u8]) -> Result<usize, Error> {
233        let write_size = self.write_size();
234        if output.len() < write_size {
235            return Err(Error::IoError(io::ErrorKind::UnexpectedEof.into()));
236        }
237        let mut buffer = [0u8; MAX_FRAME_INFO_SIZE];
238        assert!(write_size <= buffer.len());
239        buffer[0..4].copy_from_slice(&LZ4F_MAGIC_NUMBER.to_le_bytes());
240        buffer[4] = FLG_SUPPORTED_VERSION_BITS;
241        if self.block_checksums {
242            buffer[4] |= FLG_BLOCK_CHECKSUMS;
243        }
244        if self.content_checksum {
245            buffer[4] |= FLG_CONTENT_CHECKSUM;
246        }
247        if self.block_mode == BlockMode::Independent {
248            buffer[4] |= FLG_INDEPENDENT_BLOCKS;
249        }
250        buffer[5] = (self.block_size as u8) << BD_BLOCK_SIZE_MASK_RSHIFT;
251
252        // Optional section
253        let mut offset = 6;
254        if let Some(size) = self.content_size {
255            buffer[4] |= FLG_CONTENT_SIZE;
256            buffer[offset..offset + 8].copy_from_slice(&size.to_le_bytes());
257            offset += 8;
258        }
259        if let Some(dict_id) = self.dict_id {
260            buffer[4] |= FLG_DICTIONARY_ID;
261            buffer[offset..offset + 4].copy_from_slice(&dict_id.to_le_bytes());
262            offset += 4;
263        }
264
265        // Header checksum
266        let mut hasher = XxHash32::with_seed(0);
267        hasher.write(&buffer[4..offset]);
268        let header_checksum = (hasher.finish() >> 8) as u8;
269        buffer[offset] = header_checksum;
270        offset += 1;
271
272        debug_assert_eq!(offset, write_size);
273        output[..write_size].copy_from_slice(&buffer[..write_size]);
274        Ok(write_size)
275    }
276
277    pub(crate) fn read(mut input: &[u8]) -> Result<FrameInfo, Error> {
278        let original_input = input;
279        // 4 byte Magic
280        let magic_num = {
281            let mut buffer = [0u8; 4];
282            input.read_exact(&mut buffer)?;
283            u32::from_le_bytes(buffer)
284        };
285        if magic_num == LZ4F_LEGACY_MAGIC_NUMBER {
286            return Ok(FrameInfo {
287                block_size: BlockSize::Max8MB,
288                legacy_frame: true,
289                ..FrameInfo::default()
290            });
291        }
292        if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) {
293            let mut buffer = [0u8; 4];
294            input.read_exact(&mut buffer)?;
295            let user_data_len = u32::from_le_bytes(buffer);
296            return Err(Error::SkippableFrame(user_data_len));
297        }
298        if magic_num != LZ4F_MAGIC_NUMBER {
299            return Err(Error::WrongMagicNumber);
300        }
301
302        // fixed size section
303        let [flg_byte, bd_byte] = {
304            let mut buffer = [0u8, 0];
305            input.read_exact(&mut buffer)?;
306            buffer
307        };
308
309        if flg_byte & FLG_VERSION_MASK != FLG_SUPPORTED_VERSION_BITS {
310            // version is always 01
311            return Err(Error::UnsupportedVersion(flg_byte & FLG_VERSION_MASK));
312        }
313
314        if flg_byte & FLG_RESERVED_MASK != 0 || bd_byte & BD_RESERVED_MASK != 0 {
315            return Err(Error::ReservedBitsSet);
316        }
317
318        let block_mode = if flg_byte & FLG_INDEPENDENT_BLOCKS != 0 {
319            BlockMode::Independent
320        } else {
321            BlockMode::Linked
322        };
323        let content_checksum = flg_byte & FLG_CONTENT_CHECKSUM != 0;
324        let block_checksums = flg_byte & FLG_BLOCK_CHECKSUMS != 0;
325
326        let block_size = match (bd_byte & BD_BLOCK_SIZE_MASK) >> BD_BLOCK_SIZE_MASK_RSHIFT {
327            i @ 0..=3 => return Err(Error::UnsupportedBlocksize(i)),
328            4 => BlockSize::Max64KB,
329            5 => BlockSize::Max256KB,
330            6 => BlockSize::Max1MB,
331            7 => BlockSize::Max4MB,
332            _ => unreachable!(),
333        };
334
335        // var len section
336        let mut content_size = None;
337        if flg_byte & FLG_CONTENT_SIZE != 0 {
338            let mut buffer = [0u8; 8];
339            input.read_exact(&mut buffer).unwrap();
340            content_size = Some(u64::from_le_bytes(buffer));
341        }
342
343        let mut dict_id = None;
344        if flg_byte & FLG_DICTIONARY_ID != 0 {
345            let mut buffer = [0u8; 4];
346            input.read_exact(&mut buffer)?;
347            dict_id = Some(u32::from_le_bytes(buffer));
348        }
349
350        // 1 byte header checksum
351        let expected_checksum = {
352            let mut buffer = [0u8; 1];
353            input.read_exact(&mut buffer)?;
354            buffer[0]
355        };
356        let mut hasher = XxHash32::with_seed(0);
357        hasher.write(&original_input[4..original_input.len() - input.len() - 1]);
358        let header_hash = (hasher.finish() >> 8) as u8;
359        if header_hash != expected_checksum {
360            return Err(Error::HeaderChecksumError);
361        }
362
363        Ok(FrameInfo {
364            content_size,
365            dict_id,
366            block_size,
367            block_mode,
368            block_checksums,
369            content_checksum,
370            legacy_frame: false,
371        })
372    }
373}
374
375#[derive(Debug)]
376pub(crate) enum BlockInfo {
377    Compressed(u32),
378    Uncompressed(u32),
379    EndMark,
380}
381
382impl BlockInfo {
383    pub(crate) fn read(mut input: &[u8]) -> Result<Self, Error> {
384        let mut size_buffer = [0u8; 4];
385        input.read_exact(&mut size_buffer)?;
386        let size = u32::from_le_bytes(size_buffer);
387        if size == 0 {
388            Ok(BlockInfo::EndMark)
389        } else if size & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 {
390            Ok(BlockInfo::Uncompressed(size & !BLOCK_UNCOMPRESSED_SIZE_BIT))
391        } else {
392            Ok(BlockInfo::Compressed(size))
393        }
394    }
395
396    pub(crate) fn write(&self, mut output: &mut [u8]) -> Result<usize, Error> {
397        let value = match self {
398            BlockInfo::Compressed(len) if *len == 0 => return Err(Error::InvalidBlockInfo),
399            BlockInfo::Compressed(len) | BlockInfo::Uncompressed(len)
400                if *len & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 =>
401            {
402                return Err(Error::InvalidBlockInfo)
403            }
404            BlockInfo::Compressed(len) => *len,
405            BlockInfo::Uncompressed(len) => *len | BLOCK_UNCOMPRESSED_SIZE_BIT,
406            BlockInfo::EndMark => 0,
407        };
408        output.write_all(&value.to_le_bytes())?;
409        Ok(4)
410    }
411}