Skip to main content

lz4_flex/block/
decompress.rs

1//! The block decompression algorithm.
2use crate::block::{DecompressError, MINMATCH};
3use crate::fastcpy_unsafe;
4use crate::sink::SliceSink;
5use crate::sink::{PtrSink, Sink};
6#[allow(unused_imports)]
7use alloc::vec::Vec;
8
9/// Copies data to output_ptr by self-referential copy from start and match_length
10#[inline]
11unsafe fn duplicate(
12    output_ptr: &mut *mut u8,
13    output_end: *mut u8,
14    start: *const u8,
15    match_length: usize,
16) {
17    // We cannot simply use memcpy or `extend_from_slice`, because these do not allow
18    // self-referential copies: http://ticki.github.io/img/lz4_runs_encoding_diagram.svg
19
20    // Considering that `wild_copy_match_16` can copy up to `16 - 1` extra bytes.
21    // Defer to `duplicate_overlapping` in case of an overlapping match
22    // OR the if the wild copy would copy beyond the end of the output.
23    if (output_ptr.offset_from(start) as usize) < match_length + 16 - 1
24        || (output_end.offset_from(*output_ptr) as usize) < match_length + 16 - 1
25    {
26        duplicate_overlapping(output_ptr, start, match_length);
27    } else {
28        debug_assert!(
29            output_ptr.add(match_length / 16 * 16 + ((match_length % 16) != 0) as usize * 16)
30                <= output_end
31        );
32        wild_copy_from_src_16(start, *output_ptr, match_length);
33        *output_ptr = output_ptr.add(match_length);
34    }
35}
36
37#[inline]
38fn wild_copy_from_src_16(mut source: *const u8, mut dst_ptr: *mut u8, num_items: usize) {
39    // Note: if the compiler auto-vectorizes this it'll hurt performance!
40    // It's not the case for 16 bytes stepsize, but for 8 bytes.
41    unsafe {
42        let dst_ptr_end = dst_ptr.add(num_items);
43        loop {
44            core::ptr::copy_nonoverlapping(source, dst_ptr, 16);
45            source = source.add(16);
46            dst_ptr = dst_ptr.add(16);
47            if dst_ptr >= dst_ptr_end {
48                break;
49            }
50        }
51    }
52}
53
54/// Copy function, if the data start + match_length overlaps into output_ptr
55#[inline]
56#[cfg_attr(feature = "nightly", optimize(size))] // to avoid loop unrolling
57unsafe fn duplicate_overlapping(
58    output_ptr: &mut *mut u8,
59    mut start: *const u8,
60    match_length: usize,
61) {
62    let dst_ptr_end = output_ptr.add(match_length);
63
64    while output_ptr.add(1) < dst_ptr_end {
65        // Note that this loop unrolling is done, so that the compiler doesn't do it in a awful
66        // way.
67        // Without that the compiler will unroll/auto-vectorize the copy with a lot of branches.
68        // This is not what we want, as large overlapping copies are not that common.
69        core::ptr::copy(start, *output_ptr, 1);
70        start = start.add(1);
71        *output_ptr = output_ptr.add(1);
72
73        core::ptr::copy(start, *output_ptr, 1);
74        start = start.add(1);
75        *output_ptr = output_ptr.add(1);
76    }
77
78    if *output_ptr < dst_ptr_end {
79        core::ptr::copy(start, *output_ptr, 1);
80        *output_ptr = output_ptr.add(1);
81    }
82}
83
84#[inline]
85unsafe fn copy_from_dict(
86    output_base: *mut u8,
87    output_ptr: &mut *mut u8,
88    ext_dict: &[u8],
89    offset: usize,
90    match_length: usize,
91) -> usize {
92    // If we're here we know offset > output pos, so we have at least 1 byte to copy from dict
93    debug_assert!(output_ptr.offset_from(output_base) >= 0);
94    debug_assert!(offset > output_ptr.offset_from(output_base) as usize);
95    // offset falls within ext_dict
96    debug_assert!(ext_dict.len() + output_ptr.offset_from(output_base) as usize >= offset);
97
98    let dict_offset = ext_dict.len() + output_ptr.offset_from(output_base) as usize - offset;
99    // Can't copy past ext_dict len, the match may cross dict and output
100    let dict_match_length = match_length.min(ext_dict.len() - dict_offset);
101    // TODO test fastcpy_unsafe
102    core::ptr::copy_nonoverlapping(
103        ext_dict.as_ptr().add(dict_offset),
104        *output_ptr,
105        dict_match_length,
106    );
107    *output_ptr = output_ptr.add(dict_match_length);
108    dict_match_length
109}
110
111/// Read an integer.
112///
113/// In LZ4, we encode small integers in a way that we can have an arbitrary number of bytes. In
114/// particular, we add the bytes repeatedly until we hit a non-0xFF byte. When we do, we add
115/// this byte to our sum and terminate the loop.
116///
117/// # Example
118///
119/// ```notest
120///     255, 255, 255, 4, 2, 3, 4, 6, 7
121/// ```
122///
123/// is encoded to _255 + 255 + 255 + 4 = 769_. The bytes after the first 4 is ignored, because
124/// 4 is the first non-0xFF byte.
125#[inline]
126pub(super) fn read_integer_ptr(
127    input_ptr: &mut *const u8,
128    _input_ptr_end: *const u8,
129) -> Result<usize, DecompressError> {
130    // We start at zero and count upwards.
131    let mut n: usize = 0;
132    // If this byte takes value 255 (the maximum value it can take), another byte is read
133    // and added to the sum. This repeats until a byte lower than 255 is read.
134    loop {
135        // We add the next byte until we get a byte which we add to the counting variable.
136
137        // could be skipped with unchecked-decode
138        {
139            if *input_ptr >= _input_ptr_end {
140                return Err(DecompressError::ExpectedAnotherByte);
141            }
142        }
143        let extra = unsafe { input_ptr.read() };
144        *input_ptr = unsafe { input_ptr.add(1) };
145        n += extra as usize;
146
147        // We continue if we got 255, break otherwise.
148        if extra != 0xFF {
149            break;
150        }
151    }
152
153    // 255, 255, 255, 8
154    // 111, 111, 111, 101
155
156    Ok(n)
157}
158
159/// Read the match offset as a little-endian 16-bit integer from the input stream.
160#[inline]
161fn read_match_offset(input_ptr: &mut *const u8) -> Result<u16, DecompressError> {
162    let mut num: u16 = 0;
163    unsafe {
164        core::ptr::copy_nonoverlapping(*input_ptr, &mut num as *mut u16 as *mut u8, 2);
165        *input_ptr = input_ptr.add(2);
166    }
167
168    let offset = u16::from_le(num);
169    if offset == 0 {
170        Err(DecompressError::OffsetZero)
171    } else {
172        Ok(offset)
173    }
174}
175
176const FIT_TOKEN_MASK_LITERAL: u8 = 0b00001111;
177const FIT_TOKEN_MASK_MATCH: u8 = 0b11110000;
178
179#[test]
180fn check_token() {
181    assert!(!does_token_fit(15));
182    assert!(does_token_fit(14));
183    assert!(does_token_fit(114));
184    assert!(!does_token_fit(0b11110000));
185    assert!(does_token_fit(0b10110000));
186}
187
188/// The token consists of two parts, the literal length (upper 4 bits) and match_length (lower 4
189/// bits) if the literal length and match_length are both below 15, we don't need to read additional
190/// data, so the token does fit the metadata in a single u8.
191#[inline]
192fn does_token_fit(token: u8) -> bool {
193    !((token & FIT_TOKEN_MASK_LITERAL) == FIT_TOKEN_MASK_LITERAL
194        || (token & FIT_TOKEN_MASK_MATCH) == FIT_TOKEN_MASK_MATCH)
195}
196
197/// Decompress all bytes of `input` into `output`.
198///
199/// Returns the number of bytes written (decompressed) into `output`.
200#[inline]
201pub(crate) fn decompress_internal<const USE_DICT: bool, S: Sink>(
202    input: &[u8],
203    output: &mut S,
204    ext_dict: &[u8],
205) -> Result<usize, DecompressError> {
206    // Prevent segfault for empty input
207    if input.is_empty() {
208        return Err(DecompressError::ExpectedAnotherByte);
209    }
210
211    let ext_dict = if USE_DICT {
212        ext_dict
213    } else {
214        // ensure optimizer knows ext_dict length is 0 if !USE_DICT
215        debug_assert!(ext_dict.is_empty());
216        &[]
217    };
218    let output_base = unsafe { output.base_mut_ptr() };
219    let output_end = unsafe { output_base.add(output.capacity()) };
220    let output_start_pos_ptr = unsafe { output.base_mut_ptr().add(output.pos()) as *mut u8 };
221    let mut output_ptr = output_start_pos_ptr;
222
223    let mut input_ptr = input.as_ptr();
224    let input_ptr_end = unsafe { input.as_ptr().add(input.len()) };
225    let safe_distance_from_end =  (16 /* literal copy */ +  2 /* u16 match offset */ + 1 /* The next token to read (we can skip the check) */).min(input.len()) ;
226    let input_ptr_safe = unsafe { input_ptr_end.sub(safe_distance_from_end) };
227
228    let safe_output_ptr = unsafe {
229        let mut output_num_safe_bytes = output
230            .capacity()
231            .saturating_sub(16 /* literal copy */ + 18 /* match copy */);
232        if USE_DICT {
233            // In the dictionary case the output pointer is moved by the match length in the dictionary.
234            // This may be up to 17 bytes without exiting the loop. So we need to ensure that we have
235            // at least additional 17 bytes of space left in the output buffer in the fast loop.
236            output_num_safe_bytes = output_num_safe_bytes.saturating_sub(17);
237        };
238
239        output_base.add(output_num_safe_bytes)
240    };
241
242    // Exhaust the decoder by reading and decompressing all blocks until the remaining buffer is
243    // empty.
244    loop {
245        // Read the token. The token is the first byte in a block. It is divided into two 4-bit
246        // subtokens, the higher and the lower.
247        // This token contains to 4-bit "fields", a higher and a lower, representing the literals'
248        // length and the back reference's length, respectively.
249        let token = unsafe { input_ptr.read() };
250        input_ptr = unsafe { input_ptr.add(1) };
251
252        // Checking for hot-loop.
253        // In most cases the metadata does fit in a single 1byte token (statistically) and we are in
254        // a safe-distance to the end. This enables some optimized handling.
255        //
256        // Ideally we want to check for safe output pos like: output.pos() <= safe_output_pos; But
257        // that doesn't work when the safe_output_ptr is == output_ptr due to insufficient
258        // capacity. So we use `<` instead of `<=`, which covers that case.
259        if does_token_fit(token)
260            && (input_ptr as usize) <= input_ptr_safe as usize
261            && output_ptr < safe_output_ptr
262        {
263            let literal_length = (token >> 4) as usize;
264            let mut match_length = MINMATCH + (token & 0xF) as usize;
265
266            // output_ptr <= safe_output_ptr should guarantee we have enough space in output
267            debug_assert!(
268                unsafe { output_ptr.add(literal_length + match_length) } <= output_end,
269                "{literal_length} + {match_length} {} wont fit ",
270                literal_length + match_length
271            );
272
273            // Copy the literal
274            // The literal is at max 16 bytes, and the is_safe_distance check assures
275            // that we are far away enough from the end so we can safely copy 16 bytes
276            unsafe {
277                core::ptr::copy_nonoverlapping(input_ptr, output_ptr, 16);
278                input_ptr = input_ptr.add(literal_length);
279                output_ptr = output_ptr.add(literal_length);
280            }
281
282            // input_ptr <= input_ptr_safe should guarantee we have enough space in input
283            debug_assert!(input_ptr_end as usize - input_ptr as usize >= 2);
284            let offset = read_match_offset(&mut input_ptr)? as usize;
285
286            let output_len = unsafe { output_ptr.offset_from(output_base) as usize };
287            if offset > output_len + ext_dict.len() {
288                return Err(DecompressError::OffsetOutOfBounds);
289            }
290
291            // Check if part of the match is in the external dict
292            if USE_DICT && offset > output_len {
293                let copied = unsafe {
294                    copy_from_dict(output_base, &mut output_ptr, ext_dict, offset, match_length)
295                };
296                if copied == match_length {
297                    continue;
298                }
299                // match crosses ext_dict and output
300                match_length -= copied;
301            }
302
303            // Calculate the start of this duplicate segment. At this point offset was already
304            // checked to be in bounds and the external dictionary copy, if any, was
305            // already copied and subtracted from match_length.
306            let start_ptr = unsafe { output_ptr.sub(offset) };
307            debug_assert!(start_ptr >= output_base);
308            debug_assert!(start_ptr < output_end);
309            debug_assert!(unsafe { output_end.offset_from(start_ptr) as usize } >= match_length);
310
311            // In this branch we know that match_length is at most 18 (14 + MINMATCH).
312            // But the blocks can overlap, so make sure they are at least 18 bytes apart
313            // to enable an optimized copy of 18 bytes.
314            if offset >= match_length {
315                unsafe {
316                    // _copy_, not copy_non_overlapping, as it may overlap.
317                    // Compiles to the same assembly on x68_64.
318                    core::ptr::copy(start_ptr, output_ptr, 18);
319                    output_ptr = output_ptr.add(match_length);
320                }
321            } else {
322                unsafe {
323                    duplicate_overlapping(&mut output_ptr, start_ptr, match_length);
324                }
325            }
326
327            continue;
328        }
329
330        // Now, we read the literals section.
331        // Literal Section
332        // If the initial value is 15, it is indicated that another byte will be read and added to
333        // it
334        let mut literal_length = (token >> 4) as usize;
335        if literal_length != 0 {
336            if literal_length == 15 {
337                // The literal_length length took the maximal value, indicating that there is more
338                // than 15 literal_length bytes. We read the extra integer.
339                literal_length += read_integer_ptr(&mut input_ptr, input_ptr_end)? as usize;
340            }
341
342            // could be skipped with unchecked-decode
343            {
344                // Check if literal is out of bounds for the input, and if there is enough space on
345                // the output
346                if literal_length > input_ptr_end as usize - input_ptr as usize {
347                    return Err(DecompressError::LiteralOutOfBounds);
348                }
349                if literal_length > unsafe { output_end.offset_from(output_ptr) as usize } {
350                    return Err(DecompressError::OutputTooSmall {
351                        expected: unsafe { output_ptr.offset_from(output_base) as usize }
352                            + literal_length,
353                        actual: output.capacity(),
354                    });
355                }
356            }
357            unsafe {
358                fastcpy_unsafe::slice_copy(input_ptr, output_ptr, literal_length);
359                output_ptr = output_ptr.add(literal_length);
360                input_ptr = input_ptr.add(literal_length);
361            }
362        }
363
364        // If the input stream is emptied, we break out of the loop. This is only the case
365        // in the end of the stream, since the block is intact otherwise.
366        if input_ptr >= input_ptr_end {
367            break;
368        }
369
370        // Read duplicate section
371        // could be skipped with unchecked-decode
372        {
373            if (input_ptr_end as usize) - (input_ptr as usize) < 2 {
374                return Err(DecompressError::ExpectedAnotherByte);
375            }
376        }
377        let offset = read_match_offset(&mut input_ptr)? as usize;
378        // Obtain the initial match length. The match length is the length of the duplicate segment
379        // which will later be copied from data previously decompressed into the output buffer. The
380        // initial length is derived from the second part of the token (the lower nibble), we read
381        // earlier. Since having a match length of less than 4 would mean negative compression
382        // ratio, we start at 4 (MINMATCH).
383
384        // The initial match length can maximally be 19 (MINMATCH + 15). As with the literal length,
385        // this indicates that there are more bytes to read.
386        let mut match_length = MINMATCH + (token & 0xF) as usize;
387        if match_length == MINMATCH + 15 {
388            // The match length took the maximal value, indicating that there is more bytes. We
389            // read the extra integer.
390            match_length += read_integer_ptr(&mut input_ptr, input_ptr_end)? as usize;
391        }
392
393        // We now copy from the already decompressed buffer. This allows us for storing duplicates
394        // by simply referencing the other location.
395        let output_len = unsafe { output_ptr.offset_from(output_base) as usize };
396
397        // could be skipped with unchecked-decode
398        {
399            if offset > output_len + ext_dict.len() {
400                return Err(DecompressError::OffsetOutOfBounds);
401            }
402            if match_length > unsafe { output_end.offset_from(output_ptr) as usize } {
403                return Err(DecompressError::OutputTooSmall {
404                    expected: output_len + match_length,
405                    actual: output.capacity(),
406                });
407            }
408        }
409
410        if USE_DICT && offset > output_len {
411            let copied = unsafe {
412                copy_from_dict(output_base, &mut output_ptr, ext_dict, offset, match_length)
413            };
414            if copied == match_length {
415                // could be skipped with unchecked-decode
416                {
417                    if input_ptr >= input_ptr_end {
418                        return Err(DecompressError::ExpectedAnotherByte);
419                    }
420                }
421
422                continue;
423            }
424            // match crosses ext_dict and output
425            match_length -= copied;
426        }
427
428        // Calculate the start of this duplicate segment. At this point offset was already checked
429        // to be in bounds and the external dictionary copy, if any, was already copied and
430        // subtracted from match_length.
431        let start_ptr = unsafe { output_ptr.sub(offset) };
432        debug_assert!(start_ptr >= output_base);
433        debug_assert!(start_ptr < output_end);
434        debug_assert!(unsafe { output_end.offset_from(start_ptr) as usize } >= match_length);
435        unsafe {
436            duplicate(&mut output_ptr, output_end, start_ptr, match_length);
437        }
438        // could be skipped with unchecked-decode
439        {
440            if input_ptr >= input_ptr_end {
441                return Err(DecompressError::ExpectedAnotherByte);
442            }
443        }
444    }
445    unsafe {
446        output.set_pos(output_ptr.offset_from(output_base) as usize);
447        Ok(output_ptr.offset_from(output_start_pos_ptr) as usize)
448    }
449}
450
451/// Decompress all bytes of `input` into `output`.
452/// `output` should be preallocated with a size of of the uncompressed data.
453#[inline]
454pub fn decompress_into(input: &[u8], output: &mut [u8]) -> Result<usize, DecompressError> {
455    decompress_internal::<false, _>(input, &mut SliceSink::new(output, 0), b"")
456}
457
458/// Decompress all bytes of `input` into `output`.
459///
460/// Returns the number of bytes written (decompressed) into `output`.
461#[inline]
462pub fn decompress_into_with_dict(
463    input: &[u8],
464    output: &mut [u8],
465    ext_dict: &[u8],
466) -> Result<usize, DecompressError> {
467    decompress_internal::<true, _>(input, &mut SliceSink::new(output, 0), ext_dict)
468}
469
470/// Decompress all bytes of `input` into a new vec.
471/// The passed parameter `min_uncompressed_size` needs to be equal or larger than the uncompressed size.
472///
473/// # Panics
474/// May panic if the parameter `min_uncompressed_size` is smaller than the
475/// uncompressed data.
476
477#[inline]
478pub fn decompress_with_dict(
479    input: &[u8],
480    min_uncompressed_size: usize,
481    ext_dict: &[u8],
482) -> Result<Vec<u8>, DecompressError> {
483    // Allocate a vector to contain the decompressed stream.
484    let mut vec = Vec::with_capacity(min_uncompressed_size);
485    let decomp_len =
486        decompress_internal::<true, _>(input, &mut PtrSink::from_vec(&mut vec, 0), ext_dict)?;
487    unsafe {
488        vec.set_len(decomp_len);
489    }
490    Ok(vec)
491}
492
493/// Decompress all bytes of `input` into a new vec. The first 4 bytes are the uncompressed size in
494/// little endian. Can be used in conjunction with `compress_prepend_size`
495#[inline]
496pub fn decompress_size_prepended(input: &[u8]) -> Result<Vec<u8>, DecompressError> {
497    let (uncompressed_size, input) = super::uncompressed_size(input)?;
498    decompress(input, uncompressed_size)
499}
500
501/// Decompress all bytes of `input` into a new vec.
502/// The passed parameter `min_uncompressed_size` needs to be equal or larger than the uncompressed size.
503///
504/// # Panics
505/// May panic if the parameter `min_uncompressed_size` is smaller than the
506/// uncompressed data.
507#[inline]
508pub fn decompress(input: &[u8], min_uncompressed_size: usize) -> Result<Vec<u8>, DecompressError> {
509    // Allocate a vector to contain the decompressed stream.
510    let mut vec = Vec::with_capacity(min_uncompressed_size);
511    let decomp_len =
512        decompress_internal::<true, _>(input, &mut PtrSink::from_vec(&mut vec, 0), b"")?;
513    unsafe {
514        vec.set_len(decomp_len);
515    }
516    Ok(vec)
517}
518
519/// Decompress all bytes of `input` into a new vec. The first 4 bytes are the uncompressed size in
520/// little endian. Can be used in conjunction with `compress_prepend_size_with_dict`
521#[inline]
522pub fn decompress_size_prepended_with_dict(
523    input: &[u8],
524    ext_dict: &[u8],
525) -> Result<Vec<u8>, DecompressError> {
526    let (uncompressed_size, input) = super::uncompressed_size(input)?;
527    decompress_with_dict(input, uncompressed_size, ext_dict)
528}
529
530#[cfg(test)]
531mod test {
532    use super::*;
533
534    #[test]
535    fn all_literal() {
536        assert_eq!(decompress(&[0x30, b'a', b'4', b'9'], 3).unwrap(), b"a49");
537    }
538
539    #[test]
540    fn incomplete_input() {
541        assert!(matches!(
542            decompress(&[], 255),
543            Err(DecompressError::ExpectedAnotherByte)
544        ));
545        assert!(matches!(
546            // incomplete literal len
547            decompress(&[0xF0], 255),
548            Err(DecompressError::ExpectedAnotherByte)
549        ));
550        assert!(matches!(
551            // incomplete match offset
552            decompress(&[0x0F, 0], 255),
553            Err(DecompressError::ExpectedAnotherByte)
554        ));
555        assert!(matches!(
556            // incomplete match len
557            decompress(&[0x0F, 1, 0], 255),
558            Err(DecompressError::ExpectedAnotherByte)
559        ));
560    }
561
562    // this error test is only valid in safe-decode.
563    #[test]
564    fn offset_oob() {
565        // incomplete literal
566        assert!(matches!(
567            decompress(&[0x40, b'a', 1, 0], 4),
568            Err(DecompressError::LiteralOutOfBounds)
569        ));
570        // literal too large for output
571        assert!(matches!(
572            decompress(&[0x20, b'a', b'a', 1, 0], 1),
573            Err(DecompressError::OutputTooSmall {
574                expected: 2,
575                actual: 1
576            })
577        ));
578        // match too large for output
579        assert!(matches!(
580            decompress(&[0x10, b'a', 1, 0], 4),
581            Err(DecompressError::OutputTooSmall {
582                expected: 5,
583                actual: 4
584            })
585        ));
586
587        // out-of-bounds hot-loop
588        assert!(matches!(
589            decompress(
590                &[0x0E, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
591                256
592            ),
593            Err(DecompressError::OffsetOutOfBounds)
594        ));
595        // out-of-bounds for dict
596        assert!(matches!(
597            decompress_with_dict(
598                &[0x0E, 255, 0, 0x70, 0, 0, 0, 0, 0, 0, 0],
599                256,
600                &[0_u8; 250]
601            ),
602            Err(DecompressError::OffsetOutOfBounds)
603        ));
604        // out-of-bounds non-hot-loop overlapping
605        assert!(matches!(
606            decompress(&[0x0F, 1, 0, 1, 0x70, 0, 0, 0, 0, 0, 0, 0], 256),
607            Err(DecompressError::OffsetOutOfBounds)
608        ));
609        // out-of-bounds non-hot-loop non-overlapping
610        assert!(matches!(
611            decompress(&[0x40, 0, 0, 0, 0, 255, 0, 0x70, 0, 0, 0, 0, 0, 0, 0], 256),
612            Err(DecompressError::OffsetOutOfBounds)
613        ));
614    }
615
616    #[test]
617    fn offset_0() {
618        assert!(matches!(
619            decompress(&[0x0E, 0, 0, 0x70, 0, 0, 0, 0, 0, 0, 0], 256),
620            Err(DecompressError::OffsetZero)
621        ));
622    }
623}