lz4_flex/block/decompress.rs
//! The block decompression algorithm.
use crate::block::{DecompressError, MINMATCH};
use crate::fastcpy_unsafe;
use crate::sink::SliceSink;
use crate::sink::{PtrSink, Sink};
#[allow(unused_imports)]
use alloc::vec::Vec;

/// Copies data to `output_ptr` via a self-referential copy of `match_length` bytes starting at `start`.
#[inline]
unsafe fn duplicate(
    output_ptr: &mut *mut u8,
    output_end: *mut u8,
    start: *const u8,
    match_length: usize,
) {
    // We cannot simply use memcpy or `extend_from_slice`, because these do not allow
    // self-referential copies: http://ticki.github.io/img/lz4_runs_encoding_diagram.svg

    // Considering that `wild_copy_from_src_16` can copy up to `16 - 1` extra bytes,
    // defer to `duplicate_overlapping` in case of an overlapping match
    // OR if the wild copy would copy beyond the end of the output.
    if (output_ptr.offset_from(start) as usize) < match_length + 16 - 1
        || (output_end.offset_from(*output_ptr) as usize) < match_length + 16 - 1
    {
        duplicate_overlapping(output_ptr, start, match_length);
    } else {
        debug_assert!(
            output_ptr.add(match_length / 16 * 16 + ((match_length % 16) != 0) as usize * 16)
                <= output_end
        );
        wild_copy_from_src_16(start, *output_ptr, match_length);
        *output_ptr = output_ptr.add(match_length);
    }
}

#[inline]
fn wild_copy_from_src_16(mut source: *const u8, mut dst_ptr: *mut u8, num_items: usize) {
    // Note: if the compiler auto-vectorizes this, it'll hurt performance!
    // That's not the case for a 16-byte step size, but it is for 8 bytes.
    unsafe {
        let dst_ptr_end = dst_ptr.add(num_items);
        loop {
            core::ptr::copy_nonoverlapping(source, dst_ptr, 16);
            source = source.add(16);
            dst_ptr = dst_ptr.add(16);
            if dst_ptr >= dst_ptr_end {
                break;
            }
        }
    }
}
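
// Sanity check for the wild copy: it steps 16 bytes at a time and may read and write up to
// 15 bytes past `num_items`, so both buffers below are padded past the requested length.
#[test]
fn check_wild_copy() {
    let mut src = [0u8; 32];
    for (i, b) in src.iter_mut().enumerate() {
        *b = i as u8;
    }
    let mut dst = [0u8; 48];
    wild_copy_from_src_16(src.as_ptr(), dst.as_mut_ptr(), 20);
    // Only the first 20 bytes are guaranteed to match; the padding is scratch space.
    assert_eq!(&dst[..20], &src[..20]);
}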

/// Copy function for overlapping matches, i.e. when `start + match_length` overlaps `output_ptr`.
#[inline]
#[cfg_attr(nightly, optimize(size))] // to avoid loop unrolling
unsafe fn duplicate_overlapping(
    output_ptr: &mut *mut u8,
    mut start: *const u8,
    match_length: usize,
) {
    // There is an edge case when output_ptr == start, which causes the decoder to potentially
    // expose up to match_length bytes of uninitialized data in the decompression buffer.
    // To prevent that we write a dummy zero to output, which will zero out output in such cases.
    // This is the same strategy used by the reference C implementation https://github.com/lz4/lz4/pull/772
    output_ptr.write(0u8);
    let dst_ptr_end = output_ptr.add(match_length);

    while output_ptr.add(1) < dst_ptr_end {
        // This manual unrolling is done so that the compiler doesn't unroll in an awful way.
        // Without it, the compiler would unroll/auto-vectorize the copy with a lot of branches,
        // which is not what we want, as large overlapping copies are not that common.
        core::ptr::copy(start, *output_ptr, 1);
        start = start.add(1);
        *output_ptr = output_ptr.add(1);

        core::ptr::copy(start, *output_ptr, 1);
        start = start.add(1);
        *output_ptr = output_ptr.add(1);
    }

    if *output_ptr < dst_ptr_end {
        core::ptr::copy(start, *output_ptr, 1);
        *output_ptr = output_ptr.add(1);
    }
}
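
// Overlapping copies act as run-length expansion: with an offset smaller than the match
// length, already-written bytes are read again. Here an offset of 2 repeats the two-byte
// pattern across the rest of the buffer.
#[test]
fn check_duplicate_overlapping() {
    let mut buf = [1u8, 2, 0, 0, 0, 0];
    unsafe {
        let mut output_ptr = buf.as_mut_ptr().add(2);
        let start = output_ptr.sub(2) as *const u8;
        duplicate_overlapping(&mut output_ptr, start, 4);
    }
    assert_eq!(buf, [1, 2, 1, 2, 1, 2]);
}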

#[inline]
unsafe fn copy_from_dict(
    output_base: *mut u8,
    output_ptr: &mut *mut u8,
    ext_dict: &[u8],
    offset: usize,
    match_length: usize,
) -> usize {
    // If we're here, we know offset > output pos, so we have at least 1 byte to copy from the dict.
    debug_assert!(output_ptr.offset_from(output_base) >= 0);
    debug_assert!(offset > output_ptr.offset_from(output_base) as usize);
    // If unchecked-decode is not enabled, we also know that the offset falls within ext_dict.
    debug_assert!(ext_dict.len() + output_ptr.offset_from(output_base) as usize >= offset);

    let dict_offset = ext_dict.len() + output_ptr.offset_from(output_base) as usize - offset;
    // Can't copy past ext_dict len, as the match may cross dict and output.
    let dict_match_length = match_length.min(ext_dict.len() - dict_offset);
    // TODO test fastcpy_unsafe
    core::ptr::copy_nonoverlapping(
        ext_dict.as_ptr().add(dict_offset),
        *output_ptr,
        dict_match_length,
    );
    *output_ptr = output_ptr.add(dict_match_length);
    dict_match_length
}
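
// With the output still empty, an offset of `ext_dict.len()` addresses the start of the
// dictionary. A match longer than the remaining dict bytes is truncated, and the caller is
// expected to copy the rest from the output itself.
#[test]
fn check_copy_from_dict() {
    let dict = b"abcd";
    let mut out = [0u8; 8];
    let copied = unsafe {
        let mut output_ptr = out.as_mut_ptr();
        copy_from_dict(out.as_mut_ptr(), &mut output_ptr, dict, 2, 8)
    };
    // Offset 2 points at the last 2 dict bytes; only those can be copied here.
    assert_eq!(copied, 2);
    assert_eq!(&out[..2], b"cd");
}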

/// Read an integer.
///
/// In LZ4, we encode small integers in a way that we can have an arbitrary number of bytes. In
/// particular, we add the bytes repeatedly until we hit a non-0xFF byte. When we do, we add
/// this byte to our sum and terminate the loop.
///
/// # Example
///
/// ```notest
/// 255, 255, 255, 4, 2, 3, 4, 6, 7
/// ```
///
/// is decoded to _255 + 255 + 255 + 4 = 769_. The bytes after the first 4 are ignored, because
/// 4 is the first non-0xFF byte.
#[inline]
fn read_integer_ptr(
    input_ptr: &mut *const u8,
    _input_ptr_end: *const u8,
) -> Result<u32, DecompressError> {
    // We start at zero and count upwards.
    let mut n: u32 = 0;
    // If this byte takes value 255 (the maximum value it can take), another byte is read
    // and added to the sum. This repeats until a byte lower than 255 is read.
    loop {
        // Read the next byte and add it to the counter.

        #[cfg(not(feature = "unchecked-decode"))]
        {
            if *input_ptr >= _input_ptr_end {
                return Err(DecompressError::ExpectedAnotherByte);
            }
        }
        let extra = unsafe { input_ptr.read() };
        *input_ptr = unsafe { input_ptr.add(1) };
        n += extra as u32;

        // We continue if we got 255, break otherwise.
        if extra != 0xFF {
            break;
        }
    }

    Ok(n)
}
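
// The byte-wise integer decode in action: two 0xFF bytes plus a terminator of 4 sum to 514,
// and the cursor stops right after the terminator, leaving trailing bytes unread.
#[test]
fn check_read_integer() {
    let data = [255u8, 255, 4, 9];
    let mut input_ptr = data.as_ptr();
    let input_ptr_end = unsafe { data.as_ptr().add(data.len()) };
    assert_eq!(read_integer_ptr(&mut input_ptr, input_ptr_end).unwrap(), 514);
    assert_eq!(unsafe { input_ptr.offset_from(data.as_ptr()) }, 3);
}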

/// Read a little-endian 16-bit integer from the input stream.
#[inline]
fn read_u16_ptr(input_ptr: &mut *const u8) -> u16 {
    let mut num: u16 = 0;
    unsafe {
        core::ptr::copy_nonoverlapping(*input_ptr, &mut num as *mut u16 as *mut u8, 2);
        *input_ptr = input_ptr.add(2);
    }

    u16::from_le(num)
}
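
// Offsets are stored little-endian regardless of the host byte order, hence the
// `u16::from_le` above; the bytes 0x34 0x12 on the wire decode to 0x1234.
#[test]
fn check_read_u16() {
    let data = [0x34u8, 0x12];
    let mut input_ptr = data.as_ptr();
    assert_eq!(read_u16_ptr(&mut input_ptr), 0x1234);
}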

const FIT_TOKEN_MASK_LITERAL: u8 = 0b00001111;
const FIT_TOKEN_MASK_MATCH: u8 = 0b11110000;

#[test]
fn check_token() {
    assert!(!does_token_fit(15));
    assert!(does_token_fit(14));
    assert!(does_token_fit(114));
    assert!(!does_token_fit(0b11110000));
    assert!(does_token_fit(0b10110000));
}

/// The token consists of two parts: the literal length (upper 4 bits) and the match length
/// (lower 4 bits). If both the literal length and the match length are below 15, we don't need
/// to read additional data, so the token fits all the metadata in a single u8.
#[inline]
fn does_token_fit(token: u8) -> bool {
    !((token & FIT_TOKEN_MASK_LITERAL) == FIT_TOKEN_MASK_LITERAL
        || (token & FIT_TOKEN_MASK_MATCH) == FIT_TOKEN_MASK_MATCH)
}

/// Decompress all bytes of `input` into `output`.
///
/// Returns the number of bytes written (decompressed) into `output`.
#[inline]
pub(crate) fn decompress_internal<const USE_DICT: bool, S: Sink>(
    input: &[u8],
    output: &mut S,
    ext_dict: &[u8],
) -> Result<usize, DecompressError> {
    // Prevent segfault for empty input.
    if input.is_empty() {
        return Err(DecompressError::ExpectedAnotherByte);
    }

    let ext_dict = if USE_DICT {
        ext_dict
    } else {
        // Ensure the optimizer knows the ext_dict length is 0 if !USE_DICT.
        debug_assert!(ext_dict.is_empty());
        &[]
    };
    let output_base = unsafe { output.base_mut_ptr() };
    let output_end = unsafe { output_base.add(output.capacity()) };
    let output_start_pos_ptr = unsafe { output.base_mut_ptr().add(output.pos()) as *mut u8 };
    let mut output_ptr = output_start_pos_ptr;

    let mut input_ptr = input.as_ptr();
    let input_ptr_end = unsafe { input.as_ptr().add(input.len()) };
    let safe_distance_from_end =
        (16 /* literal copy */ + 2 /* u16 match offset */ + 1 /* the next token to read (we can skip the check) */)
            .min(input.len());
    let input_ptr_safe = unsafe { input_ptr_end.sub(safe_distance_from_end) };

    let safe_output_ptr = unsafe {
        let mut output_num_safe_bytes = output
            .capacity()
            .saturating_sub(16 /* literal copy */ + 18 /* match copy */);
        if USE_DICT {
            // In the dictionary case the output pointer is moved by the match length in the dictionary.
            // This may be up to 17 bytes without exiting the loop. So we need to ensure that we have
            // at least 17 additional bytes of space left in the output buffer in the fast loop.
            output_num_safe_bytes = output_num_safe_bytes.saturating_sub(17);
        }

        output_base.add(output_num_safe_bytes)
    };

    // Exhaust the decoder by reading and decompressing all blocks until the remaining buffer is
    // empty.
    loop {
        // Read the token. The token is the first byte in a block. It is divided into two 4-bit
        // fields, a higher and a lower, representing the literals' length and the back
        // reference's length, respectively.
        let token = unsafe { input_ptr.read() };
        input_ptr = unsafe { input_ptr.add(1) };

        // Check for the hot loop.
        // In most cases the metadata does fit in a single 1-byte token (statistically) and we
        // are at a safe distance from the end. This enables some optimized handling.
        //
        // Ideally we want to check for the safe output position like: output.pos() <=
        // safe_output_pos; but that doesn't work when safe_output_ptr == output_ptr due to
        // insufficient capacity. So we use `<` instead of `<=`, which covers that case.
        if does_token_fit(token)
            && (input_ptr as usize) <= input_ptr_safe as usize
            && output_ptr < safe_output_ptr
        {
            let literal_length = (token >> 4) as usize;
            let mut match_length = MINMATCH + (token & 0xF) as usize;

            // output_ptr <= safe_output_ptr should guarantee we have enough space in output
            debug_assert!(
                unsafe { output_ptr.add(literal_length + match_length) } <= output_end,
                "{literal_length} + {match_length} {} won't fit ",
                literal_length + match_length
            );

            // Copy the literal.
            // The literal is at most 16 bytes, and the input_ptr_safe check assures
            // that we are far enough away from the end to safely copy 16 bytes.
            unsafe {
                core::ptr::copy_nonoverlapping(input_ptr, output_ptr, 16);
                input_ptr = input_ptr.add(literal_length);
                output_ptr = output_ptr.add(literal_length);
            }

            // input_ptr <= input_ptr_safe should guarantee we have enough space in input
            debug_assert!(input_ptr_end as usize - input_ptr as usize >= 2);
            let offset = read_u16_ptr(&mut input_ptr) as usize;

            let output_len = unsafe { output_ptr.offset_from(output_base) as usize };
            let offset = offset.min(output_len + ext_dict.len());

            // Check if part of the match is in the external dict.
            if USE_DICT && offset > output_len {
                let copied = unsafe {
                    copy_from_dict(output_base, &mut output_ptr, ext_dict, offset, match_length)
                };
                if copied == match_length {
                    continue;
                }
                // The match crosses ext_dict and output.
                match_length -= copied;
            }

            // Calculate the start of this duplicate segment. At this point offset was already
            // checked to be in bounds and the external dictionary copy, if any, was
            // already copied and subtracted from match_length.
            let start_ptr = unsafe { output_ptr.sub(offset) };
            debug_assert!(start_ptr >= output_base);
            debug_assert!(start_ptr < output_end);
            debug_assert!(unsafe { output_end.offset_from(start_ptr) as usize } >= match_length);

            // In this branch we know that match_length is at most 18 (14 + MINMATCH).
            // But the blocks can overlap, so make sure they are at least 18 bytes apart
            // to enable an optimized copy of 18 bytes.
            if offset >= match_length {
                unsafe {
                    // _copy_, not copy_nonoverlapping, as the regions may overlap.
                    // Compiles to the same assembly on x86_64.
                    core::ptr::copy(start_ptr, output_ptr, 18);
                    output_ptr = output_ptr.add(match_length);
                }
            } else {
                unsafe {
                    duplicate_overlapping(&mut output_ptr, start_ptr, match_length);
                }
            }

            continue;
        }

        // Now, we read the literal section.
        // If the initial value is 15, another byte will be read and added to it.
        let mut literal_length = (token >> 4) as usize;
        if literal_length != 0 {
            if literal_length == 15 {
                // The literal length took the maximal value, indicating that there are more
                // than 15 literal bytes. We read the extra integer.
                literal_length += read_integer_ptr(&mut input_ptr, input_ptr_end)? as usize;
            }

            #[cfg(not(feature = "unchecked-decode"))]
            {
                // Check if the literal is out of bounds for the input, and if there is enough
                // space on the output.
                if literal_length > input_ptr_end as usize - input_ptr as usize {
                    return Err(DecompressError::LiteralOutOfBounds);
                }
                if literal_length > unsafe { output_end.offset_from(output_ptr) as usize } {
                    return Err(DecompressError::OutputTooSmall {
                        expected: unsafe { output_ptr.offset_from(output_base) as usize }
                            + literal_length,
                        actual: output.capacity(),
                    });
                }
            }
            unsafe {
                fastcpy_unsafe::slice_copy(input_ptr, output_ptr, literal_length);
                output_ptr = output_ptr.add(literal_length);
                input_ptr = input_ptr.add(literal_length);
            }
        }

        // If the input stream is emptied, we break out of the loop. This is only the case
        // at the end of the stream, since the block is intact otherwise.
        if input_ptr >= input_ptr_end {
            break;
        }

        // Read the duplicate section.
        #[cfg(not(feature = "unchecked-decode"))]
        {
            if (input_ptr_end as usize) - (input_ptr as usize) < 2 {
                return Err(DecompressError::ExpectedAnotherByte);
            }
        }
        let offset = read_u16_ptr(&mut input_ptr) as usize;
        // Obtain the initial match length. The match length is the length of the duplicate segment
        // which will later be copied from data previously decompressed into the output buffer. The
        // initial length is derived from the second part of the token (the lower nibble), which we
        // read earlier. Since having a match length of less than 4 would mean a negative
        // compression ratio, we start at 4 (MINMATCH).

        // The initial match length can maximally be 19 (MINMATCH + 15). As with the literal
        // length, a value of 15 in the token indicates that there are more bytes to read.
        let mut match_length = MINMATCH + (token & 0xF) as usize;
        if match_length == MINMATCH + 15 {
            // The match length took the maximal value, indicating that there are more bytes. We
            // read the extra integer.
            match_length += read_integer_ptr(&mut input_ptr, input_ptr_end)? as usize;
        }

        // We now copy from the already decompressed buffer. This allows us to store duplicates
        // by simply referencing the earlier location.
        let output_len = unsafe { output_ptr.offset_from(output_base) as usize };

        // We'll do a bounds check unless unchecked-decode is enabled.
        #[cfg(not(feature = "unchecked-decode"))]
        {
            if offset > output_len + ext_dict.len() {
                return Err(DecompressError::OffsetOutOfBounds);
            }
            if match_length > unsafe { output_end.offset_from(output_ptr) as usize } {
                return Err(DecompressError::OutputTooSmall {
                    expected: output_len + match_length,
                    actual: output.capacity(),
                });
            }
        }

        if USE_DICT && offset > output_len {
            let copied = unsafe {
                copy_from_dict(output_base, &mut output_ptr, ext_dict, offset, match_length)
            };
            if copied == match_length {
                #[cfg(not(feature = "unchecked-decode"))]
                {
                    if input_ptr >= input_ptr_end {
                        return Err(DecompressError::ExpectedAnotherByte);
                    }
                }

                continue;
            }
            // The match crosses ext_dict and output.
            match_length -= copied;
        }

        // Calculate the start of this duplicate segment. At this point offset was already checked
        // to be in bounds and the external dictionary copy, if any, was already copied and
        // subtracted from match_length.
        let start_ptr = unsafe { output_ptr.sub(offset) };
        debug_assert!(start_ptr >= output_base);
        debug_assert!(start_ptr < output_end);
        debug_assert!(unsafe { output_end.offset_from(start_ptr) as usize } >= match_length);
        unsafe {
            duplicate(&mut output_ptr, output_end, start_ptr, match_length);
        }
        #[cfg(not(feature = "unchecked-decode"))]
        {
            if input_ptr >= input_ptr_end {
                return Err(DecompressError::ExpectedAnotherByte);
            }
        }
    }
    unsafe {
        output.set_pos(output_ptr.offset_from(output_base) as usize);
        Ok(output_ptr.offset_from(output_start_pos_ptr) as usize)
    }
}

/// Decompress all bytes of `input` into `output`.
/// `output` should be preallocated with a size of the uncompressed data.
#[inline]
pub fn decompress_into(input: &[u8], output: &mut [u8]) -> Result<usize, DecompressError> {
    decompress_internal::<false, _>(input, &mut SliceSink::new(output, 0), b"")
}

/// Decompress all bytes of `input` into `output`.
///
/// Returns the number of bytes written (decompressed) into `output`.
#[inline]
pub fn decompress_into_with_dict(
    input: &[u8],
    output: &mut [u8],
    ext_dict: &[u8],
) -> Result<usize, DecompressError> {
    decompress_internal::<true, _>(input, &mut SliceSink::new(output, 0), ext_dict)
}

/// Decompress all bytes of `input` into a new vec.
/// The passed parameter `min_uncompressed_size` needs to be equal to or larger than the
/// uncompressed size.
///
/// # Panics
/// May panic if the parameter `min_uncompressed_size` is smaller than the
/// uncompressed data.
#[inline]
pub fn decompress_with_dict(
    input: &[u8],
    min_uncompressed_size: usize,
    ext_dict: &[u8],
) -> Result<Vec<u8>, DecompressError> {
    // Allocate a vector to contain the decompressed stream.
    let mut vec = Vec::with_capacity(min_uncompressed_size);
    let decomp_len =
        decompress_internal::<true, _>(input, &mut PtrSink::from_vec(&mut vec, 0), ext_dict)?;
    unsafe {
        vec.set_len(decomp_len);
    }
    Ok(vec)
}

/// Decompress all bytes of `input` into a new vec. The first 4 bytes are the uncompressed size in
/// little endian. Can be used in conjunction with `compress_prepend_size`.
#[inline]
pub fn decompress_size_prepended(input: &[u8]) -> Result<Vec<u8>, DecompressError> {
    let (uncompressed_size, input) = super::uncompressed_size(input)?;
    decompress(input, uncompressed_size)
}

/// Decompress all bytes of `input` into a new vec.
/// The passed parameter `min_uncompressed_size` needs to be equal to or larger than the
/// uncompressed size.
///
/// # Panics
/// May panic if the parameter `min_uncompressed_size` is smaller than the
/// uncompressed data.
#[inline]
pub fn decompress(input: &[u8], min_uncompressed_size: usize) -> Result<Vec<u8>, DecompressError> {
    // Allocate a vector to contain the decompressed stream.
    let mut vec = Vec::with_capacity(min_uncompressed_size);
    // No dictionary is passed here, so instantiate the non-dict variant.
    let decomp_len =
        decompress_internal::<false, _>(input, &mut PtrSink::from_vec(&mut vec, 0), b"")?;
    unsafe {
        vec.set_len(decomp_len);
    }
    Ok(vec)
}

/// Decompress all bytes of `input` into a new vec. The first 4 bytes are the uncompressed size in
/// little endian. Can be used in conjunction with `compress_prepend_size_with_dict`.
#[inline]
pub fn decompress_size_prepended_with_dict(
    input: &[u8],
    ext_dict: &[u8],
) -> Result<Vec<u8>, DecompressError> {
    let (uncompressed_size, input) = super::uncompressed_size(input)?;
    decompress_with_dict(input, uncompressed_size, ext_dict)
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn all_literal() {
        assert_eq!(decompress(&[0x30, b'a', b'4', b'9'], 3).unwrap(), b"a49");
    }
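
    // A hand-assembled block with a match: token 0x32 encodes 3 literals ("abc") and a
    // match of length 2 + MINMATCH = 6 at offset 3, which run-length expands the 3-byte
    // pattern; a final literal-only sequence ("d") terminates the block.
    #[test]
    fn literal_and_match() {
        let input = [0x32, b'a', b'b', b'c', 0x03, 0x00, 0x10, b'd'];
        assert_eq!(decompress(&input, 10).unwrap(), b"abcabcabcd");
    }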

    // This error test is only valid with checked decoding, i.e. when the
    // unchecked-decode feature is disabled.
    #[cfg(not(feature = "unchecked-decode"))]
    #[test]
    fn offset_oob() {
        decompress(&[0x10, b'a', 2, 0], 4).unwrap_err();
        decompress(&[0x40, b'a', 1, 0], 4).unwrap_err();
    }
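
    // A hand-assembled block for the dict path: with an empty output, offset 3 reaches
    // back into the external dictionary b"abc"; the 4-byte match crosses from the dict
    // into the output, and a final literal sequence ("!") terminates the block.
    #[test]
    fn match_crossing_ext_dict() {
        let input = [0x00, 0x03, 0x00, 0x10, b'!'];
        assert_eq!(decompress_with_dict(&input, 5, b"abc").unwrap(), b"abca!");
    }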
545}