1use std::ffi::CString;
2use std::io::{BufRead, Error, ErrorKind, Read, Result, Write};
3use std::time;
4
5use crate::bufreader::BufReader;
6use crate::{Compression, Crc};
7
/// Bit of the header flags byte (byte 3) indicating that a 16-bit header
/// checksum follows the optional fields; the parser verifies it when set.
pub static FHCRC: u8 = 1 << 1;
/// Bit of the header flags byte indicating that a length-prefixed "extra"
/// field is present (a little-endian `u16` length followed by that many bytes).
pub static FEXTRA: u8 = 1 << 2;
/// Bit of the header flags byte indicating that a NUL-terminated file name
/// is present.
pub static FNAME: u8 = 1 << 3;
/// Bit of the header flags byte indicating that a NUL-terminated comment is
/// present.
pub static FCOMMENT: u8 = 1 << 4;
/// Mask of the flag bits the header parser does not accept: a header with
/// any of these bits set is rejected as invalid.
pub static FRESERVED: u8 = 1 << 5 | 1 << 6 | 1 << 7;
13
14pub mod bufread;
15pub mod read;
16pub mod write;
17
/// Upper bound, in bytes, on a NUL-terminated header field (file name or
/// comment) accumulated by `read_to_nul`; a longer field makes parsing fail
/// with `ErrorKind::InvalidInput` instead of growing the buffer further.
const MAX_HEADER_BUF: usize = 65535;
21
/// The metadata fields carried by a gzip header.
///
/// Values of this type are filled in by the header parser; the fields are
/// private and exposed through the read-only accessors below.
#[derive(PartialEq, Clone, Debug, Default)]
pub struct GzHeader {
    /// Contents of the optional "extra" field, if the header had one.
    extra: Option<Vec<u8>>,
    /// Raw bytes of the optional file name, without the terminating NUL.
    filename: Option<Vec<u8>>,
    /// Raw bytes of the optional comment, without the terminating NUL.
    comment: Option<Vec<u8>>,
    /// Operating-system identifier byte (byte 9 of the fixed header).
    operating_system: u8,
    /// Modification time in seconds since the Unix epoch; `0` is treated as
    /// "not available" by [`GzHeader::mtime_as_datetime`].
    mtime: u32,
}

impl GzHeader {
    /// Returns the file name stored in the header, if any, as raw bytes.
    pub fn filename(&self) -> Option<&[u8]> {
        self.filename.as_deref()
    }

    /// Returns the contents of the header's "extra" field, if any.
    pub fn extra(&self) -> Option<&[u8]> {
        self.extra.as_deref()
    }

    /// Returns the comment stored in the header, if any, as raw bytes.
    pub fn comment(&self) -> Option<&[u8]> {
        self.comment.as_deref()
    }

    /// Returns the operating-system identifier byte exactly as stored.
    pub fn operating_system(&self) -> u8 {
        self.operating_system
    }

    /// Returns the raw modification time: seconds since the Unix epoch.
    pub fn mtime(&self) -> u32 {
        self.mtime
    }

    /// Returns the modification time as a `SystemTime`.
    ///
    /// A stored value of `0` yields `None`; any other value is interpreted as
    /// whole seconds after `UNIX_EPOCH`.
    pub fn mtime_as_datetime(&self) -> Option<time::SystemTime> {
        match self.mtime {
            0 => None,
            secs => {
                let since_epoch = time::Duration::from_secs(u64::from(secs));
                Some(time::UNIX_EPOCH + since_epoch)
            }
        }
    }
}
89
/// Position of [`GzHeaderParser`] within a gzip header.
///
/// Each variant carries whatever partial progress is needed to continue that
/// section later: a count of bytes read so far, a small fixed buffer, and,
/// when the header declares a checksum, the running `Crc` over the header
/// bytes seen so far (`None` otherwise).
#[derive(Debug, Default)]
pub enum GzHeaderState {
    /// Reading the fixed 10-byte header: bytes buffered so far, and the buffer.
    Start(u8, [u8; 10]),
    /// Reading the 2-byte length of the "extra" field (skipped when `FEXTRA`
    /// is not set): running CRC, bytes buffered so far, and the buffer.
    Xlen(Option<Box<Crc>>, u8, [u8; 2]),
    /// Reading the "extra" field payload: running CRC and bytes read so far.
    Extra(Option<Box<Crc>>, u16),
    /// Reading the NUL-terminated file name (skipped when `FNAME` is not set).
    Filename(Option<Box<Crc>>),
    /// Reading the NUL-terminated comment (skipped when `FCOMMENT` is not set).
    Comment(Option<Box<Crc>>),
    /// Reading and checking the 2-byte header checksum (only when a CRC was
    /// being tracked): running CRC, bytes buffered so far, and the buffer.
    Crc(Option<Box<Crc>>, u8, [u8; 2]),
    /// The whole header has been parsed. This is also the `Default` state.
    #[default]
    Complete,
}
101
/// Incremental parser for a gzip header.
///
/// All progress is kept in `state` (plus the partially filled `header`), so
/// `parse` can be called again after an I/O error and continues from the
/// section it had reached rather than starting over.
#[derive(Debug, Default)]
pub struct GzHeaderParser {
    /// Section of the header currently being read.
    state: GzHeaderState,
    /// Flags byte (byte 3 of the fixed header); decides which optional
    /// sections are present.
    flags: u8,
    /// Header fields collected so far.
    header: GzHeader,
}

impl GzHeaderParser {
    /// Creates a parser positioned at the very start of a header.
    ///
    /// Note this differs from the derived `Default`, whose state is
    /// `GzHeaderState::Complete`.
    fn new() -> Self {
        GzHeaderParser {
            state: GzHeaderState::Start(0, [0; 10]),
            flags: 0,
            header: GzHeader::default(),
        }
    }

    /// Reads from `r` until the header is complete.
    ///
    /// Returns `Ok(())` once the state is `Complete` (immediately, if it
    /// already is). Returns an error if the fixed header is not acceptable
    /// (`bad_header`), if the stored header checksum does not match
    /// (`corrupt`), if the input ends early, or if the reader fails. On a
    /// reader error the state reached so far is kept.
    fn parse<R: BufRead>(&mut self, r: &mut R) -> Result<()> {
        loop {
            match &mut self.state {
                GzHeaderState::Start(count, buffer) => {
                    // Fill the 10-byte fixed header; `count` lives in the state so a
                    // later call resumes where this one stopped.
                    while (*count as usize) < buffer.len() {
                        *count += read_into(r, &mut buffer[*count as usize..])? as u8;
                    }
                    // The first two bytes must be the gzip identification bytes.
                    if buffer[0] != 0x1f || buffer[1] != 0x8b {
                        return Err(bad_header());
                    }
                    // The third byte must be 8 (the same value the builder writes).
                    if buffer[2] != 8 {
                        return Err(bad_header());
                    }
                    self.flags = buffer[3];
                    if self.flags & FRESERVED != 0 {
                        return Err(bad_header());
                    }
                    // Bytes 4..8 hold the modification time, little-endian.
                    self.header.mtime = (buffer[4] as u32)
                        | ((buffer[5] as u32) << 8)
                        | ((buffer[6] as u32) << 16)
                        | ((buffer[7] as u32) << 24);
                    // Byte 8 is read but deliberately not stored.
                    let _xfl = buffer[8];
                    self.header.operating_system = buffer[9];
                    // Only track a CRC when the header says one will follow.
                    let crc = if self.flags & FHCRC != 0 {
                        let mut crc = Box::new(Crc::new());
                        crc.update(buffer);
                        Some(crc)
                    } else {
                        None
                    };
                    self.state = GzHeaderState::Xlen(crc, 0, [0; 2]);
                }
                GzHeaderState::Xlen(crc, count, buffer) => {
                    if self.flags & FEXTRA != 0 {
                        while (*count as usize) < buffer.len() {
                            *count += read_into(r, &mut buffer[*count as usize..])? as u8;
                        }
                        if let Some(crc) = crc {
                            crc.update(buffer);
                        }
                        let xlen = parse_le_u16(buffer);
                        // Pre-size the extra buffer; the `Extra` state fills it in place.
                        self.header.extra = Some(vec![0; xlen as usize]);
                        self.state = GzHeaderState::Extra(crc.take(), 0);
                    } else {
                        // No extra field: go straight to the file name section.
                        self.state = GzHeaderState::Filename(crc.take());
                    }
                }
                GzHeaderState::Extra(crc, count) => {
                    // `extra` was allocated by the `Xlen` state before entering here.
                    debug_assert!(self.header.extra.is_some());
                    let extra = self.header.extra.as_mut().unwrap();
                    while (*count as usize) < extra.len() {
                        *count += read_into(r, &mut extra[*count as usize..])? as u16;
                    }
                    if let Some(crc) = crc {
                        crc.update(extra);
                    }
                    self.state = GzHeaderState::Filename(crc.take());
                }
                GzHeaderState::Filename(crc) => {
                    if self.flags & FNAME != 0 {
                        // `get_or_insert_with` keeps bytes gathered by an earlier,
                        // interrupted call instead of discarding them.
                        let filename = self.header.filename.get_or_insert_with(Vec::new);
                        read_to_nul(r, filename)?;
                        if let Some(crc) = crc {
                            // The terminating NUL is not stored in `filename` but is
                            // part of the checksummed bytes.
                            crc.update(filename);
                            crc.update(b"\0");
                        }
                    }
                    self.state = GzHeaderState::Comment(crc.take());
                }
                GzHeaderState::Comment(crc) => {
                    if self.flags & FCOMMENT != 0 {
                        let comment = self.header.comment.get_or_insert_with(Vec::new);
                        read_to_nul(r, comment)?;
                        if let Some(crc) = crc {
                            crc.update(comment);
                            crc.update(b"\0");
                        }
                    }
                    self.state = GzHeaderState::Crc(crc.take(), 0, [0; 2]);
                }
                GzHeaderState::Crc(crc, count, buffer) => {
                    // A CRC is only present here when `FHCRC` was set in `Start`.
                    if let Some(crc) = crc {
                        debug_assert!(self.flags & FHCRC != 0);
                        while (*count as usize) < buffer.len() {
                            *count += read_into(r, &mut buffer[*count as usize..])? as u8;
                        }
                        let stored_crc = parse_le_u16(buffer);
                        // Only the low 16 bits of the running checksum are compared.
                        let calced_crc = crc.sum() as u16;
                        if stored_crc != calced_crc {
                            return Err(corrupt());
                        }
                    }
                    self.state = GzHeaderState::Complete;
                }
                GzHeaderState::Complete => {
                    return Ok(());
                }
            }
        }
    }

    /// Returns the parsed header, or `None` while parsing is not complete.
    fn header(&self) -> Option<&GzHeader> {
        match self.state {
            GzHeaderState::Complete => Some(&self.header),
            _ => None,
        }
    }
}
229
230impl From<GzHeaderParser> for GzHeader {
231 fn from(parser: GzHeaderParser) -> Self {
232 debug_assert!(matches!(parser.state, GzHeaderState::Complete));
233 parser.header
234 }
235}
236
/// Performs a single `read` into `buffer` and reports how many bytes arrived.
///
/// * A read of zero bytes is turned into an `UnexpectedEof` error.
/// * An `Interrupted` error is reported as `Ok(0)` so that callers looping
///   until their buffer is full simply try again.
/// * Any other error is passed through unchanged.
///
/// `buffer` must not be empty (checked in debug builds only).
fn read_into<R: Read>(r: &mut R, buffer: &mut [u8]) -> Result<usize> {
    debug_assert!(!buffer.is_empty());
    let filled = match r.read(buffer) {
        Ok(filled) => filled,
        Err(err) if err.kind() == ErrorKind::Interrupted => return Ok(0),
        Err(err) => return Err(err),
    };
    if filled == 0 {
        return Err(ErrorKind::UnexpectedEof.into());
    }
    Ok(filled)
}
249
250fn read_to_nul<R: BufRead>(r: &mut R, buffer: &mut Vec<u8>) -> Result<()> {
252 let mut bytes = r.bytes();
253 loop {
254 match bytes.next().transpose()? {
255 Some(0) => return Ok(()),
256 Some(_) if buffer.len() == MAX_HEADER_BUF => {
257 return Err(Error::new(
258 ErrorKind::InvalidInput,
259 "gzip header field too long",
260 ));
261 }
262 Some(byte) => {
263 buffer.push(byte);
264 }
265 None => {
266 return Err(ErrorKind::UnexpectedEof.into());
267 }
268 }
269 }
270}
271
/// Decodes two bytes as a little-endian `u16` (first byte is the low byte).
fn parse_le_u16(buffer: &[u8; 2]) -> u16 {
    let [low, high] = *buffer;
    (u16::from(high) << 8) | u16::from(low)
}
275
/// Builds the error returned when the fixed part of a gzip header is not
/// acceptable.
fn bad_header() -> Error {
    let kind = ErrorKind::InvalidInput;
    Error::new(kind, "invalid gzip header")
}
279
/// Builds the error returned when a stored checksum does not match the one
/// computed from the data.
fn corrupt() -> Error {
    let message = "corrupt gzip stream does not have a matching checksum";
    Error::new(ErrorKind::InvalidInput, message)
}
286
287#[derive(Debug, Default)]
314pub struct GzBuilder {
315 extra: Option<Vec<u8>>,
316 filename: Option<CString>,
317 comment: Option<CString>,
318 operating_system: Option<u8>,
319 mtime: u32,
320}
321
322impl GzBuilder {
323 pub fn new() -> GzBuilder {
325 Self::default()
326 }
327
328 pub fn mtime(mut self, mtime: u32) -> GzBuilder {
330 self.mtime = mtime;
331 self
332 }
333
334 pub fn operating_system(mut self, os: u8) -> GzBuilder {
336 self.operating_system = Some(os);
337 self
338 }
339
340 pub fn extra<T: Into<Vec<u8>>>(mut self, extra: T) -> GzBuilder {
342 self.extra = Some(extra.into());
343 self
344 }
345
346 pub fn filename<T: Into<Vec<u8>>>(mut self, filename: T) -> GzBuilder {
352 self.filename = Some(CString::new(filename.into()).unwrap());
353 self
354 }
355
356 pub fn comment<T: Into<Vec<u8>>>(mut self, comment: T) -> GzBuilder {
362 self.comment = Some(CString::new(comment.into()).unwrap());
363 self
364 }
365
366 pub fn write<W: Write>(self, w: W, lvl: Compression) -> write::GzEncoder<W> {
371 write::gz_encoder(self.into_header(lvl), w, lvl)
372 }
373
374 pub fn read<R: Read>(self, r: R, lvl: Compression) -> read::GzEncoder<R> {
379 read::gz_encoder(self.buf_read(BufReader::new(r), lvl))
380 }
381
382 pub fn buf_read<R>(self, r: R, lvl: Compression) -> bufread::GzEncoder<R>
387 where
388 R: BufRead,
389 {
390 bufread::gz_encoder(self.into_header(lvl), r, lvl)
391 }
392
393 fn into_header(self, lvl: Compression) -> Vec<u8> {
394 let GzBuilder {
395 extra,
396 filename,
397 comment,
398 operating_system,
399 mtime,
400 } = self;
401 let mut flg = 0;
402 let mut header = vec![0u8; 10];
403 if let Some(v) = extra {
404 flg |= FEXTRA;
405 header.extend((v.len() as u16).to_le_bytes());
406 header.extend(v);
407 }
408 if let Some(filename) = filename {
409 flg |= FNAME;
410 header.extend(filename.as_bytes_with_nul().iter().copied());
411 }
412 if let Some(comment) = comment {
413 flg |= FCOMMENT;
414 header.extend(comment.as_bytes_with_nul().iter().copied());
415 }
416 header[0] = 0x1f;
417 header[1] = 0x8b;
418 header[2] = 8;
419 header[3] = flg;
420 header[4] = mtime as u8;
421 header[5] = (mtime >> 8) as u8;
422 header[6] = (mtime >> 16) as u8;
423 header[7] = (mtime >> 24) as u8;
424 header[8] = if lvl.0 >= Compression::best().0 {
425 2
426 } else if lvl.0 <= Compression::fast().0 {
427 4
428 } else {
429 0
430 };
431
432 header[9] = operating_system.unwrap_or(255);
437 header
438 }
439}
440
#[cfg(test)]
mod tests {
    use std::io::prelude::*;

    use super::{read, write, GzBuilder, GzHeaderParser};
    use crate::{Compression, GzHeader};
    use rand::{rng, Rng};

    /// A short string survives compression followed by decompression.
    #[test]
    fn roundtrip() {
        let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
        e.write_all(b"foo bar baz").unwrap();
        let inner = e.finish().unwrap();
        let mut d = read::GzDecoder::new(&inner[..]);
        let mut s = String::new();
        d.read_to_string(&mut s).unwrap();
        assert_eq!(s, "foo bar baz");
    }

    /// An encoder that was never written to still produces a stream that
    /// decodes to empty output.
    #[test]
    fn roundtrip_zero() {
        let e = write::GzEncoder::new(Vec::new(), Compression::default());
        let inner = e.finish().unwrap();
        let mut d = read::GzDecoder::new(&inner[..]);
        let mut s = String::new();
        d.read_to_string(&mut s).unwrap();
        assert_eq!(s, "");
    }

    /// Many writes of random length round-trip to the concatenated input.
    #[test]
    fn roundtrip_big() {
        let mut real = Vec::new();
        let mut w = write::GzEncoder::new(Vec::new(), Compression::default());
        let v = crate::random_bytes().take(1024).collect::<Vec<_>>();
        for _ in 0..200 {
            let to_write = &v[..rng().random_range(0..v.len())];
            real.extend(to_write.iter().copied());
            w.write_all(to_write).unwrap();
        }
        let result = w.finish().unwrap();
        let mut r = read::GzDecoder::new(&result[..]);
        let mut v = Vec::new();
        r.read_to_end(&mut v).unwrap();
        assert_eq!(v, real);
    }

    /// A read-side encoder chained directly into a read-side decoder
    /// reproduces 1 MiB of random input.
    #[test]
    fn roundtrip_big2() {
        let v = crate::random_bytes().take(1024 * 1024).collect::<Vec<_>>();
        let mut r = read::GzDecoder::new(read::GzEncoder::new(&v[..], Compression::default()));
        let mut res = Vec::new();
        r.read_to_end(&mut res).unwrap();
        assert_eq!(res, v);
    }

    /// Independent table-driven CRC-32 used as the reference the crate's
    /// checksums are compared against.
    struct Rfc1952Crc {
        /// Precomputed CRC of every possible byte value.
        crc_table: [u32; 256],
    }

    impl Rfc1952Crc {
        /// Builds the 256-entry lookup table for the reflected polynomial
        /// `0xedb88320`.
        fn new() -> Self {
            let mut crc = Rfc1952Crc {
                crc_table: [0; 256],
            };
            for n in 0usize..256 {
                let mut c = n as u32;
                for _k in 0..8 {
                    if c & 1 != 0 {
                        c = 0xedb88320 ^ (c >> 1);
                    } else {
                        c >>= 1;
                    }
                }
                crc.crc_table[n] = c;
            }
            crc
        }

        /// Continues a running CRC (`crc`, `0` to start) over `buf`.
        fn update_crc(&self, crc: u32, buf: &[u8]) -> u32 {
            // The running value is kept inverted while bytes are folded in.
            let mut c = crc ^ 0xffffffff;

            for b in buf {
                c = self.crc_table[(c as u8 ^ *b) as usize] ^ (c >> 8);
            }
            c ^ 0xffffffff
        }

        /// Computes the CRC of `buf` from scratch.
        fn crc(&self, buf: &[u8]) -> u32 {
            self.update_crc(0, buf)
        }
    }

    /// A header produced by the builder, with the header-CRC flag switched on
    /// and a reference CRC16 appended, parses back to the same fields.
    #[test]
    fn roundtrip_header() {
        let mut header = GzBuilder::new()
            .mtime(1234)
            .operating_system(57)
            .filename("filename")
            .comment("comment")
            .into_header(Compression::fast());

        // The builder never sets FHCRC itself, so toggling the bit turns it on.
        header[3] ^= super::FHCRC;
        let rfc1952_crc = Rfc1952Crc::new();
        let crc32 = rfc1952_crc.crc(&header);
        // The header checksum is the low 16 bits of the CRC-32.
        let crc16 = crc32 as u16;
        header.extend(&crc16.to_le_bytes());

        let mut parser = GzHeaderParser::new();
        parser.parse(&mut header.as_slice()).unwrap();
        let actual = parser.header().unwrap();
        assert_eq!(
            actual,
            &GzHeader {
                extra: None,
                filename: Some("filename".as_bytes().to_vec()),
                comment: Some("comment".as_bytes().to_vec()),
                operating_system: 57,
                mtime: 1234
            }
        )
    }

    /// The CRC and length in the trailer written by both encoder flavours
    /// match the reference CRC implementation for a range of inputs.
    #[test]
    fn gzip_encoder_matches_rfc1952() {
        /// Returns `(crc, size)` decoded from the last eight bytes of a
        /// stream: two little-endian `u32` values.
        fn extract_gzip_footer(compressed: &[u8]) -> (u32, u32) {
            assert!(compressed.len() >= 8, "Gzip output too short");
            let footer = &compressed[compressed.len() - 8..];
            let le_u32 =
                |bytes: &[u8]| u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);

            (le_u32(&footer[..4]), le_u32(&footer[4..]))
        }

        #[track_caller]
        fn test_crc_for_write(data: &[u8], expected_crc: u32, description: &str) {
            let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
            encoder.write_all(data).unwrap();
            let compressed = encoder.finish().unwrap();

            let expected_size = data.len() as u32;
            let (actual_crc, actual_size) = extract_gzip_footer(&compressed);

            // Width 10 = "0x" prefix + 8 hex digits, so leading zeros are shown.
            assert_eq!(
                expected_crc, actual_crc,
                "CRC32 mismatch for write {}: expected {:#010x}, got {:#010x}",
                description, expected_crc, actual_crc
            );
            assert_eq!(
                expected_size, actual_size,
                "Size mismatch for write {}: expected {}, got {}",
                description, expected_size, actual_size
            );
        }

        #[track_caller]
        fn test_crc_for_read(data: &[u8], expected_crc: u32, description: &str) {
            let data_reader = std::io::Cursor::new(data);
            let mut encoder = read::GzEncoder::new(data_reader, Compression::default());
            let mut compressed = Vec::new();
            encoder.read_to_end(&mut compressed).unwrap();

            let expected_size = data.len() as u32;
            let (actual_crc, actual_size) = extract_gzip_footer(&compressed);

            assert_eq!(
                expected_crc, actual_crc,
                "CRC32 mismatch for read {}: expected {:#010x}, got {:#010x}",
                description, expected_crc, actual_crc
            );
            assert_eq!(
                expected_size, actual_size,
                "Size mismatch for read {}: expected {}, got {}",
                description, expected_size, actual_size
            );
        }

        /// Checks both the write-side and the read-side encoder against the
        /// reference CRC of `data`.
        #[track_caller]
        fn test_crc_for_data(data: &[u8], description: &str) {
            let rfc1952_crc = Rfc1952Crc::new();
            let expected_crc = rfc1952_crc.crc(data);

            test_crc_for_write(data, expected_crc, description);
            test_crc_for_read(data, expected_crc, description);
        }

        // Degenerate inputs.
        test_crc_for_data(&[], "empty data");
        test_crc_for_data(&[0x00], "single zero byte");
        test_crc_for_data(&[0xFF], "single 0xFF byte");

        // Short text.
        test_crc_for_data(b"Hello World", "simple ASCII");
        test_crc_for_data(b"AAAAAAA", "repeated 'A'");
        test_crc_for_data(b"1234567890", "digits");

        // Binary patterns.
        test_crc_for_data(&[0x00, 0x01, 0x02, 0x03, 0x04, 0x05], "sequential bytes");
        test_crc_for_data(&[0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55], "alternating pattern");
        test_crc_for_data(&[0x00; 10], "all zeros");
        test_crc_for_data(&[0xFF; 10], "all ones");

        // Larger input.
        let large_data = vec![0x42; 10240];
        test_crc_for_data(&large_data, "10 kiB data");

        // The CRC must accumulate correctly across several separate writes.
        {
            let data = b"This is a test of multi-write CRC accumulation";
            let rfc1952_crc = Rfc1952Crc::new();
            let expected_crc = rfc1952_crc.crc(data);

            let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
            encoder.write_all(&data[..10]).unwrap();
            encoder.write_all(&data[10..20]).unwrap();
            encoder.write_all(&data[20..]).unwrap();
            let compressed = encoder.finish().unwrap();

            let expected_size = data.len() as u32;
            let (actual_crc, actual_size) = extract_gzip_footer(&compressed);

            assert_eq!(
                expected_crc, actual_crc,
                "Multi-write CRC mismatch: expected {:#010x}, got {:#010x}",
                expected_crc, actual_crc
            );
            assert_eq!(
                expected_size, actual_size,
                "Size mismatch for multi-write: expected {}, got {}",
                expected_size, actual_size
            );
        }
    }

    /// Returns a valid gzip stream whose trailer CRC has had its first byte
    /// inverted.
    fn gzip_corrupted_crc() -> Vec<u8> {
        let test_data = b"The quick brown fox jumps over the lazy dog";

        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(test_data).unwrap();
        let mut compressed = encoder.finish().unwrap();

        // The CRC occupies the first four of the last eight bytes.
        let crc_offset = compressed.len() - 8;
        compressed[crc_offset] ^= 0xFF;

        compressed
    }

    /// The read-side decoder fails with `InvalidInput` on a bad trailer CRC.
    #[test]
    fn read_decoder_detects_corrupted_crc() {
        let compressed = gzip_corrupted_crc();
        let mut decoder = read::GzDecoder::new(&compressed[..]);
        let mut output = Vec::new();
        let error = decoder.read_to_end(&mut output).unwrap_err();
        assert_eq!(error.kind(), std::io::ErrorKind::InvalidInput);
    }

    /// The write-side decoder accepts the bytes but fails with `InvalidInput`
    /// when finished.
    #[test]
    fn write_decoder_detects_corrupted_crc() {
        let compressed = gzip_corrupted_crc();
        let mut decoder = write::GzDecoder::new(Vec::new());
        decoder.write_all(&compressed).unwrap();
        let error = decoder.finish().unwrap_err();
        assert_eq!(error.kind(), std::io::ErrorKind::InvalidInput);
    }

    /// File name, comment and extra field set on the builder are visible on
    /// the decoder's header, and the payload still round-trips.
    #[test]
    fn fields() {
        let r = [0, 2, 4, 6];
        let e = GzBuilder::new()
            .filename("foo.rs")
            .comment("bar")
            .extra(vec![0, 1, 2, 3])
            .read(&r[..], Compression::default());
        let mut d = read::GzDecoder::new(e);
        assert_eq!(d.header().unwrap().filename(), Some(&b"foo.rs"[..]));
        assert_eq!(d.header().unwrap().comment(), Some(&b"bar"[..]));
        assert_eq!(d.header().unwrap().extra(), Some(&b"\x00\x01\x02\x03"[..]));
        let mut res = Vec::new();
        d.read_to_end(&mut res).unwrap();
        assert_eq!(res, vec![0, 2, 4, 6]);
    }

    /// Reading again after the end of the stream succeeds and adds nothing.
    #[test]
    fn keep_reading_after_end() {
        let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
        e.write_all(b"foo bar baz").unwrap();
        let inner = e.finish().unwrap();
        let mut d = read::GzDecoder::new(&inner[..]);
        let mut s = String::new();
        d.read_to_string(&mut s).unwrap();
        assert_eq!(s, "foo bar baz");
        d.read_to_string(&mut s).unwrap();
        assert_eq!(s, "foo bar baz");
    }

    /// Property test: arbitrary byte vectors round-trip through the read-side
    /// encoder and decoder.
    #[test]
    fn qc_reader() {
        ::quickcheck::quickcheck(test as fn(_) -> _);

        fn test(v: Vec<u8>) -> bool {
            let r = read::GzEncoder::new(&v[..], Compression::default());
            let mut r = read::GzDecoder::new(r);
            let mut v2 = Vec::new();
            r.read_to_end(&mut v2).unwrap();
            v == v2
        }
    }

    /// Flushing an encoder after a write does not fail.
    #[test]
    fn flush_after_write() {
        let mut f = write::GzEncoder::new(Vec::new(), Compression::default());
        write!(f, "Hello world").unwrap();
        f.flush().unwrap();
    }
}