1use std::cell::{Cell, RefCell};
2use std::cmp;
3use std::convert::TryFrom;
4use std::fs;
5use std::io::prelude::*;
6use std::io::{self, SeekFrom};
7use std::marker;
8use std::path::Path;
9
10use crate::entry::{EntryFields, EntryIo};
11use crate::error::TarError;
12use crate::header::BLOCK_SIZE;
13use crate::other;
14use crate::pax::*;
15use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header};
16
17pub struct Archive<R: ?Sized + Read> {
21 inner: ArchiveInner<R>,
22}
23
/// Shared state of an archive: the underlying stream plus the options that
/// are handed to every entry produced from it.
pub struct ArchiveInner<R>
where
    R: ?Sized,
{
    /// Current position in the stream; advanced by reads and updated by seeks
    /// performed through `&ArchiveInner`.
    pos: Cell<u64>,
    /// Mask value copied into each entry.
    mask: u32,
    /// Extended-attribute flag copied into each entry.
    unpack_xattrs: bool,
    /// Permission-preservation flag copied into each entry.
    preserve_permissions: bool,
    /// Ownership-preservation flag copied into each entry.
    preserve_ownerships: bool,
    /// Modification-time-preservation flag copied into each entry.
    preserve_mtime: bool,
    /// Overwrite flag copied into each entry.
    overwrite: bool,
    /// When set, all-zero header blocks are skipped instead of ending the
    /// iteration.
    ignore_zeros: bool,
    /// The wrapped stream; interior mutability lets shared references read
    /// from it.
    obj: RefCell<R>,
}
35
36pub struct Entries<'a, R: 'a + Read> {
38 fields: EntriesFields<'a>,
39 _ignored: marker::PhantomData<&'a Archive<R>>,
40}
41
/// Object-safe combination of [`Read`] and [`Seek`], so a seekable reader can
/// be used behind a single `dyn` pointer.
trait SeekRead: Read + Seek {}

/// Every type that can both read and seek is automatically a `SeekRead`.
impl<R> SeekRead for R where R: Read + Seek {}
44
45struct EntriesFields<'a> {
46 archive: &'a Archive<dyn Read + 'a>,
47 seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
48 next: u64,
49 done: bool,
50 raw: bool,
51}
52
53impl<R: Read> Archive<R> {
54 pub fn new(obj: R) -> Archive<R> {
56 Archive {
57 inner: ArchiveInner {
58 mask: u32::MIN,
59 unpack_xattrs: false,
60 preserve_permissions: false,
61 preserve_ownerships: false,
62 preserve_mtime: true,
63 overwrite: true,
64 ignore_zeros: false,
65 obj: RefCell::new(obj),
66 pos: Cell::new(0),
67 },
68 }
69 }
70
71 pub fn into_inner(self) -> R {
73 self.inner.obj.into_inner()
74 }
75
76 pub fn entries(&mut self) -> io::Result<Entries<'_, R>> {
83 let me: &mut Archive<dyn Read> = self;
84 me._entries(None).map(|fields| Entries {
85 fields,
86 _ignored: marker::PhantomData,
87 })
88 }
89
90 pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<()> {
112 let me: &mut Archive<dyn Read> = self;
113 me._unpack(dst.as_ref())
114 }
115
116 pub fn set_mask(&mut self, mask: u32) {
127 self.inner.mask = mask;
128 }
129
130 pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) {
138 self.inner.unpack_xattrs = unpack_xattrs;
139 }
140
141 pub fn set_preserve_permissions(&mut self, preserve: bool) {
147 self.inner.preserve_permissions = preserve;
148 }
149
150 pub fn set_preserve_ownerships(&mut self, preserve: bool) {
156 self.inner.preserve_ownerships = preserve;
157 }
158
159 pub fn set_overwrite(&mut self, overwrite: bool) {
161 self.inner.overwrite = overwrite;
162 }
163
164 pub fn set_preserve_mtime(&mut self, preserve: bool) {
169 self.inner.preserve_mtime = preserve;
170 }
171
172 pub fn set_ignore_zeros(&mut self, ignore_zeros: bool) {
177 self.inner.ignore_zeros = ignore_zeros;
178 }
179}
180
181impl<R: Seek + Read> Archive<R> {
182 pub fn entries_with_seek(&mut self) -> io::Result<Entries<'_, R>> {
190 let me: &Archive<dyn Read> = self;
191 let me_seekable: &Archive<dyn SeekRead> = self;
192 me._entries(Some(me_seekable)).map(|fields| Entries {
193 fields,
194 _ignored: marker::PhantomData,
195 })
196 }
197}
198
199impl Archive<dyn Read + '_> {
200 fn _entries<'a>(
201 &'a self,
202 seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
203 ) -> io::Result<EntriesFields<'a>> {
204 if self.inner.pos.get() != 0 {
205 return Err(other(
206 "cannot call entries unless archive is at \
207 position 0",
208 ));
209 }
210 Ok(EntriesFields {
211 archive: self,
212 seekable_archive,
213 done: false,
214 next: 0,
215 raw: false,
216 })
217 }
218
219 fn _unpack(&mut self, dst: &Path) -> io::Result<()> {
220 if dst.symlink_metadata().is_err() {
221 fs::create_dir_all(dst)
222 .map_err(|e| TarError::new(format!("failed to create `{}`", dst.display()), e))?;
223 }
224
225 let dst = &dst.canonicalize().unwrap_or(dst.to_path_buf());
231
232 let mut directories = Vec::new();
236 for entry in self._entries(None)? {
237 let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive", e))?;
238 if file.header().entry_type() == crate::EntryType::Directory {
239 directories.push(file);
240 } else {
241 file.unpack_in(dst)?;
242 }
243 }
244
245 directories.sort_by(|a, b| b.path_bytes().cmp(&a.path_bytes()));
253 for mut dir in directories {
254 dir.unpack_in(dst)?;
255 }
256
257 Ok(())
258 }
259}
260
261impl<'a, R: Read> Entries<'a, R> {
262 pub fn raw(self, raw: bool) -> Entries<'a, R> {
268 Entries {
269 fields: EntriesFields { raw, ..self.fields },
270 _ignored: marker::PhantomData,
271 }
272 }
273}
274impl<'a, R: Read> Iterator for Entries<'a, R> {
275 type Item = io::Result<Entry<'a, R>>;
276
277 fn next(&mut self) -> Option<io::Result<Entry<'a, R>>> {
278 self.fields
279 .next()
280 .map(|result| result.map(|e| EntryFields::from(e).into_entry()))
281 }
282}
283
284impl<'a> EntriesFields<'a> {
285 fn next_entry_raw(
286 &mut self,
287 pax_extensions: Option<&[u8]>,
288 ) -> io::Result<Option<Entry<'a, io::Empty>>> {
289 let mut header = Header::new_old();
290 let mut header_pos = self.next;
291 loop {
292 let delta = self.next - self.archive.inner.pos.get();
294 self.skip(delta)?;
295
296 if !try_read_all(&mut &self.archive.inner, header.as_mut_bytes())? {
298 return Ok(None);
299 }
300
301 if !header.as_bytes().iter().all(|i| *i == 0) {
305 self.next += BLOCK_SIZE;
306 break;
307 }
308
309 if !self.archive.inner.ignore_zeros {
310 return Ok(None);
311 }
312 self.next += BLOCK_SIZE;
313 header_pos = self.next;
314 }
315
316 let sum = header.as_bytes()[..148]
318 .iter()
319 .chain(&header.as_bytes()[156..])
320 .fold(0, |a, b| a + (*b as u32))
321 + 8 * 32;
322 let cksum = header.cksum()?;
323 if sum != cksum {
324 return Err(other("archive header checksum mismatch"));
325 }
326
327 let entry_type = header.entry_type();
330 let is_extension_header = entry_type.is_gnu_longname()
331 || entry_type.is_gnu_longlink()
332 || entry_type.is_pax_local_extensions()
333 || entry_type.is_pax_global_extensions();
334
335 let mut pax_size: Option<u64> = None;
336 if let Some(pax_extensions_ref) = pax_extensions.filter(|_| !is_extension_header) {
337 pax_size = pax_extensions_value(pax_extensions_ref, PAX_SIZE);
338
339 if let Some(pax_uid) = pax_extensions_value(pax_extensions_ref, PAX_UID) {
340 header.set_uid(pax_uid);
341 }
342
343 if let Some(pax_gid) = pax_extensions_value(pax_extensions_ref, PAX_GID) {
344 header.set_gid(pax_gid);
345 }
346 }
347
348 let file_pos = self.next;
349 let mut size = header.entry_size()?;
350 if let Some(pax_size) = pax_size {
354 size = pax_size;
355 }
356 let ret = EntryFields {
357 size,
358 header_pos,
359 file_pos,
360 data: vec![EntryIo::Data((&self.archive.inner).take(size))],
361 header,
362 long_pathname: None,
363 long_linkname: None,
364 pax_extensions: None,
365 mask: self.archive.inner.mask,
366 unpack_xattrs: self.archive.inner.unpack_xattrs,
367 preserve_permissions: self.archive.inner.preserve_permissions,
368 preserve_mtime: self.archive.inner.preserve_mtime,
369 overwrite: self.archive.inner.overwrite,
370 preserve_ownerships: self.archive.inner.preserve_ownerships,
371 };
372
373 let size = size
376 .checked_add(BLOCK_SIZE - 1)
377 .ok_or_else(|| other("size overflow"))?;
378 self.next = self
379 .next
380 .checked_add(size & !(BLOCK_SIZE - 1))
381 .ok_or_else(|| other("size overflow"))?;
382
383 Ok(Some(ret.into_entry()))
384 }
385
386 fn next_entry(&mut self) -> io::Result<Option<Entry<'a, io::Empty>>> {
387 if self.raw {
388 return self.next_entry_raw(None);
389 }
390
391 let mut gnu_longname = None;
392 let mut gnu_longlink = None;
393 let mut pax_extensions = None;
394 let mut processed = 0;
395 loop {
396 processed += 1;
397 let entry = match self.next_entry_raw(pax_extensions.as_deref())? {
398 Some(entry) => entry,
399 None if processed > 1 => {
400 return Err(other(
401 "members found describing a future member \
402 but no future member found",
403 ));
404 }
405 None => return Ok(None),
406 };
407
408 let is_recognized_header =
409 entry.header().as_gnu().is_some() || entry.header().as_ustar().is_some();
410
411 if is_recognized_header && entry.header().entry_type().is_gnu_longname() {
412 if gnu_longname.is_some() {
413 return Err(other(
414 "two long name entries describing \
415 the same member",
416 ));
417 }
418 gnu_longname = Some(EntryFields::from(entry).read_all()?);
419 continue;
420 }
421
422 if is_recognized_header && entry.header().entry_type().is_gnu_longlink() {
423 if gnu_longlink.is_some() {
424 return Err(other(
425 "two long name entries describing \
426 the same member",
427 ));
428 }
429 gnu_longlink = Some(EntryFields::from(entry).read_all()?);
430 continue;
431 }
432
433 if is_recognized_header && entry.header().entry_type().is_pax_local_extensions() {
434 if pax_extensions.is_some() {
435 return Err(other(
436 "two pax extensions entries describing \
437 the same member",
438 ));
439 }
440 pax_extensions = Some(EntryFields::from(entry).read_all()?);
441 continue;
442 }
443
444 let mut fields = EntryFields::from(entry);
445 fields.long_pathname = gnu_longname;
446 fields.long_linkname = gnu_longlink;
447 fields.pax_extensions = pax_extensions;
448 self.parse_sparse_header(&mut fields)?;
449 return Ok(Some(fields.into_entry()));
450 }
451 }
452
453 fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> {
454 if !entry.header.entry_type().is_gnu_sparse() {
455 return Ok(());
456 }
457 let gnu = match entry.header.as_gnu() {
458 Some(gnu) => gnu,
459 None => return Err(other("sparse entry type listed but not GNU header")),
460 };
461
462 entry.data.truncate(0);
482
483 let mut cur = 0;
484 let mut remaining = entry.size;
485 {
486 let data = &mut entry.data;
487 let reader = &self.archive.inner;
488 let size = entry.size;
489 let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> {
490 if block.is_empty() {
491 return Ok(());
492 }
493 let off = block.offset()?;
494 let len = block.length()?;
495 if len != 0 && (size - remaining) % BLOCK_SIZE != 0 {
496 return Err(other(
497 "previous block in sparse file was not \
498 aligned to 512-byte boundary",
499 ));
500 } else if off < cur {
501 return Err(other(
502 "out of order or overlapping sparse \
503 blocks",
504 ));
505 } else if cur < off {
506 let block = io::repeat(0).take(off - cur);
507 data.push(EntryIo::Pad(block));
508 }
509 cur = off
510 .checked_add(len)
511 .ok_or_else(|| other("more bytes listed in sparse file than u64 can hold"))?;
512 remaining = remaining.checked_sub(len).ok_or_else(|| {
513 other(
514 "sparse file consumed more data than the header \
515 listed",
516 )
517 })?;
518 data.push(EntryIo::Data(reader.take(len)));
519 Ok(())
520 };
521 for block in gnu.sparse.iter() {
522 add_block(block)?
523 }
524 if gnu.is_extended() {
525 let mut ext = GnuExtSparseHeader::new();
526 ext.isextended[0] = 1;
527 while ext.is_extended() {
528 if !try_read_all(&mut &self.archive.inner, ext.as_mut_bytes())? {
529 return Err(other("failed to read extension"));
530 }
531
532 self.next += BLOCK_SIZE;
533 for block in ext.sparse.iter() {
534 add_block(block)?;
535 }
536 }
537 }
538 }
539 if cur != gnu.real_size()? {
540 return Err(other(
541 "mismatch in sparse file chunks and \
542 size in header",
543 ));
544 }
545 entry.size = cur;
546 if remaining > 0 {
547 return Err(other(
548 "mismatch in sparse file chunks and \
549 entry size in header",
550 ));
551 }
552 Ok(())
553 }
554
555 fn skip(&mut self, mut amt: u64) -> io::Result<()> {
556 if let Some(seekable_archive) = self.seekable_archive {
557 let pos = io::SeekFrom::Current(
558 i64::try_from(amt).map_err(|_| other("seek position out of bounds"))?,
559 );
560 (&seekable_archive.inner).seek(pos)?;
561 } else {
562 let mut buf = [0u8; 4096 * 8];
563 while amt > 0 {
564 let n = cmp::min(amt, buf.len() as u64);
565 let n = (&self.archive.inner).read(&mut buf[..n as usize])?;
566 if n == 0 {
567 return Err(other("unexpected EOF during skip"));
568 }
569 amt -= n as u64;
570 }
571 }
572 Ok(())
573 }
574}
575
576impl<'a> Iterator for EntriesFields<'a> {
577 type Item = io::Result<Entry<'a, io::Empty>>;
578
579 fn next(&mut self) -> Option<io::Result<Entry<'a, io::Empty>>> {
580 if self.done {
581 None
582 } else {
583 match self.next_entry() {
584 Ok(Some(e)) => Some(Ok(e)),
585 Ok(None) => {
586 self.done = true;
587 None
588 }
589 Err(e) => {
590 self.done = true;
591 Some(Err(e))
592 }
593 }
594 }
595 }
596}
597
598impl<R: ?Sized + Read> Read for &ArchiveInner<R> {
599 fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
600 let i = self.obj.borrow_mut().read(into)?;
601 self.pos.set(self.pos.get() + i as u64);
602 Ok(i)
603 }
604}
605
606impl<R: ?Sized + Seek> Seek for &ArchiveInner<R> {
607 fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
608 let pos = self.obj.borrow_mut().seek(pos)?;
609 self.pos.set(pos);
610 Ok(pos)
611 }
612}
613
614fn try_read_all<R: Read>(r: &mut R, buf: &mut [u8]) -> io::Result<bool> {
619 let mut read = 0;
620 while read < buf.len() {
621 match r.read(&mut buf[read..])? {
622 0 => {
623 if read == 0 {
624 return Ok(false);
625 }
626
627 return Err(other("failed to read entire block"));
628 }
629 n => read += n,
630 }
631 }
632 Ok(true)
633}