tar/
archive.rs

1use std::cell::{Cell, RefCell};
2use std::cmp;
3use std::convert::TryFrom;
4use std::fs;
5use std::io::prelude::*;
6use std::io::{self, SeekFrom};
7use std::marker;
8use std::path::Path;
9
10use crate::entry::{EntryFields, EntryIo};
11use crate::error::TarError;
12use crate::header::BLOCK_SIZE;
13use crate::other;
14use crate::pax::*;
15use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header};
16
17/// A top-level representation of an archive file.
18///
19/// This archive can have an entry added to it and it can be iterated over.
20pub struct Archive<R: ?Sized + Read> {
21    inner: ArchiveInner<R>,
22}
23
/// Shared state behind an `Archive`: the wrapped reader, the current position
/// within it, and the options applied to entries when they are unpacked.
pub struct ArchiveInner<R: ?Sized> {
    /// Current position in `obj`; advanced by reads and reset by seeks made
    /// through `&ArchiveInner`.
    pos: Cell<u64>,
    /// Permission mask copied into every entry (see `Archive::set_mask`).
    mask: u32,
    /// Whether extended attributes are unpacked.
    unpack_xattrs: bool,
    /// Whether extended permissions are preserved when unpacking.
    preserve_permissions: bool,
    /// Whether numeric ownership ids are preserved when unpacking.
    preserve_ownerships: bool,
    /// Whether modification times are preserved when unpacking.
    preserve_mtime: bool,
    /// Whether existing files and symlinks are overwritten on extraction.
    overwrite: bool,
    /// Whether all-zero headers are skipped instead of ending the archive.
    ignore_zeros: bool,
    /// The underlying reader.
    ///
    /// Must stay the last field: the archive is coerced to
    /// `Archive<dyn Read>`, and struct unsizing only permits the type
    /// parameter to appear in the final field.
    obj: RefCell<R>,
}
35
36/// An iterator over the entries of an archive.
37pub struct Entries<'a, R: 'a + Read> {
38    fields: EntriesFields<'a>,
39    _ignored: marker::PhantomData<&'a Archive<R>>,
40}
41
/// Helper trait combining `Read` and `Seek` so that a seekable reader can be
/// named as a single trait object (`dyn SeekRead`).
trait SeekRead: Read + Seek {}

/// Every type that is both readable and seekable is a `SeekRead`.
impl<R: Read + Seek> SeekRead for R {}
44
45struct EntriesFields<'a> {
46    archive: &'a Archive<dyn Read + 'a>,
47    seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
48    next: u64,
49    done: bool,
50    raw: bool,
51}
52
53impl<R: Read> Archive<R> {
54    /// Create a new archive with the underlying object as the reader.
55    pub fn new(obj: R) -> Archive<R> {
56        Archive {
57            inner: ArchiveInner {
58                mask: u32::MIN,
59                unpack_xattrs: false,
60                preserve_permissions: false,
61                preserve_ownerships: false,
62                preserve_mtime: true,
63                overwrite: true,
64                ignore_zeros: false,
65                obj: RefCell::new(obj),
66                pos: Cell::new(0),
67            },
68        }
69    }
70
71    /// Unwrap this archive, returning the underlying object.
72    pub fn into_inner(self) -> R {
73        self.inner.obj.into_inner()
74    }
75
76    /// Construct an iterator over the entries in this archive.
77    ///
78    /// Note that care must be taken to consider each entry within an archive in
79    /// sequence. If entries are processed out of sequence (from what the
80    /// iterator returns), then the contents read for each entry may be
81    /// corrupted.
82    pub fn entries(&mut self) -> io::Result<Entries<R>> {
83        let me: &mut Archive<dyn Read> = self;
84        me._entries(None).map(|fields| Entries {
85            fields: fields,
86            _ignored: marker::PhantomData,
87        })
88    }
89
90    /// Unpacks the contents tarball into the specified `dst`.
91    ///
92    /// This function will iterate over the entire contents of this tarball,
93    /// extracting each file in turn to the location specified by the entry's
94    /// path name.
95    ///
96    /// This operation is relatively sensitive in that it will not write files
97    /// outside of the path specified by `dst`. Files in the archive which have
98    /// a '..' in their path are skipped during the unpacking process.
99    ///
100    /// # Examples
101    ///
102    /// ```no_run
103    /// use std::fs::File;
104    /// use tar::Archive;
105    ///
106    /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
107    /// ar.unpack("foo").unwrap();
108    /// ```
109    pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<()> {
110        let me: &mut Archive<dyn Read> = self;
111        me._unpack(dst.as_ref())
112    }
113
114    /// Set the mask of the permission bits when unpacking this entry.
115    ///
116    /// The mask will be inverted when applying against a mode, similar to how
117    /// `umask` works on Unix. In logical notation it looks like:
118    ///
119    /// ```text
120    /// new_mode = old_mode & (~mask)
121    /// ```
122    ///
123    /// The mask is 0 by default and is currently only implemented on Unix.
124    pub fn set_mask(&mut self, mask: u32) {
125        self.inner.mask = mask;
126    }
127
128    /// Indicate whether extended file attributes (xattrs on Unix) are preserved
129    /// when unpacking this archive.
130    ///
131    /// This flag is disabled by default and is currently only implemented on
132    /// Unix using xattr support. This may eventually be implemented for
133    /// Windows, however, if other archive implementations are found which do
134    /// this as well.
135    pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) {
136        self.inner.unpack_xattrs = unpack_xattrs;
137    }
138
139    /// Indicate whether extended permissions (like suid on Unix) are preserved
140    /// when unpacking this entry.
141    ///
142    /// This flag is disabled by default and is currently only implemented on
143    /// Unix.
144    pub fn set_preserve_permissions(&mut self, preserve: bool) {
145        self.inner.preserve_permissions = preserve;
146    }
147
148    /// Indicate whether numeric ownership ids (like uid and gid on Unix)
149    /// are preserved when unpacking this entry.
150    ///
151    /// This flag is disabled by default and is currently only implemented on
152    /// Unix.
153    pub fn set_preserve_ownerships(&mut self, preserve: bool) {
154        self.inner.preserve_ownerships = preserve;
155    }
156
157    /// Indicate whether files and symlinks should be overwritten on extraction.
158    pub fn set_overwrite(&mut self, overwrite: bool) {
159        self.inner.overwrite = overwrite;
160    }
161
162    /// Indicate whether access time information is preserved when unpacking
163    /// this entry.
164    ///
165    /// This flag is enabled by default.
166    pub fn set_preserve_mtime(&mut self, preserve: bool) {
167        self.inner.preserve_mtime = preserve;
168    }
169
170    /// Ignore zeroed headers, which would otherwise indicate to the archive that it has no more
171    /// entries.
172    ///
173    /// This can be used in case multiple tar archives have been concatenated together.
174    pub fn set_ignore_zeros(&mut self, ignore_zeros: bool) {
175        self.inner.ignore_zeros = ignore_zeros;
176    }
177}
178
179impl<R: Seek + Read> Archive<R> {
180    /// Construct an iterator over the entries in this archive for a seekable
181    /// reader. Seek will be used to efficiently skip over file contents.
182    ///
183    /// Note that care must be taken to consider each entry within an archive in
184    /// sequence. If entries are processed out of sequence (from what the
185    /// iterator returns), then the contents read for each entry may be
186    /// corrupted.
187    pub fn entries_with_seek(&mut self) -> io::Result<Entries<R>> {
188        let me: &Archive<dyn Read> = self;
189        let me_seekable: &Archive<dyn SeekRead> = self;
190        me._entries(Some(me_seekable)).map(|fields| Entries {
191            fields: fields,
192            _ignored: marker::PhantomData,
193        })
194    }
195}
196
197impl Archive<dyn Read + '_> {
198    fn _entries<'a>(
199        &'a self,
200        seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
201    ) -> io::Result<EntriesFields<'a>> {
202        if self.inner.pos.get() != 0 {
203            return Err(other(
204                "cannot call entries unless archive is at \
205                 position 0",
206            ));
207        }
208        Ok(EntriesFields {
209            archive: self,
210            seekable_archive,
211            done: false,
212            next: 0,
213            raw: false,
214        })
215    }
216
217    fn _unpack(&mut self, dst: &Path) -> io::Result<()> {
218        if dst.symlink_metadata().is_err() {
219            fs::create_dir_all(&dst)
220                .map_err(|e| TarError::new(format!("failed to create `{}`", dst.display()), e))?;
221        }
222
223        // Canonicalizing the dst directory will prepend the path with '\\?\'
224        // on windows which will allow windows APIs to treat the path as an
225        // extended-length path with a 32,767 character limit. Otherwise all
226        // unpacked paths over 260 characters will fail on creation with a
227        // NotFound exception.
228        let dst = &dst.canonicalize().unwrap_or(dst.to_path_buf());
229
230        // Delay any directory entries until the end (they will be created if needed by
231        // descendants), to ensure that directory permissions do not interfer with descendant
232        // extraction.
233        let mut directories = Vec::new();
234        for entry in self._entries(None)? {
235            let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive", e))?;
236            if file.header().entry_type() == crate::EntryType::Directory {
237                directories.push(file);
238            } else {
239                file.unpack_in(dst)?;
240            }
241        }
242
243        // Apply the directories.
244        //
245        // Note: the order of application is important to permissions. That is, we must traverse
246        // the filesystem graph in topological ordering or else we risk not being able to create
247        // child directories within those of more restrictive permissions. See [0] for details.
248        //
249        // [0]: <https://github.com/alexcrichton/tar-rs/issues/242>
250        directories.sort_by(|a, b| b.path_bytes().cmp(&a.path_bytes()));
251        for mut dir in directories {
252            dir.unpack_in(dst)?;
253        }
254
255        Ok(())
256    }
257}
258
259impl<'a, R: Read> Entries<'a, R> {
260    /// Indicates whether this iterator will return raw entries or not.
261    ///
262    /// If the raw list of entries is returned, then no preprocessing happens
263    /// on account of this library, for example taking into account GNU long name
264    /// or long link archive members. Raw iteration is disabled by default.
265    pub fn raw(self, raw: bool) -> Entries<'a, R> {
266        Entries {
267            fields: EntriesFields {
268                raw: raw,
269                ..self.fields
270            },
271            _ignored: marker::PhantomData,
272        }
273    }
274}
275impl<'a, R: Read> Iterator for Entries<'a, R> {
276    type Item = io::Result<Entry<'a, R>>;
277
278    fn next(&mut self) -> Option<io::Result<Entry<'a, R>>> {
279        self.fields
280            .next()
281            .map(|result| result.map(|e| EntryFields::from(e).into_entry()))
282    }
283}
284
285impl<'a> EntriesFields<'a> {
286    fn next_entry_raw(
287        &mut self,
288        pax_extensions: Option<&[u8]>,
289    ) -> io::Result<Option<Entry<'a, io::Empty>>> {
290        let mut header = Header::new_old();
291        let mut header_pos = self.next;
292        loop {
293            // Seek to the start of the next header in the archive
294            let delta = self.next - self.archive.inner.pos.get();
295            self.skip(delta)?;
296
297            // EOF is an indicator that we are at the end of the archive.
298            if !try_read_all(&mut &self.archive.inner, header.as_mut_bytes())? {
299                return Ok(None);
300            }
301
302            // If a header is not all zeros, we have another valid header.
303            // Otherwise, check if we are ignoring zeros and continue, or break as if this is the
304            // end of the archive.
305            if !header.as_bytes().iter().all(|i| *i == 0) {
306                self.next += BLOCK_SIZE;
307                break;
308            }
309
310            if !self.archive.inner.ignore_zeros {
311                return Ok(None);
312            }
313            self.next += BLOCK_SIZE;
314            header_pos = self.next;
315        }
316
317        // Make sure the checksum is ok
318        let sum = header.as_bytes()[..148]
319            .iter()
320            .chain(&header.as_bytes()[156..])
321            .fold(0, |a, b| a + (*b as u32))
322            + 8 * 32;
323        let cksum = header.cksum()?;
324        if sum != cksum {
325            return Err(other("archive header checksum mismatch"));
326        }
327
328        let mut pax_size: Option<u64> = None;
329        if let Some(pax_extensions_ref) = &pax_extensions {
330            pax_size = pax_extensions_value(pax_extensions_ref, PAX_SIZE);
331
332            if let Some(pax_uid) = pax_extensions_value(pax_extensions_ref, PAX_UID) {
333                header.set_uid(pax_uid);
334            }
335
336            if let Some(pax_gid) = pax_extensions_value(pax_extensions_ref, PAX_GID) {
337                header.set_gid(pax_gid);
338            }
339        }
340
341        let file_pos = self.next;
342        let mut size = header.entry_size()?;
343        if size == 0 {
344            if let Some(pax_size) = pax_size {
345                size = pax_size;
346            }
347        }
348        let ret = EntryFields {
349            size: size,
350            header_pos: header_pos,
351            file_pos: file_pos,
352            data: vec![EntryIo::Data((&self.archive.inner).take(size))],
353            header: header,
354            long_pathname: None,
355            long_linkname: None,
356            pax_extensions: None,
357            mask: self.archive.inner.mask,
358            unpack_xattrs: self.archive.inner.unpack_xattrs,
359            preserve_permissions: self.archive.inner.preserve_permissions,
360            preserve_mtime: self.archive.inner.preserve_mtime,
361            overwrite: self.archive.inner.overwrite,
362            preserve_ownerships: self.archive.inner.preserve_ownerships,
363        };
364
365        // Store where the next entry is, rounding up by 512 bytes (the size of
366        // a header);
367        let size = size
368            .checked_add(BLOCK_SIZE - 1)
369            .ok_or_else(|| other("size overflow"))?;
370        self.next = self
371            .next
372            .checked_add(size & !(BLOCK_SIZE - 1))
373            .ok_or_else(|| other("size overflow"))?;
374
375        Ok(Some(ret.into_entry()))
376    }
377
378    fn next_entry(&mut self) -> io::Result<Option<Entry<'a, io::Empty>>> {
379        if self.raw {
380            return self.next_entry_raw(None);
381        }
382
383        let mut gnu_longname = None;
384        let mut gnu_longlink = None;
385        let mut pax_extensions = None;
386        let mut processed = 0;
387        loop {
388            processed += 1;
389            let entry = match self.next_entry_raw(pax_extensions.as_deref())? {
390                Some(entry) => entry,
391                None if processed > 1 => {
392                    return Err(other(
393                        "members found describing a future member \
394                         but no future member found",
395                    ));
396                }
397                None => return Ok(None),
398            };
399
400            let is_recognized_header =
401                entry.header().as_gnu().is_some() || entry.header().as_ustar().is_some();
402
403            if is_recognized_header && entry.header().entry_type().is_gnu_longname() {
404                if gnu_longname.is_some() {
405                    return Err(other(
406                        "two long name entries describing \
407                         the same member",
408                    ));
409                }
410                gnu_longname = Some(EntryFields::from(entry).read_all()?);
411                continue;
412            }
413
414            if is_recognized_header && entry.header().entry_type().is_gnu_longlink() {
415                if gnu_longlink.is_some() {
416                    return Err(other(
417                        "two long name entries describing \
418                         the same member",
419                    ));
420                }
421                gnu_longlink = Some(EntryFields::from(entry).read_all()?);
422                continue;
423            }
424
425            if is_recognized_header && entry.header().entry_type().is_pax_local_extensions() {
426                if pax_extensions.is_some() {
427                    return Err(other(
428                        "two pax extensions entries describing \
429                         the same member",
430                    ));
431                }
432                pax_extensions = Some(EntryFields::from(entry).read_all()?);
433                continue;
434            }
435
436            let mut fields = EntryFields::from(entry);
437            fields.long_pathname = gnu_longname;
438            fields.long_linkname = gnu_longlink;
439            fields.pax_extensions = pax_extensions;
440            self.parse_sparse_header(&mut fields)?;
441            return Ok(Some(fields.into_entry()));
442        }
443    }
444
445    fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> {
446        if !entry.header.entry_type().is_gnu_sparse() {
447            return Ok(());
448        }
449        let gnu = match entry.header.as_gnu() {
450            Some(gnu) => gnu,
451            None => return Err(other("sparse entry type listed but not GNU header")),
452        };
453
454        // Sparse files are represented internally as a list of blocks that are
455        // read. Blocks are either a bunch of 0's or they're data from the
456        // underlying archive.
457        //
458        // Blocks of a sparse file are described by the `GnuSparseHeader`
459        // structure, some of which are contained in `GnuHeader` but some of
460        // which may also be contained after the first header in further
461        // headers.
462        //
463        // We read off all the blocks here and use the `add_block` function to
464        // incrementally add them to the list of I/O block (in `entry.data`).
465        // The `add_block` function also validates that each chunk comes after
466        // the previous, we don't overrun the end of the file, and each block is
467        // aligned to a 512-byte boundary in the archive itself.
468        //
469        // At the end we verify that the sparse file size (`Header::size`) is
470        // the same as the current offset (described by the list of blocks) as
471        // well as the amount of data read equals the size of the entry
472        // (`Header::entry_size`).
473        entry.data.truncate(0);
474
475        let mut cur = 0;
476        let mut remaining = entry.size;
477        {
478            let data = &mut entry.data;
479            let reader = &self.archive.inner;
480            let size = entry.size;
481            let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> {
482                if block.is_empty() {
483                    return Ok(());
484                }
485                let off = block.offset()?;
486                let len = block.length()?;
487                if len != 0 && (size - remaining) % BLOCK_SIZE != 0 {
488                    return Err(other(
489                        "previous block in sparse file was not \
490                         aligned to 512-byte boundary",
491                    ));
492                } else if off < cur {
493                    return Err(other(
494                        "out of order or overlapping sparse \
495                         blocks",
496                    ));
497                } else if cur < off {
498                    let block = io::repeat(0).take(off - cur);
499                    data.push(EntryIo::Pad(block));
500                }
501                cur = off
502                    .checked_add(len)
503                    .ok_or_else(|| other("more bytes listed in sparse file than u64 can hold"))?;
504                remaining = remaining.checked_sub(len).ok_or_else(|| {
505                    other(
506                        "sparse file consumed more data than the header \
507                         listed",
508                    )
509                })?;
510                data.push(EntryIo::Data(reader.take(len)));
511                Ok(())
512            };
513            for block in gnu.sparse.iter() {
514                add_block(block)?
515            }
516            if gnu.is_extended() {
517                let mut ext = GnuExtSparseHeader::new();
518                ext.isextended[0] = 1;
519                while ext.is_extended() {
520                    if !try_read_all(&mut &self.archive.inner, ext.as_mut_bytes())? {
521                        return Err(other("failed to read extension"));
522                    }
523
524                    self.next += BLOCK_SIZE;
525                    for block in ext.sparse.iter() {
526                        add_block(block)?;
527                    }
528                }
529            }
530        }
531        if cur != gnu.real_size()? {
532            return Err(other(
533                "mismatch in sparse file chunks and \
534                 size in header",
535            ));
536        }
537        entry.size = cur;
538        if remaining > 0 {
539            return Err(other(
540                "mismatch in sparse file chunks and \
541                 entry size in header",
542            ));
543        }
544        Ok(())
545    }
546
547    fn skip(&mut self, mut amt: u64) -> io::Result<()> {
548        if let Some(seekable_archive) = self.seekable_archive {
549            let pos = io::SeekFrom::Current(
550                i64::try_from(amt).map_err(|_| other("seek position out of bounds"))?,
551            );
552            (&seekable_archive.inner).seek(pos)?;
553        } else {
554            let mut buf = [0u8; 4096 * 8];
555            while amt > 0 {
556                let n = cmp::min(amt, buf.len() as u64);
557                let n = (&self.archive.inner).read(&mut buf[..n as usize])?;
558                if n == 0 {
559                    return Err(other("unexpected EOF during skip"));
560                }
561                amt -= n as u64;
562            }
563        }
564        Ok(())
565    }
566}
567
568impl<'a> Iterator for EntriesFields<'a> {
569    type Item = io::Result<Entry<'a, io::Empty>>;
570
571    fn next(&mut self) -> Option<io::Result<Entry<'a, io::Empty>>> {
572        if self.done {
573            None
574        } else {
575            match self.next_entry() {
576                Ok(Some(e)) => Some(Ok(e)),
577                Ok(None) => {
578                    self.done = true;
579                    None
580                }
581                Err(e) => {
582                    self.done = true;
583                    Some(Err(e))
584                }
585            }
586        }
587    }
588}
589
590impl<'a, R: ?Sized + Read> Read for &'a ArchiveInner<R> {
591    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
592        let i = self.obj.borrow_mut().read(into)?;
593        self.pos.set(self.pos.get() + i as u64);
594        Ok(i)
595    }
596}
597
598impl<'a, R: ?Sized + Seek> Seek for &'a ArchiveInner<R> {
599    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
600        let pos = self.obj.borrow_mut().seek(pos)?;
601        self.pos.set(pos);
602        Ok(pos)
603    }
604}
605
606/// Try to fill the buffer from the reader.
607///
608/// If the reader reaches its end before filling the buffer at all, returns `false`.
609/// Otherwise returns `true`.
610fn try_read_all<R: Read>(r: &mut R, buf: &mut [u8]) -> io::Result<bool> {
611    let mut read = 0;
612    while read < buf.len() {
613        match r.read(&mut buf[read..])? {
614            0 => {
615                if read == 0 {
616                    return Ok(false);
617                }
618
619                return Err(other("failed to read entire block"));
620            }
621            n => read += n,
622        }
623    }
624    Ok(true)
625}