Skip to main content

tar/
builder.rs

1use std::fs;
2use std::io;
3use std::io::prelude::*;
4use std::path::Path;
5use std::str;
6
7use crate::header::BLOCK_SIZE;
8use crate::header::GNU_SPARSE_HEADERS_COUNT;
9use crate::header::{path2bytes, HeaderMode};
10use crate::GnuExtSparseHeader;
11use crate::{other, EntryType, Header};
12
13/// A structure for building archives
14///
15/// This structure has methods for building up an archive from scratch into any
16/// arbitrary writer.
17pub struct Builder<W: Write> {
18    options: BuilderOptions,
19    finished: bool,
20    obj: Option<W>,
21}
22
23#[derive(Clone, Copy)]
24struct BuilderOptions {
25    mode: HeaderMode,
26    preserve_absolute: bool,
27    follow: bool,
28    sparse: bool,
29}
30
31impl<W: Write> Builder<W> {
32    /// Create a new archive builder with the underlying object as the
33    /// destination of all data written. The builder will use
34    /// `HeaderMode::Complete` by default.
35    pub fn new(obj: W) -> Builder<W> {
36        Builder {
37            options: BuilderOptions {
38                mode: HeaderMode::Complete,
39                preserve_absolute: false,
40                follow: true,
41                sparse: true,
42            },
43            finished: false,
44            obj: Some(obj),
45        }
46    }
47
48    /// Changes the HeaderMode that will be used when reading fs Metadata for
49    /// methods that implicitly read metadata for an input Path. Notably, this
50    /// does _not_ apply to `append(Header)`.
51    pub fn mode(&mut self, mode: HeaderMode) {
52        self.options.mode = mode;
53    }
54
55    /// Peserve absolute path while creating an archive
56    pub fn preserve_absolute(&mut self, preserve: bool) {
57        self.options.preserve_absolute = preserve;
58    }
59
60    /// Control whether symlinks are followed when reading from the filesystem.
61    /// Defaults to `true` (but see the note below — you almost certainly want
62    /// to call `follow_symlinks(false)`).
63    ///
64    /// When `true`, symlinks are dereferenced: the archive entry contains the
65    /// contents of the symlink target rather than the symlink itself,
66    /// equivalent to GNU `tar --dereference` (`-h`). When `false` (the default
67    /// for all mainstream tar implementations), symlinks are stored as symlink
68    /// entries in the archive.
69    ///
70    /// # Why you should almost always use `follow_symlinks(false)`
71    ///
72    /// Every mainstream tar implementation preserves symlinks by default.
73    /// GNU `tar` requires the explicit `--dereference` (`-h`) flag to follow
74    /// them. Go's `archive/tar` stores whatever the underlying `fs.FS` reports
75    /// and never dereferences on its own. BSD `tar` behaves the same way.
76    /// This crate's default of `true` is a historical quirk kept for
77    /// compatibility but is wrong for most use-cases:
78    ///
79    /// - Symlinks in the source tree are part of its structure and should
80    ///   normally be preserved, not silently replaced by their targets.
81    /// - When `true`, [`append_dir_all`](Builder::append_dir_all) follows
82    ///   symlinks that point *outside* `src_path` just as readily as those
83    ///   inside it. If the archiving process has broader filesystem read access
84    ///   than whoever controls the source tree (e.g. a privileged backup
85    ///   service, a CI runner archiving user-submitted workspaces), an attacker
86    ///   can plant a symlink inside `src_path` to silently include arbitrary
87    ///   files from the host.
88    ///
89    /// Call `follow_symlinks(false)` unless you have a specific reason to
90    /// flatten symlinks into their targets. For the strongest guarantee, open
91    /// `src_path` with [`cap-std`] and walk the tree with capability-safe I/O,
92    /// which blocks symlink escapes at the OS level regardless of this setting.
93    ///
94    /// [`cap-std`]: https://docs.rs/cap-std/
95    pub fn follow_symlinks(&mut self, follow: bool) {
96        self.options.follow = follow;
97    }
98
99    /// Handle sparse files efficiently, if supported by the underlying
100    /// filesystem. When true, sparse file information is read from disk and
101    /// empty segments are omitted from the archive. Defaults to true.
102    pub fn sparse(&mut self, sparse: bool) {
103        self.options.sparse = sparse;
104    }
105
106    /// Gets shared reference to the underlying object.
107    pub fn get_ref(&self) -> &W {
108        self.obj.as_ref().unwrap()
109    }
110
111    /// Gets mutable reference to the underlying object.
112    ///
113    /// Note that care must be taken while writing to the underlying
114    /// object. But, e.g. `get_mut().flush()` is claimed to be safe and
115    /// useful in the situations when one needs to be ensured that
116    /// tar entry was flushed to the disk.
117    pub fn get_mut(&mut self) -> &mut W {
118        self.obj.as_mut().unwrap()
119    }
120
121    /// Unwrap this archive, returning the underlying object.
122    ///
123    /// This function will finish writing the archive if the `finish` function
124    /// hasn't yet been called, returning any I/O error which happens during
125    /// that operation.
126    pub fn into_inner(mut self) -> io::Result<W> {
127        if !self.finished {
128            self.finish()?;
129        }
130        Ok(self.obj.take().unwrap())
131    }
132
133    /// Adds a new entry to this archive.
134    ///
135    /// This function will append the header specified, followed by contents of
136    /// the stream specified by `data`. To produce a valid archive the `size`
137    /// field of `header` must be the same as the length of the stream that's
138    /// being written. Additionally the checksum for the header should have been
139    /// set via the `set_cksum` method.
140    ///
141    /// Note that this will not attempt to seek the archive to a valid position,
142    /// so if the archive is in the middle of a read or some other similar
143    /// operation then this may corrupt the archive.
144    ///
145    /// Also note that after all entries have been written to an archive the
146    /// `finish` function needs to be called to finish writing the archive.
147    ///
148    /// # Errors
149    ///
150    /// This function will return an error for any intermittent I/O error which
151    /// occurs when either reading or writing.
152    ///
153    /// # Examples
154    ///
155    /// ```
156    /// use tar::{Builder, Header};
157    ///
158    /// let mut header = Header::new_gnu();
159    /// header.set_path("foo").unwrap();
160    /// header.set_size(4);
161    /// header.set_cksum();
162    ///
163    /// let mut data: &[u8] = &[1, 2, 3, 4];
164    ///
165    /// let mut ar = Builder::new(Vec::new());
166    /// ar.append(&header, data).unwrap();
167    /// let data = ar.into_inner().unwrap();
168    /// ```
169    pub fn append<R: Read>(&mut self, header: &Header, mut data: R) -> io::Result<()> {
170        append(self.get_mut(), header, &mut data)
171    }
172
173    /// Adds a new entry to this archive with the specified path.
174    ///
175    /// This function will set the specified path in the given header, which may
176    /// require appending a GNU long-name extension entry to the archive first.
177    /// The checksum for the header will be automatically updated via the
178    /// `set_cksum` method after setting the path. No other metadata in the
179    /// header will be modified.
180    ///
181    /// Then it will append the header, followed by contents of the stream
182    /// specified by `data`. To produce a valid archive the `size` field of
183    /// `header` must be the same as the length of the stream that's being
184    /// written.
185    ///
186    /// Note that this will not attempt to seek the archive to a valid position,
187    /// so if the archive is in the middle of a read or some other similar
188    /// operation then this may corrupt the archive.
189    ///
190    /// Also note that after all entries have been written to an archive the
191    /// `finish` function needs to be called to finish writing the archive.
192    ///
193    /// # Errors
194    ///
195    /// This function will return an error for any intermittent I/O error which
196    /// occurs when either reading or writing.
197    ///
198    /// # Examples
199    ///
200    /// ```
201    /// use tar::{Builder, Header};
202    ///
203    /// let mut header = Header::new_gnu();
204    /// header.set_size(4);
205    /// header.set_cksum();
206    ///
207    /// let mut data: &[u8] = &[1, 2, 3, 4];
208    ///
209    /// let mut ar = Builder::new(Vec::new());
210    /// ar.append_data(&mut header, "really/long/path/to/foo", data).unwrap();
211    /// let data = ar.into_inner().unwrap();
212    /// ```
213    pub fn append_data<P: AsRef<Path>, R: Read>(
214        &mut self,
215        header: &mut Header,
216        path: P,
217        data: R,
218    ) -> io::Result<()> {
219        let allow_absolute = self.options.preserve_absolute;
220        prepare_header_path(self.get_mut(), header, path.as_ref(), allow_absolute)?;
221        header.set_cksum();
222        self.append(header, data)
223    }
224
225    /// Adds a new entry to this archive and returns an [`EntryWriter`] for
226    /// adding its contents.
227    ///
228    /// This function is similar to [`Self::append_data`] but returns a
229    /// [`io::Write`] implementation instead of taking data as a parameter.
230    ///
231    /// Similar constraints around the position of the archive and completion
232    /// apply as with [`Self::append_data`]. It requires the underlying writer
233    /// to implement [`Seek`] to update the header after writing the data.
234    ///
235    /// # Errors
236    ///
237    /// This function will return an error for any intermittent I/O error which
238    /// occurs when either reading or writing.
239    ///
240    /// # Examples
241    ///
242    /// ```
243    /// use std::io::Cursor;
244    /// use std::io::Write as _;
245    /// use tar::{Builder, Header};
246    ///
247    /// let mut header = Header::new_gnu();
248    ///
249    /// let mut ar = Builder::new(Cursor::new(Vec::new()));
250    /// let mut entry = ar.append_writer(&mut header, "hi.txt").unwrap();
251    /// entry.write_all(b"Hello, ").unwrap();
252    /// entry.write_all(b"world!\n").unwrap();
253    /// entry.finish().unwrap();
254    /// ```
255    pub fn append_writer<'a, P: AsRef<Path>>(
256        &'a mut self,
257        header: &'a mut Header,
258        path: P,
259    ) -> io::Result<EntryWriter<'a>>
260    where
261        W: Seek,
262    {
263        let allow_absolute = self.options.preserve_absolute;
264        EntryWriter::start(self.get_mut(), header, path.as_ref(), allow_absolute)
265    }
266
267    /// Adds a new link (symbolic or hard) entry to this archive with the specified path and target.
268    ///
269    /// This function is similar to [`Self::append_data`] which supports long filenames,
270    /// but also supports long link targets using GNU extensions if necessary.
271    /// You must set the entry type to either [`EntryType::Link`] or [`EntryType::Symlink`].
272    /// The `set_cksum` method will be invoked after setting the path. No other metadata in the
273    /// header will be modified.
274    ///
275    /// If you are intending to use GNU extensions, you must use this method over calling
276    /// [`Header::set_link_name`] because that function will fail on long links.
277    ///
278    /// Similar constraints around the position of the archive and completion
279    /// apply as with [`Self::append_data`].
280    ///
281    /// # Errors
282    ///
283    /// This function will return an error for any intermittent I/O error which
284    /// occurs when either reading or writing.
285    ///
286    /// # Examples
287    ///
288    /// ```
289    /// use tar::{Builder, Header, EntryType};
290    ///
291    /// let mut ar = Builder::new(Vec::new());
292    /// let mut header = Header::new_gnu();
293    /// header.set_username("foo");
294    /// header.set_entry_type(EntryType::Symlink);
295    /// header.set_size(0);
296    /// ar.append_link(&mut header, "really/long/path/to/foo", "other/really/long/target").unwrap();
297    /// let data = ar.into_inner().unwrap();
298    /// ```
299    pub fn append_link<P: AsRef<Path>, T: AsRef<Path>>(
300        &mut self,
301        header: &mut Header,
302        path: P,
303        target: T,
304    ) -> io::Result<()> {
305        self._append_link(header, path.as_ref(), target.as_ref())
306    }
307
308    fn _append_link(&mut self, header: &mut Header, path: &Path, target: &Path) -> io::Result<()> {
309        let allow_abolute = self.options.preserve_absolute;
310        prepare_header_path(self.get_mut(), header, path, allow_abolute)?;
311        prepare_header_link(self.get_mut(), header, target)?;
312        header.set_cksum();
313        self.append(header, std::io::empty())
314    }
315
316    /// Adds a file on the local filesystem to this archive.
317    ///
318    /// This function will open the file specified by `path` and insert the file
319    /// into the archive with the appropriate metadata set, returning any I/O
320    /// error which occurs while writing. The path name for the file inside of
321    /// this archive will be the same as `path`, and it is required that the
322    /// path is a relative path.
323    ///
324    /// Note that this will not attempt to seek the archive to a valid position,
325    /// so if the archive is in the middle of a read or some other similar
326    /// operation then this may corrupt the archive.
327    ///
328    /// Also note that after all files have been written to an archive the
329    /// `finish` function needs to be called to finish writing the archive.
330    ///
331    /// # Examples
332    ///
333    /// ```no_run
334    /// use tar::Builder;
335    ///
336    /// let mut ar = Builder::new(Vec::new());
337    ///
338    /// ar.append_path("foo/bar.txt").unwrap();
339    /// ```
340    pub fn append_path<P: AsRef<Path>>(&mut self, path: P) -> io::Result<()> {
341        let options = self.options;
342        append_path_with_name(self.get_mut(), path.as_ref(), None, options)
343    }
344
345    /// Adds a file on the local filesystem to this archive under another name.
346    ///
347    /// This function will open the file specified by `path` and insert the file
348    /// into the archive as `name` with appropriate metadata set, returning any
349    /// I/O error which occurs while writing. The path name for the file inside
    /// of this archive will be `name`, which is required to be a relative path.
351    ///
352    /// Note that this will not attempt to seek the archive to a valid position,
353    /// so if the archive is in the middle of a read or some other similar
354    /// operation then this may corrupt the archive.
355    ///
    /// Note that if `path` is a directory, this will just add an entry for it
    /// to the archive, rather than the contents of the directory.
358    ///
359    /// Also note that after all files have been written to an archive the
360    /// `finish` function needs to be called to finish writing the archive.
361    ///
362    /// # Examples
363    ///
364    /// ```no_run
365    /// use tar::Builder;
366    ///
367    /// let mut ar = Builder::new(Vec::new());
368    ///
369    /// // Insert the local file "foo/bar.txt" in the archive but with the name
370    /// // "bar/foo.txt".
371    /// ar.append_path_with_name("foo/bar.txt", "bar/foo.txt").unwrap();
372    /// ```
373    pub fn append_path_with_name<P: AsRef<Path>, N: AsRef<Path>>(
374        &mut self,
375        path: P,
376        name: N,
377    ) -> io::Result<()> {
378        let options = self.options;
379        append_path_with_name(self.get_mut(), path.as_ref(), Some(name.as_ref()), options)
380    }
381
382    /// Adds a file to this archive with the given path as the name of the file
383    /// in the archive.
384    ///
385    /// This will use the metadata of `file` to populate a `Header`, and it will
386    /// then append the file to the archive with the name `path`.
387    ///
388    /// Note that this will not attempt to seek the archive to a valid position,
389    /// so if the archive is in the middle of a read or some other similar
390    /// operation then this may corrupt the archive.
391    ///
392    /// Also note that after all files have been written to an archive the
393    /// `finish` function needs to be called to finish writing the archive.
394    ///
395    /// # Examples
396    ///
397    /// ```no_run
398    /// use std::fs::File;
399    /// use tar::Builder;
400    ///
401    /// let mut ar = Builder::new(Vec::new());
402    ///
403    /// // Open the file at one location, but insert it into the archive with a
404    /// // different name.
405    /// let mut f = File::open("foo/bar/baz.txt").unwrap();
406    /// ar.append_file("bar/baz.txt", &mut f).unwrap();
407    /// ```
408    pub fn append_file<P: AsRef<Path>>(&mut self, path: P, file: &mut fs::File) -> io::Result<()> {
409        let options = self.options;
410        append_file(self.get_mut(), path.as_ref(), file, options)
411    }
412
413    /// Adds a directory to this archive with the given path as the name of the
414    /// directory in the archive.
415    ///
416    /// This will use `stat` to populate a `Header`, and it will then append the
417    /// directory to the archive with the name `path`.
418    ///
419    /// Note that this will not attempt to seek the archive to a valid position,
420    /// so if the archive is in the middle of a read or some other similar
421    /// operation then this may corrupt the archive.
422    ///
423    /// Note this will not add the contents of the directory to the archive.
424    /// See `append_dir_all` for recursively adding the contents of the directory.
425    ///
426    /// Also note that after all files have been written to an archive the
427    /// `finish` function needs to be called to finish writing the archive.
428    ///
429    /// # Examples
430    ///
431    /// ```
432    /// use std::fs;
433    /// use tar::Builder;
434    ///
435    /// let mut ar = Builder::new(Vec::new());
436    ///
437    /// // Use the directory at one location, but insert it into the archive
438    /// // with a different name.
439    /// ar.append_dir("bardir", ".").unwrap();
440    /// ```
441    pub fn append_dir<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
442    where
443        P: AsRef<Path>,
444        Q: AsRef<Path>,
445    {
446        let options = self.options;
447        append_dir(self.get_mut(), path.as_ref(), src_path.as_ref(), options)
448    }
449
450    /// Adds a directory and all of its contents (recursively) to this archive
451    /// with the given path as the name of the directory in the archive.
452    ///
453    /// Note that this will not attempt to seek the archive to a valid position,
454    /// so if the archive is in the middle of a read or some other similar
455    /// operation then this may corrupt the archive.
456    ///
457    /// Also note that after all files have been written to an archive the
458    /// `finish` or `into_inner` function needs to be called to finish
459    /// writing the archive.
460    ///
461    /// # Security
462    ///
463    /// **Call [`follow_symlinks(false)`](Builder::follow_symlinks) before this
464    /// method** unless you have an explicit reason to dereference symlinks.
465    /// All mainstream tar implementations (GNU tar, BSD tar, Go's
466    /// `archive/tar`) preserve symlinks by default; this crate's default of
467    /// `true` is a historical quirk.
468    ///
469    /// When `follow_symlinks` is `true` (the current default), this method
470    /// dereferences every symlink it encounters, including ones whose targets
471    /// lie **outside** `src_path`. When the archiver runs with broader
472    /// filesystem access than whoever controls the source tree (e.g. a
473    /// privileged backup or export service), an attacker can plant a symlink
474    /// inside `src_path` to silently include arbitrary files the archiver can
475    /// read, with no indication in the archive that they came from outside the
476    /// source root.
477    ///
478    /// ```no_run
479    /// use tar::Builder;
480    ///
481    /// # let src_path = std::path::Path::new(".");
482    /// # let writer = std::io::sink();
483    /// // Recommended: preserve symlinks as-is, matching GNU tar's default.
484    /// let mut ar = Builder::new(writer);
485    /// ar.follow_symlinks(false);
486    /// ar.append_dir_all("", src_path).unwrap();
487    /// ar.finish().unwrap();
488    /// ```
489    ///
490    /// With `follow_symlinks(false)`, symlinks inside the source tree are
491    /// stored as symlink entries in the archive rather than being read through.
492    /// Note that the resulting archive may then contain symlinks with absolute
493    /// or `..`-relative targets; validate or strip those on extraction if the
494    /// archive consumer is also untrusted.
495    ///
496    /// For the strongest available guarantee, open `src_path` using [`cap-std`]
497    /// and walk the directory tree with capability-safe I/O. This prevents
498    /// symlink escapes at the OS level and protects against TOCTOU races that
499    /// a purely path-based check cannot close.
500    ///
501    /// [`cap-std`]: https://docs.rs/cap-std/
502    ///
503    /// # Examples
504    ///
505    /// ```
506    /// use std::fs;
507    /// use tar::Builder;
508    ///
509    /// let mut ar = Builder::new(Vec::new());
510    ///
511    /// // Use the directory at one location ("."), but insert it into the archive
512    /// // with a different name ("bardir").
513    /// ar.append_dir_all("bardir", ".").unwrap();
514    /// ar.finish().unwrap();
515    /// ```
516    ///
517    /// Use `append_dir_all` with an empty string as the first path argument to
518    /// create an archive from all files in a directory without renaming.
519    ///
520    /// ```
521    /// use std::fs;
522    /// use std::path::PathBuf;
523    /// use tar::{Archive, Builder};
524    ///
525    /// let tmpdir = tempfile::tempdir().unwrap();
526    /// let path = tmpdir.path();
527    /// fs::write(path.join("a.txt"), b"hello").unwrap();
528    /// fs::write(path.join("b.txt"), b"world").unwrap();
529    ///
530    /// // Create a tarball from the files in the directory
531    /// let mut ar = Builder::new(Vec::new());
532    /// ar.append_dir_all("", path).unwrap();
533    ///
534    /// // List files in the archive
535    /// let archive = ar.into_inner().unwrap();
536    /// let archived_files = Archive::new(archive.as_slice())
537    ///     .entries()
538    ///     .unwrap()
539    ///     .map(|entry| entry.unwrap().path().unwrap().into_owned())
540    ///     .collect::<Vec<_>>();
541    ///
542    /// assert!(archived_files.contains(&PathBuf::from("a.txt")));
543    /// assert!(archived_files.contains(&PathBuf::from("b.txt")));
544    /// ```
545    pub fn append_dir_all<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
546    where
547        P: AsRef<Path>,
548        Q: AsRef<Path>,
549    {
550        let options = self.options;
551        append_dir_all(self.get_mut(), path.as_ref(), src_path.as_ref(), options)
552    }
553
554    /// Finish writing this archive, emitting the termination sections.
555    ///
556    /// This function should only be called when the archive has been written
557    /// entirely and if an I/O error happens the underlying object still needs
558    /// to be acquired.
559    ///
560    /// In most situations the `into_inner` method should be preferred.
561    pub fn finish(&mut self) -> io::Result<()> {
562        if self.finished {
563            return Ok(());
564        }
565        self.finished = true;
566        self.get_mut().write_all(&[0; 1024])
567    }
568}
569
// Combination of `Write` and `Seek` that can be used as a single trait
// object (`dyn SeekWrite`).
trait SeekWrite: Write + Seek {
    // Views this value as a plain `dyn Write`.
    fn as_write(&mut self) -> &mut dyn Write;
}

// Blanket implementation: anything that is both `Write` and `Seek` qualifies.
impl<T> SeekWrite for T
where
    T: Write + Seek,
{
    fn as_write(&mut self) -> &mut dyn Write {
        self
    }
}
579
580/// A writer for a single entry in a tar archive.
581///
582/// This struct is returned by [`Builder::append_writer`] and provides a
583/// [`Write`] implementation for adding content to an archive entry.
584///
585/// After writing all data to the entry, it must be finalized either by
586/// explicitly calling [`EntryWriter::finish`] or by letting it drop.
587pub struct EntryWriter<'a> {
588    // NOTE: Do not add any fields here which require Drop!
589    // See the comment below in finish().
590    obj: &'a mut dyn SeekWrite,
591    header: &'a mut Header,
592    written: u64,
593}
594
595impl EntryWriter<'_> {
596    fn start<'a>(
597        obj: &'a mut dyn SeekWrite,
598        header: &'a mut Header,
599        path: &Path,
600        allow_absolute: bool,
601    ) -> io::Result<EntryWriter<'a>> {
602        prepare_header_path(obj.as_write(), header, path, allow_absolute)?;
603
604        // Reserve space for header, will be overwritten once data is written.
605        obj.write_all([0u8; BLOCK_SIZE as usize].as_ref())?;
606
607        Ok(EntryWriter {
608            obj,
609            header,
610            written: 0,
611        })
612    }
613
614    /// Finish writing the current entry in the archive.
615    pub fn finish(self) -> io::Result<()> {
616        // NOTE: This is an optimization for "fallible destructuring".
617        // We want finish() to return an error, but we also need to invoke
618        // cleanup in our Drop handler, which will run unconditionally
619        // and try to do the same work.
620        // By using ManuallyDrop, we suppress that drop. However, this would
621        // be a memory leak if we ever had any struct members which required
622        // Drop - which we don't right now.
623        // But if we ever gain one, we will need to change to use e.g. Option<>
624        // around some of the fields or have a `bool finished` etc.
625        let mut this = std::mem::ManuallyDrop::new(self);
626        this.do_finish()
627    }
628
629    fn do_finish(&mut self) -> io::Result<()> {
630        // Pad with zeros if necessary.
631        let buf = [0u8; BLOCK_SIZE as usize];
632        let remaining = BLOCK_SIZE.wrapping_sub(self.written) % BLOCK_SIZE;
633        self.obj.write_all(&buf[..remaining as usize])?;
634        let written = (self.written + remaining) as i64;
635
636        // Seek back to the header position.
637        self.obj
638            .seek(io::SeekFrom::Current(-written - BLOCK_SIZE as i64))?;
639
640        self.header.set_size(self.written);
641        self.header.set_cksum();
642        self.obj.write_all(self.header.as_bytes())?;
643
644        // Seek forward to restore the position.
645        self.obj.seek(io::SeekFrom::Current(written))?;
646
647        Ok(())
648    }
649}
650
651impl Write for EntryWriter<'_> {
652    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
653        let len = self.obj.write(buf)?;
654        self.written += len as u64;
655        Ok(len)
656    }
657
658    fn flush(&mut self) -> io::Result<()> {
659        self.obj.flush()
660    }
661}
662
663impl Drop for EntryWriter<'_> {
664    fn drop(&mut self) {
665        let _ = self.do_finish();
666    }
667}
668
669fn append(mut dst: &mut dyn Write, header: &Header, mut data: &mut dyn Read) -> io::Result<()> {
670    dst.write_all(header.as_bytes())?;
671    let len = io::copy(&mut data, &mut dst)?;
672    pad_zeroes(&mut dst, len)?;
673    Ok(())
674}
675
676fn pad_zeroes(dst: &mut dyn Write, len: u64) -> io::Result<()> {
677    let buf = [0; BLOCK_SIZE as usize];
678    let remaining = BLOCK_SIZE - (len % BLOCK_SIZE);
679    if remaining < BLOCK_SIZE {
680        dst.write_all(&buf[..remaining as usize])?;
681    }
682    Ok(())
683}
684
685fn append_path_with_name(
686    dst: &mut dyn Write,
687    path: &Path,
688    name: Option<&Path>,
689    options: BuilderOptions,
690) -> io::Result<()> {
691    let stat = if options.follow {
692        fs::metadata(path).map_err(|err| {
693            io::Error::new(
694                err.kind(),
695                format!("{} when getting metadata for {}", err, path.display()),
696            )
697        })?
698    } else {
699        fs::symlink_metadata(path).map_err(|err| {
700            io::Error::new(
701                err.kind(),
702                format!("{} when getting metadata for {}", err, path.display()),
703            )
704        })?
705    };
706    let ar_name = name.unwrap_or(path);
707    if stat.is_file() {
708        append_file(dst, ar_name, &mut fs::File::open(path)?, options)
709    } else if stat.is_dir() {
710        append_fs(
711            dst,
712            ar_name,
713            &stat,
714            options.mode,
715            options.preserve_absolute,
716            None,
717        )
718    } else if stat.file_type().is_symlink() {
719        let link_name = fs::read_link(path)?;
720        append_fs(
721            dst,
722            ar_name,
723            &stat,
724            options.mode,
725            options.preserve_absolute,
726            Some(&link_name),
727        )
728    } else {
729        #[cfg(unix)]
730        {
731            append_special(dst, path, &stat, options.mode, options.preserve_absolute)
732        }
733        #[cfg(not(unix))]
734        {
735            Err(other(&format!("{} has unknown file type", path.display())))
736        }
737    }
738}
739
740#[cfg(unix)]
741fn append_special(
742    dst: &mut dyn Write,
743    path: &Path,
744    stat: &fs::Metadata,
745    mode: HeaderMode,
746    allow_absolute: bool,
747) -> io::Result<()> {
748    use ::std::os::unix::fs::{FileTypeExt, MetadataExt};
749
750    let file_type = stat.file_type();
751    let entry_type;
752    if file_type.is_socket() {
753        // sockets can't be archived
754        return Err(other(&format!(
755            "{}: socket can not be archived",
756            path.display()
757        )));
758    } else if file_type.is_fifo() {
759        entry_type = EntryType::Fifo;
760    } else if file_type.is_char_device() {
761        entry_type = EntryType::Char;
762    } else if file_type.is_block_device() {
763        entry_type = EntryType::Block;
764    } else {
765        return Err(other(&format!("{} has unknown file type", path.display())));
766    }
767
768    let mut header = Header::new_gnu();
769    header.set_metadata_in_mode(stat, mode);
770    prepare_header_path(dst, &mut header, path, allow_absolute)?;
771
772    header.set_entry_type(entry_type);
773    let dev_id = stat.rdev();
774    let dev_major = ((dev_id >> 32) & 0xffff_f000) | ((dev_id >> 8) & 0x0000_0fff);
775    let dev_minor = ((dev_id >> 12) & 0xffff_ff00) | ((dev_id) & 0x0000_00ff);
776    header.set_device_major(dev_major as u32)?;
777    header.set_device_minor(dev_minor as u32)?;
778
779    header.set_cksum();
780    dst.write_all(header.as_bytes())?;
781
782    Ok(())
783}
784
785fn append_file(
786    dst: &mut dyn Write,
787    path: &Path,
788    file: &mut fs::File,
789    options: BuilderOptions,
790) -> io::Result<()> {
791    let stat = file.metadata()?;
792    let mut header = Header::new_gnu();
793
794    prepare_header_path(dst, &mut header, path, options.preserve_absolute)?;
795    header.set_metadata_in_mode(&stat, options.mode);
796    let sparse_entries = if options.sparse {
797        prepare_header_sparse(file, &stat, &mut header)?
798    } else {
799        None
800    };
801    header.set_cksum();
802    dst.write_all(header.as_bytes())?;
803
804    if let Some(sparse_entries) = sparse_entries {
805        append_extended_sparse_headers(dst, &sparse_entries)?;
806        for entry in sparse_entries.entries {
807            file.seek(io::SeekFrom::Start(entry.offset))?;
808            io::copy(&mut file.take(entry.num_bytes), dst)?;
809        }
810        pad_zeroes(dst, sparse_entries.on_disk_size)?;
811    } else {
812        let len = io::copy(file, dst)?;
813        pad_zeroes(dst, len)?;
814    }
815
816    Ok(())
817}
818
819fn append_dir(
820    dst: &mut dyn Write,
821    path: &Path,
822    src_path: &Path,
823    options: BuilderOptions,
824) -> io::Result<()> {
825    let stat = fs::metadata(src_path)?;
826    append_fs(
827        dst,
828        path,
829        &stat,
830        options.mode,
831        options.preserve_absolute,
832        None,
833    )
834}
835
836fn prepare_header(size: u64, entry_type: u8) -> Header {
837    let mut header = Header::new_gnu();
838    let name = b"././@LongLink";
839    header.as_gnu_mut().unwrap().name[..name.len()].clone_from_slice(&name[..]);
840    header.set_mode(0o644);
841    header.set_uid(0);
842    header.set_gid(0);
843    header.set_mtime(0);
844    // + 1 to be compliant with GNU tar
845    header.set_size(size + 1);
846    header.set_entry_type(EntryType::new(entry_type));
847    header.set_cksum();
848    header
849}
850
851fn prepare_header_path(
852    dst: &mut dyn Write,
853    header: &mut Header,
854    path: &Path,
855    allow_absolute: bool,
856) -> io::Result<()> {
857    // Try to encode the path directly in the header, but if it ends up not
858    // working (probably because it's too long) then try to use the GNU-specific
859    // long name extension by emitting an entry which indicates that it's the
860    // filename.
861    let result = if allow_absolute {
862        header.set_path_absolute(path)
863    } else {
864        header.set_path(path)
865    };
866
867    if let Err(e) = result {
868        let data = path2bytes(path)?;
869        let max = header.as_old().name.len();
870        // Since `e` isn't specific enough to let us know the path is indeed too
871        // long, verify it first before using the extension.
872        if data.len() < max {
873            return Err(e);
874        }
875        // Truncate the path to store in the header we're about to emit to
876        // ensure we've got something at least mentioned. Note that we use
877        // `str`-encoding to be compatible with Windows, but in general the
878        // entry in the header itself shouldn't matter too much since extraction
879        // doesn't look at it.
880        //
881        // Validate the truncated path BEFORE writing the long-name extension
882        // to the stream. If validation fails after writing, the orphaned
883        // extension entry corrupts subsequent archive entries.
884        let truncated = match str::from_utf8(&data[..max]) {
885            Ok(s) => s,
886            Err(e) => str::from_utf8(&data[..e.valid_up_to()]).unwrap(),
887        };
888        header.set_truncated_path_for_gnu_header(truncated, allow_absolute)?;
889
890        let header2 = prepare_header(data.len() as u64, b'L');
891        // null-terminated string
892        let mut data2 = data.chain(io::repeat(0).take(1));
893        append(dst, &header2, &mut data2)?;
894    }
895    Ok(())
896}
897
898fn prepare_header_link(
899    dst: &mut dyn Write,
900    header: &mut Header,
901    link_name: &Path,
902) -> io::Result<()> {
903    // Same as previous function but for linkname
904    if let Err(e) = header.set_link_name(link_name) {
905        let data = path2bytes(link_name)?;
906        if data.len() < header.as_old().linkname.len() {
907            return Err(e);
908        }
909        let header2 = prepare_header(data.len() as u64, b'K');
910        let mut data2 = data.chain(io::repeat(0).take(1));
911        append(dst, &header2, &mut data2)?;
912    }
913    Ok(())
914}
915
916fn prepare_header_sparse(
917    file: &mut fs::File,
918    stat: &fs::Metadata,
919    header: &mut Header,
920) -> io::Result<Option<SparseEntries>> {
921    let entries = match find_sparse_entries(file, stat)? {
922        Some(entries) => entries,
923        _ => return Ok(None),
924    };
925
926    header.set_entry_type(EntryType::GNUSparse);
927    header.set_size(entries.on_disk_size);
928
929    // Write the first 4 (GNU_SPARSE_HEADERS_COUNT) entries to the given header.
930    // The remaining entries will be written as subsequent extended headers. See
931    // https://www.gnu.org/software/tar/manual/html_section/Sparse-Formats.html#Old-GNU-Format
932    // for details on the format.
933    let gnu_header = &mut header.as_gnu_mut().unwrap();
934    gnu_header.set_real_size(entries.size());
935
936    for (entry, header_entry) in std::iter::zip(&entries.entries, &mut gnu_header.sparse) {
937        header_entry.set_offset(entry.offset);
938        header_entry.set_length(entry.num_bytes);
939    }
940    gnu_header.set_is_extended(entries.entries.len() > gnu_header.sparse.len());
941
942    Ok(Some(entries))
943}
944
945/// Write extra sparse headers into `dst` for those entries that did not fit in the main header.
946fn append_extended_sparse_headers(dst: &mut dyn Write, entries: &SparseEntries) -> io::Result<()> {
947    // The first `GNU_SPARSE_HEADERS_COUNT` entries are written to the main header, so skip them.
948    let mut it = entries
949        .entries
950        .iter()
951        .skip(GNU_SPARSE_HEADERS_COUNT)
952        .peekable();
953
954    // Each GnuExtSparseHeader can hold up to fixed number of sparse entries (21).
955    // So we pack entries into multiple headers if necessary.
956    while it.peek().is_some() {
957        let mut ext_header = GnuExtSparseHeader::new();
958        for header_entry in ext_header.sparse.iter_mut() {
959            if let Some(entry) = it.next() {
960                header_entry.set_offset(entry.offset);
961                header_entry.set_length(entry.num_bytes);
962            } else {
963                break;
964            }
965        }
966        ext_header.set_is_extended(it.peek().is_some());
967        dst.write_all(ext_header.as_bytes())?;
968    }
969
970    Ok(())
971}
972
973fn append_fs(
974    dst: &mut dyn Write,
975    path: &Path,
976    meta: &fs::Metadata,
977    mode: HeaderMode,
978    allow_absolute: bool,
979    link_name: Option<&Path>,
980) -> io::Result<()> {
981    let mut header = Header::new_gnu();
982
983    prepare_header_path(dst, &mut header, path, allow_absolute)?;
984    header.set_metadata_in_mode(meta, mode);
985    if let Some(link_name) = link_name {
986        prepare_header_link(dst, &mut header, link_name)?;
987    }
988    header.set_cksum();
989    dst.write_all(header.as_bytes())
990}
991
992fn append_dir_all(
993    dst: &mut dyn Write,
994    path: &Path,
995    src_path: &Path,
996    options: BuilderOptions,
997) -> io::Result<()> {
998    let mut stack = vec![(src_path.to_path_buf(), true, false)];
999    while let Some((src, is_dir, is_symlink)) = stack.pop() {
1000        let dest = path.join(src.strip_prefix(src_path).unwrap());
1001        // In case of a symlink pointing to a directory, is_dir is false, but src.is_dir() will return true
1002        if is_dir || (is_symlink && options.follow && src.is_dir()) {
1003            for entry in fs::read_dir(&src)? {
1004                let entry = entry?;
1005                let file_type = entry.file_type()?;
1006                stack.push((entry.path(), file_type.is_dir(), file_type.is_symlink()));
1007            }
1008            if dest != Path::new("") {
1009                append_dir(dst, &dest, &src, options)?;
1010            }
1011        } else if !options.follow && is_symlink {
1012            let stat = fs::symlink_metadata(&src)?;
1013            let link_name = fs::read_link(&src)?;
1014            append_fs(
1015                dst,
1016                &dest,
1017                &stat,
1018                options.mode,
1019                options.preserve_absolute,
1020                Some(&link_name),
1021            )?;
1022        } else {
1023            #[cfg(unix)]
1024            {
1025                let stat = fs::metadata(&src)?;
1026                if !stat.is_file() {
1027                    append_special(dst, &dest, &stat, options.mode, options.preserve_absolute)?;
1028                    continue;
1029                }
1030            }
1031            append_file(dst, &dest, &mut fs::File::open(src)?, options)?;
1032        }
1033    }
1034    Ok(())
1035}
1036
/// The layout of a sparse file as a list of data segments.
#[derive(Debug, Clone, PartialEq, Eq)]
struct SparseEntries {
    /// Data segments of the file.
    entries: Vec<SparseEntry>,
    /// Number of data bytes stored in the archive for this file.
    on_disk_size: u64,
}

impl SparseEntries {
    /// Returns the end offset of the last entry, or `0` when there are no
    /// entries.
    fn size(&self) -> u64 {
        match self.entries.last() {
            Some(tail) => tail.offset + tail.num_bytes,
            None => 0,
        }
    }
}

/// A single segment of a sparse file.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
struct SparseEntry {
    /// Byte offset of the segment within the file.
    offset: u64,
    /// Length of the segment in bytes.
    num_bytes: u64,
}
1054
1055/// Find sparse entries in a file. Returns:
1056/// * `Ok(Some(_))` if the file is sparse.
1057/// * `Ok(None)` if the file is not sparse, or if the file system does not support sparse files.
1058/// * `Err(_)` if an error occurred. The lack of support for sparse files is not
1059///   considered an error. It might return an error if the file is modified
1060///   while reading.
1061fn find_sparse_entries(
1062    file: &mut fs::File,
1063    stat: &fs::Metadata,
1064) -> io::Result<Option<SparseEntries>> {
1065    #[cfg(not(any(target_os = "android", target_os = "freebsd", target_os = "linux")))]
1066    {
1067        let _ = file;
1068        let _ = stat;
1069        Ok(None)
1070    }
1071
1072    #[cfg(any(target_os = "android", target_os = "freebsd", target_os = "linux"))]
1073    find_sparse_entries_seek(file, stat)
1074}
1075
1076/// Implementation of `find_sparse_entries` using `SEEK_HOLE` and `SEEK_DATA`.
1077#[cfg(any(target_os = "android", target_os = "freebsd", target_os = "linux"))]
1078fn find_sparse_entries_seek(
1079    file: &mut fs::File,
1080    stat: &fs::Metadata,
1081) -> io::Result<Option<SparseEntries>> {
1082    use std::os::unix::fs::MetadataExt as _;
1083    use std::os::unix::io::AsRawFd as _;
1084
1085    fn lseek(file: &fs::File, offset: i64, whence: libc::c_int) -> Result<i64, i32> {
1086        #[cfg(any(target_os = "linux", target_os = "android"))]
1087        let lseek = libc::lseek64;
1088        #[cfg(not(any(target_os = "linux", target_os = "android")))]
1089        let lseek = libc::lseek;
1090
1091        match unsafe { lseek(file.as_raw_fd(), offset, whence) } {
1092            -1 => Err(io::Error::last_os_error().raw_os_error().unwrap()),
1093            off => Ok(off),
1094        }
1095    }
1096
1097    if stat.blocks() == 0 {
1098        return Ok(if stat.size() == 0 {
1099            // Empty file.
1100            None
1101        } else {
1102            // Fully sparse file.
1103            Some(SparseEntries {
1104                entries: vec![SparseEntry {
1105                    offset: stat.size(),
1106                    num_bytes: 0,
1107                }],
1108                on_disk_size: 0,
1109            })
1110        });
1111    }
1112
1113    // On most Unixes, we need to read `_PC_MIN_HOLE_SIZE` to see if the file
1114    // system supports `SEEK_HOLE`.
1115    // FreeBSD: https://man.freebsd.org/cgi/man.cgi?query=lseek&sektion=2&manpath=FreeBSD+14.1-STABLE
1116    #[cfg(not(any(target_os = "linux", target_os = "android")))]
1117    if unsafe { libc::fpathconf(file.as_raw_fd(), libc::_PC_MIN_HOLE_SIZE) } == -1 {
1118        return Ok(None);
1119    }
1120
1121    // Linux is the only UNIX-like without support for `_PC_MIN_HOLE_SIZE`, so
1122    // instead we try to call `lseek` and see if it fails.
1123    #[cfg(any(target_os = "linux", target_os = "android"))]
1124    match lseek(file, 0, libc::SEEK_HOLE) {
1125        Ok(_) => (),
1126        Err(libc::ENXIO) => {
1127            // The file is empty. Treat it as non-sparse.
1128            return Ok(None);
1129        }
1130        Err(_) => return Ok(None),
1131    }
1132
1133    let mut entries = Vec::new();
1134    let mut on_disk_size = 0;
1135    let mut off_s = 0;
1136    loop {
1137        //  off_s=0      │     off_s               │ off_s
1138        //    ↓          │       ↓                 │   ↓
1139        //    | DATA |…  │  ……………| HOLE | DATA |…  │  …|×EOF×
1140        //    ↑          │       ↑      ↑          │
1141        //   (a)         │  (b) (c)    (d)         │     (e)
1142        match lseek(file, off_s, libc::SEEK_DATA) {
1143            Ok(0) if off_s == 0 => (), // (a) The file starts with data.
1144            Ok(off) if off < off_s => {
1145                // (b) Unlikely.
1146                return Err(std::io::Error::new(
1147                    io::ErrorKind::Other,
1148                    "lseek(SEEK_DATA) went backwards",
1149                ));
1150            }
1151            Ok(off) if off == off_s => {
1152                // (c) The data at the same offset as the hole.
1153                return Err(std::io::Error::new(
1154                    io::ErrorKind::Other,
1155                    "lseek(SEEK_DATA) did not advance. \
1156                     Did the file change while appending?",
1157                ));
1158            }
1159            Ok(off) => off_s = off,    // (d) Jump to the next hole.
1160            Err(libc::ENXIO) => break, // (e) Reached the end of the file.
1161            Err(errno) => return Err(io::Error::from_raw_os_error(errno)),
1162        };
1163
1164        // off_s=0          │     off_s               │    off_s
1165        //   ↓              │       ↓                 │      ↓
1166        //   | DATA |×EOF×  │  ……………| DATA | HOLE |…  │  …|×EOF×
1167        //          ↑       │       ↑      ↑          │
1168        //         (a)      │  (b) (c)    (d)         │     (e)
1169        match lseek(file, off_s, libc::SEEK_HOLE) {
1170            Ok(off_e) if off_s == 0 && (off_e as u64) == stat.size() => {
1171                // (a) The file is not sparse.
1172                file.seek(io::SeekFrom::Start(0))?;
1173                return Ok(None);
1174            }
1175            Ok(off_e) if off_e < off_s => {
1176                // (b) Unlikely.
1177                return Err(std::io::Error::new(
1178                    io::ErrorKind::Other,
1179                    "lseek(SEEK_HOLE) went backwards",
1180                ));
1181            }
1182            Ok(off_e) if off_e == off_s => {
1183                // (c) The hole at the same offset as the data.
1184                return Err(std::io::Error::new(
1185                    io::ErrorKind::Other,
1186                    "lseek(SEEK_HOLE) did not advance. \
1187                     Did the file change while appending?",
1188                ));
1189            }
1190            Ok(off_e) => {
1191                // (d) Found a hole or reached the end of the file (implicit
1192                // zero-length hole).
1193                entries.push(SparseEntry {
1194                    offset: off_s as u64,
1195                    num_bytes: off_e as u64 - off_s as u64,
1196                });
1197                on_disk_size += off_e as u64 - off_s as u64;
1198                off_s = off_e;
1199            }
1200            Err(libc::ENXIO) => {
1201                // (e) off_s was already beyond the end of the file.
1202                return Err(std::io::Error::new(
1203                    io::ErrorKind::Other,
1204                    "lseek(SEEK_HOLE) returned ENXIO. \
1205                     Did the file change while appending?",
1206                ));
1207            }
1208            Err(errno) => return Err(io::Error::from_raw_os_error(errno)),
1209        };
1210    }
1211
1212    if off_s as u64 > stat.size() {
1213        return Err(std::io::Error::new(
1214            io::ErrorKind::Other,
1215            "lseek(SEEK_DATA) went beyond the end of the file. \
1216             Did the file change while appending?",
1217        ));
1218    }
1219
1220    // Add a final zero-length entry. It is required if the file ends with a
1221    // hole, and redundant otherwise. However, we add it unconditionally to
1222    // mimic GNU tar behavior.
1223    entries.push(SparseEntry {
1224        offset: stat.size(),
1225        num_bytes: 0,
1226    });
1227
1228    file.seek(io::SeekFrom::Start(0))?;
1229
1230    Ok(Some(SparseEntries {
1231        entries,
1232        on_disk_size,
1233    }))
1234}
1235
1236impl<W: Write> Drop for Builder<W> {
1237    fn drop(&mut self) {
1238        let _ = self.finish();
1239    }
1240}
1241
#[cfg(test)]
mod tests {
    use super::*;

    /// Should be multiple of 4KiB on ext4, multiple of 32KiB on FreeBSD/UFS, multiple of 64KiB on
    /// ppc64el
    const SPARSE_BLOCK_SIZE: u64 = 64 * 1024;

    /// Builds an entry whose offset and length are given in units of
    /// `SPARSE_BLOCK_SIZE`.
    const fn blocks(offset: u64, len: u64) -> SparseEntry {
        SparseEntry {
            offset: offset * SPARSE_BLOCK_SIZE,
            num_bytes: len * SPARSE_BLOCK_SIZE,
        }
    }

    /// Creates files with known data/hole layouts and checks the layout
    /// reported by `find_sparse_entries` against them.
    #[test]
    fn test_find_sparse_entries() {
        // Each case is a picture of the file (`####` is a data block, blanks
        // are a hole) together with the entries expected for it.
        const CASES: &[(&str, &[SparseEntry])] = &[
            ("|", &[]),
            ("|    |    |    |    |", &[blocks(4, 0)]),
            ("|####|####|####|####|", &[blocks(0, 4), blocks(4, 0)]),
            ("|####|####|    |    |", &[blocks(0, 2), blocks(4, 0)]),
            ("|    |    |####|####|", &[blocks(2, 2), blocks(4, 0)]),
            (
                "|####|    |####|    |",
                &[blocks(0, 1), blocks(2, 1), blocks(4, 0)],
            ),
            (
                "|####|    |    |####|",
                &[blocks(0, 1), blocks(3, 1), blocks(4, 0)],
            ),
            ("|    |####|####|    |", &[blocks(1, 2), blocks(4, 0)]),
        ];

        let data_block = [0xFF_u8; SPARSE_BLOCK_SIZE as usize];
        let mut file = tempfile::tempfile().unwrap();

        for &(description, map) in CASES {
            // Start from an empty file, then extend it to the full length so
            // that everything not written below stays a hole.
            file.set_len(0).unwrap();
            let total_len = map.last().map_or(0, |e| e.offset + e.num_bytes);
            file.set_len(total_len).unwrap();

            for e in map {
                file.seek(io::SeekFrom::Start(e.offset)).unwrap();
                for _ in 0..e.num_bytes / SPARSE_BLOCK_SIZE {
                    file.write_all(&data_block).unwrap();
                }
            }

            // 100% dense: one data entry from offset 0 that reaches exactly
            // the offset of the final zero-length entry.
            let fully_dense = matches!(
                map,
                [data, end]
                    if data.offset == 0 && end.num_bytes == 0 && data.num_bytes == end.offset
            );
            let expected = if map.is_empty() || fully_dense {
                // Empty or dense files are reported as non-sparse.
                None
            } else {
                // Sparse.
                Some(SparseEntries {
                    entries: map.to_vec(),
                    on_disk_size: map.iter().map(|e| e.num_bytes).sum(),
                })
            };

            let stat = file.metadata().unwrap();
            let reported = find_sparse_entries(&mut file, &stat).unwrap();

            // Loose check: we did not miss any data blocks.
            if let Err(e) = loose_check_sparse_entries(reported.as_ref(), expected.as_ref()) {
                panic!(
                    "Case: {description}\n\
                     Reported: {reported:?}\n\
                     Expected: {expected:?}\n\
                     Error: {e}",
                );
            }

            // On Linux, always do a strict check. Skip on FreeBSD, as on UFS
            // the last block is always dense, even if it's zero-filled.
            #[cfg(any(target_os = "android", target_os = "linux"))]
            assert_eq!(reported, expected, "Case: {description}");
        }
    }

    /// Checks that `reported` is an acceptable description of the layout in
    /// `expected`, tolerating holes that were reported as data.
    fn loose_check_sparse_entries(
        reported: Option<&SparseEntries>,
        expected: Option<&SparseEntries>,
    ) -> Result<(), &'static str> {
        // It's not an error to report a sparse file as non-sparse.
        let reported = match reported {
            None => return Ok(()),
            Some(entries) => entries, // Reported as sparse.
        };
        let expected = expected.ok_or("Expected dense file, but reported as sparse")?;

        // Check that we didn't miss any data blocks. However, reporting some
        // holes as data is not an error during the loose check.
        let all_covered = expected.entries.iter().all(|e| {
            reported
                .entries
                .iter()
                .any(|r| e.offset >= r.offset && e.offset + e.num_bytes <= r.offset + r.num_bytes)
        });
        if !all_covered {
            return Err("Reported is not a superset of expected");
        }

        if reported.entries.last() != expected.entries.last() {
            return Err("Last zero-length entry is not as expected");
        }

        // Check invariants of SparseEntries: every entry starts at or after
        // the end of the one before it.
        let out_of_order = reported
            .entries
            .windows(2)
            .any(|pair| pair[1].offset < pair[0].offset + pair[0].num_bytes);
        if out_of_order {
            return Err("Overlapping or unsorted entries");
        }

        let data_total: u64 = reported.entries.iter().map(|e| e.num_bytes).sum();
        if reported.on_disk_size != data_total {
            return Err("Incorrect on-disk size");
        }

        Ok(())
    }
}