tar/builder.rs
1use std::fs;
2use std::io;
3use std::io::prelude::*;
4use std::path::Path;
5use std::str;
6
7use crate::header::BLOCK_SIZE;
8use crate::header::GNU_SPARSE_HEADERS_COUNT;
9use crate::header::{path2bytes, HeaderMode};
10use crate::GnuExtSparseHeader;
11use crate::{other, EntryType, Header};
12
/// A structure for building archives
///
/// This structure has methods for building up an archive from scratch into any
/// arbitrary writer.
pub struct Builder<W: Write> {
    // Settings snapshotted and handed to the filesystem-reading helpers.
    options: BuilderOptions,
    // Set by `finish` before it writes the terminating zero bytes, which makes
    // any later `finish` call a no-op.
    finished: bool,
    // The destination writer; `into_inner` takes it out, leaving `None`.
    obj: Option<W>,
}
22
/// Settings of a [`Builder`]; `Copy` so that methods can snapshot them before
/// mutably borrowing the underlying writer.
#[derive(Clone, Copy)]
struct BuilderOptions {
    /// Passed to `Header::set_metadata_in_mode` when a header is filled in
    /// from filesystem metadata.
    mode: HeaderMode,
    /// Forwarded to `prepare_header_path`; when `true` the path is stored via
    /// `Header::set_path_absolute` instead of `Header::set_path`.
    preserve_absolute: bool,
    /// When `true`, `append_path_with_name` stats with `fs::metadata`;
    /// otherwise it uses `fs::symlink_metadata`.
    follow: bool,
    /// When `true`, `append_file` calls `prepare_header_sparse` before
    /// writing the file's header and data.
    sparse: bool,
}
30
31impl<W: Write> Builder<W> {
32 /// Create a new archive builder with the underlying object as the
33 /// destination of all data written. The builder will use
34 /// `HeaderMode::Complete` by default.
35 pub fn new(obj: W) -> Builder<W> {
36 Builder {
37 options: BuilderOptions {
38 mode: HeaderMode::Complete,
39 preserve_absolute: false,
40 follow: true,
41 sparse: true,
42 },
43 finished: false,
44 obj: Some(obj),
45 }
46 }
47
    /// Changes the `HeaderMode` that will be used when reading fs metadata for
    /// methods that implicitly read metadata for an input `Path`. Notably, this
    /// does _not_ apply to `append(Header)`, which writes the caller-supplied
    /// header bytes as they are.
    pub fn mode(&mut self, mode: HeaderMode) {
        self.options.mode = mode;
    }
54
    /// Preserve absolute paths while creating an archive.
    ///
    /// Defaults to `false`. The flag is forwarded to the helper that stores
    /// the entry path, which then uses `Header::set_path_absolute` instead of
    /// `Header::set_path`.
    pub fn preserve_absolute(&mut self, preserve: bool) {
        self.options.preserve_absolute = preserve;
    }
59
60 /// Control whether symlinks are followed when reading from the filesystem.
61 /// Defaults to `true` (but see the note below — you almost certainly want
62 /// to call `follow_symlinks(false)`).
63 ///
64 /// When `true`, symlinks are dereferenced: the archive entry contains the
65 /// contents of the symlink target rather than the symlink itself,
66 /// equivalent to GNU `tar --dereference` (`-h`). When `false` (the default
67 /// for all mainstream tar implementations), symlinks are stored as symlink
68 /// entries in the archive.
69 ///
70 /// # Why you should almost always use `follow_symlinks(false)`
71 ///
72 /// Every mainstream tar implementation preserves symlinks by default.
73 /// GNU `tar` requires the explicit `--dereference` (`-h`) flag to follow
74 /// them. Go's `archive/tar` stores whatever the underlying `fs.FS` reports
75 /// and never dereferences on its own. BSD `tar` behaves the same way.
76 /// This crate's default of `true` is a historical quirk kept for
77 /// compatibility but is wrong for most use-cases:
78 ///
79 /// - Symlinks in the source tree are part of its structure and should
80 /// normally be preserved, not silently replaced by their targets.
81 /// - When `true`, [`append_dir_all`](Builder::append_dir_all) follows
82 /// symlinks that point *outside* `src_path` just as readily as those
83 /// inside it. If the archiving process has broader filesystem read access
84 /// than whoever controls the source tree (e.g. a privileged backup
85 /// service, a CI runner archiving user-submitted workspaces), an attacker
86 /// can plant a symlink inside `src_path` to silently include arbitrary
87 /// files from the host.
88 ///
89 /// Call `follow_symlinks(false)` unless you have a specific reason to
90 /// flatten symlinks into their targets. For the strongest guarantee, open
91 /// `src_path` with [`cap-std`] and walk the tree with capability-safe I/O,
92 /// which blocks symlink escapes at the OS level regardless of this setting.
93 ///
94 /// [`cap-std`]: https://docs.rs/cap-std/
    pub fn follow_symlinks(&mut self, follow: bool) {
        // Only recorded here; `append_path_with_name` reads the flag to choose
        // between `fs::metadata` and `fs::symlink_metadata`.
        self.options.follow = follow;
    }
98
    /// Handle sparse files efficiently, if supported by the underlying
    /// filesystem. When true, sparse file information is read from disk and
    /// empty segments are omitted from the archive. Defaults to true.
    ///
    /// The flag is consulted when a file is appended from an open
    /// `fs::File`; it has no effect on `append` or `append_data`, which copy
    /// a caller-supplied stream.
    pub fn sparse(&mut self, sparse: bool) {
        self.options.sparse = sparse;
    }
105
    /// Gets a shared reference to the underlying object.
    pub fn get_ref(&self) -> &W {
        // `into_inner` is the only place in view that empties `obj`, and it
        // consumes the builder, so this `unwrap` is presumably unreachable as
        // a panic — confirm against any `Drop` impl.
        self.obj.as_ref().unwrap()
    }
110
    /// Gets a mutable reference to the underlying object.
    ///
    /// Note that care must be taken while writing to the underlying
    /// object. However, e.g. `get_mut().flush()` is claimed to be safe and
    /// useful in situations where one needs to ensure that a tar entry was
    /// flushed to disk.
    pub fn get_mut(&mut self) -> &mut W {
        // `into_inner` is the only place in view that empties `obj`, and it
        // consumes the builder, so this `unwrap` is presumably unreachable as
        // a panic — confirm against any `Drop` impl.
        self.obj.as_mut().unwrap()
    }
120
121 /// Unwrap this archive, returning the underlying object.
122 ///
123 /// This function will finish writing the archive if the `finish` function
124 /// hasn't yet been called, returning any I/O error which happens during
125 /// that operation.
126 pub fn into_inner(mut self) -> io::Result<W> {
127 if !self.finished {
128 self.finish()?;
129 }
130 Ok(self.obj.take().unwrap())
131 }
132
133 /// Adds a new entry to this archive.
134 ///
135 /// This function will append the header specified, followed by contents of
136 /// the stream specified by `data`. To produce a valid archive the `size`
137 /// field of `header` must be the same as the length of the stream that's
138 /// being written. Additionally the checksum for the header should have been
139 /// set via the `set_cksum` method.
140 ///
141 /// Note that this will not attempt to seek the archive to a valid position,
142 /// so if the archive is in the middle of a read or some other similar
143 /// operation then this may corrupt the archive.
144 ///
145 /// Also note that after all entries have been written to an archive the
146 /// `finish` function needs to be called to finish writing the archive.
147 ///
148 /// # Errors
149 ///
150 /// This function will return an error for any intermittent I/O error which
151 /// occurs when either reading or writing.
152 ///
153 /// # Examples
154 ///
155 /// ```
156 /// use tar::{Builder, Header};
157 ///
158 /// let mut header = Header::new_gnu();
159 /// header.set_path("foo").unwrap();
160 /// header.set_size(4);
161 /// header.set_cksum();
162 ///
163 /// let mut data: &[u8] = &[1, 2, 3, 4];
164 ///
165 /// let mut ar = Builder::new(Vec::new());
166 /// ar.append(&header, data).unwrap();
167 /// let data = ar.into_inner().unwrap();
168 /// ```
169 pub fn append<R: Read>(&mut self, header: &Header, mut data: R) -> io::Result<()> {
170 append(self.get_mut(), header, &mut data)
171 }
172
173 /// Adds a new entry to this archive with the specified path.
174 ///
175 /// This function will set the specified path in the given header, which may
176 /// require appending a GNU long-name extension entry to the archive first.
177 /// The checksum for the header will be automatically updated via the
178 /// `set_cksum` method after setting the path. No other metadata in the
179 /// header will be modified.
180 ///
181 /// Then it will append the header, followed by contents of the stream
182 /// specified by `data`. To produce a valid archive the `size` field of
183 /// `header` must be the same as the length of the stream that's being
184 /// written.
185 ///
186 /// Note that this will not attempt to seek the archive to a valid position,
187 /// so if the archive is in the middle of a read or some other similar
188 /// operation then this may corrupt the archive.
189 ///
190 /// Also note that after all entries have been written to an archive the
191 /// `finish` function needs to be called to finish writing the archive.
192 ///
193 /// # Errors
194 ///
195 /// This function will return an error for any intermittent I/O error which
196 /// occurs when either reading or writing.
197 ///
198 /// # Examples
199 ///
200 /// ```
201 /// use tar::{Builder, Header};
202 ///
203 /// let mut header = Header::new_gnu();
204 /// header.set_size(4);
205 /// header.set_cksum();
206 ///
207 /// let mut data: &[u8] = &[1, 2, 3, 4];
208 ///
209 /// let mut ar = Builder::new(Vec::new());
210 /// ar.append_data(&mut header, "really/long/path/to/foo", data).unwrap();
211 /// let data = ar.into_inner().unwrap();
212 /// ```
213 pub fn append_data<P: AsRef<Path>, R: Read>(
214 &mut self,
215 header: &mut Header,
216 path: P,
217 data: R,
218 ) -> io::Result<()> {
219 let allow_absolute = self.options.preserve_absolute;
220 prepare_header_path(self.get_mut(), header, path.as_ref(), allow_absolute)?;
221 header.set_cksum();
222 self.append(header, data)
223 }
224
225 /// Adds a new entry to this archive and returns an [`EntryWriter`] for
226 /// adding its contents.
227 ///
228 /// This function is similar to [`Self::append_data`] but returns a
229 /// [`io::Write`] implementation instead of taking data as a parameter.
230 ///
231 /// Similar constraints around the position of the archive and completion
232 /// apply as with [`Self::append_data`]. It requires the underlying writer
233 /// to implement [`Seek`] to update the header after writing the data.
234 ///
235 /// # Errors
236 ///
237 /// This function will return an error for any intermittent I/O error which
238 /// occurs when either reading or writing.
239 ///
240 /// # Examples
241 ///
242 /// ```
243 /// use std::io::Cursor;
244 /// use std::io::Write as _;
245 /// use tar::{Builder, Header};
246 ///
247 /// let mut header = Header::new_gnu();
248 ///
249 /// let mut ar = Builder::new(Cursor::new(Vec::new()));
250 /// let mut entry = ar.append_writer(&mut header, "hi.txt").unwrap();
251 /// entry.write_all(b"Hello, ").unwrap();
252 /// entry.write_all(b"world!\n").unwrap();
253 /// entry.finish().unwrap();
254 /// ```
255 pub fn append_writer<'a, P: AsRef<Path>>(
256 &'a mut self,
257 header: &'a mut Header,
258 path: P,
259 ) -> io::Result<EntryWriter<'a>>
260 where
261 W: Seek,
262 {
263 let allow_absolute = self.options.preserve_absolute;
264 EntryWriter::start(self.get_mut(), header, path.as_ref(), allow_absolute)
265 }
266
267 /// Adds a new link (symbolic or hard) entry to this archive with the specified path and target.
268 ///
269 /// This function is similar to [`Self::append_data`] which supports long filenames,
270 /// but also supports long link targets using GNU extensions if necessary.
271 /// You must set the entry type to either [`EntryType::Link`] or [`EntryType::Symlink`].
272 /// The `set_cksum` method will be invoked after setting the path. No other metadata in the
273 /// header will be modified.
274 ///
275 /// If you are intending to use GNU extensions, you must use this method over calling
276 /// [`Header::set_link_name`] because that function will fail on long links.
277 ///
278 /// Similar constraints around the position of the archive and completion
279 /// apply as with [`Self::append_data`].
280 ///
281 /// # Errors
282 ///
283 /// This function will return an error for any intermittent I/O error which
284 /// occurs when either reading or writing.
285 ///
286 /// # Examples
287 ///
288 /// ```
289 /// use tar::{Builder, Header, EntryType};
290 ///
291 /// let mut ar = Builder::new(Vec::new());
292 /// let mut header = Header::new_gnu();
293 /// header.set_username("foo");
294 /// header.set_entry_type(EntryType::Symlink);
295 /// header.set_size(0);
296 /// ar.append_link(&mut header, "really/long/path/to/foo", "other/really/long/target").unwrap();
297 /// let data = ar.into_inner().unwrap();
298 /// ```
299 pub fn append_link<P: AsRef<Path>, T: AsRef<Path>>(
300 &mut self,
301 header: &mut Header,
302 path: P,
303 target: T,
304 ) -> io::Result<()> {
305 self._append_link(header, path.as_ref(), target.as_ref())
306 }
307
308 fn _append_link(&mut self, header: &mut Header, path: &Path, target: &Path) -> io::Result<()> {
309 let allow_abolute = self.options.preserve_absolute;
310 prepare_header_path(self.get_mut(), header, path, allow_abolute)?;
311 prepare_header_link(self.get_mut(), header, target)?;
312 header.set_cksum();
313 self.append(header, std::io::empty())
314 }
315
316 /// Adds a file on the local filesystem to this archive.
317 ///
318 /// This function will open the file specified by `path` and insert the file
319 /// into the archive with the appropriate metadata set, returning any I/O
320 /// error which occurs while writing. The path name for the file inside of
321 /// this archive will be the same as `path`, and it is required that the
322 /// path is a relative path.
323 ///
324 /// Note that this will not attempt to seek the archive to a valid position,
325 /// so if the archive is in the middle of a read or some other similar
326 /// operation then this may corrupt the archive.
327 ///
328 /// Also note that after all files have been written to an archive the
329 /// `finish` function needs to be called to finish writing the archive.
330 ///
331 /// # Examples
332 ///
333 /// ```no_run
334 /// use tar::Builder;
335 ///
336 /// let mut ar = Builder::new(Vec::new());
337 ///
338 /// ar.append_path("foo/bar.txt").unwrap();
339 /// ```
340 pub fn append_path<P: AsRef<Path>>(&mut self, path: P) -> io::Result<()> {
341 let options = self.options;
342 append_path_with_name(self.get_mut(), path.as_ref(), None, options)
343 }
344
345 /// Adds a file on the local filesystem to this archive under another name.
346 ///
347 /// This function will open the file specified by `path` and insert the file
348 /// into the archive as `name` with appropriate metadata set, returning any
349 /// I/O error which occurs while writing. The path name for the file inside
    /// of this archive will be `name`, which is required to be a relative path.
351 ///
352 /// Note that this will not attempt to seek the archive to a valid position,
353 /// so if the archive is in the middle of a read or some other similar
354 /// operation then this may corrupt the archive.
355 ///
    /// Note that if `path` is a directory, this will just add an entry for the
    /// directory itself to the archive, rather than the contents of the directory.
358 ///
359 /// Also note that after all files have been written to an archive the
360 /// `finish` function needs to be called to finish writing the archive.
361 ///
362 /// # Examples
363 ///
364 /// ```no_run
365 /// use tar::Builder;
366 ///
367 /// let mut ar = Builder::new(Vec::new());
368 ///
369 /// // Insert the local file "foo/bar.txt" in the archive but with the name
370 /// // "bar/foo.txt".
371 /// ar.append_path_with_name("foo/bar.txt", "bar/foo.txt").unwrap();
372 /// ```
373 pub fn append_path_with_name<P: AsRef<Path>, N: AsRef<Path>>(
374 &mut self,
375 path: P,
376 name: N,
377 ) -> io::Result<()> {
378 let options = self.options;
379 append_path_with_name(self.get_mut(), path.as_ref(), Some(name.as_ref()), options)
380 }
381
382 /// Adds a file to this archive with the given path as the name of the file
383 /// in the archive.
384 ///
385 /// This will use the metadata of `file` to populate a `Header`, and it will
386 /// then append the file to the archive with the name `path`.
387 ///
388 /// Note that this will not attempt to seek the archive to a valid position,
389 /// so if the archive is in the middle of a read or some other similar
390 /// operation then this may corrupt the archive.
391 ///
392 /// Also note that after all files have been written to an archive the
393 /// `finish` function needs to be called to finish writing the archive.
394 ///
395 /// # Examples
396 ///
397 /// ```no_run
398 /// use std::fs::File;
399 /// use tar::Builder;
400 ///
401 /// let mut ar = Builder::new(Vec::new());
402 ///
403 /// // Open the file at one location, but insert it into the archive with a
404 /// // different name.
405 /// let mut f = File::open("foo/bar/baz.txt").unwrap();
406 /// ar.append_file("bar/baz.txt", &mut f).unwrap();
407 /// ```
408 pub fn append_file<P: AsRef<Path>>(&mut self, path: P, file: &mut fs::File) -> io::Result<()> {
409 let options = self.options;
410 append_file(self.get_mut(), path.as_ref(), file, options)
411 }
412
413 /// Adds a directory to this archive with the given path as the name of the
414 /// directory in the archive.
415 ///
416 /// This will use `stat` to populate a `Header`, and it will then append the
417 /// directory to the archive with the name `path`.
418 ///
419 /// Note that this will not attempt to seek the archive to a valid position,
420 /// so if the archive is in the middle of a read or some other similar
421 /// operation then this may corrupt the archive.
422 ///
423 /// Note this will not add the contents of the directory to the archive.
424 /// See `append_dir_all` for recursively adding the contents of the directory.
425 ///
426 /// Also note that after all files have been written to an archive the
427 /// `finish` function needs to be called to finish writing the archive.
428 ///
429 /// # Examples
430 ///
431 /// ```
432 /// use std::fs;
433 /// use tar::Builder;
434 ///
435 /// let mut ar = Builder::new(Vec::new());
436 ///
437 /// // Use the directory at one location, but insert it into the archive
438 /// // with a different name.
439 /// ar.append_dir("bardir", ".").unwrap();
440 /// ```
441 pub fn append_dir<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
442 where
443 P: AsRef<Path>,
444 Q: AsRef<Path>,
445 {
446 let options = self.options;
447 append_dir(self.get_mut(), path.as_ref(), src_path.as_ref(), options)
448 }
449
450 /// Adds a directory and all of its contents (recursively) to this archive
451 /// with the given path as the name of the directory in the archive.
452 ///
453 /// Note that this will not attempt to seek the archive to a valid position,
454 /// so if the archive is in the middle of a read or some other similar
455 /// operation then this may corrupt the archive.
456 ///
457 /// Also note that after all files have been written to an archive the
458 /// `finish` or `into_inner` function needs to be called to finish
459 /// writing the archive.
460 ///
461 /// # Security
462 ///
463 /// **Call [`follow_symlinks(false)`](Builder::follow_symlinks) before this
464 /// method** unless you have an explicit reason to dereference symlinks.
465 /// All mainstream tar implementations (GNU tar, BSD tar, Go's
466 /// `archive/tar`) preserve symlinks by default; this crate's default of
467 /// `true` is a historical quirk.
468 ///
469 /// When `follow_symlinks` is `true` (the current default), this method
470 /// dereferences every symlink it encounters, including ones whose targets
471 /// lie **outside** `src_path`. When the archiver runs with broader
472 /// filesystem access than whoever controls the source tree (e.g. a
473 /// privileged backup or export service), an attacker can plant a symlink
474 /// inside `src_path` to silently include arbitrary files the archiver can
475 /// read, with no indication in the archive that they came from outside the
476 /// source root.
477 ///
478 /// ```no_run
479 /// use tar::Builder;
480 ///
481 /// # let src_path = std::path::Path::new(".");
482 /// # let writer = std::io::sink();
483 /// // Recommended: preserve symlinks as-is, matching GNU tar's default.
484 /// let mut ar = Builder::new(writer);
485 /// ar.follow_symlinks(false);
486 /// ar.append_dir_all("", src_path).unwrap();
487 /// ar.finish().unwrap();
488 /// ```
489 ///
490 /// With `follow_symlinks(false)`, symlinks inside the source tree are
491 /// stored as symlink entries in the archive rather than being read through.
492 /// Note that the resulting archive may then contain symlinks with absolute
493 /// or `..`-relative targets; validate or strip those on extraction if the
494 /// archive consumer is also untrusted.
495 ///
496 /// For the strongest available guarantee, open `src_path` using [`cap-std`]
497 /// and walk the directory tree with capability-safe I/O. This prevents
498 /// symlink escapes at the OS level and protects against TOCTOU races that
499 /// a purely path-based check cannot close.
500 ///
501 /// [`cap-std`]: https://docs.rs/cap-std/
502 ///
503 /// # Examples
504 ///
505 /// ```
506 /// use std::fs;
507 /// use tar::Builder;
508 ///
509 /// let mut ar = Builder::new(Vec::new());
510 ///
511 /// // Use the directory at one location ("."), but insert it into the archive
512 /// // with a different name ("bardir").
513 /// ar.append_dir_all("bardir", ".").unwrap();
514 /// ar.finish().unwrap();
515 /// ```
516 ///
517 /// Use `append_dir_all` with an empty string as the first path argument to
518 /// create an archive from all files in a directory without renaming.
519 ///
520 /// ```
521 /// use std::fs;
522 /// use std::path::PathBuf;
523 /// use tar::{Archive, Builder};
524 ///
525 /// let tmpdir = tempfile::tempdir().unwrap();
526 /// let path = tmpdir.path();
527 /// fs::write(path.join("a.txt"), b"hello").unwrap();
528 /// fs::write(path.join("b.txt"), b"world").unwrap();
529 ///
530 /// // Create a tarball from the files in the directory
531 /// let mut ar = Builder::new(Vec::new());
532 /// ar.append_dir_all("", path).unwrap();
533 ///
534 /// // List files in the archive
535 /// let archive = ar.into_inner().unwrap();
536 /// let archived_files = Archive::new(archive.as_slice())
537 /// .entries()
538 /// .unwrap()
539 /// .map(|entry| entry.unwrap().path().unwrap().into_owned())
540 /// .collect::<Vec<_>>();
541 ///
542 /// assert!(archived_files.contains(&PathBuf::from("a.txt")));
543 /// assert!(archived_files.contains(&PathBuf::from("b.txt")));
544 /// ```
545 pub fn append_dir_all<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
546 where
547 P: AsRef<Path>,
548 Q: AsRef<Path>,
549 {
550 let options = self.options;
551 append_dir_all(self.get_mut(), path.as_ref(), src_path.as_ref(), options)
552 }
553
554 /// Finish writing this archive, emitting the termination sections.
555 ///
556 /// This function should only be called when the archive has been written
557 /// entirely and if an I/O error happens the underlying object still needs
558 /// to be acquired.
559 ///
560 /// In most situations the `into_inner` method should be preferred.
561 pub fn finish(&mut self) -> io::Result<()> {
562 if self.finished {
563 return Ok(());
564 }
565 self.finished = true;
566 self.get_mut().write_all(&[0; 1024])
567 }
568}
569
/// Combination of `Write` and `Seek` that can be used as a single trait
/// object (`EntryWriter` holds a `&mut dyn SeekWrite`).
trait SeekWrite: Write + Seek {
    /// Returns `self` viewed as a plain `Write` trait object, for helpers
    /// that take `&mut dyn Write`.
    fn as_write(&mut self) -> &mut dyn Write;
}
573
// Blanket impl: every sized type that is both `Write` and `Seek` is a
// `SeekWrite`.
impl<T: Write + Seek> SeekWrite for T {
    fn as_write(&mut self) -> &mut dyn Write {
        // `&mut T` coerces to `&mut dyn Write` here.
        self
    }
}
579
/// A writer for a single entry in a tar archive.
///
/// This struct is returned by [`Builder::append_writer`] and provides a
/// [`Write`] implementation for adding content to an archive entry.
///
/// After writing all data to the entry, it must be finalized either by
/// explicitly calling [`EntryWriter::finish`] or by letting it drop.
/// Errors that occur while finalizing on drop are discarded, so call
/// `finish` when they need to be observed.
pub struct EntryWriter<'a> {
    // NOTE: Do not add any fields here which require Drop!
    // See the comment below in finish().
    //
    // Destination stream; seekable so the header can be rewritten once the
    // entry's size is known.
    obj: &'a mut dyn SeekWrite,
    // Header for this entry; its size and checksum are filled in when the
    // entry is finalized.
    header: &'a mut Header,
    // Number of data bytes accepted by `write` so far.
    written: u64,
}
594
595impl EntryWriter<'_> {
596 fn start<'a>(
597 obj: &'a mut dyn SeekWrite,
598 header: &'a mut Header,
599 path: &Path,
600 allow_absolute: bool,
601 ) -> io::Result<EntryWriter<'a>> {
602 prepare_header_path(obj.as_write(), header, path, allow_absolute)?;
603
604 // Reserve space for header, will be overwritten once data is written.
605 obj.write_all([0u8; BLOCK_SIZE as usize].as_ref())?;
606
607 Ok(EntryWriter {
608 obj,
609 header,
610 written: 0,
611 })
612 }
613
614 /// Finish writing the current entry in the archive.
615 pub fn finish(self) -> io::Result<()> {
616 // NOTE: This is an optimization for "fallible destructuring".
617 // We want finish() to return an error, but we also need to invoke
618 // cleanup in our Drop handler, which will run unconditionally
619 // and try to do the same work.
620 // By using ManuallyDrop, we suppress that drop. However, this would
621 // be a memory leak if we ever had any struct members which required
622 // Drop - which we don't right now.
623 // But if we ever gain one, we will need to change to use e.g. Option<>
624 // around some of the fields or have a `bool finished` etc.
625 let mut this = std::mem::ManuallyDrop::new(self);
626 this.do_finish()
627 }
628
629 fn do_finish(&mut self) -> io::Result<()> {
630 // Pad with zeros if necessary.
631 let buf = [0u8; BLOCK_SIZE as usize];
632 let remaining = BLOCK_SIZE.wrapping_sub(self.written) % BLOCK_SIZE;
633 self.obj.write_all(&buf[..remaining as usize])?;
634 let written = (self.written + remaining) as i64;
635
636 // Seek back to the header position.
637 self.obj
638 .seek(io::SeekFrom::Current(-written - BLOCK_SIZE as i64))?;
639
640 self.header.set_size(self.written);
641 self.header.set_cksum();
642 self.obj.write_all(self.header.as_bytes())?;
643
644 // Seek forward to restore the position.
645 self.obj.seek(io::SeekFrom::Current(written))?;
646
647 Ok(())
648 }
649}
650
651impl Write for EntryWriter<'_> {
652 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
653 let len = self.obj.write(buf)?;
654 self.written += len as u64;
655 Ok(len)
656 }
657
658 fn flush(&mut self) -> io::Result<()> {
659 self.obj.flush()
660 }
661}
662
impl Drop for EntryWriter<'_> {
    fn drop(&mut self) {
        // Best effort: `drop` cannot return an error, so a failure while
        // finalizing the entry is discarded. `finish` suppresses this impl
        // and returns the error instead.
        let _ = self.do_finish();
    }
}
668
669fn append(mut dst: &mut dyn Write, header: &Header, mut data: &mut dyn Read) -> io::Result<()> {
670 dst.write_all(header.as_bytes())?;
671 let len = io::copy(&mut data, &mut dst)?;
672 pad_zeroes(&mut dst, len)?;
673 Ok(())
674}
675
676fn pad_zeroes(dst: &mut dyn Write, len: u64) -> io::Result<()> {
677 let buf = [0; BLOCK_SIZE as usize];
678 let remaining = BLOCK_SIZE - (len % BLOCK_SIZE);
679 if remaining < BLOCK_SIZE {
680 dst.write_all(&buf[..remaining as usize])?;
681 }
682 Ok(())
683}
684
685fn append_path_with_name(
686 dst: &mut dyn Write,
687 path: &Path,
688 name: Option<&Path>,
689 options: BuilderOptions,
690) -> io::Result<()> {
691 let stat = if options.follow {
692 fs::metadata(path).map_err(|err| {
693 io::Error::new(
694 err.kind(),
695 format!("{} when getting metadata for {}", err, path.display()),
696 )
697 })?
698 } else {
699 fs::symlink_metadata(path).map_err(|err| {
700 io::Error::new(
701 err.kind(),
702 format!("{} when getting metadata for {}", err, path.display()),
703 )
704 })?
705 };
706 let ar_name = name.unwrap_or(path);
707 if stat.is_file() {
708 append_file(dst, ar_name, &mut fs::File::open(path)?, options)
709 } else if stat.is_dir() {
710 append_fs(
711 dst,
712 ar_name,
713 &stat,
714 options.mode,
715 options.preserve_absolute,
716 None,
717 )
718 } else if stat.file_type().is_symlink() {
719 let link_name = fs::read_link(path)?;
720 append_fs(
721 dst,
722 ar_name,
723 &stat,
724 options.mode,
725 options.preserve_absolute,
726 Some(&link_name),
727 )
728 } else {
729 #[cfg(unix)]
730 {
731 append_special(dst, path, &stat, options.mode, options.preserve_absolute)
732 }
733 #[cfg(not(unix))]
734 {
735 Err(other(&format!("{} has unknown file type", path.display())))
736 }
737 }
738}
739
740#[cfg(unix)]
741fn append_special(
742 dst: &mut dyn Write,
743 path: &Path,
744 stat: &fs::Metadata,
745 mode: HeaderMode,
746 allow_absolute: bool,
747) -> io::Result<()> {
748 use ::std::os::unix::fs::{FileTypeExt, MetadataExt};
749
750 let file_type = stat.file_type();
751 let entry_type;
752 if file_type.is_socket() {
753 // sockets can't be archived
754 return Err(other(&format!(
755 "{}: socket can not be archived",
756 path.display()
757 )));
758 } else if file_type.is_fifo() {
759 entry_type = EntryType::Fifo;
760 } else if file_type.is_char_device() {
761 entry_type = EntryType::Char;
762 } else if file_type.is_block_device() {
763 entry_type = EntryType::Block;
764 } else {
765 return Err(other(&format!("{} has unknown file type", path.display())));
766 }
767
768 let mut header = Header::new_gnu();
769 header.set_metadata_in_mode(stat, mode);
770 prepare_header_path(dst, &mut header, path, allow_absolute)?;
771
772 header.set_entry_type(entry_type);
773 let dev_id = stat.rdev();
774 let dev_major = ((dev_id >> 32) & 0xffff_f000) | ((dev_id >> 8) & 0x0000_0fff);
775 let dev_minor = ((dev_id >> 12) & 0xffff_ff00) | ((dev_id) & 0x0000_00ff);
776 header.set_device_major(dev_major as u32)?;
777 header.set_device_minor(dev_minor as u32)?;
778
779 header.set_cksum();
780 dst.write_all(header.as_bytes())?;
781
782 Ok(())
783}
784
785fn append_file(
786 dst: &mut dyn Write,
787 path: &Path,
788 file: &mut fs::File,
789 options: BuilderOptions,
790) -> io::Result<()> {
791 let stat = file.metadata()?;
792 let mut header = Header::new_gnu();
793
794 prepare_header_path(dst, &mut header, path, options.preserve_absolute)?;
795 header.set_metadata_in_mode(&stat, options.mode);
796 let sparse_entries = if options.sparse {
797 prepare_header_sparse(file, &stat, &mut header)?
798 } else {
799 None
800 };
801 header.set_cksum();
802 dst.write_all(header.as_bytes())?;
803
804 if let Some(sparse_entries) = sparse_entries {
805 append_extended_sparse_headers(dst, &sparse_entries)?;
806 for entry in sparse_entries.entries {
807 file.seek(io::SeekFrom::Start(entry.offset))?;
808 io::copy(&mut file.take(entry.num_bytes), dst)?;
809 }
810 pad_zeroes(dst, sparse_entries.on_disk_size)?;
811 } else {
812 let len = io::copy(file, dst)?;
813 pad_zeroes(dst, len)?;
814 }
815
816 Ok(())
817}
818
819fn append_dir(
820 dst: &mut dyn Write,
821 path: &Path,
822 src_path: &Path,
823 options: BuilderOptions,
824) -> io::Result<()> {
825 let stat = fs::metadata(src_path)?;
826 append_fs(
827 dst,
828 path,
829 &stat,
830 options.mode,
831 options.preserve_absolute,
832 None,
833 )
834}
835
836fn prepare_header(size: u64, entry_type: u8) -> Header {
837 let mut header = Header::new_gnu();
838 let name = b"././@LongLink";
839 header.as_gnu_mut().unwrap().name[..name.len()].clone_from_slice(&name[..]);
840 header.set_mode(0o644);
841 header.set_uid(0);
842 header.set_gid(0);
843 header.set_mtime(0);
844 // + 1 to be compliant with GNU tar
845 header.set_size(size + 1);
846 header.set_entry_type(EntryType::new(entry_type));
847 header.set_cksum();
848 header
849}
850
851fn prepare_header_path(
852 dst: &mut dyn Write,
853 header: &mut Header,
854 path: &Path,
855 allow_absolute: bool,
856) -> io::Result<()> {
857 // Try to encode the path directly in the header, but if it ends up not
858 // working (probably because it's too long) then try to use the GNU-specific
859 // long name extension by emitting an entry which indicates that it's the
860 // filename.
861 let result = if allow_absolute {
862 header.set_path_absolute(path)
863 } else {
864 header.set_path(path)
865 };
866
867 if let Err(e) = result {
868 let data = path2bytes(path)?;
869 let max = header.as_old().name.len();
870 // Since `e` isn't specific enough to let us know the path is indeed too
871 // long, verify it first before using the extension.
872 if data.len() < max {
873 return Err(e);
874 }
875 // Truncate the path to store in the header we're about to emit to
876 // ensure we've got something at least mentioned. Note that we use
877 // `str`-encoding to be compatible with Windows, but in general the
878 // entry in the header itself shouldn't matter too much since extraction
879 // doesn't look at it.
880 //
881 // Validate the truncated path BEFORE writing the long-name extension
882 // to the stream. If validation fails after writing, the orphaned
883 // extension entry corrupts subsequent archive entries.
884 let truncated = match str::from_utf8(&data[..max]) {
885 Ok(s) => s,
886 Err(e) => str::from_utf8(&data[..e.valid_up_to()]).unwrap(),
887 };
888 header.set_truncated_path_for_gnu_header(truncated, allow_absolute)?;
889
890 let header2 = prepare_header(data.len() as u64, b'L');
891 // null-terminated string
892 let mut data2 = data.chain(io::repeat(0).take(1));
893 append(dst, &header2, &mut data2)?;
894 }
895 Ok(())
896}
897
898fn prepare_header_link(
899 dst: &mut dyn Write,
900 header: &mut Header,
901 link_name: &Path,
902) -> io::Result<()> {
903 // Same as previous function but for linkname
904 if let Err(e) = header.set_link_name(link_name) {
905 let data = path2bytes(link_name)?;
906 if data.len() < header.as_old().linkname.len() {
907 return Err(e);
908 }
909 let header2 = prepare_header(data.len() as u64, b'K');
910 let mut data2 = data.chain(io::repeat(0).take(1));
911 append(dst, &header2, &mut data2)?;
912 }
913 Ok(())
914}
915
916fn prepare_header_sparse(
917 file: &mut fs::File,
918 stat: &fs::Metadata,
919 header: &mut Header,
920) -> io::Result<Option<SparseEntries>> {
921 let entries = match find_sparse_entries(file, stat)? {
922 Some(entries) => entries,
923 _ => return Ok(None),
924 };
925
926 header.set_entry_type(EntryType::GNUSparse);
927 header.set_size(entries.on_disk_size);
928
929 // Write the first 4 (GNU_SPARSE_HEADERS_COUNT) entries to the given header.
930 // The remaining entries will be written as subsequent extended headers. See
931 // https://www.gnu.org/software/tar/manual/html_section/Sparse-Formats.html#Old-GNU-Format
932 // for details on the format.
933 let gnu_header = &mut header.as_gnu_mut().unwrap();
934 gnu_header.set_real_size(entries.size());
935
936 for (entry, header_entry) in std::iter::zip(&entries.entries, &mut gnu_header.sparse) {
937 header_entry.set_offset(entry.offset);
938 header_entry.set_length(entry.num_bytes);
939 }
940 gnu_header.set_is_extended(entries.entries.len() > gnu_header.sparse.len());
941
942 Ok(Some(entries))
943}
944
945/// Write extra sparse headers into `dst` for those entries that did not fit in the main header.
946fn append_extended_sparse_headers(dst: &mut dyn Write, entries: &SparseEntries) -> io::Result<()> {
947 // The first `GNU_SPARSE_HEADERS_COUNT` entries are written to the main header, so skip them.
948 let mut it = entries
949 .entries
950 .iter()
951 .skip(GNU_SPARSE_HEADERS_COUNT)
952 .peekable();
953
954 // Each GnuExtSparseHeader can hold up to fixed number of sparse entries (21).
955 // So we pack entries into multiple headers if necessary.
956 while it.peek().is_some() {
957 let mut ext_header = GnuExtSparseHeader::new();
958 for header_entry in ext_header.sparse.iter_mut() {
959 if let Some(entry) = it.next() {
960 header_entry.set_offset(entry.offset);
961 header_entry.set_length(entry.num_bytes);
962 } else {
963 break;
964 }
965 }
966 ext_header.set_is_extended(it.peek().is_some());
967 dst.write_all(ext_header.as_bytes())?;
968 }
969
970 Ok(())
971}
972
973fn append_fs(
974 dst: &mut dyn Write,
975 path: &Path,
976 meta: &fs::Metadata,
977 mode: HeaderMode,
978 allow_absolute: bool,
979 link_name: Option<&Path>,
980) -> io::Result<()> {
981 let mut header = Header::new_gnu();
982
983 prepare_header_path(dst, &mut header, path, allow_absolute)?;
984 header.set_metadata_in_mode(meta, mode);
985 if let Some(link_name) = link_name {
986 prepare_header_link(dst, &mut header, link_name)?;
987 }
988 header.set_cksum();
989 dst.write_all(header.as_bytes())
990}
991
992fn append_dir_all(
993 dst: &mut dyn Write,
994 path: &Path,
995 src_path: &Path,
996 options: BuilderOptions,
997) -> io::Result<()> {
998 let mut stack = vec![(src_path.to_path_buf(), true, false)];
999 while let Some((src, is_dir, is_symlink)) = stack.pop() {
1000 let dest = path.join(src.strip_prefix(src_path).unwrap());
1001 // In case of a symlink pointing to a directory, is_dir is false, but src.is_dir() will return true
1002 if is_dir || (is_symlink && options.follow && src.is_dir()) {
1003 for entry in fs::read_dir(&src)? {
1004 let entry = entry?;
1005 let file_type = entry.file_type()?;
1006 stack.push((entry.path(), file_type.is_dir(), file_type.is_symlink()));
1007 }
1008 if dest != Path::new("") {
1009 append_dir(dst, &dest, &src, options)?;
1010 }
1011 } else if !options.follow && is_symlink {
1012 let stat = fs::symlink_metadata(&src)?;
1013 let link_name = fs::read_link(&src)?;
1014 append_fs(
1015 dst,
1016 &dest,
1017 &stat,
1018 options.mode,
1019 options.preserve_absolute,
1020 Some(&link_name),
1021 )?;
1022 } else {
1023 #[cfg(unix)]
1024 {
1025 let stat = fs::metadata(&src)?;
1026 if !stat.is_file() {
1027 append_special(dst, &dest, &stat, options.mode, options.preserve_absolute)?;
1028 continue;
1029 }
1030 }
1031 append_file(dst, &dest, &mut fs::File::open(src)?, options)?;
1032 }
1033 }
1034 Ok(())
1035}
1036
/// The data regions of a sparse file, as found by `find_sparse_entries`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct SparseEntries {
    /// Data regions in the order they were discovered.
    entries: Vec<SparseEntry>,
    /// Number of data bytes actually stored for the file; written as the
    /// header size of a sparse entry.
    on_disk_size: u64,
}

impl SparseEntries {
    /// Logical size of the file: the end offset of the last entry, or `0`
    /// when there are no entries at all.
    fn size(&self) -> u64 {
        match self.entries.last() {
            Some(last) => last.offset + last.num_bytes,
            None => 0,
        }
    }
}

/// One contiguous data region of a sparse file.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
struct SparseEntry {
    /// Byte offset of the region from the start of the file.
    offset: u64,
    /// Length of the region in bytes; may be zero.
    num_bytes: u64,
}
1054
1055/// Find sparse entries in a file. Returns:
1056/// * `Ok(Some(_))` if the file is sparse.
1057/// * `Ok(None)` if the file is not sparse, or if the file system does not support sparse files.
1058/// * `Err(_)` if an error occurred. The lack of support for sparse files is not
1059/// considered an error. It might return an error if the file is modified
1060/// while reading.
1061fn find_sparse_entries(
1062 file: &mut fs::File,
1063 stat: &fs::Metadata,
1064) -> io::Result<Option<SparseEntries>> {
1065 #[cfg(not(any(target_os = "android", target_os = "freebsd", target_os = "linux")))]
1066 {
1067 let _ = file;
1068 let _ = stat;
1069 Ok(None)
1070 }
1071
1072 #[cfg(any(target_os = "android", target_os = "freebsd", target_os = "linux"))]
1073 find_sparse_entries_seek(file, stat)
1074}
1075
/// Implementation of `find_sparse_entries` using `SEEK_HOLE` and `SEEK_DATA`.
///
/// The file is walked by alternating `lseek(SEEK_DATA)` and
/// `lseek(SEEK_HOLE)` calls; every `[data start, next hole)` range becomes
/// one `SparseEntry`, and a final zero-length entry at `stat.size()` is
/// always appended.
///
/// Returns:
/// * `Ok(None)` if the file is empty, if the hole-detection probe fails, or
///   if the data starting at offset 0 runs all the way to `stat.size()`
///   (i.e. the file is not sparse).
/// * `Ok(Some(_))` with the data regions otherwise. A file with zero
///   allocated blocks but non-zero size is reported as a single zero-length
///   entry at `stat.size()` with `on_disk_size == 0`.
/// * `Err(_)` if `lseek` fails with an unexpected errno, or if the offsets it
///   returns are inconsistent (go backwards, do not advance, or pass
///   `stat.size()`).
///
/// On the "not sparse" and success paths that went through the scan loop,
/// the file position is reset to the start before returning.
#[cfg(any(target_os = "android", target_os = "freebsd", target_os = "linux"))]
fn find_sparse_entries_seek(
    file: &mut fs::File,
    stat: &fs::Metadata,
) -> io::Result<Option<SparseEntries>> {
    use std::os::unix::fs::MetadataExt as _;
    use std::os::unix::io::AsRawFd as _;

    /// Thin wrapper around the platform `lseek` that returns the new offset,
    /// or the raw `errno` value when the call returns `-1`.
    fn lseek(file: &fs::File, offset: i64, whence: libc::c_int) -> Result<i64, i32> {
        #[cfg(any(target_os = "linux", target_os = "android"))]
        let lseek = libc::lseek64;
        #[cfg(not(any(target_os = "linux", target_os = "android")))]
        let lseek = libc::lseek;

        // SAFETY: only a raw fd and plain integers are passed; the fd comes
        // from `file`, which is borrowed for the duration of the call.
        match unsafe { lseek(file.as_raw_fd(), offset, whence) } {
            // `last_os_error()` is built from an OS error code, so
            // `raw_os_error()` is expected to be `Some` here.
            -1 => Err(io::Error::last_os_error().raw_os_error().unwrap()),
            off => Ok(off),
        }
    }

    // No allocated blocks at all: either an empty file or one big hole.
    if stat.blocks() == 0 {
        return Ok(if stat.size() == 0 {
            // Empty file.
            None
        } else {
            // Fully sparse file.
            Some(SparseEntries {
                entries: vec![SparseEntry {
                    offset: stat.size(),
                    num_bytes: 0,
                }],
                on_disk_size: 0,
            })
        });
    }

    // On most Unixes, we need to read `_PC_MIN_HOLE_SIZE` to see if the file
    // system supports `SEEK_HOLE`.
    // FreeBSD: https://man.freebsd.org/cgi/man.cgi?query=lseek&sektion=2&manpath=FreeBSD+14.1-STABLE
    // SAFETY: `fpathconf` only receives the raw fd of the borrowed `file` and
    // a constant selector.
    #[cfg(not(any(target_os = "linux", target_os = "android")))]
    if unsafe { libc::fpathconf(file.as_raw_fd(), libc::_PC_MIN_HOLE_SIZE) } == -1 {
        return Ok(None);
    }

    // Linux is the only UNIX-like without support for `_PC_MIN_HOLE_SIZE`, so
    // instead we try to call `lseek` and see if it fails.
    #[cfg(any(target_os = "linux", target_os = "android"))]
    match lseek(file, 0, libc::SEEK_HOLE) {
        Ok(_) => (),
        Err(libc::ENXIO) => {
            // The file is empty. Treat it as non-sparse.
            return Ok(None);
        }
        // Any other failure of the probe is treated as "not supported"
        // rather than as an error.
        Err(_) => return Ok(None),
    }

    let mut entries = Vec::new();
    // Running total of the lengths of all data regions found so far.
    let mut on_disk_size = 0;
    // Current scan position: alternately the end of the previous data region
    // (before SEEK_DATA) and the start of the current one (before SEEK_HOLE).
    let mut off_s = 0;
    loop {
        // Step 1: find where the next data region starts, at or after `off_s`.
        //
        //  off_s=0      │     off_s               │    off_s
        //    ↓          │       ↓                 │      ↓
        //    | DATA |…  │  ……………| HOLE | DATA |…  │  …|×EOF×
        //    ↑          │       ↑      ↑          │
        //   (a)         │  (b) (c)    (d)         │     (e)
        match lseek(file, off_s, libc::SEEK_DATA) {
            Ok(0) if off_s == 0 => (), // (a) The file starts with data.
            Ok(off) if off < off_s => {
                // (b) Unlikely.
                return Err(std::io::Error::new(
                    io::ErrorKind::Other,
                    "lseek(SEEK_DATA) went backwards",
                ));
            }
            Ok(off) if off == off_s => {
                // (c) The data at the same offset as the hole.
                return Err(std::io::Error::new(
                    io::ErrorKind::Other,
                    "lseek(SEEK_DATA) did not advance. \
                     Did the file change while appending?",
                ));
            }
            Ok(off) => off_s = off, // (d) Jump to the next hole.
            Err(libc::ENXIO) => break, // (e) Reached the end of the file.
            Err(errno) => return Err(io::Error::from_raw_os_error(errno)),
        };

        // Step 2: find where that data region ends, i.e. where the next hole
        // (or the end of the file) begins.
        //
        //  off_s=0          │     off_s               │    off_s
        //    ↓              │       ↓                 │      ↓
        //    | DATA |×EOF×  │  ……………| DATA | HOLE |…  │  …|×EOF×
        //           ↑       │       ↑      ↑          │
        //          (a)      │  (b) (c)    (d)         │     (e)
        match lseek(file, off_s, libc::SEEK_HOLE) {
            Ok(off_e) if off_s == 0 && (off_e as u64) == stat.size() => {
                // (a) The file is not sparse.
                // Rewind so that the file position is back at the start.
                file.seek(io::SeekFrom::Start(0))?;
                return Ok(None);
            }
            Ok(off_e) if off_e < off_s => {
                // (b) Unlikely.
                return Err(std::io::Error::new(
                    io::ErrorKind::Other,
                    "lseek(SEEK_HOLE) went backwards",
                ));
            }
            Ok(off_e) if off_e == off_s => {
                // (c) The hole at the same offset as the data.
                return Err(std::io::Error::new(
                    io::ErrorKind::Other,
                    "lseek(SEEK_HOLE) did not advance. \
                     Did the file change while appending?",
                ));
            }
            Ok(off_e) => {
                // (d) Found a hole or reached the end of the file (implicit
                // zero-length hole).
                // The guards above guarantee `off_e > off_s` here, so the
                // subtraction cannot underflow.
                entries.push(SparseEntry {
                    offset: off_s as u64,
                    num_bytes: off_e as u64 - off_s as u64,
                });
                on_disk_size += off_e as u64 - off_s as u64;
                off_s = off_e;
            }
            Err(libc::ENXIO) => {
                // (e) off_s was already beyond the end of the file.
                return Err(std::io::Error::new(
                    io::ErrorKind::Other,
                    "lseek(SEEK_HOLE) returned ENXIO. \
                     Did the file change while appending?",
                ));
            }
            Err(errno) => return Err(io::Error::from_raw_os_error(errno)),
        };
    }

    // The scan must not have moved past the size recorded in `stat`.
    if off_s as u64 > stat.size() {
        return Err(std::io::Error::new(
            io::ErrorKind::Other,
            "lseek(SEEK_DATA) went beyond the end of the file. \
             Did the file change while appending?",
        ));
    }

    // Add a final zero-length entry. It is required if the file ends with a
    // hole, and redundant otherwise. However, we add it unconditionally to
    // mimic GNU tar behavior.
    entries.push(SparseEntry {
        offset: stat.size(),
        num_bytes: 0,
    });

    // Undo the position changes made by the `lseek` calls above.
    file.seek(io::SeekFrom::Start(0))?;

    Ok(Some(SparseEntries {
        entries,
        on_disk_size,
    }))
}
1235
1236impl<W: Write> Drop for Builder<W> {
1237 fn drop(&mut self) {
1238 let _ = self.finish();
1239 }
1240}
1241
#[cfg(test)]
mod tests {
    use super::*;

    /// Size of one data/hole region laid out by the tests. Should be a multiple of 4KiB on
    /// ext4, a multiple of 32KiB on FreeBSD/UFS, and a multiple of 64KiB on ppc64el.
    const SPARSE_BLOCK_SIZE: u64 = 64 * 1024;

    /// Builds a `SparseEntry` from an offset and a length given in units of
    /// `SPARSE_BLOCK_SIZE`.
    fn blocks(offset: u64, len: u64) -> SparseEntry {
        SparseEntry {
            offset: offset * SPARSE_BLOCK_SIZE,
            num_bytes: len * SPARSE_BLOCK_SIZE,
        }
    }

    #[test]
    fn test_find_sparse_entries() {
        // Every case pairs a sketch of the file layout (`####` is a data block) with the
        // data regions that make it up, terminated by a zero-length entry at the file end.
        let cases: Vec<(&str, Vec<SparseEntry>)> = vec![
            ("|", vec![]),
            ("| | | | |", vec![blocks(4, 0)]),
            ("|####|####|####|####|", vec![blocks(0, 4), blocks(4, 0)]),
            ("|####|####| | |", vec![blocks(0, 2), blocks(4, 0)]),
            ("| | |####|####|", vec![blocks(2, 2), blocks(4, 0)]),
            (
                "|####| |####| |",
                vec![blocks(0, 1), blocks(2, 1), blocks(4, 0)],
            ),
            (
                "|####| | |####|",
                vec![blocks(0, 1), blocks(3, 1), blocks(4, 0)],
            ),
            ("| |####|####| |", vec![blocks(1, 2), blocks(4, 0)]),
        ];

        let mut file = tempfile::tempfile().unwrap();
        let block = [0xFFu8; SPARSE_BLOCK_SIZE as usize];

        for (description, map) in &cases {
            let map: &[SparseEntry] = map;

            // Truncate to nothing first, then grow to the full length, so that the file
            // starts out without any data written to it.
            file.set_len(0).unwrap();
            let total_len = match map.last() {
                Some(last) => last.offset + last.num_bytes,
                None => 0,
            };
            file.set_len(total_len).unwrap();

            // Fill in the data regions block by block.
            for region in map {
                file.seek(io::SeekFrom::Start(region.offset)).unwrap();
                (0..region.num_bytes / SPARSE_BLOCK_SIZE)
                    .for_each(|_| file.write_all(&block).unwrap());
            }

            // A single data region that starts at 0 and ends exactly at the terminating
            // entry means the file is 100% dense.
            let fully_dense = matches!(
                map,
                [
                    SparseEntry {
                        offset: 0,
                        num_bytes: data_len,
                    },
                    SparseEntry {
                        offset: end,
                        num_bytes: 0,
                    },
                ] if data_len == end
            );
            let expected = if map.is_empty() || fully_dense {
                // Empty and fully dense files are not reported as sparse.
                None
            } else {
                Some(SparseEntries {
                    entries: map.to_vec(),
                    on_disk_size: map.iter().map(|e| e.num_bytes).sum(),
                })
            };

            let stat = file.metadata().unwrap();
            let reported = find_sparse_entries(&mut file, &stat).unwrap();

            // Loose check: we did not miss any data blocks.
            if let Err(e) = loose_check_sparse_entries(reported.as_ref(), expected.as_ref()) {
                panic!(
                    "Case: {description}\n\
                     Reported: {reported:?}\n\
                     Expected: {expected:?}\n\
                     Error: {e}",
                );
            }

            // On Linux, always do a strict check. Skip on FreeBSD, as on UFS
            // the last block is always dense, even if it's zero-filled.
            #[cfg(any(target_os = "android", target_os = "linux"))]
            assert_eq!(reported, expected, "Case: {description}");
        }
    }

    /// Checks that `reported` is an acceptable answer for a file whose exact layout is
    /// `expected`: it may describe holes as data, but must never miss data.
    fn loose_check_sparse_entries(
        reported: Option<&SparseEntries>,
        expected: Option<&SparseEntries>,
    ) -> Result<(), &'static str> {
        let (reported, expected) = match (reported, expected) {
            // It's not an error to report a sparse file as non-sparse.
            (None, _) => return Ok(()),
            (Some(_), None) => return Err("Expected dense file, but reported as sparse"),
            (Some(reported), Some(expected)) => (reported, expected),
        };

        // Every expected data region must lie inside some reported region. Reporting some
        // holes as data is tolerated by the loose check.
        let covered = |want: &SparseEntry| {
            reported.entries.iter().any(|got| {
                want.offset >= got.offset
                    && want.offset + want.num_bytes <= got.offset + got.num_bytes
            })
        };
        if !expected.entries.iter().all(covered) {
            return Err("Reported is not a superset of expected");
        }

        if reported.entries.last() != expected.entries.last() {
            return Err("Last zero-length entry is not as expected");
        }

        // Invariant of SparseEntries: regions are sorted and do not overlap.
        let mut prev_end: Option<u64> = None;
        for entry in reported.entries.iter() {
            if matches!(prev_end, Some(end) if entry.offset < end) {
                return Err("Overlapping or unsorted entries");
            }
            prev_end = Some(entry.offset + entry.num_bytes);
        }

        // Invariant of SparseEntries: the on-disk size is the sum of the region lengths.
        let summed: u64 = reported.entries.iter().map(|e| e.num_bytes).sum();
        if reported.on_disk_size != summed {
            return Err("Incorrect on-disk size");
        }

        Ok(())
    }
}