tar/builder.rs
1use std::fs;
2use std::io;
3use std::io::prelude::*;
4use std::path::Path;
5use std::str;
6
7use crate::header::BLOCK_SIZE;
8use crate::header::GNU_SPARSE_HEADERS_COUNT;
9use crate::header::{path2bytes, HeaderMode};
10use crate::GnuExtSparseHeader;
11use crate::{other, EntryType, Header};
12
/// A structure for building archives
///
/// This structure has methods for building up an archive from scratch into any
/// arbitrary writer.
pub struct Builder<W: Write> {
    // Settings handed (by copy) to the free helper functions on each
    // path-based `append_*` call.
    options: BuilderOptions,
    // Set by `finish` so that the termination blocks are emitted only once.
    finished: bool,
    // The destination writer. `into_inner` takes it out, leaving `None`;
    // `get_ref`/`get_mut` unwrap it.
    obj: Option<W>,
}
22
/// Settings of a `Builder`, passed by value into the helper functions that
/// read entries from the filesystem.
#[derive(Clone, Copy)]
struct BuilderOptions {
    // Forwarded to `Header::set_metadata_in_mode` when a header is filled in
    // from filesystem metadata.
    mode: HeaderMode,
    // When true, paths are inspected with `fs::metadata`; otherwise with
    // `fs::symlink_metadata`.
    follow: bool,
    // When true, `append_file` probes the file for sparse regions via
    // `prepare_header_sparse`.
    sparse: bool,
}
29
30impl<W: Write> Builder<W> {
31 /// Create a new archive builder with the underlying object as the
32 /// destination of all data written. The builder will use
33 /// `HeaderMode::Complete` by default.
34 pub fn new(obj: W) -> Builder<W> {
35 Builder {
36 options: BuilderOptions {
37 mode: HeaderMode::Complete,
38 follow: true,
39 sparse: true,
40 },
41 finished: false,
42 obj: Some(obj),
43 }
44 }
45
46 /// Changes the HeaderMode that will be used when reading fs Metadata for
47 /// methods that implicitly read metadata for an input Path. Notably, this
48 /// does _not_ apply to `append(Header)`.
49 pub fn mode(&mut self, mode: HeaderMode) {
50 self.options.mode = mode;
51 }
52
53 /// Follow symlinks, archiving the contents of the file they point to rather
54 /// than adding a symlink to the archive. Defaults to true.
55 ///
56 /// When true, it exhibits the same behavior as GNU `tar` command's
57 /// `--dereference` or `-h` options <https://man7.org/linux/man-pages/man1/tar.1.html>.
58 pub fn follow_symlinks(&mut self, follow: bool) {
59 self.options.follow = follow;
60 }
61
62 /// Handle sparse files efficiently, if supported by the underlying
63 /// filesystem. When true, sparse file information is read from disk and
64 /// empty segments are omitted from the archive. Defaults to true.
65 pub fn sparse(&mut self, sparse: bool) {
66 self.options.sparse = sparse;
67 }
68
69 /// Gets shared reference to the underlying object.
70 pub fn get_ref(&self) -> &W {
71 self.obj.as_ref().unwrap()
72 }
73
74 /// Gets mutable reference to the underlying object.
75 ///
76 /// Note that care must be taken while writing to the underlying
77 /// object. But, e.g. `get_mut().flush()` is claimed to be safe and
78 /// useful in the situations when one needs to be ensured that
79 /// tar entry was flushed to the disk.
80 pub fn get_mut(&mut self) -> &mut W {
81 self.obj.as_mut().unwrap()
82 }
83
84 /// Unwrap this archive, returning the underlying object.
85 ///
86 /// This function will finish writing the archive if the `finish` function
87 /// hasn't yet been called, returning any I/O error which happens during
88 /// that operation.
89 pub fn into_inner(mut self) -> io::Result<W> {
90 if !self.finished {
91 self.finish()?;
92 }
93 Ok(self.obj.take().unwrap())
94 }
95
96 /// Adds a new entry to this archive.
97 ///
98 /// This function will append the header specified, followed by contents of
99 /// the stream specified by `data`. To produce a valid archive the `size`
100 /// field of `header` must be the same as the length of the stream that's
101 /// being written. Additionally the checksum for the header should have been
102 /// set via the `set_cksum` method.
103 ///
104 /// Note that this will not attempt to seek the archive to a valid position,
105 /// so if the archive is in the middle of a read or some other similar
106 /// operation then this may corrupt the archive.
107 ///
108 /// Also note that after all entries have been written to an archive the
109 /// `finish` function needs to be called to finish writing the archive.
110 ///
111 /// # Errors
112 ///
113 /// This function will return an error for any intermittent I/O error which
114 /// occurs when either reading or writing.
115 ///
116 /// # Examples
117 ///
118 /// ```
119 /// use tar::{Builder, Header};
120 ///
121 /// let mut header = Header::new_gnu();
122 /// header.set_path("foo").unwrap();
123 /// header.set_size(4);
124 /// header.set_cksum();
125 ///
126 /// let mut data: &[u8] = &[1, 2, 3, 4];
127 ///
128 /// let mut ar = Builder::new(Vec::new());
129 /// ar.append(&header, data).unwrap();
130 /// let data = ar.into_inner().unwrap();
131 /// ```
132 pub fn append<R: Read>(&mut self, header: &Header, mut data: R) -> io::Result<()> {
133 append(self.get_mut(), header, &mut data)
134 }
135
136 /// Adds a new entry to this archive with the specified path.
137 ///
138 /// This function will set the specified path in the given header, which may
139 /// require appending a GNU long-name extension entry to the archive first.
140 /// The checksum for the header will be automatically updated via the
141 /// `set_cksum` method after setting the path. No other metadata in the
142 /// header will be modified.
143 ///
144 /// Then it will append the header, followed by contents of the stream
145 /// specified by `data`. To produce a valid archive the `size` field of
146 /// `header` must be the same as the length of the stream that's being
147 /// written.
148 ///
149 /// Note that this will not attempt to seek the archive to a valid position,
150 /// so if the archive is in the middle of a read or some other similar
151 /// operation then this may corrupt the archive.
152 ///
153 /// Also note that after all entries have been written to an archive the
154 /// `finish` function needs to be called to finish writing the archive.
155 ///
156 /// # Errors
157 ///
158 /// This function will return an error for any intermittent I/O error which
159 /// occurs when either reading or writing.
160 ///
161 /// # Examples
162 ///
163 /// ```
164 /// use tar::{Builder, Header};
165 ///
166 /// let mut header = Header::new_gnu();
167 /// header.set_size(4);
168 /// header.set_cksum();
169 ///
170 /// let mut data: &[u8] = &[1, 2, 3, 4];
171 ///
172 /// let mut ar = Builder::new(Vec::new());
173 /// ar.append_data(&mut header, "really/long/path/to/foo", data).unwrap();
174 /// let data = ar.into_inner().unwrap();
175 /// ```
176 pub fn append_data<P: AsRef<Path>, R: Read>(
177 &mut self,
178 header: &mut Header,
179 path: P,
180 data: R,
181 ) -> io::Result<()> {
182 prepare_header_path(self.get_mut(), header, path.as_ref())?;
183 header.set_cksum();
184 self.append(header, data)
185 }
186
187 /// Adds a new entry to this archive and returns an [`EntryWriter`] for
188 /// adding its contents.
189 ///
190 /// This function is similar to [`Self::append_data`] but returns a
191 /// [`io::Write`] implementation instead of taking data as a parameter.
192 ///
193 /// Similar constraints around the position of the archive and completion
194 /// apply as with [`Self::append_data`]. It requires the underlying writer
195 /// to implement [`Seek`] to update the header after writing the data.
196 ///
197 /// # Errors
198 ///
199 /// This function will return an error for any intermittent I/O error which
200 /// occurs when either reading or writing.
201 ///
202 /// # Examples
203 ///
204 /// ```
205 /// use std::io::Cursor;
206 /// use std::io::Write as _;
207 /// use tar::{Builder, Header};
208 ///
209 /// let mut header = Header::new_gnu();
210 ///
211 /// let mut ar = Builder::new(Cursor::new(Vec::new()));
212 /// let mut entry = ar.append_writer(&mut header, "hi.txt").unwrap();
213 /// entry.write_all(b"Hello, ").unwrap();
214 /// entry.write_all(b"world!\n").unwrap();
215 /// entry.finish().unwrap();
216 /// ```
217 pub fn append_writer<'a, P: AsRef<Path>>(
218 &'a mut self,
219 header: &'a mut Header,
220 path: P,
221 ) -> io::Result<EntryWriter<'a>>
222 where
223 W: Seek,
224 {
225 EntryWriter::start(self.get_mut(), header, path.as_ref())
226 }
227
228 /// Adds a new link (symbolic or hard) entry to this archive with the specified path and target.
229 ///
230 /// This function is similar to [`Self::append_data`] which supports long filenames,
231 /// but also supports long link targets using GNU extensions if necessary.
232 /// You must set the entry type to either [`EntryType::Link`] or [`EntryType::Symlink`].
233 /// The `set_cksum` method will be invoked after setting the path. No other metadata in the
234 /// header will be modified.
235 ///
236 /// If you are intending to use GNU extensions, you must use this method over calling
237 /// [`Header::set_link_name`] because that function will fail on long links.
238 ///
239 /// Similar constraints around the position of the archive and completion
240 /// apply as with [`Self::append_data`].
241 ///
242 /// # Errors
243 ///
244 /// This function will return an error for any intermittent I/O error which
245 /// occurs when either reading or writing.
246 ///
247 /// # Examples
248 ///
249 /// ```
250 /// use tar::{Builder, Header, EntryType};
251 ///
252 /// let mut ar = Builder::new(Vec::new());
253 /// let mut header = Header::new_gnu();
254 /// header.set_username("foo");
255 /// header.set_entry_type(EntryType::Symlink);
256 /// header.set_size(0);
257 /// ar.append_link(&mut header, "really/long/path/to/foo", "other/really/long/target").unwrap();
258 /// let data = ar.into_inner().unwrap();
259 /// ```
260 pub fn append_link<P: AsRef<Path>, T: AsRef<Path>>(
261 &mut self,
262 header: &mut Header,
263 path: P,
264 target: T,
265 ) -> io::Result<()> {
266 self._append_link(header, path.as_ref(), target.as_ref())
267 }
268
269 fn _append_link(&mut self, header: &mut Header, path: &Path, target: &Path) -> io::Result<()> {
270 prepare_header_path(self.get_mut(), header, path)?;
271 prepare_header_link(self.get_mut(), header, target)?;
272 header.set_cksum();
273 self.append(header, std::io::empty())
274 }
275
276 /// Adds a file on the local filesystem to this archive.
277 ///
278 /// This function will open the file specified by `path` and insert the file
279 /// into the archive with the appropriate metadata set, returning any I/O
280 /// error which occurs while writing. The path name for the file inside of
281 /// this archive will be the same as `path`, and it is required that the
282 /// path is a relative path.
283 ///
284 /// Note that this will not attempt to seek the archive to a valid position,
285 /// so if the archive is in the middle of a read or some other similar
286 /// operation then this may corrupt the archive.
287 ///
288 /// Also note that after all files have been written to an archive the
289 /// `finish` function needs to be called to finish writing the archive.
290 ///
291 /// # Examples
292 ///
293 /// ```no_run
294 /// use tar::Builder;
295 ///
296 /// let mut ar = Builder::new(Vec::new());
297 ///
298 /// ar.append_path("foo/bar.txt").unwrap();
299 /// ```
300 pub fn append_path<P: AsRef<Path>>(&mut self, path: P) -> io::Result<()> {
301 let options = self.options;
302 append_path_with_name(self.get_mut(), path.as_ref(), None, options)
303 }
304
305 /// Adds a file on the local filesystem to this archive under another name.
306 ///
307 /// This function will open the file specified by `path` and insert the file
308 /// into the archive as `name` with appropriate metadata set, returning any
309 /// I/O error which occurs while writing. The path name for the file inside
310 /// of this archive will be `name` is required to be a relative path.
311 ///
312 /// Note that this will not attempt to seek the archive to a valid position,
313 /// so if the archive is in the middle of a read or some other similar
314 /// operation then this may corrupt the archive.
315 ///
316 /// Note if the `path` is a directory. This will just add an entry to the archive,
317 /// rather than contents of the directory.
318 ///
319 /// Also note that after all files have been written to an archive the
320 /// `finish` function needs to be called to finish writing the archive.
321 ///
322 /// # Examples
323 ///
324 /// ```no_run
325 /// use tar::Builder;
326 ///
327 /// let mut ar = Builder::new(Vec::new());
328 ///
329 /// // Insert the local file "foo/bar.txt" in the archive but with the name
330 /// // "bar/foo.txt".
331 /// ar.append_path_with_name("foo/bar.txt", "bar/foo.txt").unwrap();
332 /// ```
333 pub fn append_path_with_name<P: AsRef<Path>, N: AsRef<Path>>(
334 &mut self,
335 path: P,
336 name: N,
337 ) -> io::Result<()> {
338 let options = self.options;
339 append_path_with_name(self.get_mut(), path.as_ref(), Some(name.as_ref()), options)
340 }
341
342 /// Adds a file to this archive with the given path as the name of the file
343 /// in the archive.
344 ///
345 /// This will use the metadata of `file` to populate a `Header`, and it will
346 /// then append the file to the archive with the name `path`.
347 ///
348 /// Note that this will not attempt to seek the archive to a valid position,
349 /// so if the archive is in the middle of a read or some other similar
350 /// operation then this may corrupt the archive.
351 ///
352 /// Also note that after all files have been written to an archive the
353 /// `finish` function needs to be called to finish writing the archive.
354 ///
355 /// # Examples
356 ///
357 /// ```no_run
358 /// use std::fs::File;
359 /// use tar::Builder;
360 ///
361 /// let mut ar = Builder::new(Vec::new());
362 ///
363 /// // Open the file at one location, but insert it into the archive with a
364 /// // different name.
365 /// let mut f = File::open("foo/bar/baz.txt").unwrap();
366 /// ar.append_file("bar/baz.txt", &mut f).unwrap();
367 /// ```
368 pub fn append_file<P: AsRef<Path>>(&mut self, path: P, file: &mut fs::File) -> io::Result<()> {
369 let options = self.options;
370 append_file(self.get_mut(), path.as_ref(), file, options)
371 }
372
373 /// Adds a directory to this archive with the given path as the name of the
374 /// directory in the archive.
375 ///
376 /// This will use `stat` to populate a `Header`, and it will then append the
377 /// directory to the archive with the name `path`.
378 ///
379 /// Note that this will not attempt to seek the archive to a valid position,
380 /// so if the archive is in the middle of a read or some other similar
381 /// operation then this may corrupt the archive.
382 ///
383 /// Note this will not add the contents of the directory to the archive.
384 /// See `append_dir_all` for recursively adding the contents of the directory.
385 ///
386 /// Also note that after all files have been written to an archive the
387 /// `finish` function needs to be called to finish writing the archive.
388 ///
389 /// # Examples
390 ///
391 /// ```
392 /// use std::fs;
393 /// use tar::Builder;
394 ///
395 /// let mut ar = Builder::new(Vec::new());
396 ///
397 /// // Use the directory at one location, but insert it into the archive
398 /// // with a different name.
399 /// ar.append_dir("bardir", ".").unwrap();
400 /// ```
401 pub fn append_dir<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
402 where
403 P: AsRef<Path>,
404 Q: AsRef<Path>,
405 {
406 let options = self.options;
407 append_dir(self.get_mut(), path.as_ref(), src_path.as_ref(), options)
408 }
409
410 /// Adds a directory and all of its contents (recursively) to this archive
411 /// with the given path as the name of the directory in the archive.
412 ///
413 /// Note that this will not attempt to seek the archive to a valid position,
414 /// so if the archive is in the middle of a read or some other similar
415 /// operation then this may corrupt the archive.
416 ///
417 /// Also note that after all files have been written to an archive the
418 /// `finish` or `into_inner` function needs to be called to finish
419 /// writing the archive.
420 ///
421 /// # Examples
422 ///
423 /// ```
424 /// use std::fs;
425 /// use tar::Builder;
426 ///
427 /// let mut ar = Builder::new(Vec::new());
428 ///
429 /// // Use the directory at one location ("."), but insert it into the archive
430 /// // with a different name ("bardir").
431 /// ar.append_dir_all("bardir", ".").unwrap();
432 /// ar.finish().unwrap();
433 /// ```
434 ///
435 /// Use `append_dir_all` with an empty string as the first path argument to
436 /// create an archive from all files in a directory without renaming.
437 ///
438 /// ```
439 /// use std::fs;
440 /// use std::path::PathBuf;
441 /// use tar::{Archive, Builder};
442 ///
443 /// let tmpdir = tempfile::tempdir().unwrap();
444 /// let path = tmpdir.path();
445 /// fs::write(path.join("a.txt"), b"hello").unwrap();
446 /// fs::write(path.join("b.txt"), b"world").unwrap();
447 ///
448 /// // Create a tarball from the files in the directory
449 /// let mut ar = Builder::new(Vec::new());
450 /// ar.append_dir_all("", path).unwrap();
451 ///
452 /// // List files in the archive
453 /// let archive = ar.into_inner().unwrap();
454 /// let archived_files = Archive::new(archive.as_slice())
455 /// .entries()
456 /// .unwrap()
457 /// .map(|entry| entry.unwrap().path().unwrap().into_owned())
458 /// .collect::<Vec<_>>();
459 ///
460 /// assert!(archived_files.contains(&PathBuf::from("a.txt")));
461 /// assert!(archived_files.contains(&PathBuf::from("b.txt")));
462 /// ```
463 pub fn append_dir_all<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
464 where
465 P: AsRef<Path>,
466 Q: AsRef<Path>,
467 {
468 let options = self.options;
469 append_dir_all(self.get_mut(), path.as_ref(), src_path.as_ref(), options)
470 }
471
472 /// Finish writing this archive, emitting the termination sections.
473 ///
474 /// This function should only be called when the archive has been written
475 /// entirely and if an I/O error happens the underlying object still needs
476 /// to be acquired.
477 ///
478 /// In most situations the `into_inner` method should be preferred.
479 pub fn finish(&mut self) -> io::Result<()> {
480 if self.finished {
481 return Ok(());
482 }
483 self.finished = true;
484 self.get_mut().write_all(&[0; 1024])
485 }
486}
487
/// Object-safe combination of `Write` and `Seek`, so that a seekable writer
/// can be stored as a single trait object.
trait SeekWrite: Write + Seek {
    /// Views this value as a plain `Write` trait object.
    fn as_write(&mut self) -> &mut dyn Write;
}

/// Every sized type that is both `Write` and `Seek` is a `SeekWrite`.
impl<T> SeekWrite for T
where
    T: Write + Seek,
{
    fn as_write(&mut self) -> &mut dyn Write {
        self as &mut dyn Write
    }
}
497
/// A writer for a single entry in a tar archive.
///
/// This struct is returned by [`Builder::append_writer`] and provides a
/// [`Write`] implementation for adding content to an archive entry.
///
/// After writing all data to the entry, it must be finalized either by
/// explicitly calling [`EntryWriter::finish`] or by letting it drop.
pub struct EntryWriter<'a> {
    // NOTE: Do not add any fields here which require Drop!
    // See the comment below in finish().
    //
    // Destination archive; seekable so that `do_finish` can go back and
    // overwrite the reserved header block.
    obj: &'a mut dyn SeekWrite,
    // Header of this entry; its size and checksum are set in `do_finish`.
    header: &'a mut Header,
    // Number of payload bytes accepted through `Write::write` so far.
    written: u64,
}
512
513impl EntryWriter<'_> {
514 fn start<'a>(
515 obj: &'a mut dyn SeekWrite,
516 header: &'a mut Header,
517 path: &Path,
518 ) -> io::Result<EntryWriter<'a>> {
519 prepare_header_path(obj.as_write(), header, path)?;
520
521 // Reserve space for header, will be overwritten once data is written.
522 obj.write_all([0u8; BLOCK_SIZE as usize].as_ref())?;
523
524 Ok(EntryWriter {
525 obj,
526 header,
527 written: 0,
528 })
529 }
530
531 /// Finish writing the current entry in the archive.
532 pub fn finish(self) -> io::Result<()> {
533 // NOTE: This is an optimization for "fallible destructuring".
534 // We want finish() to return an error, but we also need to invoke
535 // cleanup in our Drop handler, which will run unconditionally
536 // and try to do the same work.
537 // By using ManuallyDrop, we suppress that drop. However, this would
538 // be a memory leak if we ever had any struct members which required
539 // Drop - which we don't right now.
540 // But if we ever gain one, we will need to change to use e.g. Option<>
541 // around some of the fields or have a `bool finished` etc.
542 let mut this = std::mem::ManuallyDrop::new(self);
543 this.do_finish()
544 }
545
546 fn do_finish(&mut self) -> io::Result<()> {
547 // Pad with zeros if necessary.
548 let buf = [0u8; BLOCK_SIZE as usize];
549 let remaining = BLOCK_SIZE.wrapping_sub(self.written) % BLOCK_SIZE;
550 self.obj.write_all(&buf[..remaining as usize])?;
551 let written = (self.written + remaining) as i64;
552
553 // Seek back to the header position.
554 self.obj
555 .seek(io::SeekFrom::Current(-written - BLOCK_SIZE as i64))?;
556
557 self.header.set_size(self.written);
558 self.header.set_cksum();
559 self.obj.write_all(self.header.as_bytes())?;
560
561 // Seek forward to restore the position.
562 self.obj.seek(io::SeekFrom::Current(written))?;
563
564 Ok(())
565 }
566}
567
568impl Write for EntryWriter<'_> {
569 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
570 let len = self.obj.write(buf)?;
571 self.written += len as u64;
572 Ok(len)
573 }
574
575 fn flush(&mut self) -> io::Result<()> {
576 self.obj.flush()
577 }
578}
579
580impl Drop for EntryWriter<'_> {
581 fn drop(&mut self) {
582 let _ = self.do_finish();
583 }
584}
585
586fn append(mut dst: &mut dyn Write, header: &Header, mut data: &mut dyn Read) -> io::Result<()> {
587 dst.write_all(header.as_bytes())?;
588 let len = io::copy(&mut data, &mut dst)?;
589 pad_zeroes(&mut dst, len)?;
590 Ok(())
591}
592
593fn pad_zeroes(dst: &mut dyn Write, len: u64) -> io::Result<()> {
594 let buf = [0; BLOCK_SIZE as usize];
595 let remaining = BLOCK_SIZE - (len % BLOCK_SIZE);
596 if remaining < BLOCK_SIZE {
597 dst.write_all(&buf[..remaining as usize])?;
598 }
599 Ok(())
600}
601
602fn append_path_with_name(
603 dst: &mut dyn Write,
604 path: &Path,
605 name: Option<&Path>,
606 options: BuilderOptions,
607) -> io::Result<()> {
608 let stat = if options.follow {
609 fs::metadata(path).map_err(|err| {
610 io::Error::new(
611 err.kind(),
612 format!("{} when getting metadata for {}", err, path.display()),
613 )
614 })?
615 } else {
616 fs::symlink_metadata(path).map_err(|err| {
617 io::Error::new(
618 err.kind(),
619 format!("{} when getting metadata for {}", err, path.display()),
620 )
621 })?
622 };
623 let ar_name = name.unwrap_or(path);
624 if stat.is_file() {
625 append_file(dst, ar_name, &mut fs::File::open(path)?, options)
626 } else if stat.is_dir() {
627 append_fs(dst, ar_name, &stat, options.mode, None)
628 } else if stat.file_type().is_symlink() {
629 let link_name = fs::read_link(path)?;
630 append_fs(dst, ar_name, &stat, options.mode, Some(&link_name))
631 } else {
632 #[cfg(unix)]
633 {
634 append_special(dst, path, &stat, options.mode)
635 }
636 #[cfg(not(unix))]
637 {
638 Err(other(&format!("{} has unknown file type", path.display())))
639 }
640 }
641}
642
643#[cfg(unix)]
644fn append_special(
645 dst: &mut dyn Write,
646 path: &Path,
647 stat: &fs::Metadata,
648 mode: HeaderMode,
649) -> io::Result<()> {
650 use ::std::os::unix::fs::{FileTypeExt, MetadataExt};
651
652 let file_type = stat.file_type();
653 let entry_type;
654 if file_type.is_socket() {
655 // sockets can't be archived
656 return Err(other(&format!(
657 "{}: socket can not be archived",
658 path.display()
659 )));
660 } else if file_type.is_fifo() {
661 entry_type = EntryType::Fifo;
662 } else if file_type.is_char_device() {
663 entry_type = EntryType::Char;
664 } else if file_type.is_block_device() {
665 entry_type = EntryType::Block;
666 } else {
667 return Err(other(&format!("{} has unknown file type", path.display())));
668 }
669
670 let mut header = Header::new_gnu();
671 header.set_metadata_in_mode(stat, mode);
672 prepare_header_path(dst, &mut header, path)?;
673
674 header.set_entry_type(entry_type);
675 let dev_id = stat.rdev();
676 let dev_major = ((dev_id >> 32) & 0xffff_f000) | ((dev_id >> 8) & 0x0000_0fff);
677 let dev_minor = ((dev_id >> 12) & 0xffff_ff00) | ((dev_id) & 0x0000_00ff);
678 header.set_device_major(dev_major as u32)?;
679 header.set_device_minor(dev_minor as u32)?;
680
681 header.set_cksum();
682 dst.write_all(header.as_bytes())?;
683
684 Ok(())
685}
686
687fn append_file(
688 dst: &mut dyn Write,
689 path: &Path,
690 file: &mut fs::File,
691 options: BuilderOptions,
692) -> io::Result<()> {
693 let stat = file.metadata()?;
694 let mut header = Header::new_gnu();
695
696 prepare_header_path(dst, &mut header, path)?;
697 header.set_metadata_in_mode(&stat, options.mode);
698 let sparse_entries = if options.sparse {
699 prepare_header_sparse(file, &stat, &mut header)?
700 } else {
701 None
702 };
703 header.set_cksum();
704 dst.write_all(header.as_bytes())?;
705
706 if let Some(sparse_entries) = sparse_entries {
707 append_extended_sparse_headers(dst, &sparse_entries)?;
708 for entry in sparse_entries.entries {
709 file.seek(io::SeekFrom::Start(entry.offset))?;
710 io::copy(&mut file.take(entry.num_bytes), dst)?;
711 }
712 pad_zeroes(dst, sparse_entries.on_disk_size)?;
713 } else {
714 let len = io::copy(file, dst)?;
715 pad_zeroes(dst, len)?;
716 }
717
718 Ok(())
719}
720
721fn append_dir(
722 dst: &mut dyn Write,
723 path: &Path,
724 src_path: &Path,
725 options: BuilderOptions,
726) -> io::Result<()> {
727 let stat = fs::metadata(src_path)?;
728 append_fs(dst, path, &stat, options.mode, None)
729}
730
731fn prepare_header(size: u64, entry_type: u8) -> Header {
732 let mut header = Header::new_gnu();
733 let name = b"././@LongLink";
734 header.as_gnu_mut().unwrap().name[..name.len()].clone_from_slice(&name[..]);
735 header.set_mode(0o644);
736 header.set_uid(0);
737 header.set_gid(0);
738 header.set_mtime(0);
739 // + 1 to be compliant with GNU tar
740 header.set_size(size + 1);
741 header.set_entry_type(EntryType::new(entry_type));
742 header.set_cksum();
743 header
744}
745
746fn prepare_header_path(dst: &mut dyn Write, header: &mut Header, path: &Path) -> io::Result<()> {
747 // Try to encode the path directly in the header, but if it ends up not
748 // working (probably because it's too long) then try to use the GNU-specific
749 // long name extension by emitting an entry which indicates that it's the
750 // filename.
751 if let Err(e) = header.set_path(path) {
752 let data = path2bytes(path)?;
753 let max = header.as_old().name.len();
754 // Since `e` isn't specific enough to let us know the path is indeed too
755 // long, verify it first before using the extension.
756 if data.len() < max {
757 return Err(e);
758 }
759 // Truncate the path to store in the header we're about to emit to
760 // ensure we've got something at least mentioned. Note that we use
761 // `str`-encoding to be compatible with Windows, but in general the
762 // entry in the header itself shouldn't matter too much since extraction
763 // doesn't look at it.
764 //
765 // Validate the truncated path BEFORE writing the long-name extension
766 // to the stream. If validation fails after writing, the orphaned
767 // extension entry corrupts subsequent archive entries.
768 let truncated = match str::from_utf8(&data[..max]) {
769 Ok(s) => s,
770 Err(e) => str::from_utf8(&data[..e.valid_up_to()]).unwrap(),
771 };
772 header.set_truncated_path_for_gnu_header(truncated)?;
773
774 let header2 = prepare_header(data.len() as u64, b'L');
775 // null-terminated string
776 let mut data2 = data.chain(io::repeat(0).take(1));
777 append(dst, &header2, &mut data2)?;
778 }
779 Ok(())
780}
781
782fn prepare_header_link(
783 dst: &mut dyn Write,
784 header: &mut Header,
785 link_name: &Path,
786) -> io::Result<()> {
787 // Same as previous function but for linkname
788 if let Err(e) = header.set_link_name(link_name) {
789 let data = path2bytes(link_name)?;
790 if data.len() < header.as_old().linkname.len() {
791 return Err(e);
792 }
793 let header2 = prepare_header(data.len() as u64, b'K');
794 let mut data2 = data.chain(io::repeat(0).take(1));
795 append(dst, &header2, &mut data2)?;
796 }
797 Ok(())
798}
799
800fn prepare_header_sparse(
801 file: &mut fs::File,
802 stat: &fs::Metadata,
803 header: &mut Header,
804) -> io::Result<Option<SparseEntries>> {
805 let entries = match find_sparse_entries(file, stat)? {
806 Some(entries) => entries,
807 _ => return Ok(None),
808 };
809
810 header.set_entry_type(EntryType::GNUSparse);
811 header.set_size(entries.on_disk_size);
812
813 // Write the first 4 (GNU_SPARSE_HEADERS_COUNT) entries to the given header.
814 // The remaining entries will be written as subsequent extended headers. See
815 // https://www.gnu.org/software/tar/manual/html_section/Sparse-Formats.html#Old-GNU-Format
816 // for details on the format.
817 let gnu_header = &mut header.as_gnu_mut().unwrap();
818 gnu_header.set_real_size(entries.size());
819
820 for (entry, header_entry) in std::iter::zip(&entries.entries, &mut gnu_header.sparse) {
821 header_entry.set_offset(entry.offset);
822 header_entry.set_length(entry.num_bytes);
823 }
824 gnu_header.set_is_extended(entries.entries.len() > gnu_header.sparse.len());
825
826 Ok(Some(entries))
827}
828
829/// Write extra sparse headers into `dst` for those entries that did not fit in the main header.
830fn append_extended_sparse_headers(dst: &mut dyn Write, entries: &SparseEntries) -> io::Result<()> {
831 // The first `GNU_SPARSE_HEADERS_COUNT` entries are written to the main header, so skip them.
832 let mut it = entries
833 .entries
834 .iter()
835 .skip(GNU_SPARSE_HEADERS_COUNT)
836 .peekable();
837
838 // Each GnuExtSparseHeader can hold up to fixed number of sparse entries (21).
839 // So we pack entries into multiple headers if necessary.
840 while it.peek().is_some() {
841 let mut ext_header = GnuExtSparseHeader::new();
842 for header_entry in ext_header.sparse.iter_mut() {
843 if let Some(entry) = it.next() {
844 header_entry.set_offset(entry.offset);
845 header_entry.set_length(entry.num_bytes);
846 } else {
847 break;
848 }
849 }
850 ext_header.set_is_extended(it.peek().is_some());
851 dst.write_all(ext_header.as_bytes())?;
852 }
853
854 Ok(())
855}
856
857fn append_fs(
858 dst: &mut dyn Write,
859 path: &Path,
860 meta: &fs::Metadata,
861 mode: HeaderMode,
862 link_name: Option<&Path>,
863) -> io::Result<()> {
864 let mut header = Header::new_gnu();
865
866 prepare_header_path(dst, &mut header, path)?;
867 header.set_metadata_in_mode(meta, mode);
868 if let Some(link_name) = link_name {
869 prepare_header_link(dst, &mut header, link_name)?;
870 }
871 header.set_cksum();
872 dst.write_all(header.as_bytes())
873}
874
875fn append_dir_all(
876 dst: &mut dyn Write,
877 path: &Path,
878 src_path: &Path,
879 options: BuilderOptions,
880) -> io::Result<()> {
881 let mut stack = vec![(src_path.to_path_buf(), true, false)];
882 while let Some((src, is_dir, is_symlink)) = stack.pop() {
883 let dest = path.join(src.strip_prefix(src_path).unwrap());
884 // In case of a symlink pointing to a directory, is_dir is false, but src.is_dir() will return true
885 if is_dir || (is_symlink && options.follow && src.is_dir()) {
886 for entry in fs::read_dir(&src)? {
887 let entry = entry?;
888 let file_type = entry.file_type()?;
889 stack.push((entry.path(), file_type.is_dir(), file_type.is_symlink()));
890 }
891 if dest != Path::new("") {
892 append_dir(dst, &dest, &src, options)?;
893 }
894 } else if !options.follow && is_symlink {
895 let stat = fs::symlink_metadata(&src)?;
896 let link_name = fs::read_link(&src)?;
897 append_fs(dst, &dest, &stat, options.mode, Some(&link_name))?;
898 } else {
899 #[cfg(unix)]
900 {
901 let stat = fs::metadata(&src)?;
902 if !stat.is_file() {
903 append_special(dst, &dest, &stat, options.mode)?;
904 continue;
905 }
906 }
907 append_file(dst, &dest, &mut fs::File::open(src)?, options)?;
908 }
909 }
910 Ok(())
911}
912
/// The data segments of a sparse file together with the number of bytes the
/// file contributes to the archive body.
#[derive(Debug, Clone, PartialEq, Eq)]
struct SparseEntries {
    // Data segments; `size()` reads the last one as the end of the file.
    entries: Vec<SparseEntry>,
    // Stored as the header's size and used to compute the trailing padding.
    on_disk_size: u64,
}

impl SparseEntries {
    /// Returns the end offset of the last entry, or 0 when there are no
    /// entries.
    fn size(&self) -> u64 {
        match self.entries.last() {
            Some(last) => last.offset + last.num_bytes,
            None => 0,
        }
    }
}

/// A single data segment of a sparse file.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
struct SparseEntry {
    // Position of the segment within the file.
    offset: u64,
    // Length of the segment in bytes.
    num_bytes: u64,
}
930
931/// Find sparse entries in a file. Returns:
932/// * `Ok(Some(_))` if the file is sparse.
933/// * `Ok(None)` if the file is not sparse, or if the file system does not support sparse files.
934/// * `Err(_)` if an error occurred. The lack of support for sparse files is not
935/// considered an error. It might return an error if the file is modified
936/// while reading.
937fn find_sparse_entries(
938 file: &mut fs::File,
939 stat: &fs::Metadata,
940) -> io::Result<Option<SparseEntries>> {
941 #[cfg(not(any(target_os = "android", target_os = "freebsd", target_os = "linux")))]
942 {
943 let _ = file;
944 let _ = stat;
945 Ok(None)
946 }
947
948 #[cfg(any(target_os = "android", target_os = "freebsd", target_os = "linux"))]
949 find_sparse_entries_seek(file, stat)
950}
951
952/// Implementation of `find_sparse_entries` using `SEEK_HOLE` and `SEEK_DATA`.
953#[cfg(any(target_os = "android", target_os = "freebsd", target_os = "linux"))]
954fn find_sparse_entries_seek(
955 file: &mut fs::File,
956 stat: &fs::Metadata,
957) -> io::Result<Option<SparseEntries>> {
958 use std::os::unix::fs::MetadataExt as _;
959 use std::os::unix::io::AsRawFd as _;
960
961 fn lseek(file: &fs::File, offset: i64, whence: libc::c_int) -> Result<i64, i32> {
962 #[cfg(any(target_os = "linux", target_os = "android"))]
963 let lseek = libc::lseek64;
964 #[cfg(not(any(target_os = "linux", target_os = "android")))]
965 let lseek = libc::lseek;
966
967 match unsafe { lseek(file.as_raw_fd(), offset, whence) } {
968 -1 => Err(io::Error::last_os_error().raw_os_error().unwrap()),
969 off => Ok(off),
970 }
971 }
972
973 if stat.blocks() == 0 {
974 return Ok(if stat.size() == 0 {
975 // Empty file.
976 None
977 } else {
978 // Fully sparse file.
979 Some(SparseEntries {
980 entries: vec![SparseEntry {
981 offset: stat.size(),
982 num_bytes: 0,
983 }],
984 on_disk_size: 0,
985 })
986 });
987 }
988
989 // On most Unixes, we need to read `_PC_MIN_HOLE_SIZE` to see if the file
990 // system supports `SEEK_HOLE`.
991 // FreeBSD: https://man.freebsd.org/cgi/man.cgi?query=lseek&sektion=2&manpath=FreeBSD+14.1-STABLE
992 #[cfg(not(any(target_os = "linux", target_os = "android")))]
993 if unsafe { libc::fpathconf(file.as_raw_fd(), libc::_PC_MIN_HOLE_SIZE) } == -1 {
994 return Ok(None);
995 }
996
997 // Linux is the only UNIX-like without support for `_PC_MIN_HOLE_SIZE`, so
998 // instead we try to call `lseek` and see if it fails.
999 #[cfg(any(target_os = "linux", target_os = "android"))]
1000 match lseek(file, 0, libc::SEEK_HOLE) {
1001 Ok(_) => (),
1002 Err(libc::ENXIO) => {
1003 // The file is empty. Treat it as non-sparse.
1004 return Ok(None);
1005 }
1006 Err(_) => return Ok(None),
1007 }
1008
1009 let mut entries = Vec::new();
1010 let mut on_disk_size = 0;
1011 let mut off_s = 0;
1012 loop {
1013 // off_s=0 │ off_s │ off_s
1014 // ↓ │ ↓ │ ↓
1015 // | DATA |… │ ……………| HOLE | DATA |… │ …|×EOF×
1016 // ↑ │ ↑ ↑ │
1017 // (a) │ (b) (c) (d) │ (e)
1018 match lseek(file, off_s, libc::SEEK_DATA) {
1019 Ok(0) if off_s == 0 => (), // (a) The file starts with data.
1020 Ok(off) if off < off_s => {
1021 // (b) Unlikely.
1022 return Err(std::io::Error::new(
1023 io::ErrorKind::Other,
1024 "lseek(SEEK_DATA) went backwards",
1025 ));
1026 }
1027 Ok(off) if off == off_s => {
1028 // (c) The data at the same offset as the hole.
1029 return Err(std::io::Error::new(
1030 io::ErrorKind::Other,
1031 "lseek(SEEK_DATA) did not advance. \
1032 Did the file change while appending?",
1033 ));
1034 }
1035 Ok(off) => off_s = off, // (d) Jump to the next hole.
1036 Err(libc::ENXIO) => break, // (e) Reached the end of the file.
1037 Err(errno) => return Err(io::Error::from_raw_os_error(errno)),
1038 };
1039
1040 // off_s=0 │ off_s │ off_s
1041 // ↓ │ ↓ │ ↓
1042 // | DATA |×EOF× │ ……………| DATA | HOLE |… │ …|×EOF×
1043 // ↑ │ ↑ ↑ │
1044 // (a) │ (b) (c) (d) │ (e)
1045 match lseek(file, off_s, libc::SEEK_HOLE) {
1046 Ok(off_e) if off_s == 0 && (off_e as u64) == stat.size() => {
1047 // (a) The file is not sparse.
1048 file.seek(io::SeekFrom::Start(0))?;
1049 return Ok(None);
1050 }
1051 Ok(off_e) if off_e < off_s => {
1052 // (b) Unlikely.
1053 return Err(std::io::Error::new(
1054 io::ErrorKind::Other,
1055 "lseek(SEEK_HOLE) went backwards",
1056 ));
1057 }
1058 Ok(off_e) if off_e == off_s => {
1059 // (c) The hole at the same offset as the data.
1060 return Err(std::io::Error::new(
1061 io::ErrorKind::Other,
1062 "lseek(SEEK_HOLE) did not advance. \
1063 Did the file change while appending?",
1064 ));
1065 }
1066 Ok(off_e) => {
1067 // (d) Found a hole or reached the end of the file (implicit
1068 // zero-length hole).
1069 entries.push(SparseEntry {
1070 offset: off_s as u64,
1071 num_bytes: off_e as u64 - off_s as u64,
1072 });
1073 on_disk_size += off_e as u64 - off_s as u64;
1074 off_s = off_e;
1075 }
1076 Err(libc::ENXIO) => {
1077 // (e) off_s was already beyond the end of the file.
1078 return Err(std::io::Error::new(
1079 io::ErrorKind::Other,
1080 "lseek(SEEK_HOLE) returned ENXIO. \
1081 Did the file change while appending?",
1082 ));
1083 }
1084 Err(errno) => return Err(io::Error::from_raw_os_error(errno)),
1085 };
1086 }
1087
1088 if off_s as u64 > stat.size() {
1089 return Err(std::io::Error::new(
1090 io::ErrorKind::Other,
1091 "lseek(SEEK_DATA) went beyond the end of the file. \
1092 Did the file change while appending?",
1093 ));
1094 }
1095
1096 // Add a final zero-length entry. It is required if the file ends with a
1097 // hole, and redundant otherwise. However, we add it unconditionally to
1098 // mimic GNU tar behavior.
1099 entries.push(SparseEntry {
1100 offset: stat.size(),
1101 num_bytes: 0,
1102 });
1103
1104 file.seek(io::SeekFrom::Start(0))?;
1105
1106 Ok(Some(SparseEntries {
1107 entries,
1108 on_disk_size,
1109 }))
1110}
1111
1112impl<W: Write> Drop for Builder<W> {
1113 fn drop(&mut self) {
1114 let _ = self.finish();
1115 }
1116}
1117
#[cfg(test)]
mod tests {
    use super::*;

    /// Should be multiple of 4KiB on ext4, multiple of 32KiB on FreeBSD/UFS, multiple of 64KiB on
    /// ppc64el
    const SPARSE_BLOCK_SIZE: u64 = 64 * 1024;

    /// Builds a `SparseEntry` whose offset and length are given in units of
    /// `SPARSE_BLOCK_SIZE`.
    fn blocks(offset: u64, len: u64) -> SparseEntry {
        SparseEntry {
            offset: offset * SPARSE_BLOCK_SIZE,
            num_bytes: len * SPARSE_BLOCK_SIZE,
        }
    }

    /// Creates files with known layouts and checks the map reported by
    /// `find_sparse_entries` against the expected one.
    #[test]
    fn test_find_sparse_entries() {
        // Each case is a description of the layout plus the entries it should
        // produce (data runs followed by the terminating zero-length entry).
        let cases: Vec<(&str, Vec<SparseEntry>)> = vec![
            ("|", vec![]),
            ("| | | | |", vec![blocks(4, 0)]),
            ("|####|####|####|####|", vec![blocks(0, 4), blocks(4, 0)]),
            ("|####|####| | |", vec![blocks(0, 2), blocks(4, 0)]),
            ("| | |####|####|", vec![blocks(2, 2), blocks(4, 0)]),
            (
                "|####| |####| |",
                vec![blocks(0, 1), blocks(2, 1), blocks(4, 0)],
            ),
            (
                "|####| | |####|",
                vec![blocks(0, 1), blocks(3, 1), blocks(4, 0)],
            ),
            ("| |####|####| |", vec![blocks(1, 2), blocks(4, 0)]),
        ];

        let mut file = tempfile::tempfile().unwrap();
        // One block of non-zero bytes, written repeatedly to form data runs.
        let filler = vec![0xFF_u8; SPARSE_BLOCK_SIZE as usize];

        for (description, map) in &cases {
            // Truncate to nothing first so no data from the previous case
            // survives, then grow to the wanted size.
            let total_len = map.last().map_or(0, |e| e.offset + e.num_bytes);
            file.set_len(0).unwrap();
            file.set_len(total_len).unwrap();

            for e in map {
                file.seek(io::SeekFrom::Start(e.offset)).unwrap();
                for _ in 0..e.num_bytes / SPARSE_BLOCK_SIZE {
                    file.write_all(&filler).unwrap();
                }
            }

            let expected = match map.as_slice() {
                // Empty file.
                [] => None,

                // 100% dense.
                [data, end]
                    if data.offset == 0 && end.num_bytes == 0 && data.num_bytes == end.offset =>
                {
                    None
                }

                // Sparse.
                sparse => Some(SparseEntries {
                    entries: sparse.to_vec(),
                    on_disk_size: sparse.iter().map(|e| e.num_bytes).sum(),
                }),
            };

            let stat = file.metadata().unwrap();
            let reported = find_sparse_entries(&mut file, &stat).unwrap();

            // Loose check: we did not miss any data blocks.
            if let Err(e) = loose_check_sparse_entries(reported.as_ref(), expected.as_ref()) {
                panic!(
                    "Case: {description}\n\
                     Reported: {reported:?}\n\
                     Expected: {expected:?}\n\
                     Error: {e}",
                );
            }

            // On Linux, always do a strict check. Skip on FreeBSD, as on UFS
            // the last block is always dense, even if it's zero-filled.
            #[cfg(any(target_os = "android", target_os = "linux"))]
            assert_eq!(reported, expected, "Case: {description}");
        }
    }

    /// Checks that `reported` covers all data in `expected` and is internally
    /// consistent, without requiring the two to be equal.
    fn loose_check_sparse_entries(
        reported: Option<&SparseEntries>,
        expected: Option<&SparseEntries>,
    ) -> Result<(), &'static str> {
        let (reported, expected) = match (reported, expected) {
            // It's not an error to report a sparse file as non-sparse.
            (None, _) => return Ok(()),
            (Some(_), None) => return Err("Expected dense file, but reported as sparse"),
            (Some(reported), Some(expected)) => (reported, expected),
        };

        // Check that we didn't miss any data blocks. However, reporting some
        // holes as data is not an error during the loose check.
        let is_covered = |e: &SparseEntry| {
            reported
                .entries
                .iter()
                .any(|r| e.offset >= r.offset && e.offset + e.num_bytes <= r.offset + r.num_bytes)
        };
        if !expected.entries.iter().all(is_covered) {
            return Err("Reported is not a superset of expected");
        }

        if reported.entries.last() != expected.entries.last() {
            return Err("Last zero-length entry is not as expected");
        }

        // Check invariants of SparseEntries: each entry starts at or after
        // the end of the one before it.
        let out_of_order = reported
            .entries
            .windows(2)
            .any(|pair| pair[1].offset < pair[0].offset + pair[0].num_bytes);
        if out_of_order {
            return Err("Overlapping or unsorted entries");
        }

        let data_bytes: u64 = reported.entries.iter().map(|e| e.num_bytes).sum();
        if reported.on_disk_size != data_bytes {
            return Err("Incorrect on-disk size");
        }

        Ok(())
    }
}