zip/read/
stream.rs

1use super::{
2    central_header_to_zip_file_inner, make_symlink, read_zipfile_from_stream, ZipCentralEntryBlock,
3    ZipFile, ZipFileData, ZipResult,
4};
5use crate::spec::FixedSizeBlock;
6use indexmap::IndexMap;
7use std::fs;
8use std::fs::create_dir_all;
9use std::io::{self, Read};
10use std::path::{Path, PathBuf};
11
/// Decoder that reads a zip archive sequentially from a wrapped reader.
#[derive(Debug)]
pub struct ZipStreamReader<R>(R);

impl<R> ZipStreamReader<R> {
    /// Wrap `reader` in a new `ZipStreamReader`.
    ///
    /// No bytes are read here; the reader is only stored.
    pub const fn new(reader: R) -> Self {
        ZipStreamReader(reader)
    }
}
22
23impl<R: Read> ZipStreamReader<R> {
24    fn parse_central_directory(&mut self) -> ZipResult<ZipStreamFileMetadata> {
25        // Give archive_offset and central_header_start dummy value 0, since
26        // they are not used in the output.
27        let archive_offset = 0;
28        let central_header_start = 0;
29
30        // Parse central header
31        let block = ZipCentralEntryBlock::parse(&mut self.0)?;
32        let file = central_header_to_zip_file_inner(
33            &mut self.0,
34            archive_offset,
35            central_header_start,
36            block,
37        )?;
38        Ok(ZipStreamFileMetadata(file))
39    }
40
41    /// Iterate over the stream and extract all file and their
42    /// metadata.
43    pub fn visit<V: ZipStreamVisitor>(mut self, visitor: &mut V) -> ZipResult<()> {
44        while let Some(mut file) = read_zipfile_from_stream(&mut self.0)? {
45            visitor.visit_file(&mut file)?;
46        }
47
48        while let Ok(metadata) = self.parse_central_directory() {
49            visitor.visit_additional_metadata(&metadata)?;
50        }
51
52        Ok(())
53    }
54
55    /// Extract a Zip archive into a directory, overwriting files if they
56    /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
57    ///
58    /// Extraction is not atomic; If an error is encountered, some of the files
59    /// may be left on disk.
60    pub fn extract<P: AsRef<Path>>(self, directory: P) -> ZipResult<()> {
61        create_dir_all(&directory)?;
62        let directory = directory.as_ref().canonicalize()?;
63        struct Extractor(PathBuf, IndexMap<Box<str>, ()>);
64        impl ZipStreamVisitor for Extractor {
65            fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
66                self.1.insert(file.name().into(), ());
67                let mut outpath = self.0.clone();
68                file.safe_prepare_path(&self.0, &mut outpath, None::<&(_, fn(&Path) -> bool)>)?;
69
70                if file.is_symlink() {
71                    let mut target = Vec::with_capacity(file.size() as usize);
72                    file.read_to_end(&mut target)?;
73                    make_symlink(&outpath, &target, &self.1)?;
74                    return Ok(());
75                }
76
77                if file.is_dir() {
78                    fs::create_dir_all(&outpath)?;
79                } else {
80                    let mut outfile = fs::File::create(&outpath)?;
81                    io::copy(file, &mut outfile)?;
82                }
83
84                Ok(())
85            }
86
87            #[allow(unused)]
88            fn visit_additional_metadata(
89                &mut self,
90                metadata: &ZipStreamFileMetadata,
91            ) -> ZipResult<()> {
92                #[cfg(unix)]
93                {
94                    use super::ZipError;
95                    let filepath = metadata
96                        .enclosed_name()
97                        .ok_or(crate::result::invalid!("Invalid file path"))?;
98
99                    let outpath = self.0.join(filepath);
100
101                    use std::os::unix::fs::PermissionsExt;
102                    if let Some(mode) = metadata.unix_mode() {
103                        fs::set_permissions(outpath, fs::Permissions::from_mode(mode))?;
104                    }
105                }
106
107                Ok(())
108            }
109        }
110
111        self.visit(&mut Extractor(directory, IndexMap::new()))
112    }
113}
114
/// Visitor for ZipStreamReader
pub trait ZipStreamVisitor {
    /// Called for each file entry read from the stream.
    ///
    ///  * `file` - contains the content of the file and most of the metadata,
    ///    except:
    ///     - `comment`: set to an empty string
    ///     - `data_start`: set to 0
    ///     - `external_attributes`: `unix_mode()`: will return None
    fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()>;

    /// This function is guaranteed to be called after all `visit_file`s.
    ///
    ///  * `metadata` - Provides missing metadata in `visit_file`.
    fn visit_additional_metadata(&mut self, metadata: &ZipStreamFileMetadata) -> ZipResult<()>;
}
129
130/// Additional metadata for the file.
131#[derive(Debug)]
132pub struct ZipStreamFileMetadata(ZipFileData);
133
134impl ZipStreamFileMetadata {
135    /// Get the name of the file
136    ///
137    /// # Warnings
138    ///
139    /// It is dangerous to use this name directly when extracting an archive.
140    /// It may contain an absolute path (`/etc/shadow`), or break out of the
141    /// current directory (`../runtime`). Carelessly writing to these paths
142    /// allows an attacker to craft a ZIP archive that will overwrite critical
143    /// files.
144    ///
145    /// You can use the [`ZipFile::enclosed_name`] method to validate the name
146    /// as a safe path.
147    pub fn name(&self) -> &str {
148        &self.0.file_name
149    }
150
151    /// Get the name of the file, in the raw (internal) byte representation.
152    ///
153    /// The encoding of this data is currently undefined.
154    pub fn name_raw(&self) -> &[u8] {
155        &self.0.file_name_raw
156    }
157
158    /// Rewrite the path, ignoring any path components with special meaning.
159    ///
160    /// - Absolute paths are made relative
161    /// - [std::path::Component::ParentDir]s are ignored
162    /// - Truncates the filename at a NULL byte
163    ///
164    /// This is appropriate if you need to be able to extract *something* from
165    /// any archive, but will easily misrepresent trivial paths like
166    /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this,
167    /// [`ZipFile::enclosed_name`] is the better option in most scenarios.
168    pub fn mangled_name(&self) -> PathBuf {
169        self.0.file_name_sanitized()
170    }
171
172    /// Ensure the file path is safe to use as a [`Path`].
173    ///
174    /// - It can't contain NULL bytes
175    /// - It can't resolve to a path outside the current directory
176    ///   > `foo/../bar` is fine, `foo/../../bar` is not.
177    /// - It can't be an absolute path
178    ///
179    /// This will read well-formed ZIP files correctly, and is resistant
180    /// to path-based exploits. It is recommended over
181    /// [`ZipFile::mangled_name`].
182    pub fn enclosed_name(&self) -> Option<PathBuf> {
183        self.0.enclosed_name()
184    }
185
186    /// Returns whether the file is actually a directory
187    pub fn is_dir(&self) -> bool {
188        self.name()
189            .chars()
190            .next_back()
191            .is_some_and(|c| c == '/' || c == '\\')
192    }
193
194    /// Returns whether the file is a regular file
195    pub fn is_file(&self) -> bool {
196        !self.is_dir()
197    }
198
199    /// Get the comment of the file
200    pub fn comment(&self) -> &str {
201        &self.0.file_comment
202    }
203
204    /// Get unix mode for the file
205    pub const fn unix_mode(&self) -> Option<u32> {
206        self.0.unix_mode()
207    }
208}
209
#[cfg(test)]
mod test {
    use tempfile::TempDir;

    use super::*;
    use crate::write::SimpleFileOptions;
    use crate::ZipWriter;
    use std::collections::BTreeSet;
    use std::io::Cursor;

    /// Visitor that accepts every callback and does nothing.
    struct DummyVisitor;
    impl ZipStreamVisitor for DummyVisitor {
        fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> {
            Ok(())
        }

        fn visit_additional_metadata(
            &mut self,
            _metadata: &ZipStreamFileMetadata,
        ) -> ZipResult<()> {
            Ok(())
        }
    }

    /// Visitor that tallies callbacks: field 0 counts `visit_file` calls and
    /// field 1 counts `visit_additional_metadata` calls.
    // Not referenced by any test in this module, hence the `dead_code` allow.
    #[allow(dead_code)]
    #[derive(Default, Debug, Eq, PartialEq)]
    struct CounterVisitor(u64, u64);
    impl ZipStreamVisitor for CounterVisitor {
        fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> {
            self.0 += 1;
            Ok(())
        }

        fn visit_additional_metadata(
            &mut self,
            _metadata: &ZipStreamFileMetadata,
        ) -> ZipResult<()> {
            self.1 += 1;
            Ok(())
        }
    }

    /// Streams `archive` through a [`DummyVisitor`] and panics unless the
    /// visit reports an error.
    fn assert_visit_fails(archive: &[u8]) {
        let outcome = ZipStreamReader::new(Cursor::new(archive)).visit(&mut DummyVisitor);
        outcome.unwrap_err();
    }

    #[test]
    fn invalid_offset() {
        assert_visit_fails(include_bytes!("../../tests/data/invalid_offset.zip"));
    }

    #[test]
    fn invalid_offset2() {
        assert_visit_fails(include_bytes!("../../tests/data/invalid_offset2.zip"));
    }

    #[test]
    fn zip_read_streaming() {
        /// Remembers the name of every regular file whose content was seen.
        #[derive(Default)]
        struct SeenFiles {
            names: BTreeSet<Box<str>>,
        }
        impl ZipStreamVisitor for SeenFiles {
            fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
                if file.is_file() {
                    self.names.insert(file.name().into());
                }

                Ok(())
            }
            fn visit_additional_metadata(
                &mut self,
                metadata: &ZipStreamFileMetadata,
            ) -> ZipResult<()> {
                if !metadata.is_file() {
                    return Ok(());
                }

                assert!(
                    self.names.contains(metadata.name()),
                    "{} is missing its file content",
                    metadata.name()
                );
                Ok(())
            }
        }

        let archive = Cursor::new(include_bytes!("../../tests/data/mimetype.zip"));
        let mut visitor = SeenFiles::default();
        ZipStreamReader::new(archive).visit(&mut visitor).unwrap();
    }

    #[test]
    fn file_and_dir_predicates() {
        /// Checks the `is_dir`/`is_file` predicates against each entry's name
        /// and remembers every regular file whose content was seen.
        #[derive(Default)]
        struct PredicateChecker {
            names: BTreeSet<Box<str>>,
        }
        impl ZipStreamVisitor for PredicateChecker {
            fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
                let enclosed = file.enclosed_name().unwrap();
                let last_component = enclosed.file_name().unwrap().to_str().unwrap();
                assert!(
                    (last_component.starts_with("dir") && file.is_dir())
                        || (last_component.starts_with("file") && file.is_file())
                );

                if file.is_file() {
                    self.names.insert(file.name().into());
                }

                Ok(())
            }
            fn visit_additional_metadata(
                &mut self,
                metadata: &ZipStreamFileMetadata,
            ) -> ZipResult<()> {
                if !metadata.is_file() {
                    return Ok(());
                }

                assert!(
                    self.names.contains(metadata.name()),
                    "{} is missing its file content",
                    metadata.name()
                );
                Ok(())
            }
        }

        let archive = Cursor::new(include_bytes!("../../tests/data/files_and_dirs.zip"));
        let mut visitor = PredicateChecker::default();
        ZipStreamReader::new(archive).visit(&mut visitor).unwrap();
    }

    /// Ensures we don't preemptively over allocate based on the declared
    /// number of files in the CDE of an invalid zip when the number of files
    /// declared is more than the alleged offset in the CDE.
    #[test]
    fn invalid_cde_number_of_files_allocation_smaller_offset() {
        assert_visit_fails(include_bytes!(
            "../../tests/data/invalid_cde_number_of_files_allocation_smaller_offset.zip"
        ));
    }

    /// Ensures we don't preemptively over allocate based on the declared
    /// number of files in the CDE of an invalid zip when the number of files
    /// declared is less than the alleged offset in the CDE.
    #[test]
    fn invalid_cde_number_of_files_allocation_greater_offset() {
        assert_visit_fails(include_bytes!(
            "../../tests/data/invalid_cde_number_of_files_allocation_greater_offset.zip"
        ));
    }

    /// Symlinks being extracted shouldn't be followed out of the destination directory.
    #[test]
    fn test_cannot_symlink_outside_destination() -> ZipResult<()> {
        use std::fs::create_dir;

        // Archive layout: a symlink pointing at a sibling of the destination,
        // followed by a file stored "inside" that symlink.
        let mut archive = ZipWriter::new(Cursor::new(Vec::new()));
        archive.add_symlink("symlink/", "../dest-sibling/", SimpleFileOptions::default())?;
        archive.start_file("symlink/dest-file", SimpleFileOptions::default())?;
        let reader = ZipStreamReader::new(archive.finish()?);

        let parent = TempDir::with_prefix("stream__cannot_symlink_outside_destination")?;
        let sibling = parent.path().join("dest-sibling");
        create_dir(&sibling)?;
        let destination = parent.path().join("dest");
        create_dir(&destination)?;

        assert!(reader.extract(destination).is_err());
        assert!(!sibling.join("dest-file").exists());
        Ok(())
    }

    #[test]
    fn test_can_create_destination() -> ZipResult<()> {
        let archive = include_bytes!("../../tests/data/mimetype.zip").to_vec();
        let reader = ZipStreamReader::new(archive.as_slice());
        let dest = TempDir::with_prefix("stream_test_can_create_destination").unwrap();
        reader.extract(&dest)?;
        assert!(dest.path().join("mimetype").exists());
        Ok(())
    }
}