zip/read.rs

1//! Types for reading ZIP archives
2
3#[cfg(feature = "aes-crypto")]
4use crate::aes::{AesReader, AesReaderValid};
5use crate::compression::{CompressionMethod, Decompressor};
6use crate::cp437::FromCp437;
7use crate::crc32::Crc32Reader;
8use crate::extra_fields::{ExtendedTimestamp, ExtraField, Ntfs};
9use crate::read::zip_archive::{Shared, SharedBuilder};
10use crate::result::invalid;
11use crate::result::{ZipError, ZipResult};
12use crate::spec::{self, CentralDirectoryEndInfo, DataAndPosition, FixedSizeBlock, Pod};
13use crate::types::{
14    AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipFileData,
15    ZipLocalEntryBlock,
16};
17use crate::write::SimpleFileOptions;
18use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator};
19use crate::ZIP64_BYTES_THR;
20use indexmap::IndexMap;
21use std::borrow::Cow;
22use std::ffi::OsStr;
23use std::fs::create_dir_all;
24use std::io::{self, copy, prelude::*, sink, SeekFrom};
25use std::mem;
26use std::mem::size_of;
27use std::ops::{Deref, Range};
28use std::path::{Component, Path, PathBuf};
29use std::sync::{Arc, OnceLock};
30
31mod config;
32
33pub use config::*;
34
35/// Provides a high-level API for reading from a stream.
36pub(crate) mod stream;
37
38pub(crate) mod magic_finder;
39
40// Put the struct declaration in a private module to convince rustdoc to display ZipArchive nicely
41pub(crate) mod zip_archive {
42    use indexmap::IndexMap;
43    use std::sync::Arc;
44
45    /// Extract immutable data from `ZipArchive` to make it cheap to clone
46    #[derive(Debug)]
47    pub(crate) struct Shared {
48        pub(crate) files: IndexMap<Box<str>, super::ZipFileData>,
49        pub(super) offset: u64,
50        pub(super) dir_start: u64,
51        // This isn't used anywhere yet, but is kept here for future use cases.
52        #[allow(dead_code)]
53        pub(super) config: super::Config,
54        pub(crate) comment: Box<[u8]>,
55        pub(crate) zip64_comment: Option<Box<[u8]>>,
56    }
57
58    #[derive(Debug)]
59    pub(crate) struct SharedBuilder {
60        pub(crate) files: Vec<super::ZipFileData>,
61        pub(super) offset: u64,
62        pub(super) dir_start: u64,
63        // This isn't used anywhere yet, but is kept here for future use cases.
64        #[allow(dead_code)]
65        pub(super) config: super::Config,
66    }
67
68    impl SharedBuilder {
69        pub fn build(self, comment: Box<[u8]>, zip64_comment: Option<Box<[u8]>>) -> Shared {
70            let mut index_map = IndexMap::with_capacity(self.files.len());
71            self.files.into_iter().for_each(|file| {
72                index_map.insert(file.file_name.clone(), file);
73            });
74            Shared {
75                files: index_map,
76                offset: self.offset,
77                dir_start: self.dir_start,
78                config: self.config,
79                comment,
80                zip64_comment,
81            }
82        }
83    }
84
85    /// ZIP archive reader
86    ///
87    /// At the moment, this type is cheap to clone as long as the reader it uses
88    /// is. However, this is not guaranteed by this crate and may change in the
89    /// future.
90    ///
91    /// ```no_run
92    /// use std::io::prelude::*;
93    /// fn list_zip_contents(reader: impl Read + Seek) -> zip::result::ZipResult<()> {
94    ///     use zip::HasZipMetadata;
95    ///     let mut zip = zip::ZipArchive::new(reader)?;
96    ///
97    ///     for i in 0..zip.len() {
98    ///         let mut file = zip.by_index(i)?;
99    ///         println!("Filename: {}", file.name());
100    ///         std::io::copy(&mut file, &mut std::io::stdout())?;
101    ///     }
102    ///
103    ///     Ok(())
104    /// }
105    /// ```
106    #[derive(Clone, Debug)]
107    pub struct ZipArchive<R> {
108        pub(super) reader: R,
109        pub(super) shared: Arc<Shared>,
110    }
111}
112
113#[cfg(feature = "aes-crypto")]
114use crate::aes::PWD_VERIFY_LENGTH;
115use crate::extra_fields::UnicodeExtraField;
116use crate::result::ZipError::InvalidPassword;
117use crate::spec::is_dir;
118use crate::types::ffi::{S_IFLNK, S_IFREG};
119use crate::unstable::{path_to_string, LittleEndianReadExt};
120pub use zip_archive::ZipArchive;
121
122#[allow(clippy::large_enum_variant)]
123pub(crate) enum CryptoReader<'a, R: Read> {
124    Plaintext(io::Take<&'a mut R>),
125    ZipCrypto(ZipCryptoReaderValid<io::Take<&'a mut R>>),
126    #[cfg(feature = "aes-crypto")]
127    Aes {
128        reader: AesReaderValid<io::Take<&'a mut R>>,
129        vendor_version: AesVendorVersion,
130    },
131}
132
133impl<R: Read> Read for CryptoReader<'_, R> {
134    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
135        match self {
136            CryptoReader::Plaintext(r) => r.read(buf),
137            CryptoReader::ZipCrypto(r) => r.read(buf),
138            #[cfg(feature = "aes-crypto")]
139            CryptoReader::Aes { reader: r, .. } => r.read(buf),
140        }
141    }
142
143    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
144        match self {
145            CryptoReader::Plaintext(r) => r.read_to_end(buf),
146            CryptoReader::ZipCrypto(r) => r.read_to_end(buf),
147            #[cfg(feature = "aes-crypto")]
148            CryptoReader::Aes { reader: r, .. } => r.read_to_end(buf),
149        }
150    }
151
152    fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
153        match self {
154            CryptoReader::Plaintext(r) => r.read_to_string(buf),
155            CryptoReader::ZipCrypto(r) => r.read_to_string(buf),
156            #[cfg(feature = "aes-crypto")]
157            CryptoReader::Aes { reader: r, .. } => r.read_to_string(buf),
158        }
159    }
160}
161
162impl<'a, R: Read> CryptoReader<'a, R> {
163    /// Consumes this decoder, returning the underlying reader.
164    pub fn into_inner(self) -> io::Take<&'a mut R> {
165        match self {
166            CryptoReader::Plaintext(r) => r,
167            CryptoReader::ZipCrypto(r) => r.into_inner(),
168            #[cfg(feature = "aes-crypto")]
169            CryptoReader::Aes { reader: r, .. } => r.into_inner(),
170        }
171    }
172
173    /// Returns `true` if the data is encrypted using AE2.
174    pub const fn is_ae2_encrypted(&self) -> bool {
175        #[cfg(feature = "aes-crypto")]
176        return matches!(
177            self,
178            CryptoReader::Aes {
179                vendor_version: AesVendorVersion::Ae2,
180                ..
181            }
182        );
183        #[cfg(not(feature = "aes-crypto"))]
184        false
185    }
186}
187
188#[cold]
189fn invalid_state<T>() -> io::Result<T> {
190    Err(io::Error::other("ZipFileReader was in an invalid state"))
191}
192
193pub(crate) enum ZipFileReader<'a, R: Read> {
194    NoReader,
195    Raw(io::Take<&'a mut R>),
196    Compressed(Box<Crc32Reader<Decompressor<io::BufReader<CryptoReader<'a, R>>>>>),
197}
198
199impl<R: Read> Read for ZipFileReader<'_, R> {
200    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
201        match self {
202            ZipFileReader::NoReader => invalid_state(),
203            ZipFileReader::Raw(r) => r.read(buf),
204            ZipFileReader::Compressed(r) => r.read(buf),
205        }
206    }
207
208    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
209        match self {
210            ZipFileReader::NoReader => invalid_state(),
211            ZipFileReader::Raw(r) => r.read_exact(buf),
212            ZipFileReader::Compressed(r) => r.read_exact(buf),
213        }
214    }
215
216    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
217        match self {
218            ZipFileReader::NoReader => invalid_state(),
219            ZipFileReader::Raw(r) => r.read_to_end(buf),
220            ZipFileReader::Compressed(r) => r.read_to_end(buf),
221        }
222    }
223
224    fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
225        match self {
226            ZipFileReader::NoReader => invalid_state(),
227            ZipFileReader::Raw(r) => r.read_to_string(buf),
228            ZipFileReader::Compressed(r) => r.read_to_string(buf),
229        }
230    }
231}
232
233impl<'a, R: Read> ZipFileReader<'a, R> {
234    fn into_inner(self) -> io::Result<io::Take<&'a mut R>> {
235        match self {
236            ZipFileReader::NoReader => invalid_state(),
237            ZipFileReader::Raw(r) => Ok(r),
238            ZipFileReader::Compressed(r) => {
239                Ok(r.into_inner().into_inner()?.into_inner().into_inner())
240            }
241        }
242    }
243}
244
245/// A struct for reading a zip file
246pub struct ZipFile<'a, R: Read> {
247    pub(crate) data: Cow<'a, ZipFileData>,
248    pub(crate) reader: ZipFileReader<'a, R>,
249}
250
251/// A struct for reading and seeking a zip file
252pub struct ZipFileSeek<'a, R> {
253    data: Cow<'a, ZipFileData>,
254    reader: ZipFileSeekReader<'a, R>,
255}
256
257enum ZipFileSeekReader<'a, R> {
258    Raw(SeekableTake<'a, R>),
259}
260
261struct SeekableTake<'a, R> {
262    inner: &'a mut R,
263    inner_starting_offset: u64,
264    length: u64,
265    current_offset: u64,
266}
267
268impl<'a, R: Seek> SeekableTake<'a, R> {
269    pub fn new(inner: &'a mut R, length: u64) -> io::Result<Self> {
270        let inner_starting_offset = inner.stream_position()?;
271        Ok(Self {
272            inner,
273            inner_starting_offset,
274            length,
275            current_offset: 0,
276        })
277    }
278}
279
280impl<R: Seek> Seek for SeekableTake<'_, R> {
281    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
282        let offset = match pos {
283            SeekFrom::Start(offset) => Some(offset),
284            SeekFrom::End(offset) => self.length.checked_add_signed(offset),
285            SeekFrom::Current(offset) => self.current_offset.checked_add_signed(offset),
286        };
287        match offset {
288            None => Err(io::Error::new(
289                io::ErrorKind::InvalidInput,
290                "invalid seek to a negative or overflowing position",
291            )),
292            Some(offset) => {
293                let clamped_offset = std::cmp::min(self.length, offset);
294                let new_inner_offset = self
295                    .inner
296                    .seek(SeekFrom::Start(self.inner_starting_offset + clamped_offset))?;
297                self.current_offset = new_inner_offset - self.inner_starting_offset;
298                Ok(self.current_offset)
299            }
300        }
301    }
302}
303
304impl<R: Read> Read for SeekableTake<'_, R> {
305    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
306        let written = self
307            .inner
308            .take(self.length - self.current_offset)
309            .read(buf)?;
310        self.current_offset += written as u64;
311        Ok(written)
312    }
313}
314
315pub(crate) fn make_writable_dir_all<T: AsRef<Path>>(outpath: T) -> Result<(), ZipError> {
316    create_dir_all(outpath.as_ref())?;
317    #[cfg(unix)]
318    {
319        // Dirs must be writable until all normal files are extracted
320        use std::os::unix::fs::PermissionsExt;
321        std::fs::set_permissions(
322            outpath.as_ref(),
323            std::fs::Permissions::from_mode(
324                0o700 | std::fs::metadata(outpath.as_ref())?.permissions().mode(),
325            ),
326        )?;
327    }
328    Ok(())
329}
330
331pub(crate) fn find_content<'a, R: Read + Seek>(
332    data: &ZipFileData,
333    reader: &'a mut R,
334) -> ZipResult<io::Take<&'a mut R>> {
335    // TODO: use .get_or_try_init() once stabilized to provide a closure returning a Result!
336    let data_start = data.data_start(reader)?;
337
338    reader.seek(SeekFrom::Start(data_start))?;
339    Ok(reader.take(data.compressed_size))
340}
341
342fn find_content_seek<'a, R: Read + Seek>(
343    data: &ZipFileData,
344    reader: &'a mut R,
345) -> ZipResult<SeekableTake<'a, R>> {
346    // Parse local header
347    let data_start = data.data_start(reader)?;
348    reader.seek(SeekFrom::Start(data_start))?;
349
350    // Explicit Ok and ? are needed to convert io::Error to ZipError
351    Ok(SeekableTake::new(reader, data.compressed_size)?)
352}
353
354pub(crate) fn find_data_start(
355    data: &ZipFileData,
356    reader: &mut (impl Read + Seek + Sized),
357) -> Result<u64, ZipError> {
358    // Go to start of data.
359    reader.seek(SeekFrom::Start(data.header_start))?;
360
361    // Parse static-sized fields and check the magic value.
362    let block = ZipLocalEntryBlock::parse(reader)?;
363
364    // Calculate the end of the local header from the fields we just parsed.
365    let variable_fields_len =
366        // Each of these fields must be converted to u64 before adding, as the result may
367        // easily overflow a u16.
368        block.file_name_length as u64 + block.extra_field_length as u64;
369    let data_start =
370        data.header_start + size_of::<ZipLocalEntryBlock>() as u64 + variable_fields_len;
371
372    // Set the value so we don't have to read it again.
373    match data.data_start.set(data_start) {
374        Ok(()) => (),
375        // If the value was already set in the meantime, ensure it matches (this is probably
376        // unnecessary).
377        Err(_) => {
378            debug_assert_eq!(*data.data_start.get().unwrap(), data_start);
379        }
380    }
381
382    Ok(data_start)
383}
384
385#[allow(clippy::too_many_arguments)]
386pub(crate) fn make_crypto_reader<'a, R: Read>(
387    data: &ZipFileData,
388    reader: io::Take<&'a mut R>,
389    password: Option<&[u8]>,
390    aes_info: Option<(AesMode, AesVendorVersion, CompressionMethod)>,
391) -> ZipResult<CryptoReader<'a, R>> {
392    #[allow(deprecated)]
393    {
394        if let CompressionMethod::Unsupported(_) = data.compression_method {
395            return unsupported_zip_error("Compression method not supported");
396        }
397    }
398
399    let reader = match (password, aes_info) {
400        #[cfg(not(feature = "aes-crypto"))]
401        (Some(_), Some(_)) => {
402            return Err(ZipError::UnsupportedArchive(
403                "AES encrypted files cannot be decrypted without the aes-crypto feature.",
404            ))
405        }
406        #[cfg(feature = "aes-crypto")]
407        (Some(password), Some((aes_mode, vendor_version, _))) => CryptoReader::Aes {
408            reader: AesReader::new(reader, aes_mode, data.compressed_size).validate(password)?,
409            vendor_version,
410        },
411        (Some(password), None) => {
412            let validator = if data.using_data_descriptor {
413                ZipCryptoValidator::InfoZipMsdosTime(
414                    data.last_modified_time.map_or(0, |x| x.timepart()),
415                )
416            } else {
417                ZipCryptoValidator::PkzipCrc32(data.crc32)
418            };
419            CryptoReader::ZipCrypto(ZipCryptoReader::new(reader, password).validate(validator)?)
420        }
421        (None, Some(_)) => return Err(InvalidPassword),
422        (None, None) => CryptoReader::Plaintext(reader),
423    };
424    Ok(reader)
425}
426
427pub(crate) fn make_reader<R: Read>(
428    compression_method: CompressionMethod,
429    uncompressed_size: u64,
430    crc32: u32,
431    reader: CryptoReader<R>,
432    flags: u16,
433) -> ZipResult<ZipFileReader<R>> {
434    let ae2_encrypted = reader.is_ae2_encrypted();
435
436    Ok(ZipFileReader::Compressed(Box::new(Crc32Reader::new(
437        Decompressor::new(
438            io::BufReader::new(reader),
439            compression_method,
440            uncompressed_size,
441            flags,
442        )?,
443        crc32,
444        ae2_encrypted,
445    ))))
446}
447
448pub(crate) fn make_symlink<T>(
449    outpath: &Path,
450    target: &[u8],
451    #[allow(unused)] existing_files: &IndexMap<Box<str>, T>,
452) -> ZipResult<()> {
453    let Ok(target_str) = std::str::from_utf8(target) else {
454        return Err(invalid!("Invalid UTF-8 as symlink target"));
455    };
456
457    #[cfg(not(any(unix, windows)))]
458    {
459        use std::fs::File;
460        let output = File::create(outpath);
461        output?.write_all(target)?;
462    }
463    #[cfg(unix)]
464    {
465        std::os::unix::fs::symlink(Path::new(&target_str), outpath)?;
466    }
467    #[cfg(windows)]
468    {
469        let target = Path::new(OsStr::new(&target_str));
470        let target_is_dir_from_archive =
471            existing_files.contains_key(target_str) && is_dir(target_str);
472        let target_is_dir = if target_is_dir_from_archive {
473            true
474        } else if let Ok(meta) = std::fs::metadata(target) {
475            meta.is_dir()
476        } else {
477            false
478        };
479        if target_is_dir {
480            std::os::windows::fs::symlink_dir(target, outpath)?;
481        } else {
482            std::os::windows::fs::symlink_file(target, outpath)?;
483        }
484    }
485    Ok(())
486}
487
488#[derive(Debug)]
489pub(crate) struct CentralDirectoryInfo {
490    pub(crate) archive_offset: u64,
491    pub(crate) directory_start: u64,
492    pub(crate) number_of_files: usize,
493    pub(crate) disk_number: u32,
494    pub(crate) disk_with_central_directory: u32,
495}
496
497impl<'a> TryFrom<&'a CentralDirectoryEndInfo> for CentralDirectoryInfo {
498    type Error = ZipError;
499
500    fn try_from(value: &'a CentralDirectoryEndInfo) -> Result<Self, Self::Error> {
501        let (relative_cd_offset, number_of_files, disk_number, disk_with_central_directory) =
502            match &value.eocd64 {
503                Some(DataAndPosition { data: eocd64, .. }) => {
504                    if eocd64.number_of_files_on_this_disk > eocd64.number_of_files {
505                        return Err(invalid!("ZIP64 footer indicates more files on this disk than in the whole archive"));
506                    }
507                    (
508                        eocd64.central_directory_offset,
509                        eocd64.number_of_files as usize,
510                        eocd64.disk_number,
511                        eocd64.disk_with_central_directory,
512                    )
513                }
514                _ => (
515                    value.eocd.data.central_directory_offset as u64,
516                    value.eocd.data.number_of_files_on_this_disk as usize,
517                    value.eocd.data.disk_number as u32,
518                    value.eocd.data.disk_with_central_directory as u32,
519                ),
520            };
521
522        let directory_start = relative_cd_offset
523            .checked_add(value.archive_offset)
524            .ok_or(invalid!("Invalid central directory size or offset"))?;
525
526        Ok(Self {
527            archive_offset: value.archive_offset,
528            directory_start,
529            number_of_files,
530            disk_number,
531            disk_with_central_directory,
532        })
533    }
534}
535
536impl<R> ZipArchive<R> {
537    pub(crate) fn from_finalized_writer(
538        files: IndexMap<Box<str>, ZipFileData>,
539        comment: Box<[u8]>,
540        zip64_comment: Option<Box<[u8]>>,
541        reader: R,
542        central_start: u64,
543    ) -> ZipResult<Self> {
544        let initial_offset = match files.first() {
545            Some((_, file)) => file.header_start,
546            None => central_start,
547        };
548        let shared = Arc::new(Shared {
549            files,
550            offset: initial_offset,
551            dir_start: central_start,
552            config: Config {
553                archive_offset: ArchiveOffset::Known(initial_offset),
554            },
555            comment,
556            zip64_comment,
557        });
558        Ok(Self { reader, shared })
559    }
560
561    /// Total size of the files in the archive, if it can be known. Doesn't include directories or
562    /// metadata.
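    ///
    /// A minimal sketch of using this as a pre-extraction size check; the
    /// `archive.zip` path and the 1 GiB limit are arbitrary example values:
    ///
    /// ```no_run
    /// use std::fs::File;
    ///
    /// fn check_and_extract() -> zip::result::ZipResult<()> {
    ///     let mut archive = zip::ZipArchive::new(File::open("archive.zip")?)?;
    ///     // Refuse to extract when the total uncompressed size is unknown or too large.
    ///     let total = archive.decompressed_size();
    ///     match total {
    ///         Some(size) if size <= 1024 * 1024 * 1024 => archive.extract("out")?,
    ///         _ => eprintln!("archive too large or size unknown, not extracting"),
    ///     }
    ///     Ok(())
    /// }
    /// ```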
563    pub fn decompressed_size(&self) -> Option<u128> {
564        let mut total = 0u128;
565        for file in self.shared.files.values() {
566            if file.using_data_descriptor {
567                return None;
568            }
569            total = total.checked_add(file.uncompressed_size as u128)?;
570        }
571        Some(total)
572    }
573}
574
575impl<R: Read + Seek> ZipArchive<R> {
576    pub(crate) fn merge_contents<W: Write + Seek>(
577        &mut self,
578        mut w: W,
579    ) -> ZipResult<IndexMap<Box<str>, ZipFileData>> {
580        if self.shared.files.is_empty() {
581            return Ok(IndexMap::new());
582        }
583        let mut new_files = self.shared.files.clone();
584        /* The first file header will probably start at the beginning of the file, but zip doesn't
585         * enforce that, and executable zips like PEX files will have a shebang line so will
586         * definitely be greater than 0.
587         *
588         * assert_eq!(0, new_files[0].header_start); // Avoid this.
589         */
590
591        let first_new_file_header_start = w.stream_position()?;
592
593        /* Push back file header starts for all entries in the covered files. */
594        new_files.values_mut().try_for_each(|f| {
595            /* This is probably the only really important thing to change. */
596            f.header_start = f
597                .header_start
598                .checked_add(first_new_file_header_start)
599                .ok_or(invalid!(
600                    "new header start from merge would have been too large"
601                ))?;
602            /* This is only ever used internally to cache metadata lookups (it's not part of the
603             * zip spec), and 0 is the sentinel value. */
604            f.central_header_start = 0;
605            /* This is an atomic variable so it can be updated from another thread in the
606             * implementation (which is good!). */
607            if let Some(old_data_start) = f.data_start.take() {
608                let new_data_start = old_data_start
609                    .checked_add(first_new_file_header_start)
610                    .ok_or(invalid!(
611                        "new data start from merge would have been too large"
612                    ))?;
613                f.data_start.get_or_init(|| new_data_start);
614            }
615            Ok::<_, ZipError>(())
616        })?;
617
618        /* Rewind to the beginning of the file.
619         *
620         * NB: we *could* decide to start copying from new_files[0].header_start instead, which
621         * would avoid copying over e.g. any pex shebangs or other file contents that start before
622         * the first zip file entry. However, zip files actually shouldn't care about garbage data
623         * in *between* real entries, since the central directory header records the correct start
624         * location of each, and keeping track of that math is more complicated logic that will only
625         * rarely be used, since most zips that get merged together are likely to be produced
626         * specifically for that purpose (and therefore are unlikely to have a shebang or other
627         * preface). Finally, this preserves any data that might actually be useful.
628         */
629        self.reader.rewind()?;
630        /* Find the end of the file data. */
631        let length_to_read = self.shared.dir_start;
632        /* Produce a Read that reads bytes up until the start of the central directory header.
633         * This "as &mut dyn Read" trick is used elsewhere to avoid having to clone the underlying
634         * handle, which it really shouldn't need to anyway. */
635        let mut limited_raw = (&mut self.reader as &mut dyn Read).take(length_to_read);
636        /* Copy over file data from source archive directly. */
637        io::copy(&mut limited_raw, &mut w)?;
638
639        /* Return the files we've just written to the data stream. */
640        Ok(new_files)
641    }
642
643    /// Get the directory start offset and number of files. This is done in a
644    /// separate function to ease the control flow design.
645    pub(crate) fn get_metadata(config: Config, reader: &mut R) -> ZipResult<Shared> {
646        // End of the probed region, initially set to the end of the file
647        let file_len = reader.seek(io::SeekFrom::End(0))?;
648        let mut end_exclusive = file_len;
649
650        loop {
651            // Find the EOCD and possibly EOCD64 entries and determine the archive offset.
652            let cde = spec::find_central_directory(
653                reader,
654                config.archive_offset,
655                end_exclusive,
656                file_len,
657            )?;
658
659            // Turn EOCD into internal representation.
660            let Ok(shared) = CentralDirectoryInfo::try_from(&cde)
661                .and_then(|info| Self::read_central_header(info, config, reader))
662            else {
663                // The next EOCD candidate should start before the current one.
664                end_exclusive = cde.eocd.position;
665                continue;
666            };
667
668            return Ok(shared.build(
669                cde.eocd.data.zip_file_comment,
670                cde.eocd64.map(|v| v.data.extensible_data_sector),
671            ));
672        }
673    }
674
675    fn read_central_header(
676        dir_info: CentralDirectoryInfo,
677        config: Config,
678        reader: &mut R,
679    ) -> Result<SharedBuilder, ZipError> {
680        // If the parsed number of files is greater than the offset then
681        // something fishy is going on and we shouldn't trust number_of_files.
682        let file_capacity = if dir_info.number_of_files > dir_info.directory_start as usize {
683            0
684        } else {
685            dir_info.number_of_files
686        };
687
688        if dir_info.disk_number != dir_info.disk_with_central_directory {
689            return unsupported_zip_error("Support for multi-disk files is not implemented");
690        }
691
692        if file_capacity.saturating_mul(size_of::<ZipFileData>()) > isize::MAX as usize {
693            return unsupported_zip_error("Oversized central directory");
694        }
695
696        let mut files = Vec::with_capacity(file_capacity);
697        reader.seek(SeekFrom::Start(dir_info.directory_start))?;
698        for _ in 0..dir_info.number_of_files {
699            let file = central_header_to_zip_file(reader, &dir_info)?;
700            files.push(file);
701        }
702
703        Ok(SharedBuilder {
704            files,
705            offset: dir_info.archive_offset,
706            dir_start: dir_info.directory_start,
707            config,
708        })
709    }
710
711    /// Returns the verification value and salt for the AES encryption of the file.
712    ///
713    /// Fails if the file number is invalid.
714    ///
715    /// # Returns
716    ///
717    /// - `None` if the file is not encrypted with AES
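    ///
    /// A minimal sketch of retrieving the salt and verification value, e.g. to
    /// derive and check a key outside of this crate; the archive path and entry
    /// name are hypothetical example values:
    ///
    /// ```no_run
    /// use std::fs::File;
    ///
    /// fn inspect_aes_entry() -> zip::result::ZipResult<()> {
    ///     let mut archive = zip::ZipArchive::new(File::open("archive.zip")?)?;
    ///     let index = archive
    ///         .index_for_name("secret.txt")
    ///         .ok_or(zip::result::ZipError::FileNotFound)?;
    ///     if let Some(info) = archive.get_aes_verification_key_and_salt(index)? {
    ///         println!("AES mode: {:?}, salt: {} bytes", info.aes_mode, info.salt.len());
    ///     }
    ///     Ok(())
    /// }
    /// ```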
718    #[cfg(feature = "aes-crypto")]
719    pub fn get_aes_verification_key_and_salt(
720        &mut self,
721        file_number: usize,
722    ) -> ZipResult<Option<AesInfo>> {
723        let (_, data) = self
724            .shared
725            .files
726            .get_index(file_number)
727            .ok_or(ZipError::FileNotFound)?;
728
729        let limit_reader = find_content(data, &mut self.reader)?;
730        match data.aes_mode {
731            None => Ok(None),
732            Some((aes_mode, _, _)) => {
733                let (verification_value, salt) =
734                    AesReader::new(limit_reader, aes_mode, data.compressed_size)
735                        .get_verification_value_and_salt()?;
736                let aes_info = AesInfo {
737                    aes_mode,
738                    verification_value,
739                    salt,
740                };
741                Ok(Some(aes_info))
742            }
743        }
744    }
745
746    /// Read a ZIP archive, collecting the files it contains.
747    ///
748    /// This uses the central directory record of the ZIP file, and ignores local file headers.
749    ///
750    /// A default [`Config`] is used.
751    pub fn new(reader: R) -> ZipResult<ZipArchive<R>> {
752        Self::with_config(Default::default(), reader)
753    }
754
755    /// Read a ZIP archive providing a read configuration, collecting the files it contains.
756    ///
757    /// This uses the central directory record of the ZIP file, and ignores local file headers.
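    ///
    /// A minimal sketch, assuming the [`Config`] and [`ArchiveOffset`] re-exports
    /// from this module and an archive whose data starts at a known byte offset
    /// (here 0); the path is an arbitrary example value:
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use zip::read::{ArchiveOffset, Config};
    ///
    /// fn open_with_known_offset() -> zip::result::ZipResult<()> {
    ///     let config = Config {
    ///         archive_offset: ArchiveOffset::Known(0),
    ///     };
    ///     let archive = zip::ZipArchive::with_config(config, File::open("archive.zip")?)?;
    ///     println!("{} entries", archive.len());
    ///     Ok(())
    /// }
    /// ```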
758    pub fn with_config(config: Config, mut reader: R) -> ZipResult<ZipArchive<R>> {
759        let shared = Self::get_metadata(config, &mut reader)?;
760
761        Ok(ZipArchive {
762            reader,
763            shared: shared.into(),
764        })
765    }
766
767    /// Extract a Zip archive into a directory, overwriting files if they
768    /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`]. Symbolic links are only
769    /// created and followed if the target is within the destination directory (this is checked
770    /// conservatively using [`std::fs::canonicalize`]).
771    ///
772    /// Extraction is not atomic. If an error is encountered, some of the files
773    /// may be left on disk. However, on Unix targets, a newly-created directory whose contents
774    /// have not yet been fully extracted is not readable, writable, or usable as a process working
775    /// directory by any non-root user other than you.
776    ///
777    /// On Unix and Windows, symbolic links are extracted correctly. On other platforms such as
778    /// WebAssembly, symbolic links aren't supported, so they're extracted as normal files
779    /// containing the target path in UTF-8.
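    ///
    /// A minimal sketch; `archive.zip` and the `extracted` output directory are
    /// arbitrary example paths:
    ///
    /// ```no_run
    /// use std::fs::File;
    ///
    /// fn unpack() -> zip::result::ZipResult<()> {
    ///     let mut archive = zip::ZipArchive::new(File::open("archive.zip")?)?;
    ///     // Creates `extracted` (and any missing parents) before writing entries into it.
    ///     archive.extract("extracted")?;
    ///     Ok(())
    /// }
    /// ```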
780    pub fn extract<P: AsRef<Path>>(&mut self, directory: P) -> ZipResult<()> {
781        self.extract_internal(directory, None::<fn(&Path) -> bool>)
782    }
783
784    /// Extracts a Zip archive into a directory in the same fashion as
785    /// [`ZipArchive::extract`], but detects a "root" directory in the archive
786    /// (a single top-level directory that contains the rest of the archive's
787    /// entries) and extracts its contents directly.
788    ///
789    /// For a sensible default `filter`, you can use [`root_dir_common_filter`].
790    /// For a custom `filter`, see [`RootDirFilter`].
791    ///
792    /// See [`ZipArchive::root_dir`] for more information on how the root
793    /// directory is detected and the meaning of the `filter` parameter.
794    ///
795    /// ## Example
796    ///
797    /// Imagine a Zip archive with the following structure:
798    ///
799    /// ```text
800    /// root/file1.txt
801    /// root/file2.txt
802    /// root/sub/file3.txt
803    /// root/sub/subsub/file4.txt
804    /// ```
805    ///
806    /// If the archive is extracted to `foo` using [`ZipArchive::extract`],
807    /// the resulting directory structure will be:
808    ///
809    /// ```text
810    /// foo/root/file1.txt
811    /// foo/root/file2.txt
812    /// foo/root/sub/file3.txt
813    /// foo/root/sub/subsub/file4.txt
814    /// ```
815    ///
816    /// If the archive is extracted to `foo` using
817    /// [`ZipArchive::extract_unwrapped_root_dir`], the resulting directory
818    /// structure will be:
819    ///
820    /// ```text
821    /// foo/file1.txt
822    /// foo/file2.txt
823    /// foo/sub/file3.txt
824    /// foo/sub/subsub/file4.txt
825    /// ```
826    ///
827    /// ## Example - No Root Directory
828    ///
829    /// Imagine a Zip archive with the following structure:
830    ///
831    /// ```text
832    /// root/file1.txt
833    /// root/file2.txt
834    /// root/sub/file3.txt
835    /// root/sub/subsub/file4.txt
836    /// other/file5.txt
837    /// ```
838    ///
839    /// Due to the presence of the `other` directory,
840    /// [`ZipArchive::extract_unwrapped_root_dir`] will extract this in the same
841    /// fashion as [`ZipArchive::extract`] as there is now no "root directory."
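    ///
    /// A minimal sketch using [`root_dir_common_filter`] as the filter; the paths
    /// are arbitrary example values:
    ///
    /// ```no_run
    /// use std::fs::File;
    ///
    /// fn unwrap_and_extract() -> zip::result::ZipResult<()> {
    ///     let mut archive = zip::ZipArchive::new(File::open("archive.zip")?)?;
    ///     // If the archive has a single top-level directory, its contents land directly in `out`.
    ///     archive.extract_unwrapped_root_dir("out", zip::read::root_dir_common_filter)?;
    ///     Ok(())
    /// }
    /// ```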
842    pub fn extract_unwrapped_root_dir<P: AsRef<Path>>(
843        &mut self,
844        directory: P,
845        root_dir_filter: impl RootDirFilter,
846    ) -> ZipResult<()> {
847        self.extract_internal(directory, Some(root_dir_filter))
848    }
849
850    fn extract_internal<P: AsRef<Path>>(
851        &mut self,
852        directory: P,
853        root_dir_filter: Option<impl RootDirFilter>,
854    ) -> ZipResult<()> {
855        use std::fs;
856
857        create_dir_all(&directory)?;
858        let directory = directory.as_ref().canonicalize()?;
859
860        let root_dir = root_dir_filter
861            .and_then(|filter| {
862                self.root_dir(&filter)
863                    .transpose()
864                    .map(|root_dir| root_dir.map(|root_dir| (root_dir, filter)))
865            })
866            .transpose()?;
867
868        // If we have a root dir, simplify the path components to be more
869        // appropriate for passing to `safe_prepare_path`
870        let root_dir = root_dir
871            .as_ref()
872            .map(|(root_dir, filter)| {
873                crate::path::simplified_components(root_dir)
874                    .ok_or_else(|| {
875                        // Should be unreachable
876                        debug_assert!(false, "Invalid root dir path");
877
878                        invalid!("Invalid root dir path")
879                    })
880                    .map(|root_dir| (root_dir, filter))
881            })
882            .transpose()?;
883
884        #[cfg(unix)]
885        let mut files_by_unix_mode = Vec::new();
886
887        for i in 0..self.len() {
888            let mut file = self.by_index(i)?;
889
890            let mut outpath = directory.clone();
891            file.safe_prepare_path(directory.as_ref(), &mut outpath, root_dir.as_ref())?;
892
893            let symlink_target = if file.is_symlink() && (cfg!(unix) || cfg!(windows)) {
894                let mut target = Vec::with_capacity(file.size() as usize);
895                file.read_to_end(&mut target)?;
896                Some(target)
897            } else {
898                if file.is_dir() {
899                    crate::read::make_writable_dir_all(&outpath)?;
900                    continue;
901                }
902                None
903            };
904
905            drop(file);
906
907            if let Some(target) = symlink_target {
908                make_symlink(&outpath, &target, &self.shared.files)?;
909                continue;
910            }
911            let mut file = self.by_index(i)?;
912            let mut outfile = fs::File::create(&outpath)?;
913
914            io::copy(&mut file, &mut outfile)?;
915            #[cfg(unix)]
916            {
917                // Check for real permissions, which we'll set in a second pass
918                if let Some(mode) = file.unix_mode() {
919                    files_by_unix_mode.push((outpath.clone(), mode));
920                }
921            }
922            #[cfg(feature = "chrono")]
923            {
924                // Set original timestamp.
925                if let Some(last_modified) = file.last_modified() {
926                    if let Some(t) = datetime_to_systemtime(&last_modified) {
927                        outfile.set_modified(t)?;
928                    }
929                }
930            }
931        }
932        #[cfg(unix)]
933        {
934            use std::cmp::Reverse;
935            use std::os::unix::fs::PermissionsExt;
936
937            if files_by_unix_mode.len() > 1 {
938                // Ensure we update children's permissions before making a parent unwritable
939                files_by_unix_mode.sort_by_key(|(path, _)| Reverse(path.clone()));
940            }
941            for (path, mode) in files_by_unix_mode.into_iter() {
942                fs::set_permissions(&path, fs::Permissions::from_mode(mode))?;
943            }
944        }
945        Ok(())
946    }
947
948    /// Number of files contained in this zip.
949    pub fn len(&self) -> usize {
950        self.shared.files.len()
951    }
952
953    /// Get the starting offset of the zip central directory.
954    pub fn central_directory_start(&self) -> u64 {
955        self.shared.dir_start
956    }
957
958    /// Whether this zip archive contains no files
959    pub fn is_empty(&self) -> bool {
960        self.len() == 0
961    }
962
963    /// Get the offset from the beginning of the underlying reader that this zip begins at, in bytes.
964    ///
965    /// Normally this value is zero, but if the zip has arbitrary data prepended to it, then this value will be the size
966    /// of that prepended data.
967    pub fn offset(&self) -> u64 {
968        self.shared.offset
969    }
970
971    /// Get the comment of the zip archive.
972    pub fn comment(&self) -> &[u8] {
973        &self.shared.comment
974    }
975
976    /// Get the ZIP64 comment of the zip archive, if it is ZIP64.
977    pub fn zip64_comment(&self) -> Option<&[u8]> {
978        self.shared.zip64_comment.as_deref()
979    }
980
981    /// Returns an iterator over all the file and directory names in this archive.
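    ///
    /// A minimal sketch that collects the names into a `Vec`; `archive.zip` is an
    /// arbitrary example path:
    ///
    /// ```no_run
    /// use std::fs::File;
    ///
    /// fn names() -> zip::result::ZipResult<Vec<String>> {
    ///     let archive = zip::ZipArchive::new(File::open("archive.zip")?)?;
    ///     Ok(archive.file_names().map(String::from).collect())
    /// }
    /// ```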
982    pub fn file_names(&self) -> impl Iterator<Item = &str> {
983        self.shared.files.keys().map(|s| s.as_ref())
984    }
985
986    /// Returns Ok(true) if any compressed data in this archive belongs to more than one file. This
987    /// doesn't make the archive invalid, but some programs will refuse to decompress it because the
988    /// copies would take up space independently in the destination.
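    ///
    /// A minimal sketch of rejecting such archives up front; `archive.zip` and the
    /// `out` directory are arbitrary example paths:
    ///
    /// ```no_run
    /// use std::fs::File;
    ///
    /// fn extract_if_not_overlapping() -> zip::result::ZipResult<()> {
    ///     let mut archive = zip::ZipArchive::new(File::open("archive.zip")?)?;
    ///     if archive.has_overlapping_files()? {
    ///         eprintln!("entries share compressed data; refusing to extract");
    ///     } else {
    ///         archive.extract("out")?;
    ///     }
    ///     Ok(())
    /// }
    /// ```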
989    pub fn has_overlapping_files(&mut self) -> ZipResult<bool> {
990        let mut ranges = Vec::<Range<u64>>::with_capacity(self.shared.files.len());
991        for file in self.shared.files.values() {
992            if file.compressed_size == 0 {
993                continue;
994            }
995            let start = file.data_start(&mut self.reader)?;
996            let end = start + file.compressed_size;
997            if ranges
998                .iter()
999                .any(|range| range.start <= end && start <= range.end)
1000            {
1001                return Ok(true);
1002            }
1003            ranges.push(start..end);
1004        }
1005        Ok(false)
1006    }
1007
1008    /// Search for a file entry by name, decrypt with given password
1009    ///
1010    /// # Warning
1011    ///
1012    /// The implementation of the cryptographic algorithms has not
1013    /// gone through a correctness review, and you should assume it is insecure:
1014    /// passwords used with this API may be compromised.
1015    ///
1016    /// This function sometimes accepts a wrong password. This is because the ZIP spec only allows us
1017    /// to check for a 1/256 chance that the password is correct.
1018    /// There are many passwords out there that will also pass the validity checks
1019    /// we are able to perform. This is a weakness of the ZipCrypto algorithm,
1020    /// due to its fairly primitive approach to cryptography.
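    ///
    /// A minimal sketch; the entry name and password are arbitrary example values:
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use std::io::Read;
    ///
    /// fn read_encrypted_entry() -> zip::result::ZipResult<()> {
    ///     let mut archive = zip::ZipArchive::new(File::open("archive.zip")?)?;
    ///     let mut file = archive.by_name_decrypt("secret.txt", b"password")?;
    ///     let mut contents = String::new();
    ///     file.read_to_string(&mut contents)?;
    ///     println!("{contents}");
    ///     Ok(())
    /// }
    /// ```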
1021    pub fn by_name_decrypt(&mut self, name: &str, password: &[u8]) -> ZipResult<ZipFile<'_, R>> {
1022        self.by_name_with_optional_password(name, Some(password))
1023    }
1024
1025    /// Search for a file entry by name
1026    pub fn by_name(&mut self, name: &str) -> ZipResult<ZipFile<'_, R>> {
1027        self.by_name_with_optional_password(name, None)
1028    }
1029
1030    /// Get the index of a file entry by name, if it's present.
1031    #[inline(always)]
1032    pub fn index_for_name(&self, name: &str) -> Option<usize> {
1033        self.shared.files.get_index_of(name)
1034    }
1035
1036    /// Search for a file entry by path, decrypt with given password
1037    ///
1038    /// # Warning
1039    ///
1040    /// The implementation of the cryptographic algorithms has not
1041    /// gone through a correctness review, and you should assume it is insecure:
1042    /// passwords used with this API may be compromised.
1043    ///
1044    /// This function sometimes accepts a wrong password. This is because the ZIP spec only allows us
1045    /// to check for a 1/256 chance that the password is correct.
1046    /// There are many passwords out there that will also pass the validity checks
1047    /// we are able to perform. This is a weakness of the ZipCrypto algorithm,
1048    /// due to its fairly primitive approach to cryptography.
1049    pub fn by_path_decrypt<T: AsRef<Path>>(
1050        &mut self,
1051        path: T,
1052        password: &[u8],
1053    ) -> ZipResult<ZipFile<'_, R>> {
1054        self.index_for_path(path)
1055            .ok_or(ZipError::FileNotFound)
1056            .and_then(|index| {
1057                self.by_index_with_options(index, ZipReadOptions::new().password(Some(password)))
1058            })
1059    }
1060
1061    /// Search for a file entry by path
1062    pub fn by_path<T: AsRef<Path>>(&mut self, path: T) -> ZipResult<ZipFile<'_, R>> {
1063        self.index_for_path(path)
1064            .ok_or(ZipError::FileNotFound)
1065            .and_then(|index| self.by_index_with_options(index, ZipReadOptions::new()))
1066    }
1067
1068    /// Get the index of a file entry by path, if it's present.
1069    #[inline(always)]
1070    pub fn index_for_path<T: AsRef<Path>>(&self, path: T) -> Option<usize> {
1071        self.index_for_name(&path_to_string(path))
1072    }
1073
1074    /// Get the name of a file entry, if it's present.
1075    #[inline(always)]
1076    pub fn name_for_index(&self, index: usize) -> Option<&str> {
1077        self.shared
1078            .files
1079            .get_index(index)
1080            .map(|(name, _)| name.as_ref())
1081    }
1082
1083    /// Search for a file entry by name and return a seekable object.
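    ///
    /// Note that only entries stored without compression can currently be read this
    /// way. A minimal sketch that reads the last 16 bytes of a hypothetical stored
    /// entry named `data.bin`:
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use std::io::{Read, Seek, SeekFrom};
    ///
    /// fn read_tail() -> zip::result::ZipResult<()> {
    ///     let mut archive = zip::ZipArchive::new(File::open("archive.zip")?)?;
    ///     let mut entry = archive.by_name_seek("data.bin")?;
    ///     // Seek relative to the end of the entry, then read what remains.
    ///     entry.seek(SeekFrom::End(-16))?;
    ///     let mut tail = Vec::new();
    ///     entry.read_to_end(&mut tail)?;
    ///     Ok(())
    /// }
    /// ```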
1084    pub fn by_name_seek(&mut self, name: &str) -> ZipResult<ZipFileSeek<'_, R>> {
1085        self.by_index_seek(self.index_for_name(name).ok_or(ZipError::FileNotFound)?)
1086    }
1087
1088    /// Search for a file entry by index and return a seekable object.
1089    pub fn by_index_seek(&mut self, index: usize) -> ZipResult<ZipFileSeek<'_, R>> {
1090        let reader = &mut self.reader;
1091        self.shared
1092            .files
1093            .get_index(index)
1094            .ok_or(ZipError::FileNotFound)
1095            .and_then(move |(_, data)| {
1096                let seek_reader = match data.compression_method {
1097                    CompressionMethod::Stored => {
1098                        ZipFileSeekReader::Raw(find_content_seek(data, reader)?)
1099                    }
1100                    _ => {
1101                        return Err(ZipError::UnsupportedArchive(
1102                            "Seekable compressed files are not yet supported",
1103                        ))
1104                    }
1105                };
1106                Ok(ZipFileSeek {
1107                    reader: seek_reader,
1108                    data: Cow::Borrowed(data),
1109                })
1110            })
1111    }
1112
1113    fn by_name_with_optional_password<'a>(
1114        &'a mut self,
1115        name: &str,
1116        password: Option<&[u8]>,
1117    ) -> ZipResult<ZipFile<'a, R>> {
1118        let Some(index) = self.shared.files.get_index_of(name) else {
1119            return Err(ZipError::FileNotFound);
1120        };
1121        self.by_index_with_options(index, ZipReadOptions::new().password(password))
1122    }
1123
1124    /// Get a contained file by index, decrypt with given password
1125    ///
1126    /// # Warning
1127    ///
1128    /// The implementation of the cryptographic algorithms has not
1129    /// gone through a correctness review, and you should assume it is insecure:
1130    /// passwords used with this API may be compromised.
1131    ///
1132    /// This function sometimes accepts a wrong password. This is because the ZIP spec only allows us
1133    /// to check for a 1/256 chance that the password is correct.
1134    /// There are many passwords out there that will also pass the validity checks
1135    /// we are able to perform. This is a weakness of the ZipCrypto algorithm,
1136    /// due to its fairly primitive approach to cryptography.
1137    pub fn by_index_decrypt(
1138        &mut self,
1139        file_number: usize,
1140        password: &[u8],
1141    ) -> ZipResult<ZipFile<'_, R>> {
1142        self.by_index_with_options(file_number, ZipReadOptions::new().password(Some(password)))
1143    }
1144
1145    /// Get a contained file by index
1146    pub fn by_index(&mut self, file_number: usize) -> ZipResult<ZipFile<'_, R>> {
1147        self.by_index_with_options(file_number, ZipReadOptions::new())
1148    }
1149
1150    /// Get a contained file by index without decompressing it
1151    pub fn by_index_raw(&mut self, file_number: usize) -> ZipResult<ZipFile<'_, R>> {
1152        let reader = &mut self.reader;
1153        let (_, data) = self
1154            .shared
1155            .files
1156            .get_index(file_number)
1157            .ok_or(ZipError::FileNotFound)?;
1158        Ok(ZipFile {
1159            reader: ZipFileReader::Raw(find_content(data, reader)?),
1160            data: Cow::Borrowed(data),
1161        })
1162    }
1163
1164    /// Get a contained file by index with options.
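    ///
    /// A minimal sketch, assuming the [`ZipReadOptions`] re-export from this module;
    /// the index and password are arbitrary example values:
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use std::io::Read;
    /// use zip::read::ZipReadOptions;
    ///
    /// fn read_first_entry_with_password() -> zip::result::ZipResult<()> {
    ///     let mut archive = zip::ZipArchive::new(File::open("archive.zip")?)?;
    ///     // The password is only used if the entry is actually encrypted.
    ///     let password: &[u8] = b"password";
    ///     let options = ZipReadOptions::new().password(Some(password));
    ///     let mut file = archive.by_index_with_options(0, options)?;
    ///     let mut contents = Vec::new();
    ///     file.read_to_end(&mut contents)?;
    ///     Ok(())
    /// }
    /// ```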
1165    pub fn by_index_with_options(
1166        &mut self,
1167        file_number: usize,
1168        mut options: ZipReadOptions<'_>,
1169    ) -> ZipResult<ZipFile<'_, R>> {
1170        let (_, data) = self
1171            .shared
1172            .files
1173            .get_index(file_number)
1174            .ok_or(ZipError::FileNotFound)?;
1175
1176        if options.ignore_encryption_flag {
1177            // Always use no password when we're ignoring the encryption flag.
1178            options.password = None;
1179        } else {
1180            // Require and use the password only if the file is encrypted.
1181            match (options.password, data.encrypted) {
1182                (None, true) => {
1183                    return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED))
1184                }
1185                // Password supplied, but none needed! Discard.
1186                (Some(_), false) => options.password = None,
1187                _ => {}
1188            }
1189        }
1190        let limit_reader = find_content(data, &mut self.reader)?;
1191
1192        let crypto_reader =
1193            make_crypto_reader(data, limit_reader, options.password, data.aes_mode)?;
1194
1195        Ok(ZipFile {
1196            data: Cow::Borrowed(data),
1197            reader: make_reader(
1198                data.compression_method,
1199                data.uncompressed_size,
1200                data.crc32,
1201                crypto_reader,
1202                data.flags,
1203            )?,
1204        })
1205    }
1206
1207    /// Find the "root directory" of an archive if it exists, filtering out
1208    /// irrelevant entries when searching.
1209    ///
1210    /// Our definition of a "root directory" is a single top-level directory
1211    /// that contains the rest of the archive's entries. This is useful for
1212    /// extracting archives that contain a single top-level directory that
1213    /// you want to "unwrap" and extract directly.
1214    ///
1215    /// For a sensible default filter, you can use [`root_dir_common_filter`].
1216    /// For a custom filter, see [`RootDirFilter`].
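    ///
    /// A minimal sketch using [`root_dir_common_filter`]; the path is an arbitrary
    /// example value:
    ///
    /// ```no_run
    /// use std::fs::File;
    ///
    /// fn print_root_dir() -> zip::result::ZipResult<()> {
    ///     let archive = zip::ZipArchive::new(File::open("archive.zip")?)?;
    ///     match archive.root_dir(zip::read::root_dir_common_filter)? {
    ///         Some(root) => println!("root directory: {}", root.display()),
    ///         None => println!("no single root directory"),
    ///     }
    ///     Ok(())
    /// }
    /// ```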
1217    pub fn root_dir(&self, filter: impl RootDirFilter) -> ZipResult<Option<PathBuf>> {
1218        let mut root_dir: Option<PathBuf> = None;
1219
1220        for i in 0..self.len() {
1221            let (_, file) = self
1222                .shared
1223                .files
1224                .get_index(i)
1225                .ok_or(ZipError::FileNotFound)?;
1226
1227            let path = match file.enclosed_name() {
1228                Some(path) => path,
1229                None => return Ok(None),
1230            };
1231
1232            if !filter(&path) {
1233                continue;
1234            }
1235
1236            macro_rules! replace_root_dir {
1237                ($path:ident) => {
1238                    match &mut root_dir {
1239                        Some(root_dir) => {
1240                            if *root_dir != $path {
1241                                // We've found multiple root directories,
1242                                // abort.
1243                                return Ok(None);
1244                            } else {
1245                                continue;
1246                            }
1247                        }
1248
1249                        None => {
1250                            root_dir = Some($path.into());
1251                            continue;
1252                        }
1253                    }
1254                };
1255            }
1256
1257            // If this entry is located at the root of the archive...
1258            if path.components().count() == 1 {
1259                if file.is_dir() {
1260                    // If it's a directory, it could be the root directory.
1261                    replace_root_dir!(path);
1262                } else {
1263                    // If it's anything else, this archive does not have a
1264                    // root directory.
1265                    return Ok(None);
1266                }
1267            }
1268
1269            // Find the root directory for this entry.
1270            let mut path = path.as_path();
1271            while let Some(parent) = path.parent().filter(|path| *path != Path::new("")) {
1272                path = parent;
1273            }
1274
1275            replace_root_dir!(path);
1276        }
1277
1278        Ok(root_dir)
1279    }
1280
1281    /// Unwrap and return the inner reader object
1282    ///
1283    /// The position of the reader is undefined.
1284    pub fn into_inner(self) -> R {
1285        self.reader
1286    }
1287}
1288
1289/// Holds the AES information of a file in the zip archive
1290#[derive(Debug)]
1291#[cfg(feature = "aes-crypto")]
1292pub struct AesInfo {
1293    /// The AES encryption mode
1294    pub aes_mode: AesMode,
1295    /// The verification key
1296    pub verification_value: [u8; PWD_VERIFY_LENGTH],
1297    /// The salt
1298    pub salt: Vec<u8>,
1299}
1300
1301const fn unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T> {
1302    Err(ZipError::UnsupportedArchive(detail))
1303}
1304
1305/// Parse a central directory entry to collect the information for the file.
1306pub(crate) fn central_header_to_zip_file<R: Read + Seek>(
1307    reader: &mut R,
1308    central_directory: &CentralDirectoryInfo,
1309) -> ZipResult<ZipFileData> {
1310    let central_header_start = reader.stream_position()?;
1311
1312    // Parse central header
1313    let block = ZipCentralEntryBlock::parse(reader)?;
1314
1315    let file = central_header_to_zip_file_inner(
1316        reader,
1317        central_directory.archive_offset,
1318        central_header_start,
1319        block,
1320    )?;
1321
1322    let central_header_end = reader.stream_position()?;
1323
1324    reader.seek(SeekFrom::Start(central_header_end))?;
1325    Ok(file)
1326}
1327
1328#[inline]
1329fn read_variable_length_byte_field<R: Read>(reader: &mut R, len: usize) -> io::Result<Box<[u8]>> {
1330    let mut data = vec![0; len].into_boxed_slice();
1331    reader.read_exact(&mut data)?;
1332    Ok(data)
1333}
1334
1335/// Parse a central directory entry to collect the information for the file.
1336fn central_header_to_zip_file_inner<R: Read>(
1337    reader: &mut R,
1338    archive_offset: u64,
1339    central_header_start: u64,
1340    block: ZipCentralEntryBlock,
1341) -> ZipResult<ZipFileData> {
1342    let ZipCentralEntryBlock {
1343        // magic,
1344        version_made_by,
1345        // version_to_extract,
1346        flags,
1347        compression_method,
1348        last_mod_time,
1349        last_mod_date,
1350        crc32,
1351        compressed_size,
1352        uncompressed_size,
1353        file_name_length,
1354        extra_field_length,
1355        file_comment_length,
1356        // disk_number,
1357        // internal_file_attributes,
1358        external_file_attributes,
1359        offset,
1360        ..
1361    } = block;
1362
1363    let encrypted = flags & 1 == 1;
1364    let is_utf8 = flags & (1 << 11) != 0;
1365    let using_data_descriptor = flags & (1 << 3) != 0;
1366
1367    let file_name_raw = read_variable_length_byte_field(reader, file_name_length as usize)?;
1368    let extra_field = read_variable_length_byte_field(reader, extra_field_length as usize)?;
1369    let file_comment_raw = read_variable_length_byte_field(reader, file_comment_length as usize)?;
1370    let file_name: Box<str> = match is_utf8 {
1371        true => String::from_utf8_lossy(&file_name_raw).into(),
1372        false => file_name_raw.clone().from_cp437(),
1373    };
1374    let file_comment: Box<str> = match is_utf8 {
1375        true => String::from_utf8_lossy(&file_comment_raw).into(),
1376        false => file_comment_raw.from_cp437(),
1377    };
1378
1379    // Construct the result
1380    let mut result = ZipFileData {
1381        system: System::from((version_made_by >> 8) as u8),
1382        /* NB: this strips the top 8 bits! */
1383        version_made_by: version_made_by as u8,
1384        encrypted,
1385        using_data_descriptor,
1386        is_utf8,
1387        compression_method: CompressionMethod::parse_from_u16(compression_method),
1388        compression_level: None,
1389        last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(),
1390        crc32,
1391        compressed_size: compressed_size.into(),
1392        uncompressed_size: uncompressed_size.into(),
1393        flags,
1394        file_name,
1395        file_name_raw,
1396        extra_field: Some(Arc::new(extra_field.to_vec())),
1397        central_extra_field: None,
1398        file_comment,
1399        header_start: offset.into(),
1400        extra_data_start: None,
1401        central_header_start,
1402        data_start: OnceLock::new(),
1403        external_attributes: external_file_attributes,
1404        large_file: false,
1405        aes_mode: None,
1406        aes_extra_data_start: 0,
1407        extra_fields: Vec::new(),
1408    };
1409    match parse_extra_field(&mut result) {
1410        Ok(stripped_extra_field) => {
1411            result.extra_field = stripped_extra_field;
1412        }
1413        Err(ZipError::Io(..)) => {}
1414        Err(e) => return Err(e),
1415    }
1416
1417    let aes_enabled = result.compression_method == CompressionMethod::AES;
1418    if aes_enabled && result.aes_mode.is_none() {
1419        return Err(invalid!("AES encryption without AES extra data field"));
1420    }
1421
1422    // Account for shifted zip offsets.
1423    result.header_start = result
1424        .header_start
1425        .checked_add(archive_offset)
1426        .ok_or(invalid!("Archive header is too large"))?;
1427
1428    Ok(result)
1429}
1430
1431pub(crate) fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<Option<Arc<Vec<u8>>>> {
1432    let Some(ref extra_field) = file.extra_field else {
1433        return Ok(None);
1434    };
1435    let extra_field = extra_field.clone();
1436    let mut processed_extra_field = extra_field.clone();
1437    let len = extra_field.len();
1438    let mut reader = io::Cursor::new(&**extra_field);
1439
1440    /* TODO: codify this structure into Zip64ExtraFieldBlock fields! */
1441    let mut position = reader.position() as usize;
1442    while (position) < len {
1443        let old_position = position;
1444        let remove = parse_single_extra_field(file, &mut reader, position as u64, false)?;
1445        position = reader.position() as usize;
1446        if remove {
1447            let remaining = len - (position - old_position);
1448            if remaining == 0 {
1449                return Ok(None);
1450            }
1451            let mut new_extra_field = Vec::with_capacity(remaining);
1452            new_extra_field.extend_from_slice(&extra_field[0..old_position]);
1453            new_extra_field.extend_from_slice(&extra_field[position..]);
1454            processed_extra_field = Arc::new(new_extra_field);
1455        }
1456    }
1457    Ok(Some(processed_extra_field))
1458}
1459
1460pub(crate) fn parse_single_extra_field<R: Read>(
1461    file: &mut ZipFileData,
1462    reader: &mut R,
1463    bytes_already_read: u64,
1464    disallow_zip64: bool,
1465) -> ZipResult<bool> {
1466    let kind = reader.read_u16_le()?;
1467    let len = reader.read_u16_le()?;
1468    match kind {
1469        // Zip64 extended information extra field
1470        0x0001 => {
1471            if disallow_zip64 {
1472                return Err(invalid!("Can't write a custom field using the ZIP64 ID"));
1473            }
1474            file.large_file = true;
1475            let mut consumed_len = 0;
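            // The ZIP64 field carries 64-bit values only for the header fields that were
            // saturated (0xFFFFFFFF) in the fixed-size record; a field of 24 bytes or more
            // is treated as carrying all three values regardless.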
1476            if len >= 24 || file.uncompressed_size == spec::ZIP64_BYTES_THR {
1477                file.uncompressed_size = reader.read_u64_le()?;
1478                consumed_len += size_of::<u64>();
1479            }
1480            if len >= 24 || file.compressed_size == spec::ZIP64_BYTES_THR {
1481                file.compressed_size = reader.read_u64_le()?;
1482                consumed_len += size_of::<u64>();
1483            }
1484            if len >= 24 || file.header_start == spec::ZIP64_BYTES_THR {
1485                file.header_start = reader.read_u64_le()?;
1486                consumed_len += size_of::<u64>();
1487            }
1488            let Some(leftover_len) = (len as usize).checked_sub(consumed_len) else {
1489                return Err(invalid!("ZIP64 extra-data field is the wrong length"));
1490            };
1491            reader.read_exact(&mut vec![0u8; leftover_len])?;
1492            return Ok(true);
1493        }
1494        0x000a => {
1495            // NTFS extra field
1496            file.extra_fields
1497                .push(ExtraField::Ntfs(Ntfs::try_from_reader(reader, len)?));
1498        }
1499        0x9901 => {
1500            // AES
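            // AE-x layout: vendor version (u16), vendor ID "AE" (0x4541, u16),
            // AES strength (u8), then the actual compression method (u16).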
1501            if len != 7 {
1502                return Err(ZipError::UnsupportedArchive(
1503                    "AES extra data field has an unsupported length",
1504                ));
1505            }
1506            let vendor_version = reader.read_u16_le()?;
1507            let vendor_id = reader.read_u16_le()?;
1508            let mut out = [0u8];
1509            reader.read_exact(&mut out)?;
1510            let aes_mode = out[0];
1511            let compression_method = CompressionMethod::parse_from_u16(reader.read_u16_le()?);
1512
1513            if vendor_id != 0x4541 {
1514                return Err(invalid!("Invalid AES vendor"));
1515            }
1516            let vendor_version = match vendor_version {
1517                0x0001 => AesVendorVersion::Ae1,
1518                0x0002 => AesVendorVersion::Ae2,
1519                _ => return Err(invalid!("Invalid AES vendor version")),
1520            };
1521            match aes_mode {
1522                0x01 => file.aes_mode = Some((AesMode::Aes128, vendor_version, compression_method)),
1523                0x02 => file.aes_mode = Some((AesMode::Aes192, vendor_version, compression_method)),
1524                0x03 => file.aes_mode = Some((AesMode::Aes256, vendor_version, compression_method)),
1525                _ => return Err(invalid!("Invalid AES encryption strength")),
1526            };
1527            file.compression_method = compression_method;
1528            file.aes_extra_data_start = bytes_already_read;
1529        }
1530        0x5455 => {
1531            // extended timestamp
1532            // https://libzip.org/specifications/extrafld.txt
1533
1534            file.extra_fields.push(ExtraField::ExtendedTimestamp(
1535                ExtendedTimestamp::try_from_reader(reader, len)?,
1536            ));
1537        }
1538        0x6375 => {
1539            // Info-ZIP Unicode Comment Extra Field
1540            // APPNOTE 4.6.8 and https://libzip.org/specifications/extrafld.txt
1541            file.file_comment = String::from_utf8(
1542                UnicodeExtraField::try_from_reader(reader, len)?
1543                    .unwrap_valid(file.file_comment.as_bytes())?
1544                    .into_vec(),
1545            )?
1546            .into();
1547        }
1548        0x7075 => {
1549            // Info-ZIP Unicode Path Extra Field
1550            // APPNOTE 4.6.9 and https://libzip.org/specifications/extrafld.txt
1551            file.file_name_raw = UnicodeExtraField::try_from_reader(reader, len)?
1552                .unwrap_valid(&file.file_name_raw)?;
1553            file.file_name =
1554                String::from_utf8(file.file_name_raw.clone().into_vec())?.into_boxed_str();
1555            file.is_utf8 = true;
1556        }
1557        _ => {
1558            reader.read_exact(&mut vec![0u8; len as usize])?;
1559            // Other fields are ignored
1560        }
1561    }
1562    Ok(false)
1563}
1564
1565/// A trait for exposing file metadata inside the zip.
1566pub trait HasZipMetadata {
1567    /// Get the file metadata
1568    fn get_metadata(&self) -> &ZipFileData;
1569}
1570
1571/// Options for reading a file from an archive.
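///
/// A minimal sketch of passing options when opening an entry (the archive source and
/// password below are placeholders):
///
/// ```no_run
/// use std::io::prelude::*;
///
/// fn read_first_entry(reader: impl Read + Seek) -> zip::result::ZipResult<String> {
///     let mut archive = zip::ZipArchive::new(reader)?;
///     // The password is ignored if the entry is not encrypted.
///     let options = zip::read::ZipReadOptions::new()
///         .password(Some("secret".as_bytes()));
///     let mut entry = archive.by_index_with_options(0, options)?;
///     let mut contents = String::new();
///     entry.read_to_string(&mut contents)?;
///     Ok(contents)
/// }
/// ```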
1572#[derive(Default)]
1573pub struct ZipReadOptions<'a> {
1574    /// The password to use when decrypting the file. This is ignored if the file is not encrypted.
1575    password: Option<&'a [u8]>,
1576
1577    /// Ignore the value of the encryption flag and proceed as if the file were plaintext.
1578    ignore_encryption_flag: bool,
1579}
1580
1581impl<'a> ZipReadOptions<'a> {
1582    /// Create a new set of options with the default values.
1583    #[must_use]
1584    pub fn new() -> Self {
1585        Self::default()
1586    }
1587
1588    /// Set the password, if any, to use. Returns `self` for chaining.
1589    #[must_use]
1590    pub fn password(mut self, password: Option<&'a [u8]>) -> Self {
1591        self.password = password;
1592        self
1593    }
1594
1595    /// Set whether to ignore the encryption flag. Returns `self` for chaining.
1596    #[must_use]
1597    pub fn ignore_encryption_flag(mut self, ignore: bool) -> Self {
1598        self.ignore_encryption_flag = ignore;
1599        self
1600    }
1601}
1602
1603/// Methods for retrieving information on zip files
1604impl<'a, R: Read> ZipFile<'a, R> {
1605    pub(crate) fn take_raw_reader(&mut self) -> io::Result<io::Take<&'a mut R>> {
1606        mem::replace(&mut self.reader, ZipFileReader::NoReader).into_inner()
1607    }
1608
1609    /// Get the version of the ZIP specification used to create the file, as a `(major, minor)` pair
1610    pub fn version_made_by(&self) -> (u8, u8) {
1611        (
1612            self.get_metadata().version_made_by / 10,
1613            self.get_metadata().version_made_by % 10,
1614        )
1615    }
1616
1617    /// Get the name of the file
1618    ///
1619    /// # Warnings
1620    ///
1621    /// It is dangerous to use this name directly when extracting an archive.
1622    /// It may contain an absolute path (`/etc/shadow`), or break out of the
1623    /// current directory (`../runtime`). Carelessly writing to these paths
1624    /// allows an attacker to craft a ZIP archive that will overwrite critical
1625    /// files.
1626    ///
1627    /// You can use the [`ZipFile::enclosed_name`] method to validate the name
1628    /// as a safe path.
1629    pub fn name(&self) -> &str {
1630        &self.get_metadata().file_name
1631    }
1632
1633    /// Get the name of the file, in the raw (internal) byte representation.
1634    ///
1635    /// The encoding of this data is currently undefined.
1636    pub fn name_raw(&self) -> &[u8] {
1637        &self.get_metadata().file_name_raw
1638    }
1639
1640    /// Get the name of the file in a sanitized form. It truncates the name to the first NULL byte,
1641    /// removes a leading '/' and removes '..' parts.
1642    #[deprecated(
1643        since = "0.5.7",
1644        note = "by stripping `..`s from the path, the meaning of paths can change.
1645                `mangled_name` can be used if this behaviour is desirable"
1646    )]
1647    pub fn sanitized_name(&self) -> PathBuf {
1648        self.mangled_name()
1649    }
1650
1651    /// Rewrite the path, ignoring any path components with special meaning.
1652    ///
1653    /// - Absolute paths are made relative
1654    /// - [`ParentDir`]s are ignored
1655    /// - Truncates the filename at a NULL byte
1656    ///
1657    /// This is appropriate if you need to be able to extract *something* from
1658    /// any archive, but will easily misrepresent trivial paths like
1659    /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this,
1660    /// [`ZipFile::enclosed_name`] is the better option in most scenarios.
1661    ///
1662    /// [`ParentDir`]: `std::path::Component::ParentDir`
1663    pub fn mangled_name(&self) -> PathBuf {
1664        self.get_metadata().file_name_sanitized()
1665    }
1666
1667    /// Ensure the file path is safe to use as a [`Path`].
1668    ///
1669    /// - It can't contain NULL bytes
1670    /// - It can't resolve to a path outside the current directory
1671    ///   > `foo/../bar` is fine, `foo/../../bar` is not.
1672    /// - It can't be an absolute path
1673    ///
1674    /// This will read well-formed ZIP files correctly, and is resistant
1675    /// to path-based exploits. It is recommended over
1676    /// [`ZipFile::mangled_name`].
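    ///
    /// A minimal sketch of collecting only the safely extractable paths (the archive
    /// source is a placeholder):
    ///
    /// ```no_run
    /// use std::io::prelude::*;
    /// use std::path::PathBuf;
    ///
    /// fn safe_paths(reader: impl Read + Seek) -> zip::result::ZipResult<Vec<PathBuf>> {
    ///     let mut archive = zip::ZipArchive::new(reader)?;
    ///     let mut paths = Vec::new();
    ///     for i in 0..archive.len() {
    ///         let entry = archive.by_index(i)?;
    ///         // Entries with unsafe names (absolute paths, `..` escapes, NUL bytes) are skipped.
    ///         if let Some(path) = entry.enclosed_name() {
    ///             paths.push(path);
    ///         }
    ///     }
    ///     Ok(paths)
    /// }
    /// ```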
1677    pub fn enclosed_name(&self) -> Option<PathBuf> {
1678        self.get_metadata().enclosed_name()
1679    }
1680
1681    pub(crate) fn simplified_components(&self) -> Option<Vec<&OsStr>> {
1682        self.get_metadata().simplified_components()
1683    }
1684
1685    /// Prepare the path for extraction by creating any missing intermediate directories and checking that symlinks stay contained within the base path.
1686    ///
1687    /// The `base_path` parameter is assumed to be canonicalized.
1688    pub(crate) fn safe_prepare_path(
1689        &self,
1690        base_path: &Path,
1691        outpath: &mut PathBuf,
1692        root_dir: Option<&(Vec<&OsStr>, impl RootDirFilter)>,
1693    ) -> ZipResult<()> {
1694        let components = self
1695            .simplified_components()
1696            .ok_or(invalid!("Invalid file path"))?;
1697
1698        let components = match root_dir {
1699            Some((root_dir, filter)) => match components.strip_prefix(&**root_dir) {
1700                Some(components) => components,
1701
1702                // In this case, we expect that the file was not in the root
1703                // directory, but was filtered out when searching for the
1704                // root directory.
1705                None => {
1706                    // We could technically find ourselves at this code
1707                    // path if the user provides an unstable or
1708                    // non-deterministic `filter` function.
1709                    //
1710                    // If debug assertions are on, we should panic here.
1711                    // Otherwise, the safest thing to do here is to just
1712                    // extract as-is.
1713                    debug_assert!(
1714                        !filter(&PathBuf::from_iter(components.iter())),
1715                        "Root directory filter should not match at this point"
1716                    );
1717
1718                    // Extract as-is.
1719                    &components[..]
1720                }
1721            },
1722
1723            None => &components[..],
1724        };
1725
1726        let components_len = components.len();
1727
1728        for (is_last, component) in components
1729            .iter()
1730            .copied()
1731            .enumerate()
1732            .map(|(i, c)| (i == components_len - 1, c))
1733        {
1734            // we can skip checks on the target directory itself because the base path is assumed to be "trusted" (if the user asks to extract into a symlink, we may follow it)
1735            outpath.push(component);
1736
1737            // check whether the path is a symlink; the target must be _inherently_ within the directory
1738            for limit in (0..5u8).rev() {
1739                let meta = match std::fs::symlink_metadata(&outpath) {
1740                    Ok(meta) => meta,
1741                    Err(e) if e.kind() == io::ErrorKind::NotFound => {
1742                        if !is_last {
1743                            crate::read::make_writable_dir_all(&outpath)?;
1744                        }
1745                        break;
1746                    }
1747                    Err(e) => return Err(e.into()),
1748                };
1749
1750                if !meta.is_symlink() {
1751                    break;
1752                }
1753
1754                if limit == 0 {
1755                    return Err(invalid!("Extraction followed a symlink too deep"));
1756                }
1757
1758                // note that we cannot accept links that do not inherently resolve to a path inside the directory, to prevent:
1759                // - disclosing whether unrelated paths exist (probing that a path exists and then escaping with `../`)
1760                // - issues with file-system-specific path resolution (case sensitivity, etc.)
1761                let target = std::fs::read_link(&outpath)?;
1762
1763                if !crate::path::simplified_components(&target)
1764                    .ok_or(invalid!("Invalid symlink target path"))?
1765                    .starts_with(
1766                        &crate::path::simplified_components(base_path)
1767                            .ok_or(invalid!("Invalid base path"))?,
1768                    )
1769                {
1770                    let is_absolute_enclosed = base_path
1771                        .components()
1772                        .map(Some)
1773                        .chain(std::iter::once(None))
1774                        .zip(target.components().map(Some).chain(std::iter::repeat(None)))
1775                        .all(|(a, b)| match (a, b) {
1776                            // both components are normal
1777                            (Some(Component::Normal(a)), Some(Component::Normal(b))) => a == b,
1778                            // both components consumed fully
1779                            (None, None) => true,
1780                            // target consumed fully but base path is not
1781                            (Some(_), None) => false,
1782                            // base path consumed fully but target is not (and normal)
1783                            (None, Some(Component::CurDir | Component::Normal(_))) => true,
1784                            _ => false,
1785                        });
1786
1787                    if !is_absolute_enclosed {
1788                        return Err(invalid!("Symlink is not inherently safe"));
1789                    }
1790                }
1791
1792                outpath.push(target);
1793            }
1794        }
1795        Ok(())
1796    }
1797
1798    /// Get the comment of the file
1799    pub fn comment(&self) -> &str {
1800        &self.get_metadata().file_comment
1801    }
1802
1803    /// Get the compression method used to store the file
1804    pub fn compression(&self) -> CompressionMethod {
1805        self.get_metadata().compression_method
1806    }
1807
1808    /// Get whether the file is encrypted or not
1809    pub fn encrypted(&self) -> bool {
1810        self.data.encrypted
1811    }
1812
1813    /// Get the size of the file, in bytes, as stored (compressed) in the archive
1814    pub fn compressed_size(&self) -> u64 {
1815        self.get_metadata().compressed_size
1816    }
1817
1818    /// Get the size of the file, in bytes, when uncompressed
1819    pub fn size(&self) -> u64 {
1820        self.get_metadata().uncompressed_size
1821    }
1822
1823    /// Get the time the file was last modified
1824    pub fn last_modified(&self) -> Option<DateTime> {
1825        self.data.last_modified_time
1826    }
1827    /// Returns whether the file is actually a directory
1828    pub fn is_dir(&self) -> bool {
1829        is_dir(self.name())
1830    }
1831
1832    /// Returns whether the file is actually a symbolic link
1833    pub fn is_symlink(&self) -> bool {
1834        self.unix_mode()
1835            .is_some_and(|mode| mode & S_IFLNK == S_IFLNK)
1836    }
1837
1838    /// Returns whether the file is a normal file (i.e. not a directory or symlink)
1839    pub fn is_file(&self) -> bool {
1840        !self.is_dir() && !self.is_symlink()
1841    }
1842
1843    /// Get unix mode for the file
1844    pub fn unix_mode(&self) -> Option<u32> {
1845        self.get_metadata().unix_mode()
1846    }
1847
1848    /// Get the CRC32 hash of the original file
1849    pub fn crc32(&self) -> u32 {
1850        self.get_metadata().crc32
1851    }
1852
1853    /// Get the extra data of the zip header for this file
1854    pub fn extra_data(&self) -> Option<&[u8]> {
1855        self.get_metadata()
1856            .extra_field
1857            .as_ref()
1858            .map(|v| v.deref().deref())
1859    }
1860
1861    /// Get the starting offset of the data of the compressed file
1862    pub fn data_start(&self) -> u64 {
1863        *self.data.data_start.get().unwrap()
1864    }
1865
1866    /// Get the starting offset of the zip header for this file
1867    pub fn header_start(&self) -> u64 {
1868        self.get_metadata().header_start
1869    }
1870    /// Get the starting offset of the zip header in the central directory for this file
1871    pub fn central_header_start(&self) -> u64 {
1872        self.get_metadata().central_header_start
1873    }
1874
1875    /// Get the [`SimpleFileOptions`] that would be used to write this file to
1876    /// a new zip archive.
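    ///
    /// A minimal sketch of copying an entry into a new archive with equivalent options
    /// (the data is decompressed and re-compressed; the archive source is a placeholder):
    ///
    /// ```no_run
    /// use std::io::prelude::*;
    ///
    /// fn copy_first_entry(reader: impl Read + Seek) -> zip::result::ZipResult<()> {
    ///     let mut archive = zip::ZipArchive::new(reader)?;
    ///     let mut writer = zip::ZipWriter::new(std::io::Cursor::new(Vec::new()));
    ///     let mut entry = archive.by_index(0)?;
    ///     let name = entry.name().to_owned();
    ///     let options = entry.options();
    ///     writer.start_file(name, options)?;
    ///     std::io::copy(&mut entry, &mut writer)?;
    ///     let _ = writer.finish()?;
    ///     Ok(())
    /// }
    /// ```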
1877    pub fn options(&self) -> SimpleFileOptions {
1878        let mut options = SimpleFileOptions::default()
1879            .large_file(self.compressed_size().max(self.size()) > ZIP64_BYTES_THR)
1880            .compression_method(self.compression())
1881            .unix_permissions(self.unix_mode().unwrap_or(0o644) | S_IFREG)
1882            .last_modified_time(
1883                self.last_modified()
1884                    .filter(|m| m.is_valid())
1885                    .unwrap_or_else(DateTime::default_for_write),
1886            );
1887
1888        options.normalize();
1889        #[cfg(feature = "aes-crypto")]
1890        if let Some(aes) = self.get_metadata().aes_mode {
1891            // Preserve AES metadata in options for downstream writers.
1892            // This is metadata-only and does not trigger encryption.
1893            options.aes_mode = Some(aes);
1894        }
1895        options
1896    }
1897}
1898
1899/// Methods for retrieving information on zip files
1900impl<R: Read> ZipFile<'_, R> {
1901    /// Iterate through the extra fields that were parsed into structured form
1902    pub fn extra_data_fields(&self) -> impl Iterator<Item = &ExtraField> {
1903        self.data.extra_fields.iter()
1904    }
1905}
1906
1907impl<R: Read> HasZipMetadata for ZipFile<'_, R> {
1908    fn get_metadata(&self) -> &ZipFileData {
1909        self.data.as_ref()
1910    }
1911}
1912
1913impl<R: Read> Read for ZipFile<'_, R> {
1914    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
1915        self.reader.read(buf)
1916    }
1917
1918    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
1919        self.reader.read_exact(buf)
1920    }
1921
1922    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
1923        self.reader.read_to_end(buf)
1924    }
1925
1926    fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
1927        self.reader.read_to_string(buf)
1928    }
1929}
1930
1931impl<R: Read> Read for ZipFileSeek<'_, R> {
1932    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
1933        match &mut self.reader {
1934            ZipFileSeekReader::Raw(r) => r.read(buf),
1935        }
1936    }
1937}
1938
1939impl<R: Seek> Seek for ZipFileSeek<'_, R> {
1940    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
1941        match &mut self.reader {
1942            ZipFileSeekReader::Raw(r) => r.seek(pos),
1943        }
1944    }
1945}
1946
1947impl<R> HasZipMetadata for ZipFileSeek<'_, R> {
1948    fn get_metadata(&self) -> &ZipFileData {
1949        self.data.as_ref()
1950    }
1951}
1952
1953impl<R: Read> Drop for ZipFile<'_, R> {
1954    fn drop(&mut self) {
1955        // If self.data is Cow::Owned, this ZipFile was constructed by the streaming reader.
1956        // In this case, we want to exhaust the reader so that the next file is accessible.
1957        if let Cow::Owned(_) = self.data {
1958            // Get the inner `Take` reader so all decryption, decompression and CRC calculation is skipped.
1959            if let Ok(mut inner) = self.take_raw_reader() {
1960                let _ = copy(&mut inner, &mut sink());
1961            }
1962        }
1963    }
1964}
1965
1966/// Read ZipFile structures from a non-seekable reader.
1967///
1968/// This is an alternative way to read a zip file. If possible, prefer the ZipArchive functions,
1969/// as some information will be missing when reading in this manner.
1970///
1971/// Reads a file header from the start of the stream. Will return `Ok(Some(..))` if a file is
1972/// present at the start of the stream. Returns `Ok(None)` if the start of the central directory
1973/// is encountered. No more files should be read after this.
1974///
1975/// The Drop implementation of ZipFile ensures that the reader will be correctly positioned at the
1976/// next entry once the ZipFile is dropped.
1977///
1978/// Missing fields are:
1979/// * `comment`: set to an empty string
1980/// * `data_start`: set to 0
1981/// * `external_attributes`: `unix_mode()`: will return None
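///
/// A minimal sketch of draining a non-seekable stream entry by entry (the reader is a
/// placeholder):
///
/// ```no_run
/// use std::io::prelude::*;
///
/// fn list_streamed_entries(mut reader: impl Read) -> zip::result::ZipResult<Vec<String>> {
///     let mut names = Vec::new();
///     while let Some(entry) = zip::read::read_zipfile_from_stream(&mut reader)? {
///         names.push(entry.name().to_owned());
///         // Dropping `entry` skips over the remaining file data so the next header can be read.
///     }
///     Ok(names)
/// }
/// ```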
1982pub fn read_zipfile_from_stream<R: Read>(reader: &mut R) -> ZipResult<Option<ZipFile<'_, R>>> {
1983    // We can't use the typical ::parse() method, as we follow separate code paths depending on the
1984    // "magic" value (since the magic value will be from the central directory header if we've
1985    // finished iterating over all the actual files).
1986    /* TODO: smallvec? */
1987
1988    let mut block = ZipLocalEntryBlock::zeroed();
1989    reader.read_exact(block.as_bytes_mut())?;
1990
1991    match block.magic().from_le() {
1992        spec::Magic::LOCAL_FILE_HEADER_SIGNATURE => (),
1993        spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None),
1994        _ => return Err(ZipLocalEntryBlock::WRONG_MAGIC_ERROR),
1995    }
1996
1997    let block = block.from_le();
1998
1999    let mut result = ZipFileData::from_local_block(block, reader)?;
2000
2001    match parse_extra_field(&mut result) {
2002        Ok(..) | Err(ZipError::Io(..)) => {}
2003        Err(e) => return Err(e),
2004    }
2005
2006    let limit_reader = reader.take(result.compressed_size);
2007
2008    let result_flags = result.flags;
2009    let crypto_reader = make_crypto_reader(&result, limit_reader, None, None)?;
2010    let ZipFileData {
2011        crc32,
2012        uncompressed_size,
2013        compression_method,
2014        ..
2015    } = result;
2016
2017    Ok(Some(ZipFile {
2018        data: Cow::Owned(result),
2019        reader: make_reader(
2020            compression_method,
2021            uncompressed_size,
2022            crc32,
2023            crypto_reader,
2024            result_flags,
2025        )?,
2026    }))
2027}
2028
2029/// A filter that determines whether an entry should be ignored when searching
2030/// for the root directory of a Zip archive.
2031///
2032/// Returns `true` if the entry should be considered, and `false` if it should
2033/// be ignored.
2034///
2035/// See [`root_dir_common_filter`] for a sensible default filter.
2036pub trait RootDirFilter: Fn(&Path) -> bool {}
2037impl<F: Fn(&Path) -> bool> RootDirFilter for F {}
2038
2039/// Common filter applied when finding the root directory of a Zip archive.
2040///
2041/// This filter is a sensible default for most use cases and filters out common
2042/// system files that are usually irrelevant to the contents of the archive.
2043///
2044/// Currently, the filter ignores:
2045/// - `/__MACOSX/`
2046/// - `/.DS_Store`
2047/// - `/Thumbs.db`
2048///
2049/// **This function is not guaranteed to be stable and may change in future versions.**
2050///
2051/// # Example
2052///
2053/// ```rust
2054/// # use std::path::Path;
2055/// assert!(zip::read::root_dir_common_filter(Path::new("foo.txt")));
2056/// assert!(!zip::read::root_dir_common_filter(Path::new(".DS_Store")));
2057/// assert!(!zip::read::root_dir_common_filter(Path::new("Thumbs.db")));
2058/// assert!(!zip::read::root_dir_common_filter(Path::new("__MACOSX")));
2059/// assert!(!zip::read::root_dir_common_filter(Path::new("__MACOSX/foo.txt")));
2060/// ```
2061pub fn root_dir_common_filter(path: &Path) -> bool {
2062    const COMMON_FILTER_ROOT_FILES: &[&str] = &[".DS_Store", "Thumbs.db"];
2063
2064    if path.starts_with("__MACOSX") {
2065        return false;
2066    }
2067
2068    if path.components().count() == 1
2069        && path.file_name().is_some_and(|file_name| {
2070            COMMON_FILTER_ROOT_FILES
2071                .iter()
2072                .map(OsStr::new)
2073                .any(|cmp| cmp == file_name)
2074        })
2075    {
2076        return false;
2077    }
2078
2079    true
2080}
2081
2082#[cfg(feature = "chrono")]
2083/// Generate a `SystemTime` from a `DateTime`.
2084fn datetime_to_systemtime(datetime: &DateTime) -> Option<std::time::SystemTime> {
2085    if let Some(t) = generate_chrono_datetime(datetime) {
2086        let time = chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(t, chrono::Utc);
2087        return Some(time.into());
2088    }
2089    None
2090}
2091
2092#[cfg(feature = "chrono")]
2093/// Generate a `NaiveDateTime` from a `DateTime`.
2094fn generate_chrono_datetime(datetime: &DateTime) -> Option<chrono::NaiveDateTime> {
2095    if let Some(d) = chrono::NaiveDate::from_ymd_opt(
2096        datetime.year().into(),
2097        datetime.month().into(),
2098        datetime.day().into(),
2099    ) {
2100        if let Some(d) = d.and_hms_opt(
2101            datetime.hour().into(),
2102            datetime.minute().into(),
2103            datetime.second().into(),
2104        ) {
2105            return Some(d);
2106        }
2107    }
2108    None
2109}
2110
2111#[cfg(test)]
2112mod test {
2113    use crate::read::ZipReadOptions;
2114    use crate::result::ZipResult;
2115    use crate::write::SimpleFileOptions;
2116    use crate::CompressionMethod::Stored;
2117    use crate::{ZipArchive, ZipWriter};
2118    use std::io::{Cursor, Read, Write};
2119    use tempfile::TempDir;
2120
2121    #[test]
2122    fn invalid_offset() {
2123        use super::ZipArchive;
2124
2125        let reader = ZipArchive::new(Cursor::new(include_bytes!(
2126            "../tests/data/invalid_offset.zip"
2127        )));
2128        assert!(reader.is_err());
2129    }
2130
2131    #[test]
2132    fn invalid_offset2() {
2133        use super::ZipArchive;
2134
2135        let reader = ZipArchive::new(Cursor::new(include_bytes!(
2136            "../tests/data/invalid_offset2.zip"
2137        )));
2138        assert!(reader.is_err());
2139    }
2140
2141    #[test]
2142    fn zip64_with_leading_junk() {
2143        use super::ZipArchive;
2144
2145        let reader =
2146            ZipArchive::new(Cursor::new(include_bytes!("../tests/data/zip64_demo.zip"))).unwrap();
2147        assert_eq!(reader.len(), 1);
2148    }
2149
2150    #[test]
2151    fn zip_contents() {
2152        use super::ZipArchive;
2153
2154        let mut reader =
2155            ZipArchive::new(Cursor::new(include_bytes!("../tests/data/mimetype.zip"))).unwrap();
2156        assert_eq!(reader.comment(), b"");
2157        assert_eq!(reader.by_index(0).unwrap().central_header_start(), 77);
2158    }
2159
2160    #[test]
2161    fn zip_read_streaming() {
2162        use super::read_zipfile_from_stream;
2163
2164        let mut reader = Cursor::new(include_bytes!("../tests/data/mimetype.zip"));
2165        loop {
2166            if read_zipfile_from_stream(&mut reader).unwrap().is_none() {
2167                break;
2168            }
2169        }
2170    }
2171
2172    #[test]
2173    fn zip_clone() {
2174        use super::ZipArchive;
2175        use std::io::Read;
2176
2177        let mut reader1 =
2178            ZipArchive::new(Cursor::new(include_bytes!("../tests/data/mimetype.zip"))).unwrap();
2179        let mut reader2 = reader1.clone();
2180
2181        let mut file1 = reader1.by_index(0).unwrap();
2182        let mut file2 = reader2.by_index(0).unwrap();
2183
2184        let t = file1.last_modified().unwrap();
2185        assert_eq!(
2186            (
2187                t.year(),
2188                t.month(),
2189                t.day(),
2190                t.hour(),
2191                t.minute(),
2192                t.second()
2193            ),
2194            (1980, 1, 1, 0, 0, 0)
2195        );
2196
2197        let mut buf1 = [0; 5];
2198        let mut buf2 = [0; 5];
2199        let mut buf3 = [0; 5];
2200        let mut buf4 = [0; 5];
2201
2202        file1.read_exact(&mut buf1).unwrap();
2203        file2.read_exact(&mut buf2).unwrap();
2204        file1.read_exact(&mut buf3).unwrap();
2205        file2.read_exact(&mut buf4).unwrap();
2206
2207        assert_eq!(buf1, buf2);
2208        assert_eq!(buf3, buf4);
2209        assert_ne!(buf1, buf3);
2210    }
2211
2212    #[test]
2213    fn file_and_dir_predicates() {
2214        use super::ZipArchive;
2215
2216        let mut zip = ZipArchive::new(Cursor::new(include_bytes!(
2217            "../tests/data/files_and_dirs.zip"
2218        )))
2219        .unwrap();
2220
2221        for i in 0..zip.len() {
2222            let zip_file = zip.by_index(i).unwrap();
2223            let full_name = zip_file.enclosed_name().unwrap();
2224            let file_name = full_name.file_name().unwrap().to_str().unwrap();
2225            assert!(
2226                (file_name.starts_with("dir") && zip_file.is_dir())
2227                    || (file_name.starts_with("file") && zip_file.is_file())
2228            );
2229        }
2230    }
2231
2232    #[test]
2233    fn zip64_magic_in_filenames() {
2234        let files = vec![
2235            include_bytes!("../tests/data/zip64_magic_in_filename_1.zip").to_vec(),
2236            include_bytes!("../tests/data/zip64_magic_in_filename_2.zip").to_vec(),
2237            include_bytes!("../tests/data/zip64_magic_in_filename_3.zip").to_vec(),
2238            include_bytes!("../tests/data/zip64_magic_in_filename_4.zip").to_vec(),
2239            include_bytes!("../tests/data/zip64_magic_in_filename_5.zip").to_vec(),
2240        ];
2241        // Although we don't allow adding files whose names contain the ZIP64 CDB-end or
2242        // CDB-end-locator signatures, we still read them when they aren't genuinely ambiguous.
2243        for file in files {
2244            ZipArchive::new(Cursor::new(file)).unwrap();
2245        }
2246    }
2247
2248    /// Test case to ensure we don't preemptively over-allocate based on the
2249    /// number of files declared in the CDE of an invalid zip when that count
2250    /// is greater than the alleged offset in the CDE.
2251    #[test]
2252    fn invalid_cde_number_of_files_allocation_smaller_offset() {
2253        use super::ZipArchive;
2254
2255        let reader = ZipArchive::new(Cursor::new(include_bytes!(
2256            "../tests/data/invalid_cde_number_of_files_allocation_smaller_offset.zip"
2257        )));
2258        assert!(reader.is_err() || reader.unwrap().is_empty());
2259    }
2260
2261    /// Test case to ensure we don't preemptively over-allocate based on the
2262    /// number of files declared in the CDE of an invalid zip when that count
2263    /// is less than the alleged offset in the CDE.
2264    #[test]
2265    fn invalid_cde_number_of_files_allocation_greater_offset() {
2266        use super::ZipArchive;
2267
2268        let reader = ZipArchive::new(Cursor::new(include_bytes!(
2269            "../tests/data/invalid_cde_number_of_files_allocation_greater_offset.zip"
2270        )));
2271        assert!(reader.is_err());
2272    }
2273
2274    #[cfg(feature = "deflate64")]
2275    #[test]
2276    fn deflate64_index_out_of_bounds() -> std::io::Result<()> {
2277        let mut reader = ZipArchive::new(Cursor::new(include_bytes!(
2278            "../tests/data/raw_deflate64_index_out_of_bounds.zip"
2279        )))?;
2280        std::io::copy(&mut reader.by_index(0)?, &mut std::io::sink()).expect_err("Invalid file");
2281        Ok(())
2282    }
2283
2284    #[cfg(feature = "deflate64")]
2285    #[test]
2286    fn deflate64_not_enough_space() {
2287        ZipArchive::new(Cursor::new(include_bytes!(
2288            "../tests/data/deflate64_issue_25.zip"
2289        )))
2290        .expect_err("Invalid file");
2291    }
2292
2293    #[cfg(feature = "deflate-flate2")]
2294    #[test]
2295    fn test_read_with_data_descriptor() {
2296        use std::io::Read;
2297
2298        let mut reader = ZipArchive::new(Cursor::new(include_bytes!(
2299            "../tests/data/data_descriptor.zip"
2300        )))
2301        .unwrap();
2302        let mut decompressed = [0u8; 16];
2303        let mut file = reader.by_index(0).unwrap();
2304        assert_eq!(file.read(&mut decompressed).unwrap(), 12);
2305    }
2306
2307    #[test]
2308    fn test_is_symlink() -> std::io::Result<()> {
2309        let mut reader = ZipArchive::new(Cursor::new(include_bytes!("../tests/data/symlink.zip")))?;
2310        assert!(reader.by_index(0)?.is_symlink());
2311        let tempdir = TempDir::with_prefix("test_is_symlink")?;
2312        reader.extract(&tempdir)?;
2313        assert!(tempdir.path().join("bar").is_symlink());
2314        Ok(())
2315    }
2316
2317    #[test]
2318    #[cfg(feature = "deflate-flate2")]
2319    fn test_utf8_extra_field() {
2320        let mut reader =
2321            ZipArchive::new(Cursor::new(include_bytes!("../tests/data/chinese.zip"))).unwrap();
2322        reader.by_name("七个房间.txt").unwrap();
2323    }
2324
2325    #[test]
2326    fn test_utf8() {
2327        let mut reader =
2328            ZipArchive::new(Cursor::new(include_bytes!("../tests/data/linux-7z.zip"))).unwrap();
2329        reader.by_name("你好.txt").unwrap();
2330    }
2331
2332    #[test]
2333    fn test_utf8_2() {
2334        let mut reader = ZipArchive::new(Cursor::new(include_bytes!(
2335            "../tests/data/windows-7zip.zip"
2336        )))
2337        .unwrap();
2338        reader.by_name("你好.txt").unwrap();
2339    }
2340
2341    #[test]
2342    fn test_64k_files() -> ZipResult<()> {
2343        let mut writer = ZipWriter::new(Cursor::new(Vec::new()));
2344        let options = SimpleFileOptions {
2345            compression_method: Stored,
2346            ..Default::default()
2347        };
2348        for i in 0..=u16::MAX {
2349            let file_name = format!("{i}.txt");
2350            writer.start_file(&*file_name, options)?;
2351            writer.write_all(i.to_string().as_bytes())?;
2352        }
2353
2354        let mut reader = ZipArchive::new(writer.finish()?)?;
2355        for i in 0..=u16::MAX {
2356            let expected_name = format!("{i}.txt");
2357            let expected_contents = i.to_string();
2358            let expected_contents = expected_contents.as_bytes();
2359            let mut file = reader.by_name(&expected_name)?;
2360            let mut contents = Vec::with_capacity(expected_contents.len());
2361            file.read_to_end(&mut contents)?;
2362            assert_eq!(contents, expected_contents);
2363            drop(file);
2364            contents.clear();
2365            let mut file = reader.by_index(i as usize)?;
2366            file.read_to_end(&mut contents)?;
2367            assert_eq!(contents, expected_contents);
2368        }
2369        Ok(())
2370    }
2371
2372    /// Symlinks being extracted shouldn't be followed out of the destination directory.
2373    #[test]
2374    fn test_cannot_symlink_outside_destination() -> ZipResult<()> {
2375        use std::fs::create_dir;
2376
2377        let mut writer = ZipWriter::new(Cursor::new(Vec::new()));
2378        writer.add_symlink("symlink/", "../dest-sibling/", SimpleFileOptions::default())?;
2379        writer.start_file("symlink/dest-file", SimpleFileOptions::default())?;
2380        let mut reader = writer.finish_into_readable()?;
2381        let dest_parent = TempDir::with_prefix("read__test_cannot_symlink_outside_destination")?;
2382        let dest_sibling = dest_parent.path().join("dest-sibling");
2383        create_dir(&dest_sibling)?;
2384        let dest = dest_parent.path().join("dest");
2385        create_dir(&dest)?;
2386        assert!(reader.extract(dest).is_err());
2387        assert!(!dest_sibling.join("dest-file").exists());
2388        Ok(())
2389    }
2390
2391    #[test]
2392    fn test_can_create_destination() -> ZipResult<()> {
2393        let mut reader =
2394            ZipArchive::new(Cursor::new(include_bytes!("../tests/data/mimetype.zip")))?;
2395        let dest = TempDir::with_prefix("read__test_can_create_destination")?;
2396        reader.extract(&dest)?;
2397        assert!(dest.path().join("mimetype").exists());
2398        Ok(())
2399    }
2400
2401    #[test]
2402    fn test_central_directory_not_at_end() -> ZipResult<()> {
2403        let mut reader = ZipArchive::new(Cursor::new(include_bytes!("../tests/data/omni.ja")))?;
2404        let mut file = reader.by_name("chrome.manifest")?;
2405        let mut contents = String::new();
2406        file.read_to_string(&mut contents)?; // ensures valid UTF-8
2407        assert!(!contents.is_empty(), "chrome.manifest should not be empty");
2408        drop(file);
2409        for i in 0..reader.len() {
2410            let mut file = reader.by_index(i)?;
2411            // Attempt to read a small portion or all of each file to ensure it's accessible
2412            let mut buffer = Vec::new();
2413            file.read_to_end(&mut buffer)?;
2414            assert_eq!(
2415                buffer.len(),
2416                file.size() as usize,
2417                "File size mismatch for {}",
2418                file.name()
2419            );
2420        }
2421        Ok(())
2422    }
2423
2424    #[test]
2425    fn test_ignore_encryption_flag() -> ZipResult<()> {
2426        let mut reader = ZipArchive::new(Cursor::new(include_bytes!(
2427            "../tests/data/ignore_encryption_flag.zip"
2428        )))?;
2429
2430        // Get the file entry by ignoring its encryption flag.
2431        let mut file =
2432            reader.by_index_with_options(0, ZipReadOptions::new().ignore_encryption_flag(true))?;
2433        let mut contents = String::new();
2434        assert_eq!(file.name(), "plaintext.txt");
2435
2436        // The file claims it is encrypted, but it is not.
2437        assert!(file.encrypted());
2438        file.read_to_string(&mut contents)?; // ensures valid UTF-8
2439        assert_eq!(contents, "This file is not encrypted.\n");
2440        Ok(())
2441    }
2442}