| use std::fs; |
| use std::io::{self, Read}; |
| use std::path::Path; |
| |
| use super::{ |
| central_header_to_zip_file_inner, read_zipfile_from_stream, spec, ZipError, ZipFile, |
| ZipFileData, ZipResult, |
| }; |
| |
| use byteorder::{LittleEndian, ReadBytesExt}; |
| |
| /// Stream decoder for zip. |
| #[derive(Debug)] |
| pub struct ZipStreamReader<R>(R); |
| |
| impl<R> ZipStreamReader<R> { |
| /// Create a new ZipStreamReader |
| pub fn new(reader: R) -> Self { |
| Self(reader) |
| } |
| } |
| |
| impl<R: Read> ZipStreamReader<R> { |
| fn parse_central_directory(&mut self) -> ZipResult<Option<ZipStreamFileMetadata>> { |
| // Give archive_offset and central_header_start dummy value 0, since |
| // they are not used in the output. |
| let archive_offset = 0; |
| let central_header_start = 0; |
| |
| // Parse central header |
| let signature = self.0.read_u32::<LittleEndian>()?; |
| if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE { |
| Ok(None) |
| } else { |
| central_header_to_zip_file_inner(&mut self.0, archive_offset, central_header_start) |
| .map(ZipStreamFileMetadata) |
| .map(Some) |
| } |
| } |
| |
| /// Iteraate over the stream and extract all file and their |
| /// metadata. |
| pub fn visit<V: ZipStreamVisitor>(mut self, visitor: &mut V) -> ZipResult<()> { |
| while let Some(mut file) = read_zipfile_from_stream(&mut self.0)? { |
| visitor.visit_file(&mut file)?; |
| } |
| |
| while let Some(metadata) = self.parse_central_directory()? { |
| visitor.visit_additional_metadata(&metadata)?; |
| } |
| |
| Ok(()) |
| } |
| |
| /// Extract a Zip archive into a directory, overwriting files if they |
| /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`]. |
| /// |
| /// Extraction is not atomic; If an error is encountered, some of the files |
| /// may be left on disk. |
| pub fn extract<P: AsRef<Path>>(self, directory: P) -> ZipResult<()> { |
| struct Extractor<'a>(&'a Path); |
| impl ZipStreamVisitor for Extractor<'_> { |
| fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { |
| let filepath = file |
| .enclosed_name() |
| .ok_or(ZipError::InvalidArchive("Invalid file path"))?; |
| |
| let outpath = self.0.join(filepath); |
| |
| if file.name().ends_with('/') { |
| fs::create_dir_all(&outpath)?; |
| } else { |
| if let Some(p) = outpath.parent() { |
| fs::create_dir_all(p)?; |
| } |
| let mut outfile = fs::File::create(&outpath)?; |
| io::copy(file, &mut outfile)?; |
| } |
| |
| Ok(()) |
| } |
| |
| #[allow(unused)] |
| fn visit_additional_metadata( |
| &mut self, |
| metadata: &ZipStreamFileMetadata, |
| ) -> ZipResult<()> { |
| #[cfg(unix)] |
| { |
| let filepath = metadata |
| .enclosed_name() |
| .ok_or(ZipError::InvalidArchive("Invalid file path"))?; |
| |
| let outpath = self.0.join(filepath); |
| |
| use std::os::unix::fs::PermissionsExt; |
| if let Some(mode) = metadata.unix_mode() { |
| fs::set_permissions(outpath, fs::Permissions::from_mode(mode))?; |
| } |
| } |
| |
| Ok(()) |
| } |
| } |
| |
| self.visit(&mut Extractor(directory.as_ref())) |
| } |
| } |
| |
| /// Visitor for ZipStreamReader |
| pub trait ZipStreamVisitor { |
| /// * `file` - contains the content of the file and most of the metadata, |
| /// except: |
| /// - `comment`: set to an empty string |
| /// - `data_start`: set to 0 |
| /// - `external_attributes`: `unix_mode()`: will return None |
| fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()>; |
| |
| /// This function is guranteed to be called after all `visit_file`s. |
| /// |
| /// * `metadata` - Provides missing metadata in `visit_file`. |
| fn visit_additional_metadata(&mut self, metadata: &ZipStreamFileMetadata) -> ZipResult<()>; |
| } |
| |
| /// Additional metadata for the file. |
| #[derive(Debug)] |
| pub struct ZipStreamFileMetadata(ZipFileData); |
| |
| impl ZipStreamFileMetadata { |
| /// Get the name of the file |
| /// |
| /// # Warnings |
| /// |
| /// It is dangerous to use this name directly when extracting an archive. |
| /// It may contain an absolute path (`/etc/shadow`), or break out of the |
| /// current directory (`../runtime`). Carelessly writing to these paths |
| /// allows an attacker to craft a ZIP archive that will overwrite critical |
| /// files. |
| /// |
| /// You can use the [`ZipFile::enclosed_name`] method to validate the name |
| /// as a safe path. |
| pub fn name(&self) -> &str { |
| &self.0.file_name |
| } |
| |
| /// Get the name of the file, in the raw (internal) byte representation. |
| /// |
| /// The encoding of this data is currently undefined. |
| pub fn name_raw(&self) -> &[u8] { |
| &self.0.file_name_raw |
| } |
| |
| /// Rewrite the path, ignoring any path components with special meaning. |
| /// |
| /// - Absolute paths are made relative |
| /// - [`ParentDir`]s are ignored |
| /// - Truncates the filename at a NULL byte |
| /// |
| /// This is appropriate if you need to be able to extract *something* from |
| /// any archive, but will easily misrepresent trivial paths like |
| /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this, |
| /// [`ZipFile::enclosed_name`] is the better option in most scenarios. |
| /// |
| /// [`ParentDir`]: `Component::ParentDir` |
| pub fn mangled_name(&self) -> ::std::path::PathBuf { |
| self.0.file_name_sanitized() |
| } |
| |
| /// Ensure the file path is safe to use as a [`Path`]. |
| /// |
| /// - It can't contain NULL bytes |
| /// - It can't resolve to a path outside the current directory |
| /// > `foo/../bar` is fine, `foo/../../bar` is not. |
| /// - It can't be an absolute path |
| /// |
| /// This will read well-formed ZIP files correctly, and is resistant |
| /// to path-based exploits. It is recommended over |
| /// [`ZipFile::mangled_name`]. |
| pub fn enclosed_name(&self) -> Option<&Path> { |
| self.0.enclosed_name() |
| } |
| |
| /// Returns whether the file is actually a directory |
| pub fn is_dir(&self) -> bool { |
| self.name() |
| .chars() |
| .rev() |
| .next() |
| .map_or(false, |c| c == '/' || c == '\\') |
| } |
| |
| /// Returns whether the file is a regular file |
| pub fn is_file(&self) -> bool { |
| !self.is_dir() |
| } |
| |
| /// Get the comment of the file |
| pub fn comment(&self) -> &str { |
| &self.0.file_comment |
| } |
| |
| /// Get the starting offset of the data of the compressed file |
| pub fn data_start(&self) -> u64 { |
| self.0.data_start.load() |
| } |
| |
| /// Get unix mode for the file |
| pub fn unix_mode(&self) -> Option<u32> { |
| self.0.unix_mode() |
| } |
| } |
| |
| #[cfg(test)] |
| mod test { |
| use super::*; |
| use std::collections::BTreeSet; |
| use std::io; |
| |
| struct DummyVisitor; |
| impl ZipStreamVisitor for DummyVisitor { |
| fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> { |
| Ok(()) |
| } |
| |
| fn visit_additional_metadata( |
| &mut self, |
| _metadata: &ZipStreamFileMetadata, |
| ) -> ZipResult<()> { |
| Ok(()) |
| } |
| } |
| |
| #[derive(Default, Debug, Eq, PartialEq)] |
| struct CounterVisitor(u64, u64); |
| impl ZipStreamVisitor for CounterVisitor { |
| fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> { |
| self.0 += 1; |
| Ok(()) |
| } |
| |
| fn visit_additional_metadata( |
| &mut self, |
| _metadata: &ZipStreamFileMetadata, |
| ) -> ZipResult<()> { |
| self.1 += 1; |
| Ok(()) |
| } |
| } |
| |
| #[test] |
| fn invalid_offset() { |
| ZipStreamReader::new(io::Cursor::new(include_bytes!( |
| "../../tests/data/invalid_offset.zip" |
| ))) |
| .visit(&mut DummyVisitor) |
| .unwrap_err(); |
| } |
| |
| #[test] |
| fn invalid_offset2() { |
| ZipStreamReader::new(io::Cursor::new(include_bytes!( |
| "../../tests/data/invalid_offset2.zip" |
| ))) |
| .visit(&mut DummyVisitor) |
| .unwrap_err(); |
| } |
| |
| #[test] |
| fn zip_read_streaming() { |
| let reader = ZipStreamReader::new(io::Cursor::new(include_bytes!( |
| "../../tests/data/mimetype.zip" |
| ))); |
| |
| #[derive(Default)] |
| struct V { |
| filenames: BTreeSet<Box<str>>, |
| } |
| impl ZipStreamVisitor for V { |
| fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { |
| if file.is_file() { |
| self.filenames.insert(file.name().into()); |
| } |
| |
| Ok(()) |
| } |
| fn visit_additional_metadata( |
| &mut self, |
| metadata: &ZipStreamFileMetadata, |
| ) -> ZipResult<()> { |
| if metadata.is_file() { |
| assert!( |
| self.filenames.contains(metadata.name()), |
| "{} is missing its file content", |
| metadata.name() |
| ); |
| } |
| |
| Ok(()) |
| } |
| } |
| |
| reader.visit(&mut V::default()).unwrap(); |
| } |
| |
| #[test] |
| fn file_and_dir_predicates() { |
| let reader = ZipStreamReader::new(io::Cursor::new(include_bytes!( |
| "../../tests/data/files_and_dirs.zip" |
| ))); |
| |
| #[derive(Default)] |
| struct V { |
| filenames: BTreeSet<Box<str>>, |
| } |
| impl ZipStreamVisitor for V { |
| fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { |
| let full_name = file.enclosed_name().unwrap(); |
| let file_name = full_name.file_name().unwrap().to_str().unwrap(); |
| assert!( |
| (file_name.starts_with("dir") && file.is_dir()) |
| || (file_name.starts_with("file") && file.is_file()) |
| ); |
| |
| if file.is_file() { |
| self.filenames.insert(file.name().into()); |
| } |
| |
| Ok(()) |
| } |
| fn visit_additional_metadata( |
| &mut self, |
| metadata: &ZipStreamFileMetadata, |
| ) -> ZipResult<()> { |
| if metadata.is_file() { |
| assert!( |
| self.filenames.contains(metadata.name()), |
| "{} is missing its file content", |
| metadata.name() |
| ); |
| } |
| |
| Ok(()) |
| } |
| } |
| |
| reader.visit(&mut V::default()).unwrap(); |
| } |
| |
| /// test case to ensure we don't preemptively over allocate based on the |
| /// declared number of files in the CDE of an invalid zip when the number of |
| /// files declared is more than the alleged offset in the CDE |
| #[test] |
| fn invalid_cde_number_of_files_allocation_smaller_offset() { |
| ZipStreamReader::new(io::Cursor::new(include_bytes!( |
| "../../tests/data/invalid_cde_number_of_files_allocation_smaller_offset.zip" |
| ))) |
| .visit(&mut DummyVisitor) |
| .unwrap_err(); |
| } |
| |
| /// test case to ensure we don't preemptively over allocate based on the |
| /// declared number of files in the CDE of an invalid zip when the number of |
| /// files declared is less than the alleged offset in the CDE |
| #[test] |
| fn invalid_cde_number_of_files_allocation_greater_offset() { |
| ZipStreamReader::new(io::Cursor::new(include_bytes!( |
| "../../tests/data/invalid_cde_number_of_files_allocation_greater_offset.zip" |
| ))) |
| .visit(&mut DummyVisitor) |
| .unwrap_err(); |
| } |
| } |