| use std::cmp; |
| use std::io; |
| use std::io::prelude::*; |
| use std::mem; |
| |
| use super::{corrupt, read_into, GzBuilder, GzHeader, GzHeaderParser}; |
| use crate::crc::CrcReader; |
| use crate::deflate; |
| use crate::Compression; |
| |
/// Copies as many bytes as fit from `from[*pos..]` into the front of `into`.
///
/// `pos` is advanced by the number of bytes copied, and that count is
/// returned. The count is the smaller of `into.len()` and the number of bytes
/// remaining in `from` past `*pos`.
fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize {
    let start = *pos;
    let count = cmp::min(into.len(), from.len() - start);
    into[..count].copy_from_slice(&from[start..start + count]);
    *pos = start + count;
    count
}
| |
/// A gzip streaming encoder
///
/// This structure implements a [`Read`] interface. When read from, it reads
/// uncompressed data from the underlying [`BufRead`] and provides the compressed data.
///
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// use flate2::Compression;
/// use flate2::bufread::GzEncoder;
/// use std::fs::File;
/// use std::io::BufReader;
///
/// // Opens sample file, compresses the contents and returns a Vector or error
/// // File wrapped in a BufReader implements BufRead
///
/// fn open_hello_world() -> io::Result<Vec<u8>> {
///     let f = File::open("examples/hello_world.txt")?;
///     let b = BufReader::new(f);
///     let mut gz = GzEncoder::new(b, Compression::fast());
///     let mut buffer = Vec::new();
///     gz.read_to_end(&mut buffer)?;
///     Ok(buffer)
/// }
/// ```
#[derive(Debug)]
pub struct GzEncoder<R> {
    // Deflate encoder reading from a CRC-tracking wrapper around the input;
    // the wrapper's checksum and byte count are what the footer reports.
    inner: deflate::bufread::DeflateEncoder<CrcReader<R>>,
    // Header bytes, emitted before any compressed data.
    header: Vec<u8>,
    // Cursor into `header` while the header is being emitted; reset to 0 and
    // reused as the cursor into the 8-byte footer once `eof` is set.
    pos: usize,
    // Set once the inner encoder has reported end of stream (a read of 0).
    eof: bool,
}
| |
| pub fn gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R> { |
| let crc = CrcReader::new(r); |
| GzEncoder { |
| inner: deflate::bufread::DeflateEncoder::new(crc, lvl), |
| header, |
| pos: 0, |
| eof: false, |
| } |
| } |
| |
| impl<R: BufRead> GzEncoder<R> { |
| /// Creates a new encoder which will use the given compression level. |
| /// |
| /// The encoder is not configured specially for the emitted header. For |
| /// header configuration, see the `GzBuilder` type. |
| /// |
| /// The data read from the stream `r` will be compressed and available |
| /// through the returned reader. |
| pub fn new(r: R, level: Compression) -> GzEncoder<R> { |
| GzBuilder::new().buf_read(r, level) |
| } |
| |
| fn read_footer(&mut self, into: &mut [u8]) -> io::Result<usize> { |
| if self.pos == 8 { |
| return Ok(0); |
| } |
| let crc = self.inner.get_ref().crc(); |
| let ref arr = [ |
| (crc.sum() >> 0) as u8, |
| (crc.sum() >> 8) as u8, |
| (crc.sum() >> 16) as u8, |
| (crc.sum() >> 24) as u8, |
| (crc.amount() >> 0) as u8, |
| (crc.amount() >> 8) as u8, |
| (crc.amount() >> 16) as u8, |
| (crc.amount() >> 24) as u8, |
| ]; |
| Ok(copy(into, arr, &mut self.pos)) |
| } |
| } |
| |
| impl<R> GzEncoder<R> { |
| /// Acquires a reference to the underlying reader. |
| pub fn get_ref(&self) -> &R { |
| self.inner.get_ref().get_ref() |
| } |
| |
| /// Acquires a mutable reference to the underlying reader. |
| /// |
| /// Note that mutation of the reader may result in surprising results if |
| /// this encoder is continued to be used. |
| pub fn get_mut(&mut self) -> &mut R { |
| self.inner.get_mut().get_mut() |
| } |
| |
| /// Returns the underlying stream, consuming this encoder |
| pub fn into_inner(self) -> R { |
| self.inner.into_inner().into_inner() |
| } |
| } |
| |
/// Splits an 8-byte gzip trailer into `(crc, amount)`.
///
/// Bytes 0..4 hold the checksum and bytes 4..8 hold the byte count, both
/// stored little-endian.
#[inline]
fn finish(buf: &[u8; 8]) -> (u32, u32) {
    let [c0, c1, c2, c3, a0, a1, a2, a3] = *buf;
    let crc = u32::from_le_bytes([c0, c1, c2, c3]);
    let amt = u32::from_le_bytes([a0, a1, a2, a3]);
    (crc, amt)
}
| |
| impl<R: BufRead> Read for GzEncoder<R> { |
| fn read(&mut self, mut into: &mut [u8]) -> io::Result<usize> { |
| let mut amt = 0; |
| if self.eof { |
| return self.read_footer(into); |
| } else if self.pos < self.header.len() { |
| amt += copy(into, &self.header, &mut self.pos); |
| if amt == into.len() { |
| return Ok(amt); |
| } |
| let tmp = into; |
| into = &mut tmp[amt..]; |
| } |
| match self.inner.read(into)? { |
| 0 => { |
| self.eof = true; |
| self.pos = 0; |
| self.read_footer(into) |
| } |
| n => Ok(amt + n), |
| } |
| } |
| } |
| |
| impl<R: BufRead + Write> Write for GzEncoder<R> { |
| fn write(&mut self, buf: &[u8]) -> io::Result<usize> { |
| self.get_mut().write(buf) |
| } |
| |
| fn flush(&mut self) -> io::Result<()> { |
| self.get_mut().flush() |
| } |
| } |
| |
/// A decoder for a single member of a [gzip file].
///
/// This structure implements a [`Read`] interface. When read from, it reads
/// compressed data from the underlying [`BufRead`] and provides the uncompressed data.
///
/// After reading a single member of the gzip data this reader will return
/// Ok(0) even if there are more bytes available in the underlying reader.
/// If you need the following bytes, call `into_inner()` after Ok(0) to
/// recover the underlying reader.
///
/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`]
/// or read more
/// [in the introduction](../index.html#about-multi-member-gzip-files).
///
/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// # use flate2::Compression;
/// # use flate2::write::GzEncoder;
/// use flate2::bufread::GzDecoder;
///
/// # fn main() {
/// #   let mut e = GzEncoder::new(Vec::new(), Compression::default());
/// #   e.write_all(b"Hello World").unwrap();
/// #   let bytes = e.finish().unwrap();
/// #   println!("{}", decode_reader(bytes).unwrap());
/// # }
/// #
/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
/// // Here &[u8] implements BufRead
///
/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
///    let mut gz = GzDecoder::new(&bytes[..]);
///    let mut s = String::new();
///    gz.read_to_string(&mut s)?;
///    Ok(s)
/// }
/// ```
#[derive(Debug)]
pub struct GzDecoder<R> {
    // Current position in the decode state machine (header, body, trailer,
    // stored error, or end).
    state: GzState,
    // Deflate decoder over the input, wrapped so that the checksum and byte
    // count of the decompressed output can be compared with the trailer.
    reader: CrcReader<deflate::bufread::DeflateDecoder<R>>,
    // When true, a further member is decoded after a valid trailer if the
    // underlying reader still has data.
    multi: bool,
}
| |
/// Decode progress of a [`GzDecoder`].
#[derive(Debug)]
enum GzState {
    /// The member header is still being parsed; the parser is kept so that
    /// parsing can be resumed on a later read.
    Header(GzHeaderParser),
    /// The header is complete and decompressed data is being served.
    Body(GzHeader),
    /// The deflate stream has ended and the 8-byte trailer is being read;
    /// the `usize` counts how many trailer bytes are already in the buffer.
    Finished(GzHeader, usize, [u8; 8]),
    /// Header parsing failed during construction; the error is handed out by
    /// the next read.
    Err(io::Error),
    /// Terminal state: every read returns `Ok(0)`. The header is `None` when
    /// this state was reached through `Err`.
    End(Option<GzHeader>),
}
| |
| impl<R: BufRead> GzDecoder<R> { |
| /// Creates a new decoder from the given reader, immediately parsing the |
| /// gzip header. |
| pub fn new(mut r: R) -> GzDecoder<R> { |
| let mut header_parser = GzHeaderParser::new(); |
| |
| let state = match header_parser.parse(&mut r) { |
| Ok(_) => GzState::Body(GzHeader::from(header_parser)), |
| Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => { |
| GzState::Header(header_parser) |
| } |
| Err(err) => GzState::Err(err), |
| }; |
| |
| GzDecoder { |
| state, |
| reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)), |
| multi: false, |
| } |
| } |
| |
| fn multi(mut self, flag: bool) -> GzDecoder<R> { |
| self.multi = flag; |
| self |
| } |
| } |
| |
| impl<R> GzDecoder<R> { |
| /// Returns the header associated with this stream, if it was valid |
| pub fn header(&self) -> Option<&GzHeader> { |
| match &self.state { |
| GzState::Body(header) | GzState::Finished(header, _, _) => Some(header), |
| GzState::End(header) => header.as_ref(), |
| _ => None, |
| } |
| } |
| |
| /// Acquires a reference to the underlying reader. |
| pub fn get_ref(&self) -> &R { |
| self.reader.get_ref().get_ref() |
| } |
| |
| /// Acquires a mutable reference to the underlying stream. |
| /// |
| /// Note that mutation of the stream may result in surprising results if |
| /// this decoder is continued to be used. |
| pub fn get_mut(&mut self) -> &mut R { |
| self.reader.get_mut().get_mut() |
| } |
| |
| /// Consumes this decoder, returning the underlying reader. |
| pub fn into_inner(self) -> R { |
| self.reader.into_inner().into_inner() |
| } |
| } |
| |
impl<R: BufRead> Read for GzDecoder<R> {
    /// Drives the decode state machine until it can return: decompressed
    /// bytes, `Ok(0)` at the end of the stream, or an error.
    ///
    /// The loop lets one call cross several states, e.g. finish the header
    /// and then serve body data.
    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
        loop {
            match &mut self.state {
                GzState::Header(parser) => {
                    // Resume header parsing on the raw underlying reader. Any
                    // error returns here and leaves the state as `Header`.
                    parser.parse(self.reader.get_mut().get_mut())?;
                    self.state = GzState::Body(GzHeader::from(mem::take(parser)));
                }
                GzState::Body(header) => {
                    // A zero-length request must not reach the reader: a
                    // result of 0 below is taken as the end of the deflate
                    // stream.
                    if into.is_empty() {
                        return Ok(0);
                    }
                    match self.reader.read(into)? {
                        0 => {
                            // Deflate stream exhausted; start collecting the
                            // 8-byte trailer into a zeroed buffer.
                            self.state = GzState::Finished(mem::take(header), 0, [0; 8]);
                        }
                        n => {
                            return Ok(n);
                        }
                    }
                }
                GzState::Finished(header, pos, buf) => {
                    if *pos < buf.len() {
                        // Trailer bytes come from the raw underlying reader,
                        // bypassing the decompressor.
                        *pos += read_into(self.reader.get_mut().get_mut(), &mut buf[*pos..])?;
                    } else {
                        // Trailer complete: decode the stored checksum and
                        // byte count and compare them with what the CRC
                        // reader observed.
                        let (crc, amt) = finish(&buf);

                        if crc != self.reader.crc().sum() || amt != self.reader.crc().amount() {
                            self.state = GzState::End(Some(mem::take(header)));
                            return Err(corrupt());
                        } else if self.multi {
                            // Peek at the underlying reader to see whether
                            // another member follows.
                            let is_eof = self
                                .reader
                                .get_mut()
                                .get_mut()
                                .fill_buf()
                                .map(|buf| buf.is_empty())?;

                            if is_eof {
                                self.state = GzState::End(Some(mem::take(header)));
                            } else {
                                // NOTE(review): presumably these clear the
                                // running checksum and the decompressor state
                                // for the next member; confirm against
                                // `CrcReader::reset` and `reset_data`.
                                self.reader.reset();
                                self.reader.get_mut().reset_data();
                                self.state = GzState::Header(GzHeaderParser::new())
                            }
                        } else {
                            self.state = GzState::End(Some(mem::take(header)));
                        }
                    }
                }
                GzState::Err(err) => {
                    // Hand the stored error out exactly once: swap in a
                    // placeholder, then move to the terminal state.
                    let result = Err(mem::replace(err, io::ErrorKind::Other.into()));
                    self.state = GzState::End(None);
                    return result;
                }
                GzState::End(_) => return Ok(0),
            }
        }
    }
}
| |
| impl<R: BufRead + Write> Write for GzDecoder<R> { |
| fn write(&mut self, buf: &[u8]) -> io::Result<usize> { |
| self.get_mut().write(buf) |
| } |
| |
| fn flush(&mut self) -> io::Result<()> { |
| self.get_mut().flush() |
| } |
| } |
| |
/// A gzip streaming decoder that decodes a [gzip file] that may have multiple members.
///
/// This structure implements a [`Read`] interface. When read from, it reads
/// compressed data from the underlying [`BufRead`] and provides the uncompressed data.
///
/// A gzip file consists of a series of *members* concatenated one after another.
/// MultiGzDecoder decodes all members from the data and only returns Ok(0) when the
/// underlying reader does. For a file, this reads to the end of the file.
///
/// To handle members separately, see [GzDecoder] or read more
/// [in the introduction](../index.html#about-multi-member-gzip-files).
///
/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// # use flate2::Compression;
/// # use flate2::write::GzEncoder;
/// use flate2::bufread::MultiGzDecoder;
///
/// # fn main() {
/// #   let mut e = GzEncoder::new(Vec::new(), Compression::default());
/// #   e.write_all(b"Hello World").unwrap();
/// #   let bytes = e.finish().unwrap();
/// #   println!("{}", decode_reader(bytes).unwrap());
/// # }
/// #
/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
/// // Here &[u8] implements BufRead
///
/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
///    let mut gz = MultiGzDecoder::new(&bytes[..]);
///    let mut s = String::new();
///    gz.read_to_string(&mut s)?;
///    Ok(s)
/// }
/// ```
// Thin wrapper around a `GzDecoder` that is constructed in multi-member mode.
#[derive(Debug)]
pub struct MultiGzDecoder<R>(GzDecoder<R>);
| |
| impl<R: BufRead> MultiGzDecoder<R> { |
| /// Creates a new decoder from the given reader, immediately parsing the |
| /// (first) gzip header. If the gzip stream contains multiple members all will |
| /// be decoded. |
| pub fn new(r: R) -> MultiGzDecoder<R> { |
| MultiGzDecoder(GzDecoder::new(r).multi(true)) |
| } |
| } |
| |
| impl<R> MultiGzDecoder<R> { |
| /// Returns the current header associated with this stream, if it's valid |
| pub fn header(&self) -> Option<&GzHeader> { |
| self.0.header() |
| } |
| |
| /// Acquires a reference to the underlying reader. |
| pub fn get_ref(&self) -> &R { |
| self.0.get_ref() |
| } |
| |
| /// Acquires a mutable reference to the underlying stream. |
| /// |
| /// Note that mutation of the stream may result in surprising results if |
| /// this decoder is continued to be used. |
| pub fn get_mut(&mut self) -> &mut R { |
| self.0.get_mut() |
| } |
| |
| /// Consumes this decoder, returning the underlying reader. |
| pub fn into_inner(self) -> R { |
| self.0.into_inner() |
| } |
| } |
| |
| impl<R: BufRead> Read for MultiGzDecoder<R> { |
| fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { |
| self.0.read(into) |
| } |
| } |
| |
#[cfg(test)]
mod test {
    use crate::bufread::GzDecoder;
    use crate::gz::write;
    use crate::Compression;
    use std::io::{Read, Write};

    // GzDecoder consumes one gzip member and then returns 0 for subsequent reads, allowing any
    // additional data to be consumed by the caller.
    #[test]
    fn decode_extra_data() {
        let expected = "Hello World";

        let compressed = {
            let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
            // `write_all` rather than `write`: a bare `write` may accept only
            // part of the input and its returned count was being ignored.
            e.write_all(expected.as_bytes()).unwrap();
            let mut b = e.finish().unwrap();
            b.push(b'x');
            b
        };

        let mut output = Vec::new();
        let mut decoder = GzDecoder::new(compressed.as_slice());
        let decoded_bytes = decoder.read_to_end(&mut output).unwrap();
        assert_eq!(decoded_bytes, output.len());
        let actual = std::str::from_utf8(&output).expect("String parsing error");
        assert_eq!(
            actual, expected,
            "after decompression we obtain the original input"
        );

        // Read into a non-empty buffer: reading into a zero-length slice
        // would report 0 no matter what state the decoder is in.
        let mut scratch = [0u8; 16];
        assert_eq!(
            decoder.read(&mut scratch).unwrap(),
            0,
            "subsequent read of decoder returns 0, but inner reader can return additional data"
        );

        output.clear();
        let mut reader = decoder.into_inner();
        assert_eq!(
            reader.read_to_end(&mut output).unwrap(),
            1,
            "extra data is accessible in underlying buf-read"
        );
        assert_eq!(output, b"x");
    }
}