blob: 1513c11c282f4286c139bde97ab694727b9bbedf [file] [log] [blame]
use twox_hash::XxHash32;
use super::Error;
use std::{
convert::TryInto,
fmt::Debug,
hash::Hasher,
io,
io::{Read, Write},
};
// Bit masks for the FLG byte of the frame descriptor; the bit layout is shown in the
// "FLG byte" table further down in this file.
/// Reserved FLG bit (bit 1); `FrameInfo::read` rejects headers that have it set.
const FLG_RESERVED_MASK: u8 = 0b00000010;
/// The two version bits of the FLG byte (bits 7-6).
const FLG_VERSION_MASK: u8 = 0b11000000;
/// The only accepted value of the version bits (`01`).
const FLG_SUPPORTED_VERSION_BITS: u8 = 0b01000000;
/// Set when blocks are independent (`BlockMode::Independent`).
const FLG_INDEPENDENT_BLOCKS: u8 = 0b00100000;
/// Set when the frame carries per-block checksums.
const FLG_BLOCK_CHECKSUMS: u8 = 0b00010000;
/// Set when the 8-byte content size field is present in the descriptor.
const FLG_CONTENT_SIZE: u8 = 0b00001000;
/// Set when the frame carries a content checksum.
const FLG_CONTENT_CHECKSUM: u8 = 0b00000100;
/// Set when the 4-byte dictionary id field is present in the descriptor.
const FLG_DICTIONARY_ID: u8 = 0b00000001;
// Bit masks for the BD byte of the frame descriptor.
/// Every BD bit outside the block-size field; all of them must be zero.
const BD_RESERVED_MASK: u8 = !BD_BLOCK_SIZE_MASK;
/// The block max-size field of the BD byte (bits 6-5-4).
const BD_BLOCK_SIZE_MASK: u8 = 0b01110000;
/// Right shift that moves the block max-size field down to bit 0.
const BD_BLOCK_SIZE_MASK_RSHIFT: u8 = 4;
/// High bit of the 4-byte block size word; when set, the block is stored uncompressed.
const BLOCK_UNCOMPRESSED_SIZE_BIT: u32 = 0x80000000;
/// Magic number that starts a regular LZ4 frame (stored little-endian).
const LZ4F_MAGIC_NUMBER: u32 = 0x184D2204;
/// Magic number that starts a legacy-format frame.
pub(crate) const LZ4F_LEGACY_MAGIC_NUMBER: u32 = 0x184C2102;
/// Magic numbers that mark a skippable frame.
const LZ4F_SKIPPABLE_MAGIC_RANGE: std::ops::RangeInclusive<u32> = 0x184D2A50..=0x184D2A5F;
/// Size in bytes of the magic number.
pub(crate) const MAGIC_NUMBER_SIZE: usize = 4;
/// Smallest frame header: magic number (4) + FLG (1) + BD (1) + header checksum (1).
pub(crate) const MIN_FRAME_INFO_SIZE: usize = 7;
/// Largest frame header: the minimum plus content size (8) and dictionary id (4).
pub(crate) const MAX_FRAME_INFO_SIZE: usize = 19;
/// Size in bytes of the block size word that precedes every data block.
pub(crate) const BLOCK_INFO_SIZE: usize = 4;
/// Predefined block sizes to choose from when compressing data.
///
/// The numeric value of each variant is the code stored in the block max-size field of
/// the BD byte by `FrameInfo::write`.
#[derive(Clone, Copy, PartialEq, Debug, Default)]
pub enum BlockSize {
    /// Will detect optimal frame size based on the size of the first write call.
    #[default]
    Auto = 0,
    /// 64KB block size.
    Max64KB = 4,
    /// 256KB block size.
    Max256KB = 5,
    /// 1MB block size.
    Max1MB = 6,
    /// 4MB block size.
    Max4MB = 7,
    /// 8MB block size.
    Max8MB = 8,
}

impl BlockSize {
    /// Picks a block size for a buffer of `buf_len` bytes.
    ///
    /// Buffers larger than 256KB get `Max4MB`, buffers larger than 64KB get `Max256KB`,
    /// and everything else gets `Max64KB`.
    pub(crate) fn from_buf_length(buf_len: usize) -> Self {
        if buf_len > BlockSize::Max256KB.get_size() {
            BlockSize::Max4MB
        } else if buf_len > BlockSize::Max64KB.get_size() {
            BlockSize::Max256KB
        } else {
            BlockSize::Max64KB
        }
    }

    /// Returns the block size in bytes.
    ///
    /// # Panics
    ///
    /// Panics when called on `BlockSize::Auto`, which has no fixed size.
    pub(crate) fn get_size(&self) -> usize {
        let kib: usize = match *self {
            BlockSize::Auto => unreachable!(),
            BlockSize::Max64KB => 64,
            BlockSize::Max256KB => 256,
            BlockSize::Max1MB => 1024,
            BlockSize::Max4MB => 4 * 1024,
            BlockSize::Max8MB => 8 * 1024,
        };
        kib * 1024
    }
}
/// The two block modes that can be set on a [`FrameInfo`].
#[derive(Clone, Copy, PartialEq, Debug, Default)]
pub enum BlockMode {
    /// Every block is compressed independently. The default.
    #[default]
    Independent,
    /// Blocks can reference data from previous blocks.
    ///
    /// Effective when the stream contains small blocks.
    Linked,
}
// From: https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md
//
// General Structure of LZ4 Frame format
// -------------------------------------
//
// | MagicNb | F. Descriptor | Block | (...) | EndMark | C. Checksum |
// |:-------:|:-------------:| ----- | ----- | ------- | ----------- |
// | 4 bytes | 3-15 bytes | | | 4 bytes | 0-4 bytes |
//
// Frame Descriptor
// ----------------
//
// | FLG | BD | (Content Size) | (Dictionary ID) | HC |
// | ------- | ------- |:--------------:|:---------------:| ------- |
// | 1 byte | 1 byte | 0 - 8 bytes | 0 - 4 bytes | 1 byte |
//
// __FLG byte__
//
// | BitNb | 7-6 | 5 | 4 | 3 | 2 | 1 | 0 |
// | ------- |-------|-------|----------|------|----------|----------|------|
// |FieldName|Version|B.Indep|B.Checksum|C.Size|C.Checksum|*Reserved*|DictID|
//
// __BD byte__
//
// | BitNb | 7 | 6-5-4 | 3-2-1-0 |
// | ------- | -------- | ------------- | -------- |
// |FieldName|*Reserved*| Block MaxSize |*Reserved*|
//
// Data Blocks
// -----------
//
// | Block Size | data | (Block Checksum) |
// |:----------:| ------ |:----------------:|
// | 4 bytes | | 0 - 4 bytes |
//
#[derive(Debug, Default, Clone)]
/// The metadata for de/compressing with lz4 frame format.
///
/// The fields mirror the frame descriptor: `FrameInfo::write` serializes them into a
/// frame header and `FrameInfo::read` parses them back out of one.
pub struct FrameInfo {
/// If set, includes the total uncompressed size of data in the frame.
pub content_size: Option<u64>,
/// The identifier for the dictionary that must be used to correctly decode data.
/// The compressor and the decompressor must use exactly the same dictionary.
///
/// Note that this is currently unsupported and for this reason it's not pub.
pub(crate) dict_id: Option<u32>,
/// The maximum uncompressed size of each data block.
pub block_size: BlockSize,
/// The block mode.
pub block_mode: BlockMode,
/// If set, includes a checksum for each data block in the frame.
pub block_checksums: bool,
/// If set, includes a content checksum to verify that the full frame contents have been
/// decoded correctly.
pub content_checksum: bool,
/// If set, use the legacy frame format.
///
/// `FrameInfo::read` sets this to `true` when the input starts with the legacy magic
/// number.
pub legacy_frame: bool,
}
impl FrameInfo {
    /// Create a new `FrameInfo` with every field at its default value.
    pub fn new() -> Self {
        Self::default()
    }

    /// Whether to include the total uncompressed size of data in the frame.
    pub fn content_size(mut self, content_size: Option<u64>) -> Self {
        self.content_size = content_size;
        self
    }

    /// The maximum uncompressed size of each data block.
    pub fn block_size(mut self, block_size: BlockSize) -> Self {
        self.block_size = block_size;
        self
    }

    /// The block mode.
    pub fn block_mode(mut self, block_mode: BlockMode) -> Self {
        self.block_mode = block_mode;
        self
    }

    /// If set, includes a checksum for each data block in the frame.
    pub fn block_checksums(mut self, block_checksums: bool) -> Self {
        self.block_checksums = block_checksums;
        self
    }

    /// If set, includes a content checksum to verify that the full frame contents have been
    /// decoded correctly.
    pub fn content_checksum(mut self, content_checksum: bool) -> Self {
        self.content_checksum = content_checksum;
        self
    }

    /// If set, use the legacy frame format.
    pub fn legacy_frame(mut self, legacy_frame: bool) -> Self {
        self.legacy_frame = legacy_frame;
        self
    }

    /// Returns how many header bytes are required, judged from the bytes in `input`.
    ///
    /// * Legacy magic number: `MAGIC_NUMBER_SIZE` (the magic number is the whole header).
    /// * Fewer than `MIN_FRAME_INFO_SIZE` bytes available: `MIN_FRAME_INFO_SIZE`.
    /// * Skippable-frame magic number: 8 (magic number plus the 4-byte length field).
    /// * Regular frame: `MIN_FRAME_INFO_SIZE`, plus 8 when the content-size flag is set,
    ///   plus 4 when the dictionary-id flag is set.
    ///
    /// # Errors
    ///
    /// Returns `Error::WrongMagicNumber` when at least `MIN_FRAME_INFO_SIZE` bytes are
    /// available and the magic number is none of the recognized ones.
    ///
    /// # Panics
    ///
    /// Panics if `input` holds fewer than 4 bytes.
    pub(crate) fn read_size(input: &[u8]) -> Result<usize, Error> {
        let mut required = MIN_FRAME_INFO_SIZE;
        let magic_num = u32::from_le_bytes(input[0..4].try_into().unwrap());
        if magic_num == LZ4F_LEGACY_MAGIC_NUMBER {
            return Ok(MAGIC_NUMBER_SIZE);
        }

        // The FLG byte is inspected below; ask for the minimal header first.
        if input.len() < required {
            return Ok(required);
        }

        if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) {
            return Ok(8);
        }
        if magic_num != LZ4F_MAGIC_NUMBER {
            return Err(Error::WrongMagicNumber);
        }

        if input[4] & FLG_CONTENT_SIZE != 0 {
            required += 8;
        }
        if input[4] & FLG_DICTIONARY_ID != 0 {
            required += 4
        }
        Ok(required)
    }

    /// Returns the number of bytes `write` will emit for this `FrameInfo`.
    pub(crate) fn write_size(&self) -> usize {
        let mut required = MIN_FRAME_INFO_SIZE;
        if self.content_size.is_some() {
            required += 8;
        }
        if self.dict_id.is_some() {
            required += 4;
        }
        required
    }

    /// Serializes the frame header into the start of `output` and returns the number of
    /// bytes written.
    ///
    /// This always emits the regular magic number; `legacy_frame` is not consulted here.
    ///
    /// # Errors
    ///
    /// Returns `Error::IoError` (kind `UnexpectedEof`) when `output` is shorter than
    /// `write_size()`.
    pub(crate) fn write(&self, output: &mut [u8]) -> Result<usize, Error> {
        let write_size = self.write_size();
        if output.len() < write_size {
            return Err(Error::IoError(io::ErrorKind::UnexpectedEof.into()));
        }
        // Assemble the header in a scratch buffer so `output` is only touched once.
        let mut buffer = [0u8; MAX_FRAME_INFO_SIZE];
        assert!(write_size <= buffer.len());
        buffer[0..4].copy_from_slice(&LZ4F_MAGIC_NUMBER.to_le_bytes());

        // FLG byte
        buffer[4] = FLG_SUPPORTED_VERSION_BITS;
        if self.block_checksums {
            buffer[4] |= FLG_BLOCK_CHECKSUMS;
        }
        if self.content_checksum {
            buffer[4] |= FLG_CONTENT_CHECKSUM;
        }
        if self.block_mode == BlockMode::Independent {
            buffer[4] |= FLG_INDEPENDENT_BLOCKS;
        }

        // BD byte
        buffer[5] = (self.block_size as u8) << BD_BLOCK_SIZE_MASK_RSHIFT;

        // Optional section
        let mut offset = 6;
        if let Some(size) = self.content_size {
            buffer[4] |= FLG_CONTENT_SIZE;
            buffer[offset..offset + 8].copy_from_slice(&size.to_le_bytes());
            offset += 8;
        }
        if let Some(dict_id) = self.dict_id {
            buffer[4] |= FLG_DICTIONARY_ID;
            buffer[offset..offset + 4].copy_from_slice(&dict_id.to_le_bytes());
            offset += 4;
        }

        // Header checksum: second byte of the seed-0 xxHash32 of the descriptor bytes
        // (everything after the magic number, up to but excluding the checksum itself).
        let mut hasher = XxHash32::with_seed(0);
        hasher.write(&buffer[4..offset]);
        let header_checksum = (hasher.finish() >> 8) as u8;
        buffer[offset] = header_checksum;
        offset += 1;

        debug_assert_eq!(offset, write_size);
        output[..write_size].copy_from_slice(&buffer[..write_size]);
        Ok(write_size)
    }

    /// Parses a frame header from the start of `input`.
    ///
    /// A legacy magic number yields a `FrameInfo` with `legacy_frame` set and a block size
    /// of `BlockSize::Max8MB`; no further bytes are consumed in that case.
    ///
    /// # Errors
    ///
    /// * An I/O error (via `?`) when `input` ends before the header is complete.
    /// * `Error::SkippableFrame(len)` for a skippable frame, carrying its length field.
    /// * `Error::WrongMagicNumber` for an unrecognized magic number.
    /// * `Error::UnsupportedVersion` when the version bits are not `01`.
    /// * `Error::ReservedBitsSet` when a reserved FLG or BD bit is set.
    /// * `Error::UnsupportedBlocksize` for block size codes 0 through 3.
    /// * `Error::HeaderChecksumError` when the header checksum does not match.
    pub(crate) fn read(mut input: &[u8]) -> Result<FrameInfo, Error> {
        // Kept so the checksum can be computed over the bytes consumed from `input`.
        let original_input = input;

        // 4 byte Magic
        let magic_num = {
            let mut buffer = [0u8; 4];
            input.read_exact(&mut buffer)?;
            u32::from_le_bytes(buffer)
        };
        if magic_num == LZ4F_LEGACY_MAGIC_NUMBER {
            return Ok(FrameInfo {
                block_size: BlockSize::Max8MB,
                legacy_frame: true,
                ..FrameInfo::default()
            });
        }
        if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) {
            let mut buffer = [0u8; 4];
            input.read_exact(&mut buffer)?;
            let user_data_len = u32::from_le_bytes(buffer);
            return Err(Error::SkippableFrame(user_data_len));
        }
        if magic_num != LZ4F_MAGIC_NUMBER {
            return Err(Error::WrongMagicNumber);
        }

        // fixed size section
        let [flg_byte, bd_byte] = {
            let mut buffer = [0u8, 0];
            input.read_exact(&mut buffer)?;
            buffer
        };

        if flg_byte & FLG_VERSION_MASK != FLG_SUPPORTED_VERSION_BITS {
            // version is always 01
            return Err(Error::UnsupportedVersion(flg_byte & FLG_VERSION_MASK));
        }

        if flg_byte & FLG_RESERVED_MASK != 0 || bd_byte & BD_RESERVED_MASK != 0 {
            return Err(Error::ReservedBitsSet);
        }

        let block_mode = if flg_byte & FLG_INDEPENDENT_BLOCKS != 0 {
            BlockMode::Independent
        } else {
            BlockMode::Linked
        };
        let content_checksum = flg_byte & FLG_CONTENT_CHECKSUM != 0;
        let block_checksums = flg_byte & FLG_BLOCK_CHECKSUMS != 0;

        let block_size = match (bd_byte & BD_BLOCK_SIZE_MASK) >> BD_BLOCK_SIZE_MASK_RSHIFT {
            i @ 0..=3 => return Err(Error::UnsupportedBlocksize(i)),
            4 => BlockSize::Max64KB,
            5 => BlockSize::Max256KB,
            6 => BlockSize::Max1MB,
            7 => BlockSize::Max4MB,
            // The mask keeps three bits, so the shifted value never exceeds 7.
            _ => unreachable!(),
        };

        // var len section
        let mut content_size = None;
        if flg_byte & FLG_CONTENT_SIZE != 0 {
            let mut buffer = [0u8; 8];
            // Propagate a truncated header as an error, like every other read here,
            // instead of panicking.
            input.read_exact(&mut buffer)?;
            content_size = Some(u64::from_le_bytes(buffer));
        }

        let mut dict_id = None;
        if flg_byte & FLG_DICTIONARY_ID != 0 {
            let mut buffer = [0u8; 4];
            input.read_exact(&mut buffer)?;
            dict_id = Some(u32::from_le_bytes(buffer));
        }

        // 1 byte header checksum
        let expected_checksum = {
            let mut buffer = [0u8; 1];
            input.read_exact(&mut buffer)?;
            buffer[0]
        };
        // Hash the descriptor bytes: everything consumed so far except the magic number
        // and the checksum byte itself.
        let mut hasher = XxHash32::with_seed(0);
        hasher.write(&original_input[4..original_input.len() - input.len() - 1]);
        let header_hash = (hasher.finish() >> 8) as u8;
        if header_hash != expected_checksum {
            return Err(Error::HeaderChecksumError);
        }

        Ok(FrameInfo {
            content_size,
            dict_id,
            block_size,
            block_mode,
            block_checksums,
            content_checksum,
            legacy_frame: false,
        })
    }
}
#[derive(Debug)]
pub(crate) enum BlockInfo {
Compressed(u32),
Uncompressed(u32),
EndMark,
}
impl BlockInfo {
pub(crate) fn read(mut input: &[u8]) -> Result<Self, Error> {
let mut size_buffer = [0u8; 4];
input.read_exact(&mut size_buffer)?;
let size = u32::from_le_bytes(size_buffer);
if size == 0 {
Ok(BlockInfo::EndMark)
} else if size & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 {
Ok(BlockInfo::Uncompressed(size & !BLOCK_UNCOMPRESSED_SIZE_BIT))
} else {
Ok(BlockInfo::Compressed(size))
}
}
pub(crate) fn write(&self, mut output: &mut [u8]) -> Result<usize, Error> {
let value = match self {
BlockInfo::Compressed(len) if *len == 0 => return Err(Error::InvalidBlockInfo),
BlockInfo::Compressed(len) | BlockInfo::Uncompressed(len)
if *len & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 =>
{
return Err(Error::InvalidBlockInfo)
}
BlockInfo::Compressed(len) => *len,
BlockInfo::Uncompressed(len) => *len | BLOCK_UNCOMPRESSED_SIZE_BIT,
BlockInfo::EndMark => 0,
};
output.write_all(&value.to_le_bytes())?;
Ok(4)
}
}