blob: e95e9e60b7d7fd3e6aa4cd8dbffbad3f6d70354d [file] [log] [blame]
use super::super::blocks::sequence_section::ModeType;
use super::super::blocks::sequence_section::Sequence;
use super::super::blocks::sequence_section::SequencesHeader;
use super::bit_reader_reverse::{BitReaderReversed, GetBitsError};
use super::scratch::FSEScratch;
use crate::fse::{FSEDecoder, FSEDecoderError, FSETableError};
use alloc::vec::Vec;
#[derive(Debug, derive_more::Display, derive_more::From)]
#[cfg_attr(feature = "std", derive(derive_more::Error))]
#[non_exhaustive]
pub enum DecodeSequenceError {
#[display(fmt = "{_0:?}")]
#[from]
GetBitsError(GetBitsError),
#[display(fmt = "{_0:?}")]
#[from]
FSEDecoderError(FSEDecoderError),
#[display(fmt = "{_0:?}")]
#[from]
FSETableError(FSETableError),
#[display(
fmt = "Padding at the end of the sequence_section was more than a byte long: {skipped_bits} bits. Probably caused by data corruption"
)]
ExtraPadding { skipped_bits: i32 },
#[display(fmt = "Do not support offsets bigger than 1<<32; got: {offset_code}")]
UnsupportedOffset { offset_code: u8 },
#[display(fmt = "Read an offset == 0. That is an illegal value for offsets")]
ZeroOffset,
#[display(fmt = "Bytestream did not contain enough bytes to decode num_sequences")]
NotEnoughBytesForNumSequences,
#[display(fmt = "Did not use full bitstream. Bits left: {bits_remaining} ({} bytes)", bits_remaining / 8)]
ExtraBits { bits_remaining: isize },
#[display(fmt = "compression modes are none but they must be set to something")]
MissingCompressionMode,
#[display(fmt = "Need a byte to read for RLE ll table")]
MissingByteForRleLlTable,
#[display(fmt = "Need a byte to read for RLE of table")]
MissingByteForRleOfTable,
#[display(fmt = "Need a byte to read for RLE ml table")]
MissingByteForRleMlTable,
}
pub fn decode_sequences(
section: &SequencesHeader,
source: &[u8],
scratch: &mut FSEScratch,
target: &mut Vec<Sequence>,
) -> Result<(), DecodeSequenceError> {
let bytes_read = maybe_update_fse_tables(section, source, scratch)?;
vprintln!("Updating tables used {} bytes", bytes_read);
let bit_stream = &source[bytes_read..];
let mut br = BitReaderReversed::new(bit_stream);
//skip the 0 padding at the end of the last byte of the bit stream and throw away the first 1 found
let mut skipped_bits = 0;
loop {
let val = br.get_bits(1)?;
skipped_bits += 1;
if val == 1 || skipped_bits > 8 {
break;
}
}
if skipped_bits > 8 {
//if more than 7 bits are 0, this is not the correct end of the bitstream. Either a bug or corrupted data
return Err(DecodeSequenceError::ExtraPadding { skipped_bits });
}
if scratch.ll_rle.is_some() || scratch.ml_rle.is_some() || scratch.of_rle.is_some() {
decode_sequences_with_rle(section, &mut br, scratch, target)
} else {
decode_sequences_without_rle(section, &mut br, scratch, target)
}
}
fn decode_sequences_with_rle(
section: &SequencesHeader,
br: &mut BitReaderReversed<'_>,
scratch: &FSEScratch,
target: &mut Vec<Sequence>,
) -> Result<(), DecodeSequenceError> {
let mut ll_dec = FSEDecoder::new(&scratch.literal_lengths);
let mut ml_dec = FSEDecoder::new(&scratch.match_lengths);
let mut of_dec = FSEDecoder::new(&scratch.offsets);
if scratch.ll_rle.is_none() {
ll_dec.init_state(br)?;
}
if scratch.of_rle.is_none() {
of_dec.init_state(br)?;
}
if scratch.ml_rle.is_none() {
ml_dec.init_state(br)?;
}
target.clear();
target.reserve(section.num_sequences as usize);
for _seq_idx in 0..section.num_sequences {
//get the codes from either the RLE byte or from the decoder
let ll_code = if scratch.ll_rle.is_some() {
scratch.ll_rle.unwrap()
} else {
ll_dec.decode_symbol()
};
let ml_code = if scratch.ml_rle.is_some() {
scratch.ml_rle.unwrap()
} else {
ml_dec.decode_symbol()
};
let of_code = if scratch.of_rle.is_some() {
scratch.of_rle.unwrap()
} else {
of_dec.decode_symbol()
};
let (ll_value, ll_num_bits) = lookup_ll_code(ll_code);
let (ml_value, ml_num_bits) = lookup_ml_code(ml_code);
//println!("Sequence: {}", i);
//println!("of stat: {}", of_dec.state);
//println!("of Code: {}", of_code);
//println!("ll stat: {}", ll_dec.state);
//println!("ll bits: {}", ll_num_bits);
//println!("ll Code: {}", ll_value);
//println!("ml stat: {}", ml_dec.state);
//println!("ml bits: {}", ml_num_bits);
//println!("ml Code: {}", ml_value);
//println!("");
if of_code >= 32 {
return Err(DecodeSequenceError::UnsupportedOffset {
offset_code: of_code,
});
}
let (obits, ml_add, ll_add) = br.get_bits_triple(of_code, ml_num_bits, ll_num_bits)?;
let offset = obits as u32 + (1u32 << of_code);
if offset == 0 {
return Err(DecodeSequenceError::ZeroOffset);
}
target.push(Sequence {
ll: ll_value + ll_add as u32,
ml: ml_value + ml_add as u32,
of: offset,
});
if target.len() < section.num_sequences as usize {
//println!(
// "Bits left: {} ({} bytes)",
// br.bits_remaining(),
// br.bits_remaining() / 8,
//);
if scratch.ll_rle.is_none() {
ll_dec.update_state(br)?;
}
if scratch.ml_rle.is_none() {
ml_dec.update_state(br)?;
}
if scratch.of_rle.is_none() {
of_dec.update_state(br)?;
}
}
if br.bits_remaining() < 0 {
return Err(DecodeSequenceError::NotEnoughBytesForNumSequences);
}
}
if br.bits_remaining() > 0 {
Err(DecodeSequenceError::ExtraBits {
bits_remaining: br.bits_remaining(),
})
} else {
Ok(())
}
}
fn decode_sequences_without_rle(
section: &SequencesHeader,
br: &mut BitReaderReversed<'_>,
scratch: &FSEScratch,
target: &mut Vec<Sequence>,
) -> Result<(), DecodeSequenceError> {
let mut ll_dec = FSEDecoder::new(&scratch.literal_lengths);
let mut ml_dec = FSEDecoder::new(&scratch.match_lengths);
let mut of_dec = FSEDecoder::new(&scratch.offsets);
ll_dec.init_state(br)?;
of_dec.init_state(br)?;
ml_dec.init_state(br)?;
target.clear();
target.reserve(section.num_sequences as usize);
for _seq_idx in 0..section.num_sequences {
let ll_code = ll_dec.decode_symbol();
let ml_code = ml_dec.decode_symbol();
let of_code = of_dec.decode_symbol();
let (ll_value, ll_num_bits) = lookup_ll_code(ll_code);
let (ml_value, ml_num_bits) = lookup_ml_code(ml_code);
if of_code >= 32 {
return Err(DecodeSequenceError::UnsupportedOffset {
offset_code: of_code,
});
}
let (obits, ml_add, ll_add) = br.get_bits_triple(of_code, ml_num_bits, ll_num_bits)?;
let offset = obits as u32 + (1u32 << of_code);
if offset == 0 {
return Err(DecodeSequenceError::ZeroOffset);
}
target.push(Sequence {
ll: ll_value + ll_add as u32,
ml: ml_value + ml_add as u32,
of: offset,
});
if target.len() < section.num_sequences as usize {
//println!(
// "Bits left: {} ({} bytes)",
// br.bits_remaining(),
// br.bits_remaining() / 8,
//);
ll_dec.update_state(br)?;
ml_dec.update_state(br)?;
of_dec.update_state(br)?;
}
if br.bits_remaining() < 0 {
return Err(DecodeSequenceError::NotEnoughBytesForNumSequences);
}
}
if br.bits_remaining() > 0 {
Err(DecodeSequenceError::ExtraBits {
bits_remaining: br.bits_remaining(),
})
} else {
Ok(())
}
}
fn lookup_ll_code(code: u8) -> (u32, u8) {
match code {
0..=15 => (u32::from(code), 0),
16 => (16, 1),
17 => (18, 1),
18 => (20, 1),
19 => (22, 1),
20 => (24, 2),
21 => (28, 2),
22 => (32, 3),
23 => (40, 3),
24 => (48, 4),
25 => (64, 6),
26 => (128, 7),
27 => (256, 8),
28 => (512, 9),
29 => (1024, 10),
30 => (2048, 11),
31 => (4096, 12),
32 => (8192, 13),
33 => (16384, 14),
34 => (32768, 15),
35 => (65536, 16),
_ => (0, 255),
}
}
fn lookup_ml_code(code: u8) -> (u32, u8) {
match code {
0..=31 => (u32::from(code) + 3, 0),
32 => (35, 1),
33 => (37, 1),
34 => (39, 1),
35 => (41, 1),
36 => (43, 2),
37 => (47, 2),
38 => (51, 3),
39 => (59, 3),
40 => (67, 4),
41 => (83, 4),
42 => (99, 5),
43 => (131, 7),
44 => (259, 8),
45 => (515, 9),
46 => (1027, 10),
47 => (2051, 11),
48 => (4099, 12),
49 => (8195, 13),
50 => (16387, 14),
51 => (32771, 15),
52 => (65539, 16),
_ => (0, 255),
}
}
pub const LL_MAX_LOG: u8 = 9;
pub const ML_MAX_LOG: u8 = 9;
pub const OF_MAX_LOG: u8 = 8;
fn maybe_update_fse_tables(
section: &SequencesHeader,
source: &[u8],
scratch: &mut FSEScratch,
) -> Result<usize, DecodeSequenceError> {
let modes = section
.modes
.ok_or(DecodeSequenceError::MissingCompressionMode)?;
let mut bytes_read = 0;
match modes.ll_mode() {
ModeType::FSECompressed => {
let bytes = scratch.literal_lengths.build_decoder(source, LL_MAX_LOG)?;
bytes_read += bytes;
vprintln!("Updating ll table");
vprintln!("Used bytes: {}", bytes);
scratch.ll_rle = None;
}
ModeType::RLE => {
vprintln!("Use RLE ll table");
if source.is_empty() {
return Err(DecodeSequenceError::MissingByteForRleLlTable);
}
bytes_read += 1;
scratch.ll_rle = Some(source[0]);
}
ModeType::Predefined => {
vprintln!("Use predefined ll table");
scratch.literal_lengths.build_from_probabilities(
LL_DEFAULT_ACC_LOG,
&Vec::from(&LITERALS_LENGTH_DEFAULT_DISTRIBUTION[..]),
)?;
scratch.ll_rle = None;
}
ModeType::Repeat => {
vprintln!("Repeat ll table");
/* Nothing to do */
}
};
let of_source = &source[bytes_read..];
match modes.of_mode() {
ModeType::FSECompressed => {
let bytes = scratch.offsets.build_decoder(of_source, OF_MAX_LOG)?;
vprintln!("Updating of table");
vprintln!("Used bytes: {}", bytes);
bytes_read += bytes;
scratch.of_rle = None;
}
ModeType::RLE => {
vprintln!("Use RLE of table");
if of_source.is_empty() {
return Err(DecodeSequenceError::MissingByteForRleOfTable);
}
bytes_read += 1;
scratch.of_rle = Some(of_source[0]);
}
ModeType::Predefined => {
vprintln!("Use predefined of table");
scratch.offsets.build_from_probabilities(
OF_DEFAULT_ACC_LOG,
&Vec::from(&OFFSET_DEFAULT_DISTRIBUTION[..]),
)?;
scratch.of_rle = None;
}
ModeType::Repeat => {
vprintln!("Repeat of table");
/* Nothing to do */
}
};
let ml_source = &source[bytes_read..];
match modes.ml_mode() {
ModeType::FSECompressed => {
let bytes = scratch.match_lengths.build_decoder(ml_source, ML_MAX_LOG)?;
bytes_read += bytes;
vprintln!("Updating ml table");
vprintln!("Used bytes: {}", bytes);
scratch.ml_rle = None;
}
ModeType::RLE => {
vprintln!("Use RLE ml table");
if ml_source.is_empty() {
return Err(DecodeSequenceError::MissingByteForRleMlTable);
}
bytes_read += 1;
scratch.ml_rle = Some(ml_source[0]);
}
ModeType::Predefined => {
vprintln!("Use predefined ml table");
scratch.match_lengths.build_from_probabilities(
ML_DEFAULT_ACC_LOG,
&Vec::from(&MATCH_LENGTH_DEFAULT_DISTRIBUTION[..]),
)?;
scratch.ml_rle = None;
}
ModeType::Repeat => {
vprintln!("Repeat ml table");
/* Nothing to do */
}
};
Ok(bytes_read)
}
const LL_DEFAULT_ACC_LOG: u8 = 6;
const LITERALS_LENGTH_DEFAULT_DISTRIBUTION: [i32; 36] = [
4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
-1, -1, -1, -1,
];
const ML_DEFAULT_ACC_LOG: u8 = 6;
const MATCH_LENGTH_DEFAULT_DISTRIBUTION: [i32; 53] = [
1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1,
];
const OF_DEFAULT_ACC_LOG: u8 = 5;
const OFFSET_DEFAULT_DISTRIBUTION: [i32; 29] = [
1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1,
];
#[test]
fn test_ll_default() {
let mut table = crate::fse::FSETable::new();
table
.build_from_probabilities(
LL_DEFAULT_ACC_LOG,
&Vec::from(&LITERALS_LENGTH_DEFAULT_DISTRIBUTION[..]),
)
.unwrap();
#[cfg(feature = "std")]
for idx in 0..table.decode.len() {
std::println!(
"{:3}: {:3} {:3} {:3}",
idx,
table.decode[idx].symbol,
table.decode[idx].num_bits,
table.decode[idx].base_line
);
}
assert!(table.decode.len() == 64);
//just test a few values. TODO test all values
assert!(table.decode[0].symbol == 0);
assert!(table.decode[0].num_bits == 4);
assert!(table.decode[0].base_line == 0);
assert!(table.decode[19].symbol == 27);
assert!(table.decode[19].num_bits == 6);
assert!(table.decode[19].base_line == 0);
assert!(table.decode[39].symbol == 25);
assert!(table.decode[39].num_bits == 4);
assert!(table.decode[39].base_line == 16);
assert!(table.decode[60].symbol == 35);
assert!(table.decode[60].num_bits == 6);
assert!(table.decode[60].base_line == 0);
assert!(table.decode[59].symbol == 24);
assert!(table.decode[59].num_bits == 5);
assert!(table.decode[59].base_line == 32);
}