vendor/libflate/src/huffman.rs - toolchain/rustc - Git at Google

 use std::cmp;
 /// Length-limited Huffman Codes
 ///
 use std::io;

 use bit;

 const MAX_BITWIDTH: u8 = 15;

 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct Code {
     pub width: u8,
     pub bits: u16,
 }
 impl Code {
     pub fn new(width: u8, bits: u16) -> Self {
         debug_assert!(width <= MAX_BITWIDTH);
         Code { width, bits }
     }
     fn inverse_endian(&self) -> Self {
         let mut f = self.bits;
         let mut t = 0;
         for _ in 0..self.width {
             t <<= 1;
             t |= f & 1;
             f >>= 1;
         }
         Code::new(self.width, t)
     }
 }

 pub trait Builder: Sized {
     type Instance;
     fn set_mapping(&mut self, symbol: u16, code: Code) -> io::Result<()>;
     fn finish(self) -> Self::Instance;
     fn restore_canonical_huffman_codes(mut self, bitwidthes: &[u8]) -> io::Result<Self::Instance> {
         debug_assert!(!bitwidthes.is_empty());

         let mut symbols = bitwidthes
             .iter()
             .enumerate()
             .filter(|&(_, &code_bitwidth)| code_bitwidth > 0)
             .map(|(symbol, &code_bitwidth)| (symbol as u16, code_bitwidth))
             .collect::<Vec<_>>();
         symbols.sort_by_key(|x| x.1);

         let mut code = 0;
         let mut prev_width = 0;
         for (symbol, bitwidth) in symbols {
             code <<= bitwidth - prev_width;
             self.set_mapping(symbol, Code::new(bitwidth, code))?;
             code += 1;
             prev_width = bitwidth;
         }
         Ok(self.finish())
     }
 }

 pub struct DecoderBuilder {
     table: Vec<u16>,
     eob_symbol: Option<u16>,
     eob_bitwidth: u8,
     max_bitwidth: u8,
 }
 impl DecoderBuilder {
     pub fn new(max_bitwidth: u8, eob_symbol: Option<u16>) -> Self {
         debug_assert!(max_bitwidth <= MAX_BITWIDTH);
         DecoderBuilder {
             table: vec![u16::from(MAX_BITWIDTH) + 1; 1 << max_bitwidth],
             eob_symbol,
             eob_bitwidth: max_bitwidth,
             max_bitwidth,
         }
     }
     pub fn from_bitwidthes(bitwidthes: &[u8], eob_symbol: Option<u16>) -> io::Result<Decoder> {
         let builder = Self::new(bitwidthes.iter().cloned().max().unwrap_or(0), eob_symbol);
         builder.restore_canonical_huffman_codes(bitwidthes)
     }
 }
 impl Builder for DecoderBuilder {
     type Instance = Decoder;
     fn set_mapping(&mut self, symbol: u16, code: Code) -> io::Result<()> {
         debug_assert!(code.width <= self.max_bitwidth);
         if Some(symbol) == self.eob_symbol {
             self.eob_bitwidth = code.width;
         }

         // `bitwidth` encoded `to` value
         let value = (symbol << 5) | u16::from(code.width);

         // Sets the mapping to all possible indices
         let code_be = code.inverse_endian();
         for padding in 0..(1 << (self.max_bitwidth - code.width)) {
             let i = ((padding << code.width) | code_be.bits) as usize;
             if self.table[i] != u16::from(MAX_BITWIDTH) + 1 {
                 let message = format!(
                     "Bit region conflict: i={}, old_value={}, new_value={}, symbol={}, code={:?}",
                     i, self.table[i], value, symbol, code
                 );
                 return Err(io::Error::new(io::ErrorKind::InvalidData, message));
             }
             self.table[i] = value;
         }
         Ok(())
     }
     fn finish(self) -> Self::Instance {
         Decoder {
             table: self.table,
             eob_bitwidth: self.eob_bitwidth,
             max_bitwidth: self.max_bitwidth,
         }
     }
 }

 #[derive(Debug)]
 pub struct Decoder {
     table: Vec<u16>,
     eob_bitwidth: u8,
     max_bitwidth: u8,
 }
 impl Decoder {
     #[inline(always)]
     pub fn decode<R>(&self, reader: &mut bit::BitReader<R>) -> io::Result<u16>
     where
         R: io::Read,
     {
         let v = self.decode_unchecked(reader);
         reader.check_last_error()?;
         Ok(v)
     }

     #[inline(always)]
     pub fn decode_unchecked<R>(&self, reader: &mut bit::BitReader<R>) -> u16
     where
         R: io::Read,
     {
         let code = reader.peek_bits_unchecked(self.eob_bitwidth);
         let mut value = self.table[code as usize];
         let mut bitwidth = (value & 0b1_1111) as u8;
         if bitwidth > self.eob_bitwidth {
             let code = reader.peek_bits_unchecked(self.max_bitwidth);
             value = self.table[code as usize];
             bitwidth = (value & 0b1_1111) as u8;
             if bitwidth > self.max_bitwidth {
                 reader.set_last_error(invalid_data_error!("Invalid huffman coded stream"));
             }
         }
         reader.skip_bits(bitwidth as u8);
         value >> 5
     }
 }

 #[derive(Debug)]
 pub struct EncoderBuilder {
     table: Vec<Code>,
 }
 impl EncoderBuilder {
     pub fn new(symbol_count: usize) -> Self {
         EncoderBuilder {
             table: vec![Code::new(0, 0); symbol_count],
         }
     }
     pub fn from_bitwidthes(bitwidthes: &[u8]) -> io::Result<Encoder> {
         let symbol_count = bitwidthes
             .iter()
             .enumerate()
             .filter(|e| *e.1 > 0)
             .last()
             .map_or(0, |e| e.0)
             + 1;
         let builder = Self::new(symbol_count);
         builder.restore_canonical_huffman_codes(bitwidthes)
     }
     pub fn from_frequencies(symbol_frequencies: &[usize], max_bitwidth: u8) -> io::Result<Encoder> {
         let max_bitwidth = cmp::min(
             max_bitwidth,
             ordinary_huffman_codes::calc_optimal_max_bitwidth(symbol_frequencies),
         );
         let code_bitwidthes = length_limited_huffman_codes::calc(max_bitwidth, symbol_frequencies);
         Self::from_bitwidthes(&code_bitwidthes)
     }
 }
 impl Builder for EncoderBuilder {
     type Instance = Encoder;
     fn set_mapping(&mut self, symbol: u16, code: Code) -> io::Result<()> {
         debug_assert_eq!(self.table[symbol as usize], Code::new(0, 0));
         self.table[symbol as usize] = code.inverse_endian();
         Ok(())
     }
     fn finish(self) -> Self::Instance {
         Encoder { table: self.table }
     }
 }

 #[derive(Debug, Clone)]
 pub struct Encoder {
     table: Vec<Code>,
 }
 impl Encoder {
     #[inline(always)]
     pub fn encode<W>(&self, writer: &mut bit::BitWriter<W>, symbol: u16) -> io::Result<()>
     where
         W: io::Write,
     {
         let code = self.lookup(symbol);
         debug_assert_ne!(code, Code::new(0, 0));
         writer.write_bits(code.width, code.bits)
     }
     #[inline(always)]
     pub fn lookup(&self, symbol: u16) -> Code {
         debug_assert!(
             symbol < self.table.len() as u16,
             "symbol:{}, table:{}",
             symbol,
             self.table.len()
         );
         self.table[symbol as usize].clone()
     }
     pub fn used_max_symbol(&self) -> Option<u16> {
         self.table
             .iter()
             .rev()
             .position(|x| x.width > 0)
             .map(|trailing_zeros| (self.table.len() - 1 - trailing_zeros) as u16)
     }
 }

 #[allow(dead_code)]
 mod ordinary_huffman_codes {
     use std::cmp;
     use std::collections::BinaryHeap;

     pub fn calc_optimal_max_bitwidth(frequencies: &[usize]) -> u8 {
         let mut heap = BinaryHeap::new();
         for &freq in frequencies.iter().filter(|&&f| f > 0) {
             let weight = -(freq as isize);
             heap.push((weight, 0 as u8));
         }
         while heap.len() > 1 {
             let (weight1, width1) = heap.pop().unwrap();
             let (weight2, width2) = heap.pop().unwrap();
             heap.push((weight1 + weight2, 1 + cmp::max(width1, width2)));
         }
         let max_bitwidth = heap.pop().map_or(0, |x| x.1);
         cmp::max(1, max_bitwidth)
     }
 }
 mod length_limited_huffman_codes {
     use std::mem;

     #[derive(Debug, Clone)]
     struct Node {
         symbols: Vec<u16>,
         weight: usize,
     }
     impl Node {
         pub fn empty() -> Self {
             Node {
                 symbols: vec![],
                 weight: 0,
             }
         }
         pub fn single(symbol: u16, weight: usize) -> Self {
             Node {
                 symbols: vec![symbol],
                 weight,
             }
         }
         pub fn merge(&mut self, other: Self) {
             self.weight += other.weight;
             self.symbols.extend(other.symbols);
         }
     }

     /// Reference: [A Fast Algorithm for Optimal Length-Limited Huffman Codes][LenLimHuff.pdf]
     ///
     /// [LenLimHuff.pdf]: https://www.ics.uci.edu/~dan/pubs/LenLimHuff.pdf
     pub fn calc(max_bitwidth: u8, frequencies: &[usize]) -> Vec<u8> {
         // NOTE: unoptimized implementation
         let mut source = frequencies
             .iter()
             .enumerate()
             .filter(|&(_, &f)| f > 0)
             .map(|(symbol, &weight)| Node::single(symbol as u16, weight))
             .collect::<Vec<_>>();
         source.sort_by_key(|o| o.weight);

         let weighted =
             (0..max_bitwidth - 1).fold(source.clone(), |w, _| merge(package(w), source.clone()));

         let mut code_bitwidthes = vec![0; frequencies.len()];
         for symbol in package(weighted)
             .into_iter()
             .flat_map(|n| n.symbols.into_iter())
         {
             code_bitwidthes[symbol as usize] += 1;
         }
         code_bitwidthes
     }
     fn merge(x: Vec<Node>, y: Vec<Node>) -> Vec<Node> {
         let mut z = Vec::with_capacity(x.len() + y.len());
         let mut x = x.into_iter().peekable();
         let mut y = y.into_iter().peekable();
         loop {
             let x_weight = x.peek().map(|s| s.weight);
             let y_weight = y.peek().map(|s| s.weight);
             if x_weight.is_none() {
                 z.extend(y);
                 break;
             } else if y_weight.is_none() {
                 z.extend(x);
                 break;
             } else if x_weight < y_weight {
                 z.push(x.next().unwrap());
             } else {
                 z.push(y.next().unwrap());
             }
         }
         z
     }
     fn package(mut nodes: Vec<Node>) -> Vec<Node> {
         if nodes.len() >= 2 {
             let new_len = nodes.len() / 2;

             for i in 0..new_len {
                 nodes[i] = mem::replace(&mut nodes[i * 2], Node::empty());
                 let other = mem::replace(&mut nodes[i * 2 + 1], Node::empty());
                 nodes[i].merge(other);
             }
             nodes.truncate(new_len);
         }
         nodes
     }
 }

 #[cfg(test)]
 mod test {
     #[test]
     fn it_works() {}
 }
	use std::cmp;
	/// Length-limited Huffman Codes
	///
	use std::io;

	use bit;

	const MAX_BITWIDTH: u8 = 15;

	#[derive(Debug, Clone, PartialEq, Eq)]
	pub struct Code {
	pub width: u8,
	pub bits: u16,
	}
	impl Code {
	pub fn new(width: u8, bits: u16) -> Self {
	debug_assert!(width <= MAX_BITWIDTH);
	Code { width, bits }
	}
	fn inverse_endian(&self) -> Self {
	let mut f = self.bits;
	let mut t = 0;
	for _ in 0..self.width {
	t <<= 1;
	t \|= f & 1;
	f >>= 1;
	}
	Code::new(self.width, t)
	}
	}

	pub trait Builder: Sized {
	type Instance;
	fn set_mapping(&mut self, symbol: u16, code: Code) -> io::Result<()>;
	fn finish(self) -> Self::Instance;
	fn restore_canonical_huffman_codes(mut self, bitwidthes: &[u8]) -> io::Result<Self::Instance> {
	debug_assert!(!bitwidthes.is_empty());

	let mut symbols = bitwidthes
	.iter()
	.enumerate()
	.filter(\|&(_, &code_bitwidth)\| code_bitwidth > 0)
	.map(\|(symbol, &code_bitwidth)\| (symbol as u16, code_bitwidth))
	.collect::<Vec<_>>();
	symbols.sort_by_key(\|x\| x.1);

	let mut code = 0;
	let mut prev_width = 0;
	for (symbol, bitwidth) in symbols {
	code <<= bitwidth - prev_width;
	self.set_mapping(symbol, Code::new(bitwidth, code))?;
	code += 1;
	prev_width = bitwidth;
	}
	Ok(self.finish())
	}
	}

	pub struct DecoderBuilder {
	table: Vec<u16>,
	eob_symbol: Option<u16>,
	eob_bitwidth: u8,
	max_bitwidth: u8,
	}
	impl DecoderBuilder {
	pub fn new(max_bitwidth: u8, eob_symbol: Option<u16>) -> Self {
	debug_assert!(max_bitwidth <= MAX_BITWIDTH);
	DecoderBuilder {
	table: vec![u16::from(MAX_BITWIDTH) + 1; 1 << max_bitwidth],
	eob_symbol,
	eob_bitwidth: max_bitwidth,
	max_bitwidth,
	}
	}
	pub fn from_bitwidthes(bitwidthes: &[u8], eob_symbol: Option<u16>) -> io::Result<Decoder> {
	let builder = Self::new(bitwidthes.iter().cloned().max().unwrap_or(0), eob_symbol);
	builder.restore_canonical_huffman_codes(bitwidthes)
	}
	}
	impl Builder for DecoderBuilder {
	type Instance = Decoder;
	fn set_mapping(&mut self, symbol: u16, code: Code) -> io::Result<()> {
	debug_assert!(code.width <= self.max_bitwidth);
	if Some(symbol) == self.eob_symbol {
	self.eob_bitwidth = code.width;
	}

	// `bitwidth` encoded `to` value
	let value = (symbol << 5) \| u16::from(code.width);

	// Sets the mapping to all possible indices
	let code_be = code.inverse_endian();
	for padding in 0..(1 << (self.max_bitwidth - code.width)) {
	let i = ((padding << code.width) \| code_be.bits) as usize;
	if self.table[i] != u16::from(MAX_BITWIDTH) + 1 {
	let message = format!(
	"Bit region conflict: i={}, old_value={}, new_value={}, symbol={}, code={:?}",
	i, self.table[i], value, symbol, code
	);
	return Err(io::Error::new(io::ErrorKind::InvalidData, message));
	}
	self.table[i] = value;
	}
	Ok(())
	}
	fn finish(self) -> Self::Instance {
	Decoder {
	table: self.table,
	eob_bitwidth: self.eob_bitwidth,
	max_bitwidth: self.max_bitwidth,
	}
	}
	}

	#[derive(Debug)]
	pub struct Decoder {
	table: Vec<u16>,
	eob_bitwidth: u8,
	max_bitwidth: u8,
	}
	impl Decoder {
	#[inline(always)]
	pub fn decode<R>(&self, reader: &mut bit::BitReader<R>) -> io::Result<u16>
	where
	R: io::Read,
	{
	let v = self.decode_unchecked(reader);
	reader.check_last_error()?;
	Ok(v)
	}

	#[inline(always)]
	pub fn decode_unchecked<R>(&self, reader: &mut bit::BitReader<R>) -> u16
	where
	R: io::Read,
	{
	let code = reader.peek_bits_unchecked(self.eob_bitwidth);
	let mut value = self.table[code as usize];
	let mut bitwidth = (value & 0b1_1111) as u8;
	if bitwidth > self.eob_bitwidth {
	let code = reader.peek_bits_unchecked(self.max_bitwidth);
	value = self.table[code as usize];
	bitwidth = (value & 0b1_1111) as u8;
	if bitwidth > self.max_bitwidth {
	reader.set_last_error(invalid_data_error!("Invalid huffman coded stream"));
	}
	}
	reader.skip_bits(bitwidth as u8);
	value >> 5
	}
	}

	#[derive(Debug)]
	pub struct EncoderBuilder {
	table: Vec<Code>,
	}
	impl EncoderBuilder {
	pub fn new(symbol_count: usize) -> Self {
	EncoderBuilder {
	table: vec![Code::new(0, 0); symbol_count],
	}
	}
	pub fn from_bitwidthes(bitwidthes: &[u8]) -> io::Result<Encoder> {
	let symbol_count = bitwidthes
	.iter()
	.enumerate()
	.filter(\|e\| *e.1 > 0)
	.last()
	.map_or(0, \|e\| e.0)
	+ 1;
	let builder = Self::new(symbol_count);
	builder.restore_canonical_huffman_codes(bitwidthes)
	}
	pub fn from_frequencies(symbol_frequencies: &[usize], max_bitwidth: u8) -> io::Result<Encoder> {
	let max_bitwidth = cmp::min(
	max_bitwidth,
	ordinary_huffman_codes::calc_optimal_max_bitwidth(symbol_frequencies),
	);
	let code_bitwidthes = length_limited_huffman_codes::calc(max_bitwidth, symbol_frequencies);
	Self::from_bitwidthes(&code_bitwidthes)
	}
	}
	impl Builder for EncoderBuilder {
	type Instance = Encoder;
	fn set_mapping(&mut self, symbol: u16, code: Code) -> io::Result<()> {
	debug_assert_eq!(self.table[symbol as usize], Code::new(0, 0));
	self.table[symbol as usize] = code.inverse_endian();
	Ok(())
	}
	fn finish(self) -> Self::Instance {
	Encoder { table: self.table }
	}
	}

	#[derive(Debug, Clone)]
	pub struct Encoder {
	table: Vec<Code>,
	}
	impl Encoder {
	#[inline(always)]
	pub fn encode<W>(&self, writer: &mut bit::BitWriter<W>, symbol: u16) -> io::Result<()>
	where
	W: io::Write,
	{
	let code = self.lookup(symbol);
	debug_assert_ne!(code, Code::new(0, 0));
	writer.write_bits(code.width, code.bits)
	}
	#[inline(always)]
	pub fn lookup(&self, symbol: u16) -> Code {
	debug_assert!(
	symbol < self.table.len() as u16,
	"symbol:{}, table:{}",
	symbol,
	self.table.len()
	);
	self.table[symbol as usize].clone()
	}
	pub fn used_max_symbol(&self) -> Option<u16> {
	self.table
	.iter()
	.rev()
	.position(\|x\| x.width > 0)
	.map(\|trailing_zeros\| (self.table.len() - 1 - trailing_zeros) as u16)
	}
	}

	#[allow(dead_code)]
	mod ordinary_huffman_codes {
	use std::cmp;
	use std::collections::BinaryHeap;

	pub fn calc_optimal_max_bitwidth(frequencies: &[usize]) -> u8 {
	let mut heap = BinaryHeap::new();
	for &freq in frequencies.iter().filter(\|&&f\| f > 0) {
	let weight = -(freq as isize);
	heap.push((weight, 0 as u8));
	}
	while heap.len() > 1 {
	let (weight1, width1) = heap.pop().unwrap();
	let (weight2, width2) = heap.pop().unwrap();
	heap.push((weight1 + weight2, 1 + cmp::max(width1, width2)));
	}
	let max_bitwidth = heap.pop().map_or(0, \|x\| x.1);
	cmp::max(1, max_bitwidth)
	}
	}
	mod length_limited_huffman_codes {
	use std::mem;

	#[derive(Debug, Clone)]
	struct Node {
	symbols: Vec<u16>,
	weight: usize,
	}
	impl Node {
	pub fn empty() -> Self {
	Node {
	symbols: vec![],
	weight: 0,
	}
	}
	pub fn single(symbol: u16, weight: usize) -> Self {
	Node {
	symbols: vec![symbol],
	weight,
	}
	}
	pub fn merge(&mut self, other: Self) {
	self.weight += other.weight;
	self.symbols.extend(other.symbols);
	}
	}

	/// Reference: [A Fast Algorithm for Optimal Length-Limited Huffman Codes][LenLimHuff.pdf]
	///
	/// [LenLimHuff.pdf]: https://www.ics.uci.edu/~dan/pubs/LenLimHuff.pdf
	pub fn calc(max_bitwidth: u8, frequencies: &[usize]) -> Vec<u8> {
	// NOTE: unoptimized implementation
	let mut source = frequencies
	.iter()
	.enumerate()
	.filter(\|&(_, &f)\| f > 0)
	.map(\|(symbol, &weight)\| Node::single(symbol as u16, weight))
	.collect::<Vec<_>>();
	source.sort_by_key(\|o\| o.weight);

	let weighted =
	(0..max_bitwidth - 1).fold(source.clone(), \|w, _\| merge(package(w), source.clone()));

	let mut code_bitwidthes = vec![0; frequencies.len()];
	for symbol in package(weighted)
	.into_iter()
	.flat_map(\|n\| n.symbols.into_iter())
	{
	code_bitwidthes[symbol as usize] += 1;
	}
	code_bitwidthes
	}
	fn merge(x: Vec<Node>, y: Vec<Node>) -> Vec<Node> {
	let mut z = Vec::with_capacity(x.len() + y.len());
	let mut x = x.into_iter().peekable();
	let mut y = y.into_iter().peekable();
	loop {
	let x_weight = x.peek().map(\|s\| s.weight);
	let y_weight = y.peek().map(\|s\| s.weight);
	if x_weight.is_none() {
	z.extend(y);
	break;
	} else if y_weight.is_none() {
	z.extend(x);
	break;
	} else if x_weight < y_weight {
	z.push(x.next().unwrap());
	} else {
	z.push(y.next().unwrap());
	}
	}
	z
	}
	fn package(mut nodes: Vec<Node>) -> Vec<Node> {
	if nodes.len() >= 2 {
	let new_len = nodes.len() / 2;

	for i in 0..new_len {
	nodes[i] = mem::replace(&mut nodes[i * 2], Node::empty());
	let other = mem::replace(&mut nodes[i * 2 + 1], Node::empty());
	nodes[i].merge(other);
	}
	nodes.truncate(new_len);
	}
	nodes
	}
	}

	#[cfg(test)]
	mod test {
	#[test]
	fn it_works() {}
	}