diff options
Diffstat (limited to 'test2/src/huffman.rs')
-rw-r--r-- | test2/src/huffman.rs | 181 |
1 files changed, 181 insertions, 0 deletions
diff --git a/test2/src/huffman.rs b/test2/src/huffman.rs new file mode 100644 index 0000000..40a6730 --- /dev/null +++ b/test2/src/huffman.rs @@ -0,0 +1,181 @@ +#[derive(Debug, Clone)] +enum HT { + Branch(Box<HT>, Box<HT>), + Terminal(u8), +} + +pub fn encode_huff(buf: &[u8]) -> Vec<u8> { + let mut w = BitIO::new(Vec::new()); + assert!(buf.len() <= 0xffffff, "huff frame too big"); + w.wbyte((buf.len() & 0xff) as u8); + w.wbyte(((buf.len() >> 8) & 0xff) as u8); + w.wbyte(((buf.len() >> 16) & 0xff) as u8); + + let mut probs = [0usize; 256]; + for b in buf { + probs[*b as usize] += 1; + } + let tree = HT::from_probabilities(probs); + let mut table = [0u32; 256]; + tree.create_lut(&mut table, 1); + tree.write(&mut w); + + for b in buf { + let mut k = table[*b as usize]; + while k != 1 { + w.wbit((k & 1) == 1); + k >>= 1; + } + } + + w.flush(); + w.buffer +} + +pub fn decode_huff(r: Vec<u8>) -> Vec<u8> { + let mut r = BitIO::new(r); + + let mut len = 0usize; + len |= r.rbyte() as usize; + len |= (r.rbyte() as usize) << 8; + len |= (r.rbyte() as usize) << 16; + + let root = HT::read(&mut r); + let root = match &root { + HT::Branch(a, b) => [a, b], + _ => panic!("no!"), + }; + + let mut cursor = root; + let mut buf = Vec::new(); + while buf.len() != len { + let v = r.rbit(); + match cursor[v as usize].as_ref() { + HT::Branch(a, b) => { + cursor = [a, b]; + } + HT::Terminal(n) => { + buf.push(*n); + cursor = root; + } + } + } + buf +} + +impl HT { + pub fn from_probabilities(ps: [usize; 256]) -> Self { + let mut parts = ps + .into_iter() + .enumerate() + .map(|(n, p)| (p, HT::Terminal(n as u8))) + .collect::<Vec<_>>(); + + while parts.len() != 1 { + parts.sort_by_key(|e| -(e.0 as isize)); + let ((ap, at), (bp, bt)) = (parts.pop().unwrap(), parts.pop().unwrap()); + parts.push((ap + bp + 1, HT::Branch(Box::new(at), Box::new(bt)))) + } + parts[0].1.clone() + } + pub fn create_lut(&self, table: &mut [u32; 256], mut prefix: u32) { + assert!(self.depth() < 30, "too deep! doesnt fit {}", self.depth()); + match self { + HT::Branch(a, b) => { + let pz = 32 - prefix.leading_zeros(); + prefix ^= 1 << pz; + prefix ^= 1 << (pz - 1); + a.create_lut(table, prefix); + prefix ^= 1 << (pz - 1); + b.create_lut(table, prefix); + } + HT::Terminal(n) => { + assert_eq!(table[*n as usize], 0); + table[*n as usize] = prefix + } + } + } + pub fn depth(&self) -> usize { + match self { + HT::Branch(a, b) => a.depth().max(b.depth()) + 1, + HT::Terminal(_) => 0, + } + } + pub fn write(&self, w: &mut BitIO) { + match self { + HT::Branch(a, b) => { + w.wbit(false); + a.write(w); + b.write(w); + } + HT::Terminal(n) => { + w.wbit(true); + w.wbyte(*n); + } + } + } + pub fn read(r: &mut BitIO) -> Self { + match r.rbit() { + false => Self::Branch(Box::new(Self::read(r)), Box::new(Self::read(r))), + true => Self::Terminal(r.rbyte()), + } + } +} + +pub struct BitIO { + buffer: Vec<u8>, + byte: u8, + position: usize, +} +impl BitIO { + pub fn new(inner: Vec<u8>) -> Self { + Self { + buffer: inner, + byte: 0, + position: 0, + } + } + #[inline] + pub fn wbit(&mut self, b: bool) { + self.byte <<= 1; + self.byte |= b as u8; + self.position += 1; + if self.position & 0b111 == 0 { + self.buffer.push(self.byte) + } + } + #[inline] + pub fn wbyte(&mut self, v: u8) { + for i in 0..8 { + self.wbit((v & (1 << i)) != 0); + } + } + + #[inline] + pub fn flush(&mut self) { + while self.position & 0b111 != 0 { + self.wbit(false); + } + } + + #[inline] + pub fn rbit(&mut self) -> bool { + if self.position & 0b111 == 0 { + self.byte = self.buffer[self.position >> 3]; + } + + let v = (self.byte & 0b10000000) != 0; + self.byte <<= 1; + self.position += 1; + v + } + + #[inline] + pub fn rbyte(&mut self) -> u8 { + let mut v = 0u8; + for i in 0..8 { + v |= (self.rbit() as u8) << i; + } + v + } +} |