diff options
Diffstat (limited to 'matroska/src/read.rs')
-rw-r--r-- | matroska/src/read.rs | 227 |
1 files changed, 227 insertions, 0 deletions
diff --git a/matroska/src/read.rs b/matroska/src/read.rs new file mode 100644 index 0000000..95a98b5 --- /dev/null +++ b/matroska/src/read.rs @@ -0,0 +1,227 @@ +use crate::{matroska::MatroskaTag, size::EbmlSize, Master}; +use anyhow::{anyhow, bail, Result}; +use log::{debug, warn}; +use std::{ + collections::VecDeque, + io::{Read, Seek, SeekFrom}, +}; + +trait ReadAndSeek: Read + Seek {} +impl<T: Read + Seek> ReadAndSeek for T {} + +#[derive(Debug, Clone, Copy)] +pub struct StackTag { + end: Option<usize>, + id: u64, +} + +pub struct EbmlReader { + inner: Box<dyn ReadAndSeek>, + stack: Vec<StackTag>, + queue: VecDeque<MatroskaTag>, + pub position: usize, +} + +impl EbmlReader { + pub fn new<T: Seek + Read + 'static>(inner: T) -> Self { + Self { + queue: VecDeque::new(), + inner: Box::new(inner), + stack: vec![], + position: 0, + } + } + + pub fn read_byte(&mut self) -> Result<u8> { + let mut b = [0u8]; + self.inner.read_exact(&mut b)?; + self.position += 1; + Ok(b[0]) + } + pub fn read_buf(&mut self, size: impl Into<usize>) -> Result<Vec<u8>> { + let size = size.into(); + let mut b = vec![0u8; size]; + self.inner.read_exact(&mut b)?; + self.position += size; + Ok(b) + } + pub fn read_vint_len(&mut self) -> Result<(u64, usize)> { + let s = self.read_byte()?; + let len = s.leading_zeros() + 1; + if len > 8 { + bail!("varint too long"); + } + let mut value = s as u64; + value -= 1 << (8 - len); + for _ in 1..len { + value <<= 8; + value += self.read_byte()? as u64; + } + Ok((value, len as usize)) + } + pub fn read_vint(&mut self) -> Result<u64> { + Ok(self.read_vint_len()?.0) + } + pub fn read_utf8(&mut self, size: impl Into<usize>) -> Result<String> { + let b = self.read_buf(size)?; + Ok(String::from_utf8(b)?) + } + pub fn read_tag_id(&mut self) -> Result<u64> { + let (value, len) = self.read_vint_len()?; + Ok(value + (1 << (7 * len))) + } + pub fn read_tag_size(&mut self) -> Result<EbmlSize> { + Ok(EbmlSize::from_vint(self.read_vint_len()?)) + } + pub fn read_stuff(&mut self) -> Result<()> { + while let Some(e) = self.stack.last().map(|e| *e) { + if let Some(end) = e.end { + if self.position >= end { + if self.position != end { + warn!("we missed the end by {} bytes", self.position - end) + } + self.stack.pop(); + self.queue + .push_back(MatroskaTag::construct_master(e.id, Master::End)?); + } else { + break; + } + } else { + break; + } + } + + let id = self.read_tag_id()?; + let size = self.read_tag_size()?; + let is_master = MatroskaTag::is_master(id)?; + let tag = if is_master { + MatroskaTag::construct_master(id, Master::Start)? + } else { + let data = self.read_buf(size.some().unwrap())?; + MatroskaTag::parse(id, &data)? + }; + + if let Some(path) = tag.path() { + // we have slightly different rules for closing tags implicitly + // this closes as many tags as needed to make the next tag a valid child + while let Some(tag @ StackTag { end: None, .. }) = self.stack.last() { + if path.last() == Some(&tag.id) { + break; + } else { + self.queue.push_back(MatroskaTag::construct_master( + self.stack.pop().unwrap().id, + Master::End, + )?); + } + } + } + + if is_master { + self.stack.push(StackTag { + end: size.some().map(|s| s + self.position), + id, + }); + } + self.queue.push_back(tag); + Ok(()) + } + + /// context should be the next expected tag, such that the stack can be derived from its path. + pub fn seek(&mut self, position: usize, context: MatroskaTag) -> Result<()> { + let path = context + .path() + .ok_or(anyhow!("global tags dont give context"))?; + debug!( + "seeking to {position} with a context restored from path {:x?}", + path + ); + self.queue.clear(); + self.position = position; + self.inner.seek(SeekFrom::Start(position as u64))?; + self.stack = path + .iter() + .map(|id| StackTag { id: *id, end: None }) + .collect(); + Ok(()) + } +} + +impl Iterator for EbmlReader { + type Item = Result<MatroskaTag>; + + fn next(&mut self) -> Option<Self::Item> { + if let Some(t) = self.queue.pop_front() { + // match t { + // MatroskaTag::SimpleBlock(_) | MatroskaTag::Block(_) => (), + // _ => debug!("reader yield: {t:?}"), + // }; + Some(Ok(t)) + } else { + match self.read_stuff() { + Ok(()) => self.next(), + Err(e) => Some(Err(e)), + } + } + } +} + +pub trait ReadValue: Sized { + fn from_buf(buf: &[u8]) -> anyhow::Result<Self>; +} + +impl ReadValue for u64 { + fn from_buf(buf: &[u8]) -> anyhow::Result<Self> { + if buf.len() > 8 { + bail!("u64 too big") + } + let mut val = 0u64; + for byte in buf { + val <<= 8; + val |= *byte as u64; + } + Ok(val) + } +} +impl ReadValue for i64 { + fn from_buf(buf: &[u8]) -> anyhow::Result<Self> { + if buf.len() > 8 { + bail!("i64 too big") + } + Ok(if buf[0] > 127 { + if buf.len() == 8 { + i64::from_be_bytes(buf.try_into().unwrap()) + } else { + -((1 << (buf.len() * 8)) - (u64::from_buf(buf)? as i64)) + } + } else { + u64::from_buf(buf)? as i64 + }) + } +} +impl ReadValue for f64 { + fn from_buf(buf: &[u8]) -> anyhow::Result<Self> { + Ok(if buf.len() == 4 { + f32::from_be_bytes(buf.try_into().unwrap()) as f64 + } else if buf.len() == 8 { + f64::from_be_bytes(buf.try_into().unwrap()) + } else { + bail!("float is not 4 or 8 bytes long"); + }) + } +} + +impl ReadValue for Vec<u8> { + fn from_buf(buf: &[u8]) -> anyhow::Result<Self> { + Ok(buf.to_vec()) + } +} +impl ReadValue for String { + fn from_buf(buf: &[u8]) -> anyhow::Result<Self> { + Ok(String::from_utf8(Vec::from(buf))?) + } +} +impl ReadValue for Master { + fn from_buf(_: &[u8]) -> anyhow::Result<Self> { + panic!("master shall not be read like this") + } +} |