aboutsummaryrefslogtreecommitdiff
path: root/matroska/src/read.rs
diff options
context:
space:
mode:
Diffstat (limited to 'matroska/src/read.rs')
-rw-r--r--matroska/src/read.rs227
1 files changed, 227 insertions, 0 deletions
diff --git a/matroska/src/read.rs b/matroska/src/read.rs
new file mode 100644
index 0000000..95a98b5
--- /dev/null
+++ b/matroska/src/read.rs
@@ -0,0 +1,227 @@
+use crate::{matroska::MatroskaTag, size::EbmlSize, Master};
+use anyhow::{anyhow, bail, Result};
+use log::{debug, warn};
+use std::{
+ collections::VecDeque,
+ io::{Read, Seek, SeekFrom},
+};
+
+/// Combines `Read` and `Seek` into a single trait so that a byte source can be stored
+/// behind one trait object (`Box<dyn ReadAndSeek>`).
+trait ReadAndSeek: Read + Seek {}
+
+/// Blanket implementation: anything that is both `Read` and `Seek` qualifies.
+impl<T> ReadAndSeek for T where T: Read + Seek {}
+
+#[derive(Debug, Clone, Copy)]
+pub struct StackTag { // one entry of the reader's stack of currently open master elements
+ end: Option<usize>, // byte offset at which the element ends; `None` when no end is recorded
+ id: u64, // element id, handed to `MatroskaTag::construct_master` when the element is closed
+}
+
+pub struct EbmlReader { // pull parser that hands out `MatroskaTag`s through its `Iterator` impl
+ inner: Box<dyn ReadAndSeek>, // underlying byte source
+ stack: Vec<StackTag>, // master elements currently open, outermost first
+ queue: VecDeque<MatroskaTag>, // parsed tags waiting to be handed out by `next`
+ pub position: usize, // bytes consumed so far; overwritten by `seek`
+}
+
+impl EbmlReader {
+    /// Creates a reader over `inner` with an empty element stack and `position` set to 0.
+    ///
+    /// `position` is maintained by counting consumed bytes, so `inner` is presumably
+    /// expected to be at its start (or `seek` must be called first) — NOTE(review): confirm.
+    pub fn new<T: Seek + Read + 'static>(inner: T) -> Self {
+        Self {
+            queue: VecDeque::new(),
+            inner: Box::new(inner),
+            stack: vec![],
+            position: 0,
+        }
+    }
+
+    /// Reads exactly one byte and advances `position` by one.
+    ///
+    /// # Errors
+    /// Propagates the I/O error of the underlying source (including end of input).
+    pub fn read_byte(&mut self) -> Result<u8> {
+        let mut b = [0u8];
+        self.inner.read_exact(&mut b)?;
+        self.position += 1;
+        Ok(b[0])
+    }
+
+    /// Reads exactly `size` bytes into a fresh buffer and advances `position`.
+    ///
+    /// `size` normally comes straight out of the file being parsed, so the buffer is not
+    /// allocated in full up front: a corrupt or hostile size must not be able to trigger
+    /// a huge allocation before a single byte has been read.
+    ///
+    /// # Errors
+    /// Propagates I/O errors; a short read is reported as an `UnexpectedEof` I/O error,
+    /// matching what `read_exact` reports.
+    pub fn read_buf(&mut self, size: impl Into<usize>) -> Result<Vec<u8>> {
+        // Upper bound for the up-front allocation; larger payloads grow while reading.
+        const MAX_PREALLOC: usize = 1 << 20;
+
+        let size = size.into();
+        let mut b = Vec::with_capacity(size.min(MAX_PREALLOC));
+        let got = Read::take(&mut self.inner, size as u64).read_to_end(&mut b)?;
+        // Keep the byte counter in step with what was actually consumed.
+        self.position += got;
+        if got != size {
+            return Err(std::io::Error::new(
+                std::io::ErrorKind::UnexpectedEof,
+                "failed to fill whole buffer",
+            )
+            .into());
+        }
+        Ok(b)
+    }
+
+    /// Reads a variable-length integer and returns `(value, encoded_length_in_bytes)`.
+    ///
+    /// The number of leading zero bits in the first byte determines the total length
+    /// (1 to 8 bytes); the marker bit that follows them is removed from the value.
+    ///
+    /// # Errors
+    /// Fails with "varint too long" when the first byte is zero, and on I/O errors.
+    pub fn read_vint_len(&mut self) -> Result<(u64, usize)> {
+        let first = self.read_byte()?;
+        let len = first.leading_zeros() + 1;
+        if len > 8 {
+            bail!("varint too long");
+        }
+        // Bit `8 - len` of the first byte is the length marker and is always set here,
+        // so the subtraction cannot underflow.
+        let mut value = u64::from(first) - (1 << (8 - len));
+        for _ in 1..len {
+            value = (value << 8) | u64::from(self.read_byte()?);
+        }
+        Ok((value, len as usize))
+    }
+
+    /// Reads a variable-length integer and discards its encoded length.
+    pub fn read_vint(&mut self) -> Result<u64> {
+        Ok(self.read_vint_len()?.0)
+    }
+
+    /// Reads `size` bytes and decodes them as UTF-8.
+    ///
+    /// # Errors
+    /// Fails on I/O errors and when the bytes are not valid UTF-8.
+    pub fn read_utf8(&mut self, size: impl Into<usize>) -> Result<String> {
+        let b = self.read_buf(size)?;
+        Ok(String::from_utf8(b)?)
+    }
+
+    /// Reads an element id.
+    ///
+    /// Ids are compared including their length-marker bit, so the bit stripped by
+    /// `read_vint_len` (bit `7 * len` of the full value) is added back.
+    pub fn read_tag_id(&mut self) -> Result<u64> {
+        let (value, len) = self.read_vint_len()?;
+        Ok(value + (1 << (7 * len)))
+    }
+
+    /// Reads an element size and converts it with `EbmlSize::from_vint`.
+    pub fn read_tag_size(&mut self) -> Result<EbmlSize> {
+        Ok(EbmlSize::from_vint(self.read_vint_len()?))
+    }
+
+    /// Parses the next piece of input and appends the resulting tags to the queue.
+    ///
+    /// One call does one of two things:
+    /// 1. If masters on top of the stack have reached their recorded end, their `End`
+    ///    tags are queued and the call returns without reading. This way an error from
+    ///    the following read (for example end of input) can never be reported ahead of
+    ///    `End` tags that were already due.
+    /// 2. Otherwise one element header is read. A master yields a `Start` tag and is
+    ///    pushed on the stack; any other element has its payload read and parsed.
+    ///    Before the tag is queued, masters without a recorded end are closed until the
+    ///    top of the stack is the new tag's parent.
+    ///
+    /// # Errors
+    /// Fails on I/O errors, on errors from `MatroskaTag`, when a non-master element has
+    /// no known size, and when a master's end offset does not fit in `usize`.
+    pub fn read_stuff(&mut self) -> Result<()> {
+        let mut closed_any = false;
+        while let Some(StackTag { end: Some(end), id }) = self.stack.last().copied() {
+            if self.position < end {
+                break;
+            }
+            if self.position != end {
+                warn!("we missed the end by {} bytes", self.position - end);
+            }
+            self.stack.pop();
+            self.queue
+                .push_back(MatroskaTag::construct_master(id, Master::End)?);
+            closed_any = true;
+        }
+        if closed_any {
+            // Hand out the End tags first; the next call performs the read.
+            return Ok(());
+        }
+
+        let id = self.read_tag_id()?;
+        let size = self.read_tag_size()?;
+        let is_master = MatroskaTag::is_master(id)?;
+        let tag = if is_master {
+            MatroskaTag::construct_master(id, Master::Start)?
+        } else {
+            // Only masters may omit their size; anything else is malformed input and
+            // must surface as an error rather than a panic.
+            let len = size
+                .some()
+                .ok_or_else(|| anyhow!("non-master element {:#x} has an unknown size", id))?;
+            let data = self.read_buf(len)?;
+            MatroskaTag::parse(id, &data)?
+        };
+
+        if let Some(path) = tag.path() {
+            // we have slightly different rules for closing tags implicitly
+            // this closes as many tags as needed to make the next tag a valid child
+            while let Some(&StackTag {
+                end: None,
+                id: open_id,
+            }) = self.stack.last()
+            {
+                if path.last() == Some(&open_id) {
+                    break;
+                }
+                self.stack.pop();
+                self.queue
+                    .push_back(MatroskaTag::construct_master(open_id, Master::End)?);
+            }
+        }
+
+        if is_master {
+            // The size comes from the file; refuse offsets that would wrap around.
+            let end = size
+                .some()
+                .map(|len| {
+                    self.position.checked_add(len).ok_or_else(|| {
+                        anyhow!("element {:#x} ends beyond the addressable range", id)
+                    })
+                })
+                .transpose()?;
+            self.stack.push(StackTag { end, id });
+        }
+        self.queue.push_back(tag);
+        Ok(())
+    }
+
+    /// Repositions the reader at byte offset `position`.
+    ///
+    /// context should be the next expected tag, such that the stack can be derived from its path.
+    /// Every element of that path is pushed as an open master without a recorded end,
+    /// and all queued tags are discarded.
+    ///
+    /// # Errors
+    /// Fails when `context` has no path, or when seeking the underlying source fails;
+    /// in both cases the queue, stack and `position` are left as they were.
+    pub fn seek(&mut self, position: usize, context: MatroskaTag) -> Result<()> {
+        let path = context
+            .path()
+            .ok_or_else(|| anyhow!("global tags dont give context"))?;
+        debug!(
+            "seeking to {position} with a context restored from path {:x?}",
+            path
+        );
+        // Seek first so that a failure does not leave the bookkeeping half-updated.
+        self.inner.seek(SeekFrom::Start(position as u64))?;
+        self.queue.clear();
+        self.position = position;
+        self.stack = path
+            .iter()
+            .map(|id| StackTag { id: *id, end: None })
+            .collect();
+        Ok(())
+    }
+}
+
+impl Iterator for EbmlReader {
+    type Item = Result<MatroskaTag>;
+
+    /// Hands out the next queued tag, parsing more input whenever the queue is empty.
+    ///
+    /// A failed parse is yielded as `Some(Err(_))`; this method never returns `None`.
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            if let Some(tag) = self.queue.pop_front() {
+                return Some(Ok(tag));
+            }
+            // Queue is empty: parse more input, then retry the pop.
+            if let Err(err) = self.read_stuff() {
+                return Some(Err(err));
+            }
+        }
+    }
+}
+
+pub trait ReadValue: Sized { // decoding of raw payload bytes into a Rust value
+ fn from_buf(buf: &[u8]) -> anyhow::Result<Self>; // decodes the whole of `buf`; errors when the bytes do not form a valid `Self`
+}
+
+impl ReadValue for u64 {
+    /// Interprets `buf` as a big-endian unsigned integer of at most eight bytes.
+    ///
+    /// An empty buffer yields 0.
+    ///
+    /// # Errors
+    /// Fails with "u64 too big" when `buf` is longer than eight bytes.
+    fn from_buf(buf: &[u8]) -> anyhow::Result<Self> {
+        if buf.len() > 8 {
+            bail!("u64 too big")
+        }
+        let value = buf
+            .iter()
+            .fold(0u64, |acc, &byte| (acc << 8) | u64::from(byte));
+        Ok(value)
+    }
+}
+impl ReadValue for i64 {
+    /// Interprets `buf` as a big-endian two's-complement integer of at most eight bytes.
+    ///
+    /// An empty buffer yields 0, mirroring the unsigned implementation; a zero-length
+    /// signed integer is a valid encoding and must not panic.
+    ///
+    /// # Errors
+    /// Fails with "i64 too big" when `buf` is longer than eight bytes.
+    fn from_buf(buf: &[u8]) -> anyhow::Result<Self> {
+        if buf.len() > 8 {
+            bail!("i64 too big")
+        }
+        if buf.is_empty() {
+            return Ok(0);
+        }
+        let raw = u64::from_buf(buf)?;
+        // Move the encoded sign bit into bit 63, then shift back arithmetically so the
+        // sign is extended over the bytes that were not present in the encoding.
+        let unused_bits = 64 - 8 * buf.len() as u32;
+        Ok(((raw << unused_bits) as i64) >> unused_bits)
+    }
+}
+impl ReadValue for f64 {
+    /// Decodes a big-endian IEEE float: four bytes are read as `f32` and widened,
+    /// eight bytes are read as `f64`.
+    ///
+    /// # Errors
+    /// Fails with "float is not 4 or 8 bytes long" for every other length.
+    fn from_buf(buf: &[u8]) -> anyhow::Result<Self> {
+        match buf.len() {
+            4 => Ok(f32::from_be_bytes(buf.try_into().unwrap()) as f64),
+            8 => Ok(f64::from_be_bytes(buf.try_into().unwrap())),
+            _ => bail!("float is not 4 or 8 bytes long"),
+        }
+    }
+}
+
+impl ReadValue for Vec<u8> {
+    /// Copies the payload bytes unchanged; never fails.
+    fn from_buf(buf: &[u8]) -> anyhow::Result<Self> {
+        let bytes = Vec::from(buf);
+        Ok(bytes)
+    }
+}
+impl ReadValue for String {
+    /// Copies the payload and validates it as UTF-8.
+    ///
+    /// # Errors
+    /// Fails when `buf` is not valid UTF-8.
+    fn from_buf(buf: &[u8]) -> anyhow::Result<Self> {
+        let owned = buf.to_vec();
+        let text = String::from_utf8(owned)?;
+        Ok(text)
+    }
+}
+impl ReadValue for Master {
+    /// Not a real decoder: `Master` values are never produced from payload bytes.
+    ///
+    /// # Panics
+    /// Always.
+    fn from_buf(_payload: &[u8]) -> anyhow::Result<Self> {
+        panic!("master shall not be read like this")
+    }
+}