diff options
Diffstat (limited to 'matroska/src/read.rs')
-rw-r--r-- | matroska/src/read.rs | 298 |
1 files changed, 0 insertions, 298 deletions
diff --git a/matroska/src/read.rs b/matroska/src/read.rs deleted file mode 100644 index c3d06fa..0000000 --- a/matroska/src/read.rs +++ /dev/null @@ -1,298 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use crate::{error::Error, matroska::MatroskaTag, size::EbmlSize, Master, Result}; -use log::{debug, warn}; -use std::{ - collections::VecDeque, - io::{Read, Seek, SeekFrom}, -}; - -trait ReadAndSeek: Read + Seek {} -impl<T: Read + Seek> ReadAndSeek for T {} - -#[derive(Debug, Clone, Copy)] -pub struct StackTag { - end: Option<u64>, - id: u64, -} - -pub struct EbmlReader { - inner: Box<dyn ReadAndSeek>, - stack: Vec<StackTag>, - queue: VecDeque<(Option<u64>, MatroskaTag)>, - position: u64, -} - -impl Read for EbmlReader { - fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> { - let r = self.inner.read(buf)?; - self.position += r as u64; - Ok(r) - } -} - -impl EbmlReader { - pub fn new<T: Seek + Read + 'static>(inner: T) -> Self { - Self { - queue: VecDeque::new(), - inner: Box::new(inner), - stack: vec![], - position: 0, - } - } - - #[inline] - pub fn read_byte(&mut self) -> Result<u8> { - let mut b = [0u8]; - self.inner.read_exact(&mut b).map_err(Error::Io)?; - self.position += 1; - Ok(b[0]) - } - - pub fn read_buf(&mut self, size: impl Into<usize>) -> Result<Vec<u8>> { - let size = size.into(); - let mut b = vec![0u8; size]; - self.inner.read_exact(&mut b).map_err(Error::Io)?; - self.position += size as u64; - Ok(b) - } - - pub fn read_vint_len(&mut self) -> Result<(u64, usize)> { - let s = self.read_byte()?; - let len = s.leading_zeros() + 1; - if len > 8 { - Err(Error::VarintTooLong)? - } - let mut value = s as u64; - value -= 1 << (8 - len); - for _ in 1..len { - value <<= 8; - value += self.read_byte()? as u64; - } - Ok((value, len as usize)) - } - - #[inline] - pub fn read_vint(&mut self) -> Result<u64> { - Ok(self.read_vint_len()?.0) - } - - #[inline] - pub fn read_utf8(&mut self, size: impl Into<usize>) -> Result<String> { - let b = self.read_buf(size)?; - String::from_utf8(b).map_err(|_| Error::InvalidUTF8) - } - - #[inline] - pub fn read_tag_id(&mut self) -> Result<u64> { - let (value, len) = self.read_vint_len()?; - Ok(value + (1 << (7 * len))) - } - - #[inline] - pub fn read_tag_size(&mut self) -> Result<EbmlSize> { - Ok(EbmlSize::from_vint(self.read_vint_len()?)) - } - - /// reads *some* amount of tags from the stream and pushes it to the queue. - pub fn read_stuff(&mut self) -> Result<()> { - while let Some(e) = self.stack.last().copied() { - if let Some(end) = e.end { - if self.position >= end { - if self.position != end { - warn!("we missed the end by {} bytes", self.position - end) - } - self.stack.pop(); - self.queue - .push_back((None, MatroskaTag::construct_master(e.id, Master::End)?)); - } else { - break; - } - } else { - break; - } - } - - let start_position = self.position; - let id = self.read_tag_id()?; - let size = self.read_tag_size()?; - let is_master = MatroskaTag::is_master(id)?; - let tag = if is_master { - MatroskaTag::construct_master(id, Master::Start)? - } else { - let data = self.read_buf(size.some().unwrap())?; - MatroskaTag::parse(id, &data)? - }; - - if let Some(path) = tag.path() { - // we have slightly different rules for closing tags implicitly - // this closes as many tags as needed to make the next tag a valid child - while let Some(stag @ StackTag { end: None, .. }) = self.stack.last() { - if path.last() == Some(&stag.id) { - break; - } else { - let end = - MatroskaTag::construct_master(self.stack.pop().unwrap().id, Master::End)?; - self.queue.push_back((None, end)); - } - } - } - - if is_master { - self.stack.push(StackTag { - end: size.some().map(|s| s as u64 + self.position), - id, - }); - } - self.queue.push_back((Some(start_position), tag)); - Ok(()) - } - - /// context should be the next expected tag, such that the stack can be derived from its path. - pub fn seek(&mut self, position: u64, context: MatroskaTag) -> Result<()> { - let path = context.path().ok_or(Error::GlobalTagsAsContext)?; - debug!( - "seeking to {position} with a context restored from path {:x?}", - path - ); - self.queue.clear(); - self.position = position; - self.inner.seek(SeekFrom::Start(position))?; - self.stack = path - .iter() - .map(|id| StackTag { id: *id, end: None }) - .collect(); - Ok(()) - } -} - -impl Iterator for EbmlReader { - type Item = Result<(Option<u64>, MatroskaTag)>; - fn next(&mut self) -> Option<Self::Item> { - if let Some(t) = self.queue.pop_front() { - // match t { - // MatroskaTag::SimpleBlock(_) | MatroskaTag::Block(_) => (), - // _ => debug!("reader yield: {t:?}"), - // }; - Some(Ok(t)) - } else { - match self.read_stuff() { - Ok(()) => self.next(), - // in case we reached the end (error: failed to fill whole buffer), - // return the rest in the queue and pop all items of the stack - Err(e) => { - // TODO this is horrible, should use a custom error enum instead - if format!("{e}").as_str() == "failed to fill whole buffer" { - match self.queue.pop_front() { - Some(q) => Some(Ok(q)), - None => match self.stack.pop() { - Some(q) => Some(Ok(( - None, - MatroskaTag::construct_master(q.id, Master::End).unwrap(), - ))), - None => Some(Err(e)), - }, - } - } else { - Some(Err(e)) - } - } - } - } - } -} - -pub trait ReadValue: Sized { - fn from_buf(buf: &[u8]) -> Result<Self>; -} - -impl ReadValue for u64 { - fn from_buf(buf: &[u8]) -> Result<Self> { - if buf.len() > 8 { - Err(Error::InvalidTypeLen)? - } - let mut val = 0u64; - for byte in buf { - val <<= 8; - val |= *byte as u64; - } - Ok(val) - } -} -impl ReadValue for i64 { - fn from_buf(buf: &[u8]) -> Result<Self> { - if buf.len() > 8 { - Err(Error::InvalidTypeLen)? - } - Ok(if buf[0] > 127 { - if buf.len() == 8 { - i64::from_be_bytes(buf.try_into().unwrap()) - } else { - -((1 << (buf.len() * 8)) - (u64::from_buf(buf)? as i64)) - } - } else { - u64::from_buf(buf)? as i64 - }) - } -} -impl ReadValue for f64 { - fn from_buf(buf: &[u8]) -> Result<Self> { - Ok(if buf.len() == 4 { - f32::from_be_bytes(buf.try_into().unwrap()) as f64 - } else if buf.len() == 8 { - f64::from_be_bytes(buf.try_into().unwrap()) - } else { - Err(Error::InvalidTypeLen)? - }) - } -} - -impl ReadValue for Vec<u8> { - fn from_buf(buf: &[u8]) -> Result<Self> { - Ok(buf.to_vec()) - } -} -impl ReadValue for String { - fn from_buf(buf: &[u8]) -> Result<Self> { - String::from_utf8(Vec::from(buf)).map_err(|_| Error::InvalidUTF8) - } -} -impl ReadValue for Master { - fn from_buf(_: &[u8]) -> Result<Self> { - panic!("master shall not be read like this") - } -} - -pub trait ReadExt: Read { - fn read_byte(&mut self) -> Result<u8>; - fn read_vint_len(&mut self) -> Result<(u64, usize)>; - fn read_vint(&mut self) -> Result<u64>; -} -impl<T: Read> ReadExt for T { - fn read_byte(&mut self) -> Result<u8> { - let mut b = [0u8]; - self.read_exact(&mut b).map_err(Error::Io)?; - Ok(b[0]) - } - fn read_vint_len(&mut self) -> Result<(u64, usize)> { - let s = self.read_byte()?; - let len = s.leading_zeros() + 1; - if len > 8 { - Err(Error::VarintTooLong)? - } - let mut value = s as u64; - value -= 1 << (8 - len); - for _ in 1..len { - value <<= 8; - value += self.read_byte()? as u64; - } - Ok((value, len as usize)) - } - #[inline] - fn read_vint(&mut self) -> Result<u64> { - Ok(self.read_vint_len()?.0) - } -} |