aboutsummaryrefslogtreecommitdiff
path: root/matroska/src/read.rs
diff options
context:
space:
mode:
Diffstat (limited to 'matroska/src/read.rs')
-rw-r--r--matroska/src/read.rs298
1 files changed, 0 insertions, 298 deletions
diff --git a/matroska/src/read.rs b/matroska/src/read.rs
deleted file mode 100644
index c3d06fa..0000000
--- a/matroska/src/read.rs
+++ /dev/null
@@ -1,298 +0,0 @@
-/*
- This file is part of jellything (https://codeberg.org/metamuffin/jellything)
- which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
- Copyright (C) 2025 metamuffin <metamuffin.org>
-*/
-use crate::{error::Error, matroska::MatroskaTag, size::EbmlSize, Master, Result};
-use log::{debug, warn};
-use std::{
- collections::VecDeque,
- io::{Read, Seek, SeekFrom},
-};
-
-trait ReadAndSeek: Read + Seek {}
-impl<T: Read + Seek> ReadAndSeek for T {}
-
-#[derive(Debug, Clone, Copy)]
-pub struct StackTag {
- end: Option<u64>,
- id: u64,
-}
-
-pub struct EbmlReader {
- inner: Box<dyn ReadAndSeek>,
- stack: Vec<StackTag>,
- queue: VecDeque<(Option<u64>, MatroskaTag)>,
- position: u64,
-}
-
-impl Read for EbmlReader {
- fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
- let r = self.inner.read(buf)?;
- self.position += r as u64;
- Ok(r)
- }
-}
-
-impl EbmlReader {
- pub fn new<T: Seek + Read + 'static>(inner: T) -> Self {
- Self {
- queue: VecDeque::new(),
- inner: Box::new(inner),
- stack: vec![],
- position: 0,
- }
- }
-
- #[inline]
- pub fn read_byte(&mut self) -> Result<u8> {
- let mut b = [0u8];
- self.inner.read_exact(&mut b).map_err(Error::Io)?;
- self.position += 1;
- Ok(b[0])
- }
-
- pub fn read_buf(&mut self, size: impl Into<usize>) -> Result<Vec<u8>> {
- let size = size.into();
- let mut b = vec![0u8; size];
- self.inner.read_exact(&mut b).map_err(Error::Io)?;
- self.position += size as u64;
- Ok(b)
- }
-
- pub fn read_vint_len(&mut self) -> Result<(u64, usize)> {
- let s = self.read_byte()?;
- let len = s.leading_zeros() + 1;
- if len > 8 {
- Err(Error::VarintTooLong)?
- }
- let mut value = s as u64;
- value -= 1 << (8 - len);
- for _ in 1..len {
- value <<= 8;
- value += self.read_byte()? as u64;
- }
- Ok((value, len as usize))
- }
-
- #[inline]
- pub fn read_vint(&mut self) -> Result<u64> {
- Ok(self.read_vint_len()?.0)
- }
-
- #[inline]
- pub fn read_utf8(&mut self, size: impl Into<usize>) -> Result<String> {
- let b = self.read_buf(size)?;
- String::from_utf8(b).map_err(|_| Error::InvalidUTF8)
- }
-
- #[inline]
- pub fn read_tag_id(&mut self) -> Result<u64> {
- let (value, len) = self.read_vint_len()?;
- Ok(value + (1 << (7 * len)))
- }
-
- #[inline]
- pub fn read_tag_size(&mut self) -> Result<EbmlSize> {
- Ok(EbmlSize::from_vint(self.read_vint_len()?))
- }
-
- /// reads *some* amount of tags from the stream and pushes it to the queue.
- pub fn read_stuff(&mut self) -> Result<()> {
- while let Some(e) = self.stack.last().copied() {
- if let Some(end) = e.end {
- if self.position >= end {
- if self.position != end {
- warn!("we missed the end by {} bytes", self.position - end)
- }
- self.stack.pop();
- self.queue
- .push_back((None, MatroskaTag::construct_master(e.id, Master::End)?));
- } else {
- break;
- }
- } else {
- break;
- }
- }
-
- let start_position = self.position;
- let id = self.read_tag_id()?;
- let size = self.read_tag_size()?;
- let is_master = MatroskaTag::is_master(id)?;
- let tag = if is_master {
- MatroskaTag::construct_master(id, Master::Start)?
- } else {
- let data = self.read_buf(size.some().unwrap())?;
- MatroskaTag::parse(id, &data)?
- };
-
- if let Some(path) = tag.path() {
- // we have slightly different rules for closing tags implicitly
- // this closes as many tags as needed to make the next tag a valid child
- while let Some(stag @ StackTag { end: None, .. }) = self.stack.last() {
- if path.last() == Some(&stag.id) {
- break;
- } else {
- let end =
- MatroskaTag::construct_master(self.stack.pop().unwrap().id, Master::End)?;
- self.queue.push_back((None, end));
- }
- }
- }
-
- if is_master {
- self.stack.push(StackTag {
- end: size.some().map(|s| s as u64 + self.position),
- id,
- });
- }
- self.queue.push_back((Some(start_position), tag));
- Ok(())
- }
-
- /// context should be the next expected tag, such that the stack can be derived from its path.
- pub fn seek(&mut self, position: u64, context: MatroskaTag) -> Result<()> {
- let path = context.path().ok_or(Error::GlobalTagsAsContext)?;
- debug!(
- "seeking to {position} with a context restored from path {:x?}",
- path
- );
- self.queue.clear();
- self.position = position;
- self.inner.seek(SeekFrom::Start(position))?;
- self.stack = path
- .iter()
- .map(|id| StackTag { id: *id, end: None })
- .collect();
- Ok(())
- }
-}
-
-impl Iterator for EbmlReader {
- type Item = Result<(Option<u64>, MatroskaTag)>;
- fn next(&mut self) -> Option<Self::Item> {
- if let Some(t) = self.queue.pop_front() {
- // match t {
- // MatroskaTag::SimpleBlock(_) | MatroskaTag::Block(_) => (),
- // _ => debug!("reader yield: {t:?}"),
- // };
- Some(Ok(t))
- } else {
- match self.read_stuff() {
- Ok(()) => self.next(),
- // in case we reached the end (error: failed to fill whole buffer),
- // return the rest in the queue and pop all items of the stack
- Err(e) => {
- // TODO this is horrible, should use a custom error enum instead
- if format!("{e}").as_str() == "failed to fill whole buffer" {
- match self.queue.pop_front() {
- Some(q) => Some(Ok(q)),
- None => match self.stack.pop() {
- Some(q) => Some(Ok((
- None,
- MatroskaTag::construct_master(q.id, Master::End).unwrap(),
- ))),
- None => Some(Err(e)),
- },
- }
- } else {
- Some(Err(e))
- }
- }
- }
- }
- }
-}
-
-pub trait ReadValue: Sized {
- fn from_buf(buf: &[u8]) -> Result<Self>;
-}
-
-impl ReadValue for u64 {
- fn from_buf(buf: &[u8]) -> Result<Self> {
- if buf.len() > 8 {
- Err(Error::InvalidTypeLen)?
- }
- let mut val = 0u64;
- for byte in buf {
- val <<= 8;
- val |= *byte as u64;
- }
- Ok(val)
- }
-}
-impl ReadValue for i64 {
- fn from_buf(buf: &[u8]) -> Result<Self> {
- if buf.len() > 8 {
- Err(Error::InvalidTypeLen)?
- }
- Ok(if buf[0] > 127 {
- if buf.len() == 8 {
- i64::from_be_bytes(buf.try_into().unwrap())
- } else {
- -((1 << (buf.len() * 8)) - (u64::from_buf(buf)? as i64))
- }
- } else {
- u64::from_buf(buf)? as i64
- })
- }
-}
-impl ReadValue for f64 {
- fn from_buf(buf: &[u8]) -> Result<Self> {
- Ok(if buf.len() == 4 {
- f32::from_be_bytes(buf.try_into().unwrap()) as f64
- } else if buf.len() == 8 {
- f64::from_be_bytes(buf.try_into().unwrap())
- } else {
- Err(Error::InvalidTypeLen)?
- })
- }
-}
-
-impl ReadValue for Vec<u8> {
- fn from_buf(buf: &[u8]) -> Result<Self> {
- Ok(buf.to_vec())
- }
-}
-impl ReadValue for String {
- fn from_buf(buf: &[u8]) -> Result<Self> {
- String::from_utf8(Vec::from(buf)).map_err(|_| Error::InvalidUTF8)
- }
-}
-impl ReadValue for Master {
- fn from_buf(_: &[u8]) -> Result<Self> {
- panic!("master shall not be read like this")
- }
-}
-
-pub trait ReadExt: Read {
- fn read_byte(&mut self) -> Result<u8>;
- fn read_vint_len(&mut self) -> Result<(u64, usize)>;
- fn read_vint(&mut self) -> Result<u64>;
-}
-impl<T: Read> ReadExt for T {
- fn read_byte(&mut self) -> Result<u8> {
- let mut b = [0u8];
- self.read_exact(&mut b).map_err(Error::Io)?;
- Ok(b[0])
- }
- fn read_vint_len(&mut self) -> Result<(u64, usize)> {
- let s = self.read_byte()?;
- let len = s.leading_zeros() + 1;
- if len > 8 {
- Err(Error::VarintTooLong)?
- }
- let mut value = s as u64;
- value -= 1 << (8 - len);
- for _ in 1..len {
- value <<= 8;
- value += self.read_byte()? as u64;
- }
- Ok((value, len as usize))
- }
- #[inline]
- fn read_vint(&mut self) -> Result<u64> {
- Ok(self.read_vint_len()?.0)
- }
-}