/* This file is part of jellything (https://codeberg.org/metamuffin/jellything) which is licensed under the GNU Affero General Public License (version 3); see /COPYING. Copyright (C) 2025 metamuffin */ use crate::{error::Error, matroska::MatroskaTag, size::EbmlSize, Master, Result}; use log::{debug, warn}; use std::{ collections::VecDeque, io::{Read, Seek, SeekFrom}, }; trait ReadAndSeek: Read + Seek {} impl ReadAndSeek for T {} #[derive(Debug, Clone, Copy)] pub struct StackTag { end: Option, id: u64, } pub struct EbmlReader { inner: Box, stack: Vec, queue: VecDeque<(Option, MatroskaTag)>, position: u64, } impl Read for EbmlReader { fn read(&mut self, buf: &mut [u8]) -> std::io::Result { let r = self.inner.read(buf)?; self.position += r as u64; Ok(r) } } impl EbmlReader { pub fn new(inner: T) -> Self { Self { queue: VecDeque::new(), inner: Box::new(inner), stack: vec![], position: 0, } } #[inline] pub fn read_byte(&mut self) -> Result { let mut b = [0u8]; self.inner.read_exact(&mut b).map_err(Error::Io)?; self.position += 1; Ok(b[0]) } pub fn read_buf(&mut self, size: impl Into) -> Result> { let size = size.into(); let mut b = vec![0u8; size]; self.inner.read_exact(&mut b).map_err(Error::Io)?; self.position += size as u64; Ok(b) } pub fn read_vint_len(&mut self) -> Result<(u64, usize)> { let s = self.read_byte()?; let len = s.leading_zeros() + 1; if len > 8 { Err(Error::VarintTooLong)? } let mut value = s as u64; value -= 1 << (8 - len); for _ in 1..len { value <<= 8; value += self.read_byte()? as u64; } Ok((value, len as usize)) } #[inline] pub fn read_vint(&mut self) -> Result { Ok(self.read_vint_len()?.0) } #[inline] pub fn read_utf8(&mut self, size: impl Into) -> Result { let b = self.read_buf(size)?; String::from_utf8(b).map_err(|_| Error::InvalidUTF8) } #[inline] pub fn read_tag_id(&mut self) -> Result { let (value, len) = self.read_vint_len()?; Ok(value + (1 << (7 * len))) } #[inline] pub fn read_tag_size(&mut self) -> Result { Ok(EbmlSize::from_vint(self.read_vint_len()?)) } /// reads *some* amount of tags from the stream and pushes it to the queue. pub fn read_stuff(&mut self) -> Result<()> { while let Some(e) = self.stack.last().copied() { if let Some(end) = e.end { if self.position >= end { if self.position != end { warn!("we missed the end by {} bytes", self.position - end) } self.stack.pop(); self.queue .push_back((None, MatroskaTag::construct_master(e.id, Master::End)?)); } else { break; } } else { break; } } let start_position = self.position; let id = self.read_tag_id()?; let size = self.read_tag_size()?; let is_master = MatroskaTag::is_master(id)?; let tag = if is_master { MatroskaTag::construct_master(id, Master::Start)? } else { let data = self.read_buf(size.some().unwrap())?; MatroskaTag::parse(id, &data)? }; if let Some(path) = tag.path() { // we have slightly different rules for closing tags implicitly // this closes as many tags as needed to make the next tag a valid child while let Some(stag @ StackTag { end: None, .. }) = self.stack.last() { if path.last() == Some(&stag.id) { break; } else { let end = MatroskaTag::construct_master(self.stack.pop().unwrap().id, Master::End)?; self.queue.push_back((None, end)); } } } if is_master { self.stack.push(StackTag { end: size.some().map(|s| s as u64 + self.position), id, }); } self.queue.push_back((Some(start_position), tag)); Ok(()) } /// context should be the next expected tag, such that the stack can be derived from its path. pub fn seek(&mut self, position: u64, context: MatroskaTag) -> Result<()> { let path = context.path().ok_or(Error::GlobalTagsAsContext)?; debug!( "seeking to {position} with a context restored from path {:x?}", path ); self.queue.clear(); self.position = position; self.inner.seek(SeekFrom::Start(position))?; self.stack = path .iter() .map(|id| StackTag { id: *id, end: None }) .collect(); Ok(()) } } impl Iterator for EbmlReader { type Item = Result<(Option, MatroskaTag)>; fn next(&mut self) -> Option { if let Some(t) = self.queue.pop_front() { // match t { // MatroskaTag::SimpleBlock(_) | MatroskaTag::Block(_) => (), // _ => debug!("reader yield: {t:?}"), // }; Some(Ok(t)) } else { match self.read_stuff() { Ok(()) => self.next(), // in case we reached the end (error: failed to fill whole buffer), // return the rest in the queue and pop all items of the stack Err(e) => { // TODO this is horrible, should use a custom error enum instead if format!("{e}").as_str() == "failed to fill whole buffer" { match self.queue.pop_front() { Some(q) => Some(Ok(q)), None => match self.stack.pop() { Some(q) => Some(Ok(( None, MatroskaTag::construct_master(q.id, Master::End).unwrap(), ))), None => Some(Err(e)), }, } } else { Some(Err(e)) } } } } } } pub trait ReadValue: Sized { fn from_buf(buf: &[u8]) -> Result; } impl ReadValue for u64 { fn from_buf(buf: &[u8]) -> Result { if buf.len() > 8 { Err(Error::InvalidTypeLen)? } let mut val = 0u64; for byte in buf { val <<= 8; val |= *byte as u64; } Ok(val) } } impl ReadValue for i64 { fn from_buf(buf: &[u8]) -> Result { if buf.len() > 8 { Err(Error::InvalidTypeLen)? } Ok(if buf[0] > 127 { if buf.len() == 8 { i64::from_be_bytes(buf.try_into().unwrap()) } else { -((1 << (buf.len() * 8)) - (u64::from_buf(buf)? as i64)) } } else { u64::from_buf(buf)? as i64 }) } } impl ReadValue for f64 { fn from_buf(buf: &[u8]) -> Result { Ok(if buf.len() == 4 { f32::from_be_bytes(buf.try_into().unwrap()) as f64 } else if buf.len() == 8 { f64::from_be_bytes(buf.try_into().unwrap()) } else { Err(Error::InvalidTypeLen)? }) } } impl ReadValue for Vec { fn from_buf(buf: &[u8]) -> Result { Ok(buf.to_vec()) } } impl ReadValue for String { fn from_buf(buf: &[u8]) -> Result { String::from_utf8(Vec::from(buf)).map_err(|_| Error::InvalidUTF8) } } impl ReadValue for Master { fn from_buf(_: &[u8]) -> Result { panic!("master shall not be read like this") } } pub trait ReadExt: Read { fn read_byte(&mut self) -> Result; fn read_vint_len(&mut self) -> Result<(u64, usize)>; fn read_vint(&mut self) -> Result; } impl ReadExt for T { fn read_byte(&mut self) -> Result { let mut b = [0u8]; self.read_exact(&mut b).map_err(Error::Io)?; Ok(b[0]) } fn read_vint_len(&mut self) -> Result<(u64, usize)> { let s = self.read_byte()?; let len = s.leading_zeros() + 1; if len > 8 { Err(Error::VarintTooLong)? } let mut value = s as u64; value -= 1 << (8 - len); for _ in 1..len { value <<= 8; value += self.read_byte()? as u64; } Ok((value, len as usize)) } #[inline] fn read_vint(&mut self) -> Result { Ok(self.read_vint_len()?.0) } }