diff options
author | metamuffin <metamuffin@disroot.org> | 2023-01-14 18:04:16 +0100 |
---|---|---|
committer | metamuffin <metamuffin@disroot.org> | 2023-01-14 18:04:16 +0100 |
commit | f452df18749b13f9d83a6ea679361d195b4a9ae1 (patch) | |
tree | 04897eef044ebed319949a0cdbd04232f0dce98c /ebml | |
parent | 6c023ddeaa0894813fc74038af7568c2d867c052 (diff) | |
download | jellything-f452df18749b13f9d83a6ea679361d195b4a9ae1.tar jellything-f452df18749b13f9d83a6ea679361d195b4a9ae1.tar.bz2 jellything-f452df18749b13f9d83a6ea679361d195b4a9ae1.tar.zst |
seeking and broken writing
Diffstat (limited to 'ebml')
-rw-r--r-- | ebml/src/bin/experiment.rs | 33 | ||||
-rw-r--r-- | ebml/src/bin/mkvdump.rs | 14 | ||||
-rw-r--r-- | ebml/src/lib.rs | 75 | ||||
-rw-r--r-- | ebml/src/matroska.rs | 2 | ||||
-rw-r--r-- | ebml/src/read.rs | 127 | ||||
-rw-r--r-- | ebml/src/size.rs | 29 | ||||
-rw-r--r-- | ebml/src/write.rs | 141 |
7 files changed, 310 insertions, 111 deletions
diff --git a/ebml/src/bin/experiment.rs b/ebml/src/bin/experiment.rs index c510bb5..c850753 100644 --- a/ebml/src/bin/experiment.rs +++ b/ebml/src/bin/experiment.rs @@ -1,12 +1,31 @@ -use ebml::read::EbmlReader; -use std::{fs::File, io::BufReader}; +use ebml::{matroska::MatroskaTag, read::EbmlReader, write::EbmlWriter}; +use std::{ + fs::File, + io::{stdin, stdout, BufReader, BufWriter}, +}; -fn main() { +fn main() -> anyhow::Result<()> { env_logger::init_from_env("LOG"); - let f = File::open(std::env::args().skip(1).next().unwrap()).unwrap(); - let mut r = EbmlReader::new(BufReader::new(f)); + let path = std::env::args().skip(1).next().unwrap(); + let mut r = EbmlReader::new(BufReader::new(File::open(path)?)); + let mut w = EbmlWriter::new(BufWriter::new(stdout()), 0); - for tag in r { - println!("{:?}", tag); + // r.seek( + // 631147167 + 52, + // ebml::matroska::MatroskaTag::Cues(Master::Start), + // ) + // .unwrap(); + + while let Some(tag) = r.next() { + let tag = tag?; + w.write_tag(&tag)?; + // println!("{} {tag:?}", r.position) + // match tag { + // tag @ MatroskaTag::SeekHead(_) => { + // println!("{:?}", r.collect_master(tag)); + // } + // _ => (), + // } } + Ok(()) } diff --git a/ebml/src/bin/mkvdump.rs b/ebml/src/bin/mkvdump.rs new file mode 100644 index 0000000..ae621e6 --- /dev/null +++ b/ebml/src/bin/mkvdump.rs @@ -0,0 +1,14 @@ +use ebml::read::EbmlReader; +use std::{fs::File, io::BufReader}; + +fn main() -> anyhow::Result<()> { + env_logger::init_from_env("LOG"); + let path = std::env::args().skip(1).next().unwrap(); + let mut r = EbmlReader::new(BufReader::new(File::open(path)?)); + + while let Some(tag) = r.next() { + let tag = tag?; + println!("{} {tag:?}", r.position) + } + Ok(()) +} diff --git a/ebml/src/lib.rs b/ebml/src/lib.rs index 22af473..a03ce0d 100644 --- a/ebml/src/lib.rs +++ b/ebml/src/lib.rs @@ -1,77 +1,18 @@ -use anyhow::bail; - pub mod matroska; pub mod read; pub mod write; +pub mod size; + +use size::EbmlSize; + +pub use read::ReadValue; +pub use write::WriteValue; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum Master { - Start, + Start(EbmlSize), End, } -pub trait ValueFromBuf: Sized { - fn from_buf(buf: &[u8]) -> anyhow::Result<Self>; -} -impl ValueFromBuf for u64 { - fn from_buf(buf: &[u8]) -> anyhow::Result<Self> { - if buf.len() > 8 { - bail!("u64 too big") - } - let mut val = 0u64; - for byte in buf { - val <<= 8; - val |= *byte as u64; - } - Ok(val) - } -} -impl ValueFromBuf for i64 { - fn from_buf(buf: &[u8]) -> anyhow::Result<Self> { - if buf.len() > 8 { - bail!("i64 too big") - } - Ok(if buf[0] > 127 { - if buf.len() == 8 { - i64::from_be_bytes(buf.try_into().unwrap()) - } else { - -((1 << (buf.len() * 8)) - (u64::from_buf(buf)? as i64)) - } - } else { - u64::from_buf(buf)? as i64 - }) - } -} -impl ValueFromBuf for f64 { - fn from_buf(buf: &[u8]) -> anyhow::Result<Self> { - Ok(if buf.len() == 4 { - f32::from_be_bytes(buf.try_into().unwrap()) as f64 - } else if buf.len() == 8 { - f64::from_be_bytes(buf.try_into().unwrap()) - } else { - bail!("float is not 4 or 8 bytes long"); - }) - } -} -impl ValueFromBuf for Vec<u8> { - fn from_buf(buf: &[u8]) -> anyhow::Result<Self> { - Ok(buf.to_vec()) - } -} -impl ValueFromBuf for String { - fn from_buf(buf: &[u8]) -> anyhow::Result<Self> { - Ok(String::from_utf8(Vec::from(buf))?) - } -} -impl ValueFromBuf for Master { - /// this has an internal usage, where buf has len of 1, when an end should be constructed - fn from_buf(buf: &[u8]) -> anyhow::Result<Self> { - Ok(match buf.len() { - 0 => Master::Start, - 1 => Master::End, - _ => unreachable!(), - }) - } -} diff --git a/ebml/src/matroska.rs b/ebml/src/matroska.rs index 9678e80..b4078ab 100644 --- a/ebml/src/matroska.rs +++ b/ebml/src/matroska.rs @@ -34,7 +34,7 @@ define_ebml! { Chapters[0x1043A770]: { EditionEntry[0x45B9]: { - ChapPterAtom[0xB6]: { + ChapterAtom[0xB6]: { ChapProcess[0x6944]: { ChapProcessCodecID[0x6955]: Uint, ChapProcessCommand[0x6911]: { diff --git a/ebml/src/read.rs b/ebml/src/read.rs index 3e02e18..3853782 100644 --- a/ebml/src/read.rs +++ b/ebml/src/read.rs @@ -1,11 +1,11 @@ -use anyhow::{bail, Result}; -use log::warn; +use anyhow::{anyhow, bail, Result}; +use log::{debug, warn}; use std::{ collections::VecDeque, - io::{Read, Seek}, + io::{Read, Seek, SeekFrom}, }; -use crate::matroska::MatroskaTag; +use crate::{matroska::MatroskaTag, size::EbmlSize, Master}; trait ReadAndSeek: Read + Seek {} impl<T: Read + Seek> ReadAndSeek for T {} @@ -83,10 +83,11 @@ impl EbmlReader { if let Some(end) = e.end { if self.position >= end { if self.position != end { - warn!("we missed the end") + warn!("we missed the end by {} bytes", self.position - end) } self.stack.pop(); - self.queue.push_back(MatroskaTag::parse(e.id, &[0])?); + self.queue + .push_back(MatroskaTag::construct_master(e.id, Master::End)?); } } } @@ -95,7 +96,7 @@ impl EbmlReader { let size = self.read_tag_size()?; let is_master = MatroskaTag::is_master(id)?; let tag = if is_master { - MatroskaTag::parse(id, &[])? + MatroskaTag::construct_master(id, Master::Start(size))? } else { let data = self.read_buf(size)?; MatroskaTag::parse(id, &data)? @@ -114,8 +115,11 @@ impl EbmlReader { if valid_child { break; } else { - self.queue - .push_back(MatroskaTag::parse(self.stack.pop().unwrap().id, &[0])?); + debug!("{id}"); + self.queue.push_back(MatroskaTag::construct_master( + self.stack.pop().unwrap().id, + Master::End, + )?); } } } @@ -129,6 +133,39 @@ impl EbmlReader { self.queue.push_back(tag); Ok(()) } + + /// context should be the next expected tag, such that the stack can be derived from its path. + pub fn seek(&mut self, position: usize, context: MatroskaTag) -> Result<()> { + let path = context + .path() + .ok_or(anyhow!("global tags dont give context"))?; + debug!( + "seeking to {position} with a context restored from path {:x?}", + path + ); + self.queue.clear(); + self.position = position; + self.inner.seek(SeekFrom::Start(position as u64))?; + self.stack = path + .iter() + .map(|id| StackTag { id: *id, end: None }) + .collect(); + Ok(()) + } + + pub fn collect_master(&mut self, master: MatroskaTag) -> Result<Vec<MatroskaTag>> { + let end = MatroskaTag::construct_master(master.id(), Master::End)?; + let mut out = vec![]; + while let Some(t) = self.next() { + let t = t?; + if t == end { + break; + } else { + out.push(t) + } + } + Ok(out) + } } impl Iterator for EbmlReader { @@ -146,31 +183,63 @@ impl Iterator for EbmlReader { } } -#[derive(Debug, Clone, Copy)] -pub enum EbmlSize { - Exact(usize), - Unknown, +pub trait ReadValue: Sized { + fn from_buf(buf: &[u8]) -> anyhow::Result<Self>; } -impl EbmlSize { - pub fn from_vint((value, len): (u64, usize)) -> EbmlSize { - if value == ((1 << (7 * len)) - 1) { - Self::Unknown - } else { - Self::Exact(value as usize) + +impl ReadValue for u64 { + fn from_buf(buf: &[u8]) -> anyhow::Result<Self> { + if buf.len() > 8 { + bail!("u64 too big") } - } - pub fn some(self) -> Option<usize> { - match self { - EbmlSize::Exact(s) => Some(s), - EbmlSize::Unknown => None, + let mut val = 0u64; + for byte in buf { + val <<= 8; + val |= *byte as u64; } + Ok(val) } } -impl Into<usize> for EbmlSize { - fn into(self) -> usize { - match self { - EbmlSize::Exact(s) => s, - EbmlSize::Unknown => panic!("unknown size, where it should have been known"), +impl ReadValue for i64 { + fn from_buf(buf: &[u8]) -> anyhow::Result<Self> { + if buf.len() > 8 { + bail!("i64 too big") } + Ok(if buf[0] > 127 { + if buf.len() == 8 { + i64::from_be_bytes(buf.try_into().unwrap()) + } else { + -((1 << (buf.len() * 8)) - (u64::from_buf(buf)? as i64)) + } + } else { + u64::from_buf(buf)? as i64 + }) + } +} +impl ReadValue for f64 { + fn from_buf(buf: &[u8]) -> anyhow::Result<Self> { + Ok(if buf.len() == 4 { + f32::from_be_bytes(buf.try_into().unwrap()) as f64 + } else if buf.len() == 8 { + f64::from_be_bytes(buf.try_into().unwrap()) + } else { + bail!("float is not 4 or 8 bytes long"); + }) + } +} + +impl ReadValue for Vec<u8> { + fn from_buf(buf: &[u8]) -> anyhow::Result<Self> { + Ok(buf.to_vec()) + } +} +impl ReadValue for String { + fn from_buf(buf: &[u8]) -> anyhow::Result<Self> { + Ok(String::from_utf8(Vec::from(buf))?) + } +} +impl ReadValue for Master { + fn from_buf(_: &[u8]) -> anyhow::Result<Self> { + bail!("master cannot be parsed") } } diff --git a/ebml/src/size.rs b/ebml/src/size.rs new file mode 100644 index 0000000..06d6ba8 --- /dev/null +++ b/ebml/src/size.rs @@ -0,0 +1,29 @@ + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum EbmlSize { + Exact(usize), + Unknown, +} +impl EbmlSize { + pub fn from_vint((value, len): (u64, usize)) -> EbmlSize { + if value == ((1 << (7 * len)) - 1) { + Self::Unknown + } else { + Self::Exact(value as usize) + } + } + pub fn some(self) -> Option<usize> { + match self { + EbmlSize::Exact(s) => Some(s), + EbmlSize::Unknown => None, + } + } +} +impl Into<usize> for EbmlSize { + fn into(self) -> usize { + match self { + EbmlSize::Exact(s) => s, + EbmlSize::Unknown => panic!("unknown size, where it should have been known"), + } + } +} diff --git a/ebml/src/write.rs b/ebml/src/write.rs index b294d44..f57a5c9 100644 --- a/ebml/src/write.rs +++ b/ebml/src/write.rs @@ -1,14 +1,19 @@ use anyhow::{bail, Result}; use std::io::Write; -pub struct EbmlWriter<T> { - inner: T, +use crate::{matroska::MatroskaTag, size::EbmlSize, Master}; + +pub struct EbmlWriter { + inner: Box<dyn Write>, position: usize, } -impl<T: Write> EbmlWriter<T> { - pub fn new(inner: T, position: usize) -> Self { - Self { inner, position } +impl EbmlWriter { + pub fn new<T: Write + 'static>(inner: T, position: usize) -> Self { + Self { + inner: Box::new(inner), + position, + } } pub fn write(&mut self, data: &[u8]) -> Result<()> { @@ -17,6 +22,21 @@ impl<T: Write> EbmlWriter<T> { Ok(()) } + pub fn write_tag(&mut self, tag: &MatroskaTag) -> Result<()> { + self.write_tag_id(tag.id())?; + let mut buf = vec![]; + tag.write(&mut buf)?; + self.write(&buf)?; + Ok(()) + } + + pub fn write_tag_id(&mut self, id: u64) -> Result<()> { + for n in id.to_be_bytes().iter().skip_while(|&v| *v == 0u8) { + self.write(&[*n])?; + } + Ok(()) + } + pub fn write_vint(&mut self, i: u64) -> Result<()> { if i > (1 << 56) - 1 { bail!("vint does not fit"); @@ -29,8 +49,115 @@ impl<T: Write> EbmlWriter<T> { len += 1; } let mut bytes = i.to_be_bytes(); - bytes[0] |= 1 << (8 - len); - self.write(&bytes) + let trunc = &mut bytes[(8 - len)..]; + trunc[0] |= 1 << (8 - len); + self.write(&trunc) } } +pub trait WriteValue { + fn write_to(&self, w: &mut Vec<u8>) -> Result<()>; +} + +impl WriteValue for i64 { + fn write_to(&self, w: &mut Vec<u8>) -> Result<()> { + Ok(match 64 - self.leading_zeros() { + x if x <= 8 => { + w.push(0x81); + w.extend_from_slice(&(*self as i8).to_be_bytes()); + } + x if x <= 16 => { + w.push(0x82); + w.extend_from_slice(&(*self as i16).to_be_bytes()); + } + x if x <= 32 => { + w.push(0x84); + w.extend_from_slice(&(*self as i32).to_be_bytes()); + } + _ => { + w.push(0x88); + w.extend_from_slice(&self.to_be_bytes()); + } + }) + } +} +impl WriteValue for u64 { + fn write_to(&self, w: &mut Vec<u8>) -> Result<()> { + Ok(match 64 - self.leading_zeros() { + x if x <= 8 => { + w.push(0x81); + w.extend_from_slice(&(*self as u8).to_be_bytes()); + } + x if x <= 16 => { + w.push(0x82); + w.extend_from_slice(&(*self as u16).to_be_bytes()); + } + x if x <= 32 => { + w.push(0x84); + w.extend_from_slice(&(*self as u32).to_be_bytes()); + } + _ => { + w.push(0x88); + w.extend_from_slice(&self.to_be_bytes()); + } + }) + } +} +impl WriteValue for f64 { + fn write_to(&self, w: &mut Vec<u8>) -> Result<(), anyhow::Error> { + w.push(0x88); + w.extend_from_slice(&self.to_be_bytes()); + Ok(()) + } +} +impl WriteValue for Vec<u8> { + fn write_to(&self, w: &mut Vec<u8>) -> Result<(), anyhow::Error> { + write_vint(w, self.len() as u64)?; + w.extend_from_slice(&self); + Ok(()) + } +} +impl WriteValue for String { + fn write_to(&self, w: &mut Vec<u8>) -> Result<(), anyhow::Error> { + let sl = self.as_bytes(); + write_vint(w, sl.len() as u64)?; + w.extend_from_slice(sl); + Ok(()) + } +} +impl WriteValue for EbmlSize { + fn write_to(&self, w: &mut Vec<u8>) -> Result<()> { + match self { + EbmlSize::Exact(s) => write_vint(w, *s as u64)?, + EbmlSize::Unknown => w.extend_from_slice(&(u64::MAX >> 7).to_be_bytes()), + } + Ok(()) + } +} + +impl WriteValue for Master { + fn write_to(&self, w: &mut Vec<u8>) -> Result<()> { + match self { + Master::Start(size) => size.write_to(w), + Master::End => Ok(()), + } + } +} + +pub fn write_vint(w: &mut Vec<u8>, i: u64) -> Result<()> { + if i > (1 << 56) - 1 { + bail!("vint does not fit"); + } + let mut len = 1; + while len <= 8 { + if i < (1 << ((7 * len) - 1)) { + break; + } + len += 1; + } + let mut bytes = i.to_be_bytes(); + let trunc = &mut bytes[(8 - len)..]; + trunc[0] |= 1 << (8 - len); + w.extend_from_slice(&trunc); + Ok(()) +} |