about summary refs log tree commit diff
diff options
context:
space:
mode:
author metamuffin <metamuffin@disroot.org> 2023-01-14 18:04:16 +0100
committer metamuffin <metamuffin@disroot.org> 2023-01-14 18:04:16 +0100
commit f452df18749b13f9d83a6ea679361d195b4a9ae1 (patch)
tree 04897eef044ebed319949a0cdbd04232f0dce98c
parent 6c023ddeaa0894813fc74038af7568c2d867c052 (diff)
download jellything-f452df18749b13f9d83a6ea679361d195b4a9ae1.tar
jellything-f452df18749b13f9d83a6ea679361d195b4a9ae1.tar.bz2
jellything-f452df18749b13f9d83a6ea679361d195b4a9ae1.tar.zst
seeking and broken writing
-rw-r--r-- ebml/src/bin/experiment.rs 33
-rw-r--r-- ebml/src/bin/mkvdump.rs 14
-rw-r--r-- ebml/src/lib.rs 75
-rw-r--r-- ebml/src/matroska.rs 2
-rw-r--r-- ebml/src/read.rs 127
-rw-r--r-- ebml/src/size.rs 29
-rw-r--r-- ebml/src/write.rs 141
-rw-r--r-- ebml_derive/src/lib.rs 57
8 files changed, 360 insertions, 118 deletions
diff --git a/ebml/src/bin/experiment.rs b/ebml/src/bin/experiment.rs
index c510bb5..c850753 100644
--- a/ebml/src/bin/experiment.rs
+++ b/ebml/src/bin/experiment.rs
@@ -1,12 +1,31 @@
-use ebml::read::EbmlReader;
-use std::{fs::File, io::BufReader};
+use ebml::{matroska::MatroskaTag, read::EbmlReader, write::EbmlWriter};
+use std::{
+ fs::File,
+ io::{stdin, stdout, BufReader, BufWriter},
+};
-fn main() {
+fn main() -> anyhow::Result<()> {
env_logger::init_from_env("LOG");
- let f = File::open(std::env::args().skip(1).next().unwrap()).unwrap();
- let mut r = EbmlReader::new(BufReader::new(f));
+ let path = std::env::args().skip(1).next().unwrap();
+ let mut r = EbmlReader::new(BufReader::new(File::open(path)?));
+ let mut w = EbmlWriter::new(BufWriter::new(stdout()), 0);
- for tag in r {
- println!("{:?}", tag);
+ // r.seek(
+ // 631147167 + 52,
+ // ebml::matroska::MatroskaTag::Cues(Master::Start),
+ // )
+ // .unwrap();
+
+ while let Some(tag) = r.next() {
+ let tag = tag?;
+ w.write_tag(&tag)?;
+ // println!("{} {tag:?}", r.position)
+ // match tag {
+ // tag @ MatroskaTag::SeekHead(_) => {
+ // println!("{:?}", r.collect_master(tag));
+ // }
+ // _ => (),
+ // }
}
+ Ok(())
}
diff --git a/ebml/src/bin/mkvdump.rs b/ebml/src/bin/mkvdump.rs
new file mode 100644
index 0000000..ae621e6
--- /dev/null
+++ b/ebml/src/bin/mkvdump.rs
@@ -0,0 +1,14 @@
+use ebml::read::EbmlReader;
+use std::{fs::File, io::BufReader};
+
+fn main() -> anyhow::Result<()> {
+ env_logger::init_from_env("LOG");
+ let path = std::env::args().skip(1).next().unwrap();
+ let mut r = EbmlReader::new(BufReader::new(File::open(path)?));
+
+ while let Some(tag) = r.next() {
+ let tag = tag?;
+ println!("{} {tag:?}", r.position)
+ }
+ Ok(())
+}
diff --git a/ebml/src/lib.rs b/ebml/src/lib.rs
index 22af473..a03ce0d 100644
--- a/ebml/src/lib.rs
+++ b/ebml/src/lib.rs
@@ -1,77 +1,18 @@
-use anyhow::bail;
-
pub mod matroska;
pub mod read;
pub mod write;
+pub mod size;
+
+use size::EbmlSize;
+
+pub use read::ReadValue;
+pub use write::WriteValue;
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Master {
- Start,
+ Start(EbmlSize),
End,
}
-pub trait ValueFromBuf: Sized {
- fn from_buf(buf: &[u8]) -> anyhow::Result<Self>;
-}
-impl ValueFromBuf for u64 {
- fn from_buf(buf: &[u8]) -> anyhow::Result<Self> {
- if buf.len() > 8 {
- bail!("u64 too big")
- }
- let mut val = 0u64;
- for byte in buf {
- val <<= 8;
- val |= *byte as u64;
- }
- Ok(val)
- }
-}
-impl ValueFromBuf for i64 {
- fn from_buf(buf: &[u8]) -> anyhow::Result<Self> {
- if buf.len() > 8 {
- bail!("i64 too big")
- }
- Ok(if buf[0] > 127 {
- if buf.len() == 8 {
- i64::from_be_bytes(buf.try_into().unwrap())
- } else {
- -((1 << (buf.len() * 8)) - (u64::from_buf(buf)? as i64))
- }
- } else {
- u64::from_buf(buf)? as i64
- })
- }
-}
-impl ValueFromBuf for f64 {
- fn from_buf(buf: &[u8]) -> anyhow::Result<Self> {
- Ok(if buf.len() == 4 {
- f32::from_be_bytes(buf.try_into().unwrap()) as f64
- } else if buf.len() == 8 {
- f64::from_be_bytes(buf.try_into().unwrap())
- } else {
- bail!("float is not 4 or 8 bytes long");
- })
- }
-}
-impl ValueFromBuf for Vec<u8> {
- fn from_buf(buf: &[u8]) -> anyhow::Result<Self> {
- Ok(buf.to_vec())
- }
-}
-impl ValueFromBuf for String {
- fn from_buf(buf: &[u8]) -> anyhow::Result<Self> {
- Ok(String::from_utf8(Vec::from(buf))?)
- }
-}
-impl ValueFromBuf for Master {
- /// this has an internal usage, where buf has len of 1, when an end should be constructed
- fn from_buf(buf: &[u8]) -> anyhow::Result<Self> {
- Ok(match buf.len() {
- 0 => Master::Start,
- 1 => Master::End,
- _ => unreachable!(),
- })
- }
-}
diff --git a/ebml/src/matroska.rs b/ebml/src/matroska.rs
index 9678e80..b4078ab 100644
--- a/ebml/src/matroska.rs
+++ b/ebml/src/matroska.rs
@@ -34,7 +34,7 @@ define_ebml! {
Chapters[0x1043A770]: {
EditionEntry[0x45B9]: {
- ChapPterAtom[0xB6]: {
+ ChapterAtom[0xB6]: {
ChapProcess[0x6944]: {
ChapProcessCodecID[0x6955]: Uint,
ChapProcessCommand[0x6911]: {
diff --git a/ebml/src/read.rs b/ebml/src/read.rs
index 3e02e18..3853782 100644
--- a/ebml/src/read.rs
+++ b/ebml/src/read.rs
@@ -1,11 +1,11 @@
-use anyhow::{bail, Result};
-use log::warn;
+use anyhow::{anyhow, bail, Result};
+use log::{debug, warn};
use std::{
collections::VecDeque,
- io::{Read, Seek},
+ io::{Read, Seek, SeekFrom},
};
-use crate::matroska::MatroskaTag;
+use crate::{matroska::MatroskaTag, size::EbmlSize, Master};
trait ReadAndSeek: Read + Seek {}
impl<T: Read + Seek> ReadAndSeek for T {}
@@ -83,10 +83,11 @@ impl EbmlReader {
if let Some(end) = e.end {
if self.position >= end {
if self.position != end {
- warn!("we missed the end")
+ warn!("we missed the end by {} bytes", self.position - end)
}
self.stack.pop();
- self.queue.push_back(MatroskaTag::parse(e.id, &[0])?);
+ self.queue
+ .push_back(MatroskaTag::construct_master(e.id, Master::End)?);
}
}
}
@@ -95,7 +96,7 @@ impl EbmlReader {
let size = self.read_tag_size()?;
let is_master = MatroskaTag::is_master(id)?;
let tag = if is_master {
- MatroskaTag::parse(id, &[])?
+ MatroskaTag::construct_master(id, Master::Start(size))?
} else {
let data = self.read_buf(size)?;
MatroskaTag::parse(id, &data)?
@@ -114,8 +115,11 @@ impl EbmlReader {
if valid_child {
break;
} else {
- self.queue
- .push_back(MatroskaTag::parse(self.stack.pop().unwrap().id, &[0])?);
+ debug!("{id}");
+ self.queue.push_back(MatroskaTag::construct_master(
+ self.stack.pop().unwrap().id,
+ Master::End,
+ )?);
}
}
}
@@ -129,6 +133,39 @@ impl EbmlReader {
self.queue.push_back(tag);
Ok(())
}
+
+ /// context should be the next expected tag, such that the stack can be derived from its path.
+ pub fn seek(&mut self, position: usize, context: MatroskaTag) -> Result<()> {
+ let path = context
+ .path()
+ .ok_or(anyhow!("global tags dont give context"))?;
+ debug!(
+ "seeking to {position} with a context restored from path {:x?}",
+ path
+ );
+ self.queue.clear();
+ self.position = position;
+ self.inner.seek(SeekFrom::Start(position as u64))?;
+ self.stack = path
+ .iter()
+ .map(|id| StackTag { id: *id, end: None })
+ .collect();
+ Ok(())
+ }
+
+ pub fn collect_master(&mut self, master: MatroskaTag) -> Result<Vec<MatroskaTag>> {
+ let end = MatroskaTag::construct_master(master.id(), Master::End)?;
+ let mut out = vec![];
+ while let Some(t) = self.next() {
+ let t = t?;
+ if t == end {
+ break;
+ } else {
+ out.push(t)
+ }
+ }
+ Ok(out)
+ }
}
impl Iterator for EbmlReader {
@@ -146,31 +183,63 @@ impl Iterator for EbmlReader {
}
}
-#[derive(Debug, Clone, Copy)]
-pub enum EbmlSize {
- Exact(usize),
- Unknown,
+pub trait ReadValue: Sized {
+ fn from_buf(buf: &[u8]) -> anyhow::Result<Self>;
}
-impl EbmlSize {
- pub fn from_vint((value, len): (u64, usize)) -> EbmlSize {
- if value == ((1 << (7 * len)) - 1) {
- Self::Unknown
- } else {
- Self::Exact(value as usize)
+
+impl ReadValue for u64 {
+ fn from_buf(buf: &[u8]) -> anyhow::Result<Self> {
+ if buf.len() > 8 {
+ bail!("u64 too big")
}
- }
- pub fn some(self) -> Option<usize> {
- match self {
- EbmlSize::Exact(s) => Some(s),
- EbmlSize::Unknown => None,
+ let mut val = 0u64;
+ for byte in buf {
+ val <<= 8;
+ val |= *byte as u64;
}
+ Ok(val)
}
}
-impl Into<usize> for EbmlSize {
- fn into(self) -> usize {
- match self {
- EbmlSize::Exact(s) => s,
- EbmlSize::Unknown => panic!("unknown size, where it should have been known"),
+impl ReadValue for i64 {
+ fn from_buf(buf: &[u8]) -> anyhow::Result<Self> {
+ if buf.len() > 8 {
+ bail!("i64 too big")
}
+ Ok(if buf[0] > 127 {
+ if buf.len() == 8 {
+ i64::from_be_bytes(buf.try_into().unwrap())
+ } else {
+ -((1 << (buf.len() * 8)) - (u64::from_buf(buf)? as i64))
+ }
+ } else {
+ u64::from_buf(buf)? as i64
+ })
+ }
+}
+impl ReadValue for f64 {
+ fn from_buf(buf: &[u8]) -> anyhow::Result<Self> {
+ Ok(if buf.len() == 4 {
+ f32::from_be_bytes(buf.try_into().unwrap()) as f64
+ } else if buf.len() == 8 {
+ f64::from_be_bytes(buf.try_into().unwrap())
+ } else {
+ bail!("float is not 4 or 8 bytes long");
+ })
+ }
+}
+
+impl ReadValue for Vec<u8> {
+ fn from_buf(buf: &[u8]) -> anyhow::Result<Self> {
+ Ok(buf.to_vec())
+ }
+}
+impl ReadValue for String {
+ fn from_buf(buf: &[u8]) -> anyhow::Result<Self> {
+ Ok(String::from_utf8(Vec::from(buf))?)
+ }
+}
+impl ReadValue for Master {
+ fn from_buf(_: &[u8]) -> anyhow::Result<Self> {
+ bail!("master cannot be parsed")
}
}
diff --git a/ebml/src/size.rs b/ebml/src/size.rs
new file mode 100644
index 0000000..06d6ba8
--- /dev/null
+++ b/ebml/src/size.rs
@@ -0,0 +1,29 @@
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum EbmlSize {
+ Exact(usize),
+ Unknown,
+}
+impl EbmlSize {
+ pub fn from_vint((value, len): (u64, usize)) -> EbmlSize {
+ if value == ((1 << (7 * len)) - 1) {
+ Self::Unknown
+ } else {
+ Self::Exact(value as usize)
+ }
+ }
+ pub fn some(self) -> Option<usize> {
+ match self {
+ EbmlSize::Exact(s) => Some(s),
+ EbmlSize::Unknown => None,
+ }
+ }
+}
+impl Into<usize> for EbmlSize {
+ fn into(self) -> usize {
+ match self {
+ EbmlSize::Exact(s) => s,
+ EbmlSize::Unknown => panic!("unknown size, where it should have been known"),
+ }
+ }
+}
diff --git a/ebml/src/write.rs b/ebml/src/write.rs
index b294d44..f57a5c9 100644
--- a/ebml/src/write.rs
+++ b/ebml/src/write.rs
@@ -1,14 +1,19 @@
use anyhow::{bail, Result};
use std::io::Write;
-pub struct EbmlWriter<T> {
- inner: T,
+use crate::{matroska::MatroskaTag, size::EbmlSize, Master};
+
+pub struct EbmlWriter {
+ inner: Box<dyn Write>,
position: usize,
}
-impl<T: Write> EbmlWriter<T> {
- pub fn new(inner: T, position: usize) -> Self {
- Self { inner, position }
+impl EbmlWriter {
+ pub fn new<T: Write + 'static>(inner: T, position: usize) -> Self {
+ Self {
+ inner: Box::new(inner),
+ position,
+ }
}
pub fn write(&mut self, data: &[u8]) -> Result<()> {
@@ -17,6 +22,21 @@ impl<T: Write> EbmlWriter<T> {
Ok(())
}
+ pub fn write_tag(&mut self, tag: &MatroskaTag) -> Result<()> {
+ self.write_tag_id(tag.id())?;
+ let mut buf = vec![];
+ tag.write(&mut buf)?;
+ self.write(&buf)?;
+ Ok(())
+ }
+
+ pub fn write_tag_id(&mut self, id: u64) -> Result<()> {
+ for n in id.to_be_bytes().iter().skip_while(|&v| *v == 0u8) {
+ self.write(&[*n])?;
+ }
+ Ok(())
+ }
+
pub fn write_vint(&mut self, i: u64) -> Result<()> {
if i > (1 << 56) - 1 {
bail!("vint does not fit");
@@ -29,8 +49,115 @@ impl<T: Write> EbmlWriter<T> {
len += 1;
}
let mut bytes = i.to_be_bytes();
- bytes[0] |= 1 << (8 - len);
- self.write(&bytes)
+ let trunc = &mut bytes[(8 - len)..];
+ trunc[0] |= 1 << (8 - len);
+ self.write(&trunc)
}
}
+pub trait WriteValue {
+ fn write_to(&self, w: &mut Vec<u8>) -> Result<()>;
+}
+
+impl WriteValue for i64 {
+ fn write_to(&self, w: &mut Vec<u8>) -> Result<()> {
+ Ok(match 64 - self.leading_zeros() {
+ x if x <= 8 => {
+ w.push(0x81);
+ w.extend_from_slice(&(*self as i8).to_be_bytes());
+ }
+ x if x <= 16 => {
+ w.push(0x82);
+ w.extend_from_slice(&(*self as i16).to_be_bytes());
+ }
+ x if x <= 32 => {
+ w.push(0x84);
+ w.extend_from_slice(&(*self as i32).to_be_bytes());
+ }
+ _ => {
+ w.push(0x88);
+ w.extend_from_slice(&self.to_be_bytes());
+ }
+ })
+ }
+}
+impl WriteValue for u64 {
+ fn write_to(&self, w: &mut Vec<u8>) -> Result<()> {
+ Ok(match 64 - self.leading_zeros() {
+ x if x <= 8 => {
+ w.push(0x81);
+ w.extend_from_slice(&(*self as u8).to_be_bytes());
+ }
+ x if x <= 16 => {
+ w.push(0x82);
+ w.extend_from_slice(&(*self as u16).to_be_bytes());
+ }
+ x if x <= 32 => {
+ w.push(0x84);
+ w.extend_from_slice(&(*self as u32).to_be_bytes());
+ }
+ _ => {
+ w.push(0x88);
+ w.extend_from_slice(&self.to_be_bytes());
+ }
+ })
+ }
+}
+impl WriteValue for f64 {
+ fn write_to(&self, w: &mut Vec<u8>) -> Result<(), anyhow::Error> {
+ w.push(0x88);
+ w.extend_from_slice(&self.to_be_bytes());
+ Ok(())
+ }
+}
+impl WriteValue for Vec<u8> {
+ fn write_to(&self, w: &mut Vec<u8>) -> Result<(), anyhow::Error> {
+ write_vint(w, self.len() as u64)?;
+ w.extend_from_slice(&self);
+ Ok(())
+ }
+}
+impl WriteValue for String {
+ fn write_to(&self, w: &mut Vec<u8>) -> Result<(), anyhow::Error> {
+ let sl = self.as_bytes();
+ write_vint(w, sl.len() as u64)?;
+ w.extend_from_slice(sl);
+ Ok(())
+ }
+}
+impl WriteValue for EbmlSize {
+ fn write_to(&self, w: &mut Vec<u8>) -> Result<()> {
+ match self {
+ EbmlSize::Exact(s) => write_vint(w, *s as u64)?,
+ EbmlSize::Unknown => w.extend_from_slice(&(u64::MAX >> 7).to_be_bytes()),
+ }
+ Ok(())
+ }
+}
+
+impl WriteValue for Master {
+ fn write_to(&self, w: &mut Vec<u8>) -> Result<()> {
+ match self {
+ Master::Start(size) => size.write_to(w),
+ Master::End => Ok(()),
+ }
+ }
+}
+
+pub fn write_vint(w: &mut Vec<u8>, i: u64) -> Result<()> {
+ if i > (1 << 56) - 1 {
+ bail!("vint does not fit");
+ }
+ let mut len = 1;
+ while len <= 8 {
+ if i < (1 << ((7 * len) - 1)) {
+ break;
+ }
+ len += 1;
+ }
+ let mut bytes = i.to_be_bytes();
+ let trunc = &mut bytes[(8 - len)..];
+ trunc[0] |= 1 << (8 - len);
+ w.extend_from_slice(&trunc);
+ Ok(())
+}
diff --git a/ebml_derive/src/lib.rs b/ebml_derive/src/lib.rs
index fc8af7d..056071d 100644
--- a/ebml_derive/src/lib.rs
+++ b/ebml_derive/src/lib.rs
@@ -43,8 +43,7 @@ pub fn define_ebml(ts: TokenStream) -> TokenStream {
.iter()
.map(|e| {
let name = &e.name;
- let mut path = e.path.clone();
- path.reverse();
+ let path = e.path.clone();
if e.global {
quote! { Self::#name(_) => None }
} else {
@@ -52,14 +51,48 @@ pub fn define_ebml(ts: TokenStream) -> TokenStream {
}
})
.collect::<Vec<_>>();
+ let id_match = tags
+ .iter()
+ .map(|Tag { id, name, .. }| {
+ quote! { Self::#name(_) => #id }
+ })
+ .collect::<Vec<_>>();
let parse_match = tags
.iter()
- .map(|Tag { id, name, .. }| {
- quote! { #id => Self::#name(crate::ValueFromBuf::from_buf(data)?) }
+ .filter_map(
+ |Tag {
+ id, name, r#type, ..
+ }| {
+ if let Some(_) = r#type {
+ Some(quote! { #id => Self::#name(crate::ReadValue::from_buf(data)?) })
+ } else {
+ None
+ }
+ },
+ )
+ .collect::<Vec<_>>();
+ let write_match = tags
+ .iter()
+ .filter_map(|Tag { name, .. }| {
+ Some(quote! { Self::#name(v) => v.write_to(w) })
})
.collect::<Vec<_>>();
- let master_match = tags
+ let cons_master_match = tags
+ .iter()
+ .filter_map(
+ |Tag {
+ id, name, r#type, ..
+ }| {
+ if let None = r#type {
+ Some(quote! { #id => Self::#name(kind) })
+ } else {
+ None
+ }
+ },
+ )
+ .collect::<Vec<_>>();
+ let is_master_match = tags
.iter()
.map(|Tag { id, r#type, .. }| match r#type {
None => quote!(#id => true),
@@ -69,8 +102,9 @@ pub fn define_ebml(ts: TokenStream) -> TokenStream {
quote! {
use crate::Master;
+ use crate::WriteValue;
- #[derive(Debug)]
+ #[derive(Debug, PartialEq, Clone)]
pub enum MatroskaTag {
#(#enum_variants),*
}
@@ -79,12 +113,21 @@ pub fn define_ebml(ts: TokenStream) -> TokenStream {
pub fn path(&self) -> Option<&'static [u64]> {
match self { #(#path_match),* }
}
+ pub fn id(&self) -> u64 {
+ match self { #(#id_match),* }
+ }
pub fn is_master(id: u64) -> anyhow::Result<bool> {
- Ok(match id { #(#master_match),*, _ => anyhow::bail!("unknown id") })
+ Ok(match id { #(#is_master_match),*, _ => anyhow::bail!("unknown id") })
+ }
+ pub fn construct_master(id: u64, kind: Master) -> anyhow::Result<Self> {
+ Ok(match id { #(#cons_master_match),*, _ => anyhow::bail!("unknown id") })
}
pub fn parse(id: u64, data: &[u8]) -> anyhow::Result<Self> {
Ok(match id { #(#parse_match),*, _ => anyhow::bail!("unknown id or master") })
}
+ pub fn write(&self, w: &mut Vec<u8>) -> anyhow::Result<()> {
+ match self { #(#write_match),* }
+ }
}
}
.into()