aboutsummaryrefslogtreecommitdiff
path: root/remuxer/src/demuxers/matroska.rs
diff options
context:
space:
mode:
Diffstat (limited to 'remuxer/src/demuxers/matroska.rs')
-rw-r--r--remuxer/src/demuxers/matroska.rs199
1 files changed, 199 insertions, 0 deletions
diff --git a/remuxer/src/demuxers/matroska.rs b/remuxer/src/demuxers/matroska.rs
new file mode 100644
index 0000000..000970e
--- /dev/null
+++ b/remuxer/src/demuxers/matroska.rs
@@ -0,0 +1,199 @@
+/*
+ This file is part of jellything (https://codeberg.org/metamuffin/jellything)
+ which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
+ Copyright (C) 2025 metamuffin <metamuffin.org>
+*/
+
+use crate::demuxers::{Demuxer, DemuxerNew, ReadSeek};
+use anyhow::{Context, Result, anyhow, bail};
+use log::debug;
+use std::io::{BufRead, BufReader, Read, Seek, SeekFrom};
+use winter_ebml::{Ebml, EbmlHeader, VintReadExt, read_vint_slice};
+use winter_matroska::{
+ Attachments, Chapters, Cluster, Cues, Info, MatroskaFile, SeekHead, Segment, Tags, Tracks,
+};
+
+/// Demuxer state for a Matroska/WebM input read through a buffered, seekable reader.
+pub struct MatroskaDemuxer {
+ /// Buffered handle to the underlying input.
+ reader: BufReader<Box<dyn ReadSeek>>,
+ /// Cached stream position right after the Segment element header; `None` until
+ /// `segment_offset` has computed it.
+ segment_offset: Option<u64>,
+ /// Cached SeekHead; `None` until `seek_head` has successfully parsed one.
+ seek_head: Option<SeekHead>,
+}
+
+impl DemuxerNew for MatroskaDemuxer {
+    /// Wraps `reader` in a `BufReader` and starts with empty caches.
+    ///
+    /// Nothing is read from the input here; the segment offset and the SeekHead are
+    /// filled in by the methods that need them.
+    fn new(reader: Box<dyn ReadSeek>) -> Self {
+        let reader = BufReader::new(reader);
+        Self {
+            reader,
+            segment_offset: None,
+            seek_head: None,
+        }
+    }
+}
+impl MatroskaDemuxer {
+    /// Reads exactly `size` bytes from the current reader position into a fresh buffer.
+    ///
+    /// The size comes straight from the file, so it is converted with a checked cast
+    /// instead of being silently truncated on targets where `usize` is narrower than `u64`.
+    fn read_payload(&mut self, size: u64) -> Result<Vec<u8>> {
+        let len = usize::try_from(size).context("element size exceeds addressable memory")?;
+        let mut buffer = vec![0u8; len];
+        self.reader.read_exact(&mut buffer)?;
+        Ok(buffer)
+    }
+
+    /// Returns the stream position of the first byte after the Segment element header.
+    ///
+    /// The first call rewinds to the start of the input, validates the EBML header and
+    /// the Segment header that must follow it, and caches the resulting position. Later
+    /// calls return the cached value without touching the reader.
+    ///
+    /// # Errors
+    /// Fails on I/O errors, if the first element is not an EBML header, if the DocType
+    /// is neither `matroska` nor `webm`, if the declared maximum ID/size lengths are
+    /// unsupported, or if the header is not directly followed by a Segment.
+    pub fn segment_offset(&mut self) -> Result<u64> {
+        if let Some(cached) = self.segment_offset {
+            return Ok(cached);
+        }
+        self.reader.seek(SeekFrom::Start(0))?;
+
+        let header_tag = self.reader.read_vint()?;
+        let header_size = self.reader.read_vint()?;
+        if header_tag != MatroskaFile::TAG_EBML_HEADER {
+            bail!("file is not ebml")
+        }
+
+        let header_raw = self.read_payload(header_size)?;
+        let header = EbmlHeader::read(&header_raw).context("parsing ebml header")?;
+        if !matches!(header.doc_type.as_str(), "matroska" | "webm") {
+            bail!("file is {:?} but not matroska/webm", header.doc_type)
+        }
+        if header.ebml_max_id_length != 4 {
+            bail!(
+                "file has invalid EBMLMaxIDLength of {}",
+                header.ebml_max_id_length
+            )
+        }
+        if !matches!(header.ebml_max_size_length, 1..=8) {
+            // Report the field that was actually checked.
+            bail!(
+                "file has invalid EBMLMaxSizeLength of {}",
+                header.ebml_max_size_length
+            )
+        }
+
+        let segment_tag = self.reader.read_vint()?;
+        let _segment_size = self.reader.read_vint()?;
+        if segment_tag != MatroskaFile::TAG_SEGMENT {
+            bail!("header not followed by segment")
+        }
+
+        let off = self.reader.stream_position()?;
+        debug!("segment offset is {off} (0x{off:x})");
+        self.segment_offset = Some(off);
+        Ok(off)
+    }
+
+    /// Positions the reader at the first byte after the Segment element header.
+    pub fn seek_segment_start(&mut self) -> Result<()> {
+        let seg_start = self.segment_offset()?;
+        self.reader.seek(SeekFrom::Start(seg_start))?;
+        Ok(())
+    }
+
+    /// Parses the SeekHead if it is the very first element inside the Segment.
+    ///
+    /// A successfully parsed SeekHead is cached and returned without any I/O on later
+    /// calls. `Ok(None)` means the first element is something else; that outcome is not
+    /// cached, so the check is repeated on the next call.
+    pub fn seek_head<'a>(&'a mut self) -> Result<Option<&'a SeekHead>> {
+        // Checked with `is_some` and re-borrowed, rather than `if let`, so the returned
+        // borrow does not overlap the mutable uses of `self` below.
+        if self.seek_head.is_some() {
+            return Ok(self.seek_head.as_ref());
+        }
+        self.seek_segment_start()?;
+        let tag = self.reader.read_vint()?;
+        let size = self.reader.read_vint()?;
+        // TODO skip possible CRC32 tag
+        if tag != Segment::TAG_SEEK_HEADS {
+            return Ok(None);
+        }
+
+        let raw = self.read_payload(size)?;
+        let seek_head = SeekHead::read(&raw).context("parsing seek head")?;
+        debug!("parsed {seek_head:#?}");
+        self.seek_head = Some(seek_head);
+        Ok(self.seek_head.as_ref())
+    }
+
+    /// Seeks to the content of a direct child of the Segment and returns the content size.
+    ///
+    /// If a SeekHead is available it is the only source consulted: a tag that is not
+    /// listed there yields `Ok(None)`. Without a SeekHead the top-level elements are
+    /// scanned from the segment start, and the scan gives up with `Ok(None)` at the
+    /// first Cluster.
+    ///
+    /// # Errors
+    /// Fails on I/O errors or when the element found at the position named by the
+    /// SeekHead does not carry `search_tag`.
+    pub fn seek_to_segment_tag(&mut self, search_tag: u64) -> Result<Option<u64>> {
+        if let Some(seek_head) = self.seek_head()? {
+            let Some(segment_position) = seek_head
+                .seeks
+                .iter()
+                .find(|s| read_vint_slice(&mut s.id.as_slice()).map_or(false, |x| x == search_tag))
+                .map(|s| s.position)
+            else {
+                return Ok(None);
+            };
+            // SeekHead positions are added to the segment offset to get a stream position.
+            let segment_offset = self.segment_offset()?;
+            self.reader
+                .seek(SeekFrom::Start(segment_offset + segment_position))?;
+
+            let tag = self.reader.read_vint()?;
+            let size = self.reader.read_vint()?;
+            if tag != search_tag {
+                // Both ids are printed in hex so they can be compared directly.
+                bail!("SeekHead was lying (expected {search_tag:x}, got {tag:x})");
+            }
+            Ok(Some(size))
+        } else {
+            self.seek_segment_start()?;
+            loop {
+                let tag = self.reader.read_vint()?;
+                let size = self.reader.read_vint()?;
+                if tag == search_tag {
+                    break Ok(Some(size));
+                }
+                if tag == Segment::TAG_CLUSTERS {
+                    break Ok(None);
+                }
+                let skip = i64::try_from(size).context("element size too large to skip")?;
+                self.reader.seek_relative(skip)?;
+            }
+        }
+    }
+
+    /// Reads `size` bytes at the current position and parses them as `Tag`.
+    pub fn read_tag<Tag: Ebml>(&mut self, size: u64) -> Result<Tag> {
+        let buffer = self.read_payload(size)?;
+        Ok(Tag::read(&buffer)?)
+    }
+
+    /// Locates the Segment child identified by `tag` and parses it as `Tag`.
+    ///
+    /// `name` is only used for logging and error messages.
+    ///
+    /// # Errors
+    /// Fails if the element cannot be located or cannot be parsed.
+    pub fn read_segment_tag<Tag: Ebml>(&mut self, name: &'static str, tag: u64) -> Result<Tag> {
+        debug!("reading {name:?}");
+        // Error values are built lazily so the success path does not format or allocate.
+        let size = self
+            .seek_to_segment_tag(tag)?
+            .ok_or_else(|| anyhow!("{name} tag missing"))?;
+        self.read_tag(size)
+            .with_context(|| format!("parsing {name} failed"))
+    }
+}
+impl Demuxer for MatroskaDemuxer {
+    /// Reads and parses the Segment's Info element.
+    fn info(&mut self) -> Result<Info> {
+        self.read_segment_tag("Info", Segment::TAG_INFO)
+    }
+    /// Reads and parses the Segment's Tracks element.
+    fn tracks(&mut self) -> Result<Tracks> {
+        self.read_segment_tag("Tracks", Segment::TAG_TRACKS)
+    }
+    /// Reads and parses the Segment's Chapters element.
+    fn chapters(&mut self) -> Result<Chapters> {
+        self.read_segment_tag("Chapters", Segment::TAG_CHAPTERS)
+    }
+    /// Reads and parses the Segment's Attachments element.
+    fn attachments(&mut self) -> Result<Attachments> {
+        self.read_segment_tag("Attachments", Segment::TAG_ATTACHMENTS)
+    }
+    /// Reads and parses the Segment's Tags element.
+    fn tags(&mut self) -> Result<Tags> {
+        self.read_segment_tag("Tags", Segment::TAG_TAGS)
+    }
+    /// Reads and parses the Segment's Cues element.
+    fn cues(&mut self) -> Result<Cues> {
+        self.read_segment_tag("Cues", Segment::TAG_CUES)
+    }
+
+    /// Positions the reader so that the next `read_cluster` call yields a cluster.
+    ///
+    /// `Some(pos)` seeks to that stream position, which must be the start of an element
+    /// header (for example a position returned earlier by `read_cluster`). `None` seeks
+    /// to the start of the Segment content.
+    fn seek_cluster(&mut self, position: Option<u64>) -> Result<()> {
+        match position {
+            Some(pos) => {
+                self.reader.seek(SeekFrom::Start(pos))?;
+            }
+            // `read_cluster` must start on an element header and skips non-Cluster
+            // elements itself. `seek_to_segment_tag` would leave the reader inside the
+            // cluster content (or not move it at all), losing the first cluster.
+            None => self.seek_segment_start()?,
+        }
+        Ok(())
+    }
+
+    /// Reads the next Cluster at or after the current position.
+    ///
+    /// Elements other than Cluster are skipped. Returns the stream position of the
+    /// cluster's element header together with the parsed cluster, or `Ok(None)` when
+    /// the input ends before another element starts.
+    fn read_cluster(&mut self) -> Result<Option<(u64, Cluster)>> {
+        loop {
+            // An empty fill at an element boundary means the input is exhausted.
+            if self.reader.fill_buf()?.is_empty() {
+                break Ok(None);
+            }
+            let position = self.reader.stream_position()?;
+            let tag = self.reader.read_vint()?;
+            let size = self.reader.read_vint()?;
+            if tag != Segment::TAG_CLUSTERS {
+                let skip = i64::try_from(size).context("element size too large to skip")?;
+                self.reader.seek_relative(skip)?;
+                continue;
+            }
+
+            let len = usize::try_from(size).context("cluster size exceeds addressable memory")?;
+            let mut buffer = vec![0u8; len];
+            self.reader.read_exact(&mut buffer)?;
+            let cluster = Cluster::read(&buffer).context("parsing Cluster")?;
+
+            break Ok(Some((position, cluster)));
+        }
+    }
+}