diff options
Diffstat (limited to 'remuxer/src/demuxers/matroska.rs')
-rw-r--r-- | remuxer/src/demuxers/matroska.rs | 199 |
1 files changed, 199 insertions, 0 deletions
diff --git a/remuxer/src/demuxers/matroska.rs b/remuxer/src/demuxers/matroska.rs new file mode 100644 index 0000000..000970e --- /dev/null +++ b/remuxer/src/demuxers/matroska.rs @@ -0,0 +1,199 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin <metamuffin.org> +*/ + +use crate::demuxers::{Demuxer, DemuxerNew, ReadSeek}; +use anyhow::{Context, Result, anyhow, bail}; +use log::debug; +use std::io::{BufReader, Read, Seek, SeekFrom}; +use winter_ebml::{Ebml, EbmlHeader, VintReadExt, read_vint_slice}; +use winter_matroska::{ + Attachments, Chapters, Cluster, Cues, Info, MatroskaFile, SeekHead, Segment, Tags, Tracks, +}; + +pub struct MatroskaDemuxer { + reader: BufReader<Box<dyn ReadSeek>>, + segment_offset: Option<u64>, + seek_head: Option<SeekHead>, +} + +impl DemuxerNew for MatroskaDemuxer { + fn new(reader: Box<dyn ReadSeek>) -> Self { + Self { + reader: BufReader::new(reader), + seek_head: None, + segment_offset: None, + } + } +} +impl MatroskaDemuxer { + pub fn segment_offset(&mut self) -> Result<u64> { + if let Some(s) = self.segment_offset { + return Ok(s); + } + self.reader.seek(SeekFrom::Start(0))?; + + let header_tag = self.reader.read_vint()?; + let header_size = self.reader.read_vint()?; + if header_tag != MatroskaFile::TAG_EBML_HEADER { + bail!("file is not ebml") + } + + let mut header_raw = vec![0u8; header_size as usize]; + self.reader.read_exact(&mut header_raw)?; + let header = EbmlHeader::read(&header_raw).context("parsing ebml header")?; + if !matches!(header.doc_type.as_str(), "matroska" | "webm") { + bail!("file is {:?} but not matroska/webm", header.doc_type) + } + if header.ebml_max_id_length != 4 { + bail!( + "file has invalid EBMLMaxIDLength of {}", + header.ebml_max_id_length + ) + } + if !matches!(header.ebml_max_size_length, 1..=8) { + bail!( + "file has invalid EBMLMaxIDLength of {}", + header.ebml_max_id_length + ) + } + + let segment_tag = self.reader.read_vint()?; + let _segment_size = self.reader.read_vint()?; + if segment_tag != MatroskaFile::TAG_SEGMENT { + bail!("header not followed by segment") + } + + let off = self.reader.stream_position()?; + debug!("segment offset is {off} (0x{off:x})"); + self.segment_offset = Some(off); + Ok(off) + } + + pub fn seek_segment_start(&mut self) -> Result<()> { + let seg_start = self.segment_offset()?; + self.reader.seek(SeekFrom::Start(seg_start))?; + Ok(()) + } + + /// Parse SeekHead at segment start if exists + pub fn seek_head<'a>(&'a mut self) -> Result<Option<&'a SeekHead>> { + if self.seek_head.is_some() { + return Ok(self.seek_head.as_ref()); + } + self.seek_segment_start()?; + let tag = self.reader.read_vint()?; + let size = self.reader.read_vint()?; + // TODO skip possible CRC32 tag + if tag != Segment::TAG_SEEK_HEADS { + return Ok(None); + } + + let mut raw = vec![0u8; size as usize]; + self.reader.read_exact(&mut raw)?; + let seek_head = SeekHead::read(&raw).context("parsing seek head")?; + debug!("parsed {seek_head:#?}"); + self.seek_head = Some(seek_head); + Ok(self.seek_head.as_ref()) + } + + /// Seeks to the content of child tag of Segment possibly optimized via SeekHead. Returns the size of the content. + pub fn seek_to_segment_tag(&mut self, search_tag: u64) -> Result<Option<u64>> { + if let Some(seek_head) = self.seek_head()? { + let Some(segment_position) = seek_head + .seeks + .iter() + .find(|s| read_vint_slice(&mut s.id.as_slice()).map_or(false, |x| x == search_tag)) + .map(|s| s.position) + else { + return Ok(None); + }; + let segment_offset = self.segment_offset()?; + self.reader + .seek(SeekFrom::Start(segment_offset + segment_position))?; + + let tag = self.reader.read_vint()?; + let size = self.reader.read_vint()?; + if tag != search_tag { + bail!("SeekHead was lying (expected {search_tag:?}, got {tag:x})"); + } + Ok(Some(size)) + } else { + self.seek_segment_start()?; + loop { + let tag = self.reader.read_vint()?; + let size = self.reader.read_vint()?; + if tag == search_tag { + break Ok(Some(size)); + } + if tag == Segment::TAG_CLUSTERS { + break Ok(None); + } + self.reader.seek_relative(size as i64)?; + } + } + } + + pub fn read_tag<Tag: Ebml>(&mut self, size: u64) -> Result<Tag> { + let mut buffer = vec![0u8; size as usize]; + self.reader.read_exact(&mut buffer)?; + Ok(Tag::read(&buffer)?) + } + pub fn read_segment_tag<Tag: Ebml>(&mut self, name: &'static str, tag: u64) -> Result<Tag> { + debug!("reading {name:?}"); + let size = self + .seek_to_segment_tag(tag)? + .ok_or(anyhow!("{name} tag missing"))?; + self.read_tag(size) + .context(anyhow!("parsing {name} failed")) + } +} +impl Demuxer for MatroskaDemuxer { + fn info(&mut self) -> Result<Info> { + self.read_segment_tag("Info", Segment::TAG_INFO) + } + fn tracks(&mut self) -> Result<Tracks> { + self.read_segment_tag("Tracks", Segment::TAG_TRACKS) + } + fn chapters(&mut self) -> Result<Chapters> { + self.read_segment_tag("Chapters", Segment::TAG_CHAPTERS) + } + fn attachments(&mut self) -> Result<Attachments> { + self.read_segment_tag("Attachments", Segment::TAG_ATTACHMENTS) + } + fn tags(&mut self) -> Result<Tags> { + self.read_segment_tag("Tags", Segment::TAG_TAGS) + } + fn cues(&mut self) -> Result<Cues> { + self.read_segment_tag("Cues", Segment::TAG_CUES) + } + + fn seek_cluster(&mut self, position: Option<u64>) -> Result<()> { + if let Some(pos) = position { + self.reader.seek(SeekFrom::Start(pos))?; + } else { + self.seek_to_segment_tag(Segment::TAG_CLUSTERS)?; + } + Ok(()) + } + fn read_cluster(&mut self) -> Result<Option<(u64, Cluster)>> { + loop { + let position = self.reader.stream_position()?; + // TODO handle eof + let tag = self.reader.read_vint()?; + let size = self.reader.read_vint()?; + if tag != Segment::TAG_CLUSTERS { + self.reader.seek_relative(size as i64)?; + continue; + } + + let mut buffer = vec![0u8; size as usize]; + self.reader.read_exact(&mut buffer)?; + let cluster = Cluster::read(&buffer).context("parsing Cluster")?; + + break Ok(Some((position, cluster))); + } + } +} |