/* This file is part of jellything (https://codeberg.org/metamuffin/jellything) which is licensed under the GNU Affero General Public License (version 3); see /COPYING. Copyright (C) 2025 metamuffin */ use crate::demuxers::{Demuxer, DemuxerNew, ReadSeek}; use anyhow::{Context, Result, anyhow, bail}; use log::debug; use std::io::{BufReader, ErrorKind, Read, Seek, SeekFrom}; use winter_ebml::{Ebml, EbmlHeader, VintReadExt, read_vint_slice}; use winter_matroska::{ Attachments, Chapters, Cluster, Cues, Info, MatroskaFile, SeekHead, Segment, Tags, Tracks, }; pub struct MatroskaDemuxer { reader: BufReader>, segment_offset: Option, seek_head: Option, } impl DemuxerNew for MatroskaDemuxer { fn new(reader: Box) -> Self { Self { reader: BufReader::new(reader), seek_head: None, segment_offset: None, } } } impl MatroskaDemuxer { pub fn segment_offset(&mut self) -> Result { if let Some(s) = self.segment_offset { return Ok(s); } self.reader.seek(SeekFrom::Start(0))?; let header_tag = self.reader.read_vint()?; let header_size = self.reader.read_vint()?; if header_tag != MatroskaFile::TAG_EBML_HEADER { bail!("file is not ebml") } let mut header_raw = vec![0u8; header_size as usize]; self.reader.read_exact(&mut header_raw)?; let header = EbmlHeader::read(&header_raw).context("parsing ebml header")?; if !matches!(header.doc_type.as_str(), "matroska" | "webm") { bail!("file is {:?} but not matroska/webm", header.doc_type) } eprintln!("{header:?}"); if header.ebml_max_id_length != 4 { bail!( "file has invalid EBMLMaxIDLength of {}", header.ebml_max_id_length ) } if !matches!(header.ebml_max_size_length, 1..=8) { bail!( "file has invalid EBMLMaxIDLength of {}", header.ebml_max_id_length ) } let segment_tag = self.reader.read_vint()?; let _segment_size = self.reader.read_vint()?; if segment_tag != MatroskaFile::TAG_SEGMENT { bail!("header not followed by segment") } let off = self.reader.stream_position()?; debug!("segment offset is {off} (0x{off:x})"); self.segment_offset = Some(off); Ok(off) } pub fn seek_segment_start(&mut self) -> Result<()> { let seg_start = self.segment_offset()?; self.reader.seek(SeekFrom::Start(seg_start))?; Ok(()) } /// Parse SeekHead at segment start if exists pub fn seek_head<'a>(&'a mut self) -> Result> { if self.seek_head.is_some() { return Ok(self.seek_head.as_ref()); } self.seek_segment_start()?; let tag = self.reader.read_vint()?; let size = self.reader.read_vint()?; // TODO skip possible CRC32 tag if tag != Segment::TAG_SEEK_HEADS { return Ok(None); } let mut raw = vec![0u8; size as usize]; self.reader.read_exact(&mut raw)?; let seek_head = SeekHead::read(&raw).context("parsing seek head")?; debug!("parsed {seek_head:#?}"); self.seek_head = Some(seek_head); Ok(self.seek_head.as_ref()) } /// Seeks to the content of child tag of Segment possibly optimized via SeekHead. Returns the size of the content. pub fn seek_to_segment_tag(&mut self, search_tag: u64) -> Result> { if let Some(seek_head) = self.seek_head()? { let Some(segment_position) = seek_head .seeks .iter() .find(|s| read_vint_slice(&mut s.id.as_slice()).map_or(false, |x| x == search_tag)) .map(|s| s.position) else { return Ok(None); }; let segment_offset = self.segment_offset()?; self.reader .seek(SeekFrom::Start(segment_offset + segment_position))?; let tag = self.reader.read_vint()?; let size = self.reader.read_vint()?; if tag != search_tag { bail!("SeekHead was lying (expected {search_tag:?}, got {tag:x})"); } Ok(Some(size)) } else { self.seek_segment_start()?; loop { let tag = self.reader.read_vint()?; let size = self.reader.read_vint()?; if tag == search_tag { break Ok(Some(size)); } if tag == Segment::TAG_CLUSTERS { break Ok(None); } self.reader.seek_relative(size as i64)?; } } } pub fn read_tag(&mut self, size: u64) -> Result { let mut buffer = vec![0u8; size as usize]; self.reader.read_exact(&mut buffer)?; Ok(Tag::read(&buffer)?) } pub fn read_segment_tag( &mut self, name: &'static str, tag: u64, ) -> Result> { debug!("reading {name:?}"); let Some(size) = self.seek_to_segment_tag(tag)? else { return Ok(None); }; self.read_tag(size) .context(anyhow!("parsing {name} failed")) .map(Some) } } impl Demuxer for MatroskaDemuxer { fn info(&mut self) -> Result { Ok(self .read_segment_tag("Info", Segment::TAG_INFO)? .ok_or(anyhow!("info missing"))?) } fn tracks(&mut self) -> Result> { self.read_segment_tag("Tracks", Segment::TAG_TRACKS) } fn chapters(&mut self) -> Result> { self.read_segment_tag("Chapters", Segment::TAG_CHAPTERS) } fn attachments(&mut self) -> Result> { self.read_segment_tag("Attachments", Segment::TAG_ATTACHMENTS) } fn tags(&mut self) -> Result> { self.read_segment_tag("Tags", Segment::TAG_TAGS) } fn cues(&mut self) -> Result> { self.read_segment_tag("Cues", Segment::TAG_CUES) } fn seek_cluster(&mut self, position: Option) -> Result<()> { if let Some(pos) = position { self.reader.seek(SeekFrom::Start(pos))?; } else { self.seek_to_segment_tag(Segment::TAG_CLUSTERS)?; } Ok(()) } fn read_cluster(&mut self) -> Result> { loop { let position = self.reader.stream_position()?; // TODO handle eof let tag = match self.reader.read_vint() { Ok(val) => val, Err(e) if e.kind() == ErrorKind::UnexpectedEof => return Ok(None), Err(e) => return Err(e.into()), }; let size = self.reader.read_vint()?; if tag != Segment::TAG_CLUSTERS { self.reader.seek_relative(size as i64)?; continue; } let mut buffer = vec![0u8; size as usize]; self.reader.read_exact(&mut buffer)?; let cluster = Cluster::read(&buffer).context("parsing Cluster")?; break Ok(Some((position, cluster))); } } }