diff options
author | metamuffin <metamuffin@disroot.org> | 2025-09-13 16:08:42 +0200 |
---|---|---|
committer | metamuffin <metamuffin@disroot.org> | 2025-09-13 16:08:42 +0200 |
commit | 044c7e1c75145f1ec9d002b4f6fc4433ff7f9540 (patch) | |
tree | db326c8f2327396ed443a1822936927e7c847494 /remuxer | |
parent | e99bde7a00a161ff5dd91eaf1ce546a9d98cef05 (diff) | |
download | jellything-044c7e1c75145f1ec9d002b4f6fc4433ff7f9540.tar jellything-044c7e1c75145f1ec9d002b4f6fc4433ff7f9540.tar.bz2 jellything-044c7e1c75145f1ec9d002b4f6fc4433ff7f9540.tar.zst |
start remuxer crate rewrite; added matroska demuxer and format detection
Diffstat (limited to 'remuxer')
-rw-r--r-- | remuxer/Cargo.toml | 11 | ||||
-rw-r--r-- | remuxer/src/bin/mkvinfo.rs | 20 | ||||
-rw-r--r-- | remuxer/src/demuxers/matroska.rs | 199 | ||||
-rw-r--r-- | remuxer/src/demuxers/mod.rs | 29 | ||||
-rw-r--r-- | remuxer/src/extract.rs | 51 | ||||
-rw-r--r-- | remuxer/src/fragment.rs | 219 | ||||
-rw-r--r-- | remuxer/src/lib.rs | 100 | ||||
-rw-r--r-- | remuxer/src/magic.rs | 67 | ||||
-rw-r--r-- | remuxer/src/matroska_to_mpeg4.rs | 36 | ||||
-rw-r--r-- | remuxer/src/matroska_to_webm.rs | 89 | ||||
-rw-r--r-- | remuxer/src/metadata.rs | 112 | ||||
-rw-r--r-- | remuxer/src/remux.rs | 311 | ||||
-rw-r--r-- | remuxer/src/seek_index.rs | 152 | ||||
-rw-r--r-- | remuxer/src/segment_extractor.rs | 60 | ||||
-rw-r--r-- | remuxer/src/trim_writer.rs | 72 |
15 files changed, 326 insertions, 1202 deletions
diff --git a/remuxer/Cargo.toml b/remuxer/Cargo.toml index 98dd86c..24cd9ab 100644 --- a/remuxer/Cargo.toml +++ b/remuxer/Cargo.toml @@ -1,19 +1,18 @@ [package] name = "jellyremuxer" version = "0.1.0" -edition = "2021" +edition = "2024" [dependencies] -jellymatroska = { path = "../matroska" } jellycache = { path = "../cache" } +hex = "0.4.3" -tokio = { version = "1.43.0", features = ["io-util"] } anyhow = "1.0.95" +env_logger = "0.11.8" log = { workspace = true } serde = { version = "1.0.217", features = ["derive"] } bincode = { version = "2.0.0-rc.3", features = ["serde"] } -ebml-struct = { git = "https://codeberg.org/metamuffin/ebml-struct", features = [ - "bincode", -] } +winter-ebml = { git = "https://codeberg.org/metamuffin/ebml-rs", package = "ebml" } +winter-matroska = { git = "https://codeberg.org/metamuffin/ebml-rs", package = "matroska" } diff --git a/remuxer/src/bin/mkvinfo.rs b/remuxer/src/bin/mkvinfo.rs new file mode 100644 index 0000000..0899245 --- /dev/null +++ b/remuxer/src/bin/mkvinfo.rs @@ -0,0 +1,20 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. 
+ Copyright (C) 2025 metamuffin <metamuffin.org> +*/ + +use anyhow::{Result, anyhow}; +use jellyremuxer::demuxers::{Demuxer, DemuxerNew, matroska::MatroskaDemuxer}; +use std::{env::args, fs::File}; + +fn main() -> Result<()> { + env_logger::init_from_env("LOG"); + let path = args().nth(1).ok_or(anyhow!("first arg is input path"))?; + let file = File::open(path)?; + let mut reader = MatroskaDemuxer::new(Box::new(file)); + + println!("INFO: {:#?}", reader.info()?); + + Ok(()) +} diff --git a/remuxer/src/demuxers/matroska.rs b/remuxer/src/demuxers/matroska.rs new file mode 100644 index 0000000..000970e --- /dev/null +++ b/remuxer/src/demuxers/matroska.rs @@ -0,0 +1,199 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin <metamuffin.org> +*/ + +use crate::demuxers::{Demuxer, DemuxerNew, ReadSeek}; +use anyhow::{Context, Result, anyhow, bail}; +use log::debug; +use std::io::{BufReader, Read, Seek, SeekFrom}; +use winter_ebml::{Ebml, EbmlHeader, VintReadExt, read_vint_slice}; +use winter_matroska::{ + Attachments, Chapters, Cluster, Cues, Info, MatroskaFile, SeekHead, Segment, Tags, Tracks, +}; + +pub struct MatroskaDemuxer { + reader: BufReader<Box<dyn ReadSeek>>, + segment_offset: Option<u64>, + seek_head: Option<SeekHead>, +} + +impl DemuxerNew for MatroskaDemuxer { + fn new(reader: Box<dyn ReadSeek>) -> Self { + Self { + reader: BufReader::new(reader), + seek_head: None, + segment_offset: None, + } + } +} +impl MatroskaDemuxer { + pub fn segment_offset(&mut self) -> Result<u64> { + if let Some(s) = self.segment_offset { + return Ok(s); + } + self.reader.seek(SeekFrom::Start(0))?; + + let header_tag = self.reader.read_vint()?; + let header_size = self.reader.read_vint()?; + if header_tag != MatroskaFile::TAG_EBML_HEADER { + bail!("file is not ebml") + } + + let mut header_raw = vec![0u8; header_size as usize]; + 
self.reader.read_exact(&mut header_raw)?; + let header = EbmlHeader::read(&header_raw).context("parsing ebml header")?; + if !matches!(header.doc_type.as_str(), "matroska" | "webm") { + bail!("file is {:?} but not matroska/webm", header.doc_type) + } + if header.ebml_max_id_length != 4 { + bail!( + "file has invalid EBMLMaxIDLength of {}", + header.ebml_max_id_length + ) + } + if !matches!(header.ebml_max_size_length, 1..=8) { + bail!( + "file has invalid EBMLMaxSizeLength of {}", + header.ebml_max_size_length + ) + } + + let segment_tag = self.reader.read_vint()?; + let _segment_size = self.reader.read_vint()?; + if segment_tag != MatroskaFile::TAG_SEGMENT { + bail!("header not followed by segment") + } + + let off = self.reader.stream_position()?; + debug!("segment offset is {off} (0x{off:x})"); + self.segment_offset = Some(off); + Ok(off) + } + + pub fn seek_segment_start(&mut self) -> Result<()> { + let seg_start = self.segment_offset()?; + self.reader.seek(SeekFrom::Start(seg_start))?; + Ok(()) + } + + /// Parse SeekHead at segment start if exists + pub fn seek_head<'a>(&'a mut self) -> Result<Option<&'a SeekHead>> { + if self.seek_head.is_some() { + return Ok(self.seek_head.as_ref()); + } + self.seek_segment_start()?; + let tag = self.reader.read_vint()?; + let size = self.reader.read_vint()?; + // TODO skip possible CRC32 tag + if tag != Segment::TAG_SEEK_HEADS { + return Ok(None); + } + + let mut raw = vec![0u8; size as usize]; + self.reader.read_exact(&mut raw)?; + let seek_head = SeekHead::read(&raw).context("parsing seek head")?; + debug!("parsed {seek_head:#?}"); + self.seek_head = Some(seek_head); + Ok(self.seek_head.as_ref()) + } + + /// Seeks to the content of child tag of Segment possibly optimized via SeekHead. Returns the size of the content. + pub fn seek_to_segment_tag(&mut self, search_tag: u64) -> Result<Option<u64>> { + if let Some(seek_head) = self.seek_head()?
{ + let Some(segment_position) = seek_head + .seeks + .iter() + .find(|s| read_vint_slice(&mut s.id.as_slice()).map_or(false, |x| x == search_tag)) + .map(|s| s.position) + else { + return Ok(None); + }; + let segment_offset = self.segment_offset()?; + self.reader + .seek(SeekFrom::Start(segment_offset + segment_position))?; + + let tag = self.reader.read_vint()?; + let size = self.reader.read_vint()?; + if tag != search_tag { + bail!("SeekHead was lying (expected {search_tag:?}, got {tag:x})"); + } + Ok(Some(size)) + } else { + self.seek_segment_start()?; + loop { + let tag = self.reader.read_vint()?; + let size = self.reader.read_vint()?; + if tag == search_tag { + break Ok(Some(size)); + } + if tag == Segment::TAG_CLUSTERS { + break Ok(None); + } + self.reader.seek_relative(size as i64)?; + } + } + } + + pub fn read_tag<Tag: Ebml>(&mut self, size: u64) -> Result<Tag> { + let mut buffer = vec![0u8; size as usize]; + self.reader.read_exact(&mut buffer)?; + Ok(Tag::read(&buffer)?) + } + pub fn read_segment_tag<Tag: Ebml>(&mut self, name: &'static str, tag: u64) -> Result<Tag> { + debug!("reading {name:?}"); + let size = self + .seek_to_segment_tag(tag)? 
+ .ok_or(anyhow!("{name} tag missing"))?; + self.read_tag(size) + .context(anyhow!("parsing {name} failed")) + } +} +impl Demuxer for MatroskaDemuxer { + fn info(&mut self) -> Result<Info> { + self.read_segment_tag("Info", Segment::TAG_INFO) + } + fn tracks(&mut self) -> Result<Tracks> { + self.read_segment_tag("Tracks", Segment::TAG_TRACKS) + } + fn chapters(&mut self) -> Result<Chapters> { + self.read_segment_tag("Chapters", Segment::TAG_CHAPTERS) + } + fn attachments(&mut self) -> Result<Attachments> { + self.read_segment_tag("Attachments", Segment::TAG_ATTACHMENTS) + } + fn tags(&mut self) -> Result<Tags> { + self.read_segment_tag("Tags", Segment::TAG_TAGS) + } + fn cues(&mut self) -> Result<Cues> { + self.read_segment_tag("Cues", Segment::TAG_CUES) + } + + fn seek_cluster(&mut self, position: Option<u64>) -> Result<()> { + if let Some(pos) = position { + self.reader.seek(SeekFrom::Start(pos))?; + } else { + self.seek_to_segment_tag(Segment::TAG_CLUSTERS)?; + } + Ok(()) + } + fn read_cluster(&mut self) -> Result<Option<(u64, Cluster)>> { + loop { + let position = self.reader.stream_position()?; + // TODO handle eof + let tag = self.reader.read_vint()?; + let size = self.reader.read_vint()?; + if tag != Segment::TAG_CLUSTERS { + self.reader.seek_relative(size as i64)?; + continue; + } + + let mut buffer = vec![0u8; size as usize]; + self.reader.read_exact(&mut buffer)?; + let cluster = Cluster::read(&buffer).context("parsing Cluster")?; + + break Ok(Some((position, cluster))); + } + } +} diff --git a/remuxer/src/demuxers/mod.rs b/remuxer/src/demuxers/mod.rs new file mode 100644 index 0000000..e47e3d7 --- /dev/null +++ b/remuxer/src/demuxers/mod.rs @@ -0,0 +1,29 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. 
+ Copyright (C) 2025 metamuffin <metamuffin.org> +*/ + +pub mod matroska; + +use anyhow::Result; +use std::io::{Read, Seek}; +use winter_matroska::{Attachments, Chapters, Cluster, Cues, Info, Tags, Tracks}; + +pub trait ReadSeek: Read + Seek {} +impl<T: Read + Seek> ReadSeek for T {} + +pub trait DemuxerNew: Demuxer + Sized { + fn new(reader: Box<dyn ReadSeek>) -> Self; +} +pub trait Demuxer { + fn info(&mut self) -> Result<Info>; + fn tracks(&mut self) -> Result<Tracks>; + fn chapters(&mut self) -> Result<Chapters>; + fn attachments(&mut self) -> Result<Attachments>; + fn tags(&mut self) -> Result<Tags>; + fn cues(&mut self) -> Result<Cues>; + + fn seek_cluster(&mut self, position: Option<u64>) -> Result<()>; + fn read_cluster(&mut self) -> Result<Option<(u64, Cluster)>>; +} diff --git a/remuxer/src/extract.rs b/remuxer/src/extract.rs deleted file mode 100644 index 15c1e9d..0000000 --- a/remuxer/src/extract.rs +++ /dev/null @@ -1,51 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. 
- Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use crate::seek_index::get_seek_index; -use anyhow::{anyhow, bail}; -use jellymatroska::{block::Block, read::EbmlReader, Master, MatroskaTag}; -use log::debug; -use std::{fs::File, io::BufReader, path::PathBuf}; - -pub type TrackExtract = Vec<(u64, Option<u64>, Vec<u8>)>; -pub fn extract_track(path: PathBuf, track: u64) -> anyhow::Result<TrackExtract> { - let file = File::open(&path)?; - let mut reader = EbmlReader::new(BufReader::new(file)); - let index = get_seek_index(&path)?; - let index = index.get(&track).ok_or(anyhow!("track missing"))?; - - let mut out = Vec::new(); - for b in &index.blocks { - reader.seek(b.source_off, MatroskaTag::BlockGroup(Master::Start))?; - let (duration, block) = read_group(&mut reader)?; - assert_eq!(track, block.track, "seek index is wrong"); - out.push((b.pts, duration, block.data)) - } - Ok(out) -} - -pub fn read_group(segment: &mut EbmlReader) -> anyhow::Result<(Option<u64>, Block)> { - let (mut dur, mut block) = (None, None); - for _ in 0..10 { - let (_, item) = segment.next().ok_or(anyhow!("eof"))??; - match item { - MatroskaTag::Void(_) => (), - MatroskaTag::Crc32(_) => (), - MatroskaTag::Cluster(_) => bail!("unexpected cluster"), - MatroskaTag::Timestamp(_) => (), - MatroskaTag::SimpleBlock(block) => { - return Ok((None, block)); // HDMV/PGS does not use duration?! 
- } - MatroskaTag::BlockGroup(Master::Start) => (), - MatroskaTag::BlockGroup(Master::End) => return Ok((dur, block.unwrap())), - MatroskaTag::BlockDuration(duration) => dur = Some(duration), - MatroskaTag::Block(blk) => block = Some(blk), - MatroskaTag::Cues(_) => bail!("reached cues, this is the end"), - MatroskaTag::Segment(Master::End) => bail!("extractor reached segment end"), - _ => debug!("(rs) tag ignored: {item:?}"), - } - } - bail!(".") -} diff --git a/remuxer/src/fragment.rs b/remuxer/src/fragment.rs deleted file mode 100644 index 45a671f..0000000 --- a/remuxer/src/fragment.rs +++ /dev/null @@ -1,219 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin <metamuffin.org> -*/ - -use crate::{ - ebml_header, ebml_segment_info, ebml_track_entry, - metadata::{matroska_metadata, MatroskaMetadata}, - seek_index::get_seek_index, - segment_extractor::SegmentExtractIter, -}; -use anyhow::{anyhow, Context, Result}; -use jellymatroska::{read::EbmlReader, write::EbmlWriter, Master, MatroskaTag}; -use log::{debug, info}; -use std::{ - fs::File, - io::{BufReader, BufWriter, Write}, - ops::Range, - path::Path, -}; - -const FRAGMENT_LENGTH: f64 = 4.; - -pub fn fragment_index(path: &Path, track: u64) -> Result<Vec<Range<f64>>> { - let meta = matroska_metadata(path)?; - let duration = media_duration(&meta); - let force_kf = meta - .as_ref() - .tracks - .as_ref() - .unwrap() - .entries - .iter() - .find(|t| t.track_number == track) - .unwrap() - .track_type - == 17; - - let index = get_seek_index(path)?; - let index = index - .get(&track) - .ok_or(anyhow!("seek index track missing"))?; - - let n_kf = if force_kf { - index.blocks.len() - } else { - index.keyframes.len() - }; - - let average_kf_interval = duration / n_kf as f64; - let kf_per_frag = (FRAGMENT_LENGTH / average_kf_interval).ceil() as usize; - 
debug!("average keyframe interval: {average_kf_interval}"); - debug!(" => keyframes per frag {kf_per_frag}"); - - let n_frags = n_kf.div_ceil(kf_per_frag); - Ok((0..n_frags) - .map(|i| { - let start = index.blocks[if force_kf { - i * kf_per_frag - } else { - index.keyframes[i * kf_per_frag] - }] - .pts as f64 - / 1000.; - let end = if force_kf { - let n = (i + 1) * kf_per_frag; - if n >= index.blocks.len() { - None - } else { - Some(n) - } - } else { - index.keyframes.get((i + 1) * kf_per_frag).copied() - } - .map(|i| index.blocks[i].pts as f64 / 1000.) - .unwrap_or(duration); - start..end - }) - .collect()) -} - -pub fn write_fragment_into( - writer: impl Write, - path: &Path, - track: u64, - webm: bool, - title: &str, - n: usize, -) -> anyhow::Result<()> { - let meta = matroska_metadata(path)?; - let duration = media_duration(&meta); - let track_meta = meta - .as_ref() - .tracks - .as_ref() - .unwrap() - .entries - .iter() - .find(|t| t.track_number == track) - .unwrap(); - let force_kf = track_meta.track_type == 17; - - info!("writing fragment {n} of {:?} (track {track})", title); - let mut output = EbmlWriter::new(BufWriter::new(writer), 0); - let mapped = 1; - info!("\t- {track} {path:?} ({} => {mapped})", track); - // info!("\t {}", info); - let file = File::open(path).context("opening source file")?; - let index = get_seek_index(path)?; - let index = index - .get(&track) - .ok_or(anyhow!("track missing 2"))? 
- .to_owned(); - debug!("\t seek index: {} blocks loaded", index.blocks.len()); - let mut reader = EbmlReader::new(BufReader::new(file)); - - let n_kf = if force_kf { - index.blocks.len() - } else { - index.keyframes.len() - }; - debug!("{duration} {n_kf}"); - let average_kf_interval = duration / n_kf as f64; - let kf_per_frag = (FRAGMENT_LENGTH / average_kf_interval).ceil() as usize; - debug!("average keyframe interval: {average_kf_interval}"); - debug!(" => keyframes per frag {kf_per_frag}"); - - let (start_block_index, end_block_index) = if force_kf { - (n * kf_per_frag, (n + 1) * kf_per_frag) - } else { - ( - *index - .keyframes - .get(n * kf_per_frag) - .ok_or(anyhow!("fragment index out of range"))?, - *index - .keyframes - .get((n + 1) * kf_per_frag) - .unwrap_or(&index.blocks.len()), - ) - }; - debug!("writing blocks {start_block_index} to {end_block_index}."); - - let start_block = &index.blocks[start_block_index]; - let last_block_pts = index - .blocks - .get(end_block_index) - .map(|b| b.pts) - .unwrap_or((duration * 1000.) 
as u64); - - output.write_tag(&ebml_header(webm))?; - output.write_tag(&MatroskaTag::Segment(Master::Start))?; - output.write_tag(&ebml_segment_info( - title.to_string(), - (last_block_pts - start_block.pts) as f64 / 1000., - ))?; - output.write_tag(&MatroskaTag::Tracks(Master::Collected(vec![ - ebml_track_entry(mapped, track_meta), - ])))?; - - reader.seek(start_block.source_off, MatroskaTag::Cluster(Master::Start))?; - let mut reader = SegmentExtractIter::new(&mut reader, track); - - { - // TODO this one caused fragments to get dropped by MSE for no reason - // for i in start_block_index..end_block_index { - // let index_block = &index.blocks[i]; - // let (mut block, duration) = reader.next()?; - - // assert_eq!(index_block.size, block.data.len(), "seek index is wrong"); - - // block.track = 1; - // block.timestamp_off = 0; - // output.write_tag(&MatroskaTag::Cluster(Master::Collected(vec![ - // MatroskaTag::Timestamp(index_block.pts - start_block.pts), - // if let Some(duration) = duration { - // MatroskaTag::BlockGroup(Master::Collected(vec![ - // MatroskaTag::BlockDuration(duration), - // MatroskaTag::Block(block), - // ])) - // } else { - // MatroskaTag::SimpleBlock(block) - // }, - // ])))?; - // } - } - { - let mut blocks = vec![MatroskaTag::Timestamp(start_block.pts)]; - for i in start_block_index..end_block_index { - let index_block = &index.blocks[i]; - let (mut block, duration) = reader.next_block()?; - - assert_eq!(index_block.size, block.data.len(), "seek index is wrong"); - - block.track = 1; - // TODO this does generate overflows sometimes - block.timestamp_off = (index_block.pts as i64 - start_block.pts as i64) - .try_into() - .unwrap(); - if let Some(duration) = duration { - blocks.push(MatroskaTag::BlockGroup(Master::Collected(vec![ - MatroskaTag::BlockDuration(duration), - MatroskaTag::Block(block), - ]))) - } else { - blocks.push(MatroskaTag::SimpleBlock(block)) - } - } - output.write_tag(&MatroskaTag::Cluster(Master::Collected(blocks)))?; - } 
- debug!("wrote {} bytes", output.position()); - Ok(()) -} - -fn media_duration(m: &MatroskaMetadata) -> f64 { - let info = m.info.as_ref().unwrap(); - (info.duration.unwrap_or_default() * info.timestamp_scale as f64) / 1_000_000_000. -} diff --git a/remuxer/src/lib.rs b/remuxer/src/lib.rs index bb732d7..041f386 100644 --- a/remuxer/src/lib.rs +++ b/remuxer/src/lib.rs @@ -3,100 +3,12 @@ which is licensed under the GNU Affero General Public License (version 3); see /COPYING. Copyright (C) 2025 metamuffin <metamuffin.org> */ -#![feature(random, exit_status_error)] -pub mod extract; -pub mod fragment; -pub mod matroska_to_mpeg4; -pub mod matroska_to_webm; -pub mod metadata; -pub mod remux; -pub mod seek_index; -pub mod segment_extractor; -pub mod trim_writer; -use ebml_struct::matroska::TrackEntry; -pub use fragment::write_fragment_into; -use jellymatroska::{Master, MatroskaTag}; -pub use matroska_to_mpeg4::matroska_to_mpeg4; -pub use remux::remux_stream_into; +pub mod demuxers; +pub mod magic; -pub fn ebml_header(webm: bool) -> MatroskaTag { - MatroskaTag::Ebml(Master::Collected(vec![ - MatroskaTag::EbmlVersion(1), - MatroskaTag::EbmlReadVersion(1), - MatroskaTag::EbmlMaxIdLength(4), - MatroskaTag::EbmlMaxSizeLength(8), - MatroskaTag::DocType(if webm { - "webm".to_string() - } else { - "matroska".to_string() - }), - MatroskaTag::DocTypeVersion(4), - MatroskaTag::DocTypeReadVersion(2), - ])) -} -pub fn ebml_segment_info(title: String, duration: f64) -> MatroskaTag { - MatroskaTag::Info(Master::Collected(vec![ - MatroskaTag::TimestampScale(1_000_000), - MatroskaTag::Duration(duration * 1000.0), - MatroskaTag::Title(title), - MatroskaTag::MuxingApp("jellyremux".to_string()), - MatroskaTag::WritingApp("jellything".to_string()), - ])) -} - -pub fn ebml_track_entry(number: u64, track: &TrackEntry) -> MatroskaTag { - let mut els = vec![ - MatroskaTag::TrackNumber(number), - MatroskaTag::TrackUID(number * 100), // TODO is this ok? 
- MatroskaTag::FlagLacing(track.flag_lacing), - MatroskaTag::Language(track.language.clone()), - MatroskaTag::CodecID(track.codec_id.clone()), - MatroskaTag::CodecDelay(track.codec_delay), - MatroskaTag::SeekPreRoll(track.seek_pre_roll), - ]; - if let Some(d) = &track.default_duration { - els.push(MatroskaTag::DefaultDuration(*d)); - } - match track.track_type { - 1 => { - let video = track.video.as_ref().unwrap(); - els.push(MatroskaTag::TrackType(1)); - let mut props = vec![ - MatroskaTag::PixelWidth(video.pixel_width), - MatroskaTag::PixelHeight(video.pixel_height), - ]; - props.push(MatroskaTag::DisplayWidth( - video.display_width.unwrap_or(video.pixel_width), - )); - props.push(MatroskaTag::DisplayHeight( - video.display_height.unwrap_or(video.pixel_height), - )); - props.push(MatroskaTag::DisplayUnit(video.display_unit)); - if let Some(fps) = video.frame_rate { - props.push(MatroskaTag::FrameRate(fps)) - } - els.push(MatroskaTag::Video(Master::Collected(props))) - } - 2 => { - let audio = track.audio.as_ref().unwrap(); - els.push(MatroskaTag::TrackType(2)); - let mut props = vec![ - MatroskaTag::SamplingFrequency(audio.sampling_frequency), - MatroskaTag::Channels(audio.channels), - ]; - if let Some(bit_depth) = audio.bit_depth { - props.push(MatroskaTag::BitDepth(bit_depth)); - } - els.push(MatroskaTag::Audio(Master::Collected(props))); - } - 17 => { - els.push(MatroskaTag::TrackType(17)); - } - _ => unreachable!(), - } - if let Some(d) = &track.codec_private { - els.push(MatroskaTag::CodecPrivate(d.clone())); - } - MatroskaTag::TrackEntry(Master::Collected(els)) +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum ContainerFormat { + Matroska, + Webm, } diff --git a/remuxer/src/magic.rs b/remuxer/src/magic.rs new file mode 100644 index 0000000..65ab4de --- /dev/null +++ b/remuxer/src/magic.rs @@ -0,0 +1,67 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License 
(version 3); see /COPYING. + Copyright (C) 2025 metamuffin <metamuffin.org> +*/ + +use crate::ContainerFormat; +use anyhow::Result; +use std::io::Read; +use winter_ebml::{Ebml, EbmlHeader, read_vint_slice}; +use winter_matroska::MatroskaFile; + +pub fn detect_container_format(reader: &mut dyn Read) -> Result<Option<ContainerFormat>> { + let mut data = Vec::new(); + reader.take(128).read_to_end(&mut data)?; + Ok(test_matroska(&data)) +} + +fn test_matroska(mut data: &[u8]) -> Option<ContainerFormat> { + let tag = read_vint_slice(&mut data)?; + if tag != MatroskaFile::TAG_EBML_HEADER { + return None; + }; + let size = read_vint_slice(&mut data)? as usize; + if size > data.len() { + return None; + } + let header = EbmlHeader::read(&data[..size]).ok()?; + match header.doc_type.as_str() { + "matroska" => Some(ContainerFormat::Matroska), + "webm" => Some(ContainerFormat::Webm), + _ => None, + } +} + +#[test] +fn verify_matroska() { + // WebM + let sample = "\ +1a45dfa39f4286810142f7810142f2810442f381084282847765626d4287\ +8104428581021853806701000000088a9c1a114d9b74bc4dbb8b53ab8415\ +49a96653ac81a14dbb8b53ab841654ae6b53ac81d64dbb8c53ab841254c3\ +6753ac8201a04dbb8e53ab841c53bb6b53ac84088a9accec010000000000\ +0057000000000000"; + let sample = hex::decode(sample).unwrap(); + assert_eq!(test_matroska(&sample), Some(ContainerFormat::Webm)); + + // Matroska + let sample = "\ +1a45dfa3a34286810142f7810142f2810442f381084282886d6174726f73\ +6b61428781044285810218538067010000005d66b4a2114d9b74c2bf8492\ +1ae3e14dbb8b53ab841549a96653ac81a14dbb8b53ab841654ae6b53ac81\ +ef4dbb8c53ab841254c36753ac82019b4dbb8e53ab841c53bb6b53ac845d\ +66a14aec01000000"; + let sample = hex::decode(sample).unwrap(); + assert_eq!(test_matroska(&sample), Some(ContainerFormat::Matroska)); + + // GIF + let sample = "\ +47494638396100010001f71f000000002400004800006c0000900000b400\ +00d80000fc00000024002424004824006c2400902400b42400d82400fc24\ +000048002448004848006c4800904800b44800d84800fc4800006c00246c\ 
+00486c006c6c00906c00b46c00d86c00fc6c000090002490004890006c90\ +00909000b49000d8"; + let sample = hex::decode(sample).unwrap(); + assert_eq!(test_matroska(&sample), None) +} diff --git a/remuxer/src/matroska_to_mpeg4.rs b/remuxer/src/matroska_to_mpeg4.rs deleted file mode 100644 index cc0b967..0000000 --- a/remuxer/src/matroska_to_mpeg4.rs +++ /dev/null @@ -1,36 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use anyhow::Result; -use std::{ - fs::{remove_file, File}, - io::{copy, Read, Write}, - process::{Command, Stdio}, - random::random, -}; - -pub fn matroska_to_mpeg4( - mut input: impl Read + Send + 'static, - mut output: impl Write, -) -> Result<()> { - let path = format!("/tmp/jellything-tc-hack-{:016x}", random::<u64>(..)); - let args = format!( - "-hide_banner -loglevel warning -f matroska -i pipe:0 -c copy -f mp4 -movflags frag_keyframe+empty_moov {path}" - ); - let mut child = Command::new("ffmpeg") - .args(args.split(" ")) - .stdin(Stdio::piped()) - .stderr(Stdio::inherit()) - .spawn()?; - - let mut stdin = child.stdin.take().unwrap(); - copy(&mut input, &mut stdin)?; - drop(stdin); - child.wait()?.exit_ok()?; - copy(&mut File::open(&path)?, &mut output)?; - remove_file(path)?; - - Ok(()) -} diff --git a/remuxer/src/matroska_to_webm.rs b/remuxer/src/matroska_to_webm.rs deleted file mode 100644 index b77062b..0000000 --- a/remuxer/src/matroska_to_webm.rs +++ /dev/null @@ -1,89 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. 
- Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use crate::ebml_track_entry; -use anyhow::Context; -use ebml_struct::{ - ids::*, - matroska::{Cluster, Ebml, Info, Tracks}, - read::{EbmlReadExt, TagRead}, - write::TagWrite, -}; -use jellymatroska::{read::EbmlReader, write::EbmlWriter, Master, MatroskaTag}; -use log::warn; -use std::io::{BufReader, BufWriter, ErrorKind, Read, Seek, Write}; - -pub fn matroska_to_webm( - input: impl Read + Seek + 'static, - output: impl Write, -) -> anyhow::Result<()> { - let mut output = EbmlWriter::new(BufWriter::new(output), 0); - let mut input = EbmlReader::new(BufReader::new(input)); - - Ebml { - ebml_version: 1, - ebml_read_version: 1, - ebml_max_id_length: 4, - ebml_max_size_length: 8, - doc_type: "webm".to_string(), - doc_type_version: 4, - doc_type_read_version: 2, - doc_type_extensions: vec![], - } - .write(&mut output)?; - output.write_tag(&MatroskaTag::Segment(Master::Start))?; - - let (x, mut ebml) = input.read_tag()?; - assert_eq!(x, EL_EBML); - let ebml = Ebml::read(&mut ebml).unwrap(); - assert!(ebml.doc_type == "matroska" || ebml.doc_type == "webm"); - let (x, mut segment) = input.read_tag()?; - assert_eq!(x, EL_SEGMENT); - - loop { - let (x, mut seg) = match segment.read_tag() { - Ok(o) => o, - Err(e) if e.kind() == ErrorKind::UnexpectedEof => break, - Err(e) => return Err(e.into()), - }; - match x { - EL_INFO => { - let info = Info::read(&mut seg).context("info")?; - output.write_tag(&{ - MatroskaTag::Info(Master::Collected(vec![ - MatroskaTag::TimestampScale(info.timestamp_scale), - MatroskaTag::Duration(info.duration.unwrap_or_default()), - MatroskaTag::Title(info.title.unwrap_or_default()), - MatroskaTag::MuxingApp("jellyremux".to_string()), - MatroskaTag::WritingApp("jellything".to_string()), - ])) - })?; - } - EL_TRACKS => { - let tracks = Tracks::read(&mut seg).context("tracks")?; - output.write_tag(&MatroskaTag::Tracks(Master::Collected( - tracks - .entries - .into_iter() - .map(|t| 
ebml_track_entry(t.track_number, &t)) - .collect(), - )))?; - } - EL_VOID | EL_CRC32 | EL_CUES | EL_SEEKHEAD | EL_ATTACHMENTS | EL_TAGS => { - seg.consume()?; - } - EL_CLUSTER => { - let cluster = Cluster::read(&mut seg).context("cluster")?; - // TODO mixing both ebml libraries :))) - cluster.write(&mut output)?; - } - id => { - warn!("unknown top-level element {id:x}"); - seg.consume()?; - } - } - } - Ok(()) -} diff --git a/remuxer/src/metadata.rs b/remuxer/src/metadata.rs deleted file mode 100644 index ddcf4c0..0000000 --- a/remuxer/src/metadata.rs +++ /dev/null @@ -1,112 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use anyhow::{Context, Result}; -use bincode::{Decode, Encode}; -use ebml_struct::{ - ids::*, - matroska::*, - read::{EbmlReadExt, TagRead}, -}; -use jellycache::{cache_file, cache_memory, CachePath}; -use log::{info, warn}; -use std::{ - fs::File, - io::{BufReader, ErrorKind, Read, Write}, - path::Path, - sync::Arc, -}; - -pub use ebml_struct::matroska::TrackEntry as MatroskaTrackEntry; - -#[derive(Debug, Encode, Decode, Clone)] -pub struct MatroskaMetadata { - pub info: Option<Info>, - pub tracks: Option<Tracks>, - pub cover: Option<CachePath>, - pub chapters: Option<Chapters>, - pub tags: Option<Tags>, - pub infojson: Option<Vec<u8>>, -} -pub fn checked_matroska_metadata(path: &Path) -> Result<Arc<Option<MatroskaMetadata>>> { - cache_memory("mkmeta-check-v1", path, || { - let mut magic = [0; 4]; - File::open(path)?.read_exact(&mut magic).ok(); - if !matches!(magic, [0x1A, 0x45, 0xDF, 0xA3]) { - return Ok(None); - } - Ok(Some((*matroska_metadata(path)?).clone())) - }) -} -pub fn matroska_metadata(path: &Path) -> Result<Arc<MatroskaMetadata>> { - cache_memory("mkmeta-v3", path, || { - info!("reading {path:?}"); - let mut file = BufReader::new(File::open(path)?); - 
let mut file = file.by_ref().take(u64::MAX); - - let (x, mut ebml) = file.read_tag()?; - assert_eq!(x, EL_EBML); - let ebml = Ebml::read(&mut ebml).unwrap(); - assert!(ebml.doc_type == "matroska" || ebml.doc_type == "webm"); - let (x, mut segment) = file.read_tag()?; - assert_eq!(x, EL_SEGMENT); - - let mut info = None; - let mut infojson = None; - let mut tracks = None; - let mut cover = None; - let mut chapters = None; - let mut tags = None; - loop { - let (x, mut seg) = match segment.read_tag() { - Ok(o) => o, - Err(e) if e.kind() == ErrorKind::UnexpectedEof => break, - Err(e) => return Err(e.into()), - }; - match x { - EL_INFO => info = Some(Info::read(&mut seg).context("info")?), - EL_TRACKS => tracks = Some(Tracks::read(&mut seg).context("tracks")?), - EL_CHAPTERS => chapters = Some(Chapters::read(&mut seg).context("chapters")?), - EL_TAGS => tags = Some(Tags::read(&mut seg).context("tags")?), - EL_ATTACHMENTS => { - let attachments = Attachments::read(&mut seg).context("attachments")?; - for f in attachments.files { - match f.name.as_str() { - "info.json" => { - infojson = Some(f.data); - } - "cover.webp" | "cover.png" | "cover.jpg" | "cover.jpeg" - | "cover.avif" => { - cover = Some(cache_file("att-cover", path, move |mut file| { - file.write_all(&f.data)?; - Ok(()) - })?) - } - _ => (), - } - } - } - EL_VOID | EL_CRC32 | EL_CUES | EL_SEEKHEAD => { - seg.consume()?; - } - EL_CLUSTER => { - break; - } - id => { - warn!("unknown top-level element {id:x}"); - seg.consume()?; - } - } - } - Ok(MatroskaMetadata { - chapters, - cover, - info, - infojson, - tags, - tracks, - }) - }) -} diff --git a/remuxer/src/remux.rs b/remuxer/src/remux.rs deleted file mode 100644 index 9e6d4b5..0000000 --- a/remuxer/src/remux.rs +++ /dev/null @@ -1,311 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. 
- Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use std::{io::Write, ops::Range, path::PathBuf}; - -// struct ClusterLayout { -// position: usize, -// timestamp: u64, -// source_offsets: Vec<Option<u64>>, -// blocks: Vec<(usize, BlockIndex)>, -// } - -pub fn remux_stream_into( - _writer: impl Write, - _range: Range<usize>, - _path_base: PathBuf, - _selection: Vec<usize>, - _webm: bool, -) -> anyhow::Result<()> { - // info!("remuxing {:?} to have tracks {selection:?}", item.title); - // let writer = TrimWriter::new(BufWriter::new(writer), range.clone()); - // let mut output = EbmlWriter::new(writer, 0); - - // struct ReaderC { - // info: SourceTrack, - // reader: EbmlReader, - // mapped: u64, - // index: Arc<SeekIndex>, - // source_track_index: usize, - // codec_private: Option<Vec<u8>>, - // layouting_progress_index: usize, - // } - - // let timing_cp = Instant::now(); - - // let mut inputs = selection - // .iter() - // .enumerate() - // .map(|(index, sel)| { - // let info = item - // .media - // .as_ref() - // .unwrap() - // .tracks - // .get(*sel) - // .ok_or(anyhow!("track not available"))? - // .to_owned(); - // let source_path = path_base.join(&private.path); - // let mapped = index as u64 + 1; - // info!("\t- {sel} {source_path:?} ({} => {mapped})", private.track); - // info!("\t {}", info); - // let file = File::open(&source_path).context("opening source file")?; - // let index = get_seek_index(&source_path)?; - // let index = index - // .get(&(private.track as u64)) - // .ok_or(anyhow!("track missing 3"))? 
- // .to_owned(); - // debug!("\t seek index: {} blocks loaded", index.blocks.len()); - // let reader = EbmlReader::new(BufReader::new(file)); - // Ok(ReaderC { - // index, - // reader, - // info, - // mapped, - // source_track_index: private.track, - // codec_private: private.codec_private.clone(), - // layouting_progress_index: 0, - // }) - // }) - // .collect::<anyhow::Result<Vec<_>>>()?; - - // info!("(perf) prepare inputs: {:?}", Instant::now() - timing_cp); - // let timing_cp = Instant::now(); - - // output.write_tag(&ebml_header(webm))?; - - // output.write_tag(&MatroskaTag::Segment(Master::Start))?; - // let segment_offset = output.position(); - - // output.write_tag(&MatroskaTag::Info(Master::Collected(vec![ - // MatroskaTag::TimestampScale(1_000_000), - // MatroskaTag::Duration(item.media.as_ref().unwrap().duration * 1000.0), - // MatroskaTag::Title(item.title.clone().unwrap_or_default()), - // MatroskaTag::MuxingApp("jellyremux".to_string()), - // MatroskaTag::WritingApp("jellything".to_string()), - // ])))?; - - // let tracks_header = inputs - // .iter_mut() - // .map(|rc| ebml_track_entry(rc.mapped, rc.mapped, &rc.info, rc.codec_private.take())) - // .collect(); - // output.write_tag(&MatroskaTag::Tracks(Master::Collected(tracks_header)))?; - - // let mut segment_layout: Vec<ClusterLayout> = { - // let mut cluster_pts = 0; - // let mut clusters = vec![]; - // let mut cluster = vec![]; - // let mut source_offsets = vec![None; inputs.len()]; - // let mut gp = 0usize; // cluster position (in the segment) - // let mut p = 0usize; // block position (in the cluster) - // loop { - // let (track, block) = { - // let mut best_block = BlockIndex { - // pts: u64::MAX, - // size: 0, - // source_off: 0, - // }; - // let mut best_track = 0; - // for (i, r) in inputs.iter().enumerate() { - // if let Some(v) = r.index.blocks.get(r.layouting_progress_index) { - // if v.pts < best_block.pts { - // best_block = v.to_owned(); - // best_track = i; - // } - // }; - // } - 
// (best_track, best_block) - // }; - // inputs[track].layouting_progress_index += 1; - // source_offsets[track].get_or_insert(block.source_off); - // if block.pts > cluster_pts + 1_000 { - // let cluster_content_size = 1 + 1 // timestamp {tag, size} - // + bad_vint_length(cluster_pts) // timestamp tag value - // + p; - // let cluster_size = 4 // tag length - // + vint_length(cluster_content_size as u64) // size varint - // + cluster_content_size; - // clusters.push(ClusterLayout { - // position: gp, // relative to the first cluster - // timestamp: cluster_pts, - // source_offsets, - // blocks: std::mem::take(&mut cluster), - // }); - - // cluster_pts = block.pts; - // source_offsets = vec![None; inputs.len()]; - // gp += cluster_size; - // p = 0; - // } - // if block.pts == u64::MAX { - // break; - // } - - // let simpleblock_size = 1 + 2 + 1 // block {tracknum, pts_off, flags} - // // TODO does not work, if more than 127 tracks are present - // + block.size; // block payload - // p += 1; // simpleblock tag - // p += vint_length(simpleblock_size as u64); // simpleblock size vint - // p += simpleblock_size; - - // cluster.push((track, block)) - // } - // info!("segment layout computed ({} clusters)", clusters.len()); - // clusters - // }; - // info!( - // "(perf) compute segment layout: {:?}", - // Instant::now() - timing_cp - // ); - // let timing_cp = Instant::now(); - - // let max_cue_size = 4 // cues id - // + 8 // cues len - // + ( // cues content - // 1 // cp id - // + 1 // cp len - // + ( // cp content - // 1 // ctime id, - // + 1 // ctime len - // + 8 // ctime content uint - // + ( // ctps - // 1 // ctp id - // + 8 // ctp len - // + (// ctp content - // 1 // ctrack id - // + 1 // ctrack size - // + 1 // ctrack content int - // // TODO this breaks if inputs.len() >= 127 - // + 1 // ccp id - // + 1 // ccp len - // + 8 // ccp content offset - // ) - // ) - // ) * inputs.len() - // ) * segment_layout.len() - // + 1 // void id - // + 8; // void len - - // let 
first_cluster_offset_predict = max_cue_size + output.position(); - - // // make the cluster position relative to the segment start as they should - // segment_layout - // .iter_mut() - // .for_each(|e| e.position += first_cluster_offset_predict - segment_offset); - - // output.write_tag(&MatroskaTag::Cues(Master::Collected( - // segment_layout - // .iter() - // .map(|cluster| { - // MatroskaTag::CuePoint(Master::Collected( - // Some(MatroskaTag::CueTime(cluster.timestamp)) - // .into_iter() - // // TODO: Subtitles should not have cues for every cluster - // .chain(inputs.iter().map(|i| { - // MatroskaTag::CueTrackPositions(Master::Collected(vec![ - // MatroskaTag::CueTrack(i.mapped), - // MatroskaTag::CueClusterPosition(cluster.position as u64), - // ])) - // })) - // .collect(), - // )) - // }) - // .collect(), - // )))?; - // output.write_padding(first_cluster_offset_predict)?; - // let first_cluster_offset = output.position(); - // assert_eq!(first_cluster_offset, first_cluster_offset_predict); - - // let mut skip = 0; - // // TODO binary search - // for (i, cluster) in segment_layout.iter().enumerate() { - // if (cluster.position + segment_offset) >= range.start { - // break; - // } - // skip = i; - // } - - // if skip != 0 { - // info!("skipping {skip} clusters"); - // output.seek(SeekFrom::Start( - // (segment_layout[skip].position + segment_offset) as u64, - // ))?; - // } - - // struct ReaderD<'a> { - // stream: SegmentExtractIter<'a>, - // mapped: u64, - // } - - // let mut track_readers = inputs - // .iter_mut() - // .enumerate() - // .map(|(i, inp)| { - // inp.reader - // .seek( - // // the seek target might be a hole; we continue until the next cluster of that track. - // // this should be fine since tracks are only read according to segment_layout - // find_first_cluster_with_off(&segment_layout, skip, i) - // .ok_or(anyhow!("cluster hole at eof"))?, - // MatroskaTag::Cluster(Master::Start), // TODO shouldn't this be a child of cluster? 
- // ) - // .context("seeking in input")?; - // let stream = SegmentExtractIter::new(&mut inp.reader, inp.source_track_index as u64); - - // Ok(ReaderD { - // mapped: inp.mapped, - // stream, - // }) - // }) - // .collect::<anyhow::Result<Vec<_>>>()?; - - // info!("(perf) seek inputs: {:?}", Instant::now() - timing_cp); - - // for (cluster_index, cluster) in segment_layout.into_iter().enumerate().skip(skip) { - // debug!( - // "writing cluster {cluster_index} (pts_base={}) with {} blocks", - // cluster.timestamp, - // cluster.blocks.len() - // ); - // { - // let cue_error = cluster.position as i64 - (output.position() - segment_offset) as i64; - // if cue_error != 0 { - // warn!("calculation was {} bytes off", cue_error); - // } - // } - - // let mut cluster_blocks = vec![MatroskaTag::Timestamp(cluster.timestamp)]; - // for (block_track, index_block) in cluster.blocks { - // let track_reader = &mut track_readers[block_track]; - // // TODO handle duration - // let mut block = track_reader.stream.next_block()?.0; - - // assert_eq!(index_block.size, block.data.len(), "seek index is wrong"); - - // block.track = track_reader.mapped; - // block.timestamp_off = (index_block.pts - cluster.timestamp).try_into().unwrap(); - // trace!("n={} tso={}", block.track, block.timestamp_off); - - // cluster_blocks.push(MatroskaTag::SimpleBlock(block)) - // } - // output.write_tag(&MatroskaTag::Cluster(Master::Collected(cluster_blocks)))?; - // } - // // output.write_tag(&MatroskaTag::Segment(Master::End))?; - // Ok(()) - todo!() -} - -// fn find_first_cluster_with_off( -// segment_layout: &[ClusterLayout], -// skip: usize, -// track: usize, -// ) -> Option<u64> { -// for cluster in segment_layout.iter().skip(skip) { -// if let Some(off) = cluster.source_offsets[track] { -// return Some(off); -// } -// } -// None -// } diff --git a/remuxer/src/seek_index.rs b/remuxer/src/seek_index.rs deleted file mode 100644 index a1a97ef..0000000 --- a/remuxer/src/seek_index.rs +++ /dev/null @@ 
-1,152 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use anyhow::{Context, Result}; -use bincode::{Decode, Encode}; -use jellycache::cache_memory; -use jellymatroska::{ - block::Block, - read::EbmlReader, - unflatten::{Unflat, Unflatten}, - MatroskaTag, -}; -use log::{debug, info, trace, warn}; -use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path, sync::Arc}; - -#[derive(Debug, Clone, Default, Decode, Encode)] -pub struct SeekIndex { - pub blocks: Vec<BlockIndex>, - pub keyframes: Vec<usize>, -} - -#[derive(Debug, Clone, Decode, Encode)] -pub struct BlockIndex { - pub pts: u64, - // pub duration: Option<u64>, - pub source_off: u64, // points to start of SimpleBlock or BlockGroup (not the Block inside it) - pub size: usize, -} - -pub fn get_seek_index(path: &Path) -> anyhow::Result<Arc<BTreeMap<u64, Arc<SeekIndex>>>> { - cache_memory("seekindex-v1", path, move || { - info!("generating seek index for {path:?}"); - let input = File::open(path).context("opening source file")?; - let mut input = EbmlReader::new(BufReader::new(input)); - let index = import_seek_index(&mut input)?; - info!("done"); - Ok(index.into_iter().map(|(k, v)| (k, Arc::new(v))).collect()) - }) -} - -pub fn get_track_sizes(path: &Path) -> Result<BTreeMap<u64, usize>> { - Ok(get_seek_index(path)? 
- .iter() - .map(|(k, v)| (*k, v.blocks.iter().map(|b| b.size).sum::<usize>())) - .collect()) -} - -pub fn import_seek_index(input: &mut EbmlReader) -> Result<BTreeMap<u64, SeekIndex>> { - let mut seek_index = BTreeMap::new(); - while let Some(item) = input.next() { - let item = match item { - Ok((_, item)) => item, - Err(e) => { - if !matches!(e, jellymatroska::error::Error::Io(_)) { - warn!("{e}"); - } - break; - } - }; - match item { - MatroskaTag::Segment(_) => { - info!("segment start"); - let mut children = Unflatten::new_with_end(input, item); - import_seek_index_segment(&mut children, &mut seek_index)?; - info!("segment end"); - } - _ => debug!("(r) tag ignored: {item:?}"), - } - } - Ok(seek_index) -} - -fn import_seek_index_segment( - segment: &mut Unflatten, - seek_index: &mut BTreeMap<u64, SeekIndex>, -) -> Result<()> { - while let Some(Ok(Unflat { children, item, .. })) = segment.n() { - match item { - MatroskaTag::SeekHead(_) => {} - MatroskaTag::Info(_) => {} - MatroskaTag::Tags(_) => {} - MatroskaTag::Cues(_) => {} - MatroskaTag::Chapters(_) => {} - MatroskaTag::Tracks(_) => {} - MatroskaTag::Void(_) => {} - MatroskaTag::Cluster(_) => { - let mut children = children.unwrap(); - let mut pts = 0; - while let Some(Ok(Unflat { - children, - item, - position, - })) = children.n() - { - match item { - MatroskaTag::Timestamp(ts) => pts = ts, - MatroskaTag::BlockGroup(_) => { - trace!("group"); - let mut children = children.unwrap(); - while let Some(Ok(Unflat { - children: _, item, .. 
- })) = children.n() - { - match item { - MatroskaTag::Block(ref block) => { - debug!( - "block: track={} tso={}", - block.track, block.timestamp_off - ); - seek_index_add(seek_index, block, position.unwrap(), pts); - } - _ => trace!("{item:?}"), - } - } - } - MatroskaTag::SimpleBlock(block) => { - trace!( - "simple block: track={} tso={}", - block.track, - block.timestamp_off - ); - trace!("{pts} {}", block.timestamp_off); - seek_index_add(seek_index, &block, position.unwrap(), pts); - } - _ => trace!("(rsc) tag ignored: {item:?}"), - } - } - } - _ => debug!("(rs) tag ignored: {item:?}"), - }; - } - Ok(()) -} - -fn seek_index_add( - seek_index: &mut BTreeMap<u64, SeekIndex>, - block: &Block, - position: u64, - pts_base: u64, -) { - let trs = seek_index.entry(block.track).or_default(); - if block.flags.keyframe() { - trs.keyframes.push(trs.blocks.len()); - } - trs.blocks.push(BlockIndex { - pts: (pts_base as i64 + block.timestamp_off as i64) as u64, - source_off: position, - size: block.data.len(), - }); -} diff --git a/remuxer/src/segment_extractor.rs b/remuxer/src/segment_extractor.rs deleted file mode 100644 index 42c85f5..0000000 --- a/remuxer/src/segment_extractor.rs +++ /dev/null @@ -1,60 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use anyhow::{anyhow, bail, Result}; -use jellymatroska::{block::Block, read::EbmlReader, Master, MatroskaTag}; -use log::{debug, info, trace}; - -pub struct SegmentExtractIter<'a> { - segment: &'a mut EbmlReader, - extract: u64, -} - -impl<'a> SegmentExtractIter<'a> { - pub fn new(segment: &'a mut EbmlReader, extract: u64) -> Self { - Self { segment, extract } - } - - /// Returns the next block and sometimes its duration too. 
- pub fn next_block(&mut self) -> Result<(Block, Option<u64>)> { - let mut duration = None; - let mut group = false; - let mut saved_block = None; - loop { - let (_, item) = self.segment.next().ok_or(anyhow!("eof"))??; - match item { - MatroskaTag::Void(_) => (), - MatroskaTag::Crc32(_) => (), - MatroskaTag::Cluster(_) => (), - MatroskaTag::Timestamp(_) => (), - MatroskaTag::BlockGroup(Master::Start) => group = true, - MatroskaTag::BlockGroup(Master::End) => { - if !group { - bail!("group end without start"); - } - if let Some(block) = saved_block { - return Ok((block, duration)); - } - group = false; - } - MatroskaTag::BlockDuration(d) => duration = Some(d), - MatroskaTag::SimpleBlock(block) | MatroskaTag::Block(block) => { - if block.track == self.extract { - trace!("block: track={} tso={}", block.track, block.timestamp_off); - if group { - // can't return yet; there might be a BlockDuration coming - saved_block = Some(block); - } else { - return Ok((block, duration)); - } - } - } - MatroskaTag::Cues(_) => bail!("reached cues, this is the end"), - MatroskaTag::Segment(Master::End) => info!("extractor reached segment end"), - _ => debug!("(rs) tag ignored: {item:?}"), - } - } - } -} diff --git a/remuxer/src/trim_writer.rs b/remuxer/src/trim_writer.rs deleted file mode 100644 index 2c1b7ed..0000000 --- a/remuxer/src/trim_writer.rs +++ /dev/null @@ -1,72 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. 
- Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use anyhow::anyhow; -use log::{trace, warn}; -use std::{ - io::{Seek, Write}, - ops::Range, -}; - -pub struct TrimWriter<W> { - inner: W, - position: usize, - range: Range<usize>, -} -impl<W: Write> TrimWriter<W> { - pub fn new(inner: W, range: Range<usize>) -> Self { - Self { - inner, - range, - position: 0, - } - } -} - -impl<W: Write> Write for TrimWriter<W> { - fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { - let start = self.range.start as isize - self.position as isize; - let end = self.range.end as isize - self.position as isize; - - let start = start.clamp(0, buf.len() as isize) as usize; - let end = end.clamp(0, buf.len() as isize) as usize; - - if self.position >= self.range.end { - return Err(std::io::Error::other(anyhow!("range ended"))); - } - - let tbuf = &buf[start..end]; - Ok(if !tbuf.is_empty() { - trace!("trim={start}..{end} avail={}", buf.len()); - let sz = self.inner.write(tbuf)?; - self.position += sz; - sz - } else { - trace!("skip={}", buf.len()); - buf.len() - }) - } - - fn flush(&mut self) -> std::io::Result<()> { - self.inner.flush() - } -} - -impl<W> Seek for TrimWriter<W> { - fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result<u64> { - match pos { - std::io::SeekFrom::Start(s) => self.position = s as usize, - std::io::SeekFrom::End(_) => unimplemented!(), - std::io::SeekFrom::Current(s) => self.position += s as usize, - } - if self.position > self.range.end { - warn!( - "seeked beyond end: pos={} end={}", - self.position, self.range.end - ) - } - Ok(self.position as u64) - } -} |