From 1d2ed0a54c21e0ab7dd01b5f3975bb5df12d40ea Mon Sep 17 00:00:00 2001
From: metamuffin
Date: Tue, 28 Nov 2023 13:31:28 +0100
Subject: ass to webvtt conversion

---
 remuxer/src/extract.rs | 144 +++++++++++++++++++++++++++++++++++++++++++++++++
 remuxer/src/lib.rs     |   1 +
 2 files changed, 145 insertions(+)
 create mode 100644 remuxer/src/extract.rs

(limited to 'remuxer')

diff --git a/remuxer/src/extract.rs b/remuxer/src/extract.rs
new file mode 100644
index 0000000..66c0526
--- /dev/null
+++ b/remuxer/src/extract.rs
@@ -0,0 +1,144 @@
+use crate::seek_index::get_seek_index;
+use anyhow::{anyhow, bail};
+use jellycommon::LocalTrack;
+use jellymatroska::{block::Block, read::EbmlReader, unflatten::IterWithPos, Master, MatroskaTag};
+use log::{debug, info};
+use std::{fs::File, io::BufReader, path::PathBuf};
+
+/// Reads every indexed block of one track from its source file.
+///
+/// The source file is `track_info.path` joined onto `path_base`. The blocks to
+/// read are listed by that file's seek index entry for `track_info.track`.
+/// Each returned element is the `(pts, duration, data)` of one block.
+///
+/// # Errors
+///
+/// Fails if the file cannot be opened, the seek index cannot be obtained or has
+/// no entry for the track, seeking fails, or [`read_group`] fails.
+pub fn extract_track(
+    path_base: PathBuf,
+    track_info: LocalTrack,
+) -> anyhow::Result<Vec<(u64, u64, Vec<u8>)>> {
+    let source_path = path_base.join(track_info.path);
+    let file = File::open(&source_path)?;
+    let mut reader = EbmlReader::new(BufReader::new(file));
+    let index = get_seek_index(&source_path)?;
+    let index = index
+        .get(&(track_info.track as u64))
+        .ok_or_else(|| anyhow!("track missing"))?;
+
+    let mut out = Vec::new();
+    for b in &index.blocks {
+        reader.seek(b.source_off, MatroskaTag::Cluster(Master::Start))?;
+        let (duration, block) = read_group(&mut reader)?;
+        out.push((b.pts, duration, block.data))
+    }
+    Ok(out)
+}
+
+/// Reads tags from `segment` until the next block group ends.
+///
+/// Returns the value of the `BlockDuration` tag and the parsed `Block` tag
+/// that were seen before the closing `BlockGroup` tag.
+///
+/// # Errors
+///
+/// Fails on end of input, on reader or block parse errors, when `Cues` is
+/// reached, and when the group closes without a duration or without a block.
+pub fn read_group(segment: &mut EbmlReader) -> anyhow::Result<(u64, Block)> {
+    let (mut dur, mut block) = (None, None);
+    loop {
+        let item = segment.next().ok_or_else(|| anyhow!("eof"))??;
+        match item {
+            MatroskaTag::Void(_) => (),
+            MatroskaTag::Crc32(_) => (),
+            MatroskaTag::Cluster(_) => (),
+            MatroskaTag::Timestamp(_) => (),
+            MatroskaTag::SimpleBlock(_buf) => {
+                // Simple blocks are currently skipped; the strict check below is disabled.
+                // bail!("unexpected simpleblock, where a group was expected")
+            }
+            MatroskaTag::BlockGroup(Master::Start) => (),
+            MatroskaTag::BlockGroup(Master::End) => {
+                // Both come from file content, so a missing one is an error, not a panic.
+                let dur = dur.ok_or_else(|| anyhow!("block group without duration"))?;
+                let block = block.ok_or_else(|| anyhow!("block group without block"))?;
+                return Ok((dur, block));
+            }
+            MatroskaTag::BlockDuration(duration) => dur = Some(duration),
+            MatroskaTag::Block(buf) => block = Some(Block::parse(&buf)?),
+            MatroskaTag::Cues(_) => bail!("reached cues, this is the end"),
+            MatroskaTag::Segment(Master::End) => info!("extractor reached segment end"),
+            _ => debug!("(rs) tag ignored: {item:?}"),
+        }
+    }
+}
+
+// fn import_seek_index_segment(
+//     segment: &mut Unflatten,
+//     seek_index: &mut BTreeMap,
+// ) -> Result<()> {
+//     while let Some(Ok(Unflat { children, item, .. })) = segment.n() {
+//         match item {
+//             MatroskaTag::SeekHead(_) => {}
+//             MatroskaTag::Info(_) => {}
+//             MatroskaTag::Tags(_) => {}
+//             MatroskaTag::Cues(_) => {}
+//             MatroskaTag::Chapters(_) => {}
+//             MatroskaTag::Tracks(_) => {}
+//             MatroskaTag::Void(_) => {}
+//             MatroskaTag::Cluster(_) => {
+//                 let mut children = children.unwrap();
+//                 let mut pts = 0;
+//                 let mut position = children.position();
+
+//                 loop {
+//                     if let Some(Ok(Unflat { children, item, .. })) = children.n() {
+//                         match item {
+//                             MatroskaTag::Timestamp(ts) => pts = ts,
+//                             MatroskaTag::BlockGroup(_) => {
+//                                 trace!("group");
+//                                 let mut children = children.unwrap();
+//                                 // let position = children.position(); //? TODO where should this point to? cluster or block? // probably block
+//                                 while let Some(Ok(Unflat {
+//                                     children: _,
+//                                     item,
+//                                     position,
+//                                 })) = children.n()
+//                                 {
+//                                     match item {
+//                                         MatroskaTag::Block(ref buf) => {
+//                                             let block = Block::parse(buf)?;
+//                                             debug!(
+//                                                 "block: track={} tso={}",
+//                                                 block.track, block.timestamp_off
+//                                             );
+//                                             seek_index_add(seek_index, &block, position, pts);
+//                                         }
+//                                         _ => trace!("{item:?}"),
+//                                     }
+//                                 }
+//                             }
+//                             MatroskaTag::SimpleBlock(buf) => {
+//                                 let block = Block::parse(&buf)?;
+//                                 trace!(
+//                                     "simple block: track={} tso={}",
+//                                     block.track,
+//                                     block.timestamp_off
+//                                 );
+//                                 trace!("{pts} {}", block.timestamp_off);
+//                                 seek_index_add(seek_index, &block, position, pts);
+//                             }
+//                             _ => trace!("(rsc) tag ignored: {item:?}"),
+//                         }
+//                     } else {
+//                         break;
+//                     }
+//                     position = children.position();
+//                 }
+//             }
+//             _ => debug!("(rs) tag ignored: {item:?}"),
+//         };
+//     }
+//     Ok(())
+// }
diff --git a/remuxer/src/lib.rs b/remuxer/src/lib.rs
index e1f8c80..88f1916 100644
--- a/remuxer/src/lib.rs
+++ b/remuxer/src/lib.rs
@@ -9,6 +9,7 @@ pub mod seek_index;
 pub mod segment_extractor;
 pub mod snippet;
 pub mod trim_writer;
+pub mod extract;
 
 pub use remux::remux_stream_into;
 pub use snippet::write_snippet_into;
-- 
cgit v1.2.3-70-g09d2