From 4a0f08126d80dc589e3c97bf0a07571b8b828a74 Mon Sep 17 00:00:00 2001 From: metamuffin Date: Sat, 5 Aug 2023 11:35:29 +0200 Subject: speed up metadata import --- import/src/infojson.rs | 2 +- matroska/src/unflatten.rs | 3 +++ remuxer/src/import/mod.rs | 28 +++++++++++++++++++++------- remuxer/src/import/seek_index.rs | 12 ++++++++---- 4 files changed, 33 insertions(+), 12 deletions(-) diff --git a/import/src/infojson.rs b/import/src/infojson.rs index e22fd71..2f5eeb7 100644 --- a/import/src/infojson.rs +++ b/import/src/infojson.rs @@ -25,7 +25,7 @@ pub struct YVideo { pub tags: Vec, pub playable_in_embed: bool, pub automatic_captions: HashMap>, - pub comment_count: usize, + pub comment_count: Option, pub chapters: Option>, pub heatmap: Option>, pub like_count: usize, diff --git a/matroska/src/unflatten.rs b/matroska/src/unflatten.rs index e220689..ee1a8d9 100644 --- a/matroska/src/unflatten.rs +++ b/matroska/src/unflatten.rs @@ -42,6 +42,9 @@ impl<'a> Unflatten<'a> { end: Some(MatroskaTag::construct_master(start.id(), Master::End).unwrap()), } } + pub fn exit_dirty(&mut self) { + self.stop = true; + } pub fn position(&self) -> usize { self.inner.position() diff --git a/remuxer/src/import/mod.rs b/remuxer/src/import/mod.rs index 0a9aebf..309357b 100644 --- a/remuxer/src/import/mod.rs +++ b/remuxer/src/import/mod.rs @@ -13,7 +13,7 @@ use jellymatroska::{ unflatten::{IterWithPos, Unflat, Unflatten}, }; use log::{debug, error, info, warn}; -use std::path::PathBuf; +use std::{path::PathBuf, time::Instant}; #[derive(Default)] pub struct MatroskaMetadata { @@ -28,7 +28,6 @@ pub struct MatroskaMetadata { } pub fn import_metadata(input: &mut EbmlReader) -> Result { - let mut m = None; while let Some(item) = input.next() { let item = match item { Ok(item) => item, @@ -55,26 +54,31 @@ pub fn import_metadata(input: &mut EbmlReader) -> Result { } } MatroskaTag::Segment(_) => { - info!("segment start"); + info!("extracting metadata..."); let mut children = Unflatten::new_with_end(input, item); - m = Some(import_read_segment(&mut children)?); - info!("segment end"); + let t = Instant::now(); + let r = import_read_segment(&mut children)?; + info!("done in {:?}", t.elapsed()); + return Ok(r); } _ => debug!("(r) tag ignored: {item:?}"), } } - - Ok(m.ok_or(anyhow!("no segment"))?) + Err(anyhow!("no segment found")) } fn import_read_segment(segment: &mut Unflatten) -> Result { let (mut timestamp_scale, mut duration) = (None, None); let mut m = MatroskaMetadata::default(); + let (mut info_found, mut tags_found, mut attachments_found, mut tracks_found) = + (false, false, false, false); + while let Some(Ok(Unflat { children, item, .. })) = segment.n() { match item { MatroskaTag::SeekHead(_) => {} MatroskaTag::Info(_) => { + info_found = true; let mut children = children.unwrap(); while let Some(Ok(Unflat { children: _, item, .. @@ -90,6 +94,7 @@ fn import_read_segment(segment: &mut Unflatten) -> Result { } MatroskaTag::Void(_) => {} MatroskaTag::Tags(_) => { + tags_found = true; let mut children = children.unwrap(); while let Some(Ok(Unflat { children, item, .. })) = children.n() { match item { @@ -129,6 +134,7 @@ fn import_read_segment(segment: &mut Unflatten) -> Result { } } MatroskaTag::Attachments(_) => { + attachments_found = true; let mut children = children.unwrap(); while let Some(Ok(Unflat { children, item, .. })) = children.n() { match item { @@ -170,6 +176,7 @@ fn import_read_segment(segment: &mut Unflatten) -> Result { MatroskaTag::Cues(_) => {} MatroskaTag::Chapters(_) => {} MatroskaTag::Tracks(_) => { + tracks_found = true; let mut children = children.unwrap(); while let Some(Ok(Unflat { children, item, .. })) = children.n() { match item { @@ -264,9 +271,16 @@ fn import_read_segment(segment: &mut Unflatten) -> Result { } } MatroskaTag::Cluster(_) => {} + _ => warn!("(rs) tag ignored: {item:?}"), }; + if info_found && tracks_found && attachments_found && tags_found { + debug!("we found all we need, stopping read early"); + break; + } } + segment.exit_dirty(); + if let Some(duration) = duration { m.duration = (duration * timestamp_scale.unwrap_or(1_000_000) as f64) / 1_000_000_000_f64; } diff --git a/remuxer/src/import/seek_index.rs b/remuxer/src/import/seek_index.rs index 3ab4a2c..efd2a78 100644 --- a/remuxer/src/import/seek_index.rs +++ b/remuxer/src/import/seek_index.rs @@ -10,6 +10,10 @@ use log::{debug, info, trace, warn}; use std::{collections::BTreeMap, fs::File, path::Path}; pub fn write_all(path: &Path) -> Result<()> { + if path.with_extension(&format!("si.1")).exists() { + info!("seek index already present"); + return Ok(()); + } let seek_index = { let input = File::open(&path).unwrap(); let mut input = EbmlReader::new(input); @@ -72,7 +76,7 @@ fn import_seek_index_segment( match item { MatroskaTag::Timestamp(ts) => pts = ts, MatroskaTag::BlockGroup(_) => { - debug!("group"); + trace!("group"); let mut children = children.unwrap(); // let position = children.position(); //? TODO where should this point to? cluster or block? // probably block while let Some(Ok(Unflat { @@ -104,11 +108,11 @@ fn import_seek_index_segment( } MatroskaTag::SimpleBlock(buf) => { let block = Block::parse(&buf)?; - debug!( + trace!( "simple block: track={} tso={}", block.track, block.timestamp_off ); - debug!("{pts} {}", block.timestamp_off); + trace!("{pts} {}", block.timestamp_off); seek_index .entry(block.track) .or_insert(SeekIndex { blocks: vec![] }) @@ -119,7 +123,7 @@ fn import_seek_index_segment( size: block.data.len(), }); } - _ => debug!("(rsc) tag ignored: {item:?}"), + _ => trace!("(rsc) tag ignored: {item:?}"), } } else { break; -- cgit v1.2.3-70-g09d2