diff options
author | metamuffin <metamuffin@disroot.org> | 2025-02-01 15:11:31 +0100 |
---|---|---|
committer | metamuffin <metamuffin@disroot.org> | 2025-02-01 15:11:31 +0100 |
commit | 2f053d597e91585deb9efd1a098537b244c25f56 (patch) | |
tree | cc6b6677f3a375e291be2ae5ef36563543476315 | |
parent | 95d90fbba36a752f7cf96ea0bebd925534341e15 (diff) | |
download | jellything-2f053d597e91585deb9efd1a098537b244c25f56.tar jellything-2f053d597e91585deb9efd1a098537b244c25f56.tar.bz2 jellything-2f053d597e91585deb9efd1a098537b244c25f56.tar.zst |
Revert "ebml-struct based fragment writer (does not work)"
This reverts commit 95d90fbba36a752f7cf96ea0bebd925534341e15.
-rw-r--r-- | Cargo.lock | 28 | ||||
-rw-r--r-- | import/Cargo.toml | 5 | ||||
-rw-r--r-- | import/src/lib.rs | 14 | ||||
-rw-r--r-- | import/src/matroska.rs | 120 | ||||
-rw-r--r-- | remuxer/Cargo.toml | 5 | ||||
-rw-r--r-- | remuxer/src/fragment.rs | 152 | ||||
-rw-r--r-- | remuxer/src/lib.rs | 1 | ||||
-rw-r--r-- | remuxer/src/metadata.rs | 116 |
8 files changed, 201 insertions, 240 deletions
@@ -241,9 +241,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.86" +version = "0.1.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d" +checksum = "3f934833b4b7233644e5848f235df3f57ed8c80f1528a26c3dfa13d2147fa056" dependencies = [ "proc-macro2", "quote", @@ -448,9 +448,9 @@ checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" [[package]] name = "cc" -version = "1.2.11" +version = "1.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4730490333d58093109dc02c23174c3f4d490998c3fed3cc8e82d57afedb9cf" +checksum = "13208fcbb66eaeffe09b99fffbe1af420f00a7b35aa99ad683dfc1aa76145229" dependencies = [ "jobserver", "libc", @@ -804,6 +804,7 @@ checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" [[package]] name = "ebml-struct" version = "0.1.0" +source = "git+https://codeberg.org/metamuffin/ebml-struct#baa1f77aea4accf7a6046bf6b60275e5d942d816" dependencies = [ "bincode", ] @@ -1751,10 +1752,10 @@ dependencies = [ "base64", "bincode", "crossbeam-channel", + "ebml-struct", "futures", "jellybase", "jellyclient", - "jellyremuxer", "log", "rayon", "regex", @@ -1782,7 +1783,6 @@ version = "0.1.0" dependencies = [ "anyhow", "bincode", - "ebml-struct", "jellybase", "jellymatroska", "log", @@ -3081,9 +3081,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.22" +version = "0.23.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb9263ab4eb695e42321db096e3b8fbd715a59b154d5c88d82db2175b681ba7" +checksum = "8f287924602bf649d949c63dc8ac8b235fa5387d394020705b80c4eb597ce5b8" dependencies = [ "once_cell", "ring", @@ -3784,9 +3784,9 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.22.23" +version = "0.22.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02a8b472d1a3d7c18e2d61a489aee3453fd9031c33e4f55bd533f4a7adca1bee" +checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" dependencies = [ "indexmap", "serde", @@ -4192,9 +4192,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.26.8" +version = "0.26.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2210b291f7ea53617fbafcc4939f10914214ec15aace5ba62293a668f322c5c9" +checksum = "5d642ff16b7e79272ae451b7322067cdc17cadf68c23264be9d94a32319efe7e" dependencies = [ "rustls-pki-types", ] @@ -4416,9 +4416,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.7.0" +version = "0.6.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e49d2d35d3fad69b39b94139037ecfb4f359f08958b9c11e7315ce770462419" +checksum = "ad699df48212c6cc6eb4435f35500ac6fd3b9913324f938aea302022ce19d310" dependencies = [ "memchr", ] diff --git a/import/Cargo.toml b/import/Cargo.toml index 37b5a77..645326d 100644 --- a/import/Cargo.toml +++ b/import/Cargo.toml @@ -6,7 +6,10 @@ edition = "2021" [dependencies] jellybase = { path = "../base" } jellyclient = { path = "../client" } -jellyremuxer = { path = "../remuxer" } + +ebml-struct = { git = "https://codeberg.org/metamuffin/ebml-struct", features = [ + "bincode", +] } rayon = "1.10.0" crossbeam-channel = "0.5.14" diff --git a/import/src/lib.rs b/import/src/lib.rs index 2de4cc9..4be2151 100644 --- a/import/src/lib.rs +++ b/import/src/lib.rs @@ -14,7 +14,7 @@ use jellybase::{ database::Database, CONF, SECRETS, }; -use jellyremuxer::metadata::matroska_metadata; +use matroska::matroska_metadata; use rayon::iter::{ParallelDrainRange, ParallelIterator}; use std::{ collections::HashMap, @@ -243,13 +243,6 @@ fn import_media_file(db: &Database, path: &Path, parent: NodeID) -> Result<()> { let info = m.info.ok_or(anyhow!("no info"))?; let tracks = m.tracks.ok_or(anyhow!("no tracks"))?; - let infojson = m - .infojson - .as_ref() - .map(|ij| serde_json::from_slice::<infojson::YVideo>(ij)) - .transpose() - .context("infojson")?; - let mut tags = m .tags .map(|tags| { @@ -267,7 +260,8 @@ fn import_media_file(db: &Database, path: &Path, parent: NodeID) -> Result<()> { .to_string_lossy() .to_string(); - let slug = infojson + let slug = m + .infojson .as_ref() .map(|ij| format!("youtube-{}", ij.id)) .unwrap_or(make_kebab(&filepath_stem)); @@ -317,7 +311,7 @@ fn import_media_file(db: &Database, path: &Path, parent: NodeID) -> Result<()> { }) .collect::<Vec<_>>(); - if let Some(infojson) = infojson { + if let Some(infojson) = m.infojson { node.kind = Some( if !tracks .iter() diff --git a/import/src/matroska.rs b/import/src/matroska.rs index e69de29..f9a59ab 100644 --- a/import/src/matroska.rs +++ b/import/src/matroska.rs @@ -0,0 +1,120 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin <metamuffin.org> +*/ +use crate::infojson::{self, YVideo}; +use anyhow::{Context, Result}; +use bincode::{Decode, Encode}; +use ebml_struct::{ + ids::*, + matroska::*, + read::{EbmlReadExt, TagRead}, +}; +use jellybase::{ + assetfed::AssetInner, + cache::{cache_file, cache_memory}, + common::Asset, +}; +use log::info; +use std::{ + fs::File, + io::{BufReader, ErrorKind, Read, Write}, + path::Path, + sync::Arc, +}; + +#[derive(Encode, Decode, Clone)] +pub(crate) struct MatroskaMetadata { + pub info: Option<Info>, + pub tracks: Option<Tracks>, + pub cover: Option<Asset>, + pub chapters: Option<Chapters>, + pub tags: Option<Tags>, + pub infojson: Option<YVideo>, +} +pub(crate) fn matroska_metadata(path: &Path) -> Result<Arc<Option<MatroskaMetadata>>> { + cache_memory(&["mkmeta-v1", path.to_string_lossy().as_ref()], || { + let mut magic = [0; 4]; + File::open(path)?.read_exact(&mut magic).ok(); + if !matches!(magic, [0x1A, 0x45, 0xDF, 0xA3]) { + return Ok(None); + } + + info!("reading media file {path:?}"); + let mut file = BufReader::new(File::open(path)?); + let mut file = file.by_ref().take(u64::MAX); + + let (x, mut ebml) = file.read_tag()?; + assert_eq!(x, EL_EBML); + let ebml = Ebml::read(&mut ebml).unwrap(); + assert!(ebml.doc_type == "matroska" || ebml.doc_type == "webm"); + let (x, mut segment) = file.read_tag()?; + assert_eq!(x, EL_SEGMENT); + + let mut info = None; + let mut infojson = None; + let mut tracks = None; + let mut cover = None; + let mut chapters = None; + let mut tags = None; + loop { + let (x, mut seg) = match segment.read_tag() { + Ok(o) => o, + Err(e) if e.kind() == ErrorKind::UnexpectedEof => break, + Err(e) => return Err(e.into()), + }; + match x { + EL_INFO => info = Some(Info::read(&mut seg).context("info")?), + EL_TRACKS => tracks = Some(Tracks::read(&mut seg).context("tracks")?), + EL_CHAPTERS => chapters = Some(Chapters::read(&mut seg).context("chapters")?), + EL_TAGS => tags = Some(Tags::read(&mut seg).context("tags")?), + EL_ATTACHMENTS => { + let attachments = Attachments::read(&mut seg).context("attachments")?; + for f in attachments.files { + match f.name.as_str() { + "info.json" => { + infojson = Some( + serde_json::from_slice::<infojson::YVideo>(&f.data) + .context("infojson")?, + ); + } + "cover.webp" | "cover.png" | "cover.jpg" | "cover.jpeg" + | "cover.avif" => { + cover = Some( + AssetInner::Cache(cache_file( + &["att-cover", path.to_string_lossy().as_ref()], + move |mut file| { + file.write_all(&f.data)?; + Ok(()) + }, + )?) + .ser(), + ) + } + a => println!("{a:?}"), + } + } + } + EL_VOID | EL_CRC32 | EL_CUES | EL_SEEKHEAD => { + seg.consume()?; + } + EL_CLUSTER => { + break; + } + id => { + eprintln!("unknown top-level element {id:x}"); + seg.consume()?; + } + } + } + Ok(Some(MatroskaMetadata { + chapters, + cover, + info, + infojson, + tags, + tracks, + })) + }) +} diff --git a/remuxer/Cargo.toml b/remuxer/Cargo.toml index acbdb44..2313dcc 100644 --- a/remuxer/Cargo.toml +++ b/remuxer/Cargo.toml @@ -13,8 +13,3 @@ log = { workspace = true } serde = { version = "1.0.217", features = ["derive"] } bincode = { version = "2.0.0-rc.3", features = ["serde"] } - -# ebml-struct = { git = "https://codeberg.org/metamuffin/ebml-struct", features = [ -# "bincode", -# ] } -ebml-struct = { path = "../../ebml-struct", features = ["bincode"] } diff --git a/remuxer/src/fragment.rs b/remuxer/src/fragment.rs index c7954b0..9fa68f3 100644 --- a/remuxer/src/fragment.rs +++ b/remuxer/src/fragment.rs @@ -5,16 +5,12 @@ */ use crate::{ - metadata::matroska_metadata, seek_index::get_seek_index, segment_extractor::SegmentExtractIter, + ebml_header, ebml_segment_info, ebml_track_entry, seek_index::get_seek_index, + segment_extractor::SegmentExtractIter, }; use anyhow::{anyhow, Context, Result}; -use ebml_struct::{ - matroska::{BlockGroup, Cluster, Ebml, Info, Segment, Tracks}, - write::TagWrite, - Block, -}; use jellybase::common::{LocalTrack, Node, SourceTrackKind}; -use jellymatroska::{read::EbmlReader, Master, MatroskaTag}; +use jellymatroska::{read::EbmlReader, write::EbmlWriter, Master, MatroskaTag}; use log::{debug, info}; use std::{ fs::File, @@ -92,7 +88,7 @@ pub fn write_fragment_into( n: usize, ) -> anyhow::Result<()> { info!("writing fragment {n} of {:?} (track {track})", item.title); - + let mut output = EbmlWriter::new(BufWriter::new(writer), 0); let media_info = item.media.as_ref().unwrap(); let info = media_info .tracks @@ -150,101 +146,71 @@ pub fn write_fragment_into( .map(|b| b.pts) .unwrap_or((media_info.duration * 1000.) as u64); - let input_metadata = (*matroska_metadata(&local_track.path)?).clone().unwrap(); + output.write_tag(&ebml_header(webm))?; + output.write_tag(&MatroskaTag::Segment(Master::Start))?; + output.write_tag(&ebml_segment_info( + format!("{}: {info}", item.title.clone().unwrap_or_default()), + (last_block_pts - start_block.pts) as f64 / 1000., + ))?; + output.write_tag(&MatroskaTag::Tracks(Master::Collected(vec![ + ebml_track_entry( + mapped, + local_track.track as u64 * 100, // TODO something else that is unique to the track + &info, + local_track.codec_private.clone(), + ), + ])))?; reader.seek(start_block.source_off, MatroskaTag::Cluster(Master::Start))?; let mut reader = SegmentExtractIter::new(&mut reader, local_track.track as u64); - let mut cluster = Cluster::default(); - cluster.timestamp = start_block.pts; - for i in start_block_index..end_block_index { - let index_block = &index.blocks[i]; - let (block, duration) = reader.next_block()?; + { + // TODO this one caused fragments to get dropped by MSE for no reason + // for i in start_block_index..end_block_index { + // let index_block = &index.blocks[i]; + // let (mut block, duration) = reader.next()?; - let mut block = Block { - data: block.data, - discardable: block.discardable, - invisible: block.invisible, - keyframe: block.keyframe, - lacing: block.lacing.map(|l| match l { - jellymatroska::block::LacingType::Xiph => ebml_struct::LacingType::Xiph, - jellymatroska::block::LacingType::FixedSize => ebml_struct::LacingType::FixedSize, - jellymatroska::block::LacingType::Ebml => ebml_struct::LacingType::Ebml, - }), - timestamp_off: block.timestamp_off, - track: block.track, - }; + // assert_eq!(index_block.size, block.data.len(), "seek index is wrong"); - assert_eq!(index_block.size, block.data.len(), "seek index is wrong"); - - block.track = 1; - // TODO this does generate overflows sometimes - block.timestamp_off = (index_block.pts as i64 - start_block.pts as i64) - .try_into() - .unwrap(); - if let Some(duration) = duration { - cluster.block_groups.push(BlockGroup { - block_duration: Some(duration), - block, - ..Default::default() - }) - } else { - cluster.simple_blocks.push(block) - } + // block.track = 1; + // block.timestamp_off = 0; + // output.write_tag(&MatroskaTag::Cluster(Master::Collected(vec![ + // MatroskaTag::Timestamp(index_block.pts - start_block.pts), + // if let Some(duration) = duration { + // MatroskaTag::BlockGroup(Master::Collected(vec![ + // MatroskaTag::BlockDuration(duration), + // MatroskaTag::Block(block), + // ])) + // } else { + // MatroskaTag::SimpleBlock(block) + // }, + // ])))?; + // } } + { + let mut blocks = vec![MatroskaTag::Timestamp(start_block.pts)]; + for i in start_block_index..end_block_index { + let index_block = &index.blocks[i]; + let (mut block, duration) = reader.next_block()?; - let mut input_track = input_metadata - .tracks - .unwrap() - .entries - .into_iter() - .find(|t| t.track_number == local_track.track as u64) - .unwrap(); + assert_eq!(index_block.size, block.data.len(), "seek index is wrong"); - input_track.track_number = 1; - if webm { - if let Some(v) = &mut input_track.video { - v.colour = None; + block.track = 1; + // TODO this does generate overflows sometimes + block.timestamp_off = (index_block.pts as i64 - start_block.pts as i64) + .try_into() + .unwrap(); + if let Some(duration) = duration { + blocks.push(MatroskaTag::BlockGroup(Master::Collected(vec![ + MatroskaTag::BlockDuration(duration), + MatroskaTag::Block(block), + ]))) + } else { + blocks.push(MatroskaTag::SimpleBlock(block)) + } } + output.write_tag(&MatroskaTag::Cluster(Master::Collected(blocks)))?; } - - let mut output = BufWriter::new(writer); - - Ebml { - ebml_version: 1, - ebml_read_version: 1, - ebml_max_id_length: 4, - ebml_max_size_length: 8, - doc_type: if webm { - "webm".to_string() - } else { - "matroska".to_string() - }, - doc_type_version: 4, - doc_type_read_version: 2, - doc_type_extensions: vec![], - } - .write(&mut output)?; - - Segment { - info: Info { - timestamp_scale: 1_000_000, - duration: Some((last_block_pts - start_block.pts) as f64), - title: Some(format!( - "{}: {info}", - item.title.clone().unwrap_or_default() - )), - muxing_app: "ebml-struct".to_owned(), - writing_app: "jellything".to_owned(), - ..Default::default() - }, - tracks: Some(Tracks { - entries: vec![input_track], - }), - clusters: vec![cluster], - ..Default::default() - } - .write(&mut output)?; - + debug!("wrote {} bytes", output.position()); Ok(()) } diff --git a/remuxer/src/lib.rs b/remuxer/src/lib.rs index f3526dc..b46369e 100644 --- a/remuxer/src/lib.rs +++ b/remuxer/src/lib.rs @@ -9,7 +9,6 @@ pub mod remux; pub mod seek_index; pub mod segment_extractor; pub mod trim_writer; -pub mod metadata; pub use fragment::write_fragment_into; pub use remux::remux_stream_into; diff --git a/remuxer/src/metadata.rs b/remuxer/src/metadata.rs deleted file mode 100644 index 92b5445..0000000 --- a/remuxer/src/metadata.rs +++ /dev/null @@ -1,116 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use anyhow::{Context, Result}; -use bincode::{Decode, Encode}; -use ebml_struct::{ - ids::*, - matroska::*, - read::{EbmlReadExt, TagRead}, -}; -use jellybase::{ - assetfed::AssetInner, - cache::{cache_file, cache_memory}, - common::Asset, -}; -use log::info; -use std::{ - fs::File, - io::{BufReader, ErrorKind, Read, Write}, - path::Path, - sync::Arc, -}; - -#[derive(Encode, Decode, Clone)] -pub struct MatroskaMetadata { - pub info: Option<Info>, - pub tracks: Option<Tracks>, - pub cover: Option<Asset>, - pub chapters: Option<Chapters>, - pub tags: Option<Tags>, - pub infojson: Option<Vec<u8>>, -} -pub fn matroska_metadata(path: &Path) -> Result<Arc<Option<MatroskaMetadata>>> { - cache_memory(&["mkmeta-v1", path.to_string_lossy().as_ref()], || { - let mut magic = [0; 4]; - File::open(path)?.read_exact(&mut magic).ok(); - if !matches!(magic, [0x1A, 0x45, 0xDF, 0xA3]) { - return Ok(None); - } - - info!("reading media file {path:?}"); - let mut file = BufReader::new(File::open(path)?); - let mut file = file.by_ref().take(u64::MAX); - - let (x, mut ebml) = file.read_tag()?; - assert_eq!(x, EL_EBML); - let ebml = Ebml::read(&mut ebml).unwrap(); - assert!(ebml.doc_type == "matroska" || ebml.doc_type == "webm"); - let (x, mut segment) = file.read_tag()?; - assert_eq!(x, EL_SEGMENT); - - let mut info = None; - let mut infojson = None; - let mut tracks = None; - let mut cover = None; - let mut chapters = None; - let mut tags = None; - loop { - let (x, mut seg) = match segment.read_tag() { - Ok(o) => o, - Err(e) if e.kind() == ErrorKind::UnexpectedEof => break, - Err(e) => return Err(e.into()), - }; - match x { - EL_INFO => info = Some(Info::read(&mut seg).context("info")?), - EL_TRACKS => tracks = Some(Tracks::read(&mut seg).context("tracks")?), - EL_CHAPTERS => chapters = Some(Chapters::read(&mut seg).context("chapters")?), - EL_TAGS => tags = Some(Tags::read(&mut seg).context("tags")?), - EL_ATTACHMENTS => { - let attachments = Attachments::read(&mut seg).context("attachments")?; - for f in attachments.files { - match f.name.as_str() { - "info.json" => { - infojson = Some(f.data); - } - "cover.webp" | "cover.png" | "cover.jpg" | "cover.jpeg" - | "cover.avif" => { - cover = Some( - AssetInner::Cache(cache_file( - &["att-cover", path.to_string_lossy().as_ref()], - move |mut file| { - file.write_all(&f.data)?; - Ok(()) - }, - )?) - .ser(), - ) - } - a => println!("{a:?}"), - } - } - } - EL_VOID | EL_CRC32 | EL_CUES | EL_SEEKHEAD => { - seg.consume()?; - } - EL_CLUSTER => { - break; - } - id => { - eprintln!("unknown top-level element {id:x}"); - seg.consume()?; - } - } - } - Ok(Some(MatroskaMetadata { - chapters, - cover, - info, - infojson, - tags, - tracks, - })) - }) -} |