diff options
author | metamuffin <metamuffin@disroot.org> | 2025-02-01 15:11:15 +0100 |
---|---|---|
committer | metamuffin <metamuffin@disroot.org> | 2025-02-01 15:11:15 +0100 |
commit | 95d90fbba36a752f7cf96ea0bebd925534341e15 (patch) | |
tree | 03b51cc0bda1a6332d7b694a37a806b0ad982050 | |
parent | 05ae425c1b906f81d6981e10688cb6949bf9daaf (diff) | |
download | jellything-95d90fbba36a752f7cf96ea0bebd925534341e15.tar jellything-95d90fbba36a752f7cf96ea0bebd925534341e15.tar.bz2 jellything-95d90fbba36a752f7cf96ea0bebd925534341e15.tar.zst |
ebml-struct based fragment writer (does not work)
-rw-r--r-- | Cargo.lock | 28 | ||||
-rw-r--r-- | import/Cargo.toml | 5 | ||||
-rw-r--r-- | import/src/lib.rs | 14 | ||||
-rw-r--r-- | import/src/matroska.rs | 120 | ||||
-rw-r--r-- | remuxer/Cargo.toml | 5 | ||||
-rw-r--r-- | remuxer/src/fragment.rs | 152 | ||||
-rw-r--r-- | remuxer/src/lib.rs | 1 | ||||
-rw-r--r-- | remuxer/src/metadata.rs | 116 |
8 files changed, 240 insertions, 201 deletions
@@ -241,9 +241,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.85" +version = "0.1.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f934833b4b7233644e5848f235df3f57ed8c80f1528a26c3dfa13d2147fa056" +checksum = "644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d" dependencies = [ "proc-macro2", "quote", @@ -448,9 +448,9 @@ checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" [[package]] name = "cc" -version = "1.2.10" +version = "1.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13208fcbb66eaeffe09b99fffbe1af420f00a7b35aa99ad683dfc1aa76145229" +checksum = "e4730490333d58093109dc02c23174c3f4d490998c3fed3cc8e82d57afedb9cf" dependencies = [ "jobserver", "libc", @@ -804,7 +804,6 @@ checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" [[package]] name = "ebml-struct" version = "0.1.0" -source = "git+https://codeberg.org/metamuffin/ebml-struct#baa1f77aea4accf7a6046bf6b60275e5d942d816" dependencies = [ "bincode", ] @@ -1752,10 +1751,10 @@ dependencies = [ "base64", "bincode", "crossbeam-channel", - "ebml-struct", "futures", "jellybase", "jellyclient", + "jellyremuxer", "log", "rayon", "regex", @@ -1783,6 +1782,7 @@ version = "0.1.0" dependencies = [ "anyhow", "bincode", + "ebml-struct", "jellybase", "jellymatroska", "log", @@ -3081,9 +3081,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.21" +version = "0.23.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f287924602bf649d949c63dc8ac8b235fa5387d394020705b80c4eb597ce5b8" +checksum = "9fb9263ab4eb695e42321db096e3b8fbd715a59b154d5c88d82db2175b681ba7" dependencies = [ "once_cell", "ring", @@ -3784,9 +3784,9 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.22.22" +version = "0.22.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" +checksum = "02a8b472d1a3d7c18e2d61a489aee3453fd9031c33e4f55bd533f4a7adca1bee" dependencies = [ "indexmap", "serde", @@ -4192,9 +4192,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.26.7" +version = "0.26.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d642ff16b7e79272ae451b7322067cdc17cadf68c23264be9d94a32319efe7e" +checksum = "2210b291f7ea53617fbafcc4939f10914214ec15aace5ba62293a668f322c5c9" dependencies = [ "rustls-pki-types", ] @@ -4416,9 +4416,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.6.25" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad699df48212c6cc6eb4435f35500ac6fd3b9913324f938aea302022ce19d310" +checksum = "7e49d2d35d3fad69b39b94139037ecfb4f359f08958b9c11e7315ce770462419" dependencies = [ "memchr", ] diff --git a/import/Cargo.toml b/import/Cargo.toml index 645326d..37b5a77 100644 --- a/import/Cargo.toml +++ b/import/Cargo.toml @@ -6,10 +6,7 @@ edition = "2021" [dependencies] jellybase = { path = "../base" } jellyclient = { path = "../client" } - -ebml-struct = { git = "https://codeberg.org/metamuffin/ebml-struct", features = [ - "bincode", -] } +jellyremuxer = { path = "../remuxer" } rayon = "1.10.0" crossbeam-channel = "0.5.14" diff --git a/import/src/lib.rs b/import/src/lib.rs index 4be2151..2de4cc9 100644 --- a/import/src/lib.rs +++ b/import/src/lib.rs @@ -14,7 +14,7 @@ use jellybase::{ database::Database, CONF, SECRETS, }; -use matroska::matroska_metadata; +use jellyremuxer::metadata::matroska_metadata; use rayon::iter::{ParallelDrainRange, ParallelIterator}; use std::{ collections::HashMap, @@ -243,6 +243,13 @@ fn import_media_file(db: &Database, path: &Path, parent: NodeID) -> Result<()> { let info = m.info.ok_or(anyhow!("no info"))?; let tracks = m.tracks.ok_or(anyhow!("no tracks"))?; + let infojson = m + .infojson + .as_ref() + .map(|ij| serde_json::from_slice::<infojson::YVideo>(ij)) + .transpose() + .context("infojson")?; + let mut tags = m .tags .map(|tags| { @@ -260,8 +267,7 @@ fn import_media_file(db: &Database, path: &Path, parent: NodeID) -> Result<()> { .to_string_lossy() .to_string(); - let slug = m - .infojson + let slug = infojson .as_ref() .map(|ij| format!("youtube-{}", ij.id)) .unwrap_or(make_kebab(&filepath_stem)); @@ -311,7 +317,7 @@ fn import_media_file(db: &Database, path: &Path, parent: NodeID) -> Result<()> { }) .collect::<Vec<_>>(); - if let Some(infojson) = m.infojson { + if let Some(infojson) = infojson { node.kind = Some( if !tracks .iter() diff --git a/import/src/matroska.rs b/import/src/matroska.rs index f9a59ab..e69de29 100644 --- a/import/src/matroska.rs +++ b/import/src/matroska.rs @@ -1,120 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use crate::infojson::{self, YVideo}; -use anyhow::{Context, Result}; -use bincode::{Decode, Encode}; -use ebml_struct::{ - ids::*, - matroska::*, - read::{EbmlReadExt, TagRead}, -}; -use jellybase::{ - assetfed::AssetInner, - cache::{cache_file, cache_memory}, - common::Asset, -}; -use log::info; -use std::{ - fs::File, - io::{BufReader, ErrorKind, Read, Write}, - path::Path, - sync::Arc, -}; - -#[derive(Encode, Decode, Clone)] -pub(crate) struct MatroskaMetadata { - pub info: Option<Info>, - pub tracks: Option<Tracks>, - pub cover: Option<Asset>, - pub chapters: Option<Chapters>, - pub tags: Option<Tags>, - pub infojson: Option<YVideo>, -} -pub(crate) fn matroska_metadata(path: &Path) -> Result<Arc<Option<MatroskaMetadata>>> { - cache_memory(&["mkmeta-v1", path.to_string_lossy().as_ref()], || { - let mut magic = [0; 4]; - File::open(path)?.read_exact(&mut magic).ok(); - if !matches!(magic, [0x1A, 0x45, 0xDF, 0xA3]) { - return Ok(None); - } - - info!("reading media file {path:?}"); - let mut file = BufReader::new(File::open(path)?); - let mut file = file.by_ref().take(u64::MAX); - - let (x, mut ebml) = file.read_tag()?; - assert_eq!(x, EL_EBML); - let ebml = Ebml::read(&mut ebml).unwrap(); - assert!(ebml.doc_type == "matroska" || ebml.doc_type == "webm"); - let (x, mut segment) = file.read_tag()?; - assert_eq!(x, EL_SEGMENT); - - let mut info = None; - let mut infojson = None; - let mut tracks = None; - let mut cover = None; - let mut chapters = None; - let mut tags = None; - loop { - let (x, mut seg) = match segment.read_tag() { - Ok(o) => o, - Err(e) if e.kind() == ErrorKind::UnexpectedEof => break, - Err(e) => return Err(e.into()), - }; - match x { - EL_INFO => info = Some(Info::read(&mut seg).context("info")?), - EL_TRACKS => tracks = Some(Tracks::read(&mut seg).context("tracks")?), - EL_CHAPTERS => chapters = Some(Chapters::read(&mut seg).context("chapters")?), - EL_TAGS => tags = Some(Tags::read(&mut seg).context("tags")?), - EL_ATTACHMENTS => { - let attachments = Attachments::read(&mut seg).context("attachments")?; - for f in attachments.files { - match f.name.as_str() { - "info.json" => { - infojson = Some( - serde_json::from_slice::<infojson::YVideo>(&f.data) - .context("infojson")?, - ); - } - "cover.webp" | "cover.png" | "cover.jpg" | "cover.jpeg" - | "cover.avif" => { - cover = Some( - AssetInner::Cache(cache_file( - &["att-cover", path.to_string_lossy().as_ref()], - move |mut file| { - file.write_all(&f.data)?; - Ok(()) - }, - )?) - .ser(), - ) - } - a => println!("{a:?}"), - } - } - } - EL_VOID | EL_CRC32 | EL_CUES | EL_SEEKHEAD => { - seg.consume()?; - } - EL_CLUSTER => { - break; - } - id => { - eprintln!("unknown top-level element {id:x}"); - seg.consume()?; - } - } - } - Ok(Some(MatroskaMetadata { - chapters, - cover, - info, - infojson, - tags, - tracks, - })) - }) -} diff --git a/remuxer/Cargo.toml b/remuxer/Cargo.toml index 2313dcc..acbdb44 100644 --- a/remuxer/Cargo.toml +++ b/remuxer/Cargo.toml @@ -13,3 +13,8 @@ log = { workspace = true } serde = { version = "1.0.217", features = ["derive"] } bincode = { version = "2.0.0-rc.3", features = ["serde"] } + +# ebml-struct = { git = "https://codeberg.org/metamuffin/ebml-struct", features = [ +# "bincode", +# ] } +ebml-struct = { path = "../../ebml-struct", features = ["bincode"] } diff --git a/remuxer/src/fragment.rs b/remuxer/src/fragment.rs index 9fa68f3..c7954b0 100644 --- a/remuxer/src/fragment.rs +++ b/remuxer/src/fragment.rs @@ -5,12 +5,16 @@ */ use crate::{ - ebml_header, ebml_segment_info, ebml_track_entry, seek_index::get_seek_index, - segment_extractor::SegmentExtractIter, + metadata::matroska_metadata, seek_index::get_seek_index, segment_extractor::SegmentExtractIter, }; use anyhow::{anyhow, Context, Result}; +use ebml_struct::{ + matroska::{BlockGroup, Cluster, Ebml, Info, Segment, Tracks}, + write::TagWrite, + Block, +}; use jellybase::common::{LocalTrack, Node, SourceTrackKind}; -use jellymatroska::{read::EbmlReader, write::EbmlWriter, Master, MatroskaTag}; +use jellymatroska::{read::EbmlReader, Master, MatroskaTag}; use log::{debug, info}; use std::{ fs::File, @@ -88,7 +92,7 @@ pub fn write_fragment_into( n: usize, ) -> anyhow::Result<()> { info!("writing fragment {n} of {:?} (track {track})", item.title); - let mut output = EbmlWriter::new(BufWriter::new(writer), 0); + let media_info = item.media.as_ref().unwrap(); let info = media_info .tracks @@ -146,71 +150,101 @@ pub fn write_fragment_into( .map(|b| b.pts) .unwrap_or((media_info.duration * 1000.) as u64); - output.write_tag(&ebml_header(webm))?; - output.write_tag(&MatroskaTag::Segment(Master::Start))?; - output.write_tag(&ebml_segment_info( - format!("{}: {info}", item.title.clone().unwrap_or_default()), - (last_block_pts - start_block.pts) as f64 / 1000., - ))?; - output.write_tag(&MatroskaTag::Tracks(Master::Collected(vec![ - ebml_track_entry( - mapped, - local_track.track as u64 * 100, // TODO something else that is unique to the track - &info, - local_track.codec_private.clone(), - ), - ])))?; + let input_metadata = (*matroska_metadata(&local_track.path)?).clone().unwrap(); reader.seek(start_block.source_off, MatroskaTag::Cluster(Master::Start))?; let mut reader = SegmentExtractIter::new(&mut reader, local_track.track as u64); - { - // TODO this one caused fragments to get dropped by MSE for no reason - // for i in start_block_index..end_block_index { - // let index_block = &index.blocks[i]; - // let (mut block, duration) = reader.next()?; + let mut cluster = Cluster::default(); + cluster.timestamp = start_block.pts; + for i in start_block_index..end_block_index { + let index_block = &index.blocks[i]; + let (block, duration) = reader.next_block()?; - // assert_eq!(index_block.size, block.data.len(), "seek index is wrong"); + let mut block = Block { + data: block.data, + discardable: block.discardable, + invisible: block.invisible, + keyframe: block.keyframe, + lacing: block.lacing.map(|l| match l { + jellymatroska::block::LacingType::Xiph => ebml_struct::LacingType::Xiph, + jellymatroska::block::LacingType::FixedSize => ebml_struct::LacingType::FixedSize, + jellymatroska::block::LacingType::Ebml => ebml_struct::LacingType::Ebml, + }), + timestamp_off: block.timestamp_off, + track: block.track, + }; - // block.track = 1; - // block.timestamp_off = 0; - // output.write_tag(&MatroskaTag::Cluster(Master::Collected(vec![ - // MatroskaTag::Timestamp(index_block.pts - start_block.pts), - // if let Some(duration) = duration { - // MatroskaTag::BlockGroup(Master::Collected(vec![ - // MatroskaTag::BlockDuration(duration), - // MatroskaTag::Block(block), - // ])) - // } else { - // MatroskaTag::SimpleBlock(block) - // }, - // ])))?; - // } + assert_eq!(index_block.size, block.data.len(), "seek index is wrong"); + + block.track = 1; + // TODO this does generate overflows sometimes + block.timestamp_off = (index_block.pts as i64 - start_block.pts as i64) + .try_into() + .unwrap(); + if let Some(duration) = duration { + cluster.block_groups.push(BlockGroup { + block_duration: Some(duration), + block, + ..Default::default() + }) + } else { + cluster.simple_blocks.push(block) + } } - { - let mut blocks = vec![MatroskaTag::Timestamp(start_block.pts)]; - for i in start_block_index..end_block_index { - let index_block = &index.blocks[i]; - let (mut block, duration) = reader.next_block()?; - assert_eq!(index_block.size, block.data.len(), "seek index is wrong"); + let mut input_track = input_metadata + .tracks + .unwrap() + .entries + .into_iter() + .find(|t| t.track_number == local_track.track as u64) + .unwrap(); - block.track = 1; - // TODO this does generate overflows sometimes - block.timestamp_off = (index_block.pts as i64 - start_block.pts as i64) - .try_into() - .unwrap(); - if let Some(duration) = duration { - blocks.push(MatroskaTag::BlockGroup(Master::Collected(vec![ - MatroskaTag::BlockDuration(duration), - MatroskaTag::Block(block), - ]))) - } else { - blocks.push(MatroskaTag::SimpleBlock(block)) - } + input_track.track_number = 1; + if webm { + if let Some(v) = &mut input_track.video { + v.colour = None; } - output.write_tag(&MatroskaTag::Cluster(Master::Collected(blocks)))?; } - debug!("wrote {} bytes", output.position()); + + let mut output = BufWriter::new(writer); + + Ebml { + ebml_version: 1, + ebml_read_version: 1, + ebml_max_id_length: 4, + ebml_max_size_length: 8, + doc_type: if webm { + "webm".to_string() + } else { + "matroska".to_string() + }, + doc_type_version: 4, + doc_type_read_version: 2, + doc_type_extensions: vec![], + } + .write(&mut output)?; + + Segment { + info: Info { + timestamp_scale: 1_000_000, + duration: Some((last_block_pts - start_block.pts) as f64), + title: Some(format!( + "{}: {info}", + item.title.clone().unwrap_or_default() + )), + muxing_app: "ebml-struct".to_owned(), + writing_app: "jellything".to_owned(), + ..Default::default() + }, + tracks: Some(Tracks { + entries: vec![input_track], + }), + clusters: vec![cluster], + ..Default::default() + } + .write(&mut output)?; + Ok(()) } diff --git a/remuxer/src/lib.rs b/remuxer/src/lib.rs index b46369e..f3526dc 100644 --- a/remuxer/src/lib.rs +++ b/remuxer/src/lib.rs @@ -9,6 +9,7 @@ pub mod remux; pub mod seek_index; pub mod segment_extractor; pub mod trim_writer; +pub mod metadata; pub use fragment::write_fragment_into; pub use remux::remux_stream_into; diff --git a/remuxer/src/metadata.rs b/remuxer/src/metadata.rs new file mode 100644 index 0000000..92b5445 --- /dev/null +++ b/remuxer/src/metadata.rs @@ -0,0 +1,116 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin <metamuffin.org> +*/ +use anyhow::{Context, Result}; +use bincode::{Decode, Encode}; +use ebml_struct::{ + ids::*, + matroska::*, + read::{EbmlReadExt, TagRead}, +}; +use jellybase::{ + assetfed::AssetInner, + cache::{cache_file, cache_memory}, + common::Asset, +}; +use log::info; +use std::{ + fs::File, + io::{BufReader, ErrorKind, Read, Write}, + path::Path, + sync::Arc, +}; + +#[derive(Encode, Decode, Clone)] +pub struct MatroskaMetadata { + pub info: Option<Info>, + pub tracks: Option<Tracks>, + pub cover: Option<Asset>, + pub chapters: Option<Chapters>, + pub tags: Option<Tags>, + pub infojson: Option<Vec<u8>>, +} +pub fn matroska_metadata(path: &Path) -> Result<Arc<Option<MatroskaMetadata>>> { + cache_memory(&["mkmeta-v1", path.to_string_lossy().as_ref()], || { + let mut magic = [0; 4]; + File::open(path)?.read_exact(&mut magic).ok(); + if !matches!(magic, [0x1A, 0x45, 0xDF, 0xA3]) { + return Ok(None); + } + + info!("reading media file {path:?}"); + let mut file = BufReader::new(File::open(path)?); + let mut file = file.by_ref().take(u64::MAX); + + let (x, mut ebml) = file.read_tag()?; + assert_eq!(x, EL_EBML); + let ebml = Ebml::read(&mut ebml).unwrap(); + assert!(ebml.doc_type == "matroska" || ebml.doc_type == "webm"); + let (x, mut segment) = file.read_tag()?; + assert_eq!(x, EL_SEGMENT); + + let mut info = None; + let mut infojson = None; + let mut tracks = None; + let mut cover = None; + let mut chapters = None; + let mut tags = None; + loop { + let (x, mut seg) = match segment.read_tag() { + Ok(o) => o, + Err(e) if e.kind() == ErrorKind::UnexpectedEof => break, + Err(e) => return Err(e.into()), + }; + match x { + EL_INFO => info = Some(Info::read(&mut seg).context("info")?), + EL_TRACKS => tracks = Some(Tracks::read(&mut seg).context("tracks")?), + EL_CHAPTERS => chapters = Some(Chapters::read(&mut seg).context("chapters")?), + EL_TAGS => tags = Some(Tags::read(&mut seg).context("tags")?), + EL_ATTACHMENTS => { + let attachments = Attachments::read(&mut seg).context("attachments")?; + for f in attachments.files { + match f.name.as_str() { + "info.json" => { + infojson = Some(f.data); + } + "cover.webp" | "cover.png" | "cover.jpg" | "cover.jpeg" + | "cover.avif" => { + cover = Some( + AssetInner::Cache(cache_file( + &["att-cover", path.to_string_lossy().as_ref()], + move |mut file| { + file.write_all(&f.data)?; + Ok(()) + }, + )?) + .ser(), + ) + } + a => println!("{a:?}"), + } + } + } + EL_VOID | EL_CRC32 | EL_CUES | EL_SEEKHEAD => { + seg.consume()?; + } + EL_CLUSTER => { + break; + } + id => { + eprintln!("unknown top-level element {id:x}"); + seg.consume()?; + } + } + } + Ok(Some(MatroskaMetadata { + chapters, + cover, + info, + infojson, + tags, + tracks, + })) + }) +} |