aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Cargo.lock28
-rw-r--r--import/Cargo.toml5
-rw-r--r--import/src/lib.rs14
-rw-r--r--import/src/matroska.rs120
-rw-r--r--remuxer/Cargo.toml5
-rw-r--r--remuxer/src/fragment.rs152
-rw-r--r--remuxer/src/lib.rs1
-rw-r--r--remuxer/src/metadata.rs116
8 files changed, 240 insertions, 201 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 863b2fe..a09a1dc 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -241,9 +241,9 @@ dependencies = [
[[package]]
name = "async-trait"
-version = "0.1.85"
+version = "0.1.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3f934833b4b7233644e5848f235df3f57ed8c80f1528a26c3dfa13d2147fa056"
+checksum = "644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d"
dependencies = [
"proc-macro2",
"quote",
@@ -448,9 +448,9 @@ checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b"
[[package]]
name = "cc"
-version = "1.2.10"
+version = "1.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "13208fcbb66eaeffe09b99fffbe1af420f00a7b35aa99ad683dfc1aa76145229"
+checksum = "e4730490333d58093109dc02c23174c3f4d490998c3fed3cc8e82d57afedb9cf"
dependencies = [
"jobserver",
"libc",
@@ -804,7 +804,6 @@ checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2"
[[package]]
name = "ebml-struct"
version = "0.1.0"
-source = "git+https://codeberg.org/metamuffin/ebml-struct#baa1f77aea4accf7a6046bf6b60275e5d942d816"
dependencies = [
"bincode",
]
@@ -1752,10 +1751,10 @@ dependencies = [
"base64",
"bincode",
"crossbeam-channel",
- "ebml-struct",
"futures",
"jellybase",
"jellyclient",
+ "jellyremuxer",
"log",
"rayon",
"regex",
@@ -1783,6 +1782,7 @@ version = "0.1.0"
dependencies = [
"anyhow",
"bincode",
+ "ebml-struct",
"jellybase",
"jellymatroska",
"log",
@@ -3081,9 +3081,9 @@ dependencies = [
[[package]]
name = "rustls"
-version = "0.23.21"
+version = "0.23.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8f287924602bf649d949c63dc8ac8b235fa5387d394020705b80c4eb597ce5b8"
+checksum = "9fb9263ab4eb695e42321db096e3b8fbd715a59b154d5c88d82db2175b681ba7"
dependencies = [
"once_cell",
"ring",
@@ -3784,9 +3784,9 @@ dependencies = [
[[package]]
name = "toml_edit"
-version = "0.22.22"
+version = "0.22.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5"
+checksum = "02a8b472d1a3d7c18e2d61a489aee3453fd9031c33e4f55bd533f4a7adca1bee"
dependencies = [
"indexmap",
"serde",
@@ -4192,9 +4192,9 @@ dependencies = [
[[package]]
name = "webpki-roots"
-version = "0.26.7"
+version = "0.26.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5d642ff16b7e79272ae451b7322067cdc17cadf68c23264be9d94a32319efe7e"
+checksum = "2210b291f7ea53617fbafcc4939f10914214ec15aace5ba62293a668f322c5c9"
dependencies = [
"rustls-pki-types",
]
@@ -4416,9 +4416,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "winnow"
-version = "0.6.25"
+version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ad699df48212c6cc6eb4435f35500ac6fd3b9913324f938aea302022ce19d310"
+checksum = "7e49d2d35d3fad69b39b94139037ecfb4f359f08958b9c11e7315ce770462419"
dependencies = [
"memchr",
]
diff --git a/import/Cargo.toml b/import/Cargo.toml
index 645326d..37b5a77 100644
--- a/import/Cargo.toml
+++ b/import/Cargo.toml
@@ -6,10 +6,7 @@ edition = "2021"
[dependencies]
jellybase = { path = "../base" }
jellyclient = { path = "../client" }
-
-ebml-struct = { git = "https://codeberg.org/metamuffin/ebml-struct", features = [
- "bincode",
-] }
+jellyremuxer = { path = "../remuxer" }
rayon = "1.10.0"
crossbeam-channel = "0.5.14"
diff --git a/import/src/lib.rs b/import/src/lib.rs
index 4be2151..2de4cc9 100644
--- a/import/src/lib.rs
+++ b/import/src/lib.rs
@@ -14,7 +14,7 @@ use jellybase::{
database::Database,
CONF, SECRETS,
};
-use matroska::matroska_metadata;
+use jellyremuxer::metadata::matroska_metadata;
use rayon::iter::{ParallelDrainRange, ParallelIterator};
use std::{
collections::HashMap,
@@ -243,6 +243,13 @@ fn import_media_file(db: &Database, path: &Path, parent: NodeID) -> Result<()> {
let info = m.info.ok_or(anyhow!("no info"))?;
let tracks = m.tracks.ok_or(anyhow!("no tracks"))?;
+ let infojson = m
+ .infojson
+ .as_ref()
+ .map(|ij| serde_json::from_slice::<infojson::YVideo>(ij))
+ .transpose()
+ .context("infojson")?;
+
let mut tags = m
.tags
.map(|tags| {
@@ -260,8 +267,7 @@ fn import_media_file(db: &Database, path: &Path, parent: NodeID) -> Result<()> {
.to_string_lossy()
.to_string();
- let slug = m
- .infojson
+ let slug = infojson
.as_ref()
.map(|ij| format!("youtube-{}", ij.id))
.unwrap_or(make_kebab(&filepath_stem));
@@ -311,7 +317,7 @@ fn import_media_file(db: &Database, path: &Path, parent: NodeID) -> Result<()> {
})
.collect::<Vec<_>>();
- if let Some(infojson) = m.infojson {
+ if let Some(infojson) = infojson {
node.kind = Some(
if !tracks
.iter()
diff --git a/import/src/matroska.rs b/import/src/matroska.rs
index f9a59ab..e69de29 100644
--- a/import/src/matroska.rs
+++ b/import/src/matroska.rs
@@ -1,120 +0,0 @@
-/*
- This file is part of jellything (https://codeberg.org/metamuffin/jellything)
- which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
- Copyright (C) 2025 metamuffin <metamuffin.org>
-*/
-use crate::infojson::{self, YVideo};
-use anyhow::{Context, Result};
-use bincode::{Decode, Encode};
-use ebml_struct::{
- ids::*,
- matroska::*,
- read::{EbmlReadExt, TagRead},
-};
-use jellybase::{
- assetfed::AssetInner,
- cache::{cache_file, cache_memory},
- common::Asset,
-};
-use log::info;
-use std::{
- fs::File,
- io::{BufReader, ErrorKind, Read, Write},
- path::Path,
- sync::Arc,
-};
-
-#[derive(Encode, Decode, Clone)]
-pub(crate) struct MatroskaMetadata {
- pub info: Option<Info>,
- pub tracks: Option<Tracks>,
- pub cover: Option<Asset>,
- pub chapters: Option<Chapters>,
- pub tags: Option<Tags>,
- pub infojson: Option<YVideo>,
-}
-pub(crate) fn matroska_metadata(path: &Path) -> Result<Arc<Option<MatroskaMetadata>>> {
- cache_memory(&["mkmeta-v1", path.to_string_lossy().as_ref()], || {
- let mut magic = [0; 4];
- File::open(path)?.read_exact(&mut magic).ok();
- if !matches!(magic, [0x1A, 0x45, 0xDF, 0xA3]) {
- return Ok(None);
- }
-
- info!("reading media file {path:?}");
- let mut file = BufReader::new(File::open(path)?);
- let mut file = file.by_ref().take(u64::MAX);
-
- let (x, mut ebml) = file.read_tag()?;
- assert_eq!(x, EL_EBML);
- let ebml = Ebml::read(&mut ebml).unwrap();
- assert!(ebml.doc_type == "matroska" || ebml.doc_type == "webm");
- let (x, mut segment) = file.read_tag()?;
- assert_eq!(x, EL_SEGMENT);
-
- let mut info = None;
- let mut infojson = None;
- let mut tracks = None;
- let mut cover = None;
- let mut chapters = None;
- let mut tags = None;
- loop {
- let (x, mut seg) = match segment.read_tag() {
- Ok(o) => o,
- Err(e) if e.kind() == ErrorKind::UnexpectedEof => break,
- Err(e) => return Err(e.into()),
- };
- match x {
- EL_INFO => info = Some(Info::read(&mut seg).context("info")?),
- EL_TRACKS => tracks = Some(Tracks::read(&mut seg).context("tracks")?),
- EL_CHAPTERS => chapters = Some(Chapters::read(&mut seg).context("chapters")?),
- EL_TAGS => tags = Some(Tags::read(&mut seg).context("tags")?),
- EL_ATTACHMENTS => {
- let attachments = Attachments::read(&mut seg).context("attachments")?;
- for f in attachments.files {
- match f.name.as_str() {
- "info.json" => {
- infojson = Some(
- serde_json::from_slice::<infojson::YVideo>(&f.data)
- .context("infojson")?,
- );
- }
- "cover.webp" | "cover.png" | "cover.jpg" | "cover.jpeg"
- | "cover.avif" => {
- cover = Some(
- AssetInner::Cache(cache_file(
- &["att-cover", path.to_string_lossy().as_ref()],
- move |mut file| {
- file.write_all(&f.data)?;
- Ok(())
- },
- )?)
- .ser(),
- )
- }
- a => println!("{a:?}"),
- }
- }
- }
- EL_VOID | EL_CRC32 | EL_CUES | EL_SEEKHEAD => {
- seg.consume()?;
- }
- EL_CLUSTER => {
- break;
- }
- id => {
- eprintln!("unknown top-level element {id:x}");
- seg.consume()?;
- }
- }
- }
- Ok(Some(MatroskaMetadata {
- chapters,
- cover,
- info,
- infojson,
- tags,
- tracks,
- }))
- })
-}
diff --git a/remuxer/Cargo.toml b/remuxer/Cargo.toml
index 2313dcc..acbdb44 100644
--- a/remuxer/Cargo.toml
+++ b/remuxer/Cargo.toml
@@ -13,3 +13,8 @@ log = { workspace = true }
serde = { version = "1.0.217", features = ["derive"] }
bincode = { version = "2.0.0-rc.3", features = ["serde"] }
+
+# ebml-struct = { git = "https://codeberg.org/metamuffin/ebml-struct", features = [
+# "bincode",
+# ] }
+ebml-struct = { path = "../../ebml-struct", features = ["bincode"] }
diff --git a/remuxer/src/fragment.rs b/remuxer/src/fragment.rs
index 9fa68f3..c7954b0 100644
--- a/remuxer/src/fragment.rs
+++ b/remuxer/src/fragment.rs
@@ -5,12 +5,16 @@
*/
use crate::{
- ebml_header, ebml_segment_info, ebml_track_entry, seek_index::get_seek_index,
- segment_extractor::SegmentExtractIter,
+ metadata::matroska_metadata, seek_index::get_seek_index, segment_extractor::SegmentExtractIter,
};
use anyhow::{anyhow, Context, Result};
+use ebml_struct::{
+ matroska::{BlockGroup, Cluster, Ebml, Info, Segment, Tracks},
+ write::TagWrite,
+ Block,
+};
use jellybase::common::{LocalTrack, Node, SourceTrackKind};
-use jellymatroska::{read::EbmlReader, write::EbmlWriter, Master, MatroskaTag};
+use jellymatroska::{read::EbmlReader, Master, MatroskaTag};
use log::{debug, info};
use std::{
fs::File,
@@ -88,7 +92,7 @@ pub fn write_fragment_into(
n: usize,
) -> anyhow::Result<()> {
info!("writing fragment {n} of {:?} (track {track})", item.title);
- let mut output = EbmlWriter::new(BufWriter::new(writer), 0);
+
let media_info = item.media.as_ref().unwrap();
let info = media_info
.tracks
@@ -146,71 +150,101 @@ pub fn write_fragment_into(
.map(|b| b.pts)
.unwrap_or((media_info.duration * 1000.) as u64);
- output.write_tag(&ebml_header(webm))?;
- output.write_tag(&MatroskaTag::Segment(Master::Start))?;
- output.write_tag(&ebml_segment_info(
- format!("{}: {info}", item.title.clone().unwrap_or_default()),
- (last_block_pts - start_block.pts) as f64 / 1000.,
- ))?;
- output.write_tag(&MatroskaTag::Tracks(Master::Collected(vec![
- ebml_track_entry(
- mapped,
- local_track.track as u64 * 100, // TODO something else that is unique to the track
- &info,
- local_track.codec_private.clone(),
- ),
- ])))?;
+ let input_metadata = (*matroska_metadata(&local_track.path)?).clone().unwrap();
reader.seek(start_block.source_off, MatroskaTag::Cluster(Master::Start))?;
let mut reader = SegmentExtractIter::new(&mut reader, local_track.track as u64);
- {
- // TODO this one caused fragments to get dropped by MSE for no reason
- // for i in start_block_index..end_block_index {
- // let index_block = &index.blocks[i];
- // let (mut block, duration) = reader.next()?;
+ let mut cluster = Cluster::default();
+ cluster.timestamp = start_block.pts;
+ for i in start_block_index..end_block_index {
+ let index_block = &index.blocks[i];
+ let (block, duration) = reader.next_block()?;
- // assert_eq!(index_block.size, block.data.len(), "seek index is wrong");
+ let mut block = Block {
+ data: block.data,
+ discardable: block.discardable,
+ invisible: block.invisible,
+ keyframe: block.keyframe,
+ lacing: block.lacing.map(|l| match l {
+ jellymatroska::block::LacingType::Xiph => ebml_struct::LacingType::Xiph,
+ jellymatroska::block::LacingType::FixedSize => ebml_struct::LacingType::FixedSize,
+ jellymatroska::block::LacingType::Ebml => ebml_struct::LacingType::Ebml,
+ }),
+ timestamp_off: block.timestamp_off,
+ track: block.track,
+ };
- // block.track = 1;
- // block.timestamp_off = 0;
- // output.write_tag(&MatroskaTag::Cluster(Master::Collected(vec![
- // MatroskaTag::Timestamp(index_block.pts - start_block.pts),
- // if let Some(duration) = duration {
- // MatroskaTag::BlockGroup(Master::Collected(vec![
- // MatroskaTag::BlockDuration(duration),
- // MatroskaTag::Block(block),
- // ]))
- // } else {
- // MatroskaTag::SimpleBlock(block)
- // },
- // ])))?;
- // }
+ assert_eq!(index_block.size, block.data.len(), "seek index is wrong");
+
+ block.track = 1;
+ // TODO this does generate overflows sometimes
+ block.timestamp_off = (index_block.pts as i64 - start_block.pts as i64)
+ .try_into()
+ .unwrap();
+ if let Some(duration) = duration {
+ cluster.block_groups.push(BlockGroup {
+ block_duration: Some(duration),
+ block,
+ ..Default::default()
+ })
+ } else {
+ cluster.simple_blocks.push(block)
+ }
}
- {
- let mut blocks = vec![MatroskaTag::Timestamp(start_block.pts)];
- for i in start_block_index..end_block_index {
- let index_block = &index.blocks[i];
- let (mut block, duration) = reader.next_block()?;
- assert_eq!(index_block.size, block.data.len(), "seek index is wrong");
+ let mut input_track = input_metadata
+ .tracks
+ .unwrap()
+ .entries
+ .into_iter()
+ .find(|t| t.track_number == local_track.track as u64)
+ .unwrap();
- block.track = 1;
- // TODO this does generate overflows sometimes
- block.timestamp_off = (index_block.pts as i64 - start_block.pts as i64)
- .try_into()
- .unwrap();
- if let Some(duration) = duration {
- blocks.push(MatroskaTag::BlockGroup(Master::Collected(vec![
- MatroskaTag::BlockDuration(duration),
- MatroskaTag::Block(block),
- ])))
- } else {
- blocks.push(MatroskaTag::SimpleBlock(block))
- }
+ input_track.track_number = 1;
+ if webm {
+ if let Some(v) = &mut input_track.video {
+ v.colour = None;
}
- output.write_tag(&MatroskaTag::Cluster(Master::Collected(blocks)))?;
}
- debug!("wrote {} bytes", output.position());
+
+ let mut output = BufWriter::new(writer);
+
+ Ebml {
+ ebml_version: 1,
+ ebml_read_version: 1,
+ ebml_max_id_length: 4,
+ ebml_max_size_length: 8,
+ doc_type: if webm {
+ "webm".to_string()
+ } else {
+ "matroska".to_string()
+ },
+ doc_type_version: 4,
+ doc_type_read_version: 2,
+ doc_type_extensions: vec![],
+ }
+ .write(&mut output)?;
+
+ Segment {
+ info: Info {
+ timestamp_scale: 1_000_000,
+ duration: Some((last_block_pts - start_block.pts) as f64),
+ title: Some(format!(
+ "{}: {info}",
+ item.title.clone().unwrap_or_default()
+ )),
+ muxing_app: "ebml-struct".to_owned(),
+ writing_app: "jellything".to_owned(),
+ ..Default::default()
+ },
+ tracks: Some(Tracks {
+ entries: vec![input_track],
+ }),
+ clusters: vec![cluster],
+ ..Default::default()
+ }
+ .write(&mut output)?;
+
Ok(())
}
diff --git a/remuxer/src/lib.rs b/remuxer/src/lib.rs
index b46369e..f3526dc 100644
--- a/remuxer/src/lib.rs
+++ b/remuxer/src/lib.rs
@@ -9,6 +9,7 @@ pub mod remux;
pub mod seek_index;
pub mod segment_extractor;
pub mod trim_writer;
+pub mod metadata;
pub use fragment::write_fragment_into;
pub use remux::remux_stream_into;
diff --git a/remuxer/src/metadata.rs b/remuxer/src/metadata.rs
new file mode 100644
index 0000000..92b5445
--- /dev/null
+++ b/remuxer/src/metadata.rs
@@ -0,0 +1,116 @@
+/*
+ This file is part of jellything (https://codeberg.org/metamuffin/jellything)
+ which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
+ Copyright (C) 2025 metamuffin <metamuffin.org>
+*/
+use anyhow::{Context, Result};
+use bincode::{Decode, Encode};
+use ebml_struct::{
+ ids::*,
+ matroska::*,
+ read::{EbmlReadExt, TagRead},
+};
+use jellybase::{
+ assetfed::AssetInner,
+ cache::{cache_file, cache_memory},
+ common::Asset,
+};
+use log::info;
+use std::{
+ fs::File,
+ io::{BufReader, ErrorKind, Read, Write},
+ path::Path,
+ sync::Arc,
+};
+
+#[derive(Encode, Decode, Clone)]
+pub struct MatroskaMetadata {
+ pub info: Option<Info>,
+ pub tracks: Option<Tracks>,
+ pub cover: Option<Asset>,
+ pub chapters: Option<Chapters>,
+ pub tags: Option<Tags>,
+ pub infojson: Option<Vec<u8>>,
+}
+pub fn matroska_metadata(path: &Path) -> Result<Arc<Option<MatroskaMetadata>>> {
+ cache_memory(&["mkmeta-v1", path.to_string_lossy().as_ref()], || {
+ let mut magic = [0; 4];
+ File::open(path)?.read_exact(&mut magic).ok();
+ if !matches!(magic, [0x1A, 0x45, 0xDF, 0xA3]) {
+ return Ok(None);
+ }
+
+ info!("reading media file {path:?}");
+ let mut file = BufReader::new(File::open(path)?);
+ let mut file = file.by_ref().take(u64::MAX);
+
+ let (x, mut ebml) = file.read_tag()?;
+ assert_eq!(x, EL_EBML);
+ let ebml = Ebml::read(&mut ebml).unwrap();
+ assert!(ebml.doc_type == "matroska" || ebml.doc_type == "webm");
+ let (x, mut segment) = file.read_tag()?;
+ assert_eq!(x, EL_SEGMENT);
+
+ let mut info = None;
+ let mut infojson = None;
+ let mut tracks = None;
+ let mut cover = None;
+ let mut chapters = None;
+ let mut tags = None;
+ loop {
+ let (x, mut seg) = match segment.read_tag() {
+ Ok(o) => o,
+ Err(e) if e.kind() == ErrorKind::UnexpectedEof => break,
+ Err(e) => return Err(e.into()),
+ };
+ match x {
+ EL_INFO => info = Some(Info::read(&mut seg).context("info")?),
+ EL_TRACKS => tracks = Some(Tracks::read(&mut seg).context("tracks")?),
+ EL_CHAPTERS => chapters = Some(Chapters::read(&mut seg).context("chapters")?),
+ EL_TAGS => tags = Some(Tags::read(&mut seg).context("tags")?),
+ EL_ATTACHMENTS => {
+ let attachments = Attachments::read(&mut seg).context("attachments")?;
+ for f in attachments.files {
+ match f.name.as_str() {
+ "info.json" => {
+ infojson = Some(f.data);
+ }
+ "cover.webp" | "cover.png" | "cover.jpg" | "cover.jpeg"
+ | "cover.avif" => {
+ cover = Some(
+ AssetInner::Cache(cache_file(
+ &["att-cover", path.to_string_lossy().as_ref()],
+ move |mut file| {
+ file.write_all(&f.data)?;
+ Ok(())
+ },
+ )?)
+ .ser(),
+ )
+ }
+ a => println!("{a:?}"),
+ }
+ }
+ }
+ EL_VOID | EL_CRC32 | EL_CUES | EL_SEEKHEAD => {
+ seg.consume()?;
+ }
+ EL_CLUSTER => {
+ break;
+ }
+ id => {
+ eprintln!("unknown top-level element {id:x}");
+ seg.consume()?;
+ }
+ }
+ }
+ Ok(Some(MatroskaMetadata {
+ chapters,
+ cover,
+ info,
+ infojson,
+ tags,
+ tracks,
+ }))
+ })
+}