aboutsummaryrefslogtreecommitdiff
path: root/remuxer
diff options
context:
space:
mode:
Diffstat (limited to 'remuxer')
-rw-r--r--remuxer/Cargo.toml4
-rw-r--r--remuxer/src/extract.rs17
-rw-r--r--remuxer/src/fragment.rs109
-rw-r--r--remuxer/src/lib.rs66
-rw-r--r--remuxer/src/matroska_to_mpeg4.rs36
-rw-r--r--remuxer/src/matroska_to_webm.rs84
-rw-r--r--remuxer/src/metadata.rs123
-rw-r--r--remuxer/src/remux.rs572
-rw-r--r--remuxer/src/seek_index.rs33
9 files changed, 642 insertions, 402 deletions
diff --git a/remuxer/Cargo.toml b/remuxer/Cargo.toml
index 2313dcc..16713df 100644
--- a/remuxer/Cargo.toml
+++ b/remuxer/Cargo.toml
@@ -13,3 +13,7 @@ log = { workspace = true }
serde = { version = "1.0.217", features = ["derive"] }
bincode = { version = "2.0.0-rc.3", features = ["serde"] }
+
+ebml-struct = { git = "https://codeberg.org/metamuffin/ebml-struct", features = [
+ "bincode",
+] }
diff --git a/remuxer/src/extract.rs b/remuxer/src/extract.rs
index 12e4003..15c1e9d 100644
--- a/remuxer/src/extract.rs
+++ b/remuxer/src/extract.rs
@@ -5,29 +5,22 @@
*/
use crate::seek_index::get_seek_index;
use anyhow::{anyhow, bail};
-use jellybase::common::LocalTrack;
use jellymatroska::{block::Block, read::EbmlReader, Master, MatroskaTag};
use log::debug;
use std::{fs::File, io::BufReader, path::PathBuf};
pub type TrackExtract = Vec<(u64, Option<u64>, Vec<u8>)>;
-pub fn extract_track(path_base: PathBuf, track_info: LocalTrack) -> anyhow::Result<TrackExtract> {
- let source_path = path_base.join(track_info.path);
- let file = File::open(&source_path)?;
+pub fn extract_track(path: PathBuf, track: u64) -> anyhow::Result<TrackExtract> {
+ let file = File::open(&path)?;
let mut reader = EbmlReader::new(BufReader::new(file));
- let index = get_seek_index(&source_path)?;
- let index = index
- .get(&(track_info.track as u64))
- .ok_or(anyhow!("track missing"))?;
+ let index = get_seek_index(&path)?;
+ let index = index.get(&track).ok_or(anyhow!("track missing"))?;
let mut out = Vec::new();
for b in &index.blocks {
reader.seek(b.source_off, MatroskaTag::BlockGroup(Master::Start))?;
let (duration, block) = read_group(&mut reader)?;
- assert_eq!(
- track_info.track, block.track as usize,
- "seek index is wrong"
- );
+ assert_eq!(track, block.track, "seek index is wrong");
out.push((b.pts, duration, block.data))
}
Ok(out)
diff --git a/remuxer/src/fragment.rs b/remuxer/src/fragment.rs
index 9fa68f3..0da1ed5 100644
--- a/remuxer/src/fragment.rs
+++ b/remuxer/src/fragment.rs
@@ -5,11 +5,12 @@
*/
use crate::{
- ebml_header, ebml_segment_info, ebml_track_entry, seek_index::get_seek_index,
+ ebml_header, ebml_segment_info, ebml_track_entry,
+ metadata::{matroska_metadata, MatroskaMetadata},
+ seek_index::get_seek_index,
segment_extractor::SegmentExtractIter,
};
use anyhow::{anyhow, Context, Result};
-use jellybase::common::{LocalTrack, Node, SourceTrackKind};
use jellymatroska::{read::EbmlReader, write::EbmlWriter, Master, MatroskaTag};
use log::{debug, info};
use std::{
@@ -19,34 +20,35 @@ use std::{
path::Path,
};
-const FRAGMENT_LENGTH: f64 = 2.;
+const FRAGMENT_LENGTH: f64 = 5.;
-pub fn fragment_index(
- path_base: &Path,
- item: &Node,
- local_track: &LocalTrack,
- track_index: usize,
-) -> Result<Vec<Range<f64>>> {
- let media_info = item.media.as_ref().unwrap();
- let source_path = path_base.join(&local_track.path);
- let index = get_seek_index(&source_path)?;
+pub fn fragment_index(path: &Path, track: u64) -> Result<Vec<Range<f64>>> {
+ let meta = matroska_metadata(path)?;
+ let duration = media_duration(&meta);
+ let force_kf = meta
+ .as_ref()
+ .tracks
+ .as_ref()
+ .unwrap()
+ .entries
+ .iter()
+ .find(|t| t.track_number == track)
+ .unwrap()
+ .track_type
+ == 17;
+
+ let index = get_seek_index(&path)?;
let index = index
- .get(&(local_track.track as u64))
+ .get(&track)
.ok_or(anyhow!("seek index track missing"))?;
- // everything is a keyframe (even though nothing is...)
- let force_kf = matches!(
- media_info.tracks[track_index].kind,
- SourceTrackKind::Subtitles { .. }
- );
-
let n_kf = if force_kf {
index.blocks.len()
} else {
index.keyframes.len()
};
- let average_kf_interval = media_info.duration / n_kf as f64;
+ let average_kf_interval = duration / n_kf as f64;
let kf_per_frag = (FRAGMENT_LENGTH / average_kf_interval).ceil() as usize;
debug!("average keyframe interval: {average_kf_interval}");
debug!(" => keyframes per frag {kf_per_frag}");
@@ -72,7 +74,7 @@ pub fn fragment_index(
index.keyframes.get((i + 1) * kf_per_frag).copied()
}
.map(|i| index.blocks[i].pts as f64 / 1000.)
- .unwrap_or(media_info.duration);
+ .unwrap_or(duration);
start..end
})
.collect())
@@ -80,45 +82,46 @@ pub fn fragment_index(
pub fn write_fragment_into(
writer: impl Write,
- path_base: &Path,
- item: &Node,
- local_track: &LocalTrack,
- track: usize,
+ path: &Path,
+ track: u64,
webm: bool,
+ title: &str,
n: usize,
) -> anyhow::Result<()> {
- info!("writing fragment {n} of {:?} (track {track})", item.title);
- let mut output = EbmlWriter::new(BufWriter::new(writer), 0);
- let media_info = item.media.as_ref().unwrap();
- let info = media_info
+ let meta = matroska_metadata(path)?;
+ let duration = media_duration(&meta);
+ let track_meta = meta
+ .as_ref()
.tracks
- .get(track)
- .ok_or(anyhow!("track not available"))?
- .to_owned();
- let source_path = path_base.join(&local_track.path);
+ .as_ref()
+ .unwrap()
+ .entries
+ .iter()
+ .find(|t| t.track_number == track)
+ .unwrap();
+ let force_kf = track_meta.track_type == 17;
+
+ info!("writing fragment {n} of {:?} (track {track})", title);
+ let mut output = EbmlWriter::new(BufWriter::new(writer), 0);
let mapped = 1;
- info!(
- "\t- {track} {source_path:?} ({} => {mapped})",
- local_track.track
- );
- info!("\t {}", info);
- let file = File::open(&source_path).context("opening source file")?;
- let index = get_seek_index(&source_path)?;
+ info!("\t- {track} {path:?} ({} => {mapped})", track);
+ // info!("\t {}", info);
+ let file = File::open(&path).context("opening source file")?;
+ let index = get_seek_index(&path)?;
let index = index
- .get(&(local_track.track as u64))
+ .get(&track)
.ok_or(anyhow!("track missing 2"))?
.to_owned();
debug!("\t seek index: {} blocks loaded", index.blocks.len());
let mut reader = EbmlReader::new(BufReader::new(file));
- let force_kf = matches!(info.kind, SourceTrackKind::Subtitles { .. });
let n_kf = if force_kf {
index.blocks.len()
} else {
index.keyframes.len()
};
-
- let average_kf_interval = media_info.duration / n_kf as f64;
+ debug!("{duration} {n_kf}");
+ let average_kf_interval = duration / n_kf as f64;
let kf_per_frag = (FRAGMENT_LENGTH / average_kf_interval).ceil() as usize;
debug!("average keyframe interval: {average_kf_interval}");
debug!(" => keyframes per frag {kf_per_frag}");
@@ -144,25 +147,20 @@ pub fn write_fragment_into(
.blocks
.get(end_block_index)
.map(|b| b.pts)
- .unwrap_or((media_info.duration * 1000.) as u64);
+ .unwrap_or((duration * 1000.) as u64);
output.write_tag(&ebml_header(webm))?;
output.write_tag(&MatroskaTag::Segment(Master::Start))?;
output.write_tag(&ebml_segment_info(
- format!("{}: {info}", item.title.clone().unwrap_or_default()),
+ title.to_string(),
(last_block_pts - start_block.pts) as f64 / 1000.,
))?;
output.write_tag(&MatroskaTag::Tracks(Master::Collected(vec![
- ebml_track_entry(
- mapped,
- local_track.track as u64 * 100, // TODO something else that is unique to the track
- &info,
- local_track.codec_private.clone(),
- ),
+ ebml_track_entry(mapped, track_meta),
])))?;
reader.seek(start_block.source_off, MatroskaTag::Cluster(Master::Start))?;
- let mut reader = SegmentExtractIter::new(&mut reader, local_track.track as u64);
+ let mut reader = SegmentExtractIter::new(&mut reader, track);
{
// TODO this one caused fragments to get dropped by MSE for no reason
@@ -214,3 +212,8 @@ pub fn write_fragment_into(
debug!("wrote {} bytes", output.position());
Ok(())
}
+
+/// Returns the segment duration in seconds.
+///
+/// The raw `duration` value is multiplied by `timestamp_scale` and divided by
+/// 10^9 (presumably `timestamp_scale` is nanoseconds per tick — confirm against
+/// the Matroska `Info` element). A missing duration is treated as `0`.
+///
+/// # Panics
+/// Panics if the metadata carries no `Info` element.
+fn media_duration(m: &MatroskaMetadata) -> f64 {
+    let segment_info = m.info.as_ref().unwrap();
+    let ticks = segment_info.duration.unwrap_or_default();
+    let scale = segment_info.timestamp_scale as f64;
+    (ticks * scale) / 1_000_000_000.
+}
diff --git a/remuxer/src/lib.rs b/remuxer/src/lib.rs
index a98ffad..931d5e6 100644
--- a/remuxer/src/lib.rs
+++ b/remuxer/src/lib.rs
@@ -3,18 +3,22 @@
which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
Copyright (C) 2025 metamuffin <metamuffin.org>
*/
+#![feature(random, exit_status_error)]
pub mod extract;
pub mod fragment;
+pub mod metadata;
+pub mod matroska_to_mpeg4;
pub mod remux;
pub mod seek_index;
pub mod segment_extractor;
pub mod trim_writer;
+pub mod matroska_to_webm;
+use ebml_struct::matroska::TrackEntry;
pub use fragment::write_fragment_into;
-pub use remux::remux_stream_into;
-
-use jellybase::common::{SourceTrack, SourceTrackKind};
use jellymatroska::{Master, MatroskaTag};
+pub use matroska_to_mpeg4::matroska_to_mpeg4;
+pub use remux::remux_stream_into;
pub fn ebml_header(webm: bool) -> MatroskaTag {
MatroskaTag::Ebml(Master::Collected(vec![
@@ -41,66 +45,56 @@ pub fn ebml_segment_info(title: String, duration: f64) -> MatroskaTag {
]))
}
-pub fn ebml_track_entry(
- number: u64,
- uid: u64,
- track: &SourceTrack,
- codec_private: Option<Vec<u8>>,
-) -> MatroskaTag {
+pub fn ebml_track_entry(number: u64, track: &TrackEntry) -> MatroskaTag {
let mut els = vec![
MatroskaTag::TrackNumber(number),
- MatroskaTag::TrackUID(uid),
MatroskaTag::FlagLacing(track.flag_lacing),
MatroskaTag::Language(track.language.clone()),
- MatroskaTag::CodecID(track.codec.clone()),
+ MatroskaTag::CodecID(track.codec_id.clone()),
MatroskaTag::CodecDelay(track.codec_delay),
MatroskaTag::SeekPreRoll(track.seek_pre_roll),
];
if let Some(d) = &track.default_duration {
els.push(MatroskaTag::DefaultDuration(*d));
}
- match track.kind {
- SourceTrackKind::Video {
- width,
- height,
- display_height,
- display_width,
- display_unit,
- fps,
- } => {
+ match track.track_type {
+ 1 => {
+ let video = track.video.as_ref().unwrap();
els.push(MatroskaTag::TrackType(1));
let mut props = vec![
- MatroskaTag::PixelWidth(width),
- MatroskaTag::PixelHeight(height),
+ MatroskaTag::PixelWidth(video.pixel_width),
+ MatroskaTag::PixelHeight(video.pixel_height),
];
- props.push(MatroskaTag::DisplayWidth(display_width.unwrap_or(width)));
- props.push(MatroskaTag::DisplayHeight(display_height.unwrap_or(height)));
- props.push(MatroskaTag::DisplayUnit(display_unit));
- if let Some(fps) = fps {
+ props.push(MatroskaTag::DisplayWidth(
+ video.display_width.unwrap_or(video.pixel_width),
+ ));
+ props.push(MatroskaTag::DisplayHeight(
+ video.display_height.unwrap_or(video.pixel_height),
+ ));
+ props.push(MatroskaTag::DisplayUnit(video.display_unit));
+ if let Some(fps) = video.frame_rate {
props.push(MatroskaTag::FrameRate(fps))
}
els.push(MatroskaTag::Video(Master::Collected(props)))
}
- SourceTrackKind::Audio {
- channels,
- sample_rate,
- bit_depth,
- } => {
+ 2 => {
+ let audio = track.audio.as_ref().unwrap();
els.push(MatroskaTag::TrackType(2));
let mut props = vec![
- MatroskaTag::SamplingFrequency(sample_rate),
- MatroskaTag::Channels(channels.try_into().unwrap()),
+ MatroskaTag::SamplingFrequency(audio.sampling_frequency),
+ MatroskaTag::Channels(audio.channels),
];
- if let Some(bit_depth) = bit_depth {
+ if let Some(bit_depth) = audio.bit_depth {
props.push(MatroskaTag::BitDepth(bit_depth.try_into().unwrap()));
}
els.push(MatroskaTag::Audio(Master::Collected(props)));
}
- SourceTrackKind::Subtitles => {
+ 17 => {
els.push(MatroskaTag::TrackType(17));
}
+ _ => unreachable!(),
}
- if let Some(d) = &codec_private {
+ if let Some(d) = &track.codec_private {
els.push(MatroskaTag::CodecPrivate(d.clone()));
}
MatroskaTag::TrackEntry(Master::Collected(els))
diff --git a/remuxer/src/matroska_to_mpeg4.rs b/remuxer/src/matroska_to_mpeg4.rs
new file mode 100644
index 0000000..e8268e7
--- /dev/null
+++ b/remuxer/src/matroska_to_mpeg4.rs
@@ -0,0 +1,36 @@
+/*
+ This file is part of jellything (https://codeberg.org/metamuffin/jellything)
+ which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
+ Copyright (C) 2025 metamuffin <metamuffin.org>
+*/
+use anyhow::Result;
+use std::{
+ fs::{remove_file, File},
+ io::{copy, Read, Write},
+ process::{Command, Stdio},
+ random::random,
+};
+
+/// Remuxes a Matroska stream into fragmented MP4 by piping it through `ffmpeg`.
+///
+/// `input` is copied to the stdin of an `ffmpeg` child process that stream-copies
+/// (`-c copy`) into a randomly named temporary file under `/tmp`. That file is
+/// then copied into `output` and removed.
+///
+/// The temporary file is removed on every exit path, and the child process is
+/// always waited for, so a failed conversion leaves neither a stale file nor an
+/// unreaped process behind.
+///
+/// # Errors
+/// Fails if `ffmpeg` cannot be spawned or waited for, if feeding its stdin fails,
+/// if it exits with a non-zero status, or if the temporary file cannot be read
+/// or (after a successful conversion) removed.
+pub fn matroska_to_mpeg4(
+    mut input: impl Read + Send + 'static,
+    mut output: impl Write,
+) -> Result<()> {
+    let path = format!("/tmp/jellything-tc-hack-{:016x}", random::<u64>());
+    let args = format!(
+        "-hide_banner -loglevel warning -f matroska -i pipe:0 -copyts -c copy -f mp4 -movflags frag_keyframe+empty_moov {path}"
+    );
+
+    let result = (|| -> Result<()> {
+        let mut child = Command::new("ffmpeg")
+            .args(args.split(" "))
+            .stdin(Stdio::piped())
+            .stderr(Stdio::inherit())
+            .spawn()?;
+
+        let mut stdin = child
+            .stdin
+            .take()
+            .expect("child stdin was configured as piped");
+        let fed = copy(&mut input, &mut stdin);
+        // Close the pipe so ffmpeg sees EOF, then reap the child before reporting
+        // any error from feeding it.
+        drop(stdin);
+        let status = child.wait()?;
+        fed?;
+        status.exit_ok()?;
+
+        copy(&mut File::open(&path)?, &mut output)?;
+        Ok(())
+    })();
+
+    // Always try to clean up; a cleanup failure is only reported when the
+    // conversion itself succeeded, so the original error is never masked.
+    let cleanup = remove_file(&path);
+    result?;
+    cleanup?;
+
+    Ok(())
+}
diff --git a/remuxer/src/matroska_to_webm.rs b/remuxer/src/matroska_to_webm.rs
new file mode 100644
index 0000000..b9a1819
--- /dev/null
+++ b/remuxer/src/matroska_to_webm.rs
@@ -0,0 +1,84 @@
+use crate::ebml_track_entry;
+use anyhow::Context;
+use ebml_struct::{
+ ids::*,
+ matroska::{Cluster, Ebml, Info, Tracks},
+ read::{EbmlReadExt, TagRead},
+ write::TagWrite,
+};
+use jellymatroska::{read::EbmlReader, write::EbmlWriter, Master, MatroskaTag};
+use log::warn;
+use std::io::{BufReader, BufWriter, ErrorKind, Read, Seek, Write};
+
+/// Rewrites a Matroska/WebM stream from `input` as a WebM stream on `output`.
+///
+/// The input's EBML header and segment start are validated first. A fresh
+/// `webm` EBML header and an open `Segment` are then written, after which the
+/// top-level elements of the input segment are processed in order:
+/// - `Info` is re-emitted with timestamp scale, duration and title taken from
+///   the source and fixed muxing/writing application names,
+/// - `Tracks` entries are rebuilt through [`ebml_track_entry`],
+/// - `Cluster` elements are read and written back unchanged,
+/// - void, CRC, cues, seek head, attachments and tags are dropped,
+/// - anything else is logged and dropped.
+///
+/// # Errors
+/// Returns an error (instead of panicking) when the input does not start with
+/// an EBML header followed by a segment, when its doc type is neither
+/// `matroska` nor `webm`, or when reading or writing fails.
+pub fn matroska_to_webm(
+    input: impl Read + Seek + 'static,
+    output: impl Write,
+) -> anyhow::Result<()> {
+    let mut output = EbmlWriter::new(BufWriter::new(output), 0);
+    let mut input = EbmlReader::new(BufReader::new(input));
+
+    // Validate the input before emitting anything, so that nothing is written
+    // for input that is not Matroska at all.
+    let (x, mut ebml) = input.read_tag()?;
+    anyhow::ensure!(x == EL_EBML, "expected EBML header, found element {x:x}");
+    let ebml = Ebml::read(&mut ebml).context("ebml header")?;
+    anyhow::ensure!(
+        ebml.doc_type == "matroska" || ebml.doc_type == "webm",
+        "unsupported doc type {:?}",
+        ebml.doc_type
+    );
+    let (x, mut segment) = input.read_tag()?;
+    anyhow::ensure!(x == EL_SEGMENT, "expected segment, found element {x:x}");
+
+    Ebml {
+        ebml_version: 1,
+        ebml_read_version: 1,
+        ebml_max_id_length: 4,
+        ebml_max_size_length: 8,
+        doc_type: "webm".to_string(),
+        doc_type_version: 4,
+        doc_type_read_version: 2,
+        doc_type_extensions: vec![],
+    }
+    .write(&mut output)?;
+    output.write_tag(&MatroskaTag::Segment(Master::Start))?;
+
+    loop {
+        let (x, mut seg) = match segment.read_tag() {
+            Ok(o) => o,
+            // Running out of input at an element boundary ends the segment.
+            Err(e) if e.kind() == ErrorKind::UnexpectedEof => break,
+            Err(e) => return Err(e.into()),
+        };
+        match x {
+            EL_INFO => {
+                let info = Info::read(&mut seg).context("info")?;
+                output.write_tag(&{
+                    MatroskaTag::Info(Master::Collected(vec![
+                        MatroskaTag::TimestampScale(info.timestamp_scale),
+                        MatroskaTag::Duration(info.duration.unwrap_or_default()),
+                        MatroskaTag::Title(info.title.unwrap_or_default()),
+                        MatroskaTag::MuxingApp("jellyremux".to_string()),
+                        MatroskaTag::WritingApp("jellything".to_string()),
+                    ]))
+                })?;
+            }
+            EL_TRACKS => {
+                let tracks = Tracks::read(&mut seg).context("tracks")?;
+                output.write_tag(&MatroskaTag::Tracks(Master::Collected(
+                    tracks
+                        .entries
+                        .into_iter()
+                        .map(|t| ebml_track_entry(t.track_number, &t))
+                        .collect(),
+                )))?;
+            }
+            EL_VOID | EL_CRC32 | EL_CUES | EL_SEEKHEAD | EL_ATTACHMENTS | EL_TAGS => {
+                seg.consume()?;
+            }
+            EL_CLUSTER => {
+                let cluster = Cluster::read(&mut seg).context("cluster")?;
+                // TODO mixing both ebml libraries :)))
+                cluster.write(&mut output)?;
+            }
+            id => {
+                warn!("unknown top-level element {id:x}");
+                seg.consume()?;
+            }
+        }
+    }
+    Ok(())
+}
diff --git a/remuxer/src/metadata.rs b/remuxer/src/metadata.rs
new file mode 100644
index 0000000..c8a5f8f
--- /dev/null
+++ b/remuxer/src/metadata.rs
@@ -0,0 +1,123 @@
+/*
+ This file is part of jellything (https://codeberg.org/metamuffin/jellything)
+ which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
+ Copyright (C) 2025 metamuffin <metamuffin.org>
+*/
+use anyhow::{Context, Result};
+use bincode::{Decode, Encode};
+use ebml_struct::{
+ ids::*,
+ matroska::*,
+ read::{EbmlReadExt, TagRead},
+};
+use jellybase::{
+ assetfed::AssetInner,
+ cache::{cache_file, cache_memory},
+ common::Asset,
+};
+use log::{info, warn};
+use std::{
+ fs::File,
+ io::{BufReader, ErrorKind, Read, Write},
+ path::Path,
+ sync::Arc,
+};
+
+/// Metadata collected from the top-level elements of a Matroska/WebM segment
+/// that precede the first cluster. Every field is `None` when the corresponding
+/// element (or attachment) was not encountered.
+#[derive(Debug, Encode, Decode, Clone)]
+pub struct MatroskaMetadata {
+    /// Contents of the segment's `Info` element.
+    pub info: Option<Info>,
+    /// Contents of the segment's `Tracks` element.
+    pub tracks: Option<Tracks>,
+    /// Cached copy of an attachment named `cover.{webp,png,jpg,jpeg,avif}`.
+    pub cover: Option<Asset>,
+    /// Contents of the segment's `Chapters` element.
+    pub chapters: Option<Chapters>,
+    /// Contents of the segment's `Tags` element.
+    pub tags: Option<Tags>,
+    /// Raw bytes of an attachment named `info.json`.
+    pub infojson: Option<Vec<u8>>,
+}
+/// Like [`matroska_metadata`], but first checks that the file starts with the
+/// four magic bytes `1A 45 DF A3` and yields `None` when it does not.
+///
+/// A file shorter than four bytes is treated as "not Matroska" rather than as an
+/// error. The result is obtained through `cache_memory` under the key
+/// `mkmeta-check-v1` plus the path.
+///
+/// # Errors
+/// Fails if the file cannot be opened or if reading its metadata fails.
+pub fn checked_matroska_metadata(path: &Path) -> Result<Arc<Option<MatroskaMetadata>>> {
+    const EBML_MAGIC: [u8; 4] = [0x1A, 0x45, 0xDF, 0xA3];
+
+    let path_key = path.to_string_lossy();
+    cache_memory(&["mkmeta-check-v1", &*path_key], || {
+        let mut header = [0u8; 4];
+        let mut file = File::open(path)?;
+        // Best effort: a short read leaves `header` zeroed, which fails the check.
+        let _ = file.read_exact(&mut header);
+
+        if header == EBML_MAGIC {
+            let meta = matroska_metadata(path)?;
+            Ok(Some((*meta).clone()))
+        } else {
+            Ok(None)
+        }
+    })
+}
+/// Reads the metadata elements of the Matroska/WebM file at `path`.
+///
+/// The result is obtained through `cache_memory` under the key `mkmeta-v3` plus
+/// the path. The file's EBML header and segment start are validated, then the
+/// segment's top-level elements are scanned until the first `Cluster` (or end of
+/// file):
+/// - `Info`, `Tracks`, `Chapters` and `Tags` are parsed and stored,
+/// - from `Attachments`, a file named `info.json` is kept as raw bytes and a
+///   file named `cover.{webp,png,jpg,jpeg,avif}` is written out via `cache_file`,
+/// - void, CRC, cues and seek head elements are skipped,
+/// - unknown elements are logged and skipped.
+///
+/// # Errors
+/// Returns an error (instead of panicking) when the file does not start with an
+/// EBML header followed by a segment, when its doc type is neither `matroska`
+/// nor `webm`, or when reading or parsing any element fails.
+pub fn matroska_metadata(path: &Path) -> Result<Arc<MatroskaMetadata>> {
+    cache_memory(&["mkmeta-v3", path.to_string_lossy().as_ref()], || {
+        info!("reading {path:?}");
+        let mut file = BufReader::new(File::open(path)?);
+        let mut file = file.by_ref().take(u64::MAX);
+
+        let (x, mut ebml) = file.read_tag()?;
+        anyhow::ensure!(x == EL_EBML, "expected EBML header, found element {x:x}");
+        let ebml = Ebml::read(&mut ebml).context("ebml header")?;
+        anyhow::ensure!(
+            ebml.doc_type == "matroska" || ebml.doc_type == "webm",
+            "unsupported doc type {:?}",
+            ebml.doc_type
+        );
+        let (x, mut segment) = file.read_tag()?;
+        anyhow::ensure!(x == EL_SEGMENT, "expected segment, found element {x:x}");
+
+        let mut info = None;
+        let mut infojson = None;
+        let mut tracks = None;
+        let mut cover = None;
+        let mut chapters = None;
+        let mut tags = None;
+        loop {
+            let (x, mut seg) = match segment.read_tag() {
+                Ok(o) => o,
+                // Running out of input at an element boundary ends the scan.
+                Err(e) if e.kind() == ErrorKind::UnexpectedEof => break,
+                Err(e) => return Err(e.into()),
+            };
+            match x {
+                EL_INFO => info = Some(Info::read(&mut seg).context("info")?),
+                EL_TRACKS => tracks = Some(Tracks::read(&mut seg).context("tracks")?),
+                EL_CHAPTERS => chapters = Some(Chapters::read(&mut seg).context("chapters")?),
+                EL_TAGS => tags = Some(Tags::read(&mut seg).context("tags")?),
+                EL_ATTACHMENTS => {
+                    let attachments = Attachments::read(&mut seg).context("attachments")?;
+                    for f in attachments.files {
+                        match f.name.as_str() {
+                            "info.json" => {
+                                infojson = Some(f.data);
+                            }
+                            "cover.webp" | "cover.png" | "cover.jpg" | "cover.jpeg"
+                            | "cover.avif" => {
+                                cover = Some(
+                                    AssetInner::Cache(cache_file(
+                                        &["att-cover", path.to_string_lossy().as_ref()],
+                                        move |mut file| {
+                                            file.write_all(&f.data)?;
+                                            Ok(())
+                                        },
+                                    )?)
+                                    .ser(),
+                                )
+                            }
+                            _ => (),
+                        }
+                    }
+                }
+                EL_VOID | EL_CRC32 | EL_CUES | EL_SEEKHEAD => {
+                    seg.consume()?;
+                }
+                // Metadata elements after the first cluster are not scanned.
+                EL_CLUSTER => {
+                    break;
+                }
+                id => {
+                    warn!("unknown top-level element {id:x}");
+                    seg.consume()?;
+                }
+            }
+        }
+        Ok(MatroskaMetadata {
+            chapters,
+            cover,
+            info,
+            infojson,
+            tags,
+            tracks,
+        })
+    })
+}
diff --git a/remuxer/src/remux.rs b/remuxer/src/remux.rs
index 0507f1e..a44c58b 100644
--- a/remuxer/src/remux.rs
+++ b/remuxer/src/remux.rs
@@ -3,333 +3,311 @@
which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
Copyright (C) 2025 metamuffin <metamuffin.org>
*/
-use crate::{
- ebml_header, ebml_track_entry, seek_index::get_seek_index,
- segment_extractor::SegmentExtractIter, trim_writer::TrimWriter,
-};
-use anyhow::{anyhow, Context};
-use jellybase::common::{
- seek_index::{BlockIndex, SeekIndex},
- LocalTrack, Node, SourceTrack,
-};
-use jellymatroska::{
- read::EbmlReader,
- write::{bad_vint_length, vint_length, EbmlWriter},
- Master, MatroskaTag,
-};
-use log::{debug, info, trace, warn};
-use std::{
- fs::File,
- io::{BufReader, BufWriter, Seek, SeekFrom, Write},
- ops::Range,
- path::PathBuf,
- sync::Arc,
- time::Instant,
-};
+use jellybase::common::Node;
+use std::{io::Write, ops::Range, path::PathBuf};
-struct ClusterLayout {
- position: usize,
- timestamp: u64,
- source_offsets: Vec<Option<u64>>,
- blocks: Vec<(usize, BlockIndex)>,
-}
+// struct ClusterLayout {
+// position: usize,
+// timestamp: u64,
+// source_offsets: Vec<Option<u64>>,
+// blocks: Vec<(usize, BlockIndex)>,
+// }
pub fn remux_stream_into(
- writer: impl Write,
- range: Range<usize>,
- path_base: PathBuf,
- item: &Node,
- track_sources: Vec<LocalTrack>,
- selection: Vec<usize>,
- webm: bool,
+ _writer: impl Write,
+ _range: Range<usize>,
+ _path_base: PathBuf,
+ _item: &Node,
+ _selection: Vec<usize>,
+ _webm: bool,
) -> anyhow::Result<()> {
- info!("remuxing {:?} to have tracks {selection:?}", item.title);
- let writer = TrimWriter::new(BufWriter::new(writer), range.clone());
- let mut output = EbmlWriter::new(writer, 0);
+ // info!("remuxing {:?} to have tracks {selection:?}", item.title);
+ // let writer = TrimWriter::new(BufWriter::new(writer), range.clone());
+ // let mut output = EbmlWriter::new(writer, 0);
- struct ReaderC {
- info: SourceTrack,
- reader: EbmlReader,
- mapped: u64,
- index: Arc<SeekIndex>,
- source_track_index: usize,
- codec_private: Option<Vec<u8>>,
- layouting_progress_index: usize,
- }
+ // struct ReaderC {
+ // info: SourceTrack,
+ // reader: EbmlReader,
+ // mapped: u64,
+ // index: Arc<SeekIndex>,
+ // source_track_index: usize,
+ // codec_private: Option<Vec<u8>>,
+ // layouting_progress_index: usize,
+ // }
- let timing_cp = Instant::now();
+ // let timing_cp = Instant::now();
- let mut inputs = selection
- .iter()
- .enumerate()
- .map(|(index, sel)| {
- let info = item
- .media
- .as_ref()
- .unwrap()
- .tracks
- .get(*sel)
- .ok_or(anyhow!("track not available"))?
- .to_owned();
- let private = &track_sources[index];
- let source_path = path_base.join(&private.path);
- let mapped = index as u64 + 1;
- info!("\t- {sel} {source_path:?} ({} => {mapped})", private.track);
- info!("\t {}", info);
- let file = File::open(&source_path).context("opening source file")?;
- let index = get_seek_index(&source_path)?;
- let index = index
- .get(&(private.track as u64))
- .ok_or(anyhow!("track missing 3"))?
- .to_owned();
- debug!("\t seek index: {} blocks loaded", index.blocks.len());
- let reader = EbmlReader::new(BufReader::new(file));
- Ok(ReaderC {
- index,
- reader,
- info,
- mapped,
- source_track_index: private.track,
- codec_private: private.codec_private.clone(),
- layouting_progress_index: 0,
- })
- })
- .collect::<anyhow::Result<Vec<_>>>()?;
+ // let mut inputs = selection
+ // .iter()
+ // .enumerate()
+ // .map(|(index, sel)| {
+ // let info = item
+ // .media
+ // .as_ref()
+ // .unwrap()
+ // .tracks
+ // .get(*sel)
+ // .ok_or(anyhow!("track not available"))?
+ // .to_owned();
+ // let source_path = path_base.join(&private.path);
+ // let mapped = index as u64 + 1;
+ // info!("\t- {sel} {source_path:?} ({} => {mapped})", private.track);
+ // info!("\t {}", info);
+ // let file = File::open(&source_path).context("opening source file")?;
+ // let index = get_seek_index(&source_path)?;
+ // let index = index
+ // .get(&(private.track as u64))
+ // .ok_or(anyhow!("track missing 3"))?
+ // .to_owned();
+ // debug!("\t seek index: {} blocks loaded", index.blocks.len());
+ // let reader = EbmlReader::new(BufReader::new(file));
+ // Ok(ReaderC {
+ // index,
+ // reader,
+ // info,
+ // mapped,
+ // source_track_index: private.track,
+ // codec_private: private.codec_private.clone(),
+ // layouting_progress_index: 0,
+ // })
+ // })
+ // .collect::<anyhow::Result<Vec<_>>>()?;
- info!("(perf) prepare inputs: {:?}", Instant::now() - timing_cp);
- let timing_cp = Instant::now();
+ // info!("(perf) prepare inputs: {:?}", Instant::now() - timing_cp);
+ // let timing_cp = Instant::now();
- output.write_tag(&ebml_header(webm))?;
+ // output.write_tag(&ebml_header(webm))?;
- output.write_tag(&MatroskaTag::Segment(Master::Start))?;
- let segment_offset = output.position();
+ // output.write_tag(&MatroskaTag::Segment(Master::Start))?;
+ // let segment_offset = output.position();
- output.write_tag(&MatroskaTag::Info(Master::Collected(vec![
- MatroskaTag::TimestampScale(1_000_000),
- MatroskaTag::Duration(item.media.as_ref().unwrap().duration * 1000.0),
- MatroskaTag::Title(item.title.clone().unwrap_or_default()),
- MatroskaTag::MuxingApp("jellyremux".to_string()),
- MatroskaTag::WritingApp("jellything".to_string()),
- ])))?;
+ // output.write_tag(&MatroskaTag::Info(Master::Collected(vec![
+ // MatroskaTag::TimestampScale(1_000_000),
+ // MatroskaTag::Duration(item.media.as_ref().unwrap().duration * 1000.0),
+ // MatroskaTag::Title(item.title.clone().unwrap_or_default()),
+ // MatroskaTag::MuxingApp("jellyremux".to_string()),
+ // MatroskaTag::WritingApp("jellything".to_string()),
+ // ])))?;
- let tracks_header = inputs
- .iter_mut()
- .map(|rc| ebml_track_entry(rc.mapped, rc.mapped, &rc.info, rc.codec_private.take()))
- .collect();
- output.write_tag(&MatroskaTag::Tracks(Master::Collected(tracks_header)))?;
+ // let tracks_header = inputs
+ // .iter_mut()
+ // .map(|rc| ebml_track_entry(rc.mapped, rc.mapped, &rc.info, rc.codec_private.take()))
+ // .collect();
+ // output.write_tag(&MatroskaTag::Tracks(Master::Collected(tracks_header)))?;
- let mut segment_layout: Vec<ClusterLayout> = {
- let mut cluster_pts = 0;
- let mut clusters = vec![];
- let mut cluster = vec![];
- let mut source_offsets = vec![None; inputs.len()];
- let mut gp = 0usize; // cluster position (in the segment)
- let mut p = 0usize; // block position (in the cluster)
- loop {
- let (track, block) = {
- let mut best_block = BlockIndex {
- pts: u64::MAX,
- size: 0,
- source_off: 0,
- };
- let mut best_track = 0;
- for (i, r) in inputs.iter().enumerate() {
- if let Some(v) = r.index.blocks.get(r.layouting_progress_index) {
- if v.pts < best_block.pts {
- best_block = v.to_owned();
- best_track = i;
- }
- };
- }
- (best_track, best_block)
- };
- inputs[track].layouting_progress_index += 1;
- source_offsets[track].get_or_insert(block.source_off);
- if block.pts > cluster_pts + 1_000 {
- let cluster_content_size = 1 + 1 // timestamp {tag, size}
- + bad_vint_length(cluster_pts) // timestamp tag value
- + p;
- let cluster_size = 4 // tag length
- + vint_length(cluster_content_size as u64) // size varint
- + cluster_content_size;
- clusters.push(ClusterLayout {
- position: gp, // relative to the first cluster
- timestamp: cluster_pts,
- source_offsets,
- blocks: std::mem::take(&mut cluster),
- });
+ // let mut segment_layout: Vec<ClusterLayout> = {
+ // let mut cluster_pts = 0;
+ // let mut clusters = vec![];
+ // let mut cluster = vec![];
+ // let mut source_offsets = vec![None; inputs.len()];
+ // let mut gp = 0usize; // cluster position (in the segment)
+ // let mut p = 0usize; // block position (in the cluster)
+ // loop {
+ // let (track, block) = {
+ // let mut best_block = BlockIndex {
+ // pts: u64::MAX,
+ // size: 0,
+ // source_off: 0,
+ // };
+ // let mut best_track = 0;
+ // for (i, r) in inputs.iter().enumerate() {
+ // if let Some(v) = r.index.blocks.get(r.layouting_progress_index) {
+ // if v.pts < best_block.pts {
+ // best_block = v.to_owned();
+ // best_track = i;
+ // }
+ // };
+ // }
+ // (best_track, best_block)
+ // };
+ // inputs[track].layouting_progress_index += 1;
+ // source_offsets[track].get_or_insert(block.source_off);
+ // if block.pts > cluster_pts + 1_000 {
+ // let cluster_content_size = 1 + 1 // timestamp {tag, size}
+ // + bad_vint_length(cluster_pts) // timestamp tag value
+ // + p;
+ // let cluster_size = 4 // tag length
+ // + vint_length(cluster_content_size as u64) // size varint
+ // + cluster_content_size;
+ // clusters.push(ClusterLayout {
+ // position: gp, // relative to the first cluster
+ // timestamp: cluster_pts,
+ // source_offsets,
+ // blocks: std::mem::take(&mut cluster),
+ // });
- cluster_pts = block.pts;
- source_offsets = vec![None; inputs.len()];
- gp += cluster_size;
- p = 0;
- }
- if block.pts == u64::MAX {
- break;
- }
+ // cluster_pts = block.pts;
+ // source_offsets = vec![None; inputs.len()];
+ // gp += cluster_size;
+ // p = 0;
+ // }
+ // if block.pts == u64::MAX {
+ // break;
+ // }
- let simpleblock_size = 1 + 2 + 1 // block {tracknum, pts_off, flags}
- // TODO does not work, if more than 127 tracks are present
- + block.size; // block payload
- p += 1; // simpleblock tag
- p += vint_length(simpleblock_size as u64); // simpleblock size vint
- p += simpleblock_size;
+ // let simpleblock_size = 1 + 2 + 1 // block {tracknum, pts_off, flags}
+ // // TODO does not work, if more than 127 tracks are present
+ // + block.size; // block payload
+ // p += 1; // simpleblock tag
+ // p += vint_length(simpleblock_size as u64); // simpleblock size vint
+ // p += simpleblock_size;
- cluster.push((track, block))
- }
- info!("segment layout computed ({} clusters)", clusters.len());
- clusters
- };
- info!(
- "(perf) compute segment layout: {:?}",
- Instant::now() - timing_cp
- );
- let timing_cp = Instant::now();
+ // cluster.push((track, block))
+ // }
+ // info!("segment layout computed ({} clusters)", clusters.len());
+ // clusters
+ // };
+ // info!(
+ // "(perf) compute segment layout: {:?}",
+ // Instant::now() - timing_cp
+ // );
+ // let timing_cp = Instant::now();
- let max_cue_size = 4 // cues id
- + 8 // cues len
- + ( // cues content
- 1 // cp id
- + 1 // cp len
- + ( // cp content
- 1 // ctime id,
- + 1 // ctime len
- + 8 // ctime content uint
- + ( // ctps
- 1 // ctp id
- + 8 // ctp len
- + (// ctp content
- 1 // ctrack id
- + 1 // ctrack size
- + 1 // ctrack content int
- // TODO this breaks if inputs.len() >= 127
- + 1 // ccp id
- + 1 // ccp len
- + 8 // ccp content offset
- )
- )
- ) * inputs.len()
- ) * segment_layout.len()
- + 1 // void id
- + 8; // void len
+ // let max_cue_size = 4 // cues id
+ // + 8 // cues len
+ // + ( // cues content
+ // 1 // cp id
+ // + 1 // cp len
+ // + ( // cp content
+ // 1 // ctime id,
+ // + 1 // ctime len
+ // + 8 // ctime content uint
+ // + ( // ctps
+ // 1 // ctp id
+ // + 8 // ctp len
+ // + (// ctp content
+ // 1 // ctrack id
+ // + 1 // ctrack size
+ // + 1 // ctrack content int
+ // // TODO this breaks if inputs.len() >= 127
+ // + 1 // ccp id
+ // + 1 // ccp len
+ // + 8 // ccp content offset
+ // )
+ // )
+ // ) * inputs.len()
+ // ) * segment_layout.len()
+ // + 1 // void id
+ // + 8; // void len
- let first_cluster_offset_predict = max_cue_size + output.position();
+ // let first_cluster_offset_predict = max_cue_size + output.position();
- // make the cluster position relative to the segment start as they should
- segment_layout
- .iter_mut()
- .for_each(|e| e.position += first_cluster_offset_predict - segment_offset);
+ // // make the cluster position relative to the segment start as they should
+ // segment_layout
+ // .iter_mut()
+ // .for_each(|e| e.position += first_cluster_offset_predict - segment_offset);
- output.write_tag(&MatroskaTag::Cues(Master::Collected(
- segment_layout
- .iter()
- .map(|cluster| {
- MatroskaTag::CuePoint(Master::Collected(
- Some(MatroskaTag::CueTime(cluster.timestamp))
- .into_iter()
- // TODO: Subtitles should not have cues for every cluster
- .chain(inputs.iter().map(|i| {
- MatroskaTag::CueTrackPositions(Master::Collected(vec![
- MatroskaTag::CueTrack(i.mapped),
- MatroskaTag::CueClusterPosition(cluster.position as u64),
- ]))
- }))
- .collect(),
- ))
- })
- .collect(),
- )))?;
- output.write_padding(first_cluster_offset_predict)?;
- let first_cluster_offset = output.position();
- assert_eq!(first_cluster_offset, first_cluster_offset_predict);
+ // output.write_tag(&MatroskaTag::Cues(Master::Collected(
+ // segment_layout
+ // .iter()
+ // .map(|cluster| {
+ // MatroskaTag::CuePoint(Master::Collected(
+ // Some(MatroskaTag::CueTime(cluster.timestamp))
+ // .into_iter()
+ // // TODO: Subtitles should not have cues for every cluster
+ // .chain(inputs.iter().map(|i| {
+ // MatroskaTag::CueTrackPositions(Master::Collected(vec![
+ // MatroskaTag::CueTrack(i.mapped),
+ // MatroskaTag::CueClusterPosition(cluster.position as u64),
+ // ]))
+ // }))
+ // .collect(),
+ // ))
+ // })
+ // .collect(),
+ // )))?;
+ // output.write_padding(first_cluster_offset_predict)?;
+ // let first_cluster_offset = output.position();
+ // assert_eq!(first_cluster_offset, first_cluster_offset_predict);
- let mut skip = 0;
- // TODO binary search
- for (i, cluster) in segment_layout.iter().enumerate() {
- if (cluster.position + segment_offset) >= range.start {
- break;
- }
- skip = i;
- }
+ // let mut skip = 0;
+ // // TODO binary search
+ // for (i, cluster) in segment_layout.iter().enumerate() {
+ // if (cluster.position + segment_offset) >= range.start {
+ // break;
+ // }
+ // skip = i;
+ // }
- if skip != 0 {
- info!("skipping {skip} clusters");
- output.seek(SeekFrom::Start(
- (segment_layout[skip].position + segment_offset) as u64,
- ))?;
- }
+ // if skip != 0 {
+ // info!("skipping {skip} clusters");
+ // output.seek(SeekFrom::Start(
+ // (segment_layout[skip].position + segment_offset) as u64,
+ // ))?;
+ // }
- struct ReaderD<'a> {
- stream: SegmentExtractIter<'a>,
- mapped: u64,
- }
+ // struct ReaderD<'a> {
+ // stream: SegmentExtractIter<'a>,
+ // mapped: u64,
+ // }
- let mut track_readers = inputs
- .iter_mut()
- .enumerate()
- .map(|(i, inp)| {
- inp.reader
- .seek(
- // the seek target might be a hole; we continue until the next cluster of that track.
- // this should be fine since tracks are only read according to segment_layout
- find_first_cluster_with_off(&segment_layout, skip, i)
- .ok_or(anyhow!("cluster hole at eof"))?,
- MatroskaTag::Cluster(Master::Start), // TODO shouldn't this be a child of cluster?
- )
- .context("seeking in input")?;
- let stream = SegmentExtractIter::new(&mut inp.reader, inp.source_track_index as u64);
+ // let mut track_readers = inputs
+ // .iter_mut()
+ // .enumerate()
+ // .map(|(i, inp)| {
+ // inp.reader
+ // .seek(
+ // // the seek target might be a hole; we continue until the next cluster of that track.
+ // // this should be fine since tracks are only read according to segment_layout
+ // find_first_cluster_with_off(&segment_layout, skip, i)
+ // .ok_or(anyhow!("cluster hole at eof"))?,
+ // MatroskaTag::Cluster(Master::Start), // TODO shouldn't this be a child of cluster?
+ // )
+ // .context("seeking in input")?;
+ // let stream = SegmentExtractIter::new(&mut inp.reader, inp.source_track_index as u64);
- Ok(ReaderD {
- mapped: inp.mapped,
- stream,
- })
- })
- .collect::<anyhow::Result<Vec<_>>>()?;
+ // Ok(ReaderD {
+ // mapped: inp.mapped,
+ // stream,
+ // })
+ // })
+ // .collect::<anyhow::Result<Vec<_>>>()?;
- info!("(perf) seek inputs: {:?}", Instant::now() - timing_cp);
+ // info!("(perf) seek inputs: {:?}", Instant::now() - timing_cp);
- for (cluster_index, cluster) in segment_layout.into_iter().enumerate().skip(skip) {
- debug!(
- "writing cluster {cluster_index} (pts_base={}) with {} blocks",
- cluster.timestamp,
- cluster.blocks.len()
- );
- {
- let cue_error = cluster.position as i64 - (output.position() - segment_offset) as i64;
- if cue_error != 0 {
- warn!("calculation was {} bytes off", cue_error);
- }
- }
+ // for (cluster_index, cluster) in segment_layout.into_iter().enumerate().skip(skip) {
+ // debug!(
+ // "writing cluster {cluster_index} (pts_base={}) with {} blocks",
+ // cluster.timestamp,
+ // cluster.blocks.len()
+ // );
+ // {
+ // let cue_error = cluster.position as i64 - (output.position() - segment_offset) as i64;
+ // if cue_error != 0 {
+ // warn!("calculation was {} bytes off", cue_error);
+ // }
+ // }
- let mut cluster_blocks = vec![MatroskaTag::Timestamp(cluster.timestamp)];
- for (block_track, index_block) in cluster.blocks {
- let track_reader = &mut track_readers[block_track];
- // TODO handle duration
- let mut block = track_reader.stream.next_block()?.0;
+ // let mut cluster_blocks = vec![MatroskaTag::Timestamp(cluster.timestamp)];
+ // for (block_track, index_block) in cluster.blocks {
+ // let track_reader = &mut track_readers[block_track];
+ // // TODO handle duration
+ // let mut block = track_reader.stream.next_block()?.0;
- assert_eq!(index_block.size, block.data.len(), "seek index is wrong");
+ // assert_eq!(index_block.size, block.data.len(), "seek index is wrong");
- block.track = track_reader.mapped;
- block.timestamp_off = (index_block.pts - cluster.timestamp).try_into().unwrap();
- trace!("n={} tso={}", block.track, block.timestamp_off);
+ // block.track = track_reader.mapped;
+ // block.timestamp_off = (index_block.pts - cluster.timestamp).try_into().unwrap();
+ // trace!("n={} tso={}", block.track, block.timestamp_off);
- cluster_blocks.push(MatroskaTag::SimpleBlock(block))
- }
- output.write_tag(&MatroskaTag::Cluster(Master::Collected(cluster_blocks)))?;
- }
- // output.write_tag(&MatroskaTag::Segment(Master::End))?;
- Ok(())
+ // cluster_blocks.push(MatroskaTag::SimpleBlock(block))
+ // }
+ // output.write_tag(&MatroskaTag::Cluster(Master::Collected(cluster_blocks)))?;
+ // }
+ // // output.write_tag(&MatroskaTag::Segment(Master::End))?;
+ // Ok(())
+ todo!()
}
-fn find_first_cluster_with_off(
- segment_layout: &[ClusterLayout],
- skip: usize,
- track: usize,
-) -> Option<u64> {
- for cluster in segment_layout.iter().skip(skip) {
- if let Some(off) = cluster.source_offsets[track] {
- return Some(off);
- }
- }
- None
-}
+// fn find_first_cluster_with_off(
+// segment_layout: &[ClusterLayout],
+// skip: usize,
+// track: usize,
+// ) -> Option<u64> {
+// for cluster in segment_layout.iter().skip(skip) {
+// if let Some(off) = cluster.source_offsets[track] {
+// return Some(off);
+// }
+// }
+// None
+// }
diff --git a/remuxer/src/seek_index.rs b/remuxer/src/seek_index.rs
index bd351d9..7296d93 100644
--- a/remuxer/src/seek_index.rs
+++ b/remuxer/src/seek_index.rs
@@ -4,10 +4,8 @@
Copyright (C) 2025 metamuffin <metamuffin.org>
*/
use anyhow::{Context, Result};
-use jellybase::{
- cache::cache_memory,
- common::seek_index::{BlockIndex, SeekIndex},
-};
+use bincode::{Decode, Encode};
+use jellybase::cache::cache_memory;
use jellymatroska::{
block::Block,
read::EbmlReader,
@@ -17,6 +15,33 @@ use jellymatroska::{
use log::{debug, info, trace, warn};
use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path, sync::Arc};
+pub const SEEK_INDEX_VERSION: u32 = 0x5eef1de4; // version tag that `SeekIndex::default()` stores in `SeekIndex::version`
+
+#[derive(Debug, Clone, Decode, Encode)] // Decode/Encode come from the `bincode` import at the top of this file
+pub struct SeekIndex { // seek index for one track; `get_seek_index` returns a map of these keyed by u64
+ pub version: u32, // set to SEEK_INDEX_VERSION by the Default impl
+ pub blocks: Vec<BlockIndex>, // per-block entries, see `BlockIndex`
+ pub keyframes: Vec<usize>, // NOTE(review): presumably indices into `blocks` marking keyframes; confirm against the index builder
+}
+
+#[derive(Debug, Clone, Decode, Encode)]
+pub struct BlockIndex { // one entry of `SeekIndex::blocks`, locating a single block in the source file
+ pub pts: u64, // NOTE(review): block timestamp; unit/timescale is not visible here, confirm
+ // pub duration: Option<u64>,
+ pub source_off: u64, // points to start of SimpleBlock or BlockGroup (not the Block inside it)
+ pub size: usize, // NOTE(review): presumably the length of the block's data payload; confirm against the index builder
+}
+
+impl Default for SeekIndex { // hand-written: a derived Default would leave `version` at 0
+ fn default() -> Self { // empty index stamped with the current SEEK_INDEX_VERSION
+ Self {
+ version: SEEK_INDEX_VERSION,
+ blocks: Vec::new(),
+ keyframes: Vec::new(),
+ }
+ }
+}
+
pub fn get_seek_index(path: &Path) -> anyhow::Result<Arc<BTreeMap<u64, Arc<SeekIndex>>>> {
cache_memory(&["seekindex", path.to_str().unwrap()], move || {
info!("generating seek index for {path:?}");