about | summary | refs | log | tree | commit | diff
path: root/remuxer/src/remux.rs
diff options
context:
space:
mode:
author: metamuffin <metamuffin@disroot.org> 2023-09-30 15:21:57 +0200
committer: metamuffin <metamuffin@disroot.org> 2023-09-30 15:21:57 +0200
commit: 30e3d18c6ec50572365baaaaa3542769e82e763a (patch)
tree: 3eade459fe488729bbe61dd85ac49948d5e24ef7 /remuxer/src/remux.rs
parent: d0d8316a015fa0434c2871541b83ea0aca781a99 (diff)
download: jellything-30e3d18c6ec50572365baaaaa3542769e82e763a.tar
jellything-30e3d18c6ec50572365baaaaa3542769e82e763a.tar.bz2
jellything-30e3d18c6ec50572365baaaaa3542769e82e763a.tar.zst
move some files around for new remuxer + small changes
Diffstat (limited to 'remuxer/src/remux.rs')
-rw-r--r-- remuxer/src/remux.rs 381
1 files changed, 381 insertions, 0 deletions
diff --git a/remuxer/src/remux.rs b/remuxer/src/remux.rs
new file mode 100644
index 0000000..8807a38
--- /dev/null
+++ b/remuxer/src/remux.rs
@@ -0,0 +1,381 @@
+/*
+ This file is part of jellything (https://codeberg.org/metamuffin/jellything)
+ which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
+ Copyright (C) 2023 metamuffin <metamuffin.org>
+*/
+use crate::{segment_extractor::SegmentExtractIter, trim_writer::TrimWriter};
+use anyhow::{anyhow, Context};
+use jellycommon::{BlockIndex, LocalTrack, NodePublic, SeekIndex, SourceTrack, SourceTrackKind};
+use jellymatroska::{
+ block::Block,
+ read::EbmlReader,
+ write::{bad_vint_length, vint_length, EbmlWriter},
+ Master, MatroskaTag,
+};
+use log::{debug, info, trace, warn};
+use std::{
+ fs::File,
+ io::{Seek, SeekFrom, Write},
+ ops::Range,
+ path::PathBuf,
+ time::Instant,
+};
+
+/// Remuxes the selected tracks of `item` into a single Matroska/WebM stream on `writer`.
+///
+/// The output is written in this order: EBML header, `Segment` start, `Info`, an empty
+/// `Tags`, `Tracks`, `Cues`, padding up to the predicted offset of the first cluster, one
+/// `Cluster` per entry of the precomputed segment layout, and finally the `Segment` end.
+///
+/// The complete cluster layout (positions and sizes) is computed from the seek indices
+/// before any cluster is written, so that the `Cues` can be emitted ahead of the clusters
+/// and so that clusters lying entirely before `range.start` can be skipped.
+///
+/// # Parameters
+/// - `writer`: destination; it is wrapped in a `TrimWriter` together with `range`
+///   (presumably only bytes inside `range` reach `writer` -- confirm against `TrimWriter`).
+/// - `range`: byte range of the output that is requested; also used to pick the first
+///   cluster that actually has to be produced.
+/// - `path_base`: directory that each `LocalTrack::path` is joined onto.
+/// - `item`: supplies the title, the duration and the per-track metadata.
+/// - `track_sources`: source description for every track, indexed by `selection` entries.
+/// - `selection`: indices into both `item.media.tracks` and `track_sources`; the n-th
+///   selected track becomes output track number `n + 1`.
+/// - `webm`: selects the `DocType` string (`"webm"` or `"matroska"`).
+///
+/// # Errors
+/// Returns an error if `item` has no media, if `selection` is empty or refers to a track
+/// that does not exist, if a source file or seek index cannot be opened or decoded, if an
+/// input cannot be seeked to the starting cluster, or if reading/writing fails.
+///
+/// # Panics
+/// Panics if the predicted first-cluster offset does not match the real one, if a block
+/// read from a source has a different size than the seek index claims, or if a block's
+/// timestamp offset relative to its cluster does not fit the block's offset type.
+pub fn remux_stream_into(
+    writer: impl Write,
+    range: Range<usize>,
+    path_base: PathBuf,
+    item: NodePublic,
+    track_sources: Vec<LocalTrack>,
+    selection: Vec<usize>,
+    webm: bool,
+) -> anyhow::Result<()> {
+    info!("remuxing {:?} to have tracks {selection:?}", item.title);
+
+    // Reject requests that cannot be served at all with an error instead of a panic.
+    let media = item
+        .media
+        .as_ref()
+        .ok_or_else(|| anyhow!("item has no media"))?;
+    if selection.is_empty() {
+        // The layout loop below indexes `inputs[0]` unconditionally.
+        return Err(anyhow!("no tracks selected"));
+    }
+
+    let writer = TrimWriter::new(writer, range.clone());
+    let mut output = EbmlWriter::new(writer, 0);
+
+    /// Per-track input state used while laying out and while seeking.
+    struct ReaderC {
+        info: SourceTrack,
+        reader: EbmlReader,
+        /// Track number in the output (1-based).
+        mapped: u64,
+        index: SeekIndex,
+        /// Track number inside the source file.
+        source_track_index: usize,
+        codec_private: Option<Vec<u8>>,
+        /// Index of the next not-yet-placed block of `index.blocks`.
+        layouting_progress_index: usize,
+    }
+
+    let timing_cp = Instant::now();
+
+    let mut inputs = selection
+        .iter()
+        .enumerate()
+        .map(|(index, sel)| {
+            let info = media
+                .tracks
+                .get(*sel)
+                .ok_or_else(|| anyhow!("track not available"))?
+                .to_owned();
+            let private = track_sources
+                .get(*sel)
+                .ok_or_else(|| anyhow!("track source not available"))?;
+            let source_path = path_base.join(&private.path);
+            let mapped = index as u64 + 1;
+            info!("\t- {sel} {source_path:?} ({} => {mapped})", private.track);
+            info!("\t {}", info);
+            let file = File::open(&source_path).context("opening source file")?;
+            // The seek index lives next to the source file as `<stem>.si.<track>`.
+            let mut index = File::open(source_path.with_extension(format!("si.{}", private.track)))
+                .context("opening seek index file")?;
+            let index = bincode::decode_from_std_read::<SeekIndex, _, _>(
+                &mut index,
+                bincode::config::standard(),
+            )?;
+            debug!("\t seek index: {} blocks loaded", index.blocks.len());
+            let reader = EbmlReader::new(file);
+            Ok(ReaderC {
+                index,
+                reader,
+                info,
+                mapped,
+                source_track_index: private.track,
+                codec_private: private.codec_private.clone(),
+                layouting_progress_index: 0,
+            })
+        })
+        .collect::<anyhow::Result<Vec<_>>>()?;
+
+    info!("(perf) prepare inputs: {:?}", timing_cp.elapsed());
+    let timing_cp = Instant::now();
+
+    output.write_tag(&MatroskaTag::Ebml(Master::Collected(vec![
+        MatroskaTag::EbmlVersion(1),
+        MatroskaTag::EbmlReadVersion(1),
+        MatroskaTag::EbmlMaxIdLength(4),
+        MatroskaTag::EbmlMaxSizeLength(8),
+        MatroskaTag::DocType(if webm {
+            "webm".to_string()
+        } else {
+            "matroska".to_string()
+        }),
+        MatroskaTag::DocTypeVersion(4),
+        MatroskaTag::DocTypeReadVersion(2),
+    ])))?;
+
+    output.write_tag(&MatroskaTag::Segment(Master::Start))?;
+    // Cue positions are relative to this offset.
+    let segment_offset = output.position();
+
+    output.write_tag(&MatroskaTag::Info(Master::Collected(vec![
+        MatroskaTag::TimestampScale(1_000_000),
+        MatroskaTag::Duration(media.duration * 1000.0),
+        MatroskaTag::Title(item.title.clone()),
+        MatroskaTag::MuxingApp("jellyremux".to_string()),
+        MatroskaTag::WritingApp("jellything".to_string()),
+    ])))?;
+    output.write_tag(&MatroskaTag::Tags(Master::Collected(vec![])))?;
+
+    let tracks_header = inputs
+        .iter_mut()
+        .map(|rc| track_to_ebml(rc.mapped, &rc.info, rc.codec_private.take()))
+        .collect();
+    output.write_tag(&MatroskaTag::Tracks(Master::Collected(tracks_header)))?;
+
+    /// Planned placement and content of one output cluster.
+    struct ClusterLayout {
+        /// Byte position of the cluster; first relative to the first cluster, later
+        /// rebased to be relative to the segment start.
+        position: usize,
+        timestamp: u64,
+        /// Per input: the first source offset recorded while this cluster was open.
+        source_offsets: Vec<Option<usize>>,
+        /// `(input index, block)` pairs in output order.
+        blocks: Vec<(usize, BlockIndex)>,
+    }
+
+    let mut segment_layout: Vec<ClusterLayout> = {
+        let mut cluster_pts = 0;
+        let mut clusters = vec![];
+        let mut cluster = vec![];
+        let mut source_offsets = vec![None; inputs.len()];
+        let mut gp = 0usize; // cluster position (in the segment)
+        let mut p = 0usize; // block position (in the cluster)
+        loop {
+            // Pick the pending block with the smallest pts over all inputs. If every
+            // input is exhausted the sentinel (pts == u64::MAX, track 0) is returned.
+            let (track, block) = {
+                let mut best_block = BlockIndex {
+                    pts: u64::MAX,
+                    size: 0,
+                    source_off: 0,
+                };
+                let mut best_track = 0;
+                for (i, r) in inputs.iter().enumerate() {
+                    if let Some(v) = r.index.blocks.get(r.layouting_progress_index) {
+                        if v.pts < best_block.pts {
+                            best_block = v.to_owned();
+                            best_track = i;
+                        }
+                    };
+                }
+                (best_track, best_block)
+            };
+            inputs[track].layouting_progress_index += 1;
+            // NOTE(review): the offset is recorded before the cluster-boundary check
+            // below, so a block that opens a new cluster is credited to the previous
+            // cluster's `source_offsets`; the end-of-input sentinel may also record
+            // offset 0 for track 0. Confirm that this is intended.
+            source_offsets[track].get_or_insert(block.source_off);
+            // Close the current cluster once a block is more than 1000 timestamp units
+            // past the cluster start. The sentinel always triggers this, which flushes
+            // the final cluster.
+            if block.pts > cluster_pts + 1_000 {
+                let cluster_content_size = 1 + 1 // timestamp {tag, size}
+                    + bad_vint_length(cluster_pts) // timestamp tag value
+                    + p;
+                let cluster_size = 4 // tag length
+                    + vint_length(cluster_content_size as u64) // size varint
+                    + cluster_content_size;
+                clusters.push(ClusterLayout {
+                    position: gp, // relative to the first cluster
+                    timestamp: cluster_pts,
+                    source_offsets,
+                    blocks: std::mem::take(&mut cluster),
+                });
+
+                cluster_pts = block.pts;
+                source_offsets = vec![None; inputs.len()];
+                gp += cluster_size;
+                p = 0;
+            }
+            if block.pts == u64::MAX {
+                break;
+            }
+
+            let simpleblock_size = 1 + 2 + 1 // block {tracknum, pts_off, flags}
+                // TODO does not work, if more than 127 tracks are present
+                + block.size; // block payload
+            p += 1; // simpleblock tag
+            p += vint_length(simpleblock_size as u64); // simpleblock size vint
+            p += simpleblock_size;
+
+            cluster.push((track, block))
+        }
+        info!("segment layout computed ({} clusters)", clusters.len());
+        clusters
+    };
+    info!("(perf) compute segment layout: {:?}", timing_cp.elapsed());
+    let timing_cp = Instant::now();
+
+    // Upper bound for the encoded size of the cues plus the void element that pads the
+    // gap. The real cues are smaller; the remainder is filled with padding below.
+    let max_cue_size = 4 // cues id
+        + 8 // cues len
+        + ( // cues content
+            1 // cp id
+            + 1 // cp len
+            + ( // cp content
+                1 // ctime id,
+                + 1 // ctime len
+                + 8 // ctime content uint
+                + ( // ctps
+                    1 // ctp id
+                    + 8 // ctp len
+                    + (// ctp content
+                        1 // ctrack id
+                        + 1 // ctrack size
+                        + 1 // ctrack content int
+                        // TODO break if inputs.len() >= 127
+                        + 1 // ccp id
+                        + 1 // ccp len
+                        + 8 // ccp content offset
+                    )
+                )
+            ) * inputs.len()
+        ) * segment_layout.len()
+        + 1 // void id
+        + 8; // void len
+
+    let first_cluster_offset_predict = max_cue_size + output.position();
+
+    // make the cluster position relative to the segment start as they should
+    segment_layout
+        .iter_mut()
+        .for_each(|e| e.position += first_cluster_offset_predict - segment_offset);
+
+    output.write_tag(&MatroskaTag::Cues(Master::Collected(
+        segment_layout
+            .iter()
+            .map(|cluster| {
+                MatroskaTag::CuePoint(Master::Collected(
+                    Some(MatroskaTag::CueTime(cluster.timestamp))
+                        .into_iter()
+                        // TODO: Subtitles should not have cues for every cluster
+                        .chain(inputs.iter().map(|i| {
+                            MatroskaTag::CueTrackPositions(Master::Collected(vec![
+                                MatroskaTag::CueTrack(i.mapped),
+                                MatroskaTag::CueClusterPosition(cluster.position as u64),
+                            ]))
+                        }))
+                        .collect(),
+                ))
+            })
+            .collect(),
+    )))?;
+    output.write_padding(first_cluster_offset_predict)?;
+    let first_cluster_offset = output.position();
+    assert_eq!(first_cluster_offset, first_cluster_offset_predict);
+
+    // Index of the last cluster that starts before `range.start` (0 if there is none).
+    let mut skip = 0;
+    for (i, cluster) in segment_layout.iter().enumerate() {
+        if (cluster.position + segment_offset) >= range.start {
+            break;
+        }
+        skip = i;
+    }
+
+    if skip != 0 {
+        info!("skipping {skip} clusters");
+        output.seek(SeekFrom::Start(
+            (segment_layout[skip].position + segment_offset) as u64,
+        ))?;
+    }
+
+    /// Per-track block source used while writing clusters.
+    struct ReaderD<'a> {
+        /// The next block of this track, read one ahead.
+        peek: Option<Block>,
+        stream: SegmentExtractIter<'a>,
+        mapped: u64,
+    }
+
+    let mut track_readers = inputs
+        .iter_mut()
+        .enumerate()
+        .map(|(i, inp)| {
+            // A track without a recorded offset in the starting cluster cannot be
+            // positioned; report that instead of panicking.
+            let source_offset = segment_layout[skip].source_offsets[i].ok_or_else(|| {
+                anyhow!("no source offset recorded for track {i} in cluster {skip}")
+            })?;
+            inp.reader
+                .seek(source_offset, MatroskaTag::Cluster(Master::Start))
+                .context("seeking in input")?;
+            let mut stream =
+                SegmentExtractIter::new(&mut inp.reader, inp.source_track_index as u64);
+
+            Ok(ReaderD {
+                mapped: inp.mapped,
+                peek: Some(stream.next()?),
+                stream,
+            })
+        })
+        .collect::<anyhow::Result<Vec<_>>>()?;
+
+    info!("(perf) seek inputs: {:?}", timing_cp.elapsed());
+
+    for (cluster_index, cluster) in segment_layout.into_iter().enumerate().skip(skip) {
+        debug!(
+            "writing cluster {cluster_index} (pts_base={}) with {} blocks",
+            cluster.timestamp,
+            cluster.blocks.len()
+        );
+        {
+            // Sanity check: the planned position must equal the real write position.
+            let cue_error = cluster.position as i64 - (output.position() - segment_offset) as i64;
+            if cue_error != 0 {
+                warn!("calculation was {} bytes off", cue_error);
+            }
+        }
+
+        let mut cluster_blocks = vec![MatroskaTag::Timestamp(cluster.timestamp)];
+        for (block_track, index_block) in cluster.blocks {
+            let track_reader = &mut track_readers[block_track];
+            // Take the peeked block and immediately read the following one ahead.
+            let mut block = track_reader
+                .peek
+                .replace(track_reader.stream.next()?)
+                .expect("source file too short");
+
+            assert_eq!(index_block.size, block.data.len(), "seek index is wrong");
+
+            // Rewrite track number and timestamp offset for the output cluster.
+            block.track = track_reader.mapped;
+            block.timestamp_off = (index_block.pts - cluster.timestamp).try_into().unwrap();
+            trace!("n={} tso={}", block.track, block.timestamp_off);
+
+            let buf = block.dump();
+            cluster_blocks.push(MatroskaTag::SimpleBlock(buf))
+        }
+        output.write_tag(&MatroskaTag::Cluster(Master::Collected(cluster_blocks)))?;
+    }
+    output.write_tag(&MatroskaTag::Segment(Master::End))?;
+    Ok(())
+}
+
+/// Builds the `TrackEntry` element that describes one output track.
+///
+/// - `number` is used for both `TrackNumber` and `TrackUID`.
+/// - `track` supplies language, codec id, the optional default duration and the
+///   kind-specific properties (pixel size for video; sampling frequency, channel count and
+///   bit depth for audio).
+/// - `codec_private`, when present, is attached as `CodecPrivate`.
+///
+/// Lacing is always declared as disabled (`FlagLacing(0)`).
+///
+/// # Panics
+/// Panics if the audio channel count or bit depth does not fit the integer type of the
+/// corresponding tag.
+pub fn track_to_ebml(
+    number: u64,
+    track: &SourceTrack,
+    codec_private: Option<Vec<u8>>,
+) -> MatroskaTag {
+    let mut els = vec![
+        MatroskaTag::TrackNumber(number),
+        MatroskaTag::TrackUID(number),
+        MatroskaTag::FlagLacing(0),
+        MatroskaTag::Language(track.language.clone()),
+        MatroskaTag::CodecID(track.codec.clone()),
+    ];
+    if let Some(d) = &track.default_duration {
+        els.push(MatroskaTag::DefaultDuration(*d));
+    }
+    match track.kind {
+        SourceTrackKind::Video {
+            width,
+            height,
+            fps: _,
+        } => {
+            els.push(MatroskaTag::TrackType(1));
+            els.push(MatroskaTag::Video(Master::Collected(vec![
+                MatroskaTag::PixelWidth(width),
+                MatroskaTag::PixelHeight(height),
+            ])))
+        }
+        SourceTrackKind::Audio {
+            channels,
+            sample_rate,
+            bit_depth,
+        } => {
+            els.push(MatroskaTag::TrackType(2));
+            // BitDepth is a child of the Audio master in the Matroska element
+            // hierarchy, not of TrackEntry.
+            els.push(MatroskaTag::Audio(Master::Collected(vec![
+                MatroskaTag::SamplingFrequency(sample_rate),
+                MatroskaTag::Channels(
+                    channels
+                        .try_into()
+                        .expect("channel count fits the Channels tag"),
+                ),
+                MatroskaTag::BitDepth(
+                    bit_depth
+                        .try_into()
+                        .expect("bit depth fits the BitDepth tag"),
+                ),
+            ])));
+        }
+        SourceTrackKind::Subtitles => {
+            // 17 (0x11) is the Matroska track type for subtitles.
+            els.push(MatroskaTag::TrackType(17));
+        }
+    }
+    // `codec_private` is owned, so it can be moved into the tag without cloning.
+    if let Some(d) = codec_private {
+        els.push(MatroskaTag::CodecPrivate(d));
+    }
+    MatroskaTag::TrackEntry(Master::Collected(els))
+}