diff options
author | metamuffin <metamuffin@disroot.org> | 2025-09-13 16:08:42 +0200 |
---|---|---|
committer | metamuffin <metamuffin@disroot.org> | 2025-09-13 16:08:42 +0200 |
commit | 044c7e1c75145f1ec9d002b4f6fc4433ff7f9540 (patch) | |
tree | db326c8f2327396ed443a1822936927e7c847494 | |
parent | e99bde7a00a161ff5dd91eaf1ce546a9d98cef05 (diff) | |
download | jellything-044c7e1c75145f1ec9d002b4f6fc4433ff7f9540.tar jellything-044c7e1c75145f1ec9d002b4f6fc4433ff7f9540.tar.bz2 jellything-044c7e1c75145f1ec9d002b4f6fc4433ff7f9540.tar.zst |
start remuxer crate rewrite; added matroska demuxer and format detection
30 files changed, 461 insertions, 2576 deletions
@@ -1,3 +1,4 @@ /target /target_perf /data +/remuxer_old @@ -108,12 +108,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" [[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - -[[package]] name = "android_system_properties" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -477,9 +471,9 @@ checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" [[package]] name = "cc" -version = "1.2.36" +version = "1.2.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5252b3d2648e5eedbc1a6f501e3c795e07025c1e93bbf8bbdd6eef7f447a6d54" +checksum = "65193589c6404eb80b450d618eaf9a2cafaaafd57ecce47370519ef674a7bd44" dependencies = [ "find-msvc-tools", "jobserver", @@ -527,17 +521,16 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.41" +version = "0.4.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" dependencies = [ - "android-tzdata", "iana-time-zone", "js-sys", "num-traits", "serde", "wasm-bindgen", - "windows-link", + "windows-link 0.2.0", ] [[package]] @@ -741,6 +734,41 @@ dependencies = [ ] [[package]] +name = "darling" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" +dependencies = [ + 
"fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" +dependencies = [ + "darling_core", + "quote", + "syn", +] + +[[package]] name = "data-encoding" version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -832,11 +860,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" [[package]] -name = "ebml-struct" +name = "ebml" version = "0.1.0" -source = "git+https://codeberg.org/metamuffin/ebml-struct#fcefaa67b85b96b17cec2d1c7f7c53998520559b" +source = "git+https://codeberg.org/metamuffin/ebml-rs#dd98200be401334639e45fd6d89d30af3b73cce7" dependencies = [ - "bincode", + "ebml-derive", +] + +[[package]] +name = "ebml-derive" +version = "0.1.0" +source = "git+https://codeberg.org/metamuffin/ebml-rs#dd98200be401334639e45fd6d89d30af3b73cce7" +dependencies = [ + "darling", + "quote", + "syn", ] [[package]] @@ -919,12 +957,12 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" -version = "0.3.13" +version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.0", ] [[package]] @@ -1194,7 +1232,7 @@ dependencies = [ "js-sys", "libc", "r-efi", - "wasi 0.14.4+wasi-0.2.4", + "wasi 0.14.5+wasi-0.2.4", "wasm-bindgen", ] @@ -1477,9 +1515,9 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.63" +version = "0.1.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -1586,6 +1624,12 @@ dependencies = [ ] [[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] name = "idna" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1666,9 +1710,9 @@ checksum = "d0263a3d970d5c054ed9312c0057b4f3bde9c0b33836d3637361d4a9e6e7a408" [[package]] name = "indexmap" -version = "2.11.0" +version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2481980430f9f78649238835720ddccc57e52df14ffce1c6f37391d61b563e9" +checksum = "206a8042aec68fa4a62e8d3f7aa4ceb508177d9324faf261e1959e495b7a1921" dependencies = [ "equivalent", "hashbrown", @@ -1904,27 +1948,18 @@ dependencies = [ ] [[package]] -name = "jellymatroska" -version = "0.1.0" -dependencies = [ - "ebml_derive", - "env_logger", - "log", - "thiserror 2.0.16", -] - -[[package]] name = "jellyremuxer" version = "0.1.0" dependencies = [ "anyhow", "bincode", - "ebml-struct", + "ebml", + "env_logger", + "hex", "jellycache", - "jellymatroska", "log", + "matroska", "serde", - "tokio", ] [[package]] @@ -2178,9 +2213,9 @@ checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" [[package]] name = "linux-raw-sys" -version = "0.9.4" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" [[package]] name = "litemap" @@ -2289,6 +2324,14 @@ dependencies = [ ] [[package]] +name = "matroska" +version = "0.1.0" +source = 
"git+https://codeberg.org/metamuffin/ebml-rs#dd98200be401334639e45fd6d89d30af3b73cce7" +dependencies = [ + "ebml", +] + +[[package]] name = "maybe-rayon" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -3376,15 +3419,15 @@ dependencies = [ [[package]] name = "rustix" -version = "1.0.8" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8" +checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" dependencies = [ "bitflags", "errno", "libc", - "linux-raw-sys 0.9.4", - "windows-sys 0.60.2", + "linux-raw-sys 0.11.0", + "windows-sys 0.61.0", ] [[package]] @@ -3413,9 +3456,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.4" +version = "0.103.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a17884ae0c1b773f1ccd2bd4a8c72f16da897310a98b0e84bf349ad5ead92fc" +checksum = "b5a37813727b78798e53c2bec3f5e8fe12a6d6f8389bf9ca7802add4c9905ad8" dependencies = [ "ring", "rustls-pki-types", @@ -3901,15 +3944,15 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "tempfile" -version = "3.21.0" +version = "3.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15b61f8f20e3a6f7e0649d825294eaf317edce30f82cf6026e7e4cb9222a7d1e" +checksum = "84fa4d11fadde498443cca10fd3ac23c951f0dc59e080e9f4b93d4df4e4eea53" dependencies = [ "fastrand", "getrandom 0.3.3", "once_cell", - "rustix 1.0.8", - "windows-sys 0.60.2", + "rustix 1.1.2", + "windows-sys 0.61.0", ] [[package]] @@ -4312,9 +4355,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.18" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +checksum = 
"f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" [[package]] name = "unicode-width" @@ -4472,9 +4515,18 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasi" -version = "0.14.4+wasi-0.2.4" +version = "0.14.5+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88a5f4a424faf49c3c2c344f166f0662341d470ea185e939657aaff130f0ec4a" +checksum = "a4494f6290a82f5fe584817a676a34b9d6763e8d9d18204009fb31dceca98fd4" +dependencies = [ + "wasip2", +] + +[[package]] +name = "wasip2" +version = "1.0.0+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03fa2761397e5bd52002cd7e73110c71af2109aca4e521a9f40473fe685b0a24" dependencies = [ "wit-bindgen", ] @@ -4629,13 +4681,13 @@ dependencies = [ [[package]] name = "windows-core" -version = "0.61.2" +version = "0.62.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +checksum = "57fe7168f7de578d2d8a05b07fd61870d2e73b4020e9f49aa00da8471723497c" dependencies = [ "windows-implement", "windows-interface", - "windows-link", + "windows-link 0.2.0", "windows-result", "windows-strings", ] @@ -4669,21 +4721,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" [[package]] +name = "windows-link" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" + +[[package]] name = "windows-result" -version = "0.3.4" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +checksum = "7084dcc306f89883455a206237404d3eaf961e5bd7e0f312f7c91f57eb44167f" dependencies = [ - "windows-link", + "windows-link 0.2.0", ] 
[[package]] name = "windows-strings" -version = "0.4.2" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +checksum = "7218c655a553b0bed4426cf54b20d7ba363ef543b52d515b3e48d7fd55318dda" dependencies = [ - "windows-link", + "windows-link 0.2.0", ] [[package]] @@ -4714,6 +4772,15 @@ dependencies = [ ] [[package]] +name = "windows-sys" +version = "0.61.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e201184e40b2ede64bc2ea34968b28e33622acdbbf37104f0e4a33f7abe657aa" +dependencies = [ + "windows-link 0.2.0", +] + +[[package]] name = "windows-targets" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -4750,7 +4817,7 @@ version = "0.53.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" dependencies = [ - "windows-link", + "windows-link 0.1.3", "windows_aarch64_gnullvm 0.53.0", "windows_aarch64_msvc 0.53.0", "windows_i686_gnu 0.53.0", @@ -4955,18 +5022,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.26" +version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.26" +version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" dependencies = [ "proc-macro2", "quote", @@ -8,14 +8,12 @@ members = [ "import/asset_token", "import/fallback_generator", "logic", - "matroska", - "remuxer", "server", "tool", "transcoder", "stream", 
"stream/types", - "ui", + "ui", "remuxer", ] resolver = "3" diff --git a/import/src/lib.rs b/import/src/lib.rs index 6d2ae0f..941721e 100644 --- a/import/src/lib.rs +++ b/import/src/lib.rs @@ -27,7 +27,6 @@ use jellycommon::{ Person, Rating, SourceTrack, SourceTrackKind, TmdbKind, TrackSource, TraktKind, Visibility, }; use jellyimport_fallback_generator::generate_fallback; -use jellyremuxer::metadata::checked_matroska_metadata; use log::info; use musicbrainz::MusicBrainz; use rayon::iter::{ParallelBridge, ParallelIterator}; diff --git a/matroska/Cargo.toml b/matroska/Cargo.toml deleted file mode 100644 index 23c9c5a..0000000 --- a/matroska/Cargo.toml +++ /dev/null @@ -1,10 +0,0 @@ -[package] -name = "jellymatroska" -version = "0.1.0" -edition = "2021" - -[dependencies] -ebml_derive = { path = "../ebml_derive" } -log = "0.4.25" -env_logger = "0.11.6" -thiserror = "2.0.11" diff --git a/matroska/src/bin/mkvdump.rs b/matroska/src/bin/mkvdump.rs deleted file mode 100644 index 48420c6..0000000 --- a/matroska/src/bin/mkvdump.rs +++ /dev/null @@ -1,28 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. 
- Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use jellymatroska::{matroska::MatroskaTag, read::EbmlReader}; -use std::{fs::File, io::BufReader}; - -fn main() { - env_logger::init_from_env("LOG"); - let path = std::env::args().nth(1).unwrap(); - let r = EbmlReader::new(BufReader::new(File::open(path).unwrap())); - - for tag in r { - let (position, tag) = tag.unwrap(); - match tag { - MatroskaTag::SimpleBlock(b) | MatroskaTag::Block(b) => { - println!( - "block t={} kf={} ts_off={}", - b.track, - b.flags.keyframe(), - b.timestamp_off - ) - } - _ => println!("{} {tag:?}", position.unwrap_or(0)), - } - } -} diff --git a/matroska/src/block.rs b/matroska/src/block.rs deleted file mode 100644 index 54d9de5..0000000 --- a/matroska/src/block.rs +++ /dev/null @@ -1,91 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use crate::{ - read::ReadExt, - write::{vint_length, write_vint}, - ReadValue, Result, WriteValue, -}; -use std::io::{Cursor, Write}; - -#[derive(Debug, PartialEq, Clone, Copy)] -pub enum LacingType { - None, - Xiph, - FixedSize, - Ebml, -} - -#[derive(Debug, PartialEq, Clone)] -pub struct Block { - pub track: u64, - pub flags: Flags, - pub timestamp_off: i16, - pub data: Vec<u8>, -} - -#[derive(Debug, PartialEq, Clone)] -pub struct Flags(u8); - -impl Flags { - pub fn keyframe(&self) -> bool { - self.0 & 0b10000000 != 0 - } - pub fn lacing(&self) -> LacingType { - match self.0 & 0b00000110 { - 0b000 => LacingType::None, - 0b010 => LacingType::Xiph, - 0b100 => LacingType::FixedSize, - 0b110 => LacingType::Ebml, - _ => unreachable!(), - } - } - pub fn discardable(&self) -> bool { - self.0 & 0b00000001 != 0 - } - pub fn invisible(&self) -> bool { - self.0 & 0b00001000 != 0 - } -} - -impl ReadValue for Block { - fn from_buf(buf: &[u8]) -> Result<Self> { - let (track, c) = 
Cursor::new(buf).read_vint_len()?; - let timestamp_off = i16::from_be_bytes(buf[c..c + 2].try_into().unwrap()); - let flags = Flags(buf[c + 2]); - let data = Vec::from(&buf[c + 3..]); - - Ok(Self { - track, - data, - flags, - timestamp_off, - }) - } -} - -impl WriteValue for Block { - fn write_to(&self, w: &mut impl Write) -> Result<()> { - write_vint(w, self.inner_len() as u64)?; - write_vint(w, self.track)?; - w.write_all(&self.timestamp_off.to_be_bytes())?; - w.write_all(&[self.flags.0])?; - w.write_all(&self.data)?; - Ok(()) - } - fn size(&self) -> usize { - let il = self.inner_len(); - vint_length(il as u64) + il - } -} - -impl Block { - fn inner_len(&self) -> usize { - vint_length(self.track) - + 2 // timestamp - + 1 // flags - + self.data.len() - } -} diff --git a/matroska/src/error.rs b/matroska/src/error.rs deleted file mode 100644 index d2bbcbf..0000000 --- a/matroska/src/error.rs +++ /dev/null @@ -1,24 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use thiserror::Error; - -#[derive(Debug, Error)] -pub enum Error { - #[error("invalid padding")] - InvalidPadding, - #[error("varint too long")] - VarintTooLong, - #[error("global tags dont provide any context")] - GlobalTagsAsContext, - #[error("invalid length of a exact size type")] - InvalidTypeLen, - #[error("invalid utf8")] - InvalidUTF8, - #[error("unknown id")] - UnknownID, - #[error("io: {0}")] - Io(#[from] std::io::Error), -} diff --git a/matroska/src/lib.rs b/matroska/src/lib.rs deleted file mode 100644 index 1098ca6..0000000 --- a/matroska/src/lib.rs +++ /dev/null @@ -1,27 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. 
- Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -pub mod block; -pub mod error; -pub mod matroska; -pub mod read; -pub mod size; -pub mod unflatten; -pub mod write; - -pub use matroska::MatroskaTag; -pub use read::ReadValue; -pub use write::WriteValue; - -#[derive(Debug, Clone, PartialEq)] -pub enum Master { - Collected(Vec<MatroskaTag>), - Start, - End, -} - -pub(crate) use block::Block; -pub(crate) use error::Error; -pub(crate) type Result<T> = core::result::Result<T, Error>; diff --git a/matroska/src/matroska.rs b/matroska/src/matroska.rs deleted file mode 100644 index 50e12b9..0000000 --- a/matroska/src/matroska.rs +++ /dev/null @@ -1,335 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use ebml_derive::define_ebml; - -define_ebml! { - global Crc32[0xbf]: Binary, - global Void[0xec]: Binary, - - Ebml[0x1a45dfa3]: { - EbmlVersion[0x4286]: Uint, - EbmlReadVersion[0x42f7]: Uint, - EbmlMaxIdLength[0x42f2]: Uint, - EbmlMaxSizeLength[0x42f3]: Uint, - DocType[0x4282]: Utf8, - DocTypeVersion[0x4287]: Uint, - DocTypeReadVersion[0x4285]: Uint, - DocTypeExtension[0x4281]: { - DocTypeExtensionName[0x4283]: Utf8, - DocTypeExtensionVersion[0x4284]: Uint, - }, - }, - - Segment[0x18538067]: { - Attachments[0x1941A469]: { - AttachedFile[0x61A7]: { - FileData[0x465C]: Binary, - FileDescription[0x467E]: Utf8, - FileMimeType[0x4660]: Utf8, - FileName[0x466E]: Utf8, - FileReferral[0x4675]: Binary, - FileUID[0x46AE]: Uint, - FileUsedEndTime[0x4662]: Uint, - FileUsedStartTime[0x4661]: Uint, - }, - }, - - Chapters[0x1043A770]: { - EditionEntry[0x45B9]: { - ChapterAtom[0xB6]: { - ChapProcess[0x6944]: { - ChapProcessCodecID[0x6955]: Uint, - ChapProcessCommand[0x6911]: { - ChapProcessData[0x6933]: Binary, - ChapProcessTime[0x6922]: Uint, - }, - ChapProcessPrivate[0x450D]: Binary, - }, - 
ChapterDisplay[0x80]: { - ChapCountry[0x437E]: Utf8, - ChapLanguage[0x437C]: Utf8, - ChapLanguageIETF[0x437D]: Utf8, - ChapString[0x85]: Utf8, - }, - ChapterFlagEnabled[0x4598]: Uint, - ChapterFlagHidden[0x98]: Uint, - ChapterPhysicalEquiv[0x63C3]: Uint, - ChapterSegmentEditionUID[0x6EBC]: Uint, - ChapterSegmentUID[0x6E67]: Binary, - ChapterStringUID[0x5654]: Utf8, - ChapterTimeEnd[0x92]: Uint, - ChapterTimeStart[0x91]: Uint, - ChapterUID[0x73C4]: Uint, - ChapterTrack[0x8F]: { - ChapterTrackUID[0x89]: Uint, - }, - }, - EditionFlagDefault[0x45DB]: Uint, - EditionFlagHidden[0x45BD]: Uint, - EditionFlagOrdered[0x45DD]: Uint, - EditionUID[0x45BC]: Uint, - }, - }, - - Cluster[0x1F43B675]: { - BlockGroup[0xA0]: { - Block[0xA1]: Block, - BlockAdditions[0x75A1]: { - BlockMore[0xA6]: { - BlockAddID[0xEE]: Uint, - BlockAdditional[0xA5]: Binary, - }, - }, - BlockDuration[0x9B]: Uint, - BlockVirtual[0xA2]: Binary, - CodecState[0xA4]: Binary, - DiscardPadding[0x75A2]: Int, - ReferenceBlock[0xFB]: Int, - ReferenceFrame[0xC8]: { - ReferenceOffset[0xC9]: Uint, - ReferenceTimestamp[0xCA]: Uint, - }, - ReferencePriority[0xFA]: Uint, - ReferenceVirtual[0xFD]: Int, - Slices[0x8E]: { - TimeSlice[0xE8]: { - BlockAdditionID[0xCB]: Uint, - Delay[0xCE]: Uint, - FrameNumber[0xCD]: Uint, - LaceNumber[0xCC]: Uint, - SliceDuration[0xCF]: Uint, - }, - }, - }, - EncryptedBlock[0xAF]: Binary, - Position[0xA7]: Uint, - PrevSize[0xAB]: Uint, - SilentTracks[0x5854]: { - SilentTrackNumber[0x58D7]: Uint, - }, - SimpleBlock[0xA3]: Block, - Timestamp[0xE7]: Uint, - }, - - Cues[0x1C53BB6B]: { - CuePoint[0xBB]: { - CueTime[0xB3]: Uint, - CueTrackPositions[0xB7]: { - CueBlockNumber[0x5378]: Uint, - CueClusterPosition[0xF1]: Uint, - CueCodecState[0xEA]: Uint, - CueDuration[0xB2]: Uint, - CueReference[0xDB]: { - CueRefCluster[0x97]: Uint, - CueRefCodecState[0xEB]: Uint, - CueRefNumber[0x535F]: Uint, - CueRefTime[0x96]: Uint, - }, - CueRelativePosition[0xF0]: Uint, - CueTrack[0xF7]: Uint, - }, - }, - }, - - 
Info[0x1549A966]: { - ChapterTranslate[0x6924]: { - ChapterTranslateCodec[0x69BF]: Uint, - ChapterTranslateEditionUID[0x69FC]: Uint, - ChapterTranslateID[0x69A5]: Binary, - }, - DateUTC[0x4461]: Int, - Duration[0x4489]: Float, - MuxingApp[0x4D80]: Utf8, - NextFilename[0x3E83BB]: Utf8, - NextUID[0x3EB923]: Binary, - PrevFilename[0x3C83AB]: Utf8, - PrevUID[0x3CB923]: Binary, - SegmentFamily[0x4444]: Binary, - SegmentFilename[0x7384]: Utf8, - SegmentUID[0x73A4]: Binary, - TimestampScale[0x2AD7B1]: Uint, - Title[0x7BA9]: Utf8, - WritingApp[0x5741]: Utf8, - }, - - SeekHead[0x114D9B74]: { - Seek[0x4DBB]: { - SeekID[0x53AB]: Binary, - SeekPosition[0x53AC]: Uint, - }, - }, - - Tags[0x1254C367]: { - Tag[0x7373]: { - SimpleTag[0x67C8]: { - TagBinary[0x4485]: Binary, - TagDefault[0x4484]: Uint, - TagDefaultBogus[0x44B4]: Uint, - TagLanguage[0x447A]: Utf8, - TagLanguageIETF[0x447B]: Utf8, - TagName[0x45A3]: Utf8, - TagString[0x4487]: Utf8, - }, - Targets[0x63C0]: { - TagAttachmentUID[0x63C6]: Uint, - TagChapterUID[0x63C4]: Uint, - TagEditionUID[0x63C9]: Uint, - TagTrackUID[0x63C5]: Uint, - TargetType[0x63CA]: Utf8, - TargetTypeValue[0x68CA]: Uint, - }, - }, - }, - - Tracks[0x1654AE6B]: { - TrackEntry[0xAE]: { - AttachmentLink[0x7446]: Uint, - Audio[0xE1]: { - BitDepth[0x6264]: Uint, - ChannelPositions[0x7D7B]: Binary, - Channels[0x9F]: Uint, - OutputSamplingFrequency[0x78B5]: Float, - SamplingFrequency[0xB5]: Float, - }, - BlockAdditionMapping[0x41E4]: { - BlockAddIDExtraData[0x41ED]: Binary, - BlockAddIDName[0x41A4]: Utf8, - BlockAddIDType[0x41E7]: Uint, - BlockAddIDValue[0x41F0]: Uint, - }, - CodecDecodeAll[0xAA]: Uint, - CodecDelay[0x56AA]: Uint, - CodecDownloadURL[0x26B240]: Utf8, - CodecID[0x86]: Utf8, - CodecInfoURL[0x3B4040]: Utf8, - CodecName[0x258688]: Utf8, - CodecPrivate[0x63A2]: Binary, - CodecSettings[0x3A9697]: Utf8, - ContentEncodings[0x6D80]: { - ContentEncoding[0x6240]: { - ContentCompression[0x5034]: { - ContentCompAlgo[0x4254]: Uint, - 
ContentCompSettings[0x4255]: Binary, - }, - ContentEncodingOrder[0x5031]: Uint, - ContentEncodingScope[0x5032]: Uint, - ContentEncodingType[0x5033]: Uint, - ContentEncryption[0x5035]: { - ContentEncAESSettings[0x47E7]: { - AESSettingsCipherMode[0x47E8]: Uint, - }, - ContentEncAlgo[0x47E1]: Uint, - ContentEncKeyID[0x47E2]: Binary, - ContentSigAlgo[0x47E5]: Uint, - ContentSigHashAlgo[0x47E6]: Uint, - ContentSigKeyID[0x47E4]: Binary, - ContentSignature[0x47E3]: Binary, - }, - }, - }, - DefaultDecodedFieldDuration[0x234E7A]: Uint, - DefaultDuration[0x23E383]: Uint, - FlagCommentary[0x55AF]: Uint, - FlagDefault[0x88]: Uint, - FlagEnabled[0xB9]: Uint, - FlagForced[0x55AA]: Uint, - FlagHearingImpaired[0x55AB]: Uint, - FlagLacing[0x9C]: Uint, - FlagOriginal[0x55AE]: Uint, - FlagTextDescriptions[0x55AD]: Uint, - FlagVisualImpaired[0x55AC]: Uint, - Language[0x22B59C]: Utf8, - LanguageIETF[0x22B59D]: Utf8, - MaxBlockAdditionID[0x55EE]: Uint, - MaxCache[0x6DF8]: Uint, - MinCache[0x6DE7]: Uint, - Name[0x536E]: Utf8, - SeekPreRoll[0x56BB]: Uint, - TrackNumber[0xD7]: Uint, - TrackOffset[0x537F]: Int, - TrackOperation[0xE2]: { - TrackCombinePlanes[0xE3]: { - TrackPlane[0xE4]: { - TrackPlaneType[0xE6]: Uint, - TrackPlaneUID[0xE5]: Uint, - }, - }, - TrackJoinBlocks[0xE9]: { - TrackJoinUID[0xED]: Uint, - }, - }, - TrackOverlay[0x6FAB]: Uint, - TrackTimestampScale[0x23314F]: Float, - TrackTranslate[0x6624]: { - TrackTranslateCodec[0x66BF]: Uint, - TrackTranslateEditionUID[0x66FC]: Uint, - TrackTranslateTrackID[0x66A5]: Binary, - }, - TrackType[0x83]: Uint, - TrackUID[0x73C5]: Uint, - TrickMasterTrackSegmentUID[0xC4]: Binary, - TrickMasterTrackUID[0xC7]: Uint, - TrickTrackFlag[0xC6]: Uint, - TrickTrackSegmentUID[0xC1]: Binary, - TrickTrackUID[0xC0]: Uint, - Video[0xE0]: { - AlphaMode[0x53C0]: Uint, - AspectRatioType[0x54B3]: Uint, - Colour[0x55B0]: { - BitsPerChannel[0x55B2]: Uint, - CbSubsamplingHorz[0x55B5]: Uint, - CbSubsamplingVert[0x55B6]: Uint, - ChromaSitingHorz[0x55B7]: Uint, - 
ChromaSitingVert[0x55B8]: Uint, - ChromaSubsamplingHorz[0x55B3]: Uint, - ChromaSubsamplingVert[0x55B4]: Uint, - MasteringMetadata[0x55D0]: { - LuminanceMax[0x55D9]: Float, - LuminanceMin[0x55DA]: Float, - PrimaryBChromaticityX[0x55D5]: Float, - PrimaryBChromaticityY[0x55D6]: Float, - PrimaryGChromaticityX[0x55D3]: Float, - PrimaryGChromaticityY[0x55D4]: Float, - PrimaryRChromaticityX[0x55D1]: Float, - PrimaryRChromaticityY[0x55D2]: Float, - WhitePointChromaticityX[0x55D7]: Float, - WhitePointChromaticityY[0x55D8]: Float, - }, - MatrixCoefficients[0x55B1]: Uint, - MaxCLL[0x55BC]: Uint, - MaxFALL[0x55BD]: Uint, - Primaries[0x55BB]: Uint, - Range[0x55B9]: Uint, - TransferCharacteristics[0x55BA]: Uint, - }, - DisplayHeight[0x54BA]: Uint, - DisplayUnit[0x54B2]: Uint, - DisplayWidth[0x54B0]: Uint, - FieldOrder[0x9D]: Uint, - FlagInterlaced[0x9A]: Uint, - FrameRate[0x2383E3]: Float, - GammaValue[0x2FB523]: Float, - OldStereoMode[0x53B9]: Uint, - PixelCropBottom[0x54AA]: Uint, - PixelCropLeft[0x54CC]: Uint, - PixelCropRight[0x54DD]: Uint, - PixelCropTop[0x54BB]: Uint, - PixelHeight[0xBA]: Uint, - PixelWidth[0xB0]: Uint, - Projection[0x7670]: { - ProjectionPosePitch[0x7674]: Float, - ProjectionPoseRoll[0x7675]: Float, - ProjectionPoseYaw[0x7673]: Float, - ProjectionPrivate[0x7672]: Binary, - ProjectionType[0x7671]: Uint, - }, - StereoMode[0x53B8]: Uint, - UncompressedFourCC[0x2EB524]: Binary, - }, - }, - }, - }, -} diff --git a/matroska/src/read.rs b/matroska/src/read.rs deleted file mode 100644 index c3d06fa..0000000 --- a/matroska/src/read.rs +++ /dev/null @@ -1,298 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. 
- Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use crate::{error::Error, matroska::MatroskaTag, size::EbmlSize, Master, Result}; -use log::{debug, warn}; -use std::{ - collections::VecDeque, - io::{Read, Seek, SeekFrom}, -}; - -trait ReadAndSeek: Read + Seek {} -impl<T: Read + Seek> ReadAndSeek for T {} - -#[derive(Debug, Clone, Copy)] -pub struct StackTag { - end: Option<u64>, - id: u64, -} - -pub struct EbmlReader { - inner: Box<dyn ReadAndSeek>, - stack: Vec<StackTag>, - queue: VecDeque<(Option<u64>, MatroskaTag)>, - position: u64, -} - -impl Read for EbmlReader { - fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> { - let r = self.inner.read(buf)?; - self.position += r as u64; - Ok(r) - } -} - -impl EbmlReader { - pub fn new<T: Seek + Read + 'static>(inner: T) -> Self { - Self { - queue: VecDeque::new(), - inner: Box::new(inner), - stack: vec![], - position: 0, - } - } - - #[inline] - pub fn read_byte(&mut self) -> Result<u8> { - let mut b = [0u8]; - self.inner.read_exact(&mut b).map_err(Error::Io)?; - self.position += 1; - Ok(b[0]) - } - - pub fn read_buf(&mut self, size: impl Into<usize>) -> Result<Vec<u8>> { - let size = size.into(); - let mut b = vec![0u8; size]; - self.inner.read_exact(&mut b).map_err(Error::Io)?; - self.position += size as u64; - Ok(b) - } - - pub fn read_vint_len(&mut self) -> Result<(u64, usize)> { - let s = self.read_byte()?; - let len = s.leading_zeros() + 1; - if len > 8 { - Err(Error::VarintTooLong)? - } - let mut value = s as u64; - value -= 1 << (8 - len); - for _ in 1..len { - value <<= 8; - value += self.read_byte()? 
as u64; - } - Ok((value, len as usize)) - } - - #[inline] - pub fn read_vint(&mut self) -> Result<u64> { - Ok(self.read_vint_len()?.0) - } - - #[inline] - pub fn read_utf8(&mut self, size: impl Into<usize>) -> Result<String> { - let b = self.read_buf(size)?; - String::from_utf8(b).map_err(|_| Error::InvalidUTF8) - } - - #[inline] - pub fn read_tag_id(&mut self) -> Result<u64> { - let (value, len) = self.read_vint_len()?; - Ok(value + (1 << (7 * len))) - } - - #[inline] - pub fn read_tag_size(&mut self) -> Result<EbmlSize> { - Ok(EbmlSize::from_vint(self.read_vint_len()?)) - } - - /// reads *some* amount of tags from the stream and pushes it to the queue. - pub fn read_stuff(&mut self) -> Result<()> { - while let Some(e) = self.stack.last().copied() { - if let Some(end) = e.end { - if self.position >= end { - if self.position != end { - warn!("we missed the end by {} bytes", self.position - end) - } - self.stack.pop(); - self.queue - .push_back((None, MatroskaTag::construct_master(e.id, Master::End)?)); - } else { - break; - } - } else { - break; - } - } - - let start_position = self.position; - let id = self.read_tag_id()?; - let size = self.read_tag_size()?; - let is_master = MatroskaTag::is_master(id)?; - let tag = if is_master { - MatroskaTag::construct_master(id, Master::Start)? - } else { - let data = self.read_buf(size.some().unwrap())?; - MatroskaTag::parse(id, &data)? - }; - - if let Some(path) = tag.path() { - // we have slightly different rules for closing tags implicitly - // this closes as many tags as needed to make the next tag a valid child - while let Some(stag @ StackTag { end: None, .. 
}) = self.stack.last() { - if path.last() == Some(&stag.id) { - break; - } else { - let end = - MatroskaTag::construct_master(self.stack.pop().unwrap().id, Master::End)?; - self.queue.push_back((None, end)); - } - } - } - - if is_master { - self.stack.push(StackTag { - end: size.some().map(|s| s as u64 + self.position), - id, - }); - } - self.queue.push_back((Some(start_position), tag)); - Ok(()) - } - - /// context should be the next expected tag, such that the stack can be derived from its path. - pub fn seek(&mut self, position: u64, context: MatroskaTag) -> Result<()> { - let path = context.path().ok_or(Error::GlobalTagsAsContext)?; - debug!( - "seeking to {position} with a context restored from path {:x?}", - path - ); - self.queue.clear(); - self.position = position; - self.inner.seek(SeekFrom::Start(position))?; - self.stack = path - .iter() - .map(|id| StackTag { id: *id, end: None }) - .collect(); - Ok(()) - } -} - -impl Iterator for EbmlReader { - type Item = Result<(Option<u64>, MatroskaTag)>; - fn next(&mut self) -> Option<Self::Item> { - if let Some(t) = self.queue.pop_front() { - // match t { - // MatroskaTag::SimpleBlock(_) | MatroskaTag::Block(_) => (), - // _ => debug!("reader yield: {t:?}"), - // }; - Some(Ok(t)) - } else { - match self.read_stuff() { - Ok(()) => self.next(), - // in case we reached the end (error: failed to fill whole buffer), - // return the rest in the queue and pop all items of the stack - Err(e) => { - // TODO this is horrible, should use a custom error enum instead - if format!("{e}").as_str() == "failed to fill whole buffer" { - match self.queue.pop_front() { - Some(q) => Some(Ok(q)), - None => match self.stack.pop() { - Some(q) => Some(Ok(( - None, - MatroskaTag::construct_master(q.id, Master::End).unwrap(), - ))), - None => Some(Err(e)), - }, - } - } else { - Some(Err(e)) - } - } - } - } - } -} - -pub trait ReadValue: Sized { - fn from_buf(buf: &[u8]) -> Result<Self>; -} - -impl ReadValue for u64 { - fn from_buf(buf: 
&[u8]) -> Result<Self> { - if buf.len() > 8 { - Err(Error::InvalidTypeLen)? - } - let mut val = 0u64; - for byte in buf { - val <<= 8; - val |= *byte as u64; - } - Ok(val) - } -} -impl ReadValue for i64 { - fn from_buf(buf: &[u8]) -> Result<Self> { - if buf.len() > 8 { - Err(Error::InvalidTypeLen)? - } - Ok(if buf[0] > 127 { - if buf.len() == 8 { - i64::from_be_bytes(buf.try_into().unwrap()) - } else { - -((1 << (buf.len() * 8)) - (u64::from_buf(buf)? as i64)) - } - } else { - u64::from_buf(buf)? as i64 - }) - } -} -impl ReadValue for f64 { - fn from_buf(buf: &[u8]) -> Result<Self> { - Ok(if buf.len() == 4 { - f32::from_be_bytes(buf.try_into().unwrap()) as f64 - } else if buf.len() == 8 { - f64::from_be_bytes(buf.try_into().unwrap()) - } else { - Err(Error::InvalidTypeLen)? - }) - } -} - -impl ReadValue for Vec<u8> { - fn from_buf(buf: &[u8]) -> Result<Self> { - Ok(buf.to_vec()) - } -} -impl ReadValue for String { - fn from_buf(buf: &[u8]) -> Result<Self> { - String::from_utf8(Vec::from(buf)).map_err(|_| Error::InvalidUTF8) - } -} -impl ReadValue for Master { - fn from_buf(_: &[u8]) -> Result<Self> { - panic!("master shall not be read like this") - } -} - -pub trait ReadExt: Read { - fn read_byte(&mut self) -> Result<u8>; - fn read_vint_len(&mut self) -> Result<(u64, usize)>; - fn read_vint(&mut self) -> Result<u64>; -} -impl<T: Read> ReadExt for T { - fn read_byte(&mut self) -> Result<u8> { - let mut b = [0u8]; - self.read_exact(&mut b).map_err(Error::Io)?; - Ok(b[0]) - } - fn read_vint_len(&mut self) -> Result<(u64, usize)> { - let s = self.read_byte()?; - let len = s.leading_zeros() + 1; - if len > 8 { - Err(Error::VarintTooLong)? - } - let mut value = s as u64; - value -= 1 << (8 - len); - for _ in 1..len { - value <<= 8; - value += self.read_byte()? 
as u64; - } - Ok((value, len as usize)) - } - #[inline] - fn read_vint(&mut self) -> Result<u64> { - Ok(self.read_vint_len()?.0) - } -} diff --git a/matroska/src/size.rs b/matroska/src/size.rs deleted file mode 100644 index 7ef0de9..0000000 --- a/matroska/src/size.rs +++ /dev/null @@ -1,25 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum EbmlSize { - Exact(usize), - Unknown, -} -impl EbmlSize { - pub fn from_vint((value, len): (u64, usize)) -> EbmlSize { - if value == ((1 << (7 * len)) - 1) { - Self::Unknown - } else { - Self::Exact(value as usize) - } - } - pub fn some(self) -> Option<usize> { - match self { - EbmlSize::Exact(s) => Some(s), - EbmlSize::Unknown => None, - } - } -} diff --git a/matroska/src/unflatten.rs b/matroska/src/unflatten.rs deleted file mode 100644 index d5fb887..0000000 --- a/matroska/src/unflatten.rs +++ /dev/null @@ -1,83 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. 
- Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use crate::{matroska::MatroskaTag, Master, Result}; - -pub struct Unflat<'a> { - pub item: MatroskaTag, - pub children: Option<Unflatten<'a>>, - pub position: Option<u64>, -} - -pub struct Unflatten<'a> { - inner: &'a mut dyn Iterator<Item = Result<(Option<u64>, MatroskaTag)>>, - stop: bool, - end: Option<MatroskaTag>, -} - -impl<'a> Unflatten<'a> { - pub fn new(inner: &'a mut dyn Iterator<Item = Result<(Option<u64>, MatroskaTag)>>) -> Self { - Self { - inner, - stop: false, - end: None, - } - } - pub fn new_with_end( - inner: &'a mut dyn Iterator<Item = Result<(Option<u64>, MatroskaTag)>>, - start: MatroskaTag, - ) -> Self { - Self { - inner, - stop: false, - end: Some(MatroskaTag::construct_master(start.id(), Master::End).unwrap()), - } - } - pub fn exit_dirty(&mut self) { - self.stop = true; - } - - pub fn n(&mut self) -> Option<Result<Unflat<'_>>> { - if self.stop { - return None; - } - match self.inner.next() { - None => None, - Some(Err(e)) => Some(Err(e)), - Some(Ok((position, item))) => { - let master = MatroskaTag::is_master(item.id()).unwrap(); - if Some(&item) == self.end.as_ref() { - self.stop = true; - None - } else { - Some(Ok(Unflat { - position, - children: if master { - let end = - MatroskaTag::construct_master(item.id(), Master::End).unwrap(); - if end == item { - return None; - } - Some(Unflatten { - inner: self.inner, - stop: false, - end: Some(end), - }) - } else { - None - }, - item, - })) - } - } - } - } -} - -impl Drop for Unflatten<'_> { - fn drop(&mut self) { - while self.n().is_some() {} - } -} diff --git a/matroska/src/write.rs b/matroska/src/write.rs deleted file mode 100644 index 58923c6..0000000 --- a/matroska/src/write.rs +++ /dev/null @@ -1,382 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. 
- Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use crate::{error::Error, matroska::MatroskaTag, size::EbmlSize, Master, Result}; -use log::debug; -use std::io::{Seek, Write}; - -pub struct EbmlWriter<W> { - inner: W, - position: usize, -} - -impl<W: Write> EbmlWriter<W> { - pub fn new(inner: W, position: usize) -> Self { - Self { inner, position } - } - - pub fn position(&self) -> usize { - self.position - } - - pub fn write_padding(&mut self, position: usize) -> Result<()> { - debug!("padding up to {position}"); - let mut size = position - self.position; - match size { - 0 => return Ok(()), - 1 => Err(Error::InvalidPadding)?, - _ => (), - } - size -= 1; // subtract tag size - size -= 4; // subtract vint size - - // match size { - // _ if size < (1 << 7) => size -= 1, - // _ if size < (1 << 14) => size -= 2, - // _ if size < (1 << 21) => size -= 3, - // _ if size < (1 << 28) => size -= 4, - // _ if size < (1 << 35) => size -= 5, - // _ => bail!("padding too large"), - // } - - self.write_all(&[0xec])?; - self.write_vint_len(size.try_into().unwrap(), 4)?; - self.write_all(&vec![0; size])?; - Ok(()) - } - - #[inline] - pub fn write_tag(&mut self, tag: &MatroskaTag) -> Result<()> { - tag.write_full(self)?; - Ok(()) - } - - pub fn write_vint_len(&mut self, i: u64, len: usize) -> Result<()> { - let mut bytes = i.to_be_bytes(); - let trunc = &mut bytes[(8 - len)..]; - trunc[0] |= 1 << (8 - len); - self.write_all(trunc)?; - Ok(()) - } -} - -impl<W: Seek> Seek for EbmlWriter<W> { - fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result<u64> { - self.inner.seek(pos)?; - match pos { - std::io::SeekFrom::Start(s) => self.position = s as usize, - std::io::SeekFrom::End(_) => unimplemented!(), - std::io::SeekFrom::Current(s) => self.position += s as usize, - } - Ok(self.position as u64) - } -} - -impl<W: Write> Write for EbmlWriter<W> { - fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { - let size = self.inner.write(buf)?; - self.position += size; - 
Ok(size) - } - - fn flush(&mut self) -> std::io::Result<()> { - todo!() - } -} - -impl MatroskaTag { - pub fn write_full(&self, w: &mut impl Write) -> Result<()> { - for b in self.id().to_be_bytes().iter().skip_while(|&v| *v == 0u8) { - w.write_all(&[*b])?; - } - self.write(w)?; - Ok(()) - } - pub fn size_full(&self) -> usize { - (8 - self.id().leading_zeros() as usize / 8) + self.size() - } -} - -pub fn write_vint(w: &mut impl Write, i: u64) -> Result<()> { - if i > (1 << 56) - 1 { - Err(Error::VarintTooLong)? - } - let len = (64 - i.leading_zeros() as usize) / 7 + 1; - let mut bytes = i.to_be_bytes(); - let trunc = &mut bytes[(8 - len)..]; - trunc[0] |= 1 << (8 - len); - w.write_all(trunc)?; - Ok(()) -} - -/// this routine works only, if the varint is as small as it can possibly be. -/// thats not always what we do though - see below -pub fn vint_length(v: u64) -> usize { - let mut len = 1; - while len <= 8 { - if v < (1 << ((7 * len) - 1)) { - break; - } - len += 1; - } - len -} -pub fn bad_vint_length(v: u64) -> usize { - match 64 - v.leading_zeros() { - x if x <= 8 => 1, - x if x <= 16 => 2, - x if x <= 32 => 4, - _ => 8, - } -} - -pub trait WriteValue { - /// writes the contents of a tag, including the size but excluding the id. 
- fn write_to(&self, w: &mut impl Write) -> Result<()>; - fn size(&self) -> usize; -} - -impl WriteValue for i64 { - fn write_to(&self, w: &mut impl Write) -> Result<()> { - match 64 - self.leading_zeros() { - x if x <= 8 => { - w.write_all(&[0x81])?; - w.write_all(&(*self as i8).to_be_bytes())?; - } - x if x <= 16 => { - w.write_all(&[0x82])?; - w.write_all(&(*self as i16).to_be_bytes())?; - } - x if x <= 32 => { - w.write_all(&[0x84])?; - w.write_all(&(*self as i32).to_be_bytes())?; - } - _ => { - w.write_all(&[0x88])?; - w.write_all(&self.to_be_bytes())?; - } - }; - Ok(()) - } - - fn size(&self) -> usize { - 1 + match 64 - self.leading_zeros() { - x if x <= 8 => 1, - x if x <= 16 => 2, - x if x <= 32 => 4, - _ => 8, - } - } -} -impl WriteValue for u64 { - fn write_to(&self, w: &mut impl Write) -> Result<()> { - match 64 - self.leading_zeros() { - x if x <= 8 => { - w.write_all(&[0x81])?; - w.write_all(&(*self as u8).to_be_bytes())?; - } - x if x <= 16 => { - w.write_all(&[0x82])?; - w.write_all(&(*self as u16).to_be_bytes())?; - } - x if x <= 32 => { - w.write_all(&[0x84])?; - w.write_all(&(*self as u32).to_be_bytes())?; - } - _ => { - w.write_all(&[0x88])?; - w.write_all(&self.to_be_bytes())?; - } - }; - Ok(()) - } - fn size(&self) -> usize { - 1 + match 64 - self.leading_zeros() { - x if x <= 8 => 1, - x if x <= 16 => 2, - x if x <= 32 => 4, - _ => 8, - } - } -} -impl WriteValue for f64 { - fn write_to(&self, w: &mut impl Write) -> Result<()> { - w.write_all(&[0x88])?; - w.write_all(&self.to_be_bytes())?; - Ok(()) - } - fn size(&self) -> usize { - 1 + 8 - } -} -impl WriteValue for Vec<u8> { - fn write_to(&self, w: &mut impl Write) -> Result<()> { - write_vint(w, self.len() as u64)?; - w.write_all(self)?; - Ok(()) - } - - fn size(&self) -> usize { - vint_length(self.len() as u64) + self.len() - } -} -impl WriteValue for String { - fn write_to(&self, w: &mut impl Write) -> Result<()> { - let sl = self.as_bytes(); - write_vint(w, sl.len() as u64)?; - 
w.write_all(sl)?; - Ok(()) - } - - fn size(&self) -> usize { - vint_length(self.len() as u64) + self.len() - } -} -impl WriteValue for EbmlSize { - fn write_to(&self, w: &mut impl Write) -> Result<()> { - match self { - EbmlSize::Exact(s) => write_vint(w, *s as u64)?, - EbmlSize::Unknown => w.write_all(&[0xff])?, - } - Ok(()) - } - fn size(&self) -> usize { - match self { - EbmlSize::Exact(s) => vint_length(*s as u64), - EbmlSize::Unknown => 1, - } - } -} - -impl WriteValue for Master { - fn write_to(&self, w: &mut impl Write) -> Result<()> { - match self { - Master::Start => EbmlSize::Unknown.write_to(w), - Master::End => { - unreachable!() - } - Master::Collected(c) => { - let mut size = 0; - for c in c { - size += c.size_full(); - } - EbmlSize::Exact(size).write_to(w)?; - for c in c { - c.write_full(w)?; - } - Ok(()) - } - } - } - fn size(&self) -> usize { - match self { - Master::Start => EbmlSize::Unknown.size(), - Master::End => unreachable!(), - Master::Collected(c) => { - let mut size = 0; - for c in c { - size += c.size_full(); - } - EbmlSize::Exact(size).size() + size - } - } - } -} - -#[cfg(test)] -mod test { - use crate::{Master, MatroskaTag, WriteValue}; - - #[test] - fn test_int_size() { - let test = |x: i64| { - eprintln!("{x:?}"); - let mut out = Vec::new(); - x.write_to(&mut out).unwrap(); - assert_eq!(out.len(), x.size()) - }; - test(1); - test(2); - test(20); - test(200); - test(2000); - test(20000); - test(200000); - } - - #[test] - fn test_uint_size() { - let test = |x: u64| { - eprintln!("{x:?}"); - let mut out = Vec::new(); - x.write_to(&mut out).unwrap(); - assert_eq!(out.len(), x.size()) - }; - test(1); - test(2); - test(20); - test(200); - test(2000); - test(20000); - test(200000); - } - - #[test] - fn test_string_size() { - let test = |x: &str| { - eprintln!("{x:?}"); - let x = x.to_owned(); - let mut out = Vec::new(); - x.write_to(&mut out).unwrap(); - assert_eq!(out.len(), x.size()) - }; - test(""); - test("x"); - test("wub"); - 
test("Hello world"); - test("just making sure that"); - test("this is actually working *exactly* how i want it to"); - test("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); - } - - #[test] - fn test_vec_size() { - let test = |x: &[u8]| { - eprintln!("{x:?}"); - let x = x.to_owned(); - let mut out = Vec::new(); - x.write_to(&mut out).unwrap(); - assert_eq!(out.len(), x.size()) - }; - test(&[]); - test(&[1]); - test(&[1, 2]); - test(&[23, 4, 4, 23, 4, 234, 232, 4, 234, 23, 1]); - test(&[ - 34, 123, 5, 1, 3, 13, 1, 23, 12, 5, 5, 3, 123, 12, 3, 13, 12, 5, 3, 123, 13, 1, 3, - ]); - } - - #[test] - fn test_master_size() { - let test = |x: Master| { - eprintln!("{x:?}"); - let x = x.to_owned(); - let mut out = Vec::new(); - x.write_to(&mut out).unwrap(); - assert_eq!(out.len(), x.size()) - }; - test(Master::Start); - // test(Master::End); - test(Master::Collected(vec![])); - test(Master::Collected(vec![MatroskaTag::EbmlVersion(1)])); - test(Master::Collected(vec![ - MatroskaTag::EbmlVersion(1), - MatroskaTag::EbmlMaxSizeLength(4), - MatroskaTag::EbmlReadVersion(3), - MatroskaTag::EbmlMaxIdLength(4), - ])); - } -} diff --git a/remuxer/Cargo.toml b/remuxer/Cargo.toml index 98dd86c..24cd9ab 100644 --- a/remuxer/Cargo.toml +++ b/remuxer/Cargo.toml @@ -1,19 +1,18 @@ [package] name = "jellyremuxer" version = "0.1.0" -edition = "2021" +edition = "2024" [dependencies] -jellymatroska = { path = "../matroska" } jellycache = { path = "../cache" } +hex = "0.4.3" -tokio = { version = "1.43.0", features = ["io-util"] } anyhow = "1.0.95" +env_logger = "0.11.8" log = { workspace = true } serde = { version = "1.0.217", features = ["derive"] } bincode = { version = "2.0.0-rc.3", features = ["serde"] } -ebml-struct = { git = "https://codeberg.org/metamuffin/ebml-struct", features = [ - "bincode", -] } +winter-ebml = { git = "https://codeberg.org/metamuffin/ebml-rs", package = "ebml" } +winter-matroska = { git = 
"https://codeberg.org/metamuffin/ebml-rs", package = "matroska" } diff --git a/remuxer/src/bin/mkvinfo.rs b/remuxer/src/bin/mkvinfo.rs new file mode 100644 index 0000000..0899245 --- /dev/null +++ b/remuxer/src/bin/mkvinfo.rs @@ -0,0 +1,20 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin <metamuffin.org> +*/ + +use anyhow::{Result, anyhow}; +use jellyremuxer::demuxers::{Demuxer, DemuxerNew, matroska::MatroskaDemuxer}; +use std::{env::args, fs::File}; + +fn main() -> Result<()> { + env_logger::init_from_env("LOG"); + let path = args().nth(1).ok_or(anyhow!("first arg is input path"))?; + let file = File::open(path)?; + let mut reader = MatroskaDemuxer::new(Box::new(file)); + + println!("INFO: {:#?}", reader.info()?); + + Ok(()) +} diff --git a/remuxer/src/demuxers/matroska.rs b/remuxer/src/demuxers/matroska.rs new file mode 100644 index 0000000..000970e --- /dev/null +++ b/remuxer/src/demuxers/matroska.rs @@ -0,0 +1,199 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. 
+    Copyright (C) 2025 metamuffin <metamuffin.org>
+*/
+
+use crate::demuxers::{Demuxer, DemuxerNew, ReadSeek};
+use anyhow::{Context, Result, anyhow, bail};
+use log::debug;
+use std::io::{BufRead, BufReader, Read, Seek, SeekFrom};
+use winter_ebml::{Ebml, EbmlHeader, VintReadExt, read_vint_slice};
+use winter_matroska::{
+    Attachments, Chapters, Cluster, Cues, Info, MatroskaFile, SeekHead, Segment, Tags, Tracks,
+};
+
+/// Demuxer that reads the children of the Segment of a Matroska/WebM file from a seekable reader.
+///
+/// The segment offset and the SeekHead are looked up lazily and cached after the first successful
+/// lookup.
+pub struct MatroskaDemuxer {
+    reader: BufReader<Box<dyn ReadSeek>>,
+    /// Cached stream position of the first byte after the Segment id and size.
+    segment_offset: Option<u64>,
+    /// Cached SeekHead, if one was found at the start of the segment.
+    seek_head: Option<SeekHead>,
+}
+
+impl DemuxerNew for MatroskaDemuxer {
+    /// Wraps `reader` in a buffer; nothing is read until the first query.
+    fn new(reader: Box<dyn ReadSeek>) -> Self {
+        Self {
+            reader: BufReader::new(reader),
+            seek_head: None,
+            segment_offset: None,
+        }
+    }
+}
+impl MatroskaDemuxer {
+    /// Returns `true` when the reader has no more bytes to offer. Does not consume any input.
+    fn at_eof(&mut self) -> Result<bool> {
+        Ok(self.reader.fill_buf()?.is_empty())
+    }
+
+    /// Returns the stream position right after the Segment id and size.
+    ///
+    /// On first use the file is read from the start and the EBML header is validated; the result
+    /// is cached afterwards.
+    ///
+    /// # Errors
+    /// Fails if the file does not start with an EBML header, the doc type is neither `matroska`
+    /// nor `webm`, EBMLMaxIDLength is not 4, EBMLMaxSizeLength is outside `1..=8`, or the header
+    /// is not directly followed by a Segment.
+    pub fn segment_offset(&mut self) -> Result<u64> {
+        if let Some(s) = self.segment_offset {
+            return Ok(s);
+        }
+        self.reader.seek(SeekFrom::Start(0))?;
+
+        let header_tag = self.reader.read_vint()?;
+        let header_size = self.reader.read_vint()?;
+        if header_tag != MatroskaFile::TAG_EBML_HEADER {
+            bail!("file is not ebml")
+        }
+
+        let mut header_raw = vec![0u8; header_size as usize];
+        self.reader.read_exact(&mut header_raw)?;
+        let header = EbmlHeader::read(&header_raw).context("parsing ebml header")?;
+        if !matches!(header.doc_type.as_str(), "matroska" | "webm") {
+            bail!("file is {:?} but not matroska/webm", header.doc_type)
+        }
+        if header.ebml_max_id_length != 4 {
+            bail!(
+                "file has invalid EBMLMaxIDLength of {}",
+                header.ebml_max_id_length
+            )
+        }
+        if !matches!(header.ebml_max_size_length, 1..=8) {
+            // Report the field that was actually checked (previously printed EBMLMaxIDLength).
+            bail!(
+                "file has invalid EBMLMaxSizeLength of {}",
+                header.ebml_max_size_length
+            )
+        }
+
+        let segment_tag = self.reader.read_vint()?;
+        let _segment_size = self.reader.read_vint()?;
+        if segment_tag != MatroskaFile::TAG_SEGMENT {
+            bail!("header not followed by segment")
+        }
+
+        let off = self.reader.stream_position()?;
+        debug!("segment offset is {off} (0x{off:x})");
+        self.segment_offset = Some(off);
+        Ok(off)
+    }
+
+    /// Positions the reader on the first element inside the Segment.
+    pub fn seek_segment_start(&mut self) -> Result<()> {
+        let seg_start = self.segment_offset()?;
+        self.reader.seek(SeekFrom::Start(seg_start))?;
+        Ok(())
+    }
+
+    /// Parse SeekHead at segment start if exists
+    ///
+    /// Returns `Ok(None)` when the first element of the segment is not a SeekHead. A parsed
+    /// SeekHead is cached; a negative result is not, so it is looked up again on the next call.
+    pub fn seek_head(&mut self) -> Result<Option<&SeekHead>> {
+        if self.seek_head.is_some() {
+            return Ok(self.seek_head.as_ref());
+        }
+        self.seek_segment_start()?;
+        let tag = self.reader.read_vint()?;
+        let size = self.reader.read_vint()?;
+        // TODO skip possible CRC32 tag
+        if tag != Segment::TAG_SEEK_HEADS {
+            return Ok(None);
+        }
+
+        let mut raw = vec![0u8; size as usize];
+        self.reader.read_exact(&mut raw)?;
+        let seek_head = SeekHead::read(&raw).context("parsing seek head")?;
+        debug!("parsed {seek_head:#?}");
+        self.seek_head = Some(seek_head);
+        Ok(self.seek_head.as_ref())
+    }
+
+    /// Seeks to the content of child tag of Segment possibly optimized via SeekHead. Returns the size of the content.
+    ///
+    /// Returns `Ok(None)` if the tag is not listed in the SeekHead, or, when there is no SeekHead,
+    /// if the linear scan reaches the first Cluster or the end of the file without finding it.
+    pub fn seek_to_segment_tag(&mut self, search_tag: u64) -> Result<Option<u64>> {
+        if let Some(seek_head) = self.seek_head()? {
+            let Some(segment_position) = seek_head
+                .seeks
+                .iter()
+                .find(|s| read_vint_slice(&mut s.id.as_slice()).map_or(false, |x| x == search_tag))
+                .map(|s| s.position)
+            else {
+                return Ok(None);
+            };
+            // SeekHead positions are added to the segment offset to get a stream position.
+            let segment_offset = self.segment_offset()?;
+            self.reader
+                .seek(SeekFrom::Start(segment_offset + segment_position))?;
+
+            let tag = self.reader.read_vint()?;
+            let size = self.reader.read_vint()?;
+            if tag != search_tag {
+                bail!("SeekHead was lying (expected {search_tag:x}, got {tag:x})");
+            }
+            Ok(Some(size))
+        } else {
+            self.seek_segment_start()?;
+            loop {
+                // A missing tag in a file without clusters is "not found", not an I/O error.
+                if self.at_eof()? {
+                    break Ok(None);
+                }
+                let tag = self.reader.read_vint()?;
+                let size = self.reader.read_vint()?;
+                if tag == search_tag {
+                    break Ok(Some(size));
+                }
+                if tag == Segment::TAG_CLUSTERS {
+                    break Ok(None);
+                }
+                self.reader.seek_relative(size as i64)?;
+            }
+        }
+    }
+
+    /// Reads `size` bytes from the current position and parses them as the content of `Tag`.
+    pub fn read_tag<Tag: Ebml>(&mut self, size: u64) -> Result<Tag> {
+        let mut buffer = vec![0u8; size as usize];
+        self.reader.read_exact(&mut buffer)?;
+        Ok(Tag::read(&buffer)?)
+    }
+    /// Locates the Segment child with id `tag` and parses it. `name` is only used for logging and
+    /// error messages.
+    ///
+    /// # Errors
+    /// Fails if the tag cannot be found or its content does not parse.
+    pub fn read_segment_tag<Tag: Ebml>(&mut self, name: &'static str, tag: u64) -> Result<Tag> {
+        debug!("reading {name:?}");
+        let size = self
+            .seek_to_segment_tag(tag)?
+            .ok_or_else(|| anyhow!("{name} tag missing"))?;
+        self.read_tag(size)
+            .with_context(|| format!("parsing {name} failed"))
+    }
+}
+impl Demuxer for MatroskaDemuxer {
+    fn info(&mut self) -> Result<Info> {
+        self.read_segment_tag("Info", Segment::TAG_INFO)
+    }
+    fn tracks(&mut self) -> Result<Tracks> {
+        self.read_segment_tag("Tracks", Segment::TAG_TRACKS)
+    }
+    fn chapters(&mut self) -> Result<Chapters> {
+        self.read_segment_tag("Chapters", Segment::TAG_CHAPTERS)
+    }
+    fn attachments(&mut self) -> Result<Attachments> {
+        self.read_segment_tag("Attachments", Segment::TAG_ATTACHMENTS)
+    }
+    fn tags(&mut self) -> Result<Tags> {
+        self.read_segment_tag("Tags", Segment::TAG_TAGS)
+    }
+    fn cues(&mut self) -> Result<Cues> {
+        self.read_segment_tag("Cues", Segment::TAG_CUES)
+    }
+
+    /// Positions the reader for the next `read_cluster` call.
+    ///
+    /// With `Some(pos)` the reader is moved to `pos`, which has to be the position of an element
+    /// header (for example a position returned by `read_cluster`). With `None` the reader is
+    /// moved to the start of the segment; `read_cluster` then skips ahead to the first Cluster.
+    fn seek_cluster(&mut self, position: Option<u64>) -> Result<()> {
+        match position {
+            Some(pos) => {
+                self.reader.seek(SeekFrom::Start(pos))?;
+            }
+            // `seek_to_segment_tag` is not usable here: it leaves the reader *after* the Cluster
+            // id and size, while `read_cluster` has to start on an element header. Using it made
+            // `read_cluster` skip the first cluster.
+            None => self.seek_segment_start()?,
+        }
+        Ok(())
+    }
+    /// Reads the next Cluster at or after the current position, skipping other elements.
+    ///
+    /// Returns the stream position of the Cluster's header together with the parsed Cluster, or
+    /// `Ok(None)` once the end of the file is reached.
+    fn read_cluster(&mut self) -> Result<Option<(u64, Cluster)>> {
+        loop {
+            let position = self.reader.stream_position()?;
+            if self.at_eof()? {
+                break Ok(None);
+            }
+            let tag = self.reader.read_vint()?;
+            let size = self.reader.read_vint()?;
+            if tag != Segment::TAG_CLUSTERS {
+                self.reader.seek_relative(size as i64)?;
+                continue;
+            }
+
+            let mut buffer = vec![0u8; size as usize];
+            self.reader.read_exact(&mut buffer)?;
+            let cluster = Cluster::read(&buffer).context("parsing Cluster")?;
+
+            break Ok(Some((position, cluster)));
+        }
+    }
+}
diff --git a/remuxer/src/demuxers/mod.rs b/remuxer/src/demuxers/mod.rs
new file mode 100644
index 0000000..e47e3d7
--- /dev/null
+++ b/remuxer/src/demuxers/mod.rs
@@ -0,0 +1,29 @@
+/*
+    This file is part of jellything (https://codeberg.org/metamuffin/jellything)
+    which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
+ Copyright (C) 2025 metamuffin <metamuffin.org> +*/ + +pub mod matroska; + +use anyhow::Result; +use std::io::{Read, Seek}; +use winter_matroska::{Attachments, Chapters, Cluster, Cues, Info, Tags, Tracks}; + +pub trait ReadSeek: Read + Seek {} +impl<T: Read + Seek> ReadSeek for T {} + +pub trait DemuxerNew: Demuxer + Sized { + fn new(reader: Box<dyn ReadSeek>) -> Self; +} +pub trait Demuxer { + fn info(&mut self) -> Result<Info>; + fn tracks(&mut self) -> Result<Tracks>; + fn chapters(&mut self) -> Result<Chapters>; + fn attachments(&mut self) -> Result<Attachments>; + fn tags(&mut self) -> Result<Tags>; + fn cues(&mut self) -> Result<Cues>; + + fn seek_cluster(&mut self, position: Option<u64>) -> Result<()>; + fn read_cluster(&mut self) -> Result<Option<(u64, Cluster)>>; +} diff --git a/remuxer/src/extract.rs b/remuxer/src/extract.rs deleted file mode 100644 index 15c1e9d..0000000 --- a/remuxer/src/extract.rs +++ /dev/null @@ -1,51 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. 
- Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use crate::seek_index::get_seek_index; -use anyhow::{anyhow, bail}; -use jellymatroska::{block::Block, read::EbmlReader, Master, MatroskaTag}; -use log::debug; -use std::{fs::File, io::BufReader, path::PathBuf}; - -pub type TrackExtract = Vec<(u64, Option<u64>, Vec<u8>)>; -pub fn extract_track(path: PathBuf, track: u64) -> anyhow::Result<TrackExtract> { - let file = File::open(&path)?; - let mut reader = EbmlReader::new(BufReader::new(file)); - let index = get_seek_index(&path)?; - let index = index.get(&track).ok_or(anyhow!("track missing"))?; - - let mut out = Vec::new(); - for b in &index.blocks { - reader.seek(b.source_off, MatroskaTag::BlockGroup(Master::Start))?; - let (duration, block) = read_group(&mut reader)?; - assert_eq!(track, block.track, "seek index is wrong"); - out.push((b.pts, duration, block.data)) - } - Ok(out) -} - -pub fn read_group(segment: &mut EbmlReader) -> anyhow::Result<(Option<u64>, Block)> { - let (mut dur, mut block) = (None, None); - for _ in 0..10 { - let (_, item) = segment.next().ok_or(anyhow!("eof"))??; - match item { - MatroskaTag::Void(_) => (), - MatroskaTag::Crc32(_) => (), - MatroskaTag::Cluster(_) => bail!("unexpected cluster"), - MatroskaTag::Timestamp(_) => (), - MatroskaTag::SimpleBlock(block) => { - return Ok((None, block)); // HDMV/PGS does not use duration?! 
- } - MatroskaTag::BlockGroup(Master::Start) => (), - MatroskaTag::BlockGroup(Master::End) => return Ok((dur, block.unwrap())), - MatroskaTag::BlockDuration(duration) => dur = Some(duration), - MatroskaTag::Block(blk) => block = Some(blk), - MatroskaTag::Cues(_) => bail!("reached cues, this is the end"), - MatroskaTag::Segment(Master::End) => bail!("extractor reached segment end"), - _ => debug!("(rs) tag ignored: {item:?}"), - } - } - bail!(".") -} diff --git a/remuxer/src/fragment.rs b/remuxer/src/fragment.rs deleted file mode 100644 index 45a671f..0000000 --- a/remuxer/src/fragment.rs +++ /dev/null @@ -1,219 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin <metamuffin.org> -*/ - -use crate::{ - ebml_header, ebml_segment_info, ebml_track_entry, - metadata::{matroska_metadata, MatroskaMetadata}, - seek_index::get_seek_index, - segment_extractor::SegmentExtractIter, -}; -use anyhow::{anyhow, Context, Result}; -use jellymatroska::{read::EbmlReader, write::EbmlWriter, Master, MatroskaTag}; -use log::{debug, info}; -use std::{ - fs::File, - io::{BufReader, BufWriter, Write}, - ops::Range, - path::Path, -}; - -const FRAGMENT_LENGTH: f64 = 4.; - -pub fn fragment_index(path: &Path, track: u64) -> Result<Vec<Range<f64>>> { - let meta = matroska_metadata(path)?; - let duration = media_duration(&meta); - let force_kf = meta - .as_ref() - .tracks - .as_ref() - .unwrap() - .entries - .iter() - .find(|t| t.track_number == track) - .unwrap() - .track_type - == 17; - - let index = get_seek_index(path)?; - let index = index - .get(&track) - .ok_or(anyhow!("seek index track missing"))?; - - let n_kf = if force_kf { - index.blocks.len() - } else { - index.keyframes.len() - }; - - let average_kf_interval = duration / n_kf as f64; - let kf_per_frag = (FRAGMENT_LENGTH / average_kf_interval).ceil() as usize; - 
debug!("average keyframe interval: {average_kf_interval}"); - debug!(" => keyframes per frag {kf_per_frag}"); - - let n_frags = n_kf.div_ceil(kf_per_frag); - Ok((0..n_frags) - .map(|i| { - let start = index.blocks[if force_kf { - i * kf_per_frag - } else { - index.keyframes[i * kf_per_frag] - }] - .pts as f64 - / 1000.; - let end = if force_kf { - let n = (i + 1) * kf_per_frag; - if n >= index.blocks.len() { - None - } else { - Some(n) - } - } else { - index.keyframes.get((i + 1) * kf_per_frag).copied() - } - .map(|i| index.blocks[i].pts as f64 / 1000.) - .unwrap_or(duration); - start..end - }) - .collect()) -} - -pub fn write_fragment_into( - writer: impl Write, - path: &Path, - track: u64, - webm: bool, - title: &str, - n: usize, -) -> anyhow::Result<()> { - let meta = matroska_metadata(path)?; - let duration = media_duration(&meta); - let track_meta = meta - .as_ref() - .tracks - .as_ref() - .unwrap() - .entries - .iter() - .find(|t| t.track_number == track) - .unwrap(); - let force_kf = track_meta.track_type == 17; - - info!("writing fragment {n} of {:?} (track {track})", title); - let mut output = EbmlWriter::new(BufWriter::new(writer), 0); - let mapped = 1; - info!("\t- {track} {path:?} ({} => {mapped})", track); - // info!("\t {}", info); - let file = File::open(path).context("opening source file")?; - let index = get_seek_index(path)?; - let index = index - .get(&track) - .ok_or(anyhow!("track missing 2"))? 
- .to_owned(); - debug!("\t seek index: {} blocks loaded", index.blocks.len()); - let mut reader = EbmlReader::new(BufReader::new(file)); - - let n_kf = if force_kf { - index.blocks.len() - } else { - index.keyframes.len() - }; - debug!("{duration} {n_kf}"); - let average_kf_interval = duration / n_kf as f64; - let kf_per_frag = (FRAGMENT_LENGTH / average_kf_interval).ceil() as usize; - debug!("average keyframe interval: {average_kf_interval}"); - debug!(" => keyframes per frag {kf_per_frag}"); - - let (start_block_index, end_block_index) = if force_kf { - (n * kf_per_frag, (n + 1) * kf_per_frag) - } else { - ( - *index - .keyframes - .get(n * kf_per_frag) - .ok_or(anyhow!("fragment index out of range"))?, - *index - .keyframes - .get((n + 1) * kf_per_frag) - .unwrap_or(&index.blocks.len()), - ) - }; - debug!("writing blocks {start_block_index} to {end_block_index}."); - - let start_block = &index.blocks[start_block_index]; - let last_block_pts = index - .blocks - .get(end_block_index) - .map(|b| b.pts) - .unwrap_or((duration * 1000.) 
as u64); - - output.write_tag(&ebml_header(webm))?; - output.write_tag(&MatroskaTag::Segment(Master::Start))?; - output.write_tag(&ebml_segment_info( - title.to_string(), - (last_block_pts - start_block.pts) as f64 / 1000., - ))?; - output.write_tag(&MatroskaTag::Tracks(Master::Collected(vec![ - ebml_track_entry(mapped, track_meta), - ])))?; - - reader.seek(start_block.source_off, MatroskaTag::Cluster(Master::Start))?; - let mut reader = SegmentExtractIter::new(&mut reader, track); - - { - // TODO this one caused fragments to get dropped by MSE for no reason - // for i in start_block_index..end_block_index { - // let index_block = &index.blocks[i]; - // let (mut block, duration) = reader.next()?; - - // assert_eq!(index_block.size, block.data.len(), "seek index is wrong"); - - // block.track = 1; - // block.timestamp_off = 0; - // output.write_tag(&MatroskaTag::Cluster(Master::Collected(vec![ - // MatroskaTag::Timestamp(index_block.pts - start_block.pts), - // if let Some(duration) = duration { - // MatroskaTag::BlockGroup(Master::Collected(vec![ - // MatroskaTag::BlockDuration(duration), - // MatroskaTag::Block(block), - // ])) - // } else { - // MatroskaTag::SimpleBlock(block) - // }, - // ])))?; - // } - } - { - let mut blocks = vec![MatroskaTag::Timestamp(start_block.pts)]; - for i in start_block_index..end_block_index { - let index_block = &index.blocks[i]; - let (mut block, duration) = reader.next_block()?; - - assert_eq!(index_block.size, block.data.len(), "seek index is wrong"); - - block.track = 1; - // TODO this does generate overflows sometimes - block.timestamp_off = (index_block.pts as i64 - start_block.pts as i64) - .try_into() - .unwrap(); - if let Some(duration) = duration { - blocks.push(MatroskaTag::BlockGroup(Master::Collected(vec![ - MatroskaTag::BlockDuration(duration), - MatroskaTag::Block(block), - ]))) - } else { - blocks.push(MatroskaTag::SimpleBlock(block)) - } - } - output.write_tag(&MatroskaTag::Cluster(Master::Collected(blocks)))?; - } 
- debug!("wrote {} bytes", output.position()); - Ok(()) -} - -fn media_duration(m: &MatroskaMetadata) -> f64 { - let info = m.info.as_ref().unwrap(); - (info.duration.unwrap_or_default() * info.timestamp_scale as f64) / 1_000_000_000. -} diff --git a/remuxer/src/lib.rs b/remuxer/src/lib.rs index bb732d7..041f386 100644 --- a/remuxer/src/lib.rs +++ b/remuxer/src/lib.rs @@ -3,100 +3,12 @@ which is licensed under the GNU Affero General Public License (version 3); see /COPYING. Copyright (C) 2025 metamuffin <metamuffin.org> */ -#![feature(random, exit_status_error)] -pub mod extract; -pub mod fragment; -pub mod matroska_to_mpeg4; -pub mod matroska_to_webm; -pub mod metadata; -pub mod remux; -pub mod seek_index; -pub mod segment_extractor; -pub mod trim_writer; -use ebml_struct::matroska::TrackEntry; -pub use fragment::write_fragment_into; -use jellymatroska::{Master, MatroskaTag}; -pub use matroska_to_mpeg4::matroska_to_mpeg4; -pub use remux::remux_stream_into; +pub mod demuxers; +pub mod magic; -pub fn ebml_header(webm: bool) -> MatroskaTag { - MatroskaTag::Ebml(Master::Collected(vec![ - MatroskaTag::EbmlVersion(1), - MatroskaTag::EbmlReadVersion(1), - MatroskaTag::EbmlMaxIdLength(4), - MatroskaTag::EbmlMaxSizeLength(8), - MatroskaTag::DocType(if webm { - "webm".to_string() - } else { - "matroska".to_string() - }), - MatroskaTag::DocTypeVersion(4), - MatroskaTag::DocTypeReadVersion(2), - ])) -} -pub fn ebml_segment_info(title: String, duration: f64) -> MatroskaTag { - MatroskaTag::Info(Master::Collected(vec![ - MatroskaTag::TimestampScale(1_000_000), - MatroskaTag::Duration(duration * 1000.0), - MatroskaTag::Title(title), - MatroskaTag::MuxingApp("jellyremux".to_string()), - MatroskaTag::WritingApp("jellything".to_string()), - ])) -} - -pub fn ebml_track_entry(number: u64, track: &TrackEntry) -> MatroskaTag { - let mut els = vec![ - MatroskaTag::TrackNumber(number), - MatroskaTag::TrackUID(number * 100), // TODO is this ok? 
- MatroskaTag::FlagLacing(track.flag_lacing), - MatroskaTag::Language(track.language.clone()), - MatroskaTag::CodecID(track.codec_id.clone()), - MatroskaTag::CodecDelay(track.codec_delay), - MatroskaTag::SeekPreRoll(track.seek_pre_roll), - ]; - if let Some(d) = &track.default_duration { - els.push(MatroskaTag::DefaultDuration(*d)); - } - match track.track_type { - 1 => { - let video = track.video.as_ref().unwrap(); - els.push(MatroskaTag::TrackType(1)); - let mut props = vec![ - MatroskaTag::PixelWidth(video.pixel_width), - MatroskaTag::PixelHeight(video.pixel_height), - ]; - props.push(MatroskaTag::DisplayWidth( - video.display_width.unwrap_or(video.pixel_width), - )); - props.push(MatroskaTag::DisplayHeight( - video.display_height.unwrap_or(video.pixel_height), - )); - props.push(MatroskaTag::DisplayUnit(video.display_unit)); - if let Some(fps) = video.frame_rate { - props.push(MatroskaTag::FrameRate(fps)) - } - els.push(MatroskaTag::Video(Master::Collected(props))) - } - 2 => { - let audio = track.audio.as_ref().unwrap(); - els.push(MatroskaTag::TrackType(2)); - let mut props = vec![ - MatroskaTag::SamplingFrequency(audio.sampling_frequency), - MatroskaTag::Channels(audio.channels), - ]; - if let Some(bit_depth) = audio.bit_depth { - props.push(MatroskaTag::BitDepth(bit_depth)); - } - els.push(MatroskaTag::Audio(Master::Collected(props))); - } - 17 => { - els.push(MatroskaTag::TrackType(17)); - } - _ => unreachable!(), - } - if let Some(d) = &track.codec_private { - els.push(MatroskaTag::CodecPrivate(d.clone())); - } - MatroskaTag::TrackEntry(Master::Collected(els)) +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum ContainerFormat { + Matroska, + Webm, } diff --git a/remuxer/src/magic.rs b/remuxer/src/magic.rs new file mode 100644 index 0000000..65ab4de --- /dev/null +++ b/remuxer/src/magic.rs @@ -0,0 +1,67 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License 
(version 3); see /COPYING. + Copyright (C) 2025 metamuffin <metamuffin.org> +*/ + +use crate::ContainerFormat; +use anyhow::Result; +use std::io::Read; +use winter_ebml::{Ebml, EbmlHeader, read_vint_slice}; +use winter_matroska::MatroskaFile; + +pub fn detect_container_format(reader: &mut dyn Read) -> Result<Option<ContainerFormat>> { + let mut data = Vec::new(); + reader.take(128).read_to_end(&mut data)?; + Ok(test_matroska(&data)) +} + +fn test_matroska(mut data: &[u8]) -> Option<ContainerFormat> { + let tag = read_vint_slice(&mut data)?; + if tag != MatroskaFile::TAG_EBML_HEADER { + return None; + }; + let size = read_vint_slice(&mut data)? as usize; + if size > data.len() { + return None; + } + let header = EbmlHeader::read(&data[..size]).ok()?; + match header.doc_type.as_str() { + "matroska" => Some(ContainerFormat::Matroska), + "webm" => Some(ContainerFormat::Webm), + _ => None, + } +} + +#[test] +fn verify_matroska() { + // WebM + let sample = "\ +1a45dfa39f4286810142f7810142f2810442f381084282847765626d4287\ +8104428581021853806701000000088a9c1a114d9b74bc4dbb8b53ab8415\ +49a96653ac81a14dbb8b53ab841654ae6b53ac81d64dbb8c53ab841254c3\ +6753ac8201a04dbb8e53ab841c53bb6b53ac84088a9accec010000000000\ +0057000000000000"; + let sample = hex::decode(sample).unwrap(); + assert_eq!(test_matroska(&sample), Some(ContainerFormat::Webm)); + + // Matroska + let sample = "\ +1a45dfa3a34286810142f7810142f2810442f381084282886d6174726f73\ +6b61428781044285810218538067010000005d66b4a2114d9b74c2bf8492\ +1ae3e14dbb8b53ab841549a96653ac81a14dbb8b53ab841654ae6b53ac81\ +ef4dbb8c53ab841254c36753ac82019b4dbb8e53ab841c53bb6b53ac845d\ +66a14aec01000000"; + let sample = hex::decode(sample).unwrap(); + assert_eq!(test_matroska(&sample), Some(ContainerFormat::Matroska)); + + // GIF + let sample = "\ +47494638396100010001f71f000000002400004800006c0000900000b400\ +00d80000fc00000024002424004824006c2400902400b42400d82400fc24\ +000048002448004848006c4800904800b44800d84800fc4800006c00246c\ 
+00486c006c6c00906c00b46c00d86c00fc6c000090002490004890006c90\ +00909000b49000d8"; + let sample = hex::decode(sample).unwrap(); + assert_eq!(test_matroska(&sample), None) +} diff --git a/remuxer/src/matroska_to_mpeg4.rs b/remuxer/src/matroska_to_mpeg4.rs deleted file mode 100644 index cc0b967..0000000 --- a/remuxer/src/matroska_to_mpeg4.rs +++ /dev/null @@ -1,36 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use anyhow::Result; -use std::{ - fs::{remove_file, File}, - io::{copy, Read, Write}, - process::{Command, Stdio}, - random::random, -}; - -pub fn matroska_to_mpeg4( - mut input: impl Read + Send + 'static, - mut output: impl Write, -) -> Result<()> { - let path = format!("/tmp/jellything-tc-hack-{:016x}", random::<u64>(..)); - let args = format!( - "-hide_banner -loglevel warning -f matroska -i pipe:0 -c copy -f mp4 -movflags frag_keyframe+empty_moov {path}" - ); - let mut child = Command::new("ffmpeg") - .args(args.split(" ")) - .stdin(Stdio::piped()) - .stderr(Stdio::inherit()) - .spawn()?; - - let mut stdin = child.stdin.take().unwrap(); - copy(&mut input, &mut stdin)?; - drop(stdin); - child.wait()?.exit_ok()?; - copy(&mut File::open(&path)?, &mut output)?; - remove_file(path)?; - - Ok(()) -} diff --git a/remuxer/src/matroska_to_webm.rs b/remuxer/src/matroska_to_webm.rs deleted file mode 100644 index b77062b..0000000 --- a/remuxer/src/matroska_to_webm.rs +++ /dev/null @@ -1,89 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. 
- Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use crate::ebml_track_entry; -use anyhow::Context; -use ebml_struct::{ - ids::*, - matroska::{Cluster, Ebml, Info, Tracks}, - read::{EbmlReadExt, TagRead}, - write::TagWrite, -}; -use jellymatroska::{read::EbmlReader, write::EbmlWriter, Master, MatroskaTag}; -use log::warn; -use std::io::{BufReader, BufWriter, ErrorKind, Read, Seek, Write}; - -pub fn matroska_to_webm( - input: impl Read + Seek + 'static, - output: impl Write, -) -> anyhow::Result<()> { - let mut output = EbmlWriter::new(BufWriter::new(output), 0); - let mut input = EbmlReader::new(BufReader::new(input)); - - Ebml { - ebml_version: 1, - ebml_read_version: 1, - ebml_max_id_length: 4, - ebml_max_size_length: 8, - doc_type: "webm".to_string(), - doc_type_version: 4, - doc_type_read_version: 2, - doc_type_extensions: vec![], - } - .write(&mut output)?; - output.write_tag(&MatroskaTag::Segment(Master::Start))?; - - let (x, mut ebml) = input.read_tag()?; - assert_eq!(x, EL_EBML); - let ebml = Ebml::read(&mut ebml).unwrap(); - assert!(ebml.doc_type == "matroska" || ebml.doc_type == "webm"); - let (x, mut segment) = input.read_tag()?; - assert_eq!(x, EL_SEGMENT); - - loop { - let (x, mut seg) = match segment.read_tag() { - Ok(o) => o, - Err(e) if e.kind() == ErrorKind::UnexpectedEof => break, - Err(e) => return Err(e.into()), - }; - match x { - EL_INFO => { - let info = Info::read(&mut seg).context("info")?; - output.write_tag(&{ - MatroskaTag::Info(Master::Collected(vec![ - MatroskaTag::TimestampScale(info.timestamp_scale), - MatroskaTag::Duration(info.duration.unwrap_or_default()), - MatroskaTag::Title(info.title.unwrap_or_default()), - MatroskaTag::MuxingApp("jellyremux".to_string()), - MatroskaTag::WritingApp("jellything".to_string()), - ])) - })?; - } - EL_TRACKS => { - let tracks = Tracks::read(&mut seg).context("tracks")?; - output.write_tag(&MatroskaTag::Tracks(Master::Collected( - tracks - .entries - .into_iter() - .map(|t| 
ebml_track_entry(t.track_number, &t)) - .collect(), - )))?; - } - EL_VOID | EL_CRC32 | EL_CUES | EL_SEEKHEAD | EL_ATTACHMENTS | EL_TAGS => { - seg.consume()?; - } - EL_CLUSTER => { - let cluster = Cluster::read(&mut seg).context("cluster")?; - // TODO mixing both ebml libraries :))) - cluster.write(&mut output)?; - } - id => { - warn!("unknown top-level element {id:x}"); - seg.consume()?; - } - } - } - Ok(()) -} diff --git a/remuxer/src/metadata.rs b/remuxer/src/metadata.rs deleted file mode 100644 index ddcf4c0..0000000 --- a/remuxer/src/metadata.rs +++ /dev/null @@ -1,112 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use anyhow::{Context, Result}; -use bincode::{Decode, Encode}; -use ebml_struct::{ - ids::*, - matroska::*, - read::{EbmlReadExt, TagRead}, -}; -use jellycache::{cache_file, cache_memory, CachePath}; -use log::{info, warn}; -use std::{ - fs::File, - io::{BufReader, ErrorKind, Read, Write}, - path::Path, - sync::Arc, -}; - -pub use ebml_struct::matroska::TrackEntry as MatroskaTrackEntry; - -#[derive(Debug, Encode, Decode, Clone)] -pub struct MatroskaMetadata { - pub info: Option<Info>, - pub tracks: Option<Tracks>, - pub cover: Option<CachePath>, - pub chapters: Option<Chapters>, - pub tags: Option<Tags>, - pub infojson: Option<Vec<u8>>, -} -pub fn checked_matroska_metadata(path: &Path) -> Result<Arc<Option<MatroskaMetadata>>> { - cache_memory("mkmeta-check-v1", path, || { - let mut magic = [0; 4]; - File::open(path)?.read_exact(&mut magic).ok(); - if !matches!(magic, [0x1A, 0x45, 0xDF, 0xA3]) { - return Ok(None); - } - Ok(Some((*matroska_metadata(path)?).clone())) - }) -} -pub fn matroska_metadata(path: &Path) -> Result<Arc<MatroskaMetadata>> { - cache_memory("mkmeta-v3", path, || { - info!("reading {path:?}"); - let mut file = BufReader::new(File::open(path)?); - 
let mut file = file.by_ref().take(u64::MAX); - - let (x, mut ebml) = file.read_tag()?; - assert_eq!(x, EL_EBML); - let ebml = Ebml::read(&mut ebml).unwrap(); - assert!(ebml.doc_type == "matroska" || ebml.doc_type == "webm"); - let (x, mut segment) = file.read_tag()?; - assert_eq!(x, EL_SEGMENT); - - let mut info = None; - let mut infojson = None; - let mut tracks = None; - let mut cover = None; - let mut chapters = None; - let mut tags = None; - loop { - let (x, mut seg) = match segment.read_tag() { - Ok(o) => o, - Err(e) if e.kind() == ErrorKind::UnexpectedEof => break, - Err(e) => return Err(e.into()), - }; - match x { - EL_INFO => info = Some(Info::read(&mut seg).context("info")?), - EL_TRACKS => tracks = Some(Tracks::read(&mut seg).context("tracks")?), - EL_CHAPTERS => chapters = Some(Chapters::read(&mut seg).context("chapters")?), - EL_TAGS => tags = Some(Tags::read(&mut seg).context("tags")?), - EL_ATTACHMENTS => { - let attachments = Attachments::read(&mut seg).context("attachments")?; - for f in attachments.files { - match f.name.as_str() { - "info.json" => { - infojson = Some(f.data); - } - "cover.webp" | "cover.png" | "cover.jpg" | "cover.jpeg" - | "cover.avif" => { - cover = Some(cache_file("att-cover", path, move |mut file| { - file.write_all(&f.data)?; - Ok(()) - })?) - } - _ => (), - } - } - } - EL_VOID | EL_CRC32 | EL_CUES | EL_SEEKHEAD => { - seg.consume()?; - } - EL_CLUSTER => { - break; - } - id => { - warn!("unknown top-level element {id:x}"); - seg.consume()?; - } - } - } - Ok(MatroskaMetadata { - chapters, - cover, - info, - infojson, - tags, - tracks, - }) - }) -} diff --git a/remuxer/src/remux.rs b/remuxer/src/remux.rs deleted file mode 100644 index 9e6d4b5..0000000 --- a/remuxer/src/remux.rs +++ /dev/null @@ -1,311 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. 
- Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use std::{io::Write, ops::Range, path::PathBuf}; - -// struct ClusterLayout { -// position: usize, -// timestamp: u64, -// source_offsets: Vec<Option<u64>>, -// blocks: Vec<(usize, BlockIndex)>, -// } - -pub fn remux_stream_into( - _writer: impl Write, - _range: Range<usize>, - _path_base: PathBuf, - _selection: Vec<usize>, - _webm: bool, -) -> anyhow::Result<()> { - // info!("remuxing {:?} to have tracks {selection:?}", item.title); - // let writer = TrimWriter::new(BufWriter::new(writer), range.clone()); - // let mut output = EbmlWriter::new(writer, 0); - - // struct ReaderC { - // info: SourceTrack, - // reader: EbmlReader, - // mapped: u64, - // index: Arc<SeekIndex>, - // source_track_index: usize, - // codec_private: Option<Vec<u8>>, - // layouting_progress_index: usize, - // } - - // let timing_cp = Instant::now(); - - // let mut inputs = selection - // .iter() - // .enumerate() - // .map(|(index, sel)| { - // let info = item - // .media - // .as_ref() - // .unwrap() - // .tracks - // .get(*sel) - // .ok_or(anyhow!("track not available"))? - // .to_owned(); - // let source_path = path_base.join(&private.path); - // let mapped = index as u64 + 1; - // info!("\t- {sel} {source_path:?} ({} => {mapped})", private.track); - // info!("\t {}", info); - // let file = File::open(&source_path).context("opening source file")?; - // let index = get_seek_index(&source_path)?; - // let index = index - // .get(&(private.track as u64)) - // .ok_or(anyhow!("track missing 3"))? 
- // .to_owned(); - // debug!("\t seek index: {} blocks loaded", index.blocks.len()); - // let reader = EbmlReader::new(BufReader::new(file)); - // Ok(ReaderC { - // index, - // reader, - // info, - // mapped, - // source_track_index: private.track, - // codec_private: private.codec_private.clone(), - // layouting_progress_index: 0, - // }) - // }) - // .collect::<anyhow::Result<Vec<_>>>()?; - - // info!("(perf) prepare inputs: {:?}", Instant::now() - timing_cp); - // let timing_cp = Instant::now(); - - // output.write_tag(&ebml_header(webm))?; - - // output.write_tag(&MatroskaTag::Segment(Master::Start))?; - // let segment_offset = output.position(); - - // output.write_tag(&MatroskaTag::Info(Master::Collected(vec![ - // MatroskaTag::TimestampScale(1_000_000), - // MatroskaTag::Duration(item.media.as_ref().unwrap().duration * 1000.0), - // MatroskaTag::Title(item.title.clone().unwrap_or_default()), - // MatroskaTag::MuxingApp("jellyremux".to_string()), - // MatroskaTag::WritingApp("jellything".to_string()), - // ])))?; - - // let tracks_header = inputs - // .iter_mut() - // .map(|rc| ebml_track_entry(rc.mapped, rc.mapped, &rc.info, rc.codec_private.take())) - // .collect(); - // output.write_tag(&MatroskaTag::Tracks(Master::Collected(tracks_header)))?; - - // let mut segment_layout: Vec<ClusterLayout> = { - // let mut cluster_pts = 0; - // let mut clusters = vec![]; - // let mut cluster = vec![]; - // let mut source_offsets = vec![None; inputs.len()]; - // let mut gp = 0usize; // cluster position (in the segment) - // let mut p = 0usize; // block position (in the cluster) - // loop { - // let (track, block) = { - // let mut best_block = BlockIndex { - // pts: u64::MAX, - // size: 0, - // source_off: 0, - // }; - // let mut best_track = 0; - // for (i, r) in inputs.iter().enumerate() { - // if let Some(v) = r.index.blocks.get(r.layouting_progress_index) { - // if v.pts < best_block.pts { - // best_block = v.to_owned(); - // best_track = i; - // } - // }; - // } - 
// (best_track, best_block) - // }; - // inputs[track].layouting_progress_index += 1; - // source_offsets[track].get_or_insert(block.source_off); - // if block.pts > cluster_pts + 1_000 { - // let cluster_content_size = 1 + 1 // timestamp {tag, size} - // + bad_vint_length(cluster_pts) // timestamp tag value - // + p; - // let cluster_size = 4 // tag length - // + vint_length(cluster_content_size as u64) // size varint - // + cluster_content_size; - // clusters.push(ClusterLayout { - // position: gp, // relative to the first cluster - // timestamp: cluster_pts, - // source_offsets, - // blocks: std::mem::take(&mut cluster), - // }); - - // cluster_pts = block.pts; - // source_offsets = vec![None; inputs.len()]; - // gp += cluster_size; - // p = 0; - // } - // if block.pts == u64::MAX { - // break; - // } - - // let simpleblock_size = 1 + 2 + 1 // block {tracknum, pts_off, flags} - // // TODO does not work, if more than 127 tracks are present - // + block.size; // block payload - // p += 1; // simpleblock tag - // p += vint_length(simpleblock_size as u64); // simpleblock size vint - // p += simpleblock_size; - - // cluster.push((track, block)) - // } - // info!("segment layout computed ({} clusters)", clusters.len()); - // clusters - // }; - // info!( - // "(perf) compute segment layout: {:?}", - // Instant::now() - timing_cp - // ); - // let timing_cp = Instant::now(); - - // let max_cue_size = 4 // cues id - // + 8 // cues len - // + ( // cues content - // 1 // cp id - // + 1 // cp len - // + ( // cp content - // 1 // ctime id, - // + 1 // ctime len - // + 8 // ctime content uint - // + ( // ctps - // 1 // ctp id - // + 8 // ctp len - // + (// ctp content - // 1 // ctrack id - // + 1 // ctrack size - // + 1 // ctrack content int - // // TODO this breaks if inputs.len() >= 127 - // + 1 // ccp id - // + 1 // ccp len - // + 8 // ccp content offset - // ) - // ) - // ) * inputs.len() - // ) * segment_layout.len() - // + 1 // void id - // + 8; // void len - - // let 
first_cluster_offset_predict = max_cue_size + output.position(); - - // // make the cluster position relative to the segment start as they should - // segment_layout - // .iter_mut() - // .for_each(|e| e.position += first_cluster_offset_predict - segment_offset); - - // output.write_tag(&MatroskaTag::Cues(Master::Collected( - // segment_layout - // .iter() - // .map(|cluster| { - // MatroskaTag::CuePoint(Master::Collected( - // Some(MatroskaTag::CueTime(cluster.timestamp)) - // .into_iter() - // // TODO: Subtitles should not have cues for every cluster - // .chain(inputs.iter().map(|i| { - // MatroskaTag::CueTrackPositions(Master::Collected(vec![ - // MatroskaTag::CueTrack(i.mapped), - // MatroskaTag::CueClusterPosition(cluster.position as u64), - // ])) - // })) - // .collect(), - // )) - // }) - // .collect(), - // )))?; - // output.write_padding(first_cluster_offset_predict)?; - // let first_cluster_offset = output.position(); - // assert_eq!(first_cluster_offset, first_cluster_offset_predict); - - // let mut skip = 0; - // // TODO binary search - // for (i, cluster) in segment_layout.iter().enumerate() { - // if (cluster.position + segment_offset) >= range.start { - // break; - // } - // skip = i; - // } - - // if skip != 0 { - // info!("skipping {skip} clusters"); - // output.seek(SeekFrom::Start( - // (segment_layout[skip].position + segment_offset) as u64, - // ))?; - // } - - // struct ReaderD<'a> { - // stream: SegmentExtractIter<'a>, - // mapped: u64, - // } - - // let mut track_readers = inputs - // .iter_mut() - // .enumerate() - // .map(|(i, inp)| { - // inp.reader - // .seek( - // // the seek target might be a hole; we continue until the next cluster of that track. - // // this should be fine since tracks are only read according to segment_layout - // find_first_cluster_with_off(&segment_layout, skip, i) - // .ok_or(anyhow!("cluster hole at eof"))?, - // MatroskaTag::Cluster(Master::Start), // TODO shouldn't this be a child of cluster? 
- // ) - // .context("seeking in input")?; - // let stream = SegmentExtractIter::new(&mut inp.reader, inp.source_track_index as u64); - - // Ok(ReaderD { - // mapped: inp.mapped, - // stream, - // }) - // }) - // .collect::<anyhow::Result<Vec<_>>>()?; - - // info!("(perf) seek inputs: {:?}", Instant::now() - timing_cp); - - // for (cluster_index, cluster) in segment_layout.into_iter().enumerate().skip(skip) { - // debug!( - // "writing cluster {cluster_index} (pts_base={}) with {} blocks", - // cluster.timestamp, - // cluster.blocks.len() - // ); - // { - // let cue_error = cluster.position as i64 - (output.position() - segment_offset) as i64; - // if cue_error != 0 { - // warn!("calculation was {} bytes off", cue_error); - // } - // } - - // let mut cluster_blocks = vec![MatroskaTag::Timestamp(cluster.timestamp)]; - // for (block_track, index_block) in cluster.blocks { - // let track_reader = &mut track_readers[block_track]; - // // TODO handle duration - // let mut block = track_reader.stream.next_block()?.0; - - // assert_eq!(index_block.size, block.data.len(), "seek index is wrong"); - - // block.track = track_reader.mapped; - // block.timestamp_off = (index_block.pts - cluster.timestamp).try_into().unwrap(); - // trace!("n={} tso={}", block.track, block.timestamp_off); - - // cluster_blocks.push(MatroskaTag::SimpleBlock(block)) - // } - // output.write_tag(&MatroskaTag::Cluster(Master::Collected(cluster_blocks)))?; - // } - // // output.write_tag(&MatroskaTag::Segment(Master::End))?; - // Ok(()) - todo!() -} - -// fn find_first_cluster_with_off( -// segment_layout: &[ClusterLayout], -// skip: usize, -// track: usize, -// ) -> Option<u64> { -// for cluster in segment_layout.iter().skip(skip) { -// if let Some(off) = cluster.source_offsets[track] { -// return Some(off); -// } -// } -// None -// } diff --git a/remuxer/src/seek_index.rs b/remuxer/src/seek_index.rs deleted file mode 100644 index a1a97ef..0000000 --- a/remuxer/src/seek_index.rs +++ /dev/null @@ 
-1,152 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use anyhow::{Context, Result}; -use bincode::{Decode, Encode}; -use jellycache::cache_memory; -use jellymatroska::{ - block::Block, - read::EbmlReader, - unflatten::{Unflat, Unflatten}, - MatroskaTag, -}; -use log::{debug, info, trace, warn}; -use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path, sync::Arc}; - -#[derive(Debug, Clone, Default, Decode, Encode)] -pub struct SeekIndex { - pub blocks: Vec<BlockIndex>, - pub keyframes: Vec<usize>, -} - -#[derive(Debug, Clone, Decode, Encode)] -pub struct BlockIndex { - pub pts: u64, - // pub duration: Option<u64>, - pub source_off: u64, // points to start of SimpleBlock or BlockGroup (not the Block inside it) - pub size: usize, -} - -pub fn get_seek_index(path: &Path) -> anyhow::Result<Arc<BTreeMap<u64, Arc<SeekIndex>>>> { - cache_memory("seekindex-v1", path, move || { - info!("generating seek index for {path:?}"); - let input = File::open(path).context("opening source file")?; - let mut input = EbmlReader::new(BufReader::new(input)); - let index = import_seek_index(&mut input)?; - info!("done"); - Ok(index.into_iter().map(|(k, v)| (k, Arc::new(v))).collect()) - }) -} - -pub fn get_track_sizes(path: &Path) -> Result<BTreeMap<u64, usize>> { - Ok(get_seek_index(path)? 
- .iter() - .map(|(k, v)| (*k, v.blocks.iter().map(|b| b.size).sum::<usize>())) - .collect()) -} - -pub fn import_seek_index(input: &mut EbmlReader) -> Result<BTreeMap<u64, SeekIndex>> { - let mut seek_index = BTreeMap::new(); - while let Some(item) = input.next() { - let item = match item { - Ok((_, item)) => item, - Err(e) => { - if !matches!(e, jellymatroska::error::Error::Io(_)) { - warn!("{e}"); - } - break; - } - }; - match item { - MatroskaTag::Segment(_) => { - info!("segment start"); - let mut children = Unflatten::new_with_end(input, item); - import_seek_index_segment(&mut children, &mut seek_index)?; - info!("segment end"); - } - _ => debug!("(r) tag ignored: {item:?}"), - } - } - Ok(seek_index) -} - -fn import_seek_index_segment( - segment: &mut Unflatten, - seek_index: &mut BTreeMap<u64, SeekIndex>, -) -> Result<()> { - while let Some(Ok(Unflat { children, item, .. })) = segment.n() { - match item { - MatroskaTag::SeekHead(_) => {} - MatroskaTag::Info(_) => {} - MatroskaTag::Tags(_) => {} - MatroskaTag::Cues(_) => {} - MatroskaTag::Chapters(_) => {} - MatroskaTag::Tracks(_) => {} - MatroskaTag::Void(_) => {} - MatroskaTag::Cluster(_) => { - let mut children = children.unwrap(); - let mut pts = 0; - while let Some(Ok(Unflat { - children, - item, - position, - })) = children.n() - { - match item { - MatroskaTag::Timestamp(ts) => pts = ts, - MatroskaTag::BlockGroup(_) => { - trace!("group"); - let mut children = children.unwrap(); - while let Some(Ok(Unflat { - children: _, item, .. 
- })) = children.n() - { - match item { - MatroskaTag::Block(ref block) => { - debug!( - "block: track={} tso={}", - block.track, block.timestamp_off - ); - seek_index_add(seek_index, block, position.unwrap(), pts); - } - _ => trace!("{item:?}"), - } - } - } - MatroskaTag::SimpleBlock(block) => { - trace!( - "simple block: track={} tso={}", - block.track, - block.timestamp_off - ); - trace!("{pts} {}", block.timestamp_off); - seek_index_add(seek_index, &block, position.unwrap(), pts); - } - _ => trace!("(rsc) tag ignored: {item:?}"), - } - } - } - _ => debug!("(rs) tag ignored: {item:?}"), - }; - } - Ok(()) -} - -fn seek_index_add( - seek_index: &mut BTreeMap<u64, SeekIndex>, - block: &Block, - position: u64, - pts_base: u64, -) { - let trs = seek_index.entry(block.track).or_default(); - if block.flags.keyframe() { - trs.keyframes.push(trs.blocks.len()); - } - trs.blocks.push(BlockIndex { - pts: (pts_base as i64 + block.timestamp_off as i64) as u64, - source_off: position, - size: block.data.len(), - }); -} diff --git a/remuxer/src/segment_extractor.rs b/remuxer/src/segment_extractor.rs deleted file mode 100644 index 42c85f5..0000000 --- a/remuxer/src/segment_extractor.rs +++ /dev/null @@ -1,60 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use anyhow::{anyhow, bail, Result}; -use jellymatroska::{block::Block, read::EbmlReader, Master, MatroskaTag}; -use log::{debug, info, trace}; - -pub struct SegmentExtractIter<'a> { - segment: &'a mut EbmlReader, - extract: u64, -} - -impl<'a> SegmentExtractIter<'a> { - pub fn new(segment: &'a mut EbmlReader, extract: u64) -> Self { - Self { segment, extract } - } - - /// Returns the next block and sometimes its duration too. 
- pub fn next_block(&mut self) -> Result<(Block, Option<u64>)> { - let mut duration = None; - let mut group = false; - let mut saved_block = None; - loop { - let (_, item) = self.segment.next().ok_or(anyhow!("eof"))??; - match item { - MatroskaTag::Void(_) => (), - MatroskaTag::Crc32(_) => (), - MatroskaTag::Cluster(_) => (), - MatroskaTag::Timestamp(_) => (), - MatroskaTag::BlockGroup(Master::Start) => group = true, - MatroskaTag::BlockGroup(Master::End) => { - if !group { - bail!("group end without start"); - } - if let Some(block) = saved_block { - return Ok((block, duration)); - } - group = false; - } - MatroskaTag::BlockDuration(d) => duration = Some(d), - MatroskaTag::SimpleBlock(block) | MatroskaTag::Block(block) => { - if block.track == self.extract { - trace!("block: track={} tso={}", block.track, block.timestamp_off); - if group { - // can't return yet; there might be a BlockDuration coming - saved_block = Some(block); - } else { - return Ok((block, duration)); - } - } - } - MatroskaTag::Cues(_) => bail!("reached cues, this is the end"), - MatroskaTag::Segment(Master::End) => info!("extractor reached segment end"), - _ => debug!("(rs) tag ignored: {item:?}"), - } - } - } -} diff --git a/remuxer/src/trim_writer.rs b/remuxer/src/trim_writer.rs deleted file mode 100644 index 2c1b7ed..0000000 --- a/remuxer/src/trim_writer.rs +++ /dev/null @@ -1,72 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. 
- Copyright (C) 2025 metamuffin <metamuffin.org> -*/ -use anyhow::anyhow; -use log::{trace, warn}; -use std::{ - io::{Seek, Write}, - ops::Range, -}; - -pub struct TrimWriter<W> { - inner: W, - position: usize, - range: Range<usize>, -} -impl<W: Write> TrimWriter<W> { - pub fn new(inner: W, range: Range<usize>) -> Self { - Self { - inner, - range, - position: 0, - } - } -} - -impl<W: Write> Write for TrimWriter<W> { - fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { - let start = self.range.start as isize - self.position as isize; - let end = self.range.end as isize - self.position as isize; - - let start = start.clamp(0, buf.len() as isize) as usize; - let end = end.clamp(0, buf.len() as isize) as usize; - - if self.position >= self.range.end { - return Err(std::io::Error::other(anyhow!("range ended"))); - } - - let tbuf = &buf[start..end]; - Ok(if !tbuf.is_empty() { - trace!("trim={start}..{end} avail={}", buf.len()); - let sz = self.inner.write(tbuf)?; - self.position += sz; - sz - } else { - trace!("skip={}", buf.len()); - buf.len() - }) - } - - fn flush(&mut self) -> std::io::Result<()> { - self.inner.flush() - } -} - -impl<W> Seek for TrimWriter<W> { - fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result<u64> { - match pos { - std::io::SeekFrom::Start(s) => self.position = s as usize, - std::io::SeekFrom::End(_) => unimplemented!(), - std::io::SeekFrom::Current(s) => self.position += s as usize, - } - if self.position > self.range.end { - warn!( - "seeked beyond end: pos={} end={}", - self.position, self.range.end - ) - } - Ok(self.position as u64) - } -} diff --git a/transcoder/src/fragment.rs b/transcoder/src/fragment.rs index a71094d..fc5fb21 100644 --- a/transcoder/src/fragment.rs +++ b/transcoder/src/fragment.rs @@ -6,7 +6,6 @@ use crate::{Config, CONF, LOCAL_VIDEO_TRANSCODING_TASKS}; use anyhow::Result; use jellycache::{async_cache_file, CachePath}; -use jellyremuxer::metadata::MatroskaTrackEntry; use 
jellystream_types::{StreamFormatInfo, TrackKind}; use log::info; use std::fmt::Write; |