From 0fe48987c336b9b50cba09355aa3c1cf11740edc Mon Sep 17 00:00:00 2001 From: metamuffin Date: Tue, 16 Sep 2025 13:40:50 +0200 Subject: started flac demuxer --- Cargo.lock | 31 +- import/src/lib.rs | 628 ++++++++++++++++++++------------------- remuxer/src/bin/mkvinfo.rs | 3 + remuxer/src/demuxers/flac.rs | 250 ++++++++++++++++ remuxer/src/demuxers/matroska.rs | 25 +- remuxer/src/demuxers/mod.rs | 26 +- remuxer/src/lib.rs | 1 + transcoder/Cargo.toml | 1 + transcoder/src/fragment.rs | 1 + 9 files changed, 628 insertions(+), 338 deletions(-) create mode 100644 remuxer/src/demuxers/flac.rs diff --git a/Cargo.lock b/Cargo.lock index a0383b4..9af686d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -862,7 +862,7 @@ checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" [[package]] name = "ebml" version = "0.1.0" -source = "git+https://codeberg.org/metamuffin/ebml-rs#dd98200be401334639e45fd6d89d30af3b73cce7" +source = "git+https://codeberg.org/metamuffin/ebml-rs#8a50865c9733737fffd05f4c088c9e150cf98520" dependencies = [ "ebml-derive", ] @@ -870,7 +870,7 @@ dependencies = [ [[package]] name = "ebml-derive" version = "0.1.0" -source = "git+https://codeberg.org/metamuffin/ebml-rs#dd98200be401334639e45fd6d89d30af3b73cce7" +source = "git+https://codeberg.org/metamuffin/ebml-rs#8a50865c9733737fffd05f4c088c9e150cf98520" dependencies = [ "darling", "quote", @@ -2052,6 +2052,7 @@ dependencies = [ "jellystream-types", "libavif-image", "log", + "matroska", "rav1e", "ravif", "rayon", @@ -2326,7 +2327,7 @@ dependencies = [ [[package]] name = "matroska" version = "0.1.0" -source = "git+https://codeberg.org/metamuffin/ebml-rs#dd98200be401334639e45fd6d89d30af3b73cce7" +source = "git+https://codeberg.org/metamuffin/ebml-rs#8a50865c9733737fffd05f4c088c9e150cf98520" dependencies = [ "ebml", ] @@ -3500,18 +3501,28 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "serde" -version = "1.0.219" +version = "1.0.221" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "341877e04a22458705eb4e131a1508483c877dca2792b3781d4e5d8a6019ec43" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.221" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "0c459bc0a14c840cb403fc14b148620de1e0778c96ecd6e0c8c3cacb6d8d00fe" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.219" +version = "1.0.221" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "d6185cf75117e20e62b1ff867b9518577271e58abe0037c40bb4794969355ab0" dependencies = [ "proc-macro2", "quote", @@ -3520,14 +3531,14 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.143" +version = "1.0.144" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" +checksum = "56177480b00303e689183f110b4e727bb4211d692c62d4fcd16d02be93077d40" dependencies = [ "itoa", "memchr", "ryu", - "serde", + "serde_core", ] [[package]] diff --git a/import/src/lib.rs b/import/src/lib.rs index 941721e..d62de57 100644 --- a/import/src/lib.rs +++ b/import/src/lib.rs @@ -27,6 +27,7 @@ use jellycommon::{ Person, Rating, SourceTrack, SourceTrackKind, TmdbKind, TrackSource, TraktKind, Visibility, }; use jellyimport_fallback_generator::generate_fallback; +use jellyremuxer::magic::detect_container_format; use log::info; use musicbrainz::MusicBrainz; use rayon::iter::{ParallelBridge, ParallelIterator}; @@ -350,320 +351,323 @@ fn import_media_file( iflags: InheritedFlags, ) -> Result<()> { info!("media file {path:?}"); - let Some(m) = (*checked_matroska_metadata(path)?).to_owned() else { + let mut file = File::open(path)?; + let Some(container) = detect_container_format(&mut file)? else { return Ok(()); }; - let infojson = m - .infojson - .map(|d| serde_json::from_slice::(&d)) - .transpose() - .context("infojson parsing")?; - - let info = m.info.ok_or(anyhow!("no info"))?; - let tracks = m.tracks.ok_or(anyhow!("no tracks"))?; - - let mut tags = m - .tags - .map(|tags| { - tags.tags - .into_iter() - .flat_map(|t| t.simple_tags) - .map(|st| (st.name, st.string.unwrap_or_default())) - .collect::>() - }) - .unwrap_or_default(); - - let filename = path.file_name().unwrap().to_string_lossy().to_string(); - - let mut episode_index = None; - if let Some(cap) = RE_EPISODE_FILENAME.captures(&filename) { - if let Some(episode) = cap.name("episode").map(|m| m.as_str()) { - let season = cap.name("season").map(|m| m.as_str()); - let episode = episode.parse::().context("parse episode num")?; - let season = season - .unwrap_or("1") - .parse::() - .context("parse season num")?; - episode_index = Some((season, episode)) - } - } - - let mut filename_toks = filename.split("."); - let filepath_stem = filename_toks.next().unwrap(); - - let slug = infojson - .as_ref() - // TODO maybe also include the slug after the primary "id" key - .map(|ij| format!("{}-{}", ij.extractor.to_lowercase(), ij.id)) - .unwrap_or_else(|| { - if let Some((s, e)) = episode_index { - format!( - "{}-s{s}e{e}", - make_kebab( - &path - .parent() - .unwrap() - .file_name() - .unwrap_or_default() - .to_string_lossy() - ) - ) - } else { - make_kebab(filepath_stem) - } - }); - - let node = NodeID::from_slug(&slug); - - let meta = path.metadata()?; - let mut eids = BTreeMap::new(); - - for (key, value) in &tags { - match key.as_str() { - "MUSICBRAINZ_TRACKID" => { - eids.insert("musicbrainz.recording".to_string(), value.to_owned()) - } - "MUSICBRAINZ_ARTISTID" => { - eids.insert("musicbrainz.artist".to_string(), value.to_owned()) - } - "MUSICBRAINZ_ALBUMID" => { - eids.insert("musicbrainz.release".to_string(), value.to_owned()) - } - "MUSICBRAINZ_ALBUMARTISTID" => { - eids.insert("musicbrainz.albumartist".to_string(), value.to_owned()) - } - "MUSICBRAINZ_RELEASEGROUPID" => { - eids.insert("musicbrainz.releasegroup".to_string(), value.to_owned()) - } - "ISRC" => eids.insert("isrc".to_string(), value.to_owned()), - "BARCODE" => eids.insert("barcode".to_string(), value.to_owned()), - _ => None, - }; - } - - if iflags.use_acoustid { - let fp = rthandle.block_on(acoustid_fingerprint(path))?; - if let Some((atid, mbid)) = rthandle.block_on( - apis.acoustid - .as_ref() - .ok_or(anyhow!("need acoustid"))? - .get_atid_mbid(&fp), - )? { - eids.insert("acoustid.track".to_string(), atid); - eids.insert("musicbrainz.recording".to_string(), mbid); - }; - } - - let mbrec = eids.get("musicbrainz.recording").cloned(); - - db.update_node_init(node, |node| { - node.slug = slug; - node.title = info.title.or(node.title.clone()); - node.visibility = iflags.visibility; - node.poster = m - .cover - .map(|a| AssetInner::Cache(a).ser()) - .or(node.poster.clone()); - node.description = tags - .remove("DESCRIPTION") - .or(tags.remove("SYNOPSIS")) - .or(node.description.clone()); - node.tagline = tags.remove("COMMENT").or(node.tagline.clone()); - node.parents.insert(parent); - - node.external_ids.extend(eids); - - if let Some(ct) = tags.get("CONTENT_TYPE") { - node.kind = match ct.to_lowercase().trim() { - "movie" | "documentary" | "film" => NodeKind::Movie, - "music" | "recording" => NodeKind::Music, - _ => NodeKind::Unknown, - } - } - - let tracks = tracks - .entries - .into_iter() - .map(|track| SourceTrack { - codec: track.codec_id, - language: track.language, - name: track.name.unwrap_or_default(), - default_duration: track.default_duration, - federated: Vec::new(), - codec_delay: track.codec_delay, - seek_pre_roll: track.seek_pre_roll, - flag_lacing: track.flag_lacing, - kind: if let Some(video) = track.video { - SourceTrackKind::Video { - width: video.pixel_width, - height: video.pixel_height, - display_width: video.display_width, - display_height: video.display_height, - display_unit: video.display_unit, - fps: video.frame_rate, - } - } else if let Some(audio) = track.audio { - SourceTrackKind::Audio { - channels: audio.channels as usize, - sample_rate: audio.sampling_frequency, - bit_depth: audio.bit_depth.map(|r| r as usize), - } - } else { - SourceTrackKind::Subtitle - }, - source: TrackSource::Local( - AssetInner::LocalTrack(LocalTrack { - path: path.to_owned(), - track: track.track_number as usize, - }) - .ser(), - ), - }) - .collect::>(); - - if let Some(infojson) = infojson { - node.kind = if !tracks - .iter() - .any(|t| matches!(t.kind, SourceTrackKind::Video { .. })) - { - NodeKind::Music - } else if infojson.duration.unwrap_or(0.) < 600. - && infojson.aspect_ratio.unwrap_or(2.) < 1. - { - NodeKind::ShortFormVideo - } else { - NodeKind::Video - }; - node.title = Some(infojson.title); - node.subtitle = if infojson.alt_title != node.title { - infojson.alt_title - } else { - None - } - .or(infojson - .uploader - .as_ref() - .map(|u| clean_uploader_name(u).to_owned())) - .or(node.subtitle.clone()); - - node.tags.extend(infojson.tags.unwrap_or_default()); - - if let Some(desc) = infojson.description { - node.description = Some(desc) - } - node.tagline = Some(infojson.webpage_url); - if let Some(date) = &infojson.upload_date { - node.release_date = - Some(infojson::parse_upload_date(date).context("parsing upload date")?); - } - match infojson.extractor.as_str() { - "youtube" => { - node.external_ids - .insert("youtube.video".to_string(), infojson.id); - node.ratings.insert( - Rating::YoutubeViews, - infojson.view_count.unwrap_or_default() as f64, - ); - if let Some(lc) = infojson.like_count { - node.ratings.insert(Rating::YoutubeLikes, lc as f64); - } - } - "Bandcamp" => drop( - node.external_ids - .insert("bandcamp".to_string(), infojson.id), - ), - _ => (), - } - } - - // TODO merge size - node.storage_size = meta.len(); - // TODO merge tracks - node.media = Some(MediaInfo { - chapters: m - .chapters - .clone() - .map(|c| { - let mut chaps = Vec::new(); - if let Some(ee) = c.edition_entries.first() { - for ca in &ee.chapter_atoms { - let mut labels = Vec::new(); - for cd in &ca.displays { - for lang in &cd.languages { - labels.push((lang.to_owned(), cd.string.clone())) - } - } - chaps.push(Chapter { - labels, - time_start: Some(ca.time_start as f64 * 1e-9), - time_end: ca.time_end.map(|ts| ts as f64 * 1e-9), - }) - } - } - chaps - }) - .unwrap_or_default(), - duration: fix_invalid_runtime( - info.duration.unwrap_or_default() * info.timestamp_scale as f64 * 1e-9, - ), - tracks, - }); - - Ok(()) - })?; - - if let Some((season, episode)) = episode_index { - let mut trakt_id = None; - let flagspath = path.parent().unwrap().join("flags"); - if flagspath.exists() { - for flag in read_to_string(flagspath)?.lines() { - if let Some(value) = flag.strip_prefix("trakt-").or(flag.strip_prefix("trakt=")) { - let (kind, id) = value.split_once(":").unwrap_or(("", value)); - if kind == "show" { - trakt_id = Some(id.parse::()?); - } - } - } - } - if let Some(trakt_id) = trakt_id { - let trakt = apis.trakt.as_ref().ok_or(anyhow!("trakt required"))?; - let seasons = rthandle.block_on(trakt.show_seasons(trakt_id))?; - if seasons.iter().any(|x| x.number == season) { - let episodes = rthandle.block_on(trakt.show_season_episodes(trakt_id, season))?; - let mut poster = None; - if let Some(tmdb) = &apis.tmdb { - let trakt_details = - rthandle.block_on(trakt.lookup(TraktKind::Show, trakt_id))?; - if let Some(tmdb_id) = trakt_details.ids.tmdb { - let tmdb_details = - rthandle.block_on(tmdb.episode_details(tmdb_id, season, episode))?; - if let Some(still) = &tmdb_details.still_path { - poster = - Some(AssetInner::Cache(rthandle.block_on(tmdb.image(still))?).ser()) - } - } - } - if let Some(episode) = episodes.get(episode.saturating_sub(1)) { - db.update_node_init(node, |node| { - node.kind = NodeKind::Episode; - node.index = Some(episode.number); - node.title = Some(episode.title.clone()); - node.poster = poster.or(node.poster.clone()); - node.description = episode.overview.clone().or(node.description.clone()); - node.ratings.insert(Rating::Trakt, episode.rating); - Ok(()) - })? - } - } - } - } - - for tok in filename_toks { - apply_node_flag(db, rthandle, apis, node, tok)?; - } - if let Some(mbid) = mbrec { - apply_musicbrainz_recording(db, rthandle, apis, node, mbid)?; - } + eprintln!("{container:?}"); + + // let infojson = m + // .infojson + // .map(|d| serde_json::from_slice::(&d)) + // .transpose() + // .context("infojson parsing")?; + + // let info = m.info.ok_or(anyhow!("no info"))?; + // let tracks = m.tracks.ok_or(anyhow!("no tracks"))?; + + // let mut tags = m + // .tags + // .map(|tags| { + // tags.tags + // .into_iter() + // .flat_map(|t| t.simple_tags) + // .map(|st| (st.name, st.string.unwrap_or_default())) + // .collect::>() + // }) + // .unwrap_or_default(); + + // let filename = path.file_name().unwrap().to_string_lossy().to_string(); + + // let mut episode_index = None; + // if let Some(cap) = RE_EPISODE_FILENAME.captures(&filename) { + // if let Some(episode) = cap.name("episode").map(|m| m.as_str()) { + // let season = cap.name("season").map(|m| m.as_str()); + // let episode = episode.parse::().context("parse episode num")?; + // let season = season + // .unwrap_or("1") + // .parse::() + // .context("parse season num")?; + // episode_index = Some((season, episode)) + // } + // } + + // let mut filename_toks = filename.split("."); + // let filepath_stem = filename_toks.next().unwrap(); + + // let slug = infojson + // .as_ref() + // // TODO maybe also include the slug after the primary "id" key + // .map(|ij| format!("{}-{}", ij.extractor.to_lowercase(), ij.id)) + // .unwrap_or_else(|| { + // if let Some((s, e)) = episode_index { + // format!( + // "{}-s{s}e{e}", + // make_kebab( + // &path + // .parent() + // .unwrap() + // .file_name() + // .unwrap_or_default() + // .to_string_lossy() + // ) + // ) + // } else { + // make_kebab(filepath_stem) + // } + // }); + + // let node = NodeID::from_slug(&slug); + + // let meta = path.metadata()?; + // let mut eids = BTreeMap::new(); + + // for (key, value) in &tags { + // match key.as_str() { + // "MUSICBRAINZ_TRACKID" => { + // eids.insert("musicbrainz.recording".to_string(), value.to_owned()) + // } + // "MUSICBRAINZ_ARTISTID" => { + // eids.insert("musicbrainz.artist".to_string(), value.to_owned()) + // } + // "MUSICBRAINZ_ALBUMID" => { + // eids.insert("musicbrainz.release".to_string(), value.to_owned()) + // } + // "MUSICBRAINZ_ALBUMARTISTID" => { + // eids.insert("musicbrainz.albumartist".to_string(), value.to_owned()) + // } + // "MUSICBRAINZ_RELEASEGROUPID" => { + // eids.insert("musicbrainz.releasegroup".to_string(), value.to_owned()) + // } + // "ISRC" => eids.insert("isrc".to_string(), value.to_owned()), + // "BARCODE" => eids.insert("barcode".to_string(), value.to_owned()), + // _ => None, + // }; + // } + + // if iflags.use_acoustid { + // let fp = rthandle.block_on(acoustid_fingerprint(path))?; + // if let Some((atid, mbid)) = rthandle.block_on( + // apis.acoustid + // .as_ref() + // .ok_or(anyhow!("need acoustid"))? + // .get_atid_mbid(&fp), + // )? { + // eids.insert("acoustid.track".to_string(), atid); + // eids.insert("musicbrainz.recording".to_string(), mbid); + // }; + // } + + // let mbrec = eids.get("musicbrainz.recording").cloned(); + + // db.update_node_init(node, |node| { + // node.slug = slug; + // node.title = info.title.or(node.title.clone()); + // node.visibility = iflags.visibility; + // node.poster = m + // .cover + // .map(|a| AssetInner::Cache(a).ser()) + // .or(node.poster.clone()); + // node.description = tags + // .remove("DESCRIPTION") + // .or(tags.remove("SYNOPSIS")) + // .or(node.description.clone()); + // node.tagline = tags.remove("COMMENT").or(node.tagline.clone()); + // node.parents.insert(parent); + + // node.external_ids.extend(eids); + + // if let Some(ct) = tags.get("CONTENT_TYPE") { + // node.kind = match ct.to_lowercase().trim() { + // "movie" | "documentary" | "film" => NodeKind::Movie, + // "music" | "recording" => NodeKind::Music, + // _ => NodeKind::Unknown, + // } + // } + + // let tracks = tracks + // .entries + // .into_iter() + // .map(|track| SourceTrack { + // codec: track.codec_id, + // language: track.language, + // name: track.name.unwrap_or_default(), + // default_duration: track.default_duration, + // federated: Vec::new(), + // codec_delay: track.codec_delay, + // seek_pre_roll: track.seek_pre_roll, + // flag_lacing: track.flag_lacing, + // kind: if let Some(video) = track.video { + // SourceTrackKind::Video { + // width: video.pixel_width, + // height: video.pixel_height, + // display_width: video.display_width, + // display_height: video.display_height, + // display_unit: video.display_unit, + // fps: video.frame_rate, + // } + // } else if let Some(audio) = track.audio { + // SourceTrackKind::Audio { + // channels: audio.channels as usize, + // sample_rate: audio.sampling_frequency, + // bit_depth: audio.bit_depth.map(|r| r as usize), + // } + // } else { + // SourceTrackKind::Subtitle + // }, + // source: TrackSource::Local( + // AssetInner::LocalTrack(LocalTrack { + // path: path.to_owned(), + // track: track.track_number as usize, + // }) + // .ser(), + // ), + // }) + // .collect::>(); + + // if let Some(infojson) = infojson { + // node.kind = if !tracks + // .iter() + // .any(|t| matches!(t.kind, SourceTrackKind::Video { .. })) + // { + // NodeKind::Music + // } else if infojson.duration.unwrap_or(0.) < 600. + // && infojson.aspect_ratio.unwrap_or(2.) < 1. + // { + // NodeKind::ShortFormVideo + // } else { + // NodeKind::Video + // }; + // node.title = Some(infojson.title); + // node.subtitle = if infojson.alt_title != node.title { + // infojson.alt_title + // } else { + // None + // } + // .or(infojson + // .uploader + // .as_ref() + // .map(|u| clean_uploader_name(u).to_owned())) + // .or(node.subtitle.clone()); + + // node.tags.extend(infojson.tags.unwrap_or_default()); + + // if let Some(desc) = infojson.description { + // node.description = Some(desc) + // } + // node.tagline = Some(infojson.webpage_url); + // if let Some(date) = &infojson.upload_date { + // node.release_date = + // Some(infojson::parse_upload_date(date).context("parsing upload date")?); + // } + // match infojson.extractor.as_str() { + // "youtube" => { + // node.external_ids + // .insert("youtube.video".to_string(), infojson.id); + // node.ratings.insert( + // Rating::YoutubeViews, + // infojson.view_count.unwrap_or_default() as f64, + // ); + // if let Some(lc) = infojson.like_count { + // node.ratings.insert(Rating::YoutubeLikes, lc as f64); + // } + // } + // "Bandcamp" => drop( + // node.external_ids + // .insert("bandcamp".to_string(), infojson.id), + // ), + // _ => (), + // } + // } + + // // TODO merge size + // node.storage_size = meta.len(); + // // TODO merge tracks + // node.media = Some(MediaInfo { + // chapters: m + // .chapters + // .clone() + // .map(|c| { + // let mut chaps = Vec::new(); + // if let Some(ee) = c.edition_entries.first() { + // for ca in &ee.chapter_atoms { + // let mut labels = Vec::new(); + // for cd in &ca.displays { + // for lang in &cd.languages { + // labels.push((lang.to_owned(), cd.string.clone())) + // } + // } + // chaps.push(Chapter { + // labels, + // time_start: Some(ca.time_start as f64 * 1e-9), + // time_end: ca.time_end.map(|ts| ts as f64 * 1e-9), + // }) + // } + // } + // chaps + // }) + // .unwrap_or_default(), + // duration: fix_invalid_runtime( + // info.duration.unwrap_or_default() * info.timestamp_scale as f64 * 1e-9, + // ), + // tracks, + // }); + + // Ok(()) + // })?; + + // if let Some((season, episode)) = episode_index { + // let mut trakt_id = None; + // let flagspath = path.parent().unwrap().join("flags"); + // if flagspath.exists() { + // for flag in read_to_string(flagspath)?.lines() { + // if let Some(value) = flag.strip_prefix("trakt-").or(flag.strip_prefix("trakt=")) { + // let (kind, id) = value.split_once(":").unwrap_or(("", value)); + // if kind == "show" { + // trakt_id = Some(id.parse::()?); + // } + // } + // } + // } + // if let Some(trakt_id) = trakt_id { + // let trakt = apis.trakt.as_ref().ok_or(anyhow!("trakt required"))?; + // let seasons = rthandle.block_on(trakt.show_seasons(trakt_id))?; + // if seasons.iter().any(|x| x.number == season) { + // let episodes = rthandle.block_on(trakt.show_season_episodes(trakt_id, season))?; + // let mut poster = None; + // if let Some(tmdb) = &apis.tmdb { + // let trakt_details = + // rthandle.block_on(trakt.lookup(TraktKind::Show, trakt_id))?; + // if let Some(tmdb_id) = trakt_details.ids.tmdb { + // let tmdb_details = + // rthandle.block_on(tmdb.episode_details(tmdb_id, season, episode))?; + // if let Some(still) = &tmdb_details.still_path { + // poster = + // Some(AssetInner::Cache(rthandle.block_on(tmdb.image(still))?).ser()) + // } + // } + // } + // if let Some(episode) = episodes.get(episode.saturating_sub(1)) { + // db.update_node_init(node, |node| { + // node.kind = NodeKind::Episode; + // node.index = Some(episode.number); + // node.title = Some(episode.title.clone()); + // node.poster = poster.or(node.poster.clone()); + // node.description = episode.overview.clone().or(node.description.clone()); + // node.ratings.insert(Rating::Trakt, episode.rating); + // Ok(()) + // })? + // } + // } + // } + // } + + // for tok in filename_toks { + // apply_node_flag(db, rthandle, apis, node, tok)?; + // } + // if let Some(mbid) = mbrec { + // apply_musicbrainz_recording(db, rthandle, apis, node, mbid)?; + // } Ok(()) } diff --git a/remuxer/src/bin/mkvinfo.rs b/remuxer/src/bin/mkvinfo.rs index 0899245..2b657ea 100644 --- a/remuxer/src/bin/mkvinfo.rs +++ b/remuxer/src/bin/mkvinfo.rs @@ -15,6 +15,9 @@ fn main() -> Result<()> { let mut reader = MatroskaDemuxer::new(Box::new(file)); println!("INFO: {:#?}", reader.info()?); + println!("TRACKS: {:#?}", reader.tracks()?); + println!("CHAPTERS: {:#?}", reader.chapters()?); + println!("TAGS: {:#?}", reader.tags()?); Ok(()) } diff --git a/remuxer/src/demuxers/flac.rs b/remuxer/src/demuxers/flac.rs new file mode 100644 index 0000000..04d15e0 --- /dev/null +++ b/remuxer/src/demuxers/flac.rs @@ -0,0 +1,250 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin +*/ + +use crate::demuxers::{Demuxer, DemuxerNew, ReadSeek}; +use anyhow::{Result, anyhow, bail}; +use std::io::{BufReader, Read, Seek, SeekFrom}; +use winter_matroska::{Audio, Cluster, TrackEntry, TrackType, Tracks}; + +pub struct FlacDemuxer { + reader: BufReader>, + metadata: Option>, + first_frame_offset: u64, +} +impl DemuxerNew for FlacDemuxer { + fn new(reader: Box) -> Self { + Self { + reader: BufReader::new(reader), + metadata: None, + first_frame_offset: 0, + } + } +} + +struct MetadataBlock { + r#type: u8, + data: Vec, +} +impl MetadataBlock { + const TY_STREAMINFO: u8 = 0; + const TY_PADDING: u8 = 1; + const TY_APPLICATION: u8 = 2; + const TY_SEEK_TABLE: u8 = 3; + const TY_VORBIS_COMMENT: u8 = 4; + const TY_CUESHEET: u8 = 5; + const TY_PICTURE: u8 = 6; +} + +struct StreamInfo { + min_block_size: u16, + max_block_size: u16, + min_frame_size: u32, + max_frame_size: u32, + sample_rate: u32, + num_channels: u8, + bit_depth: u8, +} +impl StreamInfo { + pub fn parse(d: &[u8; 22]) -> Self { + let k = u64::from_be_bytes([d[10], d[11], d[12], d[13], d[14], d[15], d[16], d[17]]); + Self { + min_block_size: u16::from_be_bytes([d[0], d[1]]), + max_block_size: u16::from_be_bytes([d[2], d[3]]), + min_frame_size: u32::from_be_bytes([0, d[4], d[5], d[6]]), + max_frame_size: u32::from_be_bytes([0, d[7], d[8], d[9]]), + sample_rate: (k >> (64 - 20)) as u32, + num_channels: ((k >> (64 - 20 - 3)) & 0b111) as u8 + 1, + bit_depth: ((k >> (64 - 20 - 3 - 5)) & 0b11111) as u8 + 1, + } + } +} + +impl FlacDemuxer { + fn read_metadata(&mut self) -> Result<&Vec> { + if self.metadata.is_some() { + return Ok(self.metadata.as_ref().unwrap()); + } + self.reader.seek(SeekFrom::Start(0))?; + + let mut magic = [0u8; 4]; + self.reader.read_exact(&mut magic)?; + if magic != *b"fLaC" { + bail!("incorrect magic bytes") + } + + let mut blocks = Vec::new(); + loop { + let mut header = [0u8; 4]; + self.reader.read_exact(&mut header)?; + let last = header[0] & 0x80 != 0; + let r#type = header[0] & 0x7f; + let size = u32::from_be_bytes(header) & 0x00FFFFFF; + + let mut data = vec![0u8; size as usize]; + self.reader.read_exact(&mut data)?; + blocks.push(MetadataBlock { r#type, data }); + + if last { + break; + } + } + + self.first_frame_offset = self.reader.stream_position()?; + + self.metadata = Some(blocks); + return Ok(self.metadata.as_ref().unwrap()); + } + fn stream_info(&mut self) -> Result { + let m = self.read_metadata()?; + if m[0].r#type == MetadataBlock::TY_STREAMINFO { + Ok(StreamInfo::parse(m[0].data.as_slice().try_into().map_err( + |_| anyhow!("Streaminfo block is not 22 bytes"), + )?)) + } else { + bail!("first metadata block is not Streaminfo") + } + } +} +impl Demuxer for FlacDemuxer { + fn tracks(&mut self) -> Result> { + let si = self.stream_info()?; + let mut buf = Vec::new(); + buf.extend(b"fLaC"); + let meta = self.read_metadata()?; + for (i, mb) in meta.iter().enumerate() { + buf.push(if i == meta.len() - 1 { 0x80 } else { 0 } | mb.r#type); + buf.extend(&u32::to_be_bytes(0)[1..]); + buf.extend(&mb.data); + } + let te = TrackEntry { + codec_id: "A_FLAC".to_string(), + codec_private: Some(buf), + track_number: 1, + track_type: TrackType::Audio, + audio: Some(Audio { + bit_depth: Some(si.bit_depth as u64), + channels: si.num_channels as u64, + sampling_frequency: si.sample_rate as f64, + ..Default::default() + }), + ..Default::default() + }; + Ok(Some(Tracks { entries: vec![te] })) + } + + fn seek_cluster(&mut self, position: Option) -> Result<()> { + if let Some(position) = position { + self.reader.seek(SeekFrom::Start(position))?; + } else { + if self.first_frame_offset == 0 { + self.read_metadata()?; + } + self.reader.seek(SeekFrom::Start(self.first_frame_offset))?; + } + Ok(()) + } + fn read_cluster(&mut self) -> Result> { + let mut header = [0u8; 5]; + self.reader.read_exact(&mut header)?; + + let sync_and_blocking = u16::from_be_bytes([header[0], header[1]]); + let fixed_blocking = match sync_and_blocking { + 0xfff8 => true, + 0xfff9 => false, + _ => bail!("invalid frame sync code"), + }; + + let block_size_bits = header[2] >> 4; + let sample_rate_bits = header[2] & 0x0f; + + let channel_count = match header[3] >> 4 { + x @ 0..8 => x + 1, + 8..11 => 2, + _ => bail!("reserved channel bits used"), + }; + let bit_depth = match (header[3] >> 1) & 0b111 { + 0b000 => 0, // TODO streaminfo + 0b001 => 8, + 0b010 => 12, + 0b011 => bail!("reserved bit depth used"), + 0b100 => 16, + 0b101 => 20, + 0b110 => 24, + 0b111 => 32, + _ => unreachable!(), + }; + if header[3] & 1 != 0 { + bail!("reserveed bit set") + } + + let coded_num_length = match header[4].leading_ones() { + 0 => 0, + 1 => bail!("invalid coded number vint length (loc=1)"), + x @ 2..8 => x - 1, + 8 => bail!("invalid coded number vint length (loc=8)"), + _ => unreachable!(), + }; + let mut coded_num_buf = [0u8; 6]; + self.reader.read_exact(&mut coded_num_buf)?; + + let block_size = match block_size_bits { + 0b0000 => bail!("reserved block size used"), + 0b0001 => 192, + x @ 0b0010..=0b0101 => 144 * 2u32.pow(x as u32), + 0b0110 => { + let mut buf = [0u8; 1]; + self.reader.read_exact(&mut buf)?; + buf[0] as u32 + 1 + } + 0b0111 => { + let mut buf = [0u8; 2]; + self.reader.read_exact(&mut buf)?; + u16::from_be_bytes(buf) as u32 + 1 + } + x @ 0b1000..=0b1111 => 2u32.pow(x as u32), + _ => unreachable!(), + }; + + let sample_rate = match sample_rate_bits { + 0b0000 => 0, // TODO streaminfo, + 0b0001 => 88200, + 0b0010 => 176400, + 0b0011 => 192000, + 0b0100 => 8000, + 0b0101 => 16000, + 0b0110 => 22050, + 0b0111 => 24000, + 0b1000 => 32000, + 0b1001 => 44100, + 0b1010 => 48000, + 0b1011 => 96000, + 0b1100 => { + let mut buf = [0u8; 1]; + self.reader.read_exact(&mut buf)?; + buf[0] as u32 * 1000 + } + 0b1101 => { + let mut buf = [0u8; 2]; + self.reader.read_exact(&mut buf)?; + u16::from_be_bytes(buf) as u32 + } + 0b1110 => { + let mut buf = [0u8; 2]; + self.reader.read_exact(&mut buf)?; + u16::from_be_bytes(buf) as u32 * 10 + } + 0b1111 => bail!("forbidden sample rate bits used"), + _ => unreachable!(), + }; + + let mut crc_buf = [0u8; 1]; + self.reader.read_exact(&mut crc_buf)?; + + + + Ok(None) + } +} diff --git a/remuxer/src/demuxers/matroska.rs b/remuxer/src/demuxers/matroska.rs index 000970e..6301f15 100644 --- a/remuxer/src/demuxers/matroska.rs +++ b/remuxer/src/demuxers/matroska.rs @@ -141,32 +141,37 @@ impl MatroskaDemuxer { self.reader.read_exact(&mut buffer)?; Ok(Tag::read(&buffer)?) } - pub fn read_segment_tag(&mut self, name: &'static str, tag: u64) -> Result { + pub fn read_segment_tag( + &mut self, + name: &'static str, + tag: u64, + ) -> Result> { debug!("reading {name:?}"); - let size = self - .seek_to_segment_tag(tag)? - .ok_or(anyhow!("{name} tag missing"))?; + let Some(size) = self.seek_to_segment_tag(tag)? else { + return Ok(None); + }; self.read_tag(size) .context(anyhow!("parsing {name} failed")) + .map(Some) } } impl Demuxer for MatroskaDemuxer { - fn info(&mut self) -> Result { + fn info(&mut self) -> Result> { self.read_segment_tag("Info", Segment::TAG_INFO) } - fn tracks(&mut self) -> Result { + fn tracks(&mut self) -> Result> { self.read_segment_tag("Tracks", Segment::TAG_TRACKS) } - fn chapters(&mut self) -> Result { + fn chapters(&mut self) -> Result> { self.read_segment_tag("Chapters", Segment::TAG_CHAPTERS) } - fn attachments(&mut self) -> Result { + fn attachments(&mut self) -> Result> { self.read_segment_tag("Attachments", Segment::TAG_ATTACHMENTS) } - fn tags(&mut self) -> Result { + fn tags(&mut self) -> Result> { self.read_segment_tag("Tags", Segment::TAG_TAGS) } - fn cues(&mut self) -> Result { + fn cues(&mut self) -> Result> { self.read_segment_tag("Cues", Segment::TAG_CUES) } diff --git a/remuxer/src/demuxers/mod.rs b/remuxer/src/demuxers/mod.rs index e47e3d7..8940ca5 100644 --- a/remuxer/src/demuxers/mod.rs +++ b/remuxer/src/demuxers/mod.rs @@ -4,8 +4,13 @@ Copyright (C) 2025 metamuffin */ +pub mod flac; pub mod matroska; +use crate::{ + ContainerFormat, + demuxers::{flac::FlacDemuxer, matroska::MatroskaDemuxer}, +}; use anyhow::Result; use std::io::{Read, Seek}; use winter_matroska::{Attachments, Chapters, Cluster, Cues, Info, Tags, Tracks}; @@ -16,14 +21,23 @@ impl ReadSeek for T {} pub trait DemuxerNew: Demuxer + Sized { fn new(reader: Box) -> Self; } + +#[rustfmt::skip] pub trait Demuxer { - fn info(&mut self) -> Result; - fn tracks(&mut self) -> Result; - fn chapters(&mut self) -> Result; - fn attachments(&mut self) -> Result; - fn tags(&mut self) -> Result; - fn cues(&mut self) -> Result; + fn info(&mut self) -> Result> { Ok(None) } + fn tracks(&mut self) -> Result> { Ok(None) } + fn chapters(&mut self) -> Result> { Ok(None) } + fn attachments(&mut self) -> Result> { Ok(None) } + fn tags(&mut self) -> Result> { Ok(None) } + fn cues(&mut self) -> Result> { Ok(None) } fn seek_cluster(&mut self, position: Option) -> Result<()>; fn read_cluster(&mut self) -> Result>; } + +pub fn create_demuxer(container: ContainerFormat, reader: Box) -> Box { + match container { + ContainerFormat::Matroska | ContainerFormat::Webm => Box::new(MatroskaDemuxer::new(reader)), + ContainerFormat::Flac => Box::new(FlacDemuxer::new(reader)), + } +} diff --git a/remuxer/src/lib.rs b/remuxer/src/lib.rs index 041f386..049c12f 100644 --- a/remuxer/src/lib.rs +++ b/remuxer/src/lib.rs @@ -11,4 +11,5 @@ pub mod magic; pub enum ContainerFormat { Matroska, Webm, + Flac, } diff --git a/transcoder/Cargo.toml b/transcoder/Cargo.toml index 06dc1aa..e0d81a6 100644 --- a/transcoder/Cargo.toml +++ b/transcoder/Cargo.toml @@ -26,3 +26,4 @@ imgref = "1.11.0" ravif = "0.11.11" tokio = { workspace = true } serde = { version = "1.0.217", features = ["derive"] } +winter-matroska = { git = "https://codeberg.org/metamuffin/ebml-rs", package = "matroska" } diff --git a/transcoder/src/fragment.rs b/transcoder/src/fragment.rs index fc5fb21..4cd8b6f 100644 --- a/transcoder/src/fragment.rs +++ b/transcoder/src/fragment.rs @@ -14,6 +14,7 @@ use tokio::{ io::copy, process::{ChildStdin, Command}, }; +use winter_matroska::TrackEntry as MatroskaTrackEntry; // TODO odd video resolutions can cause errors when transcoding to YUV42{0,2} // TODO with an implementation that cant handle it (SVT-AV1 is such an impl). -- cgit v1.2.3-70-g09d2