aboutsummaryrefslogtreecommitdiff
path: root/remuxer/src/metadata.rs
diff options
context:
space:
mode:
Diffstat (limited to 'remuxer/src/metadata.rs')
-rw-r--r--remuxer/src/metadata.rs378
1 files changed, 378 insertions, 0 deletions
diff --git a/remuxer/src/metadata.rs b/remuxer/src/metadata.rs
new file mode 100644
index 0000000..3fd82ce
--- /dev/null
+++ b/remuxer/src/metadata.rs
@@ -0,0 +1,378 @@
+/*
+ This file is part of jellything (https://codeberg.org/metamuffin/jellything)
+ which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
+ Copyright (C) 2024 metamuffin <metamuffin.org>
+*/
+use anyhow::{anyhow, bail, Context, Result};
+use bincode::{Decode, Encode};
+use jellycommon::{Chapter, LocalTrack, SourceTrack, SourceTrackKind};
+use jellymatroska::{
+ matroska::MatroskaTag,
+ read::EbmlReader,
+ unflatten::{Unflat, Unflatten},
+};
+use log::{debug, error, info, warn};
+use std::{path::PathBuf, time::Instant};
+
+/// Metadata gathered from the header sections of one Matroska/WebM segment.
+///
+/// Produced by `import_metadata`. All fields start out at their `Default`
+/// value and are only filled in when the corresponding element is present.
+///
+/// NOTE(review): this type derives bincode `Encode`/`Decode`, so the field
+/// order is presumably part of the serialized format - confirm before
+/// reordering or inserting fields.
+#[derive(Default, Clone, Debug, Encode, Decode)]
+pub struct MatroskaMetadata {
+ /// Value of the `Title` element found inside `Info`.
+ pub title: Option<String>,
+ /// Value of the simple tag named `DESCRIPTION`.
+ pub description: Option<String>,
+ /// Value of the simple tag named `COMMENT`.
+ pub tagline: Option<String>,
+ /// One entry per `TrackEntry`, in the order the entries were read.
+ pub tracks: Vec<SourceTrack>,
+ /// Pushed together with `tracks` (same order); carries the track number
+ /// and codec private data. The `path` is left empty by the importer.
+ pub track_sources: Vec<LocalTrack>,
+ /// Image attachment (`image/jpeg`, `image/png` or `image/webp`) stored as
+ /// `(mime type, raw bytes)`. A later image attachment overwrites an earlier one.
+ pub cover: Option<(String, Vec<u8>)>,
+ /// UTF-8 contents of the attachment named `info.json` with mime type
+ /// `application/json`.
+ pub infojson: Option<String>,
+ /// Chapter atoms of all edition entries, in reading order.
+ pub chapters: Vec<Chapter>,
+ /// `Duration * TimestampScale / 1e9`, i.e. seconds when the timestamp scale
+ /// is in nanoseconds per tick. Stays `0.0` when no `Duration` element was read.
+ pub duration: f64,
+}
+
+/// Scans the top-level EBML elements of `input` and returns the metadata of
+/// the first `Segment` encountered.
+///
+/// The `Ebml` header is only inspected to log an error when its doc type is
+/// neither `matroska` nor `webm`; every other top-level element is ignored.
+/// Scanning stops at the first reader error (anything but an I/O error is
+/// logged as a warning first).
+///
+/// # Errors
+///
+/// Returns "no segment found" when the input ends, or a reader error occurs,
+/// before a `Segment` shows up, and forwards any error raised while reading
+/// the segment itself.
+pub fn import_metadata(input: &mut EbmlReader) -> Result<MatroskaMetadata> {
+    loop {
+        let item = match input.next() {
+            Some(Ok((_, tag))) => tag,
+            // End of input and plain I/O errors end the scan quietly.
+            None | Some(Err(jellymatroska::error::Error::Io(_))) => break,
+            Some(Err(e)) => {
+                warn!("{e}");
+                break;
+            }
+        };
+        match item {
+            MatroskaTag::Ebml(_) => {
+                let mut header = Unflatten::new_with_end(input, item);
+                while let Some(Ok(Unflat { item, .. })) = header.n() {
+                    if let MatroskaTag::DocType(doc_type) = &item {
+                        let supported = matches!(doc_type.as_str(), "matroska" | "webm");
+                        if !supported {
+                            error!("file is neither matroska nor webm but {:?}", doc_type)
+                        }
+                    } else {
+                        debug!("(re) tag ignored: {item:?}")
+                    }
+                }
+            }
+            MatroskaTag::Segment(_) => {
+                info!("extracting metadata...");
+                let mut segment = Unflatten::new_with_end(input, item);
+                let started = Instant::now();
+                let metadata = import_read_segment(&mut segment)?;
+                info!("done in {:?}", started.elapsed());
+                return Ok(metadata);
+            }
+            _ => debug!("(r) tag ignored: {item:?}"),
+        }
+    }
+    Err(anyhow!("no segment found"))
+}
+
+/// Walks the direct children of a `Segment` and collects title, tags,
+/// attachments, chapters and track descriptions into a `MatroskaMetadata`.
+///
+/// Reading stops early once `Info`, `Tags`, `Attachments`, `Chapters` and
+/// `Tracks` have all been seen; in every case `segment.exit_dirty()` is called
+/// before returning successfully. A reader error inside the segment ends the
+/// walk without being reported (the `while let Some(Ok(..))` loops simply stop).
+///
+/// # Errors
+///
+/// Fails when the file is structurally incomplete: a master element without
+/// children, an attachment lacking name/data/mime type, an `info.json`
+/// attachment that is not UTF-8, or a track lacking its number, type, codec id
+/// or (for video) pixel dimensions, or carrying an unknown track type.
+fn import_read_segment(segment: &mut Unflatten) -> Result<MatroskaMetadata> {
+    let (mut timestamp_scale, mut duration) = (None, None);
+    let mut m = MatroskaMetadata::default();
+
+    let (
+        mut info_found,
+        mut tags_found,
+        mut attachments_found,
+        mut tracks_found,
+        mut found_chapters,
+    ) = (false, false, false, false, false);
+
+    while let Some(Ok(Unflat { children, item, .. })) = segment.n() {
+        match item {
+            // Sections that carry nothing we want to import.
+            MatroskaTag::SeekHead(_)
+            | MatroskaTag::Void(_)
+            | MatroskaTag::Cues(_)
+            | MatroskaTag::Cluster(_) => {}
+            MatroskaTag::Info(_) => {
+                info_found = true;
+                let mut children = children.context("info element without children")?;
+                while let Some(Ok(Unflat { item, .. })) = children.n() {
+                    match item {
+                        MatroskaTag::Title(t) => m.title = Some(t),
+                        MatroskaTag::TimestampScale(v) => timestamp_scale = Some(v),
+                        MatroskaTag::Duration(v) => duration = Some(v),
+                        _ => debug!("(rsi) tag ignored: {item:?}"),
+                    }
+                }
+            }
+            MatroskaTag::Tags(_) => {
+                tags_found = true;
+                let mut children = children.context("tags element without children")?;
+                import_read_tags(&mut children, &mut m)?;
+            }
+            MatroskaTag::Attachments(_) => {
+                attachments_found = true;
+                let mut children = children.context("attachments element without children")?;
+                import_read_attachments(&mut children, &mut m)?;
+            }
+            MatroskaTag::Chapters(_) => {
+                found_chapters = true;
+                let mut children = children.context("chapters element without children")?;
+                import_read_chapters(&mut children, &mut m)?;
+            }
+            MatroskaTag::Tracks(_) => {
+                tracks_found = true;
+                let mut children = children.context("tracks element without children")?;
+                import_read_tracks(&mut children, &mut m)?;
+            }
+            _ => warn!("(rs) tag ignored: {item:?}"),
+        };
+        if info_found && tracks_found && attachments_found && tags_found && found_chapters {
+            debug!("we found all we need, stopping read early");
+            break;
+        }
+    }
+    segment.exit_dirty();
+
+    if let Some(duration) = duration {
+        // Duration is counted in ticks of `TimestampScale`; dividing by 1e9
+        // turns the product into seconds when the scale is in nanoseconds.
+        m.duration = (duration * timestamp_scale.unwrap_or(1_000_000) as f64) / 1_000_000_000_f64;
+    }
+
+    Ok(m)
+}
+
+/// Reads the children of a `Tags` element and stores the `DESCRIPTION` and
+/// `COMMENT` simple tags as description and tagline.
+fn import_read_tags(tags: &mut Unflatten, m: &mut MatroskaMetadata) -> Result<()> {
+    while let Some(Ok(Unflat { children, item, .. })) = tags.n() {
+        match item {
+            MatroskaTag::Tag(_) => {
+                let mut tag = children.context("tag element without children")?;
+                while let Some(Ok(Unflat { children, item, .. })) = tag.n() {
+                    match item {
+                        MatroskaTag::SimpleTag(_) => {
+                            let mut simple =
+                                children.context("simple tag element without children")?;
+                            let (mut key, mut value) = (None, None);
+                            while let Some(Ok(Unflat { item, .. })) = simple.n() {
+                                match item {
+                                    MatroskaTag::TagName(k) => key = Some(k),
+                                    MatroskaTag::TagString(v) => value = Some(v),
+                                    _ => debug!("(rstts) tag ignored: {item:?}"),
+                                }
+                            }
+                            match (key, value) {
+                                (Some(key), Some(value)) => match key.as_str() {
+                                    "DESCRIPTION" => m.description = Some(value),
+                                    "COMMENT" => m.tagline = Some(value),
+                                    _ => debug!("simple tag ignored: {key:?}"),
+                                },
+                                (None, None) => (),
+                                _ => warn!("simple tag with only one of name/string"),
+                            }
+                        }
+                        _ => debug!("(rstt) tag ignored: {item:?}"),
+                    }
+                }
+            }
+            MatroskaTag::Crc32(_) => {}
+            _ => debug!("(rst) tag ignored: {item:?}"),
+        }
+    }
+    Ok(())
+}
+
+/// Reads the children of an `Attachments` element, keeping an `info.json`
+/// attachment as text and a JPEG/PNG/WebP attachment as cover image.
+fn import_read_attachments(attachments: &mut Unflatten, m: &mut MatroskaMetadata) -> Result<()> {
+    while let Some(Ok(Unflat { children, item, .. })) = attachments.n() {
+        match item {
+            MatroskaTag::AttachedFile(_) => {
+                let mut file = children.context("attached file element without children")?;
+                let (mut name, mut data, mut mime) = (None, None, None);
+                while let Some(Ok(Unflat { item, .. })) = file.n() {
+                    match item {
+                        MatroskaTag::FileName(n) => name = Some(n),
+                        MatroskaTag::FileData(d) => data = Some(d),
+                        MatroskaTag::FileMimeType(t) => mime = Some(t),
+                        _ => debug!("(rsaa) tag ignored: {item:?}"),
+                    }
+                }
+                let name = name.context("attachment without name")?;
+                let data = data.context("attachment without data")?;
+                let mime = mime.context("attachment without mime type")?;
+                info!("attachment found: {name:?} type {mime:?}");
+                match (name.as_str(), mime.as_str()) {
+                    ("info.json", "application/json") => {
+                        m.infojson = Some(String::from_utf8(data).context("info.json invalid")?)
+                    }
+                    (_, "image/jpeg" | "image/png" | "image/webp") => {
+                        m.cover = Some((mime, data))
+                    }
+                    _ => (),
+                }
+            }
+            _ => debug!("(rsa) tag ignored: {item:?}"),
+        }
+    }
+    Ok(())
+}
+
+/// Reads the children of a `Chapters` element and appends every chapter atom
+/// of every edition entry to `m.chapters`.
+fn import_read_chapters(chapters: &mut Unflatten, m: &mut MatroskaMetadata) -> Result<()> {
+    while let Some(Ok(Unflat { children, item, .. })) = chapters.n() {
+        match item {
+            MatroskaTag::EditionEntry(_) => {
+                let mut edition = children.context("edition entry without children")?;
+                while let Some(Ok(Unflat { children, item, .. })) = edition.n() {
+                    match item {
+                        MatroskaTag::EditionUID(_)
+                        | MatroskaTag::EditionFlagHidden(_)
+                        | MatroskaTag::EditionFlagDefault(_) => {}
+                        MatroskaTag::ChapterAtom(_) => {
+                            let mut atom = children.context("chapter atom without children")?;
+                            m.chapters.push(import_read_chapter_atom(&mut atom)?);
+                        }
+                        _ => warn!("(rsce) tag ignored: {item:?}"),
+                    }
+                }
+                if !m.chapters.is_empty() {
+                    info!("{} chapters added", m.chapters.len());
+                }
+            }
+            _ => warn!("(rsc) tag ignored: {item:?}"),
+        }
+    }
+    Ok(())
+}
+
+/// Builds one `Chapter` from the children of a `ChapterAtom`.
+///
+/// Start/end times are scaled by `1e-9`; each `ChapterDisplay` contributes one
+/// `(language, text)` label, with empty strings for missing parts.
+fn import_read_chapter_atom(atom: &mut Unflatten) -> Result<Chapter> {
+    let mut chap = Chapter::default();
+    while let Some(Ok(Unflat { children, item, .. })) = atom.n() {
+        match item {
+            MatroskaTag::ChapterFlagEnabled(_)
+            | MatroskaTag::ChapterFlagHidden(_)
+            | MatroskaTag::ChapterUID(_) => (),
+            MatroskaTag::ChapterTimeStart(t) => chap.time_start = Some(t as f64 * 1e-9),
+            MatroskaTag::ChapterTimeEnd(t) => chap.time_end = Some(t as f64 * 1e-9),
+            MatroskaTag::ChapterDisplay(_) => {
+                let mut display = children.context("chapter display without children")?;
+                let mut string = String::new();
+                let mut lang = String::new();
+                while let Some(Ok(Unflat { item, .. })) = display.n() {
+                    match item {
+                        MatroskaTag::ChapString(s) => string = s,
+                        MatroskaTag::ChapLanguage(l) => lang = l,
+                        _ => warn!("(rscead) tag ignored: {item:?}"),
+                    }
+                }
+                chap.labels.push((lang, string))
+            }
+            _ => warn!("(rscea) tag ignored: {item:?}"),
+        }
+    }
+    Ok(chap)
+}
+
+/// Reads the children of a `Tracks` element, pushing one `SourceTrack` and one
+/// matching `LocalTrack` per `TrackEntry`.
+fn import_read_tracks(tracks: &mut Unflatten, m: &mut MatroskaMetadata) -> Result<()> {
+    while let Some(Ok(Unflat { children, item, .. })) = tracks.n() {
+        match item {
+            MatroskaTag::TrackEntry(_) => {
+                let mut entry = children.context("track entry without children")?;
+                let (track, source) = import_read_track_entry(&mut entry)?;
+                m.tracks.push(track);
+                m.track_sources.push(source);
+            }
+            MatroskaTag::Crc32(_) => {}
+            _ => warn!("(rst) tag ignored: {item:?}"),
+        }
+    }
+    Ok(())
+}
+
+/// Parses a single `TrackEntry` into its description and local source record.
+///
+/// Track number, track type and codec id are mandatory, as are pixel width and
+/// height for video tracks; their absence is reported as an error instead of
+/// panicking, since the input file is not trusted.
+fn import_read_track_entry(entry: &mut Unflatten) -> Result<(SourceTrack, LocalTrack)> {
+    let mut index = None;
+    let mut language = None;
+    let mut codec = None;
+    let mut track_type = None;
+    let mut sample_rate = None;
+    let mut channels = None;
+    let mut width = None;
+    let mut height = None;
+    let mut display_width = None;
+    let mut display_height = None;
+    let mut name = None;
+    let mut fps = None;
+    let mut bit_depth = None;
+    let mut codec_private = None;
+    let mut default_duration = None;
+    let mut display_unit = None;
+
+    while let Some(Ok(Unflat { children, item, .. })) = entry.n() {
+        match item {
+            MatroskaTag::CodecID(b) => codec = Some(b),
+            MatroskaTag::Language(v) => language = Some(v),
+            MatroskaTag::TrackNumber(v) => index = Some(v),
+            MatroskaTag::TrackType(v) => track_type = Some(v),
+            MatroskaTag::Name(v) => name = Some(v),
+            MatroskaTag::CodecPrivate(v) => codec_private = Some(v),
+            MatroskaTag::DefaultDuration(v) => default_duration = Some(v),
+            MatroskaTag::Audio(_) => {
+                let mut audio = children.context("audio element without children")?;
+                while let Some(Ok(Unflat { item, .. })) = audio.n() {
+                    match item {
+                        MatroskaTag::Channels(v) => channels = Some(v as usize),
+                        MatroskaTag::SamplingFrequency(v) => sample_rate = Some(v),
+                        MatroskaTag::BitDepth(v) => bit_depth = Some(v),
+                        _ => (),
+                    }
+                }
+            }
+            MatroskaTag::Video(_) => {
+                let mut video = children.context("video element without children")?;
+                while let Some(Ok(Unflat { item, .. })) = video.n() {
+                    match item {
+                        MatroskaTag::PixelWidth(v) => width = Some(v),
+                        MatroskaTag::PixelHeight(v) => height = Some(v),
+                        MatroskaTag::DisplayWidth(v) => display_width = Some(v),
+                        MatroskaTag::DisplayHeight(v) => display_height = Some(v),
+                        MatroskaTag::DisplayUnit(v) => display_unit = Some(v),
+                        MatroskaTag::FrameRate(v) => fps = Some(v),
+                        _ => (),
+                    }
+                }
+            }
+            _ => (),
+        }
+    }
+
+    let track_index = index.context("track number required")?;
+    let kind = match track_type.ok_or_else(|| anyhow!("track type required"))? {
+        1 => SourceTrackKind::Video {
+            fps,
+            width: width.context("video track without pixel width")?,
+            height: height.context("video track without pixel height")?,
+            display_width,
+            display_height,
+            display_unit,
+        },
+        2 => SourceTrackKind::Audio {
+            bit_depth: bit_depth.map(|x| x as usize),
+            // Fallbacks are the defaults the Matroska specification assigns
+            // to absent `Channels` (1) and `SamplingFrequency` (8000 Hz).
+            channels: channels.unwrap_or(1),
+            sample_rate: sample_rate.unwrap_or(8_000.0),
+        },
+        17 => SourceTrackKind::Subtitles,
+        _ => bail!("invalid track type"),
+    };
+
+    let track = SourceTrack {
+        federated: vec![],
+        default_duration,
+        name: name.unwrap_or_else(|| "unnamed".to_string()),
+        codec: codec.context("track without codec id")?,
+        language: language.unwrap_or_else(|| "none".to_string()),
+        kind,
+    };
+    let source = LocalTrack {
+        track: track_index as usize,
+        // The importer does not know the file location; left empty here.
+        path: PathBuf::new(),
+        codec_private,
+    };
+    Ok((track, source))
+}