about | summary | refs | log | tree | commit | diff
path: root/import/src
diff options
context:
space:
mode:
Diffstat (limited to 'import/src')
-rw-r--r-- import/src/lib.rs 14
-rw-r--r-- import/src/matroska.rs 120
2 files changed, 124 insertions, 10 deletions
diff --git a/import/src/lib.rs b/import/src/lib.rs
index 2de4cc9..4be2151 100644
--- a/import/src/lib.rs
+++ b/import/src/lib.rs
@@ -14,7 +14,7 @@ use jellybase::{
database::Database,
CONF, SECRETS,
};
-use jellyremuxer::metadata::matroska_metadata;
+use matroska::matroska_metadata;
use rayon::iter::{ParallelDrainRange, ParallelIterator};
use std::{
collections::HashMap,
@@ -243,13 +243,6 @@ fn import_media_file(db: &Database, path: &Path, parent: NodeID) -> Result<()> {
let info = m.info.ok_or(anyhow!("no info"))?;
let tracks = m.tracks.ok_or(anyhow!("no tracks"))?;
- let infojson = m
- .infojson
- .as_ref()
- .map(|ij| serde_json::from_slice::<infojson::YVideo>(ij))
- .transpose()
- .context("infojson")?;
-
let mut tags = m
.tags
.map(|tags| {
@@ -267,7 +260,8 @@ fn import_media_file(db: &Database, path: &Path, parent: NodeID) -> Result<()> {
.to_string_lossy()
.to_string();
- let slug = infojson
+ let slug = m
+ .infojson
.as_ref()
.map(|ij| format!("youtube-{}", ij.id))
.unwrap_or(make_kebab(&filepath_stem));
@@ -317,7 +311,7 @@ fn import_media_file(db: &Database, path: &Path, parent: NodeID) -> Result<()> {
})
.collect::<Vec<_>>();
- if let Some(infojson) = infojson {
+ if let Some(infojson) = m.infojson {
node.kind = Some(
if !tracks
.iter()
diff --git a/import/src/matroska.rs b/import/src/matroska.rs
index e69de29..f9a59ab 100644
--- a/import/src/matroska.rs
+++ b/import/src/matroska.rs
@@ -0,0 +1,120 @@
+/*
+ This file is part of jellything (https://codeberg.org/metamuffin/jellything)
+ which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
+ Copyright (C) 2025 metamuffin <metamuffin.org>
+*/
+use crate::infojson::{self, YVideo};
+use anyhow::{Context, Result};
+use bincode::{Decode, Encode};
+use ebml_struct::{
+ ids::*,
+ matroska::*,
+ read::{EbmlReadExt, TagRead},
+};
+use jellybase::{
+ assetfed::AssetInner,
+ cache::{cache_file, cache_memory},
+ common::Asset,
+};
+use log::info;
+use std::{
+ fs::File,
+ io::{BufReader, ErrorKind, Read, Write},
+ path::Path,
+ sync::Arc,
+};
+
+#[derive(Encode, Decode, Clone)]
+pub(crate) struct MatroskaMetadata {
+ pub info: Option<Info>,
+ pub tracks: Option<Tracks>,
+ pub cover: Option<Asset>,
+ pub chapters: Option<Chapters>,
+ pub tags: Option<Tags>,
+ pub infojson: Option<YVideo>,
+}
+pub(crate) fn matroska_metadata(path: &Path) -> Result<Arc<Option<MatroskaMetadata>>> {
+ cache_memory(&["mkmeta-v1", path.to_string_lossy().as_ref()], || {
+ let mut magic = [0; 4];
+ File::open(path)?.read_exact(&mut magic).ok();
+ if !matches!(magic, [0x1A, 0x45, 0xDF, 0xA3]) {
+ return Ok(None);
+ }
+
+ info!("reading media file {path:?}");
+ let mut file = BufReader::new(File::open(path)?);
+ let mut file = file.by_ref().take(u64::MAX);
+
+ let (x, mut ebml) = file.read_tag()?;
+ assert_eq!(x, EL_EBML);
+ let ebml = Ebml::read(&mut ebml).unwrap();
+ assert!(ebml.doc_type == "matroska" || ebml.doc_type == "webm");
+ let (x, mut segment) = file.read_tag()?;
+ assert_eq!(x, EL_SEGMENT);
+
+ let mut info = None;
+ let mut infojson = None;
+ let mut tracks = None;
+ let mut cover = None;
+ let mut chapters = None;
+ let mut tags = None;
+ loop {
+ let (x, mut seg) = match segment.read_tag() {
+ Ok(o) => o,
+ Err(e) if e.kind() == ErrorKind::UnexpectedEof => break,
+ Err(e) => return Err(e.into()),
+ };
+ match x {
+ EL_INFO => info = Some(Info::read(&mut seg).context("info")?),
+ EL_TRACKS => tracks = Some(Tracks::read(&mut seg).context("tracks")?),
+ EL_CHAPTERS => chapters = Some(Chapters::read(&mut seg).context("chapters")?),
+ EL_TAGS => tags = Some(Tags::read(&mut seg).context("tags")?),
+ EL_ATTACHMENTS => {
+ let attachments = Attachments::read(&mut seg).context("attachments")?;
+ for f in attachments.files {
+ match f.name.as_str() {
+ "info.json" => {
+ infojson = Some(
+ serde_json::from_slice::<infojson::YVideo>(&f.data)
+ .context("infojson")?,
+ );
+ }
+ "cover.webp" | "cover.png" | "cover.jpg" | "cover.jpeg"
+ | "cover.avif" => {
+ cover = Some(
+ AssetInner::Cache(cache_file(
+ &["att-cover", path.to_string_lossy().as_ref()],
+ move |mut file| {
+ file.write_all(&f.data)?;
+ Ok(())
+ },
+ )?)
+ .ser(),
+ )
+ }
+ a => println!("{a:?}"),
+ }
+ }
+ }
+ EL_VOID | EL_CRC32 | EL_CUES | EL_SEEKHEAD => {
+ seg.consume()?;
+ }
+ EL_CLUSTER => {
+ break;
+ }
+ id => {
+ eprintln!("unknown top-level element {id:x}");
+ seg.consume()?;
+ }
+ }
+ }
+ Ok(Some(MatroskaMetadata {
+ chapters,
+ cover,
+ info,
+ infojson,
+ tags,
+ tracks,
+ }))
+ })
+}