about | summary | refs | log | tree | commit | diff
path: root/import
diff options
context:
space:
mode:
author: metamuffin <metamuffin@disroot.org> 2025-02-01 15:11:31 +0100
committer: metamuffin <metamuffin@disroot.org> 2025-02-01 15:11:31 +0100
commit: 2f053d597e91585deb9efd1a098537b244c25f56 (patch)
tree: cc6b6677f3a375e291be2ae5ef36563543476315 /import
parent: 95d90fbba36a752f7cf96ea0bebd925534341e15 (diff)
download: jellything-2f053d597e91585deb9efd1a098537b244c25f56.tar
jellything-2f053d597e91585deb9efd1a098537b244c25f56.tar.bz2
jellything-2f053d597e91585deb9efd1a098537b244c25f56.tar.zst
Revert "ebml-struct based fragment writer (does not work)"
This reverts commit 95d90fbba36a752f7cf96ea0bebd925534341e15.
Diffstat (limited to 'import')
-rw-r--r-- import/Cargo.toml 5
-rw-r--r-- import/src/lib.rs 14
-rw-r--r-- import/src/matroska.rs 120
3 files changed, 128 insertions, 11 deletions
diff --git a/import/Cargo.toml b/import/Cargo.toml
index 37b5a77..645326d 100644
--- a/import/Cargo.toml
+++ b/import/Cargo.toml
@@ -6,7 +6,10 @@ edition = "2021"
[dependencies]
jellybase = { path = "../base" }
jellyclient = { path = "../client" }
-jellyremuxer = { path = "../remuxer" }
+
+ebml-struct = { git = "https://codeberg.org/metamuffin/ebml-struct", features = [
+ "bincode",
+] }
rayon = "1.10.0"
crossbeam-channel = "0.5.14"
diff --git a/import/src/lib.rs b/import/src/lib.rs
index 2de4cc9..4be2151 100644
--- a/import/src/lib.rs
+++ b/import/src/lib.rs
@@ -14,7 +14,7 @@ use jellybase::{
database::Database,
CONF, SECRETS,
};
-use jellyremuxer::metadata::matroska_metadata;
+use matroska::matroska_metadata;
use rayon::iter::{ParallelDrainRange, ParallelIterator};
use std::{
collections::HashMap,
@@ -243,13 +243,6 @@ fn import_media_file(db: &Database, path: &Path, parent: NodeID) -> Result<()> {
let info = m.info.ok_or(anyhow!("no info"))?;
let tracks = m.tracks.ok_or(anyhow!("no tracks"))?;
- let infojson = m
- .infojson
- .as_ref()
- .map(|ij| serde_json::from_slice::<infojson::YVideo>(ij))
- .transpose()
- .context("infojson")?;
-
let mut tags = m
.tags
.map(|tags| {
@@ -267,7 +260,8 @@ fn import_media_file(db: &Database, path: &Path, parent: NodeID) -> Result<()> {
.to_string_lossy()
.to_string();
- let slug = infojson
+ let slug = m
+ .infojson
.as_ref()
.map(|ij| format!("youtube-{}", ij.id))
.unwrap_or(make_kebab(&filepath_stem));
@@ -317,7 +311,7 @@ fn import_media_file(db: &Database, path: &Path, parent: NodeID) -> Result<()> {
})
.collect::<Vec<_>>();
- if let Some(infojson) = infojson {
+ if let Some(infojson) = m.infojson {
node.kind = Some(
if !tracks
.iter()
diff --git a/import/src/matroska.rs b/import/src/matroska.rs
index e69de29..f9a59ab 100644
--- a/import/src/matroska.rs
+++ b/import/src/matroska.rs
@@ -0,0 +1,120 @@
+/*
+ This file is part of jellything (https://codeberg.org/metamuffin/jellything)
+ which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
+ Copyright (C) 2025 metamuffin <metamuffin.org>
+*/
+use crate::infojson::{self, YVideo};
+use anyhow::{Context, Result};
+use bincode::{Decode, Encode};
+use ebml_struct::{
+ ids::*,
+ matroska::*,
+ read::{EbmlReadExt, TagRead},
+};
+use jellybase::{
+ assetfed::AssetInner,
+ cache::{cache_file, cache_memory},
+ common::Asset,
+};
+use log::info;
+use std::{
+ fs::File,
+ io::{BufReader, ErrorKind, Read, Write},
+ path::Path,
+ sync::Arc,
+};
+
+#[derive(Encode, Decode, Clone)]
+pub(crate) struct MatroskaMetadata {
+ pub info: Option<Info>,
+ pub tracks: Option<Tracks>,
+ pub cover: Option<Asset>,
+ pub chapters: Option<Chapters>,
+ pub tags: Option<Tags>,
+ pub infojson: Option<YVideo>,
+}
+pub(crate) fn matroska_metadata(path: &Path) -> Result<Arc<Option<MatroskaMetadata>>> {
+ cache_memory(&["mkmeta-v1", path.to_string_lossy().as_ref()], || {
+ let mut magic = [0; 4];
+ File::open(path)?.read_exact(&mut magic).ok();
+ if !matches!(magic, [0x1A, 0x45, 0xDF, 0xA3]) {
+ return Ok(None);
+ }
+
+ info!("reading media file {path:?}");
+ let mut file = BufReader::new(File::open(path)?);
+ let mut file = file.by_ref().take(u64::MAX);
+
+ let (x, mut ebml) = file.read_tag()?;
+ assert_eq!(x, EL_EBML);
+ let ebml = Ebml::read(&mut ebml).unwrap();
+ assert!(ebml.doc_type == "matroska" || ebml.doc_type == "webm");
+ let (x, mut segment) = file.read_tag()?;
+ assert_eq!(x, EL_SEGMENT);
+
+ let mut info = None;
+ let mut infojson = None;
+ let mut tracks = None;
+ let mut cover = None;
+ let mut chapters = None;
+ let mut tags = None;
+ loop {
+ let (x, mut seg) = match segment.read_tag() {
+ Ok(o) => o,
+ Err(e) if e.kind() == ErrorKind::UnexpectedEof => break,
+ Err(e) => return Err(e.into()),
+ };
+ match x {
+ EL_INFO => info = Some(Info::read(&mut seg).context("info")?),
+ EL_TRACKS => tracks = Some(Tracks::read(&mut seg).context("tracks")?),
+ EL_CHAPTERS => chapters = Some(Chapters::read(&mut seg).context("chapters")?),
+ EL_TAGS => tags = Some(Tags::read(&mut seg).context("tags")?),
+ EL_ATTACHMENTS => {
+ let attachments = Attachments::read(&mut seg).context("attachments")?;
+ for f in attachments.files {
+ match f.name.as_str() {
+ "info.json" => {
+ infojson = Some(
+ serde_json::from_slice::<infojson::YVideo>(&f.data)
+ .context("infojson")?,
+ );
+ }
+ "cover.webp" | "cover.png" | "cover.jpg" | "cover.jpeg"
+ | "cover.avif" => {
+ cover = Some(
+ AssetInner::Cache(cache_file(
+ &["att-cover", path.to_string_lossy().as_ref()],
+ move |mut file| {
+ file.write_all(&f.data)?;
+ Ok(())
+ },
+ )?)
+ .ser(),
+ )
+ }
+ a => println!("{a:?}"),
+ }
+ }
+ }
+ EL_VOID | EL_CRC32 | EL_CUES | EL_SEEKHEAD => {
+ seg.consume()?;
+ }
+ EL_CLUSTER => {
+ break;
+ }
+ id => {
+ eprintln!("unknown top-level element {id:x}");
+ seg.consume()?;
+ }
+ }
+ }
+ Ok(Some(MatroskaMetadata {
+ chapters,
+ cover,
+ info,
+ infojson,
+ tags,
+ tracks,
+ }))
+ })
+}