diff options
Diffstat (limited to 'import/src')
-rw-r--r-- | import/src/infojson.rs | 16 | ||||
-rw-r--r-- | import/src/lib.rs | 119 | ||||
-rw-r--r-- | import/src/matroska.rs | 112 |
3 files changed, 150 insertions, 97 deletions
diff --git a/import/src/infojson.rs b/import/src/infojson.rs index c2ae305..3e4667e 100644 --- a/import/src/infojson.rs +++ b/import/src/infojson.rs @@ -3,13 +3,13 @@ which is licensed under the GNU Affero General Public License (version 3); see /COPYING. Copyright (C) 2025 metamuffin <metamuffin.org> */ - use anyhow::Context; +use bincode::{Decode, Encode}; use jellycommon::chrono::{format::Parsed, Utc}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)] pub struct YVideo { pub id: String, pub title: String, @@ -63,7 +63,7 @@ pub struct YVideo { pub epoch: usize, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)] pub struct YCaption { pub url: Option<String>, pub ext: String, //"vtt" | "json3" | "srv1" | "srv2" | "srv3" | "ttml", @@ -71,7 +71,7 @@ pub struct YCaption { pub name: Option<String>, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)] pub struct YFormat { pub format_id: String, pub format_note: Option<String>, @@ -96,13 +96,13 @@ pub struct YFormat { pub format: String, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)] pub struct YFragment { pub url: Option<String>, pub duration: Option<f64>, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)] pub struct YThumbnail { pub url: String, pub preference: Option<i32>, @@ -112,14 +112,14 @@ pub struct YThumbnail { pub resolution: Option<String>, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)] pub struct YChapter { pub start_time: f64, pub end_time: f64, pub title: String, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)] pub struct YHeatmapSample { pub start_time: f64, pub end_time: f64, diff --git a/import/src/lib.rs b/import/src/lib.rs index add7e4d..10bd0ec 100644 --- a/import/src/lib.rs +++ b/import/src/lib.rs @@ -4,24 +4,19 @@ Copyright (C) 2025 metamuffin <metamuffin.org> */ use anyhow::{anyhow, Context, Result}; -use ebml_struct::{ - ids::*, - matroska::*, - read::{EbmlReadExt, TagRead}, -}; use infojson::YVideo; -use jellybase::{assetfed::AssetInner, cache::cache_file, database::Database, CONF, SECRETS}; +use jellybase::{assetfed::AssetInner, database::Database, CONF, SECRETS}; use jellycommon::{ Chapter, LocalTrack, MediaInfo, Node, NodeID, NodeKind, Rating, SourceTrack, SourceTrackKind, TrackSource, }; use log::info; +use matroska::matroska_metadata; use rayon::iter::{ParallelDrainRange, ParallelIterator}; -use regex::Regex; use std::{ collections::HashMap, fs::File, - io::{BufReader, ErrorKind, Read, Write}, + io::{BufReader, Read}, mem::swap, path::{Path, PathBuf}, sync::LazyLock, @@ -35,14 +30,15 @@ use tokio::{ use trakt::Trakt; pub mod infojson; +pub mod matroska; pub mod tmdb; pub mod trakt; static IMPORT_SEM: LazyLock<Semaphore> = LazyLock::new(|| Semaphore::new(1)); pub static IMPORT_ERRORS: RwLock<Vec<String>> = RwLock::const_new(Vec::new()); -static RE_EPISODE_FILENAME: LazyLock<Regex> = - LazyLock::new(|| Regex::new(r#"([sS](\d+))?([eE](\d+))( (.+))?"#).unwrap()); +// static RE_EPISODE_FILENAME: LazyLock<Regex> = +// LazyLock::new(|| Regex::new(r#"([sS](\d+))?([eE](\d+))( (.+))?"#).unwrap()); struct Apis { trakt: Option<Trakt>, @@ -57,6 +53,7 @@ pub async fn import_wrap(db: Database, incremental: bool) -> Result<()> { let _sem = IMPORT_SEM.try_acquire()?; let jh = spawn_blocking(move || { + *IMPORT_ERRORS.blocking_write() = Vec::new(); if let Err(e) = import(&db, incremental) { IMPORT_ERRORS.blocking_write().push(format!("{e:#}")); } @@ -121,25 +118,26 @@ fn import_iter_inner(path: &Path, db: &Database, incremental: bool) -> Result<Ve } fn import_file(db: &Database, path: &Path) -> Result<()> { - let parent = NodeID::from_slug( - &path - .parent() - .ok_or(anyhow!("no parent"))? - .file_name() - .ok_or(anyhow!("parent no filename"))? - .to_string_lossy(), - ); + let parent_slug = path + .parent() + .ok_or(anyhow!("no parent"))? + .file_name() + .ok_or(anyhow!("parent no filename"))? + .to_string_lossy(); + let parent = NodeID::from_slug(&parent_slug); let filename = path.file_name().unwrap().to_string_lossy(); match filename.as_ref() { "poster.jpeg" | "poster.webp" => { db.update_node_init(parent, |node| { + node.slug = parent_slug.to_string(); node.poster = Some(AssetInner::Media(path.to_owned()).ser()); Ok(()) })?; } "backdrop.jpeg" | "backdrop.webp" => { db.update_node_init(parent, |node| { + node.slug = parent_slug.to_string(); node.backdrop = Some(AssetInner::Media(path.to_owned()).ser()); Ok(()) })?; @@ -147,6 +145,7 @@ fn import_file(db: &Database, path: &Path) -> Result<()> { "info.json" | "info.yaml" => { let data = serde_yaml::from_reader::<_, Node>(BufReader::new(File::open(path)?))?; db.update_node_init(parent, |node| { + node.slug = parent_slug.to_string(); fn merge_option<T>(a: &mut Option<T>, b: Option<T>) { if b.is_some() { *a = b; @@ -161,6 +160,7 @@ fn import_file(db: &Database, path: &Path) -> Result<()> { "channel.info.json" => { let data = serde_json::from_reader::<_, YVideo>(BufReader::new(File::open(path)?))?; db.update_node_init(parent, |node| { + node.slug = parent_slug.to_string(); node.title = Some( data.title .strip_suffix(" - Videos") @@ -189,76 +189,14 @@ fn import_file(db: &Database, path: &Path) -> Result<()> { fn import_media_file(db: &Database, path: &Path, parent: NodeID) -> Result<()> { info!("reading media file {path:?}"); - let mut file = BufReader::new(File::open(path)?); - let mut file = file.by_ref().take(u64::MAX); - - let (x, mut ebml) = file.read_tag()?; - assert_eq!(x, EL_EBML); - let ebml = Ebml::read(&mut ebml).unwrap(); - assert!(ebml.doc_type == "matroska" || ebml.doc_type == "webm"); - let (x, mut segment) = file.read_tag()?; - assert_eq!(x, EL_SEGMENT); - let mut info = None; - let mut infojson = None; - let mut tracks = None; - let mut cover = None; - let mut chapters = None; - let mut tags = None; - loop { - let (x, mut seg) = match segment.read_tag() { - Ok(o) => o, - Err(e) if e.kind() == ErrorKind::UnexpectedEof => break, - Err(e) => return Err(e.into()), - }; - match x { - EL_INFO => info = Some(Info::read(&mut seg).context("info")?), - EL_TRACKS => tracks = Some(Tracks::read(&mut seg).context("tracks")?), - EL_CHAPTERS => chapters = Some(Chapters::read(&mut seg).context("chapters")?), - EL_TAGS => tags = Some(Tags::read(&mut seg).context("tags")?), - EL_ATTACHMENTS => { - let attachments = Attachments::read(&mut seg).context("attachments")?; - for f in attachments.files { - match f.name.as_str() { - "info.json" => { - infojson = Some( - serde_json::from_slice::<infojson::YVideo>(&f.data) - .context("infojson")?, - ); - } - "cover.webp" | "cover.png" | "cover.jpg" | "cover.jpeg" | "cover.avif" => { - cover = Some( - AssetInner::Cache(cache_file( - &["att-cover", path.to_string_lossy().as_ref()], - move |mut file| { - file.write_all(&f.data)?; - Ok(()) - }, - )?) - .ser(), - ) - } - a => println!("{a:?}"), - } - } - } - EL_VOID | EL_CRC32 | EL_CUES | EL_SEEKHEAD => { - seg.consume()?; - } - EL_CLUSTER => { - break; - } - id => { - eprintln!("unknown top-level element {id:x}"); - seg.consume()?; - } - } - } + let m = (*matroska_metadata(path)?).to_owned(); - let info = info.ok_or(anyhow!("no info"))?; - let tracks = tracks.ok_or(anyhow!("no tracks"))?; + let info = m.info.ok_or(anyhow!("no info"))?; + let tracks = m.tracks.ok_or(anyhow!("no tracks"))?; - let mut tags = tags + let mut tags = m + .tags .map(|tags| { tags.tags .into_iter() @@ -274,7 +212,8 @@ fn import_media_file(db: &Database, path: &Path, parent: NodeID) -> Result<()> { .to_string_lossy() .to_string(); - let slug = infojson + let slug = m + .infojson .as_ref() .map(|ij| format!("youtube-{}", ij.id)) .unwrap_or(make_kebab(&filepath_stem)); @@ -282,13 +221,13 @@ fn import_media_file(db: &Database, path: &Path, parent: NodeID) -> Result<()> { db.update_node_init(NodeID::from_slug(&slug), |node| { node.slug = slug; node.title = info.title; - node.poster = cover; + node.poster = m.cover.clone(); node.description = tags.remove("DESCRIPTION"); node.tagline = tags.remove("COMMENT"); if !node.parents.contains(&parent) { node.parents.push(parent) } - if let Some(infojson) = infojson { + if let Some(infojson) = m.infojson { node.kind = Some( if infojson.duration.unwrap_or(0.) < 600. && infojson.aspect_ratio.unwrap_or(2.) < 1. @@ -314,7 +253,9 @@ fn import_media_file(db: &Database, path: &Path, parent: NodeID) -> Result<()> { } } node.media = Some(MediaInfo { - chapters: chapters + chapters: m + .chapters + .clone() .map(|c| { let mut chaps = Vec::new(); if let Some(ee) = c.edition_entries.first() { diff --git a/import/src/matroska.rs b/import/src/matroska.rs new file mode 100644 index 0000000..bb8d927 --- /dev/null +++ b/import/src/matroska.rs @@ -0,0 +1,112 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin <metamuffin.org> +*/ +use crate::infojson::{self, YVideo}; +use anyhow::{Context, Result}; +use bincode::{Decode, Encode}; +use ebml_struct::{ + ids::*, + matroska::*, + read::{EbmlReadExt, TagRead}, +}; +use jellybase::{ + assetfed::AssetInner, + cache::{cache_file, cache_memory}, +}; +use jellycommon::Asset; +use std::{ + fs::File, + io::{BufReader, ErrorKind, Read, Write}, + path::Path, + sync::Arc, +}; + +#[derive(Encode, Decode, Clone)] +pub(crate) struct MatroskaMetadata { + pub info: Option<Info>, + pub tracks: Option<Tracks>, + pub cover: Option<Asset>, + pub chapters: Option<Chapters>, + pub tags: Option<Tags>, + pub infojson: Option<YVideo>, +} +pub(crate) fn matroska_metadata(path: &Path) -> Result<Arc<MatroskaMetadata>> { + cache_memory(&["mkmeta-v1", path.to_string_lossy().as_ref()], || { + let mut file = BufReader::new(File::open(path)?); + let mut file = file.by_ref().take(u64::MAX); + + let (x, mut ebml) = file.read_tag()?; + assert_eq!(x, EL_EBML); + let ebml = Ebml::read(&mut ebml).unwrap(); + assert!(ebml.doc_type == "matroska" || ebml.doc_type == "webm"); + let (x, mut segment) = file.read_tag()?; + assert_eq!(x, EL_SEGMENT); + + let mut info = None; + let mut infojson = None; + let mut tracks = None; + let mut cover = None; + let mut chapters = None; + let mut tags = None; + loop { + let (x, mut seg) = match segment.read_tag() { + Ok(o) => o, + Err(e) if e.kind() == ErrorKind::UnexpectedEof => break, + Err(e) => return Err(e.into()), + }; + match x { + EL_INFO => info = Some(Info::read(&mut seg).context("info")?), + EL_TRACKS => tracks = Some(Tracks::read(&mut seg).context("tracks")?), + EL_CHAPTERS => chapters = Some(Chapters::read(&mut seg).context("chapters")?), + EL_TAGS => tags = Some(Tags::read(&mut seg).context("tags")?), + EL_ATTACHMENTS => { + let attachments = Attachments::read(&mut seg).context("attachments")?; + for f in attachments.files { + match f.name.as_str() { + "info.json" => { + infojson = Some( + serde_json::from_slice::<infojson::YVideo>(&f.data) + .context("infojson")?, + ); + } + "cover.webp" | "cover.png" | "cover.jpg" | "cover.jpeg" + | "cover.avif" => { + cover = Some( + AssetInner::Cache(cache_file( + &["att-cover", path.to_string_lossy().as_ref()], + move |mut file| { + file.write_all(&f.data)?; + Ok(()) + }, + )?) + .ser(), + ) + } + a => println!("{a:?}"), + } + } + } + EL_VOID | EL_CRC32 | EL_CUES | EL_SEEKHEAD => { + seg.consume()?; + } + EL_CLUSTER => { + break; + } + id => { + eprintln!("unknown top-level element {id:x}"); + seg.consume()?; + } + } + } + Ok(MatroskaMetadata { + chapters, + cover, + info, + infojson, + tags, + tracks, + }) + }) +} |