diff options
| author | metamuffin <metamuffin@disroot.org> | 2025-12-10 16:21:38 +0100 |
|---|---|---|
| committer | metamuffin <metamuffin@disroot.org> | 2025-12-10 16:21:38 +0100 |
| commit | a0cfd77b4d19c43a28c4d82072e6ff136e336af3 (patch) | |
| tree | 05df9f5faa54cef0ae4136fffddea57fbbafee6b /import/src/lib.rs | |
| parent | 242d5763d451eed2402be7afde50cd9fa0d6bc79 (diff) | |
| download | jellything-a0cfd77b4d19c43a28c4d82072e6ff136e336af3.tar jellything-a0cfd77b4d19c43a28c4d82072e6ff136e336af3.tar.bz2 jellything-a0cfd77b4d19c43a28c4d82072e6ff136e336af3.tar.zst | |
refactor import plugins part 1
Diffstat (limited to 'import/src/lib.rs')
| -rw-r--r-- | import/src/lib.rs | 473 |
1 files changed, 61 insertions, 412 deletions
diff --git a/import/src/lib.rs b/import/src/lib.rs index e31127e..36c65d3 100644 --- a/import/src/lib.rs +++ b/import/src/lib.rs @@ -5,54 +5,47 @@ */ #![feature(duration_constants)] -pub mod acoustid; -pub mod infojson; -pub mod musicbrainz; -pub mod tmdb; -pub mod trakt; -pub mod vgmdb; -pub mod wikidata; -pub mod wikimedia_commons; +pub mod plugins; -use jellydb::Database; - -use crate::{tmdb::TmdbKind, trakt::TraktKind}; -use acoustid::{acoustid_fingerprint, AcoustID}; -use anyhow::{anyhow, bail, Context, Result}; -use infojson::YVideo; -use jellycache::{cache_memory, cache_read, cache_store, HashKey}; +use crate::plugins::{ + acoustid::AcoustID, + infojson::is_info_json, + misc::is_cover, + musicbrainz::{self, MusicBrainz}, + tmdb::{self, Tmdb, TmdbKind}, + trakt::{Trakt, TraktKind}, + vgmdb::Vgmdb, + wikidata::Wikidata, + wikimedia_commons::WikimediaCommons, +}; +use anyhow::{Context, Result, anyhow}; +use jellycache::{HashKey, cache_memory, cache_store}; use jellycommon::{ - Appearance, Asset, Chapter, CreditCategory, IdentifierType, MediaInfo, Node, NodeID, NodeKind, - PictureSlot, RatingType, SourceTrack, SourceTrackKind, TrackSource, Visibility, + Appearance, Asset, CreditCategory, IdentifierType, Node, NodeID, NodeKind, PictureSlot, + RatingType, Visibility, }; +use jellydb::Database; use jellyimport_fallback_generator::generate_fallback; use jellyremuxer::{ demuxers::create_demuxer_autodetect, - matroska::{self, Segment}, + matroska::{self, AttachedFile, Segment}, }; use log::info; -use musicbrainz::MusicBrainz; -use rayon::iter::{ParallelBridge, ParallelIterator}; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; use regex::Regex; use serde::{Deserialize, Serialize}; use std::{ - collections::{BTreeMap, HashMap}, - fs::{read_to_string, File}, - io::{BufReader, Read}, + collections::BTreeMap, + fs::{File, read_to_string}, path::{Path, PathBuf}, sync::{Arc, LazyLock, Mutex}, time::UNIX_EPOCH, }; -use tmdb::Tmdb; use tokio::{ runtime::Handle, sync::{RwLock, Semaphore}, task::spawn_blocking, }; -use trakt::Trakt; -use vgmdb::Vgmdb; -use wikidata::Wikidata; -use wikimedia_commons::WikimediaCommons; #[rustfmt::skip] #[derive(Debug, Deserialize, Serialize, Default)] @@ -89,6 +82,7 @@ pub const USER_AGENT: &str = concat!( static IMPORT_SEM: LazyLock<Semaphore> = LazyLock::new(|| Semaphore::new(1)); pub static IMPORT_ERRORS: RwLock<Vec<String>> = RwLock::const_new(Vec::new()); +pub static IMPORT_PROGRESS: RwLock<Option<(usize, usize, String)>> = RwLock::const_new(None); static RE_EPISODE_FILENAME: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"([sS](?<season>\d+))?([eE](?<episode>\d+))( (.+))?"#).unwrap()); @@ -117,7 +111,7 @@ pub fn get_trakt() -> Result<Trakt> { } pub async fn import_wrap(db: Database, incremental: bool) -> Result<()> { - let _sem = IMPORT_SEM.try_acquire()?; + let _sem = IMPORT_SEM.try_acquire().context("already importing")?; let jh = spawn_blocking(move || { *IMPORT_ERRORS.blocking_write() = Vec::new(); @@ -144,11 +138,10 @@ fn import(db: &Database, incremental: bool) -> Result<()> { let rthandle = Handle::current(); + let mut files = Vec::new(); import_traverse( &CONF.media_path, db, - &apis, - &rthandle, incremental, NodeID::MIN, "", @@ -156,8 +149,17 @@ fn import(db: &Database, incremental: bool) -> Result<()> { visibility: Visibility::Visible, use_acoustid: false, }, + &mut files, )?; + files.into_par_iter().for_each(|(path, parent, iflags)| { + import_file(db, &apis, &rthandle, &path, parent, iflags); + }); + + // let meta = path.metadata()?; + // let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs(); + // db.set_import_file_mtime(path, mtime)?; + Ok(()) } @@ -170,12 +172,11 @@ struct InheritedFlags { fn import_traverse( path: &Path, db: &Database, - apis: &Apis, - rthandle: &Handle, incremental: bool, parent: NodeID, parent_slug_fragment: &str, mut iflags: InheritedFlags, + out: &mut Vec<(PathBuf, NodeID, InheritedFlags)>, ) -> Result<()> { if path.is_dir() { let slug_fragment = if path == CONF.media_path { @@ -211,26 +212,18 @@ fn import_traverse( Ok(()) })?; - path.read_dir()?.par_bridge().try_for_each(|e| { + for e in path.read_dir()? { let path = e?.path(); - if let Err(e) = import_traverse( - &path, - db, - apis, - rthandle, - incremental, - id, - &slug_fragment, - iflags, - ) { + if let Err(e) = import_traverse(&path, db, incremental, id, &slug_fragment, iflags, out) + { IMPORT_ERRORS .blocking_write() .push(format!("{path:?} import failed: {e:#}")); } - Ok::<_, anyhow::Error>(()) - })?; + } return Ok(()); } + if path.is_file() { let meta = path.metadata()?; let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs(); @@ -243,8 +236,7 @@ fn import_traverse( } } - import_file(db, apis, rthandle, path, parent, iflags)?; - db.set_import_file_mtime(path, mtime)?; + out.push((path.to_owned(), parent, iflags)); } Ok(()) } @@ -259,36 +251,6 @@ fn import_file( ) -> Result<()> { let filename = path.file_name().unwrap().to_string_lossy(); match filename.as_ref() { - "poster.jpeg" | "poster.webp" | "poster.png" => { - info!("import poster at {path:?}"); - let asset = Asset(cache_store( - format!("media/literal/{}-poster.image", HashKey(path)), - || { - let mut data = Vec::new(); - File::open(path)?.read_to_end(&mut data)?; - Ok(data) - }, - )?); - db.update_node_init(parent, |node| { - node.pictures.insert(PictureSlot::Cover, asset); - Ok(()) - })?; - } - "backdrop.jpeg" | "backdrop.webp" | "backdrop.png" => { - info!("import backdrop at {path:?}"); - let asset = Asset(cache_store( - format!("media/literal/{}-poster.image", HashKey(path)), - || { - let mut data = Vec::new(); - File::open(path)?.read_to_end(&mut data)?; - Ok(data) - }, - )?); - db.update_node_init(parent, |node| { - node.pictures.insert(PictureSlot::Backdrop, asset); - Ok(()) - })?; - } "node.yaml" => { info!("import node info at {path:?}"); let data = serde_yaml::from_str::<Node>(&read_to_string(path)?)?; @@ -330,29 +292,6 @@ fn import_file( })?; } } - "channel.info.json" => { - info!("import channel info.json at {path:?}"); - let data = serde_json::from_reader::<_, YVideo>(BufReader::new(File::open(path)?))?; - db.update_node_init(parent, |node| { - node.kind = NodeKind::Channel; - node.title = Some(clean_uploader_name(&data.title).to_owned()); - if let Some(cid) = data.channel_id { - node.identifiers.insert(IdentifierType::YoutubeChannel, cid); - } - if let Some(uid) = data.uploader_id { - node.identifiers - .insert(IdentifierType::YoutubeChannelHandle, uid); - } - if let Some(desc) = data.description { - node.description = Some(desc); - } - if let Some(followers) = data.channel_follower_count { - node.ratings - .insert(RatingType::YoutubeFollowers, followers as f64); - } - Ok(()) - })?; - } _ => import_media_file(db, apis, rthandle, path, parent, iflags).context("media file")?, } @@ -376,7 +315,7 @@ pub fn read_media_metadata(path: &Path) -> Result<Arc<matroska::Segment>> { // Replace data of useful attachments with cache key; delete data of all others if let Some(attachments) = &mut attachments { for att in &mut attachments.files { - if let Some(fname) = attachment_types::is_useful_attachment(&att) { + if let Some(fname) = is_useful_attachment(&att) { let key = cache_store( format!("media/attachment/{}-{fname}", HashKey(path)), || Ok(att.data.clone()), @@ -400,22 +339,11 @@ pub fn read_media_metadata(path: &Path) -> Result<Arc<matroska::Segment>> { ) } -mod attachment_types { - use jellyremuxer::matroska::AttachedFile; - - pub fn is_useful_attachment(a: &AttachedFile) -> Option<&'static str> { - match a { - _ if is_info_json(&a) => Some("info.json"), - _ if is_cover(&a) => Some("cover.image"), - _ => None, - } - } - - pub fn is_info_json(a: &&AttachedFile) -> bool { - a.name == "info.json" && a.media_type == "application/json" - } - pub fn is_cover(a: &&AttachedFile) -> bool { - a.name.starts_with("cover") && a.media_type.starts_with("image/") +pub fn is_useful_attachment(a: &AttachedFile) -> Option<&'static str> { + match a { + _ if is_info_json(&a) => Some("info.json"), + _ if is_cover(&a) => Some("cover.image"), + _ => None, } } @@ -430,38 +358,6 @@ fn import_media_file( info!("media file {path:?}"); let m = read_media_metadata(path)?; - let infojson = m - .attachments - .iter() - .flat_map(|a| &a.files) - .find(attachment_types::is_info_json) - .map(|att| { - let data = cache_read(str::from_utf8(&att.data).unwrap())? - .ok_or(anyhow!("info json cache missing"))?; - anyhow::Ok(serde_json::from_slice::<infojson::YVideo>(&data)?) - }) - .transpose() - .context("infojson parsing")?; - - let cover = m - .attachments - .iter() - .flat_map(|a| &a.files) - .find(attachment_types::is_cover) - .map(|att| Asset(att.data.clone().try_into().unwrap())); - - let mut tags = m - .tags - .first() - .map(|tags| { - tags.tags - .iter() - .flat_map(|t| t.simple_tags.clone()) - .map(|st| (st.name, st.string.unwrap_or_default())) - .collect::<HashMap<_, _>>() - }) - .unwrap_or_default(); - let filename = path.file_name().unwrap().to_string_lossy().to_string(); let mut episode_index = None; @@ -480,215 +376,28 @@ fn import_media_file( let mut filename_toks = filename.split("."); let filepath_stem = filename_toks.next().unwrap(); - let slug = infojson - .as_ref() - // TODO maybe also include the slug after the primary "id" key - .map(|ij| format!("{}-{}", ij.extractor.to_lowercase(), ij.id)) - .unwrap_or_else(|| { - if let Some((s, e)) = episode_index { - format!( - "{}-s{s}e{e}", - make_kebab( - &path - .parent() - .unwrap() - .file_name() - .unwrap_or_default() - .to_string_lossy() - ) - ) - } else { - make_kebab(filepath_stem) - } - }); + let slug = if let Some((s, e)) = episode_index { + format!( + "{}-s{s}e{e}", + make_kebab( + &path + .parent() + .unwrap() + .file_name() + .unwrap_or_default() + .to_string_lossy() + ) + ) + } else { + make_kebab(filepath_stem) + }; let node = NodeID::from_slug(&slug); - let meta = path.metadata()?; - let mut eids = BTreeMap::<IdentifierType, String>::new(); - - for (key, value) in &tags { - match key.as_str() { - "MUSICBRAINZ_TRACKID" => { - eids.insert(IdentifierType::MusicbrainzRecording, value.to_owned()) - } - "MUSICBRAINZ_ARTISTID" => { - eids.insert(IdentifierType::MusicbrainzArtist, value.to_owned()) - } - "MUSICBRAINZ_ALBUMID" => { - eids.insert(IdentifierType::MusicbrainzRelease, value.to_owned()) - } - "MUSICBRAINZ_ALBUMARTISTID" => { - None //? ignore this? - } - "MUSICBRAINZ_RELEASEGROUPID" => { - eids.insert(IdentifierType::MusicbrainzReleaseGroup, value.to_owned()) - } - "ISRC" => eids.insert(IdentifierType::Isrc, value.to_owned()), - "BARCODE" => eids.insert(IdentifierType::Barcode, value.to_owned()), - _ => None, - }; - } - - if iflags.use_acoustid { - let fp = acoustid_fingerprint(path)?; - if let Some((atid, mbid)) = apis - .acoustid - .as_ref() - .ok_or(anyhow!("need acoustid"))? - .get_atid_mbid(&fp, rthandle)? - { - eids.insert(IdentifierType::AcoustIdTrack, atid); - eids.insert(IdentifierType::MusicbrainzRecording, mbid); - }; - } - - let mbrec = eids.get(&IdentifierType::MusicbrainzRecording).cloned(); - db.update_node_init(node, |node| { node.slug = slug; - node.title = m.info.title.clone().or(node.title.clone()); node.visibility = iflags.visibility; - - node.description = tags - .remove("DESCRIPTION") - .or(tags.remove("SYNOPSIS")) - .or(node.description.clone()); - node.tagline = tags.remove("COMMENT").or(node.tagline.clone()); node.parents.insert(parent); - - node.identifiers.extend(eids); - - if let Some(cover) = cover { - node.pictures.insert(PictureSlot::Cover, cover); - } - - if let Some(ct) = tags.get("CONTENT_TYPE") { - node.kind = match ct.to_lowercase().trim() { - "movie" | "documentary" | "film" => NodeKind::Movie, - "music" | "recording" => NodeKind::Music, - _ => NodeKind::Unknown, - } - } - - let tracks = m - .tracks - .as_ref() - .ok_or(anyhow!("no tracks"))? - .entries - .iter() - .map(|track| SourceTrack { - codec: track.codec_id.clone(), - language: track.language.clone(), - name: track.name.clone().unwrap_or_default(), - federated: Vec::new(), - kind: if let Some(video) = &track.video { - SourceTrackKind::Video { - width: video.pixel_width, - height: video.pixel_height, - fps: video.frame_rate, - } - } else if let Some(audio) = &track.audio { - SourceTrackKind::Audio { - channels: audio.channels as usize, - sample_rate: audio.sampling_frequency, - bit_depth: audio.bit_depth.map(|r| r as usize), - } - } else { - SourceTrackKind::Subtitle - }, - source: TrackSource::Local(path.to_owned(), track.track_number), - }) - .collect::<Vec<_>>(); - - if let Some(infojson) = infojson { - node.kind = if !tracks - .iter() - .any(|t| matches!(t.kind, SourceTrackKind::Video { .. })) - { - NodeKind::Music - } else if infojson.duration.unwrap_or(0.) < 600. - && infojson.aspect_ratio.unwrap_or(2.) < 1. - { - NodeKind::ShortFormVideo - } else { - NodeKind::Video - }; - node.title = Some(infojson.title); - node.subtitle = if infojson.alt_title != node.title { - infojson.alt_title - } else { - None - } - .or(infojson - .uploader - .as_ref() - .map(|u| clean_uploader_name(u).to_owned())) - .or(node.subtitle.clone()); - - node.tags.extend(infojson.tags.unwrap_or_default()); - - if let Some(desc) = infojson.description { - node.description = Some(desc) - } - node.tagline = Some(infojson.webpage_url); - if let Some(date) = &infojson.upload_date { - node.release_date = - Some(infojson::parse_upload_date(date).context("parsing upload date")?); - } - match infojson.extractor.as_str() { - "youtube" => { - node.identifiers - .insert(IdentifierType::YoutubeVideo, infojson.id); - node.ratings.insert( - RatingType::YoutubeViews, - infojson.view_count.unwrap_or_default() as f64, - ); - if let Some(lc) = infojson.like_count { - node.ratings.insert(RatingType::YoutubeLikes, lc as f64); - } - } - "Bandcamp" => drop( - node.identifiers - .insert(IdentifierType::Bandcamp, infojson.id), - ), - _ => (), - } - } - - // TODO merge size - node.storage_size = meta.len(); - // TODO merge tracks - node.media = Some(MediaInfo { - chapters: m - .chapters - .clone() - .map(|c| { - let mut chaps = Vec::new(); - if let Some(ee) = c.edition_entries.first() { - for ca in &ee.chapter_atoms { - let mut labels = Vec::new(); - for cd in &ca.displays { - for lang in &cd.languages { - labels.push((lang.to_owned(), cd.string.clone())) - } - } - chaps.push(Chapter { - labels, - time_start: Some(ca.time_start as f64 * 1e-9), - time_end: ca.time_end.map(|ts| ts as f64 * 1e-9), - }) - } - } - chaps - }) - .unwrap_or_default(), - duration: fix_invalid_runtime( - m.info.duration.unwrap_or_default() * m.info.timestamp_scale as f64 * 1e-9, - ), - tracks, - }); - Ok(()) })?; @@ -741,9 +450,6 @@ fn import_media_file( for tok in filename_toks { apply_node_flag(db, rthandle, apis, node, tok)?; } - if let Some(mbid) = mbrec { - apply_musicbrainz_recording(db, rthandle, apis, node, mbid)?; - } Ok(()) } @@ -755,48 +461,6 @@ fn apply_node_flag( node: NodeID, flag: &str, ) -> Result<()> { - if let Some(value) = flag.strip_prefix("trakt-").or(flag.strip_prefix("trakt=")) { - let (kind, id) = value.split_once(":").unwrap_or(("", value)); - let kind = match kind { - "movie" | "" => TraktKind::Movie, - "show" => TraktKind::Show, - "season" => TraktKind::Season, - "episode" => TraktKind::Episode, - _ => bail!("unknown trakt kind"), - }; - apply_trakt_tmdb(db, rthandle, apis, node, kind, id)?; - } - if flag == "hidden" { - db.update_node_init(node, |node| { - node.visibility = node.visibility.min(Visibility::Hidden); - Ok(()) - })?; - } - if flag == "reduced" { - db.update_node_init(node, |node| { - node.visibility = node.visibility.min(Visibility::Reduced); - Ok(()) - })?; - } - if let Some(kind) = flag.strip_prefix("kind-").or(flag.strip_prefix("kind=")) { - let kind = match kind { - "movie" => NodeKind::Movie, - "video" => NodeKind::Video, - "music" => NodeKind::Music, - "short_form_video" => NodeKind::ShortFormVideo, - "collection" => NodeKind::Collection, - "channel" => NodeKind::Channel, - "show" => NodeKind::Show, - "series" => NodeKind::Series, - "season" => NodeKind::Season, - "episode" => NodeKind::Episode, - _ => bail!("unknown node kind"), - }; - db.update_node_init(node, |node| { - node.kind = kind; - Ok(()) - })?; - } if let Some(mbid) = flag.strip_prefix("mbrec-").or(flag.strip_prefix("mbrec=")) { apply_musicbrainz_recording(db, rthandle, apis, node, mbid.to_string())? } @@ -1016,18 +680,3 @@ fn make_kebab(i: &str) -> String { } o } - -fn clean_uploader_name(mut s: &str) -> &str { - s = s.strip_suffix(" - Videos").unwrap_or(s); - s = s.strip_suffix(" - Topic").unwrap_or(s); - s = s.strip_prefix("Uploads from ").unwrap_or(s); - s -} - -fn fix_invalid_runtime(d: f64) -> f64 { - match d { - // Broken durations found experimentally - 359999.999 | 359999.000 | 86399.999 | 86399.99900000001 => 0., - x => x, - } -} |