From a0cfd77b4d19c43a28c4d82072e6ff136e336af3 Mon Sep 17 00:00:00 2001 From: metamuffin Date: Wed, 10 Dec 2025 16:21:38 +0100 Subject: refactor import plugins part 1 --- import/Cargo.toml | 2 +- import/src/acoustid.rs | 154 ----------- import/src/infojson.rs | 146 ---------- import/src/lib.rs | 475 +++++--------------------------- import/src/musicbrainz.rs | 318 --------------------- import/src/plugins/acoustid.rs | 174 ++++++++++++ import/src/plugins/infojson.rs | 272 ++++++++++++++++++ import/src/plugins/media_info.rs | 92 +++++++ import/src/plugins/misc.rs | 100 +++++++ import/src/plugins/mod.rs | 48 ++++ import/src/plugins/musicbrainz.rs | 320 +++++++++++++++++++++ import/src/plugins/tags.rs | 60 ++++ import/src/plugins/tmdb.rs | 281 +++++++++++++++++++ import/src/plugins/trakt.rs | 403 +++++++++++++++++++++++++++ import/src/plugins/vgmdb.rs | 127 +++++++++ import/src/plugins/wikidata.rs | 129 +++++++++ import/src/plugins/wikimedia_commons.rs | 63 +++++ import/src/tmdb.rs | 281 ------------------- import/src/trakt.rs | 380 ------------------------- import/src/vgmdb.rs | 127 --------- import/src/wikidata.rs | 129 --------- import/src/wikimedia_commons.rs | 63 ----- 22 files changed, 2132 insertions(+), 2012 deletions(-) delete mode 100644 import/src/acoustid.rs delete mode 100644 import/src/infojson.rs delete mode 100644 import/src/musicbrainz.rs create mode 100644 import/src/plugins/acoustid.rs create mode 100644 import/src/plugins/infojson.rs create mode 100644 import/src/plugins/media_info.rs create mode 100644 import/src/plugins/misc.rs create mode 100644 import/src/plugins/mod.rs create mode 100644 import/src/plugins/musicbrainz.rs create mode 100644 import/src/plugins/tags.rs create mode 100644 import/src/plugins/tmdb.rs create mode 100644 import/src/plugins/trakt.rs create mode 100644 import/src/plugins/vgmdb.rs create mode 100644 import/src/plugins/wikidata.rs create mode 100644 import/src/plugins/wikimedia_commons.rs delete mode 100644 import/src/tmdb.rs delete mode 100644 import/src/trakt.rs delete mode 100644 import/src/vgmdb.rs delete mode 100644 import/src/wikidata.rs delete mode 100644 import/src/wikimedia_commons.rs (limited to 'import') diff --git a/import/Cargo.toml b/import/Cargo.toml index 4276768..42c1d43 100644 --- a/import/Cargo.toml +++ b/import/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "jellyimport" version = "0.1.0" -edition = "2021" +edition = "2024" [dependencies] jellyremuxer = { path = "../remuxer" } diff --git a/import/src/acoustid.rs b/import/src/acoustid.rs deleted file mode 100644 index 01adb57..0000000 --- a/import/src/acoustid.rs +++ /dev/null @@ -1,154 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin -*/ -use crate::USER_AGENT; -use anyhow::{Context, Result}; -use jellycache::{cache_memory, HashKey}; -use log::info; -use reqwest::{ - header::{HeaderMap, HeaderName, HeaderValue}, - Client, ClientBuilder, -}; -use serde::{Deserialize, Serialize}; -use std::{ - io::Read, - path::Path, - process::{Command, Stdio}, - sync::Arc, - time::Duration, -}; -use tokio::{ - runtime::Handle, - sync::Semaphore, - time::{sleep_until, Instant}, -}; - -pub(crate) struct AcoustID { - client: Client, - key: String, - rate_limit: Arc, -} - -#[derive(Debug, Hash, Clone, Serialize, Deserialize)] -pub(crate) struct Fingerprint { - duration: u32, - fingerprint: String, -} - -#[derive(Debug, Serialize, Deserialize)] -pub(crate) struct FpCalcOutput { - duration: f32, - fingerprint: String, -} - -#[derive(Serialize, Deserialize)] -pub(crate) struct AcoustIDLookupResultRecording { - id: String, -} -#[derive(Serialize, Deserialize)] -pub(crate) struct AcoustIDLookupResult { - id: String, - score: f32, - #[serde(default)] - recordings: Vec, -} -#[derive(Serialize, Deserialize)] -pub(crate) struct AcoustIDLookupResponse { - status: String, - results: Vec, -} - -impl AcoustID { - pub fn new(api_key: &str) -> Self { - let client = ClientBuilder::new() - .default_headers(HeaderMap::from_iter([ - ( - HeaderName::from_static("accept"), - HeaderValue::from_static("application/json"), - ), - ( - HeaderName::from_static("user-agent"), - HeaderValue::from_static(USER_AGENT), - ), - ])) - .build() - .unwrap(); - Self { - client, - // send at most 3 req/s according to acoustid docs, each lock is therefore held for 1s - // this implementation also never sends more than 3 requests in-flight. - rate_limit: Arc::new(Semaphore::new(3)), - key: api_key.to_owned(), - } - } - - pub fn get_atid_mbid(&self, fp: &Fingerprint, rt: &Handle) -> Result> { - let res = self.lookup(fp.to_owned(), rt)?; - for r in &res.results { - if let Some(k) = r.recordings.first() { - return Ok(Some((r.id.clone(), k.id.clone()))); - } - } - Ok(None) - } - - pub fn lookup(&self, fp: Fingerprint, rt: &Handle) -> Result> { - cache_memory(&format!("ext/acoustid/{}.json", HashKey(&fp)) , move || rt.block_on(async { - let _permit = self.rate_limit.clone().acquire_owned().await?; - let permit_drop_ts = Instant::now() + Duration::SECOND; - info!("acoustid lookup"); - - let duration = fp.duration; - let fingerprint = fp.fingerprint.replace("=", "%3D"); - let client = &self.key; - let body = format!("format=json&meta=recordingids&client={client}&duration={duration}&fingerprint={fingerprint}"); - - let resp = self - .client - .post("https://api.acoustid.org/v2/lookup".to_string()) - .header("Content-Type", "application/x-www-form-urlencoded") - .body(body) - .send() - .await?.error_for_status()?.json::().await?; - - tokio::task::spawn(async move { - sleep_until(permit_drop_ts).await; - drop(_permit); - }); - - Ok(resp) - })) - .context("acoustid lookup") - } -} - -pub(crate) fn acoustid_fingerprint(path: &Path) -> Result> { - cache_memory( - &format!("media/chromaprint/{}.json", HashKey(path)), - move || { - let child = Command::new("fpcalc") - .arg("-json") - .arg(path) - .stdout(Stdio::piped()) - .spawn() - .context("fpcalc")?; - - let mut buf = Vec::new(); - child - .stdout - .unwrap() - .read_to_end(&mut buf) - .context("read fpcalc output")?; - - let out: FpCalcOutput = - serde_json::from_slice(&buf).context("parsing fpcalc output")?; - let out = Fingerprint { - duration: out.duration as u32, - fingerprint: out.fingerprint, - }; - Ok(out) - }, - ) -} diff --git a/import/src/infojson.rs b/import/src/infojson.rs deleted file mode 100644 index ada6c3a..0000000 --- a/import/src/infojson.rs +++ /dev/null @@ -1,146 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin -*/ -use anyhow::Context; -use jellycommon::chrono::{format::Parsed, Utc}; -use serde::{Deserialize, Serialize}; -use std::collections::HashMap; - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct YVideo { - pub id: String, - pub title: String, - pub alt_title: Option, - pub formats: Option>, - pub thumbnails: Option>, - pub thumbnail: Option, - pub description: Option, - pub channel_id: Option, - pub duration: Option, - pub view_count: Option, - pub average_rating: Option, - pub age_limit: Option, - pub webpage_url: String, - pub categories: Option>, - pub tags: Option>, - pub playable_in_embed: Option, - pub aspect_ratio: Option, - pub width: Option, - pub height: Option, - pub automatic_captions: Option>>, - pub comment_count: Option, - pub chapters: Option>, - pub heatmap: Option>, - pub like_count: Option, - pub channel: Option, - pub channel_follower_count: Option, - pub channel_is_verified: Option, - pub uploader: Option, - pub uploader_id: Option, - pub uploader_url: Option, - pub upload_date: Option, - pub availability: Option, // "public" | "private" | "unlisted", - pub original_url: Option, - pub webpage_url_basename: String, - pub webpage_url_domain: String, - pub extractor: String, - pub extractor_key: String, - pub playlist_count: Option, - pub playlist: Option, - pub playlist_id: Option, - pub playlist_title: Option, - pub playlist_uploader: Option, - pub playlist_uploader_id: Option, - pub n_entries: Option, - pub playlist_index: Option, - pub display_id: Option, - pub fulltitle: Option, - pub duration_string: Option, - pub is_live: Option, - pub was_live: Option, - pub epoch: usize, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct YCaption { - pub url: Option, - pub ext: String, //"vtt" | "json3" | "srv1" | "srv2" | "srv3" | "ttml", - pub protocol: Option, - pub name: Option, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct YFormat { - pub format_id: String, - pub format_note: Option, - pub ext: String, - pub protocol: String, - pub acodec: Option, - pub vcodec: Option, - pub url: Option, - pub width: Option, - pub height: Option, - pub fps: Option, - pub columns: Option, - pub fragments: Option>, - pub resolution: Option, - pub dynamic_range: Option, - pub aspect_ratio: Option, - pub http_headers: HashMap, - pub audio_ext: String, - pub video_ext: String, - pub vbr: Option, - pub abr: Option, - pub format: String, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct YFragment { - pub url: Option, - pub duration: Option, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct YThumbnail { - pub url: String, - pub preference: Option, - pub id: String, - pub height: Option, - pub width: Option, - pub resolution: Option, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct YChapter { - pub start_time: f64, - pub end_time: f64, - pub title: String, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct YHeatmapSample { - pub start_time: f64, - pub end_time: f64, - pub value: f64, -} - -pub fn parse_upload_date(d: &str) -> anyhow::Result { - let (year, month, day) = (&d[0..4], &d[4..6], &d[6..8]); - let (year, month, day) = ( - year.parse().context("parsing year")?, - month.parse().context("parsing month")?, - day.parse().context("parsing day")?, - ); - - let mut p = Parsed::new(); - p.year = Some(year); - p.month = Some(month); - p.day = Some(day); - p.hour_div_12 = Some(0); - p.hour_mod_12 = Some(0); - p.minute = Some(0); - p.second = Some(0); - Ok(p.to_datetime_with_timezone(&Utc)?.timestamp_millis()) -} diff --git a/import/src/lib.rs b/import/src/lib.rs index e31127e..36c65d3 100644 --- a/import/src/lib.rs +++ b/import/src/lib.rs @@ -5,54 +5,47 @@ */ #![feature(duration_constants)] -pub mod acoustid; -pub mod infojson; -pub mod musicbrainz; -pub mod tmdb; -pub mod trakt; -pub mod vgmdb; -pub mod wikidata; -pub mod wikimedia_commons; - -use jellydb::Database; - -use crate::{tmdb::TmdbKind, trakt::TraktKind}; -use acoustid::{acoustid_fingerprint, AcoustID}; -use anyhow::{anyhow, bail, Context, Result}; -use infojson::YVideo; -use jellycache::{cache_memory, cache_read, cache_store, HashKey}; +pub mod plugins; + +use crate::plugins::{ + acoustid::AcoustID, + infojson::is_info_json, + misc::is_cover, + musicbrainz::{self, MusicBrainz}, + tmdb::{self, Tmdb, TmdbKind}, + trakt::{Trakt, TraktKind}, + vgmdb::Vgmdb, + wikidata::Wikidata, + wikimedia_commons::WikimediaCommons, +}; +use anyhow::{Context, Result, anyhow}; +use jellycache::{HashKey, cache_memory, cache_store}; use jellycommon::{ - Appearance, Asset, Chapter, CreditCategory, IdentifierType, MediaInfo, Node, NodeID, NodeKind, - PictureSlot, RatingType, SourceTrack, SourceTrackKind, TrackSource, Visibility, + Appearance, Asset, CreditCategory, IdentifierType, Node, NodeID, NodeKind, PictureSlot, + RatingType, Visibility, }; +use jellydb::Database; use jellyimport_fallback_generator::generate_fallback; use jellyremuxer::{ demuxers::create_demuxer_autodetect, - matroska::{self, Segment}, + matroska::{self, AttachedFile, Segment}, }; use log::info; -use musicbrainz::MusicBrainz; -use rayon::iter::{ParallelBridge, ParallelIterator}; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; use regex::Regex; use serde::{Deserialize, Serialize}; use std::{ - collections::{BTreeMap, HashMap}, - fs::{read_to_string, File}, - io::{BufReader, Read}, + collections::BTreeMap, + fs::{File, read_to_string}, path::{Path, PathBuf}, sync::{Arc, LazyLock, Mutex}, time::UNIX_EPOCH, }; -use tmdb::Tmdb; use tokio::{ runtime::Handle, sync::{RwLock, Semaphore}, task::spawn_blocking, }; -use trakt::Trakt; -use vgmdb::Vgmdb; -use wikidata::Wikidata; -use wikimedia_commons::WikimediaCommons; #[rustfmt::skip] #[derive(Debug, Deserialize, Serialize, Default)] @@ -89,6 +82,7 @@ pub const USER_AGENT: &str = concat!( static IMPORT_SEM: LazyLock = LazyLock::new(|| Semaphore::new(1)); pub static IMPORT_ERRORS: RwLock> = RwLock::const_new(Vec::new()); +pub static IMPORT_PROGRESS: RwLock> = RwLock::const_new(None); static RE_EPISODE_FILENAME: LazyLock = LazyLock::new(|| Regex::new(r#"([sS](?\d+))?([eE](?\d+))( (.+))?"#).unwrap()); @@ -117,7 +111,7 @@ pub fn get_trakt() -> Result { } pub async fn import_wrap(db: Database, incremental: bool) -> Result<()> { - let _sem = IMPORT_SEM.try_acquire()?; + let _sem = IMPORT_SEM.try_acquire().context("already importing")?; let jh = spawn_blocking(move || { *IMPORT_ERRORS.blocking_write() = Vec::new(); @@ -144,11 +138,10 @@ fn import(db: &Database, incremental: bool) -> Result<()> { let rthandle = Handle::current(); + let mut files = Vec::new(); import_traverse( &CONF.media_path, db, - &apis, - &rthandle, incremental, NodeID::MIN, "", @@ -156,8 +149,17 @@ fn import(db: &Database, incremental: bool) -> Result<()> { visibility: Visibility::Visible, use_acoustid: false, }, + &mut files, )?; + files.into_par_iter().for_each(|(path, parent, iflags)| { + import_file(db, &apis, &rthandle, &path, parent, iflags); + }); + + // let meta = path.metadata()?; + // let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs(); + // db.set_import_file_mtime(path, mtime)?; + Ok(()) } @@ -170,12 +172,11 @@ struct InheritedFlags { fn import_traverse( path: &Path, db: &Database, - apis: &Apis, - rthandle: &Handle, incremental: bool, parent: NodeID, parent_slug_fragment: &str, mut iflags: InheritedFlags, + out: &mut Vec<(PathBuf, NodeID, InheritedFlags)>, ) -> Result<()> { if path.is_dir() { let slug_fragment = if path == CONF.media_path { @@ -211,26 +212,18 @@ fn import_traverse( Ok(()) })?; - path.read_dir()?.par_bridge().try_for_each(|e| { + for e in path.read_dir()? { let path = e?.path(); - if let Err(e) = import_traverse( - &path, - db, - apis, - rthandle, - incremental, - id, - &slug_fragment, - iflags, - ) { + if let Err(e) = import_traverse(&path, db, incremental, id, &slug_fragment, iflags, out) + { IMPORT_ERRORS .blocking_write() .push(format!("{path:?} import failed: {e:#}")); } - Ok::<_, anyhow::Error>(()) - })?; + } return Ok(()); } + if path.is_file() { let meta = path.metadata()?; let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs(); @@ -243,8 +236,7 @@ fn import_traverse( } } - import_file(db, apis, rthandle, path, parent, iflags)?; - db.set_import_file_mtime(path, mtime)?; + out.push((path.to_owned(), parent, iflags)); } Ok(()) } @@ -259,36 +251,6 @@ fn import_file( ) -> Result<()> { let filename = path.file_name().unwrap().to_string_lossy(); match filename.as_ref() { - "poster.jpeg" | "poster.webp" | "poster.png" => { - info!("import poster at {path:?}"); - let asset = Asset(cache_store( - format!("media/literal/{}-poster.image", HashKey(path)), - || { - let mut data = Vec::new(); - File::open(path)?.read_to_end(&mut data)?; - Ok(data) - }, - )?); - db.update_node_init(parent, |node| { - node.pictures.insert(PictureSlot::Cover, asset); - Ok(()) - })?; - } - "backdrop.jpeg" | "backdrop.webp" | "backdrop.png" => { - info!("import backdrop at {path:?}"); - let asset = Asset(cache_store( - format!("media/literal/{}-poster.image", HashKey(path)), - || { - let mut data = Vec::new(); - File::open(path)?.read_to_end(&mut data)?; - Ok(data) - }, - )?); - db.update_node_init(parent, |node| { - node.pictures.insert(PictureSlot::Backdrop, asset); - Ok(()) - })?; - } "node.yaml" => { info!("import node info at {path:?}"); let data = serde_yaml::from_str::(&read_to_string(path)?)?; @@ -330,29 +292,6 @@ fn import_file( })?; } } - "channel.info.json" => { - info!("import channel info.json at {path:?}"); - let data = serde_json::from_reader::<_, YVideo>(BufReader::new(File::open(path)?))?; - db.update_node_init(parent, |node| { - node.kind = NodeKind::Channel; - node.title = Some(clean_uploader_name(&data.title).to_owned()); - if let Some(cid) = data.channel_id { - node.identifiers.insert(IdentifierType::YoutubeChannel, cid); - } - if let Some(uid) = data.uploader_id { - node.identifiers - .insert(IdentifierType::YoutubeChannelHandle, uid); - } - if let Some(desc) = data.description { - node.description = Some(desc); - } - if let Some(followers) = data.channel_follower_count { - node.ratings - .insert(RatingType::YoutubeFollowers, followers as f64); - } - Ok(()) - })?; - } _ => import_media_file(db, apis, rthandle, path, parent, iflags).context("media file")?, } @@ -376,7 +315,7 @@ pub fn read_media_metadata(path: &Path) -> Result> { // Replace data of useful attachments with cache key; delete data of all others if let Some(attachments) = &mut attachments { for att in &mut attachments.files { - if let Some(fname) = attachment_types::is_useful_attachment(&att) { + if let Some(fname) = is_useful_attachment(&att) { let key = cache_store( format!("media/attachment/{}-{fname}", HashKey(path)), || Ok(att.data.clone()), @@ -400,22 +339,11 @@ pub fn read_media_metadata(path: &Path) -> Result> { ) } -mod attachment_types { - use jellyremuxer::matroska::AttachedFile; - - pub fn is_useful_attachment(a: &AttachedFile) -> Option<&'static str> { - match a { - _ if is_info_json(&a) => Some("info.json"), - _ if is_cover(&a) => Some("cover.image"), - _ => None, - } - } - - pub fn is_info_json(a: &&AttachedFile) -> bool { - a.name == "info.json" && a.media_type == "application/json" - } - pub fn is_cover(a: &&AttachedFile) -> bool { - a.name.starts_with("cover") && a.media_type.starts_with("image/") +pub fn is_useful_attachment(a: &AttachedFile) -> Option<&'static str> { + match a { + _ if is_info_json(&a) => Some("info.json"), + _ if is_cover(&a) => Some("cover.image"), + _ => None, } } @@ -430,38 +358,6 @@ fn import_media_file( info!("media file {path:?}"); let m = read_media_metadata(path)?; - let infojson = m - .attachments - .iter() - .flat_map(|a| &a.files) - .find(attachment_types::is_info_json) - .map(|att| { - let data = cache_read(str::from_utf8(&att.data).unwrap())? - .ok_or(anyhow!("info json cache missing"))?; - anyhow::Ok(serde_json::from_slice::(&data)?) - }) - .transpose() - .context("infojson parsing")?; - - let cover = m - .attachments - .iter() - .flat_map(|a| &a.files) - .find(attachment_types::is_cover) - .map(|att| Asset(att.data.clone().try_into().unwrap())); - - let mut tags = m - .tags - .first() - .map(|tags| { - tags.tags - .iter() - .flat_map(|t| t.simple_tags.clone()) - .map(|st| (st.name, st.string.unwrap_or_default())) - .collect::>() - }) - .unwrap_or_default(); - let filename = path.file_name().unwrap().to_string_lossy().to_string(); let mut episode_index = None; @@ -480,215 +376,28 @@ fn import_media_file( let mut filename_toks = filename.split("."); let filepath_stem = filename_toks.next().unwrap(); - let slug = infojson - .as_ref() - // TODO maybe also include the slug after the primary "id" key - .map(|ij| format!("{}-{}", ij.extractor.to_lowercase(), ij.id)) - .unwrap_or_else(|| { - if let Some((s, e)) = episode_index { - format!( - "{}-s{s}e{e}", - make_kebab( - &path - .parent() - .unwrap() - .file_name() - .unwrap_or_default() - .to_string_lossy() - ) - ) - } else { - make_kebab(filepath_stem) - } - }); + let slug = if let Some((s, e)) = episode_index { + format!( + "{}-s{s}e{e}", + make_kebab( + &path + .parent() + .unwrap() + .file_name() + .unwrap_or_default() + .to_string_lossy() + ) + ) + } else { + make_kebab(filepath_stem) + }; let node = NodeID::from_slug(&slug); - let meta = path.metadata()?; - let mut eids = BTreeMap::::new(); - - for (key, value) in &tags { - match key.as_str() { - "MUSICBRAINZ_TRACKID" => { - eids.insert(IdentifierType::MusicbrainzRecording, value.to_owned()) - } - "MUSICBRAINZ_ARTISTID" => { - eids.insert(IdentifierType::MusicbrainzArtist, value.to_owned()) - } - "MUSICBRAINZ_ALBUMID" => { - eids.insert(IdentifierType::MusicbrainzRelease, value.to_owned()) - } - "MUSICBRAINZ_ALBUMARTISTID" => { - None //? ignore this? - } - "MUSICBRAINZ_RELEASEGROUPID" => { - eids.insert(IdentifierType::MusicbrainzReleaseGroup, value.to_owned()) - } - "ISRC" => eids.insert(IdentifierType::Isrc, value.to_owned()), - "BARCODE" => eids.insert(IdentifierType::Barcode, value.to_owned()), - _ => None, - }; - } - - if iflags.use_acoustid { - let fp = acoustid_fingerprint(path)?; - if let Some((atid, mbid)) = apis - .acoustid - .as_ref() - .ok_or(anyhow!("need acoustid"))? - .get_atid_mbid(&fp, rthandle)? - { - eids.insert(IdentifierType::AcoustIdTrack, atid); - eids.insert(IdentifierType::MusicbrainzRecording, mbid); - }; - } - - let mbrec = eids.get(&IdentifierType::MusicbrainzRecording).cloned(); - db.update_node_init(node, |node| { node.slug = slug; - node.title = m.info.title.clone().or(node.title.clone()); node.visibility = iflags.visibility; - - node.description = tags - .remove("DESCRIPTION") - .or(tags.remove("SYNOPSIS")) - .or(node.description.clone()); - node.tagline = tags.remove("COMMENT").or(node.tagline.clone()); node.parents.insert(parent); - - node.identifiers.extend(eids); - - if let Some(cover) = cover { - node.pictures.insert(PictureSlot::Cover, cover); - } - - if let Some(ct) = tags.get("CONTENT_TYPE") { - node.kind = match ct.to_lowercase().trim() { - "movie" | "documentary" | "film" => NodeKind::Movie, - "music" | "recording" => NodeKind::Music, - _ => NodeKind::Unknown, - } - } - - let tracks = m - .tracks - .as_ref() - .ok_or(anyhow!("no tracks"))? - .entries - .iter() - .map(|track| SourceTrack { - codec: track.codec_id.clone(), - language: track.language.clone(), - name: track.name.clone().unwrap_or_default(), - federated: Vec::new(), - kind: if let Some(video) = &track.video { - SourceTrackKind::Video { - width: video.pixel_width, - height: video.pixel_height, - fps: video.frame_rate, - } - } else if let Some(audio) = &track.audio { - SourceTrackKind::Audio { - channels: audio.channels as usize, - sample_rate: audio.sampling_frequency, - bit_depth: audio.bit_depth.map(|r| r as usize), - } - } else { - SourceTrackKind::Subtitle - }, - source: TrackSource::Local(path.to_owned(), track.track_number), - }) - .collect::>(); - - if let Some(infojson) = infojson { - node.kind = if !tracks - .iter() - .any(|t| matches!(t.kind, SourceTrackKind::Video { .. })) - { - NodeKind::Music - } else if infojson.duration.unwrap_or(0.) < 600. - && infojson.aspect_ratio.unwrap_or(2.) < 1. - { - NodeKind::ShortFormVideo - } else { - NodeKind::Video - }; - node.title = Some(infojson.title); - node.subtitle = if infojson.alt_title != node.title { - infojson.alt_title - } else { - None - } - .or(infojson - .uploader - .as_ref() - .map(|u| clean_uploader_name(u).to_owned())) - .or(node.subtitle.clone()); - - node.tags.extend(infojson.tags.unwrap_or_default()); - - if let Some(desc) = infojson.description { - node.description = Some(desc) - } - node.tagline = Some(infojson.webpage_url); - if let Some(date) = &infojson.upload_date { - node.release_date = - Some(infojson::parse_upload_date(date).context("parsing upload date")?); - } - match infojson.extractor.as_str() { - "youtube" => { - node.identifiers - .insert(IdentifierType::YoutubeVideo, infojson.id); - node.ratings.insert( - RatingType::YoutubeViews, - infojson.view_count.unwrap_or_default() as f64, - ); - if let Some(lc) = infojson.like_count { - node.ratings.insert(RatingType::YoutubeLikes, lc as f64); - } - } - "Bandcamp" => drop( - node.identifiers - .insert(IdentifierType::Bandcamp, infojson.id), - ), - _ => (), - } - } - - // TODO merge size - node.storage_size = meta.len(); - // TODO merge tracks - node.media = Some(MediaInfo { - chapters: m - .chapters - .clone() - .map(|c| { - let mut chaps = Vec::new(); - if let Some(ee) = c.edition_entries.first() { - for ca in &ee.chapter_atoms { - let mut labels = Vec::new(); - for cd in &ca.displays { - for lang in &cd.languages { - labels.push((lang.to_owned(), cd.string.clone())) - } - } - chaps.push(Chapter { - labels, - time_start: Some(ca.time_start as f64 * 1e-9), - time_end: ca.time_end.map(|ts| ts as f64 * 1e-9), - }) - } - } - chaps - }) - .unwrap_or_default(), - duration: fix_invalid_runtime( - m.info.duration.unwrap_or_default() * m.info.timestamp_scale as f64 * 1e-9, - ), - tracks, - }); - Ok(()) })?; @@ -741,9 +450,6 @@ fn import_media_file( for tok in filename_toks { apply_node_flag(db, rthandle, apis, node, tok)?; } - if let Some(mbid) = mbrec { - apply_musicbrainz_recording(db, rthandle, apis, node, mbid)?; - } Ok(()) } @@ -755,48 +461,6 @@ fn apply_node_flag( node: NodeID, flag: &str, ) -> Result<()> { - if let Some(value) = flag.strip_prefix("trakt-").or(flag.strip_prefix("trakt=")) { - let (kind, id) = value.split_once(":").unwrap_or(("", value)); - let kind = match kind { - "movie" | "" => TraktKind::Movie, - "show" => TraktKind::Show, - "season" => TraktKind::Season, - "episode" => TraktKind::Episode, - _ => bail!("unknown trakt kind"), - }; - apply_trakt_tmdb(db, rthandle, apis, node, kind, id)?; - } - if flag == "hidden" { - db.update_node_init(node, |node| { - node.visibility = node.visibility.min(Visibility::Hidden); - Ok(()) - })?; - } - if flag == "reduced" { - db.update_node_init(node, |node| { - node.visibility = node.visibility.min(Visibility::Reduced); - Ok(()) - })?; - } - if let Some(kind) = flag.strip_prefix("kind-").or(flag.strip_prefix("kind=")) { - let kind = match kind { - "movie" => NodeKind::Movie, - "video" => NodeKind::Video, - "music" => NodeKind::Music, - "short_form_video" => NodeKind::ShortFormVideo, - "collection" => NodeKind::Collection, - "channel" => NodeKind::Channel, - "show" => NodeKind::Show, - "series" => NodeKind::Series, - "season" => NodeKind::Season, - "episode" => NodeKind::Episode, - _ => bail!("unknown node kind"), - }; - db.update_node_init(node, |node| { - node.kind = kind; - Ok(()) - })?; - } if let Some(mbid) = flag.strip_prefix("mbrec-").or(flag.strip_prefix("mbrec=")) { apply_musicbrainz_recording(db, rthandle, apis, node, mbid.to_string())? } @@ -1016,18 +680,3 @@ fn make_kebab(i: &str) -> String { } o } - -fn clean_uploader_name(mut s: &str) -> &str { - s = s.strip_suffix(" - Videos").unwrap_or(s); - s = s.strip_suffix(" - Topic").unwrap_or(s); - s = s.strip_prefix("Uploads from ").unwrap_or(s); - s -} - -fn fix_invalid_runtime(d: f64) -> f64 { - match d { - // Broken durations found experimentally - 359999.999 | 359999.000 | 86399.999 | 86399.99900000001 => 0., - x => x, - } -} diff --git a/import/src/musicbrainz.rs b/import/src/musicbrainz.rs deleted file mode 100644 index fe86175..0000000 --- a/import/src/musicbrainz.rs +++ /dev/null @@ -1,318 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin -*/ - -use crate::USER_AGENT; -use anyhow::{Context, Result}; -use jellycache::cache_memory; -use log::info; -use reqwest::{ - header::{HeaderMap, HeaderName, HeaderValue}, - Client, ClientBuilder, -}; -use serde::{Deserialize, Serialize}; -use std::{collections::BTreeMap, sync::Arc, time::Duration}; -use tokio::{ - runtime::Handle, - sync::Semaphore, - time::{sleep_until, Instant}, -}; - -pub mod reltypes { - pub const MUSIC_VIDEO: &str = "ce3de655-7451-44d1-9224-87eb948c205d"; - pub const INSTRUMENTAL: &str = "9fc01a58-7801-4bd2-b07d-61cc7ffacf90"; - pub const VOCAL: &str = "0fdbe3c6-7700-4a31-ae54-b53f06ae1cfa"; - pub const RECORDING: &str = "a01ee869-80a8-45ef-9447-c59e91aa7926"; - pub const PROGRAMMING: &str = "36c50022-44e0-488d-994b-33f11d20301e"; - pub const PRODUCER: &str = "5c0ceac3-feb4-41f0-868d-dc06f6e27fc0"; - pub const ARTIST: &str = "5c0ceac3-feb4-41f0-868d-dc06f6e27fc0"; - pub const PHONOGRAPHIC_COPYRIGHT: &str = "7fd5fbc0-fbf4-4d04-be23-417d50a4dc30"; - pub const MIX: &str = "3e3102e1-1896-4f50-b5b2-dd9824e46efe"; - pub const INSTRUMENT: &str = "59054b12-01ac-43ee-a618-285fd397e461"; - pub const WIKIDATA: &str = "689870a4-a1e4-4912-b17f-7b2664215698"; - pub const VGMDB: &str = "0af15ab3-c615-46d6-b95b-a5fcd2a92ed9"; -} - -pub struct MusicBrainz { - client: Client, - rate_limit: Arc, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub struct MbRecordingRel { - pub id: String, - pub first_release_date: Option, - pub title: String, - pub isrcs: Vec, - pub video: bool, - pub disambiguation: String, - pub length: Option, - pub relations: Vec, - pub artist_credit: Vec, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub struct MbArtistRel { - pub id: String, - pub isnis: Vec, - pub ipis: Vec, - pub name: String, - pub disambiguation: String, - pub country: Option, - pub sort_name: String, - pub gender_id: Option, - pub area: Option, - pub begin_area: Option, - pub end_area: Option, - pub life_span: MbTimespan, - pub relations: Vec, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub struct MbArtistCredit { - pub name: String, - pub artist: MbArtist, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub struct MbRelation { - pub direction: String, - pub r#type: String, - pub type_id: String, - pub begin: Option, - pub end: Option, - pub ended: bool, - pub target_type: String, - pub target_credit: String, - pub source_credit: String, - pub attributes: Vec, - pub attribute_ids: BTreeMap, - pub attribute_values: BTreeMap, - - pub work: Option, - pub artist: Option, - pub url: Option, - pub recording: Option, - pub series: Option, - pub event: Option, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub struct MbSeries { - pub id: String, - pub r#type: Option, - pub type_id: Option, - pub name: String, - pub disambiguation: String, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub struct MbRecording { - pub id: String, - pub title: String, - #[serde(default)] - pub isrcs: Vec, - pub video: bool, - pub disambiguation: String, - pub length: Option, - #[serde(default)] - pub artist_credit: Vec, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub struct MbWork { - pub id: String, - pub r#type: Option, - pub type_id: Option, - pub languages: Vec, - pub iswcs: Vec, - pub language: Option, - pub title: String, - pub attributes: Vec, - pub disambiguation: String, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub struct MbEvent { - pub id: String, - pub r#type: Option, - pub type_id: Option, - pub name: String, - pub time: String, - pub cancelled: bool, - pub setlist: String, - pub life_span: MbTimespan, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub struct MbArtist { - pub id: String, - pub r#type: Option, - pub type_id: Option, - pub name: String, - pub disambiguation: String, - pub country: Option, - pub sort_name: String, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub struct MbTimespan { - pub begin: Option, - pub end: Option, - pub ended: bool, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub struct MbArea { - pub name: String, - pub sort_name: String, - #[serde(default)] - pub iso_3166_1_codes: Vec, - pub id: String, - pub disambiguation: String, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub struct MbUrl { - pub id: String, - pub resource: String, -} - -impl Default for MusicBrainz { - fn default() -> Self { - Self::new() - } -} - -impl MusicBrainz { - const MAX_PAR_REQ: usize = 4; - pub fn new() -> Self { - let client = ClientBuilder::new() - .default_headers(HeaderMap::from_iter([ - ( - HeaderName::from_static("accept"), - HeaderValue::from_static("application/json"), - ), - ( - HeaderName::from_static("user-agent"), - HeaderValue::from_static(USER_AGENT), - ), - ])) - .build() - .unwrap(); - Self { - client, - // send at most 1 req/s according to musicbrainz docs, each lock is held for 10s - // this implementation also never sends more than MAX_PAR_REQ requests in-flight. - rate_limit: Arc::new(Semaphore::new(Self::MAX_PAR_REQ)), - } - } - - pub fn lookup_recording(&self, id: String, rt: &Handle) -> Result> { - cache_memory(&format!("ext/musicbrainz/recording/{id}.json"), move || { - rt.block_on(async { - let _permit = self.rate_limit.clone().acquire_owned().await?; - let permit_drop_ts = Instant::now() + Duration::from_secs(Self::MAX_PAR_REQ as u64); - info!("recording lookup: {id}"); - - let inc = [ - "isrcs", - "artists", - "area-rels", - "artist-rels", - "event-rels", - "genre-rels", - "instrument-rels", - "label-rels", - "place-rels", - "recording-rels", - "release-rels", - "release-group-rels", - "series-rels", - "url-rels", - "work-rels", - ] - .join("+"); - - let resp = self - .client - .get(format!( - "https://musicbrainz.org/ws/2/recording/{id}?inc={inc}" - )) - .send() - .await? - .error_for_status()? - .json::() - .await?; - - tokio::task::spawn(async move { - sleep_until(permit_drop_ts).await; - drop(_permit); - }); - - Ok(resp) - }) - }) - .context("musicbrainz recording lookup") - } - - pub fn lookup_artist(&self, id: String, rt: &Handle) -> Result> { - cache_memory(&format!("ext/musicbrainz/artist/{id}.json"), move || { - rt.block_on(async { - let _permit = self.rate_limit.clone().acquire_owned().await?; - let permit_drop_ts = Instant::now() + Duration::from_secs(Self::MAX_PAR_REQ as u64); - info!("artist lookup: {id}"); - - let inc = [ - "area-rels", - "artist-rels", - "event-rels", - "genre-rels", - "instrument-rels", - "label-rels", - "place-rels", - "recording-rels", - "release-rels", - "release-group-rels", - "series-rels", - "url-rels", - "work-rels", - ] - .join("+"); - - let resp = self - .client - .get(format!( - "https://musicbrainz.org/ws/2/artist/{id}?inc={inc}" - )) - .send() - .await? - .error_for_status()? - .json::() - .await?; - - tokio::task::spawn(async move { - sleep_until(permit_drop_ts).await; - drop(_permit); - }); - - Ok(resp) - }) - }) - .context("musicbrainz artist lookup") - } -} diff --git a/import/src/plugins/acoustid.rs b/import/src/plugins/acoustid.rs new file mode 100644 index 0000000..154b0a2 --- /dev/null +++ b/import/src/plugins/acoustid.rs @@ -0,0 +1,174 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin +*/ +use crate::{ + USER_AGENT, + plugins::{ImportContext, ImportPlugin}, +}; +use anyhow::{Context, Result}; +use jellycache::{HashKey, cache_memory}; +use jellycommon::{IdentifierType, NodeID}; +use jellyremuxer::matroska::Segment; +use log::info; +use reqwest::{ + Client, ClientBuilder, + header::{HeaderMap, HeaderName, HeaderValue}, +}; +use serde::{Deserialize, Serialize}; +use std::{ + io::Read, + path::Path, + process::{Command, Stdio}, + sync::Arc, + time::Duration, +}; +use tokio::{ + runtime::Handle, + sync::Semaphore, + time::{Instant, sleep_until}, +}; + +pub(crate) struct AcoustID { + client: Client, + key: String, + rate_limit: Arc, +} + +#[derive(Debug, Hash, Clone, Serialize, Deserialize)] +pub(crate) struct Fingerprint { + duration: u32, + fingerprint: String, +} + +#[derive(Debug, Serialize, Deserialize)] +pub(crate) struct FpCalcOutput { + duration: f32, + fingerprint: String, +} + +#[derive(Serialize, Deserialize)] +pub(crate) struct AcoustIDLookupResultRecording { + id: String, +} +#[derive(Serialize, Deserialize)] +pub(crate) struct AcoustIDLookupResult { + id: String, + score: f32, + #[serde(default)] + recordings: Vec, +} +#[derive(Serialize, Deserialize)] +pub(crate) struct AcoustIDLookupResponse { + status: String, + results: Vec, +} + +impl AcoustID { + pub fn new(api_key: &str) -> Self { + let client = ClientBuilder::new() + .default_headers(HeaderMap::from_iter([ + ( + HeaderName::from_static("accept"), + HeaderValue::from_static("application/json"), + ), + ( + HeaderName::from_static("user-agent"), + HeaderValue::from_static(USER_AGENT), + ), + ])) + .build() + .unwrap(); + Self { + client, + // send at most 3 req/s according to acoustid docs, each lock is therefore held for 1s + // this implementation also never sends more than 3 requests in-flight. + rate_limit: Arc::new(Semaphore::new(3)), + key: api_key.to_owned(), + } + } + + pub fn get_atid_mbid(&self, fp: &Fingerprint, rt: &Handle) -> Result> { + let res = self.lookup(fp.to_owned(), rt)?; + for r in &res.results { + if let Some(k) = r.recordings.first() { + return Ok(Some((r.id.clone(), k.id.clone()))); + } + } + Ok(None) + } + + pub fn lookup(&self, fp: Fingerprint, rt: &Handle) -> Result> { + cache_memory(&format!("ext/acoustid/{}.json", HashKey(&fp)) , move || rt.block_on(async { + let _permit = self.rate_limit.clone().acquire_owned().await?; + let permit_drop_ts = Instant::now() + Duration::SECOND; + info!("acoustid lookup"); + + let duration = fp.duration; + let fingerprint = fp.fingerprint.replace("=", "%3D"); + let client = &self.key; + let body = format!("format=json&meta=recordingids&client={client}&duration={duration}&fingerprint={fingerprint}"); + + let resp = self + .client + .post("https://api.acoustid.org/v2/lookup".to_string()) + .header("Content-Type", "application/x-www-form-urlencoded") + .body(body) + .send() + .await?.error_for_status()?.json::().await?; + + tokio::task::spawn(async move { + sleep_until(permit_drop_ts).await; + drop(_permit); + }); + + Ok(resp) + })) + .context("acoustid lookup") + } +} + +pub(crate) fn acoustid_fingerprint(path: &Path) -> Result> { + cache_memory( + &format!("media/chromaprint/{}.json", HashKey(path)), + move || { + let child = Command::new("fpcalc") + .arg("-json") + .arg(path) + .stdout(Stdio::piped()) + .spawn() + .context("fpcalc")?; + + let mut buf = Vec::new(); + child + .stdout + .unwrap() + .read_to_end(&mut buf) + .context("read fpcalc output")?; + + let out: FpCalcOutput = + serde_json::from_slice(&buf).context("parsing fpcalc output")?; + let out = Fingerprint { + duration: out.duration as u32, + fingerprint: out.fingerprint, + }; + Ok(out) + }, + ) +} + +impl ImportPlugin for AcoustID { + fn media(&self, ct: &ImportContext, node: NodeID, path: &Path, _seg: &Segment) -> Result<()> { + let fp = acoustid_fingerprint(path)?; + if let Some((atid, mbid)) = self.get_atid_mbid(&fp, &ct.rt)? { + ct.db.update_node_init(node, |n| { + n.identifiers.insert(IdentifierType::AcoustIdTrack, atid); + n.identifiers + .insert(IdentifierType::MusicbrainzRecording, mbid); + Ok(()) + })?; + }; + Ok(()) + } +} diff --git a/import/src/plugins/infojson.rs b/import/src/plugins/infojson.rs new file mode 100644 index 0000000..4dceeb8 --- /dev/null +++ b/import/src/plugins/infojson.rs @@ -0,0 +1,272 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin +*/ +use anyhow::{Context, Result, anyhow}; +use jellycache::cache_read; +use jellycommon::{ + IdentifierType, NodeID, NodeKind, RatingType, + chrono::{Utc, format::Parsed}, +}; +use jellyremuxer::matroska::{AttachedFile, Segment}; +use log::info; +use serde::{Deserialize, Serialize}; +use std::{collections::HashMap, fs::File, io::BufReader, path::Path}; + +use crate::plugins::{ImportContext, ImportPlugin}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YVideo { + pub album: Option, + pub age_limit: Option, + pub alt_title: Option, + pub aspect_ratio: Option, + pub automatic_captions: Option>>, + pub availability: Option, // "public" | "private" | "unlisted", + pub average_rating: Option, + pub categories: Option>, + pub channel_follower_count: Option, + pub channel_id: Option, + pub channel_is_verified: Option, + pub channel: Option, + pub chapters: Option>, + pub comment_count: Option, + pub description: Option, + pub display_id: Option, + pub duration_string: Option, + pub duration: Option, + pub epoch: usize, + pub extractor_key: String, + pub extractor: String, + pub formats: Option>, + pub fulltitle: Option, + pub heatmap: Option>, + pub height: Option, + pub id: String, + pub is_live: Option, + pub like_count: Option, + pub media_type: Option, + pub n_entries: Option, + pub original_url: Option, + pub playable_in_embed: Option, + pub playlist_count: Option, + pub playlist_id: Option, + pub playlist_index: Option, + pub playlist_title: Option, + pub playlist_uploader_id: Option, + pub playlist_uploader: Option, + pub playlist: Option, + pub tags: Option>, + pub thumbnail: Option, + pub thumbnails: Option>, + pub title: String, + pub upload_date: Option, + pub uploader_id: Option, + pub uploader_url: Option, + pub uploader: Option, + pub view_count: Option, + pub was_live: Option, + pub webpage_url_basename: String, + pub webpage_url_domain: String, + pub webpage_url: String, + pub width: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YCaption { + pub url: Option, + pub ext: String, //"vtt" | "json3" | "srv1" | "srv2" | "srv3" | "ttml", + pub protocol: Option, + pub name: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YFormat { + pub format_id: String, + pub format_note: Option, + pub ext: String, + pub protocol: String, + pub acodec: Option, + pub vcodec: Option, + pub url: Option, + pub width: Option, + pub height: Option, + pub fps: Option, + pub columns: Option, + pub fragments: Option>, + pub resolution: Option, + pub dynamic_range: Option, + pub aspect_ratio: Option, + pub http_headers: HashMap, + pub audio_ext: String, + pub video_ext: String, + pub vbr: Option, + pub abr: Option, + pub format: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YFragment { + pub url: Option, + pub duration: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YThumbnail { + pub url: String, + pub preference: Option, + pub id: String, + pub height: Option, + pub width: Option, + pub resolution: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YChapter { + pub start_time: f64, + pub end_time: f64, + pub title: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YHeatmapSample { + pub start_time: f64, + pub end_time: f64, + pub value: f64, +} + +pub fn parse_upload_date(d: &str) -> anyhow::Result { + let (year, month, day) = (&d[0..4], &d[4..6], &d[6..8]); + let (year, month, day) = ( + year.parse().context("parsing year")?, + month.parse().context("parsing month")?, + day.parse().context("parsing day")?, + ); + + let mut p = Parsed::new(); + p.year = Some(year); + p.month = Some(month); + p.day = Some(day); + p.hour_div_12 = Some(0); + p.hour_mod_12 = Some(0); + p.minute = Some(0); + p.second = Some(0); + Ok(p.to_datetime_with_timezone(&Utc)?.timestamp_millis()) +} + +pub fn is_info_json(a: &&AttachedFile) -> bool { + a.name == "info.json" && a.media_type == "application/json" +} +pub struct Infojson; +impl ImportPlugin for Infojson { + fn file(&self, ct: &ImportContext, parent: NodeID, path: &Path) -> Result<()> { + let filename = path.file_name().unwrap().to_string_lossy(); + if filename != "channel.info.json" { + return Ok(()); + } + + info!("import channel info.json at {path:?}"); + let data = serde_json::from_reader::<_, YVideo>(BufReader::new(File::open(path)?))?; + ct.db.update_node_init(parent, |node| { + node.kind = NodeKind::Channel; + node.title = Some(clean_uploader_name(&data.title).to_owned()); + if let Some(cid) = data.channel_id { + node.identifiers.insert(IdentifierType::YoutubeChannel, cid); + } + if let Some(uid) = data.uploader_id { + node.identifiers + .insert(IdentifierType::YoutubeChannelHandle, uid); + } + if let Some(desc) = data.description { + node.description = Some(desc); + } + if let Some(followers) = data.channel_follower_count { + node.ratings + .insert(RatingType::YoutubeFollowers, followers as f64); + } + Ok(()) + })?; + + Ok(()) + } + + fn media(&self, ct: &ImportContext, node: NodeID, _path: &Path, seg: &Segment) -> Result<()> { + let infojson = seg + .attachments + .iter() + .flat_map(|a| &a.files) + .find(is_info_json) + .map(|att| { + let data = cache_read(str::from_utf8(&att.data).unwrap())? + .ok_or(anyhow!("info json cache missing"))?; + anyhow::Ok(serde_json::from_slice::(&data)?) + }) + .transpose() + .context("infojson parsing")?; + + if let Some(infojson) = infojson { + ct.db.update_node_init(node, |node| { + node.kind = if let Some(ty) = &infojson.media_type + && ty == "short" + { + NodeKind::ShortFormVideo + } else if infojson.album.is_some() { + NodeKind::Music + } else { + NodeKind::Video + }; + node.title = Some(infojson.title); + node.subtitle = if infojson.alt_title != node.title { + infojson.alt_title + } else { + None + } + .or(infojson + .uploader + .as_ref() + .map(|u| clean_uploader_name(u).to_owned())) + .or(node.subtitle.clone()); + + node.tags.extend(infojson.tags.unwrap_or_default()); + + if let Some(desc) = infojson.description { + node.description = Some(desc) + } + node.tagline = Some(infojson.webpage_url); + if let Some(date) = &infojson.upload_date { + node.release_date = + Some(parse_upload_date(date).context("parsing upload date")?); + } + match infojson.extractor.as_str() { + "youtube" => { + node.identifiers + .insert(IdentifierType::YoutubeVideo, infojson.id); + node.ratings.insert( + RatingType::YoutubeViews, + infojson.view_count.unwrap_or_default() as f64, + ); + if let Some(lc) = infojson.like_count { + node.ratings.insert(RatingType::YoutubeLikes, lc as f64); + } + } + "Bandcamp" => drop( + node.identifiers + .insert(IdentifierType::Bandcamp, infojson.id), + ), + _ => (), + } + + Ok(()) + })?; + } + Ok(()) + } +} + +fn clean_uploader_name(mut s: &str) -> &str { + s = s.strip_suffix(" - Videos").unwrap_or(s); + s = s.strip_suffix(" - Topic").unwrap_or(s); + s = s.strip_prefix("Uploads from ").unwrap_or(s); + s +} diff --git a/import/src/plugins/media_info.rs b/import/src/plugins/media_info.rs new file mode 100644 index 0000000..1d4d627 --- /dev/null +++ b/import/src/plugins/media_info.rs @@ -0,0 +1,92 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin +*/ + +use crate::plugins::{ImportContext, ImportPlugin}; +use anyhow::{Result, anyhow}; +use jellycommon::{Chapter, NodeID, SourceTrack, SourceTrackKind, TrackSource}; +use jellyremuxer::matroska::Segment; +use std::path::Path; + +pub struct MediaInfo; +impl ImportPlugin for MediaInfo { + fn media(&self, ct: &ImportContext, node: NodeID, path: &Path, seg: &Segment) -> Result<()> { + let tracks = seg + .tracks + .as_ref() + .ok_or(anyhow!("no tracks"))? + .entries + .iter() + .map(|track| SourceTrack { + codec: track.codec_id.clone(), + language: track.language.clone(), + name: track.name.clone().unwrap_or_default(), + federated: Vec::new(), + kind: if let Some(video) = &track.video { + SourceTrackKind::Video { + width: video.pixel_width, + height: video.pixel_height, + fps: video.frame_rate, + } + } else if let Some(audio) = &track.audio { + SourceTrackKind::Audio { + channels: audio.channels as usize, + sample_rate: audio.sampling_frequency, + bit_depth: audio.bit_depth.map(|r| r as usize), + } + } else { + SourceTrackKind::Subtitle + }, + source: TrackSource::Local(path.to_owned(), track.track_number), + }) + .collect::>(); + + let size = path.metadata()?.len(); + + ct.db.update_node_init(node, |node| { + node.storage_size = size; + node.media = Some(jellycommon::MediaInfo { + chapters: seg + .chapters + .clone() + .map(|c| { + let mut chaps = Vec::new(); + if let Some(ee) = c.edition_entries.first() { + for ca in &ee.chapter_atoms { + let mut labels = Vec::new(); + for cd in &ca.displays { + for lang in &cd.languages { + labels.push((lang.to_owned(), cd.string.clone())) + } + } + chaps.push(Chapter { + labels, + time_start: Some(ca.time_start as f64 * 1e-9), + time_end: ca.time_end.map(|ts| ts as f64 * 1e-9), + }) + } + } + chaps + }) + .unwrap_or_default(), + duration: fix_invalid_runtime( + seg.info.duration.unwrap_or_default() * seg.info.timestamp_scale as f64 * 1e-9, + ), + tracks, + }); + Ok(()) + })?; + + Ok(()) + } +} + +fn fix_invalid_runtime(d: f64) -> f64 { + match d { + // Broken durations found experimentally + 359999.999 | 359999.000 | 86399.999 | 86399.99900000001 => 0., + x => x, + } +} diff --git a/import/src/plugins/misc.rs b/import/src/plugins/misc.rs new file mode 100644 index 0000000..4717753 --- /dev/null +++ b/import/src/plugins/misc.rs @@ -0,0 +1,100 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin +*/ +use crate::plugins::{ImportContext, ImportPlugin}; +use anyhow::{Result, bail}; +use jellycache::{HashKey, cache_store}; +use jellycommon::{Asset, NodeID, NodeKind, PictureSlot, Visibility}; +use jellyremuxer::matroska::{AttachedFile, Segment}; +use log::info; +use std::{fs::File, io::Read, path::Path}; + +pub struct ImageFiles; +impl ImportPlugin for ImageFiles { + fn file(&self, ct: &ImportContext, parent: NodeID, path: &Path) -> Result<()> { + let filename = path.file_name().unwrap().to_string_lossy(); + let slot = match filename.as_ref() { + "poster.jpeg" | "poster.webp" | "poster.png" => PictureSlot::Cover, + "backdrop.jpeg" | "backdrop.webp" | "backdrop.png" => PictureSlot::Backdrop, + _ => return Ok(()), + }; + info!("import {slot:?} at {path:?}"); + let asset = Asset(cache_store( + format!("media/literal/{}-poster.image", HashKey(path)), + || { + let mut data = Vec::new(); + File::open(path)?.read_to_end(&mut data)?; + Ok(data) + }, + )?); + ct.db.update_node_init(parent, |node| { + node.pictures.insert(PictureSlot::Cover, asset); + Ok(()) + })?; + Ok(()) + } +} + +pub fn is_cover(a: &&AttachedFile) -> bool { + a.name.starts_with("cover") && a.media_type.starts_with("image/") +} +pub struct ImageAttachments; +impl ImportPlugin for ImageAttachments { + fn media(&self, ct: &ImportContext, node: NodeID, _path: &Path, seg: &Segment) -> Result<()> { + let Some(cover) = seg + .attachments + .iter() + .flat_map(|a| &a.files) + .find(is_cover) + .map(|att| Asset(att.data.clone().try_into().unwrap())) + else { + return Ok(()); + }; + + ct.db.update_node_init(node, |node| { + node.pictures.insert(PictureSlot::Cover, cover); + Ok(()) + })?; + Ok(()) + } +} + +pub struct General; +impl ImportPlugin for General { + fn import_instruction(&self, ct: &ImportContext, node: NodeID, line: &str) -> Result<()> { + if line == "hidden" { + ct.db.update_node_init(node, |node| { + node.visibility = node.visibility.min(Visibility::Hidden); + Ok(()) + })?; + } + if line == "reduced" { + ct.db.update_node_init(node, |node| { + node.visibility = node.visibility.min(Visibility::Reduced); + Ok(()) + })?; + } + if let Some(kind) = line.strip_prefix("kind-").or(line.strip_prefix("kind=")) { + let kind = match kind { + "movie" => NodeKind::Movie, + "video" => NodeKind::Video, + "music" => NodeKind::Music, + "short_form_video" => NodeKind::ShortFormVideo, + "collection" => NodeKind::Collection, + "channel" => NodeKind::Channel, + "show" => NodeKind::Show, + "series" => NodeKind::Series, + "season" => NodeKind::Season, + "episode" => NodeKind::Episode, + _ => bail!("unknown node kind"), + }; + ct.db.update_node_init(node, |node| { + node.kind = kind; + Ok(()) + })?; + } + Ok(()) + } +} diff --git a/import/src/plugins/mod.rs b/import/src/plugins/mod.rs new file mode 100644 index 0000000..47fcfbf --- /dev/null +++ b/import/src/plugins/mod.rs @@ -0,0 +1,48 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin +*/ +pub mod acoustid; +pub mod infojson; +pub mod musicbrainz; +pub mod tags; +pub mod tmdb; +pub mod trakt; +pub mod vgmdb; +pub mod wikidata; +pub mod wikimedia_commons; +pub mod media_info; +pub mod misc; + +use std::path::Path; + +use anyhow::Result; +use jellycommon::NodeID; +use jellydb::Database; +use jellyremuxer::matroska::Segment; +use tokio::runtime::Handle; + +pub struct ImportContext { + pub db: Database, + pub rt: Handle, +} + +pub trait ImportPlugin { + fn file(&self, ct: &ImportContext, parent: NodeID, path: &Path) -> Result<()> { + let _ = (ct, parent, path); + Ok(()) + } + fn media(&self, ct: &ImportContext, node: NodeID, path: &Path, seg: &Segment) -> Result<()> { + let _ = (ct, node, path, seg); + Ok(()) + } + fn import_instruction(&self, ct: &ImportContext, node: NodeID, line: &str) -> Result<()> { + let _ = (ct, node, line); + Ok(()) + } + fn process_node(&self, ct: &ImportContext, node: NodeID) -> Result<()> { + let _ = (ct, node); + Ok(()) + } +} diff --git a/import/src/plugins/musicbrainz.rs b/import/src/plugins/musicbrainz.rs new file mode 100644 index 0000000..44b2a06 --- /dev/null +++ b/import/src/plugins/musicbrainz.rs @@ -0,0 +1,320 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin +*/ + +use crate::{USER_AGENT, plugins::ImportPlugin}; +use anyhow::{Context, Result}; +use jellycache::cache_memory; +use log::info; +use reqwest::{ + Client, ClientBuilder, + header::{HeaderMap, HeaderName, HeaderValue}, +}; +use serde::{Deserialize, Serialize}; +use std::{collections::BTreeMap, sync::Arc, time::Duration}; +use tokio::{ + runtime::Handle, + sync::Semaphore, + time::{Instant, sleep_until}, +}; + +pub mod reltypes { + pub const MUSIC_VIDEO: &str = "ce3de655-7451-44d1-9224-87eb948c205d"; + pub const INSTRUMENTAL: &str = "9fc01a58-7801-4bd2-b07d-61cc7ffacf90"; + pub const VOCAL: &str = "0fdbe3c6-7700-4a31-ae54-b53f06ae1cfa"; + pub const RECORDING: &str = "a01ee869-80a8-45ef-9447-c59e91aa7926"; + pub const PROGRAMMING: &str = "36c50022-44e0-488d-994b-33f11d20301e"; + pub const PRODUCER: &str = "5c0ceac3-feb4-41f0-868d-dc06f6e27fc0"; + pub const ARTIST: &str = "5c0ceac3-feb4-41f0-868d-dc06f6e27fc0"; + pub const PHONOGRAPHIC_COPYRIGHT: &str = "7fd5fbc0-fbf4-4d04-be23-417d50a4dc30"; + pub const MIX: &str = "3e3102e1-1896-4f50-b5b2-dd9824e46efe"; + pub const INSTRUMENT: &str = "59054b12-01ac-43ee-a618-285fd397e461"; + pub const WIKIDATA: &str = "689870a4-a1e4-4912-b17f-7b2664215698"; + pub const VGMDB: &str = "0af15ab3-c615-46d6-b95b-a5fcd2a92ed9"; +} + +pub struct MusicBrainz { + client: Client, + rate_limit: Arc, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct MbRecordingRel { + pub id: String, + pub first_release_date: Option, + pub title: String, + pub isrcs: Vec, + pub video: bool, + pub disambiguation: String, + pub length: Option, + pub relations: Vec, + pub artist_credit: Vec, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct MbArtistRel { + pub id: String, + pub isnis: Vec, + pub ipis: Vec, + pub name: String, + pub disambiguation: String, + pub country: Option, + pub sort_name: String, + pub gender_id: Option, + pub area: Option, + pub begin_area: Option, + pub end_area: Option, + pub life_span: MbTimespan, + pub relations: Vec, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct MbArtistCredit { + pub name: String, + pub artist: MbArtist, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct MbRelation { + pub direction: String, + pub r#type: String, + pub type_id: String, + pub begin: Option, + pub end: Option, + pub ended: bool, + pub target_type: String, + pub target_credit: String, + pub source_credit: String, + pub attributes: Vec, + pub attribute_ids: BTreeMap, + pub attribute_values: BTreeMap, + + pub work: Option, + pub artist: Option, + pub url: Option, + pub recording: Option, + pub series: Option, + pub event: Option, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct MbSeries { + pub id: String, + pub r#type: Option, + pub type_id: Option, + pub name: String, + pub disambiguation: String, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct MbRecording { + pub id: String, + pub title: String, + #[serde(default)] + pub isrcs: Vec, + pub video: bool, + pub disambiguation: String, + pub length: Option, + #[serde(default)] + pub artist_credit: Vec, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct MbWork { + pub id: String, + pub r#type: Option, + pub type_id: Option, + pub languages: Vec, + pub iswcs: Vec, + pub language: Option, + pub title: String, + pub attributes: Vec, + pub disambiguation: String, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct MbEvent { + pub id: String, + pub r#type: Option, + pub type_id: Option, + pub name: String, + pub time: String, + pub cancelled: bool, + pub setlist: String, + pub life_span: MbTimespan, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct MbArtist { + pub id: String, + pub r#type: Option, + pub type_id: Option, + pub name: String, + pub disambiguation: String, + pub country: Option, + pub sort_name: String, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct MbTimespan { + pub begin: Option, + pub end: Option, + pub ended: bool, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct MbArea { + pub name: String, + pub sort_name: String, + #[serde(default)] + pub iso_3166_1_codes: Vec, + pub id: String, + pub disambiguation: String, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct MbUrl { + pub id: String, + pub resource: String, +} + +impl Default for MusicBrainz { + fn default() -> Self { + Self::new() + } +} + +impl MusicBrainz { + const MAX_PAR_REQ: usize = 4; + pub fn new() -> Self { + let client = ClientBuilder::new() + .default_headers(HeaderMap::from_iter([ + ( + HeaderName::from_static("accept"), + HeaderValue::from_static("application/json"), + ), + ( + HeaderName::from_static("user-agent"), + HeaderValue::from_static(USER_AGENT), + ), + ])) + .build() + .unwrap(); + Self { + client, + // send at most 1 req/s according to musicbrainz docs, each lock is held for 10s + // this implementation also never sends more than MAX_PAR_REQ requests in-flight. + rate_limit: Arc::new(Semaphore::new(Self::MAX_PAR_REQ)), + } + } + + pub fn lookup_recording(&self, id: String, rt: &Handle) -> Result> { + cache_memory(&format!("ext/musicbrainz/recording/{id}.json"), move || { + rt.block_on(async { + let _permit = self.rate_limit.clone().acquire_owned().await?; + let permit_drop_ts = Instant::now() + Duration::from_secs(Self::MAX_PAR_REQ as u64); + info!("recording lookup: {id}"); + + let inc = [ + "isrcs", + "artists", + "area-rels", + "artist-rels", + "event-rels", + "genre-rels", + "instrument-rels", + "label-rels", + "place-rels", + "recording-rels", + "release-rels", + "release-group-rels", + "series-rels", + "url-rels", + "work-rels", + ] + .join("+"); + + let resp = self + .client + .get(format!( + "https://musicbrainz.org/ws/2/recording/{id}?inc={inc}" + )) + .send() + .await? + .error_for_status()? + .json::() + .await?; + + tokio::task::spawn(async move { + sleep_until(permit_drop_ts).await; + drop(_permit); + }); + + Ok(resp) + }) + }) + .context("musicbrainz recording lookup") + } + + pub fn lookup_artist(&self, id: String, rt: &Handle) -> Result> { + cache_memory(&format!("ext/musicbrainz/artist/{id}.json"), move || { + rt.block_on(async { + let _permit = self.rate_limit.clone().acquire_owned().await?; + let permit_drop_ts = Instant::now() + Duration::from_secs(Self::MAX_PAR_REQ as u64); + info!("artist lookup: {id}"); + + let inc = [ + "area-rels", + "artist-rels", + "event-rels", + "genre-rels", + "instrument-rels", + "label-rels", + "place-rels", + "recording-rels", + "release-rels", + "release-group-rels", + "series-rels", + "url-rels", + "work-rels", + ] + .join("+"); + + let resp = self + .client + .get(format!( + "https://musicbrainz.org/ws/2/artist/{id}?inc={inc}" + )) + .send() + .await? + .error_for_status()? + .json::() + .await?; + + tokio::task::spawn(async move { + sleep_until(permit_drop_ts).await; + drop(_permit); + }); + + Ok(resp) + }) + }) + .context("musicbrainz artist lookup") + } +} + +impl ImportPlugin for MusicBrainz {} diff --git a/import/src/plugins/tags.rs b/import/src/plugins/tags.rs new file mode 100644 index 0000000..8452aad --- /dev/null +++ b/import/src/plugins/tags.rs @@ -0,0 +1,60 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin +*/ + +use crate::plugins::{ImportContext, ImportPlugin}; +use anyhow::Result; +use jellycommon::{IdentifierType, NodeID, NodeKind}; +use jellyremuxer::matroska::Segment; +use std::{collections::HashMap, path::Path}; + +pub struct Tags; +impl ImportPlugin for Tags { + fn media(&self, ct: &ImportContext, node: NodeID, _path: &Path, seg: &Segment) -> Result<()> { + let tags = seg + .tags + .first() + .map(|tags| { + tags.tags + .iter() + .flat_map(|t| t.simple_tags.clone()) + .map(|st| (st.name, st.string.unwrap_or_default())) + .collect::>() + }) + .unwrap_or_default(); + + ct.db.update_node_init(node, |node| { + node.title = seg.info.title.clone(); + for (key, value) in tags { + match key.as_str() { + "DESCRIPTION" => node.description = Some(value), + "SYNOPSIS" => node.description = Some(value), + "COMMENT" => node.tagline = Some(value), + "CONTENT_TYPE" => { + node.kind = match value.to_lowercase().trim() { + "movie" | "documentary" | "film" => NodeKind::Movie, + "music" | "recording" => NodeKind::Music, + _ => continue, + } + } + _ => node.identifiers.extend(Some(match key.as_str() { + "MUSICBRAINZ_TRACKID" => (IdentifierType::MusicbrainzRecording, value), + "MUSICBRAINZ_ARTISTID" => (IdentifierType::MusicbrainzArtist, value), + "MUSICBRAINZ_ALBUMID" => (IdentifierType::MusicbrainzRelease, value), + "MUSICBRAINZ_ALBUMARTISTID" => continue, + "MUSICBRAINZ_RELEASEGROUPID" => { + (IdentifierType::MusicbrainzReleaseGroup, value) + } + "ISRC" => (IdentifierType::Isrc, value), + "BARCODE" => (IdentifierType::Barcode, value), + _ => continue, + })), + } + } + Ok(()) + })?; + Ok(()) + } +} diff --git a/import/src/plugins/tmdb.rs b/import/src/plugins/tmdb.rs new file mode 100644 index 0000000..3d6e832 --- /dev/null +++ b/import/src/plugins/tmdb.rs @@ -0,0 +1,281 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin +*/ +use crate::USER_AGENT; +use anyhow::{anyhow, bail, Context, Result}; +use jellycache::{cache_memory, cache_store, EscapeKey, HashKey}; +use jellycommon::{ + chrono::{format::Parsed, Utc}, + Asset, +}; +use log::info; +use reqwest::{ + header::{HeaderMap, HeaderName, HeaderValue}, + Client, ClientBuilder, +}; +use serde::{Deserialize, Serialize}; +use std::{fmt::Display, sync::Arc}; +use tokio::runtime::Handle; + +pub struct Tmdb { + client: Client, + image_client: Client, + key: String, +} + +impl Tmdb { + pub fn new(api_key: &str) -> Self { + let client = ClientBuilder::new() + .default_headers(HeaderMap::from_iter([ + ( + HeaderName::from_static("accept"), + HeaderValue::from_static("application/json"), + ), + ( + HeaderName::from_static("user-agent"), + HeaderValue::from_static(USER_AGENT), + ), + ])) + .build() + .unwrap(); + let image_client = ClientBuilder::new().build().unwrap(); + Self { + client, + image_client, + key: api_key.to_owned(), + } + } + pub fn search(&self, kind: TmdbKind, query: &str, rt: &Handle) -> Result> { + cache_memory( + &format!("ext/tmdb/search/{kind}-{}.json", HashKey(query)), + move || { + rt.block_on(async { + info!("searching tmdb: {query:?}"); + Ok(self + .client + .get(format!( + "https://api.themoviedb.org/3/search/{kind}?query={}?api_key={}", + query.replace(" ", "+"), + self.key + )) + .send() + .await? + .error_for_status()? + .json::() + .await?) + }) + }, + ) + .context("tmdb search") + } + pub fn details(&self, kind: TmdbKind, id: u64, rt: &Handle) -> Result> { + cache_memory(&format!("ext/tmdb/details/{kind}-{id}.json"), move || { + rt.block_on(async { + info!("fetching details: {id:?}"); + Ok(self + .client + .get(format!( + "https://api.themoviedb.org/3/{kind}/{id}?api_key={}", + self.key, + )) + .send() + .await? + .error_for_status()? + .json() + .await?) + }) + }) + .context("tmdb details") + } + pub fn person_image(&self, id: u64, rt: &Handle) -> Result> { + cache_memory(&format!("ext/tmdb/person/images/{id}.json"), move || { + rt.block_on(async { + Ok(self + .client + .get(format!( + "https://api.themoviedb.org/3/person/{id}/images?api_key={}", + self.key, + )) + .send() + .await? + .error_for_status()? + .json() + .await?) + }) + }) + .context("tmdb person images") + } + pub fn image(&self, path: &str, rt: &Handle) -> Result { + cache_store( + format!("ext/tmdb/image/{}.image", EscapeKey(path)), + move || { + rt.block_on(async { + info!("downloading image {path:?}"); + Ok(self + .image_client + .get(format!("https://image.tmdb.org/t/p/original{path}")) + .send() + .await? + .error_for_status()? + .bytes() + .await? + .to_vec()) + }) + }, + ) + .context("tmdb image download") + .map(Asset) + } + + pub fn episode_details( + &self, + series_id: u64, + season: usize, + episode: usize, + rt: &Handle, + ) -> Result> { + cache_memory(&format!("ext/tmdb/episode-details/{series_id}-S{season}-E{episode}.json"), move || { + rt.block_on(async { + info!("tmdb episode details {series_id} S={season} E={episode}"); + Ok(self + .image_client + .get(format!("https://api.themoviedb.org/3/tv/{series_id}/season/{season}/episode/{episode}?api_key={}", self.key)) + .send() + .await? + .error_for_status()? + .json() + .await?) + }) + }) + .context("tmdb episode details") + } +} + +pub fn parse_release_date(d: &str) -> Result> { + if d.is_empty() { + return Ok(None); + } else if d.len() < 10 { + bail!(anyhow!("date string too short")) + } + let (year, month, day) = (&d[0..4], &d[5..7], &d[8..10]); + let (year, month, day) = ( + year.parse().context("parsing year")?, + month.parse().context("parsing month")?, + day.parse().context("parsing day")?, + ); + + let mut p = Parsed::new(); + p.year = Some(year); + p.month = Some(month); + p.day = Some(day); + p.hour_div_12 = Some(0); + p.hour_mod_12 = Some(0); + p.minute = Some(0); + p.second = Some(0); + Ok(Some(p.to_datetime_with_timezone(&Utc)?.timestamp_millis())) +} + +impl Display for TmdbKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(match self { + TmdbKind::Tv => "tv", + TmdbKind::Movie => "movie", + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TmdbEpisode { + pub air_date: String, + pub overview: String, + pub name: String, + pub id: u64, + pub runtime: f64, + pub still_path: Option, + pub vote_average: f64, + pub vote_count: usize, +} + +#[derive(Debug, Clone, Copy, Hash, Serialize, Deserialize)] +pub enum TmdbKind { + Tv, + Movie, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TmdbPersonImage { + pub id: u64, + pub profiles: Vec, +} +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TmdbPersonImageProfile { + pub aspect_ratio: f64, + pub height: u32, + pub width: u32, + pub file_path: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TmdbQuery { + pub page: usize, + pub results: Vec, + pub total_pages: usize, + pub total_results: usize, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TmdbQueryResult { + pub adult: bool, + pub backdrop_path: Option, + pub genre_ids: Vec, + pub id: u64, + pub original_language: Option, + pub original_title: Option, + pub overview: String, + pub popularity: f64, + pub poster_path: Option, + pub release_date: Option, + pub title: Option, + pub name: Option, + pub vote_average: f64, + pub vote_count: usize, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TmdbDetails { + pub adult: bool, + pub backdrop_path: Option, + pub genres: Vec, + pub id: u64, + pub original_language: Option, + pub original_title: Option, + pub overview: String, + pub popularity: f64, + pub poster_path: Option, + pub release_date: Option, + pub title: Option, + pub name: Option, + pub vote_average: f64, + pub vote_count: usize, + pub budget: Option, + pub homepage: Option, + pub imdb_id: Option, + pub production_companies: Vec, + pub revenue: Option, + pub tagline: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TmdbGenre { + pub id: u64, + pub name: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TmdbProductionCompany { + pub id: u64, + pub name: String, + pub logo_path: Option, +} diff --git a/import/src/plugins/trakt.rs b/import/src/plugins/trakt.rs new file mode 100644 index 0000000..5a1aa8e --- /dev/null +++ b/import/src/plugins/trakt.rs @@ -0,0 +1,403 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin +*/ +use crate::{ + USER_AGENT, + plugins::{ImportContext, ImportPlugin}, +}; +use anyhow::{Context, Result, bail}; +use jellycache::{HashKey, cache_memory}; +use jellycommon::{Appearance, CreditCategory, IdentifierType, NodeID, NodeKind}; +use log::info; +use reqwest::{ + Client, ClientBuilder, + header::{HeaderMap, HeaderName, HeaderValue}, +}; +use serde::{Deserialize, Serialize}; +use std::{collections::BTreeMap, fmt::Display, sync::Arc}; +use tokio::runtime::Handle; + +pub struct Trakt { + client: Client, +} + +impl Trakt { + pub fn new(api_key: &str) -> Self { + let client = ClientBuilder::new() + .default_headers(HeaderMap::from_iter([ + ( + HeaderName::from_static("trakt-api-key"), + HeaderValue::from_str(api_key).unwrap(), + ), + ( + HeaderName::from_static("trakt-api-version"), + HeaderValue::from_static("2"), + ), + ( + HeaderName::from_static("content-type"), + HeaderValue::from_static("application/json"), + ), + ( + HeaderName::from_static("user-agent"), + HeaderValue::from_static(USER_AGENT), + ), + ])) + .build() + .unwrap(); + Self { client } + } + + pub fn search( + &self, + kinds: &[TraktKind], + query: &str, + rt: &Handle, + ) -> Result>> { + cache_memory( + &format!("ext/trakt/search/{}.json", HashKey(query)), + move || { + rt.block_on(async { + let url = format!( + "https://api.trakt.tv/search/{}?query={}&extended=full", + kinds + .iter() + .map(|t| t.singular()) + .collect::>() + .join(","), + urlencoding::encode(query), + ); + let res = self.client.get(url).send().await?.error_for_status()?; + Ok(res.json().await?) + }) + }, + ) + .context("trakt search") + } + + pub fn lookup(&self, kind: TraktKind, id: u64, rt: &Handle) -> Result> { + cache_memory(&format!("ext/trakt/lookup/{kind}-{id}.json"), move || { + rt.block_on(async { + info!("trakt lookup {kind:?}:{id:?}"); + let url = format!("https://api.trakt.tv/{}/{id}?extended=full", kind.plural()); + let res = self.client.get(url).send().await?.error_for_status()?; + Ok(res.json().await?) + }) + }) + .context("trakt lookup") + } + + pub fn people(&self, kind: TraktKind, id: u64, rt: &Handle) -> Result> { + cache_memory(&format!("ext/trakt/people/{kind}-{id}.json"), move || { + rt.block_on(async { + info!("trakt people {kind:?}:{id:?}"); + let url = format!( + "https://api.trakt.tv/{}/{id}/people?extended=full", + kind.plural() + ); + let res = self.client.get(url).send().await?.error_for_status()?; + Ok(res.json().await?) + }) + }) + .context("trakt people") + } + + pub fn show_seasons(&self, id: u64, rt: &Handle) -> Result>> { + cache_memory(&format!("ext/trakt/seasons/{id}.json"), move || { + rt.block_on(async { + info!("trakt seasons {id:?}"); + let url = format!("https://api.trakt.tv/shows/{id}/seasons?extended=full"); + let res = self.client.get(url).send().await?.error_for_status()?; + Ok(res.json().await?) + }) + }) + .context("trakt show seasons") + } + + pub fn show_season_episodes( + &self, + id: u64, + season: usize, + rt: &Handle, + ) -> Result>> { + cache_memory( + &format!("ext/trakt/episodes/{id}-S{season}.json"), + move || { + rt.block_on(async { + info!("trakt episodes {id:?} season={season}"); + let url = + format!("https://api.trakt.tv/shows/{id}/seasons/{season}?extended=full"); + let res = self.client.get(url).send().await?.error_for_status()?; + Ok(res.json().await?) + }) + }, + ) + .context("trakt show season episodes") + } +} + +#[derive(Debug, Clone, Deserialize, Serialize, Default)] +pub struct TraktSeason { + pub number: usize, + pub ids: TraktIds, + pub rating: f64, + pub votes: usize, + pub episode_count: usize, + pub aired_count: Option, + pub title: String, + pub overview: Option, + pub network: String, +} + +#[derive(Debug, Clone, Deserialize, Serialize, Default)] +pub struct TraktEpisode { + pub season: Option, + pub number: usize, + pub number_abs: Option, + pub ids: TraktIds, + pub rating: f64, + pub votes: usize, + pub title: String, + pub runtime: f64, + pub overview: Option, + pub available_translations: Vec, + pub first_aired: Option, + pub episode_type: String, +} + +#[derive(Debug, Clone, Deserialize, Serialize, Default)] +pub struct TraktPeople { + #[serde(default)] + pub cast: Vec, + #[serde(default)] + pub crew: BTreeMap>, +} + +#[derive(Debug, Clone, Deserialize, Serialize, Default)] +pub struct TraktAppearance { + #[serde(default)] + pub jobs: Vec, + #[serde(default)] + pub characters: Vec, + pub person: TraktPerson, +} + +#[derive(Debug, Clone, Deserialize, Serialize, Default)] +pub struct TraktPerson { + pub name: String, + pub ids: TraktIds, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct TraktSearchResult { + pub r#type: TraktKind, + pub score: f64, + #[serde(flatten)] + pub inner: TraktKindObject, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum TraktKindObject { + Movie(TraktMediaObject), + Show(TraktMediaObject), + Season(TraktMediaObject), + Episode(TraktMediaObject), + Person(TraktMediaObject), + User(TraktMediaObject), +} + +impl TraktKindObject { + pub fn inner(&self) -> &TraktMediaObject { + match self { + TraktKindObject::Movie(x) + | TraktKindObject::Show(x) + | TraktKindObject::Season(x) + | TraktKindObject::Episode(x) + | TraktKindObject::Person(x) + | TraktKindObject::User(x) => x, + } + } +} + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] +pub enum TraktPeopleGroup { + #[serde(rename = "production")] + Production, + #[serde(rename = "art")] + Art, + #[serde(rename = "crew")] + Crew, + #[serde(rename = "costume & make-up")] //? they really use that in as a key?! + CostumeMakeup, + #[serde(rename = "directing")] + Directing, + #[serde(rename = "writing")] + Writing, + #[serde(rename = "sound")] + Sound, + #[serde(rename = "camera")] + Camera, + #[serde(rename = "visual effects")] + VisualEffects, + #[serde(rename = "lighting")] + Lighting, + #[serde(rename = "editing")] + Editing, + #[serde(rename = "created by")] + CreatedBy, +} +impl TraktPeopleGroup { + pub fn as_credit_category(self) -> CreditCategory { + match self { + TraktPeopleGroup::Production => CreditCategory::Production, + TraktPeopleGroup::Art => CreditCategory::Art, + TraktPeopleGroup::Crew => CreditCategory::Crew, + TraktPeopleGroup::CostumeMakeup => CreditCategory::CostumeMakeup, + TraktPeopleGroup::Directing => CreditCategory::Directing, + TraktPeopleGroup::Writing => CreditCategory::Writing, + TraktPeopleGroup::Sound => CreditCategory::Sound, + TraktPeopleGroup::Camera => CreditCategory::Camera, + TraktPeopleGroup::VisualEffects => CreditCategory::Vfx, + TraktPeopleGroup::Lighting => CreditCategory::Lighting, + TraktPeopleGroup::Editing => CreditCategory::Editing, + TraktPeopleGroup::CreatedBy => CreditCategory::CreatedBy, + } + } +} +impl TraktAppearance { + pub fn a(&self) -> Appearance { + Appearance { + jobs: self.jobs.to_owned(), + characters: self.characters.to_owned(), + node: NodeID([0; 32]), // person: Person { + // name: self.person.name.to_owned(), + // headshot: None, + // ids: self.person.ids.to_owned(), + // }, + } + } +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TraktMediaObject { + pub title: String, + pub year: Option, + pub ids: TraktIds, + + pub tagline: Option, + pub overview: Option, + pub released: Option, + pub runtime: Option, + pub country: Option, + pub trailer: Option, + pub homepage: Option, + pub status: Option, + pub rating: Option, + pub votes: Option, + pub comment_count: Option, + pub language: Option, + pub available_translations: Option>, + pub genres: Option>, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Default)] +pub struct TraktIds { + pub trakt: Option, + pub slug: Option, + pub tvdb: Option, + pub imdb: Option, + pub tmdb: Option, +} + +impl Display for TraktSearchResult { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!( + "{} ({}) \x1b[2m{} [{:?}]\x1b[0m", + self.inner.inner().title, + self.inner.inner().year.unwrap_or(0), + self.r#type, + self.inner.inner().ids + )) + } +} + +#[derive(Debug, Serialize, Deserialize, Clone, Copy, Hash, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum TraktKind { + Movie, + Show, + Season, + Episode, + Person, + User, +} + +impl TraktKind { + pub fn as_node_kind(self) -> NodeKind { + match self { + TraktKind::Movie => NodeKind::Movie, + TraktKind::Show => NodeKind::Show, + TraktKind::Season => NodeKind::Season, + TraktKind::Episode => NodeKind::Episode, + TraktKind::Person => NodeKind::Channel, + TraktKind::User => NodeKind::Channel, + } + } +} + +impl TraktKind { + pub fn singular(self) -> &'static str { + match self { + TraktKind::Movie => "movie", + TraktKind::Show => "show", + TraktKind::Season => "season", + TraktKind::Episode => "episode", + TraktKind::Person => "person", + TraktKind::User => "user", + } + } + pub fn plural(self) -> &'static str { + match self { + TraktKind::Movie => "movies", + TraktKind::Show => "shows", + TraktKind::Season => "seasons", + TraktKind::Episode => "episodes", + TraktKind::Person => "people", + TraktKind::User => "users", // //! not used in API + } + } +} +impl Display for TraktKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(match self { + TraktKind::Movie => "Movie", + TraktKind::Show => "Show", + TraktKind::Season => "Season", + TraktKind::Episode => "Episode", + TraktKind::Person => "Person", + TraktKind::User => "User", + }) + } +} + +impl ImportPlugin for Trakt { + fn import_instruction(&self, ct: &ImportContext, node: NodeID, line: &str) -> Result<()> { + if let Some(value) = line.strip_prefix("trakt-").or(line.strip_prefix("trakt=")) { + let (ty, id) = value.split_once(":").unwrap_or(("movie", value)); + let ty = match ty { + "movie" => IdentifierType::TraktMovie, + "show" => IdentifierType::TraktShow, + "season" => IdentifierType::TraktSeason, + "episode" => IdentifierType::TraktEpisode, + _ => bail!("unknown trakt kind"), + }; + ct.db.update_node_init(node, |node| { + node.identifiers.insert(ty, id.to_owned()); + Ok(()) + })?; + } + Ok(()) + } +} diff --git a/import/src/plugins/vgmdb.rs b/import/src/plugins/vgmdb.rs new file mode 100644 index 0000000..402fd90 --- /dev/null +++ b/import/src/plugins/vgmdb.rs @@ -0,0 +1,127 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin +*/ + +use crate::USER_AGENT; +use anyhow::{Context, Result}; +use jellycache::{cache, cache_store, HashKey}; +use jellycommon::Asset; +use log::info; +use regex::Regex; +use reqwest::{ + header::{HeaderMap, HeaderName, HeaderValue}, + Client, ClientBuilder, +}; +use std::{ + sync::{Arc, LazyLock}, + time::Duration, +}; +use tokio::{ + runtime::Handle, + sync::Semaphore, + time::{sleep_until, Instant}, +}; + +pub struct Vgmdb { + client: Client, + rate_limit: Arc, +} + +static RE_IMAGE_URL_FROM_HTML: LazyLock = LazyLock::new(|| { + Regex::new(r#"href='(?https://media.vgm.io/artists/[-/\w\.]+)'"#).unwrap() +}); + +impl Default for Vgmdb { + fn default() -> Self { + Self::new() + } +} + +impl Vgmdb { + pub fn new() -> Self { + let client = ClientBuilder::new() + .default_headers(HeaderMap::from_iter([ + ( + HeaderName::from_static("user-agent"), + HeaderValue::from_static(USER_AGENT), + ), + ( + HeaderName::from_static("x-comment"), + HeaderValue::from_static("Please add an API, thanks!"), + ), + ])) + .build() + .unwrap(); + Self { + client, + rate_limit: Arc::new(Semaphore::new(3)), + } + } + + pub fn get_artist_image(&self, id: u64, rt: &Handle) -> Result> { + if let Some(url) = self.get_artist_image_url(id, rt)? { + cache_store( + format!("ext/vgmdb/artist-image/{}.image", HashKey(&url)), + move || { + rt.block_on(async { + info!("downloading image {url:?}"); + Ok(self + .client + .get(url) + .send() + .await? + .error_for_status()? + .bytes() + .await? + .to_vec()) + }) + }, + ) + .context("vgmdb media download") + .map(Asset) + .map(Some) + } else { + Ok(None) + } + } + + pub fn get_artist_image_url(&self, id: u64, rt: &Handle) -> Result> { + let html = self.scrape_artist_page(id, rt)?; + if let Some(cap) = RE_IMAGE_URL_FROM_HTML.captures(&str::from_utf8(&html).unwrap()) { + if let Some(url) = cap.name("url").map(|m| m.as_str()) { + return Ok(Some(url.to_string())); + } + } + Ok(None) + } + + pub fn scrape_artist_page(&self, id: u64, rt: &Handle) -> Result> { + cache(&format!("ext/vgmdb/artist-page/{id}.html"), move || { + rt.block_on(async { + let _permit = self.rate_limit.clone().acquire_owned().await?; + let permit_drop_ts = Instant::now() + Duration::from_secs(1); + info!("scrape artist: {id}"); + + let resp = self + .client + .get(format!("https://vgmdb.net/artist/{id}")) + .send() + .await? + .error_for_status()? + .bytes() + .await? + .to_vec(); + + tokio::task::spawn(async move { + sleep_until(permit_drop_ts).await; + drop(_permit); + }); + + Ok(resp) + }) + }) + .context("vgmdb artist page scrape") + } +} diff --git a/import/src/plugins/wikidata.rs b/import/src/plugins/wikidata.rs new file mode 100644 index 0000000..358996e --- /dev/null +++ b/import/src/plugins/wikidata.rs @@ -0,0 +1,129 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin +*/ + +use crate::USER_AGENT; +use anyhow::{Context, Result, bail}; +use jellycache::{EscapeKey, cache_memory}; +use log::info; +use reqwest::{ + Client, ClientBuilder, + header::{HeaderMap, HeaderName, HeaderValue}, +}; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::{collections::BTreeMap, sync::Arc}; +use tokio::runtime::Handle; + +pub struct Wikidata { + client: Client, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct WikidataResponse { + entities: BTreeMap, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct WikidataEntity { + pub pageid: u64, + pub ns: u64, + pub title: String, + pub lastrevid: u64, + pub modified: String, + pub r#type: String, + pub id: String, + pub claims: BTreeMap>, +} +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct WikidataClaim { + pub r#type: String, + pub id: String, + pub rank: String, + pub mainsnak: WikidataSnak, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct WikidataSnak { + pub snaktype: String, + pub property: String, + pub hash: String, + pub datavalue: Option, + pub datatype: String, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct WikidataValue { + pub value: Value, + pub r#type: String, +} + +pub mod properties { + pub static IMAGE: &str = "P18"; +} + +impl Default for Wikidata { + fn default() -> Self { + Self::new() + } +} + +impl Wikidata { + pub fn new() -> Self { + let client = ClientBuilder::new() + .default_headers(HeaderMap::from_iter([ + ( + HeaderName::from_static("accept"), + HeaderValue::from_static("application/json"), + ), + ( + HeaderName::from_static("user-agent"), + HeaderValue::from_static(USER_AGENT), + ), + ])) + .build() + .unwrap(); + Self { client } + } + + pub fn query_image_path(&self, id: String, rt: &Handle) -> Result> { + let response = self.query(id.clone(), rt)?; + if let Some(entity) = response.entities.get(&id) { + if let Some(images) = entity.claims.get(properties::IMAGE) { + for image in images { + if image.mainsnak.datatype != "commonsMedia" { + bail!("image is of type {:?}", image.mainsnak.datatype); + } + if let Some(dv) = &image.mainsnak.datavalue { + if let Value::String(filename) = &dv.value { + return Ok(Some(filename.to_owned())); + } + } + } + } + } + Ok(None) + } + + pub fn query(&self, id: String, rt: &Handle) -> Result> { + cache_memory( + &format!("ext/wikidata/{}.json", EscapeKey(&id)), + move || { + rt.block_on(async { + info!("entity query: {id}"); + Ok(self + .client + .get(format!("https://www.wikidata.org/entity/{id}")) + .send() + .await? + .error_for_status()? + .json() + .await?) + }) + }, + ) + .context("wikidata entity") + } +} diff --git a/import/src/plugins/wikimedia_commons.rs b/import/src/plugins/wikimedia_commons.rs new file mode 100644 index 0000000..86d934c --- /dev/null +++ b/import/src/plugins/wikimedia_commons.rs @@ -0,0 +1,63 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin +*/ + +use crate::USER_AGENT; +use anyhow::{Context, Result}; +use jellycache::{cache_store, EscapeKey}; +use jellycommon::Asset; +use reqwest::{ + header::{HeaderMap, HeaderName, HeaderValue}, + redirect::Policy, + Client, ClientBuilder, +}; +use tokio::runtime::Handle; + +pub struct WikimediaCommons { + client: Client, +} +impl Default for WikimediaCommons { + fn default() -> Self { + Self::new() + } +} + +impl WikimediaCommons { + pub fn new() -> Self { + let client = ClientBuilder::new() + .default_headers(HeaderMap::from_iter([( + HeaderName::from_static("user-agent"), + HeaderValue::from_static(USER_AGENT), + )])) + .redirect(Policy::limited(5)) + .build() + .unwrap(); + Self { client } + } + + pub fn image_by_filename(&self, filename: String, rt: &Handle) -> Result { + cache_store( + format!("ext/wikimedia-commons/image/{}.image", EscapeKey(&filename)), + move || { + rt.block_on(async { + Ok(self + .client + .get(format!( + "https://commons.wikimedia.org/wiki/Special:FilePath/{}", + filename.replace(" ", "_") + )) + .send() + .await? + .error_for_status()? + .bytes() + .await? + .to_vec()) + }) + }, + ) + .context("mediawiki image by filename") + .map(Asset) + } +} diff --git a/import/src/tmdb.rs b/import/src/tmdb.rs deleted file mode 100644 index 3d6e832..0000000 --- a/import/src/tmdb.rs +++ /dev/null @@ -1,281 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin -*/ -use crate::USER_AGENT; -use anyhow::{anyhow, bail, Context, Result}; -use jellycache::{cache_memory, cache_store, EscapeKey, HashKey}; -use jellycommon::{ - chrono::{format::Parsed, Utc}, - Asset, -}; -use log::info; -use reqwest::{ - header::{HeaderMap, HeaderName, HeaderValue}, - Client, ClientBuilder, -}; -use serde::{Deserialize, Serialize}; -use std::{fmt::Display, sync::Arc}; -use tokio::runtime::Handle; - -pub struct Tmdb { - client: Client, - image_client: Client, - key: String, -} - -impl Tmdb { - pub fn new(api_key: &str) -> Self { - let client = ClientBuilder::new() - .default_headers(HeaderMap::from_iter([ - ( - HeaderName::from_static("accept"), - HeaderValue::from_static("application/json"), - ), - ( - HeaderName::from_static("user-agent"), - HeaderValue::from_static(USER_AGENT), - ), - ])) - .build() - .unwrap(); - let image_client = ClientBuilder::new().build().unwrap(); - Self { - client, - image_client, - key: api_key.to_owned(), - } - } - pub fn search(&self, kind: TmdbKind, query: &str, rt: &Handle) -> Result> { - cache_memory( - &format!("ext/tmdb/search/{kind}-{}.json", HashKey(query)), - move || { - rt.block_on(async { - info!("searching tmdb: {query:?}"); - Ok(self - .client - .get(format!( - "https://api.themoviedb.org/3/search/{kind}?query={}?api_key={}", - query.replace(" ", "+"), - self.key - )) - .send() - .await? - .error_for_status()? - .json::() - .await?) - }) - }, - ) - .context("tmdb search") - } - pub fn details(&self, kind: TmdbKind, id: u64, rt: &Handle) -> Result> { - cache_memory(&format!("ext/tmdb/details/{kind}-{id}.json"), move || { - rt.block_on(async { - info!("fetching details: {id:?}"); - Ok(self - .client - .get(format!( - "https://api.themoviedb.org/3/{kind}/{id}?api_key={}", - self.key, - )) - .send() - .await? - .error_for_status()? - .json() - .await?) - }) - }) - .context("tmdb details") - } - pub fn person_image(&self, id: u64, rt: &Handle) -> Result> { - cache_memory(&format!("ext/tmdb/person/images/{id}.json"), move || { - rt.block_on(async { - Ok(self - .client - .get(format!( - "https://api.themoviedb.org/3/person/{id}/images?api_key={}", - self.key, - )) - .send() - .await? - .error_for_status()? - .json() - .await?) - }) - }) - .context("tmdb person images") - } - pub fn image(&self, path: &str, rt: &Handle) -> Result { - cache_store( - format!("ext/tmdb/image/{}.image", EscapeKey(path)), - move || { - rt.block_on(async { - info!("downloading image {path:?}"); - Ok(self - .image_client - .get(format!("https://image.tmdb.org/t/p/original{path}")) - .send() - .await? - .error_for_status()? - .bytes() - .await? - .to_vec()) - }) - }, - ) - .context("tmdb image download") - .map(Asset) - } - - pub fn episode_details( - &self, - series_id: u64, - season: usize, - episode: usize, - rt: &Handle, - ) -> Result> { - cache_memory(&format!("ext/tmdb/episode-details/{series_id}-S{season}-E{episode}.json"), move || { - rt.block_on(async { - info!("tmdb episode details {series_id} S={season} E={episode}"); - Ok(self - .image_client - .get(format!("https://api.themoviedb.org/3/tv/{series_id}/season/{season}/episode/{episode}?api_key={}", self.key)) - .send() - .await? - .error_for_status()? - .json() - .await?) - }) - }) - .context("tmdb episode details") - } -} - -pub fn parse_release_date(d: &str) -> Result> { - if d.is_empty() { - return Ok(None); - } else if d.len() < 10 { - bail!(anyhow!("date string too short")) - } - let (year, month, day) = (&d[0..4], &d[5..7], &d[8..10]); - let (year, month, day) = ( - year.parse().context("parsing year")?, - month.parse().context("parsing month")?, - day.parse().context("parsing day")?, - ); - - let mut p = Parsed::new(); - p.year = Some(year); - p.month = Some(month); - p.day = Some(day); - p.hour_div_12 = Some(0); - p.hour_mod_12 = Some(0); - p.minute = Some(0); - p.second = Some(0); - Ok(Some(p.to_datetime_with_timezone(&Utc)?.timestamp_millis())) -} - -impl Display for TmdbKind { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str(match self { - TmdbKind::Tv => "tv", - TmdbKind::Movie => "movie", - }) - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct TmdbEpisode { - pub air_date: String, - pub overview: String, - pub name: String, - pub id: u64, - pub runtime: f64, - pub still_path: Option, - pub vote_average: f64, - pub vote_count: usize, -} - -#[derive(Debug, Clone, Copy, Hash, Serialize, Deserialize)] -pub enum TmdbKind { - Tv, - Movie, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct TmdbPersonImage { - pub id: u64, - pub profiles: Vec, -} -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct TmdbPersonImageProfile { - pub aspect_ratio: f64, - pub height: u32, - pub width: u32, - pub file_path: String, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct TmdbQuery { - pub page: usize, - pub results: Vec, - pub total_pages: usize, - pub total_results: usize, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct TmdbQueryResult { - pub adult: bool, - pub backdrop_path: Option, - pub genre_ids: Vec, - pub id: u64, - pub original_language: Option, - pub original_title: Option, - pub overview: String, - pub popularity: f64, - pub poster_path: Option, - pub release_date: Option, - pub title: Option, - pub name: Option, - pub vote_average: f64, - pub vote_count: usize, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct TmdbDetails { - pub adult: bool, - pub backdrop_path: Option, - pub genres: Vec, - pub id: u64, - pub original_language: Option, - pub original_title: Option, - pub overview: String, - pub popularity: f64, - pub poster_path: Option, - pub release_date: Option, - pub title: Option, - pub name: Option, - pub vote_average: f64, - pub vote_count: usize, - pub budget: Option, - pub homepage: Option, - pub imdb_id: Option, - pub production_companies: Vec, - pub revenue: Option, - pub tagline: Option, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct TmdbGenre { - pub id: u64, - pub name: String, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct TmdbProductionCompany { - pub id: u64, - pub name: String, - pub logo_path: Option, -} diff --git a/import/src/trakt.rs b/import/src/trakt.rs deleted file mode 100644 index 270c589..0000000 --- a/import/src/trakt.rs +++ /dev/null @@ -1,380 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin -*/ -use crate::USER_AGENT; -use anyhow::{Context, Result}; -use jellycache::{cache_memory, HashKey}; -use jellycommon::{Appearance, CreditCategory, NodeID, NodeKind}; -use log::info; -use reqwest::{ - header::{HeaderMap, HeaderName, HeaderValue}, - Client, ClientBuilder, -}; -use serde::{Deserialize, Serialize}; -use std::{collections::BTreeMap, fmt::Display, sync::Arc}; -use tokio::runtime::Handle; - -pub struct Trakt { - client: Client, -} - -impl Trakt { - pub fn new(api_key: &str) -> Self { - let client = ClientBuilder::new() - .default_headers(HeaderMap::from_iter([ - ( - HeaderName::from_static("trakt-api-key"), - HeaderValue::from_str(api_key).unwrap(), - ), - ( - HeaderName::from_static("trakt-api-version"), - HeaderValue::from_static("2"), - ), - ( - HeaderName::from_static("content-type"), - HeaderValue::from_static("application/json"), - ), - ( - HeaderName::from_static("user-agent"), - HeaderValue::from_static(USER_AGENT), - ), - ])) - .build() - .unwrap(); - Self { client } - } - - pub fn search( - &self, - kinds: &[TraktKind], - query: &str, - rt: &Handle, - ) -> Result>> { - cache_memory( - &format!("ext/trakt/search/{}.json", HashKey(query)), - move || { - rt.block_on(async { - let url = format!( - "https://api.trakt.tv/search/{}?query={}&extended=full", - kinds - .iter() - .map(|t| t.singular()) - .collect::>() - .join(","), - urlencoding::encode(query), - ); - let res = self.client.get(url).send().await?.error_for_status()?; - Ok(res.json().await?) - }) - }, - ) - .context("trakt search") - } - - pub fn lookup(&self, kind: TraktKind, id: u64, rt: &Handle) -> Result> { - cache_memory(&format!("ext/trakt/lookup/{kind}-{id}.json"), move || { - rt.block_on(async { - info!("trakt lookup {kind:?}:{id:?}"); - let url = format!("https://api.trakt.tv/{}/{id}?extended=full", kind.plural()); - let res = self.client.get(url).send().await?.error_for_status()?; - Ok(res.json().await?) - }) - }) - .context("trakt lookup") - } - - pub fn people(&self, kind: TraktKind, id: u64, rt: &Handle) -> Result> { - cache_memory(&format!("ext/trakt/people/{kind}-{id}.json"), move || { - rt.block_on(async { - info!("trakt people {kind:?}:{id:?}"); - let url = format!( - "https://api.trakt.tv/{}/{id}/people?extended=full", - kind.plural() - ); - let res = self.client.get(url).send().await?.error_for_status()?; - Ok(res.json().await?) - }) - }) - .context("trakt people") - } - - pub fn show_seasons(&self, id: u64, rt: &Handle) -> Result>> { - cache_memory(&format!("ext/trakt/seasons/{id}.json"), move || { - rt.block_on(async { - info!("trakt seasons {id:?}"); - let url = format!("https://api.trakt.tv/shows/{id}/seasons?extended=full"); - let res = self.client.get(url).send().await?.error_for_status()?; - Ok(res.json().await?) - }) - }) - .context("trakt show seasons") - } - - pub fn show_season_episodes( - &self, - id: u64, - season: usize, - rt: &Handle, - ) -> Result>> { - cache_memory( - &format!("ext/trakt/episodes/{id}-S{season}.json"), - move || { - rt.block_on(async { - info!("trakt episodes {id:?} season={season}"); - let url = - format!("https://api.trakt.tv/shows/{id}/seasons/{season}?extended=full"); - let res = self.client.get(url).send().await?.error_for_status()?; - Ok(res.json().await?) - }) - }, - ) - .context("trakt show season episodes") - } -} - -#[derive(Debug, Clone, Deserialize, Serialize, Default)] -pub struct TraktSeason { - pub number: usize, - pub ids: TraktIds, - pub rating: f64, - pub votes: usize, - pub episode_count: usize, - pub aired_count: Option, - pub title: String, - pub overview: Option, - pub network: String, -} - -#[derive(Debug, Clone, Deserialize, Serialize, Default)] -pub struct TraktEpisode { - pub season: Option, - pub number: usize, - pub number_abs: Option, - pub ids: TraktIds, - pub rating: f64, - pub votes: usize, - pub title: String, - pub runtime: f64, - pub overview: Option, - pub available_translations: Vec, - pub first_aired: Option, - pub episode_type: String, -} - -#[derive(Debug, Clone, Deserialize, Serialize, Default)] -pub struct TraktPeople { - #[serde(default)] - pub cast: Vec, - #[serde(default)] - pub crew: BTreeMap>, -} - -#[derive(Debug, Clone, Deserialize, Serialize, Default)] -pub struct TraktAppearance { - #[serde(default)] - pub jobs: Vec, - #[serde(default)] - pub characters: Vec, - pub person: TraktPerson, -} - -#[derive(Debug, Clone, Deserialize, Serialize, Default)] -pub struct TraktPerson { - pub name: String, - pub ids: TraktIds, -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct TraktSearchResult { - pub r#type: TraktKind, - pub score: f64, - #[serde(flatten)] - pub inner: TraktKindObject, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum TraktKindObject { - Movie(TraktMediaObject), - Show(TraktMediaObject), - Season(TraktMediaObject), - Episode(TraktMediaObject), - Person(TraktMediaObject), - User(TraktMediaObject), -} - -impl TraktKindObject { - pub fn inner(&self) -> &TraktMediaObject { - match self { - TraktKindObject::Movie(x) - | TraktKindObject::Show(x) - | TraktKindObject::Season(x) - | TraktKindObject::Episode(x) - | TraktKindObject::Person(x) - | TraktKindObject::User(x) => x, - } - } -} - -#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] -pub enum TraktPeopleGroup { - #[serde(rename = "production")] - Production, - #[serde(rename = "art")] - Art, - #[serde(rename = "crew")] - Crew, - #[serde(rename = "costume & make-up")] //? they really use that in as a key?! - CostumeMakeup, - #[serde(rename = "directing")] - Directing, - #[serde(rename = "writing")] - Writing, - #[serde(rename = "sound")] - Sound, - #[serde(rename = "camera")] - Camera, - #[serde(rename = "visual effects")] - VisualEffects, - #[serde(rename = "lighting")] - Lighting, - #[serde(rename = "editing")] - Editing, - #[serde(rename = "created by")] - CreatedBy, -} -impl TraktPeopleGroup { - pub fn as_credit_category(self) -> CreditCategory { - match self { - TraktPeopleGroup::Production => CreditCategory::Production, - TraktPeopleGroup::Art => CreditCategory::Art, - TraktPeopleGroup::Crew => CreditCategory::Crew, - TraktPeopleGroup::CostumeMakeup => CreditCategory::CostumeMakeup, - TraktPeopleGroup::Directing => CreditCategory::Directing, - TraktPeopleGroup::Writing => CreditCategory::Writing, - TraktPeopleGroup::Sound => CreditCategory::Sound, - TraktPeopleGroup::Camera => CreditCategory::Camera, - TraktPeopleGroup::VisualEffects => CreditCategory::Vfx, - TraktPeopleGroup::Lighting => CreditCategory::Lighting, - TraktPeopleGroup::Editing => CreditCategory::Editing, - TraktPeopleGroup::CreatedBy => CreditCategory::CreatedBy, - } - } -} -impl TraktAppearance { - pub fn a(&self) -> Appearance { - Appearance { - jobs: self.jobs.to_owned(), - characters: self.characters.to_owned(), - node: NodeID([0; 32]), // person: Person { - // name: self.person.name.to_owned(), - // headshot: None, - // ids: self.person.ids.to_owned(), - // }, - } - } -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct TraktMediaObject { - pub title: String, - pub year: Option, - pub ids: TraktIds, - - pub tagline: Option, - pub overview: Option, - pub released: Option, - pub runtime: Option, - pub country: Option, - pub trailer: Option, - pub homepage: Option, - pub status: Option, - pub rating: Option, - pub votes: Option, - pub comment_count: Option, - pub language: Option, - pub available_translations: Option>, - pub genres: Option>, -} - -#[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct TraktIds { - pub trakt: Option, - pub slug: Option, - pub tvdb: Option, - pub imdb: Option, - pub tmdb: Option, -} - -impl Display for TraktSearchResult { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!( - "{} ({}) \x1b[2m{} [{:?}]\x1b[0m", - self.inner.inner().title, - self.inner.inner().year.unwrap_or(0), - self.r#type, - self.inner.inner().ids - )) - } -} - -#[derive(Debug, Serialize, Deserialize, Clone, Copy, Hash, PartialEq)] -#[serde(rename_all = "snake_case")] -pub enum TraktKind { - Movie, - Show, - Season, - Episode, - Person, - User, -} - -impl TraktKind { - pub fn as_node_kind(self) -> NodeKind { - match self { - TraktKind::Movie => NodeKind::Movie, - TraktKind::Show => NodeKind::Show, - TraktKind::Season => NodeKind::Season, - TraktKind::Episode => NodeKind::Episode, - TraktKind::Person => NodeKind::Channel, - TraktKind::User => NodeKind::Channel, - } - } -} - -impl TraktKind { - pub fn singular(self) -> &'static str { - match self { - TraktKind::Movie => "movie", - TraktKind::Show => "show", - TraktKind::Season => "season", - TraktKind::Episode => "episode", - TraktKind::Person => "person", - TraktKind::User => "user", - } - } - pub fn plural(self) -> &'static str { - match self { - TraktKind::Movie => "movies", - TraktKind::Show => "shows", - TraktKind::Season => "seasons", - TraktKind::Episode => "episodes", - TraktKind::Person => "people", - TraktKind::User => "users", // //! not used in API - } - } -} -impl Display for TraktKind { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str(match self { - TraktKind::Movie => "Movie", - TraktKind::Show => "Show", - TraktKind::Season => "Season", - TraktKind::Episode => "Episode", - TraktKind::Person => "Person", - TraktKind::User => "User", - }) - } -} diff --git a/import/src/vgmdb.rs b/import/src/vgmdb.rs deleted file mode 100644 index 402fd90..0000000 --- a/import/src/vgmdb.rs +++ /dev/null @@ -1,127 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin -*/ - -use crate::USER_AGENT; -use anyhow::{Context, Result}; -use jellycache::{cache, cache_store, HashKey}; -use jellycommon::Asset; -use log::info; -use regex::Regex; -use reqwest::{ - header::{HeaderMap, HeaderName, HeaderValue}, - Client, ClientBuilder, -}; -use std::{ - sync::{Arc, LazyLock}, - time::Duration, -}; -use tokio::{ - runtime::Handle, - sync::Semaphore, - time::{sleep_until, Instant}, -}; - -pub struct Vgmdb { - client: Client, - rate_limit: Arc, -} - -static RE_IMAGE_URL_FROM_HTML: LazyLock = LazyLock::new(|| { - Regex::new(r#"href='(?https://media.vgm.io/artists/[-/\w\.]+)'"#).unwrap() -}); - -impl Default for Vgmdb { - fn default() -> Self { - Self::new() - } -} - -impl Vgmdb { - pub fn new() -> Self { - let client = ClientBuilder::new() - .default_headers(HeaderMap::from_iter([ - ( - HeaderName::from_static("user-agent"), - HeaderValue::from_static(USER_AGENT), - ), - ( - HeaderName::from_static("x-comment"), - HeaderValue::from_static("Please add an API, thanks!"), - ), - ])) - .build() - .unwrap(); - Self { - client, - rate_limit: Arc::new(Semaphore::new(3)), - } - } - - pub fn get_artist_image(&self, id: u64, rt: &Handle) -> Result> { - if let Some(url) = self.get_artist_image_url(id, rt)? { - cache_store( - format!("ext/vgmdb/artist-image/{}.image", HashKey(&url)), - move || { - rt.block_on(async { - info!("downloading image {url:?}"); - Ok(self - .client - .get(url) - .send() - .await? - .error_for_status()? - .bytes() - .await? - .to_vec()) - }) - }, - ) - .context("vgmdb media download") - .map(Asset) - .map(Some) - } else { - Ok(None) - } - } - - pub fn get_artist_image_url(&self, id: u64, rt: &Handle) -> Result> { - let html = self.scrape_artist_page(id, rt)?; - if let Some(cap) = RE_IMAGE_URL_FROM_HTML.captures(&str::from_utf8(&html).unwrap()) { - if let Some(url) = cap.name("url").map(|m| m.as_str()) { - return Ok(Some(url.to_string())); - } - } - Ok(None) - } - - pub fn scrape_artist_page(&self, id: u64, rt: &Handle) -> Result> { - cache(&format!("ext/vgmdb/artist-page/{id}.html"), move || { - rt.block_on(async { - let _permit = self.rate_limit.clone().acquire_owned().await?; - let permit_drop_ts = Instant::now() + Duration::from_secs(1); - info!("scrape artist: {id}"); - - let resp = self - .client - .get(format!("https://vgmdb.net/artist/{id}")) - .send() - .await? - .error_for_status()? - .bytes() - .await? - .to_vec(); - - tokio::task::spawn(async move { - sleep_until(permit_drop_ts).await; - drop(_permit); - }); - - Ok(resp) - }) - }) - .context("vgmdb artist page scrape") - } -} diff --git a/import/src/wikidata.rs b/import/src/wikidata.rs deleted file mode 100644 index 3a107fe..0000000 --- a/import/src/wikidata.rs +++ /dev/null @@ -1,129 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin -*/ - -use crate::USER_AGENT; -use anyhow::{bail, Context, Result}; -use jellycache::{cache_memory, EscapeKey}; -use log::info; -use reqwest::{ - header::{HeaderMap, HeaderName, HeaderValue}, - Client, ClientBuilder, -}; -use serde::{Deserialize, Serialize}; -use serde_json::Value; -use std::{collections::BTreeMap, sync::Arc}; -use tokio::runtime::Handle; - -pub struct Wikidata { - client: Client, -} - -#[derive(Debug, Deserialize, Serialize, Clone)] -pub struct WikidataResponse { - entities: BTreeMap, -} - -#[derive(Debug, Deserialize, Serialize, Clone)] -pub struct WikidataEntity { - pub pageid: u64, - pub ns: u64, - pub title: String, - pub lastrevid: u64, - pub modified: String, - pub r#type: String, - pub id: String, - pub claims: BTreeMap>, -} -#[derive(Debug, Deserialize, Serialize, Clone)] -pub struct WikidataClaim { - pub r#type: String, - pub id: String, - pub rank: String, - pub mainsnak: WikidataSnak, -} - -#[derive(Debug, Deserialize, Serialize, Clone)] -pub struct WikidataSnak { - pub snaktype: String, - pub property: String, - pub hash: String, - pub datavalue: Option, - pub datatype: String, -} - -#[derive(Debug, Deserialize, Serialize, Clone)] -pub struct WikidataValue { - pub value: Value, - pub r#type: String, -} - -pub mod properties { - pub static IMAGE: &str = "P18"; -} - -impl Default for Wikidata { - fn default() -> Self { - Self::new() - } -} - -impl Wikidata { - pub fn new() -> Self { - let client = ClientBuilder::new() - .default_headers(HeaderMap::from_iter([ - ( - HeaderName::from_static("accept"), - HeaderValue::from_static("application/json"), - ), - ( - HeaderName::from_static("user-agent"), - HeaderValue::from_static(USER_AGENT), - ), - ])) - .build() - .unwrap(); - Self { client } - } - - pub fn query_image_path(&self, id: String, rt: &Handle) -> Result> { - let response = self.query(id.clone(), rt)?; - if let Some(entity) = response.entities.get(&id) { - if let Some(images) = entity.claims.get(properties::IMAGE) { - for image in images { - if image.mainsnak.datatype != "commonsMedia" { - bail!("image is of type {:?}", image.mainsnak.datatype); - } - if let Some(dv) = &image.mainsnak.datavalue { - if let Value::String(filename) = &dv.value { - return Ok(Some(filename.to_owned())); - } - } - } - } - } - Ok(None) - } - - pub fn query(&self, id: String, rt: &Handle) -> Result> { - cache_memory( - &format!("ext/wikidata/{}.json", EscapeKey(&id)), - move || { - rt.block_on(async { - info!("entity query: {id}"); - Ok(self - .client - .get(format!("https://www.wikidata.org/entity/{id}")) - .send() - .await? - .error_for_status()? - .json() - .await?) - }) - }, - ) - .context("wikidata entity") - } -} diff --git a/import/src/wikimedia_commons.rs b/import/src/wikimedia_commons.rs deleted file mode 100644 index 86d934c..0000000 --- a/import/src/wikimedia_commons.rs +++ /dev/null @@ -1,63 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2025 metamuffin -*/ - -use crate::USER_AGENT; -use anyhow::{Context, Result}; -use jellycache::{cache_store, EscapeKey}; -use jellycommon::Asset; -use reqwest::{ - header::{HeaderMap, HeaderName, HeaderValue}, - redirect::Policy, - Client, ClientBuilder, -}; -use tokio::runtime::Handle; - -pub struct WikimediaCommons { - client: Client, -} -impl Default for WikimediaCommons { - fn default() -> Self { - Self::new() - } -} - -impl WikimediaCommons { - pub fn new() -> Self { - let client = ClientBuilder::new() - .default_headers(HeaderMap::from_iter([( - HeaderName::from_static("user-agent"), - HeaderValue::from_static(USER_AGENT), - )])) - .redirect(Policy::limited(5)) - .build() - .unwrap(); - Self { client } - } - - pub fn image_by_filename(&self, filename: String, rt: &Handle) -> Result { - cache_store( - format!("ext/wikimedia-commons/image/{}.image", EscapeKey(&filename)), - move || { - rt.block_on(async { - Ok(self - .client - .get(format!( - "https://commons.wikimedia.org/wiki/Special:FilePath/{}", - filename.replace(" ", "_") - )) - .send() - .await? - .error_for_status()? - .bytes() - .await? - .to_vec()) - }) - }, - ) - .context("mediawiki image by filename") - .map(Asset) - } -} -- cgit v1.3