diff options
Diffstat (limited to 'import/src/plugins')
| -rw-r--r-- | import/src/plugins/acoustid.rs | 174 | ||||
| -rw-r--r-- | import/src/plugins/infojson.rs | 272 | ||||
| -rw-r--r-- | import/src/plugins/media_info.rs | 92 | ||||
| -rw-r--r-- | import/src/plugins/misc.rs | 100 | ||||
| -rw-r--r-- | import/src/plugins/mod.rs | 48 | ||||
| -rw-r--r-- | import/src/plugins/musicbrainz.rs | 320 | ||||
| -rw-r--r-- | import/src/plugins/tags.rs | 60 | ||||
| -rw-r--r-- | import/src/plugins/tmdb.rs | 281 | ||||
| -rw-r--r-- | import/src/plugins/trakt.rs | 403 | ||||
| -rw-r--r-- | import/src/plugins/vgmdb.rs | 127 | ||||
| -rw-r--r-- | import/src/plugins/wikidata.rs | 129 | ||||
| -rw-r--r-- | import/src/plugins/wikimedia_commons.rs | 63 |
12 files changed, 2069 insertions, 0 deletions
diff --git a/import/src/plugins/acoustid.rs b/import/src/plugins/acoustid.rs new file mode 100644 index 0000000..154b0a2 --- /dev/null +++ b/import/src/plugins/acoustid.rs @@ -0,0 +1,174 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin <metamuffin.org> +*/ +use crate::{ + USER_AGENT, + plugins::{ImportContext, ImportPlugin}, +}; +use anyhow::{Context, Result}; +use jellycache::{HashKey, cache_memory}; +use jellycommon::{IdentifierType, NodeID}; +use jellyremuxer::matroska::Segment; +use log::info; +use reqwest::{ + Client, ClientBuilder, + header::{HeaderMap, HeaderName, HeaderValue}, +}; +use serde::{Deserialize, Serialize}; +use std::{ + io::Read, + path::Path, + process::{Command, Stdio}, + sync::Arc, + time::Duration, +}; +use tokio::{ + runtime::Handle, + sync::Semaphore, + time::{Instant, sleep_until}, +}; + +pub(crate) struct AcoustID { + client: Client, + key: String, + rate_limit: Arc<Semaphore>, +} + +#[derive(Debug, Hash, Clone, Serialize, Deserialize)] +pub(crate) struct Fingerprint { + duration: u32, + fingerprint: String, +} + +#[derive(Debug, Serialize, Deserialize)] +pub(crate) struct FpCalcOutput { + duration: f32, + fingerprint: String, +} + +#[derive(Serialize, Deserialize)] +pub(crate) struct AcoustIDLookupResultRecording { + id: String, +} +#[derive(Serialize, Deserialize)] +pub(crate) struct AcoustIDLookupResult { + id: String, + score: f32, + #[serde(default)] + recordings: Vec<AcoustIDLookupResultRecording>, +} +#[derive(Serialize, Deserialize)] +pub(crate) struct AcoustIDLookupResponse { + status: String, + results: Vec<AcoustIDLookupResult>, +} + +impl AcoustID { + pub fn new(api_key: &str) -> Self { + let client = ClientBuilder::new() + .default_headers(HeaderMap::from_iter([ + ( + HeaderName::from_static("accept"), + HeaderValue::from_static("application/json"), + ), + ( + 
HeaderName::from_static("user-agent"), + HeaderValue::from_static(USER_AGENT), + ), + ])) + .build() + .unwrap(); + Self { + client, + // send at most 3 req/s according to acoustid docs, each lock is therefore held for 1s + // this implementation also never sends more than 3 requests in-flight. + rate_limit: Arc::new(Semaphore::new(3)), + key: api_key.to_owned(), + } + } + + /// Returns the id of the first lookup result that lists any recordings together with the id of its first recording, or `None` if no result does. + pub fn get_atid_mbid(&self, fp: &Fingerprint, rt: &Handle) -> Result<Option<(String, String)>> { + let res = self.lookup(fp.to_owned(), rt)?; + for r in &res.results { + if let Some(k) = r.recordings.first() { + return Ok(Some((r.id.clone(), k.id.clone()))); + } + } + Ok(None) + } + + /// Posts the fingerprint to the AcoustID lookup endpoint, going through `cache_memory` keyed by `HashKey(&fp)`. + /// Blocks on `rt`. On success the rate-limit permit is released by a spawned task one second after it was acquired. + pub fn lookup(&self, fp: Fingerprint, rt: &Handle) -> Result<Arc<AcoustIDLookupResponse>> { + cache_memory(&format!("ext/acoustid/{}.json", HashKey(&fp)) , move || rt.block_on(async { + let _permit = self.rate_limit.clone().acquire_owned().await?; + // `Duration::from_secs(1)` instead of the unstable `Duration::SECOND` constant; same value. + let permit_drop_ts = Instant::now() + Duration::from_secs(1); + info!("acoustid lookup"); + + let duration = fp.duration; + let fingerprint = fp.fingerprint.replace("=", "%3D"); + let client = &self.key; + let body = format!("format=json&meta=recordingids&client={client}&duration={duration}&fingerprint={fingerprint}"); + + let resp = self + .client + .post("https://api.acoustid.org/v2/lookup") + .header("Content-Type", "application/x-www-form-urlencoded") + .body(body) + .send() + .await?.error_for_status()?.json::<AcoustIDLookupResponse>().await?; + + tokio::task::spawn(async move { + sleep_until(permit_drop_ts).await; + drop(_permit); + }); + + Ok(resp) + })) + .context("acoustid lookup") + } +} + 
+/// Computes the chromaprint fingerprint of `path` by running `fpcalc -json`, going through `cache_memory` keyed by `HashKey(path)`. +/// Fails if `fpcalc` cannot be spawned, exits unsuccessfully, or prints output that does not parse as `FpCalcOutput`. +pub(crate) fn acoustid_fingerprint(path: &Path) -> Result<Arc<Fingerprint>> { + cache_memory( + &format!("media/chromaprint/{}.json", HashKey(path)), + move || { + let mut child = Command::new("fpcalc") + .arg("-json") + .arg(path) + .stdout(Stdio::piped()) + .spawn() + .context("fpcalc")?; + + let mut buf = Vec::new(); + child + .stdout + .take() + .context("fpcalc stdout not captured")? + .read_to_end(&mut buf) + .context("read fpcalc output")?; + + // Dropping a `Child` does not wait for it; reap it here so no zombie process is left behind per file. + let status = child.wait().context("waiting for fpcalc")?; + anyhow::ensure!(status.success(), "fpcalc failed: {status}"); + + let out: FpCalcOutput = + serde_json::from_slice(&buf).context("parsing fpcalc output")?; + let out = Fingerprint { + duration: out.duration as u32, + fingerprint: out.fingerprint, + }; + Ok(out) + }, + ) +} + +impl ImportPlugin for AcoustID { + /// Fingerprints the media file and, when the lookup yields a match, stores the AcoustID track and MusicBrainz recording identifiers on the node. + fn media(&self, ct: &ImportContext, node: NodeID, path: &Path, _seg: &Segment) -> Result<()> { + let fp = acoustid_fingerprint(path)?; + if let Some((atid, mbid)) = self.get_atid_mbid(&fp, &ct.rt)? { + ct.db.update_node_init(node, |n| { + n.identifiers.insert(IdentifierType::AcoustIdTrack, atid); + n.identifiers + .insert(IdentifierType::MusicbrainzRecording, mbid); + Ok(()) + })?; + }; + Ok(()) + } +} diff --git a/import/src/plugins/infojson.rs b/import/src/plugins/infojson.rs new file mode 100644 index 0000000..4dceeb8 --- /dev/null +++ b/import/src/plugins/infojson.rs @@ -0,0 +1,272 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. 
+ Copyright (C) 2025 metamuffin <metamuffin.org> +*/ +use anyhow::{Context, Result, anyhow}; +use jellycache::cache_read; +use jellycommon::{ + IdentifierType, NodeID, NodeKind, RatingType, + chrono::{Utc, format::Parsed}, +}; +use jellyremuxer::matroska::{AttachedFile, Segment}; +use log::info; +use serde::{Deserialize, Serialize}; +use std::{collections::HashMap, fs::File, io::BufReader, path::Path}; + +use crate::plugins::{ImportContext, ImportPlugin}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YVideo { + pub album: Option<String>, + pub age_limit: Option<usize>, + pub alt_title: Option<String>, + pub aspect_ratio: Option<f32>, + pub automatic_captions: Option<HashMap<String, Vec<YCaption>>>, + pub availability: Option<String>, // "public" | "private" | "unlisted", + pub average_rating: Option<String>, + pub categories: Option<Vec<String>>, + pub channel_follower_count: Option<usize>, + pub channel_id: Option<String>, + pub channel_is_verified: Option<bool>, + pub channel: Option<String>, + pub chapters: Option<Vec<YChapter>>, + pub comment_count: Option<usize>, + pub description: Option<String>, + pub display_id: Option<String>, + pub duration_string: Option<String>, + pub duration: Option<f64>, + pub epoch: usize, + pub extractor_key: String, + pub extractor: String, + pub formats: Option<Vec<YFormat>>, + pub fulltitle: Option<String>, + pub heatmap: Option<Vec<YHeatmapSample>>, + pub height: Option<i32>, + pub id: String, + pub is_live: Option<bool>, + pub like_count: Option<usize>, + pub media_type: Option<String>, + pub n_entries: Option<usize>, + pub original_url: Option<String>, + pub playable_in_embed: Option<bool>, + pub playlist_count: Option<usize>, + pub playlist_id: Option<String>, + pub playlist_index: Option<usize>, + pub playlist_title: Option<String>, + pub playlist_uploader_id: Option<String>, + pub playlist_uploader: Option<String>, + pub playlist: Option<String>, + pub tags: Option<Vec<String>>, + pub thumbnail: 
Option<String>, + pub thumbnails: Option<Vec<YThumbnail>>, + pub title: String, + pub upload_date: Option<String>, + pub uploader_id: Option<String>, + pub uploader_url: Option<String>, + pub uploader: Option<String>, + pub view_count: Option<usize>, + pub was_live: Option<bool>, + pub webpage_url_basename: String, + pub webpage_url_domain: String, + pub webpage_url: String, + pub width: Option<i32>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YCaption { + pub url: Option<String>, + pub ext: String, //"vtt" | "json3" | "srv1" | "srv2" | "srv3" | "ttml", + pub protocol: Option<String>, + pub name: Option<String>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YFormat { + pub format_id: String, + pub format_note: Option<String>, + pub ext: String, + pub protocol: String, + pub acodec: Option<String>, + pub vcodec: Option<String>, + pub url: Option<String>, + pub width: Option<u32>, + pub height: Option<u32>, + pub fps: Option<f64>, + pub columns: Option<u32>, + pub fragments: Option<Vec<YFragment>>, + pub resolution: Option<String>, + pub dynamic_range: Option<String>, + pub aspect_ratio: Option<f64>, + pub http_headers: HashMap<String, String>, + pub audio_ext: String, + pub video_ext: String, + pub vbr: Option<f64>, + pub abr: Option<f64>, + pub format: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YFragment { + pub url: Option<String>, + pub duration: Option<f64>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YThumbnail { + pub url: String, + pub preference: Option<i32>, + pub id: String, + pub height: Option<u32>, + pub width: Option<u32>, + pub resolution: Option<String>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YChapter { + pub start_time: f64, + pub end_time: f64, + pub title: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YHeatmapSample { + pub start_time: f64, + pub end_time: f64, + pub value: f64, +} + +pub fn 
parse_upload_date(d: &str) -> anyhow::Result<i64> { + let (year, month, day) = (&d[0..4], &d[4..6], &d[6..8]); + let (year, month, day) = ( + year.parse().context("parsing year")?, + month.parse().context("parsing month")?, + day.parse().context("parsing day")?, + ); + + let mut p = Parsed::new(); + p.year = Some(year); + p.month = Some(month); + p.day = Some(day); + p.hour_div_12 = Some(0); + p.hour_mod_12 = Some(0); + p.minute = Some(0); + p.second = Some(0); + Ok(p.to_datetime_with_timezone(&Utc)?.timestamp_millis()) +} + +pub fn is_info_json(a: &&AttachedFile) -> bool { + a.name == "info.json" && a.media_type == "application/json" +} +pub struct Infojson; +impl ImportPlugin for Infojson { + fn file(&self, ct: &ImportContext, parent: NodeID, path: &Path) -> Result<()> { + let filename = path.file_name().unwrap().to_string_lossy(); + if filename != "channel.info.json" { + return Ok(()); + } + + info!("import channel info.json at {path:?}"); + let data = serde_json::from_reader::<_, YVideo>(BufReader::new(File::open(path)?))?; + ct.db.update_node_init(parent, |node| { + node.kind = NodeKind::Channel; + node.title = Some(clean_uploader_name(&data.title).to_owned()); + if let Some(cid) = data.channel_id { + node.identifiers.insert(IdentifierType::YoutubeChannel, cid); + } + if let Some(uid) = data.uploader_id { + node.identifiers + .insert(IdentifierType::YoutubeChannelHandle, uid); + } + if let Some(desc) = data.description { + node.description = Some(desc); + } + if let Some(followers) = data.channel_follower_count { + node.ratings + .insert(RatingType::YoutubeFollowers, followers as f64); + } + Ok(()) + })?; + + Ok(()) + } + + fn media(&self, ct: &ImportContext, node: NodeID, _path: &Path, seg: &Segment) -> Result<()> { + let infojson = seg + .attachments + .iter() + .flat_map(|a| &a.files) + .find(is_info_json) + .map(|att| { + let data = cache_read(str::from_utf8(&att.data).unwrap())? 
+ .ok_or(anyhow!("info json cache missing"))?; + anyhow::Ok(serde_json::from_slice::<YVideo>(&data)?) + }) + .transpose() + .context("infojson parsing")?; + + if let Some(infojson) = infojson { + ct.db.update_node_init(node, |node| { + node.kind = if let Some(ty) = &infojson.media_type + && ty == "short" + { + NodeKind::ShortFormVideo + } else if infojson.album.is_some() { + NodeKind::Music + } else { + NodeKind::Video + }; + node.title = Some(infojson.title); + node.subtitle = if infojson.alt_title != node.title { + infojson.alt_title + } else { + None + } + .or(infojson + .uploader + .as_ref() + .map(|u| clean_uploader_name(u).to_owned())) + .or(node.subtitle.clone()); + + node.tags.extend(infojson.tags.unwrap_or_default()); + + if let Some(desc) = infojson.description { + node.description = Some(desc) + } + node.tagline = Some(infojson.webpage_url); + if let Some(date) = &infojson.upload_date { + node.release_date = + Some(parse_upload_date(date).context("parsing upload date")?); + } + match infojson.extractor.as_str() { + "youtube" => { + node.identifiers + .insert(IdentifierType::YoutubeVideo, infojson.id); + node.ratings.insert( + RatingType::YoutubeViews, + infojson.view_count.unwrap_or_default() as f64, + ); + if let Some(lc) = infojson.like_count { + node.ratings.insert(RatingType::YoutubeLikes, lc as f64); + } + } + "Bandcamp" => drop( + node.identifiers + .insert(IdentifierType::Bandcamp, infojson.id), + ), + _ => (), + } + + Ok(()) + })?; + } + Ok(()) + } +} + +fn clean_uploader_name(mut s: &str) -> &str { + s = s.strip_suffix(" - Videos").unwrap_or(s); + s = s.strip_suffix(" - Topic").unwrap_or(s); + s = s.strip_prefix("Uploads from ").unwrap_or(s); + s +} diff --git a/import/src/plugins/media_info.rs b/import/src/plugins/media_info.rs new file mode 100644 index 0000000..1d4d627 --- /dev/null +++ b/import/src/plugins/media_info.rs @@ -0,0 +1,92 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed 
under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin <metamuffin.org> +*/ + +use crate::plugins::{ImportContext, ImportPlugin}; +use anyhow::{Result, anyhow}; +use jellycommon::{Chapter, NodeID, SourceTrack, SourceTrackKind, TrackSource}; +use jellyremuxer::matroska::Segment; +use std::path::Path; + +pub struct MediaInfo; +impl ImportPlugin for MediaInfo { + fn media(&self, ct: &ImportContext, node: NodeID, path: &Path, seg: &Segment) -> Result<()> { + let tracks = seg + .tracks + .as_ref() + .ok_or(anyhow!("no tracks"))? + .entries + .iter() + .map(|track| SourceTrack { + codec: track.codec_id.clone(), + language: track.language.clone(), + name: track.name.clone().unwrap_or_default(), + federated: Vec::new(), + kind: if let Some(video) = &track.video { + SourceTrackKind::Video { + width: video.pixel_width, + height: video.pixel_height, + fps: video.frame_rate, + } + } else if let Some(audio) = &track.audio { + SourceTrackKind::Audio { + channels: audio.channels as usize, + sample_rate: audio.sampling_frequency, + bit_depth: audio.bit_depth.map(|r| r as usize), + } + } else { + SourceTrackKind::Subtitle + }, + source: TrackSource::Local(path.to_owned(), track.track_number), + }) + .collect::<Vec<_>>(); + + let size = path.metadata()?.len(); + + ct.db.update_node_init(node, |node| { + node.storage_size = size; + node.media = Some(jellycommon::MediaInfo { + chapters: seg + .chapters + .clone() + .map(|c| { + let mut chaps = Vec::new(); + if let Some(ee) = c.edition_entries.first() { + for ca in &ee.chapter_atoms { + let mut labels = Vec::new(); + for cd in &ca.displays { + for lang in &cd.languages { + labels.push((lang.to_owned(), cd.string.clone())) + } + } + chaps.push(Chapter { + labels, + time_start: Some(ca.time_start as f64 * 1e-9), + time_end: ca.time_end.map(|ts| ts as f64 * 1e-9), + }) + } + } + chaps + }) + .unwrap_or_default(), + duration: fix_invalid_runtime( + seg.info.duration.unwrap_or_default() * 
seg.info.timestamp_scale as f64 * 1e-9, + ), + tracks, + }); + Ok(()) + })?; + + Ok(()) + } +} + +/// Maps a few known-bogus duration values to 0 and passes every other value through unchanged. +fn fix_invalid_runtime(d: f64) -> f64 { + match d { + // Broken durations found experimentally + 359999.999 | 359999.000 | 86399.999 | 86399.99900000001 => 0., + x => x, + } +} diff --git a/import/src/plugins/misc.rs b/import/src/plugins/misc.rs new file mode 100644 index 0000000..4717753 --- /dev/null +++ b/import/src/plugins/misc.rs @@ -0,0 +1,100 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin <metamuffin.org> +*/ +use crate::plugins::{ImportContext, ImportPlugin}; +use anyhow::{Result, bail}; +use jellycache::{HashKey, cache_store}; +use jellycommon::{Asset, NodeID, NodeKind, PictureSlot, Visibility}; +use jellyremuxer::matroska::{AttachedFile, Segment}; +use log::info; +use std::{fs::File, io::Read, path::Path}; + +/// Import plugin that stores files named `poster.{jpeg,webp,png}` or `backdrop.{jpeg,webp,png}` as the parent node's cover or backdrop picture. +pub struct ImageFiles; +impl ImportPlugin for ImageFiles { + /// Ignores every file whose name is not one of the recognised poster/backdrop names. + fn file(&self, ct: &ImportContext, parent: NodeID, path: &Path) -> Result<()> { + let filename = path.file_name().unwrap().to_string_lossy(); + let slot = match filename.as_ref() { + "poster.jpeg" | "poster.webp" | "poster.png" => PictureSlot::Cover, + "backdrop.jpeg" | "backdrop.webp" | "backdrop.png" => PictureSlot::Backdrop, + _ => return Ok(()), + }; + info!("import {slot:?} at {path:?}"); + let asset = Asset(cache_store( + format!("media/literal/{}-poster.image", HashKey(path)), + || { + let mut data = Vec::new(); + File::open(path)?.read_to_end(&mut data)?; + Ok(data) + }, + )?); + ct.db.update_node_init(parent, |node| { + // Insert under the slot matched above; always using `Cover` made backdrop files replace the cover. + node.pictures.insert(slot, asset); + Ok(()) + })?; + Ok(()) + } +} + +/// Matches attachments whose name starts with `cover` and whose media type starts with `image/`. +pub fn is_cover(a: &&AttachedFile) -> bool { + a.name.starts_with("cover") && a.media_type.starts_with("image/") +} +pub struct ImageAttachments; +impl ImportPlugin for ImageAttachments { + fn media(&self, ct: &ImportContext, 
node: NodeID, _path: &Path, seg: &Segment) -> Result<()> { + let Some(cover) = seg + .attachments + .iter() + .flat_map(|a| &a.files) + .find(is_cover) + .map(|att| Asset(att.data.clone().try_into().unwrap())) + else { + return Ok(()); + }; + + ct.db.update_node_init(node, |node| { + node.pictures.insert(PictureSlot::Cover, cover); + Ok(()) + })?; + Ok(()) + } +} + +pub struct General; +impl ImportPlugin for General { + fn import_instruction(&self, ct: &ImportContext, node: NodeID, line: &str) -> Result<()> { + if line == "hidden" { + ct.db.update_node_init(node, |node| { + node.visibility = node.visibility.min(Visibility::Hidden); + Ok(()) + })?; + } + if line == "reduced" { + ct.db.update_node_init(node, |node| { + node.visibility = node.visibility.min(Visibility::Reduced); + Ok(()) + })?; + } + if let Some(kind) = line.strip_prefix("kind-").or(line.strip_prefix("kind=")) { + let kind = match kind { + "movie" => NodeKind::Movie, + "video" => NodeKind::Video, + "music" => NodeKind::Music, + "short_form_video" => NodeKind::ShortFormVideo, + "collection" => NodeKind::Collection, + "channel" => NodeKind::Channel, + "show" => NodeKind::Show, + "series" => NodeKind::Series, + "season" => NodeKind::Season, + "episode" => NodeKind::Episode, + _ => bail!("unknown node kind"), + }; + ct.db.update_node_init(node, |node| { + node.kind = kind; + Ok(()) + })?; + } + Ok(()) + } +} diff --git a/import/src/plugins/mod.rs b/import/src/plugins/mod.rs new file mode 100644 index 0000000..47fcfbf --- /dev/null +++ b/import/src/plugins/mod.rs @@ -0,0 +1,48 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. 
+ Copyright (C) 2025 metamuffin <metamuffin.org> +*/ +pub mod acoustid; +pub mod infojson; +pub mod musicbrainz; +pub mod tags; +pub mod tmdb; +pub mod trakt; +pub mod vgmdb; +pub mod wikidata; +pub mod wikimedia_commons; +pub mod media_info; +pub mod misc; + +use std::path::Path; + +use anyhow::Result; +use jellycommon::NodeID; +use jellydb::Database; +use jellyremuxer::matroska::Segment; +use tokio::runtime::Handle; + +/// State passed to every plugin hook: the database handle and a tokio runtime handle. +pub struct ImportContext { + pub db: Database, + pub rt: Handle, +} + +/// Hooks an import plugin may implement. Every hook has a default body that ignores its arguments and returns `Ok(())`, so a plugin only overrides the ones it needs. +pub trait ImportPlugin { + /// Hook taking a file `path` and the `parent` node. NOTE(review): presumably invoked once per file found during import — confirm against the caller. + fn file(&self, ct: &ImportContext, parent: NodeID, path: &Path) -> Result<()> { + let _ = (ct, parent, path); + Ok(()) + } + /// Hook taking a media file `path`, its matroska `Segment` and the `node` it belongs to. + fn media(&self, ct: &ImportContext, node: NodeID, path: &Path, seg: &Segment) -> Result<()> { + let _ = (ct, node, path, seg); + Ok(()) + } + /// Hook taking one instruction `line` that applies to `node`. NOTE(review): where these lines come from is not visible here — confirm against the caller. + fn import_instruction(&self, ct: &ImportContext, node: NodeID, line: &str) -> Result<()> { + let _ = (ct, node, line); + Ok(()) + } + /// Hook taking only a `node`. NOTE(review): presumably a post-processing step; no implementor is visible in this patch — confirm. + fn process_node(&self, ct: &ImportContext, node: NodeID) -> Result<()> { + let _ = (ct, node); + Ok(()) + } +} diff --git a/import/src/plugins/musicbrainz.rs b/import/src/plugins/musicbrainz.rs new file mode 100644 index 0000000..44b2a06 --- /dev/null +++ b/import/src/plugins/musicbrainz.rs @@ -0,0 +1,320 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. 
+ Copyright (C) 2025 metamuffin <metamuffin.org> +*/ + +use crate::{USER_AGENT, plugins::ImportPlugin}; +use anyhow::{Context, Result}; +use jellycache::cache_memory; +use log::info; +use reqwest::{ + Client, ClientBuilder, + header::{HeaderMap, HeaderName, HeaderValue}, +}; +use serde::{Deserialize, Serialize}; +use std::{collections::BTreeMap, sync::Arc, time::Duration}; +use tokio::{ + runtime::Handle, + sync::Semaphore, + time::{Instant, sleep_until}, +}; + +/// UUID constants for relationship types. NOTE(review): presumably meant to be compared against `MbRelation::type_id` — confirm against the users of this module. +pub mod reltypes { + pub const MUSIC_VIDEO: &str = "ce3de655-7451-44d1-9224-87eb948c205d"; + pub const INSTRUMENTAL: &str = "9fc01a58-7801-4bd2-b07d-61cc7ffacf90"; + pub const VOCAL: &str = "0fdbe3c6-7700-4a31-ae54-b53f06ae1cfa"; + pub const RECORDING: &str = "a01ee869-80a8-45ef-9447-c59e91aa7926"; + pub const PROGRAMMING: &str = "36c50022-44e0-488d-994b-33f11d20301e"; + pub const PRODUCER: &str = "5c0ceac3-feb4-41f0-868d-dc06f6e27fc0"; + // NOTE(review): ARTIST carries exactly the same UUID as PRODUCER above — looks like a copy-paste slip; confirm the intended relationship type id. + pub const ARTIST: &str = "5c0ceac3-feb4-41f0-868d-dc06f6e27fc0"; + pub const PHONOGRAPHIC_COPYRIGHT: &str = "7fd5fbc0-fbf4-4d04-be23-417d50a4dc30"; + pub const MIX: &str = "3e3102e1-1896-4f50-b5b2-dd9824e46efe"; + pub const INSTRUMENT: &str = "59054b12-01ac-43ee-a618-285fd397e461"; + pub const WIKIDATA: &str = "689870a4-a1e4-4912-b17f-7b2664215698"; + pub const VGMDB: &str = "0af15ab3-c615-46d6-b95b-a5fcd2a92ed9"; +} + +/// MusicBrainz web-service client: an HTTP client plus the semaphore used to rate-limit requests. +pub struct MusicBrainz { + client: Client, + rate_limit: Arc<Semaphore>, +} + +/// Recording lookup response; field names are mapped from kebab-case JSON keys. +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct MbRecordingRel { + pub id: String, + pub first_release_date: Option<String>, + pub title: String, + pub isrcs: Vec<String>, + pub video: bool, + pub disambiguation: String, + pub length: Option<u32>, + pub relations: Vec<MbRelation>, + pub artist_credit: Vec<MbArtistCredit>, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct MbArtistRel { + pub id: String, + pub isnis: Vec<String>, + pub ipis: Vec<String>, + pub name: String, + pub disambiguation: 
String, + pub country: Option<String>, + pub sort_name: String, + pub gender_id: Option<String>, + pub area: Option<MbArea>, + pub begin_area: Option<MbArea>, + pub end_area: Option<MbArea>, + pub life_span: MbTimespan, + pub relations: Vec<MbRelation>, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct MbArtistCredit { + pub name: String, + pub artist: MbArtist, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct MbRelation { + pub direction: String, + pub r#type: String, + pub type_id: String, + pub begin: Option<String>, + pub end: Option<String>, + pub ended: bool, + pub target_type: String, + pub target_credit: String, + pub source_credit: String, + pub attributes: Vec<String>, + pub attribute_ids: BTreeMap<String, String>, + pub attribute_values: BTreeMap<String, String>, + + pub work: Option<MbWork>, + pub artist: Option<MbArtist>, + pub url: Option<MbUrl>, + pub recording: Option<MbRecording>, + pub series: Option<MbSeries>, + pub event: Option<MbEvent>, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct MbSeries { + pub id: String, + pub r#type: Option<String>, + pub type_id: Option<String>, + pub name: String, + pub disambiguation: String, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct MbRecording { + pub id: String, + pub title: String, + #[serde(default)] + pub isrcs: Vec<String>, + pub video: bool, + pub disambiguation: String, + pub length: Option<u32>, + #[serde(default)] + pub artist_credit: Vec<MbArtistCredit>, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct MbWork { + pub id: String, + pub r#type: Option<String>, + pub type_id: Option<String>, + pub languages: Vec<String>, + pub iswcs: Vec<String>, + pub language: Option<String>, + pub title: String, + pub attributes: Vec<String>, + pub disambiguation: String, +} + 
+#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct MbEvent { + pub id: String, + pub r#type: Option<String>, + pub type_id: Option<String>, + pub name: String, + pub time: String, + pub cancelled: bool, + pub setlist: String, + pub life_span: MbTimespan, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct MbArtist { + pub id: String, + pub r#type: Option<String>, + pub type_id: Option<String>, + pub name: String, + pub disambiguation: String, + pub country: Option<String>, + pub sort_name: String, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct MbTimespan { + pub begin: Option<String>, + pub end: Option<String>, + pub ended: bool, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct MbArea { + pub name: String, + pub sort_name: String, + #[serde(default)] + pub iso_3166_1_codes: Vec<String>, + pub id: String, + pub disambiguation: String, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct MbUrl { + pub id: String, + pub resource: String, +} + +impl Default for MusicBrainz { + fn default() -> Self { + Self::new() + } +} + +impl MusicBrainz { + // Number of rate-limit permits, and also the number of seconds each permit is held by the lookup methods. + const MAX_PAR_REQ: usize = 4; + /// Builds the HTTP client with `accept: application/json` and the crate's `USER_AGENT` as default headers. + pub fn new() -> Self { + let client = ClientBuilder::new() + .default_headers(HeaderMap::from_iter([ + ( + HeaderName::from_static("accept"), + HeaderValue::from_static("application/json"), + ), + ( + HeaderName::from_static("user-agent"), + HeaderValue::from_static(USER_AGENT), + ), + ])) + .build() + .unwrap(); + Self { + client, + // send at most 1 req/s according to musicbrainz docs: each permit is held for MAX_PAR_REQ seconds + // and there are MAX_PAR_REQ permits, so at most MAX_PAR_REQ requests start per MAX_PAR_REQ seconds. + // this implementation also never sends more than MAX_PAR_REQ requests in-flight. 
+ rate_limit: Arc::new(Semaphore::new(Self::MAX_PAR_REQ)), + } + } + + pub fn lookup_recording(&self, id: String, rt: &Handle) -> Result<Arc<MbRecordingRel>> { + cache_memory(&format!("ext/musicbrainz/recording/{id}.json"), move || { + rt.block_on(async { + let _permit = self.rate_limit.clone().acquire_owned().await?; + let permit_drop_ts = Instant::now() + Duration::from_secs(Self::MAX_PAR_REQ as u64); + info!("recording lookup: {id}"); + + let inc = [ + "isrcs", + "artists", + "area-rels", + "artist-rels", + "event-rels", + "genre-rels", + "instrument-rels", + "label-rels", + "place-rels", + "recording-rels", + "release-rels", + "release-group-rels", + "series-rels", + "url-rels", + "work-rels", + ] + .join("+"); + + let resp = self + .client + .get(format!( + "https://musicbrainz.org/ws/2/recording/{id}?inc={inc}" + )) + .send() + .await? + .error_for_status()? + .json::<MbRecordingRel>() + .await?; + + tokio::task::spawn(async move { + sleep_until(permit_drop_ts).await; + drop(_permit); + }); + + Ok(resp) + }) + }) + .context("musicbrainz recording lookup") + } + + pub fn lookup_artist(&self, id: String, rt: &Handle) -> Result<Arc<MbArtistRel>> { + cache_memory(&format!("ext/musicbrainz/artist/{id}.json"), move || { + rt.block_on(async { + let _permit = self.rate_limit.clone().acquire_owned().await?; + let permit_drop_ts = Instant::now() + Duration::from_secs(Self::MAX_PAR_REQ as u64); + info!("artist lookup: {id}"); + + let inc = [ + "area-rels", + "artist-rels", + "event-rels", + "genre-rels", + "instrument-rels", + "label-rels", + "place-rels", + "recording-rels", + "release-rels", + "release-group-rels", + "series-rels", + "url-rels", + "work-rels", + ] + .join("+"); + + let resp = self + .client + .get(format!( + "https://musicbrainz.org/ws/2/artist/{id}?inc={inc}" + )) + .send() + .await? + .error_for_status()? 
+ .json::<MbArtistRel>() + .await?; + + tokio::task::spawn(async move { + sleep_until(permit_drop_ts).await; + drop(_permit); + }); + + Ok(resp) + }) + }) + .context("musicbrainz artist lookup") + } +} + +impl ImportPlugin for MusicBrainz {} diff --git a/import/src/plugins/tags.rs b/import/src/plugins/tags.rs new file mode 100644 index 0000000..8452aad --- /dev/null +++ b/import/src/plugins/tags.rs @@ -0,0 +1,60 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin <metamuffin.org> +*/ + +use crate::plugins::{ImportContext, ImportPlugin}; +use anyhow::Result; +use jellycommon::{IdentifierType, NodeID, NodeKind}; +use jellyremuxer::matroska::Segment; +use std::{collections::HashMap, path::Path}; + +pub struct Tags; +impl ImportPlugin for Tags { + fn media(&self, ct: &ImportContext, node: NodeID, _path: &Path, seg: &Segment) -> Result<()> { + let tags = seg + .tags + .first() + .map(|tags| { + tags.tags + .iter() + .flat_map(|t| t.simple_tags.clone()) + .map(|st| (st.name, st.string.unwrap_or_default())) + .collect::<HashMap<_, _>>() + }) + .unwrap_or_default(); + + ct.db.update_node_init(node, |node| { + node.title = seg.info.title.clone(); + for (key, value) in tags { + match key.as_str() { + "DESCRIPTION" => node.description = Some(value), + "SYNOPSIS" => node.description = Some(value), + "COMMENT" => node.tagline = Some(value), + "CONTENT_TYPE" => { + node.kind = match value.to_lowercase().trim() { + "movie" | "documentary" | "film" => NodeKind::Movie, + "music" | "recording" => NodeKind::Music, + _ => continue, + } + } + _ => node.identifiers.extend(Some(match key.as_str() { + "MUSICBRAINZ_TRACKID" => (IdentifierType::MusicbrainzRecording, value), + "MUSICBRAINZ_ARTISTID" => (IdentifierType::MusicbrainzArtist, value), + "MUSICBRAINZ_ALBUMID" => (IdentifierType::MusicbrainzRelease, value), + 
"MUSICBRAINZ_ALBUMARTISTID" => continue, + "MUSICBRAINZ_RELEASEGROUPID" => { + (IdentifierType::MusicbrainzReleaseGroup, value) + } + "ISRC" => (IdentifierType::Isrc, value), + "BARCODE" => (IdentifierType::Barcode, value), + _ => continue, + })), + } + } + Ok(()) + })?; + Ok(()) + } +} diff --git a/import/src/plugins/tmdb.rs b/import/src/plugins/tmdb.rs new file mode 100644 index 0000000..3d6e832 --- /dev/null +++ b/import/src/plugins/tmdb.rs @@ -0,0 +1,281 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin <metamuffin.org> +*/ +use crate::USER_AGENT; +use anyhow::{anyhow, bail, Context, Result}; +use jellycache::{cache_memory, cache_store, EscapeKey, HashKey}; +use jellycommon::{ + chrono::{format::Parsed, Utc}, + Asset, +}; +use log::info; +use reqwest::{ + header::{HeaderMap, HeaderName, HeaderValue}, + Client, ClientBuilder, +}; +use serde::{Deserialize, Serialize}; +use std::{fmt::Display, sync::Arc}; +use tokio::runtime::Handle; + +pub struct Tmdb { + client: Client, + image_client: Client, + key: String, +} + +impl Tmdb { + pub fn new(api_key: &str) -> Self { + let client = ClientBuilder::new() + .default_headers(HeaderMap::from_iter([ + ( + HeaderName::from_static("accept"), + HeaderValue::from_static("application/json"), + ), + ( + HeaderName::from_static("user-agent"), + HeaderValue::from_static(USER_AGENT), + ), + ])) + .build() + .unwrap(); + let image_client = ClientBuilder::new().build().unwrap(); + Self { + client, + image_client, + key: api_key.to_owned(), + } + } + pub fn search(&self, kind: TmdbKind, query: &str, rt: &Handle) -> Result<Arc<TmdbQuery>> { + cache_memory( + &format!("ext/tmdb/search/{kind}-{}.json", HashKey(query)), + move || { + rt.block_on(async { + info!("searching tmdb: {query:?}"); + Ok(self + .client + .get(format!( + 
"https://api.themoviedb.org/3/search/{kind}?query={}?api_key={}", + query.replace(" ", "+"), + self.key + )) + .send() + .await? + .error_for_status()? + .json::<TmdbQuery>() + .await?) + }) + }, + ) + .context("tmdb search") + } + pub fn details(&self, kind: TmdbKind, id: u64, rt: &Handle) -> Result<Arc<TmdbDetails>> { + cache_memory(&format!("ext/tmdb/details/{kind}-{id}.json"), move || { + rt.block_on(async { + info!("fetching details: {id:?}"); + Ok(self + .client + .get(format!( + "https://api.themoviedb.org/3/{kind}/{id}?api_key={}", + self.key, + )) + .send() + .await? + .error_for_status()? + .json() + .await?) + }) + }) + .context("tmdb details") + } + pub fn person_image(&self, id: u64, rt: &Handle) -> Result<Arc<TmdbPersonImage>> { + cache_memory(&format!("ext/tmdb/person/images/{id}.json"), move || { + rt.block_on(async { + Ok(self + .client + .get(format!( + "https://api.themoviedb.org/3/person/{id}/images?api_key={}", + self.key, + )) + .send() + .await? + .error_for_status()? + .json() + .await?) + }) + }) + .context("tmdb person images") + } + pub fn image(&self, path: &str, rt: &Handle) -> Result<Asset> { + cache_store( + format!("ext/tmdb/image/{}.image", EscapeKey(path)), + move || { + rt.block_on(async { + info!("downloading image {path:?}"); + Ok(self + .image_client + .get(format!("https://image.tmdb.org/t/p/original{path}")) + .send() + .await? + .error_for_status()? + .bytes() + .await? 
+ .to_vec()) + }) + }, + ) + .context("tmdb image download") + .map(Asset) + } + + pub fn episode_details( + &self, + series_id: u64, + season: usize, + episode: usize, + rt: &Handle, + ) -> Result<Arc<TmdbEpisode>> { + cache_memory(&format!("ext/tmdb/episode-details/{series_id}-S{season}-E{episode}.json"), move || { + rt.block_on(async { + info!("tmdb episode details {series_id} S={season} E={episode}"); + Ok(self + .image_client + .get(format!("https://api.themoviedb.org/3/tv/{series_id}/season/{season}/episode/{episode}?api_key={}", self.key)) + .send() + .await? + .error_for_status()? + .json() + .await?) + }) + }) + .context("tmdb episode details") + } +} + +pub fn parse_release_date(d: &str) -> Result<Option<i64>> { + if d.is_empty() { + return Ok(None); + } else if d.len() < 10 { + bail!(anyhow!("date string too short")) + } + let (year, month, day) = (&d[0..4], &d[5..7], &d[8..10]); + let (year, month, day) = ( + year.parse().context("parsing year")?, + month.parse().context("parsing month")?, + day.parse().context("parsing day")?, + ); + + let mut p = Parsed::new(); + p.year = Some(year); + p.month = Some(month); + p.day = Some(day); + p.hour_div_12 = Some(0); + p.hour_mod_12 = Some(0); + p.minute = Some(0); + p.second = Some(0); + Ok(Some(p.to_datetime_with_timezone(&Utc)?.timestamp_millis())) +} + +impl Display for TmdbKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(match self { + TmdbKind::Tv => "tv", + TmdbKind::Movie => "movie", + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TmdbEpisode { + pub air_date: String, + pub overview: String, + pub name: String, + pub id: u64, + pub runtime: f64, + pub still_path: Option<String>, + pub vote_average: f64, + pub vote_count: usize, +} + +#[derive(Debug, Clone, Copy, Hash, Serialize, Deserialize)] +pub enum TmdbKind { + Tv, + Movie, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TmdbPersonImage { + pub id: u64, + pub 
profiles: Vec<TmdbPersonImageProfile>, +} +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TmdbPersonImageProfile { + pub aspect_ratio: f64, + pub height: u32, + pub width: u32, + pub file_path: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TmdbQuery { + pub page: usize, + pub results: Vec<TmdbQueryResult>, + pub total_pages: usize, + pub total_results: usize, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TmdbQueryResult { + pub adult: bool, + pub backdrop_path: Option<String>, + pub genre_ids: Vec<u64>, + pub id: u64, + pub original_language: Option<String>, + pub original_title: Option<String>, + pub overview: String, + pub popularity: f64, + pub poster_path: Option<String>, + pub release_date: Option<String>, + pub title: Option<String>, + pub name: Option<String>, + pub vote_average: f64, + pub vote_count: usize, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TmdbDetails { + pub adult: bool, + pub backdrop_path: Option<String>, + pub genres: Vec<TmdbGenre>, + pub id: u64, + pub original_language: Option<String>, + pub original_title: Option<String>, + pub overview: String, + pub popularity: f64, + pub poster_path: Option<String>, + pub release_date: Option<String>, + pub title: Option<String>, + pub name: Option<String>, + pub vote_average: f64, + pub vote_count: usize, + pub budget: Option<usize>, + pub homepage: Option<String>, + pub imdb_id: Option<String>, + pub production_companies: Vec<TmdbProductionCompany>, + pub revenue: Option<usize>, + pub tagline: Option<String>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TmdbGenre { + pub id: u64, + pub name: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TmdbProductionCompany { + pub id: u64, + pub name: String, + pub logo_path: Option<String>, +} diff --git a/import/src/plugins/trakt.rs b/import/src/plugins/trakt.rs new file mode 100644 index 0000000..5a1aa8e --- /dev/null +++ 
b/import/src/plugins/trakt.rs @@ -0,0 +1,403 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin <metamuffin.org> +*/ +use crate::{ + USER_AGENT, + plugins::{ImportContext, ImportPlugin}, +}; +use anyhow::{Context, Result, bail}; +use jellycache::{HashKey, cache_memory}; +use jellycommon::{Appearance, CreditCategory, IdentifierType, NodeID, NodeKind}; +use log::info; +use reqwest::{ + Client, ClientBuilder, + header::{HeaderMap, HeaderName, HeaderValue}, +}; +use serde::{Deserialize, Serialize}; +use std::{collections::BTreeMap, fmt::Display, sync::Arc}; +use tokio::runtime::Handle; + +pub struct Trakt { + client: Client, +} + +impl Trakt { + pub fn new(api_key: &str) -> Self { + let client = ClientBuilder::new() + .default_headers(HeaderMap::from_iter([ + ( + HeaderName::from_static("trakt-api-key"), + HeaderValue::from_str(api_key).unwrap(), + ), + ( + HeaderName::from_static("trakt-api-version"), + HeaderValue::from_static("2"), + ), + ( + HeaderName::from_static("content-type"), + HeaderValue::from_static("application/json"), + ), + ( + HeaderName::from_static("user-agent"), + HeaderValue::from_static(USER_AGENT), + ), + ])) + .build() + .unwrap(); + Self { client } + } + + pub fn search( + &self, + kinds: &[TraktKind], + query: &str, + rt: &Handle, + ) -> Result<Arc<Vec<TraktSearchResult>>> { + cache_memory( + &format!("ext/trakt/search/{}.json", HashKey(query)), + move || { + rt.block_on(async { + let url = format!( + "https://api.trakt.tv/search/{}?query={}&extended=full", + kinds + .iter() + .map(|t| t.singular()) + .collect::<Vec<_>>() + .join(","), + urlencoding::encode(query), + ); + let res = self.client.get(url).send().await?.error_for_status()?; + Ok(res.json().await?) 
+ }) + }, + ) + .context("trakt search") + } + + pub fn lookup(&self, kind: TraktKind, id: u64, rt: &Handle) -> Result<Arc<TraktMediaObject>> { + cache_memory(&format!("ext/trakt/lookup/{kind}-{id}.json"), move || { + rt.block_on(async { + info!("trakt lookup {kind:?}:{id:?}"); + let url = format!("https://api.trakt.tv/{}/{id}?extended=full", kind.plural()); + let res = self.client.get(url).send().await?.error_for_status()?; + Ok(res.json().await?) + }) + }) + .context("trakt lookup") + } + + pub fn people(&self, kind: TraktKind, id: u64, rt: &Handle) -> Result<Arc<TraktPeople>> { + cache_memory(&format!("ext/trakt/people/{kind}-{id}.json"), move || { + rt.block_on(async { + info!("trakt people {kind:?}:{id:?}"); + let url = format!( + "https://api.trakt.tv/{}/{id}/people?extended=full", + kind.plural() + ); + let res = self.client.get(url).send().await?.error_for_status()?; + Ok(res.json().await?) + }) + }) + .context("trakt people") + } + + pub fn show_seasons(&self, id: u64, rt: &Handle) -> Result<Arc<Vec<TraktSeason>>> { + cache_memory(&format!("ext/trakt/seasons/{id}.json"), move || { + rt.block_on(async { + info!("trakt seasons {id:?}"); + let url = format!("https://api.trakt.tv/shows/{id}/seasons?extended=full"); + let res = self.client.get(url).send().await?.error_for_status()?; + Ok(res.json().await?) + }) + }) + .context("trakt show seasons") + } + + pub fn show_season_episodes( + &self, + id: u64, + season: usize, + rt: &Handle, + ) -> Result<Arc<Vec<TraktEpisode>>> { + cache_memory( + &format!("ext/trakt/episodes/{id}-S{season}.json"), + move || { + rt.block_on(async { + info!("trakt episodes {id:?} season={season}"); + let url = + format!("https://api.trakt.tv/shows/{id}/seasons/{season}?extended=full"); + let res = self.client.get(url).send().await?.error_for_status()?; + Ok(res.json().await?) 
+ }) + }, + ) + .context("trakt show season episodes") + } +} + +#[derive(Debug, Clone, Deserialize, Serialize, Default)] +pub struct TraktSeason { + pub number: usize, + pub ids: TraktIds, + pub rating: f64, + pub votes: usize, + pub episode_count: usize, + pub aired_count: Option<usize>, + pub title: String, + pub overview: Option<String>, + pub network: String, +} + +#[derive(Debug, Clone, Deserialize, Serialize, Default)] +pub struct TraktEpisode { + pub season: Option<usize>, + pub number: usize, + pub number_abs: Option<usize>, + pub ids: TraktIds, + pub rating: f64, + pub votes: usize, + pub title: String, + pub runtime: f64, + pub overview: Option<String>, + pub available_translations: Vec<String>, + pub first_aired: Option<String>, + pub episode_type: String, +} + +#[derive(Debug, Clone, Deserialize, Serialize, Default)] +pub struct TraktPeople { + #[serde(default)] + pub cast: Vec<TraktAppearance>, + #[serde(default)] + pub crew: BTreeMap<TraktPeopleGroup, Vec<TraktAppearance>>, +} + +#[derive(Debug, Clone, Deserialize, Serialize, Default)] +pub struct TraktAppearance { + #[serde(default)] + pub jobs: Vec<String>, + #[serde(default)] + pub characters: Vec<String>, + pub person: TraktPerson, +} + +#[derive(Debug, Clone, Deserialize, Serialize, Default)] +pub struct TraktPerson { + pub name: String, + pub ids: TraktIds, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct TraktSearchResult { + pub r#type: TraktKind, + pub score: f64, + #[serde(flatten)] + pub inner: TraktKindObject, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum TraktKindObject { + Movie(TraktMediaObject), + Show(TraktMediaObject), + Season(TraktMediaObject), + Episode(TraktMediaObject), + Person(TraktMediaObject), + User(TraktMediaObject), +} + +impl TraktKindObject { + pub fn inner(&self) -> &TraktMediaObject { + match self { + TraktKindObject::Movie(x) + | TraktKindObject::Show(x) + | TraktKindObject::Season(x) + | 
TraktKindObject::Episode(x) + | TraktKindObject::Person(x) + | TraktKindObject::User(x) => x, + } + } +} + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] +pub enum TraktPeopleGroup { + #[serde(rename = "production")] + Production, + #[serde(rename = "art")] + Art, + #[serde(rename = "crew")] + Crew, + #[serde(rename = "costume & make-up")] //? they really use that in as a key?! + CostumeMakeup, + #[serde(rename = "directing")] + Directing, + #[serde(rename = "writing")] + Writing, + #[serde(rename = "sound")] + Sound, + #[serde(rename = "camera")] + Camera, + #[serde(rename = "visual effects")] + VisualEffects, + #[serde(rename = "lighting")] + Lighting, + #[serde(rename = "editing")] + Editing, + #[serde(rename = "created by")] + CreatedBy, +} +impl TraktPeopleGroup { + pub fn as_credit_category(self) -> CreditCategory { + match self { + TraktPeopleGroup::Production => CreditCategory::Production, + TraktPeopleGroup::Art => CreditCategory::Art, + TraktPeopleGroup::Crew => CreditCategory::Crew, + TraktPeopleGroup::CostumeMakeup => CreditCategory::CostumeMakeup, + TraktPeopleGroup::Directing => CreditCategory::Directing, + TraktPeopleGroup::Writing => CreditCategory::Writing, + TraktPeopleGroup::Sound => CreditCategory::Sound, + TraktPeopleGroup::Camera => CreditCategory::Camera, + TraktPeopleGroup::VisualEffects => CreditCategory::Vfx, + TraktPeopleGroup::Lighting => CreditCategory::Lighting, + TraktPeopleGroup::Editing => CreditCategory::Editing, + TraktPeopleGroup::CreatedBy => CreditCategory::CreatedBy, + } + } +} +impl TraktAppearance { + pub fn a(&self) -> Appearance { + Appearance { + jobs: self.jobs.to_owned(), + characters: self.characters.to_owned(), + node: NodeID([0; 32]), // person: Person { + // name: self.person.name.to_owned(), + // headshot: None, + // ids: self.person.ids.to_owned(), + // }, + } + } +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TraktMediaObject { + pub title: String, + pub 
year: Option<u32>, + pub ids: TraktIds, + + pub tagline: Option<String>, + pub overview: Option<String>, + pub released: Option<String>, + pub runtime: Option<usize>, + pub country: Option<String>, + pub trailer: Option<String>, + pub homepage: Option<String>, + pub status: Option<String>, + pub rating: Option<f64>, + pub votes: Option<usize>, + pub comment_count: Option<usize>, + pub language: Option<String>, + pub available_translations: Option<Vec<String>>, + pub genres: Option<Vec<String>>, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Default)] +pub struct TraktIds { + pub trakt: Option<u64>, + pub slug: Option<String>, + pub tvdb: Option<u64>, + pub imdb: Option<String>, + pub tmdb: Option<u64>, +} + +impl Display for TraktSearchResult { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!( + "{} ({}) \x1b[2m{} [{:?}]\x1b[0m", + self.inner.inner().title, + self.inner.inner().year.unwrap_or(0), + self.r#type, + self.inner.inner().ids + )) + } +} + +#[derive(Debug, Serialize, Deserialize, Clone, Copy, Hash, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum TraktKind { + Movie, + Show, + Season, + Episode, + Person, + User, +} + +impl TraktKind { + pub fn as_node_kind(self) -> NodeKind { + match self { + TraktKind::Movie => NodeKind::Movie, + TraktKind::Show => NodeKind::Show, + TraktKind::Season => NodeKind::Season, + TraktKind::Episode => NodeKind::Episode, + TraktKind::Person => NodeKind::Channel, + TraktKind::User => NodeKind::Channel, + } + } +} + +impl TraktKind { + pub fn singular(self) -> &'static str { + match self { + TraktKind::Movie => "movie", + TraktKind::Show => "show", + TraktKind::Season => "season", + TraktKind::Episode => "episode", + TraktKind::Person => "person", + TraktKind::User => "user", + } + } + pub fn plural(self) -> &'static str { + match self { + TraktKind::Movie => "movies", + TraktKind::Show => "shows", + TraktKind::Season => "seasons", + TraktKind::Episode => 
"episodes", + TraktKind::Person => "people", + TraktKind::User => "users", // //! not used in API + } + } +} +impl Display for TraktKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(match self { + TraktKind::Movie => "Movie", + TraktKind::Show => "Show", + TraktKind::Season => "Season", + TraktKind::Episode => "Episode", + TraktKind::Person => "Person", + TraktKind::User => "User", + }) + } +} + +impl ImportPlugin for Trakt { + fn import_instruction(&self, ct: &ImportContext, node: NodeID, line: &str) -> Result<()> { + if let Some(value) = line.strip_prefix("trakt-").or(line.strip_prefix("trakt=")) { + let (ty, id) = value.split_once(":").unwrap_or(("movie", value)); + let ty = match ty { + "movie" => IdentifierType::TraktMovie, + "show" => IdentifierType::TraktShow, + "season" => IdentifierType::TraktSeason, + "episode" => IdentifierType::TraktEpisode, + _ => bail!("unknown trakt kind"), + }; + ct.db.update_node_init(node, |node| { + node.identifiers.insert(ty, id.to_owned()); + Ok(()) + })?; + } + Ok(()) + } +} diff --git a/import/src/plugins/vgmdb.rs b/import/src/plugins/vgmdb.rs new file mode 100644 index 0000000..402fd90 --- /dev/null +++ b/import/src/plugins/vgmdb.rs @@ -0,0 +1,127 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. 
+ Copyright (C) 2025 metamuffin <metamuffin.org> +*/ + +use crate::USER_AGENT; +use anyhow::{Context, Result}; +use jellycache::{cache, cache_store, HashKey}; +use jellycommon::Asset; +use log::info; +use regex::Regex; +use reqwest::{ + header::{HeaderMap, HeaderName, HeaderValue}, + Client, ClientBuilder, +}; +use std::{ + sync::{Arc, LazyLock}, + time::Duration, +}; +use tokio::{ + runtime::Handle, + sync::Semaphore, + time::{sleep_until, Instant}, +}; + +pub struct Vgmdb { + client: Client, + rate_limit: Arc<Semaphore>, +} + +static RE_IMAGE_URL_FROM_HTML: LazyLock<Regex> = LazyLock::new(|| { + Regex::new(r#"href='(?<url>https://media.vgm.io/artists/[-/\w\.]+)'"#).unwrap() +}); + +impl Default for Vgmdb { + fn default() -> Self { + Self::new() + } +} + +impl Vgmdb { + pub fn new() -> Self { + let client = ClientBuilder::new() + .default_headers(HeaderMap::from_iter([ + ( + HeaderName::from_static("user-agent"), + HeaderValue::from_static(USER_AGENT), + ), + ( + HeaderName::from_static("x-comment"), + HeaderValue::from_static("Please add an API, thanks!"), + ), + ])) + .build() + .unwrap(); + Self { + client, + rate_limit: Arc::new(Semaphore::new(3)), + } + } + + pub fn get_artist_image(&self, id: u64, rt: &Handle) -> Result<Option<Asset>> { + if let Some(url) = self.get_artist_image_url(id, rt)? { + cache_store( + format!("ext/vgmdb/artist-image/{}.image", HashKey(&url)), + move || { + rt.block_on(async { + info!("downloading image {url:?}"); + Ok(self + .client + .get(url) + .send() + .await? + .error_for_status()? + .bytes() + .await? 
+ .to_vec()) + }) + }, + ) + .context("vgmdb media download") + .map(Asset) + .map(Some) + } else { + Ok(None) + } + } + + pub fn get_artist_image_url(&self, id: u64, rt: &Handle) -> Result<Option<String>> { + let html = self.scrape_artist_page(id, rt)?; + if let Some(cap) = RE_IMAGE_URL_FROM_HTML.captures(&str::from_utf8(&html).unwrap()) { + if let Some(url) = cap.name("url").map(|m| m.as_str()) { + return Ok(Some(url.to_string())); + } + } + Ok(None) + } + + pub fn scrape_artist_page(&self, id: u64, rt: &Handle) -> Result<Vec<u8>> { + cache(&format!("ext/vgmdb/artist-page/{id}.html"), move || { + rt.block_on(async { + let _permit = self.rate_limit.clone().acquire_owned().await?; + let permit_drop_ts = Instant::now() + Duration::from_secs(1); + info!("scrape artist: {id}"); + + let resp = self + .client + .get(format!("https://vgmdb.net/artist/{id}")) + .send() + .await? + .error_for_status()? + .bytes() + .await? + .to_vec(); + + tokio::task::spawn(async move { + sleep_until(permit_drop_ts).await; + drop(_permit); + }); + + Ok(resp) + }) + }) + .context("vgmdb artist page scrape") + } +} diff --git a/import/src/plugins/wikidata.rs b/import/src/plugins/wikidata.rs new file mode 100644 index 0000000..358996e --- /dev/null +++ b/import/src/plugins/wikidata.rs @@ -0,0 +1,129 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. 
+ Copyright (C) 2025 metamuffin <metamuffin.org> +*/ + +use crate::USER_AGENT; +use anyhow::{Context, Result, bail}; +use jellycache::{EscapeKey, cache_memory}; +use log::info; +use reqwest::{ + Client, ClientBuilder, + header::{HeaderMap, HeaderName, HeaderValue}, +}; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::{collections::BTreeMap, sync::Arc}; +use tokio::runtime::Handle; + +pub struct Wikidata { + client: Client, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct WikidataResponse { + entities: BTreeMap<String, WikidataEntity>, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct WikidataEntity { + pub pageid: u64, + pub ns: u64, + pub title: String, + pub lastrevid: u64, + pub modified: String, + pub r#type: String, + pub id: String, + pub claims: BTreeMap<String, Vec<WikidataClaim>>, +} +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct WikidataClaim { + pub r#type: String, + pub id: String, + pub rank: String, + pub mainsnak: WikidataSnak, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct WikidataSnak { + pub snaktype: String, + pub property: String, + pub hash: String, + pub datavalue: Option<WikidataValue>, + pub datatype: String, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct WikidataValue { + pub value: Value, + pub r#type: String, +} + +pub mod properties { + pub static IMAGE: &str = "P18"; +} + +impl Default for Wikidata { + fn default() -> Self { + Self::new() + } +} + +impl Wikidata { + pub fn new() -> Self { + let client = ClientBuilder::new() + .default_headers(HeaderMap::from_iter([ + ( + HeaderName::from_static("accept"), + HeaderValue::from_static("application/json"), + ), + ( + HeaderName::from_static("user-agent"), + HeaderValue::from_static(USER_AGENT), + ), + ])) + .build() + .unwrap(); + Self { client } + } + + pub fn query_image_path(&self, id: String, rt: &Handle) -> Result<Option<String>> { + let response = self.query(id.clone(), rt)?; + 
if let Some(entity) = response.entities.get(&id) { + if let Some(images) = entity.claims.get(properties::IMAGE) { + for image in images { + if image.mainsnak.datatype != "commonsMedia" { + bail!("image is of type {:?}", image.mainsnak.datatype); + } + if let Some(dv) = &image.mainsnak.datavalue { + if let Value::String(filename) = &dv.value { + return Ok(Some(filename.to_owned())); + } + } + } + } + } + Ok(None) + } + + pub fn query(&self, id: String, rt: &Handle) -> Result<Arc<WikidataResponse>> { + cache_memory( + &format!("ext/wikidata/{}.json", EscapeKey(&id)), + move || { + rt.block_on(async { + info!("entity query: {id}"); + Ok(self + .client + .get(format!("https://www.wikidata.org/entity/{id}")) + .send() + .await? + .error_for_status()? + .json() + .await?) + }) + }, + ) + .context("wikidata entity") + } +} diff --git a/import/src/plugins/wikimedia_commons.rs b/import/src/plugins/wikimedia_commons.rs new file mode 100644 index 0000000..86d934c --- /dev/null +++ b/import/src/plugins/wikimedia_commons.rs @@ -0,0 +1,63 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. 
+ Copyright (C) 2025 metamuffin <metamuffin.org> +*/ + +use crate::USER_AGENT; +use anyhow::{Context, Result}; +use jellycache::{cache_store, EscapeKey}; +use jellycommon::Asset; +use reqwest::{ + header::{HeaderMap, HeaderName, HeaderValue}, + redirect::Policy, + Client, ClientBuilder, +}; +use tokio::runtime::Handle; + +pub struct WikimediaCommons { + client: Client, +} +impl Default for WikimediaCommons { + fn default() -> Self { + Self::new() + } +} + +impl WikimediaCommons { + pub fn new() -> Self { + let client = ClientBuilder::new() + .default_headers(HeaderMap::from_iter([( + HeaderName::from_static("user-agent"), + HeaderValue::from_static(USER_AGENT), + )])) + .redirect(Policy::limited(5)) + .build() + .unwrap(); + Self { client } + } + + pub fn image_by_filename(&self, filename: String, rt: &Handle) -> Result<Asset> { + cache_store( + format!("ext/wikimedia-commons/image/{}.image", EscapeKey(&filename)), + move || { + rt.block_on(async { + Ok(self + .client + .get(format!( + "https://commons.wikimedia.org/wiki/Special:FilePath/{}", + filename.replace(" ", "_") + )) + .send() + .await? + .error_for_status()? + .bytes() + .await? + .to_vec()) + }) + }, + ) + .context("mediawiki image by filename") + .map(Asset) + } +} |