diff options
| author | metamuffin <metamuffin@disroot.org> | 2026-02-19 14:29:03 +0100 |
|---|---|---|
| committer | metamuffin <metamuffin@disroot.org> | 2026-02-19 14:29:03 +0100 |
| commit | 41e8ff64585b7a3b77acd027d95e3e7f651d9e0e (patch) | |
| tree | 7226e1ae6d6dfd7e111ff0364889089ca5b07cd9 /import | |
| parent | dc4fa1995c9a341fe3027774c52a0760a00c6387 (diff) | |
| download | jellything-41e8ff64585b7a3b77acd027d95e3e7f651d9e0e.tar jellything-41e8ff64585b7a3b77acd027d95e3e7f651d9e0e.tar.bz2 jellything-41e8ff64585b7a3b77acd027d95e3e7f651d9e0e.tar.zst | |
migrate musicbrainz importer
Diffstat (limited to 'import')
| -rw-r--r-- | import/src/helpers.rs | 22 | ||||
| -rw-r--r-- | import/src/lib.rs | 1 | ||||
| -rw-r--r-- | import/src/plugins/mod.rs | 4 | ||||
| -rw-r--r-- | import/src/plugins/musicbrainz.rs | 145 | ||||
| -rw-r--r-- | import/src/plugins/trakt.rs | 15 | ||||
| -rw-r--r-- | import/src/plugins/vgmdb.rs | 29 | ||||
| -rw-r--r-- | import/src/plugins/wikidata.rs | 104 | ||||
| -rw-r--r-- | import/src/plugins/wikimedia_commons.rs | 79 |
8 files changed, 286 insertions, 113 deletions
diff --git a/import/src/helpers.rs b/import/src/helpers.rs new file mode 100644 index 0000000..5245a50 --- /dev/null +++ b/import/src/helpers.rs @@ -0,0 +1,22 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2026 metamuffin <metamuffin.org> +*/ + +use anyhow::Result; +use jellycommon::{ + NO_SLUG, + jellyobject::{ObjectBuffer, Path}, +}; +use jellydb::{Filter, Query, RowNum, Sort, Transaction}; + +pub fn get_or_insert_slug(txn: &mut dyn Transaction, slug: &str) -> Result<RowNum> { + match txn.query_single(Query { + filter: Filter::Match(Path(vec![NO_SLUG.0]), slug.into()), + sort: Sort::None, + })? { + Some(r) => Ok(r), + None => txn.insert(ObjectBuffer::new(&mut [(NO_SLUG.0, &slug)])), + } +} diff --git a/import/src/lib.rs b/import/src/lib.rs index 73f2b91..441fd0a 100644 --- a/import/src/lib.rs +++ b/import/src/lib.rs @@ -7,6 +7,7 @@ pub mod plugins; pub mod reporting; +pub mod helpers; use crate::{ plugins::{ImportPlugin, PluginContext, infojson::is_info_json, init_plugins, misc::is_cover}, diff --git a/import/src/plugins/mod.rs b/import/src/plugins/mod.rs index d99fa9b..a948c48 100644 --- a/import/src/plugins/mod.rs +++ b/import/src/plugins/mod.rs @@ -13,7 +13,6 @@ pub mod tmdb; pub mod trakt; pub mod vgmdb; pub mod wikidata; -pub mod wikimedia_commons; use crate::{ApiSecrets, ImportConfig, InheritedFlags}; use anyhow::Result; @@ -80,8 +79,7 @@ pub fn init_plugins(secrets: &ApiSecrets) -> Vec<Box<dyn ImportPlugin>> { } plugins.push(Box::new(musicbrainz::MusicBrainz::new())); // deps: acoustid plugins.push(Box::new(wikidata::Wikidata::new())); // deps: musicbrainz - plugins.push(Box::new(wikimedia_commons::WikimediaCommons::new())); // deps: wikidata - plugins.push(Box::new(vgmdb::Vgmdb::new())); // deps: wikidata + // plugins.push(Box::new(vgmdb::Vgmdb::new())); // deps: wikidata plugins } diff --git a/import/src/plugins/musicbrainz.rs b/import/src/plugins/musicbrainz.rs index 27e4d0f..9f7b08c 100644 --- a/import/src/plugins/musicbrainz.rs +++ b/import/src/plugins/musicbrainz.rs @@ -6,10 +6,16 @@ use crate::{ USER_AGENT, - plugins::{ImportPlugin, PluginInfo}, + helpers::get_or_insert_slug, + plugins::{ + ImportPlugin, PluginContext, PluginInfo, + musicbrainz::reltypes::{VGMDB, WIKIDATA}, + }, }; use anyhow::{Context, Result}; use jellycache::Cache; +use jellycommon::{jellyobject::ObjectBuffer, *}; +use jellydb::RowNum; use log::info; use reqwest::{ Client, ClientBuilder, @@ -228,7 +234,7 @@ impl MusicBrainz { pub fn lookup_recording( &self, cache: &Cache, - id: String, + id: &str, rt: &Handle, ) -> Result<Arc<MbRecordingRel>> { cache @@ -280,12 +286,7 @@ impl MusicBrainz { .context("musicbrainz recording lookup") } - pub fn lookup_artist( - &self, - cache: &Cache, - id: String, - rt: &Handle, - ) -> Result<Arc<MbArtistRel>> { + pub fn lookup_artist(&self, cache: &Cache, id: &str, rt: &Handle) -> Result<Arc<MbArtistRel>> { cache .cache_memory(&format!("ext/musicbrainz/artist/{id}.json"), move || { rt.block_on(async { @@ -338,7 +339,135 @@ impl ImportPlugin for MusicBrainz { fn info(&self) -> PluginInfo { PluginInfo { name: "musicbrainz", + handle_process: true, ..Default::default() } } + fn process(&self, ct: &PluginContext, node_row: RowNum) -> Result<()> { + self.process_recording(ct, node_row)?; + self.process_artist(ct, node_row)?; + Ok(()) + } +} +impl MusicBrainz { + fn process_recording(&self, ct: &PluginContext, node_row: RowNum) -> Result<()> { + let data = ct.ic.get_node(node_row)?.unwrap(); + let data = data.as_object(); + + let Some(mbid) = data + .get(NO_IDENTIFIERS) + .unwrap_or_default() + .get(IDENT_MUSICBRAINZ_RECORDING) + else { + return Ok(()); + }; + + let rec = self.lookup_recording(&ct.ic.cache, mbid, ct.rt)?; + + ct.ic.db.transaction(&mut |txn| { + let mut node = txn.get(node_row)?.unwrap(); + + node = node.as_object().insert(NO_TITLE, &rec.title); + if let Some(a) = rec.artist_credit.first() { + node = node.as_object().insert(NO_SUBTITLE, &a.artist.name); + } + node = node.as_object().update(NO_IDENTIFIERS, |ids| { + ids.insert_multi( + IDENT_ISRC, + &rec.isrcs.iter().map(|e| e.as_str()).collect::<Vec<_>>(), + ) + }); + + for rel in &rec.relations { + use reltypes::*; + let Some((role, cat)) = (match rel.type_id.as_str() { + INSTRUMENT => Some(("", CRCAT_INSTRUMENT)), + VOCAL => Some(("", CRCAT_VOCAL)), + PRODUCER => Some(("", CRCAT_PRODUCER)), + MIX => Some(("mix ", CRCAT_ENGINEER)), + PHONOGRAPHIC_COPYRIGHT => Some(("phonographic copyright ", CRCAT_ENGINEER)), + PROGRAMMING => Some(("programming ", CRCAT_ENGINEER)), + _ => None, + }) else { + continue; + }; + + let artist = rel.artist.as_ref().unwrap(); + let artist_slug = format!("musicbrainz-artist-{}", artist.id); + let artist_row = get_or_insert_slug(txn, &artist_slug)?; + + let mut artist_node = txn.get(artist_row)?.unwrap(); + artist_node = artist_node.as_object().update(NO_IDENTIFIERS, |ids| { + ids.insert(IDENT_MUSICBRAINZ_ARTIST, &artist.id) + }); + artist_node = artist_node.as_object().insert(NO_KIND, KIND_PERSON); + txn.update(artist_row, artist_node)?; + ct.pending_nodes.lock().unwrap().insert(artist_row); + + let credit = ObjectBuffer::new(&mut [ + (CR_NODE.0, &artist_row), + (CR_KIND.0, &cat), + (CR_ROLE.0, &role), + ]); + node = node + .as_object() + .extend_object(NO_CREDIT, CR_NODE.0, Some(credit)) + } + + txn.update(node_row, node) + })?; + + Ok(()) + } + + fn process_artist(&self, ct: &PluginContext, node_row: RowNum) -> Result<()> { + let data = ct.ic.get_node(node_row)?.unwrap(); + let data = data.as_object(); + + let Some(mbid) = data + .get(NO_IDENTIFIERS) + .unwrap_or_default() + .get(IDENT_MUSICBRAINZ_ARTIST) + else { + return Ok(()); + }; + + let artist = self.lookup_artist(&ct.ic.cache, mbid, ct.rt)?; + + ct.ic.db.transaction(&mut |txn| { + let mut node = txn.get(node_row)?.unwrap(); + + node = node.as_object().insert(NO_TITLE, &artist.name); + + for rel in &artist.relations { + let url = rel.url.as_ref().map(|u| u.resource.clone()); + match rel.type_id.as_str() { + WIKIDATA => { + if let Some(url) = url + && let Some(id) = url.strip_prefix("https://www.wikidata.org/wiki/") + { + node = node + .as_object() + .update(NO_IDENTIFIERS, |ids| ids.insert(IDENT_WIKIDATA, id)) + } + } + VGMDB => { + if let Some(url) = url + && let Some(id) = url.strip_prefix("https://vgmdb.net/artist/") + { + let id = id.parse::<u64>().context("parse vgmdb id")?; + node = node + .as_object() + .update(NO_IDENTIFIERS, |ids| ids.insert(IDENT_VGMDB_ARTIST, id)) + } + } + _ => (), + } + } + + txn.update(node_row, node) + })?; + + Ok(()) + } } diff --git a/import/src/plugins/trakt.rs b/import/src/plugins/trakt.rs index 9b7ed99..021ffe9 100644 --- a/import/src/plugins/trakt.rs +++ b/import/src/plugins/trakt.rs @@ -5,15 +5,16 @@ */ use crate::{ USER_AGENT, + helpers::get_or_insert_slug, plugins::{ImportPlugin, PluginContext, PluginInfo}, }; use anyhow::{Context, Result, anyhow, bail}; use jellycache::{Cache, HashKey}; use jellycommon::{ - jellyobject::{ObjectBuffer, Path, Tag}, + jellyobject::{ObjectBuffer, Tag}, *, }; -use jellydb::{Filter, Query, RowNum, Sort}; +use jellydb::RowNum; use log::info; use reqwest::{ Client, ClientBuilder, @@ -516,15 +517,7 @@ impl Trakt { .collect::<Vec<String>>() .join(", "); - let row = match txn.query_single(Query { - filter: Filter::Match(Path(vec![NO_SLUG.0]), slug.as_str().into()), - sort: Sort::None, - })? { - Some(r) => r, - None => { - txn.insert(ObjectBuffer::new(&mut [(NO_SLUG.0, &slug.as_str())]))? - } - }; + let row = get_or_insert_slug(txn, &slug)?; let mut c = txn.get(row)?.unwrap(); c = c.as_object().insert(NO_KIND, KIND_PERSON); diff --git a/import/src/plugins/vgmdb.rs b/import/src/plugins/vgmdb.rs index 534b241..8221692 100644 --- a/import/src/plugins/vgmdb.rs +++ b/import/src/plugins/vgmdb.rs @@ -6,10 +6,12 @@ use crate::{ USER_AGENT, - plugins::{ImportPlugin, PluginInfo}, + plugins::{ImportPlugin, PluginContext, PluginInfo}, }; use anyhow::{Context, Result}; use jellycache::{Cache, HashKey}; +use jellycommon::*; +use jellydb::RowNum; use log::info; use regex::Regex; use reqwest::{ @@ -138,7 +140,32 @@ impl ImportPlugin for Vgmdb { fn info(&self) -> PluginInfo { PluginInfo { name: "vgmdb", + handle_process: true, ..Default::default() } } + + fn process(&self, ct: &PluginContext, node: RowNum) -> Result<()> { + let data = ct.ic.get_node(node)?.unwrap(); + let data = data.as_object(); + + let Some(id) = data + .get(NO_IDENTIFIERS) + .unwrap_or_default() + .get(IDENT_VGMDB_ARTIST) + else { + return Ok(()); + }; + + let Some(image) = self.get_artist_image(&ct.ic.cache, id, ct.rt)? else { + return Ok(()); + }; + + ct.ic.update_node(node, |node| { + node.as_object() + .update(NO_PICTURES, |pics| pics.insert(PICT_COVER, &image)) + })?; + + Ok(()) + } } diff --git a/import/src/plugins/wikidata.rs b/import/src/plugins/wikidata.rs index 3afd393..2286f8d 100644 --- a/import/src/plugins/wikidata.rs +++ b/import/src/plugins/wikidata.rs @@ -6,14 +6,17 @@ use crate::{ USER_AGENT, - plugins::{ImportPlugin, PluginInfo}, + plugins::{ImportPlugin, PluginContext, PluginInfo}, }; use anyhow::{Context, Result, bail}; use jellycache::{Cache, EscapeKey}; +use jellycommon::*; +use jellydb::RowNum; use log::info; use reqwest::{ Client, ClientBuilder, header::{HeaderMap, HeaderName, HeaderValue}, + redirect::Policy, }; use serde::{Deserialize, Serialize}; use serde_json::Value; @@ -22,6 +25,7 @@ use tokio::runtime::Handle; pub struct Wikidata { client: Client, + commons: WikimediaCommons, } #[derive(Debug, Deserialize, Serialize, Clone)] @@ -88,17 +92,15 @@ impl Wikidata { ])) .build() .unwrap(); - Self { client } + Self { + client, + commons: WikimediaCommons::new(), + } } - pub fn query_image_path( - &self, - cache: &Cache, - id: String, - rt: &Handle, - ) -> Result<Option<String>> { - let response = self.query(cache, id.clone(), rt)?; - if let Some(entity) = response.entities.get(&id) { + pub fn query_image_path(&self, cache: &Cache, id: &str, rt: &Handle) -> Result<Option<String>> { + let response = self.query(cache, id, rt)?; + if let Some(entity) = response.entities.get(id) { if let Some(images) = entity.claims.get(properties::IMAGE) { for image in images { if image.mainsnak.datatype != "commonsMedia" { @@ -115,7 +117,7 @@ impl Wikidata { Ok(None) } - pub fn query(&self, cache: &Cache, id: String, rt: &Handle) -> Result<Arc<WikidataResponse>> { + pub fn query(&self, cache: &Cache, id: &str, rt: &Handle) -> Result<Arc<WikidataResponse>> { cache .cache_memory( &format!("ext/wikidata/{}.json", EscapeKey(&id)), @@ -137,11 +139,91 @@ impl Wikidata { } } +pub struct WikimediaCommons { + client: Client, +} +impl Default for WikimediaCommons { + fn default() -> Self { + Self::new() + } +} + +impl WikimediaCommons { + pub fn new() -> Self { + let client = ClientBuilder::new() + .default_headers(HeaderMap::from_iter([( + HeaderName::from_static("user-agent"), + HeaderValue::from_static(USER_AGENT), + )])) + .redirect(Policy::limited(5)) + .build() + .unwrap(); + Self { client } + } + + pub fn image_by_filename( + &self, + cache: &Cache, + filename: String, + rt: &Handle, + ) -> Result<String> { + cache + .store( + format!("ext/wikimedia-commons/image/{}.image", EscapeKey(&filename)), + move || { + rt.block_on(async { + Ok(self + .client + .get(format!( + "https://commons.wikimedia.org/wiki/Special:FilePath/{}", + filename.replace(" ", "_") + )) + .send() + .await? + .error_for_status()? + .bytes() + .await? + .to_vec()) + }) + }, + ) + .context("mediawiki image by filename") + } +} + impl ImportPlugin for Wikidata { fn info(&self) -> PluginInfo { PluginInfo { name: "wikidata", + handle_process: true, ..Default::default() } } + fn process(&self, ct: &PluginContext, node: RowNum) -> Result<()> { + let data = ct.ic.get_node(node)?.unwrap(); + let data = data.as_object(); + + let Some(id) = data + .get(NO_IDENTIFIERS) + .unwrap_or_default() + .get(IDENT_WIKIDATA) + else { + return Ok(()); + }; + + let Some(filename) = self.query_image_path(&ct.ic.cache, id, ct.rt)? else { + return Ok(()); + }; + + let image = self + .commons + .image_by_filename(&ct.ic.cache, filename, ct.rt)?; + + ct.ic.update_node(node, |node| { + node.as_object() + .update(NO_PICTURES, |pics| pics.insert(PICT_COVER, &image)) + })?; + + Ok(()) + } } diff --git a/import/src/plugins/wikimedia_commons.rs b/import/src/plugins/wikimedia_commons.rs deleted file mode 100644 index aebf5dd..0000000 --- a/import/src/plugins/wikimedia_commons.rs +++ /dev/null @@ -1,79 +0,0 @@ -/* - This file is part of jellything (https://codeberg.org/metamuffin/jellything) - which is licensed under the GNU Affero General Public License (version 3); see /COPYING. - Copyright (C) 2026 metamuffin <metamuffin.org> -*/ - -use crate::{ - USER_AGENT, - plugins::{ImportPlugin, PluginInfo}, -}; -use anyhow::{Context, Result}; -use jellycache::{Cache, EscapeKey}; -use reqwest::{ - Client, ClientBuilder, - header::{HeaderMap, HeaderName, HeaderValue}, - redirect::Policy, -}; -use tokio::runtime::Handle; - -pub struct WikimediaCommons { - client: Client, -} -impl Default for WikimediaCommons { - fn default() -> Self { - Self::new() - } -} - -impl WikimediaCommons { - pub fn new() -> Self { - let client = ClientBuilder::new() - .default_headers(HeaderMap::from_iter([( - HeaderName::from_static("user-agent"), - HeaderValue::from_static(USER_AGENT), - )])) - .redirect(Policy::limited(5)) - .build() - .unwrap(); - Self { client } - } - - pub fn image_by_filename( - &self, - cache: &Cache, - filename: String, - rt: &Handle, - ) -> Result<String> { - cache - .store( - format!("ext/wikimedia-commons/image/{}.image", EscapeKey(&filename)), - move || { - rt.block_on(async { - Ok(self - .client - .get(format!( - "https://commons.wikimedia.org/wiki/Special:FilePath/{}", - filename.replace(" ", "_") - )) - .send() - .await? - .error_for_status()? - .bytes() - .await? - .to_vec()) - }) - }, - ) - .context("mediawiki image by filename") - } -} - -impl ImportPlugin for WikimediaCommons { - fn info(&self) -> PluginInfo { - PluginInfo { - name: "wikimedia-commons", - ..Default::default() - } - } -} |