diff options
-rw-r--r-- | import/src/acoustid.rs | 15 | ||||
-rw-r--r-- | import/src/lib.rs | 24 | ||||
-rw-r--r-- | import/src/musicbrainz.rs | 1 | ||||
-rw-r--r-- | import/src/vgmdb.rs | 108 |
4 files changed, 140 insertions, 8 deletions
diff --git a/import/src/acoustid.rs b/import/src/acoustid.rs index 8d33821..9083724 100644 --- a/import/src/acoustid.rs +++ b/import/src/acoustid.rs @@ -4,7 +4,7 @@ Copyright (C) 2025 metamuffin <metamuffin.org> */ use crate::USER_AGENT; -use anyhow::Result; +use anyhow::{Context, Result}; use bincode::{Decode, Encode}; use jellybase::cache::async_cache_memory; use log::info; @@ -126,11 +126,18 @@ pub(crate) async fn acoustid_fingerprint(path: &Path) -> Result<Arc<Fingerprint> .arg("-json") .arg(path) .stdout(Stdio::piped()) - .spawn()?; + .spawn() + .context("fpcalc")?; let mut buf = Vec::new(); - child.stdout.unwrap().read_to_end(&mut buf).await?; - let out: FpCalcOutput = serde_json::from_slice(&buf)?; + child + .stdout + .unwrap() + .read_to_end(&mut buf) + .await + .context("read fpcalc output")?; + + let out: FpCalcOutput = serde_json::from_slice(&buf).context("parsing fpcalc output")?; let out = Fingerprint { duration: out.duration as u32, fingerprint: out.fingerprint, diff --git a/import/src/lib.rs b/import/src/lib.rs index fa74b9c..b93dbec 100644 --- a/import/src/lib.rs +++ b/import/src/lib.rs @@ -37,6 +37,7 @@ use tokio::{ task::spawn_blocking, }; use trakt::Trakt; +use vgmdb::Vgmdb; use wikidata::Wikidata; use wikimedia_commons::WikimediaCommons; @@ -45,13 +46,14 @@ pub mod infojson; pub mod musicbrainz; pub mod tmdb; pub mod trakt; +pub mod vgmdb; pub mod wikidata; pub mod wikimedia_commons; pub const USER_AGENT: &'static str = concat!( "jellything/", env!("CARGO_PKG_VERSION"), - " ( https://codeberg.org/metamuffin/jellything )" + " +https://codeberg.org/metamuffin/jellything" ); static IMPORT_SEM: LazyLock<Semaphore> = LazyLock::new(|| Semaphore::new(1)); @@ -67,6 +69,7 @@ struct Apis { musicbrainz: MusicBrainz, wikidata: Wikidata, wikimedia_commons: WikimediaCommons, + vgmdb: Vgmdb, } pub fn is_importing() -> bool { @@ -96,6 +99,7 @@ fn import(db: &Database, incremental: bool) -> Result<()> { musicbrainz: MusicBrainz::new(), wikidata: Wikidata::new(), wikimedia_commons: WikimediaCommons::new(), + vgmdb: Vgmdb::new(), }; let rthandle = Handle::current(); @@ -716,7 +720,8 @@ fn apply_musicbrainz_recording( let artist = rthandle.block_on(apis.musicbrainz.lookup_artist(artist.id.clone()))?; - let mut image = None; + let mut image_1 = None; + let mut image_2 = None; for rel in &artist.relations { match rel.type_id.as_str() { @@ -729,7 +734,18 @@ fn apply_musicbrainz_recording( let path = rthandle.block_on( apis.wikimedia_commons.image_by_filename(filename), )?; - image = Some(AssetInner::Cache(path).ser()); + image_1 = Some(AssetInner::Cache(path).ser()); + } + } + } + VGMDB => { + let url = rel.url.as_ref().unwrap().resource.clone(); + if let Some(id) = url.strip_prefix("https://vgmdb.net/artist/") { + let id = id.parse::<u64>().context("parse vgmdb id")?; + if let Some(path) = + rthandle.block_on(apis.vgmdb.get_artist_image(id))? + { + image_2 = Some(AssetInner::Cache(path).ser()); } } } @@ -750,7 +766,7 @@ fn apply_musicbrainz_recording( } else { rel.target_credit.clone() }, - headshot: image, + headshot: image_1.or(image_2), ids: ObjectIds::default(), }, }); diff --git a/import/src/musicbrainz.rs b/import/src/musicbrainz.rs index 704807b..f05b3de 100644 --- a/import/src/musicbrainz.rs +++ b/import/src/musicbrainz.rs @@ -32,6 +32,7 @@ pub mod reltypes { pub const MIX: &str = "3e3102e1-1896-4f50-b5b2-dd9824e46efe"; pub const INSTRUMENT: &str = "59054b12-01ac-43ee-a618-285fd397e461"; pub const WIKIDATA: &str = "689870a4-a1e4-4912-b17f-7b2664215698"; + pub const VGMDB: &str = "0af15ab3-c615-46d6-b95b-a5fcd2a92ed9"; } pub struct MusicBrainz { diff --git a/import/src/vgmdb.rs b/import/src/vgmdb.rs new file mode 100644 index 0000000..9ac76d6 --- /dev/null +++ b/import/src/vgmdb.rs @@ -0,0 +1,108 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin <metamuffin.org> +*/ + +use crate::USER_AGENT; +use anyhow::Result; +use jellybase::cache::{async_cache_file, async_cache_memory, CachePath}; +use log::info; +use regex::Regex; +use reqwest::{ + header::{HeaderMap, HeaderName, HeaderValue}, + Client, ClientBuilder, +}; +use std::{ + sync::{Arc, LazyLock}, + time::Duration, +}; +use tokio::{ + io::AsyncWriteExt, + sync::Semaphore, + time::{sleep_until, Instant}, +}; + +pub struct Vgmdb { + client: Client, + rate_limit: Arc<Semaphore>, +} + +static RE_IMAGE_URL_FROM_HTML: LazyLock<Regex> = LazyLock::new(|| { + Regex::new(r#"href='(?<url>https://media.vgm.io/artists/[-/\w\.]+)'"#).unwrap() +}); + +impl Vgmdb { + pub fn new() -> Self { + let client = ClientBuilder::new() + .default_headers(HeaderMap::from_iter([ + ( + HeaderName::from_static("user-agent"), + HeaderValue::from_static(USER_AGENT), + ), + ( + HeaderName::from_static("x-comment"), + HeaderValue::from_static("Please add an API, thanks!"), + ), + ])) + .build() + .unwrap(); + Self { + client, + rate_limit: Arc::new(Semaphore::new(3)), + } + } + + pub async fn get_artist_image(&self, id: u64) -> Result<Option<CachePath>> { + if let Some(url) = self.get_artist_image_url(id).await? { + Ok(Some( + async_cache_file("api-vgmdb-media", url.clone(), |mut file| async move { + info!("downloading image {url:?}"); + let mut res = self.client.get(url).send().await?.error_for_status()?; + while let Some(chunk) = res.chunk().await? { + file.write_all(&chunk).await?; + } + Ok(()) + }) + .await?, + )) + } else { + Ok(None) + } + } + + pub async fn get_artist_image_url(&self, id: u64) -> Result<Option<String>> { + let html = self.scrape_artist_page(id).await?; + if let Some(cap) = RE_IMAGE_URL_FROM_HTML.captures(&html) { + if let Some(url) = cap.name("url").map(|m| m.as_str()) { + return Ok(Some(url.to_string())); + } + } + return Ok(None); + } + + pub async fn scrape_artist_page(&self, id: u64) -> Result<Arc<String>> { + async_cache_memory("api-vgmdb-artist", id.clone(), || async move { + let _permit = self.rate_limit.clone().acquire_owned().await?; + let permit_drop_ts = Instant::now() + Duration::from_secs(1); + info!("scrape artist: {id}"); + + let resp = self + .client + .get(format!("https://vgmdb.net/artist/{id}")) + .send() + .await? + .error_for_status()? + .text() + .await?; + + tokio::task::spawn(async move { + sleep_until(permit_drop_ts).await; + drop(_permit); + }); + + Ok(resp) + }) + .await + } +} |