/* This file is part of jellything (https://codeberg.org/metamuffin/jellything) which is licensed under the GNU Affero General Public License (version 3); see /COPYING. Copyright (C) 2026 metamuffin */ use crate::{ USER_AGENT, plugins::{ImportPlugin, PluginContext, PluginInfo}, source_rank::ObjectImportSourceExt, }; use anyhow::{Context, Result}; use jellycache::{Cache, HashKey}; use jellycommon::*; use jellydb::RowNum; use log::info; use regex::Regex; use reqwest::{ Client, ClientBuilder, header::{HeaderMap, HeaderName, HeaderValue}, }; use std::{ sync::{Arc, LazyLock}, time::Duration, }; use tokio::{ runtime::Handle, sync::Semaphore, time::{Instant, sleep_until}, }; pub struct Vgmdb { client: Client, rate_limit: Arc, } static RE_IMAGE_URL_FROM_HTML: LazyLock = LazyLock::new(|| { Regex::new(r#"href='(?https://media.vgm.io/artists/[-/\w\.]+)'"#).unwrap() }); impl Default for Vgmdb { fn default() -> Self { Self::new() } } impl Vgmdb { pub fn new() -> Self { let client = ClientBuilder::new() .default_headers(HeaderMap::from_iter([ ( HeaderName::from_static("user-agent"), HeaderValue::from_static(USER_AGENT), ), ( HeaderName::from_static("x-comment"), HeaderValue::from_static("Please add an API, thanks!"), ), ])) .build() .unwrap(); Self { client, rate_limit: Arc::new(Semaphore::new(3)), } } pub fn get_artist_image(&self, cache: &Cache, id: u64, rt: &Handle) -> Result> { if let Some(url) = self.get_artist_image_url(cache, id, rt)? { cache .store( format!("ext/vgmdb/artist-image/{}.image", HashKey(&url)), move || { rt.block_on(async { info!("downloading image {url:?}"); Ok(self .client .get(url) .send() .await? .error_for_status()? .bytes() .await? .to_vec()) }) }, ) .context("vgmdb media download") .map(Some) } else { Ok(None) } } pub fn get_artist_image_url( &self, cache: &Cache, id: u64, rt: &Handle, ) -> Result> { let html = self.scrape_artist_page(cache, id, rt)?; if let Some(cap) = RE_IMAGE_URL_FROM_HTML.captures(&str::from_utf8(&html).unwrap()) { if let Some(url) = cap.name("url").map(|m| m.as_str()) { return Ok(Some(url.to_string())); } } Ok(None) } pub fn scrape_artist_page(&self, cache: &Cache, id: u64, rt: &Handle) -> Result> { cache .cache(&format!("ext/vgmdb/artist-page/{id}.html"), move || { rt.block_on(async { let _permit = self.rate_limit.clone().acquire_owned().await?; let permit_drop_ts = Instant::now() + Duration::from_secs(1); info!("scrape artist: {id}"); let resp = self .client .get(format!("https://vgmdb.net/artist/{id}")) .send() .await? .error_for_status()? .bytes() .await? .to_vec(); tokio::task::spawn(async move { sleep_until(permit_drop_ts).await; drop(_permit); }); Ok(resp) }) }) .context("vgmdb artist page scrape") } } impl ImportPlugin for Vgmdb { fn info(&self) -> PluginInfo { PluginInfo { name: "vgmdb", tag: MSOURCE_VGMDB, handle_process: true, ..Default::default() } } fn process(&self, ct: &PluginContext, node: RowNum) -> Result<()> { let data = ct.ic.get_node(node)?.unwrap(); let data = data.as_object(); let Some(id) = data .get(NO_IDENTIFIERS) .unwrap_or_default() .get(IDENT_VGMDB_ARTIST) else { return Ok(()); }; let id = id.parse()?; let Some(image) = self.get_artist_image(&ct.ic.cache, id, ct.rt)? else { return Ok(()); }; ct.ic.update_node(node, |node| { node.as_object() .update(NO_PICTURES, |pics| pics.insert_s(ct.is, PICT_COVER, &image)) })?; Ok(()) } }