diff options
Diffstat (limited to 'import/src/plugins/vgmdb.rs')
| -rw-r--r-- | import/src/plugins/vgmdb.rs | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/import/src/plugins/vgmdb.rs b/import/src/plugins/vgmdb.rs new file mode 100644 index 0000000..402fd90 --- /dev/null +++ b/import/src/plugins/vgmdb.rs @@ -0,0 +1,127 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin <metamuffin.org> +*/ + +use crate::USER_AGENT; +use anyhow::{Context, Result}; +use jellycache::{cache, cache_store, HashKey}; +use jellycommon::Asset; +use log::info; +use regex::Regex; +use reqwest::{ + header::{HeaderMap, HeaderName, HeaderValue}, + Client, ClientBuilder, +}; +use std::{ + sync::{Arc, LazyLock}, + time::Duration, +}; +use tokio::{ + runtime::Handle, + sync::Semaphore, + time::{sleep_until, Instant}, +}; + +pub struct Vgmdb { + client: Client, + rate_limit: Arc<Semaphore>, +} + +static RE_IMAGE_URL_FROM_HTML: LazyLock<Regex> = LazyLock::new(|| { + Regex::new(r#"href='(?<url>https://media.vgm.io/artists/[-/\w\.]+)'"#).unwrap() +}); + +impl Default for Vgmdb { + fn default() -> Self { + Self::new() + } +} + +impl Vgmdb { + pub fn new() -> Self { + let client = ClientBuilder::new() + .default_headers(HeaderMap::from_iter([ + ( + HeaderName::from_static("user-agent"), + HeaderValue::from_static(USER_AGENT), + ), + ( + HeaderName::from_static("x-comment"), + HeaderValue::from_static("Please add an API, thanks!"), + ), + ])) + .build() + .unwrap(); + Self { + client, + rate_limit: Arc::new(Semaphore::new(3)), + } + } + + pub fn get_artist_image(&self, id: u64, rt: &Handle) -> Result<Option<Asset>> { + if let Some(url) = self.get_artist_image_url(id, rt)? { + cache_store( + format!("ext/vgmdb/artist-image/{}.image", HashKey(&url)), + move || { + rt.block_on(async { + info!("downloading image {url:?}"); + Ok(self + .client + .get(url) + .send() + .await? + .error_for_status()? + .bytes() + .await? + .to_vec()) + }) + }, + ) + .context("vgmdb media download") + .map(Asset) + .map(Some) + } else { + Ok(None) + } + } + + pub fn get_artist_image_url(&self, id: u64, rt: &Handle) -> Result<Option<String>> { + let html = self.scrape_artist_page(id, rt)?; + if let Some(cap) = RE_IMAGE_URL_FROM_HTML.captures(&str::from_utf8(&html).unwrap()) { + if let Some(url) = cap.name("url").map(|m| m.as_str()) { + return Ok(Some(url.to_string())); + } + } + Ok(None) + } + + pub fn scrape_artist_page(&self, id: u64, rt: &Handle) -> Result<Vec<u8>> { + cache(&format!("ext/vgmdb/artist-page/{id}.html"), move || { + rt.block_on(async { + let _permit = self.rate_limit.clone().acquire_owned().await?; + let permit_drop_ts = Instant::now() + Duration::from_secs(1); + info!("scrape artist: {id}"); + + let resp = self + .client + .get(format!("https://vgmdb.net/artist/{id}")) + .send() + .await? + .error_for_status()? + .bytes() + .await? + .to_vec(); + + tokio::task::spawn(async move { + sleep_until(permit_drop_ts).await; + drop(_permit); + }); + + Ok(resp) + }) + }) + .context("vgmdb artist page scrape") + } +} |