about summary refs log tree commit diff
path: root/import/src/plugins/vgmdb.rs
diff options
context:
space:
mode:
Diffstat (limited to 'import/src/plugins/vgmdb.rs')
-rw-r--r-- import/src/plugins/vgmdb.rs 127
1 files changed, 127 insertions, 0 deletions
diff --git a/import/src/plugins/vgmdb.rs b/import/src/plugins/vgmdb.rs
new file mode 100644
index 0000000..402fd90
--- /dev/null
+++ b/import/src/plugins/vgmdb.rs
@@ -0,0 +1,127 @@
+/*
+ This file is part of jellything (https://codeberg.org/metamuffin/jellything)
+ which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
+ Copyright (C) 2025 metamuffin <metamuffin.org>
+*/
+
+use crate::USER_AGENT;
+use anyhow::{Context, Result};
+use jellycache::{cache, cache_store, HashKey};
+use jellycommon::Asset;
+use log::info;
+use regex::Regex;
+use reqwest::{
+ header::{HeaderMap, HeaderName, HeaderValue},
+ Client, ClientBuilder,
+};
+use std::{
+ sync::{Arc, LazyLock},
+ time::Duration,
+};
+use tokio::{
+ runtime::Handle,
+ sync::Semaphore,
+ time::{sleep_until, Instant},
+};
+
+/// Scraper for vgmdb.net artist pages and the artist images they link to.
+pub struct Vgmdb {
+ /// HTTP client used for every request made by this scraper.
+ client: Client,
+ /// Semaphore acquired before each artist-page scrape to throttle requests.
+ rate_limit: Arc<Semaphore>,
+}
+
+/// Matches a single-quoted `href` pointing at an artist image on `media.vgm.io`
+/// and exposes the URL as the named capture group `url`.
+///
+/// The dots of the host name are escaped so that only the literal host
+/// `media.vgm.io` is accepted; an unescaped `.` would match any character and
+/// let look-alike hosts through to the subsequent download.
+static RE_IMAGE_URL_FROM_HTML: LazyLock<Regex> = LazyLock::new(|| {
+    Regex::new(r#"href='(?<url>https://media\.vgm\.io/artists/[-/\w\.]+)'"#).unwrap()
+});
+
+impl Default for Vgmdb {
+    /// Builds the default scraper; identical to calling [`Vgmdb::new`].
+    fn default() -> Self {
+        Vgmdb::new()
+    }
+}
+
+impl Vgmdb {
+    /// Creates a scraper with its own HTTP client and a request limiter holding
+    /// three permits.
+    ///
+    /// Every request sent through the client carries the crate-wide
+    /// `USER_AGENT` and an `x-comment` header.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the `reqwest` client cannot be constructed.
+    pub fn new() -> Self {
+        let client = ClientBuilder::new()
+            .default_headers(HeaderMap::from_iter([
+                (
+                    HeaderName::from_static("user-agent"),
+                    HeaderValue::from_static(USER_AGENT),
+                ),
+                (
+                    HeaderName::from_static("x-comment"),
+                    HeaderValue::from_static("Please add an API, thanks!"),
+                ),
+            ]))
+            .build()
+            .expect("building the vgmdb HTTP client must not fail");
+        Self {
+            client,
+            rate_limit: Arc::new(Semaphore::new(3)),
+        }
+    }
+
+    /// Downloads the image of artist `id`, if the artist page links one.
+    ///
+    /// The download goes through `cache_store` under a key derived from the
+    /// image URL (presumably so repeated calls reuse the stored bytes; see
+    /// `jellycache` for the exact semantics). `rt` is the runtime handle on
+    /// which the blocking wait for the request is performed.
+    ///
+    /// Returns `Ok(None)` when the artist page contains no image link.
+    ///
+    /// # Errors
+    ///
+    /// Fails if scraping the artist page fails, or if the image request fails
+    /// or answers with an error status.
+    pub fn get_artist_image(&self, id: u64, rt: &Handle) -> Result<Option<Asset>> {
+        let Some(url) = self.get_artist_image_url(id, rt)? else {
+            return Ok(None);
+        };
+        cache_store(
+            format!("ext/vgmdb/artist-image/{}.image", HashKey(&url)),
+            move || {
+                rt.block_on(async {
+                    info!("downloading image {url:?}");
+                    self.fetch_bytes(&url).await
+                })
+            },
+        )
+        .context("vgmdb media download")
+        .map(Asset)
+        .map(Some)
+    }
+
+    /// Extracts the artist image URL from the scraped page of artist `id`.
+    ///
+    /// Returns `Ok(None)` when the page has no link matching
+    /// `RE_IMAGE_URL_FROM_HTML`.
+    ///
+    /// # Errors
+    ///
+    /// Fails if scraping the artist page fails.
+    pub fn get_artist_image_url(&self, id: u64, rt: &Handle) -> Result<Option<String>> {
+        let page = self.scrape_artist_page(id, rt)?;
+        // Decode lossily: the bytes come from the network, so a stray invalid
+        // sequence must not panic the importer. Invalid bytes become U+FFFD,
+        // which the URL pattern does not accept.
+        let html = String::from_utf8_lossy(&page);
+        Ok(RE_IMAGE_URL_FROM_HTML
+            .captures(&html)
+            .and_then(|cap| cap.name("url"))
+            .map(|m| m.as_str().to_owned()))
+    }
+
+    /// Fetches the raw HTML of `https://vgmdb.net/artist/{id}`.
+    ///
+    /// The fetch goes through `cache` under a per-artist key. Before sending
+    /// the request a permit of the internal semaphore is acquired; it is
+    /// released no earlier than one second after acquisition, which throttles
+    /// how fast scrapes can be started.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the semaphore is closed, or if the request fails or answers
+    /// with an error status.
+    pub fn scrape_artist_page(&self, id: u64, rt: &Handle) -> Result<Vec<u8>> {
+        cache(&format!("ext/vgmdb/artist-page/{id}.html"), move || {
+            rt.block_on(async {
+                let permit = self.rate_limit.clone().acquire_owned().await?;
+                let permit_drop_ts = Instant::now() + Duration::from_secs(1);
+                info!("scrape artist: {id}");
+
+                // Keep the outcome as a `Result` instead of returning early, so
+                // that a failed request still goes through the delayed permit
+                // release below and cannot bypass the rate limit.
+                let fetched = self
+                    .fetch_bytes(&format!("https://vgmdb.net/artist/{id}"))
+                    .await;
+
+                // Hand the permit to a background task that holds it until the
+                // one-second window since acquisition has elapsed.
+                tokio::task::spawn(async move {
+                    sleep_until(permit_drop_ts).await;
+                    drop(permit);
+                });
+
+                fetched
+            })
+        })
+        .context("vgmdb artist page scrape")
+    }
+
+    /// Sends a GET request to `url` and returns the complete response body.
+    ///
+    /// A response with an error status is turned into an error.
+    async fn fetch_bytes(&self, url: &str) -> Result<Vec<u8>> {
+        let body = self
+            .client
+            .get(url)
+            .send()
+            .await?
+            .error_for_status()?
+            .bytes()
+            .await?;
+        Ok(body.to_vec())
+    }
+}