about summary refs log tree commit diff
path: root/import/src/vgmdb.rs
diff options
context:
space:
mode:
Diffstat (limited to 'import/src/vgmdb.rs')
-rw-r--r-- import/src/vgmdb.rs 108
1 files changed, 108 insertions, 0 deletions
diff --git a/import/src/vgmdb.rs b/import/src/vgmdb.rs
new file mode 100644
index 0000000..9ac76d6
--- /dev/null
+++ b/import/src/vgmdb.rs
@@ -0,0 +1,108 @@
+/*
+ This file is part of jellything (https://codeberg.org/metamuffin/jellything)
+ which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
+ Copyright (C) 2025 metamuffin <metamuffin.org>
+*/
+
+use crate::USER_AGENT;
+use anyhow::Result;
+use jellybase::cache::{async_cache_file, async_cache_memory, CachePath};
+use log::info;
+use regex::Regex;
+use reqwest::{
+ header::{HeaderMap, HeaderName, HeaderValue},
+ Client, ClientBuilder,
+};
+use std::{
+ sync::{Arc, LazyLock},
+ time::Duration,
+};
+use tokio::{
+ io::AsyncWriteExt,
+ sync::Semaphore,
+ time::{sleep_until, Instant},
+};
+
+/// Scraper for artist data published on vgmdb.net.
+///
+/// Derives `Debug` so the type can be logged and embedded in other `Debug` types.
+#[derive(Debug)]
+pub struct Vgmdb {
+    /// HTTP client used for every request issued by this scraper.
+    client: Client,
+    /// Semaphore whose permits are acquired before an artist page is requested.
+    rate_limit: Arc<Semaphore>,
+}
+
+/// Matches a single-quoted `href` attribute that points below
+/// `https://media.vgm.io/artists/` and captures the address in the group `url`.
+///
+/// The dots of the host name are escaped so they only match a literal `.`;
+/// unescaped, they would match any character and accept look-alike hosts.
+static RE_IMAGE_URL_FROM_HTML: LazyLock<Regex> = LazyLock::new(|| {
+    Regex::new(r#"href='(?<url>https://media\.vgm\.io/artists/[-/\w\.]+)'"#).unwrap()
+});
+
+impl Vgmdb {
+    /// Creates a scraper with its own HTTP client and a rate limiter of three permits.
+    ///
+    /// Every request carries the crate's `USER_AGENT` and an `x-comment` header.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the HTTP client cannot be built.
+    pub fn new() -> Self {
+        let client = ClientBuilder::new()
+            .default_headers(HeaderMap::from_iter([
+                (
+                    HeaderName::from_static("user-agent"),
+                    HeaderValue::from_static(USER_AGENT),
+                ),
+                (
+                    HeaderName::from_static("x-comment"),
+                    HeaderValue::from_static("Please add an API, thanks!"),
+                ),
+            ]))
+            .build()
+            .unwrap();
+        Self {
+            client,
+            rate_limit: Arc::new(Semaphore::new(3)),
+        }
+    }
+
+    /// Downloads the image of artist `id` through the file cache.
+    ///
+    /// Returns `Ok(None)` when the artist page contains no image link.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the artist page cannot be scraped, if the image request fails or
+    /// answers with an error status, or if writing to the cache file fails.
+    pub async fn get_artist_image(&self, id: u64) -> Result<Option<CachePath>> {
+        let Some(url) = self.get_artist_image_url(id).await? else {
+            return Ok(None);
+        };
+        // The URL doubles as the cache key; the closure streams the body into the file.
+        let path = async_cache_file("api-vgmdb-media", url.clone(), |mut file| async move {
+            info!("downloading image {url:?}");
+            let mut res = self.client.get(url).send().await?.error_for_status()?;
+            while let Some(chunk) = res.chunk().await? {
+                file.write_all(&chunk).await?;
+            }
+            Ok(())
+        })
+        .await?;
+        Ok(Some(path))
+    }
+
+    /// Extracts the image URL of artist `id` from the scraped artist page.
+    ///
+    /// Returns `Ok(None)` when `RE_IMAGE_URL_FROM_HTML` finds no match in the page.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the artist page cannot be scraped.
+    pub async fn get_artist_image_url(&self, id: u64) -> Result<Option<String>> {
+        let html = self.scrape_artist_page(id).await?;
+        Ok(RE_IMAGE_URL_FROM_HTML
+            .captures(&html)
+            .and_then(|cap| cap.name("url"))
+            .map(|m| m.as_str().to_string()))
+    }
+
+    /// Fetches the HTML of `https://vgmdb.net/artist/{id}` through the memory cache.
+    ///
+    /// A rate-limit permit is acquired before the request and is released no earlier
+    /// than one second after it was acquired, whether the request succeeded or not.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the semaphore is closed, if the request fails or answers with an
+    /// error status, or if the body cannot be read as text.
+    pub async fn scrape_artist_page(&self, id: u64) -> Result<Arc<String>> {
+        async_cache_memory("api-vgmdb-artist", id, || async move {
+            let permit = self.rate_limit.clone().acquire_owned().await?;
+            let permit_drop_ts = Instant::now() + Duration::from_secs(1);
+            info!("scrape artist: {id}");
+
+            // Keep the outcome as a value instead of returning early with `?`, so that
+            // the delayed permit release below also runs when the request fails.
+            let resp: Result<String> = async {
+                Ok(self
+                    .client
+                    .get(format!("https://vgmdb.net/artist/{id}"))
+                    .send()
+                    .await?
+                    .error_for_status()?
+                    .text()
+                    .await?)
+            }
+            .await;
+
+            // Release the permit in the background so the caller does not have to wait
+            // for the remainder of the one-second window.
+            tokio::task::spawn(async move {
+                sleep_until(permit_drop_ts).await;
+                drop(permit);
+            });
+
+            resp
+        })
+        .await
+    }
+}