1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
|
/*
This file is part of jellything (https://codeberg.org/metamuffin/jellything)
which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
Copyright (C) 2025 metamuffin <metamuffin.org>
*/
use crate::USER_AGENT;
use anyhow::{Context, Result};
use jellycache::{async_cache_file, async_cache_memory, CachePath};
use log::info;
use regex::Regex;
use reqwest::{
header::{HeaderMap, HeaderName, HeaderValue},
Client, ClientBuilder,
};
use std::{
sync::{Arc, LazyLock},
time::Duration,
};
use tokio::{
io::AsyncWriteExt,
sync::Semaphore,
time::{sleep_until, Instant},
};
/// Scraper client for vgmdb.net artist pages and the artist images linked from them.
pub struct Vgmdb {
/// HTTP client used for all requests; built in `Vgmdb::new` with default headers.
client: Client,
/// Semaphore whose permits are acquired while scraping artist pages.
rate_limit: Arc<Semaphore>,
}
/// Matches a single-quoted `href` attribute that points at an artist image on
/// `https://media.vgm.io/artists/` and captures the full URL in the group named `url`.
///
/// The dots of the host name are escaped so they only match a literal `.`; an unescaped
/// `.` would also accept look-alike hosts such as `mediaXvgm.io`.
static RE_IMAGE_URL_FROM_HTML: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r#"href='(?<url>https://media\.vgm\.io/artists/[-/\w\.]+)'"#)
        .expect("artist image URL pattern is a valid regex")
});
impl Vgmdb {
    /// Creates a scraper with its own HTTP client.
    ///
    /// Every request sent through the client carries the crate's `USER_AGENT` as
    /// `user-agent` plus an `x-comment` header. The rate-limit semaphore starts with
    /// three permits.
    ///
    /// # Panics
    ///
    /// Panics if the HTTP client cannot be built.
    pub fn new() -> Self {
        let headers = HeaderMap::from_iter([
            (
                HeaderName::from_static("user-agent"),
                HeaderValue::from_static(USER_AGENT),
            ),
            (
                HeaderName::from_static("x-comment"),
                HeaderValue::from_static("Please add an API, thanks!"),
            ),
        ]);
        let client = ClientBuilder::new()
            .default_headers(headers)
            .build()
            .expect("failed to build HTTP client for vgmdb");
        Self {
            client,
            rate_limit: Arc::new(Semaphore::new(3)),
        }
    }

    /// Downloads the image of the artist with the given vgmdb `id` through
    /// `async_cache_file` and returns the resulting cache path.
    ///
    /// Returns `Ok(None)` when the artist page contains no image link.
    ///
    /// # Errors
    ///
    /// Fails if the artist page cannot be scraped, if the image request fails or answers
    /// with an error status, or if writing the image to the cache file fails.
    pub async fn get_artist_image(&self, id: u64) -> Result<Option<CachePath>> {
        let Some(url) = self.get_artist_image_url(id).await? else {
            return Ok(None);
        };
        let path = async_cache_file("api-vgmdb-media", url.clone(), |mut file| async move {
            info!("downloading image {url:?}");
            let mut res = self.client.get(url).send().await?.error_for_status()?;
            while let Some(chunk) = res.chunk().await? {
                file.write_all(&chunk).await?;
            }
            // The file handle is owned by this closure and dropped right after it, so this
            // is the last chance to push out pending writes and to observe their errors.
            file.flush().await?;
            Ok(())
        })
        .await
        .context("vgmdb media download")?;
        Ok(Some(path))
    }

    /// Extracts the artist image URL from the scraped page of the artist with the given
    /// vgmdb `id`.
    ///
    /// Returns `Ok(None)` when `RE_IMAGE_URL_FROM_HTML` does not match the page.
    ///
    /// # Errors
    ///
    /// Fails if the artist page cannot be scraped.
    pub async fn get_artist_image_url(&self, id: u64) -> Result<Option<String>> {
        let html = self.scrape_artist_page(id).await?;
        let url = RE_IMAGE_URL_FROM_HTML
            .captures(&html)
            .and_then(|cap| cap.name("url"))
            .map(|m| m.as_str().to_owned());
        Ok(url)
    }

    /// Fetches the HTML of `https://vgmdb.net/artist/{id}` through `async_cache_memory`,
    /// keyed by `id`.
    ///
    /// A rate-limit permit is acquired before the request and released by a background
    /// task no earlier than one second after it was acquired. This happens whether the
    /// request succeeded or failed, so failing requests are throttled as well.
    ///
    /// # Errors
    ///
    /// Fails if the semaphore is closed, if the request cannot be sent, if the server
    /// answers with an error status, or if the body cannot be read as text.
    pub async fn scrape_artist_page(&self, id: u64) -> Result<Arc<String>> {
        async_cache_memory("api-vgmdb-artist", id, || async move {
            let permit = self.rate_limit.clone().acquire_owned().await?;
            let permit_drop_ts = Instant::now() + Duration::from_secs(1);
            info!("scrape artist: {id}");
            // Collect the outcome without an early return: the delayed permit release
            // below must also be scheduled when the request fails.
            let response = self
                .client
                .get(format!("https://vgmdb.net/artist/{id}"))
                .send()
                .await
                .and_then(|res| res.error_for_status());
            let page = match response {
                Ok(res) => res.text().await,
                Err(err) => Err(err),
            };
            tokio::task::spawn(async move {
                sleep_until(permit_drop_ts).await;
                drop(permit);
            });
            Ok(page?)
        })
        .await
        .context("vgmdb artist page scrape")
    }
}
|