1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
/*
This file is part of jellything (https://codeberg.org/metamuffin/jellything)
which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
Copyright (C) 2025 metamuffin <metamuffin.org>
*/
use crate::USER_AGENT;
use anyhow::{Context, Result};
use jellycache::{cache, cache_store, HashKey};
use jellycommon::Asset;
use log::info;
use regex::Regex;
use reqwest::{
header::{HeaderMap, HeaderName, HeaderValue},
Client, ClientBuilder,
};
use std::{
sync::{Arc, LazyLock},
time::Duration,
};
use tokio::{
runtime::Handle,
sync::Semaphore,
time::{sleep_until, Instant},
};
/// Scraper for artist pages and artist images on vgmdb.net.
pub struct Vgmdb {
/// HTTP client used for every request; built with default headers in `Vgmdb::new`.
client: Client,
/// Semaphore whose permits are acquired before an artist page is requested.
rate_limit: Arc<Semaphore>,
}
/// Matches a single-quoted `href` attribute pointing at an artist image on
/// `media.vgm.io`; the URL itself is exposed as the named group `url`.
///
/// The dots of the host name are escaped so that only the literal host
/// `media.vgm.io` is accepted, not look-alike hosts where a dot is replaced
/// by an arbitrary character.
static RE_IMAGE_URL_FROM_HTML: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r#"href='(?<url>https://media\.vgm\.io/artists/[-/\w\.]+)'"#)
        .expect("artist image URL pattern is a valid regex")
});
impl Default for Vgmdb {
fn default() -> Self {
Self::new()
}
}
impl Vgmdb {
pub fn new() -> Self {
let client = ClientBuilder::new()
.default_headers(HeaderMap::from_iter([
(
HeaderName::from_static("user-agent"),
HeaderValue::from_static(USER_AGENT),
),
(
HeaderName::from_static("x-comment"),
HeaderValue::from_static("Please add an API, thanks!"),
),
]))
.build()
.unwrap();
Self {
client,
rate_limit: Arc::new(Semaphore::new(3)),
}
}
pub fn get_artist_image(&self, id: u64, rt: &Handle) -> Result<Option<Asset>> {
if let Some(url) = self.get_artist_image_url(id, rt)? {
cache_store(
format!("ext/vgmdb/artist-image/{}.image", HashKey(&url)),
move || {
rt.block_on(async {
info!("downloading image {url:?}");
Ok(self
.client
.get(url)
.send()
.await?
.error_for_status()?
.bytes()
.await?
.to_vec())
})
},
)
.context("vgmdb media download")
.map(Asset)
.map(Some)
} else {
Ok(None)
}
}
pub fn get_artist_image_url(&self, id: u64, rt: &Handle) -> Result<Option<String>> {
let html = self.scrape_artist_page(id, rt)?;
if let Some(cap) = RE_IMAGE_URL_FROM_HTML.captures(&str::from_utf8(&html).unwrap()) {
if let Some(url) = cap.name("url").map(|m| m.as_str()) {
return Ok(Some(url.to_string()));
}
}
Ok(None)
}
pub fn scrape_artist_page(&self, id: u64, rt: &Handle) -> Result<Vec<u8>> {
cache(&format!("ext/vgmdb/artist-page/{id}.html"), move || {
rt.block_on(async {
let _permit = self.rate_limit.clone().acquire_owned().await?;
let permit_drop_ts = Instant::now() + Duration::from_secs(1);
info!("scrape artist: {id}");
let resp = self
.client
.get(format!("https://vgmdb.net/artist/{id}"))
.send()
.await?
.error_for_status()?
.bytes()
.await?
.to_vec();
tokio::task::spawn(async move {
sleep_until(permit_drop_ts).await;
drop(_permit);
});
Ok(resp)
})
})
.context("vgmdb artist page scrape")
}
}
|