aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- import/src/acoustid.rs 15
-rw-r--r-- import/src/lib.rs 24
-rw-r--r-- import/src/musicbrainz.rs 1
-rw-r--r-- import/src/vgmdb.rs 108
4 files changed, 140 insertions, 8 deletions
diff --git a/import/src/acoustid.rs b/import/src/acoustid.rs
index 8d33821..9083724 100644
--- a/import/src/acoustid.rs
+++ b/import/src/acoustid.rs
@@ -4,7 +4,7 @@
Copyright (C) 2025 metamuffin <metamuffin.org>
*/
use crate::USER_AGENT;
-use anyhow::Result;
+use anyhow::{Context, Result};
use bincode::{Decode, Encode};
use jellybase::cache::async_cache_memory;
use log::info;
@@ -126,11 +126,18 @@ pub(crate) async fn acoustid_fingerprint(path: &Path) -> Result<Arc<Fingerprint>
.arg("-json")
.arg(path)
.stdout(Stdio::piped())
- .spawn()?;
+ .spawn()
+ .context("fpcalc")?;
let mut buf = Vec::new();
- child.stdout.unwrap().read_to_end(&mut buf).await?;
- let out: FpCalcOutput = serde_json::from_slice(&buf)?;
+ child
+ .stdout
+ .unwrap()
+ .read_to_end(&mut buf)
+ .await
+ .context("read fpcalc output")?;
+
+ let out: FpCalcOutput = serde_json::from_slice(&buf).context("parsing fpcalc output")?;
let out = Fingerprint {
duration: out.duration as u32,
fingerprint: out.fingerprint,
diff --git a/import/src/lib.rs b/import/src/lib.rs
index fa74b9c..b93dbec 100644
--- a/import/src/lib.rs
+++ b/import/src/lib.rs
@@ -37,6 +37,7 @@ use tokio::{
task::spawn_blocking,
};
use trakt::Trakt;
+use vgmdb::Vgmdb;
use wikidata::Wikidata;
use wikimedia_commons::WikimediaCommons;
@@ -45,13 +46,14 @@ pub mod infojson;
pub mod musicbrainz;
pub mod tmdb;
pub mod trakt;
+pub mod vgmdb;
pub mod wikidata;
pub mod wikimedia_commons;
pub const USER_AGENT: &'static str = concat!(
"jellything/",
env!("CARGO_PKG_VERSION"),
- " ( https://codeberg.org/metamuffin/jellything )"
+ " +https://codeberg.org/metamuffin/jellything"
);
static IMPORT_SEM: LazyLock<Semaphore> = LazyLock::new(|| Semaphore::new(1));
@@ -67,6 +69,7 @@ struct Apis {
musicbrainz: MusicBrainz,
wikidata: Wikidata,
wikimedia_commons: WikimediaCommons,
+ vgmdb: Vgmdb,
}
pub fn is_importing() -> bool {
@@ -96,6 +99,7 @@ fn import(db: &Database, incremental: bool) -> Result<()> {
musicbrainz: MusicBrainz::new(),
wikidata: Wikidata::new(),
wikimedia_commons: WikimediaCommons::new(),
+ vgmdb: Vgmdb::new(),
};
let rthandle = Handle::current();
@@ -716,7 +720,8 @@ fn apply_musicbrainz_recording(
let artist =
rthandle.block_on(apis.musicbrainz.lookup_artist(artist.id.clone()))?;
- let mut image = None;
+ let mut image_1 = None;
+ let mut image_2 = None;
for rel in &artist.relations {
match rel.type_id.as_str() {
@@ -729,7 +734,18 @@ fn apply_musicbrainz_recording(
let path = rthandle.block_on(
apis.wikimedia_commons.image_by_filename(filename),
)?;
- image = Some(AssetInner::Cache(path).ser());
+ image_1 = Some(AssetInner::Cache(path).ser());
+ }
+ }
+ }
+ VGMDB => {
+ let url = rel.url.as_ref().unwrap().resource.clone();
+ if let Some(id) = url.strip_prefix("https://vgmdb.net/artist/") {
+ let id = id.parse::<u64>().context("parse vgmdb id")?;
+ if let Some(path) =
+ rthandle.block_on(apis.vgmdb.get_artist_image(id))?
+ {
+ image_2 = Some(AssetInner::Cache(path).ser());
}
}
}
@@ -750,7 +766,7 @@ fn apply_musicbrainz_recording(
} else {
rel.target_credit.clone()
},
- headshot: image,
+ headshot: image_1.or(image_2),
ids: ObjectIds::default(),
},
});
diff --git a/import/src/musicbrainz.rs b/import/src/musicbrainz.rs
index 704807b..f05b3de 100644
--- a/import/src/musicbrainz.rs
+++ b/import/src/musicbrainz.rs
@@ -32,6 +32,7 @@ pub mod reltypes {
pub const MIX: &str = "3e3102e1-1896-4f50-b5b2-dd9824e46efe";
pub const INSTRUMENT: &str = "59054b12-01ac-43ee-a618-285fd397e461";
pub const WIKIDATA: &str = "689870a4-a1e4-4912-b17f-7b2664215698";
+ pub const VGMDB: &str = "0af15ab3-c615-46d6-b95b-a5fcd2a92ed9";
}
pub struct MusicBrainz {
diff --git a/import/src/vgmdb.rs b/import/src/vgmdb.rs
new file mode 100644
index 0000000..9ac76d6
--- /dev/null
+++ b/import/src/vgmdb.rs
@@ -0,0 +1,108 @@
+/*
+ This file is part of jellything (https://codeberg.org/metamuffin/jellything)
+ which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
+ Copyright (C) 2025 metamuffin <metamuffin.org>
+*/
+
+use crate::USER_AGENT;
+use anyhow::Result;
+use jellybase::cache::{async_cache_file, async_cache_memory, CachePath};
+use log::info;
+use regex::Regex;
+use reqwest::{
+ header::{HeaderMap, HeaderName, HeaderValue},
+ Client, ClientBuilder,
+};
+use std::{
+ sync::{Arc, LazyLock},
+ time::Duration,
+};
+use tokio::{
+ io::AsyncWriteExt,
+ sync::Semaphore,
+ time::{sleep_until, Instant},
+};
+
+pub struct Vgmdb {
+ client: Client, // HTTP client; `new` installs the default headers on it
+ rate_limit: Arc<Semaphore>, // created with 3 permits; one is acquired per artist-page scrape
+}
+
+static RE_IMAGE_URL_FROM_HTML: LazyLock<Regex> = LazyLock::new(|| {
+    Regex::new(r#"href='(?<url>https://media\.vgm\.io/artists/[-/\w\.]+)'"#).unwrap()
+}); // dots in the host are escaped: a bare `.` would match any character
+
+impl Vgmdb {
+    /// Builds a client that sends the crate user agent and an `x-comment` header.
+    pub fn new() -> Self {
+        let client = ClientBuilder::new()
+            .default_headers(HeaderMap::from_iter([
+                (
+                    HeaderName::from_static("user-agent"),
+                    HeaderValue::from_static(USER_AGENT),
+                ),
+                (
+                    HeaderName::from_static("x-comment"),
+                    HeaderValue::from_static("Please add an API, thanks!"),
+                ),
+            ]))
+            .build()
+            .unwrap();
+        Self {
+            client,
+            rate_limit: Arc::new(Semaphore::new(3)),
+        }
+    }
+
+    /// Downloads the artist's image into the file cache and returns its cache path,
+    /// or `None` when the artist page links no image.
+    pub async fn get_artist_image(&self, id: u64) -> Result<Option<CachePath>> {
+        let Some(url) = self.get_artist_image_url(id).await? else {
+            return Ok(None);
+        };
+        let path = async_cache_file("api-vgmdb-media", url.clone(), |mut file| async move {
+            info!("downloading image {url:?}");
+            let mut res = self.client.get(url).send().await?.error_for_status()?;
+            while let Some(chunk) = res.chunk().await? {
+                file.write_all(&chunk).await?;
+            }
+            Ok(())
+        })
+        .await?;
+        Ok(Some(path))
+    }
+
+    /// Returns the first image URL matched on the scraped artist page, if any.
+    pub async fn get_artist_image_url(&self, id: u64) -> Result<Option<String>> {
+        let html = self.scrape_artist_page(id).await?;
+        Ok(RE_IMAGE_URL_FROM_HTML
+            .captures(&html)
+            .and_then(|cap| cap.name("url"))
+            .map(|m| m.as_str().to_string()))
+    }
+
+    /// Fetches the HTML of `https://vgmdb.net/artist/{id}` through the memory cache.
+    /// Each request holds one of the semaphore's permits for at least one second.
+    pub async fn scrape_artist_page(&self, id: u64) -> Result<Arc<String>> {
+        async_cache_memory("api-vgmdb-artist", id, || async move {
+            let permit = self.rate_limit.clone().acquire_owned().await?;
+            let permit_drop_ts = Instant::now() + Duration::from_secs(1);
+            info!("scrape artist: {id}");
+            // Keep the outcome as a value: an early `?` here would drop the permit at
+            // once, letting failed requests skip the one-second hold.
+            let url = format!("https://vgmdb.net/artist/{id}");
+            let resp = async {
+                let resp = self.client.get(url).send().await?.error_for_status()?;
+                Ok::<_, anyhow::Error>(resp.text().await?)
+            }
+            .await;
+            // Released no earlier than `permit_drop_ts`, whether the request worked or not.
+            tokio::task::spawn(async move {
+                sleep_until(permit_drop_ts).await;
+                drop(permit);
+            });
+            resp
+        })
+        .await
+    }
+}