about | summary | refs | log | tree | commit | diff
path: root/import/src
diff options
context:
space:
mode:
author: metamuffin <metamuffin@disroot.org> 2025-12-08 19:53:12 +0100
committer: metamuffin <metamuffin@disroot.org> 2025-12-08 19:53:12 +0100
commit: 6edf0fd93abf7e58b4c0974e3d3e54bcf8517946 (patch)
tree: 32577db9d987897d4037ba9af0084b95b55e145c /import/src
parent: e4584a8135584e6591bac7d5397cf227cf3cff92 (diff)
download: jellything-6edf0fd93abf7e58b4c0974e3d3e54bcf8517946.tar
jellything-6edf0fd93abf7e58b4c0974e3d3e54bcf8517946.tar.bz2
jellything-6edf0fd93abf7e58b4c0974e3d3e54bcf8517946.tar.zst
human-readable cache keys
Diffstat (limited to 'import/src')
-rw-r--r-- import/src/acoustid.rs 6
-rw-r--r-- import/src/lib.rs 161
-rw-r--r-- import/src/musicbrainz.rs 169
-rw-r--r-- import/src/tmdb.rs 20
-rw-r--r-- import/src/trakt.rs 65
-rw-r--r-- import/src/vgmdb.rs 92
-rw-r--r-- import/src/wikidata.rs 31
-rw-r--r-- import/src/wikimedia_commons.rs 8
8 files changed, 289 insertions, 263 deletions
diff --git a/import/src/acoustid.rs b/import/src/acoustid.rs
index 809d964..01adb57 100644
--- a/import/src/acoustid.rs
+++ b/import/src/acoustid.rs
@@ -5,7 +5,7 @@
*/
use crate::USER_AGENT;
use anyhow::{Context, Result};
-use jellycache::{cache_memory, CacheKey};
+use jellycache::{cache_memory, HashKey};
use log::info;
use reqwest::{
header::{HeaderMap, HeaderName, HeaderValue},
@@ -95,7 +95,7 @@ impl AcoustID {
}
pub fn lookup(&self, fp: Fingerprint, rt: &Handle) -> Result<Arc<AcoustIDLookupResponse>> {
- cache_memory(CacheKey::new_json(("acoustid-lookup", &fp)) , move || rt.block_on(async {
+ cache_memory(&format!("ext/acoustid/{}.json", HashKey(&fp)) , move || rt.block_on(async {
let _permit = self.rate_limit.clone().acquire_owned().await?;
let permit_drop_ts = Instant::now() + Duration::SECOND;
info!("acoustid lookup");
@@ -126,7 +126,7 @@ impl AcoustID {
pub(crate) fn acoustid_fingerprint(path: &Path) -> Result<Arc<Fingerprint>> {
cache_memory(
- CacheKey::new_json(("acoustid-fingerprint", path)),
+ &format!("media/chromaprint/{}.json", HashKey(path)),
move || {
let child = Command::new("fpcalc")
.arg("-json")
diff --git a/import/src/lib.rs b/import/src/lib.rs
index af13316..e31127e 100644
--- a/import/src/lib.rs
+++ b/import/src/lib.rs
@@ -20,10 +20,10 @@ use crate::{tmdb::TmdbKind, trakt::TraktKind};
use acoustid::{acoustid_fingerprint, AcoustID};
use anyhow::{anyhow, bail, Context, Result};
use infojson::YVideo;
-use jellycache::{cache, cache_memory, cache_store, CacheKey};
+use jellycache::{cache_memory, cache_read, cache_store, HashKey};
use jellycommon::{
- Appearance, Chapter, CreditCategory, IdentifierType, MediaInfo, Node, NodeID, NodeKind,
- Picture, PictureSlot, RatingType, SourceTrack, SourceTrackKind, TrackSource, Visibility,
+ Appearance, Asset, Chapter, CreditCategory, IdentifierType, MediaInfo, Node, NodeID, NodeKind,
+ PictureSlot, RatingType, SourceTrack, SourceTrackKind, TrackSource, Visibility,
};
use jellyimport_fallback_generator::generate_fallback;
use jellyremuxer::{
@@ -261,27 +261,31 @@ fn import_file(
match filename.as_ref() {
"poster.jpeg" | "poster.webp" | "poster.png" => {
info!("import poster at {path:?}");
- let key = CacheKey::new_image(path);
- cache(key, || {
- let mut data = Vec::new();
- File::open(path)?.read_to_end(&mut data)?;
- Ok(data)
- })?;
+ let asset = Asset(cache_store(
+ format!("media/literal/{}-poster.image", HashKey(path)),
+ || {
+ let mut data = Vec::new();
+ File::open(path)?.read_to_end(&mut data)?;
+ Ok(data)
+ },
+ )?);
db.update_node_init(parent, |node| {
- node.pictures.insert(PictureSlot::Cover, Picture(key.0));
+ node.pictures.insert(PictureSlot::Cover, asset);
Ok(())
})?;
}
"backdrop.jpeg" | "backdrop.webp" | "backdrop.png" => {
info!("import backdrop at {path:?}");
- let key = CacheKey::new_image(path);
- cache(key, || {
- let mut data = Vec::new();
- File::open(path)?.read_to_end(&mut data)?;
- Ok(data)
- })?;
+ let asset = Asset(cache_store(
+ format!("media/literal/{}-poster.image", HashKey(path)),
+ || {
+ let mut data = Vec::new();
+ File::open(path)?.read_to_end(&mut data)?;
+ Ok(data)
+ },
+ )?);
db.update_node_init(parent, |node| {
- node.pictures.insert(PictureSlot::Backdrop, Picture(key.0));
+ node.pictures.insert(PictureSlot::Backdrop, asset);
Ok(())
})?;
}
@@ -356,25 +360,63 @@ fn import_file(
}
pub fn read_media_metadata(path: &Path) -> Result<Arc<matroska::Segment>> {
- cache_memory(CacheKey::new_json(path), move || {
- let media = File::open(path)?;
- let mut media =
- create_demuxer_autodetect(Box::new(media))?.ok_or(anyhow!("media format unknown"))?;
+ cache_memory(
+ &format!("media/metadata/{}.json", HashKey(path)),
+ move || {
+ let media = File::open(path)?;
+ let mut media = create_demuxer_autodetect(Box::new(media))?
+ .ok_or(anyhow!("media format unknown"))?;
- let info = media.info()?;
- let tracks = media.tracks()?;
- let tags = media.tags()?;
- let attachments = media.attachments()?;
- let chapters = media.chapters()?;
- Ok(Segment {
- info,
- tracks,
- tags: tags.into_iter().collect(),
- attachments,
- chapters,
- ..Default::default()
- })
- })
+ let info = media.info()?;
+ let tracks = media.tracks()?;
+ let tags = media.tags()?;
+ let mut attachments = media.attachments()?;
+ let chapters = media.chapters()?;
+
+ // Replace data of useful attachments with cache key; delete data of all others
+ if let Some(attachments) = &mut attachments {
+ for att in &mut attachments.files {
+ if let Some(fname) = attachment_types::is_useful_attachment(&att) {
+ let key = cache_store(
+ format!("media/attachment/{}-{fname}", HashKey(path)),
+ || Ok(att.data.clone()),
+ )?;
+ att.data = key.as_bytes().to_vec();
+ } else {
+ att.data.clear();
+ }
+ }
+ }
+
+ Ok(Segment {
+ info,
+ tracks,
+ tags: tags.into_iter().collect(),
+ attachments,
+ chapters,
+ ..Default::default()
+ })
+ },
+ )
+}
+
+mod attachment_types {
+ use jellyremuxer::matroska::AttachedFile;
+
+ pub fn is_useful_attachment(a: &AttachedFile) -> Option<&'static str> {
+ match a {
+ _ if is_info_json(&a) => Some("info.json"),
+ _ if is_cover(&a) => Some("cover.image"),
+ _ => None,
+ }
+ }
+
+ pub fn is_info_json(a: &&AttachedFile) -> bool {
+ a.name == "info.json" && a.media_type == "application/json"
+ }
+ pub fn is_cover(a: &&AttachedFile) -> bool {
+ a.name.starts_with("cover") && a.media_type.starts_with("image/")
+ }
}
fn import_media_file(
@@ -392,8 +434,12 @@ fn import_media_file(
.attachments
.iter()
.flat_map(|a| &a.files)
- .find(|a| a.name == "info.json" && a.media_type == "application/json")
- .map(|d| serde_json::from_slice::<infojson::YVideo>(&d.data))
+ .find(attachment_types::is_info_json)
+ .map(|att| {
+ let data = cache_read(str::from_utf8(&att.data).unwrap())?
+ .ok_or(anyhow!("info json cache missing"))?;
+ anyhow::Ok(serde_json::from_slice::<infojson::YVideo>(&data)?)
+ })
.transpose()
.context("infojson parsing")?;
@@ -401,13 +447,8 @@ fn import_media_file(
.attachments
.iter()
.flat_map(|a| &a.files)
- .find(|a| a.name.starts_with("cover") && a.media_type.starts_with("image/"))
- .map(|att| {
- cache_store(CacheKey::new_image(("cover", path)), || {
- Ok(att.data.clone())
- })
- })
- .transpose()?;
+ .find(attachment_types::is_cover)
+ .map(|att| Asset(att.data.clone().try_into().unwrap()));
let mut tags = m
.tags
@@ -519,7 +560,7 @@ fn import_media_file(
node.identifiers.extend(eids);
if let Some(cover) = cover {
- node.pictures.insert(PictureSlot::Cover, Picture(cover.0));
+ node.pictures.insert(PictureSlot::Cover, cover);
}
if let Some(ct) = tags.get("CONTENT_TYPE") {
@@ -676,7 +717,7 @@ fn import_media_file(
let tmdb_details =
tmdb.episode_details(tmdb_id, season, episode, rthandle)?;
if let Some(still) = &tmdb_details.still_path {
- poster = Some(Picture(tmdb.image(still, rthandle)?.0))
+ poster = Some(tmdb.image(still, rthandle)?)
}
}
}
@@ -816,10 +857,10 @@ fn apply_musicbrainz_recording(
if let Some(filename) =
apis.wikidata.query_image_path(id.to_owned(), rthandle)?
{
- let path = apis
- .wikimedia_commons
- .image_by_filename(filename, rthandle)?;
- image_1 = Some(Picture(path.0));
+ image_1 = Some(
+ apis.wikimedia_commons
+ .image_by_filename(filename, rthandle)?,
+ );
}
}
}
@@ -828,7 +869,7 @@ fn apply_musicbrainz_recording(
if let Some(id) = url.strip_prefix("https://vgmdb.net/artist/") {
let id = id.parse::<u64>().context("parse vgmdb id")?;
if let Some(path) = apis.vgmdb.get_artist_image(id, rthandle)? {
- image_2 = Some(Picture(path.0));
+ image_2 = Some(path);
}
}
}
@@ -841,14 +882,12 @@ fn apply_musicbrainz_recording(
}
jobs.extend(rel.attributes.clone());
- let headshot = match image_1.or(image_2) {
+ let _headshot = match image_1.or(image_2) {
Some(x) => x,
- None => Picture(
- cache_store(CacheKey::new_image(("fallback", &artist.sort_name)), || {
- generate_fallback(&artist.sort_name)
- })?
- .0,
- ),
+ None => Asset(cache_store(
+ format!("fallback/{}.image", HashKey(&artist.sort_name)),
+ || generate_fallback(&artist.sort_name),
+ )?),
};
node.credits.entry(group).or_default().push(Appearance {
@@ -913,12 +952,10 @@ fn apply_trakt_tmdb(
tmdb_data = Some(data.clone());
if let Some(path) = &data.backdrop_path {
- let im = tmdb.image(path, rthandle).context("tmdb backdrop image")?;
- backdrop = Some(Picture(im.0));
+ backdrop = Some(tmdb.image(path, rthandle).context("tmdb backdrop image")?);
}
if let Some(path) = &data.poster_path {
- let im = tmdb.image(path, rthandle).context("tmdb poster image")?;
- poster = Some(Picture(im.0));
+ poster = Some(tmdb.image(path, rthandle).context("tmdb poster image")?);
}
// for p in people_map.values_mut().flatten() {
diff --git a/import/src/musicbrainz.rs b/import/src/musicbrainz.rs
index 92df703..fe86175 100644
--- a/import/src/musicbrainz.rs
+++ b/import/src/musicbrainz.rs
@@ -6,7 +6,7 @@
use crate::USER_AGENT;
use anyhow::{Context, Result};
-use jellycache::{cache_memory, CacheContentType, CacheKey};
+use jellycache::cache_memory;
use log::info;
use reqwest::{
header::{HeaderMap, HeaderName, HeaderValue},
@@ -223,107 +223,96 @@ impl MusicBrainz {
}
pub fn lookup_recording(&self, id: String, rt: &Handle) -> Result<Arc<MbRecordingRel>> {
- cache_memory(
- CacheKey::new(
- CacheContentType::Json,
- ("musicbrainz-recording-lookup", &id),
- ),
- move || {
- rt.block_on(async {
- let _permit = self.rate_limit.clone().acquire_owned().await?;
- let permit_drop_ts =
- Instant::now() + Duration::from_secs(Self::MAX_PAR_REQ as u64);
- info!("recording lookup: {id}");
+ cache_memory(&format!("ext/musicbrainz/recording/{id}.json"), move || {
+ rt.block_on(async {
+ let _permit = self.rate_limit.clone().acquire_owned().await?;
+ let permit_drop_ts = Instant::now() + Duration::from_secs(Self::MAX_PAR_REQ as u64);
+ info!("recording lookup: {id}");
- let inc = [
- "isrcs",
- "artists",
- "area-rels",
- "artist-rels",
- "event-rels",
- "genre-rels",
- "instrument-rels",
- "label-rels",
- "place-rels",
- "recording-rels",
- "release-rels",
- "release-group-rels",
- "series-rels",
- "url-rels",
- "work-rels",
- ]
- .join("+");
+ let inc = [
+ "isrcs",
+ "artists",
+ "area-rels",
+ "artist-rels",
+ "event-rels",
+ "genre-rels",
+ "instrument-rels",
+ "label-rels",
+ "place-rels",
+ "recording-rels",
+ "release-rels",
+ "release-group-rels",
+ "series-rels",
+ "url-rels",
+ "work-rels",
+ ]
+ .join("+");
- let resp = self
- .client
- .get(format!(
- "https://musicbrainz.org/ws/2/recording/{id}?inc={inc}"
- ))
- .send()
- .await?
- .error_for_status()?
- .json::<MbRecordingRel>()
- .await?;
+ let resp = self
+ .client
+ .get(format!(
+ "https://musicbrainz.org/ws/2/recording/{id}?inc={inc}"
+ ))
+ .send()
+ .await?
+ .error_for_status()?
+ .json::<MbRecordingRel>()
+ .await?;
- tokio::task::spawn(async move {
- sleep_until(permit_drop_ts).await;
- drop(_permit);
- });
+ tokio::task::spawn(async move {
+ sleep_until(permit_drop_ts).await;
+ drop(_permit);
+ });
- Ok(resp)
- })
- },
- )
+ Ok(resp)
+ })
+ })
.context("musicbrainz recording lookup")
}
pub fn lookup_artist(&self, id: String, rt: &Handle) -> Result<Arc<MbArtistRel>> {
- cache_memory(
- CacheKey::new(CacheContentType::Json, ("musicbrainz-artist-lookup", &id)),
- move || {
- rt.block_on(async {
- let _permit = self.rate_limit.clone().acquire_owned().await?;
- let permit_drop_ts =
- Instant::now() + Duration::from_secs(Self::MAX_PAR_REQ as u64);
- info!("artist lookup: {id}");
+ cache_memory(&format!("ext/musicbrainz/artist/{id}.json"), move || {
+ rt.block_on(async {
+ let _permit = self.rate_limit.clone().acquire_owned().await?;
+ let permit_drop_ts = Instant::now() + Duration::from_secs(Self::MAX_PAR_REQ as u64);
+ info!("artist lookup: {id}");
- let inc = [
- "area-rels",
- "artist-rels",
- "event-rels",
- "genre-rels",
- "instrument-rels",
- "label-rels",
- "place-rels",
- "recording-rels",
- "release-rels",
- "release-group-rels",
- "series-rels",
- "url-rels",
- "work-rels",
- ]
- .join("+");
+ let inc = [
+ "area-rels",
+ "artist-rels",
+ "event-rels",
+ "genre-rels",
+ "instrument-rels",
+ "label-rels",
+ "place-rels",
+ "recording-rels",
+ "release-rels",
+ "release-group-rels",
+ "series-rels",
+ "url-rels",
+ "work-rels",
+ ]
+ .join("+");
- let resp = self
- .client
- .get(format!(
- "https://musicbrainz.org/ws/2/artist/{id}?inc={inc}"
- ))
- .send()
- .await?
- .error_for_status()?
- .json::<MbArtistRel>()
- .await?;
+ let resp = self
+ .client
+ .get(format!(
+ "https://musicbrainz.org/ws/2/artist/{id}?inc={inc}"
+ ))
+ .send()
+ .await?
+ .error_for_status()?
+ .json::<MbArtistRel>()
+ .await?;
- tokio::task::spawn(async move {
- sleep_until(permit_drop_ts).await;
- drop(_permit);
- });
+ tokio::task::spawn(async move {
+ sleep_until(permit_drop_ts).await;
+ drop(_permit);
+ });
- Ok(resp)
- })
- },
- )
+ Ok(resp)
+ })
+ })
.context("musicbrainz artist lookup")
}
}
diff --git a/import/src/tmdb.rs b/import/src/tmdb.rs
index d4a0c25..219c1f6 100644
--- a/import/src/tmdb.rs
+++ b/import/src/tmdb.rs
@@ -5,8 +5,11 @@
*/
use crate::USER_AGENT;
use anyhow::{anyhow, bail, Context, Result};
-use jellycache::{cache_memory, cache_store, CacheKey};
-use jellycommon::chrono::{format::Parsed, Utc};
+use jellycache::{cache_memory, cache_store, HashKey};
+use jellycommon::{
+ chrono::{format::Parsed, Utc},
+ Asset,
+};
use log::info;
use reqwest::{
header::{HeaderMap, HeaderName, HeaderValue},
@@ -46,7 +49,7 @@ impl Tmdb {
}
pub fn search(&self, kind: TmdbKind, query: &str, rt: &Handle) -> Result<Arc<TmdbQuery>> {
cache_memory(
- CacheKey::new_json(("api-tmdb-search", kind, query)),
+ &format!("ext/tmdb/search/{kind}-{}.json", HashKey(query)),
move || {
rt.block_on(async {
info!("searching tmdb: {query:?}");
@@ -68,7 +71,7 @@ impl Tmdb {
.context("tmdb search")
}
pub fn details(&self, kind: TmdbKind, id: u64, rt: &Handle) -> Result<Arc<TmdbDetails>> {
- cache_memory(CacheKey::new_json(("tmdb-details", kind, id)), move || {
+ cache_memory(&format!("ext/tmdb/details/{kind}-{id}.json"), move || {
rt.block_on(async {
info!("fetching details: {id:?}");
Ok(self
@@ -87,7 +90,7 @@ impl Tmdb {
.context("tmdb details")
}
pub fn person_image(&self, id: u64, rt: &Handle) -> Result<Arc<TmdbPersonImage>> {
- cache_memory(CacheKey::new_image(("tmdb-person-image", id)), move || {
+ cache_memory(&format!("ext/tmdb/person/images/{id}.json"), move || {
rt.block_on(async {
Ok(self
.client
@@ -104,8 +107,8 @@ impl Tmdb {
})
.context("tmdb person images")
}
- pub fn image(&self, path: &str, rt: &Handle) -> Result<CacheKey> {
- cache_store(CacheKey::new_image(("tmdb-image", path)), move || {
+ pub fn image(&self, path: &str, rt: &Handle) -> Result<Asset> {
+ cache_store(format!("ext/tmdb/image/{path}.json"), move || {
rt.block_on(async {
info!("downloading image {path:?}");
Ok(self
@@ -120,6 +123,7 @@ impl Tmdb {
})
})
.context("tmdb image download")
+ .map(Asset)
}
pub fn episode_details(
@@ -129,7 +133,7 @@ impl Tmdb {
episode: usize,
rt: &Handle,
) -> Result<Arc<TmdbEpisode>> {
- cache_memory(CacheKey::new_json(("tmdb-episode-details", series_id, season, episode)), move || {
+ cache_memory(&format!("ext/tmdb/episode-details/{series_id}-S{season}-E{episode}.json"), move || {
rt.block_on(async {
info!("tmdb episode details {series_id} S={season} E={episode}");
Ok(self
diff --git a/import/src/trakt.rs b/import/src/trakt.rs
index 4a4beea..a241725 100644
--- a/import/src/trakt.rs
+++ b/import/src/trakt.rs
@@ -4,8 +4,8 @@
Copyright (C) 2025 metamuffin <metamuffin.org>
*/
use crate::USER_AGENT;
-use anyhow::Context;
-use jellycache::{cache_memory, CacheKey};
+use anyhow::{Context, Result};
+use jellycache::{cache_memory, HashKey};
use jellycommon::{Appearance, CreditCategory, NodeID, NodeKind};
use log::info;
use reqwest::{
@@ -51,32 +51,30 @@ impl Trakt {
kinds: &[TraktKind],
query: &str,
rt: &Handle,
- ) -> anyhow::Result<Arc<Vec<TraktSearchResult>>> {
- cache_memory(CacheKey::new_json(("trakt-lookup", query)), move || {
- rt.block_on(async {
- let url = format!(
- "https://api.trakt.tv/search/{}?query={}&extended=full",
- kinds
- .iter()
- .map(|t| t.singular())
- .collect::<Vec<_>>()
- .join(","),
- urlencoding::encode(query),
- );
- let res = self.client.get(url).send().await?.error_for_status()?;
- Ok(res.json().await?)
- })
- })
+ ) -> Result<Arc<Vec<TraktSearchResult>>> {
+ cache_memory(
+ &format!("ext/trakt/search/{}.json", HashKey(query)),
+ move || {
+ rt.block_on(async {
+ let url = format!(
+ "https://api.trakt.tv/search/{}?query={}&extended=full",
+ kinds
+ .iter()
+ .map(|t| t.singular())
+ .collect::<Vec<_>>()
+ .join(","),
+ urlencoding::encode(query),
+ );
+ let res = self.client.get(url).send().await?.error_for_status()?;
+ Ok(res.json().await?)
+ })
+ },
+ )
.context("trakt search")
}
- pub fn lookup(
- &self,
- kind: TraktKind,
- id: u64,
- rt: &Handle,
- ) -> anyhow::Result<Arc<TraktMediaObject>> {
- cache_memory(CacheKey::new_json(("trakt-lookup", kind, id)), move || {
+ pub fn lookup(&self, kind: TraktKind, id: u64, rt: &Handle) -> Result<Arc<TraktMediaObject>> {
+ cache_memory(&format!("ext/trakt/lookup/{kind}-{id}.json"), move || {
rt.block_on(async {
info!("trakt lookup {kind:?}:{id:?}");
let url = format!("https://api.trakt.tv/{}/{id}?extended=full", kind.plural());
@@ -87,13 +85,8 @@ impl Trakt {
.context("trakt lookup")
}
- pub fn people(
- &self,
- kind: TraktKind,
- id: u64,
- rt: &Handle,
- ) -> anyhow::Result<Arc<TraktPeople>> {
- cache_memory(CacheKey::new_json(("trakt-people", kind, id)), move || {
+ pub fn people(&self, kind: TraktKind, id: u64, rt: &Handle) -> Result<Arc<TraktPeople>> {
+ cache_memory(&format!("ext/trakt/people/{kind}-{id}.json"), move || {
rt.block_on(async {
info!("trakt people {kind:?}:{id:?}");
let url = format!(
@@ -107,8 +100,8 @@ impl Trakt {
.context("trakt people")
}
- pub fn show_seasons(&self, id: u64, rt: &Handle) -> anyhow::Result<Arc<Vec<TraktSeason>>> {
- cache_memory(CacheKey::new_json(("trakt-seasons", id)), move || {
+ pub fn show_seasons(&self, id: u64, rt: &Handle) -> Result<Arc<Vec<TraktSeason>>> {
+ cache_memory(&format!("ext/trakt/seasons/{id}"), move || {
rt.block_on(async {
info!("trakt seasons {id:?}");
let url = format!("https://api.trakt.tv/shows/{id}/seasons?extended=full");
@@ -124,9 +117,9 @@ impl Trakt {
id: u64,
season: usize,
rt: &Handle,
- ) -> anyhow::Result<Arc<Vec<TraktEpisode>>> {
+ ) -> Result<Arc<Vec<TraktEpisode>>> {
cache_memory(
- CacheKey::new_json(("trakt-episodes", id, season)),
+ &format!("ext/trakt/episodes/{id}-S{season}.json"),
move || {
rt.block_on(async {
info!("trakt episodes {id:?} season={season}");
diff --git a/import/src/vgmdb.rs b/import/src/vgmdb.rs
index 4e37ba3..402fd90 100644
--- a/import/src/vgmdb.rs
+++ b/import/src/vgmdb.rs
@@ -6,7 +6,8 @@
use crate::USER_AGENT;
use anyhow::{Context, Result};
-use jellycache::{cache_memory, cache_store, CacheContentType, CacheKey};
+use jellycache::{cache, cache_store, HashKey};
+use jellycommon::Asset;
use log::info;
use regex::Regex;
use reqwest::{
@@ -59,28 +60,28 @@ impl Vgmdb {
}
}
- pub fn get_artist_image(&self, id: u64, rt: &Handle) -> Result<Option<CacheKey>> {
+ pub fn get_artist_image(&self, id: u64, rt: &Handle) -> Result<Option<Asset>> {
if let Some(url) = self.get_artist_image_url(id, rt)? {
- Ok(Some(
- cache_store(
- CacheKey::new_image(("vgmdb-artist-media", &url)),
- move || {
- rt.block_on(async {
- info!("downloading image {url:?}");
- Ok(self
- .client
- .get(url)
- .send()
- .await?
- .error_for_status()?
- .bytes()
- .await?
- .to_vec())
- })
- },
- )
- .context("vgmdb media download")?,
- ))
+ cache_store(
+ format!("ext/vgmdb/artist-image/{}.image", HashKey(&url)),
+ move || {
+ rt.block_on(async {
+ info!("downloading image {url:?}");
+ Ok(self
+ .client
+ .get(url)
+ .send()
+ .await?
+ .error_for_status()?
+ .bytes()
+ .await?
+ .to_vec())
+ })
+ },
+ )
+ .context("vgmdb media download")
+ .map(Asset)
+ .map(Some)
} else {
Ok(None)
}
@@ -96,34 +97,31 @@ impl Vgmdb {
Ok(None)
}
- pub fn scrape_artist_page(&self, id: u64, rt: &Handle) -> Result<Arc<Vec<u8>>> {
- cache_memory(
- CacheKey::new(CacheContentType::Unknown, ("vgmdb-artist-page", id)),
- move || {
- rt.block_on(async {
- let _permit = self.rate_limit.clone().acquire_owned().await?;
- let permit_drop_ts = Instant::now() + Duration::from_secs(1);
- info!("scrape artist: {id}");
+ pub fn scrape_artist_page(&self, id: u64, rt: &Handle) -> Result<Vec<u8>> {
+ cache(&format!("ext/vgmdb/artist-page/{id}.html"), move || {
+ rt.block_on(async {
+ let _permit = self.rate_limit.clone().acquire_owned().await?;
+ let permit_drop_ts = Instant::now() + Duration::from_secs(1);
+ info!("scrape artist: {id}");
- let resp = self
- .client
- .get(format!("https://vgmdb.net/artist/{id}"))
- .send()
- .await?
- .error_for_status()?
- .bytes()
- .await?
- .to_vec();
+ let resp = self
+ .client
+ .get(format!("https://vgmdb.net/artist/{id}"))
+ .send()
+ .await?
+ .error_for_status()?
+ .bytes()
+ .await?
+ .to_vec();
- tokio::task::spawn(async move {
- sleep_until(permit_drop_ts).await;
- drop(_permit);
- });
+ tokio::task::spawn(async move {
+ sleep_until(permit_drop_ts).await;
+ drop(_permit);
+ });
- Ok(resp)
- })
- },
- )
+ Ok(resp)
+ })
+ })
.context("vgmdb artist page scrape")
}
}
diff --git a/import/src/wikidata.rs b/import/src/wikidata.rs
index 40077b9..3a107fe 100644
--- a/import/src/wikidata.rs
+++ b/import/src/wikidata.rs
@@ -6,7 +6,7 @@
use crate::USER_AGENT;
use anyhow::{bail, Context, Result};
-use jellycache::{cache_memory, CacheKey};
+use jellycache::{cache_memory, EscapeKey};
use log::info;
use reqwest::{
header::{HeaderMap, HeaderName, HeaderValue},
@@ -108,19 +108,22 @@ impl Wikidata {
}
pub fn query(&self, id: String, rt: &Handle) -> Result<Arc<WikidataResponse>> {
- cache_memory(CacheKey::new_json(("wikidata", &id)), move || {
- rt.block_on(async {
- info!("entity query: {id}");
- Ok(self
- .client
- .get(format!("https://www.wikidata.org/entity/{id}"))
- .send()
- .await?
- .error_for_status()?
- .json()
- .await?)
- })
- })
+ cache_memory(
+ &format!("ext/wikidata/{}.json", EscapeKey(&id)),
+ move || {
+ rt.block_on(async {
+ info!("entity query: {id}");
+ Ok(self
+ .client
+ .get(format!("https://www.wikidata.org/entity/{id}"))
+ .send()
+ .await?
+ .error_for_status()?
+ .json()
+ .await?)
+ })
+ },
+ )
.context("wikidata entity")
}
}
diff --git a/import/src/wikimedia_commons.rs b/import/src/wikimedia_commons.rs
index 0d716f0..86d934c 100644
--- a/import/src/wikimedia_commons.rs
+++ b/import/src/wikimedia_commons.rs
@@ -6,7 +6,8 @@
use crate::USER_AGENT;
use anyhow::{Context, Result};
-use jellycache::{cache_store, CacheKey};
+use jellycache::{cache_store, EscapeKey};
+use jellycommon::Asset;
use reqwest::{
header::{HeaderMap, HeaderName, HeaderValue},
redirect::Policy,
@@ -36,9 +37,9 @@ impl WikimediaCommons {
Self { client }
}
- pub fn image_by_filename(&self, filename: String, rt: &Handle) -> Result<CacheKey> {
+ pub fn image_by_filename(&self, filename: String, rt: &Handle) -> Result<Asset> {
cache_store(
- CacheKey::new_image(("wikimedia-commons-image", &filename)),
+ format!("ext/wikimedia-commons/image/{}.image", EscapeKey(&filename)),
move || {
rt.block_on(async {
Ok(self
@@ -57,5 +58,6 @@ impl WikimediaCommons {
},
)
.context("mediawiki image by filename")
+ .map(Asset)
}
}