diff options
| author | metamuffin <metamuffin@disroot.org> | 2025-12-08 19:53:12 +0100 |
|---|---|---|
| committer | metamuffin <metamuffin@disroot.org> | 2025-12-08 19:53:12 +0100 |
| commit | 6edf0fd93abf7e58b4c0974e3d3e54bcf8517946 (patch) | |
| tree | 32577db9d987897d4037ba9af0084b95b55e145c /import/src | |
| parent | e4584a8135584e6591bac7d5397cf227cf3cff92 (diff) | |
| download | jellything-6edf0fd93abf7e58b4c0974e3d3e54bcf8517946.tar jellything-6edf0fd93abf7e58b4c0974e3d3e54bcf8517946.tar.bz2 jellything-6edf0fd93abf7e58b4c0974e3d3e54bcf8517946.tar.zst | |
human-readable cache keys
Diffstat (limited to 'import/src')
| -rw-r--r-- | import/src/acoustid.rs | 6 | ||||
| -rw-r--r-- | import/src/lib.rs | 161 | ||||
| -rw-r--r-- | import/src/musicbrainz.rs | 169 | ||||
| -rw-r--r-- | import/src/tmdb.rs | 20 | ||||
| -rw-r--r-- | import/src/trakt.rs | 65 | ||||
| -rw-r--r-- | import/src/vgmdb.rs | 92 | ||||
| -rw-r--r-- | import/src/wikidata.rs | 31 | ||||
| -rw-r--r-- | import/src/wikimedia_commons.rs | 8 |
8 files changed, 289 insertions, 263 deletions
diff --git a/import/src/acoustid.rs b/import/src/acoustid.rs index 809d964..01adb57 100644 --- a/import/src/acoustid.rs +++ b/import/src/acoustid.rs @@ -5,7 +5,7 @@ */ use crate::USER_AGENT; use anyhow::{Context, Result}; -use jellycache::{cache_memory, CacheKey}; +use jellycache::{cache_memory, HashKey}; use log::info; use reqwest::{ header::{HeaderMap, HeaderName, HeaderValue}, @@ -95,7 +95,7 @@ impl AcoustID { } pub fn lookup(&self, fp: Fingerprint, rt: &Handle) -> Result<Arc<AcoustIDLookupResponse>> { - cache_memory(CacheKey::new_json(("acoustid-lookup", &fp)) , move || rt.block_on(async { + cache_memory(&format!("ext/acoustid/{}.json", HashKey(&fp)) , move || rt.block_on(async { let _permit = self.rate_limit.clone().acquire_owned().await?; let permit_drop_ts = Instant::now() + Duration::SECOND; info!("acoustid lookup"); @@ -126,7 +126,7 @@ impl AcoustID { pub(crate) fn acoustid_fingerprint(path: &Path) -> Result<Arc<Fingerprint>> { cache_memory( - CacheKey::new_json(("acoustid-fingerprint", path)), + &format!("media/chromaprint/{}.json", HashKey(path)), move || { let child = Command::new("fpcalc") .arg("-json") diff --git a/import/src/lib.rs b/import/src/lib.rs index af13316..e31127e 100644 --- a/import/src/lib.rs +++ b/import/src/lib.rs @@ -20,10 +20,10 @@ use crate::{tmdb::TmdbKind, trakt::TraktKind}; use acoustid::{acoustid_fingerprint, AcoustID}; use anyhow::{anyhow, bail, Context, Result}; use infojson::YVideo; -use jellycache::{cache, cache_memory, cache_store, CacheKey}; +use jellycache::{cache_memory, cache_read, cache_store, HashKey}; use jellycommon::{ - Appearance, Chapter, CreditCategory, IdentifierType, MediaInfo, Node, NodeID, NodeKind, - Picture, PictureSlot, RatingType, SourceTrack, SourceTrackKind, TrackSource, Visibility, + Appearance, Asset, Chapter, CreditCategory, IdentifierType, MediaInfo, Node, NodeID, NodeKind, + PictureSlot, RatingType, SourceTrack, SourceTrackKind, TrackSource, Visibility, }; use jellyimport_fallback_generator::generate_fallback; use jellyremuxer::{ @@ -261,27 +261,31 @@ fn import_file( match filename.as_ref() { "poster.jpeg" | "poster.webp" | "poster.png" => { info!("import poster at {path:?}"); - let key = CacheKey::new_image(path); - cache(key, || { - let mut data = Vec::new(); - File::open(path)?.read_to_end(&mut data)?; - Ok(data) - })?; + let asset = Asset(cache_store( + format!("media/literal/{}-poster.image", HashKey(path)), + || { + let mut data = Vec::new(); + File::open(path)?.read_to_end(&mut data)?; + Ok(data) + }, + )?); db.update_node_init(parent, |node| { - node.pictures.insert(PictureSlot::Cover, Picture(key.0)); + node.pictures.insert(PictureSlot::Cover, asset); Ok(()) })?; } "backdrop.jpeg" | "backdrop.webp" | "backdrop.png" => { info!("import backdrop at {path:?}"); - let key = CacheKey::new_image(path); - cache(key, || { - let mut data = Vec::new(); - File::open(path)?.read_to_end(&mut data)?; - Ok(data) - })?; + let asset = Asset(cache_store( + format!("media/literal/{}-poster.image", HashKey(path)), + || { + let mut data = Vec::new(); + File::open(path)?.read_to_end(&mut data)?; + Ok(data) + }, + )?); db.update_node_init(parent, |node| { - node.pictures.insert(PictureSlot::Backdrop, Picture(key.0)); + node.pictures.insert(PictureSlot::Backdrop, asset); Ok(()) })?; } @@ -356,25 +360,63 @@ fn import_file( } pub fn read_media_metadata(path: &Path) -> Result<Arc<matroska::Segment>> { - cache_memory(CacheKey::new_json(path), move || { - let media = File::open(path)?; - let mut media = - create_demuxer_autodetect(Box::new(media))?.ok_or(anyhow!("media format unknown"))?; + cache_memory( + &format!("media/metadata/{}.json", HashKey(path)), + move || { + let media = File::open(path)?; + let mut media = create_demuxer_autodetect(Box::new(media))? + .ok_or(anyhow!("media format unknown"))?; - let info = media.info()?; - let tracks = media.tracks()?; - let tags = media.tags()?; - let attachments = media.attachments()?; - let chapters = media.chapters()?; - Ok(Segment { - info, - tracks, - tags: tags.into_iter().collect(), - attachments, - chapters, - ..Default::default() - }) - }) + let info = media.info()?; + let tracks = media.tracks()?; + let tags = media.tags()?; + let mut attachments = media.attachments()?; + let chapters = media.chapters()?; + + // Replace data of useful attachments with cache key; delete data of all others + if let Some(attachments) = &mut attachments { + for att in &mut attachments.files { + if let Some(fname) = attachment_types::is_useful_attachment(&att) { + let key = cache_store( + format!("media/attachment/{}-{fname}", HashKey(path)), + || Ok(att.data.clone()), + )?; + att.data = key.as_bytes().to_vec(); + } else { + att.data.clear(); + } + } + } + + Ok(Segment { + info, + tracks, + tags: tags.into_iter().collect(), + attachments, + chapters, + ..Default::default() + }) + }, + ) +} + +mod attachment_types { + use jellyremuxer::matroska::AttachedFile; + + pub fn is_useful_attachment(a: &AttachedFile) -> Option<&'static str> { + match a { + _ if is_info_json(&a) => Some("info.json"), + _ if is_cover(&a) => Some("cover.image"), + _ => None, + } + } + + pub fn is_info_json(a: &&AttachedFile) -> bool { + a.name == "info.json" && a.media_type == "application/json" + } + pub fn is_cover(a: &&AttachedFile) -> bool { + a.name.starts_with("cover") && a.media_type.starts_with("image/") + } } fn import_media_file( @@ -392,8 +434,12 @@ fn import_media_file( .attachments .iter() .flat_map(|a| &a.files) - .find(|a| a.name == "info.json" && a.media_type == "application/json") - .map(|d| serde_json::from_slice::<infojson::YVideo>(&d.data)) + .find(attachment_types::is_info_json) + .map(|att| { + let data = cache_read(str::from_utf8(&att.data).unwrap())? + .ok_or(anyhow!("info json cache missing"))?; + anyhow::Ok(serde_json::from_slice::<infojson::YVideo>(&data)?) + }) .transpose() .context("infojson parsing")?; @@ -401,13 +447,8 @@ fn import_media_file( .attachments .iter() .flat_map(|a| &a.files) - .find(|a| a.name.starts_with("cover") && a.media_type.starts_with("image/")) - .map(|att| { - cache_store(CacheKey::new_image(("cover", path)), || { - Ok(att.data.clone()) - }) - }) - .transpose()?; + .find(attachment_types::is_cover) + .map(|att| Asset(att.data.clone().try_into().unwrap())); let mut tags = m .tags @@ -519,7 +560,7 @@ fn import_media_file( node.identifiers.extend(eids); if let Some(cover) = cover { - node.pictures.insert(PictureSlot::Cover, Picture(cover.0)); + node.pictures.insert(PictureSlot::Cover, cover); } if let Some(ct) = tags.get("CONTENT_TYPE") { @@ -676,7 +717,7 @@ fn import_media_file( let tmdb_details = tmdb.episode_details(tmdb_id, season, episode, rthandle)?; if let Some(still) = &tmdb_details.still_path { - poster = Some(Picture(tmdb.image(still, rthandle)?.0)) + poster = Some(tmdb.image(still, rthandle)?) } } } @@ -816,10 +857,10 @@ fn apply_musicbrainz_recording( if let Some(filename) = apis.wikidata.query_image_path(id.to_owned(), rthandle)? { - let path = apis - .wikimedia_commons - .image_by_filename(filename, rthandle)?; - image_1 = Some(Picture(path.0)); + image_1 = Some( + apis.wikimedia_commons + .image_by_filename(filename, rthandle)?, + ); } } } @@ -828,7 +869,7 @@ fn apply_musicbrainz_recording( if let Some(id) = url.strip_prefix("https://vgmdb.net/artist/") { let id = id.parse::<u64>().context("parse vgmdb id")?; if let Some(path) = apis.vgmdb.get_artist_image(id, rthandle)? { - image_2 = Some(Picture(path.0)); + image_2 = Some(path); } } } @@ -841,14 +882,12 @@ fn apply_musicbrainz_recording( } jobs.extend(rel.attributes.clone()); - let headshot = match image_1.or(image_2) { + let _headshot = match image_1.or(image_2) { Some(x) => x, - None => Picture( - cache_store(CacheKey::new_image(("fallback", &artist.sort_name)), || { - generate_fallback(&artist.sort_name) - })? - .0, - ), + None => Asset(cache_store( + format!("fallback/{}.image", HashKey(&artist.sort_name)), + || generate_fallback(&artist.sort_name), + )?), }; node.credits.entry(group).or_default().push(Appearance { @@ -913,12 +952,10 @@ fn apply_trakt_tmdb( tmdb_data = Some(data.clone()); if let Some(path) = &data.backdrop_path { - let im = tmdb.image(path, rthandle).context("tmdb backdrop image")?; - backdrop = Some(Picture(im.0)); + backdrop = Some(tmdb.image(path, rthandle).context("tmdb backdrop image")?); } if let Some(path) = &data.poster_path { - let im = tmdb.image(path, rthandle).context("tmdb poster image")?; - poster = Some(Picture(im.0)); + poster = Some(tmdb.image(path, rthandle).context("tmdb poster image")?); } // for p in people_map.values_mut().flatten() { diff --git a/import/src/musicbrainz.rs b/import/src/musicbrainz.rs index 92df703..fe86175 100644 --- a/import/src/musicbrainz.rs +++ b/import/src/musicbrainz.rs @@ -6,7 +6,7 @@ use crate::USER_AGENT; use anyhow::{Context, Result}; -use jellycache::{cache_memory, CacheContentType, CacheKey}; +use jellycache::cache_memory; use log::info; use reqwest::{ header::{HeaderMap, HeaderName, HeaderValue}, @@ -223,107 +223,96 @@ impl MusicBrainz { } pub fn lookup_recording(&self, id: String, rt: &Handle) -> Result<Arc<MbRecordingRel>> { - cache_memory( - CacheKey::new( - CacheContentType::Json, - ("musicbrainz-recording-lookup", &id), - ), - move || { - rt.block_on(async { - let _permit = self.rate_limit.clone().acquire_owned().await?; - let permit_drop_ts = - Instant::now() + Duration::from_secs(Self::MAX_PAR_REQ as u64); - info!("recording lookup: {id}"); + cache_memory(&format!("ext/musicbrainz/recording/{id}.json"), move || { + rt.block_on(async { + let _permit = self.rate_limit.clone().acquire_owned().await?; + let permit_drop_ts = Instant::now() + Duration::from_secs(Self::MAX_PAR_REQ as u64); + info!("recording lookup: {id}"); - let inc = [ - "isrcs", - "artists", - "area-rels", - "artist-rels", - "event-rels", - "genre-rels", - "instrument-rels", - "label-rels", - "place-rels", - "recording-rels", - "release-rels", - "release-group-rels", - "series-rels", - "url-rels", - "work-rels", - ] - .join("+"); + let inc = [ + "isrcs", + "artists", + "area-rels", + "artist-rels", + "event-rels", + "genre-rels", + "instrument-rels", + "label-rels", + "place-rels", + "recording-rels", + "release-rels", + "release-group-rels", + "series-rels", + "url-rels", + "work-rels", + ] + .join("+"); - let resp = self - .client - .get(format!( - "https://musicbrainz.org/ws/2/recording/{id}?inc={inc}" - )) - .send() - .await? - .error_for_status()? - .json::<MbRecordingRel>() - .await?; + let resp = self + .client + .get(format!( + "https://musicbrainz.org/ws/2/recording/{id}?inc={inc}" + )) + .send() + .await? + .error_for_status()? + .json::<MbRecordingRel>() + .await?; - tokio::task::spawn(async move { - sleep_until(permit_drop_ts).await; - drop(_permit); - }); + tokio::task::spawn(async move { + sleep_until(permit_drop_ts).await; + drop(_permit); + }); - Ok(resp) - }) - }, - ) + Ok(resp) + }) + }) .context("musicbrainz recording lookup") } pub fn lookup_artist(&self, id: String, rt: &Handle) -> Result<Arc<MbArtistRel>> { - cache_memory( - CacheKey::new(CacheContentType::Json, ("musicbrainz-artist-lookup", &id)), - move || { - rt.block_on(async { - let _permit = self.rate_limit.clone().acquire_owned().await?; - let permit_drop_ts = - Instant::now() + Duration::from_secs(Self::MAX_PAR_REQ as u64); - info!("artist lookup: {id}"); + cache_memory(&format!("ext/musicbrainz/artist/{id}.json"), move || { + rt.block_on(async { + let _permit = self.rate_limit.clone().acquire_owned().await?; + let permit_drop_ts = Instant::now() + Duration::from_secs(Self::MAX_PAR_REQ as u64); + info!("artist lookup: {id}"); - let inc = [ - "area-rels", - "artist-rels", - "event-rels", - "genre-rels", - "instrument-rels", - "label-rels", - "place-rels", - "recording-rels", - "release-rels", - "release-group-rels", - "series-rels", - "url-rels", - "work-rels", - ] - .join("+"); + let inc = [ + "area-rels", + "artist-rels", + "event-rels", + "genre-rels", + "instrument-rels", + "label-rels", + "place-rels", + "recording-rels", + "release-rels", + "release-group-rels", + "series-rels", + "url-rels", + "work-rels", + ] + .join("+"); - let resp = self - .client - .get(format!( - "https://musicbrainz.org/ws/2/artist/{id}?inc={inc}" - )) - .send() - .await? - .error_for_status()? - .json::<MbArtistRel>() - .await?; + let resp = self + .client + .get(format!( + "https://musicbrainz.org/ws/2/artist/{id}?inc={inc}" + )) + .send() + .await? + .error_for_status()? + .json::<MbArtistRel>() + .await?; - tokio::task::spawn(async move { - sleep_until(permit_drop_ts).await; - drop(_permit); - }); + tokio::task::spawn(async move { + sleep_until(permit_drop_ts).await; + drop(_permit); + }); - Ok(resp) - }) - }, - ) + Ok(resp) + }) + }) .context("musicbrainz artist lookup") } } diff --git a/import/src/tmdb.rs b/import/src/tmdb.rs index d4a0c25..219c1f6 100644 --- a/import/src/tmdb.rs +++ b/import/src/tmdb.rs @@ -5,8 +5,11 @@ */ use crate::USER_AGENT; use anyhow::{anyhow, bail, Context, Result}; -use jellycache::{cache_memory, cache_store, CacheKey}; -use jellycommon::chrono::{format::Parsed, Utc}; +use jellycache::{cache_memory, cache_store, HashKey}; +use jellycommon::{ + chrono::{format::Parsed, Utc}, + Asset, +}; use log::info; use reqwest::{ header::{HeaderMap, HeaderName, HeaderValue}, @@ -46,7 +49,7 @@ impl Tmdb { } pub fn search(&self, kind: TmdbKind, query: &str, rt: &Handle) -> Result<Arc<TmdbQuery>> { cache_memory( - CacheKey::new_json(("api-tmdb-search", kind, query)), + &format!("ext/tmdb/search/{kind}-{}.json", HashKey(query)), move || { rt.block_on(async { info!("searching tmdb: {query:?}"); @@ -68,7 +71,7 @@ impl Tmdb { .context("tmdb search") } pub fn details(&self, kind: TmdbKind, id: u64, rt: &Handle) -> Result<Arc<TmdbDetails>> { - cache_memory(CacheKey::new_json(("tmdb-details", kind, id)), move || { + cache_memory(&format!("ext/tmdb/details/{kind}-{id}.json"), move || { rt.block_on(async { info!("fetching details: {id:?}"); Ok(self @@ -87,7 +90,7 @@ impl Tmdb { .context("tmdb details") } pub fn person_image(&self, id: u64, rt: &Handle) -> Result<Arc<TmdbPersonImage>> { - cache_memory(CacheKey::new_image(("tmdb-person-image", id)), move || { + cache_memory(&format!("ext/tmdb/person/images/{id}.json"), move || { rt.block_on(async { Ok(self .client @@ -104,8 +107,8 @@ impl Tmdb { }) .context("tmdb person images") } - pub fn image(&self, path: &str, rt: &Handle) -> Result<CacheKey> { - cache_store(CacheKey::new_image(("tmdb-image", path)), move || { + pub fn image(&self, path: &str, rt: &Handle) -> Result<Asset> { + cache_store(format!("ext/tmdb/image/{path}.json"), move || { rt.block_on(async { info!("downloading image {path:?}"); Ok(self @@ -120,6 +123,7 @@ impl Tmdb { }) }) .context("tmdb image download") + .map(Asset) } pub fn episode_details( @@ -129,7 +133,7 @@ impl Tmdb { episode: usize, rt: &Handle, ) -> Result<Arc<TmdbEpisode>> { - cache_memory(CacheKey::new_json(("tmdb-episode-details", series_id, season, episode)), move || { + cache_memory(&format!("ext/tmdb/episode-details/{series_id}-S{season}-E{episode}.json"), move || { rt.block_on(async { info!("tmdb episode details {series_id} S={season} E={episode}"); Ok(self diff --git a/import/src/trakt.rs b/import/src/trakt.rs index 4a4beea..a241725 100644 --- a/import/src/trakt.rs +++ b/import/src/trakt.rs @@ -4,8 +4,8 @@ Copyright (C) 2025 metamuffin <metamuffin.org> */ use crate::USER_AGENT; -use anyhow::Context; -use jellycache::{cache_memory, CacheKey}; +use anyhow::{Context, Result}; +use jellycache::{cache_memory, HashKey}; use jellycommon::{Appearance, CreditCategory, NodeID, NodeKind}; use log::info; use reqwest::{ @@ -51,32 +51,30 @@ impl Trakt { kinds: &[TraktKind], query: &str, rt: &Handle, - ) -> anyhow::Result<Arc<Vec<TraktSearchResult>>> { - cache_memory(CacheKey::new_json(("trakt-lookup", query)), move || { - rt.block_on(async { - let url = format!( - "https://api.trakt.tv/search/{}?query={}&extended=full", - kinds - .iter() - .map(|t| t.singular()) - .collect::<Vec<_>>() - .join(","), - urlencoding::encode(query), - ); - let res = self.client.get(url).send().await?.error_for_status()?; - Ok(res.json().await?) - }) - }) + ) -> Result<Arc<Vec<TraktSearchResult>>> { + cache_memory( + &format!("ext/trakt/search/{}.json", HashKey(query)), + move || { + rt.block_on(async { + let url = format!( + "https://api.trakt.tv/search/{}?query={}&extended=full", + kinds + .iter() + .map(|t| t.singular()) + .collect::<Vec<_>>() + .join(","), + urlencoding::encode(query), + ); + let res = self.client.get(url).send().await?.error_for_status()?; + Ok(res.json().await?) + }) + }, + ) .context("trakt search") } - pub fn lookup( - &self, - kind: TraktKind, - id: u64, - rt: &Handle, - ) -> anyhow::Result<Arc<TraktMediaObject>> { - cache_memory(CacheKey::new_json(("trakt-lookup", kind, id)), move || { + pub fn lookup(&self, kind: TraktKind, id: u64, rt: &Handle) -> Result<Arc<TraktMediaObject>> { + cache_memory(&format!("ext/trakt/lookup/{kind}-{id}.json"), move || { rt.block_on(async { info!("trakt lookup {kind:?}:{id:?}"); let url = format!("https://api.trakt.tv/{}/{id}?extended=full", kind.plural()); @@ -87,13 +85,8 @@ impl Trakt { .context("trakt lookup") } - pub fn people( - &self, - kind: TraktKind, - id: u64, - rt: &Handle, - ) -> anyhow::Result<Arc<TraktPeople>> { - cache_memory(CacheKey::new_json(("trakt-people", kind, id)), move || { + pub fn people(&self, kind: TraktKind, id: u64, rt: &Handle) -> Result<Arc<TraktPeople>> { + cache_memory(&format!("ext/trakt/people/{kind}-{id}.json"), move || { rt.block_on(async { info!("trakt people {kind:?}:{id:?}"); let url = format!( @@ -107,8 +100,8 @@ impl Trakt { .context("trakt people") } - pub fn show_seasons(&self, id: u64, rt: &Handle) -> anyhow::Result<Arc<Vec<TraktSeason>>> { - cache_memory(CacheKey::new_json(("trakt-seasons", id)), move || { + pub fn show_seasons(&self, id: u64, rt: &Handle) -> Result<Arc<Vec<TraktSeason>>> { + cache_memory(&format!("ext/trakt/seasons/{id}"), move || { rt.block_on(async { info!("trakt seasons {id:?}"); let url = format!("https://api.trakt.tv/shows/{id}/seasons?extended=full"); @@ -124,9 +117,9 @@ impl Trakt { id: u64, season: usize, rt: &Handle, - ) -> anyhow::Result<Arc<Vec<TraktEpisode>>> { + ) -> Result<Arc<Vec<TraktEpisode>>> { cache_memory( - CacheKey::new_json(("trakt-episodes", id, season)), + &format!("ext/trakt/episodes/{id}-S{season}.json"), move || { rt.block_on(async { info!("trakt episodes {id:?} season={season}"); diff --git a/import/src/vgmdb.rs b/import/src/vgmdb.rs index 4e37ba3..402fd90 100644 --- a/import/src/vgmdb.rs +++ b/import/src/vgmdb.rs @@ -6,7 +6,8 @@ use crate::USER_AGENT; use anyhow::{Context, Result}; -use jellycache::{cache_memory, cache_store, CacheContentType, CacheKey}; +use jellycache::{cache, cache_store, HashKey}; +use jellycommon::Asset; use log::info; use regex::Regex; use reqwest::{ @@ -59,28 +60,28 @@ impl Vgmdb { } } - pub fn get_artist_image(&self, id: u64, rt: &Handle) -> Result<Option<CacheKey>> { + pub fn get_artist_image(&self, id: u64, rt: &Handle) -> Result<Option<Asset>> { if let Some(url) = self.get_artist_image_url(id, rt)? { - Ok(Some( - cache_store( - CacheKey::new_image(("vgmdb-artist-media", &url)), - move || { - rt.block_on(async { - info!("downloading image {url:?}"); - Ok(self - .client - .get(url) - .send() - .await? - .error_for_status()? - .bytes() - .await? - .to_vec()) - }) - }, - ) - .context("vgmdb media download")?, - )) + cache_store( + format!("ext/vgmdb/artist-image/{}.image", HashKey(&url)), + move || { + rt.block_on(async { + info!("downloading image {url:?}"); + Ok(self + .client + .get(url) + .send() + .await? + .error_for_status()? + .bytes() + .await? + .to_vec()) + }) + }, + ) + .context("vgmdb media download") + .map(Asset) + .map(Some) } else { Ok(None) } @@ -96,34 +97,31 @@ impl Vgmdb { Ok(None) } - pub fn scrape_artist_page(&self, id: u64, rt: &Handle) -> Result<Arc<Vec<u8>>> { - cache_memory( - CacheKey::new(CacheContentType::Unknown, ("vgmdb-artist-page", id)), - move || { - rt.block_on(async { - let _permit = self.rate_limit.clone().acquire_owned().await?; - let permit_drop_ts = Instant::now() + Duration::from_secs(1); - info!("scrape artist: {id}"); + pub fn scrape_artist_page(&self, id: u64, rt: &Handle) -> Result<Vec<u8>> { + cache(&format!("ext/vgmdb/artist-page/{id}.html"), move || { + rt.block_on(async { + let _permit = self.rate_limit.clone().acquire_owned().await?; + let permit_drop_ts = Instant::now() + Duration::from_secs(1); + info!("scrape artist: {id}"); - let resp = self - .client - .get(format!("https://vgmdb.net/artist/{id}")) - .send() - .await? - .error_for_status()? - .bytes() - .await? - .to_vec(); + let resp = self + .client + .get(format!("https://vgmdb.net/artist/{id}")) + .send() + .await? + .error_for_status()? + .bytes() + .await? + .to_vec(); - tokio::task::spawn(async move { - sleep_until(permit_drop_ts).await; - drop(_permit); - }); + tokio::task::spawn(async move { + sleep_until(permit_drop_ts).await; + drop(_permit); + }); - Ok(resp) - }) - }, - ) + Ok(resp) + }) + }) .context("vgmdb artist page scrape") } } diff --git a/import/src/wikidata.rs b/import/src/wikidata.rs index 40077b9..3a107fe 100644 --- a/import/src/wikidata.rs +++ b/import/src/wikidata.rs @@ -6,7 +6,7 @@ use crate::USER_AGENT; use anyhow::{bail, Context, Result}; -use jellycache::{cache_memory, CacheKey}; +use jellycache::{cache_memory, EscapeKey}; use log::info; use reqwest::{ header::{HeaderMap, HeaderName, HeaderValue}, @@ -108,19 +108,22 @@ impl Wikidata { } pub fn query(&self, id: String, rt: &Handle) -> Result<Arc<WikidataResponse>> { - cache_memory(CacheKey::new_json(("wikidata", &id)), move || { - rt.block_on(async { - info!("entity query: {id}"); - Ok(self - .client - .get(format!("https://www.wikidata.org/entity/{id}")) - .send() - .await? - .error_for_status()? - .json() - .await?) - }) - }) + cache_memory( + &format!("ext/wikidata/{}.json", EscapeKey(&id)), + move || { + rt.block_on(async { + info!("entity query: {id}"); + Ok(self + .client + .get(format!("https://www.wikidata.org/entity/{id}")) + .send() + .await? + .error_for_status()? + .json() + .await?) + }) + }, + ) .context("wikidata entity") } } diff --git a/import/src/wikimedia_commons.rs b/import/src/wikimedia_commons.rs index 0d716f0..86d934c 100644 --- a/import/src/wikimedia_commons.rs +++ b/import/src/wikimedia_commons.rs @@ -6,7 +6,8 @@ use crate::USER_AGENT; use anyhow::{Context, Result}; -use jellycache::{cache_store, CacheKey}; +use jellycache::{cache_store, EscapeKey}; +use jellycommon::Asset; use reqwest::{ header::{HeaderMap, HeaderName, HeaderValue}, redirect::Policy, @@ -36,9 +37,9 @@ impl WikimediaCommons { Self { client } } - pub fn image_by_filename(&self, filename: String, rt: &Handle) -> Result<CacheKey> { + pub fn image_by_filename(&self, filename: String, rt: &Handle) -> Result<Asset> { cache_store( - CacheKey::new_image(("wikimedia-commons-image", &filename)), + format!("ext/wikimedia-commons/image/{}.image", EscapeKey(&filename)), move || { rt.block_on(async { Ok(self @@ -57,5 +58,6 @@ impl WikimediaCommons { }, ) .context("mediawiki image by filename") + .map(Asset) } } |