diff options
Diffstat (limited to 'import/src/plugins/wikidata.rs')
| -rw-r--r-- | import/src/plugins/wikidata.rs | 129 |
1 files changed, 129 insertions, 0 deletions
diff --git a/import/src/plugins/wikidata.rs b/import/src/plugins/wikidata.rs new file mode 100644 index 0000000..358996e --- /dev/null +++ b/import/src/plugins/wikidata.rs @@ -0,0 +1,129 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin <metamuffin.org> +*/ + +use crate::USER_AGENT; +use anyhow::{Context, Result, bail}; +use jellycache::{EscapeKey, cache_memory}; +use log::info; +use reqwest::{ + Client, ClientBuilder, + header::{HeaderMap, HeaderName, HeaderValue}, +}; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::{collections::BTreeMap, sync::Arc}; +use tokio::runtime::Handle; + +pub struct Wikidata { + client: Client, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct WikidataResponse { + entities: BTreeMap<String, WikidataEntity>, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct WikidataEntity { + pub pageid: u64, + pub ns: u64, + pub title: String, + pub lastrevid: u64, + pub modified: String, + pub r#type: String, + pub id: String, + pub claims: BTreeMap<String, Vec<WikidataClaim>>, +} +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct WikidataClaim { + pub r#type: String, + pub id: String, + pub rank: String, + pub mainsnak: WikidataSnak, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct WikidataSnak { + pub snaktype: String, + pub property: String, + pub hash: String, + pub datavalue: Option<WikidataValue>, + pub datatype: String, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct WikidataValue { + pub value: Value, + pub r#type: String, +} + +pub mod properties { + pub static IMAGE: &str = "P18"; +} + +impl Default for Wikidata { + fn default() -> Self { + Self::new() + } +} + +impl Wikidata { + pub fn new() -> Self { + let client = ClientBuilder::new() + .default_headers(HeaderMap::from_iter([ + ( + HeaderName::from_static("accept"), + HeaderValue::from_static("application/json"), + ), + ( + HeaderName::from_static("user-agent"), + HeaderValue::from_static(USER_AGENT), + ), + ])) + .build() + .unwrap(); + Self { client } + } + + pub fn query_image_path(&self, id: String, rt: &Handle) -> Result<Option<String>> { + let response = self.query(id.clone(), rt)?; + if let Some(entity) = response.entities.get(&id) { + if let Some(images) = entity.claims.get(properties::IMAGE) { + for image in images { + if image.mainsnak.datatype != "commonsMedia" { + bail!("image is of type {:?}", image.mainsnak.datatype); + } + if let Some(dv) = &image.mainsnak.datavalue { + if let Value::String(filename) = &dv.value { + return Ok(Some(filename.to_owned())); + } + } + } + } + } + Ok(None) + } + + pub fn query(&self, id: String, rt: &Handle) -> Result<Arc<WikidataResponse>> { + cache_memory( + &format!("ext/wikidata/{}.json", EscapeKey(&id)), + move || { + rt.block_on(async { + info!("entity query: {id}"); + Ok(self + .client + .get(format!("https://www.wikidata.org/entity/{id}")) + .send() + .await? + .error_for_status()? + .json() + .await?) + }) + }, + ) + .context("wikidata entity") + } +} |