From a0cfd77b4d19c43a28c4d82072e6ff136e336af3 Mon Sep 17 00:00:00 2001 From: metamuffin Date: Wed, 10 Dec 2025 16:21:38 +0100 Subject: refactor import plugins part 1 --- import/src/plugins/wikidata.rs | 129 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 import/src/plugins/wikidata.rs (limited to 'import/src/plugins/wikidata.rs') diff --git a/import/src/plugins/wikidata.rs b/import/src/plugins/wikidata.rs new file mode 100644 index 0000000..358996e --- /dev/null +++ b/import/src/plugins/wikidata.rs @@ -0,0 +1,129 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin +*/ + +use crate::USER_AGENT; +use anyhow::{Context, Result, bail}; +use jellycache::{EscapeKey, cache_memory}; +use log::info; +use reqwest::{ + Client, ClientBuilder, + header::{HeaderMap, HeaderName, HeaderValue}, +}; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::{collections::BTreeMap, sync::Arc}; +use tokio::runtime::Handle; + +pub struct Wikidata { + client: Client, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct WikidataResponse { + entities: BTreeMap, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct WikidataEntity { + pub pageid: u64, + pub ns: u64, + pub title: String, + pub lastrevid: u64, + pub modified: String, + pub r#type: String, + pub id: String, + pub claims: BTreeMap>, +} +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct WikidataClaim { + pub r#type: String, + pub id: String, + pub rank: String, + pub mainsnak: WikidataSnak, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct WikidataSnak { + pub snaktype: String, + pub property: String, + pub hash: String, + pub datavalue: Option, + pub datatype: String, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct WikidataValue { + pub value: Value, + pub r#type: String, +} + +pub mod properties { + pub static IMAGE: &str = "P18"; +} + +impl Default for Wikidata { + fn default() -> Self { + Self::new() + } +} + +impl Wikidata { + pub fn new() -> Self { + let client = ClientBuilder::new() + .default_headers(HeaderMap::from_iter([ + ( + HeaderName::from_static("accept"), + HeaderValue::from_static("application/json"), + ), + ( + HeaderName::from_static("user-agent"), + HeaderValue::from_static(USER_AGENT), + ), + ])) + .build() + .unwrap(); + Self { client } + } + + pub fn query_image_path(&self, id: String, rt: &Handle) -> Result> { + let response = self.query(id.clone(), rt)?; + if let Some(entity) = response.entities.get(&id) { + if let Some(images) = entity.claims.get(properties::IMAGE) { + for image in images { + if image.mainsnak.datatype != "commonsMedia" { + bail!("image is of type {:?}", image.mainsnak.datatype); + } + if let Some(dv) = &image.mainsnak.datavalue { + if let Value::String(filename) = &dv.value { + return Ok(Some(filename.to_owned())); + } + } + } + } + } + Ok(None) + } + + pub fn query(&self, id: String, rt: &Handle) -> Result> { + cache_memory( + &format!("ext/wikidata/{}.json", EscapeKey(&id)), + move || { + rt.block_on(async { + info!("entity query: {id}"); + Ok(self + .client + .get(format!("https://www.wikidata.org/entity/{id}")) + .send() + .await? + .error_for_status()? + .json() + .await?) + }) + }, + ) + .context("wikidata entity") + } +} -- cgit v1.3