/*
    This file is part of jellything (https://codeberg.org/metamuffin/jellything)
    which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
    Copyright (C) 2026 metamuffin
*/
// Import plugin that looks up a node's Wikidata entity, reads its image claim
// (property P18) and stores the referenced Wikimedia Commons file as the
// node's cover picture.
//
// NOTE(review): several generic argument lists are not visible in this view of
// the file (`BTreeMap,`, `BTreeMap>`, `Option,`, `Result>`, bare `Result`).
// The tokens are kept exactly as seen; confirm them against the original file.
use crate::{
    USER_AGENT,
    plugins::{ImportPlugin, PluginContext, PluginInfo},
    source_rank::ObjectImportSourceExt,
};
use anyhow::{Context, Result, bail};
use jellycache::{Cache, EscapeKey};
use jellycommon::{jellyobject::EMPTY, *};
use jellydb::RowNum;
use log::info;
use reqwest::{
    Client, ClientBuilder,
    header::{HeaderMap, HeaderName, HeaderValue},
    redirect::Policy,
};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::{collections::BTreeMap, sync::Arc};
use tokio::runtime::Handle;

/// Client for Wikidata entity lookups.
///
/// Also owns the [`WikimediaCommons`] client that the import plugin uses to
/// download the image named by an entity.
pub struct Wikidata {
    // HTTP client preconfigured in `new` with `accept` and `user-agent` headers.
    client: Client,
    // Used by `ImportPlugin::process` to fetch the image file itself.
    commons: WikimediaCommons,
}

/// Body of an entity request to `https://www.wikidata.org/entity/{id}`.
///
/// `query_image_path` looks entities up in `entities` by the requested id.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct WikidataResponse {
    entities: BTreeMap,
}

/// One entry of [`WikidataResponse`]'s `entities` map.
///
/// `claims` is looked up by property id (see [`properties`]) and the result is
/// iterated as a list of [`WikidataClaim`]-like items in `query_image_path`.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct WikidataEntity {
    pub pageid: u64,
    pub ns: u64,
    pub title: String,
    pub lastrevid: u64,
    pub modified: String,
    pub r#type: String,
    pub id: String,
    pub claims: BTreeMap>,
}

/// A single claim of an entity; the value of interest lives in `mainsnak`.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct WikidataClaim {
    pub r#type: String,
    pub id: String,
    pub rank: String,
    pub mainsnak: WikidataSnak,
}

/// The snak of a claim.
///
/// `query_image_path` checks `datatype` against `"commonsMedia"` and reads the
/// filename from `datavalue` when it is present.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct WikidataSnak {
    pub snaktype: String,
    pub property: String,
    pub hash: String,
    pub datavalue: Option,
    pub datatype: String,
}

/// The value carried by a snak, kept as untyped JSON in `value`.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct WikidataValue {
    pub value: Value,
    pub r#type: String,
}

/// Wikidata property ids used as keys into [`WikidataEntity`]'s `claims`.
pub mod properties {
    /// Property id under which `query_image_path` looks for image claims.
    pub static IMAGE: &str = "P18";
}

impl Default for Wikidata {
    /// Equivalent to [`Wikidata::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl Wikidata {
    /// Creates the client with default headers `accept: application/json` and
    /// `user-agent: USER_AGENT`, plus a fresh [`WikimediaCommons`] client.
    ///
    /// # Panics
    /// Panics if the underlying `ClientBuilder::build` call fails.
    pub fn new() -> Self {
        let client = ClientBuilder::new()
            .default_headers(HeaderMap::from_iter([
                (
                    HeaderName::from_static("accept"),
                    HeaderValue::from_static("application/json"),
                ),
                (
                    HeaderName::from_static("user-agent"),
                    HeaderValue::from_static(USER_AGENT),
                ),
            ]))
            .build()
            .unwrap();
        Self {
            client,
            commons: WikimediaCommons::new(),
        }
    }

    /// Returns the filename from the entity's image claims (property
    /// [`properties::IMAGE`]), if any.
    ///
    /// Claims are visited in order and the first one whose data value is a
    /// JSON string is returned. Yields `Ok(None)` when the response has no
    /// entry for `id`, the entity has no image claims, or no claim carries a
    /// string value.
    ///
    /// # Errors
    /// Propagates errors from [`Wikidata::query`], and fails when a visited
    /// image claim has a `datatype` other than `"commonsMedia"`.
    pub fn query_image_path(&self, cache: &Cache, id: &str, rt: &Handle) -> Result> {
        let response = self.query(cache, id, rt)?;
        if let Some(entity) = response.entities.get(id)
            && let Some(images) = entity.claims.get(properties::IMAGE)
        {
            for image in images {
                // NOTE(review): an unexpected datatype aborts the whole lookup
                // rather than skipping to the next claim; confirm this is intended.
                if image.mainsnak.datatype != "commonsMedia" {
                    bail!("image is of type {:?}", image.mainsnak.datatype);
                }
                // Claims without a data value, or with a non-string value, are skipped.
                if let Some(dv) = &image.mainsnak.datavalue
                    && let Value::String(filename) = &dv.value
                {
                    return Ok(Some(filename.to_owned()));
                }
            }
        }
        Ok(None)
    }

    /// Fetches the entity document for `id` through `cache.cache_memory`,
    /// using the key `ext/wikidata/{escaped id}.json`.
    ///
    /// The closure handed to the cache blocks on `rt` while it performs the
    /// GET request to `https://www.wikidata.org/entity/{id}`, rejects non-success
    /// HTTP statuses and decodes the body as JSON.
    /// Presumably the cache only runs the closure when the key is not already
    /// present; verify against `jellycache`.
    ///
    /// # Errors
    /// Any failure is returned with the context `"wikidata entity"`.
    pub fn query(&self, cache: &Cache, id: &str, rt: &Handle) -> Result> {
        cache
            .cache_memory(
                &format!("ext/wikidata/{}.json", EscapeKey(&id)),
                move || {
                    // Bridge from this synchronous API into the async HTTP client.
                    rt.block_on(async {
                        info!("entity query: {id}");
                        Ok(self
                            .client
                            .get(format!("https://www.wikidata.org/entity/{id}"))
                            .send()
                            .await?
                            .error_for_status()?
                            .json()
                            .await?)
                    })
                },
            )
            .context("wikidata entity")
    }
}

/// Client for downloading files from Wikimedia Commons.
pub struct WikimediaCommons {
    // HTTP client preconfigured in `new` with a `user-agent` header and a redirect limit.
    client: Client,
}

impl Default for WikimediaCommons {
    /// Equivalent to [`WikimediaCommons::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl WikimediaCommons {
    /// Creates the client with the default header `user-agent: USER_AGENT` and
    /// a redirect policy limited to 5 redirects.
    ///
    /// # Panics
    /// Panics if the underlying `ClientBuilder::build` call fails.
    pub fn new() -> Self {
        let client = ClientBuilder::new()
            .default_headers(HeaderMap::from_iter([(
                HeaderName::from_static("user-agent"),
                HeaderValue::from_static(USER_AGENT),
            )]))
            .redirect(Policy::limited(5))
            .build()
            .unwrap();
        Self { client }
    }

    /// Downloads the Commons file `filename` through `cache.store`, using the
    /// key `ext/wikimedia-commons/image/{escaped filename}.image`, and returns
    /// whatever `cache.store` yields for that entry.
    ///
    /// The closure handed to the cache blocks on `rt` while it requests
    /// `https://commons.wikimedia.org/wiki/Special:FilePath/{filename}` (with
    /// spaces replaced by underscores), rejects non-success HTTP statuses and
    /// collects the response body into a byte vector.
    ///
    /// # Errors
    /// Any failure is returned with the context `"mediawiki image by filename"`.
    pub fn image_by_filename(
        &self,
        cache: &Cache,
        filename: String,
        rt: &Handle,
    ) -> Result {
        cache
            .store(
                format!("ext/wikimedia-commons/image/{}.image", EscapeKey(&filename)),
                move || {
                    rt.block_on(async {
                        Ok(self
                            .client
                            // NOTE(review): only spaces are rewritten here; other
                            // characters in the filename are placed in the URL as-is.
                            .get(format!(
                                "https://commons.wikimedia.org/wiki/Special:FilePath/{}",
                                filename.replace(" ", "_")
                            ))
                            .send()
                            .await?
                            .error_for_status()?
                            .bytes()
                            .await?
                            .to_vec())
                    })
                },
            )
            .context("mediawiki image by filename")
    }
}

impl ImportPlugin for Wikidata {
    /// Describes the plugin: named `"wikidata"`, tagged `MSOURCE_WIKIDATA`,
    /// with `handle_process` enabled and every other field left at its default.
    fn info(&self) -> PluginInfo {
        PluginInfo {
            name: "wikidata",
            tag: MSOURCE_WIKIDATA,
            handle_process: true,
            ..Default::default()
        }
    }

    /// Attaches a cover picture to `node` based on its Wikidata identifier.
    ///
    /// Returns `Ok(())` without changes when the node has no `IDENT_WIKIDATA`
    /// identifier or the entity yields no image filename. Otherwise the image
    /// is fetched via [`WikimediaCommons::image_by_filename`] and inserted
    /// under `PICT_COVER` in the node's `NO_PICTURES` field.
    ///
    /// # Errors
    /// Propagates errors from the node lookup, the Wikidata query, the image
    /// download and the node update.
    fn process(&self, ct: &PluginContext, node: RowNum) -> Result<()> {
        // NOTE(review): `unwrap` panics if the lookup succeeds but yields no
        // node; confirm callers only pass existing rows.
        let data = ct.ic.get_node(node)?.unwrap();
        // A missing identifiers field is treated like an empty one.
        let Some(id) = data
            .get(NO_IDENTIFIERS)
            .unwrap_or(EMPTY)
            .get(IDENT_WIKIDATA)
        else {
            return Ok(());
        };
        let Some(filename) = self.query_image_path(&ct.ic.cache, id, ct.rt)? else {
            return Ok(());
        };
        let image = self
            .commons
            .image_by_filename(&ct.ic.cache, filename, ct.rt)?;
        ct.ic.update_node(node, |node| {
            node.update(NO_PICTURES, |pics| pics.insert_s(ct.is, PICT_COVER, &image))
        })?;
        Ok(())
    }
}