diff options
author | metamuffin <metamuffin@disroot.org> | 2025-04-25 12:30:30 +0200 |
---|---|---|
committer | metamuffin <metamuffin@disroot.org> | 2025-04-25 12:30:30 +0200 |
commit | ed5c3771f40a5107990fdbeafc3b22b88e9347be (patch) | |
tree | 51d544aba73a28133f106c26d9ece262a3ff7301 /import/src/wikidata.rs | |
parent | d3ab2847600eabeb102b969860e6e154cd24d954 (diff) | |
download | jellything-ed5c3771f40a5107990fdbeafc3b22b88e9347be.tar jellything-ed5c3771f40a5107990fdbeafc3b22b88e9347be.tar.bz2 jellything-ed5c3771f40a5107990fdbeafc3b22b88e9347be.tar.zst |
add wikidata api bindings (untested)
Diffstat (limited to 'import/src/wikidata.rs')
-rw-r--r-- | import/src/wikidata.rs | 120 |
1 files changed, 120 insertions, 0 deletions
diff --git a/import/src/wikidata.rs b/import/src/wikidata.rs new file mode 100644 index 0000000..267899f --- /dev/null +++ b/import/src/wikidata.rs @@ -0,0 +1,120 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin <metamuffin.org> +*/ + +use crate::USER_AGENT; +use anyhow::{bail, Result}; +use jellybase::cache::async_cache_memory; +use log::info; +use reqwest::{ + header::{HeaderMap, HeaderName, HeaderValue}, + Client, ClientBuilder, +}; +use serde::Deserialize; +use serde_json::Value; +use std::collections::BTreeMap; + +pub struct Wikidata { + client: Client, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct WikidataResponse { + entities: BTreeMap<String, WikidataEntity>, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct WikidataEntity { + pub pageid: u64, + pub ns: u64, + pub title: String, + pub lastrevid: u64, + pub modified: String, + pub r#type: String, + pub id: String, + pub claims: BTreeMap<String, Vec<WikidataClaim>>, +} +#[derive(Debug, Deserialize, Clone)] +pub struct WikidataClaim { + pub r#type: String, + pub id: String, + pub rank: String, + pub mainsnak: WikidataSnak, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct WikidataSnak { + pub snaktype: String, + pub property: String, + pub hash: String, + pub datavalue: WikidataValue, + pub datatype: String, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct WikidataValue { + pub value: Value, + pub r#type: String, +} + +pub mod properties { + pub static IMAGE: &str = "P18"; +} + +impl Wikidata { + pub fn new() -> Self { + let client = ClientBuilder::new() + .default_headers(HeaderMap::from_iter([ + ( + HeaderName::from_static("accept"), + HeaderValue::from_static("application/json"), + ), + ( + HeaderName::from_static("user-agent"), + HeaderValue::from_static(USER_AGENT), + ), + ])) + .build() + .unwrap(); + Self { client } + } + + pub async fn query_image_path(&self, id: String) -> Result<Option<String>> { + let response = self.query(id.clone()).await?; + if let Some(entity) = response.entities.get(&id) { + if let Some(images) = entity.claims.get(properties::IMAGE) { + for image in images { + if image.mainsnak.datatype != "commonsMedia" { + bail!("image is of type {:?}", image.mainsnak.datatype); + } + if let Value::String(filename) = &image.mainsnak.datavalue.value { + return Ok(Some(filename.to_owned())); + } + } + } + } + Ok(None) + } + + pub async fn query(&self, id: String) -> Result<WikidataResponse> { + let json = async_cache_memory("api-wikidata", id.clone(), || async move { + info!("entity query: {id}"); + + let resp = self + .client + .get(format!("https://www.wikidata.org/entity/{id}")) + .send() + .await? + .error_for_status()? + .text() + .await?; + + Ok(resp) + }) + .await?; + + Ok(serde_json::from_str(&json)?) + } +} |