aboutsummaryrefslogtreecommitdiff
path: root/import/src/wikidata.rs
diff options
context:
space:
mode:
author metamuffin <metamuffin@disroot.org> 2025-04-25 12:30:30 +0200
committer metamuffin <metamuffin@disroot.org> 2025-04-25 12:30:30 +0200
commit ed5c3771f40a5107990fdbeafc3b22b88e9347be (patch)
tree 51d544aba73a28133f106c26d9ece262a3ff7301 /import/src/wikidata.rs
parent d3ab2847600eabeb102b969860e6e154cd24d954 (diff)
download jellything-ed5c3771f40a5107990fdbeafc3b22b88e9347be.tar
jellything-ed5c3771f40a5107990fdbeafc3b22b88e9347be.tar.bz2
jellything-ed5c3771f40a5107990fdbeafc3b22b88e9347be.tar.zst
add wikidata api bindings (untested)
Diffstat (limited to 'import/src/wikidata.rs')
-rw-r--r-- import/src/wikidata.rs 120
1 files changed, 120 insertions, 0 deletions
diff --git a/import/src/wikidata.rs b/import/src/wikidata.rs
new file mode 100644
index 0000000..267899f
--- /dev/null
+++ b/import/src/wikidata.rs
@@ -0,0 +1,120 @@
+/*
+ This file is part of jellything (https://codeberg.org/metamuffin/jellything)
+ which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
+ Copyright (C) 2025 metamuffin <metamuffin.org>
+*/
+
+use crate::USER_AGENT;
+use anyhow::{bail, Result};
+use jellybase::cache::async_cache_memory;
+use log::info;
+use reqwest::{
+ header::{HeaderMap, HeaderName, HeaderValue},
+ Client, ClientBuilder,
+};
+use serde::Deserialize;
+use serde_json::Value;
+use std::collections::BTreeMap;
+
+/// Handle for querying entity data from wikidata.org.
+///
+/// Construct it with `Wikidata::new`, which configures the default request headers.
+pub struct Wikidata {
+ /// HTTP client used for every request issued through this handle.
+ client: Client,
+}
+
+/// Top-level shape of the JSON document that `Wikidata::query` deserializes.
+#[derive(Debug, Deserialize, Clone)]
+pub struct WikidataResponse {
+ /// Entities contained in the response; `Wikidata::query_image_path` looks entries up
+ /// by the requested entity ID.
+ entities: BTreeMap<String, WikidataEntity>,
+}
+
+/// One entity from a `WikidataResponse`.
+///
+/// The field names mirror the JSON keys of the response. Every field is required,
+/// so deserialization fails if any of them is absent.
+#[derive(Debug, Deserialize, Clone)]
+pub struct WikidataEntity {
+ pub pageid: u64,
+ pub ns: u64,
+ pub title: String,
+ pub lastrevid: u64,
+ // Kept as the raw string from the response; no date parsing happens here.
+ pub modified: String,
+ pub r#type: String,
+ pub id: String,
+ /// Claims grouped by property ID (for example `properties::IMAGE`).
+ pub claims: BTreeMap<String, Vec<WikidataClaim>>,
+}
+/// A single claim listed under one property of a `WikidataEntity`.
+#[derive(Debug, Deserialize, Clone)]
+pub struct WikidataClaim {
+ pub r#type: String,
+ pub id: String,
+ /// Rank of the claim, kept as the raw string delivered in the response.
+ pub rank: String,
+ /// The snak holding the claim's property, datatype and value.
+ pub mainsnak: WikidataSnak,
+}
+
+/// The `mainsnak` of a `WikidataClaim`: property, datatype and (usually) a value.
+///
+/// Per the Wikibase JSON format, only snaks whose `snaktype` is `"value"` carry a
+/// `datavalue` key; `"somevalue"` and `"novalue"` snaks omit it. Requiring the key
+/// would make the whole entity fail to deserialize as soon as a single such snak
+/// appears, so a missing `datavalue` is filled with a null placeholder instead.
+#[derive(Debug, Deserialize, Clone)]
+pub struct WikidataSnak {
+    /// Kind of snak; check this before trusting `datavalue`.
+    pub snaktype: String,
+    pub property: String,
+    pub hash: String,
+    /// Value of the snak. When the response has no `datavalue` key this is a
+    /// placeholder whose `value` is `Value::Null` and whose `type` is empty.
+    #[serde(default = "missing_datavalue")]
+    pub datavalue: WikidataValue,
+    pub datatype: String,
+}
+
+/// Placeholder used when a snak has no `datavalue` key in the response.
+fn missing_datavalue() -> WikidataValue {
+    WikidataValue {
+        value: Value::Null,
+        r#type: String::new(),
+    }
+}
+
+/// The `datavalue` payload of a `WikidataSnak`.
+#[derive(Debug, Deserialize, Clone)]
+pub struct WikidataValue {
+ /// Raw JSON value; its shape is not fixed by this type, so consumers must match on it
+ /// (`Wikidata::query_image_path` only accepts a JSON string here).
+ pub value: Value,
+ /// Type tag delivered alongside the value.
+ pub r#type: String,
+}
+
+/// Wikidata property IDs referenced by this module.
+pub mod properties {
+ /// Property under which `Wikidata::query_image_path` looks for image claims.
+ pub static IMAGE: &str = "P18";
+}
+
+impl Wikidata {
+    /// Creates a handle whose requests send `accept: application/json` and the crate's
+    /// `USER_AGENT` as default headers.
+    ///
+    /// # Panics
+    /// Panics if the underlying HTTP client cannot be constructed.
+    pub fn new() -> Self {
+        let client = ClientBuilder::new()
+            .default_headers(HeaderMap::from_iter([
+                (
+                    HeaderName::from_static("accept"),
+                    HeaderValue::from_static("application/json"),
+                ),
+                (
+                    HeaderName::from_static("user-agent"),
+                    HeaderValue::from_static(USER_AGENT),
+                ),
+            ]))
+            .build()
+            .expect("failed to build the HTTP client for wikidata");
+        Self { client }
+    }
+
+    /// Looks up the image claim (`properties::IMAGE`) of entity `id` and returns its
+    /// string value, presumably a Wikimedia Commons file name given the
+    /// `commonsMedia` datatype check.
+    ///
+    /// Claims are chosen by rank: the first `"preferred"` claim wins, `"deprecated"`
+    /// claims are never returned, and otherwise the first remaining claim is used.
+    /// Claims whose value is not a JSON string are skipped.
+    ///
+    /// Returns `Ok(None)` when the entity is not in the response, has no image
+    /// claims, or none of them is usable.
+    ///
+    /// # Errors
+    /// Fails if the query fails, or if an image claim inspected before a preferred
+    /// one is found has a datatype other than `commonsMedia`.
+    pub async fn query_image_path(&self, id: String) -> Result<Option<String>> {
+        let response = self.query(id.clone()).await?;
+        let images = response
+            .entities
+            .get(&id)
+            .and_then(|entity| entity.claims.get(properties::IMAGE));
+
+        // First non-deprecated, non-preferred candidate; only used if no preferred
+        // claim turns up.
+        let mut fallback = None;
+        for image in images.into_iter().flatten() {
+            if image.mainsnak.datatype != "commonsMedia" {
+                bail!("image is of type {:?}", image.mainsnak.datatype);
+            }
+            let filename = match &image.mainsnak.datavalue.value {
+                Value::String(filename) => filename,
+                _ => continue,
+            };
+            match image.rank.as_str() {
+                "preferred" => return Ok(Some(filename.to_owned())),
+                // Deprecated statements are kept for the record only and must not
+                // be presented as current data.
+                "deprecated" => {}
+                _ => {
+                    fallback.get_or_insert(filename);
+                }
+            }
+        }
+        Ok(fallback.cloned())
+    }
+
+    /// Fetches `https://www.wikidata.org/entity/{id}` and deserializes the body.
+    ///
+    /// The response text is obtained through `async_cache_memory` under the
+    /// `"api-wikidata"` kind, keyed by `id`.
+    /// NOTE(review): presumably a cache hit skips the HTTP request entirely; confirm
+    /// against `jellybase::cache`.
+    ///
+    /// # Errors
+    /// Fails if the request cannot be sent, the server answers with an error status,
+    /// the body cannot be read, or the JSON does not match `WikidataResponse`.
+    pub async fn query(&self, id: String) -> Result<WikidataResponse> {
+        let json = async_cache_memory("api-wikidata", id.clone(), || async move {
+            info!("entity query: {id}");
+
+            let resp = self
+                .client
+                .get(format!("https://www.wikidata.org/entity/{id}"))
+                .send()
+                .await?
+                .error_for_status()?
+                .text()
+                .await?;
+
+            Ok(resp)
+        })
+        .await?;
+
+        Ok(serde_json::from_str(&json)?)
+    }
+}
+
+impl Default for Wikidata {
+    /// Equivalent to `Wikidata::new`.
+    fn default() -> Self {
+        Self::new()
+    }
+}