aboutsummaryrefslogtreecommitdiff
path: root/import/src/plugins/wikidata.rs
diff options
context:
space:
mode:
Diffstat (limited to 'import/src/plugins/wikidata.rs')
-rw-r--r--import/src/plugins/wikidata.rs129
1 files changed, 129 insertions, 0 deletions
diff --git a/import/src/plugins/wikidata.rs b/import/src/plugins/wikidata.rs
new file mode 100644
index 0000000..358996e
--- /dev/null
+++ b/import/src/plugins/wikidata.rs
@@ -0,0 +1,129 @@
+/*
+ This file is part of jellything (https://codeberg.org/metamuffin/jellything)
+ which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
+ Copyright (C) 2025 metamuffin <metamuffin.org>
+*/
+
+use crate::USER_AGENT;
+use anyhow::{Context, Result, bail};
+use jellycache::{EscapeKey, cache_memory};
+use log::info;
+use reqwest::{
+ Client, ClientBuilder,
+ header::{HeaderMap, HeaderName, HeaderValue},
+};
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+use std::{collections::BTreeMap, sync::Arc};
+use tokio::runtime::Handle;
+
+/// Client for fetching entity data from `www.wikidata.org`.
+pub struct Wikidata {
+ /// HTTP client used for all requests; `Wikidata::new` builds it with default
+ /// `accept` and `user-agent` headers.
+ client: Client,
+}
+
+/// Deserialized JSON body of an entity request, as produced by `Wikidata::query`.
+#[derive(Debug, Deserialize, Serialize, Clone)]
+pub struct WikidataResponse {
+ /// Entities contained in the response, keyed by a string;
+ /// `Wikidata::query_image_path` looks entries up by the id that was requested.
+ entities: BTreeMap<String, WikidataEntity>,
+}
+
+/// One value of the `entities` map in a `WikidataResponse`.
+///
+/// Every field is required when deserializing: none is an `Option` and none
+/// carries a serde default.
+// NOTE(review): `pageid`, `ns`, `title`, `lastrevid` and `modified` presumably mirror the
+// page metadata of the Wikidata JSON entity format — confirm against the upstream docs.
+#[derive(Debug, Deserialize, Serialize, Clone)]
+pub struct WikidataEntity {
+ pub pageid: u64,
+ pub ns: u64,
+ pub title: String,
+ pub lastrevid: u64,
+ pub modified: String,
+ /// Raw identifier because `type` is a Rust keyword.
+ pub r#type: String,
+ pub id: String,
+ /// Claims grouped under a string key; `Wikidata::query_image_path` indexes this map
+ /// with a property id (`properties::IMAGE`).
+ pub claims: BTreeMap<String, Vec<WikidataClaim>>,
+}
+/// One element of the per-key claim lists in `WikidataEntity::claims`.
+#[derive(Debug, Deserialize, Serialize, Clone)]
+pub struct WikidataClaim {
+ /// Raw identifier because `type` is a Rust keyword.
+ pub r#type: String,
+ pub id: String,
+ // NOTE(review): kept as a free-form string; nothing in this file interprets the rank.
+ pub rank: String,
+ /// The snak that `Wikidata::query_image_path` inspects for the claim's value.
+ pub mainsnak: WikidataSnak,
+}
+
+/// The `mainsnak` part of a `WikidataClaim`.
+#[derive(Debug, Deserialize, Serialize, Clone)]
+pub struct WikidataSnak {
+ pub snaktype: String,
+ pub property: String,
+ pub hash: String,
+ /// Value carried by the snak; optional, so a snak without one deserializes to `None`.
+ pub datavalue: Option<WikidataValue>,
+ /// Datatype name; `Wikidata::query_image_path` requires `"commonsMedia"` for image claims.
+ pub datatype: String,
+}
+
+/// The `datavalue` of a `WikidataSnak`.
+#[derive(Debug, Deserialize, Serialize, Clone)]
+pub struct WikidataValue {
+ /// Untyped JSON payload; `Wikidata::query_image_path` only uses it when it is a JSON string.
+ pub value: Value,
+ /// Raw identifier because `type` is a Rust keyword.
+ pub r#type: String,
+}
+
+/// Property ids used as keys into `WikidataEntity::claims`.
+pub mod properties {
+ /// Key under which `Wikidata::query_image_path` looks for image claims.
+ pub static IMAGE: &str = "P18";
+}
+
+impl Default for Wikidata {
+    /// Builds the same client as `Wikidata::new`.
+    fn default() -> Self {
+        Wikidata::new()
+    }
+}
+
+impl Wikidata {
+    /// Creates a client whose requests carry `accept: application/json` and the
+    /// crate-wide `USER_AGENT` as default headers.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the underlying HTTP client cannot be built.
+    pub fn new() -> Self {
+        let default_headers: HeaderMap = [
+            (
+                HeaderName::from_static("accept"),
+                HeaderValue::from_static("application/json"),
+            ),
+            (
+                HeaderName::from_static("user-agent"),
+                HeaderValue::from_static(USER_AGENT),
+            ),
+        ]
+        .into_iter()
+        .collect();
+
+        Self {
+            client: ClientBuilder::new()
+                .default_headers(default_headers)
+                .build()
+                .unwrap(),
+        }
+    }
+
+    /// Looks up the image filename attached to entity `id`.
+    ///
+    /// Returns `Ok(None)` when the response has no entry for `id`, when that entry has
+    /// no `properties::IMAGE` claims, or when none of those claims carries a string value.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the entity query fails, or if an image claim with a datatype other than
+    /// `"commonsMedia"` is encountered before a usable filename is found.
+    pub fn query_image_path(&self, id: String, rt: &Handle) -> Result<Option<String>> {
+        let response = self.query(id.clone(), rt)?;
+
+        // No entity or no image claims at all: nothing to report, but not an error.
+        let Some(claims) = response
+            .entities
+            .get(&id)
+            .and_then(|entity| entity.claims.get(properties::IMAGE))
+        else {
+            return Ok(None);
+        };
+
+        for claim in claims {
+            let snak = &claim.mainsnak;
+            if snak.datatype != "commonsMedia" {
+                bail!("image is of type {:?}", snak.datatype);
+            }
+            // The first claim holding a plain string value wins.
+            if let Some(WikidataValue {
+                value: Value::String(filename),
+                ..
+            }) = &snak.datavalue
+            {
+                return Ok(Some(filename.to_owned()));
+            }
+        }
+        Ok(None)
+    }
+
+    /// Fetches the entity document for `id` through `cache_memory`.
+    ///
+    /// The fetch closure blocks on the runtime handle `rt` while performing a GET against
+    /// `https://www.wikidata.org/entity/{id}` and decoding the JSON body.
+    // NOTE(review): presumably `cache_memory` only runs the closure on a cache miss —
+    // confirm against jellycache.
+    ///
+    /// # Errors
+    ///
+    /// Fails, with context `"wikidata entity"`, if sending the request fails, the response
+    /// has an error status, or the body cannot be decoded.
+    pub fn query(&self, id: String, rt: &Handle) -> Result<Arc<WikidataResponse>> {
+        let cache_key = format!("ext/wikidata/{}.json", EscapeKey(&id));
+        cache_memory(&cache_key, move || {
+            rt.block_on(async {
+                info!("entity query: {id}");
+                let url = format!("https://www.wikidata.org/entity/{id}");
+                let response = self.client.get(url).send().await?.error_for_status()?;
+                Ok(response.json().await?)
+            })
+        })
+        .context("wikidata entity")
+    }
+}