aboutsummaryrefslogtreecommitdiff
path: root/import/src/wikidata.rs
blob: 1b7f06e9124caaa64b0286b13cd8a60e26d97a44 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
/*
    This file is part of jellything (https://codeberg.org/metamuffin/jellything)
    which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
    Copyright (C) 2025 metamuffin <metamuffin.org>
*/

use crate::USER_AGENT;
use anyhow::{bail, Context, Result};
use jellycache::async_cache_memory;
use log::info;
use reqwest::{
    header::{HeaderMap, HeaderName, HeaderValue},
    Client, ClientBuilder,
};
use serde::Deserialize;
use serde_json::Value;
use std::collections::BTreeMap;

/// Client for fetching entity data from wikidata.org.
///
/// Wraps a `reqwest` [`Client`] that [`Wikidata::new`] configures with default
/// `accept` and `user-agent` headers.
pub struct Wikidata {
    // HTTP client reused for every request issued through this instance.
    client: Client,
}

/// Top-level JSON document that [`Wikidata::query`] deserializes.
#[derive(Debug, Deserialize, Clone)]
pub struct WikidataResponse {
    // Entities contained in the response, keyed by string.
    // `Wikidata::query_image_path` looks up the requested entity id in this map.
    entities: BTreeMap<String, WikidataEntity>,
}

/// One entry of [`WikidataResponse`]'s `entities` map.
///
/// Each field is read from the JSON key of the same name (`r#type` from
/// `type`). No field is optional or defaulted, so deserialization fails when
/// any of these keys is missing.
#[derive(Debug, Deserialize, Clone)]
pub struct WikidataEntity {
    pub pageid: u64,
    // NOTE(review): presumably the MediaWiki namespace number — confirm against the API docs.
    pub ns: u64,
    pub title: String,
    pub lastrevid: u64,
    // Kept as the raw string delivered by the API; no date parsing happens here.
    pub modified: String,
    pub r#type: String,
    pub id: String,
    /// Claims grouped by property id (for example [`properties::IMAGE`]).
    pub claims: BTreeMap<String, Vec<WikidataClaim>>,
}
/// A single claim listed under a property in [`WikidataEntity::claims`].
#[derive(Debug, Deserialize, Clone)]
pub struct WikidataClaim {
    pub r#type: String,
    pub id: String,
    // Raw rank string as delivered by the API.
    // NOTE(review): presumably one of "preferred", "normal" or "deprecated" — confirm.
    pub rank: String,
    /// The snak holding this claim's property, datatype and (optional) value.
    pub mainsnak: WikidataSnak,
}

/// The `mainsnak` object of a [`WikidataClaim`].
#[derive(Debug, Deserialize, Clone)]
pub struct WikidataSnak {
    pub snaktype: String,
    pub property: String,
    pub hash: String,
    /// The value carried by this snak; `None` when the JSON has no `datavalue`
    /// key (or it is `null`).
    pub datavalue: Option<WikidataValue>,
    /// Datatype name; `Wikidata::query_image_path` requires `"commonsMedia"`
    /// for image claims.
    pub datatype: String,
}

/// The `datavalue` object of a [`WikidataSnak`].
#[derive(Debug, Deserialize, Clone)]
pub struct WikidataValue {
    /// Untyped JSON payload. For image claims `Wikidata::query_image_path`
    /// expects a JSON string here and returns it as the file name.
    pub value: Value,
    // Type tag delivered next to `value`; not interpreted anywhere in this file.
    pub r#type: String,
}

/// Wikidata property ids used as keys into [`WikidataEntity::claims`].
pub mod properties {
    /// Property whose claims `Wikidata::query_image_path` reads to find an
    /// entity's image file name.
    pub static IMAGE: &str = "P18";
}

impl Wikidata {
    /// Creates a client that sends `accept: application/json` and the crate's
    /// `USER_AGENT` as default headers on every request.
    ///
    /// # Panics
    ///
    /// Panics if the underlying `reqwest` client cannot be built.
    pub fn new() -> Self {
        let client = ClientBuilder::new()
            .default_headers(HeaderMap::from_iter([
                (
                    HeaderName::from_static("accept"),
                    HeaderValue::from_static("application/json"),
                ),
                (
                    HeaderName::from_static("user-agent"),
                    HeaderValue::from_static(USER_AGENT),
                ),
            ]))
            .build()
            .expect("failed to build the wikidata http client");
        Self { client }
    }

    /// Returns the file name stored in the image claim ([`properties::IMAGE`])
    /// of the entity `id`, if there is one.
    ///
    /// Claims ranked `"preferred"` are tried first, then every other claim
    /// that is not ranked `"deprecated"`. The first candidate whose value is a
    /// JSON string wins. `Ok(None)` is returned when the entity is missing
    /// from the response, has no image claims, or no candidate carries a
    /// string value.
    ///
    /// # Errors
    ///
    /// Fails if [`Wikidata::query`] fails, or if a candidate claim has a
    /// datatype other than `"commonsMedia"`.
    pub async fn query_image_path(&self, id: String) -> Result<Option<String>> {
        let response = self.query(id.clone()).await?;

        // A missing entity or a missing image property is treated like an
        // empty claim list, which falls through to `Ok(None)` below.
        let images = response
            .entities
            .get(&id)
            .and_then(|entity| entity.claims.get(properties::IMAGE))
            .map(Vec::as_slice)
            .unwrap_or_default();

        // Wikidata marks the best statements as "preferred" and known-wrong or
        // superseded ones as "deprecated"; honour that instead of blindly
        // taking whichever claim happens to be listed first.
        let preferred = images.iter().filter(|claim| claim.rank == "preferred");
        let remaining = images
            .iter()
            .filter(|claim| claim.rank != "preferred" && claim.rank != "deprecated");

        for image in preferred.chain(remaining) {
            if image.mainsnak.datatype != "commonsMedia" {
                bail!("image is of type {:?}", image.mainsnak.datatype);
            }
            // Claims without a value, or with a non-string value, are skipped.
            if let Some(Value::String(filename)) =
                image.mainsnak.datavalue.as_ref().map(|dv| &dv.value)
            {
                return Ok(Some(filename.to_owned()));
            }
        }
        Ok(None)
    }

    /// Fetches and parses the JSON document of the entity `id` from
    /// `https://www.wikidata.org/entity/{id}`.
    ///
    /// The HTTP fetch is wrapped in `async_cache_memory` under the
    /// `"api-wikidata"` namespace with `id` as key; the caching semantics are
    /// defined by `jellycache`. The response body is parsed on every call.
    ///
    /// # Errors
    ///
    /// Fails if the request cannot be sent, the server answers with an error
    /// status, the body cannot be read, or the body is not a valid
    /// [`WikidataResponse`].
    pub async fn query(&self, id: String) -> Result<WikidataResponse> {
        let json = async_cache_memory("api-wikidata", id.clone(), || async move {
            info!("entity query: {id}");

            let resp = self
                .client
                .get(format!("https://www.wikidata.org/entity/{id}"))
                .send()
                .await?
                .error_for_status()?
                .text()
                .await?;

            Ok(resp)
        })
        .await
        .context("wikidata entity")?;

        serde_json::from_str(&json).context("parse wikidata entity")
    }
}

impl Default for Wikidata {
    /// Equivalent to [`Wikidata::new`].
    fn default() -> Self {
        Self::new()
    }
}