about summary refs log tree commit diff
path: root/import/src/plugins/infojson.rs
diff options
context:
space:
mode:
Diffstat (limited to 'import/src/plugins/infojson.rs')
-rw-r--r-- import/src/plugins/infojson.rs 272
1 files changed, 272 insertions, 0 deletions
diff --git a/import/src/plugins/infojson.rs b/import/src/plugins/infojson.rs
new file mode 100644
index 0000000..4dceeb8
--- /dev/null
+++ b/import/src/plugins/infojson.rs
@@ -0,0 +1,272 @@
+/*
+ This file is part of jellything (https://codeberg.org/metamuffin/jellything)
+ which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
+ Copyright (C) 2025 metamuffin <metamuffin.org>
+*/
+use anyhow::{Context, Result, anyhow};
+use jellycache::cache_read;
+use jellycommon::{
+ IdentifierType, NodeID, NodeKind, RatingType,
+ chrono::{Utc, format::Parsed},
+};
+use jellyremuxer::matroska::{AttachedFile, Segment};
+use log::info;
+use serde::{Deserialize, Serialize};
+use std::{collections::HashMap, fs::File, io::BufReader, path::Path};
+
+use crate::plugins::{ImportContext, ImportPlugin};
+
+/// Metadata document describing a single video, track or channel.
+///
+/// This is the type the [`Infojson`] plugin deserializes, both from
+/// `channel.info.json` files and from the JSON payload looked up for a media
+/// file's `info.json` attachment.
+///
+/// Only `epoch`, `extractor_key`, `extractor`, `id`, `title` and the three
+/// `webpage_url*` fields are required when deserializing; every `Option`
+/// field may be missing from the document.
+// NOTE(review): the field names look like the yt-dlp info.json schema — confirm
+// against the tool that actually produces these files.
+// NOTE(review): `average_rating` is typed as a string; verify the producer
+// never emits a number there, otherwise deserialization of the whole document fails.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct YVideo {
+ pub album: Option<String>,
+ pub age_limit: Option<usize>,
+ pub alt_title: Option<String>,
+ pub aspect_ratio: Option<f32>,
+ pub automatic_captions: Option<HashMap<String, Vec<YCaption>>>,
+ pub availability: Option<String>, // "public" | "private" | "unlisted",
+ pub average_rating: Option<String>,
+ pub categories: Option<Vec<String>>,
+ pub channel_follower_count: Option<usize>,
+ pub channel_id: Option<String>,
+ pub channel_is_verified: Option<bool>,
+ pub channel: Option<String>,
+ pub chapters: Option<Vec<YChapter>>,
+ pub comment_count: Option<usize>,
+ pub description: Option<String>,
+ pub display_id: Option<String>,
+ pub duration_string: Option<String>,
+ pub duration: Option<f64>,
+ pub epoch: usize,
+ pub extractor_key: String,
+ pub extractor: String,
+ pub formats: Option<Vec<YFormat>>,
+ pub fulltitle: Option<String>,
+ pub heatmap: Option<Vec<YHeatmapSample>>,
+ pub height: Option<i32>,
+ pub id: String,
+ pub is_live: Option<bool>,
+ pub like_count: Option<usize>,
+ pub media_type: Option<String>,
+ pub n_entries: Option<usize>,
+ pub original_url: Option<String>,
+ pub playable_in_embed: Option<bool>,
+ pub playlist_count: Option<usize>,
+ pub playlist_id: Option<String>,
+ pub playlist_index: Option<usize>,
+ pub playlist_title: Option<String>,
+ pub playlist_uploader_id: Option<String>,
+ pub playlist_uploader: Option<String>,
+ pub playlist: Option<String>,
+ pub tags: Option<Vec<String>>,
+ pub thumbnail: Option<String>,
+ pub thumbnails: Option<Vec<YThumbnail>>,
+ pub title: String,
+ pub upload_date: Option<String>,
+ pub uploader_id: Option<String>,
+ pub uploader_url: Option<String>,
+ pub uploader: Option<String>,
+ pub view_count: Option<usize>,
+ pub was_live: Option<bool>,
+ pub webpage_url_basename: String,
+ pub webpage_url_domain: String,
+ pub webpage_url: String,
+ pub width: Option<i32>,
+}
+
+/// One caption entry as stored in the lists of `YVideo::automatic_captions`.
+///
+/// Only `ext` is required when deserializing.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct YCaption {
+ pub url: Option<String>,
+ pub ext: String, //"vtt" | "json3" | "srv1" | "srv2" | "srv3" | "ttml",
+ pub protocol: Option<String>,
+ pub name: Option<String>,
+}
+
+/// One entry of `YVideo::formats`.
+///
+/// `format_id`, `ext`, `protocol`, `http_headers`, `audio_ext`, `video_ext`
+/// and `format` are required when deserializing; the remaining fields are
+/// optional.
+// NOTE(review): presumably each entry describes one downloadable rendition of
+// the media — confirm against the producer's schema.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct YFormat {
+ pub format_id: String,
+ pub format_note: Option<String>,
+ pub ext: String,
+ pub protocol: String,
+ pub acodec: Option<String>,
+ pub vcodec: Option<String>,
+ pub url: Option<String>,
+ pub width: Option<u32>,
+ pub height: Option<u32>,
+ pub fps: Option<f64>,
+ pub columns: Option<u32>,
+ pub fragments: Option<Vec<YFragment>>,
+ pub resolution: Option<String>,
+ pub dynamic_range: Option<String>,
+ pub aspect_ratio: Option<f64>,
+ pub http_headers: HashMap<String, String>,
+ pub audio_ext: String,
+ pub video_ext: String,
+ pub vbr: Option<f64>,
+ pub abr: Option<f64>,
+ pub format: String,
+}
+
+/// One entry of `YFormat::fragments`; both fields are optional.
+// NOTE(review): `duration` is presumably in seconds — confirm.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct YFragment {
+ pub url: Option<String>,
+ pub duration: Option<f64>,
+}
+
+/// One entry of `YVideo::thumbnails`.
+///
+/// `url` and `id` are required when deserializing; the other fields are optional.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct YThumbnail {
+ pub url: String,
+ pub preference: Option<i32>,
+ pub id: String,
+ pub height: Option<u32>,
+ pub width: Option<u32>,
+ pub resolution: Option<String>,
+}
+
+/// One entry of `YVideo::chapters`: a titled time range.
+// NOTE(review): `start_time` / `end_time` are presumably seconds from the
+// start of the media — confirm.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct YChapter {
+ pub start_time: f64,
+ pub end_time: f64,
+ pub title: String,
+}
+
+/// One entry of `YVideo::heatmap`: a time range with an associated value.
+// NOTE(review): units of the times and the range of `value` are not visible
+// here — confirm before relying on them.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct YHeatmapSample {
+ pub start_time: f64,
+ pub end_time: f64,
+ pub value: f64,
+}
+
+/// Parses an upload date written as `YYYYMMDD` into a Unix timestamp in
+/// milliseconds.
+///
+/// The first eight bytes of `d` are read as year (4), month (2) and day (2);
+/// the time of day is fixed to 00:00:00 in UTC. Anything after the eighth
+/// byte is ignored.
+///
+/// # Errors
+///
+/// Returns an error if `d` is shorter than eight bytes, if one of the
+/// component boundaries falls inside a multi-byte character, if a component
+/// is not a number, or if the components do not form a valid date.
+pub fn parse_upload_date(d: &str) -> anyhow::Result<i64> {
+    // Checked slicing: plain `&d[0..4]` panics on short or non-ASCII input, and
+    // this string is taken verbatim from an external JSON document.
+    let (Some(year), Some(month), Some(day)) = (d.get(0..4), d.get(4..6), d.get(6..8)) else {
+        return Err(anyhow!("upload date {d:?} is not of the form YYYYMMDD"));
+    };
+    let (year, month, day) = (
+        year.parse().context("parsing year")?,
+        month.parse().context("parsing month")?,
+        day.parse().context("parsing day")?,
+    );
+
+    let mut p = Parsed::new();
+    p.year = Some(year);
+    p.month = Some(month);
+    p.day = Some(day);
+    // Midnight, expressed in the 12-hour split that `Parsed` stores.
+    p.hour_div_12 = Some(0);
+    p.hour_mod_12 = Some(0);
+    p.minute = Some(0);
+    p.second = Some(0);
+    Ok(p.to_datetime_with_timezone(&Utc)?.timestamp_millis())
+}
+
+/// Returns `true` when the attachment is named `info.json` and declares the
+/// media type `application/json`.
+///
+/// The double reference lets the function be passed directly as the predicate
+/// of `Iterator::find` over `&AttachedFile` items.
+pub fn is_info_json(a: &&AttachedFile) -> bool {
+    if a.name != "info.json" {
+        return false;
+    }
+    a.media_type == "application/json"
+}
+/// Import plugin that fills node metadata from info.json documents
+/// (see [`YVideo`] for the document layout).
+pub struct Infojson;
+impl ImportPlugin for Infojson {
+    /// Imports channel metadata from a file named `channel.info.json`.
+    ///
+    /// Files with any other name are ignored. The parsed document is applied
+    /// to `parent`: the node is marked as a channel and receives the cleaned
+    /// title plus, where present, channel id, uploader handle, description
+    /// and follower count.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the file cannot be opened or deserialized, or if the node
+    /// update fails.
+    fn file(&self, ct: &ImportContext, parent: NodeID, path: &Path) -> Result<()> {
+        // `file_name` is `None` for paths without a final component (for
+        // example ones ending in `..`). Such a path cannot be the channel
+        // file, so it is skipped instead of panicking.
+        let is_channel_info = path
+            .file_name()
+            .is_some_and(|name| name.to_string_lossy() == "channel.info.json");
+        if !is_channel_info {
+            return Ok(());
+        }
+
+        info!("import channel info.json at {path:?}");
+        let data = serde_json::from_reader::<_, YVideo>(BufReader::new(File::open(path)?))?;
+        ct.db.update_node_init(parent, |node| {
+            node.kind = NodeKind::Channel;
+            node.title = Some(clean_uploader_name(&data.title).to_owned());
+            if let Some(cid) = data.channel_id {
+                node.identifiers.insert(IdentifierType::YoutubeChannel, cid);
+            }
+            if let Some(uid) = data.uploader_id {
+                node.identifiers
+                    .insert(IdentifierType::YoutubeChannelHandle, uid);
+            }
+            if let Some(desc) = data.description {
+                node.description = Some(desc);
+            }
+            if let Some(followers) = data.channel_follower_count {
+                node.ratings
+                    .insert(RatingType::YoutubeFollowers, followers as f64);
+            }
+            Ok(())
+        })?;
+
+        Ok(())
+    }
+
+    /// Imports video metadata for a media file that carries an `info.json`
+    /// attachment.
+    ///
+    /// The attachment's bytes are interpreted as a UTF-8 string and passed to
+    /// `cache_read`; the bytes returned from there are deserialized as
+    /// [`YVideo`]. Media without such an attachment is left untouched.
+    ///
+    /// # Errors
+    ///
+    /// Fails (with the context `infojson parsing`) if the attachment is not
+    /// valid UTF-8, if the cache lookup fails or yields nothing, or if the
+    /// JSON cannot be deserialized. Also fails if the upload date cannot be
+    /// parsed or the node update fails.
+    fn media(&self, ct: &ImportContext, node: NodeID, _path: &Path, seg: &Segment) -> Result<()> {
+        let infojson = seg
+            .attachments
+            .iter()
+            .flat_map(|a| &a.files)
+            .find(is_info_json)
+            .map(|att| {
+                // Attachment content comes from the media file, so malformed
+                // bytes must surface as an error rather than a panic.
+                let key = str::from_utf8(&att.data)
+                    .context("info json attachment is not valid utf-8")?;
+                let data = cache_read(key)?.ok_or_else(|| anyhow!("info json cache missing"))?;
+                anyhow::Ok(serde_json::from_slice::<YVideo>(&data)?)
+            })
+            .transpose()
+            .context("infojson parsing")?;
+
+        let Some(infojson) = infojson else {
+            return Ok(());
+        };
+
+        ct.db.update_node_init(node, |node| {
+            node.kind = if infojson.media_type.as_deref() == Some("short") {
+                NodeKind::ShortFormVideo
+            } else if infojson.album.is_some() {
+                NodeKind::Music
+            } else {
+                NodeKind::Video
+            };
+            node.title = Some(infojson.title);
+
+            // Subtitle preference: an alternative title that differs from the
+            // title, then the cleaned uploader name, then whatever was there.
+            let alt_title = infojson
+                .alt_title
+                .filter(|alt| Some(alt) != node.title.as_ref());
+            let uploader = infojson
+                .uploader
+                .as_deref()
+                .map(|u| clean_uploader_name(u).to_owned());
+            node.subtitle = alt_title
+                .or(uploader)
+                .or_else(|| node.subtitle.take());
+
+            node.tags.extend(infojson.tags.unwrap_or_default());
+
+            if let Some(desc) = infojson.description {
+                node.description = Some(desc)
+            }
+            node.tagline = Some(infojson.webpage_url);
+            if let Some(date) = &infojson.upload_date {
+                node.release_date =
+                    Some(parse_upload_date(date).context("parsing upload date")?);
+            }
+            match infojson.extractor.as_str() {
+                "youtube" => {
+                    node.identifiers
+                        .insert(IdentifierType::YoutubeVideo, infojson.id);
+                    // NOTE(review): a missing view count is stored as 0, unlike
+                    // the like count below which is skipped — confirm intended.
+                    node.ratings.insert(
+                        RatingType::YoutubeViews,
+                        infojson.view_count.unwrap_or_default() as f64,
+                    );
+                    if let Some(lc) = infojson.like_count {
+                        node.ratings.insert(RatingType::YoutubeLikes, lc as f64);
+                    }
+                }
+                "Bandcamp" => {
+                    node.identifiers
+                        .insert(IdentifierType::Bandcamp, infojson.id);
+                }
+                _ => (),
+            }
+
+            Ok(())
+        })?;
+        Ok(())
+    }
+}
+
+/// Strips decorations from an uploader or channel name.
+///
+/// Removes, in this order and at most once each: a trailing ` - Videos`, a
+/// trailing ` - Topic`, and a leading `Uploads from `. The result borrows
+/// from the input.
+fn clean_uploader_name(s: &str) -> &str {
+    let without_suffixes = [" - Videos", " - Topic"]
+        .into_iter()
+        .fold(s, |name, suffix| name.strip_suffix(suffix).unwrap_or(name));
+    match without_suffixes.strip_prefix("Uploads from ") {
+        Some(rest) => rest,
+        None => without_suffixes,
+    }
+}