From a0cfd77b4d19c43a28c4d82072e6ff136e336af3 Mon Sep 17 00:00:00 2001 From: metamuffin Date: Wed, 10 Dec 2025 16:21:38 +0100 Subject: refactor import plugins part 1 --- import/src/plugins/infojson.rs | 272 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 272 insertions(+) create mode 100644 import/src/plugins/infojson.rs (limited to 'import/src/plugins/infojson.rs') diff --git a/import/src/plugins/infojson.rs b/import/src/plugins/infojson.rs new file mode 100644 index 0000000..4dceeb8 --- /dev/null +++ b/import/src/plugins/infojson.rs @@ -0,0 +1,272 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin +*/ +use anyhow::{Context, Result, anyhow}; +use jellycache::cache_read; +use jellycommon::{ + IdentifierType, NodeID, NodeKind, RatingType, + chrono::{Utc, format::Parsed}, +}; +use jellyremuxer::matroska::{AttachedFile, Segment}; +use log::info; +use serde::{Deserialize, Serialize}; +use std::{collections::HashMap, fs::File, io::BufReader, path::Path}; + +use crate::plugins::{ImportContext, ImportPlugin}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YVideo { + pub album: Option, + pub age_limit: Option, + pub alt_title: Option, + pub aspect_ratio: Option, + pub automatic_captions: Option>>, + pub availability: Option, // "public" | "private" | "unlisted", + pub average_rating: Option, + pub categories: Option>, + pub channel_follower_count: Option, + pub channel_id: Option, + pub channel_is_verified: Option, + pub channel: Option, + pub chapters: Option>, + pub comment_count: Option, + pub description: Option, + pub display_id: Option, + pub duration_string: Option, + pub duration: Option, + pub epoch: usize, + pub extractor_key: String, + pub extractor: String, + pub formats: Option>, + pub fulltitle: Option, + pub heatmap: Option>, + pub height: Option, + pub id: String, + pub is_live: Option, + pub like_count: Option, + pub media_type: Option, + pub n_entries: Option, + pub original_url: Option, + pub playable_in_embed: Option, + pub playlist_count: Option, + pub playlist_id: Option, + pub playlist_index: Option, + pub playlist_title: Option, + pub playlist_uploader_id: Option, + pub playlist_uploader: Option, + pub playlist: Option, + pub tags: Option>, + pub thumbnail: Option, + pub thumbnails: Option>, + pub title: String, + pub upload_date: Option, + pub uploader_id: Option, + pub uploader_url: Option, + pub uploader: Option, + pub view_count: Option, + pub was_live: Option, + pub webpage_url_basename: String, + pub webpage_url_domain: String, + pub webpage_url: String, + pub width: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YCaption { + pub url: Option, + pub ext: String, //"vtt" | "json3" | "srv1" | "srv2" | "srv3" | "ttml", + pub protocol: Option, + pub name: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YFormat { + pub format_id: String, + pub format_note: Option, + pub ext: String, + pub protocol: String, + pub acodec: Option, + pub vcodec: Option, + pub url: Option, + pub width: Option, + pub height: Option, + pub fps: Option, + pub columns: Option, + pub fragments: Option>, + pub resolution: Option, + pub dynamic_range: Option, + pub aspect_ratio: Option, + pub http_headers: HashMap, + pub audio_ext: String, + pub video_ext: String, + pub vbr: Option, + pub abr: Option, + pub format: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YFragment { + pub url: Option, + pub duration: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YThumbnail { + pub url: String, + pub preference: Option, + pub id: String, + pub height: Option, + pub width: Option, + pub resolution: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YChapter { + pub start_time: f64, + pub end_time: f64, + pub title: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YHeatmapSample { + pub start_time: f64, + pub end_time: f64, + pub value: f64, +} + +pub fn parse_upload_date(d: &str) -> anyhow::Result { + let (year, month, day) = (&d[0..4], &d[4..6], &d[6..8]); + let (year, month, day) = ( + year.parse().context("parsing year")?, + month.parse().context("parsing month")?, + day.parse().context("parsing day")?, + ); + + let mut p = Parsed::new(); + p.year = Some(year); + p.month = Some(month); + p.day = Some(day); + p.hour_div_12 = Some(0); + p.hour_mod_12 = Some(0); + p.minute = Some(0); + p.second = Some(0); + Ok(p.to_datetime_with_timezone(&Utc)?.timestamp_millis()) +} + +pub fn is_info_json(a: &&AttachedFile) -> bool { + a.name == "info.json" && a.media_type == "application/json" +} +pub struct Infojson; +impl ImportPlugin for Infojson { + fn file(&self, ct: &ImportContext, parent: NodeID, path: &Path) -> Result<()> { + let filename = path.file_name().unwrap().to_string_lossy(); + if filename != "channel.info.json" { + return Ok(()); + } + + info!("import channel info.json at {path:?}"); + let data = serde_json::from_reader::<_, YVideo>(BufReader::new(File::open(path)?))?; + ct.db.update_node_init(parent, |node| { + node.kind = NodeKind::Channel; + node.title = Some(clean_uploader_name(&data.title).to_owned()); + if let Some(cid) = data.channel_id { + node.identifiers.insert(IdentifierType::YoutubeChannel, cid); + } + if let Some(uid) = data.uploader_id { + node.identifiers + .insert(IdentifierType::YoutubeChannelHandle, uid); + } + if let Some(desc) = data.description { + node.description = Some(desc); + } + if let Some(followers) = data.channel_follower_count { + node.ratings + .insert(RatingType::YoutubeFollowers, followers as f64); + } + Ok(()) + })?; + + Ok(()) + } + + fn media(&self, ct: &ImportContext, node: NodeID, _path: &Path, seg: &Segment) -> Result<()> { + let infojson = seg + .attachments + .iter() + .flat_map(|a| &a.files) + .find(is_info_json) + .map(|att| { + let data = cache_read(str::from_utf8(&att.data).unwrap())? + .ok_or(anyhow!("info json cache missing"))?; + anyhow::Ok(serde_json::from_slice::(&data)?) + }) + .transpose() + .context("infojson parsing")?; + + if let Some(infojson) = infojson { + ct.db.update_node_init(node, |node| { + node.kind = if let Some(ty) = &infojson.media_type + && ty == "short" + { + NodeKind::ShortFormVideo + } else if infojson.album.is_some() { + NodeKind::Music + } else { + NodeKind::Video + }; + node.title = Some(infojson.title); + node.subtitle = if infojson.alt_title != node.title { + infojson.alt_title + } else { + None + } + .or(infojson + .uploader + .as_ref() + .map(|u| clean_uploader_name(u).to_owned())) + .or(node.subtitle.clone()); + + node.tags.extend(infojson.tags.unwrap_or_default()); + + if let Some(desc) = infojson.description { + node.description = Some(desc) + } + node.tagline = Some(infojson.webpage_url); + if let Some(date) = &infojson.upload_date { + node.release_date = + Some(parse_upload_date(date).context("parsing upload date")?); + } + match infojson.extractor.as_str() { + "youtube" => { + node.identifiers + .insert(IdentifierType::YoutubeVideo, infojson.id); + node.ratings.insert( + RatingType::YoutubeViews, + infojson.view_count.unwrap_or_default() as f64, + ); + if let Some(lc) = infojson.like_count { + node.ratings.insert(RatingType::YoutubeLikes, lc as f64); + } + } + "Bandcamp" => drop( + node.identifiers + .insert(IdentifierType::Bandcamp, infojson.id), + ), + _ => (), + } + + Ok(()) + })?; + } + Ok(()) + } +} + +fn clean_uploader_name(mut s: &str) -> &str { + s = s.strip_suffix(" - Videos").unwrap_or(s); + s = s.strip_suffix(" - Topic").unwrap_or(s); + s = s.strip_prefix("Uploads from ").unwrap_or(s); + s +} -- cgit v1.3