diff options
Diffstat (limited to 'import/src/plugins/infojson.rs')
| -rw-r--r-- | import/src/plugins/infojson.rs | 272 |
1 files changed, 272 insertions, 0 deletions
diff --git a/import/src/plugins/infojson.rs b/import/src/plugins/infojson.rs new file mode 100644 index 0000000..4dceeb8 --- /dev/null +++ b/import/src/plugins/infojson.rs @@ -0,0 +1,272 @@ +/* + This file is part of jellything (https://codeberg.org/metamuffin/jellything) + which is licensed under the GNU Affero General Public License (version 3); see /COPYING. + Copyright (C) 2025 metamuffin <metamuffin.org> +*/ +use anyhow::{Context, Result, anyhow}; +use jellycache::cache_read; +use jellycommon::{ + IdentifierType, NodeID, NodeKind, RatingType, + chrono::{Utc, format::Parsed}, +}; +use jellyremuxer::matroska::{AttachedFile, Segment}; +use log::info; +use serde::{Deserialize, Serialize}; +use std::{collections::HashMap, fs::File, io::BufReader, path::Path}; + +use crate::plugins::{ImportContext, ImportPlugin}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YVideo { + pub album: Option<String>, + pub age_limit: Option<usize>, + pub alt_title: Option<String>, + pub aspect_ratio: Option<f32>, + pub automatic_captions: Option<HashMap<String, Vec<YCaption>>>, + pub availability: Option<String>, // "public" | "private" | "unlisted", + pub average_rating: Option<String>, + pub categories: Option<Vec<String>>, + pub channel_follower_count: Option<usize>, + pub channel_id: Option<String>, + pub channel_is_verified: Option<bool>, + pub channel: Option<String>, + pub chapters: Option<Vec<YChapter>>, + pub comment_count: Option<usize>, + pub description: Option<String>, + pub display_id: Option<String>, + pub duration_string: Option<String>, + pub duration: Option<f64>, + pub epoch: usize, + pub extractor_key: String, + pub extractor: String, + pub formats: Option<Vec<YFormat>>, + pub fulltitle: Option<String>, + pub heatmap: Option<Vec<YHeatmapSample>>, + pub height: Option<i32>, + pub id: String, + pub is_live: Option<bool>, + pub like_count: Option<usize>, + pub media_type: Option<String>, + pub n_entries: Option<usize>, + pub original_url: Option<String>, + pub playable_in_embed: Option<bool>, + pub playlist_count: Option<usize>, + pub playlist_id: Option<String>, + pub playlist_index: Option<usize>, + pub playlist_title: Option<String>, + pub playlist_uploader_id: Option<String>, + pub playlist_uploader: Option<String>, + pub playlist: Option<String>, + pub tags: Option<Vec<String>>, + pub thumbnail: Option<String>, + pub thumbnails: Option<Vec<YThumbnail>>, + pub title: String, + pub upload_date: Option<String>, + pub uploader_id: Option<String>, + pub uploader_url: Option<String>, + pub uploader: Option<String>, + pub view_count: Option<usize>, + pub was_live: Option<bool>, + pub webpage_url_basename: String, + pub webpage_url_domain: String, + pub webpage_url: String, + pub width: Option<i32>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YCaption { + pub url: Option<String>, + pub ext: String, //"vtt" | "json3" | "srv1" | "srv2" | "srv3" | "ttml", + pub protocol: Option<String>, + pub name: Option<String>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YFormat { + pub format_id: String, + pub format_note: Option<String>, + pub ext: String, + pub protocol: String, + pub acodec: Option<String>, + pub vcodec: Option<String>, + pub url: Option<String>, + pub width: Option<u32>, + pub height: Option<u32>, + pub fps: Option<f64>, + pub columns: Option<u32>, + pub fragments: Option<Vec<YFragment>>, + pub resolution: Option<String>, + pub dynamic_range: Option<String>, + pub aspect_ratio: Option<f64>, + pub http_headers: HashMap<String, String>, + pub audio_ext: String, + pub video_ext: String, + pub vbr: Option<f64>, + pub abr: Option<f64>, + pub format: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YFragment { + pub url: Option<String>, + pub duration: Option<f64>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YThumbnail { + pub url: String, + pub preference: Option<i32>, + pub id: String, + pub height: Option<u32>, + pub width: Option<u32>, + pub resolution: Option<String>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YChapter { + pub start_time: f64, + pub end_time: f64, + pub title: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YHeatmapSample { + pub start_time: f64, + pub end_time: f64, + pub value: f64, +} + +pub fn parse_upload_date(d: &str) -> anyhow::Result<i64> { + let (year, month, day) = (&d[0..4], &d[4..6], &d[6..8]); + let (year, month, day) = ( + year.parse().context("parsing year")?, + month.parse().context("parsing month")?, + day.parse().context("parsing day")?, + ); + + let mut p = Parsed::new(); + p.year = Some(year); + p.month = Some(month); + p.day = Some(day); + p.hour_div_12 = Some(0); + p.hour_mod_12 = Some(0); + p.minute = Some(0); + p.second = Some(0); + Ok(p.to_datetime_with_timezone(&Utc)?.timestamp_millis()) +} + +pub fn is_info_json(a: &&AttachedFile) -> bool { + a.name == "info.json" && a.media_type == "application/json" +} +pub struct Infojson; +impl ImportPlugin for Infojson { + fn file(&self, ct: &ImportContext, parent: NodeID, path: &Path) -> Result<()> { + let filename = path.file_name().unwrap().to_string_lossy(); + if filename != "channel.info.json" { + return Ok(()); + } + + info!("import channel info.json at {path:?}"); + let data = serde_json::from_reader::<_, YVideo>(BufReader::new(File::open(path)?))?; + ct.db.update_node_init(parent, |node| { + node.kind = NodeKind::Channel; + node.title = Some(clean_uploader_name(&data.title).to_owned()); + if let Some(cid) = data.channel_id { + node.identifiers.insert(IdentifierType::YoutubeChannel, cid); + } + if let Some(uid) = data.uploader_id { + node.identifiers + .insert(IdentifierType::YoutubeChannelHandle, uid); + } + if let Some(desc) = data.description { + node.description = Some(desc); + } + if let Some(followers) = data.channel_follower_count { + node.ratings + .insert(RatingType::YoutubeFollowers, followers as f64); + } + Ok(()) + })?; + + Ok(()) + } + + fn media(&self, ct: &ImportContext, node: NodeID, _path: &Path, seg: &Segment) -> Result<()> { + let infojson = seg + .attachments + .iter() + .flat_map(|a| &a.files) + .find(is_info_json) + .map(|att| { + let data = cache_read(str::from_utf8(&att.data).unwrap())? + .ok_or(anyhow!("info json cache missing"))?; + anyhow::Ok(serde_json::from_slice::<YVideo>(&data)?) + }) + .transpose() + .context("infojson parsing")?; + + if let Some(infojson) = infojson { + ct.db.update_node_init(node, |node| { + node.kind = if let Some(ty) = &infojson.media_type + && ty == "short" + { + NodeKind::ShortFormVideo + } else if infojson.album.is_some() { + NodeKind::Music + } else { + NodeKind::Video + }; + node.title = Some(infojson.title); + node.subtitle = if infojson.alt_title != node.title { + infojson.alt_title + } else { + None + } + .or(infojson + .uploader + .as_ref() + .map(|u| clean_uploader_name(u).to_owned())) + .or(node.subtitle.clone()); + + node.tags.extend(infojson.tags.unwrap_or_default()); + + if let Some(desc) = infojson.description { + node.description = Some(desc) + } + node.tagline = Some(infojson.webpage_url); + if let Some(date) = &infojson.upload_date { + node.release_date = + Some(parse_upload_date(date).context("parsing upload date")?); + } + match infojson.extractor.as_str() { + "youtube" => { + node.identifiers + .insert(IdentifierType::YoutubeVideo, infojson.id); + node.ratings.insert( + RatingType::YoutubeViews, + infojson.view_count.unwrap_or_default() as f64, + ); + if let Some(lc) = infojson.like_count { + node.ratings.insert(RatingType::YoutubeLikes, lc as f64); + } + } + "Bandcamp" => drop( + node.identifiers + .insert(IdentifierType::Bandcamp, infojson.id), + ), + _ => (), + } + + Ok(()) + })?; + } + Ok(()) + } +} + +fn clean_uploader_name(mut s: &str) -> &str { + s = s.strip_suffix(" - Videos").unwrap_or(s); + s = s.strip_suffix(" - Topic").unwrap_or(s); + s = s.strip_prefix("Uploads from ").unwrap_or(s); + s +} |