/* This file is part of jellything (https://codeberg.org/metamuffin/jellything) which is licensed under the GNU Affero General Public License (version 3); see /COPYING. Copyright (C) 2025 metamuffin */ use anyhow::{anyhow, Context, Result}; use infojson::YVideo; use jellybase::{ assetfed::AssetInner, common::{ Chapter, LocalTrack, MediaInfo, Node, NodeID, NodeKind, Rating, SourceTrack, SourceTrackKind, TrackSource, }, database::Database, CONF, SECRETS, }; use matroska::matroska_metadata; use rayon::iter::{ParallelDrainRange, ParallelIterator}; use std::{ collections::HashMap, fs::{read_to_string, File}, io::BufReader, mem::swap, path::{Path, PathBuf}, sync::LazyLock, time::UNIX_EPOCH, }; use tmdb::Tmdb; use tokio::{ sync::{RwLock, Semaphore}, task::spawn_blocking, }; use trakt::Trakt; pub mod infojson; pub mod matroska; pub mod tmdb; pub mod trakt; static IMPORT_SEM: LazyLock = LazyLock::new(|| Semaphore::new(1)); pub static IMPORT_ERRORS: RwLock> = RwLock::const_new(Vec::new()); // static RE_EPISODE_FILENAME: LazyLock = // LazyLock::new(|| Regex::new(r#"([sS](\d+))?([eE](\d+))( (.+))?"#).unwrap()); struct Apis { trakt: Option, tmdb: Option, } pub fn is_importing() -> bool { IMPORT_SEM.available_permits() == 0 } pub async fn import_wrap(db: Database, incremental: bool) -> Result<()> { let _sem = IMPORT_SEM.try_acquire()?; let jh = spawn_blocking(move || { *IMPORT_ERRORS.blocking_write() = Vec::new(); if let Err(e) = import(&db, incremental) { IMPORT_ERRORS.blocking_write().push(format!("{e:#}")); } }); let _ = jh.await; Ok(()) } fn import(db: &Database, incremental: bool) -> Result<()> { let mut queue_prev = vec![(CONF.media_path.clone(), vec![])]; let mut queue_next; let apis = Apis { trakt: SECRETS.api.trakt.as_ref().map(|key| Trakt::new(key)), tmdb: SECRETS.api.tmdb.as_ref().map(|key| Tmdb::new(key)), }; drop((apis.tmdb, apis.trakt)); while !queue_prev.is_empty() { queue_next = queue_prev .par_drain(..) .flat_map_iter(move |(path, slugs)| { match import_iter_inner(&path, db, slugs, incremental) { Ok(ch) => ch, Err(e) => { IMPORT_ERRORS.blocking_write().push(format!("{e:#}")); Vec::new() } } }) .collect::>(); swap(&mut queue_next, &mut queue_prev); } Ok(()) } fn import_iter_inner( path: &Path, db: &Database, mut slugs: Vec, incremental: bool, ) -> Result)>> { if path.is_dir() { let mut o = Vec::new(); let child_slug = if path == CONF.media_path { "library".to_string() } else { path.file_name() .ok_or(anyhow!("parent no filename"))? .to_string_lossy() .to_string() }; slugs.push(child_slug); for e in path.read_dir()? { let path = e?.path(); o.push((path, slugs.clone())); } return Ok(o); } if path.is_file() { let meta = path.metadata()?; let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs(); if incremental { if let Some(last_mtime) = db.get_import_file_mtime(&path)? { if last_mtime >= mtime { return Ok(Vec::new()); } } } let (slug, parent_slug) = if slugs.len() > 2 { ( format!("{}-{}", slugs[slugs.len() - 2], slugs[slugs.len() - 1]), Some(format!( "{}-{}", slugs[slugs.len() - 3], slugs[slugs.len() - 2] )), ) } else if slugs.len() > 1 { ( format!("{}-{}", slugs[slugs.len() - 2], slugs[slugs.len() - 1]), Some(slugs[slugs.len() - 2].to_string()), ) } else { (slugs[0].to_string(), None) }; import_file(&db, &path, slug, parent_slug).context(anyhow!("{path:?}"))?; db.set_import_file_mtime(&path, mtime)?; } return Ok(Vec::new()); } fn import_file( db: &Database, path: &Path, slug: String, parent_slug: Option, ) -> Result<()> { let id = NodeID::from_slug(&slug); let parent_id = parent_slug.map(|e| NodeID::from_slug(&e)); let filename = path.file_name().unwrap().to_string_lossy(); match filename.as_ref() { "poster.jpeg" | "poster.webp" | "poster.png" => { db.update_node_init(id, |node| { node.slug = slug.to_string(); node.poster = Some(AssetInner::Media(path.to_owned()).ser()); Ok(()) })?; } "backdrop.jpeg" | "backdrop.webp" | "backdrop.png" => { db.update_node_init(id, |node| { node.slug = slug.to_string(); node.backdrop = Some(AssetInner::Media(path.to_owned()).ser()); Ok(()) })?; } "node.yaml" => { let raw = format!("slug: {slug}\n{}", read_to_string(path)?); let data = serde_yaml::from_str::(&raw)?; db.update_node_init(id, |node| { node.parents.extend(parent_id); node.slug = slug.to_string(); fn merge_option(a: &mut Option, b: Option) { if b.is_some() { *a = b; } } merge_option(&mut node.kind, data.kind); merge_option(&mut node.title, data.title); merge_option(&mut node.tagline, data.tagline); merge_option(&mut node.description, data.description); merge_option(&mut node.index, data.index); merge_option(&mut node.release_date, data.release_date); node.external_ids.extend(data.external_ids); Ok(()) })?; } "channel.info.json" => { let data = serde_json::from_reader::<_, YVideo>(BufReader::new(File::open(path)?))?; db.update_node_init(id, |node| { node.parents.extend(parent_id); node.kind = Some(NodeKind::Channel); node.slug = slug.to_string(); let mut title = data.title.as_str(); title = title.strip_suffix(" - Videos").unwrap_or(title); title = title.strip_suffix(" - Topic").unwrap_or(title); title = title.strip_prefix("Uploads from ").unwrap_or(title); node.title = Some(title.to_owned()); if let Some(cid) = data.channel_id { node.external_ids.insert("youtube:channel".to_string(), cid); } if let Some(uid) = data.uploader_id { node.external_ids .insert("youtube:channel-name".to_string(), uid); } if let Some(desc) = data.description { node.description = Some(desc); } if let Some(followers) = data.channel_follower_count { node.ratings .insert(Rating::YoutubeFollowers, followers as f64); } Ok(()) })?; } _ => import_media_file(db, path, id).context("media file")?, } Ok(()) } fn import_media_file(db: &Database, path: &Path, parent: NodeID) -> Result<()> { let Some(m) = (*matroska_metadata(path)?).to_owned() else { return Ok(()); }; let info = m.info.ok_or(anyhow!("no info"))?; let tracks = m.tracks.ok_or(anyhow!("no tracks"))?; let mut tags = m .tags .map(|tags| { tags.tags .into_iter() .flat_map(|t| t.simple_tags) .map(|st| (st.name, st.string.unwrap_or_default())) .collect::>() }) .unwrap_or_default(); let filepath_stem = path .file_stem() .ok_or(anyhow!("no file stem"))? .to_string_lossy() .to_string(); let slug = m .infojson .as_ref() .map(|ij| format!("youtube-{}", ij.id)) .unwrap_or(make_kebab(&filepath_stem)); db.update_node_init(NodeID::from_slug(&slug), |node| { node.slug = slug; node.title = info.title; node.poster = m.cover.clone(); node.description = tags.remove("DESCRIPTION"); node.tagline = tags.remove("COMMENT"); node.parents.insert(parent); let tracks = tracks .entries .into_iter() .map(|track| SourceTrack { codec: track.codec_id, language: track.language, name: track.name.unwrap_or_default(), default_duration: track.default_duration, federated: Vec::new(), kind: if let Some(video) = track.video { SourceTrackKind::Video { width: video.pixel_width, height: video.pixel_height, display_width: video.display_width, display_height: video.display_height, display_unit: Some(video.display_unit), fps: video.frame_rate, } } else if let Some(audio) = track.audio { SourceTrackKind::Audio { channels: audio.channels as usize, sample_rate: audio.sampling_frequency, bit_depth: audio.bit_depth.map(|r| r as usize), } } else { SourceTrackKind::Subtitles }, source: TrackSource::Local(LocalTrack { codec_private: track.codec_private, path: path.to_owned(), track: track.track_number as usize, }), }) .collect::>(); if let Some(infojson) = m.infojson { node.kind = Some( if !tracks .iter() .any(|t| matches!(t.kind, SourceTrackKind::Video { .. })) { NodeKind::Music } else if infojson.duration.unwrap_or(0.) < 600. && infojson.aspect_ratio.unwrap_or(2.) < 1. { NodeKind::ShortFormVideo } else { NodeKind::Video }, ); node.title = Some(infojson.title); if let Some(desc) = infojson.description { node.description = Some(desc) } node.tagline = Some(infojson.webpage_url); if let Some(date) = &infojson.upload_date { node.release_date = Some(infojson::parse_upload_date(date).context("parsing upload date")?); } match infojson.extractor.as_str() { "youtube" => drop( node.external_ids .insert("youtube:video".to_string(), infojson.id), ), "Bandcamp" => drop( node.external_ids .insert("bandcamp".to_string(), infojson.id), ), _ => (), } node.ratings.insert( Rating::YoutubeViews, infojson.view_count.unwrap_or_default() as f64, ); if let Some(lc) = infojson.like_count { node.ratings.insert(Rating::YoutubeLikes, lc as f64); } } node.media = Some(MediaInfo { chapters: m .chapters .clone() .map(|c| { let mut chaps = Vec::new(); if let Some(ee) = c.edition_entries.first() { for ca in &ee.chapter_atoms { let mut labels = Vec::new(); for cd in &ca.displays { for lang in &cd.languages { labels.push((lang.to_owned(), cd.string.clone())) } } chaps.push(Chapter { labels, time_start: Some(ca.time_start as f64 * 1e-9), time_end: ca.time_end.map(|ts| ts as f64 * 1e-9), }) } } chaps }) .unwrap_or_default(), duration: (info.duration.unwrap_or_default() * info.timestamp_scale as f64) * 1e-9, tracks, }); Ok(()) })?; Ok(()) } fn make_kebab(i: &str) -> String { let mut o = String::with_capacity(i.len()); for c in i.chars() { o.extend(match c { 'A'..='Z' | 'a'..='z' | '0'..='9' | '_' | '-' => Some(c), ' ' => Some('-'), _ => None, }); } o }