refactor import plugins part 1

author: metamuffin <metamuffin@disroot.org> 2025-12-10 16:21:38 +0100
committer: metamuffin <metamuffin@disroot.org> 2025-12-10 16:21:38 +0100
commit: a0cfd77b4d19c43a28c4d82072e6ff136e336af3 (patch)
tree: 05df9f5faa54cef0ae4136fffddea57fbbafee6b /import/src/lib.rs
parent: 242d5763d451eed2402be7afde50cd9fa0d6bc79 (diff)
download: jellything-a0cfd77b4d19c43a28c4d82072e6ff136e336af3.tar
jellything-a0cfd77b4d19c43a28c4d82072e6ff136e336af3.tar.bz2
jellything-a0cfd77b4d19c43a28c4d82072e6ff136e336af3.tar.zst
1 files changed, 61 insertions, 412 deletions
diff --git a/import/src/lib.rs b/import/src/lib.rs
index e31127e..36c65d3 100644
--- a/import/src/lib.rs
+++ b/import/src/lib.rs
@@ -5,54 +5,47 @@
 */
 #![feature(duration_constants)]
 
-pub mod acoustid;
-pub mod infojson;
-pub mod musicbrainz;
-pub mod tmdb;
-pub mod trakt;
-pub mod vgmdb;
-pub mod wikidata;
-pub mod wikimedia_commons;
+pub mod plugins;
 
-use jellydb::Database;
-
-use crate::{tmdb::TmdbKind, trakt::TraktKind};
-use acoustid::{acoustid_fingerprint, AcoustID};
-use anyhow::{anyhow, bail, Context, Result};
-use infojson::YVideo;
-use jellycache::{cache_memory, cache_read, cache_store, HashKey};
+use crate::plugins::{
+    acoustid::AcoustID,
+    infojson::is_info_json,
+    misc::is_cover,
+    musicbrainz::{self, MusicBrainz},
+    tmdb::{self, Tmdb, TmdbKind},
+    trakt::{Trakt, TraktKind},
+    vgmdb::Vgmdb,
+    wikidata::Wikidata,
+    wikimedia_commons::WikimediaCommons,
+};
+use anyhow::{Context, Result, anyhow};
+use jellycache::{HashKey, cache_memory, cache_store};
 use jellycommon::{
-    Appearance, Asset, Chapter, CreditCategory, IdentifierType, MediaInfo, Node, NodeID, NodeKind,
-    PictureSlot, RatingType, SourceTrack, SourceTrackKind, TrackSource, Visibility,
+    Appearance, Asset, CreditCategory, IdentifierType, Node, NodeID, NodeKind, PictureSlot,
+    RatingType, Visibility,
 };
+use jellydb::Database;
 use jellyimport_fallback_generator::generate_fallback;
 use jellyremuxer::{
     demuxers::create_demuxer_autodetect,
-    matroska::{self, Segment},
+    matroska::{self, AttachedFile, Segment},
 };
 use log::info;
-use musicbrainz::MusicBrainz;
-use rayon::iter::{ParallelBridge, ParallelIterator};
+use rayon::iter::{IntoParallelIterator, ParallelIterator};
 use regex::Regex;
 use serde::{Deserialize, Serialize};
 use std::{
-    collections::{BTreeMap, HashMap},
-    fs::{read_to_string, File},
-    io::{BufReader, Read},
+    collections::BTreeMap,
+    fs::{File, read_to_string},
     path::{Path, PathBuf},
     sync::{Arc, LazyLock, Mutex},
     time::UNIX_EPOCH,
 };
-use tmdb::Tmdb;
 use tokio::{
     runtime::Handle,
     sync::{RwLock, Semaphore},
     task::spawn_blocking,
 };
-use trakt::Trakt;
-use vgmdb::Vgmdb;
-use wikidata::Wikidata;
-use wikimedia_commons::WikimediaCommons;
 
 #[rustfmt::skip]
 #[derive(Debug, Deserialize, Serialize, Default)]
@@ -89,6 +82,7 @@ pub const USER_AGENT: &str = concat!(
 
 static IMPORT_SEM: LazyLock<Semaphore> = LazyLock::new(|| Semaphore::new(1));
 pub static IMPORT_ERRORS: RwLock<Vec<String>> = RwLock::const_new(Vec::new());
+pub static IMPORT_PROGRESS: RwLock<Option<(usize, usize, String)>> = RwLock::const_new(None);
 
 static RE_EPISODE_FILENAME: LazyLock<Regex> =
     LazyLock::new(|| Regex::new(r#"([sS](?<season>\d+))?([eE](?<episode>\d+))( (.+))?"#).unwrap());
@@ -117,7 +111,7 @@ pub fn get_trakt() -> Result<Trakt> {
 }
 
 pub async fn import_wrap(db: Database, incremental: bool) -> Result<()> {
-    let _sem = IMPORT_SEM.try_acquire()?;
+    let _sem = IMPORT_SEM.try_acquire().context("already importing")?;
 
     let jh = spawn_blocking(move || {
         *IMPORT_ERRORS.blocking_write() = Vec::new();
@@ -144,11 +138,10 @@ fn import(db: &Database, incremental: bool) -> Result<()> {
 
     let rthandle = Handle::current();
 
+    let mut files = Vec::new();
     import_traverse(
         &CONF.media_path,
         db,
-        &apis,
-        &rthandle,
         incremental,
         NodeID::MIN,
         "",
@@ -156,8 +149,17 @@ fn import(db: &Database, incremental: bool) -> Result<()> {
             visibility: Visibility::Visible,
             use_acoustid: false,
         },
+        &mut files,
     )?;
 
+    files.into_par_iter().for_each(|(path, parent, iflags)| {
+        import_file(db, &apis, &rthandle, &path, parent, iflags);
+    });
+
+    // let meta = path.metadata()?;
+    //     let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs();
+    //     db.set_import_file_mtime(path, mtime)?;
+
     Ok(())
 }
 
@@ -170,12 +172,11 @@ struct InheritedFlags {
 fn import_traverse(
     path: &Path,
     db: &Database,
-    apis: &Apis,
-    rthandle: &Handle,
     incremental: bool,
     parent: NodeID,
     parent_slug_fragment: &str,
     mut iflags: InheritedFlags,
+    out: &mut Vec<(PathBuf, NodeID, InheritedFlags)>,
 ) -> Result<()> {
     if path.is_dir() {
         let slug_fragment = if path == CONF.media_path {
@@ -211,26 +212,18 @@ fn import_traverse(
             Ok(())
         })?;
 
-        path.read_dir()?.par_bridge().try_for_each(|e| {
+        for e in path.read_dir()? {
             let path = e?.path();
-            if let Err(e) = import_traverse(
-                &path,
-                db,
-                apis,
-                rthandle,
-                incremental,
-                id,
-                &slug_fragment,
-                iflags,
-            ) {
+            if let Err(e) = import_traverse(&path, db, incremental, id, &slug_fragment, iflags, out)
+            {
                 IMPORT_ERRORS
                     .blocking_write()
                     .push(format!("{path:?} import failed: {e:#}"));
             }
-            Ok::<_, anyhow::Error>(())
-        })?;
+        }
         return Ok(());
     }
+
     if path.is_file() {
         let meta = path.metadata()?;
         let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs();
@@ -243,8 +236,7 @@ fn import_traverse(
             }
         }
 
-        import_file(db, apis, rthandle, path, parent, iflags)?;
-        db.set_import_file_mtime(path, mtime)?;
+        out.push((path.to_owned(), parent, iflags));
     }
     Ok(())
 }
@@ -259,36 +251,6 @@ fn import_file(
 ) -> Result<()> {
     let filename = path.file_name().unwrap().to_string_lossy();
     match filename.as_ref() {
-        "poster.jpeg" | "poster.webp" | "poster.png" => {
-            info!("import poster at {path:?}");
-            let asset = Asset(cache_store(
-                format!("media/literal/{}-poster.image", HashKey(path)),
-                || {
-                    let mut data = Vec::new();
-                    File::open(path)?.read_to_end(&mut data)?;
-                    Ok(data)
-                },
-            )?);
-            db.update_node_init(parent, |node| {
-                node.pictures.insert(PictureSlot::Cover, asset);
-                Ok(())
-            })?;
-        }
-        "backdrop.jpeg" | "backdrop.webp" | "backdrop.png" => {
-            info!("import backdrop at {path:?}");
-            let asset = Asset(cache_store(
-                format!("media/literal/{}-poster.image", HashKey(path)),
-                || {
-                    let mut data = Vec::new();
-                    File::open(path)?.read_to_end(&mut data)?;
-                    Ok(data)
-                },
-            )?);
-            db.update_node_init(parent, |node| {
-                node.pictures.insert(PictureSlot::Backdrop, asset);
-                Ok(())
-            })?;
-        }
         "node.yaml" => {
             info!("import node info at {path:?}");
             let data = serde_yaml::from_str::<Node>(&read_to_string(path)?)?;
@@ -330,29 +292,6 @@ fn import_file(
                 })?;
             }
         }
-        "channel.info.json" => {
-            info!("import channel info.json at {path:?}");
-            let data = serde_json::from_reader::<_, YVideo>(BufReader::new(File::open(path)?))?;
-            db.update_node_init(parent, |node| {
-                node.kind = NodeKind::Channel;
-                node.title = Some(clean_uploader_name(&data.title).to_owned());
-                if let Some(cid) = data.channel_id {
-                    node.identifiers.insert(IdentifierType::YoutubeChannel, cid);
-                }
-                if let Some(uid) = data.uploader_id {
-                    node.identifiers
-                        .insert(IdentifierType::YoutubeChannelHandle, uid);
-                }
-                if let Some(desc) = data.description {
-                    node.description = Some(desc);
-                }
-                if let Some(followers) = data.channel_follower_count {
-                    node.ratings
-                        .insert(RatingType::YoutubeFollowers, followers as f64);
-                }
-                Ok(())
-            })?;
-        }
         _ => import_media_file(db, apis, rthandle, path, parent, iflags).context("media file")?,
     }
 
@@ -376,7 +315,7 @@ pub fn read_media_metadata(path: &Path) -> Result<Arc<matroska::Segment>> {
             // Replace data of useful attachments with cache key; delete data of all others
             if let Some(attachments) = &mut attachments {
                 for att in &mut attachments.files {
-                    if let Some(fname) = attachment_types::is_useful_attachment(&att) {
+                    if let Some(fname) = is_useful_attachment(&att) {
                         let key = cache_store(
                             format!("media/attachment/{}-{fname}", HashKey(path)),
                             || Ok(att.data.clone()),
@@ -400,22 +339,11 @@ pub fn read_media_metadata(path: &Path) -> Result<Arc<matroska::Segment>> {
     )
 }
 
-mod attachment_types {
-    use jellyremuxer::matroska::AttachedFile;
-
-    pub fn is_useful_attachment(a: &AttachedFile) -> Option<&'static str> {
-        match a {
-            _ if is_info_json(&a) => Some("info.json"),
-            _ if is_cover(&a) => Some("cover.image"),
-            _ => None,
-        }
-    }
-
-    pub fn is_info_json(a: &&AttachedFile) -> bool {
-        a.name == "info.json" && a.media_type == "application/json"
-    }
-    pub fn is_cover(a: &&AttachedFile) -> bool {
-        a.name.starts_with("cover") && a.media_type.starts_with("image/")
+pub fn is_useful_attachment(a: &AttachedFile) -> Option<&'static str> {
+    match a {
+        _ if is_info_json(&a) => Some("info.json"),
+        _ if is_cover(&a) => Some("cover.image"),
+        _ => None,
     }
 }
 
@@ -430,38 +358,6 @@ fn import_media_file(
     info!("media file {path:?}");
     let m = read_media_metadata(path)?;
 
-    let infojson = m
-        .attachments
-        .iter()
-        .flat_map(|a| &a.files)
-        .find(attachment_types::is_info_json)
-        .map(|att| {
-            let data = cache_read(str::from_utf8(&att.data).unwrap())?
-                .ok_or(anyhow!("info json cache missing"))?;
-            anyhow::Ok(serde_json::from_slice::<infojson::YVideo>(&data)?)
-        })
-        .transpose()
-        .context("infojson parsing")?;
-
-    let cover = m
-        .attachments
-        .iter()
-        .flat_map(|a| &a.files)
-        .find(attachment_types::is_cover)
-        .map(|att| Asset(att.data.clone().try_into().unwrap()));
-
-    let mut tags = m
-        .tags
-        .first()
-        .map(|tags| {
-            tags.tags
-                .iter()
-                .flat_map(|t| t.simple_tags.clone())
-                .map(|st| (st.name, st.string.unwrap_or_default()))
-                .collect::<HashMap<_, _>>()
-        })
-        .unwrap_or_default();
-
     let filename = path.file_name().unwrap().to_string_lossy().to_string();
 
     let mut episode_index = None;
@@ -480,215 +376,28 @@ fn import_media_file(
     let mut filename_toks = filename.split(".");
     let filepath_stem = filename_toks.next().unwrap();
 
-    let slug = infojson
-        .as_ref()
-        // TODO maybe also include the slug after the primary "id" key
-        .map(|ij| format!("{}-{}", ij.extractor.to_lowercase(), ij.id))
-        .unwrap_or_else(|| {
-            if let Some((s, e)) = episode_index {
-                format!(
-                    "{}-s{s}e{e}",
-                    make_kebab(
-                        &path
-                            .parent()
-                            .unwrap()
-                            .file_name()
-                            .unwrap_or_default()
-                            .to_string_lossy()
-                    )
-                )
-            } else {
-                make_kebab(filepath_stem)
-            }
-        });
+    let slug = if let Some((s, e)) = episode_index {
+        format!(
+            "{}-s{s}e{e}",
+            make_kebab(
+                &path
+                    .parent()
+                    .unwrap()
+                    .file_name()
+                    .unwrap_or_default()
+                    .to_string_lossy()
+            )
+        )
+    } else {
+        make_kebab(filepath_stem)
+    };
 
     let node = NodeID::from_slug(&slug);
 
-    let meta = path.metadata()?;
-    let mut eids = BTreeMap::<IdentifierType, String>::new();
-
-    for (key, value) in &tags {
-        match key.as_str() {
-            "MUSICBRAINZ_TRACKID" => {
-                eids.insert(IdentifierType::MusicbrainzRecording, value.to_owned())
-            }
-            "MUSICBRAINZ_ARTISTID" => {
-                eids.insert(IdentifierType::MusicbrainzArtist, value.to_owned())
-            }
-            "MUSICBRAINZ_ALBUMID" => {
-                eids.insert(IdentifierType::MusicbrainzRelease, value.to_owned())
-            }
-            "MUSICBRAINZ_ALBUMARTISTID" => {
-                None //? ignore this?
-            }
-            "MUSICBRAINZ_RELEASEGROUPID" => {
-                eids.insert(IdentifierType::MusicbrainzReleaseGroup, value.to_owned())
-            }
-            "ISRC" => eids.insert(IdentifierType::Isrc, value.to_owned()),
-            "BARCODE" => eids.insert(IdentifierType::Barcode, value.to_owned()),
-            _ => None,
-        };
-    }
-
-    if iflags.use_acoustid {
-        let fp = acoustid_fingerprint(path)?;
-        if let Some((atid, mbid)) = apis
-            .acoustid
-            .as_ref()
-            .ok_or(anyhow!("need acoustid"))?
-            .get_atid_mbid(&fp, rthandle)?
-        {
-            eids.insert(IdentifierType::AcoustIdTrack, atid);
-            eids.insert(IdentifierType::MusicbrainzRecording, mbid);
-        };
-    }
-
-    let mbrec = eids.get(&IdentifierType::MusicbrainzRecording).cloned();
-
     db.update_node_init(node, |node| {
         node.slug = slug;
-        node.title = m.info.title.clone().or(node.title.clone());
         node.visibility = iflags.visibility;
-
-        node.description = tags
-            .remove("DESCRIPTION")
-            .or(tags.remove("SYNOPSIS"))
-            .or(node.description.clone());
-        node.tagline = tags.remove("COMMENT").or(node.tagline.clone());
         node.parents.insert(parent);
-
-        node.identifiers.extend(eids);
-
-        if let Some(cover) = cover {
-            node.pictures.insert(PictureSlot::Cover, cover);
-        }
-
-        if let Some(ct) = tags.get("CONTENT_TYPE") {
-            node.kind = match ct.to_lowercase().trim() {
-                "movie" | "documentary" | "film" => NodeKind::Movie,
-                "music" | "recording" => NodeKind::Music,
-                _ => NodeKind::Unknown,
-            }
-        }
-
-        let tracks = m
-            .tracks
-            .as_ref()
-            .ok_or(anyhow!("no tracks"))?
-            .entries
-            .iter()
-            .map(|track| SourceTrack {
-                codec: track.codec_id.clone(),
-                language: track.language.clone(),
-                name: track.name.clone().unwrap_or_default(),
-                federated: Vec::new(),
-                kind: if let Some(video) = &track.video {
-                    SourceTrackKind::Video {
-                        width: video.pixel_width,
-                        height: video.pixel_height,
-                        fps: video.frame_rate,
-                    }
-                } else if let Some(audio) = &track.audio {
-                    SourceTrackKind::Audio {
-                        channels: audio.channels as usize,
-                        sample_rate: audio.sampling_frequency,
-                        bit_depth: audio.bit_depth.map(|r| r as usize),
-                    }
-                } else {
-                    SourceTrackKind::Subtitle
-                },
-                source: TrackSource::Local(path.to_owned(), track.track_number),
-            })
-            .collect::<Vec<_>>();
-
-        if let Some(infojson) = infojson {
-            node.kind = if !tracks
-                .iter()
-                .any(|t| matches!(t.kind, SourceTrackKind::Video { .. }))
-            {
-                NodeKind::Music
-            } else if infojson.duration.unwrap_or(0.) < 600.
-                && infojson.aspect_ratio.unwrap_or(2.) < 1.
-            {
-                NodeKind::ShortFormVideo
-            } else {
-                NodeKind::Video
-            };
-            node.title = Some(infojson.title);
-            node.subtitle = if infojson.alt_title != node.title {
-                infojson.alt_title
-            } else {
-                None
-            }
-            .or(infojson
-                .uploader
-                .as_ref()
-                .map(|u| clean_uploader_name(u).to_owned()))
-            .or(node.subtitle.clone());
-
-            node.tags.extend(infojson.tags.unwrap_or_default());
-
-            if let Some(desc) = infojson.description {
-                node.description = Some(desc)
-            }
-            node.tagline = Some(infojson.webpage_url);
-            if let Some(date) = &infojson.upload_date {
-                node.release_date =
-                    Some(infojson::parse_upload_date(date).context("parsing upload date")?);
-            }
-            match infojson.extractor.as_str() {
-                "youtube" => {
-                    node.identifiers
-                        .insert(IdentifierType::YoutubeVideo, infojson.id);
-                    node.ratings.insert(
-                        RatingType::YoutubeViews,
-                        infojson.view_count.unwrap_or_default() as f64,
-                    );
-                    if let Some(lc) = infojson.like_count {
-                        node.ratings.insert(RatingType::YoutubeLikes, lc as f64);
-                    }
-                }
-                "Bandcamp" => drop(
-                    node.identifiers
-                        .insert(IdentifierType::Bandcamp, infojson.id),
-                ),
-                _ => (),
-            }
-        }
-
-        // TODO merge size
-        node.storage_size = meta.len();
-        // TODO merge tracks
-        node.media = Some(MediaInfo {
-            chapters: m
-                .chapters
-                .clone()
-                .map(|c| {
-                    let mut chaps = Vec::new();
-                    if let Some(ee) = c.edition_entries.first() {
-                        for ca in &ee.chapter_atoms {
-                            let mut labels = Vec::new();
-                            for cd in &ca.displays {
-                                for lang in &cd.languages {
-                                    labels.push((lang.to_owned(), cd.string.clone()))
-                                }
-                            }
-                            chaps.push(Chapter {
-                                labels,
-                                time_start: Some(ca.time_start as f64 * 1e-9),
-                                time_end: ca.time_end.map(|ts| ts as f64 * 1e-9),
-                            })
-                        }
-                    }
-                    chaps
-                })
-                .unwrap_or_default(),
-            duration: fix_invalid_runtime(
-                m.info.duration.unwrap_or_default() * m.info.timestamp_scale as f64 * 1e-9,
-            ),
-            tracks,
-        });
-
         Ok(())
     })?;
 
@@ -741,9 +450,6 @@ fn import_media_file(
     for tok in filename_toks {
         apply_node_flag(db, rthandle, apis, node, tok)?;
     }
-    if let Some(mbid) = mbrec {
-        apply_musicbrainz_recording(db, rthandle, apis, node, mbid)?;
-    }
 
     Ok(())
 }
@@ -755,48 +461,6 @@ fn apply_node_flag(
     node: NodeID,
     flag: &str,
 ) -> Result<()> {
-    if let Some(value) = flag.strip_prefix("trakt-").or(flag.strip_prefix("trakt=")) {
-        let (kind, id) = value.split_once(":").unwrap_or(("", value));
-        let kind = match kind {
-            "movie" | "" => TraktKind::Movie,
-            "show" => TraktKind::Show,
-            "season" => TraktKind::Season,
-            "episode" => TraktKind::Episode,
-            _ => bail!("unknown trakt kind"),
-        };
-        apply_trakt_tmdb(db, rthandle, apis, node, kind, id)?;
-    }
-    if flag == "hidden" {
-        db.update_node_init(node, |node| {
-            node.visibility = node.visibility.min(Visibility::Hidden);
-            Ok(())
-        })?;
-    }
-    if flag == "reduced" {
-        db.update_node_init(node, |node| {
-            node.visibility = node.visibility.min(Visibility::Reduced);
-            Ok(())
-        })?;
-    }
-    if let Some(kind) = flag.strip_prefix("kind-").or(flag.strip_prefix("kind=")) {
-        let kind = match kind {
-            "movie" => NodeKind::Movie,
-            "video" => NodeKind::Video,
-            "music" => NodeKind::Music,
-            "short_form_video" => NodeKind::ShortFormVideo,
-            "collection" => NodeKind::Collection,
-            "channel" => NodeKind::Channel,
-            "show" => NodeKind::Show,
-            "series" => NodeKind::Series,
-            "season" => NodeKind::Season,
-            "episode" => NodeKind::Episode,
-            _ => bail!("unknown node kind"),
-        };
-        db.update_node_init(node, |node| {
-            node.kind = kind;
-            Ok(())
-        })?;
-    }
     if let Some(mbid) = flag.strip_prefix("mbrec-").or(flag.strip_prefix("mbrec=")) {
         apply_musicbrainz_recording(db, rthandle, apis, node, mbid.to_string())?
     }
@@ -1016,18 +680,3 @@ fn make_kebab(i: &str) -> String {
     }
     o
 }
-
-fn clean_uploader_name(mut s: &str) -> &str {
-    s = s.strip_suffix(" - Videos").unwrap_or(s);
-    s = s.strip_suffix(" - Topic").unwrap_or(s);
-    s = s.strip_prefix("Uploads from ").unwrap_or(s);
-    s
-}
-
-fn fix_invalid_runtime(d: f64) -> f64 {
-    match d {
-        // Broken durations found experimentally
-        359999.999 | 359999.000 | 86399.999 | 86399.99900000001 => 0.,
-        x => x,
-    }
-}
author	metamuffin <metamuffin@disroot.org>	2025-12-10 16:21:38 +0100
committer	metamuffin <metamuffin@disroot.org>	2025-12-10 16:21:38 +0100
commit	a0cfd77b4d19c43a28c4d82072e6ff136e336af3 (patch)
tree	05df9f5faa54cef0ae4136fffddea57fbbafee6b /import/src/lib.rs
parent	242d5763d451eed2402be7afde50cd9fa0d6bc79 (diff)
download	jellything-a0cfd77b4d19c43a28c4d82072e6ff136e336af3.tar jellything-a0cfd77b4d19c43a28c4d82072e6ff136e336af3.tar.bz2 jellything-a0cfd77b4d19c43a28c4d82072e6ff136e336af3.tar.zst