aboutsummaryrefslogtreecommitdiff
path: root/import/src/lib.rs
diff options
context:
space:
mode:
author metamuffin <metamuffin@disroot.org> 2025-12-10 16:21:38 +0100
committer metamuffin <metamuffin@disroot.org> 2025-12-10 16:21:38 +0100
commit a0cfd77b4d19c43a28c4d82072e6ff136e336af3 (patch)
tree 05df9f5faa54cef0ae4136fffddea57fbbafee6b /import/src/lib.rs
parent 242d5763d451eed2402be7afde50cd9fa0d6bc79 (diff)
download jellything-a0cfd77b4d19c43a28c4d82072e6ff136e336af3.tar
jellything-a0cfd77b4d19c43a28c4d82072e6ff136e336af3.tar.bz2
jellything-a0cfd77b4d19c43a28c4d82072e6ff136e336af3.tar.zst
refactor import plugins part 1
Diffstat (limited to 'import/src/lib.rs')
-rw-r--r-- import/src/lib.rs 473
1 files changed, 61 insertions, 412 deletions
diff --git a/import/src/lib.rs b/import/src/lib.rs
index e31127e..36c65d3 100644
--- a/import/src/lib.rs
+++ b/import/src/lib.rs
@@ -5,54 +5,47 @@
*/
#![feature(duration_constants)]
-pub mod acoustid;
-pub mod infojson;
-pub mod musicbrainz;
-pub mod tmdb;
-pub mod trakt;
-pub mod vgmdb;
-pub mod wikidata;
-pub mod wikimedia_commons;
+pub mod plugins;
-use jellydb::Database;
-
-use crate::{tmdb::TmdbKind, trakt::TraktKind};
-use acoustid::{acoustid_fingerprint, AcoustID};
-use anyhow::{anyhow, bail, Context, Result};
-use infojson::YVideo;
-use jellycache::{cache_memory, cache_read, cache_store, HashKey};
+use crate::plugins::{
+ acoustid::AcoustID,
+ infojson::is_info_json,
+ misc::is_cover,
+ musicbrainz::{self, MusicBrainz},
+ tmdb::{self, Tmdb, TmdbKind},
+ trakt::{Trakt, TraktKind},
+ vgmdb::Vgmdb,
+ wikidata::Wikidata,
+ wikimedia_commons::WikimediaCommons,
+};
+use anyhow::{Context, Result, anyhow};
+use jellycache::{HashKey, cache_memory, cache_store};
use jellycommon::{
- Appearance, Asset, Chapter, CreditCategory, IdentifierType, MediaInfo, Node, NodeID, NodeKind,
- PictureSlot, RatingType, SourceTrack, SourceTrackKind, TrackSource, Visibility,
+ Appearance, Asset, CreditCategory, IdentifierType, Node, NodeID, NodeKind, PictureSlot,
+ RatingType, Visibility,
};
+use jellydb::Database;
use jellyimport_fallback_generator::generate_fallback;
use jellyremuxer::{
demuxers::create_demuxer_autodetect,
- matroska::{self, Segment},
+ matroska::{self, AttachedFile, Segment},
};
use log::info;
-use musicbrainz::MusicBrainz;
-use rayon::iter::{ParallelBridge, ParallelIterator};
+use rayon::iter::{IntoParallelIterator, ParallelIterator};
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::{
- collections::{BTreeMap, HashMap},
- fs::{read_to_string, File},
- io::{BufReader, Read},
+ collections::BTreeMap,
+ fs::{File, read_to_string},
path::{Path, PathBuf},
sync::{Arc, LazyLock, Mutex},
time::UNIX_EPOCH,
};
-use tmdb::Tmdb;
use tokio::{
runtime::Handle,
sync::{RwLock, Semaphore},
task::spawn_blocking,
};
-use trakt::Trakt;
-use vgmdb::Vgmdb;
-use wikidata::Wikidata;
-use wikimedia_commons::WikimediaCommons;
#[rustfmt::skip]
#[derive(Debug, Deserialize, Serialize, Default)]
@@ -89,6 +82,7 @@ pub const USER_AGENT: &str = concat!(
static IMPORT_SEM: LazyLock<Semaphore> = LazyLock::new(|| Semaphore::new(1));
pub static IMPORT_ERRORS: RwLock<Vec<String>> = RwLock::const_new(Vec::new());
+pub static IMPORT_PROGRESS: RwLock<Option<(usize, usize, String)>> = RwLock::const_new(None);
static RE_EPISODE_FILENAME: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r#"([sS](?<season>\d+))?([eE](?<episode>\d+))( (.+))?"#).unwrap());
@@ -117,7 +111,7 @@ pub fn get_trakt() -> Result<Trakt> {
}
pub async fn import_wrap(db: Database, incremental: bool) -> Result<()> {
- let _sem = IMPORT_SEM.try_acquire()?;
+ let _sem = IMPORT_SEM.try_acquire().context("already importing")?;
let jh = spawn_blocking(move || {
*IMPORT_ERRORS.blocking_write() = Vec::new();
@@ -144,11 +138,10 @@ fn import(db: &Database, incremental: bool) -> Result<()> {
let rthandle = Handle::current();
+ let mut files = Vec::new();
import_traverse(
&CONF.media_path,
db,
- &apis,
- &rthandle,
incremental,
NodeID::MIN,
"",
@@ -156,8 +149,17 @@ fn import(db: &Database, incremental: bool) -> Result<()> {
visibility: Visibility::Visible,
use_acoustid: false,
},
+ &mut files,
)?;
+ files.into_par_iter().for_each(|(path, parent, iflags)| {
+ import_file(db, &apis, &rthandle, &path, parent, iflags);
+ });
+
+ // let meta = path.metadata()?;
+ // let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs();
+ // db.set_import_file_mtime(path, mtime)?;
+
Ok(())
}
@@ -170,12 +172,11 @@ struct InheritedFlags {
fn import_traverse(
path: &Path,
db: &Database,
- apis: &Apis,
- rthandle: &Handle,
incremental: bool,
parent: NodeID,
parent_slug_fragment: &str,
mut iflags: InheritedFlags,
+ out: &mut Vec<(PathBuf, NodeID, InheritedFlags)>,
) -> Result<()> {
if path.is_dir() {
let slug_fragment = if path == CONF.media_path {
@@ -211,26 +212,18 @@ fn import_traverse(
Ok(())
})?;
- path.read_dir()?.par_bridge().try_for_each(|e| {
+ for e in path.read_dir()? {
let path = e?.path();
- if let Err(e) = import_traverse(
- &path,
- db,
- apis,
- rthandle,
- incremental,
- id,
- &slug_fragment,
- iflags,
- ) {
+ if let Err(e) = import_traverse(&path, db, incremental, id, &slug_fragment, iflags, out)
+ {
IMPORT_ERRORS
.blocking_write()
.push(format!("{path:?} import failed: {e:#}"));
}
- Ok::<_, anyhow::Error>(())
- })?;
+ }
return Ok(());
}
+
if path.is_file() {
let meta = path.metadata()?;
let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs();
@@ -243,8 +236,7 @@ fn import_traverse(
}
}
- import_file(db, apis, rthandle, path, parent, iflags)?;
- db.set_import_file_mtime(path, mtime)?;
+ out.push((path.to_owned(), parent, iflags));
}
Ok(())
}
@@ -259,36 +251,6 @@ fn import_file(
) -> Result<()> {
let filename = path.file_name().unwrap().to_string_lossy();
match filename.as_ref() {
- "poster.jpeg" | "poster.webp" | "poster.png" => {
- info!("import poster at {path:?}");
- let asset = Asset(cache_store(
- format!("media/literal/{}-poster.image", HashKey(path)),
- || {
- let mut data = Vec::new();
- File::open(path)?.read_to_end(&mut data)?;
- Ok(data)
- },
- )?);
- db.update_node_init(parent, |node| {
- node.pictures.insert(PictureSlot::Cover, asset);
- Ok(())
- })?;
- }
- "backdrop.jpeg" | "backdrop.webp" | "backdrop.png" => {
- info!("import backdrop at {path:?}");
- let asset = Asset(cache_store(
- format!("media/literal/{}-poster.image", HashKey(path)),
- || {
- let mut data = Vec::new();
- File::open(path)?.read_to_end(&mut data)?;
- Ok(data)
- },
- )?);
- db.update_node_init(parent, |node| {
- node.pictures.insert(PictureSlot::Backdrop, asset);
- Ok(())
- })?;
- }
"node.yaml" => {
info!("import node info at {path:?}");
let data = serde_yaml::from_str::<Node>(&read_to_string(path)?)?;
@@ -330,29 +292,6 @@ fn import_file(
})?;
}
}
- "channel.info.json" => {
- info!("import channel info.json at {path:?}");
- let data = serde_json::from_reader::<_, YVideo>(BufReader::new(File::open(path)?))?;
- db.update_node_init(parent, |node| {
- node.kind = NodeKind::Channel;
- node.title = Some(clean_uploader_name(&data.title).to_owned());
- if let Some(cid) = data.channel_id {
- node.identifiers.insert(IdentifierType::YoutubeChannel, cid);
- }
- if let Some(uid) = data.uploader_id {
- node.identifiers
- .insert(IdentifierType::YoutubeChannelHandle, uid);
- }
- if let Some(desc) = data.description {
- node.description = Some(desc);
- }
- if let Some(followers) = data.channel_follower_count {
- node.ratings
- .insert(RatingType::YoutubeFollowers, followers as f64);
- }
- Ok(())
- })?;
- }
_ => import_media_file(db, apis, rthandle, path, parent, iflags).context("media file")?,
}
@@ -376,7 +315,7 @@ pub fn read_media_metadata(path: &Path) -> Result<Arc<matroska::Segment>> {
// Replace data of useful attachments with cache key; delete data of all others
if let Some(attachments) = &mut attachments {
for att in &mut attachments.files {
- if let Some(fname) = attachment_types::is_useful_attachment(&att) {
+ if let Some(fname) = is_useful_attachment(&att) {
let key = cache_store(
format!("media/attachment/{}-{fname}", HashKey(path)),
|| Ok(att.data.clone()),
@@ -400,22 +339,11 @@ pub fn read_media_metadata(path: &Path) -> Result<Arc<matroska::Segment>> {
)
}
-mod attachment_types {
- use jellyremuxer::matroska::AttachedFile;
-
- pub fn is_useful_attachment(a: &AttachedFile) -> Option<&'static str> {
- match a {
- _ if is_info_json(&a) => Some("info.json"),
- _ if is_cover(&a) => Some("cover.image"),
- _ => None,
- }
- }
-
- pub fn is_info_json(a: &&AttachedFile) -> bool {
- a.name == "info.json" && a.media_type == "application/json"
- }
- pub fn is_cover(a: &&AttachedFile) -> bool {
- a.name.starts_with("cover") && a.media_type.starts_with("image/")
+pub fn is_useful_attachment(a: &AttachedFile) -> Option<&'static str> {
+ match a {
+ _ if is_info_json(&a) => Some("info.json"),
+ _ if is_cover(&a) => Some("cover.image"),
+ _ => None,
}
}
@@ -430,38 +358,6 @@ fn import_media_file(
info!("media file {path:?}");
let m = read_media_metadata(path)?;
- let infojson = m
- .attachments
- .iter()
- .flat_map(|a| &a.files)
- .find(attachment_types::is_info_json)
- .map(|att| {
- let data = cache_read(str::from_utf8(&att.data).unwrap())?
- .ok_or(anyhow!("info json cache missing"))?;
- anyhow::Ok(serde_json::from_slice::<infojson::YVideo>(&data)?)
- })
- .transpose()
- .context("infojson parsing")?;
-
- let cover = m
- .attachments
- .iter()
- .flat_map(|a| &a.files)
- .find(attachment_types::is_cover)
- .map(|att| Asset(att.data.clone().try_into().unwrap()));
-
- let mut tags = m
- .tags
- .first()
- .map(|tags| {
- tags.tags
- .iter()
- .flat_map(|t| t.simple_tags.clone())
- .map(|st| (st.name, st.string.unwrap_or_default()))
- .collect::<HashMap<_, _>>()
- })
- .unwrap_or_default();
-
let filename = path.file_name().unwrap().to_string_lossy().to_string();
let mut episode_index = None;
@@ -480,215 +376,28 @@ fn import_media_file(
let mut filename_toks = filename.split(".");
let filepath_stem = filename_toks.next().unwrap();
- let slug = infojson
- .as_ref()
- // TODO maybe also include the slug after the primary "id" key
- .map(|ij| format!("{}-{}", ij.extractor.to_lowercase(), ij.id))
- .unwrap_or_else(|| {
- if let Some((s, e)) = episode_index {
- format!(
- "{}-s{s}e{e}",
- make_kebab(
- &path
- .parent()
- .unwrap()
- .file_name()
- .unwrap_or_default()
- .to_string_lossy()
- )
- )
- } else {
- make_kebab(filepath_stem)
- }
- });
+ let slug = if let Some((s, e)) = episode_index {
+ format!(
+ "{}-s{s}e{e}",
+ make_kebab(
+ &path
+ .parent()
+ .unwrap()
+ .file_name()
+ .unwrap_or_default()
+ .to_string_lossy()
+ )
+ )
+ } else {
+ make_kebab(filepath_stem)
+ };
let node = NodeID::from_slug(&slug);
- let meta = path.metadata()?;
- let mut eids = BTreeMap::<IdentifierType, String>::new();
-
- for (key, value) in &tags {
- match key.as_str() {
- "MUSICBRAINZ_TRACKID" => {
- eids.insert(IdentifierType::MusicbrainzRecording, value.to_owned())
- }
- "MUSICBRAINZ_ARTISTID" => {
- eids.insert(IdentifierType::MusicbrainzArtist, value.to_owned())
- }
- "MUSICBRAINZ_ALBUMID" => {
- eids.insert(IdentifierType::MusicbrainzRelease, value.to_owned())
- }
- "MUSICBRAINZ_ALBUMARTISTID" => {
- None //? ignore this?
- }
- "MUSICBRAINZ_RELEASEGROUPID" => {
- eids.insert(IdentifierType::MusicbrainzReleaseGroup, value.to_owned())
- }
- "ISRC" => eids.insert(IdentifierType::Isrc, value.to_owned()),
- "BARCODE" => eids.insert(IdentifierType::Barcode, value.to_owned()),
- _ => None,
- };
- }
-
- if iflags.use_acoustid {
- let fp = acoustid_fingerprint(path)?;
- if let Some((atid, mbid)) = apis
- .acoustid
- .as_ref()
- .ok_or(anyhow!("need acoustid"))?
- .get_atid_mbid(&fp, rthandle)?
- {
- eids.insert(IdentifierType::AcoustIdTrack, atid);
- eids.insert(IdentifierType::MusicbrainzRecording, mbid);
- };
- }
-
- let mbrec = eids.get(&IdentifierType::MusicbrainzRecording).cloned();
-
db.update_node_init(node, |node| {
node.slug = slug;
- node.title = m.info.title.clone().or(node.title.clone());
node.visibility = iflags.visibility;
-
- node.description = tags
- .remove("DESCRIPTION")
- .or(tags.remove("SYNOPSIS"))
- .or(node.description.clone());
- node.tagline = tags.remove("COMMENT").or(node.tagline.clone());
node.parents.insert(parent);
-
- node.identifiers.extend(eids);
-
- if let Some(cover) = cover {
- node.pictures.insert(PictureSlot::Cover, cover);
- }
-
- if let Some(ct) = tags.get("CONTENT_TYPE") {
- node.kind = match ct.to_lowercase().trim() {
- "movie" | "documentary" | "film" => NodeKind::Movie,
- "music" | "recording" => NodeKind::Music,
- _ => NodeKind::Unknown,
- }
- }
-
- let tracks = m
- .tracks
- .as_ref()
- .ok_or(anyhow!("no tracks"))?
- .entries
- .iter()
- .map(|track| SourceTrack {
- codec: track.codec_id.clone(),
- language: track.language.clone(),
- name: track.name.clone().unwrap_or_default(),
- federated: Vec::new(),
- kind: if let Some(video) = &track.video {
- SourceTrackKind::Video {
- width: video.pixel_width,
- height: video.pixel_height,
- fps: video.frame_rate,
- }
- } else if let Some(audio) = &track.audio {
- SourceTrackKind::Audio {
- channels: audio.channels as usize,
- sample_rate: audio.sampling_frequency,
- bit_depth: audio.bit_depth.map(|r| r as usize),
- }
- } else {
- SourceTrackKind::Subtitle
- },
- source: TrackSource::Local(path.to_owned(), track.track_number),
- })
- .collect::<Vec<_>>();
-
- if let Some(infojson) = infojson {
- node.kind = if !tracks
- .iter()
- .any(|t| matches!(t.kind, SourceTrackKind::Video { .. }))
- {
- NodeKind::Music
- } else if infojson.duration.unwrap_or(0.) < 600.
- && infojson.aspect_ratio.unwrap_or(2.) < 1.
- {
- NodeKind::ShortFormVideo
- } else {
- NodeKind::Video
- };
- node.title = Some(infojson.title);
- node.subtitle = if infojson.alt_title != node.title {
- infojson.alt_title
- } else {
- None
- }
- .or(infojson
- .uploader
- .as_ref()
- .map(|u| clean_uploader_name(u).to_owned()))
- .or(node.subtitle.clone());
-
- node.tags.extend(infojson.tags.unwrap_or_default());
-
- if let Some(desc) = infojson.description {
- node.description = Some(desc)
- }
- node.tagline = Some(infojson.webpage_url);
- if let Some(date) = &infojson.upload_date {
- node.release_date =
- Some(infojson::parse_upload_date(date).context("parsing upload date")?);
- }
- match infojson.extractor.as_str() {
- "youtube" => {
- node.identifiers
- .insert(IdentifierType::YoutubeVideo, infojson.id);
- node.ratings.insert(
- RatingType::YoutubeViews,
- infojson.view_count.unwrap_or_default() as f64,
- );
- if let Some(lc) = infojson.like_count {
- node.ratings.insert(RatingType::YoutubeLikes, lc as f64);
- }
- }
- "Bandcamp" => drop(
- node.identifiers
- .insert(IdentifierType::Bandcamp, infojson.id),
- ),
- _ => (),
- }
- }
-
- // TODO merge size
- node.storage_size = meta.len();
- // TODO merge tracks
- node.media = Some(MediaInfo {
- chapters: m
- .chapters
- .clone()
- .map(|c| {
- let mut chaps = Vec::new();
- if let Some(ee) = c.edition_entries.first() {
- for ca in &ee.chapter_atoms {
- let mut labels = Vec::new();
- for cd in &ca.displays {
- for lang in &cd.languages {
- labels.push((lang.to_owned(), cd.string.clone()))
- }
- }
- chaps.push(Chapter {
- labels,
- time_start: Some(ca.time_start as f64 * 1e-9),
- time_end: ca.time_end.map(|ts| ts as f64 * 1e-9),
- })
- }
- }
- chaps
- })
- .unwrap_or_default(),
- duration: fix_invalid_runtime(
- m.info.duration.unwrap_or_default() * m.info.timestamp_scale as f64 * 1e-9,
- ),
- tracks,
- });
-
Ok(())
})?;
@@ -741,9 +450,6 @@ fn import_media_file(
for tok in filename_toks {
apply_node_flag(db, rthandle, apis, node, tok)?;
}
- if let Some(mbid) = mbrec {
- apply_musicbrainz_recording(db, rthandle, apis, node, mbid)?;
- }
Ok(())
}
@@ -755,48 +461,6 @@ fn apply_node_flag(
node: NodeID,
flag: &str,
) -> Result<()> {
- if let Some(value) = flag.strip_prefix("trakt-").or(flag.strip_prefix("trakt=")) {
- let (kind, id) = value.split_once(":").unwrap_or(("", value));
- let kind = match kind {
- "movie" | "" => TraktKind::Movie,
- "show" => TraktKind::Show,
- "season" => TraktKind::Season,
- "episode" => TraktKind::Episode,
- _ => bail!("unknown trakt kind"),
- };
- apply_trakt_tmdb(db, rthandle, apis, node, kind, id)?;
- }
- if flag == "hidden" {
- db.update_node_init(node, |node| {
- node.visibility = node.visibility.min(Visibility::Hidden);
- Ok(())
- })?;
- }
- if flag == "reduced" {
- db.update_node_init(node, |node| {
- node.visibility = node.visibility.min(Visibility::Reduced);
- Ok(())
- })?;
- }
- if let Some(kind) = flag.strip_prefix("kind-").or(flag.strip_prefix("kind=")) {
- let kind = match kind {
- "movie" => NodeKind::Movie,
- "video" => NodeKind::Video,
- "music" => NodeKind::Music,
- "short_form_video" => NodeKind::ShortFormVideo,
- "collection" => NodeKind::Collection,
- "channel" => NodeKind::Channel,
- "show" => NodeKind::Show,
- "series" => NodeKind::Series,
- "season" => NodeKind::Season,
- "episode" => NodeKind::Episode,
- _ => bail!("unknown node kind"),
- };
- db.update_node_init(node, |node| {
- node.kind = kind;
- Ok(())
- })?;
- }
if let Some(mbid) = flag.strip_prefix("mbrec-").or(flag.strip_prefix("mbrec=")) {
apply_musicbrainz_recording(db, rthandle, apis, node, mbid.to_string())?
}
@@ -1016,18 +680,3 @@ fn make_kebab(i: &str) -> String {
}
o
}
-
-fn clean_uploader_name(mut s: &str) -> &str {
- s = s.strip_suffix(" - Videos").unwrap_or(s);
- s = s.strip_suffix(" - Topic").unwrap_or(s);
- s = s.strip_prefix("Uploads from ").unwrap_or(s);
- s
-}
-
-fn fix_invalid_runtime(d: f64) -> f64 {
- match d {
- // Broken durations found experimentally
- 359999.999 | 359999.000 | 86399.999 | 86399.99900000001 => 0.,
- x => x,
- }
-}