diff options
Diffstat (limited to 'import/src/lib.rs')
-rw-r--r-- | import/src/lib.rs | 629 |
1 files changed, 331 insertions, 298 deletions
diff --git a/import/src/lib.rs b/import/src/lib.rs index d62de57..4a6d87b 100644 --- a/import/src/lib.rs +++ b/import/src/lib.rs @@ -21,13 +21,16 @@ use jellyimport_asset_token::AssetInner; use acoustid::{acoustid_fingerprint, AcoustID}; use anyhow::{anyhow, bail, Context, Result}; use infojson::YVideo; -use jellycache::cache_file; +use jellycache::{cache_file, cache_memory}; use jellycommon::{ Appearance, Chapter, LocalTrack, MediaInfo, Node, NodeID, NodeKind, ObjectIds, PeopleGroup, Person, Rating, SourceTrack, SourceTrackKind, TmdbKind, TrackSource, TraktKind, Visibility, }; use jellyimport_fallback_generator::generate_fallback; -use jellyremuxer::magic::detect_container_format; +use jellyremuxer::{ + demuxers::create_demuxer_autodetect, + matroska::{self, Segment}, +}; use log::info; use musicbrainz::MusicBrainz; use rayon::iter::{ParallelBridge, ParallelIterator}; @@ -36,9 +39,9 @@ use serde::{Deserialize, Serialize}; use std::{ collections::{BTreeMap, HashMap}, fs::{read_to_string, File}, - io::BufReader, + io::{BufReader, Write}, path::{Path, PathBuf}, - sync::{LazyLock, Mutex}, + sync::{Arc, LazyLock, Mutex}, time::UNIX_EPOCH, }; use tmdb::Tmdb; @@ -342,6 +345,28 @@ fn import_file( Ok(()) } +pub fn read_media_metadata(path: &Path) -> Result<Arc<matroska::Segment>> { + cache_memory("mkmeta-v4", path, move || { + let media = File::open(path)?; + let mut media = + create_demuxer_autodetect(Box::new(media))?.ok_or(anyhow!("media format unknown"))?; + + let info = media.info()?; + let tracks = media.tracks()?; + let tags = media.tags()?; + let attachments = media.attachments()?; + let chapters = media.chapters()?; + Ok(Segment { + info, + tracks, + tags: tags.into_iter().collect(), + attachments, + chapters, + ..Default::default() + }) + }) +} + fn import_media_file( db: &Database, apis: &Apis, @@ -351,323 +376,331 @@ fn import_media_file( iflags: InheritedFlags, ) -> Result<()> { info!("media file {path:?}"); - let mut file = File::open(path)?; - let Some(container) = detect_container_format(&mut file)? else { - return Ok(()); - }; - eprintln!("{container:?}"); + let m = read_media_metadata(path)?; - // let infojson = m - // .infojson - // .map(|d| serde_json::from_slice::<infojson::YVideo>(&d)) - // .transpose() - // .context("infojson parsing")?; + let infojson = m + .attachments + .iter() + .flat_map(|a| &a.files) + .find(|a| a.name == "info.json" && a.media_type == "application/json") + .map(|d| serde_json::from_slice::<infojson::YVideo>(&d.data)) + .transpose() + .context("infojson parsing")?; - // let info = m.info.ok_or(anyhow!("no info"))?; - // let tracks = m.tracks.ok_or(anyhow!("no tracks"))?; + let cover = m + .attachments + .iter() + .flat_map(|a| &a.files) + .find(|a| a.name.starts_with("cover") && a.media_type.starts_with("image/")) + .map(|att| { + cache_file("att-cover-v2", path, move |mut file| { + file.write_all(&att.data)?; + Ok(()) + }) + }) + .transpose()?; - // let mut tags = m - // .tags - // .map(|tags| { - // tags.tags - // .into_iter() - // .flat_map(|t| t.simple_tags) - // .map(|st| (st.name, st.string.unwrap_or_default())) - // .collect::<HashMap<_, _>>() - // }) - // .unwrap_or_default(); + let mut tags = m + .tags + .get(0) + .map(|tags| { + tags.tags + .iter() + .flat_map(|t| t.simple_tags.clone()) + .map(|st| (st.name, st.string.unwrap_or_default())) + .collect::<HashMap<_, _>>() + }) + .unwrap_or_default(); - // let filename = path.file_name().unwrap().to_string_lossy().to_string(); + let filename = path.file_name().unwrap().to_string_lossy().to_string(); - // let mut episode_index = None; - // if let Some(cap) = RE_EPISODE_FILENAME.captures(&filename) { - // if let Some(episode) = cap.name("episode").map(|m| m.as_str()) { - // let season = cap.name("season").map(|m| m.as_str()); - // let episode = episode.parse::<usize>().context("parse episode num")?; - // let season = season - // .unwrap_or("1") - // .parse::<usize>() - // .context("parse season num")?; - // episode_index = Some((season, episode)) - // } - // } + let mut episode_index = None; + if let Some(cap) = RE_EPISODE_FILENAME.captures(&filename) { + if let Some(episode) = cap.name("episode").map(|m| m.as_str()) { + let season = cap.name("season").map(|m| m.as_str()); + let episode = episode.parse::<usize>().context("parse episode num")?; + let season = season + .unwrap_or("1") + .parse::<usize>() + .context("parse season num")?; + episode_index = Some((season, episode)) + } + } - // let mut filename_toks = filename.split("."); - // let filepath_stem = filename_toks.next().unwrap(); + let mut filename_toks = filename.split("."); + let filepath_stem = filename_toks.next().unwrap(); - // let slug = infojson - // .as_ref() - // // TODO maybe also include the slug after the primary "id" key - // .map(|ij| format!("{}-{}", ij.extractor.to_lowercase(), ij.id)) - // .unwrap_or_else(|| { - // if let Some((s, e)) = episode_index { - // format!( - // "{}-s{s}e{e}", - // make_kebab( - // &path - // .parent() - // .unwrap() - // .file_name() - // .unwrap_or_default() - // .to_string_lossy() - // ) - // ) - // } else { - // make_kebab(filepath_stem) - // } - // }); + let slug = infojson + .as_ref() + // TODO maybe also include the slug after the primary "id" key + .map(|ij| format!("{}-{}", ij.extractor.to_lowercase(), ij.id)) + .unwrap_or_else(|| { + if let Some((s, e)) = episode_index { + format!( + "{}-s{s}e{e}", + make_kebab( + &path + .parent() + .unwrap() + .file_name() + .unwrap_or_default() + .to_string_lossy() + ) + ) + } else { + make_kebab(filepath_stem) + } + }); - // let node = NodeID::from_slug(&slug); + let node = NodeID::from_slug(&slug); - // let meta = path.metadata()?; - // let mut eids = BTreeMap::new(); + let meta = path.metadata()?; + let mut eids = BTreeMap::new(); - // for (key, value) in &tags { - // match key.as_str() { - // "MUSICBRAINZ_TRACKID" => { - // eids.insert("musicbrainz.recording".to_string(), value.to_owned()) - // } - // "MUSICBRAINZ_ARTISTID" => { - // eids.insert("musicbrainz.artist".to_string(), value.to_owned()) - // } - // "MUSICBRAINZ_ALBUMID" => { - // eids.insert("musicbrainz.release".to_string(), value.to_owned()) - // } - // "MUSICBRAINZ_ALBUMARTISTID" => { - // eids.insert("musicbrainz.albumartist".to_string(), value.to_owned()) - // } - // "MUSICBRAINZ_RELEASEGROUPID" => { - // eids.insert("musicbrainz.releasegroup".to_string(), value.to_owned()) - // } - // "ISRC" => eids.insert("isrc".to_string(), value.to_owned()), - // "BARCODE" => eids.insert("barcode".to_string(), value.to_owned()), - // _ => None, - // }; - // } + for (key, value) in &tags { + match key.as_str() { + "MUSICBRAINZ_TRACKID" => { + eids.insert("musicbrainz.recording".to_string(), value.to_owned()) + } + "MUSICBRAINZ_ARTISTID" => { + eids.insert("musicbrainz.artist".to_string(), value.to_owned()) + } + "MUSICBRAINZ_ALBUMID" => { + eids.insert("musicbrainz.release".to_string(), value.to_owned()) + } + "MUSICBRAINZ_ALBUMARTISTID" => { + eids.insert("musicbrainz.albumartist".to_string(), value.to_owned()) + } + "MUSICBRAINZ_RELEASEGROUPID" => { + eids.insert("musicbrainz.releasegroup".to_string(), value.to_owned()) + } + "ISRC" => eids.insert("isrc".to_string(), value.to_owned()), + "BARCODE" => eids.insert("barcode".to_string(), value.to_owned()), + _ => None, + }; + } - // if iflags.use_acoustid { - // let fp = rthandle.block_on(acoustid_fingerprint(path))?; - // if let Some((atid, mbid)) = rthandle.block_on( - // apis.acoustid - // .as_ref() - // .ok_or(anyhow!("need acoustid"))? - // .get_atid_mbid(&fp), - // )? { - // eids.insert("acoustid.track".to_string(), atid); - // eids.insert("musicbrainz.recording".to_string(), mbid); - // }; - // } + if iflags.use_acoustid { + let fp = rthandle.block_on(acoustid_fingerprint(path))?; + if let Some((atid, mbid)) = rthandle.block_on( + apis.acoustid + .as_ref() + .ok_or(anyhow!("need acoustid"))? + .get_atid_mbid(&fp), + )? { + eids.insert("acoustid.track".to_string(), atid); + eids.insert("musicbrainz.recording".to_string(), mbid); + }; + } - // let mbrec = eids.get("musicbrainz.recording").cloned(); + let mbrec = eids.get("musicbrainz.recording").cloned(); - // db.update_node_init(node, |node| { - // node.slug = slug; - // node.title = info.title.or(node.title.clone()); - // node.visibility = iflags.visibility; - // node.poster = m - // .cover - // .map(|a| AssetInner::Cache(a).ser()) - // .or(node.poster.clone()); - // node.description = tags - // .remove("DESCRIPTION") - // .or(tags.remove("SYNOPSIS")) - // .or(node.description.clone()); - // node.tagline = tags.remove("COMMENT").or(node.tagline.clone()); - // node.parents.insert(parent); + db.update_node_init(node, |node| { + node.slug = slug; + node.title = m.info.title.clone().or(node.title.clone()); + node.visibility = iflags.visibility; + node.poster = cover + .map(|a| AssetInner::Cache(a).ser()) + .or(node.poster.clone()); + node.description = tags + .remove("DESCRIPTION") + .or(tags.remove("SYNOPSIS")) + .or(node.description.clone()); + node.tagline = tags.remove("COMMENT").or(node.tagline.clone()); + node.parents.insert(parent); - // node.external_ids.extend(eids); + node.external_ids.extend(eids); - // if let Some(ct) = tags.get("CONTENT_TYPE") { - // node.kind = match ct.to_lowercase().trim() { - // "movie" | "documentary" | "film" => NodeKind::Movie, - // "music" | "recording" => NodeKind::Music, - // _ => NodeKind::Unknown, - // } - // } + if let Some(ct) = tags.get("CONTENT_TYPE") { + node.kind = match ct.to_lowercase().trim() { + "movie" | "documentary" | "film" => NodeKind::Movie, + "music" | "recording" => NodeKind::Music, + _ => NodeKind::Unknown, + } + } - // let tracks = tracks - // .entries - // .into_iter() - // .map(|track| SourceTrack { - // codec: track.codec_id, - // language: track.language, - // name: track.name.unwrap_or_default(), - // default_duration: track.default_duration, - // federated: Vec::new(), - // codec_delay: track.codec_delay, - // seek_pre_roll: track.seek_pre_roll, - // flag_lacing: track.flag_lacing, - // kind: if let Some(video) = track.video { - // SourceTrackKind::Video { - // width: video.pixel_width, - // height: video.pixel_height, - // display_width: video.display_width, - // display_height: video.display_height, - // display_unit: video.display_unit, - // fps: video.frame_rate, - // } - // } else if let Some(audio) = track.audio { - // SourceTrackKind::Audio { - // channels: audio.channels as usize, - // sample_rate: audio.sampling_frequency, - // bit_depth: audio.bit_depth.map(|r| r as usize), - // } - // } else { - // SourceTrackKind::Subtitle - // }, - // source: TrackSource::Local( - // AssetInner::LocalTrack(LocalTrack { - // path: path.to_owned(), - // track: track.track_number as usize, - // }) - // .ser(), - // ), - // }) - // .collect::<Vec<_>>(); + let tracks = m + .tracks + .as_ref() + .ok_or(anyhow!("no tracks"))? + .entries + .iter() + .map(|track| SourceTrack { + codec: track.codec_id.clone(), + language: track.language.clone(), + name: track.name.clone().unwrap_or_default(), + federated: Vec::new(), + kind: if let Some(video) = &track.video { + SourceTrackKind::Video { + width: video.pixel_width, + height: video.pixel_height, + display_width: video.display_width, + display_height: video.display_height, + display_unit: video.display_unit, + fps: video.frame_rate, + } + } else if let Some(audio) = &track.audio { + SourceTrackKind::Audio { + channels: audio.channels as usize, + sample_rate: audio.sampling_frequency, + bit_depth: audio.bit_depth.map(|r| r as usize), + } + } else { + SourceTrackKind::Subtitle + }, + source: TrackSource::Local( + AssetInner::LocalTrack(LocalTrack { + path: path.to_owned(), + track: track.track_number as usize, + }) + .ser(), + ), + }) + .collect::<Vec<_>>(); - // if let Some(infojson) = infojson { - // node.kind = if !tracks - // .iter() - // .any(|t| matches!(t.kind, SourceTrackKind::Video { .. })) - // { - // NodeKind::Music - // } else if infojson.duration.unwrap_or(0.) < 600. - // && infojson.aspect_ratio.unwrap_or(2.) < 1. - // { - // NodeKind::ShortFormVideo - // } else { - // NodeKind::Video - // }; - // node.title = Some(infojson.title); - // node.subtitle = if infojson.alt_title != node.title { - // infojson.alt_title - // } else { - // None - // } - // .or(infojson - // .uploader - // .as_ref() - // .map(|u| clean_uploader_name(u).to_owned())) - // .or(node.subtitle.clone()); + if let Some(infojson) = infojson { + node.kind = if !tracks + .iter() + .any(|t| matches!(t.kind, SourceTrackKind::Video { .. })) + { + NodeKind::Music + } else if infojson.duration.unwrap_or(0.) < 600. + && infojson.aspect_ratio.unwrap_or(2.) < 1. + { + NodeKind::ShortFormVideo + } else { + NodeKind::Video + }; + node.title = Some(infojson.title); + node.subtitle = if infojson.alt_title != node.title { + infojson.alt_title + } else { + None + } + .or(infojson + .uploader + .as_ref() + .map(|u| clean_uploader_name(u).to_owned())) + .or(node.subtitle.clone()); - // node.tags.extend(infojson.tags.unwrap_or_default()); + node.tags.extend(infojson.tags.unwrap_or_default()); - // if let Some(desc) = infojson.description { - // node.description = Some(desc) - // } - // node.tagline = Some(infojson.webpage_url); - // if let Some(date) = &infojson.upload_date { - // node.release_date = - // Some(infojson::parse_upload_date(date).context("parsing upload date")?); - // } - // match infojson.extractor.as_str() { - // "youtube" => { - // node.external_ids - // .insert("youtube.video".to_string(), infojson.id); - // node.ratings.insert( - // Rating::YoutubeViews, - // infojson.view_count.unwrap_or_default() as f64, - // ); - // if let Some(lc) = infojson.like_count { - // node.ratings.insert(Rating::YoutubeLikes, lc as f64); - // } - // } - // "Bandcamp" => drop( - // node.external_ids - // .insert("bandcamp".to_string(), infojson.id), - // ), - // _ => (), - // } - // } + if let Some(desc) = infojson.description { + node.description = Some(desc) + } + node.tagline = Some(infojson.webpage_url); + if let Some(date) = &infojson.upload_date { + node.release_date = + Some(infojson::parse_upload_date(date).context("parsing upload date")?); + } + match infojson.extractor.as_str() { + "youtube" => { + node.external_ids + .insert("youtube.video".to_string(), infojson.id); + node.ratings.insert( + Rating::YoutubeViews, + infojson.view_count.unwrap_or_default() as f64, + ); + if let Some(lc) = infojson.like_count { + node.ratings.insert(Rating::YoutubeLikes, lc as f64); + } + } + "Bandcamp" => drop( + node.external_ids + .insert("bandcamp".to_string(), infojson.id), + ), + _ => (), + } + } - // // TODO merge size - // node.storage_size = meta.len(); - // // TODO merge tracks - // node.media = Some(MediaInfo { - // chapters: m - // .chapters - // .clone() - // .map(|c| { - // let mut chaps = Vec::new(); - // if let Some(ee) = c.edition_entries.first() { - // for ca in &ee.chapter_atoms { - // let mut labels = Vec::new(); - // for cd in &ca.displays { - // for lang in &cd.languages { - // labels.push((lang.to_owned(), cd.string.clone())) - // } - // } - // chaps.push(Chapter { - // labels, - // time_start: Some(ca.time_start as f64 * 1e-9), - // time_end: ca.time_end.map(|ts| ts as f64 * 1e-9), - // }) - // } - // } - // chaps - // }) - // .unwrap_or_default(), - // duration: fix_invalid_runtime( - // info.duration.unwrap_or_default() * info.timestamp_scale as f64 * 1e-9, - // ), - // tracks, - // }); + // TODO merge size + node.storage_size = meta.len(); + // TODO merge tracks + node.media = Some(MediaInfo { + chapters: m + .chapters + .clone() + .map(|c| { + let mut chaps = Vec::new(); + if let Some(ee) = c.edition_entries.first() { + for ca in &ee.chapter_atoms { + let mut labels = Vec::new(); + for cd in &ca.displays { + for lang in &cd.languages { + labels.push((lang.to_owned(), cd.string.clone())) + } + } + chaps.push(Chapter { + labels, + time_start: Some(ca.time_start as f64 * 1e-9), + time_end: ca.time_end.map(|ts| ts as f64 * 1e-9), + }) + } + } + chaps + }) + .unwrap_or_default(), + duration: fix_invalid_runtime( + m.info.duration.unwrap_or_default() * m.info.timestamp_scale as f64 * 1e-9, + ), + tracks, + }); - // Ok(()) - // })?; + Ok(()) + })?; - // if let Some((season, episode)) = episode_index { - // let mut trakt_id = None; - // let flagspath = path.parent().unwrap().join("flags"); - // if flagspath.exists() { - // for flag in read_to_string(flagspath)?.lines() { - // if let Some(value) = flag.strip_prefix("trakt-").or(flag.strip_prefix("trakt=")) { - // let (kind, id) = value.split_once(":").unwrap_or(("", value)); - // if kind == "show" { - // trakt_id = Some(id.parse::<u64>()?); - // } - // } - // } - // } - // if let Some(trakt_id) = trakt_id { - // let trakt = apis.trakt.as_ref().ok_or(anyhow!("trakt required"))?; - // let seasons = rthandle.block_on(trakt.show_seasons(trakt_id))?; - // if seasons.iter().any(|x| x.number == season) { - // let episodes = rthandle.block_on(trakt.show_season_episodes(trakt_id, season))?; - // let mut poster = None; - // if let Some(tmdb) = &apis.tmdb { - // let trakt_details = - // rthandle.block_on(trakt.lookup(TraktKind::Show, trakt_id))?; - // if let Some(tmdb_id) = trakt_details.ids.tmdb { - // let tmdb_details = - // rthandle.block_on(tmdb.episode_details(tmdb_id, season, episode))?; - // if let Some(still) = &tmdb_details.still_path { - // poster = - // Some(AssetInner::Cache(rthandle.block_on(tmdb.image(still))?).ser()) - // } - // } - // } - // if let Some(episode) = episodes.get(episode.saturating_sub(1)) { - // db.update_node_init(node, |node| { - // node.kind = NodeKind::Episode; - // node.index = Some(episode.number); - // node.title = Some(episode.title.clone()); - // node.poster = poster.or(node.poster.clone()); - // node.description = episode.overview.clone().or(node.description.clone()); - // node.ratings.insert(Rating::Trakt, episode.rating); - // Ok(()) - // })? - // } - // } - // } - // } + if let Some((season, episode)) = episode_index { + let mut trakt_id = None; + let flagspath = path.parent().unwrap().join("flags"); + if flagspath.exists() { + for flag in read_to_string(flagspath)?.lines() { + if let Some(value) = flag.strip_prefix("trakt-").or(flag.strip_prefix("trakt=")) { + let (kind, id) = value.split_once(":").unwrap_or(("", value)); + if kind == "show" { + trakt_id = Some(id.parse::<u64>()?); + } + } + } + } + if let Some(trakt_id) = trakt_id { + let trakt = apis.trakt.as_ref().ok_or(anyhow!("trakt required"))?; + let seasons = rthandle.block_on(trakt.show_seasons(trakt_id))?; + if seasons.iter().any(|x| x.number == season) { + let episodes = rthandle.block_on(trakt.show_season_episodes(trakt_id, season))?; + let mut poster = None; + if let Some(tmdb) = &apis.tmdb { + let trakt_details = + rthandle.block_on(trakt.lookup(TraktKind::Show, trakt_id))?; + if let Some(tmdb_id) = trakt_details.ids.tmdb { + let tmdb_details = + rthandle.block_on(tmdb.episode_details(tmdb_id, season, episode))?; + if let Some(still) = &tmdb_details.still_path { + poster = + Some(AssetInner::Cache(rthandle.block_on(tmdb.image(still))?).ser()) + } + } + } + if let Some(episode) = episodes.get(episode.saturating_sub(1)) { + db.update_node_init(node, |node| { + node.kind = NodeKind::Episode; + node.index = Some(episode.number); + node.title = Some(episode.title.clone()); + node.poster = poster.or(node.poster.clone()); + node.description = episode.overview.clone().or(node.description.clone()); + node.ratings.insert(Rating::Trakt, episode.rating); + Ok(()) + })? + } + } + } + } - // for tok in filename_toks { - // apply_node_flag(db, rthandle, apis, node, tok)?; - // } - // if let Some(mbid) = mbrec { - // apply_musicbrainz_recording(db, rthandle, apis, node, mbid)?; - // } + for tok in filename_toks { + apply_node_flag(db, rthandle, apis, node, tok)?; + } + if let Some(mbid) = mbrec { + apply_musicbrainz_recording(db, rthandle, apis, node, mbid)?; + } Ok(()) } |