aboutsummaryrefslogtreecommitdiff
path: root/import
diff options
context:
space:
mode:
Diffstat (limited to 'import')
-rw-r--r-- import/Cargo.toml 4
-rw-r--r-- import/src/infojson.rs 16
-rw-r--r-- import/src/lib.rs 119
-rw-r--r-- import/src/matroska.rs 112
4 files changed, 153 insertions, 98 deletions
diff --git a/import/Cargo.toml b/import/Cargo.toml
index d0342df..988e626 100644
--- a/import/Cargo.toml
+++ b/import/Cargo.toml
@@ -8,7 +8,9 @@ jellycommon = { path = "../common" }
jellybase = { path = "../base" }
jellyclient = { path = "../client" }
-ebml-struct = { git = "https://codeberg.org/metamuffin/ebml-struct" }
+ebml-struct = { git = "https://codeberg.org/metamuffin/ebml-struct", features = [
+ "bincode",
+] }
rayon = "1.10.0"
crossbeam-channel = "0.5.14"
diff --git a/import/src/infojson.rs b/import/src/infojson.rs
index c2ae305..3e4667e 100644
--- a/import/src/infojson.rs
+++ b/import/src/infojson.rs
@@ -3,13 +3,13 @@
which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
Copyright (C) 2025 metamuffin <metamuffin.org>
*/
-
use anyhow::Context;
+use bincode::{Decode, Encode};
use jellycommon::chrono::{format::Parsed, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct YVideo {
pub id: String,
pub title: String,
@@ -63,7 +63,7 @@ pub struct YVideo {
pub epoch: usize,
}
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct YCaption {
pub url: Option<String>,
pub ext: String, //"vtt" | "json3" | "srv1" | "srv2" | "srv3" | "ttml",
@@ -71,7 +71,7 @@ pub struct YCaption {
pub name: Option<String>,
}
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct YFormat {
pub format_id: String,
pub format_note: Option<String>,
@@ -96,13 +96,13 @@ pub struct YFormat {
pub format: String,
}
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct YFragment {
pub url: Option<String>,
pub duration: Option<f64>,
}
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct YThumbnail {
pub url: String,
pub preference: Option<i32>,
@@ -112,14 +112,14 @@ pub struct YThumbnail {
pub resolution: Option<String>,
}
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct YChapter {
pub start_time: f64,
pub end_time: f64,
pub title: String,
}
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct YHeatmapSample {
pub start_time: f64,
pub end_time: f64,
diff --git a/import/src/lib.rs b/import/src/lib.rs
index add7e4d..10bd0ec 100644
--- a/import/src/lib.rs
+++ b/import/src/lib.rs
@@ -4,24 +4,19 @@
Copyright (C) 2025 metamuffin <metamuffin.org>
*/
use anyhow::{anyhow, Context, Result};
-use ebml_struct::{
- ids::*,
- matroska::*,
- read::{EbmlReadExt, TagRead},
-};
use infojson::YVideo;
-use jellybase::{assetfed::AssetInner, cache::cache_file, database::Database, CONF, SECRETS};
+use jellybase::{assetfed::AssetInner, database::Database, CONF, SECRETS};
use jellycommon::{
Chapter, LocalTrack, MediaInfo, Node, NodeID, NodeKind, Rating, SourceTrack, SourceTrackKind,
TrackSource,
};
use log::info;
+use matroska::matroska_metadata;
use rayon::iter::{ParallelDrainRange, ParallelIterator};
-use regex::Regex;
use std::{
collections::HashMap,
fs::File,
- io::{BufReader, ErrorKind, Read, Write},
+ io::{BufReader, Read},
mem::swap,
path::{Path, PathBuf},
sync::LazyLock,
@@ -35,14 +30,15 @@ use tokio::{
use trakt::Trakt;
pub mod infojson;
+pub mod matroska;
pub mod tmdb;
pub mod trakt;
static IMPORT_SEM: LazyLock<Semaphore> = LazyLock::new(|| Semaphore::new(1));
pub static IMPORT_ERRORS: RwLock<Vec<String>> = RwLock::const_new(Vec::new());
-static RE_EPISODE_FILENAME: LazyLock<Regex> =
- LazyLock::new(|| Regex::new(r#"([sS](\d+))?([eE](\d+))( (.+))?"#).unwrap());
+// static RE_EPISODE_FILENAME: LazyLock<Regex> =
+// LazyLock::new(|| Regex::new(r#"([sS](\d+))?([eE](\d+))( (.+))?"#).unwrap());
struct Apis {
trakt: Option<Trakt>,
@@ -57,6 +53,7 @@ pub async fn import_wrap(db: Database, incremental: bool) -> Result<()> {
let _sem = IMPORT_SEM.try_acquire()?;
let jh = spawn_blocking(move || {
+ *IMPORT_ERRORS.blocking_write() = Vec::new();
if let Err(e) = import(&db, incremental) {
IMPORT_ERRORS.blocking_write().push(format!("{e:#}"));
}
@@ -121,25 +118,26 @@ fn import_iter_inner(path: &Path, db: &Database, incremental: bool) -> Result<Ve
}
fn import_file(db: &Database, path: &Path) -> Result<()> {
- let parent = NodeID::from_slug(
- &path
- .parent()
- .ok_or(anyhow!("no parent"))?
- .file_name()
- .ok_or(anyhow!("parent no filename"))?
- .to_string_lossy(),
- );
+ let parent_slug = path
+ .parent()
+ .ok_or(anyhow!("no parent"))?
+ .file_name()
+ .ok_or(anyhow!("parent no filename"))?
+ .to_string_lossy();
+ let parent = NodeID::from_slug(&parent_slug);
let filename = path.file_name().unwrap().to_string_lossy();
match filename.as_ref() {
"poster.jpeg" | "poster.webp" => {
db.update_node_init(parent, |node| {
+ node.slug = parent_slug.to_string();
node.poster = Some(AssetInner::Media(path.to_owned()).ser());
Ok(())
})?;
}
"backdrop.jpeg" | "backdrop.webp" => {
db.update_node_init(parent, |node| {
+ node.slug = parent_slug.to_string();
node.backdrop = Some(AssetInner::Media(path.to_owned()).ser());
Ok(())
})?;
@@ -147,6 +145,7 @@ fn import_file(db: &Database, path: &Path) -> Result<()> {
"info.json" | "info.yaml" => {
let data = serde_yaml::from_reader::<_, Node>(BufReader::new(File::open(path)?))?;
db.update_node_init(parent, |node| {
+ node.slug = parent_slug.to_string();
fn merge_option<T>(a: &mut Option<T>, b: Option<T>) {
if b.is_some() {
*a = b;
@@ -161,6 +160,7 @@ fn import_file(db: &Database, path: &Path) -> Result<()> {
"channel.info.json" => {
let data = serde_json::from_reader::<_, YVideo>(BufReader::new(File::open(path)?))?;
db.update_node_init(parent, |node| {
+ node.slug = parent_slug.to_string();
node.title = Some(
data.title
.strip_suffix(" - Videos")
@@ -189,76 +189,14 @@ fn import_file(db: &Database, path: &Path) -> Result<()> {
fn import_media_file(db: &Database, path: &Path, parent: NodeID) -> Result<()> {
info!("reading media file {path:?}");
- let mut file = BufReader::new(File::open(path)?);
- let mut file = file.by_ref().take(u64::MAX);
-
- let (x, mut ebml) = file.read_tag()?;
- assert_eq!(x, EL_EBML);
- let ebml = Ebml::read(&mut ebml).unwrap();
- assert!(ebml.doc_type == "matroska" || ebml.doc_type == "webm");
- let (x, mut segment) = file.read_tag()?;
- assert_eq!(x, EL_SEGMENT);
- let mut info = None;
- let mut infojson = None;
- let mut tracks = None;
- let mut cover = None;
- let mut chapters = None;
- let mut tags = None;
- loop {
- let (x, mut seg) = match segment.read_tag() {
- Ok(o) => o,
- Err(e) if e.kind() == ErrorKind::UnexpectedEof => break,
- Err(e) => return Err(e.into()),
- };
- match x {
- EL_INFO => info = Some(Info::read(&mut seg).context("info")?),
- EL_TRACKS => tracks = Some(Tracks::read(&mut seg).context("tracks")?),
- EL_CHAPTERS => chapters = Some(Chapters::read(&mut seg).context("chapters")?),
- EL_TAGS => tags = Some(Tags::read(&mut seg).context("tags")?),
- EL_ATTACHMENTS => {
- let attachments = Attachments::read(&mut seg).context("attachments")?;
- for f in attachments.files {
- match f.name.as_str() {
- "info.json" => {
- infojson = Some(
- serde_json::from_slice::<infojson::YVideo>(&f.data)
- .context("infojson")?,
- );
- }
- "cover.webp" | "cover.png" | "cover.jpg" | "cover.jpeg" | "cover.avif" => {
- cover = Some(
- AssetInner::Cache(cache_file(
- &["att-cover", path.to_string_lossy().as_ref()],
- move |mut file| {
- file.write_all(&f.data)?;
- Ok(())
- },
- )?)
- .ser(),
- )
- }
- a => println!("{a:?}"),
- }
- }
- }
- EL_VOID | EL_CRC32 | EL_CUES | EL_SEEKHEAD => {
- seg.consume()?;
- }
- EL_CLUSTER => {
- break;
- }
- id => {
- eprintln!("unknown top-level element {id:x}");
- seg.consume()?;
- }
- }
- }
+ let m = (*matroska_metadata(path)?).to_owned();
- let info = info.ok_or(anyhow!("no info"))?;
- let tracks = tracks.ok_or(anyhow!("no tracks"))?;
+ let info = m.info.ok_or(anyhow!("no info"))?;
+ let tracks = m.tracks.ok_or(anyhow!("no tracks"))?;
- let mut tags = tags
+ let mut tags = m
+ .tags
.map(|tags| {
tags.tags
.into_iter()
@@ -274,7 +212,8 @@ fn import_media_file(db: &Database, path: &Path, parent: NodeID) -> Result<()> {
.to_string_lossy()
.to_string();
- let slug = infojson
+ let slug = m
+ .infojson
.as_ref()
.map(|ij| format!("youtube-{}", ij.id))
.unwrap_or(make_kebab(&filepath_stem));
@@ -282,13 +221,13 @@ fn import_media_file(db: &Database, path: &Path, parent: NodeID) -> Result<()> {
db.update_node_init(NodeID::from_slug(&slug), |node| {
node.slug = slug;
node.title = info.title;
- node.poster = cover;
+ node.poster = m.cover.clone();
node.description = tags.remove("DESCRIPTION");
node.tagline = tags.remove("COMMENT");
if !node.parents.contains(&parent) {
node.parents.push(parent)
}
- if let Some(infojson) = infojson {
+ if let Some(infojson) = m.infojson {
node.kind = Some(
if infojson.duration.unwrap_or(0.) < 600.
&& infojson.aspect_ratio.unwrap_or(2.) < 1.
@@ -314,7 +253,9 @@ fn import_media_file(db: &Database, path: &Path, parent: NodeID) -> Result<()> {
}
}
node.media = Some(MediaInfo {
- chapters: chapters
+ chapters: m
+ .chapters
+ .clone()
.map(|c| {
let mut chaps = Vec::new();
if let Some(ee) = c.edition_entries.first() {
diff --git a/import/src/matroska.rs b/import/src/matroska.rs
new file mode 100644
index 0000000..bb8d927
--- /dev/null
+++ b/import/src/matroska.rs
@@ -0,0 +1,112 @@
+/*
+ This file is part of jellything (https://codeberg.org/metamuffin/jellything)
+ which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
+ Copyright (C) 2025 metamuffin <metamuffin.org>
+*/
+use crate::infojson::{self, YVideo};
+use anyhow::{Context, Result};
+use bincode::{Decode, Encode};
+use ebml_struct::{
+ ids::*,
+ matroska::*,
+ read::{EbmlReadExt, TagRead},
+};
+use jellybase::{
+ assetfed::AssetInner,
+ cache::{cache_file, cache_memory},
+};
+use jellycommon::Asset;
+use std::{
+ fs::File,
+ io::{BufReader, ErrorKind, Read, Write},
+ path::Path,
+ sync::Arc,
+};
+
+#[derive(Encode, Decode, Clone)] // Header-level metadata gathered by `matroska_metadata`.
+pub(crate) struct MatroskaMetadata { // each field stays `None` when the file has no matching element/attachment
+ pub info: Option<Info>, // decoded from the segment's `EL_INFO` element
+ pub tracks: Option<Tracks>, // decoded from the segment's `EL_TRACKS` element
+ pub cover: Option<Asset>, // serialized `AssetInner::Cache` entry holding the bytes of a `cover.{webp,png,jpg,jpeg,avif}` attachment
+ pub chapters: Option<Chapters>, // decoded from the segment's `EL_CHAPTERS` element
+ pub tags: Option<Tags>, // decoded from the segment's `EL_TAGS` element
+ pub infojson: Option<YVideo>, // `info.json` attachment parsed as a `YVideo`
+}
+/// Reads the header-level metadata of the Matroska/WebM file at `path`.
+///
+/// The parse runs inside `cache_memory` under the key `["mkmeta-v1", <path>]` and the
+/// result is handed back as an `Arc<MatroskaMetadata>`.
+/// NOTE(review): the key contains only the path, so whether a modified file is ever
+/// re-read depends on `cache_memory`/`cache_file` semantics -- confirm.
+///
+/// Top-level segment children are scanned until the first cluster or the end of the
+/// input. Info, tracks, chapters, tags and attachments are decoded; every other element
+/// is skipped.
+///
+/// # Errors
+///
+/// Returns an error (instead of panicking) when the file cannot be opened or read, when
+/// it does not start with an EBML header followed by a segment, when the doc type is
+/// neither `matroska` nor `webm`, when a decoded element or the `info.json` attachment
+/// is malformed, or when the cover cannot be written through `cache_file`.
+pub(crate) fn matroska_metadata(path: &Path) -> Result<Arc<MatroskaMetadata>> {
+    cache_memory(&["mkmeta-v1", path.to_string_lossy().as_ref()], || {
+        let mut file = BufReader::new(File::open(path)?);
+        // Effectively unlimited `Take`; presumably the tag reader operates on `Take` readers.
+        let mut file = file.by_ref().take(u64::MAX);
+
+        // The file content is untrusted input: report structural problems as errors
+        // rather than aborting the whole import with a panic.
+        let (x, mut ebml) = file.read_tag()?;
+        anyhow::ensure!(x == EL_EBML, "expected EBML header, found element {x:x}");
+        let ebml = Ebml::read(&mut ebml).context("ebml header")?;
+        anyhow::ensure!(
+            ebml.doc_type == "matroska" || ebml.doc_type == "webm",
+            "unsupported doc type {:?}",
+            ebml.doc_type
+        );
+        let (x, mut segment) = file.read_tag()?;
+        anyhow::ensure!(x == EL_SEGMENT, "expected segment, found element {x:x}");
+
+        let mut info = None;
+        let mut infojson = None;
+        let mut tracks = None;
+        let mut cover = None;
+        let mut chapters = None;
+        let mut tags = None;
+        loop {
+            let (x, mut seg) = match segment.read_tag() {
+                Ok(o) => o,
+                // Running out of input simply ends the scan.
+                Err(e) if e.kind() == ErrorKind::UnexpectedEof => break,
+                Err(e) => return Err(e.into()),
+            };
+            match x {
+                EL_INFO => info = Some(Info::read(&mut seg).context("info")?),
+                EL_TRACKS => tracks = Some(Tracks::read(&mut seg).context("tracks")?),
+                EL_CHAPTERS => chapters = Some(Chapters::read(&mut seg).context("chapters")?),
+                EL_TAGS => tags = Some(Tags::read(&mut seg).context("tags")?),
+                EL_ATTACHMENTS => {
+                    let attachments = Attachments::read(&mut seg).context("attachments")?;
+                    for f in attachments.files {
+                        match f.name.as_str() {
+                            "info.json" => {
+                                infojson = Some(
+                                    serde_json::from_slice::<infojson::YVideo>(&f.data)
+                                        .context("infojson")?,
+                                );
+                            }
+                            "cover.webp" | "cover.png" | "cover.jpg" | "cover.jpeg"
+                            | "cover.avif" => {
+                                // Store the image bytes through `cache_file` and keep
+                                // only the serialized asset reference.
+                                cover = Some(
+                                    AssetInner::Cache(cache_file(
+                                        &["att-cover", path.to_string_lossy().as_ref()],
+                                        move |mut file| {
+                                            file.write_all(&f.data)?;
+                                            Ok(())
+                                        },
+                                    )?)
+                                    .ser(),
+                                )
+                            }
+                            a => log::debug!("ignoring attachment {a:?}"),
+                        }
+                    }
+                }
+                EL_VOID | EL_CRC32 | EL_CUES | EL_SEEKHEAD => {
+                    seg.consume()?;
+                }
+                // Stop at the first cluster; nothing after it is read here.
+                EL_CLUSTER => {
+                    break;
+                }
+                id => {
+                    log::warn!("unknown top-level element {id:x}");
+                    seg.consume()?;
+                }
+            }
+        }
+        Ok(MatroskaMetadata {
+            chapters,
+            cover,
+            info,
+            infojson,
+            tags,
+            tracks,
+        })
+    })
+}