aboutsummaryrefslogtreecommitdiff
path: root/transcoder/src/subtitles.rs
blob: 9118ebcb0bdd9bad661af56625092e868e0f243b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
/*
    This file is part of jellything (https://codeberg.org/metamuffin/jellything)
    which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
    Copyright (C) 2024 metamuffin <metamuffin.org>
*/
use anyhow::{anyhow, bail, Context};
use jellycommon::jhls::SubtitleCue;
use std::fmt::Write;

/// Dispatches the raw blocks of a subtitle track to the parser for `codec`.
///
/// * `codec` - codec identifier string; only the identifiers matched below are
///   recognised (they look like Matroska codec IDs — TODO confirm with caller).
/// * `codec_private` - codec specific header data; only required for
///   `S_TEXT/ASS`.
/// * `blocks` - `(pts, duration, payload)` triples that are handed unchanged to
///   the selected parser.
///
/// # Errors
///
/// Fails for codecs that are known but not implemented yet, for unknown codec
/// identifiers, for `S_TEXT/ASS` without `codec_private`, and whenever the
/// selected parser fails. Only parser failures are wrapped in the
/// "parsing … subtitles" context: `bail!` and `?` return from the function
/// before the context is attached.
pub fn parse_subtitles(
    codec: &str,
    codec_private: Option<Vec<u8>>,
    blocks: Vec<(u64, u64, Vec<u8>)>,
) -> anyhow::Result<Vec<SubtitleCue>> {
    match codec {
        "D_WEBVTT/SUBTITLES" => parse_webvtt_blocks(blocks),
        "S_HDMV/PGS" => bail!("no HDMV/PGS yet"),
        // Used to report "no HDMV/PGS yet", which named the wrong codec.
        "S_HDMV/TEXTST" => bail!("no HDMV/TEXTST yet"),
        "S_ARISUB" => bail!("no arisub yet"),
        "S_TEXT/UTF8" => parse_subrip_blocks(blocks),
        "S_VOBSUB" => bail!("no vobsub yet"),
        "S_TEXT/ASS" => parse_ass_blocks(
            // Build the error lazily so nothing is allocated on the happy path.
            codec_private.ok_or_else(|| anyhow!("ass without CodecPrivate"))?,
            blocks,
        ),
        x => bail!("unknown sub codec {x:?}"),
    }
    .with_context(|| format!("parsing {codec} subtitles"))
}

/// Serializes `subtitles` into the text of a WebVTT file.
///
/// The output starts with a `WEBVTT - {title}` header line followed by a blank
/// line. Every cue is then written as a timing line (`start --> end`, both
/// rendered by `format_time`), a payload line prefixed with `- `, and a
/// terminating blank line.
///
/// `title` is sanitised before it is written: line breaks are replaced with
/// spaces and `-->` is broken up, because the header has to stay on a single
/// line and must not contain the cue timing separator.
///
/// NOTE(review): cue `content` is still written verbatim; payloads containing
/// blank lines or `-->` would need similar treatment — confirm whether callers
/// can produce such content.
///
/// # Errors
///
/// Propagates formatting errors from writing into the output `String`.
pub fn write_webvtt(title: String, subtitles: Vec<SubtitleCue>) -> anyhow::Result<String> {
    // Inserting a space in front of '>' cannot create a new "-->" sequence,
    // unlike shortening the arrow ("--->" would collapse to "-->").
    let title = title.replace(['\r', '\n'], " ").replace("-->", "-- >");

    let mut out = String::new();
    writeln!(out, "WEBVTT - {title}")?;
    writeln!(out)?;
    for SubtitleCue {
        start,
        end,
        content,
    } in subtitles
    {
        writeln!(out, "{} --> {}", format_time(start), format_time(end))?;
        writeln!(out, "- {content}")?;
        writeln!(out)?;
    }
    Ok(out)
}

/// Converts WebVTT track blocks into cues.
///
/// Each block is a `(pts, duration, payload)` triple. The payload is decoded as
/// UTF-8 (invalid sequences are replaced, not rejected) and trimmed of
/// surrounding whitespace. `pts` and `pts + duration` are divided by 1000 to
/// obtain the cue's `start` and `end`.
///
/// This function never returns `Err`; the `Result` matches the signature of
/// the other block parsers.
pub fn parse_webvtt_blocks(blocks: Vec<(u64, u64, Vec<u8>)>) -> anyhow::Result<Vec<SubtitleCue>> {
    let cues: Vec<SubtitleCue> = blocks
        .into_iter()
        .map(|(timestamp, duration, payload)| {
            let text = String::from_utf8_lossy(&payload);
            SubtitleCue {
                start: timestamp as f64 / 1000.,
                end: (timestamp + duration) as f64 / 1000.,
                content: text.trim().to_string(),
            }
        })
        .collect();
    Ok(cues)
}
/// Converts SubRip (`S_TEXT/UTF8`) track blocks into cues.
///
/// Every `(pts, duration, payload)` triple becomes one cue: the payload is
/// lossily decoded as UTF-8 and trimmed, and the timing values are divided by
/// 1000 to produce `start` and `end`.
///
/// Always returns `Ok`.
pub fn parse_subrip_blocks(blocks: Vec<(u64, u64, Vec<u8>)>) -> anyhow::Result<Vec<SubtitleCue>> {
    let to_cue = |(begin, length, raw): (u64, u64, Vec<u8>)| {
        let end_ts = begin + length;
        SubtitleCue {
            content: String::from_utf8_lossy(&raw).trim().to_string(),
            start: begin as f64 / 1000.,
            end: end_ts as f64 / 1000.,
        }
    };
    Ok(blocks.into_iter().map(to_cue).collect())
}

/// Converts ASS (`S_TEXT/ASS`) track blocks into cues.
///
/// Each block payload must be valid UTF-8 and consist of the comma separated
/// fields `ReadOrder, Layer, Style, Name, MarginL, MarginR, MarginV, Effect,
/// Text`. Only `Text` is kept, and it is copied into the cue unmodified. The
/// block's `pts` and `pts + duration` are divided by 1000 to obtain `start`
/// and `end`.
///
/// # Errors
///
/// Fails on the first block that is not valid UTF-8 or that has fewer than
/// eight commas.
pub fn parse_ass_blocks(
    _codec_private: Vec<u8>,
    blocks: Vec<(u64, u64, Vec<u8>)>,
) -> anyhow::Result<Vec<SubtitleCue>> {
    // TODO dont ignore codec_private

    // Fields in front of Text:
    // ReadOrder, Layer, Style, Name, MarginL, MarginR, MarginV, Effect
    const LEADING_FIELDS: usize = 8;

    /// Returns everything after the eighth comma, or `None` if the event has
    /// fewer fields. Commas inside the text itself are preserved.
    fn event_text(event: &str) -> Option<&str> {
        event.splitn(LEADING_FIELDS + 1, ',').nth(LEADING_FIELDS)
    }

    blocks
        .into_iter()
        .map(|(timestamp, duration, payload)| -> anyhow::Result<SubtitleCue> {
            let event = String::from_utf8(payload)?;
            let text = event_text(&event).ok_or_else(|| anyhow!("bad ass xD"))?;
            Ok(SubtitleCue {
                content: text.to_owned(),
                start: timestamp as f64 / 1000.,
                end: (timestamp + duration) as f64 / 1000.,
            })
        })
        .collect()
}

/// Formats a time in seconds as a `hhhh:mm:ss.mmm` timestamp.
///
/// `t` is converted to whole milliseconds by rounding to the nearest
/// millisecond. Truncating instead loses a millisecond for values that do not
/// round-trip exactly through `f64` (e.g. `1.001 * 1000.` evaluates to
/// `1000.9999999999999`). Negative and NaN inputs saturate to zero through the
/// float-to-integer cast.
///
/// Hours are zero-padded to four digits, minutes and seconds to two, and
/// milliseconds to three.
fn format_time(t: f64) -> String {
    const SECOND: u64 = 1000;
    const MINUTE: u64 = SECOND * 60;
    const HOUR: u64 = MINUTE * 60;

    let millis = (t * 1000.).round() as u64;
    let hh = millis / HOUR;
    let mm = millis % HOUR / MINUTE;
    let ss = millis % MINUTE / SECOND;
    let mmm = millis % SECOND;

    format!("{hh:04}:{mm:02}:{ss:02}.{mmm:03}")
}