diff options
Diffstat (limited to 'src/main.rs')
-rw-r--r-- | src/main.rs | 197 |
1 files changed, 134 insertions, 63 deletions
diff --git a/src/main.rs b/src/main.rs index 34b00bb..53292cd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,9 @@ use clap::Parser; use parse_wiki_text::{Node, Parameter}; use std::fs::File; -use std::io::{stdin, stdout, Read, Write}; +use std::io::{stderr, stdin, stdout, BufWriter, Read, Write}; +use std::sync::{Arc, Mutex}; +use std::thread; use tar::Header; extern crate bzip2; @@ -21,10 +23,16 @@ struct Args { /// Show non-fatal warnings #[arg(short, long)] verbose: bool, + #[arg(short, long)] + show_todo: bool, #[arg(short, long, default_value = "")] footer: String, } +struct Config { + show_todo: bool, +} + fn main() { let args = Args::parse(); @@ -34,16 +42,32 @@ fn main() { } let input = std::io::BufReader::new(input); - let mut archive = tar::Builder::new(stdout()); + let mut archive = tar::Builder::new(BufWriter::new(stdout())); let footer = format!( "<a href=\"https://codeberg.org/metamuffin/staticwiki\">staticwiki</a>; {}", args.footer ); + let progress = Arc::new(Mutex::new((0, String::new()))); + { + let progress = progress.clone(); + thread::spawn(move || loop { + let (i, name) = progress.lock().unwrap().clone(); + eprint!("\r\x1b[2Karticles processed: {i}, current: {name}",); + stderr().flush().unwrap(); + std::thread::sleep(std::time::Duration::from_millis(1000 / 30)); + }); + } + + let config = Config { + show_todo: args.show_todo, + }; + for (i, result) in parse_mediawiki_dump::parse(input).enumerate() { match result { Ok(page) => { + *progress.lock().unwrap() = (i, page.title.clone()); if page.namespace == 0 && match &page.format { None => false, @@ -67,8 +91,8 @@ fn main() { let mut html = String::from("<!DOCTYPE html><html><head>"); write!(&mut html, "<title>{}</title></head>", escape(&page.title)).unwrap(); write!(&mut html, "<body><h1>{}</h1>", escape(&page.title)).unwrap(); - render_toc(&mut html, &ast.nodes); - render_nodes(&mut html, &mut refs, &ast.nodes); + render_toc(&mut html, &ast.nodes, &config); + render_nodes(&mut html, &mut 
refs, &ast.nodes, &config); render_refs(&mut html, &refs); write!(&mut html, "<footer>{footer}</footer>").unwrap(); write!(&mut html, "</body></html>").unwrap(); @@ -96,9 +120,6 @@ fn main() { break; } } - if i % 10000 == 0 { - eprintln!("{i}") - } if Some(i) == args.limit { break; } @@ -117,19 +138,19 @@ pub fn escape(text: &str) -> String { .replace("\"", "&quot;") } -fn render_nodes(html: &mut String, refs: &mut Vec<String>, nodes: &Vec<Node>) { +fn render_nodes(html: &mut String, refs: &mut Vec<String>, nodes: &Vec<Node>, config: &Config) { for n in nodes { - render_node(html, refs, n) + render_node(html, refs, n, config) } } -fn render_nodes_to_string(nodes: &Vec<Node>, refs: &mut Vec<String>) -> String { +fn render_nodes_to_string(nodes: &Vec<Node>, refs: &mut Vec<String>, config: &Config) -> String { let mut html = String::new(); - render_nodes(&mut html, refs, nodes); + render_nodes(&mut html, refs, nodes, config); return html; } -fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) { +fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node, config: &Config) { use std::fmt::Write; match n { parse_wiki_text::Node::Bold { .. } => (), @@ -138,20 +159,31 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) { ordinal: _, target: _, .. - } => write!(html, "[todo]").unwrap(), + } => { + if config.show_todo { + write!(html, "[todo: category]").unwrap() + } + } parse_wiki_text::Node::CharacterEntity { character: _, .. } => { - write!(html, "[todo: character]").unwrap() + if config.show_todo { + write!(html, "[todo: character]").unwrap() + } } parse_wiki_text::Node::Comment { .. } => (), parse_wiki_text::Node::DefinitionList { items: _, .. } => { - write!(html, "[todo: def list]").unwrap() + if config.show_todo { + write!(html, "[todo: def list]").unwrap() + } } - parse_wiki_text::Node::EndTag { name: _, .. } => write!(html, "[todo: tag end]").unwrap(), + parse_wiki_text::Node::EndTag { name: _, .. 
} => (), //write!(html, "[todo: tag end]").unwrap(), + parse_wiki_text::Node::StartTag { name: _, .. } => (), // write!(html, "[todo: start tag]").unwrap() parse_wiki_text::Node::ExternalLink { nodes: _, .. } => { - write!(html, "[todo: external link]").unwrap() + if config.show_todo { + write!(html, "[todo: external link]").unwrap() + } } parse_wiki_text::Node::Heading { level, nodes, .. } => { - let h = render_nodes_to_string(nodes, refs); + let h = render_nodes_to_string(nodes, refs, config); write!(html, "<h{level} id=\"{}\">{}</h{level}>", urlencode(&h), h).unwrap(); } parse_wiki_text::Node::HorizontalDivider { .. } => write!(html, "<hr>").unwrap(), @@ -163,16 +195,23 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) { html, "<a href=\"{}\">{}</a>", urlencode(target), // TODO does this always link to wikipedia? - render_nodes_to_string(text, refs) + render_nodes_to_string(text, refs, config) ) .unwrap(), - parse_wiki_text::Node::MagicWord { .. } => write!(html, "[todo: magic]").unwrap(), + parse_wiki_text::Node::MagicWord { .. } => { + if config.show_todo { + write!(html, "[todo: magic]").unwrap() + } + } parse_wiki_text::Node::OrderedList { items, .. } => write!( html, "<ol>{}</ol>", items .iter() - .map(|e| format!("<li>{}</li>", render_nodes_to_string(&e.nodes, refs))) + .map(|e| format!( + "<li>{}</li>", + render_nodes_to_string(&e.nodes, refs, config) + )) .collect::<Vec<_>>() .join("") ) @@ -182,7 +221,10 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) { "<ul>{}</ul>", items .iter() - .map(|e| format!("<li>{}</li>", render_nodes_to_string(&e.nodes, refs))) + .map(|e| format!( + "<li>{}</li>", + render_nodes_to_string(&e.nodes, refs, config) + )) .collect::<Vec<_>>() .join("") ) @@ -192,10 +234,17 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) { default: _, name: _, .. - } => write!(html, "[todo: parameter]").unwrap(), - parse_wiki_text::Node::Preformatted { nodes, .. 
} => { - write!(html, "<pre>{}</pre>", render_nodes_to_string(nodes, refs)).unwrap() + } => { + if config.show_todo { + write!(html, "[todo: parameter]").unwrap() + } } + parse_wiki_text::Node::Preformatted { nodes, .. } => write!( + html, + "<pre>{}</pre>", + render_nodes_to_string(nodes, refs, config) + ) + .unwrap(), parse_wiki_text::Node::Redirect { target, .. } => write!( html, "Redirect: <a href=\"{}\">{}</a>", @@ -203,9 +252,7 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) { urlencode(target) ) .unwrap(), - parse_wiki_text::Node::StartTag { name: _, .. } => { - write!(html, "[todo: start tag]").unwrap() - } + parse_wiki_text::Node::Table { attributes: _, captions: _, @@ -215,13 +262,25 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) { parse_wiki_text::Node::Tag { name, nodes, .. } => match name.as_ref() { "ref" => { if !nodes.is_empty() { - let r = render_nodes_to_string(nodes, refs); + let r = render_nodes_to_string(nodes, refs, config); refs.push(r); let refid = refs.len(); write!(html, "<sup><a href=\"#{}\">[{}]</a></sup>", refid, refid).unwrap(); } } - _ => write!(html, "[todo: {name:?} tag]").unwrap(), + "math" => { + let r = render_nodes_to_string(nodes, refs, config); + write!(html, "math: <code>{}</code>", r).unwrap(); + } + "syntaxhighlight" => { + let r = render_nodes_to_string(nodes, refs, config); + write!(html, "<pre>{}</pre>", r).unwrap(); + } + _ => { + if config.show_todo { + write!(html, "[todo: {name:?} tag]").unwrap() + } + } }, parse_wiki_text::Node::Template { name, parameters, .. @@ -230,13 +289,13 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) { Some(Node::Text { value, .. }) => value, _ => "", }; - render_template(html, refs, name, parameters); + render_template(html, refs, name, parameters, config); } parse_wiki_text::Node::Text { value, .. 
} => write!(html, "{}", escape(value)).unwrap(), } } -fn render_toc(html: &mut String, nodes: &Vec<Node>) { +fn render_toc(html: &mut String, nodes: &Vec<Node>, config: &Config) { use std::fmt::Write; write!(html, "<div><h4><i>Table of contents</i></h4>").unwrap(); let mut k = 0; @@ -252,7 +311,7 @@ fn render_toc(html: &mut String, nodes: &Vec<Node>) { k -= 1; write!(html, "</ol>").unwrap(); } - let h = render_nodes_to_string(nodes, &mut vec![]); + let h = render_nodes_to_string(nodes, &mut vec![], config); write!(html, "<li><a href=\"#{}\">{}</a></li>", urlencode(&h), h).unwrap(); } @@ -262,51 +321,35 @@ fn render_toc(html: &mut String, nodes: &Vec<Node>) { write!(html, "</div>").unwrap(); } -pub fn render_template( +fn render_template( html: &mut String, refs: &mut Vec<String>, name: &str, params: &Vec<Parameter>, + config: &Config, ) -> Option<()> { use std::fmt::Write; - match name { + match name.to_lowercase().as_str() { + "internetquelle" | "literatur" | "citation" | "man" | "external link" => { + generic_template(html, name, params, config) + } + s if s.starts_with("cite ") => generic_template(html, name, params, config), + // TODO this can panic - "lang" => write!( + "lang" | "transliteration" => write!( html, "{}", - render_nodes_to_string(&params.get(1)?.value, refs) + render_nodes_to_string(&params.get(1)?.value, refs, config) ) .unwrap(), - "IPA" => write!( + s if s.starts_with("IPA") => write!( html, "<code>{}</code>", - render_nodes_to_string(&params.get(0)?.value, refs) + render_nodes_to_string(&params.get(0)?.value, refs, config) ) .unwrap(), - "Internetquelle" | "Literatur" => { - write!(html, "{}: <ul>", escape(name)).unwrap(); - for p in params { - let key = p - .name - .as_ref() - .map(|n| render_nodes_to_string(n, &mut vec![])) - .unwrap_or(String::from("??")); - let value = render_nodes_to_string(&p.value, &mut vec![]); - if let "url" | "archiv-url" | "Online" = key.as_str() { - write!( - html, - "<li>{}: <a href=\"{}\">{}</a></li>", - key, value, value - ) - 
} else { - write!(html, "<li>{}: {}</li>", key, value) - } - .unwrap() - } - write!(html, "</ul>").unwrap(); - } - "Siehe auch" | "Hauptartikel" => { + "Siehe auch" | "Hauptartikel" | "main" => { let k = text_node(params.get(0)?.value.get(0)?); write!( html, @@ -318,8 +361,12 @@ pub fn render_template( .unwrap(); } + "sfn" => {} + _ => { - write!(html, "[todo: {name:?} template]").unwrap(); + if config.show_todo { + write!(html, "[todo: {name:?} template]").unwrap(); + } // write!(html, "[todo: {name:?} template <pre>{params:#?}</pre>]").unwrap(); // eprintln!("unsupported template {name:?}"); // eprintln!("{params:?}"); @@ -328,6 +375,30 @@ pub fn render_template( Some(()) } +fn generic_template(html: &mut String, name: &str, params: &Vec<Parameter>, config: &Config) { + use std::fmt::Write; + write!(html, "{}: <ul>", escape(name)).unwrap(); + for p in params { + let key = p + .name + .as_ref() + .map(|n| render_nodes_to_string(n, &mut vec![], config)) + .unwrap_or(String::from("??")); + let value = render_nodes_to_string(&p.value, &mut vec![], config); + if let "url" | "archiv-url" | "Online" | "archive-url" = key.as_str() { + write!( + html, + "<li>{}: <a href=\"{}\">{}</a></li>", + key, value, value + ) + } else { + write!(html, "<li>{}: {}</li>", key, value) + } + .unwrap() + } + write!(html, "</ul>").unwrap(); +} + pub fn text_node(n: &Node) -> String { match n { Node::Text { value, .. } => value.to_string(), |