From 4ca5624bd2a78d32085b9ab0ffdb5bf24840fec3 Mon Sep 17 00:00:00 2001 From: metamuffin Date: Wed, 26 Mar 2025 14:43:04 +0100 Subject: disable todo by default --- src/main.rs | 197 +++++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 134 insertions(+), 63 deletions(-) (limited to 'src') diff --git a/src/main.rs b/src/main.rs index 34b00bb..53292cd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,9 @@ use clap::Parser; use parse_wiki_text::{Node, Parameter}; use std::fs::File; -use std::io::{stdin, stdout, Read, Write}; +use std::io::{stderr, stdin, stdout, BufWriter, Read, Write}; +use std::sync::{Arc, Mutex}; +use std::thread; use tar::Header; extern crate bzip2; @@ -21,10 +23,16 @@ struct Args { /// Show non-fatal warnings #[arg(short, long)] verbose: bool, + #[arg(short, long)] + show_todo: bool, #[arg(short, long, default_value = "")] footer: String, } +struct Config { + show_todo: bool, +} + fn main() { let args = Args::parse(); @@ -34,16 +42,32 @@ fn main() { } let input = std::io::BufReader::new(input); - let mut archive = tar::Builder::new(stdout()); + let mut archive = tar::Builder::new(BufWriter::new(stdout())); let footer = format!( "staticwiki; {}", args.footer ); + let progress = Arc::new(Mutex::new((0, String::new()))); + { + let progress = progress.clone(); + thread::spawn(move || loop { + let (i, name) = progress.lock().unwrap().clone(); + eprint!("\r\x1b[2Karticles processed: {i}, current: {name}",); + stderr().flush().unwrap(); + std::thread::sleep(std::time::Duration::from_millis(1000 / 30)); + }); + } + + let config = Config { + show_todo: args.show_todo, + }; + for (i, result) in parse_mediawiki_dump::parse(input).enumerate() { match result { Ok(page) => { + *progress.lock().unwrap() = (i, page.title.clone()); if page.namespace == 0 && match &page.format { None => false, @@ -67,8 +91,8 @@ fn main() { let mut html = String::from(""); write!(&mut html, "{}", escape(&page.title)).unwrap(); write!(&mut html, "

{}

", escape(&page.title)).unwrap(); - render_toc(&mut html, &ast.nodes); - render_nodes(&mut html, &mut refs, &ast.nodes); + render_toc(&mut html, &ast.nodes, &config); + render_nodes(&mut html, &mut refs, &ast.nodes, &config); render_refs(&mut html, &refs); write!(&mut html, "").unwrap(); write!(&mut html, "").unwrap(); @@ -96,9 +120,6 @@ fn main() { break; } } - if i % 10000 == 0 { - eprintln!("{i}") - } if Some(i) == args.limit { break; } @@ -117,19 +138,19 @@ pub fn escape(text: &str) -> String { .replace("\"", """) } -fn render_nodes(html: &mut String, refs: &mut Vec, nodes: &Vec) { +fn render_nodes(html: &mut String, refs: &mut Vec, nodes: &Vec, config: &Config) { for n in nodes { - render_node(html, refs, n) + render_node(html, refs, n, config) } } -fn render_nodes_to_string(nodes: &Vec, refs: &mut Vec) -> String { +fn render_nodes_to_string(nodes: &Vec, refs: &mut Vec, config: &Config) -> String { let mut html = String::new(); - render_nodes(&mut html, refs, nodes); + render_nodes(&mut html, refs, nodes, config); return html; } -fn render_node(html: &mut String, refs: &mut Vec, n: &Node) { +fn render_node(html: &mut String, refs: &mut Vec, n: &Node, config: &Config) { use std::fmt::Write; match n { parse_wiki_text::Node::Bold { .. } => (), @@ -138,20 +159,31 @@ fn render_node(html: &mut String, refs: &mut Vec, n: &Node) { ordinal: _, target: _, .. - } => write!(html, "[todo]").unwrap(), + } => { + if config.show_todo { + write!(html, "[todo: category]").unwrap() + } + } parse_wiki_text::Node::CharacterEntity { character: _, .. } => { - write!(html, "[todo: character]").unwrap() + if config.show_todo { + write!(html, "[todo: character]").unwrap() + } } parse_wiki_text::Node::Comment { .. } => (), parse_wiki_text::Node::DefinitionList { items: _, .. } => { - write!(html, "[todo: def list]").unwrap() + if config.show_todo { + write!(html, "[todo: def list]").unwrap() + } } - parse_wiki_text::Node::EndTag { name: _, .. } => write!(html, "[todo: tag end]").unwrap(), + parse_wiki_text::Node::EndTag { name: _, .. } => (), //write!(html, "[todo: tag end]").unwrap(), + parse_wiki_text::Node::StartTag { name: _, .. } => (), // write!(html, "[todo: start tag]").unwrap() parse_wiki_text::Node::ExternalLink { nodes: _, .. } => { - write!(html, "[todo: external link]").unwrap() + if config.show_todo { + write!(html, "[todo: external link]").unwrap() + } } parse_wiki_text::Node::Heading { level, nodes, .. } => { - let h = render_nodes_to_string(nodes, refs); + let h = render_nodes_to_string(nodes, refs, config); write!(html, "{}", urlencode(&h), h).unwrap(); } parse_wiki_text::Node::HorizontalDivider { .. } => write!(html, "
").unwrap(), @@ -163,16 +195,23 @@ fn render_node(html: &mut String, refs: &mut Vec, n: &Node) { html, "{}", urlencode(target), // TODO does this always link to wikipedia? - render_nodes_to_string(text, refs) + render_nodes_to_string(text, refs, config) ) .unwrap(), - parse_wiki_text::Node::MagicWord { .. } => write!(html, "[todo: magic]").unwrap(), + parse_wiki_text::Node::MagicWord { .. } => { + if config.show_todo { + write!(html, "[todo: magic]").unwrap() + } + } parse_wiki_text::Node::OrderedList { items, .. } => write!( html, "
    {}
", items .iter() - .map(|e| format!("
  • {}
  • ", render_nodes_to_string(&e.nodes, refs))) + .map(|e| format!( + "
  • {}
  • ", + render_nodes_to_string(&e.nodes, refs, config) + )) .collect::>() .join("") ) @@ -182,7 +221,10 @@ fn render_node(html: &mut String, refs: &mut Vec, n: &Node) { "
      {}
    ", items .iter() - .map(|e| format!("
  • {}
  • ", render_nodes_to_string(&e.nodes, refs))) + .map(|e| format!( + "
  • {}
  • ", + render_nodes_to_string(&e.nodes, refs, config) + )) .collect::>() .join("") ) @@ -192,10 +234,17 @@ fn render_node(html: &mut String, refs: &mut Vec, n: &Node) { default: _, name: _, .. - } => write!(html, "[todo: parameter]").unwrap(), - parse_wiki_text::Node::Preformatted { nodes, .. } => { - write!(html, "
    {}
    ", render_nodes_to_string(nodes, refs)).unwrap() + } => { + if config.show_todo { + write!(html, "[todo: parameter]").unwrap() + } } + parse_wiki_text::Node::Preformatted { nodes, .. } => write!( + html, + "
    {}
    ", + render_nodes_to_string(nodes, refs, config) + ) + .unwrap(), parse_wiki_text::Node::Redirect { target, .. } => write!( html, "Redirect: {}", @@ -203,9 +252,7 @@ fn render_node(html: &mut String, refs: &mut Vec, n: &Node) { urlencode(target) ) .unwrap(), - parse_wiki_text::Node::StartTag { name: _, .. } => { - write!(html, "[todo: start tag]").unwrap() - } + parse_wiki_text::Node::Table { attributes: _, captions: _, @@ -215,13 +262,25 @@ fn render_node(html: &mut String, refs: &mut Vec, n: &Node) { parse_wiki_text::Node::Tag { name, nodes, .. } => match name.as_ref() { "ref" => { if !nodes.is_empty() { - let r = render_nodes_to_string(nodes, refs); + let r = render_nodes_to_string(nodes, refs, config); refs.push(r); let refid = refs.len(); write!(html, "[{}]", refid, refid).unwrap(); } } - _ => write!(html, "[todo: {name:?} tag]").unwrap(), + "math" => { + let r = render_nodes_to_string(nodes, refs, config); + write!(html, "math: {}", r).unwrap(); + } + "syntaxhighlight" => { + let r = render_nodes_to_string(nodes, refs, config); + write!(html, "
    {}
    ", r).unwrap(); + } + _ => { + if config.show_todo { + write!(html, "[todo: {name:?} tag]").unwrap() + } + } }, parse_wiki_text::Node::Template { name, parameters, .. @@ -230,13 +289,13 @@ fn render_node(html: &mut String, refs: &mut Vec, n: &Node) { Some(Node::Text { value, .. }) => value, _ => "", }; - render_template(html, refs, name, parameters); + render_template(html, refs, name, parameters, config); } parse_wiki_text::Node::Text { value, .. } => write!(html, "{}", escape(value)).unwrap(), } } -fn render_toc(html: &mut String, nodes: &Vec) { +fn render_toc(html: &mut String, nodes: &Vec, config: &Config) { use std::fmt::Write; write!(html, "

    Table of contents

    ").unwrap(); let mut k = 0; @@ -252,7 +311,7 @@ fn render_toc(html: &mut String, nodes: &Vec) { k -= 1; write!(html, "").unwrap(); } - let h = render_nodes_to_string(nodes, &mut vec![]); + let h = render_nodes_to_string(nodes, &mut vec![], config); write!(html, "
  • {}
  • ", urlencode(&h), h).unwrap(); } @@ -262,51 +321,35 @@ fn render_toc(html: &mut String, nodes: &Vec) { write!(html, "
    ").unwrap(); } -pub fn render_template( +fn render_template( html: &mut String, refs: &mut Vec, name: &str, params: &Vec, + config: &Config, ) -> Option<()> { use std::fmt::Write; - match name { + match name.to_lowercase().as_str() { + "internetquelle" | "literatur" | "citation" | "man" | "external link" => { + generic_template(html, name, params, config) + } + s if s.starts_with("cite ") => generic_template(html, name, params, config), + // TODO this can panic - "lang" => write!( + "lang" | "transliteration" => write!( html, "{}", - render_nodes_to_string(¶ms.get(1)?.value, refs) + render_nodes_to_string(¶ms.get(1)?.value, refs, config) ) .unwrap(), - "IPA" => write!( + s if s.starts_with("IPA") => write!( html, "{}", - render_nodes_to_string(¶ms.get(0)?.value, refs) + render_nodes_to_string(¶ms.get(0)?.value, refs, config) ) .unwrap(), - "Internetquelle" | "Literatur" => { - write!(html, "{}:
      ", escape(name)).unwrap(); - for p in params { - let key = p - .name - .as_ref() - .map(|n| render_nodes_to_string(n, &mut vec![])) - .unwrap_or(String::from("??")); - let value = render_nodes_to_string(&p.value, &mut vec![]); - if let "url" | "archiv-url" | "Online" = key.as_str() { - write!( - html, - "
    • {}: {}
    • ", - key, value, value - ) - } else { - write!(html, "
    • {}: {}
    • ", key, value) - } - .unwrap() - } - write!(html, "
    ").unwrap(); - } - "Siehe auch" | "Hauptartikel" => { + "Siehe auch" | "Hauptartikel" | "main" => { let k = text_node(params.get(0)?.value.get(0)?); write!( html, @@ -318,8 +361,12 @@ pub fn render_template( .unwrap(); } + "sfn" => {} + _ => { - write!(html, "[todo: {name:?} template]").unwrap(); + if config.show_todo { + write!(html, "[todo: {name:?} template]").unwrap(); + } // write!(html, "[todo: {name:?} template
    {params:#?}
    ]").unwrap(); // eprintln!("unsupported template {name:?}"); // eprintln!("{params:?}"); @@ -328,6 +375,30 @@ pub fn render_template( Some(()) } +fn generic_template(html: &mut String, name: &str, params: &Vec, config: &Config) { + use std::fmt::Write; + write!(html, "{}:
      ", escape(name)).unwrap(); + for p in params { + let key = p + .name + .as_ref() + .map(|n| render_nodes_to_string(n, &mut vec![], config)) + .unwrap_or(String::from("??")); + let value = render_nodes_to_string(&p.value, &mut vec![], config); + if let "url" | "archiv-url" | "Online" | "archive-url" = key.as_str() { + write!( + html, + "
    • {}: {}
    • ", + key, value, value + ) + } else { + write!(html, "
    • {}: {}
    • ", key, value) + } + .unwrap() + } + write!(html, "
    ").unwrap(); +} + pub fn text_node(n: &Node) -> String { match n { Node::Text { value, .. } => value.to_string(), -- cgit v1.2.3-70-g09d2