aboutsummaryrefslogtreecommitdiff
path: root/src/main.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/main.rs')
-rw-r--r--src/main.rs197
1 files changed, 134 insertions, 63 deletions
diff --git a/src/main.rs b/src/main.rs
index 34b00bb..53292cd 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,7 +1,9 @@
use clap::Parser;
use parse_wiki_text::{Node, Parameter};
use std::fs::File;
-use std::io::{stdin, stdout, Read, Write};
+use std::io::{stderr, stdin, stdout, BufWriter, Read, Write};
+use std::sync::{Arc, Mutex};
+use std::thread;
use tar::Header;
extern crate bzip2;
@@ -21,10 +23,16 @@ struct Args {
/// Show non-fatal warnings
#[arg(short, long)]
verbose: bool,
+ #[arg(short, long)]
+ show_todo: bool,
#[arg(short, long, default_value = "")]
footer: String,
}
+struct Config {
+ show_todo: bool,
+}
+
fn main() {
let args = Args::parse();
@@ -34,16 +42,32 @@ fn main() {
}
let input = std::io::BufReader::new(input);
- let mut archive = tar::Builder::new(stdout());
+ let mut archive = tar::Builder::new(BufWriter::new(stdout()));
let footer = format!(
"<a href=\"https://codeberg.org/metamuffin/staticwiki\">staticwiki</a>; {}",
args.footer
);
+ let progress = Arc::new(Mutex::new((0, String::new())));
+ {
+ let progress = progress.clone();
+ thread::spawn(move || loop {
+ let (i, name) = progress.lock().unwrap().clone();
+ eprint!("\r\x1b[2Karticles processed: {i}, current: {name}",);
+ stderr().flush().unwrap();
+ std::thread::sleep(std::time::Duration::from_millis(1000 / 30));
+ });
+ }
+
+ let config = Config {
+ show_todo: args.show_todo,
+ };
+
for (i, result) in parse_mediawiki_dump::parse(input).enumerate() {
match result {
Ok(page) => {
+ *progress.lock().unwrap() = (i, page.title.clone());
if page.namespace == 0
&& match &page.format {
None => false,
@@ -67,8 +91,8 @@ fn main() {
let mut html = String::from("<!DOCTYPE html><html><head>");
write!(&mut html, "<title>{}</title></head>", escape(&page.title)).unwrap();
write!(&mut html, "<body><h1>{}</h1>", escape(&page.title)).unwrap();
- render_toc(&mut html, &ast.nodes);
- render_nodes(&mut html, &mut refs, &ast.nodes);
+ render_toc(&mut html, &ast.nodes, &config);
+ render_nodes(&mut html, &mut refs, &ast.nodes, &config);
render_refs(&mut html, &refs);
write!(&mut html, "<footer>{footer}</footer>").unwrap();
write!(&mut html, "</body></html>").unwrap();
@@ -96,9 +120,6 @@ fn main() {
break;
}
}
- if i % 10000 == 0 {
- eprintln!("{i}")
- }
if Some(i) == args.limit {
break;
}
@@ -117,19 +138,19 @@ pub fn escape(text: &str) -> String {
.replace("\"", "&quot;")
}
-fn render_nodes(html: &mut String, refs: &mut Vec<String>, nodes: &Vec<Node>) {
+fn render_nodes(html: &mut String, refs: &mut Vec<String>, nodes: &Vec<Node>, config: &Config) {
for n in nodes {
- render_node(html, refs, n)
+ render_node(html, refs, n, config)
}
}
-fn render_nodes_to_string(nodes: &Vec<Node>, refs: &mut Vec<String>) -> String {
+fn render_nodes_to_string(nodes: &Vec<Node>, refs: &mut Vec<String>, config: &Config) -> String {
let mut html = String::new();
- render_nodes(&mut html, refs, nodes);
+ render_nodes(&mut html, refs, nodes, config);
return html;
}
-fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) {
+fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node, config: &Config) {
use std::fmt::Write;
match n {
parse_wiki_text::Node::Bold { .. } => (),
@@ -138,20 +159,31 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) {
ordinal: _,
target: _,
..
- } => write!(html, "[todo]").unwrap(),
+ } => {
+ if config.show_todo {
+ write!(html, "[todo: category]").unwrap()
+ }
+ }
parse_wiki_text::Node::CharacterEntity { character: _, .. } => {
- write!(html, "[todo: character]").unwrap()
+ if config.show_todo {
+ write!(html, "[todo: character]").unwrap()
+ }
}
parse_wiki_text::Node::Comment { .. } => (),
parse_wiki_text::Node::DefinitionList { items: _, .. } => {
- write!(html, "[todo: def list]").unwrap()
+ if config.show_todo {
+ write!(html, "[todo: def list]").unwrap()
+ }
}
- parse_wiki_text::Node::EndTag { name: _, .. } => write!(html, "[todo: tag end]").unwrap(),
+ parse_wiki_text::Node::EndTag { name: _, .. } => (), //write!(html, "[todo: tag end]").unwrap(),
+ parse_wiki_text::Node::StartTag { name: _, .. } => (), // write!(html, "[todo: start tag]").unwrap()
parse_wiki_text::Node::ExternalLink { nodes: _, .. } => {
- write!(html, "[todo: external link]").unwrap()
+ if config.show_todo {
+ write!(html, "[todo: external link]").unwrap()
+ }
}
parse_wiki_text::Node::Heading { level, nodes, .. } => {
- let h = render_nodes_to_string(nodes, refs);
+ let h = render_nodes_to_string(nodes, refs, config);
write!(html, "<h{level} id=\"{}\">{}</h{level}>", urlencode(&h), h).unwrap();
}
parse_wiki_text::Node::HorizontalDivider { .. } => write!(html, "<hr>").unwrap(),
@@ -163,16 +195,23 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) {
html,
"<a href=\"{}\">{}</a>",
urlencode(target), // TODO does this always link to wikipedia?
- render_nodes_to_string(text, refs)
+ render_nodes_to_string(text, refs, config)
)
.unwrap(),
- parse_wiki_text::Node::MagicWord { .. } => write!(html, "[todo: magic]").unwrap(),
+ parse_wiki_text::Node::MagicWord { .. } => {
+ if config.show_todo {
+ write!(html, "[todo: magic]").unwrap()
+ }
+ }
parse_wiki_text::Node::OrderedList { items, .. } => write!(
html,
"<ol>{}</ol>",
items
.iter()
- .map(|e| format!("<li>{}</li>", render_nodes_to_string(&e.nodes, refs)))
+ .map(|e| format!(
+ "<li>{}</li>",
+ render_nodes_to_string(&e.nodes, refs, config)
+ ))
.collect::<Vec<_>>()
.join("")
)
@@ -182,7 +221,10 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) {
"<ul>{}</ul>",
items
.iter()
- .map(|e| format!("<li>{}</li>", render_nodes_to_string(&e.nodes, refs)))
+ .map(|e| format!(
+ "<li>{}</li>",
+ render_nodes_to_string(&e.nodes, refs, config)
+ ))
.collect::<Vec<_>>()
.join("")
)
@@ -192,10 +234,17 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) {
default: _,
name: _,
..
- } => write!(html, "[todo: parameter]").unwrap(),
- parse_wiki_text::Node::Preformatted { nodes, .. } => {
- write!(html, "<pre>{}</pre>", render_nodes_to_string(nodes, refs)).unwrap()
+ } => {
+ if config.show_todo {
+ write!(html, "[todo: parameter]").unwrap()
+ }
}
+ parse_wiki_text::Node::Preformatted { nodes, .. } => write!(
+ html,
+ "<pre>{}</pre>",
+ render_nodes_to_string(nodes, refs, config)
+ )
+ .unwrap(),
parse_wiki_text::Node::Redirect { target, .. } => write!(
html,
"Redirect: <a href=\"{}\">{}</a>",
@@ -203,9 +252,7 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) {
urlencode(target)
)
.unwrap(),
- parse_wiki_text::Node::StartTag { name: _, .. } => {
- write!(html, "[todo: start tag]").unwrap()
- }
+
parse_wiki_text::Node::Table {
attributes: _,
captions: _,
@@ -215,13 +262,25 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) {
parse_wiki_text::Node::Tag { name, nodes, .. } => match name.as_ref() {
"ref" => {
if !nodes.is_empty() {
- let r = render_nodes_to_string(nodes, refs);
+ let r = render_nodes_to_string(nodes, refs, config);
refs.push(r);
let refid = refs.len();
write!(html, "<sup><a href=\"#{}\">[{}]</a></sup>", refid, refid).unwrap();
}
}
- _ => write!(html, "[todo: {name:?} tag]").unwrap(),
+ "math" => {
+ let r = render_nodes_to_string(nodes, refs, config);
+ write!(html, "math: <code>{}</code>", r).unwrap();
+ }
+ "syntaxhighlight" => {
+ let r = render_nodes_to_string(nodes, refs, config);
+ write!(html, "<pre>{}</pre>", r).unwrap();
+ }
+ _ => {
+ if config.show_todo {
+ write!(html, "[todo: {name:?} tag]").unwrap()
+ }
+ }
},
parse_wiki_text::Node::Template {
name, parameters, ..
@@ -230,13 +289,13 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) {
Some(Node::Text { value, .. }) => value,
_ => "",
};
- render_template(html, refs, name, parameters);
+ render_template(html, refs, name, parameters, config);
}
parse_wiki_text::Node::Text { value, .. } => write!(html, "{}", escape(value)).unwrap(),
}
}
-fn render_toc(html: &mut String, nodes: &Vec<Node>) {
+fn render_toc(html: &mut String, nodes: &Vec<Node>, config: &Config) {
use std::fmt::Write;
write!(html, "<div><h4><i>Table of contents</i></h4>").unwrap();
let mut k = 0;
@@ -252,7 +311,7 @@ fn render_toc(html: &mut String, nodes: &Vec<Node>) {
k -= 1;
write!(html, "</ol>").unwrap();
}
- let h = render_nodes_to_string(nodes, &mut vec![]);
+ let h = render_nodes_to_string(nodes, &mut vec![], config);
write!(html, "<li><a href=\"#{}\">{}</a></li>", urlencode(&h), h).unwrap();
}
@@ -262,51 +321,35 @@ fn render_toc(html: &mut String, nodes: &Vec<Node>) {
write!(html, "</div>").unwrap();
}
-pub fn render_template(
+fn render_template(
html: &mut String,
refs: &mut Vec<String>,
name: &str,
params: &Vec<Parameter>,
+ config: &Config,
) -> Option<()> {
use std::fmt::Write;
- match name {
+ match name.to_lowercase().as_str() {
+ "internetquelle" | "literatur" | "citation" | "man" | "external link" => {
+ generic_template(html, name, params, config)
+ }
+ s if s.starts_with("cite ") => generic_template(html, name, params, config),
+
// TODO this can panic
- "lang" => write!(
+ "lang" | "transliteration" => write!(
html,
"{}",
- render_nodes_to_string(&params.get(1)?.value, refs)
+ render_nodes_to_string(&params.get(1)?.value, refs, config)
)
.unwrap(),
- "IPA" => write!(
+ s if s.starts_with("IPA") => write!(
html,
"<code>{}</code>",
- render_nodes_to_string(&params.get(0)?.value, refs)
+ render_nodes_to_string(&params.get(0)?.value, refs, config)
)
.unwrap(),
- "Internetquelle" | "Literatur" => {
- write!(html, "{}: <ul>", escape(name)).unwrap();
- for p in params {
- let key = p
- .name
- .as_ref()
- .map(|n| render_nodes_to_string(n, &mut vec![]))
- .unwrap_or(String::from("??"));
- let value = render_nodes_to_string(&p.value, &mut vec![]);
- if let "url" | "archiv-url" | "Online" = key.as_str() {
- write!(
- html,
- "<li>{}: <a href=\"{}\">{}</a></li>",
- key, value, value
- )
- } else {
- write!(html, "<li>{}: {}</li>", key, value)
- }
- .unwrap()
- }
- write!(html, "</ul>").unwrap();
- }
- "Siehe auch" | "Hauptartikel" => {
+ "Siehe auch" | "Hauptartikel" | "main" => {
let k = text_node(params.get(0)?.value.get(0)?);
write!(
html,
@@ -318,8 +361,12 @@ pub fn render_template(
.unwrap();
}
+ "sfn" => {}
+
_ => {
- write!(html, "[todo: {name:?} template]").unwrap();
+ if config.show_todo {
+ write!(html, "[todo: {name:?} template]").unwrap();
+ }
// write!(html, "[todo: {name:?} template <pre>{params:#?}</pre>]").unwrap();
// eprintln!("unsupported template {name:?}");
// eprintln!("{params:?}");
@@ -328,6 +375,30 @@ pub fn render_template(
Some(())
}
+fn generic_template(html: &mut String, name: &str, params: &Vec<Parameter>, config: &Config) {
+ use std::fmt::Write;
+ write!(html, "{}: <ul>", escape(name)).unwrap();
+ for p in params {
+ let key = p
+ .name
+ .as_ref()
+ .map(|n| render_nodes_to_string(n, &mut vec![], config))
+ .unwrap_or(String::from("??"));
+ let value = render_nodes_to_string(&p.value, &mut vec![], config);
+ if let "url" | "archiv-url" | "Online" | "archive-url" = key.as_str() {
+ write!(
+ html,
+ "<li>{}: <a href=\"{}\">{}</a></li>",
+ key, value, value
+ )
+ } else {
+ write!(html, "<li>{}: {}</li>", key, value)
+ }
+ .unwrap()
+ }
+ write!(html, "</ul>").unwrap();
+}
+
pub fn text_node(n: &Node) -> String {
match n {
Node::Text { value, .. } => value.to_string(),