use clap::Parser;
use parse_wiki_text::{Node, Parameter};
use std::fs::File;
use std::io::{stdin, stdout, Read, Write};
use tar::Header;
extern crate bzip2;
extern crate parse_mediawiki_dump;
#[derive(Parser)]
struct Args {
/// Stop after n articles (for debugging)
#[arg(short, long)]
limit: Option{}
", escape(&page.title)).unwrap();
render_toc(&mut html, &ast.nodes);
render_nodes(&mut html, &mut refs, &ast.nodes);
render_refs(&mut html, &refs);
write!(&mut html, "").unwrap();
if args.tar {
let mut header = Header::new_gnu();
header.set_size(html.as_bytes().len() as u64);
header.set_cksum();
archive
.append_data(&mut header, filename, html.as_bytes())
.unwrap();
} else {
let mut f = File::create(format!("out/{}", filename)).unwrap();
f.write_all(html.as_bytes()).unwrap()
}
} else {
eprintln!("page ignored: {:?}", page.title);
}
}
}
if Some(i) == args.limit {
break;
}
}
archive.finish().unwrap();
}
/// Turns a page title into a flat name by swapping every `/` for `_`.
///
/// Despite the name, no percent-encoding is performed: the only character
/// that is rewritten is the forward slash; everything else is copied as-is.
fn urlencode(t: &str) -> String {
    // Splitting on the separator and re-joining with the replacement is
    // equivalent to a global substitution of "/" by "_".
    let segments: Vec<&str> = t.split("/").collect();
    segments.join("_")
}
/// Escapes `text` so it can be embedded in HTML text content or inside a
/// double-quoted attribute value.
///
/// Replacements performed:
///
/// | input | output   |
/// |-------|----------|
/// | `&`   | `&amp;`  |
/// | `<`   | `&lt;`   |
/// | `>`   | `&gt;`   |
/// | `"`   | `&quot;` |
/// | `'`   | `’`      |
///
/// The ASCII apostrophe is mapped to the typographic apostrophe (U+2019)
/// rather than to an entity, so it can never terminate a single-quoted
/// attribute. All other characters are copied through unchanged.
///
/// The input is processed in a single pass, so an `&` that is already part of
/// an entity in `text` is escaped exactly once (`"&amp;"` -> `"&amp;amp;"`)
/// and entities produced by this function are never re-escaped.
pub fn escape(text: &str) -> String {
    // The output is at least as long as the input; reserving that much avoids
    // most re-allocations for typical, mostly-plain text.
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '\'' => escaped.push('’'),
            '"' => escaped.push_str("&quot;"),
            other => escaped.push(other),
        }
    }
    escaped
}
fn render_nodes(html: &mut String, refs: &mut Vec
").unwrap(),
parse_wiki_text::Node::Image {
target: _, text: _, ..
} => write!(html, "[todo: image]").unwrap(),
parse_wiki_text::Node::Italic { .. } => (),
parse_wiki_text::Node::Link { target, text, .. } => write!(
html,
"{}",
urlencode(target), // TODO does this always link to wikipedia?
render_nodes_to_string(text, refs)
)
.unwrap(),
parse_wiki_text::Node::MagicWord { .. } => write!(html, "[todo: magic]").unwrap(),
parse_wiki_text::Node::OrderedList { items, .. } => write!(
html,
"{}
",
items
.iter()
.map(|e| format!("{}
",
items
.iter()
.map(|e| format!("
").unwrap(), parse_wiki_text::Node::Parameter { default: _, name: _, .. } => write!(html, "[todo: parameter]").unwrap(), parse_wiki_text::Node::Preformatted { nodes, .. } => { write!(html, "
{}", render_nodes_to_string(nodes, refs)).unwrap() } parse_wiki_text::Node::Redirect { target, .. } => write!( html, "Redirect: {}", urlencode(target), urlencode(target) ) .unwrap(), parse_wiki_text::Node::StartTag { name: _, .. } => { write!(html, "[todo: start tag]").unwrap() } parse_wiki_text::Node::Table { attributes: _, captions: _, rows: _, .. } => write!(html, "[todo: table]").unwrap(), parse_wiki_text::Node::Tag { name, nodes, .. } => match name.as_ref() { "ref" => { if !nodes.is_empty() { let r = render_nodes_to_string(nodes, refs); refs.push(r); let refid = refs.len(); write!(html, "[{}]", refid, refid).unwrap(); } } _ => write!(html, "[todo: tag {name:?} template]").unwrap(), }, parse_wiki_text::Node::Template { name, parameters, .. } => { let name = match name.first() { Some(Node::Text { value, .. }) => value, _ => panic!("no"), }; render_template(html, refs, name, parameters) } parse_wiki_text::Node::Text { value, .. } => write!(html, "{}", escape(value)).unwrap(), } } fn render_toc(html: &mut String, nodes: &Vec
{}
",
render_nodes_to_string(¶ms[0].value, refs)
)
.unwrap(),
_ => {
write!(html, "[todo: {name:?} template]").unwrap();
// eprintln!("unsupported template {name:?}");
// eprintln!("{params:?}");
}
}
}
fn render_refs(html: &mut String, refs: &Vec