about summary refs log tree commit diff
diff options
context:
space:
mode:
author metamuffin <metamuffin@disroot.org> 2022-09-25 18:29:22 +0200
committer metamuffin <metamuffin@disroot.org> 2022-09-25 18:29:22 +0200
commit e3edf18503b3975ccec3b33c0cb9e7f0888bd031 (patch)
tree 9b8d795bf5e0ff6e0f5cdd882cef07d495f73a72
parent a80b5c677417cdbc17df3109ef9d12afe79973cc (diff)
download metamuffin-blog-e3edf18503b3975ccec3b33c0cb9e7f0888bd031.tar
metamuffin-blog-e3edf18503b3975ccec3b33c0cb9e7f0888bd031.tar.bz2
metamuffin-blog-e3edf18503b3975ccec3b33c0cb9e7f0888bd031.tar.zst
extend parser
-rw-r--r-- code/Cargo.lock 24
-rw-r--r-- code/Cargo.toml 1
-rw-r--r-- code/src/atom.rs 2
-rw-r--r-- code/src/html.rs 6
-rw-r--r-- code/src/markdown/parser.rs 188
-rw-r--r-- code/src/markdown/render.rs 30
-rw-r--r-- code/src/syntax_highlight/grammar.rs 2
-rw-r--r-- code/src/syntax_highlight/mod.rs 2
-rw-r--r-- readme.md 11
9 files changed, 215 insertions, 51 deletions
diff --git a/code/Cargo.lock b/code/Cargo.lock
index 0bf0ca0..a41b2d4 100644
--- a/code/Cargo.lock
+++ b/code/Cargo.lock
@@ -48,7 +48,6 @@ dependencies = [
"clap",
"iso8601",
"laby",
- "markdown",
"synoptic",
]
@@ -171,29 +170,12 @@ dependencies = [
]
[[package]]
-name = "lazy_static"
-version = "1.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
-
-[[package]]
name = "libc"
version = "0.2.132"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5"
[[package]]
-name = "markdown"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ef3aab6a1d529b112695f72beec5ee80e729cb45af58663ec902c8fac764ecdd"
-dependencies = [
- "lazy_static",
- "pipeline",
- "regex",
-]
-
-[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -228,12 +210,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ff7415e9ae3fff1225851df9e0d9e4e5479f947619774677a63572e55e80eff"
[[package]]
-name = "pipeline"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d15b6607fa632996eb8a17c9041cb6071cb75ac057abd45dece578723ea8c7c0"
-
-[[package]]
name = "proc-macro-error"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/code/Cargo.toml b/code/Cargo.toml
index c528649..c2081a5 100644
--- a/code/Cargo.toml
+++ b/code/Cargo.toml
@@ -7,6 +7,5 @@ edition = "2021"
laby = "0.2.4" # html generation macros
clap = { version = "3.2.17", features = ["derive"] } # bloated argument parser
anyhow = "1.0.62" # error stuff
-markdown = "0.3.0" # markdown parser
iso8601 = "0.5.0" # date parsing
synoptic = "1.2.0" # syntax highlighting
diff --git a/code/src/atom.rs b/code/src/atom.rs
index 7912b97..c231c94 100644
--- a/code/src/atom.rs
+++ b/code/src/atom.rs
@@ -1,4 +1,4 @@
-use crate::{get_articles, markdown::escape, Args, ArticleMeta, BLOG_BASE};
+use crate::{get_articles, markdown::render::escape, Args, ArticleMeta, BLOG_BASE};
use std::process::{Command, Stdio};
pub fn generate_atom(args: &Args) -> String {
diff --git a/code/src/html.rs b/code/src/html.rs
index 5bcc245..d310ad3 100644
--- a/code/src/html.rs
+++ b/code/src/html.rs
@@ -1,4 +1,6 @@
-use crate::{article_metadata, file_history, get_articles, markdown::blocks_to_html, ArticleMeta};
+use crate::{
+ article_metadata, file_history, get_articles, markdown::{render::blocks_to_html, self}, ArticleMeta,
+};
use laby::{frag, html, iter, li, ul, Render};
use std::fs::read_to_string;
@@ -26,7 +28,7 @@ pub fn article(path: String) -> impl Render {
scaffold(
article_metadata(path.clone().into()).title,
frag!(
- laby::raw!(blocks_to_html(markdown::tokenize(
+ laby::raw!(blocks_to_html(markdown::parser::parse(
&read_to_string(&path).unwrap()
))),
hr!(),
diff --git a/code/src/markdown/parser.rs b/code/src/markdown/parser.rs
index e69de29..cb025a6 100644
--- a/code/src/markdown/parser.rs
+++ b/code/src/markdown/parser.rs
@@ -0,0 +1,188 @@
+#[derive(Debug, Clone)]
+pub enum Block {
+ Header(usize, Vec<Span>),
+ Paragraph(Vec<Span>),
+ Blockquote(Vec<Block>),
+ CodeBlock(Option<String>, String),
+ LatexBlock(String),
+ OrderedList(Vec<Vec<Block>>),
+ UnorderedList(Vec<Vec<Block>>),
+ Raw(String),
+ Hr,
+}
+#[derive(Debug, Clone)]
+pub enum Span {
+ Break,
+ Text(String),
+ Code(String),
+ Link(String, String),
+ Image(String, String),
+ Emphasis(Vec<Span>),
+ Strong(Vec<Span>),
+ Latex(String),
+}
+
+pub fn parse(mut s: &str) -> Vec<Block> {
+ let mut blocks = Vec::new();
+ while s.len() != 0 {
+ if s.starts_with("\n") {
+ s = &s[1..];
+ continue;
+ }
+ // TODO bad code here
+ if let Some((block, rest)) = try_header(s) {
+ s = rest;
+ blocks.push(block);
+ continue;
+ }
+ if let Some((block, rest)) = try_latex_block(s) {
+ s = rest;
+ blocks.push(block);
+ continue;
+ }
+ if let Some((block, rest)) = try_code_block(s) {
+ s = rest;
+ blocks.push(block);
+ continue;
+ }
+ if let Some((block, rest)) = try_list(s) {
+ s = rest;
+ blocks.push(block);
+ continue;
+ }
+ let lf = [s.find("\n\n"), s.find("\n-")]
+ .iter()
+ .filter_map(|e| *e)
+ .min()
+ .unwrap_or(s.len());
+ let span = Span::parse(&s[..lf]);
+ blocks.push(Block::Paragraph(span));
+ if lf >= s.len() {
+ break;
+ }
+ s = &s[lf + 1..];
+ }
+ blocks
+}
+
+fn try_code_block(mut s: &str) -> Option<(Block, &str)> {
+ if !s.starts_with("```") {
+ return None;
+ }
+ s = &s[3..];
+ let lf = s.find('\n')?;
+ let syntax = if lf != 0 {
+ Some(String::from(&s[0..lf]))
+ } else {
+ None
+ };
+ s = &s[lf..];
+ let end = s.find("\n```\n")?;
+ Some((
+ Block::CodeBlock(syntax, String::from(&s[..end])),
+ &s[end + 4..],
+ ))
+}
+fn try_latex_block(mut s: &str) -> Option<(Block, &str)> {
+ if !s.starts_with("$$") {
+ return None;
+ }
+ s = &s[2..];
+ let end = s.find("$$")?;
+ Some((Block::LatexBlock(String::from(&s[..end])), &s[end + 2..]))
+}
+fn try_list(mut s: &str) -> Option<(Block, &str)> {
+ if !s.starts_with("-") {
+ return None;
+ };
+ let mut blocks = vec![];
+ loop {
+ if !s.starts_with("-") || s.len() == 0 {
+ break Some((Block::UnorderedList(blocks), s));
+ }
+ s = &s[1..];
+ let mut lf = s.find("\n").unwrap();
+ while s[lf + 1..].starts_with(" -") {
+ lf += 2 + s[lf + 2..].find("\n").unwrap();
+ }
+ eprintln!("{:?}", &s[..lf]);
+ let mut k = s[..lf]
+ .split("\n")
+ .map(|l| if l.starts_with(" ") { &l[2..] } else { &l })
+ .collect::<Vec<_>>()
+ .join("\n");
+ k.push('\n');
+ blocks.push(parse(&k));
+ s = &s[lf + 1..];
+ }
+}
+fn try_header(s: &str) -> Option<(Block, &str)> {
+ if s.starts_with("#") {
+ let mut u = 0;
+ while s.chars().nth(u)? == '#' {
+ u += 1;
+ }
+ let lf = s.find('\n')?;
+ Some((Block::Header(u, Span::parse(&s[u..lf])), &s[lf + 1..]))
+ } else {
+ None
+ }
+}
+
+impl Span {
+ pub fn parse(mut s: &str) -> Vec<Span> {
+ let mut spans = Vec::new();
+ while s.len() != 0 {
+ let nt = s.find(&['*', '_', '`', '[', '$']);
+
+ if let Some(nt) = nt {
+ spans.push(Span::Text(String::from(&s[..nt])));
+ s = &s[nt..];
+ if s.starts_with("**") {
+ s = &s[2..];
+ let end = s.find("**").expect("** not ended");
+ spans.push(Span::Strong(Span::parse(&s[..end])));
+ s = &s[end + 2..];
+ continue;
+ }
+ if s.starts_with("_") {
+ s = &s[1..];
+ let end = s.find("_").expect("_ not ended");
+ spans.push(Span::Emphasis(Span::parse(&s[..end])));
+ s = &s[end + 1..];
+ continue;
+ }
+ if s.starts_with("`") {
+ s = &s[1..];
+ let end = s.find("`").expect("` not ended");
+ spans.push(Span::Code(String::from(&s[..end])));
+ s = &s[end + 1..];
+ continue;
+ }
+ if s.starts_with("$") {
+ s = &s[1..];
+ let end = s.find("$").expect("$ not ended");
+ spans.push(Span::Latex(String::from(&s[..end])));
+ s = &s[end + 1..];
+ continue;
+ }
+ if s.starts_with("[") {
+ s = &s[1..];
+ let del = s.find("](").expect("]( expected");
+ let end = del + s[del..].find(")").expect(") expected");
+ spans.push(Span::Link(
+ String::from(&s[..del]),
+ String::from(&s[del + 2..end]),
+ ));
+ s = &s[end + 1..];
+ continue;
+ }
+ panic!("{s:?}")
+ } else {
+ spans.push(Span::Text(String::from(s)));
+ break;
+ }
+ }
+ spans
+ }
+}
diff --git a/code/src/markdown/render.rs b/code/src/markdown/render.rs
index 22934c5..39f204f 100644
--- a/code/src/markdown/render.rs
+++ b/code/src/markdown/render.rs
@@ -1,7 +1,7 @@
-use markdown::{Block, ListItem, Span};
-
use crate::syntax_highlight::syntax_highlight;
+use super::parser::{Block, Span};
+
pub fn span_to_html(ss: Vec<Span>) -> String {
let mut out = String::new();
for s in ss {
@@ -9,12 +9,13 @@ pub fn span_to_html(ss: Vec<Span>) -> String {
Span::Break => format!("<br/>"),
Span::Text(t) => escape(&t),
Span::Code(c) => format!("<code>{}</code>", escape(&c)),
- Span::Link(text, url, _) => {
+ Span::Link(text, url) => {
format!("<a href=\"{}\">{}</a>", escape(&url), escape(&text))
}
- Span::Image(_, _, _) => todo!(),
+ Span::Image(_, _) => todo!(),
Span::Emphasis(c) => format!("<i>{}</i>", span_to_html(c)),
Span::Strong(c) => format!("<b>{}</b>", span_to_html(c)),
+ Span::Latex(_) => format!("TODO: Inline Latex"),
}
.as_str()
}
@@ -24,7 +25,7 @@ pub fn blocks_to_html(blocks: Vec<Block>) -> String {
let mut out = String::new();
for e in blocks {
out += match e {
- Block::Header(text, level) => {
+ Block::Header(level, text) => {
format!("<h{level}>{}</h{level}>", span_to_html(text))
}
Block::Paragraph(p) => format!("<p>{}</p>", span_to_html(p)),
@@ -36,16 +37,10 @@ pub fn blocks_to_html(blocks: Vec<Block>) -> String {
format!("<pre>{}</pre>", escape(&content))
}
}
- Block::OrderedList(els, _) => format!(
+ Block::OrderedList(els) => format!(
"<ol>{}</ol>",
els.into_iter()
- .map(|e| format!(
- "<li>{}</li>",
- match e {
- ListItem::Simple(s) => span_to_html(s),
- ListItem::Paragraph(b) => blocks_to_html(b),
- }
- ))
+ .map(|e| format!("<li>{}</li>", blocks_to_html(e)))
.collect::<Vec<_>>()
.join("")
),
@@ -53,19 +48,14 @@ pub fn blocks_to_html(blocks: Vec<Block>) -> String {
format!(
"<ul>{}</ul>",
els.into_iter()
- .map(|e| format!(
- "<li>{}</li>",
- match e {
- ListItem::Simple(s) => span_to_html(s),
- ListItem::Paragraph(b) => blocks_to_html(b),
- }
- ))
+ .map(|e| format!("<li>{}</li>", blocks_to_html(e)))
.collect::<Vec<_>>()
.join("")
)
}
Block::Raw(r) => r,
Block::Hr => format!("<hr/>"),
+ Block::LatexBlock(_) => format!("TODO: Latex block"),
}
.as_str();
}
diff --git a/code/src/syntax_highlight/grammar.rs b/code/src/syntax_highlight/grammar.rs
index 18ba9e2..0878b33 100644
--- a/code/src/syntax_highlight/grammar.rs
+++ b/code/src/syntax_highlight/grammar.rs
@@ -5,7 +5,7 @@ pub fn grammar_for(syntax: &str) -> &'static [(&'static str, &'static [&'static
"keyword",
&[
"fn", "pub", "async", "return", "if", "else", "let", "for", "in", "while",
- "loop", "impl", "for", "trait", "struct", "enum",
+ "loop", "impl", "for", "trait", "struct", "enum", "dyn",
],
),
(
diff --git a/code/src/syntax_highlight/mod.rs b/code/src/syntax_highlight/mod.rs
index 46bf2a0..62a2536 100644
--- a/code/src/syntax_highlight/mod.rs
+++ b/code/src/syntax_highlight/mod.rs
@@ -1,7 +1,7 @@
pub mod grammar;
pub mod theme;
-use crate::{markdown::escape, syntax_highlight::theme::theme};
+use crate::{markdown::render::escape, syntax_highlight::theme::theme};
use grammar::grammar_for;
use synoptic::{Highlighter, Token};
diff --git a/readme.md b/readme.md
index c96fee1..8417c90 100644
--- a/readme.md
+++ b/readme.md
@@ -1,6 +1,15 @@
# metamuffin's blog
-TODO
+My personal blog's source code.
+
+## Ideas
+
+## Todo (blog-tool)
+
+- proper markdown parsing
+- markdown images
+- parser error handling
+- syntax highlighting via AST from grammar
## License