about summary refs log tree commit diff
diff options
context:
space:
mode:
author metamuffin <metamuffin@disroot.org> 2025-03-26 14:43:04 +0100
committer metamuffin <metamuffin@disroot.org> 2025-03-26 14:43:04 +0100
commit 4ca5624bd2a78d32085b9ab0ffdb5bf24840fec3 (patch)
tree a0c0e9d485b75682a57887abe92616b4e7faed5c
parent eda0c12b4af72935c9fa6adef60aff754313f189 (diff)
download staticwiki-master.tar
staticwiki-master.tar.bz2
staticwiki-master.tar.zst
disable todo by default HEAD master
-rw-r--r-- .gitignore 1
-rw-r--r-- Cargo.lock 487
-rw-r--r-- Cargo.toml 10
-rw-r--r-- src/main.rs 197
4 files changed, 465 insertions, 230 deletions
diff --git a/.gitignore b/.gitignore
index 17e10f4..d7abd7e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
/target
/data
/out
+/parse_wiki_text
\ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
index a091df4..6bae1dd 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1,12 +1,12 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
-version = 3
+version = 4
[[package]]
name = "addr2line"
-version = "0.17.0"
+version = "0.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b9ecd88a8c8378ca913a680cd98f0f13ac67383d35993f86c90a70e3f137816b"
+checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678"
dependencies = [
"gimli",
]
@@ -18,21 +18,68 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
-name = "atty"
-version = "0.2.14"
+name = "aho-corasick"
+version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
+checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
- "hermit-abi",
- "libc",
- "winapi",
+ "memchr",
+]
+
+[[package]]
+name = "anstream"
+version = "0.6.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b"
+dependencies = [
+ "anstyle",
+ "anstyle-parse",
+ "anstyle-query",
+ "anstyle-wincon",
+ "colorchoice",
+ "is_terminal_polyfill",
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle"
+version = "1.0.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9"
+
+[[package]]
+name = "anstyle-parse"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4"
+dependencies = [
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle-query"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391"
+dependencies = [
+ "windows-sys",
+]
+
+[[package]]
+name = "anstyle-wincon"
+version = "3.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19"
+dependencies = [
+ "anstyle",
+ "windows-sys",
]
[[package]]
name = "backtrace"
-version = "0.3.66"
+version = "0.3.73"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cab84319d616cfb654d03394f38ab7e6f0919e181b1b57e1fd15e7fb4077d9a7"
+checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a"
dependencies = [
"addr2line",
"cc",
@@ -50,31 +97,35 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
+name = "bitflags"
+version = "2.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
+
+[[package]]
name = "bzip2"
-version = "0.4.3"
+version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6afcd980b5f3a45017c57e57a2fcccbb351cc43a356ce117ef760ef8052b89b0"
+checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47"
dependencies = [
"bzip2-sys",
- "libc",
]
[[package]]
name = "bzip2-sys"
-version = "0.1.11+1.0.8"
+version = "0.1.13+1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc"
+checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14"
dependencies = [
"cc",
- "libc",
"pkg-config",
]
[[package]]
name = "cc"
-version = "1.0.73"
+version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
+checksum = "ac367972e516d45567c7eafc73d24e1c193dcf200a8d94e9db7b3d38b349572d"
[[package]]
name = "cfg-if"
@@ -84,51 +135,93 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clap"
-version = "4.0.0-rc.3"
+version = "4.5.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7233bf306993c874a6edc363281e83770889877c9d5ee7f656249c65d7e7aa62"
+checksum = "6088f3ae8c3608d19260cd7445411865a485688711b78b5be70d78cd96136f83"
dependencies = [
- "atty",
- "bitflags",
+ "clap_builder",
"clap_derive",
+]
+
+[[package]]
+name = "clap_builder"
+version = "4.5.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22a7ef7f676155edfb82daa97f99441f3ebf4a58d5e32f295a56259f1b6facc8"
+dependencies = [
+ "anstream",
+ "anstyle",
"clap_lex",
- "once_cell",
"strsim",
- "termcolor",
]
[[package]]
name = "clap_derive"
-version = "4.0.0-rc.1"
+version = "4.5.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "51eef4d62724bf369e9ca7458cfde0c55263708b4552020058fba384864e8c23"
+checksum = "09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7"
dependencies = [
"heck",
- "proc-macro-error",
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.100",
]
[[package]]
name = "clap_lex"
-version = "0.3.0"
+version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0d4198f73e42b4936b35b5bb248d81d2b595ecb170da0bac7655c54eedfa8da8"
-dependencies = [
- "os_str_bytes",
-]
+checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6"
+
+[[package]]
+name = "colorchoice"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422"
[[package]]
name = "encoding_rs"
-version = "0.8.31"
+version = "0.8.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9852635589dc9f9ea1b6fe9f05b50ef208c85c834a562f0c6abb1c475736ec2b"
+checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59"
dependencies = [
"cfg-if",
]
[[package]]
+name = "env_filter"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0"
+dependencies = [
+ "log",
+ "regex",
+]
+
+[[package]]
+name = "env_logger"
+version = "0.11.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c3716d7a920fb4fac5d84e9d4bce8ceb321e9414b4409da61b07b75c1e3d0697"
+dependencies = [
+ "anstream",
+ "anstyle",
+ "env_filter",
+ "jiff",
+ "log",
+]
+
+[[package]]
+name = "errno"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba"
+dependencies = [
+ "libc",
+ "windows-sys",
+]
+
+[[package]]
name = "failure"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -146,15 +239,15 @@ checksum = "aa4da3c766cd7a0db8242e326e9e4e081edd567072893ed320008189715366a4"
dependencies = [
"proc-macro2",
"quote",
- "syn",
+ "syn 1.0.109",
"synstructure",
]
[[package]]
name = "filetime"
-version = "0.2.17"
+version = "0.2.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e94a7bbaa59354bc20dd75b67f23e2797b4490e9d6928203fb105c79e448c86c"
+checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd"
dependencies = [
"cfg-if",
"libc",
@@ -164,77 +257,89 @@ dependencies = [
[[package]]
name = "gimli"
-version = "0.26.2"
+version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "22030e2c5a68ec659fde1e949a745124b48e6fa8b045b7ed5bd1fe4ccc5c4e5d"
+checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd"
[[package]]
name = "heck"
-version = "0.4.0"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
+
+[[package]]
+name = "is_terminal_polyfill"
+version = "1.70.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9"
+checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800"
[[package]]
-name = "hermit-abi"
-version = "0.1.19"
+name = "jiff"
+version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
+checksum = "c102670231191d07d37a35af3eb77f1f0dbf7a71be51a962dcd57ea607be7260"
dependencies = [
- "libc",
+ "jiff-static",
+ "log",
+ "portable-atomic",
+ "portable-atomic-util",
+ "serde",
+]
+
+[[package]]
+name = "jiff-static"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4cdde31a9d349f1b1f51a0b3714a5940ac022976f4b49485fc04be052b183b4c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.100",
]
[[package]]
name = "libc"
-version = "0.2.133"
+version = "0.2.155"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
+
+[[package]]
+name = "linux-raw-sys"
+version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c0f80d65747a3e43d1596c7c5492d95d5edddaabd45a7fcdb02b95f644164966"
+checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
[[package]]
name = "log"
-version = "0.4.17"
+version = "0.4.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
-dependencies = [
- "cfg-if",
-]
+checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
[[package]]
name = "memchr"
-version = "2.5.0"
+version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
+checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "miniz_oxide"
-version = "0.5.4"
+version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "96590ba8f175222643a85693f33d26e9c8a015f599c216509b1a6894af675d34"
+checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08"
dependencies = [
"adler",
]
[[package]]
name = "object"
-version = "0.29.0"
+version = "0.36.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "21158b2c33aa6d4561f1c0a6ea283ca92bc54802a93b263e910746d679a7eb53"
+checksum = "576dfe1fc8f9df304abb159d767a29d0476f7750fbf8aa7ad07816004a207434"
dependencies = [
"memchr",
]
[[package]]
-name = "once_cell"
-version = "1.15.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1"
-
-[[package]]
-name = "os_str_bytes"
-version = "6.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ff7415e9ae3fff1225851df9e0d9e4e5479f947619774677a63572e55e80eff"
-
-[[package]]
name = "parse_mediawiki_dump"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -246,44 +351,33 @@ dependencies = [
[[package]]
name = "parse_wiki_text"
version = "0.1.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dd59a245c58efa02bd73c9462fd9d4c28952d650092c6bcba43b60b4707171dd"
[[package]]
name = "pkg-config"
-version = "0.3.25"
+version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae"
+checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec"
[[package]]
-name = "proc-macro-error"
-version = "1.0.4"
+name = "portable-atomic"
+version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
-dependencies = [
- "proc-macro-error-attr",
- "proc-macro2",
- "quote",
- "syn",
- "version_check",
-]
+checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e"
[[package]]
-name = "proc-macro-error-attr"
-version = "1.0.4"
+name = "portable-atomic-util"
+version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
+checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507"
dependencies = [
- "proc-macro2",
- "quote",
- "version_check",
+ "portable-atomic",
]
[[package]]
name = "proc-macro2"
-version = "1.0.44"
+version = "1.0.94"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7bd7356a8122b6c4a24a82b278680c73357984ca2fc79a0f9fa6dea7dced7c58"
+checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84"
dependencies = [
"unicode-ident",
]
@@ -302,27 +396,89 @@ dependencies = [
[[package]]
name = "quote"
-version = "1.0.21"
+version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
+checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
dependencies = [
"proc-macro2",
]
[[package]]
name = "redox_syscall"
-version = "0.2.16"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa"
+dependencies = [
+ "bitflags 1.3.2",
+]
+
+[[package]]
+name = "regex"
+version = "1.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
+checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
dependencies = [
- "bitflags",
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
]
[[package]]
+name = "regex-syntax"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
+
+[[package]]
name = "rustc-demangle"
-version = "0.1.21"
+version = "0.1.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
+
+[[package]]
+name = "rustix"
+version = "0.38.34"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f"
+dependencies = [
+ "bitflags 2.6.0",
+ "errno",
+ "libc",
+ "linux-raw-sys",
+ "windows-sys",
+]
+
+[[package]]
+name = "serde"
+version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342"
+checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.219"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.100",
+]
[[package]]
name = "staticwiki"
@@ -330,6 +486,8 @@ version = "0.1.0"
dependencies = [
"bzip2",
"clap",
+ "env_logger",
+ "log",
"parse_mediawiki_dump",
"parse_wiki_text",
"tar",
@@ -337,15 +495,26 @@ dependencies = [
[[package]]
name = "strsim"
-version = "0.10.0"
+version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
+checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
-version = "1.0.101"
+version = "1.0.109"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e90cde112c4b9690b8cbe810cba9ddd8bc1d7472e2cae317b69e9438c1cba7d2"
+checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0"
dependencies = [
"proc-macro2",
"quote",
@@ -360,15 +529,15 @@ checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f"
dependencies = [
"proc-macro2",
"quote",
- "syn",
+ "syn 1.0.109",
"unicode-xid",
]
[[package]]
name = "tar"
-version = "0.4.38"
+version = "0.4.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4b55807c0344e1e6c04d7c965f5289c39a8d94ae23ed5c0b57aabac549f871c6"
+checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a"
dependencies = [
"filetime",
"libc",
@@ -376,19 +545,10 @@ dependencies = [
]
[[package]]
-name = "termcolor"
-version = "1.1.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755"
-dependencies = [
- "winapi-util",
-]
-
-[[package]]
name = "unicode-ident"
-version = "1.0.4"
+version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd"
+checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "unicode-xid"
@@ -397,90 +557,91 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c"
[[package]]
-name = "version_check"
-version = "0.9.4"
+name = "utf8parse"
+version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
+checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
-name = "winapi"
-version = "0.3.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
-dependencies = [
- "winapi-i686-pc-windows-gnu",
- "winapi-x86_64-pc-windows-gnu",
-]
-
-[[package]]
-name = "winapi-i686-pc-windows-gnu"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
-
-[[package]]
-name = "winapi-util"
-version = "0.1.5"
+name = "windows-sys"
+version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
+checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
- "winapi",
+ "windows-targets",
]
[[package]]
-name = "winapi-x86_64-pc-windows-gnu"
-version = "0.4.0"
+name = "windows-targets"
+version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
-
-[[package]]
-name = "windows-sys"
-version = "0.36.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2"
+checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb"
dependencies = [
+ "windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
+ "windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
+ "windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.52.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263"
+
+[[package]]
name = "windows_aarch64_msvc"
-version = "0.36.1"
+version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47"
+checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6"
[[package]]
name = "windows_i686_gnu"
-version = "0.36.1"
+version = "0.52.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6"
+checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9"
[[package]]
name = "windows_i686_msvc"
-version = "0.36.1"
+version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024"
+checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf"
[[package]]
name = "windows_x86_64_gnu"
-version = "0.36.1"
+version = "0.52.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1"
+checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596"
[[package]]
name = "windows_x86_64_msvc"
-version = "0.36.1"
+version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"
+checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0"
[[package]]
name = "xattr"
-version = "0.2.3"
+version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6d1526bbe5aaeb5eb06885f4d987bcdfa5e23187055de9b83fe00156a821fabc"
+checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f"
dependencies = [
"libc",
+ "linux-raw-sys",
+ "rustix",
]
diff --git a/Cargo.toml b/Cargo.toml
index 137128e..8d46d93 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,7 +5,9 @@ edition = "2021"
[dependencies]
parse_mediawiki_dump = "0.1.0"
-parse_wiki_text = "0.1.5"
-bzip2 = "0.4.3"
-tar = "0.4.38"
-clap = { version = "4.0.0-rc.3", features = ["derive"] }
+parse_wiki_text = { path = "parse_wiki_text" }
+bzip2 = "0.5.2"
+tar = "0.4.44"
+clap = { version = "4.5.32", features = ["derive"] }
+env_logger = "0.11.7"
+log = "0.4.27"
diff --git a/src/main.rs b/src/main.rs
index 34b00bb..53292cd 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,7 +1,9 @@
use clap::Parser;
use parse_wiki_text::{Node, Parameter};
use std::fs::File;
-use std::io::{stdin, stdout, Read, Write};
+use std::io::{stderr, stdin, stdout, BufWriter, Read, Write};
+use std::sync::{Arc, Mutex};
+use std::thread;
use tar::Header;
extern crate bzip2;
@@ -21,10 +23,16 @@ struct Args {
/// Show non-fatal warnings
#[arg(short, long)]
verbose: bool,
+ #[arg(short, long)]
+ show_todo: bool,
#[arg(short, long, default_value = "")]
footer: String,
}
+struct Config {
+ show_todo: bool,
+}
+
fn main() {
let args = Args::parse();
@@ -34,16 +42,32 @@ fn main() {
}
let input = std::io::BufReader::new(input);
- let mut archive = tar::Builder::new(stdout());
+ let mut archive = tar::Builder::new(BufWriter::new(stdout()));
let footer = format!(
"<a href=\"https://codeberg.org/metamuffin/staticwiki\">staticwiki</a>; {}",
args.footer
);
+ let progress = Arc::new(Mutex::new((0, String::new())));
+ {
+ let progress = progress.clone();
+ thread::spawn(move || loop {
+ let (i, name) = progress.lock().unwrap().clone();
+ eprint!("\r\x1b[2Karticles processed: {i}, current: {name}",);
+ stderr().flush().unwrap();
+ std::thread::sleep(std::time::Duration::from_millis(1000 / 30));
+ });
+ }
+
+ let config = Config {
+ show_todo: args.show_todo,
+ };
+
for (i, result) in parse_mediawiki_dump::parse(input).enumerate() {
match result {
Ok(page) => {
+ *progress.lock().unwrap() = (i, page.title.clone());
if page.namespace == 0
&& match &page.format {
None => false,
@@ -67,8 +91,8 @@ fn main() {
let mut html = String::from("<!DOCTYPE html><html><head>");
write!(&mut html, "<title>{}</title></head>", escape(&page.title)).unwrap();
write!(&mut html, "<body><h1>{}</h1>", escape(&page.title)).unwrap();
- render_toc(&mut html, &ast.nodes);
- render_nodes(&mut html, &mut refs, &ast.nodes);
+ render_toc(&mut html, &ast.nodes, &config);
+ render_nodes(&mut html, &mut refs, &ast.nodes, &config);
render_refs(&mut html, &refs);
write!(&mut html, "<footer>{footer}</footer>").unwrap();
write!(&mut html, "</body></html>").unwrap();
@@ -96,9 +120,6 @@ fn main() {
break;
}
}
- if i % 10000 == 0 {
- eprintln!("{i}")
- }
if Some(i) == args.limit {
break;
}
@@ -117,19 +138,19 @@ pub fn escape(text: &str) -> String {
.replace("\"", "&quot;")
}
-fn render_nodes(html: &mut String, refs: &mut Vec<String>, nodes: &Vec<Node>) {
+fn render_nodes(html: &mut String, refs: &mut Vec<String>, nodes: &Vec<Node>, config: &Config) {
for n in nodes {
- render_node(html, refs, n)
+ render_node(html, refs, n, config)
}
}
-fn render_nodes_to_string(nodes: &Vec<Node>, refs: &mut Vec<String>) -> String {
+fn render_nodes_to_string(nodes: &Vec<Node>, refs: &mut Vec<String>, config: &Config) -> String {
let mut html = String::new();
- render_nodes(&mut html, refs, nodes);
+ render_nodes(&mut html, refs, nodes, config);
return html;
}
-fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) {
+fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node, config: &Config) {
use std::fmt::Write;
match n {
parse_wiki_text::Node::Bold { .. } => (),
@@ -138,20 +159,31 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) {
ordinal: _,
target: _,
..
- } => write!(html, "[todo]").unwrap(),
+ } => {
+ if config.show_todo {
+ write!(html, "[todo: category]").unwrap()
+ }
+ }
parse_wiki_text::Node::CharacterEntity { character: _, .. } => {
- write!(html, "[todo: character]").unwrap()
+ if config.show_todo {
+ write!(html, "[todo: character]").unwrap()
+ }
}
parse_wiki_text::Node::Comment { .. } => (),
parse_wiki_text::Node::DefinitionList { items: _, .. } => {
- write!(html, "[todo: def list]").unwrap()
+ if config.show_todo {
+ write!(html, "[todo: def list]").unwrap()
+ }
}
- parse_wiki_text::Node::EndTag { name: _, .. } => write!(html, "[todo: tag end]").unwrap(),
+ parse_wiki_text::Node::EndTag { name: _, .. } => (), //write!(html, "[todo: tag end]").unwrap(),
+ parse_wiki_text::Node::StartTag { name: _, .. } => (), // write!(html, "[todo: start tag]").unwrap()
parse_wiki_text::Node::ExternalLink { nodes: _, .. } => {
- write!(html, "[todo: external link]").unwrap()
+ if config.show_todo {
+ write!(html, "[todo: external link]").unwrap()
+ }
}
parse_wiki_text::Node::Heading { level, nodes, .. } => {
- let h = render_nodes_to_string(nodes, refs);
+ let h = render_nodes_to_string(nodes, refs, config);
write!(html, "<h{level} id=\"{}\">{}</h{level}>", urlencode(&h), h).unwrap();
}
parse_wiki_text::Node::HorizontalDivider { .. } => write!(html, "<hr>").unwrap(),
@@ -163,16 +195,23 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) {
html,
"<a href=\"{}\">{}</a>",
urlencode(target), // TODO does this always link to wikipedia?
- render_nodes_to_string(text, refs)
+ render_nodes_to_string(text, refs, config)
)
.unwrap(),
- parse_wiki_text::Node::MagicWord { .. } => write!(html, "[todo: magic]").unwrap(),
+ parse_wiki_text::Node::MagicWord { .. } => {
+ if config.show_todo {
+ write!(html, "[todo: magic]").unwrap()
+ }
+ }
parse_wiki_text::Node::OrderedList { items, .. } => write!(
html,
"<ol>{}</ol>",
items
.iter()
- .map(|e| format!("<li>{}</li>", render_nodes_to_string(&e.nodes, refs)))
+ .map(|e| format!(
+ "<li>{}</li>",
+ render_nodes_to_string(&e.nodes, refs, config)
+ ))
.collect::<Vec<_>>()
.join("")
)
@@ -182,7 +221,10 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) {
"<ul>{}</ul>",
items
.iter()
- .map(|e| format!("<li>{}</li>", render_nodes_to_string(&e.nodes, refs)))
+ .map(|e| format!(
+ "<li>{}</li>",
+ render_nodes_to_string(&e.nodes, refs, config)
+ ))
.collect::<Vec<_>>()
.join("")
)
@@ -192,10 +234,17 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) {
default: _,
name: _,
..
- } => write!(html, "[todo: parameter]").unwrap(),
- parse_wiki_text::Node::Preformatted { nodes, .. } => {
- write!(html, "<pre>{}</pre>", render_nodes_to_string(nodes, refs)).unwrap()
+ } => {
+ if config.show_todo {
+ write!(html, "[todo: parameter]").unwrap()
+ }
}
+ parse_wiki_text::Node::Preformatted { nodes, .. } => write!(
+ html,
+ "<pre>{}</pre>",
+ render_nodes_to_string(nodes, refs, config)
+ )
+ .unwrap(),
parse_wiki_text::Node::Redirect { target, .. } => write!(
html,
"Redirect: <a href=\"{}\">{}</a>",
@@ -203,9 +252,7 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) {
urlencode(target)
)
.unwrap(),
- parse_wiki_text::Node::StartTag { name: _, .. } => {
- write!(html, "[todo: start tag]").unwrap()
- }
+
parse_wiki_text::Node::Table {
attributes: _,
captions: _,
@@ -215,13 +262,25 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) {
parse_wiki_text::Node::Tag { name, nodes, .. } => match name.as_ref() {
"ref" => {
if !nodes.is_empty() {
- let r = render_nodes_to_string(nodes, refs);
+ let r = render_nodes_to_string(nodes, refs, config);
refs.push(r);
let refid = refs.len();
write!(html, "<sup><a href=\"#{}\">[{}]</a></sup>", refid, refid).unwrap();
}
}
- _ => write!(html, "[todo: {name:?} tag]").unwrap(),
+ "math" => {
+ let r = render_nodes_to_string(nodes, refs, config);
+ write!(html, "math: <code>{}</code>", r).unwrap();
+ }
+ "syntaxhighlight" => {
+ let r = render_nodes_to_string(nodes, refs, config);
+ write!(html, "<pre>{}</pre>", r).unwrap();
+ }
+ _ => {
+ if config.show_todo {
+ write!(html, "[todo: {name:?} tag]").unwrap()
+ }
+ }
},
parse_wiki_text::Node::Template {
name, parameters, ..
@@ -230,13 +289,13 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) {
Some(Node::Text { value, .. }) => value,
_ => "",
};
- render_template(html, refs, name, parameters);
+ render_template(html, refs, name, parameters, config);
}
parse_wiki_text::Node::Text { value, .. } => write!(html, "{}", escape(value)).unwrap(),
}
}
-fn render_toc(html: &mut String, nodes: &Vec<Node>) {
+fn render_toc(html: &mut String, nodes: &Vec<Node>, config: &Config) {
use std::fmt::Write;
write!(html, "<div><h4><i>Table of contents</i></h4>").unwrap();
let mut k = 0;
@@ -252,7 +311,7 @@ fn render_toc(html: &mut String, nodes: &Vec<Node>) {
k -= 1;
write!(html, "</ol>").unwrap();
}
- let h = render_nodes_to_string(nodes, &mut vec![]);
+ let h = render_nodes_to_string(nodes, &mut vec![], config);
write!(html, "<li><a href=\"#{}\">{}</a></li>", urlencode(&h), h).unwrap();
}
@@ -262,51 +321,35 @@ fn render_toc(html: &mut String, nodes: &Vec<Node>) {
write!(html, "</div>").unwrap();
}
-pub fn render_template(
+fn render_template(
html: &mut String,
refs: &mut Vec<String>,
name: &str,
params: &Vec<Parameter>,
+ config: &Config,
) -> Option<()> {
use std::fmt::Write;
- match name {
+ match name.to_lowercase().as_str() {
+ "internetquelle" | "literatur" | "citation" | "man" | "external link" => {
+ generic_template(html, name, params, config)
+ }
+ s if s.starts_with("cite ") => generic_template(html, name, params, config),
+
// TODO this can panic
- "lang" => write!(
+ "lang" | "transliteration" => write!(
html,
"{}",
- render_nodes_to_string(&params.get(1)?.value, refs)
+ render_nodes_to_string(&params.get(1)?.value, refs, config)
)
.unwrap(),
- "IPA" => write!(
+ s if s.starts_with("IPA") => write!(
html,
"<code>{}</code>",
- render_nodes_to_string(&params.get(0)?.value, refs)
+ render_nodes_to_string(&params.get(0)?.value, refs, config)
)
.unwrap(),
- "Internetquelle" | "Literatur" => {
- write!(html, "{}: <ul>", escape(name)).unwrap();
- for p in params {
- let key = p
- .name
- .as_ref()
- .map(|n| render_nodes_to_string(n, &mut vec![]))
- .unwrap_or(String::from("??"));
- let value = render_nodes_to_string(&p.value, &mut vec![]);
- if let "url" | "archiv-url" | "Online" = key.as_str() {
- write!(
- html,
- "<li>{}: <a href=\"{}\">{}</a></li>",
- key, value, value
- )
- } else {
- write!(html, "<li>{}: {}</li>", key, value)
- }
- .unwrap()
- }
- write!(html, "</ul>").unwrap();
- }
- "Siehe auch" | "Hauptartikel" => {
+ "Siehe auch" | "Hauptartikel" | "main" => {
let k = text_node(params.get(0)?.value.get(0)?);
write!(
html,
@@ -318,8 +361,12 @@ pub fn render_template(
.unwrap();
}
+ "sfn" => {}
+
_ => {
- write!(html, "[todo: {name:?} template]").unwrap();
+ if config.show_todo {
+ write!(html, "[todo: {name:?} template]").unwrap();
+ }
// write!(html, "[todo: {name:?} template <pre>{params:#?}</pre>]").unwrap();
// eprintln!("unsupported template {name:?}");
// eprintln!("{params:?}");
@@ -328,6 +375,30 @@ pub fn render_template(
Some(())
}
+fn generic_template(html: &mut String, name: &str, params: &Vec<Parameter>, config: &Config) {
+ use std::fmt::Write;
+ write!(html, "{}: <ul>", escape(name)).unwrap();
+ for p in params {
+ let key = p
+ .name
+ .as_ref()
+ .map(|n| render_nodes_to_string(n, &mut vec![], config))
+ .unwrap_or(String::from("??"));
+ let value = render_nodes_to_string(&p.value, &mut vec![], config);
+ if let "url" | "archiv-url" | "Online" | "archive-url" = key.as_str() {
+ write!(
+ html,
+ "<li>{}: <a href=\"{}\">{}</a></li>",
+ key, value, value
+ )
+ } else {
+ write!(html, "<li>{}: {}</li>", key, value)
+ }
+ .unwrap()
+ }
+ write!(html, "</ul>").unwrap();
+}
+
pub fn text_node(n: &Node) -> String {
match n {
Node::Text { value, .. } => value.to_string(),