diff options
author | metamuffin <metamuffin@disroot.org> | 2025-03-26 14:43:04 +0100 |
---|---|---|
committer | metamuffin <metamuffin@disroot.org> | 2025-03-26 14:43:04 +0100 |
commit | 4ca5624bd2a78d32085b9ab0ffdb5bf24840fec3 (patch) | |
tree | a0c0e9d485b75682a57887abe92616b4e7faed5c | |
parent | eda0c12b4af72935c9fa6adef60aff754313f189 (diff) | |
download | staticwiki-master.tar staticwiki-master.tar.bz2 staticwiki-master.tar.zst |
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | Cargo.lock | 487 | ||||
-rw-r--r-- | Cargo.toml | 10 | ||||
-rw-r--r-- | src/main.rs | 197 |
4 files changed, 465 insertions, 230 deletions
@@ -1,3 +1,4 @@ /target /data /out +/parse_wiki_text
\ No newline at end of file @@ -1,12 +1,12 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "addr2line" -version = "0.17.0" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9ecd88a8c8378ca913a680cd98f0f13ac67383d35993f86c90a70e3f137816b" +checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" dependencies = [ "gimli", ] @@ -18,21 +18,68 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] -name = "atty" -version = "0.2.14" +name = "aho-corasick" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ - "hermit-abi", - "libc", - "winapi", + "memchr", +] + +[[package]] +name = "anstream" +version = "0.6.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "anstyle-parse" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" +dependencies = [ + "anstyle", + "windows-sys", ] [[package]] name = "backtrace" -version = "0.3.66" +version = "0.3.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cab84319d616cfb654d03394f38ab7e6f0919e181b1b57e1fd15e7fb4077d9a7" +checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" dependencies = [ "addr2line", "cc", @@ -50,31 +97,35 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] name = "bzip2" -version = "0.4.3" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6afcd980b5f3a45017c57e57a2fcccbb351cc43a356ce117ef760ef8052b89b0" +checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" dependencies = [ "bzip2-sys", - "libc", ] [[package]] name = "bzip2-sys" -version = "0.1.11+1.0.8" +version = "0.1.13+1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" dependencies = [ "cc", - "libc", "pkg-config", ] [[package]] name = "cc" -version = "1.0.73" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" +checksum = 
"ac367972e516d45567c7eafc73d24e1c193dcf200a8d94e9db7b3d38b349572d" [[package]] name = "cfg-if" @@ -84,51 +135,93 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "4.0.0-rc.3" +version = "4.5.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7233bf306993c874a6edc363281e83770889877c9d5ee7f656249c65d7e7aa62" +checksum = "6088f3ae8c3608d19260cd7445411865a485688711b78b5be70d78cd96136f83" dependencies = [ - "atty", - "bitflags", + "clap_builder", "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22a7ef7f676155edfb82daa97f99441f3ebf4a58d5e32f295a56259f1b6facc8" +dependencies = [ + "anstream", + "anstyle", "clap_lex", - "once_cell", "strsim", - "termcolor", ] [[package]] name = "clap_derive" -version = "4.0.0-rc.1" +version = "4.5.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51eef4d62724bf369e9ca7458cfde0c55263708b4552020058fba384864e8c23" +checksum = "09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7" dependencies = [ "heck", - "proc-macro-error", "proc-macro2", "quote", - "syn", + "syn 2.0.100", ] [[package]] name = "clap_lex" -version = "0.3.0" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d4198f73e42b4936b35b5bb248d81d2b595ecb170da0bac7655c54eedfa8da8" -dependencies = [ - "os_str_bytes", -] +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" + +[[package]] +name = "colorchoice" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" [[package]] name = "encoding_rs" -version = "0.8.31" +version = "0.8.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9852635589dc9f9ea1b6fe9f05b50ef208c85c834a562f0c6abb1c475736ec2b" +checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" dependencies = [ "cfg-if", ] [[package]] +name = "env_filter" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" +dependencies = [ + "log", + "regex", +] + +[[package]] +name = "env_logger" +version = "0.11.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3716d7a920fb4fac5d84e9d4bce8ceb321e9414b4409da61b07b75c1e3d0697" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "jiff", + "log", +] + +[[package]] +name = "errno" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] name = "failure" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -146,15 +239,15 @@ checksum = "aa4da3c766cd7a0db8242e326e9e4e081edd567072893ed320008189715366a4" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", "synstructure", ] [[package]] name = "filetime" -version = "0.2.17" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94a7bbaa59354bc20dd75b67f23e2797b4490e9d6928203fb105c79e448c86c" +checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" dependencies = [ "cfg-if", "libc", @@ -164,77 +257,89 @@ dependencies = [ [[package]] name = "gimli" -version = "0.26.2" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22030e2c5a68ec659fde1e949a745124b48e6fa8b045b7ed5bd1fe4ccc5c4e5d" +checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" [[package]] name = "heck" -version = "0.4.0" +version = "0.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" +checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" [[package]] -name = "hermit-abi" -version = "0.1.19" +name = "jiff" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +checksum = "c102670231191d07d37a35af3eb77f1f0dbf7a71be51a962dcd57ea607be7260" dependencies = [ - "libc", + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde", +] + +[[package]] +name = "jiff-static" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cdde31a9d349f1b1f51a0b3714a5940ac022976f4b49485fc04be052b183b4c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.100", ] [[package]] name = "libc" -version = "0.2.133" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "linux-raw-sys" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0f80d65747a3e43d1596c7c5492d95d5edddaabd45a7fcdb02b95f644164966" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" [[package]] name = "log" -version = "0.4.17" +version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" -dependencies = [ - "cfg-if", -] +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "memchr" -version = "2.5.0" +version = "2.7.4" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "miniz_oxide" -version = "0.5.4" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96590ba8f175222643a85693f33d26e9c8a015f599c216509b1a6894af675d34" +checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" dependencies = [ "adler", ] [[package]] name = "object" -version = "0.29.0" +version = "0.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21158b2c33aa6d4561f1c0a6ea283ca92bc54802a93b263e910746d679a7eb53" +checksum = "576dfe1fc8f9df304abb159d767a29d0476f7750fbf8aa7ad07816004a207434" dependencies = [ "memchr", ] [[package]] -name = "once_cell" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1" - -[[package]] -name = "os_str_bytes" -version = "6.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ff7415e9ae3fff1225851df9e0d9e4e5479f947619774677a63572e55e80eff" - -[[package]] name = "parse_mediawiki_dump" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -246,44 +351,33 @@ dependencies = [ [[package]] name = "parse_wiki_text" version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd59a245c58efa02bd73c9462fd9d4c28952d650092c6bcba43b60b4707171dd" [[package]] name = "pkg-config" -version = "0.3.25" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" [[package]] -name = "proc-macro-error" -version = "1.0.4" +name = 
"portable-atomic" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" -dependencies = [ - "proc-macro-error-attr", - "proc-macro2", - "quote", - "syn", - "version_check", -] +checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" [[package]] -name = "proc-macro-error-attr" -version = "1.0.4" +name = "portable-atomic-util" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" dependencies = [ - "proc-macro2", - "quote", - "version_check", + "portable-atomic", ] [[package]] name = "proc-macro2" -version = "1.0.44" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bd7356a8122b6c4a24a82b278680c73357984ca2fc79a0f9fa6dea7dced7c58" +checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" dependencies = [ "unicode-ident", ] @@ -302,27 +396,89 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.21" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] [[package]] name = "redox_syscall" -version = "0.2.16" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + 
"regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ - "bitflags", + "aho-corasick", + "memchr", + "regex-syntax", ] [[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] name = "rustc-demangle" -version = "0.1.21" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + +[[package]] +name = "rustix" +version = "0.38.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +dependencies = [ + "bitflags 2.6.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "serde" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.100", +] [[package]] name = "staticwiki" @@ -330,6 +486,8 @@ version = "0.1.0" dependencies = [ "bzip2", "clap", + "env_logger", + "log", "parse_mediawiki_dump", "parse_wiki_text", "tar", @@ -337,15 +495,26 @@ dependencies = [ [[package]] name = "strsim" -version = "0.10.0" +version = "0.11.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "1.0.101" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e90cde112c4b9690b8cbe810cba9ddd8bc1d7472e2cae317b69e9438c1cba7d2" +checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" dependencies = [ "proc-macro2", "quote", @@ -360,15 +529,15 @@ checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", "unicode-xid", ] [[package]] name = "tar" -version = "0.4.38" +version = "0.4.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b55807c0344e1e6c04d7c965f5289c39a8d94ae23ed5c0b57aabac549f871c6" +checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a" dependencies = [ "filetime", "libc", @@ -376,19 +545,10 @@ dependencies = [ ] [[package]] -name = "termcolor" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" -dependencies = [ - "winapi-util", -] - -[[package]] name = "unicode-ident" -version = "1.0.4" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-xid" @@ -397,90 +557,91 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" [[package]] -name = "version_check" -version = "0.9.4" +name = "utf8parse" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.5" +name = "windows-sys" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "winapi", + "windows-targets", ] [[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" +name = "windows-targets" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-sys" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" dependencies = [ + "windows_aarch64_gnullvm", "windows_aarch64_msvc", "windows_i686_gnu", + "windows_i686_gnullvm", "windows_i686_msvc", 
"windows_x86_64_gnu", + "windows_x86_64_gnullvm", "windows_x86_64_msvc", ] [[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] name = "windows_aarch64_msvc" -version = "0.36.1" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" [[package]] name = "windows_i686_gnu" -version = "0.36.1" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" [[package]] name = "windows_i686_msvc" -version = "0.36.1" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" [[package]] name = "windows_x86_64_gnu" -version = "0.36.1" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" [[package]] name = "windows_x86_64_msvc" -version = "0.36.1" +version = 
"0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" [[package]] name = "xattr" -version = "0.2.3" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d1526bbe5aaeb5eb06885f4d987bcdfa5e23187055de9b83fe00156a821fabc" +checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f" dependencies = [ "libc", + "linux-raw-sys", + "rustix", ] @@ -5,7 +5,9 @@ edition = "2021" [dependencies] parse_mediawiki_dump = "0.1.0" -parse_wiki_text = "0.1.5" -bzip2 = "0.4.3" -tar = "0.4.38" -clap = { version = "4.0.0-rc.3", features = ["derive"] } +parse_wiki_text = { path = "parse_wiki_text" } +bzip2 = "0.5.2" +tar = "0.4.44" +clap = { version = "4.5.32", features = ["derive"] } +env_logger = "0.11.7" +log = "0.4.27" diff --git a/src/main.rs b/src/main.rs index 34b00bb..53292cd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,9 @@ use clap::Parser; use parse_wiki_text::{Node, Parameter}; use std::fs::File; -use std::io::{stdin, stdout, Read, Write}; +use std::io::{stderr, stdin, stdout, BufWriter, Read, Write}; +use std::sync::{Arc, Mutex}; +use std::thread; use tar::Header; extern crate bzip2; @@ -21,10 +23,16 @@ struct Args { /// Show non-fatal warnings #[arg(short, long)] verbose: bool, + #[arg(short, long)] + show_todo: bool, #[arg(short, long, default_value = "")] footer: String, } +struct Config { + show_todo: bool, +} + fn main() { let args = Args::parse(); @@ -34,16 +42,32 @@ fn main() { } let input = std::io::BufReader::new(input); - let mut archive = tar::Builder::new(stdout()); + let mut archive = tar::Builder::new(BufWriter::new(stdout())); let footer = format!( "<a href=\"https://codeberg.org/metamuffin/staticwiki\">staticwiki</a>; {}", args.footer ); + let progress = Arc::new(Mutex::new((0, String::new()))); + { + let progress = 
progress.clone(); + thread::spawn(move || loop { + let (i, name) = progress.lock().unwrap().clone(); + eprint!("\r\x1b[2Karticles processed: {i}, current: {name}",); + stderr().flush().unwrap(); + std::thread::sleep(std::time::Duration::from_millis(1000 / 30)); + }); + } + + let config = Config { + show_todo: args.show_todo, + }; + for (i, result) in parse_mediawiki_dump::parse(input).enumerate() { match result { Ok(page) => { + *progress.lock().unwrap() = (i, page.title.clone()); if page.namespace == 0 && match &page.format { None => false, @@ -67,8 +91,8 @@ fn main() { let mut html = String::from("<!DOCTYPE html><html><head>"); write!(&mut html, "<title>{}</title></head>", escape(&page.title)).unwrap(); write!(&mut html, "<body><h1>{}</h1>", escape(&page.title)).unwrap(); - render_toc(&mut html, &ast.nodes); - render_nodes(&mut html, &mut refs, &ast.nodes); + render_toc(&mut html, &ast.nodes, &config); + render_nodes(&mut html, &mut refs, &ast.nodes, &config); render_refs(&mut html, &refs); write!(&mut html, "<footer>{footer}</footer>").unwrap(); write!(&mut html, "</body></html>").unwrap(); @@ -96,9 +120,6 @@ fn main() { break; } } - if i % 10000 == 0 { - eprintln!("{i}") - } if Some(i) == args.limit { break; } @@ -117,19 +138,19 @@ pub fn escape(text: &str) -> String { .replace("\"", "&quot;") } -fn render_nodes(html: &mut String, refs: &mut Vec<String>, nodes: &Vec<Node>) { +fn render_nodes(html: &mut String, refs: &mut Vec<String>, nodes: &Vec<Node>, config: &Config) { for n in nodes { - render_node(html, refs, n) + render_node(html, refs, n, config) } } -fn render_nodes_to_string(nodes: &Vec<Node>, refs: &mut Vec<String>) -> String { +fn render_nodes_to_string(nodes: &Vec<Node>, refs: &mut Vec<String>, config: &Config) -> String { let mut html = String::new(); - render_nodes(&mut html, refs, nodes); + render_nodes(&mut html, refs, nodes, config); return html; } -fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) { +fn render_node(html: &mut 
String, refs: &mut Vec<String>, n: &Node, config: &Config) { use std::fmt::Write; match n { parse_wiki_text::Node::Bold { .. } => (), @@ -138,20 +159,31 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) { ordinal: _, target: _, .. - } => write!(html, "[todo]").unwrap(), + } => { + if config.show_todo { + write!(html, "[todo: category]").unwrap() + } + } parse_wiki_text::Node::CharacterEntity { character: _, .. } => { - write!(html, "[todo: character]").unwrap() + if config.show_todo { + write!(html, "[todo: character]").unwrap() + } } parse_wiki_text::Node::Comment { .. } => (), parse_wiki_text::Node::DefinitionList { items: _, .. } => { - write!(html, "[todo: def list]").unwrap() + if config.show_todo { + write!(html, "[todo: def list]").unwrap() + } } - parse_wiki_text::Node::EndTag { name: _, .. } => write!(html, "[todo: tag end]").unwrap(), + parse_wiki_text::Node::EndTag { name: _, .. } => (), //write!(html, "[todo: tag end]").unwrap(), + parse_wiki_text::Node::StartTag { name: _, .. } => (), // write!(html, "[todo: start tag]").unwrap() parse_wiki_text::Node::ExternalLink { nodes: _, .. } => { - write!(html, "[todo: external link]").unwrap() + if config.show_todo { + write!(html, "[todo: external link]").unwrap() + } } parse_wiki_text::Node::Heading { level, nodes, .. } => { - let h = render_nodes_to_string(nodes, refs); + let h = render_nodes_to_string(nodes, refs, config); write!(html, "<h{level} id=\"{}\">{}</h{level}>", urlencode(&h), h).unwrap(); } parse_wiki_text::Node::HorizontalDivider { .. } => write!(html, "<hr>").unwrap(), @@ -163,16 +195,23 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) { html, "<a href=\"{}\">{}</a>", urlencode(target), // TODO does this always link to wikipedia? - render_nodes_to_string(text, refs) + render_nodes_to_string(text, refs, config) ) .unwrap(), - parse_wiki_text::Node::MagicWord { .. } => write!(html, "[todo: magic]").unwrap(), + parse_wiki_text::Node::MagicWord { .. 
} => { + if config.show_todo { + write!(html, "[todo: magic]").unwrap() + } + } parse_wiki_text::Node::OrderedList { items, .. } => write!( html, "<ol>{}</ol>", items .iter() - .map(|e| format!("<li>{}</li>", render_nodes_to_string(&e.nodes, refs))) + .map(|e| format!( + "<li>{}</li>", + render_nodes_to_string(&e.nodes, refs, config) + )) .collect::<Vec<_>>() .join("") ) @@ -182,7 +221,10 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) { "<ul>{}</ul>", items .iter() - .map(|e| format!("<li>{}</li>", render_nodes_to_string(&e.nodes, refs))) + .map(|e| format!( + "<li>{}</li>", + render_nodes_to_string(&e.nodes, refs, config) + )) .collect::<Vec<_>>() .join("") ) @@ -192,10 +234,17 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) { default: _, name: _, .. - } => write!(html, "[todo: parameter]").unwrap(), - parse_wiki_text::Node::Preformatted { nodes, .. } => { - write!(html, "<pre>{}</pre>", render_nodes_to_string(nodes, refs)).unwrap() + } => { + if config.show_todo { + write!(html, "[todo: parameter]").unwrap() + } } + parse_wiki_text::Node::Preformatted { nodes, .. } => write!( + html, + "<pre>{}</pre>", + render_nodes_to_string(nodes, refs, config) + ) + .unwrap(), parse_wiki_text::Node::Redirect { target, .. } => write!( html, "Redirect: <a href=\"{}\">{}</a>", @@ -203,9 +252,7 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) { urlencode(target) ) .unwrap(), - parse_wiki_text::Node::StartTag { name: _, .. } => { - write!(html, "[todo: start tag]").unwrap() - } + parse_wiki_text::Node::Table { attributes: _, captions: _, @@ -215,13 +262,25 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) { parse_wiki_text::Node::Tag { name, nodes, .. 
} => match name.as_ref() { "ref" => { if !nodes.is_empty() { - let r = render_nodes_to_string(nodes, refs); + let r = render_nodes_to_string(nodes, refs, config); refs.push(r); let refid = refs.len(); write!(html, "<sup><a href=\"#{}\">[{}]</a></sup>", refid, refid).unwrap(); } } - _ => write!(html, "[todo: {name:?} tag]").unwrap(), + "math" => { + let r = render_nodes_to_string(nodes, refs, config); + write!(html, "math: <code>{}</code>", r).unwrap(); + } + "syntaxhighlight" => { + let r = render_nodes_to_string(nodes, refs, config); + write!(html, "<pre>{}</pre>", r).unwrap(); + } + _ => { + if config.show_todo { + write!(html, "[todo: {name:?} tag]").unwrap() + } + } }, parse_wiki_text::Node::Template { name, parameters, .. @@ -230,13 +289,13 @@ fn render_node(html: &mut String, refs: &mut Vec<String>, n: &Node) { Some(Node::Text { value, .. }) => value, _ => "", }; - render_template(html, refs, name, parameters); + render_template(html, refs, name, parameters, config); } parse_wiki_text::Node::Text { value, .. 
} => write!(html, "{}", escape(value)).unwrap(), } } -fn render_toc(html: &mut String, nodes: &Vec<Node>) { +fn render_toc(html: &mut String, nodes: &Vec<Node>, config: &Config) { use std::fmt::Write; write!(html, "<div><h4><i>Table of contents</i></h4>").unwrap(); let mut k = 0; @@ -252,7 +311,7 @@ fn render_toc(html: &mut String, nodes: &Vec<Node>) { k -= 1; write!(html, "</ol>").unwrap(); } - let h = render_nodes_to_string(nodes, &mut vec![]); + let h = render_nodes_to_string(nodes, &mut vec![], config); write!(html, "<li><a href=\"#{}\">{}</a></li>", urlencode(&h), h).unwrap(); } @@ -262,51 +321,35 @@ fn render_toc(html: &mut String, nodes: &Vec<Node>) { write!(html, "</div>").unwrap(); } -pub fn render_template( +fn render_template( html: &mut String, refs: &mut Vec<String>, name: &str, params: &Vec<Parameter>, + config: &Config, ) -> Option<()> { use std::fmt::Write; - match name { + match name.to_lowercase().as_str() { + "internetquelle" | "literatur" | "citation" | "man" | "external link" => { + generic_template(html, name, params, config) + } + s if s.starts_with("cite ") => generic_template(html, name, params, config), + // TODO this can panic - "lang" => write!( + "lang" | "transliteration" => write!( html, "{}", - render_nodes_to_string(&params.get(1)?.value, refs) + render_nodes_to_string(&params.get(1)?.value, refs, config) ) .unwrap(), - "IPA" => write!( + s if s.starts_with("IPA") => write!( html, "<code>{}</code>", - render_nodes_to_string(&params.get(0)?.value, refs) + render_nodes_to_string(&params.get(0)?.value, refs, config) ) .unwrap(), - "Internetquelle" | "Literatur" => { - write!(html, "{}: <ul>", escape(name)).unwrap(); - for p in params { - let key = p - .name - .as_ref() - .map(|n| render_nodes_to_string(n, &mut vec![])) - .unwrap_or(String::from("??")); - let value = render_nodes_to_string(&p.value, &mut vec![]); - if let "url" | "archiv-url" | "Online" = key.as_str() { - write!( - html, - "<li>{}: <a href=\"{}\">{}</a></li>", - key, value, value - ) - 
} else { - write!(html, "<li>{}: {}</li>", key, value) - } - .unwrap() - } - write!(html, "</ul>").unwrap(); - } - "Siehe auch" | "Hauptartikel" => { + "Siehe auch" | "Hauptartikel" | "main" => { let k = text_node(params.get(0)?.value.get(0)?); write!( html, @@ -318,8 +361,12 @@ pub fn render_template( .unwrap(); } + "sfn" => {} + _ => { - write!(html, "[todo: {name:?} template]").unwrap(); + if config.show_todo { + write!(html, "[todo: {name:?} template]").unwrap(); + } // write!(html, "[todo: {name:?} template <pre>{params:#?}</pre>]").unwrap(); // eprintln!("unsupported template {name:?}"); // eprintln!("{params:?}"); @@ -328,6 +375,30 @@ pub fn render_template( Some(()) } +fn generic_template(html: &mut String, name: &str, params: &Vec<Parameter>, config: &Config) { + use std::fmt::Write; + write!(html, "{}: <ul>", escape(name)).unwrap(); + for p in params { + let key = p + .name + .as_ref() + .map(|n| render_nodes_to_string(n, &mut vec![], config)) + .unwrap_or(String::from("??")); + let value = render_nodes_to_string(&p.value, &mut vec![], config); + if let "url" | "archiv-url" | "Online" | "archive-url" = key.as_str() { + write!( + html, + "<li>{}: <a href=\"{}\">{}</a></li>", + key, value, value + ) + } else { + write!(html, "<li>{}: {}</li>", key, value) + } + .unwrap() + } + write!(html, "</ul>").unwrap(); +} + pub fn text_node(n: &Node) -> String { match n { Node::Text { value, .. } => value.to_string(), |