diff options
author | metamuffin <metamuffin@disroot.org> | 2024-11-27 22:34:03 +0000 |
---|---|---|
committer | lialenck <lialenck@noreply.codeberg.org> | 2024-11-27 22:34:03 +0000 |
commit | 2a17ceac1ab5cdee98d20a928795a1aba06c8be7 (patch) | |
tree | 1c5c31e238870776dc324a91ddface2ae15e050a /src/main.rs | |
parent | 467674743fb638ea56713aecc719a80505b82a17 (diff) | |
download | embeddings-sort-2a17ceac1ab5cdee98d20a928795a1aba06c8be7.tar embeddings-sort-2a17ceac1ab5cdee98d20a928795a1aba06c8be7.tar.bz2 embeddings-sort-2a17ceac1ab5cdee98d20a928795a1aba06c8be7.tar.zst |
Replace sled with redb (also replaces serde with bincode) (#2)
Reviewed-on: https://codeberg.org/lialenck/embeddings-sort/pulls/2
Co-authored-by: metamuffin <metamuffin@disroot.org>
Co-committed-by: metamuffin <metamuffin@disroot.org>
Diffstat (limited to 'src/main.rs')
-rw-r--r-- | src/main.rs | 23 |
1 file changed, 11 insertions, 12 deletions
diff --git a/src/main.rs b/src/main.rs index 2e63dd8..45621cf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ use anyhow::{anyhow, Result}; +use cache::Cache; use clap::Parser; use sha2::{Digest, Sha512_256}; use std::{ @@ -12,9 +13,12 @@ use std::path::absolute; use embedders::*; use tsp_approx::*; +pub mod cache; mod embedders; mod tsp_approx; +pub type FileHash = [u8; 32]; + #[derive(Debug, Clone, Copy, clap::ValueEnum)] enum Embedder { Brightness, @@ -84,12 +88,12 @@ struct Config { fn get_config() -> Result<Config> { let glob_cache_dir = dirs::cache_dir().ok_or(anyhow!("Could not get cache directory"))?; - Ok(Config { - cache_dir: glob_cache_dir.join("embeddings-sort"), - }) + let cache_dir = glob_cache_dir.join("embeddings-sort"); + std::fs::create_dir_all(&cache_dir)?; + Ok(Config { cache_dir }) } -fn hash_file(p: &PathBuf) -> Result<[u8; 32]> { +fn hash_file(p: &PathBuf) -> Result<FileHash> { let mut f = fs::File::open(p)?; let mut hasher = Sha512_256::new(); io::copy(&mut f, &mut hasher)?; @@ -105,18 +109,13 @@ fn process_embedder<E>(mut e: E, args: &Args, cfg: &Config) -> Result<(Vec<PathB where E: BatchEmbedder, { - let db = sled::open(cfg.cache_dir.join("embeddings.db"))?; - let tree = typed_sled::Tree::<[u8; 32], E::Embedding>::open(&db, E::NAME); + let cache = Cache::open(&cfg.cache_dir.join("embeddings.db-v2"))?; // find cached embeddings let mut embeds = args .images .iter() - .map(|path| { - let h = hash_file(path)?; - let r: Result<Option<E::Embedding>> = tree.get(&h).map_err(|e| e.into()); - r - }) + .map(|path| cache.get(E::NAME, hash_file(path)?)) .collect::<Result<Vec<_>>>()?; // find indices of missing embeddings @@ -148,7 +147,7 @@ where { match emb { Ok(emb) => { - tree.insert(&hash_file(&args.images[idx])?, &emb)?; + cache.insert(E::NAME, hash_file(&args.images[idx])?, &emb)?; embeds[idx] = Some(emb); } Err(e) => { |