about | summary | refs | log | tree | commit | diff
path: root/src/main.rs
diff options
context:
space:
mode:
author metamuffin <metamuffin@disroot.org> 2024-11-27 22:34:03 +0000
committer lialenck <lialenck@noreply.codeberg.org> 2024-11-27 22:34:03 +0000
commit 2a17ceac1ab5cdee98d20a928795a1aba06c8be7 (patch)
tree 1c5c31e238870776dc324a91ddface2ae15e050a /src/main.rs
parent 467674743fb638ea56713aecc719a80505b82a17 (diff)
download embeddings-sort-2a17ceac1ab5cdee98d20a928795a1aba06c8be7.tar
embeddings-sort-2a17ceac1ab5cdee98d20a928795a1aba06c8be7.tar.bz2
embeddings-sort-2a17ceac1ab5cdee98d20a928795a1aba06c8be7.tar.zst
Replace sled with redb (Also replaces serde to bincode.) (#2)
Reviewed-on: https://codeberg.org/lialenck/embeddings-sort/pulls/2
Co-authored-by: metamuffin <metamuffin@disroot.org>
Co-committed-by: metamuffin <metamuffin@disroot.org>
Diffstat (limited to 'src/main.rs')
-rw-r--r-- src/main.rs 23
1 files changed, 11 insertions, 12 deletions
diff --git a/src/main.rs b/src/main.rs
index 2e63dd8..45621cf 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,4 +1,5 @@
use anyhow::{anyhow, Result};
+use cache::Cache;
use clap::Parser;
use sha2::{Digest, Sha512_256};
use std::{
@@ -12,9 +13,12 @@ use std::path::absolute;
use embedders::*;
use tsp_approx::*;
+pub mod cache;
mod embedders;
mod tsp_approx;
+pub type FileHash = [u8; 32];
+
#[derive(Debug, Clone, Copy, clap::ValueEnum)]
enum Embedder {
Brightness,
@@ -84,12 +88,12 @@ struct Config {
fn get_config() -> Result<Config> {
let glob_cache_dir = dirs::cache_dir().ok_or(anyhow!("Could not get cache directory"))?;
- Ok(Config {
- cache_dir: glob_cache_dir.join("embeddings-sort"),
- })
+ let cache_dir = glob_cache_dir.join("embeddings-sort");
+ std::fs::create_dir_all(&cache_dir)?;
+ Ok(Config { cache_dir })
}
-fn hash_file(p: &PathBuf) -> Result<[u8; 32]> {
+fn hash_file(p: &PathBuf) -> Result<FileHash> {
let mut f = fs::File::open(p)?;
let mut hasher = Sha512_256::new();
io::copy(&mut f, &mut hasher)?;
@@ -105,18 +109,13 @@ fn process_embedder<E>(mut e: E, args: &Args, cfg: &Config) -> Result<(Vec<PathB
where
E: BatchEmbedder,
{
- let db = sled::open(cfg.cache_dir.join("embeddings.db"))?;
- let tree = typed_sled::Tree::<[u8; 32], E::Embedding>::open(&db, E::NAME);
+ let cache = Cache::open(&cfg.cache_dir.join("embeddings.db-v2"))?;
// find cached embeddings
let mut embeds = args
.images
.iter()
- .map(|path| {
- let h = hash_file(path)?;
- let r: Result<Option<E::Embedding>> = tree.get(&h).map_err(|e| e.into());
- r
- })
+ .map(|path| cache.get(E::NAME, hash_file(path)?))
.collect::<Result<Vec<_>>>()?;
// find indices of missing embeddings
@@ -148,7 +147,7 @@ where
{
match emb {
Ok(emb) => {
- tree.insert(&hash_file(&args.images[idx])?, &emb)?;
+ cache.insert(E::NAME, hash_file(&args.images[idx])?, &emb)?;
embeds[idx] = Some(emb);
}
Err(e) => {