diff options
-rw-r--r-- | Cargo.lock | 79 | ||||
-rw-r--r-- | Cargo.toml | 2 | ||||
-rw-r--r-- | src/embedders.rs | 25 | ||||
-rw-r--r-- | src/main.rs | 39 |
4 files changed, 117 insertions, 28 deletions
@@ -57,6 +57,12 @@ dependencies = [ ] [[package]] +name = "anyhow" +version = "1.0.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" + +[[package]] name = "autocfg" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -84,6 +90,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] name = "bumpalo" version = "3.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -173,6 +188,15 @@ dependencies = [ ] [[package]] +name = "cpufeatures" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" +dependencies = [ + "libc", +] + +[[package]] name = "crc32fast" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -231,6 +255,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" [[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] name = "either" version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -240,12 +284,14 @@ checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" name = "embeddings-sort" version = "0.1.0" dependencies = [ + "anyhow", "clap", "image", "indicatif", "priority-queue", "rayon", "serde", + "sha2", "sled", "typed-sled", "xdg", @@ -337,6 +383,16 @@ dependencies = [ ] [[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] name = "getrandom" version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -728,6 +784,17 @@ dependencies = [ ] [[package]] +name = "sha2" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479fb9d862239e610720565ca91403019f2f00410f1864c5aa7479b950a76ed8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] name = "simd-adler32" version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -826,6 +893,12 @@ dependencies = [ ] [[package]] +name = "typenum" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" + +[[package]] name = "unicode-ident" version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -844,6 +917,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -15,3 +15,5 @@ indicatif = "0" sled = "0" typed-sled = "0" serde = "1" +sha2 = "0" +anyhow = "1" diff --git a/src/embedders.rs b/src/embedders.rs index 0693b5e..8911e95 100644 --- a/src/embedders.rs +++ b/src/embedders.rs @@ -1,5 +1,6 @@ +use anyhow::{bail, Result}; use rayon::prelude::*; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use serde::{Deserialize, Serialize}; pub trait MetricElem: Send + Sync + 'static + Serialize + for<'a> Deserialize<'a> { @@ -16,21 +17,21 @@ pub trait EmbedderT: Send + Sync { type Embedding: MetricElem; const NAME: &'static str; - fn embed(&self, _: &PathBuf) -> Result<Self::Embedding, String>; + fn embed(&self, _: &Path) -> Result<Self::Embedding>; } pub trait BatchEmbedder: Send + Sync { type Embedding: MetricElem; const NAME: &'static str; - fn embeds(&mut self, _: &[PathBuf]) -> Result<Vec<Self::Embedding>, String>; + fn embeds(&mut self, _: &[PathBuf]) -> Result<Vec<Self::Embedding>>; } impl<T: EmbedderT> BatchEmbedder for T { type Embedding = T::Embedding; const NAME: &'static str = T::NAME; - fn embeds(&mut self, paths: &[PathBuf]) -> Result<Vec<Self::Embedding>, String> { + fn embeds(&mut self, paths: &[PathBuf]) -> Result<Vec<Self::Embedding>> { paths.par_iter() .map(|p| self.embed(p)) .collect::<Vec<_>>() @@ -44,12 +45,12 @@ impl EmbedderT for BrightnessEmbedder { type Embedding = f64; const NAME: &'static str = "Brightness"; - fn embed(&self, path: &PathBuf) -> Result<f64, String> { - let im = image::open(path).map_err(|e| e.to_string())?; + fn embed(&self, path: &Path) -> Result<f64> { + let im = image::open(path)?; let num_bytes = 3 * (im.height() * im.width()); if num_bytes == 0 { - return Err("Encountered NaN brightness, due to an empty image".to_string()); + bail!("Encountered NaN brightness, due to an empty image"); } Ok(im.to_rgb8() @@ -74,8 +75,8 @@ impl EmbedderT for HueEmbedder { type Embedding = Hue; const NAME: &'static str = "Hue"; - fn embed(&self, path: &PathBuf) -> Result<Hue, String> { - let im = image::open(path).map_err(|e| e.to_string())?; + fn embed(&self, path: &Path) -> Result<Hue> { + let im = image::open(path)?; let num_pixels = im.height() * im.width(); let [sr, sg, sb] = im .to_rgb8() @@ -98,7 +99,7 @@ impl EmbedderT for HueEmbedder { }; if hue.is_nan() { - return Err("Encountered NaN hue, possibly because of a colorless or empty image".to_string()); + bail!("Encountered NaN hue, possibly because of a colorless or empty image"); } Ok(Hue(hue)) @@ -118,8 +119,8 @@ impl EmbedderT for ColorEmbedder { type Embedding = (f64, f64, f64); const NAME: &'static str = "Color"; - fn embed(&self, path: &PathBuf) -> Result<(f64, f64, f64), String> { - let im = image::open(path).map_err(|e| e.to_string())?; + fn embed(&self, path: &Path) -> Result<(f64, f64, f64)> { + let im = image::open(path)?; let num_pixels = im.height() * im.width(); let [sr, sg, sb] = im .to_rgb8() diff --git a/src/main.rs b/src/main.rs index c4a0c26..5caa4ce 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,10 +1,10 @@ #![feature(iterator_try_collect)] +use anyhow::Result; use clap::Parser; use priority_queue::PriorityQueue; -use std::cmp::Ordering; -use std::collections::HashMap; -use std::path::PathBuf; +use sha2::{Sha512_256, Digest}; +use std::{cmp::Ordering, collections::HashMap, fs, io, path::PathBuf}; use embedders::*; @@ -30,9 +30,8 @@ struct Config { base_dirs: xdg::BaseDirectories, } -fn get_config() -> Result<Config, String> { - let dirs = xdg::BaseDirectories::with_prefix("embeddings-sort") - .map_err(|_| "oh no")?; +fn get_config() -> Result<Config> { + let dirs = xdg::BaseDirectories::with_prefix("embeddings-sort")?; Ok(Config{base_dirs: dirs}) } @@ -113,21 +112,30 @@ fn tsp_from_mst(mst: HashMap<usize, Vec<usize>>) -> Vec<usize> { tsp_path } -fn process_embedder<E>(mut e: E, args: Args, cfg: Config) -> Result<Vec<PathBuf>, String> +fn hash_file(p: &PathBuf) -> Result<[u8; 32]> { + let mut f = fs::File::open(p)?; + let mut hasher = Sha512_256::new(); + io::copy(&mut f, &mut hasher)?; + Ok(hasher.finalize().into_iter().collect::<Vec<u8>>().try_into().unwrap()) +} + +fn process_embedder<E>(mut e: E, args: Args, cfg: Config) -> Result<Vec<PathBuf>> where E: EmbedderT { - if args.images.len() == 0 { + if args.images.is_empty() { return Ok(Vec::new()); } - let db = sled::open(cfg.base_dirs.place_cache_file("embeddings.db") - .map_err(|e| e.to_string())?).map_err(|e| e.to_string())?; - let tree = typed_sled::Tree::<PathBuf, E::Embedding>::open(&db, E::NAME); + let db = sled::open(cfg.base_dirs.place_cache_file("embeddings.db")?)?; + let tree = typed_sled::Tree::<[u8; 32], E::Embedding>::open(&db, E::NAME); - // TODO nicht pfad, sondern hash vom bild als key nehmen let mut embeds: Vec<Option<_>> = args.images .iter() - .map(|p| tree.get(p).map_err(|e| e.to_string())) + .map(|p| { + let h = hash_file(p)?; + let r: Result<Option<E::Embedding>> = tree.get(&h).map_err(|e| e.into()); + r + }) .try_collect()?; let missing_embeds_indices: Vec<_> = embeds @@ -145,8 +153,7 @@ fn process_embedder<E>(mut e: E, args: Args, cfg: Config) -> Result<Vec<PathBuf> for (idx, emb) in missing_embeds_indices .into_iter().zip(missing_embeds.into_iter()) { - // TODO hier auch hash statt pfad - tree.insert(&args.images[idx], &emb).map_err(|e| e.to_string())?; + tree.insert(&hash_file(&args.images[idx])?, &emb)?; embeds[idx] = Some(emb); } @@ -156,7 +163,7 @@ fn process_embedder<E>(mut e: E, args: Args, cfg: Config) -> Result<Vec<PathBuf> Ok(tsp_path.iter().map(|i| args.images[*i].clone()).collect()) } -fn main() -> Result<(), String> { +fn main() -> Result<()> { let cfg = get_config()?; let args = Args::parse(); |