about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Cargo.lock 79
-rw-r--r-- Cargo.toml 2
-rw-r--r-- src/embedders.rs 25
-rw-r--r-- src/main.rs 39
4 files changed, 117 insertions, 28 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 1fab5d7..cd53559 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -57,6 +57,12 @@ dependencies = [
]
[[package]]
+name = "anyhow"
+version = "1.0.75"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6"
+
+[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -84,6 +90,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
+name = "block-buffer"
+version = "0.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
+dependencies = [
+ "generic-array",
+]
+
+[[package]]
name = "bumpalo"
version = "3.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -173,6 +188,15 @@ dependencies = [
]
[[package]]
+name = "cpufeatures"
+version = "0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1"
+dependencies = [
+ "libc",
+]
+
+[[package]]
name = "crc32fast"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -231,6 +255,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
[[package]]
+name = "crypto-common"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
+dependencies = [
+ "generic-array",
+ "typenum",
+]
+
+[[package]]
+name = "digest"
+version = "0.10.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
+dependencies = [
+ "block-buffer",
+ "crypto-common",
+]
+
+[[package]]
name = "either"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -240,12 +284,14 @@ checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
name = "embeddings-sort"
version = "0.1.0"
dependencies = [
+ "anyhow",
"clap",
"image",
"indicatif",
"priority-queue",
"rayon",
"serde",
+ "sha2",
"sled",
"typed-sled",
"xdg",
@@ -337,6 +383,16 @@ dependencies = [
]
[[package]]
+name = "generic-array"
+version = "0.14.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
+dependencies = [
+ "typenum",
+ "version_check",
+]
+
+[[package]]
name = "getrandom"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -728,6 +784,17 @@ dependencies = [
]
[[package]]
+name = "sha2"
+version = "0.10.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "479fb9d862239e610720565ca91403019f2f00410f1864c5aa7479b950a76ed8"
+dependencies = [
+ "cfg-if",
+ "cpufeatures",
+ "digest",
+]
+
+[[package]]
name = "simd-adler32"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -826,6 +893,12 @@ dependencies = [
]
[[package]]
+name = "typenum"
+version = "1.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba"
+
+[[package]]
name = "unicode-ident"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -844,6 +917,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
+name = "version_check"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
+
+[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/Cargo.toml b/Cargo.toml
index fc80898..1010994 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,3 +15,5 @@ indicatif = "0"
sled = "0"
typed-sled = "0"
serde = "1"
+sha2 = "0"
+anyhow = "1"
diff --git a/src/embedders.rs b/src/embedders.rs
index 0693b5e..8911e95 100644
--- a/src/embedders.rs
+++ b/src/embedders.rs
@@ -1,5 +1,6 @@
+use anyhow::{bail, Result};
use rayon::prelude::*;
-use std::path::PathBuf;
+use std::path::{Path, PathBuf};
use serde::{Deserialize, Serialize};
pub trait MetricElem: Send + Sync + 'static + Serialize + for<'a> Deserialize<'a> {
@@ -16,21 +17,21 @@ pub trait EmbedderT: Send + Sync {
type Embedding: MetricElem;
const NAME: &'static str;
- fn embed(&self, _: &PathBuf) -> Result<Self::Embedding, String>;
+ fn embed(&self, _: &Path) -> Result<Self::Embedding>;
}
pub trait BatchEmbedder: Send + Sync {
type Embedding: MetricElem;
const NAME: &'static str;
- fn embeds(&mut self, _: &[PathBuf]) -> Result<Vec<Self::Embedding>, String>;
+ fn embeds(&mut self, _: &[PathBuf]) -> Result<Vec<Self::Embedding>>;
}
impl<T: EmbedderT> BatchEmbedder for T {
type Embedding = T::Embedding;
const NAME: &'static str = T::NAME;
- fn embeds(&mut self, paths: &[PathBuf]) -> Result<Vec<Self::Embedding>, String> {
+ fn embeds(&mut self, paths: &[PathBuf]) -> Result<Vec<Self::Embedding>> {
paths.par_iter()
.map(|p| self.embed(p))
.collect::<Vec<_>>()
@@ -44,12 +45,12 @@ impl EmbedderT for BrightnessEmbedder {
type Embedding = f64;
const NAME: &'static str = "Brightness";
- fn embed(&self, path: &PathBuf) -> Result<f64, String> {
- let im = image::open(path).map_err(|e| e.to_string())?;
+ fn embed(&self, path: &Path) -> Result<f64> {
+ let im = image::open(path)?;
let num_bytes = 3 * (im.height() * im.width());
if num_bytes == 0 {
- return Err("Encountered NaN brightness, due to an empty image".to_string());
+ bail!("Encountered NaN brightness, due to an empty image");
}
Ok(im.to_rgb8()
@@ -74,8 +75,8 @@ impl EmbedderT for HueEmbedder {
type Embedding = Hue;
const NAME: &'static str = "Hue";
- fn embed(&self, path: &PathBuf) -> Result<Hue, String> {
- let im = image::open(path).map_err(|e| e.to_string())?;
+ fn embed(&self, path: &Path) -> Result<Hue> {
+ let im = image::open(path)?;
let num_pixels = im.height() * im.width();
let [sr, sg, sb] = im
.to_rgb8()
@@ -98,7 +99,7 @@ impl EmbedderT for HueEmbedder {
};
if hue.is_nan() {
- return Err("Encountered NaN hue, possibly because of a colorless or empty image".to_string());
+ bail!("Encountered NaN hue, possibly because of a colorless or empty image");
}
Ok(Hue(hue))
@@ -118,8 +119,8 @@ impl EmbedderT for ColorEmbedder {
type Embedding = (f64, f64, f64);
const NAME: &'static str = "Color";
- fn embed(&self, path: &PathBuf) -> Result<(f64, f64, f64), String> {
- let im = image::open(path).map_err(|e| e.to_string())?;
+ fn embed(&self, path: &Path) -> Result<(f64, f64, f64)> {
+ let im = image::open(path)?;
let num_pixels = im.height() * im.width();
let [sr, sg, sb] = im
.to_rgb8()
diff --git a/src/main.rs b/src/main.rs
index c4a0c26..5caa4ce 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,10 +1,10 @@
#![feature(iterator_try_collect)]
+use anyhow::Result;
use clap::Parser;
use priority_queue::PriorityQueue;
-use std::cmp::Ordering;
-use std::collections::HashMap;
-use std::path::PathBuf;
+use sha2::{Sha512_256, Digest};
+use std::{cmp::Ordering, collections::HashMap, fs, io, path::PathBuf};
use embedders::*;
@@ -30,9 +30,8 @@ struct Config {
base_dirs: xdg::BaseDirectories,
}
-fn get_config() -> Result<Config, String> {
- let dirs = xdg::BaseDirectories::with_prefix("embeddings-sort")
- .map_err(|_| "oh no")?;
+fn get_config() -> Result<Config> {
+ let dirs = xdg::BaseDirectories::with_prefix("embeddings-sort")?;
Ok(Config{base_dirs: dirs})
}
@@ -113,21 +112,30 @@ fn tsp_from_mst(mst: HashMap<usize, Vec<usize>>) -> Vec<usize> {
tsp_path
}
-fn process_embedder<E>(mut e: E, args: Args, cfg: Config) -> Result<Vec<PathBuf>, String>
+/// Returns the SHA-512/256 digest of the file at `p`; open/read errors are propagated.
+fn hash_file(p: &PathBuf) -> Result<[u8; 32]> {
+    let mut hasher = Sha512_256::new();
+    io::copy(&mut fs::File::open(p)?, &mut hasher)?;
+    Ok(hasher.finalize().into())
+}
+
+fn process_embedder<E>(mut e: E, args: Args, cfg: Config) -> Result<Vec<PathBuf>>
where E: EmbedderT
{
- if args.images.len() == 0 {
+ if args.images.is_empty() {
return Ok(Vec::new());
}
- let db = sled::open(cfg.base_dirs.place_cache_file("embeddings.db")
- .map_err(|e| e.to_string())?).map_err(|e| e.to_string())?;
- let tree = typed_sled::Tree::<PathBuf, E::Embedding>::open(&db, E::NAME);
+ let db = sled::open(cfg.base_dirs.place_cache_file("embeddings.db")?)?;
+ let tree = typed_sled::Tree::<[u8; 32], E::Embedding>::open(&db, E::NAME);
- // TODO nicht pfad, sondern hash vom bild als key nehmen
let mut embeds: Vec<Option<_>> = args.images
.iter()
- .map(|p| tree.get(p).map_err(|e| e.to_string()))
+ .map(|p| {
+ let h = hash_file(p)?;
+ let r: Result<Option<E::Embedding>> = tree.get(&h).map_err(|e| e.into());
+ r
+ })
.try_collect()?;
let missing_embeds_indices: Vec<_> = embeds
@@ -145,8 +153,7 @@ fn process_embedder<E>(mut e: E, args: Args, cfg: Config) -> Result<Vec<PathBuf>
for (idx, emb) in missing_embeds_indices
.into_iter().zip(missing_embeds.into_iter())
{
- // TODO hier auch hash statt pfad
- tree.insert(&args.images[idx], &emb).map_err(|e| e.to_string())?;
+ tree.insert(&hash_file(&args.images[idx])?, &emb)?;
embeds[idx] = Some(emb);
}
@@ -156,7 +163,7 @@ fn process_embedder<E>(mut e: E, args: Args, cfg: Config) -> Result<Vec<PathBuf>
Ok(tsp_path.iter().map(|i| args.images[*i].clone()).collect())
}
-fn main() -> Result<(), String> {
+fn main() -> Result<()> {
let cfg = get_config()?;
let args = Args::parse();