diff options
author | Lia Lenckowski <lialenck@protonmail.com> | 2024-11-28 00:03:29 +0100 |
---|---|---|
committer | Lia Lenckowski <lialenck@protonmail.com> | 2024-11-28 00:03:29 +0100 |
commit | abaf12fcdc8e76172965517d760b34524f167e8c (patch) | |
tree | 115a3882b3f4a6e8d2f387a52bc6c634d4ac68ff /src | |
parent | 2a17ceac1ab5cdee98d20a928795a1aba06c8be7 (diff) | |
download | embeddings-sort-abaf12fcdc8e76172965517d760b34524f167e8c.tar embeddings-sort-abaf12fcdc8e76172965517d760b34524f167e8c.tar.bz2 embeddings-sort-abaf12fcdc8e76172965517d760b34524f167e8c.tar.zst |
optimizations: lower durability, massively increase ai embedding batch size
also fix that one clippy warning
Diffstat (limited to 'src')
-rw-r--r-- | src/cache.rs | 5 | ||||
-rw-r--r-- | src/embedders/ai.rs | 6 | ||||
-rw-r--r-- | src/tsp_approx.rs | 1 |
3 files changed, 7 insertions, 5 deletions
diff --git a/src/cache.rs b/src/cache.rs
index 608adb5..6767f79 100644
--- a/src/cache.rs
+++ b/src/cache.rs
@@ -1,7 +1,7 @@
 use crate::{FileHash, MetricElem};
 use anyhow::Result;
 use bincode::config::standard;
-use redb::{Database, TableDefinition};
+use redb::{Database, Durability, TableDefinition};
 use std::path::Path;
 
 const T_ENTRIES: TableDefinition<(&str, FileHash), &[u8]> = TableDefinition::new("entries");
@@ -32,7 +32,8 @@ impl Cache {
         hash: FileHash,
         value: &E,
     ) -> Result<()> {
-        let txn = self.db.begin_write()?;
+        let mut txn = self.db.begin_write()?;
+        txn.set_durability(Durability::Eventual);
         let mut table = txn.open_table(T_ENTRIES)?;
         table.insert(
             (type_name, hash),
diff --git a/src/embedders/ai.rs b/src/embedders/ai.rs
index 7d31a6b..582c9c1 100644
--- a/src/embedders/ai.rs
+++ b/src/embedders/ai.rs
@@ -42,8 +42,8 @@ impl<Metric: VecMetric> ContentEmbedder<'_, Metric> {
         // per-image basis. Thus, we first try embedding 64 images at once, and if that fails, fall
         // back to passing them to fastembeds one-by-one, so that we can get all the non-failure
         // results.
-        for chunk in paths.chunks(64) {
-            match embedder.embed(chunk.iter().collect(), Some(8)) {
+        for chunk in paths.chunks(256) {
+            match embedder.embed(chunk.iter().collect(), None) {
                 Ok(embeds) => res.extend(embeds.into_iter().map(|e| Ok(e.into()))),
                 Err(_) => {
                     // embed one by one
@@ -61,7 +61,7 @@ impl<Metric: VecMetric> ContentEmbedder<'_, Metric> {
                     res.append(&mut embeds);
                 }
             }
-            bar.inc(64);
+            bar.inc(256);
         }
 
         Ok(res)
diff --git a/src/tsp_approx.rs b/src/tsp_approx.rs
index e3448a1..d8c30fe 100644
--- a/src/tsp_approx.rs
+++ b/src/tsp_approx.rs
@@ -135,6 +135,7 @@ fn min_weight_matching(
     res
 }
 
+#[allow(clippy::type_complexity)]
 fn euler_tour(
     mut graph: HashMap<usize, MultiSet<usize, RandomState>>,
     hash_seed: &Option<u64>,