aboutsummaryrefslogtreecommitdiff
path: root/src/embedders
diff options
context:
space:
mode:
author Lia Lenckowski <lialenck@protonmail.com> 2024-11-28 00:03:29 +0100
committer Lia Lenckowski <lialenck@protonmail.com> 2024-11-28 00:03:29 +0100
commit abaf12fcdc8e76172965517d760b34524f167e8c (patch)
tree 115a3882b3f4a6e8d2f387a52bc6c634d4ac68ff /src/embedders
parent 2a17ceac1ab5cdee98d20a928795a1aba06c8be7 (diff)
download embeddings-sort-abaf12fcdc8e76172965517d760b34524f167e8c.tar
embeddings-sort-abaf12fcdc8e76172965517d760b34524f167e8c.tar.bz2
embeddings-sort-abaf12fcdc8e76172965517d760b34524f167e8c.tar.zst
optimizations: lower durability, massively increase ai embedding batch size
also fix that one clippy warning
Diffstat (limited to 'src/embedders')
-rw-r--r-- src/embedders/ai.rs 6
1 files changed, 3 insertions, 3 deletions
diff --git a/src/embedders/ai.rs b/src/embedders/ai.rs
index 7d31a6b..582c9c1 100644
--- a/src/embedders/ai.rs
+++ b/src/embedders/ai.rs
@@ -42,8 +42,8 @@ impl<Metric: VecMetric> ContentEmbedder<'_, Metric> {
// per-image basis. Thus, we first try embedding 64 images at once, and if that fails, fall
// back to passing them to fastembeds one-by-one, so that we can get all the non-failure
// results.
- for chunk in paths.chunks(64) {
- match embedder.embed(chunk.iter().collect(), Some(8)) {
+ for chunk in paths.chunks(256) {
+ match embedder.embed(chunk.iter().collect(), None) {
Ok(embeds) => res.extend(embeds.into_iter().map(|e| Ok(e.into()))),
Err(_) => {
// embed one by one
@@ -61,7 +61,7 @@ impl<Metric: VecMetric> ContentEmbedder<'_, Metric> {
res.append(&mut embeds);
}
}
- bar.inc(64);
+ bar.inc(256);
}
Ok(res)