about summary refs log tree commit diff
path: root/src/embedders
diff options
context:
space:
mode:
author metamuffin <metamuffin@disroot.org> 2023-09-20 21:40:05 +0200
committer metamuffin <metamuffin@disroot.org> 2023-09-20 21:40:05 +0200
commit d0ce1d9134968a15d37135622138f6b8b7667454 (patch)
tree 29e40aa04df7a1b7b48811ab7582b52964af1dc6 /src/embedders
parent 8b65d87640e7367cd88e72c320e4370f4ba471a6 (diff)
download embeddings-sort-d0ce1d9134968a15d37135622138f6b8b7667454.tar
embeddings-sort-d0ce1d9134968a15d37135622138f6b8b7667454.tar.bz2
embeddings-sort-d0ce1d9134968a15d37135622138f6b8b7667454.tar.zst
replace cosinesim with L2-norm euclidean
Diffstat (limited to 'src/embedders')
-rw-r--r-- src/embedders/vecmetric.rs 13
1 files changed, 6 insertions, 7 deletions
diff --git a/src/embedders/vecmetric.rs b/src/embedders/vecmetric.rs
index 0c63911..1bda3a8 100644
--- a/src/embedders/vecmetric.rs
+++ b/src/embedders/vecmetric.rs
@@ -19,15 +19,14 @@ impl VecMetric for ManhattenDistance {}
impl MetricElem for CosineSimilarity {
fn dist(&self, other: &Self) -> f64 {
- let x = self
- .0
+ let len_a = self.0.iter().map(|x| x.powi(2)).sum::<f32>().sqrt();
+ let len_b = other.0.iter().map(|x| x.powi(2)).sum::<f32>().sqrt();
+ self.0
.iter()
.zip(other.0.iter())
- .map(|(a, b)| *a * *b)
- .sum::<f32>();
- let mag_a = self.0.iter().map(|x| x.powi(2)).sum::<f32>();
- let mag_b = other.0.iter().map(|x| x.powi(2)).sum::<f32>();
- (x / (mag_a * mag_b).sqrt()) as f64
+ .map(|(a, b)| (*a / len_a - *b / len_b).powi(2))
+ .sum::<f32>()
+ .sqrt() as f64
}
}
impl MetricElem for EuclidianDistance {