aboutsummaryrefslogtreecommitdiff
path: root/database
diff options
context:
space:
mode:
author metamuffin <metamuffin@disroot.org> 2026-03-11 04:46:43 +0100
committer metamuffin <metamuffin@disroot.org> 2026-03-11 04:46:43 +0100
commit 3115a894fa7008ef4ee657945cf8addaec8f98cb (patch)
tree a560341a732d694756ebdee8acdae26c615842c7 /database
parent 958c5ecfd1ffbb43425c3737dc3eb1ea50fc92f6 (diff)
download jellything-3115a894fa7008ef4ee657945cf8addaec8f98cb.tar
jellything-3115a894fa7008ef4ee657945cf8addaec8f98cb.tar.bz2
jellything-3115a894fa7008ef4ee657945cf8addaec8f98cb.tar.zst
basic text search
Diffstat (limited to 'database')
-rw-r--r-- database/src/kv/index.rs 116
1 files changed, 96 insertions, 20 deletions
diff --git a/database/src/kv/index.rs b/database/src/kv/index.rs
index 0881cb7..469a6e8 100644
--- a/database/src/kv/index.rs
+++ b/database/src/kv/index.rs
@@ -15,7 +15,7 @@ use crate::{
};
use anyhow::{Result, bail};
use jellyobject::Object;
-use std::iter::empty;
+use std::{collections::HashSet, iter::empty};
pub fn update_index(
txn: &mut dyn jellykv::Transaction,
@@ -31,11 +31,7 @@ pub fn update_index(
SortKey::None => {
for mut k in ks {
k.extend(row.to_be_bytes());
- if remove {
- txn.del(&k)?;
- } else {
- txn.set(&k, &[])?;
- }
+ index_marker(txn, &k, remove)?;
}
}
SortKey::Random => {
@@ -50,13 +46,7 @@ pub fn update_index(
}
SortKey::Count => {
for k in ks {
- let mut c = read_counter(txn, &k, 0)?;
- if remove {
- c -= 1;
- } else {
- c += 1;
- }
- write_counter(txn, &k, c)?;
+ index_counter(txn, &k, remove)?;
}
}
SortKey::Value(path, multi_behaviour) => {
@@ -65,19 +55,54 @@ pub fn update_index(
for mut k in ks {
k.extend(value);
k.extend(row.to_be_bytes());
- if remove {
- txn.del(&k)?;
- } else {
- txn.set(&k, &[])?;
- }
+ index_marker(txn, &k, remove)?;
+ }
+ }
+ }
+ SortKey::Text(path) => {
+ let mut tokens = HashSet::new();
+ for val in path.get_matching_values(ob) {
+ for tok in text_tokenizer(val) {
+ tokens.insert(tok);
+ }
+ }
+ for &tok in &tokens {
+ for mut k in ks.clone() {
+ k.push(0);
+ k.extend(tok);
+ index_counter(txn, &k, remove)?;
+ }
+ for mut k in ks.clone() {
+ k.push(1);
+ k.extend(tok);
+ k.extend(row.to_be_bytes());
+ index_marker(txn, &k, remove)?;
}
}
}
- SortKey::Text(_) => todo!(),
}
Ok(())
}
+pub fn index_counter(txn: &mut dyn jellykv::Transaction, k: &[u8], remove: bool) -> Result<()> {
+ let mut c = read_counter(txn, &k, 0)?;
+ if remove && c > 0 {
+ c -= 1;
+ } else {
+ c += 1;
+ }
+ write_counter(txn, &k, c)?;
+ Ok(())
+}
+
+pub fn index_marker(txn: &mut dyn jellykv::Transaction, k: &[u8], remove: bool) -> Result<()> {
+ if remove {
+ txn.del(&k)
+ } else {
+ txn.set(&k, &[])
+ }
+}
+
/// Reads the counter stored under the key `prefix` from `txn`.
///
/// Delegates to `read_counter`, passing `0` as its third argument
/// (presumably the value reported for a missing key — confirm against
/// `read_counter`).
///
/// # Errors
///
/// Propagates any error returned by `read_counter`.
pub fn read_count_index(txn: &dyn jellykv::Transaction, prefix: Vec<u8>) -> Result<u64> {
    let key: &[u8] = prefix.as_slice();
    read_counter(txn, key, 0)
}
@@ -155,10 +180,61 @@ pub fn iter_index<'a>(
}),
)
}
- Sort::TextSearch(_, _) => todo!(),
+ Sort::TextSearch(_, text) => {
+ let search_tokens = text_tokenizer(text.as_bytes())
+ .map(|e| e.to_owned())
+ .collect::<Vec<_>>();
+ let mut min_tok = Vec::new();
+ let mut min_count = u64::MAX;
+ for token in &search_tokens {
+ let mut k = prefix.clone();
+ k.push(0);
+ k.extend(token);
+ let count = read_counter(txn, &k, 0)?;
+ if count < min_count {
+ min_count = count;
+ min_tok = token.to_owned()
+ }
+ }
+ let mut min_token_prefix = prefix.clone();
+ min_token_prefix.push(1);
+ min_token_prefix.extend(&min_tok);
+ Box::new(
+ PrefixIterator {
+ inner: txn.iter(&min_token_prefix, false)?,
+ prefix: min_token_prefix.into(),
+ }
+ .flat_map(move |k| {
+ let k = match k {
+ Ok(k) => k,
+ Err(e) => return Some(Err(e)),
+ };
+ let rn = RowNum::from_be_bytes(k[k.len() - 8..].try_into().unwrap());
+ for token in &search_tokens {
+ let mut k = prefix.clone();
+ k.push(1);
+ k.extend(token);
+ k.extend(rn.to_be_bytes());
+ let v = match txn.get(&k) {
+ Ok(v) => v,
+ Err(e) => return Some(Err(e)),
+ };
+ if v.is_none() {
+ return None;
+ }
+ }
+ Some(anyhow::Ok((rn, Vec::new())))
+ }),
+ )
+ }
})
}
/// Splits `text` into non-empty tokens.
///
/// A token boundary is any single one of the ASCII bytes space, `,`, `:`,
/// `/`, `+` or `&`. Runs of consecutive delimiters (and delimiters at the
/// start or end of the input) would produce empty slices; those are dropped.
/// Every other byte — including tabs, newlines and non-ASCII bytes — is kept
/// as part of a token, and no case folding is applied.
///
/// The yielded slices borrow from `text`.
fn text_tokenizer(text: &[u8]) -> impl Iterator<Item = &[u8]> {
    /// Returns `true` for the bytes that separate tokens.
    fn is_delimiter(byte: &u8) -> bool {
        matches!(*byte, b' ' | b',' | b':' | b'/' | b'+' | b'&')
    }

    text.split(is_delimiter).filter(|token| !token.is_empty())
}
+
fn inc_key(mut k: Vec<u8>) -> Vec<u8> {
for v in k.iter_mut().rev() {
let (nv, carry) = v.overflowing_add(1);