diff options
| author | metamuffin <metamuffin@disroot.org> | 2026-03-11 04:46:43 +0100 |
|---|---|---|
| committer | metamuffin <metamuffin@disroot.org> | 2026-03-11 04:46:43 +0100 |
| commit | 3115a894fa7008ef4ee657945cf8addaec8f98cb (patch) | |
| tree | a560341a732d694756ebdee8acdae26c615842c7 /database | |
| parent | 958c5ecfd1ffbb43425c3737dc3eb1ea50fc92f6 (diff) | |
| download | jellything-3115a894fa7008ef4ee657945cf8addaec8f98cb.tar jellything-3115a894fa7008ef4ee657945cf8addaec8f98cb.tar.bz2 jellything-3115a894fa7008ef4ee657945cf8addaec8f98cb.tar.zst | |
basic text search
Diffstat (limited to 'database')
| -rw-r--r-- | database/src/kv/index.rs | 116 |
1 files changed, 96 insertions, 20 deletions
```diff
diff --git a/database/src/kv/index.rs b/database/src/kv/index.rs
index 0881cb7..469a6e8 100644
--- a/database/src/kv/index.rs
+++ b/database/src/kv/index.rs
@@ -15,7 +15,7 @@ use crate::{
 };
 use anyhow::{Result, bail};
 use jellyobject::Object;
-use std::iter::empty;
+use std::{collections::HashSet, iter::empty};
 
 pub fn update_index(
     txn: &mut dyn jellykv::Transaction,
@@ -31,11 +31,7 @@ pub fn update_index(
         SortKey::None => {
             for mut k in ks {
                 k.extend(row.to_be_bytes());
-                if remove {
-                    txn.del(&k)?;
-                } else {
-                    txn.set(&k, &[])?;
-                }
+                index_marker(txn, &k, remove)?;
             }
         }
         SortKey::Random => {
@@ -50,13 +46,7 @@ pub fn update_index(
         }
         SortKey::Count => {
             for k in ks {
-                let mut c = read_counter(txn, &k, 0)?;
-                if remove {
-                    c -= 1;
-                } else {
-                    c += 1;
-                }
-                write_counter(txn, &k, c)?;
+                index_counter(txn, &k, remove)?;
             }
         }
         SortKey::Value(path, multi_behaviour) => {
@@ -65,19 +55,54 @@ pub fn update_index(
                 for mut k in ks {
                     k.extend(value);
                     k.extend(row.to_be_bytes());
-                    if remove {
-                        txn.del(&k)?;
-                    } else {
-                        txn.set(&k, &[])?;
-                    }
+                    index_marker(txn, &k, remove)?;
+                }
+            }
+        }
+        SortKey::Text(path) => {
+            let mut tokens = HashSet::new();
+            for val in path.get_matching_values(ob) {
+                for tok in text_tokenizer(val) {
+                    tokens.insert(tok);
+                }
+            }
+            for &tok in &tokens {
+                for mut k in ks.clone() {
+                    k.push(0);
+                    k.extend(tok);
+                    index_counter(txn, &k, remove)?;
+                }
+                for mut k in ks.clone() {
+                    k.push(1);
+                    k.extend(tok);
+                    k.extend(row.to_be_bytes());
+                    index_marker(txn, &k, remove)?;
                 }
             }
         }
-        SortKey::Text(_) => todo!(),
     }
     Ok(())
 }
 
+pub fn index_counter(txn: &mut dyn jellykv::Transaction, k: &[u8], remove: bool) -> Result<()> {
+    let mut c = read_counter(txn, &k, 0)?;
+    if remove && c > 0 {
+        c -= 1;
+    } else {
+        c += 1;
+    }
+    write_counter(txn, &k, c)?;
+    Ok(())
+}
+
+pub fn index_marker(txn: &mut dyn jellykv::Transaction, k: &[u8], remove: bool) -> Result<()> {
+    if remove {
+        txn.del(&k)
+    } else {
+        txn.set(&k, &[])
+    }
+}
+
 pub fn read_count_index(txn: &dyn jellykv::Transaction, prefix: Vec<u8>) -> Result<u64> {
     read_counter(txn, &prefix, 0)
 }
@@ -155,10 +180,61 @@ pub fn iter_index<'a>(
                 }),
             )
         }
-        Sort::TextSearch(_, _) => todo!(),
+        Sort::TextSearch(_, text) => {
+            let search_tokens = text_tokenizer(text.as_bytes())
+                .map(|e| e.to_owned())
+                .collect::<Vec<_>>();
+            let mut min_tok = Vec::new();
+            let mut min_count = u64::MAX;
+            for token in &search_tokens {
+                let mut k = prefix.clone();
+                k.push(0);
+                k.extend(token);
+                let count = read_counter(txn, &k, 0)?;
+                if count < min_count {
+                    min_count = count;
+                    min_tok = token.to_owned()
+                }
+            }
+            let mut min_token_prefix = prefix.clone();
+            min_token_prefix.push(1);
+            min_token_prefix.extend(&min_tok);
+            Box::new(
+                PrefixIterator {
+                    inner: txn.iter(&min_token_prefix, false)?,
+                    prefix: min_token_prefix.into(),
+                }
+                .flat_map(move |k| {
+                    let k = match k {
+                        Ok(k) => k,
+                        Err(e) => return Some(Err(e)),
+                    };
+                    let rn = RowNum::from_be_bytes(k[k.len() - 8..].try_into().unwrap());
+                    for token in &search_tokens {
+                        let mut k = prefix.clone();
+                        k.push(1);
+                        k.extend(token);
+                        k.extend(rn.to_be_bytes());
+                        let v = match txn.get(&k) {
+                            Ok(v) => v,
+                            Err(e) => return Some(Err(e)),
+                        };
+                        if v.is_none() {
+                            return None;
+                        }
+                    }
+                    Some(anyhow::Ok((rn, Vec::new())))
+                }),
+            )
+        }
     })
 }
 
+fn text_tokenizer(text: &[u8]) -> impl Iterator<Item = &[u8]> {
+    text.split(|x| matches!(*x, b' ' | b',' | b':' | b'/' | b'+' | b'&'))
+        .filter(|x| !x.is_empty())
+}
+
 fn inc_key(mut k: Vec<u8>) -> Vec<u8> {
     for v in k.iter_mut().rev() {
         let (nv, carry) = v.overflowing_add(1);
```