about summary refs log tree commit diff
path: root/database
diff options
context:
space:
mode:
author metamuffin <metamuffin@disroot.org> 2026-03-11 05:00:42 +0100
committer metamuffin <metamuffin@disroot.org> 2026-03-11 05:00:42 +0100
commit 89d9e1703b06ad6f84933e1399e70a9d055dfbe8 (patch)
tree 8d359e07a8e36c9d8205364213277e21cf0e9933 /database
parent 3115a894fa7008ef4ee657945cf8addaec8f98cb (diff)
download jellything-89d9e1703b06ad6f84933e1399e70a9d055dfbe8.tar
jellything-89d9e1703b06ad6f84933e1399e70a9d055dfbe8.tar.bz2
jellything-89d9e1703b06ad6f84933e1399e70a9d055dfbe8.tar.zst
tokenize lowercase
Diffstat (limited to 'database')
-rw-r--r-- database/src/kv/index.rs 28
1 files changed, 14 insertions, 14 deletions
diff --git a/database/src/kv/index.rs b/database/src/kv/index.rs
index 469a6e8..e4f707c 100644
--- a/database/src/kv/index.rs
+++ b/database/src/kv/index.rs
@@ -62,19 +62,19 @@ pub fn update_index(
SortKey::Text(path) => {
let mut tokens = HashSet::new();
for val in path.get_matching_values(ob) {
- for tok in text_tokenizer(val) {
+ for tok in text_tokenizer(str::from_utf8(val).unwrap()) {
tokens.insert(tok);
}
}
- for &tok in &tokens {
+ for tok in &tokens {
for mut k in ks.clone() {
k.push(0);
- k.extend(tok);
+ k.extend(tok.as_bytes());
index_counter(txn, &k, remove)?;
}
for mut k in ks.clone() {
k.push(1);
- k.extend(tok);
+ k.extend(tok.as_bytes());
k.extend(row.to_be_bytes());
index_marker(txn, &k, remove)?;
}
@@ -181,24 +181,22 @@ pub fn iter_index<'a>(
)
}
Sort::TextSearch(_, text) => {
- let search_tokens = text_tokenizer(text.as_bytes())
- .map(|e| e.to_owned())
- .collect::<Vec<_>>();
- let mut min_tok = Vec::new();
+ let search_tokens = text_tokenizer(text);
+ let mut min_tok = String::new();
let mut min_count = u64::MAX;
for token in &search_tokens {
let mut k = prefix.clone();
k.push(0);
- k.extend(token);
+ k.extend(token.as_bytes());
let count = read_counter(txn, &k, 0)?;
if count < min_count {
min_count = count;
- min_tok = token.to_owned()
+ min_tok = token.to_string()
}
}
let mut min_token_prefix = prefix.clone();
min_token_prefix.push(1);
- min_token_prefix.extend(&min_tok);
+ min_token_prefix.extend(min_tok.as_bytes());
Box::new(
PrefixIterator {
inner: txn.iter(&min_token_prefix, false)?,
@@ -213,7 +211,7 @@ pub fn iter_index<'a>(
for token in &search_tokens {
let mut k = prefix.clone();
k.push(1);
- k.extend(token);
+ k.extend(token.as_bytes());
k.extend(rn.to_be_bytes());
let v = match txn.get(&k) {
Ok(v) => v,
@@ -230,9 +228,11 @@ pub fn iter_index<'a>(
})
}
-fn text_tokenizer(text: &[u8]) -> impl Iterator<Item = &[u8]> {
- text.split(|x| matches!(*x, b' ' | b',' | b':' | b'/' | b'+' | b'&'))
+fn text_tokenizer(text: &str) -> HashSet<String> {
+ text.split(|x| matches!(x, ' ' | ',' | ':' | '/' | '+' | '&'))
.filter(|x| !x.is_empty())
+ .map(|s| s.to_lowercase())
+ .collect()
}
fn inc_key(mut k: Vec<u8>) -> Vec<u8> {