/* This file is part of jellything (https://codeberg.org/metamuffin/jellything) which is licensed under the GNU Affero General Public License (version 3); see /COPYING. Copyright (C) 2026 metamuffin */ use crate::{ MultiBehaviour, RowNum, Sort, SortOrder, kv::{ SubtreeNum, binning::BinningComponent, helpers::{read_counter, write_counter}, index_key::{IndexKey, SortKey}, prefix_iterator::PrefixIterator, }, }; use anyhow::{Result, bail}; use jellyobject::Object; use std::{collections::HashSet, iter::empty}; pub fn test_index_identical(ik: &IndexKey, before: &Object, after: &Object) -> bool { let mut identical = true; for b in &ik.0.0 { match b { BinningComponent::Has(path) => { identical &= path.get_matching_value(before).is_some() == path.get_matching_value(after).is_some(); } BinningComponent::Match(path) => { identical &= path.get_matching_values(before) == path.get_matching_values(after); } } } match &ik.1 { SortKey::None | SortKey::Count | SortKey::Random => (), SortKey::Value(path, _) => { identical &= path.get_matching_values(before) == path.get_matching_values(after); } SortKey::Text(paths) => { for p in paths { identical &= p.get_matching_values(before) == p.get_matching_values(after); } } } identical } pub fn update_index( txn: &mut dyn jellykv::Transaction, is: SubtreeNum, ik: &IndexKey, row: RowNum, ob: &Object, remove: bool, ) -> Result<()> { let mut ks = vec![is.to_be_bytes().to_vec()]; ik.0.apply(ob, &mut ks); match &ik.1 { SortKey::None => { for mut k in ks { k.extend(row.to_be_bytes()); index_marker(txn, &k, remove)?; } } SortKey::Random => { for mut k in ks { k.extend(randomize(row).to_be_bytes()); if remove { txn.del(&k)?; } else { txn.set(&k, &row.to_be_bytes())?; } } } SortKey::Count => { for k in ks { index_counter(txn, &k, remove)?; } } SortKey::Value(path, multi_behaviour) => { assert_eq!(*multi_behaviour, MultiBehaviour::First, "TODO"); if let Some(value) = path.get_matching_value(ob) { for mut k in ks { k.extend(value); k.extend(row.to_be_bytes()); index_marker(txn, &k, remove)?; } } } SortKey::Text(paths) => { let mut tokens = HashSet::new(); for path in paths { for val in path.get_matching_values(ob) { for tok in text_tokenizer(str::from_utf8(val).unwrap()) { tokens.insert(tok); } } } for tok in &tokens { for mut k in ks.clone() { k.push(0); k.extend((tok.len() as u32).to_be_bytes()); k.extend(tok.as_bytes()); index_counter(txn, &k, remove)?; } for mut k in ks.clone() { k.push(1); k.extend((tok.len() as u32).to_be_bytes()); k.extend(tok.as_bytes()); k.extend(row.to_be_bytes()); index_marker(txn, &k, remove)?; } } } } Ok(()) } pub fn index_counter(txn: &mut dyn jellykv::Transaction, k: &[u8], remove: bool) -> Result<()> { let mut c = read_counter(txn, &k, 0)?; if remove && c > 0 { c -= 1; } else { c += 1; } write_counter(txn, &k, c)?; Ok(()) } pub fn index_marker(txn: &mut dyn jellykv::Transaction, k: &[u8], remove: bool) -> Result<()> { if remove { txn.del(&k) } else { txn.set(&k, &[]) } } pub fn read_count_index(txn: &dyn jellykv::Transaction, prefix: Vec) -> Result { read_counter(txn, &prefix, 0) } #[allow(clippy::type_complexity)] pub fn iter_index<'a>( txn: &'a dyn jellykv::Transaction, prefix: Vec, sort: &Sort, resume: Option>, ) -> Result)>> + 'a>> { Ok(match sort { Sort::None => { let mut start = prefix.clone(); if let Some(resume) = resume { if resume.len() != 8 { bail!("invalid resume length") } start.extend(resume); start = inc_key(start); } Box::new( PrefixIterator { inner: txn.iter(&start, false)?, prefix: prefix.into(), } .map(|k| { k.map(|k| { let rn = RowNum::from_be_bytes(k[k.len() - 8..].try_into().unwrap()); (rn, rn.to_be_bytes().to_vec()) }) }), ) } Sort::Random(seed) => { let mut k = prefix.clone(); k.extend(randomize(*seed).to_be_bytes()); let Some(it) = txn.iter(&k, false)?.next() else { return Ok(Box::new(empty())); }; let res = it?; if !res.starts_with(&prefix) { return Ok(Box::new(empty())); } let r = txn.get(&res)?.unwrap(); let row = RowNum::from_be_bytes(r.try_into().unwrap()); Box::new([Ok((row, vec![]))].into_iter()) } Sort::Value(value_sort) => { assert!(value_sort.offset.is_none(), "TODO"); let (mut start, rev) = match value_sort.order { SortOrder::Ascending => (prefix.clone(), false), SortOrder::Descending => (inc_key(prefix.clone()), true), }; if let Some(resume) = resume { start = prefix.iter().copied().chain(resume).collect(); if rev { start = dec_key(start); } else { start = inc_key(start); } } Box::new( PrefixIterator { inner: txn.iter(&start, rev)?, prefix: prefix.clone().into(), } .map(move |k| { k.map(|k| { ( RowNum::from_be_bytes(k[k.len() - 8..].try_into().unwrap()), k[prefix.len()..].to_vec(), ) }) }), ) } Sort::TextSearch(_, text) => { let search_tokens = text_tokenizer(text); let mut min_tok = String::new(); let mut min_count = u64::MAX; for token in &search_tokens { let mut k = prefix.clone(); k.push(0); k.extend(token.as_bytes()); let count = read_counter(txn, &k, 0)?; if count < min_count { min_count = count; min_tok = token.to_string() } } let mut min_token_prefix = prefix.clone(); min_token_prefix.push(1); min_token_prefix.extend((min_tok.len() as u32).to_be_bytes()); min_token_prefix.extend(min_tok.as_bytes()); Box::new( PrefixIterator { inner: txn.iter(&min_token_prefix, false)?, prefix: min_token_prefix.into(), } .flat_map(move |k| { let k = match k { Ok(k) => k, Err(e) => return Some(Err(e)), }; let rn = RowNum::from_be_bytes(k[k.len() - 8..].try_into().unwrap()); for token in &search_tokens { let mut k = prefix.clone(); k.push(1); k.extend((token.len() as u32).to_be_bytes()); k.extend(token.as_bytes()); k.extend(rn.to_be_bytes()); let v = match txn.get(&k) { Ok(v) => v, Err(e) => return Some(Err(e)), }; if v.is_none() { return None; } } Some(anyhow::Ok((rn, Vec::new()))) }), ) } }) } fn text_tokenizer(text: &str) -> HashSet { text.split(|x| matches!(x, ' ' | ',' | ':' | '/' | '+' | '&' | '@' | '#')) .filter(|x| !x.is_empty()) .map(|s| s.to_lowercase()) .collect() } fn inc_key(mut k: Vec) -> Vec { for v in k.iter_mut().rev() { let (nv, carry) = v.overflowing_add(1); *v = nv; if !carry { break; } } k } fn dec_key(mut k: Vec) -> Vec { for v in k.iter_mut().rev() { let (nv, carry) = v.overflowing_sub(1); *v = nv; if !carry { break; } } k } fn randomize(mut x: u64) -> u64 { for _ in 0..15 { x ^= x << 13; x ^= x >> 7; x ^= x << 17; } x }