diff options
author | metamuffin <metamuffin@disroot.org> | 2024-04-15 15:19:29 +0200 |
---|---|---|
committer | metamuffin <metamuffin@disroot.org> | 2024-04-15 15:19:29 +0200 |
commit | c988e7db759966d9586471e8cfcfd0d91e855dc0 (patch) | |
tree | 447f2b817bc04a9591f07c11e913b35b651cd0cd | |
parent | c5bb4949eb0959ec3a3c2fa010d2d7549347e587 (diff) | |
download | jellything-c988e7db759966d9586471e8cfcfd0d91e855dc0.tar jellything-c988e7db759966d9586471e8cfcfd0d91e855dc0.tar.bz2 jellything-c988e7db759966d9586471e8cfcfd0d91e855dc0.tar.zst |
fulltext search pt.2
-rw-r--r-- | base/src/database.rs | 43 | ||||
-rw-r--r-- | import/src/db.rs | 27 | ||||
-rw-r--r-- | server/src/main.rs | 5 | ||||
-rw-r--r-- | server/src/routes/ui/search.rs | 83 |
4 files changed, 105 insertions, 53 deletions
diff --git a/base/src/database.rs b/base/src/database.rs index f1ae595..2fabf1c 100644 --- a/base/src/database.rs +++ b/base/src/database.rs @@ -3,6 +3,7 @@ which is licensed under the GNU Affero General Public License (version 3); see /COPYING. Copyright (C) 2024 metamuffin <metamuffin.org> */ +use anyhow::Context; use bincode::{Decode, Encode}; use jellycommon::{ user::{NodeUserData, User}, @@ -10,13 +11,15 @@ use jellycommon::{ }; use log::info; use serde::{Deserialize, Serialize}; -use std::{borrow::Borrow, fs::create_dir_all, ops::Deref, path::Path}; +use std::{borrow::Borrow, fs::create_dir_all, ops::Deref, path::Path, sync::RwLock}; use tantivy::{ - schema::{Field, Schema, FAST, STORED, TEXT}, - Index, IndexReader, ReloadPolicy, + directory::MmapDirectory, + schema::{Field, Schema, FAST, INDEXED, STORED, TEXT}, + DateOptions, Index, IndexReader, IndexWriter, ReloadPolicy, }; pub use redb::*; +pub use tantivy; pub const T_USER: TableDefinition<&str, Ser<User>> = TableDefinition::new("user"); pub const T_USER_NODE: TableDefinition<(&str, &str), Ser<NodeUserData>> = @@ -30,16 +33,19 @@ pub const T_NODE_IMPORT: TableDefinition<&str, Ser<Vec<(Vec<usize>, Node)>>> = pub struct DataAcid { pub inner: redb::Database, - pub ft_node: NodeFulltextIndex, + pub node_index: NodeFulltextIndex, } impl DataAcid { pub fn open(path: &Path) -> Result<Self, anyhow::Error> { info!("database"); + create_dir_all(path)?; let db = redb::Database::create(path.join("data"))?; - let ft_node = NodeFulltextIndex::new(path)?; - let r = Self { inner: db, ft_node }; + let r = Self { + inner: db, + node_index: ft_node, + }; { // this creates all tables such that read operations on them do not fail. @@ -68,10 +74,13 @@ impl Deref for DataAcid { pub struct NodeFulltextIndex { pub schema: Schema, pub reader: IndexReader, + pub writer: RwLock<IndexWriter>, + pub index: Index, pub id: Field, pub title: Field, + pub releasedate: Field, pub description: Field, - pub index: Index, + pub f_index: Field, } impl NodeFulltextIndex { fn new(path: &Path) -> anyhow::Result<Self> { @@ -79,17 +88,31 @@ impl NodeFulltextIndex { let id = schema.add_text_field("id", TEXT | STORED | FAST); let title = schema.add_text_field("title", TEXT); let description = schema.add_text_field("description", TEXT); + let f_index = schema.add_u64_field("index", FAST); + let releasedate = schema.add_date_field( + "releasedate", + DateOptions::from(INDEXED) + .set_fast() + .set_precision(tantivy::DateTimePrecision::Seconds), + ); let schema = schema.build(); - create_dir_all(path.join("node_fts_index"))?; - let index = Index::create_in_dir(path.join("node_fts_index"), schema.clone())?; + create_dir_all(path.join("node_index"))?; + let directory = + MmapDirectory::open(path.join("node_index")).context("opening index directory")?; + let index = Index::open_or_create(directory, schema.clone()).context("creating index")?; let reader = index .reader_builder() .reload_policy(ReloadPolicy::OnCommitWithDelay) - .try_into()?; + .try_into() + .context("creating reader")?; + let writer = index.writer(30_000_000).context("creating writer")?; Ok(Self { index, + writer: writer.into(), reader, schema, + f_index, + releasedate, id, description, title, diff --git a/import/src/db.rs b/import/src/db.rs index 88c5601..4c62681 100644 --- a/import/src/db.rs +++ b/import/src/db.rs @@ -1,7 +1,9 @@ use std::collections::HashMap; use anyhow::anyhow; -use jellybase::database::{DataAcid, ReadableTable, Ser, T_NODE, T_NODE_EXTENDED, T_NODE_IMPORT}; +use jellybase::database::{ + doc, DataAcid, ReadableTable, Ser, T_NODE, T_NODE_EXTENDED, T_NODE_IMPORT, +}; use jellycommon::{ExtendedNode, Node}; use log::info; use std::sync::RwLock; @@ -114,6 +116,13 @@ impl ImportStorage for MemoryStorage<'_> { table.drain::<&str>(..)?; drop(table); txn.commit()?; + self.db + .node_index + .writer + .read() + .unwrap() + .delete_all_documents()?; + self.db.node_index.writer.write().unwrap().commit()?; Ok(()) } fn get_partial_parts(&self, id: &str) -> anyhow::Result<Vec<(Vec<usize>, Node)>> { @@ -128,9 +137,22 @@ impl ImportStorage for MemoryStorage<'_> { fn insert_complete_node(&self, id: &str, node: Node) -> anyhow::Result<()> { let txn_write = self.db.inner.begin_write()?; let mut t_node = txn_write.open_table(T_NODE)?; - t_node.insert(id, Ser(node))?; + t_node.insert(id, Ser(node.clone()))?; drop(t_node); txn_write.commit()?; + + self.db + .node_index + .writer + .read() + .unwrap() + .add_document(doc!( + self.db.node_index.id => node.public.id.unwrap_or_default(), + self.db.node_index.title => node.public.title.unwrap_or_default(), + self.db.node_index.description => node.public.description.unwrap_or_default(), + self.db.node_index.releasedate => node.public.release_date.unwrap_or_default(), + self.db.node_index.f_index => node.public.index.unwrap_or_default() as u64, + ))?; Ok(()) } @@ -160,6 +182,7 @@ impl ImportStorage for MemoryStorage<'_> { } fn finish(&self) -> anyhow::Result<()> { + self.db.node_index.reader.reload()?; Ok(()) } } diff --git a/server/src/main.rs b/server/src/main.rs index 919ba50..d834481 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -8,6 +8,7 @@ #![feature(let_chains)] use crate::routes::ui::{account::hash_password, admin::log::enable_logging}; +use anyhow::Context; use database::DataAcid; use jellybase::{ database::{ReadableTable, Ser, T_USER}, @@ -29,7 +30,9 @@ async fn main() { log::warn!("authentification bypass enabled"); create_dir_all(&CONF.cache_path).await.unwrap(); - let database = DataAcid::open(&CONF.database_path).unwrap(); + let database = DataAcid::open(&CONF.database_path) + .context("opening database") + .unwrap(); let federation = Federation::initialize(); if let Some(username) = &CONF.admin_username diff --git a/server/src/routes/ui/search.rs b/server/src/routes/ui/search.rs index eb222c9..cafa755 100644 --- a/server/src/routes/ui/search.rs +++ b/server/src/routes/ui/search.rs @@ -5,7 +5,9 @@ use super::{ node::NodeCard, }; use edit_distance::edit_distance; -use jellybase::database::{DataAcid, ReadableTable, T_NODE, T_USER_NODE}; +use jellybase::database::{ + tantivy::query::QueryParser, DataAcid, ReadableTable, T_NODE, T_USER_NODE, +}; use rocket::{get, State}; #[get("/search?<query>")] @@ -14,47 +16,48 @@ pub async fn r_search<'a>( db: &State<DataAcid>, query: Option<&str>, ) -> MyResult<DynLayoutPage<'a>> { - let results = if let Some(query) = query { - let mut items = { - let txn = db.begin_read()?; - let nodes = txn.open_table(T_NODE)?; - let node_users = txn.open_table(T_USER_NODE)?; - let i = nodes - .iter()? - .map(|a| { - let (x, y) = a.unwrap(); - let (x, y) = (x.value().to_owned(), y.value().0); - let z = node_users - .get(&(session.user.name.as_str(), x.as_str())) - .unwrap() - .map(|z| z.value().0) - .unwrap_or_default(); - let y = y.public; - (x, y, z) - }) - .collect::<Vec<_>>(); - drop(nodes); - i - }; - - let query = query.to_lowercase(); - items.sort_by_cached_key(|(_, n, _)| { - n.title - .as_ref() - .map(|x| x.to_lowercase()) - .unwrap_or_default() - .split(" ") - .map(|tok| edit_distance(query.as_str(), tok)) - .min() - .unwrap_or(usize::MAX) - }); + // let results = if let Some(query) = query { + // let mut items = { + // let txn = db.begin_read()?; + // let nodes = txn.open_table(T_NODE)?; + // let node_users = txn.open_table(T_USER_NODE)?; + // let i = nodes + // .iter()? + // .map(|a| { + // let (x, y) = a.unwrap(); + // let (x, y) = (x.value().to_owned(), y.value().0); + // let z = node_users + // .get(&(session.user.name.as_str(), x.as_str())) + // .unwrap() + // .map(|z| z.value().0) + // .unwrap_or_default(); + // let y = y.public; + // (x, y, z) + // }) + // .collect::<Vec<_>>(); + // drop(nodes); + // i + // }; + // let query = query.to_lowercase(); + // items.sort_by_cached_key(|(_, n, _)| { + // n.title + // .as_ref() + // .map(|x| x.to_lowercase()) + // .unwrap_or_default() + // .split(" ") + // .map(|tok| edit_distance(query.as_str(), tok)) + // .min() + // .unwrap_or(usize::MAX) + // }); + // Some(items.into_iter().take(64).collect::<Vec<_>>()) + // } else { + // None + // }; - Some(items.into_iter().take(64).collect::<Vec<_>>()) - } else { - None - }; + let query = QueryParser::for_index(index, vec![]); - Ok(LayoutPage { + let searcher = db.node_index.reader.searcher(); + searcher.Ok(LayoutPage { title: "Search".to_string(), class: Some("search"), content: markup::new! { |