aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormetamuffin <metamuffin@disroot.org>2024-04-15 15:19:29 +0200
committermetamuffin <metamuffin@disroot.org>2024-04-15 15:19:29 +0200
commitc988e7db759966d9586471e8cfcfd0d91e855dc0 (patch)
tree447f2b817bc04a9591f07c11e913b35b651cd0cd
parentc5bb4949eb0959ec3a3c2fa010d2d7549347e587 (diff)
downloadjellything-c988e7db759966d9586471e8cfcfd0d91e855dc0.tar
jellything-c988e7db759966d9586471e8cfcfd0d91e855dc0.tar.bz2
jellything-c988e7db759966d9586471e8cfcfd0d91e855dc0.tar.zst
fulltext search pt.2
-rw-r--r--base/src/database.rs43
-rw-r--r--import/src/db.rs27
-rw-r--r--server/src/main.rs5
-rw-r--r--server/src/routes/ui/search.rs83
4 files changed, 105 insertions, 53 deletions
diff --git a/base/src/database.rs b/base/src/database.rs
index f1ae595..2fabf1c 100644
--- a/base/src/database.rs
+++ b/base/src/database.rs
@@ -3,6 +3,7 @@
which is licensed under the GNU Affero General Public License (version 3); see /COPYING.
Copyright (C) 2024 metamuffin <metamuffin.org>
*/
+use anyhow::Context;
use bincode::{Decode, Encode};
use jellycommon::{
user::{NodeUserData, User},
@@ -10,13 +11,15 @@ use jellycommon::{
};
use log::info;
use serde::{Deserialize, Serialize};
-use std::{borrow::Borrow, fs::create_dir_all, ops::Deref, path::Path};
+use std::{borrow::Borrow, fs::create_dir_all, ops::Deref, path::Path, sync::RwLock};
use tantivy::{
- schema::{Field, Schema, FAST, STORED, TEXT},
- Index, IndexReader, ReloadPolicy,
+ directory::MmapDirectory,
+ schema::{Field, Schema, FAST, INDEXED, STORED, TEXT},
+ DateOptions, Index, IndexReader, IndexWriter, ReloadPolicy,
};
pub use redb::*;
+pub use tantivy;
pub const T_USER: TableDefinition<&str, Ser<User>> = TableDefinition::new("user");
pub const T_USER_NODE: TableDefinition<(&str, &str), Ser<NodeUserData>> =
@@ -30,16 +33,19 @@ pub const T_NODE_IMPORT: TableDefinition<&str, Ser<Vec<(Vec<usize>, Node)>>> =
pub struct DataAcid {
pub inner: redb::Database,
- pub ft_node: NodeFulltextIndex,
+ pub node_index: NodeFulltextIndex,
}
impl DataAcid {
pub fn open(path: &Path) -> Result<Self, anyhow::Error> {
info!("database");
+ create_dir_all(path)?;
let db = redb::Database::create(path.join("data"))?;
-
let ft_node = NodeFulltextIndex::new(path)?;
- let r = Self { inner: db, ft_node };
+ let r = Self {
+ inner: db,
+ node_index: ft_node,
+ };
{
// this creates all tables such that read operations on them do not fail.
@@ -68,10 +74,13 @@ impl Deref for DataAcid {
pub struct NodeFulltextIndex {
pub schema: Schema,
pub reader: IndexReader,
+ pub writer: RwLock<IndexWriter>,
+ pub index: Index,
pub id: Field,
pub title: Field,
+ pub releasedate: Field,
pub description: Field,
- pub index: Index,
+ pub f_index: Field,
}
impl NodeFulltextIndex {
fn new(path: &Path) -> anyhow::Result<Self> {
@@ -79,17 +88,31 @@ impl NodeFulltextIndex {
let id = schema.add_text_field("id", TEXT | STORED | FAST);
let title = schema.add_text_field("title", TEXT);
let description = schema.add_text_field("description", TEXT);
+ let f_index = schema.add_u64_field("index", FAST);
+ let releasedate = schema.add_date_field(
+ "releasedate",
+ DateOptions::from(INDEXED)
+ .set_fast()
+ .set_precision(tantivy::DateTimePrecision::Seconds),
+ );
let schema = schema.build();
- create_dir_all(path.join("node_fts_index"))?;
- let index = Index::create_in_dir(path.join("node_fts_index"), schema.clone())?;
+ create_dir_all(path.join("node_index"))?;
+ let directory =
+ MmapDirectory::open(path.join("node_index")).context("opening index directory")?;
+ let index = Index::open_or_create(directory, schema.clone()).context("creating index")?;
let reader = index
.reader_builder()
.reload_policy(ReloadPolicy::OnCommitWithDelay)
- .try_into()?;
+ .try_into()
+ .context("creating reader")?;
+ let writer = index.writer(30_000_000).context("creating writer")?;
Ok(Self {
index,
+ writer: writer.into(),
reader,
schema,
+ f_index,
+ releasedate,
id,
description,
title,
diff --git a/import/src/db.rs b/import/src/db.rs
index 88c5601..4c62681 100644
--- a/import/src/db.rs
+++ b/import/src/db.rs
@@ -1,7 +1,9 @@
use std::collections::HashMap;
use anyhow::anyhow;
-use jellybase::database::{DataAcid, ReadableTable, Ser, T_NODE, T_NODE_EXTENDED, T_NODE_IMPORT};
+use jellybase::database::{
+ doc, DataAcid, ReadableTable, Ser, T_NODE, T_NODE_EXTENDED, T_NODE_IMPORT,
+};
use jellycommon::{ExtendedNode, Node};
use log::info;
use std::sync::RwLock;
@@ -114,6 +116,13 @@ impl ImportStorage for MemoryStorage<'_> {
table.drain::<&str>(..)?;
drop(table);
txn.commit()?;
+ self.db
+ .node_index
+ .writer
+ .read()
+ .unwrap()
+ .delete_all_documents()?;
+ self.db.node_index.writer.write().unwrap().commit()?;
Ok(())
}
fn get_partial_parts(&self, id: &str) -> anyhow::Result<Vec<(Vec<usize>, Node)>> {
@@ -128,9 +137,22 @@ impl ImportStorage for MemoryStorage<'_> {
fn insert_complete_node(&self, id: &str, node: Node) -> anyhow::Result<()> {
let txn_write = self.db.inner.begin_write()?;
let mut t_node = txn_write.open_table(T_NODE)?;
- t_node.insert(id, Ser(node))?;
+ t_node.insert(id, Ser(node.clone()))?;
drop(t_node);
txn_write.commit()?;
+
+ self.db
+ .node_index
+ .writer
+ .read()
+ .unwrap()
+ .add_document(doc!(
+ self.db.node_index.id => node.public.id.unwrap_or_default(),
+ self.db.node_index.title => node.public.title.unwrap_or_default(),
+ self.db.node_index.description => node.public.description.unwrap_or_default(),
+ self.db.node_index.releasedate => node.public.release_date.unwrap_or_default(),
+ self.db.node_index.f_index => node.public.index.unwrap_or_default() as u64,
+ ))?;
Ok(())
}
@@ -160,6 +182,7 @@ impl ImportStorage for MemoryStorage<'_> {
}
fn finish(&self) -> anyhow::Result<()> {
+ self.db.node_index.reader.reload()?;
Ok(())
}
}
diff --git a/server/src/main.rs b/server/src/main.rs
index 919ba50..d834481 100644
--- a/server/src/main.rs
+++ b/server/src/main.rs
@@ -8,6 +8,7 @@
#![feature(let_chains)]
use crate::routes::ui::{account::hash_password, admin::log::enable_logging};
+use anyhow::Context;
use database::DataAcid;
use jellybase::{
database::{ReadableTable, Ser, T_USER},
@@ -29,7 +30,9 @@ async fn main() {
log::warn!("authentification bypass enabled");
create_dir_all(&CONF.cache_path).await.unwrap();
- let database = DataAcid::open(&CONF.database_path).unwrap();
+ let database = DataAcid::open(&CONF.database_path)
+ .context("opening database")
+ .unwrap();
let federation = Federation::initialize();
if let Some(username) = &CONF.admin_username
diff --git a/server/src/routes/ui/search.rs b/server/src/routes/ui/search.rs
index eb222c9..cafa755 100644
--- a/server/src/routes/ui/search.rs
+++ b/server/src/routes/ui/search.rs
@@ -5,7 +5,9 @@ use super::{
node::NodeCard,
};
use edit_distance::edit_distance;
-use jellybase::database::{DataAcid, ReadableTable, T_NODE, T_USER_NODE};
+use jellybase::database::{
+ tantivy::query::QueryParser, DataAcid, ReadableTable, T_NODE, T_USER_NODE,
+};
use rocket::{get, State};
#[get("/search?<query>")]
@@ -14,47 +16,48 @@ pub async fn r_search<'a>(
db: &State<DataAcid>,
query: Option<&str>,
) -> MyResult<DynLayoutPage<'a>> {
- let results = if let Some(query) = query {
- let mut items = {
- let txn = db.begin_read()?;
- let nodes = txn.open_table(T_NODE)?;
- let node_users = txn.open_table(T_USER_NODE)?;
- let i = nodes
- .iter()?
- .map(|a| {
- let (x, y) = a.unwrap();
- let (x, y) = (x.value().to_owned(), y.value().0);
- let z = node_users
- .get(&(session.user.name.as_str(), x.as_str()))
- .unwrap()
- .map(|z| z.value().0)
- .unwrap_or_default();
- let y = y.public;
- (x, y, z)
- })
- .collect::<Vec<_>>();
- drop(nodes);
- i
- };
-
- let query = query.to_lowercase();
- items.sort_by_cached_key(|(_, n, _)| {
- n.title
- .as_ref()
- .map(|x| x.to_lowercase())
- .unwrap_or_default()
- .split(" ")
- .map(|tok| edit_distance(query.as_str(), tok))
- .min()
- .unwrap_or(usize::MAX)
- });
+ // let results = if let Some(query) = query {
+ // let mut items = {
+ // let txn = db.begin_read()?;
+ // let nodes = txn.open_table(T_NODE)?;
+ // let node_users = txn.open_table(T_USER_NODE)?;
+ // let i = nodes
+ // .iter()?
+ // .map(|a| {
+ // let (x, y) = a.unwrap();
+ // let (x, y) = (x.value().to_owned(), y.value().0);
+ // let z = node_users
+ // .get(&(session.user.name.as_str(), x.as_str()))
+ // .unwrap()
+ // .map(|z| z.value().0)
+ // .unwrap_or_default();
+ // let y = y.public;
+ // (x, y, z)
+ // })
+ // .collect::<Vec<_>>();
+ // drop(nodes);
+ // i
+ // };
+ // let query = query.to_lowercase();
+ // items.sort_by_cached_key(|(_, n, _)| {
+ // n.title
+ // .as_ref()
+ // .map(|x| x.to_lowercase())
+ // .unwrap_or_default()
+ // .split(" ")
+ // .map(|tok| edit_distance(query.as_str(), tok))
+ // .min()
+ // .unwrap_or(usize::MAX)
+ // });
+ // Some(items.into_iter().take(64).collect::<Vec<_>>())
+ // } else {
+ // None
+ // };
- Some(items.into_iter().take(64).collect::<Vec<_>>())
- } else {
- None
- };
+ let query = QueryParser::for_index(index, vec![]);
- Ok(LayoutPage {
+ let searcher = db.node_index.reader.searcher();
+ searcher.Ok(LayoutPage {
title: "Search".to_string(),
class: Some("search"),
content: markup::new! {