From 532cc431d1c5ca1ffcf429a4ccb94edc7848fe7a Mon Sep 17 00:00:00 2001 From: metamuffin Date: Thu, 30 May 2024 00:09:11 +0200 Subject: rename filters dir --- src/modules/files.rs | 390 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 390 insertions(+) create mode 100644 src/modules/files.rs (limited to 'src/modules/files.rs') diff --git a/src/modules/files.rs b/src/modules/files.rs new file mode 100644 index 0000000..4fdd5cd --- /dev/null +++ b/src/modules/files.rs @@ -0,0 +1,390 @@ +use super::{Node, NodeContext, NodeKind, NodeRequest, NodeResponse}; +use crate::{config::return_true, ServiceError}; +use bytes::{Bytes, BytesMut}; +use futures::Future; +use futures_util::{future, future::Either, ready, stream, FutureExt, Stream, StreamExt}; +use headers::{ + AcceptRanges, CacheControl, ContentLength, ContentRange, ContentType, HeaderMapExt, + LastModified, +}; +use http_body_util::{combinators::BoxBody, BodyExt, StreamBody}; +use humansize::FormatSizeOptions; +use hyper::{ + body::Frame, + header::{CONTENT_TYPE, LOCATION}, + http::HeaderValue, + Response, StatusCode, +}; +use log::debug; +use markup::Render; +use percent_encoding::percent_decode_str; +use serde::Deserialize; +use serde_yaml::Value; +use std::{ + fs::Metadata, + io, + ops::Range, + path::{Path, PathBuf}, + pin::Pin, + sync::Arc, + task::Poll, +}; +use tokio::{ + fs::{read_to_string, File}, + io::AsyncSeekExt, +}; +use tokio_util::io::poll_read_buf; + +pub struct FilesKind; +pub struct FileKind; + +#[derive(Debug, Deserialize)] +struct Files { + root: PathBuf, + #[serde(default)] + index: bool, + #[serde(default = "return_true")] + last_modified: bool, + // #[serde(default = "return_true")] + // etag: bool, + #[serde(default)] + cache: CacheMode, +} +#[derive(Debug, Default, Deserialize)] +#[serde(rename_all = "snake_case")] +enum CacheMode { + #[default] + Public, + Private, + NoStore, +} + +impl NodeKind for FilesKind { + fn name(&self) -> &'static str { + "files" + } + fn instanciate(&self, config: Value) -> anyhow::Result> { + Ok(Arc::new(serde_yaml::from_value::(config)?)) + } +} + +impl Node for Files { + fn handle<'a>( + &'a self, + _context: &'a mut NodeContext, + request: NodeRequest, + ) -> Pin> + Send + Sync + 'a>> { + Box::pin(async move { + let rpath = request.uri().path(); + + let mut path = self.root.clone(); + let mut user_path_depth = 0; + for seg in rpath.split("/") { + let seg = percent_decode_str(seg).decode_utf8()?; + + if seg == "" || seg == "." { + continue; + } + + if seg == ".." { + if user_path_depth <= 0 { + return Err(ServiceError::BadPath); + } + path.pop(); + user_path_depth -= 1; + } else { + path.push(seg.as_ref()); + user_path_depth += 1; + } + } + if !path.exists() { + return Err(ServiceError::NotFound); + } + + let metadata = path.metadata()?; + + if metadata.file_type().is_dir() { + debug!("sending index for {path:?}"); + if let Ok(indexhtml) = read_to_string(path.join("index.html")).await { + return Ok(html_string_response(indexhtml)); + } + + if self.index { + if !rpath.ends_with("/") { + let mut r = Response::new(String::new()); + *r.status_mut() = StatusCode::FOUND; + r.headers_mut().insert( + LOCATION, + HeaderValue::from_str(&format!("{}/", rpath)) + .map_err(|_| ServiceError::Other)?, + ); + return Ok(r.map(|b| b.map_err(|e| match e {}).boxed())); + } + + return index(&path, rpath.to_string()) + .await + .map(html_string_response); + } else { + return Err(ServiceError::NotFound); + } + } + + let modified = metadata.modified()?; + + let not_modified = if self.last_modified { + request + .headers() + .typed_get::() + .map(|if_modified_since| { + Ok::<_, ServiceError>(!if_modified_since.is_modified(modified)) + }) + .transpose()? + .unwrap_or_default() + } else { + false + }; + + // let etag = ETag::from_str(&calc_etag(modified)).map_err(|_| ServiceError::Other)?; + // let etag_matches = if self.etag { + // request.headers() + // .typed_get::() + // .map(|if_none_match| if_none_match.precondition_passes(&etag)) + // .unwrap_or_default() + // } else { + // false + // }; + + let range = request.headers().typed_get::(); + let range = bytes_range(range, metadata.len())?; + + debug!("sending file {path:?}"); + let file = File::open(path.clone()).await?; + + // let skip_body = not_modified || etag_matches; + let skip_body = not_modified; + let mut r = if skip_body { + Response::new("".to_string()).map(|b| b.map_err(|e| match e {}).boxed()) + } else { + Response::new(BoxBody::new(StreamBody::new( + StreamBody::new(file_stream(file, 4096, range.clone())) + .map(|e| e.map(|e| Frame::data(e)).map_err(ServiceError::Io)), + ))) + }; + + if !skip_body { + if range.end - range.start != metadata.len() { + *r.status_mut() = StatusCode::PARTIAL_CONTENT; + r.headers_mut().typed_insert( + ContentRange::bytes(range.clone(), metadata.len()) + .expect("valid ContentRange"), + ); + } + } + // if not_modified || etag_matches { + if not_modified { + *r.status_mut() = StatusCode::NOT_MODIFIED; + } + + r.headers_mut().typed_insert(AcceptRanges::bytes()); + r.headers_mut() + .typed_insert(ContentLength(range.end - range.start)); + + let mime = mime_guess::from_path(path).first_or_octet_stream(); + r.headers_mut().typed_insert(ContentType::from(mime)); + + r.headers_mut().typed_insert(match self.cache { + CacheMode::Public => CacheControl::new().with_public(), + CacheMode::Private => CacheControl::new().with_private(), + CacheMode::NoStore => CacheControl::new().with_no_store(), + }); + + // if self.etag { + // r.headers_mut().typed_insert(etag); + // } + if self.last_modified { + r.headers_mut().typed_insert(LastModified::from(modified)); + } + + Ok(r) + }) + } +} + +// Adapted from warp (https://github.com/seanmonstar/warp/blob/master/src/filters/fs.rs). Thanks! +fn file_stream( + mut file: File, + buf_size: usize, + range: Range, +) -> impl Stream> + Send { + use std::io::SeekFrom; + + let seek = async move { + if range.start != 0 { + file.seek(SeekFrom::Start(range.start)).await?; + } + Ok(file) + }; + + seek.into_stream() + .map(move |result| { + let mut buf = BytesMut::new(); + let mut len = range.end - range.start; + let mut f = match result { + Ok(f) => f, + Err(f) => return Either::Left(stream::once(future::err(f))), + }; + + Either::Right(stream::poll_fn(move |cx| { + if len == 0 { + return Poll::Ready(None); + } + reserve_at_least(&mut buf, buf_size); + + let n = match ready!(poll_read_buf(Pin::new(&mut f), cx, &mut buf)) { + Ok(n) => n as u64, + Err(err) => { + debug!("file read error: {}", err); + return Poll::Ready(Some(Err(err))); + } + }; + + if n == 0 { + debug!("file read found EOF before expected length"); + return Poll::Ready(None); + } + + let mut chunk = buf.split().freeze(); + if n > len { + chunk = chunk.split_to(len as usize); + len = 0; + } else { + len -= n; + } + + Poll::Ready(Some(Ok(chunk))) + })) + }) + .flatten() +} + +// Also adapted from warp +fn bytes_range(range: Option, max_len: u64) -> Result, ServiceError> { + use std::ops::Bound; + + let range = if let Some(range) = range { + range + } else { + return Ok(0..max_len); + }; + + let ret = range + .satisfiable_ranges(max_len) + .map(|(start, end)| { + let start = match start { + Bound::Unbounded => 0, + Bound::Included(s) => s, + Bound::Excluded(s) => s + 1, + }; + + let end = match end { + Bound::Unbounded => max_len, + Bound::Included(s) => { + // For the special case where s == the file size + if s == max_len { + s + } else { + s + 1 + } + } + Bound::Excluded(s) => s, + }; + + if start < end && end <= max_len { + Ok(start..end) + } else { + Err(ServiceError::BadRange) + } + }) + .next() + .unwrap_or(Ok(0..max_len)); + ret +} + +fn reserve_at_least(buf: &mut BytesMut, cap: usize) { + if buf.capacity() - buf.len() < cap { + buf.reserve(cap); + } +} + +async fn index(path: &Path, rpath: String) -> Result { + let files = path + .read_dir()? + .map(|e| e.and_then(|e| Ok((e.file_name().into_string().unwrap(), e.metadata()?)))) + .filter(|e| e.as_ref().map(|(e, _)| !e.starts_with(".")).unwrap_or(true)) + .collect::, _>>()?; + let banner = read_to_string(path.join("index.banner.html")).await.ok(); + let mut s = String::new(); + IndexTemplate { + files, + banner, + path: rpath, + } + .render(&mut s) + .unwrap(); + Ok(s) +} + +fn html_string_response(s: String) -> hyper::Response> { + let mut r = Response::new(s); + r.headers_mut() + .insert(CONTENT_TYPE, HeaderValue::from_static("text/html")); + r.map(|b| b.map_err(|e| match e {}).boxed()) +} + +markup::define! { + IndexTemplate(path: String, banner: Option, files: Vec<(String, Metadata)>) { + @markup::doctype() + html { + head { + meta[charset="UTF-8"]; + title { "Index of " @path } + } + body { + @if let Some(banner) = banner { + @markup::raw(banner) + } else { + h1 { "Index of " @path } + } + hr; + table { + @if path != "/" { + tr { td { b { a[href=".."] { "../" } } } } + } + @for (name, meta) in files { tr { + td { a[href=name] { + @name + @if meta.file_type().is_dir() { "/" } + } } + td { + @if meta.file_type().is_dir() { + i { "directory" } + } else { + @humansize::format_size(meta.len(), FormatSizeOptions::default()) + } + } + } } + } + hr; + footer { sub { "served by " a[href="https://codeberg.org/metamuffin/gnix"] { "gnix" } } } + } + } + } +} + +// fn calc_etag(s: SystemTime) -> String { +// // TODO: make this not change after server restart but still unguessable +// let mut hasher = DefaultHasher::new(); +// s.hash(&mut hasher); +// let hash = hasher.finish(); +// hex::encode(hash.to_le_bytes()) +// } -- cgit v1.2.3-70-g09d2