ptth/crates/ptth_server/src/file_server/mod.rs

383 lines
9.3 KiB
Rust
Raw Normal View History

2020-10-30 22:38:23 +00:00
// Static file server that can plug into the PTTH reverse server
2020-11-29 19:05:28 +00:00
// I'm not sure if I like this lint, so it is allowed for the whole module
#![allow (clippy::enum_glob_use)]
2020-10-30 22:38:23 +00:00
use std::{
cmp::min,
2020-11-29 19:05:28 +00:00
collections::HashMap,
convert::{Infallible, TryFrom},
2020-10-30 22:38:23 +00:00
io::SeekFrom,
path::{
Path,
PathBuf,
},
sync::Arc,
2020-10-30 22:38:23 +00:00
};
use arc_swap::ArcSwap;
use handlebars::Handlebars;
use serde::Serialize;
2020-10-30 22:38:23 +00:00
use tokio::{
fs::{
DirEntry,
2020-10-30 22:38:23 +00:00
File,
ReadDir,
},
io::{
AsyncReadExt,
AsyncSeekExt,
},
2020-10-30 22:38:23 +00:00
sync::mpsc::{
channel,
},
};
use tracing::instrument;
2020-10-30 22:38:23 +00:00
2020-11-27 00:03:11 +00:00
use ptth_core::{
http_serde::{
Method,
Response,
StatusCode,
},
prelude::*,
};
pub mod errors;
pub mod metrics;
2020-12-20 18:38:39 +00:00
mod html;
mod internal;
#[cfg(feature = "markdown")]
mod markdown;
2020-11-29 22:31:54 +00:00
mod range;
use errors::FileServerError;
/// Configuration for the file server.
///
/// `Default` yields a config with no root set.
#[derive (Default)]
pub struct Config {
/// Optional root directory for the file server.
// NOTE(review): how a `None` root is handled is decided outside this
// section of the file - confirm against the callers.
pub file_server_root: Option <PathBuf>,
}
pub struct State {
pub config: Config,
pub handlebars: handlebars::Handlebars <'static>,
2020-12-20 19:00:22 +00:00
pub metrics_startup: metrics::Startup,
pub metrics_interval: Arc <ArcSwap <Option <metrics::Interval>>>,
pub hidden_path: Option <PathBuf>,
}
/// JSON document for a directory listing; built and serialized by
/// `serve_dir_json`.
#[derive (Serialize)]
struct DirJson {
/// The directory's entries, one element per entry that could be described.
entries: Vec <DirEntryJson>,
}
#[derive (Serialize)]
2020-12-20 18:38:39 +00:00
struct DirEntryJson {
name: String,
size: u64,
is_dir: bool,
}
/// Describe one directory entry for the JSON listing.
///
/// Returns `None` when the file name is not valid Unicode or when the
/// entry's metadata cannot be read, so the caller can skip the entry.
async fn read_dir_entry_json (entry: DirEntry) -> Option <DirEntryJson>
{
    let name = match entry.file_name ().into_string () {
        Ok (name) => name,
        Err (_) => return None,
    };

    let metadata = match entry.metadata ().await {
        Ok (metadata) => metadata,
        Err (_) => return None,
    };

    Some (DirEntryJson {
        name,
        size: metadata.len (),
        is_dir: metadata.is_dir (),
    })
}
/// Render a directory listing as a JSON response.
///
/// Entries are read until the directory is exhausted or reading fails;
/// entries that `read_dir_entry_json` cannot describe are skipped. The
/// listing is sorted by name before it is serialized.
async fn serve_dir_json (
    mut dir: ReadDir
) -> Result <Response, FileServerError>
{
    let mut listing = DirJson {
        entries: Vec::new (),
    };

    loop {
        let entry = match dir.next_entry ().await {
            Ok (Some (entry)) => entry,
            // End of directory or a read error: stop with what we have.
            _ => break,
        };

        if let Some (described) = read_dir_entry_json (entry).await {
            listing.entries.push (described);
        }
    }

    listing.entries.sort_unstable_by (|lhs, rhs| lhs.name.cmp (&rhs.name));

    let mut response = Response::default ();
    response.header (String::from ("content-type"), b"application/json; charset=UTF-8".to_vec ());

    let payload = serde_json::to_string (&listing).unwrap ().into_bytes ();
    response.body_bytes (payload);

    Ok (response)
}
#[instrument (level = "debug", skip (f))]
2020-10-30 22:38:23 +00:00
async fn serve_file (
mut f: File,
2021-04-03 16:21:59 +00:00
client_wants_body: bool,
range: range::ValidParsed,
if_none_match: Option <&Vec <u8>>,
)
2020-11-29 19:47:40 +00:00
-> Result <Response, FileServerError>
{
2021-04-03 16:21:59 +00:00
// Tripping the etag through UTF-8 isn't the best way to encourage it to
// be valid ASCII, but if I make it binary I might accidentally pass the
// hash binary as a header, which is not valid.
let etag = get_file_etag (&f).await.map (String::into_bytes);
let client_cache_hit = match &etag {
None => false,
Some (actual) => match &if_none_match {
None => false,
Some (if_none_match) => &actual == if_none_match,
}
};
let (tx, rx) = channel (1);
2021-04-03 16:21:59 +00:00
let body = if client_wants_body && ! client_cache_hit {
Some (rx)
2020-10-30 22:38:23 +00:00
}
else {
None
};
2020-11-29 22:31:54 +00:00
let (range, range_requested) = (range.range, range.range_requested);
info! ("Serving range {}-{}", range.start, range.end);
2020-10-30 22:38:23 +00:00
let content_length = range.end - range.start;
2020-10-30 22:38:23 +00:00
let seek = SeekFrom::Start (range.start);
2020-11-29 19:47:40 +00:00
f.seek (seek).await?;
2021-04-03 16:21:59 +00:00
if body.is_some () {
2020-10-30 22:38:23 +00:00
tokio::spawn (async move {
let mut bytes_sent = 0;
let mut bytes_left = content_length;
2020-10-30 22:38:23 +00:00
2020-11-09 16:33:13 +00:00
let mark_interval = 200_000;
let mut next_mark = mark_interval;
2020-10-30 22:38:23 +00:00
loop {
2020-11-29 19:05:28 +00:00
let mut buffer = vec! [0_u8; 65_536];
let bytes_read = f.read (&mut buffer).await.expect ("Couldn't read from file");
2020-10-30 22:38:23 +00:00
if bytes_read == 0 {
break;
}
buffer.truncate (bytes_read);
let bytes_read_64 = u64::try_from (bytes_read).expect ("Couldn't fit usize into u64");
let bytes_read_64 = min (bytes_left, bytes_read_64);
if tx.send (Ok::<_, Infallible> (buffer)).await.is_err () {
warn! ("Cancelling file stream (Sent {} out of {} bytes)", bytes_sent, content_length);
2020-10-30 22:38:23 +00:00
break;
}
bytes_left -= bytes_read_64;
2020-10-30 22:38:23 +00:00
if bytes_left == 0 {
debug! ("Finished");
2020-10-30 22:38:23 +00:00
break;
}
bytes_sent += bytes_read_64;
2020-11-09 16:33:13 +00:00
while next_mark <= bytes_sent {
trace! ("Sent {} bytes", next_mark);
next_mark += mark_interval;
}
2020-10-30 22:38:23 +00:00
//delay_for (Duration::from_millis (50)).await;
}
});
}
let mut response = Response::default ();
2020-10-30 22:38:23 +00:00
// The cache-related headers in HTTP have bad names. See here:
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control
// The intended semantics I'm using are:
// - etag - Some random hashed value that changes whenever the metadata
// (name, inode number, length, mtime) of a file changes. Also changes
2021-04-03 16:21:59 +00:00
// on new server instance. Maybe.
// - no-cache - Clients and the relay can store this, but should revalidate
// with the origin server (us) because only we can check if the file
// changed on disk.
// - max-age=0 - The file might change at any point during or after the
// request, so for proper invalidation, the client should immediately
// consider it stale.
response.header ("cache-control".to_string (), b"no-cache,max-age=0".to_vec ());
2021-04-03 16:21:59 +00:00
etag.map (|etag| {
response.header ("etag".to_string (), etag);
});
2020-10-30 22:38:23 +00:00
response.header (String::from ("accept-ranges"), b"bytes".to_vec ());
if range_requested {
response.status_code (StatusCode::PartialContent);
response.header (String::from ("content-range"), format! ("bytes {}-{}/{}", range.start, range.end - 1, range.end).into_bytes ());
}
else {
response.status_code (StatusCode::Ok);
response.header (String::from ("content-length"), range.end.to_string ().into_bytes ());
}
2021-04-03 16:21:59 +00:00
if client_cache_hit {
response.status_code (StatusCode::NotModified);
2020-10-30 22:38:23 +00:00
}
2021-04-03 16:21:59 +00:00
else if ! client_wants_body {
2020-11-29 19:05:28 +00:00
response.status_code (StatusCode::NoContent);
}
2021-04-03 16:21:59 +00:00
else {
response.content_length = Some (content_length);
}
2020-10-30 22:38:23 +00:00
if let Some (body) = body {
response.body (body);
}
2020-11-29 19:47:40 +00:00
Ok (response)
2020-10-30 22:38:23 +00:00
}
2021-04-03 16:21:59 +00:00
/// Compute an etag for an open file.
///
/// Currently a stub: the file's metadata is fetched but nothing is derived
/// from it, and the function always returns `None`. As a consequence
/// `serve_file` never emits an `etag` header and never reports a client
/// cache hit.
async fn get_file_etag (f: &File) -> Option <String>
{
    // Fetched but not used; the underscore keeps the unused binding from
    // triggering a compiler warning.
    let _md = f.metadata ().await;

    None
}
// Pass a request to the internal decision-making logic.
// When it returns, prettify it as HTML or JSON based on what the client
// asked for.
#[instrument (level = "debug", skip (state, headers))]
pub async fn serve_all (
state: &State,
root: &Path,
method: Method,
uri: &str,
headers: &HashMap <String, Vec <u8>>
)
-> Result <Response, FileServerError>
{
use internal::{
OutputFormat,
Response::*,
};
fn serve_error <S: Into <Vec <u8>>> (
status_code: StatusCode,
msg: S
)
-> Response
{
let mut resp = Response::default ();
resp.status_code (status_code);
resp.body_bytes (msg.into ());
resp
}
Ok (match internal::serve_all (root, method, uri, headers, state.hidden_path.as_deref ()).await? {
Favicon => serve_error (StatusCode::NotFound, "Not found\n"),
Forbidden => serve_error (StatusCode::Forbidden, "403 Forbidden\n"),
MethodNotAllowed => serve_error (StatusCode::MethodNotAllowed, "Unsupported method\n"),
NotFound => serve_error (StatusCode::NotFound, "404 Not Found\nAre you missing a trailing slash?\n"),
RangeNotSatisfiable (file_len) => {
let mut resp = Response::default ();
resp.status_code (StatusCode::RangeNotSatisfiable)
.header ("content-range".to_string (), format! ("bytes */{}", file_len).into_bytes ());
resp
},
Redirect (location) => {
let mut resp = Response::default ();
resp.status_code (StatusCode::TemporaryRedirect)
.header ("location".to_string (), location.into_bytes ());
resp.body_bytes (b"Redirecting...\n".to_vec ());
resp
},
InvalidQuery => serve_error (StatusCode::BadRequest, "Query is invalid for this object\n"),
Root => html::serve_root (state).await?,
ServeDir (internal::ServeDirParams {
path,
dir,
format
}) => match format {
OutputFormat::Json => serve_dir_json (dir.into_inner ()).await?,
OutputFormat::Html => html::serve_dir (&state.handlebars, &state.metrics_startup, path.to_string_lossy (), dir.into_inner ()).await?,
},
ServeFile (internal::ServeFileParams {
file,
send_body,
range,
2021-04-03 16:21:59 +00:00
}) => serve_file (file.into_inner (), send_body, range, headers.get ("if-none-match")).await?,
MarkdownErr (e) => {
#[cfg (feature = "markdown")]
{
use markdown::Error::*;
let e = e.inner;
let code = match &e {
TooBig => StatusCode::InternalServerError,
//NotMarkdown => serve_error (StatusCode::BadRequest, "File is not Markdown"),
NotUtf8 => StatusCode::BadRequest,
};
return Ok (serve_error (code, e.to_string ()));
}
#[cfg (not (feature = "markdown"))]
{
let _e = e;
serve_error (StatusCode::BadRequest, "Markdown feature is disabled")
}
},
MarkdownPreview (s) => html::serve (s),
})
2020-10-30 22:38:23 +00:00
}
pub fn load_templates (
asset_root: &Path
)
-> Result <Handlebars <'static>, handlebars::TemplateFileError>
{
2020-10-31 20:46:38 +00:00
let mut handlebars = Handlebars::new ();
handlebars.set_strict_mode (true);
let asset_root = asset_root.join ("handlebars/server");
2020-11-29 19:05:28 +00:00
for (k, v) in &[
2020-10-31 20:46:38 +00:00
("file_server_dir", "file_server_dir.html"),
("file_server_root", "file_server_root.html"),
2020-11-29 19:05:28 +00:00
] {
handlebars.register_template_file (k, asset_root.join (v))?;
}
Ok (handlebars)
}
/// Format a byte count for humans using binary units.
///
/// Counts below 1024 are printed exactly in `B`. Larger counts are rounded
/// to the nearest whole `KiB`, `MiB` or `GiB` (halves round up); the unit is
/// chosen so that the rounded number stays below 1024 for KiB and MiB.
///
/// The arithmetic never adds to `b` itself, so it cannot overflow even for
/// values close to `u64::MAX`.
fn pretty_print_bytes (b: u64) -> String {
    const KIB: u64 = 1024;
    const MIB: u64 = 1024 * KIB;
    const GIB: u64 = 1024 * MIB;

    // Same result as `(value + unit / 2) / unit`, but without the addition
    // that could overflow: the quotient and the rounded remainder are
    // computed separately.
    fn div_round (value: u64, unit: u64) -> u64 {
        value / unit + (value % unit + unit / 2) / unit
    }

    if b < KIB {
        format! ("{} B", b)
    }
    else if b < MIB - KIB / 2 {
        format! ("{} KiB", div_round (b, KIB))
    }
    else if b < GIB - MIB / 2 {
        format! ("{} MiB", div_round (b, MIB))
    }
    else {
        format! ("{} GiB", div_round (b, GIB))
    }
}
#[cfg (test)]
mod tests;