ptth/crates/ptth_server/src/file_server/mod.rs

694 lines
15 KiB
Rust
Raw Normal View History

2020-10-30 22:38:23 +00:00
// Static file server that can plug into the PTTH reverse server
2020-11-29 19:05:28 +00:00
// I'm not sure if I like this one
#![allow (clippy::enum_glob_use)]
2020-10-30 22:38:23 +00:00
use std::{
borrow::Cow,
cmp::min,
2020-11-29 19:05:28 +00:00
collections::HashMap,
2020-10-30 22:38:23 +00:00
convert::{Infallible, TryInto},
error::Error,
fmt::Debug,
2020-10-30 22:38:23 +00:00
io::SeekFrom,
path::{Path, PathBuf},
2020-10-30 22:38:23 +00:00
};
use handlebars::Handlebars;
2020-11-29 19:05:28 +00:00
use percent_encoding::{
percent_decode,
};
use serde::Serialize;
2020-10-30 22:38:23 +00:00
use tokio::{
fs::{
DirEntry,
2020-10-30 22:38:23 +00:00
File,
read_dir,
ReadDir,
},
io::AsyncReadExt,
sync::mpsc::{
channel,
},
};
use tracing::instrument;
2020-10-30 22:38:23 +00:00
use regex::Regex;
#[cfg (test)]
use always_equal::test::AlwaysEqual;
#[cfg (not (test))]
use always_equal::prod::AlwaysEqual;
2020-11-27 00:03:11 +00:00
use ptth_core::{
http_serde::{
Method,
Response,
StatusCode,
},
prelude::*,
prefix_match,
};
mod errors;
/// Identifying information about this server.
///
/// Serialized into the HTML templates: it is the whole context of the root
/// page and is flattened into the context of directory listings.
#[derive(Debug, Serialize)]
pub struct ServerInfo {
    /// Name of this server, made available to the templates.
    pub server_name: String,
}
/// One row of a directory listing, as handed to the directory template.
#[derive(Serialize)]
struct TemplateDirEntry {
    /// Emoji shown next to the entry.
    icon: &'static str,
    /// `"/"` for directories, `""` for everything else.
    trailing_slash: &'static str,

    /// Human-readable name of the entry.
    ///
    /// This has to be an owned `String` because on some platforms
    /// (Windows!) file names are not UTF-8, and separate code paths for
    /// such a small problem are not worth it.
    file_name: String,

    /// Percent-encoded form of `file_name`.
    ///
    /// Could share storage with `file_name` through a `Cow` when nothing
    /// needed encoding, but allocating is simpler.
    encoded_file_name: String,

    /// Pretty-printed size; empty for directories and error entries.
    size: Cow<'static, str>,

    /// `true` when this row is a placeholder for an entry that could not
    /// be read.
    error: bool,
}
/// Template context for a whole directory listing page.
#[derive(Serialize)]
struct TemplateDirPage<'a> {
    /// Server information; flattened so its fields are serialized at the
    /// top level of the page context.
    #[serde(flatten)]
    server_info: &'a ServerInfo,

    /// Path of the directory being listed.
    path: Cow<'a, str>,

    /// Rows of the listing.
    entries: Vec<TemplateDirEntry>,
}
2020-10-30 22:38:23 +00:00
fn parse_range_header (range_str: &str) -> (Option <u64>, Option <u64>) {
2020-11-29 19:05:28 +00:00
use lazy_static::lazy_static;
2020-10-30 22:38:23 +00:00
lazy_static! {
static ref RE: Regex = Regex::new (r"^bytes=(\d*)-(\d*)$").expect ("Couldn't compile regex for Range header");
}
debug! ("{}", range_str);
2020-10-30 22:38:23 +00:00
let caps = match RE.captures (range_str) {
Some (x) => x,
2020-11-29 19:05:28 +00:00
None => return (None, None),
2020-10-30 22:38:23 +00:00
};
let start = caps.get (1).map (|x| x.as_str ());
let end = caps.get (2).map (|x| x.as_str ());
2020-11-29 19:05:28 +00:00
let start = start.and_then (|x| u64::from_str_radix (x, 10).ok ());
// HTTP specifies ranges as [start inclusive, end inclusive]
// But that's dumb and [start inclusive, end exclusive) is better
2020-11-29 19:05:28 +00:00
let end = end.and_then (|x| u64::from_str_radix (x, 10).ok ().map (|x| x + 1));
2020-10-30 22:38:23 +00:00
(start, end)
}
use std::ops::Range;
/// Outcome of validating a request's `Range` header against a file length.
#[derive(Debug, PartialEq)]
enum ParsedRange {
    /// No range was requested; the contained range covers the whole file.
    Ok(Range<u64>),

    /// A range was requested and is valid; the end is exclusive.
    PartialContent(Range<u64>),

    /// The requested range cannot be served; carries the file length.
    RangeNotSatisfiable(u64),
}
fn check_range (range_str: Option <&str>, file_len: u64)
-> ParsedRange
{
use ParsedRange::*;
let not_satisfiable = RangeNotSatisfiable (file_len);
let range_str = match range_str {
None => return Ok (0..file_len),
Some (x) => x,
};
let (start, end) = parse_range_header (range_str);
let start = start.unwrap_or (0);
if start >= file_len {
return not_satisfiable;
}
let end = end.unwrap_or (file_len);
if end > file_len {
return not_satisfiable;
}
if end < start {
return not_satisfiable;
}
PartialContent (start..end)
}
2020-11-25 00:16:14 +00:00
/// Picks an emoji icon for a file based on the end of its name.
///
/// Video extensions (`.mp4`, `.avi`, `.mkv`, `.webm`) get a film icon,
/// picture extensions (`.jpg`, `.jpeg`, `.png`, `.bmp`) get a camera icon,
/// and everything else gets a generic page icon.
///
/// The comparison ignores ASCII case, so `DSC_0001.JPG` is recognised as a
/// picture just like `dsc_0001.jpg`.
fn get_icon(file_name: &str) -> &'static str {
    // Written as escapes because not every editor renders these.
    const VIDEO: &str = "\u{1f39e}\u{fe0f}";
    const PICTURE: &str = "\u{1f4f7}";
    const FILE: &str = "\u{1f4c4}";

    const VIDEO_SUFFIXES: [&str; 4] = [".mp4", ".avi", ".mkv", ".webm"];
    const PICTURE_SUFFIXES: [&str; 4] = [".jpg", ".jpeg", ".png", ".bmp"];

    /// True if `name` ends with any of `suffixes`, ignoring ASCII case.
    /// Works on bytes so multi-byte characters in `name` cannot cause a
    /// char-boundary panic.
    fn ends_with_any(name: &str, suffixes: &[&str]) -> bool {
        let name = name.as_bytes();
        suffixes.iter().any(|suffix| {
            let suffix = suffix.as_bytes();
            name.len() >= suffix.len()
                && name[name.len() - suffix.len()..].eq_ignore_ascii_case(suffix)
        })
    }

    if ends_with_any(file_name, &VIDEO_SUFFIXES) {
        VIDEO
    } else if ends_with_any(file_name, &PICTURE_SUFFIXES) {
        PICTURE
    } else {
        FILE
    }
}
async fn read_dir_entry (entry: DirEntry) -> TemplateDirEntry
{
2020-11-29 19:05:28 +00:00
use percent_encoding::{
CONTROLS,
utf8_percent_encode,
};
let file_name = match entry.file_name ().into_string () {
Ok (x) => x,
Err (_) => return TemplateDirEntry {
2020-11-29 19:05:28 +00:00
icon: "\u{26a0}\u{fe0f}",
trailing_slash: "",
file_name: "File / directory name is not UTF-8".into (),
encoded_file_name: "".into (),
size: "".into (),
error: true,
},
};
let metadata = match entry.metadata ().await {
Ok (x) => x,
Err (_) => return TemplateDirEntry {
2020-11-29 19:05:28 +00:00
icon: "\u{26a0}\u{fe0f}",
trailing_slash: "",
file_name: "Could not fetch metadata".into (),
encoded_file_name: "".into (),
size: "".into (),
error: true,
},
};
let (trailing_slash, icon, size) = {
let t = metadata.file_type ();
2020-11-29 19:05:28 +00:00
let icon_folder = "\u{1f4c1}";
if t.is_dir () {
2020-11-25 00:16:14 +00:00
("/", icon_folder, "".into ())
}
else {
2020-11-25 00:16:14 +00:00
("", get_icon (&file_name), pretty_print_bytes (metadata.len ()).into ())
}
};
let encoded_file_name = utf8_percent_encode (&file_name, CONTROLS).to_string ();
TemplateDirEntry {
icon,
trailing_slash: &trailing_slash,
file_name,
encoded_file_name,
size,
error: false,
}
}
/// Renders the `file_server_root` template with the server info and wraps
/// it in an HTML response.
///
/// Panics if the template fails to render.
async fn serve_root(handlebars: &Handlebars<'static>, server_info: &ServerInfo) -> Response {
    let html = handlebars
        .render("file_server_root", &server_info)
        .unwrap();

    serve_html(html)
}
/// Builds a default response whose body is `s`, labelled as UTF-8 HTML.
fn serve_html(s: String) -> Response {
    let content_type = b"text/html; charset=UTF-8".to_vec();

    let mut resp = Response::default();
    resp.header(String::from("content-type"), content_type);
    resp.body_bytes(s.into_bytes());
    resp
}
#[instrument (level = "debug", skip (handlebars, dir))]
async fn serve_dir (
handlebars: &Handlebars <'static>,
server_info: &ServerInfo,
path: Cow <'_, str>,
mut dir: ReadDir
) -> Response
{
let mut entries = vec! [];
2020-10-30 22:38:23 +00:00
while let Ok (Some (entry)) = dir.next_entry ().await {
entries.push (read_dir_entry (entry).await);
}
2020-10-30 22:38:23 +00:00
entries.sort_unstable_by (|a, b| a.file_name.partial_cmp (&b.file_name).unwrap ());
let s = handlebars.render ("file_server_dir", &TemplateDirPage {
path,
entries,
server_info,
}).unwrap ();
2020-10-30 22:38:23 +00:00
serve_html (s)
2020-10-30 22:38:23 +00:00
}
#[instrument (level = "debug", skip (f))]
2020-10-30 22:38:23 +00:00
async fn serve_file (
mut f: File,
should_send_body: bool,
range: Range <u64>,
range_requested: bool
)
-> Response
{
let (tx, rx) = channel (1);
2020-10-30 22:38:23 +00:00
let body = if should_send_body {
Some (rx)
2020-10-30 22:38:23 +00:00
}
else {
None
};
info! ("Serving range {}-{}", range.start, range.end);
2020-10-30 22:38:23 +00:00
let content_length = range.end - range.start;
2020-10-30 22:38:23 +00:00
let seek = SeekFrom::Start (range.start);
2020-10-30 22:38:23 +00:00
if should_send_body {
tokio::spawn (async move {
f.seek (seek).await.unwrap ();
2020-10-30 22:38:23 +00:00
let mut tx = tx;
let mut bytes_sent = 0;
let mut bytes_left = content_length;
2020-10-30 22:38:23 +00:00
2020-11-09 16:33:13 +00:00
let mark_interval = 200_000;
let mut next_mark = mark_interval;
2020-10-30 22:38:23 +00:00
loop {
2020-11-29 19:05:28 +00:00
let mut buffer = vec! [0_u8; 65_536];
2020-10-30 22:38:23 +00:00
let bytes_read: u64 = f.read (&mut buffer).await.unwrap ().try_into ().unwrap ();
let bytes_read = min (bytes_left, bytes_read);
buffer.truncate (bytes_read.try_into ().unwrap ());
if bytes_read == 0 {
break;
}
if tx.send (Ok::<_, Infallible> (buffer)).await.is_err () {
warn! ("Cancelling file stream (Sent {} out of {} bytes)", bytes_sent, content_length);
2020-10-30 22:38:23 +00:00
break;
}
bytes_left -= bytes_read;
if bytes_left == 0 {
debug! ("Finished");
2020-10-30 22:38:23 +00:00
break;
}
bytes_sent += bytes_read;
2020-11-09 16:33:13 +00:00
while next_mark <= bytes_sent {
trace! ("Sent {} bytes", next_mark);
next_mark += mark_interval;
}
2020-10-30 22:38:23 +00:00
//delay_for (Duration::from_millis (50)).await;
}
});
}
let mut response = Response::default ();
2020-10-30 22:38:23 +00:00
response.header (String::from ("accept-ranges"), b"bytes".to_vec ());
if range_requested {
response.status_code (StatusCode::PartialContent);
response.header (String::from ("content-range"), format! ("bytes {}-{}/{}", range.start, range.end - 1, range.end).into_bytes ());
}
else {
response.status_code (StatusCode::Ok);
response.header (String::from ("content-length"), range.end.to_string ().into_bytes ());
}
2020-11-29 19:05:28 +00:00
if should_send_body {
response.content_length = Some (content_length);
2020-10-30 22:38:23 +00:00
}
else {
2020-11-29 19:05:28 +00:00
response.status_code (StatusCode::NoContent);
}
2020-10-30 22:38:23 +00:00
if let Some (body) = body {
response.body (body);
}
response
}
fn serve_error (
status_code: StatusCode,
msg: &str
2020-10-30 22:38:23 +00:00
)
-> Response
2020-10-30 22:38:23 +00:00
{
let mut resp = Response::default ();
resp.status_code (status_code);
resp.body_bytes (msg.as_bytes ().to_vec ());
resp
}
/// Builds a temporary-redirect response pointing at `location`.
fn serve_307(location: String) -> Response {
    let body = String::from("Redirecting...").into_bytes();

    let mut resp = Response::default();
    resp.status_code(StatusCode::TemporaryRedirect);
    resp.header(String::from("location"), location.into_bytes());
    resp.body_bytes(body);
    resp
}
fn render_markdown (bytes: &[u8], out: &mut String) -> Result <(), MarkdownError> {
use pulldown_cmark::{Parser, Options, html};
let markdown_input = match std::str::from_utf8 (bytes) {
2020-11-25 00:16:14 +00:00
Err (_) => return Err (MarkdownError::NotUtf8),
Ok (x) => x,
};
let mut options = Options::empty ();
options.insert (Options::ENABLE_STRIKETHROUGH);
let parser = Parser::new_ext (markdown_input, options);
html::push_html (out, parser);
Ok (())
}
/// Renders Markdown `bytes` as HTML wrapped in a `<body>` element that
/// selects a sans-serif font.
///
/// Fails with the same error as `render_markdown`.
fn render_markdown_styled(bytes: &[u8]) -> Result<String, MarkdownError> {
    let mut page = String::from("<body style=\"font-family: sans-serif;\">");
    render_markdown(bytes, &mut page)?;
    page += "</body>";

    Ok(page)
}
// Sort of an internal API endpoint to make testing work better.
// Eventually we could expose this as JSON or Msgpack or whatever. For now
// it's just a Rust struct that we can test on without caring about
// human-readable HTML
/// Everything needed to render a directory listing.
#[derive(Debug, PartialEq)]
struct ServeDirParams {
    /// Full path of the directory (the server root joined with the
    /// requested path).
    path: PathBuf,

    /// The opened directory stream. Wrapped in `AlwaysEqual` so that
    /// `PartialEq` can be derived for this struct.
    dir: AlwaysEqual<ReadDir>,
}
/// Everything needed to serve (part of) a file.
#[derive(Debug, PartialEq)]
struct ServeFileParams {
    /// Whether the response should carry a body (`false` for HEAD).
    send_body: bool,

    /// Byte range to serve, end exclusive.
    range: Range<u64>,

    /// Whether the client explicitly asked for a range.
    range_requested: bool,

    /// The opened file. Wrapped in `AlwaysEqual` so that `PartialEq` can
    /// be derived for this struct.
    file: AlwaysEqual<File>,
}
/// Reasons a file cannot be previewed as Markdown.
#[derive(Debug, PartialEq)]
enum MarkdownError {
    /// The file is larger than the preview size limit.
    TooBig,

    // A `NotMarkdown` variant is currently disabled.
    // NotMarkdown,

    /// The file contents are not valid UTF-8.
    NotUtf8,
}
/// The routing decision for a request, before any HTTP response is built.
///
/// Kept separate from the HTTP response so the routing logic can be
/// compared in tests without inspecting rendered HTML.
#[derive(Debug, PartialEq)]
enum InternalResponse {
    /// The request was for `/favicon.ico`.
    Favicon,
    /// The requested object must not be served.
    Forbidden,
    /// The request URI could not be parsed.
    InvalidUri,
    /// A query string was supplied that does not apply to the object.
    InvalidQuery,
    /// The method is neither GET nor HEAD.
    MethodNotAllowed,
    /// Nothing exists at the requested path.
    NotFound,
    /// The requested byte range cannot be served; carries the file length.
    RangeNotSatisfiable(u64),
    /// The client should be redirected to the contained location.
    Redirect(String),
    /// The request is outside `/files`; show the root page.
    Root,
    /// List a directory.
    ServeDir(ServeDirParams),
    /// Serve (part of) a file.
    ServeFile(ServeFileParams),
    /// The Markdown preview failed.
    MarkdownErr(MarkdownError),
    /// The Markdown preview, already rendered to HTML.
    MarkdownPreview(String),
}
async fn internal_serve_all (
root: &Path,
method: Method,
uri: &str,
headers: &HashMap <String, Vec <u8>>,
hidden_path: Option <&Path>
2020-10-30 22:38:23 +00:00
)
-> InternalResponse
2020-10-30 22:38:23 +00:00
{
use std::str::FromStr;
use InternalResponse::*;
info! ("Client requested {}", uri);
2020-10-30 22:38:23 +00:00
let uri = match hyper::Uri::from_str (uri) {
Err (_) => return InvalidUri,
Ok (x) => x,
};
let send_body = match &method {
Method::Get => true,
Method::Head => false,
m => {
debug! ("Unsupported method {:?}", m);
return MethodNotAllowed;
}
};
2020-10-30 22:38:23 +00:00
if uri.path () == "/favicon.ico" {
return Favicon;
}
let path = match prefix_match ("/files", uri.path ()) {
Some (x) => x,
None => return Root,
};
2020-10-30 22:38:23 +00:00
if path == "" {
return Redirect ("files/".to_string ());
}
// TODO: There is totally a dir traversal attack in here somewhere
let encoded_path = &path [1..];
let path_s = percent_decode (encoded_path.as_bytes ()).decode_utf8 ().unwrap ();
let path = Path::new (&*path_s);
2020-10-30 22:38:23 +00:00
let full_path = root.join (path);
2020-10-30 22:38:23 +00:00
debug! ("full_path = {:?}", full_path);
if let Some (hidden_path) = hidden_path {
if full_path == hidden_path {
return Forbidden;
}
}
2020-11-25 00:16:14 +00:00
let has_trailing_slash = path_s.is_empty () || path_s.ends_with ('/');
if let Ok (dir) = read_dir (&full_path).await {
if ! has_trailing_slash {
return Redirect (format! ("{}/", path.file_name ().unwrap ().to_str ().unwrap ()));
}
if uri.query ().is_some () {
return InvalidQuery;
}
let dir = dir.into ();
ServeDir (ServeDirParams {
dir,
path: full_path,
})
2020-10-30 22:38:23 +00:00
}
else if let Ok (mut file) = File::open (&full_path).await {
use std::os::unix::fs::PermissionsExt;
let file_md = file.metadata ().await.unwrap ();
2020-11-26 23:51:10 +00:00
if file_md.permissions ().mode () == super::load_toml::CONFIG_PERMISSIONS_MODE
{
return Forbidden;
}
let file_len = file_md.len ();
2020-11-29 19:05:28 +00:00
let range_header = headers.get ("range").and_then (|v| std::str::from_utf8 (v).ok ());
match check_range (range_header, file_len) {
ParsedRange::RangeNotSatisfiable (file_len) => RangeNotSatisfiable (file_len),
ParsedRange::Ok (range) => {
if uri.query () == Some ("as_markdown") {
const MAX_BUF_SIZE: u32 = 1_000_000;
if file_len > MAX_BUF_SIZE.try_into ().unwrap () {
2020-11-25 00:16:14 +00:00
MarkdownErr (MarkdownError::TooBig)
}
else {
2020-11-29 19:05:28 +00:00
let mut buffer = vec! [0_u8; MAX_BUF_SIZE.try_into ().unwrap ()];
let bytes_read = file.read (&mut buffer).await.unwrap ();
buffer.truncate (bytes_read);
MarkdownPreview (render_markdown_styled (&buffer).unwrap ())
}
}
else {
let file = file.into ();
ServeFile (ServeFileParams {
file,
send_body,
range,
range_requested: false,
})
}
},
ParsedRange::PartialContent (range) => {
if uri.query ().is_some () {
InvalidQuery
}
else {
let file = file.into ();
ServeFile (ServeFileParams {
file,
send_body,
range,
range_requested: true,
})
}
},
}
2020-10-30 22:38:23 +00:00
}
else {
NotFound
}
}
#[instrument (level = "debug", skip (handlebars, headers))]
pub async fn serve_all (
handlebars: &Handlebars <'static>,
server_info: &ServerInfo,
root: &Path,
method: Method,
uri: &str,
headers: &HashMap <String, Vec <u8>>,
hidden_path: Option <&Path>
)
-> Response
{
use InternalResponse::*;
match internal_serve_all (root, method, uri, headers, hidden_path).await {
Favicon => serve_error (StatusCode::NotFound, ""),
Forbidden => serve_error (StatusCode::Forbidden, "403 Forbidden"),
InvalidUri => serve_error (StatusCode::BadRequest, "Invalid URI"),
InvalidQuery => serve_error (StatusCode::BadRequest, "Query is invalid for this object"),
MethodNotAllowed => serve_error (StatusCode::MethodNotAllowed, "Unsupported method"),
NotFound => serve_error (StatusCode::NotFound, "404 Not Found"),
RangeNotSatisfiable (file_len) => {
let mut resp = Response::default ();
resp.status_code (StatusCode::RangeNotSatisfiable)
.header ("content-range".to_string (), format! ("bytes */{}", file_len).into_bytes ());
resp
},
Redirect (location) => serve_307 (location),
Root => serve_root (handlebars, server_info).await,
ServeDir (ServeDirParams {
path,
dir,
}) => serve_dir (handlebars, server_info, path.to_string_lossy (), dir.into_inner ()).await,
ServeFile (ServeFileParams {
file,
send_body,
range,
range_requested,
}) => serve_file (file.into_inner (), send_body, range, range_requested).await,
MarkdownErr (e) => match e {
2020-11-25 00:16:14 +00:00
MarkdownError::TooBig => serve_error (StatusCode::InternalServerError, "File is too big to preview as Markdown"),
//MarkdownError::NotMarkdown => serve_error (StatusCode::BadRequest, "File is not Markdown"),
MarkdownError::NotUtf8 => serve_error (StatusCode::BadRequest, "File is not UTF-8"),
},
MarkdownPreview (s) => serve_html (s),
2020-10-30 22:38:23 +00:00
}
}
pub fn load_templates (
asset_root: &Path
)
2020-10-31 20:46:38 +00:00
-> Result <Handlebars <'static>, Box <dyn Error>>
{
2020-10-31 20:46:38 +00:00
let mut handlebars = Handlebars::new ();
handlebars.set_strict_mode (true);
let asset_root = asset_root.join ("handlebars/server");
2020-11-29 19:05:28 +00:00
for (k, v) in &[
2020-10-31 20:46:38 +00:00
("file_server_dir", "file_server_dir.html"),
("file_server_root", "file_server_root.html"),
2020-11-29 19:05:28 +00:00
] {
handlebars.register_template_file (k, asset_root.join (v))?;
}
Ok (handlebars)
}
/// Formats a byte count for humans using binary units.
///
/// Values below 1024 are printed exactly as `N B`. Larger values are
/// rounded to the nearest whole KiB, MiB or GiB; a value is promoted to
/// the next unit as soon as it would round to 1024 of the current one.
fn pretty_print_bytes(b: u64) -> String {
    const KIB: u64 = 1024;
    const MIB: u64 = 1024 * KIB;
    const GIB: u64 = 1024 * MIB;

    if b < KIB {
        return format!("{} B", b);
    }

    // Adding half a unit before dividing rounds to nearest.
    if b + KIB / 2 < MIB {
        return format!("{} KiB", (b + KIB / 2) / KIB);
    }

    if b + MIB / 2 < GIB {
        return format!("{} MiB", (b + MIB / 2) / MIB);
    }

    format!("{} GiB", (b + GIB / 2) / GIB)
}
#[cfg (test)]
mod tests;