From 9ac44cfeb7a938ad6a8df5a002a6d4db53f87b36 Mon Sep 17 00:00:00 2001
From: _ <>
Date: Wed, 16 Dec 2020 14:46:03 +0000
Subject: [PATCH] :star: new: finish MVP for scraper auth.

Adding a SQLite DB to properly track the keys is going to take a while. For
now I'll just keep them in the config file and give them 30-day expirations.
---
 crates/ptth_relay/src/config.rs          | 29 +++++++--
 crates/ptth_relay/src/key_validity.rs    | 41 +++++++-----
 crates/ptth_relay/src/lib.rs             |  2 +
 crates/ptth_relay/src/scraper_api.rs     | 80 ++++++++++++++++--------
 issues/2020-12Dec/auth-route-YNQAQKJS.md | 50 ++++++++++++++-
 src/tests.rs                             | 16 ++---
 6 files changed, 161 insertions(+), 57 deletions(-)

diff --git a/crates/ptth_relay/src/config.rs b/crates/ptth_relay/src/config.rs
index 3877f47..d444081 100644
--- a/crates/ptth_relay/src/config.rs
+++ b/crates/ptth_relay/src/config.rs
@@ -8,7 +8,10 @@ use std::{
 	path::Path,
 };
 
-use crate::errors::ConfigError;
+use crate::{
+	errors::ConfigError,
+	key_validity::*,
+};
 
 // Stuff we need to load from the config file and use to
 // set up the HTTP server
@@ -28,7 +31,7 @@ pub mod file {
 	
 	#[derive (Deserialize)]
 	pub struct DevMode {
-		pub scraper_key: Option <ScraperKey <Valid7Days>>,
+		
 	}
 	
 	// Stuff that's identical between the file and the runtime structures
@@ -45,19 +48,25 @@ pub mod file {
 	
 	#[derive (Deserialize)]
 	pub struct Config {
-		pub port: Option <u16>,
-		pub servers: Vec <Server>,
 		#[serde (flatten)]
 		pub iso: Isomorphic,
+		
+		pub port: Option <u16>,
+		pub servers: Vec <Server>,
+		
+		// Adding a DB will take a while, so I'm moving these out of dev mode.
+		pub scraper_keys: Vec <ScraperKey <Valid30Days>>,
 	}
 }
 
 // Stuff we actually need at runtime
 
 pub struct Config {
+	pub iso: file::Isomorphic,
+	
 	pub port: Option <u16>,
 	pub servers: HashMap <String, Server>,
-	pub iso: file::Isomorphic,
+	pub scraper_keys: HashMap <String, ScraperKey <Valid30Days>>,
 }
 
 impl TryFrom <file::Config> for Config {
@@ -69,10 +78,18 @@ impl TryFrom <file::Config> for Config {
 		let servers = itertools::process_results (servers, |i| HashMap::from_iter (i))?;
 		
+		let scraper_keys = if f.iso.enable_scraper_api {
+			HashMap::from_iter (f.scraper_keys.into_iter ().map (|key| (key.hash.encode_base64 (), key)))
+		}
+		else {
+			Default::default ()
+		};
+		
 		Ok (Self {
+			iso: f.iso,
 			port: f.port,
 			servers,
-			iso: f.iso,
+			scraper_keys,
 		})
 	}
 }
diff --git a/crates/ptth_relay/src/key_validity.rs b/crates/ptth_relay/src/key_validity.rs
index c75603e..dbb3704 100644
--- a/crates/ptth_relay/src/key_validity.rs
+++ b/crates/ptth_relay/src/key_validity.rs
@@ -69,7 +69,7 @@ impl <'de> Deserialize <'de> for BlakeHashWrapper {
 }
 
 pub struct Valid7Days;
-//pub struct Valid30Days;
+pub struct Valid30Days;
 //pub struct Valid90Days;
 
 pub trait MaxValidDuration {
@@ -82,11 +82,19 @@ impl MaxValidDuration for Valid7Days {
 	}
 }
 
+impl MaxValidDuration for Valid30Days {
+	fn dur () -> Duration {
+		Duration::days (30)
+	}
+}
+
 #[derive (Deserialize)]
 pub struct ScraperKey <V: MaxValidDuration> {
+	name: String,
+	
 	not_before: DateTime <Utc>,
 	not_after: DateTime <Utc>,
-	hash: BlakeHashWrapper,
+	pub hash: BlakeHashWrapper,
 	
 	#[serde (default)]
 	_phantom: std::marker::PhantomData <V>,
@@ -103,11 +111,12 @@ pub enum KeyValidity {
 	DurationNegative,
 }
 
-impl ScraperKey <Valid7Days> {
-	pub fn new (input: &[u8]) -> Self {
+impl ScraperKey <Valid30Days> {
+	pub fn new_30_day <S: Into <String>> (name: S, input: &[u8]) -> Self {
 		let now = Utc::now ();
 		
 		Self {
+			name: name.into (),
 			not_before: now,
 			not_after: now + Duration::days (7),
 			hash: BlakeHashWrapper::from_key (input),
@@ -161,7 +170,8 @@ mod tests {
 	fn duration_negative () {
 		let zero_time = Utc::now ();
 		
-		let key = ScraperKey::<Valid7Days> {
+		let key = ScraperKey::<Valid30Days> {
+			name: "automated testing".to_string (),
 			not_before: zero_time + Duration::days (1 + 2),
 			not_after: zero_time + Duration::days (1),
 			hash: BlakeHashWrapper::from_key ("bad_password".as_bytes ()),
@@ -183,14 +193,15 @@
 	fn key_valid_too_long () {
 		let zero_time = Utc::now ();
 		
-		let key = ScraperKey::<Valid7Days> {
+		let key = ScraperKey::<Valid30Days> {
+			name: "automated testing".to_string (),
 			not_before: zero_time + Duration::days (1),
-			not_after: zero_time + Duration::days (1 + 8),
+			not_after: zero_time + Duration::days (1 + 31),
 			hash: BlakeHashWrapper::from_key ("bad_password".as_bytes ()),
 			_phantom: Default::default (),
 		};
 		
-		let err = DurationTooLong (Duration::days (7));
+		let err = DurationTooLong (Duration::days (30));
 		
 		for (input, expected) in &[
 			(zero_time + Duration::days (0), err),
@@ -205,9 +216,10 @@
 	fn normal_key () {
 		let zero_time = Utc::now ();
 		
-		let key = ScraperKey::<Valid7Days> {
+		let key = ScraperKey::<Valid30Days> {
+			name: "automated testing".to_string (),
 			not_before: zero_time + Duration::days (1),
-			not_after: zero_time + Duration::days (1 + 7),
+			not_after: zero_time + Duration::days (1 + 30),
 			hash: BlakeHashWrapper::from_key ("bad_password".as_bytes ()),
 			_phantom: Default::default (),
 		};
@@ -215,7 +227,7 @@
 		for (input, expected) in &[
 			(zero_time + Duration::days (0), ClockIsBehind),
 			(zero_time + Duration::days (2), Valid),
-			(zero_time + Duration::days (1 + 7), Expired),
+			(zero_time + Duration::days (1 + 30), Expired),
 			(zero_time + Duration::days (100), Expired),
 		] {
 			assert_eq! (key.is_valid (*input, "bad_password".as_bytes ()), *expected);
@@ -226,9 +238,10 @@
 	fn wrong_key () {
 		let zero_time = Utc::now ();
 		
-		let key = ScraperKey::<Valid7Days> {
+		let key = ScraperKey::<Valid30Days> {
+			name: "automated testing".to_string (),
 			not_before: zero_time + Duration::days (1),
-			not_after: zero_time + Duration::days (1 + 7),
+			not_after: zero_time + Duration::days (1 + 30),
 			hash: BlakeHashWrapper::from_key ("bad_password".as_bytes ()),
 			_phantom: Default::default (),
 		};
@@ -236,7 +249,7 @@
 		for input in &[
 			zero_time + Duration::days (0),
 			zero_time + Duration::days (2),
-			zero_time + Duration::days (1 + 7),
+			zero_time + Duration::days (1 + 30),
 			zero_time + Duration::days (100),
 		] {
 			let validity = key.is_valid (*input, "badder_password".as_bytes ());
diff --git a/crates/ptth_relay/src/lib.rs b/crates/ptth_relay/src/lib.rs
index e74799a..f3c0057 100644
--- a/crates/ptth_relay/src/lib.rs
+++ b/crates/ptth_relay/src/lib.rs
@@ -338,6 +338,8 @@ async fn handle_all (
 		server_endpoint::handle_listen (state, listen_code.into (), api_key.as_bytes ()).await
 	}
 	else if let Some (rest) = prefix_match ("/frontend/servers/", &path) {
+		// DRY T4H76LB3
+		
 		if rest == "" {
 			Ok (handle_server_list (state, handlebars).await?)
 		}
diff --git a/crates/ptth_relay/src/scraper_api.rs b/crates/ptth_relay/src/scraper_api.rs
index 32ba64e..ab391fc 100644
--- a/crates/ptth_relay/src/scraper_api.rs
+++ b/crates/ptth_relay/src/scraper_api.rs
@@ -23,11 +23,21 @@ use tracing::{
 use crate::{
 	RequestError,
 	error_reply,
-	key_validity::KeyValidity,
+	key_validity::*,
 	prefix_match,
 	relay_state::RelayState,
 };
 
+// Not sure if this is the best way to do a hard-coded string table, but
+// it'll keep the tests up to date
+
+mod strings {
+	pub const FORBIDDEN: &str = "403 Forbidden";
+	pub const NO_API_KEY: &str = "Can't auth as scraper without API key";
+	pub const UNKNOWN_API_VERSION: &str = "Unknown scraper API version";
+	pub const UNKNOWN_API_ENDPOINT: &str = "Unknown scraper API endpoint";
+}
+
 // JSON is probably Good Enough For Now, so I'll just make everything
 // a struct and lazily serialize it right before leaving the
 // top-level handle () fn.
@@ -109,27 +119,25 @@ async fn api_v1 (
 	let api_key = req.headers ().get ("X-ApiKey");
 	
 	let api_key = match api_key {
-		None => return Ok (error_reply (StatusCode::FORBIDDEN, "Can't run scraper without an API key")?),
+		None => return Ok (error_reply (StatusCode::FORBIDDEN, strings::NO_API_KEY)?),
 		Some (x) => x,
 	};
 	
-	let bad_key = || error_reply (StatusCode::FORBIDDEN, "403 Forbidden");
+	let actual_hash = BlakeHashWrapper::from_key (api_key.as_bytes ());
+	
+	let bad_key = || error_reply (StatusCode::FORBIDDEN, strings::FORBIDDEN);
 	
 	{
 		let config = state.config.read ().await;
 		
-		let dev_mode = match &config.iso.dev_mode {
-			None => return Ok (bad_key ()?),
+		let expected_key = match config.scraper_keys.get (&actual_hash.encode_base64 ()) {
 			Some (x) => x,
-		};
-		
-		let expected_key = match &dev_mode.scraper_key {
 			None => return Ok (bad_key ()?),
-			Some (x) => x,
 		};
 		
 		let now = chrono::Utc::now ();
 		
+		// The hash in is_valid is redundant
 		match expected_key.is_valid (now, api_key.as_bytes ()) {
 			KeyValidity::Valid => (),
 			KeyValidity::WrongKey (bad_hash) => {
@@ -150,8 +158,25 @@ async fn api_v1 (
 		let x = v1_server_list (&state).await;
 		Ok (error_reply (StatusCode::OK, &serde_json::to_string (&x).unwrap ())?)
 	}
+	else if let Some (rest) = prefix_match ("server/", path_rest) {
+		// DRY T4H76LB3
+		
+		if let Some (idx) = rest.find ('/') {
+			let listen_code = String::from (&rest [0..idx]);
+			let path = String::from (&rest [idx..]);
+			let (parts, _) = req.into_parts ();
+			
+			// This is ugly. I don't like having scraper_api know about the
+			// crate root.
+			
+			Ok (crate::handle_http_request (parts, path, state, listen_code).await?)
+		}
+		else {
+			Ok (error_reply (StatusCode::BAD_REQUEST, "Bad URI format")?)
+		}
+	}
 	else {
-		Ok (error_reply (StatusCode::NOT_FOUND, "Unknown API endpoint")?)
+		Ok (error_reply (StatusCode::NOT_FOUND, strings::UNKNOWN_API_ENDPOINT)?)
 	}
 }
 
@@ -176,7 +201,7 @@ pub async fn handle (
 		api_v1 (req, state, rest).await
 	}
 	else {
-		Ok (error_reply (StatusCode::NOT_FOUND, "Unknown scraper API version")?)
+		Ok (error_reply (StatusCode::NOT_FOUND, strings::UNKNOWN_API_VERSION)?)
 	}
 }
 
@@ -203,7 +228,7 @@ mod tests {
 		// Expected
 		expected_status: StatusCode,
 		expected_headers: Vec <(&'static str, &'static str)>,
-		expected_body: &'static str,
+		expected_body: String,
 	}
 	
 	impl TestCase {
@@ -237,16 +262,16 @@ mod tests {
 			x
 		}
 		
-		fn expected_body (&self, v: &'static str) -> Self {
+		fn expected_body (&self, v: String) -> Self {
 			let mut x = self.clone ();
 			x.expected_body = v;
 			x
 		}
 		
-		fn expected (&self, sc: StatusCode, body: &'static str) -> Self {
+		fn expected (&self, sc: StatusCode, body: &str) -> Self {
 			self
 			.expected_status (sc)
-			.expected_body (body)
+			.expected_body (format! ("{}\n", body))
 		}
 		
 		async fn test (&self) {
@@ -260,14 +285,15 @@ mod tests {
 			let input = input.body (Body::empty ()).unwrap ();
 			
 			let config_file = config::file::Config {
-				port: Some (4000),
-				servers: vec! [],
 				iso: config::file::Isomorphic {
 					enable_scraper_api: true,
-					dev_mode: self.valid_key.map (|key| config::file::DevMode {
-						scraper_key: Some (key_validity::ScraperKey::new (key.as_bytes ())),
-					}),
+					dev_mode: Default::default (),
 				},
+				port: Some (4000),
+				servers: vec! [],
+				scraper_keys: vec! [
+					self.valid_key.map (|x| key_validity::ScraperKey::new_30_day ("automated test", x.as_bytes ())),
+				].into_iter ().filter_map (|x| x).collect (),
 			};
 			
 			let config = config::Config::try_from (config_file).expect ("Can't load config");
@@ -292,7 +318,7 @@ mod tests {
 			let actual_body = actual_body.to_vec ();
 			let actual_body = String::from_utf8 (actual_body).expect ("Body should be UTF-8");
 			
-			assert_eq! (&actual_body, self.expected_body);
+			assert_eq! (actual_body, self.expected_body);
 		}
 	}
 
@@ -309,21 +335,21 @@ mod tests {
 			expected_headers: vec! [
 				("content-type", "text/plain"),
 			],
-			expected_body: "You're valid!\n",
+			expected_body: "You're valid!\n".to_string (),
 		};
 		
 		for case in &[
 			base_case.clone (),
 			base_case.path_rest ("v9999/test")
-			.expected (StatusCode::NOT_FOUND, "Unknown scraper API version\n"),
+			.expected (StatusCode::NOT_FOUND, strings::UNKNOWN_API_VERSION),
 			base_case.valid_key (None)
-			.expected (StatusCode::FORBIDDEN, "403 Forbidden\n"),
+			.expected (StatusCode::FORBIDDEN, strings::FORBIDDEN),
 			base_case.input_key (Some ("borgus"))
-			.expected (StatusCode::FORBIDDEN, "403 Forbidden\n"),
+			.expected (StatusCode::FORBIDDEN, strings::FORBIDDEN),
 			base_case.path_rest ("v1/toast")
-			.expected (StatusCode::NOT_FOUND, "Unknown API endpoint\n"),
+			.expected (StatusCode::NOT_FOUND, strings::UNKNOWN_API_ENDPOINT),
 			base_case.input_key (None)
-			.expected (StatusCode::FORBIDDEN, "Can't run scraper without an API key\n"),
+			.expected (StatusCode::FORBIDDEN, strings::NO_API_KEY),
 		] {
 			case.test ().await;
 		}
diff --git a/issues/2020-12Dec/auth-route-YNQAQKJS.md b/issues/2020-12Dec/auth-route-YNQAQKJS.md
index e161f08..9c114f7 100644
--- a/issues/2020-12Dec/auth-route-YNQAQKJS.md
+++ b/issues/2020-12Dec/auth-route-YNQAQKJS.md
@@ -2,6 +2,52 @@
 
 (Find this issue with `git grep YNQAQKJS`)
 
+## Test curl commands
+
+(In production the API key should be loaded from a file. Putting it in the
+Bash command is bad, because it will be saved to Bash's history file. Putting
+it in environment variables is slightly better)
+
+`curl -H "X-ApiKey: $API_KEY" 127.0.0.1:4000/scraper/api/test`
+
+Should return "You're valid!"
+
+`curl -H "X-ApiKey: $API_KEY" 127.0.0.1:4000/scraper/v1/server_list`
+
+Should return a JSON object listing all the servers.
+ +`curl -H "X-ApiKey: $API_KEY" 127.0.0.1:4000/scraper/v1/server/aliens_wildland/api/v1/dir/` + +Proxies into the "aliens_wildland" server and retrieves a JSON object listing +the file server root. (The server must be running a new version of ptth_server +which can serve the JSON API) + +`curl -H "X-ApiKey: $API_KEY" 127.0.0.1:4000/scraper/v1/server/aliens_wildland/api/v1/dir/src/` + +Same, but retrieves the listing for "/src". + +`curl -H "X-ApiKey: $API_KEY" 127.0.0.1:4000/scraper/v1/server/aliens_wildland/files/src/tests.rs` + +There is no special API for retrieving files yet - But the existing server +API will be is proxied through the new scraper API on the relay. + +`curl --head -H "X-ApiKey: $API_KEY" 127.0.0.1:4000/scraper/v1/server/aliens_wildland/files/src/tests.rs` + +PTTH supports HEAD requests. This request will yield a "204 No Content", with +the "content-length" header. + +`curl -H "range: bytes=100-199" -H "X-ApiKey: $API_KEY" 127.0.0.1:4000/scraper/v1/server/aliens_wildland/files/src/tests.rs` + +PTTH supports byte range requests. This request will skip 100 bytes into the +file, and read 100 bytes. + +To avoid fence-post errors, most programming languages use half-open ranges. +e.g. `0..3` means "0, 1, 2". However, HTTP byte ranges are closed ranges. +e.g. `0..3` means "0, 1, 2, 3". So 100-199 means 199 is the last byte retrieved. + +By polling with HEAD and byte range requests, a scraper client can approximate +`tail -f` behavior of a server-side file. + ## Problem statement PTTH has 2 auth routes: @@ -38,7 +84,7 @@ stronger is ready. - (X) (POC) Test with curl - (X) Clean up scraper endpoint - (X) Add (almost) end-to-end tests for test scraper endpoint -- ( ) Thread server endpoints through relay scraper auth +- (X) Thread server endpoints through relay scraper auth - ( ) Add tests for other scraper endpoints - (don't care) Factor v1 API into v1 module - (X) Add real scraper endpoints @@ -80,7 +126,7 @@ These will all be JSON for now since Python, Rust, C++, C#, etc. can handle it. For compatibility with wget spidering, I _might_ do XML or HTML that's machine-readable. We'll see. -## Open questions +## Decision journal **Who generates the API key? The scraper client, or the PTTH relay server?** diff --git a/src/tests.rs b/src/tests.rs index dabe3d5..45e2f91 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -36,6 +36,7 @@ fn end_to_end () { let tripcode = BlakeHashWrapper::from_key (api_key.as_bytes ()); debug! ("Relay is expecting tripcode {}", tripcode.encode_base64 ()); let config_file = ptth_relay::config::file::Config { + iso: Default::default (), port: None, servers: vec! [ ptth_relay::config::file::Server { @@ -44,7 +45,7 @@ fn end_to_end () { display_name: None, }, ], - iso: Default::default (), + scraper_keys: vec! [], }; let config = ptth_relay::config::Config::try_from (config_file).expect ("Can't load config"); @@ -143,16 +144,15 @@ fn scraper_endpoints () { use ptth_relay::*; let config_file = config::file::Config { - port: Some (4001), - servers: vec! [ - - ], iso: config::file::Isomorphic { enable_scraper_api: true, - dev_mode: Some (config::file::DevMode { - scraper_key: Some (key_validity::ScraperKey::new (b"bogus")), - }), + dev_mode: Default::default (), }, + port: Some (4001), + servers: vec! [], + scraper_keys: vec! [ + key_validity::ScraperKey::new_30_day ("automated testing", b"bogus") + ], }; let config = config::Config::try_from (config_file).expect ("Can't load config");