ptth/crates/ptth_server/src/file_server/metrics.rs

176 lines
3.7 KiB
Rust

use std::sync::Arc;
use arc_swap::ArcSwap;
use chrono::{DateTime, Utc};
use tracing::{
debug, error, trace,
};
use ulid::Ulid;
// Metrics are named for when they're updated:
// - Startup (Once, when the server state is initialized)
// - Interval (About once per minute)
// - Events (When a request is processed)
/// Metrics captured once, when the server state is initialized.
#[derive (Debug, serde::Serialize)]
pub struct Startup {
/// D-Bus machine ID, if we're on Linux.
/// `None` when it could not be obtained.
pub machine_id: Option <String>,
/// Git version that ptth_server was built from.
/// Unimplemented: `Startup::new` always sets this to `None`.
pub git_version: Option <String>,
/// User-assigned and human-readable name for this server.
/// Must be unique within a relay.
pub server_name: String,
/// ULID instance ID, serialized as its string form by `serialize_ulid`.
/// A fresh one is generated every time `Startup::new` runs.
/// It's a fallback for detecting outages without relying on any clocks.
#[serde (serialize_with = "serialize_ulid")]
pub instance_id: Ulid,
/// System UTC time at which `Startup::new` built this value.
pub startup_utc: DateTime <Utc>,
}
/// Metrics sampled periodically while the server is running.
#[derive (Debug, serde::Serialize)]
pub struct Interval {
/// System UTC time at which this sample was built.
pub utc: DateTime <Utc>,
/// Resident set size of this process, as an integer number of mebibytes.
pub rss_mib: u64,
/// CPU usage snapshot of this process. Not serialized.
/// `Interval::monitor` subtracts two snapshots to log a usage percentage.
#[serde (skip)]
pub cpu_usage: heim::process::CpuUsage,
}
/// Metrics updated when a request is processed.
/// Currently a placeholder with no fields.
pub struct Events {
}
impl Startup {
#[must_use]
pub fn new (server_name: String) -> Self
{
let x = Self {
machine_id: get_machine_id (),
git_version: None,
server_name,
instance_id: ulid::Ulid::new (),
startup_utc: Utc::now (),
};
debug! ("metrics at startup: {:?}", x);
x
}
}
fn serialize_ulid <S: serde::Serializer> (t: &Ulid, s: S)
-> Result <S::Ok, S::Error>
{
let t = t.to_string ();
s.serialize_str (&t)
}
impl Interval {
    /// Samples the current process and builds a fresh `Interval`.
    ///
    /// # Errors
    ///
    /// Returns a `FileServerError` if the handle for the current process,
    /// its CPU usage, or its memory statistics cannot be obtained.
    pub async fn new () -> Result <Self, super::FileServerError> {
        use heim::process;
        use tokio::join;
        use uom::si::information::mebibyte;

        let our_process = process::current ().await?;

        // Drive the CPU and memory queries together instead of back-to-back.
        let (cpu_usage, mem) = join! {
            our_process.cpu_usage (),
            our_process.memory ()
        };
        let cpu_usage = cpu_usage?;
        let mem = mem?;

        let rss_mib = mem.rss ().get::<mebibyte> ();

        Ok (Self {
            utc: Utc::now (),
            rss_mib,
            cpu_usage,
        })
    }

    /// Runs forever, refreshing the interval metrics on a 60-second timer.
    ///
    /// Each successful sample is stored into `interval_writer`. A failed
    /// sample is logged at error level and skipped, leaving the previously
    /// stored value in place.
    pub async fn monitor (interval_writer: Arc <ArcSwap <Option <Interval>>>)
    {
        use std::time::Duration;
        use uom::si::ratio::percent;

        let mut interval = tokio::time::interval (Duration::from_secs (60));

        let mut counter = 0_u64;
        let mut next_10_time = counter;
        let mut metrics_at_last_10: Arc <Option <Interval>> = Arc::new (None);

        loop {
            interval.tick ().await;

            let new_interval_metrics = match Interval::new ().await {
                Ok (x) => Arc::new (Some (x)),
                Err (e) => {
                    error! ("Failed to update interval metrics: {:?}", e);
                    continue;
                },
            };

            // NOTE(review): `next_10_time` advances by 1 per successful
            // sample, in step with `counter`, so this branch runs on every
            // successful sample. The `_10` names suggest a coarser cadence
            // may once have been intended - confirm before changing.
            if counter >= next_10_time {
                if let (Some (old), Some (new)) = (&*metrics_at_last_10, &*new_interval_metrics) {
                    // The snapshots live behind `Arc`s, so they are cloned
                    // before being subtracted by value.
                    let diff = new.cpu_usage.clone () - old.cpu_usage.clone ();
                    trace! ("CPU usage: {}%", diff.get::<percent> ());
                }
                next_10_time += 1;
                metrics_at_last_10 = Arc::clone (&new_interval_metrics);
            }

            interval_writer.store (new_interval_metrics);
            counter += 1;
        }
    }
}
/// Reads the machine ID from `/etc/machine-id`.
///
/// Returns the file's contents with trailing whitespace removed, or `None`
/// if the file cannot be opened or read, or if the bytes read are not valid
/// UTF-8. At most 1024 bytes of the file are considered.
fn get_machine_id () -> Option <String> {
    use std::{
        fs::File,
        io::Read,
    };

    // Upper bound on how much of the file is read.
    const MAX_MACHINE_ID_BYTES: u64 = 1024;

    let file = File::open ("/etc/machine-id").ok ()?;

    // A single `read` call may legally return fewer bytes than are
    // available, so read until EOF (or the cap) instead of relying on it.
    let mut contents = String::new ();
    file.take (MAX_MACHINE_ID_BYTES).read_to_string (&mut contents).ok ()?;

    // Drop the trailing newline in place rather than allocating a new string.
    let trimmed_len = contents.trim_end ().len ();
    contents.truncate (trimmed_len);
    Some (contents)
}
#[cfg (test)]
mod tests {
    use super::*;

    /// Two separately built `Startup` values must not share an instance ID.
    #[test]
    fn ulid_null () {
        let first = Startup::new ("bogus".to_string ());
        let second = Startup::new ("bogus".to_string ());

        assert_ne! (first.instance_id, second.instance_id);
    }
}