diff --git a/Cargo.lock b/Cargo.lock index 44ef4eccc5..4b1809d3b8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1332,6 +1332,32 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" +[[package]] +name = "clickana" +version = "0.1.0" +dependencies = [ + "anyhow", + "camino", + "chrono", + "clap", + "clickhouse-admin-server-client", + "clickhouse-admin-types", + "dropshot 0.13.0", + "futures", + "omicron-common", + "omicron-workspace-hack", + "ratatui", + "schemars", + "serde_json", + "slog", + "slog-async", + "slog-dtrace", + "slog-error-chain", + "slog-term", + "tokio", + "tokio-postgres", +] + [[package]] name = "clickhouse-admin-api" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index bb9ff0e80f..3d8efe3acb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,7 @@ members = [ "cockroach-admin/types", "common", "dev-tools/cert-dev", + "dev-tools/clickana", "dev-tools/clickhouse-cluster-dev", "dev-tools/ch-dev", "dev-tools/crdb-seed", @@ -158,6 +159,7 @@ default-members = [ "cockroach-admin/types", "common", "dev-tools/cert-dev", + "dev-tools/clickana", "dev-tools/clickhouse-cluster-dev", "dev-tools/ch-dev", "dev-tools/crdb-seed", @@ -332,6 +334,7 @@ chrono = { version = "0.4", features = [ "serde" ] } chrono-tz = "0.10.0" ciborium = "0.2.2" clap = { version = "4.5", features = ["cargo", "derive", "env", "wrap_help"] } +clickana = { path = "dev-tools/clickana" } clickhouse-admin-api = { path = "clickhouse-admin/api" } clickhouse-admin-keeper-client = { path = "clients/clickhouse-admin-keeper-client" } clickhouse-admin-server-client = { path = "clients/clickhouse-admin-server-client" } diff --git a/clickhouse-admin/Cargo.toml b/clickhouse-admin/Cargo.toml index 80b080b2ff..65ceb3fdbf 100644 --- a/clickhouse-admin/Cargo.toml +++ b/clickhouse-admin/Cargo.toml @@ -22,7 +22,9 @@ slog.workspace = true slog-async.workspace = true slog-dtrace.workspace = 
true slog-error-chain.workspace = true +slog-term.workspace = true serde.workspace = true +serde_json.workspace = true thiserror.workspace = true tokio.workspace = true tokio-postgres.workspace = true diff --git a/clickhouse-admin/types/src/lib.rs b/clickhouse-admin/types/src/lib.rs index 3b8696438c..aa1437bedc 100644 --- a/clickhouse-admin/types/src/lib.rs +++ b/clickhouse-admin/types/src/lib.rs @@ -1200,7 +1200,7 @@ pub enum Timestamp { #[derive(Debug, Serialize, Deserialize, JsonSchema, PartialEq)] #[serde(rename_all = "snake_case")] pub struct SystemTimeSeries { - pub time: Timestamp, + pub time: String, pub value: f64, // TODO: Would be really nice to have an enum with possible units (s, ms, bytes) // Not sure if I can even add this, the system tables don't mention units at all. @@ -2099,15 +2099,15 @@ snapshot_storage_disk=LocalSnapshotDisk let expected = vec![ SystemTimeSeries { - time: crate::Timestamp::Unix("1732494720".to_string()), + time: "1732494720".to_string(), value: 110220450825.75238, }, SystemTimeSeries { - time: crate::Timestamp::Unix("1732494840".to_string()), + time: "1732494840".to_string(), value: 110339992917.33331, }, SystemTimeSeries { - time: crate::Timestamp::Unix("1732494960".to_string()), + time: "1732494960".to_string(), value: 110421854037.33331, }, ]; @@ -2127,21 +2127,15 @@ snapshot_storage_disk=LocalSnapshotDisk let expected = vec![ SystemTimeSeries { - time: crate::Timestamp::Utc( - "2024-11-25T00:34:00Z".parse::>().unwrap(), - ), + time: "2024-11-25T00:34:00Z".to_string(), value: 110220450825.75238, }, SystemTimeSeries { - time: crate::Timestamp::Utc( - "2024-11-25T00:35:00Z".parse::>().unwrap(), - ), + time: "2024-11-25T00:35:00Z".to_string(), value: 110339992917.33331, }, SystemTimeSeries { - time: crate::Timestamp::Utc( - "2024-11-25T00:36:00Z".parse::>().unwrap(), - ), + time: "2024-11-25T00:36:00Z".to_string(), value: 110421854037.33331, }, ]; @@ -2176,7 +2170,7 @@ snapshot_storage_disk=LocalSnapshotDisk assert_eq!( 
format!("{}", root_cause), - "data did not match any variant of untagged enum Timestamp at line 1 column 12", + "invalid type: integer `2024`, expected a string at line 1 column 12", ); } } diff --git a/dev-tools/clickana/Cargo.toml b/dev-tools/clickana/Cargo.toml new file mode 100644 index 0000000000..a9f91b890b --- /dev/null +++ b/dev-tools/clickana/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "clickana" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[dependencies] +anyhow.workspace = true +camino.workspace = true +chrono.workspace = true +clap.workspace = true +clickhouse-admin-types.workspace = true +clickhouse-admin-server-client.workspace = true +dropshot.workspace = true +futures.workspace = true +omicron-common.workspace = true +ratatui.workspace = true +schemars.workspace = true +slog.workspace = true +slog-async.workspace = true +slog-dtrace.workspace = true +slog-error-chain.workspace = true +slog-term.workspace = true +serde_json.workspace = true +tokio.workspace = true +tokio-postgres.workspace = true + +omicron-workspace-hack.workspace = true + +[lints] +workspace = true diff --git a/dev-tools/clickana/src/bin/clickana.rs b/dev-tools/clickana/src/bin/clickana.rs new file mode 100644 index 0000000000..0c8d06156e --- /dev/null +++ b/dev-tools/clickana/src/bin/clickana.rs @@ -0,0 +1,57 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use anyhow::Result; +use camino::Utf8PathBuf; +use clap::Parser; +use clickana::Clickana; +use std::net::SocketAddr; + +const CLICKANA_LOG_FILE: &str = "/tmp/clickana.log"; + +#[tokio::main] +async fn main() -> Result<()> { + let args = Cli::parse(); + + let terminal = ratatui::init(); + let result = Clickana::new( + args.clickhouse_addr, + args.log_path, + args.sampling_interval, + args.time_range, + args.refresh_interval, + ) + .run(terminal) + .await; + ratatui::restore(); + result +} + +#[derive(Debug, Parser)] +struct Cli { + /// Path to the log file + #[arg( + long, + short, + env = "CLICKANA_LOG_PATH", + default_value = CLICKANA_LOG_FILE, + )] + log_path: Utf8PathBuf, + + /// Address where a clickhouse admin server is listening on + #[arg(long, short = 'a')] + clickhouse_addr: SocketAddr, + + /// The interval to collect monitoring data in seconds + #[arg(long, short, default_value_t = 60)] + sampling_interval: u64, + + /// Range of time to collect monitoring data in seconds + #[arg(long, short, default_value_t = 3600)] + time_range: u64, + + /// The interval at which the dashboards will refresh + #[arg(long, short, default_value_t = 60)] + refresh_interval: u64, +} diff --git a/dev-tools/clickana/src/chart.rs b/dev-tools/clickana/src/chart.rs new file mode 100644 index 0000000000..f8a78fb63d --- /dev/null +++ b/dev-tools/clickana/src/chart.rs @@ -0,0 +1,765 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use anyhow::{bail, Result}; +use chrono::{DateTime, Utc}; +use clickhouse_admin_server_client::types::{SystemTable, SystemTimeSeries}; +use ratatui::{ + layout::{Constraint, Rect}, + style::{Color, Style, Stylize}, + symbols::Marker, + text::Line, + widgets::{Axis, Block, Chart, Dataset, GraphType, LegendPosition}, + Frame, +}; +use std::fmt::Display; + +// Ratatui requires data points in a Dataset to be f64 +const GIBIBYTE_F64: f64 = 1073741824.0; +const MEBIBYTE_F64: f64 = 1048576.0; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Unit { + Count, + Gibibyte, + Mebibyte, +} + +impl Display for Unit { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let s = match self { + Unit::Count => "", + Unit::Gibibyte => "GiB", + Unit::Mebibyte => "MiB", + }; + write!(f, "{s}") + } +} + +impl Unit { + /// Returns the value of the unit represented in bytes. + fn as_bytes_f64(&self) -> Result { + let bytes = match self { + Unit::Gibibyte => GIBIBYTE_F64, + Unit::Mebibyte => MEBIBYTE_F64, + Unit::Count => bail!("Count cannot be converted into bytes"), + }; + Ok(bytes) + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum MetricName { + DiskUsage, + MemoryTracking, + QueryCount, + RunningQueries, +} + +impl Display for MetricName { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let s = match self { + MetricName::DiskUsage => "DiskUsed_default", + MetricName::MemoryTracking => "CurrentMetric_MemoryTracking", + MetricName::QueryCount => "ProfileEvent_Query", + MetricName::RunningQueries => "CurrentMetric_Query", + }; + write!(f, "{s}") + } +} + +impl MetricName { + /// Returns the associated table to query for each metric. 
+ pub fn table(&self) -> SystemTable { + match self { + MetricName::DiskUsage => SystemTable::AsynchronousMetricLog, + MetricName::MemoryTracking + | MetricName::QueryCount + | MetricName::RunningQueries => SystemTable::MetricLog, + } + } + + /// Returns the unit the data values will be represented as. + fn unit(&self) -> Unit { + match self { + MetricName::DiskUsage => Unit::Gibibyte, + MetricName::MemoryTracking => Unit::Mebibyte, + MetricName::QueryCount | MetricName::RunningQueries => Unit::Count, + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ChartMetadata { + pub title: String, + pub unit: Unit, +} + +impl ChartMetadata { + pub fn new(metric: MetricName, title: String) -> Self { + let unit = metric.unit(); + Self { title, unit } + } +} + +#[derive(Debug)] +struct TimeSeriesValues { + /// A collection of all the values from the timeseries + values: Vec, +} + +impl TimeSeriesValues { + fn new(raw_data: &Vec) -> Self { + let values: Vec = raw_data.iter().map(|ts| ts.value).collect(); + Self { values } + } + + fn min(&self) -> Result<&f64> { + let Some(min_value) = + self.values.iter().min_by(|a, b| a.partial_cmp(b).unwrap()) + else { + bail!("no values have been retrieved") + }; + + Ok(min_value) + } + + fn max(&self) -> Result<&f64> { + let Some(max_value) = + self.values.iter().max_by(|a, b| a.partial_cmp(b).unwrap()) + else { + bail!("no values have been retrieved") + }; + + Ok(max_value) + } + + /// Returns the average value of the max and min values. + fn avg(&self, min_value_label: f64, max_value_label: f64) -> f64 { + (min_value_label + max_value_label) / 2.0 + } +} + +// The result of the following functions will not be precise, but it doesn't +// matter since we just want an estimate for the chart's labels and bounds. +// all we need are values that are larger than the maximum value in the +// timeseries or smaller than the minimum value in the timeseries. 
+ +/// Returns the sum of the maximum raw value and 1 or the equivalent of 1 +/// MiB or GiB in bytes. +fn padded_max_value_raw(unit: Unit, max_value_raw: &f64) -> Result { + let ceil_value = max_value_raw.ceil(); + let padded_value = match unit { + Unit::Count => ceil_value + 1.0, + Unit::Gibibyte | Unit::Mebibyte => ceil_value + unit.as_bytes_f64()?, + }; + Ok(padded_value) +} + +/// Returns the sum of the max raw value and 1 or the equivalent of 1 +/// MiB or GiB. +fn padded_max_value_as_unit(unit: Unit, max_value_raw: &f64) -> Result { + let label_value = match unit { + Unit::Count => max_value_raw + 1.0, + Unit::Gibibyte | Unit::Mebibyte => { + (max_value_raw / unit.as_bytes_f64()?) + 1.0 + } + }; + Ok(label_value.ceil()) +} + +/// Returns the difference of the minimum raw value and 1 or the equivalent +/// of 1 in MiB or GiB in bytes. If the minimum is equal to or less than 1.0, +/// or the equivalent of 1 once converted from bytes to the expected unit +/// (e.g. less than or equal to 1048576 if we're using MiB) we'll use 0.0 as +/// the minimum value as we don't expect any of our charts +/// to require negative numbers for now. +fn padded_min_value_raw(unit: Unit, min_value_raw: &f64) -> Result { + let padded_value = match unit { + Unit::Count => { + if *min_value_raw <= 1.0 { + 0.0 + } else { + min_value_raw - 1.0 + } + } + Unit::Gibibyte | Unit::Mebibyte => { + let bytes = unit.as_bytes_f64()?; + if *min_value_raw <= bytes { + 0.0 + } else { + min_value_raw - bytes + } + } + }; + Ok(padded_value.floor()) +} + +/// Returns the difference of the minimum raw value and 1 or the equivalent +/// of 1 in MiB or GiB in bytes. If the minimum is less than 1, we'll use +/// 0 as the minimum value as we don't expect any of our charts to require +/// negative numbers for now.
+fn padded_min_value_as_unit(unit: Unit, min_value_raw: &f64) -> Result { + let padded_value = match unit { + Unit::Count => { + if *min_value_raw < 1.0 { + 0.0 + } else { + min_value_raw - 1.0 + } + } + Unit::Gibibyte | Unit::Mebibyte => { + let value_as_unit = min_value_raw / unit.as_bytes_f64()?; + if value_as_unit < 1.0 { + 0.0 + } else { + value_as_unit - 1.0 + } + } + }; + Ok(padded_value.floor()) +} + +#[derive(Debug, PartialEq)] +struct YAxisValues { + lower_label: String, + mid_label: String, + upper_label: String, + lower_bound: f64, + upper_bound: f64, +} + +impl YAxisValues { + fn new(unit: Unit, raw_data: &Vec) -> Result { + // Retrieve values only to create Y axis bounds and labels + let values = TimeSeriesValues::new(&raw_data); + let max_value = values.max()?; + let min_value = values.min()?; + + // In case there is very little variance in the y axis, we will be adding some + // padding to the bounds and labels so we don't end up with repeated labels or + // straight lines too close to the upper bounds. + let upper_bound = padded_max_value_raw(unit, max_value)?; + let upper_label_as_unit = padded_max_value_as_unit(unit, max_value)?; + let lower_bound = padded_min_value_raw(unit, min_value)?; + let lower_label_as_unit = padded_min_value_as_unit(unit, min_value)?; + let mid_label_as_unit = + values.avg(lower_label_as_unit, upper_label_as_unit); + + // To nicely display the mid value label for the Y axis, we do the following: + // - It is not displayed if it is 0.0. + // - If it does not have a fractional number we display it as an integer. + // - Else, we display the number as is up to the first fractional number.
+ //let mid_value = mid_label; + let fractional_of_mid_value = + mid_label_as_unit - mid_label_as_unit.floor(); + let mid_value_formatted = format!("{:.1}", mid_label_as_unit); + let mid_label = if mid_value_formatted == *"0.0" { + "".to_string() + } else if fractional_of_mid_value == 0.0 { + format!( + "{} {}", + mid_value_formatted.split('.').next().unwrap(), + unit + ) + } else { + format!("{} {}", mid_value_formatted, unit) + }; + + let upper_label = format!("{} {}", upper_label_as_unit, unit); + let lower_label = format!("{} {}", lower_label_as_unit, unit); + + Ok(Self { + lower_label, + mid_label, + upper_label, + lower_bound, + upper_bound, + }) + } +} + +#[derive(Debug)] +struct TimeSeriesTimestamps { + /// A collection of all the timestamps from the timeseries + timestamps: Vec, +} + +impl TimeSeriesTimestamps { + fn new(raw_data: &Vec) -> Self { + let timestamps: Vec = raw_data + .iter() + .map(|ts| { + ts.time.trim_matches('"').parse::().unwrap_or_else(|_| { + panic!("could not parse timestamp {} into i64", ts.time) + }) + }) + .collect(); + Self { timestamps } + } + + fn min(&self) -> Result<&i64> { + let Some(start_time) = self.timestamps.iter().min() else { + bail!("failed to retrieve start time, timestamp list is empty") + }; + Ok(start_time) + } + + fn max(&self) -> Result<&i64> { + let Some(end_time) = self.timestamps.iter().max() else { + bail!("failed to retrieve end time, timestamp list is empty") + }; + Ok(end_time) + } + + fn avg(&self, start_time: &i64, end_time: &i64) -> i64 { + (start_time + end_time) / 2 + } +} + +#[derive(Debug, PartialEq)] +pub struct XAxisTimestamps { + mid_time_label: DateTime, + pub start_time_label: DateTime, + pub end_time_label: DateTime, + start_time_bound: f64, + end_time_bound: f64, +} + +impl XAxisTimestamps { + fn new(raw_data: &Vec) -> Result { + // Retrieve timestamps only to create chart bounds and labels + let timestamps = TimeSeriesTimestamps::new(&raw_data); + // These timestamps will be used to 
calculate start and end timestamps in order + // to create labels and set bounds for the X axis. As above, some of these conversions + // may lose precision, but it's OK as these values are only used to make sure the + // datapoints fit within the graph nicely. + let start_time = timestamps.min()?; + let end_time = timestamps.max()?; + let avg_time = timestamps.avg(start_time, end_time); + + let Some(start_time_label) = DateTime::from_timestamp(*start_time, 0) + else { + bail!( + "failed to convert timestamp to UTC date and time; + timestamp = {}", + start_time + ) + }; + let Some(end_time_label) = DateTime::from_timestamp(*end_time, 0) + else { + bail!( + "failed to convert timestamp to UTC date and time; + timestamp = {}", + end_time + ) + }; + let Some(mid_time_label) = DateTime::from_timestamp(avg_time, 0) else { + bail!( + "failed to convert timestamp to UTC date and time; + timestamp = {}", + avg_time + ) + }; + + let start_time_bound = *start_time as f64; + let end_time_bound = *end_time as f64; + + Ok(Self { + mid_time_label, + start_time_label, + end_time_label, + start_time_bound, + end_time_bound, + }) + } +} + +#[derive(Debug, PartialEq)] +struct DataPoints { + data: Vec<(f64, f64)>, +} + +impl DataPoints { + fn new(timeseries: &Vec) -> Self { + // These values will be used to render the graph and ratatui + // requires them to be f64 + let data: Vec<(f64, f64)> = timeseries + .iter() + .map(|ts| { + ( + ts.time.trim_matches('"').parse::().unwrap_or_else( + |_| { + panic!( + "could not parse timestamp {} into f64", + ts.time + ) + }, + ), + ts.value, + ) + }) + .collect(); + Self { data } + } +} + +#[derive(Debug, PartialEq)] +pub struct ChartData { + metadata: ChartMetadata, + data_points: DataPoints, + pub x_axis_timestamps: XAxisTimestamps, + y_axis_values: YAxisValues, +} + +impl ChartData { + pub fn new( + raw_data: Vec, + metadata: ChartMetadata, + ) -> Result { + // Retrieve datapoints that will be charted + let data_points = 
DataPoints::new(&raw_data); + + // Retrieve X axis bounds and labels + let x_axis_timestamps = XAxisTimestamps::new(&raw_data)?; + + // Retrieve X axis bounds and labels + let y_axis_values = YAxisValues::new(metadata.unit, &raw_data)?; + + Ok(Self { metadata, data_points, x_axis_timestamps, y_axis_values }) + } + + pub fn render_line_chart(&self, frame: &mut Frame, area: Rect) { + let datasets = vec![Dataset::default() + .marker(Marker::Braille) + .style(Style::default().fg(Color::LightGreen)) + .graph_type(GraphType::Line) + .data(&self.data_points.data)]; + + let chart = Chart::new(datasets) + .block( + Block::bordered() + .title(Line::from(self.title()).cyan().bold().centered()), + ) + .x_axis( + Axis::default() + .style(Style::default().gray()) + .bounds([self.start_time_bound(), self.end_time_bound()]) + .labels([ + self.start_time_label().bold(), + self.mid_time_label().bold(), + self.end_time_label().bold(), + ]), + ) + .y_axis( + Axis::default() + .style(Style::default().gray()) + .bounds([ + self.lower_value_bound(), + self.upper_value_bound(), + ]) + .labels([ + self.lower_value_label().bold(), + self.mid_value_label().bold(), + self.upper_value_label().bold(), + ]), + ) + .legend_position(Some(LegendPosition::TopLeft)) + .hidden_legend_constraints(( + Constraint::Ratio(1, 2), + Constraint::Ratio(1, 2), + )); + + frame.render_widget(chart, area); + } + + fn title(&self) -> String { + self.metadata.title.clone() + } + + pub fn start_date_time(&self) -> DateTime { + self.x_axis_timestamps.start_time_label + } + + pub fn end_date_time(&self) -> DateTime { + self.x_axis_timestamps.end_time_label + } + + fn start_time_label(&self) -> String { + self.x_axis_timestamps.start_time_label.time().to_string() + } + + fn mid_time_label(&self) -> String { + self.x_axis_timestamps.mid_time_label.time().to_string() + } + + fn end_time_label(&self) -> String { + self.x_axis_timestamps.end_time_label.time().to_string() + } + + fn start_time_bound(&self) -> f64 { + 
self.x_axis_timestamps.start_time_bound + } + + fn end_time_bound(&self) -> f64 { + self.x_axis_timestamps.end_time_bound + } + + fn lower_value_label(&self) -> String { + self.y_axis_values.lower_label.clone() + } + + fn mid_value_label(&self) -> String { + self.y_axis_values.mid_label.clone() + } + + fn upper_value_label(&self) -> String { + self.y_axis_values.upper_label.clone() + } + + fn lower_value_bound(&self) -> f64 { + self.y_axis_values.lower_bound + } + + fn upper_value_bound(&self) -> f64 { + self.y_axis_values.upper_bound + } +} + +#[cfg(test)] +mod tests { + use crate::{ + chart::{Unit, YAxisValues}, + ChartData, ChartMetadata, MetricName, + }; + use chrono::DateTime; + use clickhouse_admin_server_client::types::SystemTimeSeries; + + use super::{DataPoints, XAxisTimestamps}; + + #[test] + fn gather_chart_data_for_disk_usage_success() { + let metadata = + ChartMetadata::new(MetricName::DiskUsage, "Test Chart".to_string()); + let raw_data = vec![ + SystemTimeSeries { + time: "1732223400".to_string(), + value: 479551511587.3104, + }, + SystemTimeSeries { + time: "1732223520".to_string(), + value: 479555459822.93335, + }, + SystemTimeSeries { + time: "1732223640".to_string(), + value: 479560290201.6, + }, + ]; + + let expected_result = ChartData { + metadata: ChartMetadata { + title: "Test Chart".to_string(), + unit: Unit::Gibibyte, + }, + data_points: DataPoints { + data: vec![ + (1732223400.0, 479551511587.3104), + (1732223520.0, 479555459822.93335), + (1732223640.0, 479560290201.6), + ], + }, + x_axis_timestamps: XAxisTimestamps { + start_time_label: DateTime::from_timestamp(1732223400, 0) + .unwrap(), + mid_time_label: DateTime::from_timestamp(1732223520, 0) + .unwrap(), + end_time_label: DateTime::from_timestamp(1732223640, 0) + .unwrap(), + start_time_bound: 1732223400.0, + end_time_bound: 1732223640.0, + }, + y_axis_values: YAxisValues { + lower_label: "445 GiB".to_string(), + mid_label: "446.5 GiB".to_string(), + upper_label: "448 
GiB".to_string(), + lower_bound: 478477769763.0, + upper_bound: 480634032026.0, + }, + }; + let result = ChartData::new(raw_data, metadata).unwrap(); + assert_eq!(result, expected_result); + } + + #[test] + fn gather_chart_data_for_memory_tracking_success() { + let metadata = ChartMetadata::new( + MetricName::MemoryTracking, + "Test Chart".to_string(), + ); + let raw_data = vec![ + SystemTimeSeries { + time: "1732223400".to_string(), + value: 479551511587.3104, + }, + SystemTimeSeries { + time: "1732223520".to_string(), + value: 479555459822.93335, + }, + SystemTimeSeries { + time: "1732223640".to_string(), + value: 479560290201.6, + }, + ]; + + let expected_result = ChartData { + metadata: ChartMetadata { + title: "Test Chart".to_string(), + unit: Unit::Mebibyte, + }, + data_points: DataPoints { + data: vec![ + (1732223400.0, 479551511587.3104), + (1732223520.0, 479555459822.93335), + (1732223640.0, 479560290201.6), + ], + }, + x_axis_timestamps: XAxisTimestamps { + start_time_label: DateTime::from_timestamp(1732223400, 0) + .unwrap(), + mid_time_label: DateTime::from_timestamp(1732223520, 0) + .unwrap(), + end_time_label: DateTime::from_timestamp(1732223640, 0) + .unwrap(), + start_time_bound: 1732223400.0, + end_time_bound: 1732223640.0, + }, + y_axis_values: YAxisValues { + lower_label: "457334 MiB".to_string(), + mid_label: "457340 MiB".to_string(), + upper_label: "457346 MiB".to_string(), + lower_bound: 479550463011.0, + upper_bound: 479561338778.0, + }, + }; + let result = ChartData::new(raw_data, metadata).unwrap(); + assert_eq!(result, expected_result); + } + + #[test] + fn gather_chart_data_for_query_count_success() { + let metadata = ChartMetadata::new( + MetricName::QueryCount, + "Test Chart".to_string(), + ); + let raw_data = vec![ + SystemTimeSeries { time: "1732223400".to_string(), value: 0.0 }, + SystemTimeSeries { time: "1732223520".to_string(), value: 0.004 }, + SystemTimeSeries { time: "1732223640".to_string(), value: 0.0 }, + ]; + + let 
expected_result = ChartData { + metadata: ChartMetadata { + title: "Test Chart".to_string(), + unit: Unit::Count, + }, + data_points: DataPoints { + data: vec![ + (1732223400.0, 0.0), + (1732223520.0, 0.004), + (1732223640.0, 0.0), + ], + }, + x_axis_timestamps: XAxisTimestamps { + start_time_label: DateTime::from_timestamp(1732223400, 0) + .unwrap(), + mid_time_label: DateTime::from_timestamp(1732223520, 0) + .unwrap(), + end_time_label: DateTime::from_timestamp(1732223640, 0) + .unwrap(), + start_time_bound: 1732223400.0, + end_time_bound: 1732223640.0, + }, + y_axis_values: YAxisValues { + lower_label: "0 ".to_string(), + mid_label: "1 ".to_string(), + upper_label: "2 ".to_string(), + lower_bound: 0.0, + upper_bound: 2.0, + }, + }; + let result = ChartData::new(raw_data, metadata).unwrap(); + assert_eq!(result, expected_result); + } + + #[test] + fn gather_chart_data_for_running_queries_success() { + let metadata = ChartMetadata::new( + MetricName::RunningQueries, + "Test Chart".to_string(), + ); + let raw_data = vec![ + SystemTimeSeries { time: "1732223400".to_string(), value: 1.554 }, + SystemTimeSeries { time: "1732223520".to_string(), value: 1.877 }, + SystemTimeSeries { time: "1732223640".to_string(), value: 1.3456 }, + ]; + + let expected_result = ChartData { + metadata: ChartMetadata { + title: "Test Chart".to_string(), + unit: Unit::Count, + }, + data_points: DataPoints { + data: vec![ + (1732223400.0, 1.554), + (1732223520.0, 1.877), + (1732223640.0, 1.3456), + ], + }, + x_axis_timestamps: XAxisTimestamps { + start_time_label: DateTime::from_timestamp(1732223400, 0) + .unwrap(), + mid_time_label: DateTime::from_timestamp(1732223520, 0) + .unwrap(), + end_time_label: DateTime::from_timestamp(1732223640, 0) + .unwrap(), + start_time_bound: 1732223400.0, + end_time_bound: 1732223640.0, + }, + y_axis_values: YAxisValues { + lower_label: "0 ".to_string(), + mid_label: "1.5 ".to_string(), + upper_label: "3 ".to_string(), + lower_bound: 0.0, + upper_bound: 
3.0, + }, + }; + let result = ChartData::new(raw_data, metadata).unwrap(); + assert_eq!(result, expected_result); + } + + #[test] + #[should_panic( + expected = "could not parse timestamp Some nonsense string into f64" + )] + fn gather_chart_data_failure() { + let metadata = + ChartMetadata::new(MetricName::DiskUsage, "Test Chart".to_string()); + let raw_data = vec![ + SystemTimeSeries { + time: "Some nonsense string".to_string(), + value: 479551511587.3104, + }, + SystemTimeSeries { + time: "1732223520".to_string(), + value: 479555459822.93335, + }, + SystemTimeSeries { + time: "1732223640".to_string(), + value: 479560290201.6, + }, + ]; + + let _ = ChartData::new(raw_data, metadata); + } +} diff --git a/dev-tools/clickana/src/lib.rs b/dev-tools/clickana/src/lib.rs new file mode 100644 index 0000000000..76af35ba8d --- /dev/null +++ b/dev-tools/clickana/src/lib.rs @@ -0,0 +1,274 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use anyhow::{anyhow, bail, Context, Result}; +use camino::Utf8PathBuf; +use chrono::{DateTime, Utc}; +use clickhouse_admin_server_client::types::{ + SystemTimeSeries, TimestampFormat, +}; +use clickhouse_admin_server_client::Client as ClickhouseServerClient; +use futures::stream::FuturesOrdered; +use futures::StreamExt; +use omicron_common::FileKv; +use ratatui::crossterm::event::{self, Event, KeyCode}; +use ratatui::layout::{Constraint, Layout, Rect}; +use ratatui::style::{Color, Style, Stylize}; +use ratatui::text::Span; +use ratatui::widgets::Paragraph; +use ratatui::{DefaultTerminal, Frame}; +use slog::{o, Drain, Logger}; +use slog_async::Async; +use slog_term::{FullFormat, PlainDecorator}; +use std::collections::BTreeMap; +use std::future::Future; +use std::net::SocketAddr; +use std::pin::Pin; +use std::time::{Duration, Instant}; + +use crate::chart::{ChartData, ChartMetadata, MetricName}; + +mod chart; + +#[derive(Debug)] +struct Dashboard { + start_time: DateTime, + end_time: DateTime, + top_left_frame: ChartData, + top_right_frame: ChartData, + bottom_left_frame: ChartData, + bottom_right_frame: ChartData, +} + +#[derive(Clone, Debug)] +pub struct Clickana { + clickhouse_addr: SocketAddr, + log_path: Utf8PathBuf, + sampling_interval: u64, + time_range: u64, + refresh_interval: u64, +} + +impl Clickana { + pub fn new( + clickhouse_addr: SocketAddr, + log_path: Utf8PathBuf, + sampling_interval: u64, + time_range: u64, + refresh_interval: u64, + ) -> Self { + Self { + clickhouse_addr, + log_path, + sampling_interval, + time_range, + refresh_interval, + } + } + + pub async fn run(self, mut terminal: DefaultTerminal) -> Result<()> { + let admin_url = format!("http://{}", self.clickhouse_addr); + let log = self.new_logger()?; + let client = ClickhouseServerClient::new(&admin_url, log.clone()); + + let tick_interval = Duration::from_secs(self.refresh_interval); + let mut last_tick = Instant::now(); + loop { + // Charts we will be showing in the dashboard + // + 
// We are hardcoding these for now. In the future these will likely be taken + // from a TOML config file. + let charts = BTreeMap::from([ + (MetricName::DiskUsage, "Disk Usage".to_string()), + ( + MetricName::MemoryTracking, + "Memory Allocated by the Server".to_string(), + ), + ( + MetricName::QueryCount, + "Queries Started per Second".to_string(), + ), + (MetricName::RunningQueries, "Queries Running".to_string()), + ]); + + let mut tasks = FuturesOrdered::< + Pin>>>, + >::new(); + + for (metric_name, title) in charts { + let s = self.clone(); + let c = client.clone(); + + let task = Box::pin(async move { + let metadata = ChartMetadata::new(metric_name, title); + let data = s.get_api_data(&c, metric_name).await?; + ChartData::new(data, metadata) + }); + tasks.push_back(task); + } + + if tasks.len() != 4 { + bail!( + "expected information for 4 charts, received {} instead", + tasks.len() + ); + } + + // TODO: Eventually we may want to not have a set amount of charts and make the + // dashboard a bit more dynamic. Perhaps taking a toml configuration file or + // something like that. We can then create a vector of "ChartData"s for Dashboard + // to take and create the layout dynamically. + // + // IDEA (ajs): I think it would be useful to be able to have a little menu of charts + // on the side of the pane, and then you can scroll and select which ones to show + // without having to restart the app, or mess with a toml file. + // You could also allow toggling between a set of predefined layouts to make it always + // look nice. So you could show, 1, 2, 4, 6, 8 charts or something and allow selecting + // which to show in each view. You could even remember which charts to show in each layout, + // so you could toggle back and forth between different layouts and see all the charts, + // some with more detail. 
+ // + // We have already checked that the length of tasks is 4, so it's safe to unwrap + let top_left_frame: ChartData = tasks.next().await.unwrap()?; + let top_right_frame: ChartData = tasks.next().await.unwrap()?; + let bottom_left_frame: ChartData = tasks.next().await.unwrap()?; + let bottom_right_frame: ChartData = tasks.next().await.unwrap()?; + + // We only need to retrieve from one chart as they will all be relatively the same. + // Rarely, the charts may have a variance of a second or so depending on when + // the API calls were made, but for the header block we don't need exact precision. + let start_time = top_left_frame.start_date_time(); + let end_time = top_left_frame.end_date_time(); + + let dashboard = Dashboard { + start_time, + end_time, + top_left_frame, + top_right_frame, + bottom_left_frame, + bottom_right_frame, + }; + terminal.draw(|frame| self.draw(frame, dashboard))?; + + let timeout = tick_interval.saturating_sub(last_tick.elapsed()); + if event::poll(timeout)? { + if let Event::Key(key) = event::read()? 
{ + // To exit the dashboard press the "q" key + if key.code == KeyCode::Char('q') { + return Ok(()); + } + } + } + + if last_tick.elapsed() >= tick_interval { + last_tick = Instant::now(); + } + } + } + + fn draw(&self, frame: &mut Frame, dashboard: Dashboard) { + let [heading, top, bottom] = Layout::vertical([ + Constraint::Length(4), + // TODO: If we make the dashboard with too many charts + // we may want to reconsider setting sizes instead of filling + // the space + Constraint::Fill(1), + Constraint::Fill(1), + ]) + .areas(frame.area()); + let [title] = + Layout::horizontal([Constraint::Fill(1); 1]).areas(heading); + let [top_left_frame, top_right_frame] = + Layout::horizontal([Constraint::Fill(1); 2]).areas(top); + let [bottom_left_frame, bottom_right_frame] = + Layout::horizontal([Constraint::Fill(1); 2]).areas(bottom); + + self.render_title_bar(frame, title, &dashboard); + + dashboard.top_left_frame.render_line_chart(frame, top_left_frame); + dashboard.top_right_frame.render_line_chart(frame, top_right_frame); + dashboard.bottom_left_frame.render_line_chart(frame, bottom_left_frame); + dashboard + .bottom_right_frame + .render_line_chart(frame, bottom_right_frame); + } + + fn render_title_bar( + &self, + frame: &mut Frame, + area: Rect, + dashboard: &Dashboard, + ) { + let style = Style::new().fg(Color::Green).bold(); + let title = vec![ + Span::styled("CLICKANA", style).into_centered_line(), + Span::styled( + format!("Sampling Interval: {}s", self.sampling_interval), + style, + ) + .into_left_aligned_line(), + Span::styled( + format!( + "Time Range: {} - {} ({}s)", + dashboard.start_time, dashboard.end_time, self.time_range + ), + style, + ) + .into_left_aligned_line(), + Span::styled( + format!("Refresh Interval {}s", self.refresh_interval), + style, + ) + .into_left_aligned_line(), + ]; + let p = Paragraph::new(title); + + frame.render_widget(p, area); + } + + async fn get_api_data( + &self, + client: &ClickhouseServerClient, + metric: MetricName, + ) 
-> Result> { + let timeseries = client + .system_timeseries_avg( + metric.table(), + &format!("{metric}"), + Some(self.sampling_interval), + Some(self.time_range), + Some(TimestampFormat::UnixEpoch), + ) + .await + .map(|t| t.into_inner()) + .map_err(|e| { + anyhow!( + concat!( + "failed to retrieve timeseries from clickhouse server; ", + "error = {}", + ), + e + ) + }); + + timeseries + } + + fn new_logger(&self) -> Result { + let file = std::fs::OpenOptions::new() + .create(true) + .write(true) + .truncate(true) + .open(self.log_path.clone()) + .with_context(|| { + format!("error opening log file {}", self.log_path) + })?; + + let decorator = PlainDecorator::new(file); + let drain = FullFormat::new(decorator).build().fuse(); + let drain = Async::new(drain).build().fuse(); + + Ok(slog::Logger::root(drain, o!(FileKv))) + } +} diff --git a/openapi/clickhouse-admin-server.json b/openapi/clickhouse-admin-server.json index c82c7c0d8e..50c526569e 100644 --- a/openapi/clickhouse-admin-server.json +++ b/openapi/clickhouse-admin-server.json @@ -616,7 +616,7 @@ "type": "object", "properties": { "time": { - "$ref": "#/components/schemas/Timestamp" + "type": "string" }, "value": { "type": "number", @@ -628,17 +628,6 @@ "value" ] }, - "Timestamp": { - "anyOf": [ - { - "type": "string", - "format": "date-time" - }, - { - "type": "string" - } - ] - }, "SystemTable": { "description": "Available metrics tables in the `system` database", "type": "string", diff --git a/openapi/clickhouse-admin-single.json b/openapi/clickhouse-admin-single.json index b00bf56314..c6b99da245 100644 --- a/openapi/clickhouse-admin-single.json +++ b/openapi/clickhouse-admin-single.json @@ -131,7 +131,7 @@ "type": "object", "properties": { "time": { - "$ref": "#/components/schemas/Timestamp" + "type": "string" }, "value": { "type": "number", @@ -143,17 +143,6 @@ "value" ] }, - "Timestamp": { - "anyOf": [ - { - "type": "string", - "format": "date-time" - }, - { - "type": "string" - } - ] - }, "SystemTable": 
{ "description": "Available metrics tables in the `system` database", "type": "string", diff --git a/package-manifest.toml b/package-manifest.toml index 83b1ba8168..b28ac7d59f 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -166,7 +166,8 @@ source.packages = [ "internal-dns-cli.tar.gz", "omicron-clickhouse-admin.tar.gz", "zone-setup.tar.gz", - "zone-network-install.tar.gz" + "zone-network-install.tar.gz", + "clickana.tar.gz" ] output.type = "zone" @@ -197,7 +198,8 @@ source.packages = [ "internal-dns-cli.tar.gz", "omicron-clickhouse-admin.tar.gz", "zone-setup.tar.gz", - "zone-network-install.tar.gz" + "zone-network-install.tar.gz", + "clickana.tar.gz" ] output.type = "zone" @@ -924,3 +926,12 @@ service_name = "probe" source.type = "composite" source.packages = ["thundermuffin.tar.gz"] output.type = "zone" + +[package.clickana] +service_name = "clickana" +only_for_targets.image = "standard" +source.type = "local" +source.rust.binary_names = ["clickana"] +source.rust.release = true +output.type = "zone" +output.intermediate_only = true \ No newline at end of file diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index db250eb914..7c5fb44d13 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -76,6 +76,7 @@ use omicron_common::address::WICKETD_NEXUS_PROXY_PORT; use omicron_common::address::WICKETD_PORT; use omicron_common::address::{ get_internal_dns_server_addresses, CLICKHOUSE_ADMIN_PORT, + CLICKHOUSE_TCP_PORT, }; use omicron_common::address::{Ipv6Subnet, NEXUS_TECHPORT_EXTERNAL_PORT}; use omicron_common::address::{BOOTSTRAP_ARTIFACT_PORT, COCKROACH_ADMIN_PORT}; @@ -1597,13 +1598,20 @@ impl ServiceManager { addr.to_string() }; + // The ClickHouse client connects via the TCP port + let ch_address = { + let mut addr = *address; + addr.set_port(CLICKHOUSE_TCP_PORT); + addr.to_string() + }; + let clickhouse_admin_config = PropertyGroupBuilder::new("config") .add_property("http_address", "astring", 
admin_address) .add_property( "ch_address", "astring", - address.to_string(), + ch_address.to_string(), ) .add_property( "ch_binary", @@ -1668,13 +1676,20 @@ impl ServiceManager { addr.to_string() }; + // The ClickHouse client connects via the TCP port + let ch_address = { + let mut addr = *address; + addr.set_port(CLICKHOUSE_TCP_PORT); + addr.to_string() + }; + let clickhouse_admin_config = PropertyGroupBuilder::new("config") .add_property("http_address", "astring", admin_address) .add_property( "ch_address", "astring", - address.to_string(), + ch_address.to_string(), ) .add_property( "ch_binary",