Skip to content

Commit

Permalink
fix: use monitoring spec instead of bool
Browse files Browse the repository at this point in the history
  • Loading branch information
nathanielc committed Jan 30, 2024
1 parent e741a23 commit e6477ab
Show file tree
Hide file tree
Showing 21 changed files with 135 additions and 70 deletions.
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ WORKDIR /home/builder/keramik/
# Use the same ids as the parent docker image by default
# ARG UID=1001
# ARG GID=1001
USER root

# Copy in source code
COPY . .
Expand Down
3 changes: 2 additions & 1 deletion keramik/src/monitoring.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ metadata:
name: network-with-monitoring
spec:
replicas: 2
monitoring: true
monitoring:
namespaced: true
```
To view the metrics and traces, port-forward the services:
Expand Down
4 changes: 3 additions & 1 deletion keramik/src/setup_network.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ metadata:
name: <unique-name>-small
spec:
replicas: 2
monitoring: true # Required if you plan to run a simulation
# Required if you plan to run a simulation
monitoring:
namespaced: true
```
The `<unique-name>` can be any unique string; your initials are a good default if you are deploying the network to a cloud cluster.
Expand Down
6 changes: 4 additions & 2 deletions keramik/src/simulation.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ metadata:
name: custom-js-ceramic
spec:
replicas: 2
monitoring: true
monitoring:
namespaced: true
ceramic:
- image: ceramicnetwork/composedb:dev
imagePullPolicy: IfNotPresent
Expand All @@ -111,7 +112,8 @@ metadata:
name: custom-ipfs
spec:
replicas: 2
monitoring: true
monitoring:
namespaced: true
ceramic:
- ipfs:
rust:
Expand Down
1 change: 0 additions & 1 deletion operator/src/monitoring/prometheus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,6 @@ fn config_map_data() -> BTreeMap<String, String> {
- targets:
- 'localhost:9090'
- 'otel:9464'
- 'otel:9465'
- 'otel:8888'"#
.to_owned(),
)])
Expand Down
3 changes: 0 additions & 3 deletions operator/src/network/ceramic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,6 @@ pub struct NetworkConfig {
pub network_type: NetworkType,
pub eth_rpc_url: String,
pub cas_api_url: String,
pub monitoring: bool,
}

impl Default for NetworkConfig {
Expand All @@ -178,7 +177,6 @@ impl Default for NetworkConfig {
network_type: NetworkType::default(),
eth_rpc_url: format!("http://{GANACHE_SERVICE_NAME}:8545"),
cas_api_url: format!("http://{CAS_SERVICE_NAME}:8081"),
monitoring: false,
}
}
}
Expand All @@ -194,7 +192,6 @@ impl From<&NetworkSpec> for NetworkConfig {
.unwrap_or(default.network_type),
eth_rpc_url: value.eth_rpc_url.to_owned().unwrap_or(default.eth_rpc_url),
cas_api_url: value.cas_api_url.to_owned().unwrap_or(default.cas_api_url),
monitoring: value.monitoring.unwrap_or(default.monitoring),
}
}
}
Expand Down
37 changes: 31 additions & 6 deletions operator/src/network/controller.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ use crate::{
ceramic::{self, CeramicBundle, CeramicConfigs, CeramicInfo, NetworkConfig},
datadog::DataDogConfig,
ipfs_rpc::{HttpRpcClient, IpfsRpcClient},
peers, CasSpec, Network, NetworkStatus, NetworkType,
peers, CasSpec, MonitoringSpec, Network, NetworkStatus, NetworkType,
},
utils::Clock,
CONTROLLER_NAME,
Expand Down Expand Up @@ -179,6 +179,29 @@ pub async fn run() {

const MAX_CERAMICS: usize = 10;

/// Effective monitoring settings for a network, resolved from the optional
/// `MonitoringSpec` carried on the network spec.
///
/// `Default` is derived rather than hand-written: the only field is a `bool`,
/// whose default is `false`, so monitoring stays off unless explicitly enabled.
#[derive(Debug, Default)]
struct MonitoringConfig {
    /// When `true`, the reconcile pass sets up monitoring for the network.
    namespaced: bool,
}

/// Resolves the effective monitoring configuration from the optional spec.
///
/// A missing spec, or a spec whose `namespaced` field is unset, falls back to
/// the value from `MonitoringConfig::default()`.
impl From<&Option<MonitoringSpec>> for MonitoringConfig {
    fn from(value: &Option<MonitoringSpec>) -> Self {
        let fallback = MonitoringConfig::default();
        // Flatten the two levels of optionality (spec present? field present?)
        // before applying the fallback.
        let namespaced = value
            .as_ref()
            .and_then(|spec| spec.namespaced)
            .unwrap_or(fallback.namespaced);
        Self { namespaced }
    }
}

/// Perform a reconcile pass for the Network CRD
async fn reconcile(
network: Arc<Network>,
Expand Down Expand Up @@ -229,9 +252,9 @@ async fn reconcile(
let ns = apply_network_namespace(cx.clone(), network.clone()).await?;

let net_config: NetworkConfig = spec.into();
let monitoring_config: MonitoringConfig = (&spec.monitoring).into();

if net_config.monitoring {
info!("configuring opentelemetry monitoring");
if monitoring_config.namespaced {
let orefs = network
.controller_owner_ref(&())
.map(|oref| vec![oref])
Expand Down Expand Up @@ -843,8 +866,8 @@ mod tests {
network::{
ipfs_rpc::{tests::MockIpfsRpcClientTest, PeerStatus},
stub::{CeramicStub, Stub},
BootstrapSpec, CasSpec, CeramicSpec, DataDogSpec, GoIpfsSpec, IpfsSpec, NetworkSpec,
NetworkStatus, NetworkType, ResourceLimitsSpec, RustIpfsSpec,
BootstrapSpec, CasSpec, CeramicSpec, DataDogSpec, GoIpfsSpec, IpfsSpec, MonitoringSpec,
NetworkSpec, NetworkStatus, NetworkType, ResourceLimitsSpec, RustIpfsSpec,
},
utils::{
test::{timeout_after_1s, ApiServerVerifier, WithStatus},
Expand Down Expand Up @@ -3423,7 +3446,9 @@ mod tests {
async fn monitoring() {
// Setup network spec and status
let network = Network::test().with_spec(NetworkSpec {
monitoring: Some(true),
monitoring: Some(MonitoringSpec {
namespaced: Some(true),
}),
..Default::default()
});
let mock_rpc_client = default_ipfs_rpc_mock();
Expand Down
10 changes: 9 additions & 1 deletion operator/src/network/spec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ pub struct NetworkSpec {
/// as well as running every container with unlimited resources.
pub dev_mode: Option<bool>,
/// Enable monitoring resources to be deployed into the network.
pub monitoring: Option<bool>,
pub monitoring: Option<MonitoringSpec>,
}

/// Local network ID.
Expand Down Expand Up @@ -241,3 +241,11 @@ pub struct ResourceLimitsSpec {
/// Ephemeral storage resource limit
pub storage: Option<Quantity>,
}

/// Describes how monitoring resources are deployed for the network.
///
/// Set as the optional `monitoring` field of the network spec; field names
/// are serialized in camelCase.
#[derive(Serialize, Deserialize, Debug, Default, PartialEq, Clone, JsonSchema)]
#[serde(rename_all = "camelCase")]
pub struct MonitoringSpec {
/// Deploy monitoring resources into the network namespace directly.
/// NOTE(review): the controller appears to treat an unset value as `false` — confirm.
pub namespaced: Option<bool>,
}
2 changes: 1 addition & 1 deletion operator/src/network/testdata/opentelemetry_config
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Request {
body: {
"apiVersion": "v1",
"data": {
"otel-config.yaml": "\n receivers:\n # Push based metrics\n otlp:\n protocols:\n grpc:\n endpoint: 0.0.0.0:4317\n processors:\n batch:\n\n exporters:\n # This is unused but can be easily added for debugging.\n logging:\n # can be one of detailed | normal | basic\n verbosity: detailed\n # Log all messages, do not sample\n sampling_initial: 1\n sampling_thereafter: 1\n otlp/jaeger:\n endpoint: jaeger:4317\n tls:\n insecure: true\n prometheus:\n endpoint: 0.0.0.0:9090\n # Keep stale metrics around for 1h before dropping\n # This helps as simulation metrics are stale once the simulation stops.\n metric_expiration: 1h\n resource_to_telemetry_conversion:\n enabled: true\n service:\n pipelines:\n traces:\n receivers: [otlp]\n processors: [batch]\n exporters: [otlp/jaeger]\n metrics:\n receivers: [otlp]\n processors: [batch]\n exporters: [prometheus]\n # Enable telemetry on the collector itself\n telemetry:\n logs:\n level: info\n metrics:\n level: detailed\n address: 0.0.0.0:8888"
"otel-config.yaml": "---\nreceivers:\n # Push based metrics\n otlp:\n protocols:\n grpc:\n endpoint: 0.0.0.0:4317\n # Pull based metrics\n prometheus:\n config:\n scrape_configs:\n - job_name: 'kubernetes-service-endpoints'\n scrape_interval: 10s\n scrape_timeout: 1s\n\n kubernetes_sd_configs:\n - role: pod\n\n # Only container ports named `metrics` will be considered valid targets.\n #\n # Setup relabel rules to give meaning to the following k8s annotations:\n # prometheus/path - URL path of the metrics endpoint\n #\n # Example:\n # annotations:\n # prometheus/path: \"/api/v0/metrics\"\n relabel_configs:\n - source_labels: [__meta_kubernetes_pod_container_port_name]\n action: keep\n regex: \"metrics\"\n - source_labels: [__meta_kubernetes_pod_annotation_prometheus_path]\n action: replace\n target_label: __metrics_path__\n regex: (.+)\n - source_labels: [__meta_kubernetes_namespace]\n action: replace\n target_label: kubernetes_namespace\n - source_labels: [__meta_kubernetes_pod_name]\n action: replace\n target_label: kubernetes_pod\n - source_labels: [__meta_kubernetes_pod_container_name]\n action: replace\n target_label: kubernetes_container\n\nprocessors:\n batch:\n\nexporters:\n # This is unused but can be easily added for debugging.\n logging:\n # can be one of detailed | normal | basic\n verbosity: detailed\n # Log all messages, do not sample\n sampling_initial: 1\n sampling_thereafter: 1\n otlp/jaeger:\n endpoint: jaeger:4317\n tls:\n insecure: true\n prometheus:\n endpoint: 0.0.0.0:9464\n # Keep stale metrics around for 1h before dropping\n # This helps as simulation metrics are stale once the simulation stops.\n metric_expiration: 1h\n resource_to_telemetry_conversion:\n enabled: true\n prometheus/simulation:\n endpoint: 0.0.0.0:9465\n # Keep stale metrics around for 1h before dropping\n # This helps as simulation metrics are stale once the simulation stops.\n metric_expiration: 1h\n resource_to_telemetry_conversion:\n enabled: true\n\nservice:\n 
pipelines:\n traces:\n receivers: [otlp]\n processors: [batch]\n exporters: [otlp/jaeger]\n metrics:\n receivers: [otlp,prometheus]\n processors: [batch]\n exporters: [prometheus]\n metrics/simulation:\n receivers: [otlp]\n processors: [batch]\n exporters: [prometheus/simulation]\n # Enable telemetry on the collector itself\n telemetry:\n logs:\n level: info\n metrics:\n level: detailed\n address: 0.0.0.0:8888"
},
"kind": "ConfigMap",
"metadata": {
Expand Down
12 changes: 9 additions & 3 deletions operator/src/network/testdata/opentelemetry_service
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,16 @@ Request {
"targetPort": 4317
},
{
"name": "prom-metrics",
"port": 9090,
"name": "all-metrics",
"port": 9464,
"protocol": "TCP",
"targetPort": 9090
"targetPort": 9464
},
{
"name": "sim-metrics",
"port": 9465,
"protocol": "TCP",
"targetPort": 9465
},
{
"name": "self-metrics",
Expand Down
11 changes: 7 additions & 4 deletions operator/src/network/testdata/opentelemetry_stateful_set
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@ Request {
"spec": {
"containers": [
{
"command": [
"/otelcol-custom",
"args": [
"--config=/config/otel-config.yaml"
],
"image": "public.ecr.aws/r5b3e0r5/3box/otelcol",
Expand All @@ -44,8 +43,12 @@ Request {
"name": "otlp-receiver"
},
{
"containerPort": 9090,
"name": "prom-metrics"
"containerPort": 9464,
"name": "all-metrics"
},
{
"containerPort": 9465,
"name": "sim-metrics"
},
{
"containerPort": 8888,
Expand Down
2 changes: 1 addition & 1 deletion operator/src/network/testdata/prom_config
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Request {
body: {
"apiVersion": "v1",
"data": {
"prom-config.yaml": "\n global:\n scrape_interval: 10s\n scrape_timeout: 5s\n\n scrape_configs:\n - job_name: services\n metrics_path: /metrics\n honor_labels: true\n static_configs:\n - targets:\n - 'localhost:9090'\n - 'otel:9090'\n - 'otel:8888'"
"prom-config.yaml": "\n global:\n scrape_interval: 10s\n scrape_timeout: 5s\n\n scrape_configs:\n - job_name: services\n metrics_path: /metrics\n honor_labels: true\n static_configs:\n - targets:\n - 'localhost:9090'\n - 'otel:9464'\n - 'otel:8888'"
},
"kind": "ConfigMap",
"metadata": {
Expand Down
Loading

0 comments on commit e6477ab

Please sign in to comment.