Skip to content

Commit

Permalink
Set alloy wal truncate_frequency to 15m (#130)
Browse files Browse the repository at this point in the history
* try truncate frequency

* Add flag to be able to set truncate_frequency
  • Loading branch information
QuentinBisson authored Oct 17, 2024
1 parent 3b383d7 commit fa6b6eb
Show file tree
Hide file tree
Showing 8 changed files with 48 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- Add WAL `truncate_frequency` configuration to alloy-metrics, set to 15m by default in the Helm chart.
- Add grafanaOrganization CRD in helm chart.

## [0.7.1] - 2024-10-10
Expand Down
4 changes: 3 additions & 1 deletion helm/observability-operator/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,12 @@ spec:
- --management-cluster-name={{ $.Values.managementCluster.name }}
- --management-cluster-pipeline={{ $.Values.managementCluster.pipeline }}
- --management-cluster-region={{ $.Values.managementCluster.region }}
- --monitoring-agent={{ $.Values.monitoring.agent }}
# Monitoring configuration
- --monitoring-enabled={{ $.Values.monitoring.enabled }}
- --monitoring-agent={{ $.Values.monitoring.agent }}
- --monitoring-sharding-scale-up-series-count={{ $.Values.monitoring.sharding.scaleUpSeriesCount }}
- --monitoring-sharding-scale-down-percentage={{ $.Values.monitoring.sharding.scaleDownPercentage }}
- --monitoring-wal-truncate-frequency={{ $.Values.monitoring.wal.truncateFrequency }}
{{- if .Values.monitoring.prometheusVersion }}
- --prometheus-version={{ $.Values.monitoring.prometheusVersion }}
{{- end }}
Expand Down
22 changes: 22 additions & 0 deletions helm/observability-operator/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@
"monitoring": {
"type": "object",
"properties": {
"agent": {
"type": "string"
},
"enabled": {
"type": "boolean"
},
Expand All @@ -63,6 +66,25 @@
},
"prometheusVersion": {
"type": "string"
},
"sharding": {
"type": "object",
"properties": {
"scaleDownPercentage": {
"type": "number"
},
"scaleUpSeriesCount": {
"type": "integer"
}
}
},
"wal": {
"type": "object",
"properties": {
"truncateFrequency": {
"type": "string"
}
}
}
}
},
Expand Down
3 changes: 3 additions & 0 deletions helm/observability-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ monitoring:
sharding:
scaleUpSeriesCount: 1000000
scaleDownPercentage: 0.20
wal:
# -- Configures how often the WAL is truncated, as a Go duration string (e.g. 15m)
truncateFrequency: 15m

operator:
# -- Configures the resources for the operator deployment
Expand Down
12 changes: 9 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"flag"
"fmt"
"os"
"time"

// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
// to ensure that exec-entrypoint and run can make use of them.
Expand Down Expand Up @@ -75,6 +76,7 @@ var (
monitoringEnabled bool
monitoringShardingScaleUpSeriesCount float64
monitoringShardingScaleDownPercentage float64
monitoringWALTruncateFrequency time.Duration
prometheusVersion string
)

Expand Down Expand Up @@ -114,16 +116,19 @@ func main() {
"The pipeline of the management cluster.")
flag.StringVar(&managementClusterRegion, "management-cluster-region", "",
"The region of the management cluster.")
flag.StringVar(&monitoringAgent, "monitoring-agent", commonmonitoring.MonitoringAgentPrometheus,
fmt.Sprintf("select monitoring agent to use (%s or %s)", commonmonitoring.MonitoringAgentPrometheus, commonmonitoring.MonitoringAgentAlloy))
// Monitoring configuration flags.
flag.BoolVar(&monitoringEnabled, "monitoring-enabled", false,
"Enable monitoring at the management cluster level.")
flag.StringVar(&monitoringAgent, "monitoring-agent", commonmonitoring.MonitoringAgentPrometheus,
fmt.Sprintf("select monitoring agent to use (%s or %s)", commonmonitoring.MonitoringAgentPrometheus, commonmonitoring.MonitoringAgentAlloy))
flag.Float64Var(&monitoringShardingScaleUpSeriesCount, "monitoring-sharding-scale-up-series-count", 0,
"Configures the number of time series needed to add an extra prometheus agent shard.")
flag.Float64Var(&monitoringShardingScaleDownPercentage, "monitoring-sharding-scale-down-percentage", 0,
"Configures the percentage of removed series to scale down the number of prometheus agent shards.")
flag.StringVar(&prometheusVersion, "prometheus-version", "",
"The version of Prometheus Agents to deploy.")
flag.DurationVar(&monitoringWALTruncateFrequency, "monitoring-wal-truncate-frequency", 2*time.Hour,
"Configures how frequently the Write-Ahead Log (WAL) truncates segments.")
opts := zap.Options{
Development: false,
}
Expand Down Expand Up @@ -213,7 +218,8 @@ func main() {
ScaleUpSeriesCount: monitoringShardingScaleUpSeriesCount,
ScaleDownPercentage: monitoringShardingScaleDownPercentage,
},
PrometheusVersion: prometheusVersion,
WALTruncateFrequency: monitoringWALTruncateFrequency,
PrometheusVersion: prometheusVersion,
}

prometheusAgentService := prometheusagent.PrometheusAgentService{
Expand Down
4 changes: 4 additions & 0 deletions pkg/monitoring/alloy/configmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ func (a *Service) generateAlloyConfig(ctx context.Context, cluster *clusterv1.Cl
QueueConfigMaxSamplesPerSend int
QueueConfigMaxShards int

WALTruncateFrequency string

ExternalLabels map[string]string
}{
RemoteWriteURLEnvVarName: AlloyRemoteWriteURLEnvVarName,
Expand All @@ -139,6 +141,8 @@ func (a *Service) generateAlloyConfig(ctx context.Context, cluster *clusterv1.Cl
QueueConfigMaxSamplesPerSend: commonmonitoring.QueueConfigMaxSamplesPerSend,
QueueConfigMaxShards: commonmonitoring.QueueConfigMaxShards,

WALTruncateFrequency: a.MonitoringConfig.WALTruncateFrequency.String(),

ExternalLabels: map[string]string{
"cluster_id": cluster.Name,
"cluster_type": common.GetClusterType(cluster, a.ManagementCluster),
Expand Down
3 changes: 3 additions & 0 deletions pkg/monitoring/alloy/templates/alloy-config.alloy.template
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ prometheus.remote_write "default" {
max_shards = {{ .QueueConfigMaxShards }}
}
}
wal {
truncate_frequency = "{{ .WALTruncateFrequency }}"
}
external_labels = {
{{- range $key, $value := .ExternalLabels }}
"{{ $key }}" = "{{ $value }}",
Expand Down
3 changes: 3 additions & 0 deletions pkg/monitoring/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package monitoring

import (
"strconv"
"time"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"

Expand All @@ -15,6 +16,8 @@ type Config struct {
Enabled bool
MonitoringAgent string
DefaultShardingStrategy sharding.Strategy
// WALTruncateFrequency is the frequency at which the WAL segments should be truncated.
WALTruncateFrequency time.Duration
// TODO(atlas): validate prometheus version using SemVer
PrometheusVersion string
}
Expand Down

0 comments on commit fa6b6eb

Please sign in to comment.