Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: metrics for services and checks #519

Open
wants to merge 27 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
f76470c
poc: a metrics module for pebble
IronCore864 Nov 13, 2024
6d8ee59
chore: undo unnecessary change
IronCore864 Nov 14, 2024
b4abc9a
chore: undo unnecessary change
IronCore864 Nov 14, 2024
a274276
chore: undo unnecessary change
IronCore864 Nov 14, 2024
a2c07e6
chore: metrics identity basic auth poc
IronCore864 Nov 26, 2024
4ebb633
chore: a poc for metrics with labels
IronCore864 Nov 27, 2024
7468b95
poc: remove adding identities using env vars according to comment in …
IronCore864 Nov 28, 2024
790a8f9
chore: update tests for the metrics lib poc
IronCore864 Nov 28, 2024
272005b
chore: refactor identities and access according to spec review
IronCore864 Dec 9, 2024
5be3e96
feat: use sha512 to verify password
IronCore864 Jan 21, 2025
a6c374d
feat: move the metrics api to /v1/metrics
IronCore864 Jan 21, 2025
1bd54cb
chore: remove Username from apiBasicIdentity
IronCore864 Jan 21, 2025
98ea11e
chore: revert changes on user state
IronCore864 Jan 21, 2025
68c18b7
Merge branch 'master' into poc-custom-metrics-lib
IronCore864 Jan 21, 2025
7fc255e
chore: fix failed identity tests
IronCore864 Jan 21, 2025
31a0617
test: unit tests for basic identity
IronCore864 Jan 22, 2025
a57f041
chore: rework the metrics for services
IronCore864 Jan 24, 2025
b7a442f
chore: add metrics for checks, not done
IronCore864 Jan 24, 2025
8ebebb8
chore: refactor metrics, add open telemetry writer
IronCore864 Feb 11, 2025
363eaf0
Merge branch 'master' into poc-custom-metrics-lib
IronCore864 Feb 11, 2025
a1db1a6
chore: refactor according to review, fix check counter reset issue
IronCore864 Feb 11, 2025
047cc42
Merge branch 'master' into poc-custom-metrics-lib
IronCore864 Feb 11, 2025
c527344
chore: add a test for check metrics
IronCore864 Feb 12, 2025
5476299
test: add tests for open telemetry writer
IronCore864 Feb 12, 2025
9e5b65a
test: service metrics
IronCore864 Feb 12, 2025
d678766
chore: update tests
IronCore864 Feb 12, 2025
c99fa53
chore: fix linting
IronCore864 Feb 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file removed .fuse_hidden0000020d00000002
Empty file.
11 changes: 5 additions & 6 deletions client/identities.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ type Identity struct {
Access IdentityAccess `json:"access" yaml:"access"`

// One or more of the following type-specific configuration fields must be
// non-nil (currently the only types are "local" and "basicauth").
Local *LocalIdentity `json:"local,omitempty" yaml:"local,omitempty"`
BasicAuth *BasicAuthIdentity `json:"basicauth,omitempty" yaml:"basicauth,omitempty"`
// non-nil (currently the only types are "local" and "basic").
Local *LocalIdentity `json:"local,omitempty" yaml:"local,omitempty"`
Basic *BasicIdentity `json:"basic,omitempty" yaml:"basic,omitempty"`
}

// IdentityAccess defines the access level for an identity.
Expand All @@ -48,10 +48,9 @@ type LocalIdentity struct {
UserID *uint32 `json:"user-id" yaml:"user-id"`
}

// BasicAuthIdentity holds identity configuration specific to the "basicauth" type
// BasicIdentity holds identity configuration specific to the "basic" type
// (for username/password authentication).
type BasicAuthIdentity struct {
Username string `json:"username" yaml:"username"`
type BasicIdentity struct {
Password string `json:"password" yaml:"password"`
}

Expand Down
4 changes: 2 additions & 2 deletions internals/cli/cmd_identities.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ func (cmd *cmdIdentities) writeText(identities map[string]*client.Identity) erro
if identity.Local != nil {
types = append(types, "local")
}
if identity.BasicAuth != nil {
types = append(types, "basicauth")
if identity.Basic != nil {
types = append(types, "basic")
}
sort.Strings(types)
if len(types) == 0 {
Expand Down
18 changes: 0 additions & 18 deletions internals/cli/cmd_run.go
Original file line number Diff line number Diff line change
Expand Up @@ -248,24 +248,6 @@ func runDaemon(rcmd *cmdRun, ch chan os.Signal, ready chan<- func()) error {
}
}

metricsEndpointUsername := os.Getenv("METRICS_ENDPOINT_USERNAME")
metricsEndpointPassword := os.Getenv("METRICS_ENDPOINT_PASSWORD")
if metricsEndpointUsername != "" && metricsEndpointPassword != "" {
identities := map[string]*client.Identity{
metricsEndpointUsername: &client.Identity{
Access: client.ReadAccess,
BasicAuth: &client.BasicAuthIdentity{
Username: metricsEndpointUsername,
Password: metricsEndpointPassword,
},
},
}
err = rcmd.client.ReplaceIdentities(identities)
if err != nil {
return fmt.Errorf("cannot replace identities: %w", err)
}
}

// The "stop" channel is used by the "enter" command to stop the daemon.
var stop chan struct{}
if ready != nil {
Expand Down
15 changes: 15 additions & 0 deletions internals/daemon/access.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,18 @@ func (ac UserAccess) CheckAccess(d *Daemon, r *http.Request, user *UserState) Re
// An identity explicitly set to "access: untrusted" isn't allowed.
return Unauthorized(accessDenied)
}

// MetricsAccess allows requests only from users whose identity has
// state.MetricsAccess or state.AdminAccess; every other request is denied.
type MetricsAccess struct{}

// CheckAccess returns nil (access granted) when user is non-nil and its
// identity's access level is state.MetricsAccess or state.AdminAccess.
// Otherwise it returns an Unauthorized response.
func (ac MetricsAccess) CheckAccess(d *Daemon, r *http.Request, user *UserState) Response {
if user == nil {
// No user state was supplied for this request, so deny it.
return Unauthorized(accessDenied)
}
switch user.Identity.Access {
case state.MetricsAccess, state.AdminAccess:
return nil
}
// Any other access level is not allowed to use this endpoint.
return Unauthorized(accessDenied)
}
2 changes: 1 addition & 1 deletion internals/daemon/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ var API = []*Command{{
POST: v1PostIdentities,
}, {
Path: "/metrics",
ReadAccess: UserAccess{},
ReadAccess: MetricsAccess{},
GET: Metrics,
}}

Expand Down
43 changes: 31 additions & 12 deletions internals/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,12 @@ func formatLabelKey(labels []string, labelValues []string) string {

// Sort labels for consistency
sort.Strings(labelPairs)
return strings.Join(labelPairs, ",")
res := strings.Join(labelPairs, ",")
if res == "" {
// a special key for situations where no labels are used
res = "__empty__"
}
return res
}

// NewCounterVec creates a new counter vector.
Expand Down Expand Up @@ -117,8 +122,8 @@ func (r *MetricsRegistry) newMetricVec(name, help string, labels []string, metri
func (v *MetricVec) WithLabelValues(labelValues ...string) *Metric {
if len(labelValues) != len(v.labels) {
panic(fmt.Errorf(
"%q has %d variable labels named %q but %d values %q were provided",
v,
"%s has %d variable labels named %q but %d values %q were provided",
v.Name,
len(v.labels),
v.labels,
len(labelValues),
Expand Down Expand Up @@ -192,15 +197,29 @@ func (r *MetricsRegistry) GatherMetrics() string {
output += fmt.Sprintf("# HELP %s %s\n", vec.Name, vec.Help)
output += fmt.Sprintf("# TYPE %s %s\n", vec.Name, vec.Type)

for labelKey, metric := range vec.metrics {
switch v := metric.value.(type) {
case int64:
output += fmt.Sprintf("%s{%s} %d\n", vec.Name, labelKey, v)
case float64:
output += fmt.Sprintf("%s{%s} %f\n", vec.Name, labelKey, v)
default:
// Fallback for other types
output += fmt.Sprintf("%s{%s} %v\n", vec.Name, labelKey, v)
if len(vec.labels) == 0 { // Handle metrics without labels
for _, metric := range vec.metrics {
switch v := metric.value.(type) {
case int64:
output += fmt.Sprintf("%s %d\n", vec.Name, v) // No curly braces
case float64:
output += fmt.Sprintf("%s %.2f\n", vec.Name, v) // No curly braces
default:
output += fmt.Sprintf("%s %v\n", vec.Name, v) // Fallback
}
}
} else {
// Handle metrics with labels.
for labelKey, metric := range vec.metrics {
switch v := metric.value.(type) {
case int64:
output += fmt.Sprintf("%s{%s} %d\n", vec.Name, labelKey, v)
case float64:
output += fmt.Sprintf("%s{%s} %.2f\n", vec.Name, labelKey, v)
default:
// Fallback for other types
output += fmt.Sprintf("%s{%s} %v\n", vec.Name, labelKey, v)
}
}
}
}
Expand Down
65 changes: 39 additions & 26 deletions internals/metrics/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,53 +33,66 @@ type RegistryTestSuite struct {

func (s *RegistryTestSuite) SetUpTest(c *C) {
s.registry = &MetricsRegistry{
metrics: make(map[string]*Metric),
metricVecs: make(map[string]*MetricVec),
}
}

func (s *RegistryTestSuite) TestCounter(c *C) {
s.registry.NewMetric("test_counter", MetricTypeCounter, "Test counter")
s.registry.IncCounter("test_counter")
s.registry.IncCounter("test_counter")
c.Check(s.registry.metrics["test_counter"].value.(int64), Equals, int64(2))
func (s *RegistryTestSuite) TestCounterWithoutLabels(c *C) {
labels := []string{}
testCounter := s.registry.NewCounterVec("test_counter", "Total number of something processed", labels)
testCounter.WithLabelValues().Inc()
c.Check(s.registry.metricVecs["test_counter"].metrics[formatLabelKey(labels, []string{})].value.(int64), Equals, int64(1))
testCounter.WithLabelValues().Inc()
c.Check(s.registry.metricVecs["test_counter"].metrics[formatLabelKey(labels, []string{})].value.(int64), Equals, int64(2))
}

func (s *RegistryTestSuite) TestGauge(c *C) {
s.registry.NewMetric("test_gauge", MetricTypeGauge, "Test gauge")
s.registry.SetGauge("test_gauge", 10)
c.Check(s.registry.metrics["test_gauge"].value.(int64), Equals, int64(10))
s.registry.SetGauge("test_gauge", 20)
c.Check(s.registry.metrics["test_gauge"].value.(int64), Equals, int64(20))
func (s *RegistryTestSuite) TestCounterWithLabels(c *C) {
labels := []string{"operation", "status"}
testCounter := s.registry.NewCounterVec("test_counter", "Total number of something processed", labels)
testCounter.WithLabelValues("read", "success").Inc()
c.Check(s.registry.metricVecs["test_counter"].metrics[formatLabelKey(labels, []string{"read", "success"})].value.(int64), Equals, int64(1))
testCounter.WithLabelValues("write", "fail").Add(2)
c.Check(s.registry.metricVecs["test_counter"].metrics[formatLabelKey(labels, []string{"write", "fail"})].value.(int64), Equals, int64(2))
}

func (s *RegistryTestSuite) TestHistogram(c *C) {
s.registry.NewMetric("test_histogram", MetricTypeHistogram, "Test histogram")
s.registry.ObserveHistogram("test_histogram", 1.0)
s.registry.ObserveHistogram("test_histogram", 2.0)
histogramValues := s.registry.metrics["test_histogram"].value.([]float64)
c.Check(len(histogramValues), Equals, 2)
c.Check(histogramValues[0], Equals, 1.0)
c.Check(histogramValues[1], Equals, 2.0)
func (s *RegistryTestSuite) TestGauge(c *C) {
labels := []string{"sensor"}
testGauge := s.registry.NewGaugeVec("test_gauge", "Current value of something", labels)
testGauge.WithLabelValues("temperature").Set(10.0)
c.Check(s.registry.metricVecs["test_gauge"].metrics[formatLabelKey(labels, []string{"temperature"})].value.(float64), Equals, float64(10.0))
testGauge.WithLabelValues("temperature").Set(20.0)
c.Check(s.registry.metricVecs["test_gauge"].metrics[formatLabelKey(labels, []string{"temperature"})].value.(float64), Equals, float64(20.0))
}

func (s *RegistryTestSuite) TestGatherMetrics(c *C) {
s.registry.NewMetric("test_counter", MetricTypeCounter, "Test counter")
s.registry.IncCounter("test_counter")
testCounter := s.registry.NewCounterVec("test_counter", "Total number of something processed", []string{"operation", "status"})
testCounter.WithLabelValues("read", "success").Inc()
testGauge := s.registry.NewGaugeVec("test_gauge", "Current value of something", []string{"sensor"})
testGauge.WithLabelValues("temperature").Set(10.0)
metricsOutput := s.registry.GatherMetrics()
expectedOutput := "# HELP test_counter Total number of something processed\n# TYPE test_counter counter\ntest_counter{operation=read,status=success} 1\n"
expectedOutput += "# HELP test_gauge Current value of something\n# TYPE test_gauge gauge\ntest_gauge{sensor=temperature} 10.00\n"
c.Check(metricsOutput, Equals, expectedOutput)
}

func (s *RegistryTestSuite) TestGatherMetricsWithoutLabels(c *C) {
testCounter := s.registry.NewCounterVec("test_counter", "Total number of something processed", []string{})
testCounter.WithLabelValues().Inc()
metricsOutput := s.registry.GatherMetrics()
expectedOutput := "# HELP test_counter Test counter\n# TYPE test_counter counter\ntest_counter 1\n"
expectedOutput := "# HELP test_counter Total number of something processed\n# TYPE test_counter counter\ntest_counter 1\n"
c.Check(metricsOutput, Equals, expectedOutput)
}

func (s *RegistryTestSuite) TestRaceConditions(c *C) {
s.registry.NewMetric("race_counter", MetricTypeCounter, "Race counter")
counter := s.registry.NewCounterVec("test_counter", "Total number of something processed", []string{})
var wg sync.WaitGroup
for i := 0; i < 1000; i++ {
wg.Add(1)
go func() {
defer wg.Done()
s.registry.IncCounter("race_counter")
counter.WithLabelValues().Inc()
}()
}
wg.Wait()
c.Check(s.registry.metrics["race_counter"].value.(int64), Equals, int64(1000))
c.Check(s.registry.metricVecs["test_counter"].metrics[formatLabelKey([]string{}, []string{})].value.(int64), Equals, int64(1000))
}
Loading
Loading