From f76470c0dbf98fd4230d9365a299aca739103a7d Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Wed, 13 Nov 2024 17:08:26 +0800 Subject: [PATCH 01/41] poc: a metrics module for pebble --- go.mod | 5 +- go.sum | 2 + internals/daemon/api.go | 4 + internals/daemon/api_metrics.go | 35 +++++ internals/daemon/daemon.go | 16 +++ internals/metrics/metrics.go | 211 ++++++++++++++++++++++++++++++ internals/metrics/metrics_test.go | 85 ++++++++++++ 7 files changed, 357 insertions(+), 1 deletion(-) create mode 100644 internals/daemon/api_metrics.go create mode 100644 internals/metrics/metrics.go create mode 100644 internals/metrics/metrics_test.go diff --git a/go.mod b/go.mod index dde8646ac..9c4033752 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,8 @@ module github.com/canonical/pebble -go 1.22 +go 1.22.0 + +toolchain go1.23.1 require ( github.com/canonical/go-flags v0.0.0-20230403090104-105d09a091b8 @@ -8,6 +10,7 @@ require ( github.com/gorilla/mux v1.8.1 github.com/gorilla/websocket v1.5.1 github.com/pkg/term v1.1.0 + golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f golang.org/x/sys v0.21.0 golang.org/x/term v0.21.0 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c diff --git a/go.sum b/go.sum index 31493018c..935059e56 100644 --- a/go.sum +++ b/go.sum @@ -15,6 +15,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/pkg/term v1.1.0 h1:xIAAdCMh3QIAy+5FrE8Ad8XoDhEU4ufwbaSozViP9kk= github.com/pkg/term v1.1.0/go.mod h1:E25nymQcrSllhX42Ok8MRm1+hyBdHY0dCeiKZ9jpNGw= +golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f h1:XdNn9LlyWAhLVp6P/i8QYBW+hlyhrhei9uErw2B5GJo= +golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f/go.mod h1:D5SMRVC3C2/4+F/DB1wZsLRnSNimn2Sp/NPsCrsv8ak= golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= golang.org/x/sys v0.0.0-20200909081042-eff7692f9009/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/internals/daemon/api.go b/internals/daemon/api.go index e6c6de16c..5c13b91bf 100644 --- a/internals/daemon/api.go +++ b/internals/daemon/api.go @@ -108,6 +108,10 @@ var API = []*Command{{ WriteAccess: AdminAccess{}, GET: v1GetIdentities, POST: v1PostIdentities, +}, { + Path: "/metrics", + ReadAccess: OpenAccess{}, + GET: Metrics, }} var ( diff --git a/internals/daemon/api_metrics.go b/internals/daemon/api_metrics.go new file mode 100644 index 000000000..303dac3df --- /dev/null +++ b/internals/daemon/api_metrics.go @@ -0,0 +1,35 @@ +// Copyright (c) 2024 Canonical Ltd +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 3 as +// published by the Free Software Foundation. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package daemon + +import ( + "net/http" + + "github.com/canonical/pebble/internals/metrics" +) + +func Metrics(c *Command, r *http.Request, _ *UserState) Response { + return metricsResponse{} +} + +// metricsResponse is a Response implementation to serve the metrics in a prometheus metrics format. +type metricsResponse struct{} + +func (r metricsResponse) ServeHTTP(w http.ResponseWriter, req *http.Request) { + registry := metrics.GetRegistry() + w.WriteHeader(http.StatusOK) + w.Write([]byte(registry.GatherMetrics())) + +} diff --git a/internals/daemon/daemon.go b/internals/daemon/daemon.go index b39c17975..630529c9c 100644 --- a/internals/daemon/daemon.go +++ b/internals/daemon/daemon.go @@ -33,9 +33,11 @@ import ( "time" "github.com/gorilla/mux" + "golang.org/x/exp/rand" "gopkg.in/tomb.v2" "github.com/canonical/pebble/internals/logger" + "github.com/canonical/pebble/internals/metrics" "github.com/canonical/pebble/internals/osutil" "github.com/canonical/pebble/internals/osutil/sys" "github.com/canonical/pebble/internals/overlord" @@ -366,6 +368,20 @@ func (d *Daemon) Init() error { } logger.Noticef("Started daemon.") + + registry := metrics.GetRegistry() + registry.NewMetric("my_counter", metrics.MetricTypeCounter, "A simple counter") + // Goroutine to update metrics randomly + go func() { + for { + err := registry.IncCounter("my_counter") // Increment by 1 + if err != nil { + fmt.Println("Error incrementing counter:", err) + } + time.Sleep(time.Duration(rand.Intn(5)+1) * time.Second) // Random sleep between 1 and 5 seconds + } + }() + return nil } diff --git a/internals/metrics/metrics.go b/internals/metrics/metrics.go new file mode 100644 index 000000000..c45d0bd33 --- /dev/null +++ b/internals/metrics/metrics.go @@ -0,0 +1,211 @@ +// Copyright (c) 2024 Canonical Ltd +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 3 as +// published by the Free Software Foundation. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package metrics + +import ( + "fmt" + "net/http" + "sync" + "time" + + "github.com/gorilla/mux" + "golang.org/x/exp/rand" +) + +// MetricType models the type of a metric. +type MetricType string + +const ( + MetricTypeCounter MetricType = "counter" + MetricTypeGauge MetricType = "gauge" + MetricTypeHistogram MetricType = "histogram" +) + +// Metric represents a single metric. +type Metric struct { + Name string + Type MetricType + Help string + value interface{} // Can be int64 for counter/gauge, or []float64 for histogram. + mu sync.RWMutex +} + +// MetricsRegistry stores and manages metrics. +type MetricsRegistry struct { + metrics map[string]*Metric + mu sync.RWMutex +} + +// Package-level variable to hold the single registry. +var registry *MetricsRegistry + +// Ensure registry is initialized only once. +var once sync.Once + +// GetRegistry returns the singleton MetricsRegistry instance. +func GetRegistry() *MetricsRegistry { + once.Do(func() { + registry = &MetricsRegistry{ + metrics: make(map[string]*Metric), + } + }) + return registry +} + +// NewMetric registers a new metric. +func (r *MetricsRegistry) NewMetric(name string, metricType MetricType, help string) error { + r.mu.Lock() + defer r.mu.Unlock() + + if _, ok := r.metrics[name]; ok { + return fmt.Errorf("metric with name %s already registered", name) + } + + var value interface{} + switch metricType { + case MetricTypeCounter, MetricTypeGauge: + value = int64(0) + case MetricTypeHistogram: + value = make([]float64, 0) + default: + return fmt.Errorf("invalid metric type: %s", metricType) + } + + r.metrics[name] = &Metric{ + Name: name, + Type: metricType, + Help: help, + value: value, + } + + return nil +} + +// IncCounter increments a counter metric. +func (r *MetricsRegistry) IncCounter(name string) error { + return r.updateMetric(name, MetricTypeCounter, 1) +} + +// SetGauge sets the value of a gauge metric. +func (r *MetricsRegistry) SetGauge(name string, value int64) error { + return r.updateMetric(name, MetricTypeGauge, value) +} + +// ObserveHistogram adds a value to a histogram metric. +func (r *MetricsRegistry) ObserveHistogram(name string, value float64) error { + return r.updateMetric(name, MetricTypeHistogram, value) +} + +// updateMetric updates the value of a metric. +func (r *MetricsRegistry) updateMetric(name string, metricType MetricType, value interface{}) error { + r.mu.RLock() + metric, ok := r.metrics[name] + r.mu.RUnlock() + + if !ok { + return fmt.Errorf("metric with name %s not found", name) + } + + if metric.Type != metricType { + return fmt.Errorf("mismatched metric type for %s", name) + } + + metric.mu.Lock() + defer metric.mu.Unlock() + + switch metricType { + case MetricTypeCounter: + metric.value = metric.value.(int64) + int64(value.(int)) + case MetricTypeGauge: + metric.value = value.(int64) + case MetricTypeHistogram: + metric.value = append(metric.value.([]float64), value.(float64)) + } + + return nil +} + +// GatherMetrics gathers all metrics and formats them in Prometheus exposition format. +func (r *MetricsRegistry) GatherMetrics() string { + r.mu.RLock() + defer r.mu.RUnlock() + + var output string + for _, metric := range r.metrics { + output += fmt.Sprintf("# HELP %s %s\n", metric.Name, metric.Help) + output += fmt.Sprintf("# TYPE %s %s\n", metric.Name, metric.Type) + + switch metric.Type { + case MetricTypeCounter, MetricTypeGauge: + output += fmt.Sprintf("%s %d\n", metric.Name, metric.value.(int64)) + case MetricTypeHistogram: + for _, v := range metric.value.([]float64) { + output += fmt.Sprintf("%s %f\n", metric.Name, v) // Basic histogram representation + } + } + } + + return output +} + +// Example usage +func main() { + // Get the singleton registry + registry := GetRegistry() + + registry.NewMetric("my_counter", MetricTypeCounter, "A simple counter") + registry.NewMetric("my_gauge", MetricTypeGauge, "A simple gauge") + registry.NewMetric("my_histogram", MetricTypeHistogram, "A simple histogram") + + // Goroutine to update metrics randomly + go func() { + for { + // Counter + err := registry.IncCounter("my_counter") // Increment by 1 + if err != nil { + fmt.Println("Error incrementing counter:", err) + } + + // Gauge + gaugeValue := rand.Int63n(100) + err = registry.SetGauge("my_gauge", gaugeValue) + if err != nil { + fmt.Println("Error setting gauge:", err) + } + + // Histogram + histogramValue := rand.Float64() * 10 + err = registry.ObserveHistogram("my_histogram", histogramValue) + if err != nil { + fmt.Println("Error observing histogram:", err) + } + + time.Sleep(time.Duration(rand.Intn(5)+1) * time.Second) // Random sleep between 1 and 5 seconds + } + }() + // Use Gorilla Mux router + router := mux.NewRouter() + router.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write([]byte(registry.GatherMetrics())) + }) + + // Serve on port 2112 + fmt.Println("Serving metrics on :2112/metrics") + err := http.ListenAndServe(":2112", router) // Use the router here + if err != nil { + fmt.Println("Error starting server:", err) + } +} diff --git a/internals/metrics/metrics_test.go b/internals/metrics/metrics_test.go new file mode 100644 index 000000000..7de9d6b95 --- /dev/null +++ b/internals/metrics/metrics_test.go @@ -0,0 +1,85 @@ +// Copyright (c) 2024 Canonical Ltd +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 3 as +// published by the Free Software Foundation. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package metrics + +import ( + "sync" + "testing" + + . "gopkg.in/check.v1" +) + +// Hook up check.v1 into the "go test" runner +func Test(t *testing.T) { TestingT(t) } + +var _ = Suite(&RegistryTestSuite{}) + +// Test Suite structure +type RegistryTestSuite struct { + registry *MetricsRegistry +} + +func (s *RegistryTestSuite) SetUpTest(c *C) { + s.registry = &MetricsRegistry{ + metrics: make(map[string]*Metric), + } +} + +func (s *RegistryTestSuite) TestCounter(c *C) { + s.registry.NewMetric("test_counter", MetricTypeCounter, "Test counter") + s.registry.IncCounter("test_counter") + s.registry.IncCounter("test_counter") + c.Check(s.registry.metrics["test_counter"].value.(int64), Equals, int64(2)) +} + +func (s *RegistryTestSuite) TestGauge(c *C) { + s.registry.NewMetric("test_gauge", MetricTypeGauge, "Test gauge") + s.registry.SetGauge("test_gauge", 10) + c.Check(s.registry.metrics["test_gauge"].value.(int64), Equals, int64(10)) + s.registry.SetGauge("test_gauge", 20) + c.Check(s.registry.metrics["test_gauge"].value.(int64), Equals, int64(20)) +} + +func (s *RegistryTestSuite) TestHistogram(c *C) { + s.registry.NewMetric("test_histogram", MetricTypeHistogram, "Test histogram") + s.registry.ObserveHistogram("test_histogram", 1.0) + s.registry.ObserveHistogram("test_histogram", 2.0) + histogramValues := s.registry.metrics["test_histogram"].value.([]float64) + c.Check(len(histogramValues), Equals, 2) + c.Check(histogramValues[0], Equals, 1.0) + c.Check(histogramValues[1], Equals, 2.0) +} + +func (s *RegistryTestSuite) TestGatherMetrics(c *C) { + s.registry.NewMetric("test_counter", MetricTypeCounter, "Test counter") + s.registry.IncCounter("test_counter") + metricsOutput := s.registry.GatherMetrics() + expectedOutput := "# HELP test_counter Test counter\n# TYPE test_counter counter\ntest_counter 1\n" + c.Check(metricsOutput, Equals, expectedOutput) +} + +func (s *RegistryTestSuite) TestRaceConditions(c *C) { + s.registry.NewMetric("race_counter", MetricTypeCounter, "Race counter") + var wg sync.WaitGroup + for i := 0; i < 1000; i++ { + wg.Add(1) + go func() { + defer wg.Done() + s.registry.IncCounter("race_counter") + }() + } + wg.Wait() + c.Check(s.registry.metrics["race_counter"].value.(int64), Equals, int64(1000)) +} From 6d8ee599dafe5374f6019b5b126d218303e42d73 Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Thu, 14 Nov 2024 11:03:14 +0800 Subject: [PATCH 02/41] chore: undo unnecessary change --- go.mod | 2 -- 1 file changed, 2 deletions(-) diff --git a/go.mod b/go.mod index 9c4033752..d57f54a8f 100644 --- a/go.mod +++ b/go.mod @@ -2,8 +2,6 @@ module github.com/canonical/pebble go 1.22.0 -toolchain go1.23.1 - require ( github.com/canonical/go-flags v0.0.0-20230403090104-105d09a091b8 github.com/canonical/x-go v0.0.0-20230522092633-7947a7587f5b From b4abc9ae285d11a6e5083128d03a2c93cd696ade Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Thu, 14 Nov 2024 11:03:58 +0800 Subject: [PATCH 03/41] chore: undo unnecessary change --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index d57f54a8f..5c9e2bd50 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/canonical/pebble -go 1.22.0 +go 1.22 require ( github.com/canonical/go-flags v0.0.0-20230403090104-105d09a091b8 From a2742767bf6ee81daa48ef961562ec690af1018e Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Thu, 14 Nov 2024 11:48:59 +0800 Subject: [PATCH 04/41] chore: undo unnecessary change --- .fuse_hidden0000020d00000002 | 0 go.mod | 1 - go.sum | 2 -- internals/daemon/daemon.go | 2 +- internals/metrics/metrics.go | 2 +- 5 files changed, 2 insertions(+), 5 deletions(-) create mode 100644 .fuse_hidden0000020d00000002 diff --git a/.fuse_hidden0000020d00000002 b/.fuse_hidden0000020d00000002 new file mode 100644 index 000000000..e69de29bb diff --git a/go.mod b/go.mod index 5c9e2bd50..dde8646ac 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,6 @@ require ( github.com/gorilla/mux v1.8.1 github.com/gorilla/websocket v1.5.1 github.com/pkg/term v1.1.0 - golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f golang.org/x/sys v0.21.0 golang.org/x/term v0.21.0 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c diff --git a/go.sum b/go.sum index 935059e56..31493018c 100644 --- a/go.sum +++ b/go.sum @@ -15,8 +15,6 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/pkg/term v1.1.0 h1:xIAAdCMh3QIAy+5FrE8Ad8XoDhEU4ufwbaSozViP9kk= github.com/pkg/term v1.1.0/go.mod h1:E25nymQcrSllhX42Ok8MRm1+hyBdHY0dCeiKZ9jpNGw= -golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f h1:XdNn9LlyWAhLVp6P/i8QYBW+hlyhrhei9uErw2B5GJo= -golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f/go.mod h1:D5SMRVC3C2/4+F/DB1wZsLRnSNimn2Sp/NPsCrsv8ak= golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= golang.org/x/sys v0.0.0-20200909081042-eff7692f9009/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/internals/daemon/daemon.go b/internals/daemon/daemon.go index 630529c9c..0397a2cb9 100644 --- a/internals/daemon/daemon.go +++ b/internals/daemon/daemon.go @@ -20,6 +20,7 @@ import ( "errors" "fmt" "io" + "math/rand" "net" "net/http" "os" @@ -33,7 +34,6 @@ import ( "time" "github.com/gorilla/mux" - "golang.org/x/exp/rand" "gopkg.in/tomb.v2" "github.com/canonical/pebble/internals/logger" diff --git a/internals/metrics/metrics.go b/internals/metrics/metrics.go index c45d0bd33..8c81b8907 100644 --- a/internals/metrics/metrics.go +++ b/internals/metrics/metrics.go @@ -16,12 +16,12 @@ package metrics import ( "fmt" + "math/rand" "net/http" "sync" "time" "github.com/gorilla/mux" - "golang.org/x/exp/rand" ) // MetricType models the type of a metric. From a2c07e666c5afbc0728308fcd084ca6f108a6d3c Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Tue, 26 Nov 2024 22:29:08 +0800 Subject: [PATCH 05/41] chore: metrics identity basic auth poc --- client/identities.go | 12 +++++- internals/cli/cmd_identities.go | 3 ++ internals/cli/cmd_run.go | 18 +++++++++ internals/daemon/access.go | 4 +- internals/daemon/api.go | 2 +- internals/daemon/api_metrics.go | 1 - internals/daemon/api_notices.go | 18 ++++----- internals/daemon/daemon.go | 41 +++++++++++++------ internals/overlord/state/identities.go | 55 ++++++++++++++++++++++---- internals/overlord/state/state.go | 24 +++++++++-- 10 files changed, 140 insertions(+), 38 deletions(-) diff --git a/client/identities.go b/client/identities.go index fa8354223..f7b21ca1a 100644 --- a/client/identities.go +++ b/client/identities.go @@ -26,8 +26,9 @@ type Identity struct { Access IdentityAccess `json:"access" yaml:"access"` // One or more of the following type-specific configuration fields must be - // non-nil (currently the only type is "local"). - Local *LocalIdentity `json:"local,omitempty" yaml:"local,omitempty"` + // non-nil (currently the only types are "local" and "basicauth"). + Local *LocalIdentity `json:"local,omitempty" yaml:"local,omitempty"` + BasicAuth *BasicAuthIdentity `json:"basicauth,omitempty" yaml:"basicauth,omitempty"` } // IdentityAccess defines the access level for an identity. @@ -47,6 +48,13 @@ type LocalIdentity struct { UserID *uint32 `json:"user-id" yaml:"user-id"` } +// BasicAuthIdentity holds identity configuration specific to the "basicauth" type +// (for username/password authentication). +type BasicAuthIdentity struct { + Username string `json:"username" yaml:"username"` + Password string `json:"password" yaml:"password"` +} + // For future extension. type IdentitiesOptions struct{} diff --git a/internals/cli/cmd_identities.go b/internals/cli/cmd_identities.go index f170068de..a40f5f885 100644 --- a/internals/cli/cmd_identities.go +++ b/internals/cli/cmd_identities.go @@ -112,6 +112,9 @@ func (cmd *cmdIdentities) writeText(identities map[string]*client.Identity) erro if identity.Local != nil { types = append(types, "local") } + if identity.BasicAuth != nil { + types = append(types, "basicauth") + } sort.Strings(types) if len(types) == 0 { types = append(types, "unknown") diff --git a/internals/cli/cmd_run.go b/internals/cli/cmd_run.go index 453fbb9d3..e933955af 100644 --- a/internals/cli/cmd_run.go +++ b/internals/cli/cmd_run.go @@ -248,6 +248,24 @@ func runDaemon(rcmd *cmdRun, ch chan os.Signal, ready chan<- func()) error { } } + metricsEndpointUsername := os.Getenv("METRICS_ENDPOINT_USERNAME") + metricsEndpointPassword := os.Getenv("METRICS_ENDPOINT_PASSWORD") + if metricsEndpointUsername != "" && metricsEndpointPassword != "" { + identities := map[string]*client.Identity{ + metricsEndpointUsername: &client.Identity{ + Access: client.ReadAccess, + BasicAuth: &client.BasicAuthIdentity{ + Username: metricsEndpointUsername, + Password: metricsEndpointPassword, + }, + }, + } + err = rcmd.client.ReplaceIdentities(identities) + if err != nil { + return fmt.Errorf("cannot replace identities: %w", err) + } + } + // The "stop" channel is used by the "enter" command to stop the daemon. var stop chan struct{} if ready != nil { diff --git a/internals/daemon/access.go b/internals/daemon/access.go index 1837b408d..9bf45b220 100644 --- a/internals/daemon/access.go +++ b/internals/daemon/access.go @@ -47,7 +47,7 @@ func (ac AdminAccess) CheckAccess(d *Daemon, r *http.Request, user *UserState) R if user == nil { return Unauthorized(accessDenied) } - if user.Access == state.AdminAccess { + if user.Identity.Access == state.AdminAccess { return nil } // An identity explicitly set to "access: read" or "access: untrusted" isn't allowed. @@ -61,7 +61,7 @@ func (ac UserAccess) CheckAccess(d *Daemon, r *http.Request, user *UserState) Re if user == nil { return Unauthorized(accessDenied) } - switch user.Access { + switch user.Identity.Access { case state.ReadAccess, state.AdminAccess: return nil } diff --git a/internals/daemon/api.go b/internals/daemon/api.go index 5c13b91bf..d1789d7a6 100644 --- a/internals/daemon/api.go +++ b/internals/daemon/api.go @@ -110,7 +110,7 @@ var API = []*Command{{ POST: v1PostIdentities, }, { Path: "/metrics", - ReadAccess: OpenAccess{}, + ReadAccess: UserAccess{}, GET: Metrics, }} diff --git a/internals/daemon/api_metrics.go b/internals/daemon/api_metrics.go index 303dac3df..5061c54a9 100644 --- a/internals/daemon/api_metrics.go +++ b/internals/daemon/api_metrics.go @@ -31,5 +31,4 @@ func (r metricsResponse) ServeHTTP(w http.ResponseWriter, req *http.Request) { registry := metrics.GetRegistry() w.WriteHeader(http.StatusOK) w.Write([]byte(registry.GatherMetrics())) - } diff --git a/internals/daemon/api_notices.go b/internals/daemon/api_notices.go index 8cd274508..35b125454 100644 --- a/internals/daemon/api_notices.go +++ b/internals/daemon/api_notices.go @@ -45,16 +45,16 @@ type addedNotice struct { func v1GetNotices(c *Command, r *http.Request, user *UserState) Response { // TODO(benhoyt): the design of notices presumes UIDs; if in future when we // support identities that aren't UID based, we'll need to fix this. - if user == nil || user.UID == nil { + if user == nil || user.Identity.Local == nil { return Forbidden("cannot determine UID of request, so cannot retrieve notices") } // By default, return notices with the request UID and public notices. - userID := user.UID + userID := &user.Identity.Local.UserID query := r.URL.Query() if len(query["user-id"]) > 0 { - if user.Access != state.AdminAccess { + if user.Identity.Access != state.AdminAccess { return Forbidden(`only admins may use the "user-id" filter`) } var err error @@ -65,7 +65,7 @@ func v1GetNotices(c *Command, r *http.Request, user *UserState) Response { } if len(query["users"]) > 0 { - if user.Access != state.AdminAccess { + if user.Identity.Access != state.AdminAccess { return Forbidden(`only admins may use the "users" filter`) } if len(query["user-id"]) > 0 { @@ -179,7 +179,7 @@ func sanitizeTypesFilter(queryTypes []string) ([]state.NoticeType, error) { } func v1PostNotices(c *Command, r *http.Request, user *UserState) Response { - if user == nil || user.UID == nil { + if user == nil || user.Identity.Local == nil { return Forbidden("cannot determine UID of request, so cannot create notice") } @@ -228,7 +228,7 @@ func v1PostNotices(c *Command, r *http.Request, user *UserState) Response { st.Lock() defer st.Unlock() - noticeId, err := st.AddNotice(user.UID, state.CustomNotice, payload.Key, &state.AddNoticeOptions{ + noticeId, err := st.AddNotice(&user.Identity.Local.UserID, state.CustomNotice, payload.Key, &state.AddNoticeOptions{ Data: data, RepeatAfter: repeatAfter, }) @@ -240,7 +240,7 @@ func v1PostNotices(c *Command, r *http.Request, user *UserState) Response { } func v1GetNotice(c *Command, r *http.Request, user *UserState) Response { - if user == nil || user.UID == nil { + if user == nil || user.Identity.Local == nil { return Forbidden("cannot determine UID of request, so cannot retrieve notice") } noticeID := muxVars(r)["id"] @@ -263,10 +263,10 @@ func noticeViewableByUser(notice *state.Notice, user *UserState) bool { // Notice has no UID, so it's viewable by any user (with a UID). return true } - if user.Access == state.AdminAccess { + if user.Identity.Access == state.AdminAccess { // User is admin, they can view anything. return true } // Otherwise user's UID must match notice's UID. - return *user.UID == userID + return user.Identity.Local.UserID == userID } diff --git a/internals/daemon/daemon.go b/internals/daemon/daemon.go index 0397a2cb9..39b91617d 100644 --- a/internals/daemon/daemon.go +++ b/internals/daemon/daemon.go @@ -117,8 +117,9 @@ type Daemon struct { // UserState represents the state of an authenticated API user. type UserState struct { - Access state.IdentityAccess - UID *uint32 + // Access state.IdentityAccess + // UID *uint32 + Identity *state.Identity } // A ResponseFunc handles one of the individual verbs for a method @@ -148,22 +149,21 @@ const ( accessForbidden ) -func userFromRequest(st *state.State, r *http.Request, ucred *Ucrednet) (*UserState, error) { - if ucred == nil { - // No ucred details, no UserState. Currently, "local" (ucred-based) is - // the only type of identity we support. - return nil, nil +func userFromRequest(st *state.State, r *http.Request, ucred *Ucrednet, username, password string) (*UserState, error) { + var userID *uint32 + if ucred != nil { + userID = &ucred.Uid } st.Lock() - identity := st.IdentityFromInputs(&ucred.Uid) + identity := st.IdentityFromInputs(userID, username, password) st.Unlock() if identity == nil { // No identity that matches these inputs (for now, just UID). return nil, nil } - return &UserState{Access: identity.Access, UID: &ucred.Uid}, nil + return &UserState{Identity: identity}, nil } func (d *Daemon) Overlord() *overlord.Overlord { @@ -214,7 +214,8 @@ func (c *Command) ServeHTTP(w http.ResponseWriter, r *http.Request) { // not good: https://github.com/canonical/pebble/pull/369 var user *UserState if _, isOpen := access.(OpenAccess); !isOpen { - user, err = userFromRequest(c.d.state, r, ucred) + basicAuthUsername, basicAuthPassword, _ := r.BasicAuth() + user, err = userFromRequest(c.d.state, r, ucred, basicAuthUsername, basicAuthPassword) if err != nil { Forbidden("forbidden").ServeHTTP(w, r) return @@ -225,10 +226,26 @@ func (c *Command) ServeHTTP(w http.ResponseWriter, r *http.Request) { if user == nil && ucred != nil { if ucred.Uid == 0 || ucred.Uid == uint32(os.Getuid()) { // Admin if UID is 0 (root) or the UID the daemon is running as. - user = &UserState{Access: state.AdminAccess, UID: &ucred.Uid} + // user = &UserState{Access: state.AdminAccess, UID: &ucred.Uid} + user = &UserState{ + Identity: &state.Identity{ + Access: state.AdminAccess, + Local: &state.LocalIdentity{ + UserID: ucred.Uid, + }, + }, + } } else { // Regular read access if any other local UID. - user = &UserState{Access: state.ReadAccess, UID: &ucred.Uid} + // user = &UserState{Access: state.ReadAccess, UID: &ucred.Uid} + user = &UserState{ + Identity: &state.Identity{ + Access: state.ReadAccess, + Local: &state.LocalIdentity{ + UserID: ucred.Uid, + }, + }, + } } } diff --git a/internals/overlord/state/identities.go b/internals/overlord/state/identities.go index 609b0d922..953d21a9f 100644 --- a/internals/overlord/state/identities.go +++ b/internals/overlord/state/identities.go @@ -28,8 +28,9 @@ type Identity struct { Access IdentityAccess // One or more of the following type-specific configuration fields must be - // non-nil (currently the only type is "local"). - Local *LocalIdentity + // non-nil (currently the only types are "local" and "basicauth"). + Local *LocalIdentity + BasicAuth *BasicAuthIdentity } // IdentityAccess defines the access level for an identity. @@ -47,6 +48,13 @@ type LocalIdentity struct { UserID uint32 } +// BasicAuthIdentity holds identity configuration specific to the "basicauth" type +// (for username/password authentication). +type BasicAuthIdentity struct { + Username string + Password string // Note: In a real application, store a password hash, not the plaintext password. +} + // validate checks that the identity is valid, returning an error if not. func (d *Identity) validate() error { if d == nil { @@ -65,9 +73,18 @@ func (d *Identity) validate() error { switch { case d.Local != nil: + return nil + case d.BasicAuth != nil: + if d.BasicAuth.Username == "" { + return errors.New("basicauth identity must specify username") + } + if d.BasicAuth.Password == "" { + return errors.New("basicauth identity must specify password") + } + return nil default: - return errors.New(`identity must have at least one type ("local")`) + return errors.New(`identity must have at least one type ("local or "basicauth")`) } } @@ -75,19 +92,30 @@ func (d *Identity) validate() error { // for API responses) excludes secrets. The marshalledIdentity type is used // for saving secrets in state. type apiIdentity struct { - Access string `json:"access"` - Local *apiLocalIdentity `json:"local,omitempty"` + Access string `json:"access"` + Local *apiLocalIdentity `json:"local,omitempty"` + BasicAuth *apiBasicAuthIdentity `json:"basicauth,omitempty"` } type apiLocalIdentity struct { UserID *uint32 `json:"user-id"` } +type apiBasicAuthIdentity struct { + Username string `json:"username"` + Password string `json:"password"` +} + // IMPORTANT NOTE: be sure to exclude secrets when adding to this! func (d *Identity) MarshalJSON() ([]byte, error) { ai := apiIdentity{ Access: string(d.Access), - Local: &apiLocalIdentity{UserID: &d.Local.UserID}, + } + if d.Local != nil { + ai.Local = &apiLocalIdentity{UserID: &d.Local.UserID} + } + if d.BasicAuth != nil { + ai.BasicAuth = &apiBasicAuthIdentity{Username: d.BasicAuth.Username, Password: d.BasicAuth.Password} } return json.Marshal(ai) } @@ -108,6 +136,14 @@ func (d *Identity) UnmarshalJSON(data []byte) error { return errors.New("local identity must specify user-id") } identity.Local = &LocalIdentity{UserID: *ai.Local.UserID} + case ai.BasicAuth != nil: + if ai.BasicAuth.Username == "" { + return errors.New("basicauth identity must specify username") + } + if ai.BasicAuth.Password == "" { + return errors.New("basicauth identity must specify password") + } + identity.BasicAuth = &BasicAuthIdentity{Username: ai.BasicAuth.Username, Password: ai.BasicAuth.Password} } // Perform additional validation using the local Identity type. err = identity.validate() @@ -266,7 +302,7 @@ func (s *State) Identities() map[string]*Identity { // IdentityFromInputs returns an identity with the given inputs, or nil // if there is none. -func (s *State) IdentityFromInputs(userID *uint32) *Identity { +func (s *State) IdentityFromInputs(userID *uint32, username, password string) *Identity { s.reading() for _, identity := range s.identities { @@ -275,6 +311,11 @@ func (s *State) IdentityFromInputs(userID *uint32) *Identity { if identity.Local.UserID == *userID { return identity } + case identity.BasicAuth != nil && username != "" && password != "": + // In real code, compare password hashes, not plain text passwords. + if identity.BasicAuth.Username == username && identity.BasicAuth.Password == password { + return identity + } } } return nil diff --git a/internals/overlord/state/state.go b/internals/overlord/state/state.go index 5d339aabf..7f8d358a9 100644 --- a/internals/overlord/state/state.go +++ b/internals/overlord/state/state.go @@ -172,14 +172,20 @@ type marshalledState struct { // marshalledIdentity is used specifically for marshalling to the state // database file. Unlike apiIdentity, it should include secrets. type marshalledIdentity struct { - Access string `json:"access"` - Local *marshalledLocalIdentity `json:"local,omitempty"` + Access string `json:"access"` + Local *marshalledLocalIdentity `json:"local,omitempty"` + BasicAuth *marshalledBasicAuthIdentity `json:"basicauth,omitempty"` } type marshalledLocalIdentity struct { UserID uint32 `json:"user-id"` } +type marshalledBasicAuthIdentity struct { + Username string `json:"username"` + Password string `json:"password"` +} + // MarshalJSON makes State a json.Marshaller func (s *State) MarshalJSON() ([]byte, error) { s.reading() @@ -202,7 +208,12 @@ func (s *State) marshalledIdentities() map[string]*marshalledIdentity { for name, identity := range s.identities { marshalled[name] = &marshalledIdentity{ Access: string(identity.Access), - Local: &marshalledLocalIdentity{UserID: identity.Local.UserID}, + } + if identity.Local != nil { + marshalled[name].Local = &marshalledLocalIdentity{UserID: identity.Local.UserID} + } + if identity.BasicAuth != nil { + marshalled[name].BasicAuth = &marshalledBasicAuthIdentity{Username: identity.BasicAuth.Username, Password: identity.BasicAuth.Password} } } return marshalled @@ -242,7 +253,12 @@ func (s *State) unmarshalIdentities(marshalled map[string]*marshalledIdentity) { s.identities[name] = &Identity{ Name: name, Access: IdentityAccess(mi.Access), - Local: &LocalIdentity{UserID: mi.Local.UserID}, + } + if mi.Local != nil { + s.identities[name].Local = &LocalIdentity{UserID: mi.Local.UserID} + } + if mi.BasicAuth != nil { + s.identities[name].BasicAuth = &BasicAuthIdentity{Username: mi.BasicAuth.Username, Password: mi.BasicAuth.Password} } } } From 4ebb633795752b495f8c5c5ca7fe36dd9ea9563d Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Wed, 27 Nov 2024 20:06:33 +0800 Subject: [PATCH 06/41] chore: a poc for metrics with labels --- internals/daemon/daemon.go | 12 +- internals/metrics/metrics.go | 231 ++++++++++++++++++++--------------- 2 files changed, 141 insertions(+), 102 deletions(-) diff --git a/internals/daemon/daemon.go b/internals/daemon/daemon.go index 39b91617d..d7c5e6250 100644 --- a/internals/daemon/daemon.go +++ b/internals/daemon/daemon.go @@ -387,14 +387,16 @@ func (d *Daemon) Init() error { logger.Noticef("Started daemon.") registry := metrics.GetRegistry() - registry.NewMetric("my_counter", metrics.MetricTypeCounter, "A simple counter") + myCounter := registry.NewCounterVec("my_counter", "Total number of something processed.", []string{"operation", "status"}) + myGauge := registry.NewGaugeVec("my_gauge", "Current value of something.", []string{"sensor"}) // Goroutine to update metrics randomly go func() { for { - err := registry.IncCounter("my_counter") // Increment by 1 - if err != nil { - fmt.Println("Error incrementing counter:", err) - } + myCounter.WithLabelValues("read", "success").Inc() + myCounter.WithLabelValues("write", "success").Add(2) + myCounter.WithLabelValues("read", "failed").Inc() + myGauge.WithLabelValues("temperature").Set(20.0 + rand.Float64()*10.0) + time.Sleep(time.Duration(rand.Intn(5)+1) * time.Second) // Random sleep between 1 and 5 seconds } }() diff --git a/internals/metrics/metrics.go b/internals/metrics/metrics.go index 8c81b8907..4c654b2e9 100644 --- a/internals/metrics/metrics.go +++ b/internals/metrics/metrics.go @@ -18,6 +18,8 @@ import ( "fmt" "math/rand" "net/http" + "sort" + "strings" "sync" "time" @@ -28,24 +30,31 @@ import ( type MetricType string const ( - MetricTypeCounter MetricType = "counter" - MetricTypeGauge MetricType = "gauge" - MetricTypeHistogram MetricType = "histogram" + MetricTypeCounter MetricType = "counter" + MetricTypeGauge MetricType = "gauge" ) -// Metric represents a single metric. +// MetricVec represents a collection of metrics with the same name and help but different label values. +type MetricVec struct { + Name string + Help string + Type MetricType + metrics map[string]*Metric // Key is the label values string (e.g., "region=foo,service=bar") + labels []string // Label names (e.g., ["region", "service"]) + mu sync.RWMutex +} + +// Metric represents a single metric within a MetricVec. type Metric struct { - Name string - Type MetricType - Help string - value interface{} // Can be int64 for counter/gauge, or []float64 for histogram. - mu sync.RWMutex + LabelValues map[string]string // Label values for this metric + value interface{} // Can be int64 for counter/gauge, or []float64 for histogram. + mu sync.RWMutex // Mutex for individual metric } -// MetricsRegistry stores and manages metrics. +// MetricsRegistry stores and manages metric vectors. type MetricsRegistry struct { - metrics map[string]*Metric - mu sync.RWMutex + metricVecs map[string]*MetricVec + mu sync.RWMutex } // Package-level variable to hold the single registry. @@ -58,101 +67,140 @@ var once sync.Once func GetRegistry() *MetricsRegistry { once.Do(func() { registry = &MetricsRegistry{ - metrics: make(map[string]*Metric), + metricVecs: make(map[string]*MetricVec), } }) return registry } -// NewMetric registers a new metric. -func (r *MetricsRegistry) NewMetric(name string, metricType MetricType, help string) error { - r.mu.Lock() - defer r.mu.Unlock() - - if _, ok := r.metrics[name]; ok { - return fmt.Errorf("metric with name %s already registered", name) +func formatLabelKey(labels []string, labelValues []string) string { + labelPairs := make([]string, len(labels)) + for i := range labels { + labelPairs[i] = labels[i] + "=" + labelValues[i] } - var value interface{} - switch metricType { - case MetricTypeCounter, MetricTypeGauge: - value = int64(0) - case MetricTypeHistogram: - value = make([]float64, 0) - default: - return fmt.Errorf("invalid metric type: %s", metricType) - } - - r.metrics[name] = &Metric{ - Name: name, - Type: metricType, - Help: help, - value: value, - } - - return nil + // Sort labels for consistency + sort.Strings(labelPairs) + return strings.Join(labelPairs, ",") } -// IncCounter increments a counter metric. -func (r *MetricsRegistry) IncCounter(name string) error { - return r.updateMetric(name, MetricTypeCounter, 1) +// NewCounterVec creates a new counter vector. +func (r *MetricsRegistry) NewCounterVec(name, help string, labels []string) *MetricVec { + return r.newMetricVec(name, help, labels, MetricTypeCounter) } -// SetGauge sets the value of a gauge metric. -func (r *MetricsRegistry) SetGauge(name string, value int64) error { - return r.updateMetric(name, MetricTypeGauge, value) +// NewGaugeVec creates a new gauge vector. +func (r *MetricsRegistry) NewGaugeVec(name, help string, labels []string) *MetricVec { + return r.newMetricVec(name, help, labels, MetricTypeGauge) } -// ObserveHistogram adds a value to a histogram metric. -func (r *MetricsRegistry) ObserveHistogram(name string, value float64) error { - return r.updateMetric(name, MetricTypeHistogram, value) +// newMetricVec is a helper function to create a new metric vector of any type. +func (r *MetricsRegistry) newMetricVec(name, help string, labels []string, metricType MetricType) *MetricVec { + r.mu.Lock() + defer r.mu.Unlock() + + if _, ok := r.metricVecs[name]; ok { + panic(fmt.Sprintf("metric with name %s already registered", name)) + } + vec := &MetricVec{ + Name: name, + Help: help, + Type: metricType, + metrics: make(map[string]*Metric), + labels: labels, + } + r.metricVecs[name] = vec + return vec } -// updateMetric updates the value of a metric. -func (r *MetricsRegistry) updateMetric(name string, metricType MetricType, value interface{}) error { - r.mu.RLock() - metric, ok := r.metrics[name] - r.mu.RUnlock() +// WithLabelValues gets or creates a metric with the specified label values. +func (v *MetricVec) WithLabelValues(labelValues ...string) *Metric { + if len(labelValues) != len(v.labels) { + panic(fmt.Errorf( + "%q has %d variable labels named %q but %d values %q were provided", + v, + len(v.labels), + v.labels, + len(labelValues), + labelValues, + )) + } + + labelKey := formatLabelKey(v.labels, labelValues) + + v.mu.RLock() + metric, ok := v.metrics[labelKey] + v.mu.RUnlock() if !ok { - return fmt.Errorf("metric with name %s not found", name) + v.mu.Lock() + defer v.mu.Unlock() + // Double check locking + metric, ok = v.metrics[labelKey] + + if !ok { + metric = &Metric{ + LabelValues: make(map[string]string), + value: int64(0), + } + for i, label := range v.labels { + metric.LabelValues[label] = labelValues[i] + } + v.metrics[labelKey] = metric + } } - if metric.Type != metricType { - return fmt.Errorf("mismatched metric type for %s", name) - } + return metric +} - metric.mu.Lock() - defer metric.mu.Unlock() +// Inc increments the counter. +func (m *Metric) Inc() { + m.mu.Lock() + defer m.mu.Unlock() + m.value = m.value.(int64) + 1 +} + +// Add adds the given value to the counter. +func (m *Metric) Add(value int64) { + m.mu.Lock() + defer m.mu.Unlock() - switch metricType { - case MetricTypeCounter: - metric.value = metric.value.(int64) + int64(value.(int)) - case MetricTypeGauge: - metric.value = value.(int64) - case MetricTypeHistogram: - metric.value = append(metric.value.([]float64), value.(float64)) + switch v := m.value.(type) { + case int64: + m.value = v + value + default: + // if the metric is not a counter (e.g., a gauge) + panic(fmt.Errorf("add called on a non-counter metric: %T", m.value)) } +} - return nil +// Set sets the gauge value. +func (m *Metric) Set(value float64) { + m.mu.Lock() + defer m.mu.Unlock() + m.value = value } -// GatherMetrics gathers all metrics and formats them in Prometheus exposition format. +// GatherMetrics function (modified slightly) func (r *MetricsRegistry) GatherMetrics() string { r.mu.RLock() defer r.mu.RUnlock() var output string - for _, metric := range r.metrics { - output += fmt.Sprintf("# HELP %s %s\n", metric.Name, metric.Help) - output += fmt.Sprintf("# TYPE %s %s\n", metric.Name, metric.Type) - - switch metric.Type { - case MetricTypeCounter, MetricTypeGauge: - output += fmt.Sprintf("%s %d\n", metric.Name, metric.value.(int64)) - case MetricTypeHistogram: - for _, v := range metric.value.([]float64) { - output += fmt.Sprintf("%s %f\n", metric.Name, v) // Basic histogram representation + + for _, vec := range r.metricVecs { + output += fmt.Sprintf("# HELP %s %s\n", vec.Name, vec.Help) + output += fmt.Sprintf("# TYPE %s %s\n", vec.Name, vec.Type) + + for labelKey, metric := range vec.metrics { + switch v := metric.value.(type) { + case int64: + output += fmt.Sprintf("%s{%s} %d\n", vec.Name, labelKey, v) + case float64: + output += fmt.Sprintf("%s{%s} %f\n", vec.Name, labelKey, v) + default: + // Fallback for other types + output += fmt.Sprintf("%s{%s} %v\n", vec.Name, labelKey, v) } } } @@ -165,32 +213,21 @@ func main() { // Get the singleton registry registry := GetRegistry() - registry.NewMetric("my_counter", MetricTypeCounter, "A simple counter") - registry.NewMetric("my_gauge", MetricTypeGauge, "A simple gauge") - registry.NewMetric("my_histogram", MetricTypeHistogram, "A simple histogram") + myCounter := registry.NewCounterVec("my_counter", "Total number of something processed.", []string{"operation", "status"}) + myGauge := registry.NewGaugeVec("my_gauge", "Current value of something.", []string{"sensor"}) // Goroutine to update metrics randomly go func() { for { - // Counter - err := registry.IncCounter("my_counter") // Increment by 1 - if err != nil { - fmt.Println("Error incrementing counter:", err) - } + // Use like prometheus client library. - // Gauge - gaugeValue := rand.Int63n(100) - err = registry.SetGauge("my_gauge", gaugeValue) - if err != nil { - fmt.Println("Error setting gauge:", err) - } + // counter + myCounter.WithLabelValues("read", "success").Inc() + myCounter.WithLabelValues("write", "success").Add(2) + myCounter.WithLabelValues("read", "failed").Inc() - // Histogram - histogramValue := rand.Float64() * 10 - err = registry.ObserveHistogram("my_histogram", histogramValue) - if err != nil { - fmt.Println("Error observing histogram:", err) - } + // gauge + myGauge.WithLabelValues("temperature").Set(20.0 + rand.Float64()*10.0) time.Sleep(time.Duration(rand.Intn(5)+1) * time.Second) // Random sleep between 1 and 5 seconds } From 7468b95bb2f4bed8818e9ea6d567e075b778beaa Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Thu, 28 Nov 2024 11:30:04 +0800 Subject: [PATCH 07/41] poc: remove adding identities using env vars according to comment in the spec --- internals/cli/cmd_run.go | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/internals/cli/cmd_run.go b/internals/cli/cmd_run.go index e933955af..453fbb9d3 100644 --- a/internals/cli/cmd_run.go +++ b/internals/cli/cmd_run.go @@ -248,24 +248,6 @@ func runDaemon(rcmd *cmdRun, ch chan os.Signal, ready chan<- func()) error { } } - metricsEndpointUsername := os.Getenv("METRICS_ENDPOINT_USERNAME") - metricsEndpointPassword := os.Getenv("METRICS_ENDPOINT_PASSWORD") - if metricsEndpointUsername != "" && metricsEndpointPassword != "" { - identities := map[string]*client.Identity{ - metricsEndpointUsername: &client.Identity{ - Access: client.ReadAccess, - BasicAuth: &client.BasicAuthIdentity{ - Username: metricsEndpointUsername, - Password: metricsEndpointPassword, - }, - }, - } - err = rcmd.client.ReplaceIdentities(identities) - if err != nil { - return fmt.Errorf("cannot replace identities: %w", err) - } - } - // The "stop" channel is used by the "enter" command to stop the daemon. var stop chan struct{} if ready != nil { From 790a8f983678e2b3e84205de3ca6f3dafa86830f Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Thu, 28 Nov 2024 17:09:42 +0800 Subject: [PATCH 08/41] chore: update tests for the metrics lib poc --- internals/metrics/metrics.go | 43 ++++++++++++++------ internals/metrics/metrics_test.go | 65 ++++++++++++++++++------------- 2 files changed, 70 insertions(+), 38 deletions(-) diff --git a/internals/metrics/metrics.go b/internals/metrics/metrics.go index 4c654b2e9..3de81f62e 100644 --- a/internals/metrics/metrics.go +++ b/internals/metrics/metrics.go @@ -81,7 +81,12 @@ func formatLabelKey(labels []string, labelValues []string) string { // Sort labels for consistency sort.Strings(labelPairs) - return strings.Join(labelPairs, ",") + res := strings.Join(labelPairs, ",") + if res == "" { + // a special key for situations where no labels are used + res = "__empty__" + } + return res } // NewCounterVec creates a new counter vector. @@ -117,8 +122,8 @@ func (r *MetricsRegistry) newMetricVec(name, help string, labels []string, metri func (v *MetricVec) WithLabelValues(labelValues ...string) *Metric { if len(labelValues) != len(v.labels) { panic(fmt.Errorf( - "%q has %d variable labels named %q but %d values %q were provided", - v, + "%s has %d variable labels named %q but %d values %q were provided", + v.Name, len(v.labels), v.labels, len(labelValues), @@ -192,15 +197,29 @@ func (r *MetricsRegistry) GatherMetrics() string { output += fmt.Sprintf("# HELP %s %s\n", vec.Name, vec.Help) output += fmt.Sprintf("# TYPE %s %s\n", vec.Name, vec.Type) - for labelKey, metric := range vec.metrics { - switch v := metric.value.(type) { - case int64: - output += fmt.Sprintf("%s{%s} %d\n", vec.Name, labelKey, v) - case float64: - output += fmt.Sprintf("%s{%s} %f\n", vec.Name, labelKey, v) - default: - // Fallback for other types - output += fmt.Sprintf("%s{%s} %v\n", vec.Name, labelKey, v) + if len(vec.labels) == 0 { // Handle metrics without labels + for _, metric := range vec.metrics { + switch v := metric.value.(type) { + case int64: + output += fmt.Sprintf("%s %d\n", vec.Name, v) // No curly braces + case float64: + output += fmt.Sprintf("%s %.2f\n", vec.Name, v) // No curly braces + default: + output += fmt.Sprintf("%s %v\n", vec.Name, v) // Fallback + } + } + } else { + // Handle metrics with labels. + for labelKey, metric := range vec.metrics { + switch v := metric.value.(type) { + case int64: + output += fmt.Sprintf("%s{%s} %d\n", vec.Name, labelKey, v) + case float64: + output += fmt.Sprintf("%s{%s} %.2f\n", vec.Name, labelKey, v) + default: + // Fallback for other types + output += fmt.Sprintf("%s{%s} %v\n", vec.Name, labelKey, v) + } } } } diff --git a/internals/metrics/metrics_test.go b/internals/metrics/metrics_test.go index 7de9d6b95..96bd9baaa 100644 --- a/internals/metrics/metrics_test.go +++ b/internals/metrics/metrics_test.go @@ -33,53 +33,66 @@ type RegistryTestSuite struct { func (s *RegistryTestSuite) SetUpTest(c *C) { s.registry = &MetricsRegistry{ - metrics: make(map[string]*Metric), + metricVecs: make(map[string]*MetricVec), } } -func (s *RegistryTestSuite) TestCounter(c *C) { - s.registry.NewMetric("test_counter", MetricTypeCounter, "Test counter") - s.registry.IncCounter("test_counter") - s.registry.IncCounter("test_counter") - c.Check(s.registry.metrics["test_counter"].value.(int64), Equals, int64(2)) +func (s *RegistryTestSuite) TestCounterWithoutLabels(c *C) { + labels := []string{} + testCounter := s.registry.NewCounterVec("test_counter", "Total number of something processed", labels) + testCounter.WithLabelValues().Inc() + c.Check(s.registry.metricVecs["test_counter"].metrics[formatLabelKey(labels, []string{})].value.(int64), Equals, int64(1)) + testCounter.WithLabelValues().Inc() + c.Check(s.registry.metricVecs["test_counter"].metrics[formatLabelKey(labels, []string{})].value.(int64), Equals, int64(2)) } -func (s *RegistryTestSuite) TestGauge(c *C) { - s.registry.NewMetric("test_gauge", MetricTypeGauge, "Test gauge") - s.registry.SetGauge("test_gauge", 10) - c.Check(s.registry.metrics["test_gauge"].value.(int64), Equals, int64(10)) - s.registry.SetGauge("test_gauge", 20) - c.Check(s.registry.metrics["test_gauge"].value.(int64), Equals, int64(20)) +func (s *RegistryTestSuite) TestCounterWithLabels(c *C) { + labels := []string{"operation", "status"} + testCounter := s.registry.NewCounterVec("test_counter", "Total number of something processed", labels) + testCounter.WithLabelValues("read", "success").Inc() + c.Check(s.registry.metricVecs["test_counter"].metrics[formatLabelKey(labels, []string{"read", "success"})].value.(int64), Equals, int64(1)) + testCounter.WithLabelValues("write", "fail").Add(2) + c.Check(s.registry.metricVecs["test_counter"].metrics[formatLabelKey(labels, []string{"write", "fail"})].value.(int64), Equals, int64(2)) } -func (s *RegistryTestSuite) TestHistogram(c *C) { - s.registry.NewMetric("test_histogram", MetricTypeHistogram, "Test histogram") - s.registry.ObserveHistogram("test_histogram", 1.0) - s.registry.ObserveHistogram("test_histogram", 2.0) - histogramValues := s.registry.metrics["test_histogram"].value.([]float64) - c.Check(len(histogramValues), Equals, 2) - c.Check(histogramValues[0], Equals, 1.0) - c.Check(histogramValues[1], Equals, 2.0) +func (s *RegistryTestSuite) TestGauge(c *C) { + labels := []string{"sensor"} + testGauge := s.registry.NewGaugeVec("test_gauge", "Current value of something", labels) + testGauge.WithLabelValues("temperature").Set(10.0) + c.Check(s.registry.metricVecs["test_gauge"].metrics[formatLabelKey(labels, []string{"temperature"})].value.(float64), Equals, float64(10.0)) + testGauge.WithLabelValues("temperature").Set(20.0) + c.Check(s.registry.metricVecs["test_gauge"].metrics[formatLabelKey(labels, []string{"temperature"})].value.(float64), Equals, float64(20.0)) } func (s *RegistryTestSuite) TestGatherMetrics(c *C) { - s.registry.NewMetric("test_counter", MetricTypeCounter, "Test counter") - s.registry.IncCounter("test_counter") + testCounter := s.registry.NewCounterVec("test_counter", "Total number of something processed", []string{"operation", "status"}) + testCounter.WithLabelValues("read", "success").Inc() + testGauge := s.registry.NewGaugeVec("test_gauge", "Current value of something", []string{"sensor"}) + testGauge.WithLabelValues("temperature").Set(10.0) + metricsOutput := s.registry.GatherMetrics() + expectedOutput := "# HELP test_counter Total number of something processed\n# TYPE test_counter counter\ntest_counter{operation=read,status=success} 1\n" + expectedOutput += "# HELP test_gauge Current value of something\n# TYPE test_gauge gauge\ntest_gauge{sensor=temperature} 10.00\n" + c.Check(metricsOutput, Equals, expectedOutput) +} + +func (s *RegistryTestSuite) TestGatherMetricsWithoutLabels(c *C) { + testCounter := s.registry.NewCounterVec("test_counter", "Total number of something processed", []string{}) + testCounter.WithLabelValues().Inc() metricsOutput := s.registry.GatherMetrics() - expectedOutput := "# HELP test_counter Test counter\n# TYPE test_counter counter\ntest_counter 1\n" + expectedOutput := "# HELP test_counter Total number of something processed\n# TYPE test_counter counter\ntest_counter 1\n" c.Check(metricsOutput, Equals, expectedOutput) } func (s *RegistryTestSuite) TestRaceConditions(c *C) { - s.registry.NewMetric("race_counter", MetricTypeCounter, "Race counter") + counter := s.registry.NewCounterVec("test_counter", "Total number of something processed", []string{}) var wg sync.WaitGroup for i := 0; i < 1000; i++ { wg.Add(1) go func() { defer wg.Done() - s.registry.IncCounter("race_counter") + counter.WithLabelValues().Inc() }() } wg.Wait() - c.Check(s.registry.metrics["race_counter"].value.(int64), Equals, int64(1000)) + c.Check(s.registry.metricVecs["test_counter"].metrics[formatLabelKey([]string{}, []string{})].value.(int64), Equals, int64(1000)) } From 272005b6a6739b9a8279c7bda8b18cf3d34c6f98 Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Mon, 9 Dec 2024 20:02:18 +0800 Subject: [PATCH 09/41] chore: refactor identities and access according to spec review --- .fuse_hidden0000020d00000002 | 0 client/identities.go | 11 ++-- internals/cli/cmd_identities.go | 4 +- internals/daemon/access.go | 15 ++++++ internals/daemon/api.go | 2 +- internals/overlord/state/identities.go | 70 ++++++++++++++------------ internals/overlord/state/state.go | 17 +++---- 7 files changed, 68 insertions(+), 51 deletions(-) delete mode 100644 .fuse_hidden0000020d00000002 diff --git a/.fuse_hidden0000020d00000002 b/.fuse_hidden0000020d00000002 deleted file mode 100644 index e69de29bb..000000000 diff --git a/client/identities.go b/client/identities.go index f7b21ca1a..fc1fb6e10 100644 --- a/client/identities.go +++ b/client/identities.go @@ -26,9 +26,9 @@ type Identity struct { Access IdentityAccess `json:"access" yaml:"access"` // One or more of the following type-specific configuration fields must be - // non-nil (currently the only types are "local" and "basicauth"). - Local *LocalIdentity `json:"local,omitempty" yaml:"local,omitempty"` - BasicAuth *BasicAuthIdentity `json:"basicauth,omitempty" yaml:"basicauth,omitempty"` + // non-nil (currently the only types are "local" and "basic"). + Local *LocalIdentity `json:"local,omitempty" yaml:"local,omitempty"` + Basic *BasicIdentity `json:"basic,omitempty" yaml:"basic,omitempty"` } // IdentityAccess defines the access level for an identity. @@ -48,10 +48,9 @@ type LocalIdentity struct { UserID *uint32 `json:"user-id" yaml:"user-id"` } -// BasicAuthIdentity holds identity configuration specific to the "basicauth" type +// BasicIdentity holds identity configuration specific to the "basic" type // (for username/password authentication). -type BasicAuthIdentity struct { - Username string `json:"username" yaml:"username"` +type BasicIdentity struct { Password string `json:"password" yaml:"password"` } diff --git a/internals/cli/cmd_identities.go b/internals/cli/cmd_identities.go index a40f5f885..ec78cfe56 100644 --- a/internals/cli/cmd_identities.go +++ b/internals/cli/cmd_identities.go @@ -112,8 +112,8 @@ func (cmd *cmdIdentities) writeText(identities map[string]*client.Identity) erro if identity.Local != nil { types = append(types, "local") } - if identity.BasicAuth != nil { - types = append(types, "basicauth") + if identity.Basic != nil { + types = append(types, "basic") } sort.Strings(types) if len(types) == 0 { diff --git a/internals/daemon/access.go b/internals/daemon/access.go index 9bf45b220..8f230c980 100644 --- a/internals/daemon/access.go +++ b/internals/daemon/access.go @@ -68,3 +68,18 @@ func (ac UserAccess) CheckAccess(d *Daemon, r *http.Request, user *UserState) Re // An identity explicitly set to "access: untrusted" isn't allowed. return Unauthorized(accessDenied) } + +// MetricsAccess allows requests over the UNIX domain socket from any local user +type MetricsAccess struct{} + +func (ac MetricsAccess) CheckAccess(d *Daemon, r *http.Request, user *UserState) Response { + if user == nil { + return Unauthorized(accessDenied) + } + switch user.Identity.Access { + case state.MetricsAccess, state.AdminAccess: + return nil + } + // An identity explicitly set to "access: untrusted" isn't allowed. + return Unauthorized(accessDenied) +} diff --git a/internals/daemon/api.go b/internals/daemon/api.go index d1789d7a6..261405b13 100644 --- a/internals/daemon/api.go +++ b/internals/daemon/api.go @@ -110,7 +110,7 @@ var API = []*Command{{ POST: v1PostIdentities, }, { Path: "/metrics", - ReadAccess: UserAccess{}, + ReadAccess: MetricsAccess{}, GET: Metrics, }} diff --git a/internals/overlord/state/identities.go b/internals/overlord/state/identities.go index 953d21a9f..a5b76b134 100644 --- a/internals/overlord/state/identities.go +++ b/internals/overlord/state/identities.go @@ -18,6 +18,7 @@ import ( "encoding/json" "errors" "fmt" + "regexp" "sort" "strings" ) @@ -28,9 +29,9 @@ type Identity struct { Access IdentityAccess // One or more of the following type-specific configuration fields must be - // non-nil (currently the only types are "local" and "basicauth"). - Local *LocalIdentity - BasicAuth *BasicAuthIdentity + // non-nil (currently the only types are "local" and "basic"). + Local *LocalIdentity + Basic *BasicIdentity } // IdentityAccess defines the access level for an identity. @@ -39,6 +40,7 @@ type IdentityAccess string const ( AdminAccess IdentityAccess = "admin" ReadAccess IdentityAccess = "read" + MetricsAccess IdentityAccess = "metrics" UntrustedAccess IdentityAccess = "untrusted" ) @@ -48,10 +50,9 @@ type LocalIdentity struct { UserID uint32 } -// BasicAuthIdentity holds identity configuration specific to the "basicauth" type +// BasicIdentity holds identity configuration specific to the "basic" type // (for username/password authentication). -type BasicAuthIdentity struct { - Username string +type BasicIdentity struct { Password string // Note: In a real application, store a password hash, not the plaintext password. } @@ -61,30 +62,35 @@ func (d *Identity) validate() error { return errors.New("identity must not be nil") } + if d.Name != "" { + // Regular expression to match any character that is not a letter, number, underscore, or hyphen. + invalidChars := regexp.MustCompile(`[^a-zA-Z0-9_\-]`) + if invalidChars.MatchString(d.Name) { + return fmt.Errorf("identity name %q contains invalid characters (only alphanumeric, underscore, and hyphen allowed)", d.Name) + } + } + switch d.Access { - case AdminAccess, ReadAccess, UntrustedAccess: + case AdminAccess, ReadAccess, MetricsAccess, UntrustedAccess: case "": - return fmt.Errorf("access value must be specified (%q, %q, or %q)", - AdminAccess, ReadAccess, UntrustedAccess) + return fmt.Errorf("access value must be specified (%q, %q, %q, or %q)", + AdminAccess, ReadAccess, MetricsAccess, UntrustedAccess) default: - return fmt.Errorf("invalid access value %q, must be %q, %q, or %q", - d.Access, AdminAccess, ReadAccess, UntrustedAccess) + return fmt.Errorf("invalid access value %q, must be %q, %q, %q, or %q", + d.Access, AdminAccess, ReadAccess, MetricsAccess, UntrustedAccess) } switch { case d.Local != nil: return nil - case d.BasicAuth != nil: - if d.BasicAuth.Username == "" { - return errors.New("basicauth identity must specify username") - } - if d.BasicAuth.Password == "" { - return errors.New("basicauth identity must specify password") + case d.Basic != nil: + if d.Basic.Password == "" { + return errors.New("basic identity must specify password") } return nil default: - return errors.New(`identity must have at least one type ("local or "basicauth")`) + return errors.New(`identity must have at least one type ("local or "basic")`) } } @@ -92,16 +98,16 @@ func (d *Identity) validate() error { // for API responses) excludes secrets. The marshalledIdentity type is used // for saving secrets in state. type apiIdentity struct { - Access string `json:"access"` - Local *apiLocalIdentity `json:"local,omitempty"` - BasicAuth *apiBasicAuthIdentity `json:"basicauth,omitempty"` + Access string `json:"access"` + Local *apiLocalIdentity `json:"local,omitempty"` + Basic *apiBasicIdentity `json:"basic,omitempty"` } type apiLocalIdentity struct { UserID *uint32 `json:"user-id"` } -type apiBasicAuthIdentity struct { +type apiBasicIdentity struct { Username string `json:"username"` Password string `json:"password"` } @@ -114,8 +120,8 @@ func (d *Identity) MarshalJSON() ([]byte, error) { if d.Local != nil { ai.Local = &apiLocalIdentity{UserID: &d.Local.UserID} } - if d.BasicAuth != nil { - ai.BasicAuth = &apiBasicAuthIdentity{Username: d.BasicAuth.Username, Password: d.BasicAuth.Password} + if d.Basic != nil { + ai.Basic = &apiBasicIdentity{Password: d.Basic.Password} } return json.Marshal(ai) } @@ -136,14 +142,11 @@ func (d *Identity) UnmarshalJSON(data []byte) error { return errors.New("local identity must specify user-id") } identity.Local = &LocalIdentity{UserID: *ai.Local.UserID} - case ai.BasicAuth != nil: - if ai.BasicAuth.Username == "" { - return errors.New("basicauth identity must specify username") - } - if ai.BasicAuth.Password == "" { - return errors.New("basicauth identity must specify password") + case ai.Basic != nil: + if ai.Basic.Password == "" { + return errors.New("basic identity must specify password") } - identity.BasicAuth = &BasicAuthIdentity{Username: ai.BasicAuth.Username, Password: ai.BasicAuth.Password} + identity.Basic = &BasicIdentity{Password: ai.Basic.Password} } // Perform additional validation using the local Identity type. err = identity.validate() @@ -166,6 +169,7 @@ func (s *State) AddIdentities(identities map[string]*Identity) error { if _, ok := s.identities[name]; ok { existing = append(existing, name) } + identity.Name = name err := identity.validate() if err != nil { return fmt.Errorf("identity %q invalid: %w", name, err) @@ -311,9 +315,9 @@ func (s *State) IdentityFromInputs(userID *uint32, username, password string) *I if identity.Local.UserID == *userID { return identity } - case identity.BasicAuth != nil && username != "" && password != "": + case identity.Basic != nil && username != "" && password != "": // In real code, compare password hashes, not plain text passwords. - if identity.BasicAuth.Username == username && identity.BasicAuth.Password == password { + if identity.Name == username && identity.Basic.Password == password { return identity } } diff --git a/internals/overlord/state/state.go b/internals/overlord/state/state.go index 7f8d358a9..eb7983bf3 100644 --- a/internals/overlord/state/state.go +++ b/internals/overlord/state/state.go @@ -172,17 +172,16 @@ type marshalledState struct { // marshalledIdentity is used specifically for marshalling to the state // database file. Unlike apiIdentity, it should include secrets. type marshalledIdentity struct { - Access string `json:"access"` - Local *marshalledLocalIdentity `json:"local,omitempty"` - BasicAuth *marshalledBasicAuthIdentity `json:"basicauth,omitempty"` + Access string `json:"access"` + Local *marshalledLocalIdentity `json:"local,omitempty"` + Basic *marshalledBasicIdentity `json:"basi,omitempty"` } type marshalledLocalIdentity struct { UserID uint32 `json:"user-id"` } -type marshalledBasicAuthIdentity struct { - Username string `json:"username"` +type marshalledBasicIdentity struct { Password string `json:"password"` } @@ -212,8 +211,8 @@ func (s *State) marshalledIdentities() map[string]*marshalledIdentity { if identity.Local != nil { marshalled[name].Local = &marshalledLocalIdentity{UserID: identity.Local.UserID} } - if identity.BasicAuth != nil { - marshalled[name].BasicAuth = &marshalledBasicAuthIdentity{Username: identity.BasicAuth.Username, Password: identity.BasicAuth.Password} + if identity.Basic != nil { + marshalled[name].Basic = &marshalledBasicIdentity{Password: identity.Basic.Password} } } return marshalled @@ -257,8 +256,8 @@ func (s *State) unmarshalIdentities(marshalled map[string]*marshalledIdentity) { if mi.Local != nil { s.identities[name].Local = &LocalIdentity{UserID: mi.Local.UserID} } - if mi.BasicAuth != nil { - s.identities[name].BasicAuth = &BasicAuthIdentity{Username: mi.BasicAuth.Username, Password: mi.BasicAuth.Password} + if mi.Basic != nil { + s.identities[name].Basic = &BasicIdentity{Password: mi.Basic.Password} } } } From 5be3e96102383f6e71ddc06138f82b9f033ca097 Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Tue, 21 Jan 2025 16:42:45 +0800 Subject: [PATCH 10/41] feat: use sha512 to verify password --- go.mod | 1 + go.sum | 8 ++++++++ internals/daemon/access.go | 2 +- internals/overlord/state/identities.go | 9 ++++++--- internals/overlord/state/state.go | 2 +- 5 files changed, 17 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index dde8646ac..496cab6d3 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/canonical/pebble go 1.22 require ( + github.com/GehirnInc/crypt v0.0.0-20230320061759-8cc1b52080c5 github.com/canonical/go-flags v0.0.0-20230403090104-105d09a091b8 github.com/canonical/x-go v0.0.0-20230522092633-7947a7587f5b github.com/gorilla/mux v1.8.1 diff --git a/go.sum b/go.sum index 31493018c..5b1eb141d 100644 --- a/go.sum +++ b/go.sum @@ -1,8 +1,12 @@ +github.com/GehirnInc/crypt v0.0.0-20230320061759-8cc1b52080c5 h1:IEjq88XO4PuBDcvmjQJcQGg+w+UaafSy8G5Kcb5tBhI= +github.com/GehirnInc/crypt v0.0.0-20230320061759-8cc1b52080c5/go.mod h1:exZ0C/1emQJAw5tHOaUDyY1ycttqBAPcxuzf7QbY6ec= github.com/canonical/go-flags v0.0.0-20230403090104-105d09a091b8 h1:zGaJEJI9qPVyM+QKFJagiyrM91Ke5S9htoL1D470g6E= github.com/canonical/go-flags v0.0.0-20230403090104-105d09a091b8/go.mod h1:ZZFeR9K9iGgpwOaLYF9PdT44/+lfSJ9sQz3B+SsGsYU= github.com/canonical/x-go v0.0.0-20230522092633-7947a7587f5b h1:Da2fardddn+JDlVEYtrzBLTtyzoyU3nIS0Cf0GvjmwU= github.com/canonical/x-go v0.0.0-20230522092633-7947a7587f5b/go.mod h1:upTK9n6rlqITN9rCN69hdreI37dRDFUk2thlGGD5Cg8= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= github.com/gorilla/websocket v1.5.1 h1:gmztn0JnHVt9JZquRuzLw3g4wouNVzKL15iLr/zn/QY= @@ -15,6 +19,10 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/pkg/term v1.1.0 h1:xIAAdCMh3QIAy+5FrE8Ad8XoDhEU4ufwbaSozViP9kk= github.com/pkg/term v1.1.0/go.mod h1:E25nymQcrSllhX42Ok8MRm1+hyBdHY0dCeiKZ9jpNGw= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= +github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= golang.org/x/sys v0.0.0-20200909081042-eff7692f9009/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/internals/daemon/access.go b/internals/daemon/access.go index 8f230c980..0d484f987 100644 --- a/internals/daemon/access.go +++ b/internals/daemon/access.go @@ -69,7 +69,7 @@ func (ac UserAccess) CheckAccess(d *Daemon, r *http.Request, user *UserState) Re return Unauthorized(accessDenied) } -// MetricsAccess allows requests over the UNIX domain socket from any local user +// MetricsAccess allows requests over the HTTP from authenticated users type MetricsAccess struct{} func (ac MetricsAccess) CheckAccess(d *Daemon, r *http.Request, user *UserState) Response { diff --git a/internals/overlord/state/identities.go b/internals/overlord/state/identities.go index a5b76b134..814ea1b57 100644 --- a/internals/overlord/state/identities.go +++ b/internals/overlord/state/identities.go @@ -21,6 +21,8 @@ import ( "regexp" "sort" "strings" + + "github.com/GehirnInc/crypt/sha512_crypt" ) // Identity holds the configuration of a single identity. @@ -315,9 +317,10 @@ func (s *State) IdentityFromInputs(userID *uint32, username, password string) *I if identity.Local.UserID == *userID { return identity } - case identity.Basic != nil && username != "" && password != "": - // In real code, compare password hashes, not plain text passwords. - if identity.Name == username && identity.Basic.Password == password { + case identity.Basic != nil && username != "" && identity.Name == username && password != "": + crypt := sha512_crypt.New() + err := crypt.Verify(identity.Basic.Password, []byte(password)) + if err == nil { return identity } } diff --git a/internals/overlord/state/state.go b/internals/overlord/state/state.go index eb7983bf3..41c70d8bb 100644 --- a/internals/overlord/state/state.go +++ b/internals/overlord/state/state.go @@ -174,7 +174,7 @@ type marshalledState struct { type marshalledIdentity struct { Access string `json:"access"` Local *marshalledLocalIdentity `json:"local,omitempty"` - Basic *marshalledBasicIdentity `json:"basi,omitempty"` + Basic *marshalledBasicIdentity `json:"basic,omitempty"` } type marshalledLocalIdentity struct { From a6c374dbd91180cd9008c78686c2cbd85c562b7b Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Tue, 21 Jan 2025 16:44:06 +0800 Subject: [PATCH 11/41] feat: move the metrics api to /v1/metrics --- internals/daemon/api.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internals/daemon/api.go b/internals/daemon/api.go index 261405b13..182ce0469 100644 --- a/internals/daemon/api.go +++ b/internals/daemon/api.go @@ -109,7 +109,7 @@ var API = []*Command{{ GET: v1GetIdentities, POST: v1PostIdentities, }, { - Path: "/metrics", + Path: "/v1/metrics", ReadAccess: MetricsAccess{}, GET: Metrics, }} From 1bd54cbaa1705f3ad23ded77b9ea29933a9fe0bd Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Tue, 21 Jan 2025 16:48:51 +0800 Subject: [PATCH 12/41] chore: remove Username from apiBasicIdentity --- internals/overlord/state/identities.go | 1 - 1 file changed, 1 deletion(-) diff --git a/internals/overlord/state/identities.go b/internals/overlord/state/identities.go index 814ea1b57..0447ee4ed 100644 --- a/internals/overlord/state/identities.go +++ b/internals/overlord/state/identities.go @@ -110,7 +110,6 @@ type apiLocalIdentity struct { } type apiBasicIdentity struct { - Username string `json:"username"` Password string `json:"password"` } From 98ea11ebe8264246a383b2b45f465127cc65c16d Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Tue, 21 Jan 2025 17:06:21 +0800 Subject: [PATCH 13/41] chore: revert changes on user state --- internals/daemon/access.go | 6 +++--- internals/daemon/api_notices.go | 18 +++++++++--------- internals/daemon/daemon.go | 33 +++++++++++---------------------- 3 files changed, 23 insertions(+), 34 deletions(-) diff --git a/internals/daemon/access.go b/internals/daemon/access.go index 0d484f987..67a2e9171 100644 --- a/internals/daemon/access.go +++ b/internals/daemon/access.go @@ -47,7 +47,7 @@ func (ac AdminAccess) CheckAccess(d *Daemon, r *http.Request, user *UserState) R if user == nil { return Unauthorized(accessDenied) } - if user.Identity.Access == state.AdminAccess { + if user.Access == state.AdminAccess { return nil } // An identity explicitly set to "access: read" or "access: untrusted" isn't allowed. @@ -61,7 +61,7 @@ func (ac UserAccess) CheckAccess(d *Daemon, r *http.Request, user *UserState) Re if user == nil { return Unauthorized(accessDenied) } - switch user.Identity.Access { + switch user.Access { case state.ReadAccess, state.AdminAccess: return nil } @@ -76,7 +76,7 @@ func (ac MetricsAccess) CheckAccess(d *Daemon, r *http.Request, user *UserState) if user == nil { return Unauthorized(accessDenied) } - switch user.Identity.Access { + switch user.Access { case state.MetricsAccess, state.AdminAccess: return nil } diff --git a/internals/daemon/api_notices.go b/internals/daemon/api_notices.go index 35b125454..8cd274508 100644 --- a/internals/daemon/api_notices.go +++ b/internals/daemon/api_notices.go @@ -45,16 +45,16 @@ type addedNotice struct { func v1GetNotices(c *Command, r *http.Request, user *UserState) Response { // TODO(benhoyt): the design of notices presumes UIDs; if in future when we // support identities that aren't UID based, we'll need to fix this. - if user == nil || user.Identity.Local == nil { + if user == nil || user.UID == nil { return Forbidden("cannot determine UID of request, so cannot retrieve notices") } // By default, return notices with the request UID and public notices. - userID := &user.Identity.Local.UserID + userID := user.UID query := r.URL.Query() if len(query["user-id"]) > 0 { - if user.Identity.Access != state.AdminAccess { + if user.Access != state.AdminAccess { return Forbidden(`only admins may use the "user-id" filter`) } var err error @@ -65,7 +65,7 @@ func v1GetNotices(c *Command, r *http.Request, user *UserState) Response { } if len(query["users"]) > 0 { - if user.Identity.Access != state.AdminAccess { + if user.Access != state.AdminAccess { return Forbidden(`only admins may use the "users" filter`) } if len(query["user-id"]) > 0 { @@ -179,7 +179,7 @@ func sanitizeTypesFilter(queryTypes []string) ([]state.NoticeType, error) { } func v1PostNotices(c *Command, r *http.Request, user *UserState) Response { - if user == nil || user.Identity.Local == nil { + if user == nil || user.UID == nil { return Forbidden("cannot determine UID of request, so cannot create notice") } @@ -228,7 +228,7 @@ func v1PostNotices(c *Command, r *http.Request, user *UserState) Response { st.Lock() defer st.Unlock() - noticeId, err := st.AddNotice(&user.Identity.Local.UserID, state.CustomNotice, payload.Key, &state.AddNoticeOptions{ + noticeId, err := st.AddNotice(user.UID, state.CustomNotice, payload.Key, &state.AddNoticeOptions{ Data: data, RepeatAfter: repeatAfter, }) @@ -240,7 +240,7 @@ func v1PostNotices(c *Command, r *http.Request, user *UserState) Response { } func v1GetNotice(c *Command, r *http.Request, user *UserState) Response { - if user == nil || user.Identity.Local == nil { + if user == nil || user.UID == nil { return Forbidden("cannot determine UID of request, so cannot retrieve notice") } noticeID := muxVars(r)["id"] @@ -263,10 +263,10 @@ func noticeViewableByUser(notice *state.Notice, user *UserState) bool { // Notice has no UID, so it's viewable by any user (with a UID). return true } - if user.Identity.Access == state.AdminAccess { + if user.Access == state.AdminAccess { // User is admin, they can view anything. return true } // Otherwise user's UID must match notice's UID. - return user.Identity.Local.UserID == userID + return *user.UID == userID } diff --git a/internals/daemon/daemon.go b/internals/daemon/daemon.go index d7c5e6250..4e02d03b2 100644 --- a/internals/daemon/daemon.go +++ b/internals/daemon/daemon.go @@ -117,9 +117,8 @@ type Daemon struct { // UserState represents the state of an authenticated API user. type UserState struct { - // Access state.IdentityAccess - // UID *uint32 - Identity *state.Identity + Access state.IdentityAccess + UID *uint32 } // A ResponseFunc handles one of the individual verbs for a method @@ -163,7 +162,13 @@ func userFromRequest(st *state.State, r *http.Request, ucred *Ucrednet, username // No identity that matches these inputs (for now, just UID). return nil, nil } - return &UserState{Identity: identity}, nil + + if identity.Local != nil { + return &UserState{Access: identity.Access, UID: userID}, nil + } else if identity.Basic != nil { + return &UserState{Access: identity.Access}, nil + } + return nil, nil } func (d *Daemon) Overlord() *overlord.Overlord { @@ -226,26 +231,10 @@ func (c *Command) ServeHTTP(w http.ResponseWriter, r *http.Request) { if user == nil && ucred != nil { if ucred.Uid == 0 || ucred.Uid == uint32(os.Getuid()) { // Admin if UID is 0 (root) or the UID the daemon is running as. - // user = &UserState{Access: state.AdminAccess, UID: &ucred.Uid} - user = &UserState{ - Identity: &state.Identity{ - Access: state.AdminAccess, - Local: &state.LocalIdentity{ - UserID: ucred.Uid, - }, - }, - } + user = &UserState{Access: state.AdminAccess, UID: &ucred.Uid} } else { // Regular read access if any other local UID. - // user = &UserState{Access: state.ReadAccess, UID: &ucred.Uid} - user = &UserState{ - Identity: &state.Identity{ - Access: state.ReadAccess, - Local: &state.LocalIdentity{ - UserID: ucred.Uid, - }, - }, - } + user = &UserState{Access: state.ReadAccess, UID: &ucred.Uid} } } From 7fc255ec4cbcdbf960aa4954f16df8059082aa58 Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Tue, 21 Jan 2025 17:37:48 +0800 Subject: [PATCH 14/41] chore: fix failed identity tests --- internals/daemon/daemon.go | 30 ++++++++++----------- internals/overlord/state/identities.go | 6 +++-- internals/overlord/state/identities_test.go | 22 +++++++-------- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/internals/daemon/daemon.go b/internals/daemon/daemon.go index 5a34e6767..5ee934f11 100644 --- a/internals/daemon/daemon.go +++ b/internals/daemon/daemon.go @@ -20,7 +20,6 @@ import ( "errors" "fmt" "io" - "math/rand" "net" "net/http" "os" @@ -37,7 +36,6 @@ import ( "gopkg.in/tomb.v2" "github.com/canonical/pebble/internals/logger" - "github.com/canonical/pebble/internals/metrics" "github.com/canonical/pebble/internals/osutil" "github.com/canonical/pebble/internals/osutil/sys" "github.com/canonical/pebble/internals/overlord" @@ -376,20 +374,20 @@ func (d *Daemon) Init() error { logger.Noticef("Started daemon.") - registry := metrics.GetRegistry() - myCounter := registry.NewCounterVec("my_counter", "Total number of something processed.", []string{"operation", "status"}) - myGauge := registry.NewGaugeVec("my_gauge", "Current value of something.", []string{"sensor"}) - // Goroutine to update metrics randomly - go func() { - for { - myCounter.WithLabelValues("read", "success").Inc() - myCounter.WithLabelValues("write", "success").Add(2) - myCounter.WithLabelValues("read", "failed").Inc() - myGauge.WithLabelValues("temperature").Set(20.0 + rand.Float64()*10.0) - - time.Sleep(time.Duration(rand.Intn(5)+1) * time.Second) // Random sleep between 1 and 5 seconds - } - }() + // registry := metrics.GetRegistry() + // myCounter := registry.NewCounterVec("my_counter", "Total number of something processed.", []string{"operation", "status"}) + // myGauge := registry.NewGaugeVec("my_gauge", "Current value of something.", []string{"sensor"}) + // // Goroutine to update metrics randomly + // go func() { + // for { + // myCounter.WithLabelValues("read", "success").Inc() + // myCounter.WithLabelValues("write", "success").Add(2) + // myCounter.WithLabelValues("read", "failed").Inc() + // myGauge.WithLabelValues("temperature").Set(20.0 + rand.Float64()*10.0) + + // time.Sleep(time.Duration(rand.Intn(5)+1) * time.Second) // Random sleep between 1 and 5 seconds + // } + // }() return nil } diff --git a/internals/overlord/state/identities.go b/internals/overlord/state/identities.go index 0447ee4ed..26125f8c9 100644 --- a/internals/overlord/state/identities.go +++ b/internals/overlord/state/identities.go @@ -92,7 +92,7 @@ func (d *Identity) validate() error { return nil default: - return errors.New(`identity must have at least one type ("local or "basic")`) + return errors.New(`identity must have at least one type ("local" or "basic")`) } } @@ -170,7 +170,9 @@ func (s *State) AddIdentities(identities map[string]*Identity) error { if _, ok := s.identities[name]; ok { existing = append(existing, name) } - identity.Name = name + if identity != nil { + identity.Name = name + } err := identity.validate() if err != nil { return fmt.Errorf("identity %q invalid: %w", name, err) diff --git a/internals/overlord/state/identities_test.go b/internals/overlord/state/identities_test.go index dcf2bdf3d..8e186f178 100644 --- a/internals/overlord/state/identities_test.go +++ b/internals/overlord/state/identities_test.go @@ -101,16 +101,16 @@ func (s *identitiesSuite) TestUnmarshalAPIErrors(c *C) { error string }{{ data: `{"no-type": {"access": "admin"}}`, - error: `identity must have at least one type \("local"\)`, + error: `identity must have at least one type \("local" or "basic"\)`, }, { data: `{"invalid-access": {"access": "admin", "local": {}}}`, error: `local identity must specify user-id`, }, { data: `{"invalid-access": {"access": "foo", "local": {"user-id": 42}}}`, - error: `invalid access value "foo", must be "admin", "read", or "untrusted"`, + error: `invalid access value "foo", must be "admin", "read", "metrics", or "untrusted"`, }, { data: `{"invalid-access": {"local": {"user-id": 42}}}`, - error: `access value must be specified \("admin", "read", or "untrusted"\)`, + error: `access value must be specified \("admin", "read", "metrics", or "untrusted"\)`, }} for _, test := range tests { c.Logf("Input data: %s", test.data) @@ -263,7 +263,7 @@ func (s *identitiesSuite) TestAddIdentities(c *C) { Local: &state.LocalIdentity{UserID: 43}, }, }) - c.Assert(err, ErrorMatches, `identity "bill" invalid: invalid access value "bar", must be "admin", "read", or "untrusted"`) + c.Assert(err, ErrorMatches, `identity "bill" invalid: invalid access value "bar", must be "admin", "read", "metrics", or "untrusted"`) // Must have at least one type. err = st.AddIdentities(map[string]*state.Identity{ @@ -271,7 +271,7 @@ func (s *identitiesSuite) TestAddIdentities(c *C) { Access: "admin", }, }) - c.Assert(err, ErrorMatches, `identity "bill" invalid: identity must have at least one type \("local"\)`) + c.Assert(err, ErrorMatches, `identity "bill" invalid: identity must have at least one type \(\"local\" or \"basic\"\)`) // Ensure user IDs are unique with existing users. err = st.AddIdentities(map[string]*state.Identity{ @@ -430,7 +430,7 @@ func (s *identitiesSuite) TestReplaceIdentities(c *C) { Access: "admin", }, }) - c.Assert(err, ErrorMatches, `identity "bill" invalid: identity must have at least one type \("local"\)`) + c.Assert(err, ErrorMatches, `identity "bill" invalid: identity must have at least one type \("local" or "basic"\)`) // Ensure unique user ID testing is being done (full testing done in AddIdentity). err = st.ReplaceIdentities(map[string]*state.Identity{ @@ -557,24 +557,24 @@ func (s *identitiesSuite) TestIdentityFromInputs(c *C) { err := st.AddIdentities(original) c.Assert(err, IsNil) - identity := st.IdentityFromInputs(nil) + identity := st.IdentityFromInputs(nil, "", "") c.Assert(identity, IsNil) userID := uint32(0) - identity = st.IdentityFromInputs(&userID) + identity = st.IdentityFromInputs(&userID, "", "") c.Assert(identity, IsNil) userID = 100 - identity = st.IdentityFromInputs(&userID) + identity = st.IdentityFromInputs(&userID, "", "") c.Assert(identity, IsNil) userID = 42 - identity = st.IdentityFromInputs(&userID) + identity = st.IdentityFromInputs(&userID, "", "") c.Assert(identity, NotNil) c.Check(identity.Name, Equals, "bob") userID = 1000 - identity = st.IdentityFromInputs(&userID) + identity = st.IdentityFromInputs(&userID, "", "") c.Assert(identity, NotNil) c.Check(identity.Name, Equals, "mary") } From 31a0617875861b3c126b882d12757cbde244382e Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Wed, 22 Jan 2025 19:43:02 +0800 Subject: [PATCH 15/41] test: unit tests for basic identity --- internals/overlord/state/identities_test.go | 74 ++++++++++++++++++++- 1 file changed, 72 insertions(+), 2 deletions(-) diff --git a/internals/overlord/state/identities_test.go b/internals/overlord/state/identities_test.go index 8e186f178..e28f9eba1 100644 --- a/internals/overlord/state/identities_test.go +++ b/internals/overlord/state/identities_test.go @@ -41,6 +41,10 @@ func (s *identitiesSuite) TestMarshalAPI(c *C) { Access: state.AdminAccess, Local: &state.LocalIdentity{UserID: 1000}, }, + "nancy": { + Access: state.MetricsAccess, + Basic: &state.BasicIdentity{Password: "hash"}, + }, }) c.Assert(err, IsNil) @@ -60,6 +64,12 @@ func (s *identitiesSuite) TestMarshalAPI(c *C) { "local": { "user-id": 1000 } + }, + "nancy": { + "access": "metrics", + "basic": { + "password": "hash" + } } }`[1:]) } @@ -78,6 +88,12 @@ func (s *identitiesSuite) TestUnmarshalAPI(c *C) { "local": { "user-id": 1000 } + }, + "nancy": { + "access": "metrics", + "basic": { + "password": "hash" + } } }`) var identities map[string]*state.Identity @@ -92,6 +108,10 @@ func (s *identitiesSuite) TestUnmarshalAPI(c *C) { Access: state.AdminAccess, Local: &state.LocalIdentity{UserID: 1000}, }, + "nancy": { + Access: state.MetricsAccess, + Basic: &state.BasicIdentity{Password: "hash"}, + }, }) } @@ -105,6 +125,9 @@ func (s *identitiesSuite) TestUnmarshalAPIErrors(c *C) { }, { data: `{"invalid-access": {"access": "admin", "local": {}}}`, error: `local identity must specify user-id`, + }, { + data: `{"invalid-access": {"access": "metrics", "basic": {}}}`, + error: `basic identity must specify password`, }, { data: `{"invalid-access": {"access": "foo", "local": {"user-id": 42}}}`, error: `invalid access value "foo", must be "admin", "read", "metrics", or "untrusted"`, @@ -214,6 +237,10 @@ func (s *identitiesSuite) TestAddIdentities(c *C) { Access: state.AdminAccess, Local: &state.LocalIdentity{UserID: 1000}, }, + "nancy": { + Access: state.MetricsAccess, + Basic: &state.BasicIdentity{Password: "hash"}, + }, } err := st.AddIdentities(original) c.Assert(err, IsNil) @@ -231,6 +258,11 @@ func (s *identitiesSuite) TestAddIdentities(c *C) { Access: state.AdminAccess, Local: &state.LocalIdentity{UserID: 1000}, }, + "nancy": { + Name: "nancy", + Access: state.MetricsAccess, + Basic: &state.BasicIdentity{Password: "hash"}, + }, }) // Can't add identity names that already exist. @@ -314,6 +346,10 @@ func (s *identitiesSuite) TestUpdateIdentities(c *C) { Access: state.AdminAccess, Local: &state.LocalIdentity{UserID: 1000}, }, + "nancy": { + Access: state.MetricsAccess, + Basic: &state.BasicIdentity{Password: "hash"}, + }, } err := st.AddIdentities(original) c.Assert(err, IsNil) @@ -327,6 +363,10 @@ func (s *identitiesSuite) TestUpdateIdentities(c *C) { Access: state.ReadAccess, Local: &state.LocalIdentity{UserID: 42}, }, + "nancy": { + Access: state.MetricsAccess, + Basic: &state.BasicIdentity{Password: "new hash"}, + }, }) c.Assert(err, IsNil) @@ -343,6 +383,11 @@ func (s *identitiesSuite) TestUpdateIdentities(c *C) { Access: state.ReadAccess, Local: &state.LocalIdentity{UserID: 42}, }, + "nancy": { + Name: "nancy", + Access: state.MetricsAccess, + Basic: &state.BasicIdentity{Password: "new hash"}, + }, }) // Can't update identity names that don't exist. @@ -460,6 +505,10 @@ func (s *identitiesSuite) TestRemoveIdentities(c *C) { Access: state.AdminAccess, Local: &state.LocalIdentity{UserID: 1000}, }, + "nancy": { + Access: state.MetricsAccess, + Basic: &state.BasicIdentity{Password: "hash"}, + }, "queen": { Access: state.AdminAccess, Local: &state.LocalIdentity{UserID: 1001}, @@ -469,8 +518,9 @@ func (s *identitiesSuite) TestRemoveIdentities(c *C) { c.Assert(err, IsNil) err = st.RemoveIdentities(map[string]struct{}{ - "bob": {}, - "mary": {}, + "bob": {}, + "mary": {}, + "nancy": {}, }) c.Assert(err, IsNil) @@ -512,6 +562,10 @@ func (s *identitiesSuite) TestIdentities(c *C) { Access: state.AdminAccess, Local: &state.LocalIdentity{UserID: 1000}, }, + "nancy": { + Access: state.MetricsAccess, + Basic: &state.BasicIdentity{Password: "hash"}, + }, } err := st.AddIdentities(original) c.Assert(err, IsNil) @@ -529,6 +583,11 @@ func (s *identitiesSuite) TestIdentities(c *C) { Access: state.AdminAccess, Local: &state.LocalIdentity{UserID: 1000}, }, + "nancy": { + Name: "nancy", + Access: state.MetricsAccess, + Basic: &state.BasicIdentity{Password: "hash"}, + }, } c.Assert(identities, DeepEquals, expected) @@ -553,6 +612,13 @@ func (s *identitiesSuite) TestIdentityFromInputs(c *C) { Access: state.AdminAccess, Local: &state.LocalIdentity{UserID: 1000}, }, + "nancy": { + Access: state.MetricsAccess, + Basic: &state.BasicIdentity{ + // password: test + Password: "$6$F9cFSVEKyO4gB1Wh$8S1BSKsNkF.jBAixGc4W7l80OpfCNk65LZBDHBng3NAmbcHuMj4RIm7992rrJ8YA.SJ0hvm.vGk2z483am4Ym1", + }, + }, } err := st.AddIdentities(original) c.Assert(err, IsNil) @@ -577,4 +643,8 @@ func (s *identitiesSuite) TestIdentityFromInputs(c *C) { identity = st.IdentityFromInputs(&userID, "", "") c.Assert(identity, NotNil) c.Check(identity.Name, Equals, "mary") + + identity = st.IdentityFromInputs(nil, "nancy", "test") + c.Assert(identity, NotNil) + c.Check(identity.Name, Equals, "nancy") } From 306f2d381a23eeb6f0be40af2b8d442aae13a619 Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Thu, 23 Jan 2025 09:24:19 +0800 Subject: [PATCH 16/41] feat: add basic identity --- internals/daemon/api.go | 4 - internals/daemon/api_metrics.go | 34 ---- internals/daemon/daemon.go | 16 -- internals/metrics/metrics.go | 267 ------------------------------ internals/metrics/metrics_test.go | 98 ----------- 5 files changed, 419 deletions(-) delete mode 100644 internals/daemon/api_metrics.go delete mode 100644 internals/metrics/metrics.go delete mode 100644 internals/metrics/metrics_test.go diff --git a/internals/daemon/api.go b/internals/daemon/api.go index 182ce0469..e6c6de16c 100644 --- a/internals/daemon/api.go +++ b/internals/daemon/api.go @@ -108,10 +108,6 @@ var API = []*Command{{ WriteAccess: AdminAccess{}, GET: v1GetIdentities, POST: v1PostIdentities, -}, { - Path: "/v1/metrics", - ReadAccess: MetricsAccess{}, - GET: Metrics, }} var ( diff --git a/internals/daemon/api_metrics.go b/internals/daemon/api_metrics.go deleted file mode 100644 index 5061c54a9..000000000 --- a/internals/daemon/api_metrics.go +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2024 Canonical Ltd -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License version 3 as -// published by the Free Software Foundation. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . - -package daemon - -import ( - "net/http" - - "github.com/canonical/pebble/internals/metrics" -) - -func Metrics(c *Command, r *http.Request, _ *UserState) Response { - return metricsResponse{} -} - -// metricsResponse is a Response implementation to serve the metrics in a prometheus metrics format. -type metricsResponse struct{} - -func (r metricsResponse) ServeHTTP(w http.ResponseWriter, req *http.Request) { - registry := metrics.GetRegistry() - w.WriteHeader(http.StatusOK) - w.Write([]byte(registry.GatherMetrics())) -} diff --git a/internals/daemon/daemon.go b/internals/daemon/daemon.go index 5ee934f11..24bd6a0b6 100644 --- a/internals/daemon/daemon.go +++ b/internals/daemon/daemon.go @@ -373,22 +373,6 @@ func (d *Daemon) Init() error { } logger.Noticef("Started daemon.") - - // registry := metrics.GetRegistry() - // myCounter := registry.NewCounterVec("my_counter", "Total number of something processed.", []string{"operation", "status"}) - // myGauge := registry.NewGaugeVec("my_gauge", "Current value of something.", []string{"sensor"}) - // // Goroutine to update metrics randomly - // go func() { - // for { - // myCounter.WithLabelValues("read", "success").Inc() - // myCounter.WithLabelValues("write", "success").Add(2) - // myCounter.WithLabelValues("read", "failed").Inc() - // myGauge.WithLabelValues("temperature").Set(20.0 + rand.Float64()*10.0) - - // time.Sleep(time.Duration(rand.Intn(5)+1) * time.Second) // Random sleep between 1 and 5 seconds - // } - // }() - return nil } diff --git a/internals/metrics/metrics.go b/internals/metrics/metrics.go deleted file mode 100644 index 3de81f62e..000000000 --- a/internals/metrics/metrics.go +++ /dev/null @@ -1,267 +0,0 @@ -// Copyright (c) 2024 Canonical Ltd -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License version 3 as -// published by the Free Software Foundation. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . - -package metrics - -import ( - "fmt" - "math/rand" - "net/http" - "sort" - "strings" - "sync" - "time" - - "github.com/gorilla/mux" -) - -// MetricType models the type of a metric. -type MetricType string - -const ( - MetricTypeCounter MetricType = "counter" - MetricTypeGauge MetricType = "gauge" -) - -// MetricVec represents a collection of metrics with the same name and help but different label values. -type MetricVec struct { - Name string - Help string - Type MetricType - metrics map[string]*Metric // Key is the label values string (e.g., "region=foo,service=bar") - labels []string // Label names (e.g., ["region", "service"]) - mu sync.RWMutex -} - -// Metric represents a single metric within a MetricVec. -type Metric struct { - LabelValues map[string]string // Label values for this metric - value interface{} // Can be int64 for counter/gauge, or []float64 for histogram. - mu sync.RWMutex // Mutex for individual metric -} - -// MetricsRegistry stores and manages metric vectors. -type MetricsRegistry struct { - metricVecs map[string]*MetricVec - mu sync.RWMutex -} - -// Package-level variable to hold the single registry. -var registry *MetricsRegistry - -// Ensure registry is initialized only once. -var once sync.Once - -// GetRegistry returns the singleton MetricsRegistry instance. -func GetRegistry() *MetricsRegistry { - once.Do(func() { - registry = &MetricsRegistry{ - metricVecs: make(map[string]*MetricVec), - } - }) - return registry -} - -func formatLabelKey(labels []string, labelValues []string) string { - labelPairs := make([]string, len(labels)) - for i := range labels { - labelPairs[i] = labels[i] + "=" + labelValues[i] - } - - // Sort labels for consistency - sort.Strings(labelPairs) - res := strings.Join(labelPairs, ",") - if res == "" { - // a special key for situations where no labels are used - res = "__empty__" - } - return res -} - -// NewCounterVec creates a new counter vector. -func (r *MetricsRegistry) NewCounterVec(name, help string, labels []string) *MetricVec { - return r.newMetricVec(name, help, labels, MetricTypeCounter) -} - -// NewGaugeVec creates a new gauge vector. -func (r *MetricsRegistry) NewGaugeVec(name, help string, labels []string) *MetricVec { - return r.newMetricVec(name, help, labels, MetricTypeGauge) -} - -// newMetricVec is a helper function to create a new metric vector of any type. -func (r *MetricsRegistry) newMetricVec(name, help string, labels []string, metricType MetricType) *MetricVec { - r.mu.Lock() - defer r.mu.Unlock() - - if _, ok := r.metricVecs[name]; ok { - panic(fmt.Sprintf("metric with name %s already registered", name)) - } - vec := &MetricVec{ - Name: name, - Help: help, - Type: metricType, - metrics: make(map[string]*Metric), - labels: labels, - } - r.metricVecs[name] = vec - return vec -} - -// WithLabelValues gets or creates a metric with the specified label values. -func (v *MetricVec) WithLabelValues(labelValues ...string) *Metric { - if len(labelValues) != len(v.labels) { - panic(fmt.Errorf( - "%s has %d variable labels named %q but %d values %q were provided", - v.Name, - len(v.labels), - v.labels, - len(labelValues), - labelValues, - )) - } - - labelKey := formatLabelKey(v.labels, labelValues) - - v.mu.RLock() - metric, ok := v.metrics[labelKey] - v.mu.RUnlock() - - if !ok { - v.mu.Lock() - defer v.mu.Unlock() - // Double check locking - metric, ok = v.metrics[labelKey] - - if !ok { - metric = &Metric{ - LabelValues: make(map[string]string), - value: int64(0), - } - for i, label := range v.labels { - metric.LabelValues[label] = labelValues[i] - } - v.metrics[labelKey] = metric - } - } - - return metric -} - -// Inc increments the counter. -func (m *Metric) Inc() { - m.mu.Lock() - defer m.mu.Unlock() - m.value = m.value.(int64) + 1 -} - -// Add adds the given value to the counter. -func (m *Metric) Add(value int64) { - m.mu.Lock() - defer m.mu.Unlock() - - switch v := m.value.(type) { - case int64: - m.value = v + value - default: - // if the metric is not a counter (e.g., a gauge) - panic(fmt.Errorf("add called on a non-counter metric: %T", m.value)) - } -} - -// Set sets the gauge value. -func (m *Metric) Set(value float64) { - m.mu.Lock() - defer m.mu.Unlock() - m.value = value -} - -// GatherMetrics function (modified slightly) -func (r *MetricsRegistry) GatherMetrics() string { - r.mu.RLock() - defer r.mu.RUnlock() - - var output string - - for _, vec := range r.metricVecs { - output += fmt.Sprintf("# HELP %s %s\n", vec.Name, vec.Help) - output += fmt.Sprintf("# TYPE %s %s\n", vec.Name, vec.Type) - - if len(vec.labels) == 0 { // Handle metrics without labels - for _, metric := range vec.metrics { - switch v := metric.value.(type) { - case int64: - output += fmt.Sprintf("%s %d\n", vec.Name, v) // No curly braces - case float64: - output += fmt.Sprintf("%s %.2f\n", vec.Name, v) // No curly braces - default: - output += fmt.Sprintf("%s %v\n", vec.Name, v) // Fallback - } - } - } else { - // Handle metrics with labels. - for labelKey, metric := range vec.metrics { - switch v := metric.value.(type) { - case int64: - output += fmt.Sprintf("%s{%s} %d\n", vec.Name, labelKey, v) - case float64: - output += fmt.Sprintf("%s{%s} %.2f\n", vec.Name, labelKey, v) - default: - // Fallback for other types - output += fmt.Sprintf("%s{%s} %v\n", vec.Name, labelKey, v) - } - } - } - } - - return output -} - -// Example usage -func main() { - // Get the singleton registry - registry := GetRegistry() - - myCounter := registry.NewCounterVec("my_counter", "Total number of something processed.", []string{"operation", "status"}) - myGauge := registry.NewGaugeVec("my_gauge", "Current value of something.", []string{"sensor"}) - - // Goroutine to update metrics randomly - go func() { - for { - // Use like prometheus client library. - - // counter - myCounter.WithLabelValues("read", "success").Inc() - myCounter.WithLabelValues("write", "success").Add(2) - myCounter.WithLabelValues("read", "failed").Inc() - - // gauge - myGauge.WithLabelValues("temperature").Set(20.0 + rand.Float64()*10.0) - - time.Sleep(time.Duration(rand.Intn(5)+1) * time.Second) // Random sleep between 1 and 5 seconds - } - }() - // Use Gorilla Mux router - router := mux.NewRouter() - router.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) { - w.WriteHeader(http.StatusOK) - w.Write([]byte(registry.GatherMetrics())) - }) - - // Serve on port 2112 - fmt.Println("Serving metrics on :2112/metrics") - err := http.ListenAndServe(":2112", router) // Use the router here - if err != nil { - fmt.Println("Error starting server:", err) - } -} diff --git a/internals/metrics/metrics_test.go b/internals/metrics/metrics_test.go deleted file mode 100644 index 96bd9baaa..000000000 --- a/internals/metrics/metrics_test.go +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (c) 2024 Canonical Ltd -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License version 3 as -// published by the Free Software Foundation. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . - -package metrics - -import ( - "sync" - "testing" - - . "gopkg.in/check.v1" -) - -// Hook up check.v1 into the "go test" runner -func Test(t *testing.T) { TestingT(t) } - -var _ = Suite(&RegistryTestSuite{}) - -// Test Suite structure -type RegistryTestSuite struct { - registry *MetricsRegistry -} - -func (s *RegistryTestSuite) SetUpTest(c *C) { - s.registry = &MetricsRegistry{ - metricVecs: make(map[string]*MetricVec), - } -} - -func (s *RegistryTestSuite) TestCounterWithoutLabels(c *C) { - labels := []string{} - testCounter := s.registry.NewCounterVec("test_counter", "Total number of something processed", labels) - testCounter.WithLabelValues().Inc() - c.Check(s.registry.metricVecs["test_counter"].metrics[formatLabelKey(labels, []string{})].value.(int64), Equals, int64(1)) - testCounter.WithLabelValues().Inc() - c.Check(s.registry.metricVecs["test_counter"].metrics[formatLabelKey(labels, []string{})].value.(int64), Equals, int64(2)) -} - -func (s *RegistryTestSuite) TestCounterWithLabels(c *C) { - labels := []string{"operation", "status"} - testCounter := s.registry.NewCounterVec("test_counter", "Total number of something processed", labels) - testCounter.WithLabelValues("read", "success").Inc() - c.Check(s.registry.metricVecs["test_counter"].metrics[formatLabelKey(labels, []string{"read", "success"})].value.(int64), Equals, int64(1)) - testCounter.WithLabelValues("write", "fail").Add(2) - c.Check(s.registry.metricVecs["test_counter"].metrics[formatLabelKey(labels, []string{"write", "fail"})].value.(int64), Equals, int64(2)) -} - -func (s *RegistryTestSuite) TestGauge(c *C) { - labels := []string{"sensor"} - testGauge := s.registry.NewGaugeVec("test_gauge", "Current value of something", labels) - testGauge.WithLabelValues("temperature").Set(10.0) - c.Check(s.registry.metricVecs["test_gauge"].metrics[formatLabelKey(labels, []string{"temperature"})].value.(float64), Equals, float64(10.0)) - testGauge.WithLabelValues("temperature").Set(20.0) - c.Check(s.registry.metricVecs["test_gauge"].metrics[formatLabelKey(labels, []string{"temperature"})].value.(float64), Equals, float64(20.0)) -} - -func (s *RegistryTestSuite) TestGatherMetrics(c *C) { - testCounter := s.registry.NewCounterVec("test_counter", "Total number of something processed", []string{"operation", "status"}) - testCounter.WithLabelValues("read", "success").Inc() - testGauge := s.registry.NewGaugeVec("test_gauge", "Current value of something", []string{"sensor"}) - testGauge.WithLabelValues("temperature").Set(10.0) - metricsOutput := s.registry.GatherMetrics() - expectedOutput := "# HELP test_counter Total number of something processed\n# TYPE test_counter counter\ntest_counter{operation=read,status=success} 1\n" - expectedOutput += "# HELP test_gauge Current value of something\n# TYPE test_gauge gauge\ntest_gauge{sensor=temperature} 10.00\n" - c.Check(metricsOutput, Equals, expectedOutput) -} - -func (s *RegistryTestSuite) TestGatherMetricsWithoutLabels(c *C) { - testCounter := s.registry.NewCounterVec("test_counter", "Total number of something processed", []string{}) - testCounter.WithLabelValues().Inc() - metricsOutput := s.registry.GatherMetrics() - expectedOutput := "# HELP test_counter Total number of something processed\n# TYPE test_counter counter\ntest_counter 1\n" - c.Check(metricsOutput, Equals, expectedOutput) -} - -func (s *RegistryTestSuite) TestRaceConditions(c *C) { - counter := s.registry.NewCounterVec("test_counter", "Total number of something processed", []string{}) - var wg sync.WaitGroup - for i := 0; i < 1000; i++ { - wg.Add(1) - go func() { - defer wg.Done() - counter.WithLabelValues().Inc() - }() - } - wg.Wait() - c.Check(s.registry.metricVecs["test_counter"].metrics[formatLabelKey([]string{}, []string{})].value.(int64), Equals, int64(1000)) -} From 6c534918db7c739921e7a303a2ace28336a5a844 Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Thu, 23 Jan 2025 09:51:22 +0800 Subject: [PATCH 17/41] chore: update comments --- client/identities.go | 2 +- internals/daemon/access.go | 2 +- internals/overlord/state/identities.go | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/client/identities.go b/client/identities.go index fc1fb6e10..fa439977e 100644 --- a/client/identities.go +++ b/client/identities.go @@ -49,7 +49,7 @@ type LocalIdentity struct { } // BasicIdentity holds identity configuration specific to the "basic" type -// (for username/password authentication). +// (for HTTP basic authentication). type BasicIdentity struct { Password string `json:"password" yaml:"password"` } diff --git a/internals/daemon/access.go b/internals/daemon/access.go index 67a2e9171..56ecb7129 100644 --- a/internals/daemon/access.go +++ b/internals/daemon/access.go @@ -69,7 +69,7 @@ func (ac UserAccess) CheckAccess(d *Daemon, r *http.Request, user *UserState) Re return Unauthorized(accessDenied) } -// MetricsAccess allows requests over the HTTP from authenticated users +// MetricsAccess allows requests over the HTTP from authenticated users. type MetricsAccess struct{} func (ac MetricsAccess) CheckAccess(d *Daemon, r *http.Request, user *UserState) Response { diff --git a/internals/overlord/state/identities.go b/internals/overlord/state/identities.go index 26125f8c9..d1e98543a 100644 --- a/internals/overlord/state/identities.go +++ b/internals/overlord/state/identities.go @@ -53,7 +53,7 @@ type LocalIdentity struct { } // BasicIdentity holds identity configuration specific to the "basic" type -// (for username/password authentication). +// (for HTTP basic authentication). type BasicIdentity struct { Password string // Note: In a real application, store a password hash, not the plaintext password. } @@ -65,7 +65,6 @@ func (d *Identity) validate() error { } if d.Name != "" { - // Regular expression to match any character that is not a letter, number, underscore, or hyphen. invalidChars := regexp.MustCompile(`[^a-zA-Z0-9_\-]`) if invalidChars.MatchString(d.Name) { return fmt.Errorf("identity name %q contains invalid characters (only alphanumeric, underscore, and hyphen allowed)", d.Name) From a57f04129c62f78c985c945f4632aff5f9e8528e Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Fri, 24 Jan 2025 10:12:11 +0800 Subject: [PATCH 18/41] chore: rework the metrics for services --- internals/daemon/api.go | 2 +- internals/daemon/api_metrics.go | 24 ++- internals/metrics/metrics.go | 255 ++--------------------- internals/metrics/metrics_test.go | 98 --------- internals/overlord/servstate/handlers.go | 61 +++++- internals/overlord/servstate/manager.go | 14 ++ 6 files changed, 111 insertions(+), 343 deletions(-) delete mode 100644 internals/metrics/metrics_test.go diff --git a/internals/daemon/api.go b/internals/daemon/api.go index 182ce0469..d6feef22f 100644 --- a/internals/daemon/api.go +++ b/internals/daemon/api.go @@ -111,7 +111,7 @@ var API = []*Command{{ }, { Path: "/v1/metrics", ReadAccess: MetricsAccess{}, - GET: Metrics, + GET: v1GetMetrics, }} var ( diff --git a/internals/daemon/api_metrics.go b/internals/daemon/api_metrics.go index 5061c54a9..629548066 100644 --- a/internals/daemon/api_metrics.go +++ b/internals/daemon/api_metrics.go @@ -15,20 +15,30 @@ package daemon import ( + "bytes" "net/http" - "github.com/canonical/pebble/internals/metrics" + "github.com/canonical/pebble/internals/overlord/servstate" ) -func Metrics(c *Command, r *http.Request, _ *UserState) Response { - return metricsResponse{} +func v1GetMetrics(c *Command, r *http.Request, _ *UserState) Response { + return metricsResponse{ + svcMgr: overlordServiceManager(c.d.overlord), + } } -// metricsResponse is a Response implementation to serve the metrics in a prometheus metrics format. -type metricsResponse struct{} +// metricsResponse is a Response implementation to serve the metrics in the OpenMetrics format. +type metricsResponse struct { + svcMgr *servstate.ServiceManager +} func (r metricsResponse) ServeHTTP(w http.ResponseWriter, req *http.Request) { - registry := metrics.GetRegistry() + var buffer bytes.Buffer + err := r.svcMgr.Metrics(&buffer) + if err != nil { + w.WriteHeader(http.StatusInternalServerError) + return + } w.WriteHeader(http.StatusOK) - w.Write([]byte(registry.GatherMetrics())) + w.Write([]byte(buffer.String())) } diff --git a/internals/metrics/metrics.go b/internals/metrics/metrics.go index 3de81f62e..977ec9b07 100644 --- a/internals/metrics/metrics.go +++ b/internals/metrics/metrics.go @@ -1,4 +1,4 @@ -// Copyright (c) 2024 Canonical Ltd +// Copyright (c) 2025 Canonical Ltd // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License version 3 as @@ -16,252 +16,35 @@ package metrics import ( "fmt" - "math/rand" - "net/http" + "io" "sort" "strings" - "sync" - "time" - - "github.com/gorilla/mux" -) - -// MetricType models the type of a metric. -type MetricType string - -const ( - MetricTypeCounter MetricType = "counter" - MetricTypeGauge MetricType = "gauge" ) -// MetricVec represents a collection of metrics with the same name and help but different label values. -type MetricVec struct { - Name string - Help string - Type MetricType - metrics map[string]*Metric // Key is the label values string (e.g., "region=foo,service=bar") - labels []string // Label names (e.g., ["region", "service"]) - mu sync.RWMutex -} - -// Metric represents a single metric within a MetricVec. +// Metric represents a single metric. type Metric struct { - LabelValues map[string]string // Label values for this metric - value interface{} // Can be int64 for counter/gauge, or []float64 for histogram. - mu sync.RWMutex // Mutex for individual metric -} - -// MetricsRegistry stores and manages metric vectors. -type MetricsRegistry struct { - metricVecs map[string]*MetricVec - mu sync.RWMutex + Name string + Value interface{} + LabelPairs []string } -// Package-level variable to hold the single registry. -var registry *MetricsRegistry - -// Ensure registry is initialized only once. -var once sync.Once - -// GetRegistry returns the singleton MetricsRegistry instance. -func GetRegistry() *MetricsRegistry { - once.Do(func() { - registry = &MetricsRegistry{ - metricVecs: make(map[string]*MetricVec), - } - }) - return registry -} - -func formatLabelKey(labels []string, labelValues []string) string { - labelPairs := make([]string, len(labels)) - for i := range labels { - labelPairs[i] = labels[i] + "=" + labelValues[i] - } - - // Sort labels for consistency - sort.Strings(labelPairs) - res := strings.Join(labelPairs, ",") - if res == "" { - // a special key for situations where no labels are used - res = "__empty__" +// WriteTo writes the metric in OpenMetrics format. +func (m *Metric) WriteTo(w io.Writer) (n int64, err error) { + labelStr := "" + if len(m.LabelPairs) > 0 { + sort.Strings(m.LabelPairs) + labelStr = "{" + strings.Join(m.LabelPairs, ",") + "}" } - return res -} - -// NewCounterVec creates a new counter vector. -func (r *MetricsRegistry) NewCounterVec(name, help string, labels []string) *MetricVec { - return r.newMetricVec(name, help, labels, MetricTypeCounter) -} - -// NewGaugeVec creates a new gauge vector. -func (r *MetricsRegistry) NewGaugeVec(name, help string, labels []string) *MetricVec { - return r.newMetricVec(name, help, labels, MetricTypeGauge) -} - -// newMetricVec is a helper function to create a new metric vector of any type. -func (r *MetricsRegistry) newMetricVec(name, help string, labels []string, metricType MetricType) *MetricVec { - r.mu.Lock() - defer r.mu.Unlock() - - if _, ok := r.metricVecs[name]; ok { - panic(fmt.Sprintf("metric with name %s already registered", name)) - } - vec := &MetricVec{ - Name: name, - Help: help, - Type: metricType, - metrics: make(map[string]*Metric), - labels: labels, - } - r.metricVecs[name] = vec - return vec -} - -// WithLabelValues gets or creates a metric with the specified label values. -func (v *MetricVec) WithLabelValues(labelValues ...string) *Metric { - if len(labelValues) != len(v.labels) { - panic(fmt.Errorf( - "%s has %d variable labels named %q but %d values %q were provided", - v.Name, - len(v.labels), - v.labels, - len(labelValues), - labelValues, - )) - } - - labelKey := formatLabelKey(v.labels, labelValues) - v.mu.RLock() - metric, ok := v.metrics[labelKey] - v.mu.RUnlock() - - if !ok { - v.mu.Lock() - defer v.mu.Unlock() - // Double check locking - metric, ok = v.metrics[labelKey] - - if !ok { - metric = &Metric{ - LabelValues: make(map[string]string), - value: int64(0), - } - for i, label := range v.labels { - metric.LabelValues[label] = labelValues[i] - } - v.metrics[labelKey] = metric - } - } - - return metric -} - -// Inc increments the counter. -func (m *Metric) Inc() { - m.mu.Lock() - defer m.mu.Unlock() - m.value = m.value.(int64) + 1 -} - -// Add adds the given value to the counter. -func (m *Metric) Add(value int64) { - m.mu.Lock() - defer m.mu.Unlock() - - switch v := m.value.(type) { + var written int + switch v := m.Value.(type) { case int64: - m.value = v + value + written, err = fmt.Fprintf(w, "%s%s %d\n", m.Name, labelStr, v) + case float64: + written, err = fmt.Fprintf(w, "%s%s %.2f\n", m.Name, labelStr, v) // Format float appropriately default: - // if the metric is not a counter (e.g., a gauge) - panic(fmt.Errorf("add called on a non-counter metric: %T", m.value)) - } -} - -// Set sets the gauge value. -func (m *Metric) Set(value float64) { - m.mu.Lock() - defer m.mu.Unlock() - m.value = value -} - -// GatherMetrics function (modified slightly) -func (r *MetricsRegistry) GatherMetrics() string { - r.mu.RLock() - defer r.mu.RUnlock() - - var output string - - for _, vec := range r.metricVecs { - output += fmt.Sprintf("# HELP %s %s\n", vec.Name, vec.Help) - output += fmt.Sprintf("# TYPE %s %s\n", vec.Name, vec.Type) - - if len(vec.labels) == 0 { // Handle metrics without labels - for _, metric := range vec.metrics { - switch v := metric.value.(type) { - case int64: - output += fmt.Sprintf("%s %d\n", vec.Name, v) // No curly braces - case float64: - output += fmt.Sprintf("%s %.2f\n", vec.Name, v) // No curly braces - default: - output += fmt.Sprintf("%s %v\n", vec.Name, v) // Fallback - } - } - } else { - // Handle metrics with labels. - for labelKey, metric := range vec.metrics { - switch v := metric.value.(type) { - case int64: - output += fmt.Sprintf("%s{%s} %d\n", vec.Name, labelKey, v) - case float64: - output += fmt.Sprintf("%s{%s} %.2f\n", vec.Name, labelKey, v) - default: - // Fallback for other types - output += fmt.Sprintf("%s{%s} %v\n", vec.Name, labelKey, v) - } - } - } + written, err = fmt.Fprintf(w, "%s%s %v\n", m.Name, labelStr, m.Value) } - return output -} - -// Example usage -func main() { - // Get the singleton registry - registry := GetRegistry() - - myCounter := registry.NewCounterVec("my_counter", "Total number of something processed.", []string{"operation", "status"}) - myGauge := registry.NewGaugeVec("my_gauge", "Current value of something.", []string{"sensor"}) - - // Goroutine to update metrics randomly - go func() { - for { - // Use like prometheus client library. - - // counter - myCounter.WithLabelValues("read", "success").Inc() - myCounter.WithLabelValues("write", "success").Add(2) - myCounter.WithLabelValues("read", "failed").Inc() - - // gauge - myGauge.WithLabelValues("temperature").Set(20.0 + rand.Float64()*10.0) - - time.Sleep(time.Duration(rand.Intn(5)+1) * time.Second) // Random sleep between 1 and 5 seconds - } - }() - // Use Gorilla Mux router - router := mux.NewRouter() - router.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) { - w.WriteHeader(http.StatusOK) - w.Write([]byte(registry.GatherMetrics())) - }) - - // Serve on port 2112 - fmt.Println("Serving metrics on :2112/metrics") - err := http.ListenAndServe(":2112", router) // Use the router here - if err != nil { - fmt.Println("Error starting server:", err) - } + return int64(written), err } diff --git a/internals/metrics/metrics_test.go b/internals/metrics/metrics_test.go deleted file mode 100644 index 96bd9baaa..000000000 --- a/internals/metrics/metrics_test.go +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (c) 2024 Canonical Ltd -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License version 3 as -// published by the Free Software Foundation. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . - -package metrics - -import ( - "sync" - "testing" - - . "gopkg.in/check.v1" -) - -// Hook up check.v1 into the "go test" runner -func Test(t *testing.T) { TestingT(t) } - -var _ = Suite(&RegistryTestSuite{}) - -// Test Suite structure -type RegistryTestSuite struct { - registry *MetricsRegistry -} - -func (s *RegistryTestSuite) SetUpTest(c *C) { - s.registry = &MetricsRegistry{ - metricVecs: make(map[string]*MetricVec), - } -} - -func (s *RegistryTestSuite) TestCounterWithoutLabels(c *C) { - labels := []string{} - testCounter := s.registry.NewCounterVec("test_counter", "Total number of something processed", labels) - testCounter.WithLabelValues().Inc() - c.Check(s.registry.metricVecs["test_counter"].metrics[formatLabelKey(labels, []string{})].value.(int64), Equals, int64(1)) - testCounter.WithLabelValues().Inc() - c.Check(s.registry.metricVecs["test_counter"].metrics[formatLabelKey(labels, []string{})].value.(int64), Equals, int64(2)) -} - -func (s *RegistryTestSuite) TestCounterWithLabels(c *C) { - labels := []string{"operation", "status"} - testCounter := s.registry.NewCounterVec("test_counter", "Total number of something processed", labels) - testCounter.WithLabelValues("read", "success").Inc() - c.Check(s.registry.metricVecs["test_counter"].metrics[formatLabelKey(labels, []string{"read", "success"})].value.(int64), Equals, int64(1)) - testCounter.WithLabelValues("write", "fail").Add(2) - c.Check(s.registry.metricVecs["test_counter"].metrics[formatLabelKey(labels, []string{"write", "fail"})].value.(int64), Equals, int64(2)) -} - -func (s *RegistryTestSuite) TestGauge(c *C) { - labels := []string{"sensor"} - testGauge := s.registry.NewGaugeVec("test_gauge", "Current value of something", labels) - testGauge.WithLabelValues("temperature").Set(10.0) - c.Check(s.registry.metricVecs["test_gauge"].metrics[formatLabelKey(labels, []string{"temperature"})].value.(float64), Equals, float64(10.0)) - testGauge.WithLabelValues("temperature").Set(20.0) - c.Check(s.registry.metricVecs["test_gauge"].metrics[formatLabelKey(labels, []string{"temperature"})].value.(float64), Equals, float64(20.0)) -} - -func (s *RegistryTestSuite) TestGatherMetrics(c *C) { - testCounter := s.registry.NewCounterVec("test_counter", "Total number of something processed", []string{"operation", "status"}) - testCounter.WithLabelValues("read", "success").Inc() - testGauge := s.registry.NewGaugeVec("test_gauge", "Current value of something", []string{"sensor"}) - testGauge.WithLabelValues("temperature").Set(10.0) - metricsOutput := s.registry.GatherMetrics() - expectedOutput := "# HELP test_counter Total number of something processed\n# TYPE test_counter counter\ntest_counter{operation=read,status=success} 1\n" - expectedOutput += "# HELP test_gauge Current value of something\n# TYPE test_gauge gauge\ntest_gauge{sensor=temperature} 10.00\n" - c.Check(metricsOutput, Equals, expectedOutput) -} - -func (s *RegistryTestSuite) TestGatherMetricsWithoutLabels(c *C) { - testCounter := s.registry.NewCounterVec("test_counter", "Total number of something processed", []string{}) - testCounter.WithLabelValues().Inc() - metricsOutput := s.registry.GatherMetrics() - expectedOutput := "# HELP test_counter Total number of something processed\n# TYPE test_counter counter\ntest_counter 1\n" - c.Check(metricsOutput, Equals, expectedOutput) -} - -func (s *RegistryTestSuite) TestRaceConditions(c *C) { - counter := s.registry.NewCounterVec("test_counter", "Total number of something processed", []string{}) - var wg sync.WaitGroup - for i := 0; i < 1000; i++ { - wg.Add(1) - go func() { - defer wg.Done() - counter.WithLabelValues().Inc() - }() - } - wg.Wait() - c.Check(s.registry.metricVecs["test_counter"].metrics[formatLabelKey([]string{}, []string{})].value.(int64), Equals, int64(1000)) -} diff --git a/internals/overlord/servstate/handlers.go b/internals/overlord/servstate/handlers.go index 78f86a927..e3efc5775 100644 --- a/internals/overlord/servstate/handlers.go +++ b/internals/overlord/servstate/handlers.go @@ -8,6 +8,7 @@ import ( "os/exec" "os/user" "strconv" + "sync/atomic" "syscall" "time" @@ -15,6 +16,7 @@ import ( "gopkg.in/tomb.v2" "github.com/canonical/pebble/internals/logger" + "github.com/canonical/pebble/internals/metrics" "github.com/canonical/pebble/internals/osutil" "github.com/canonical/pebble/internals/overlord/restart" "github.com/canonical/pebble/internals/overlord/state" @@ -104,6 +106,7 @@ type serviceData struct { resetTimer *time.Timer restarting bool currentSince time.Time + startCount atomic.Int64 } func (m *ServiceManager) doStart(task *state.Task, tomb *tomb.Tomb) error { @@ -455,7 +458,7 @@ func (s *serviceData) startInternal() error { // Pass buffer reference to logMgr to start log forwarding s.manager.logMgr.ServiceStarted(s.config, s.logs) - + s.IncStartCount() return nil } @@ -825,6 +828,62 @@ func (s *serviceData) checkFailed(action plan.ServiceAction) { } } +// IncStartCount increments the startCount for the service. +func (d *serviceData) IncStartCount() { + d.startCount.Add(1) +} + +// Metrics writes the service's metrics. +func (d *serviceData) Metrics(writer io.Writer) error { + labels := []string{fmt.Sprintf("service_name=%s", d.config.Name)} + + // Write HELP and TYPE comments for pebble_service_start_count + _, err := fmt.Fprintf(writer, "# HELP pebble_service_start_count Number of times the service has started\n") + if err != nil { + return err + } + _, err = fmt.Fprintf(writer, "# TYPE pebble_service_start_count counter\n") + if err != nil { + return err + } + + startCountMetric := metrics.Metric{ + Name: "pebble_service_start_count", + Value: d.startCount.Load(), + LabelPairs: labels, + } + _, err = startCountMetric.WriteTo(writer) + if err != nil { + return err + } + + // Write HELP and TYPE comments for pebble_service_active + _, err = fmt.Fprintf(writer, "# HELP pebble_service_active Indicates if the service is currently active (1) or not (0)\n") + if err != nil { + return err + } + _, err = fmt.Fprintf(writer, "# TYPE pebble_service_active gauge\n") + if err != nil { + return err + } + + active := 0 + if stateToStatus(d.state) == StatusActive { + active = 1 + } + activeMetric := metrics.Metric{ + Name: "pebble_service_active", + Value: active, + LabelPairs: labels, + } + _, err = activeMetric.WriteTo(writer) + if err != nil { + return err + } + + return nil +} + var setCmdCredential = func(cmd *exec.Cmd, credential *syscall.Credential) { cmd.SysProcAttr.Credential = credential } diff --git a/internals/overlord/servstate/manager.go b/internals/overlord/servstate/manager.go index 19290263b..e350ac002 100644 --- a/internals/overlord/servstate/manager.go +++ b/internals/overlord/servstate/manager.go @@ -334,6 +334,20 @@ func (m *ServiceManager) CheckFailed(name string) { } } +// Metrics collects and writes metrics for all services to the provided writer. +func (m *ServiceManager) Metrics(writer io.Writer) error { + m.servicesLock.Lock() + defer m.servicesLock.Unlock() + + for _, service := range m.services { + err := service.Metrics(writer) + if err != nil { + return err + } + } + return nil +} + // servicesToStop is used during service manager shutdown to cleanly terminate // all running services. Running services include both services in the // stateRunning and stateBackoff, since a service in backoff state can start From b7a442fb6e45f3756e6ea35a81034792470c6c3d Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Fri, 24 Jan 2025 18:33:56 +0800 Subject: [PATCH 19/41] chore: add metrics for checks, not done --- internals/daemon/api.go | 1 + internals/daemon/api_metrics.go | 9 ++ internals/overlord/checkstate/handlers.go | 2 + internals/overlord/checkstate/manager.go | 117 ++++++++++++++++++++-- 4 files changed, 123 insertions(+), 6 deletions(-) diff --git a/internals/daemon/api.go b/internals/daemon/api.go index d6feef22f..bfeb0565e 100644 --- a/internals/daemon/api.go +++ b/internals/daemon/api.go @@ -119,6 +119,7 @@ var ( overlordServiceManager = (*overlord.Overlord).ServiceManager overlordPlanManager = (*overlord.Overlord).PlanManager + overlordCheckManager = (*overlord.Overlord).CheckManager muxVars = mux.Vars ) diff --git a/internals/daemon/api_metrics.go b/internals/daemon/api_metrics.go index 629548066..db829e3cb 100644 --- a/internals/daemon/api_metrics.go +++ b/internals/daemon/api_metrics.go @@ -18,27 +18,36 @@ import ( "bytes" "net/http" + "github.com/canonical/pebble/internals/overlord/checkstate" "github.com/canonical/pebble/internals/overlord/servstate" ) func v1GetMetrics(c *Command, r *http.Request, _ *UserState) Response { return metricsResponse{ svcMgr: overlordServiceManager(c.d.overlord), + chkMgr: overlordCheckManager(c.d.overlord), } } // metricsResponse is a Response implementation to serve the metrics in the OpenMetrics format. type metricsResponse struct { svcMgr *servstate.ServiceManager + chkMgr *checkstate.CheckManager } func (r metricsResponse) ServeHTTP(w http.ResponseWriter, req *http.Request) { var buffer bytes.Buffer + err := r.svcMgr.Metrics(&buffer) if err != nil { w.WriteHeader(http.StatusInternalServerError) return } + err = r.chkMgr.Metrics(&buffer) + if err != nil { + w.WriteHeader(http.StatusInternalServerError) + return + } w.WriteHeader(http.StatusOK) w.Write([]byte(buffer.String())) } diff --git a/internals/overlord/checkstate/handlers.go b/internals/overlord/checkstate/handlers.go index a087d5a06..cc3e256c8 100644 --- a/internals/overlord/checkstate/handlers.go +++ b/internals/overlord/checkstate/handlers.go @@ -47,6 +47,7 @@ func (m *CheckManager) doPerformCheck(task *state.Task, tomb *tombpkg.Tomb) erro select { case <-ticker.C: err := runCheck(tomb.Context(nil), chk, config.Timeout.Value) + m.incCheckInfoPerformCheckCount(config) if !tomb.Alive() { return checkStopped(config.Name, task.Kind(), tomb.Err()) } @@ -129,6 +130,7 @@ func (m *CheckManager) doRecoverCheck(task *state.Task, tomb *tombpkg.Tomb) erro select { case <-ticker.C: err := runCheck(tomb.Context(nil), chk, config.Timeout.Value) + m.incCheckInfoRecoverCheckCount(config) if !tomb.Alive() { return checkStopped(config.Name, task.Kind(), tomb.Err()) } diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index 81eed861d..6e158cbd0 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -17,12 +17,14 @@ package checkstate import ( "context" "fmt" + "io" "reflect" "sort" "sync" "gopkg.in/tomb.v2" + "github.com/canonical/pebble/internals/metrics" "github.com/canonical/pebble/internals/overlord/state" "github.com/canonical/pebble/internals/plan" ) @@ -329,6 +331,24 @@ func (m *CheckManager) updateCheckInfo(config *plan.Check, changeID string, fail } } +func (m *CheckManager) incCheckInfoPerformCheckCount(config *plan.Check) { + m.checksLock.Lock() + defer m.checksLock.Unlock() + + info := m.checks[config.Name] + info.PerformCheckCount += 1 + m.checks[config.Name] = info +} + +func (m *CheckManager) incCheckInfoRecoverCheckCount(config *plan.Check) { + m.checksLock.Lock() + defer m.checksLock.Unlock() + + info := m.checks[config.Name] + info.RecoverCheckCount += 1 + m.checks[config.Name] = info +} + func (m *CheckManager) deleteCheckInfo(name string) { m.checksLock.Lock() defer m.checksLock.Unlock() @@ -338,12 +358,14 @@ func (m *CheckManager) deleteCheckInfo(name string) { // CheckInfo provides status information about a single check. type CheckInfo struct { - Name string - Level plan.CheckLevel - Status CheckStatus - Failures int - Threshold int - ChangeID string + Name string + Level plan.CheckLevel + Status CheckStatus + Failures int + Threshold int + ChangeID string + PerformCheckCount int64 + RecoverCheckCount int64 } type CheckStatus string @@ -356,3 +378,86 @@ const ( type checker interface { check(ctx context.Context) error } + +func (c *CheckInfo) Metrics(writer io.Writer) error { + labels := []string{fmt.Sprintf("check=%s", c.Name)} + + // Write HELP and TYPE comments for pebble_check_up + _, err := fmt.Fprintf(writer, "# HELP pebble_check_up Number of times the perform check has run\n") + if err != nil { + return err + } + _, err = fmt.Fprintf(writer, "# TYPE pebble_check_up counter\n") + if err != nil { + return err + } + checkStatus := 0 + if c.Status == CheckStatusUp { + checkStatus = 1 + } + checkUpMetric := metrics.Metric{ + Name: "pebble_check_up", + Value: checkStatus, + LabelPairs: labels, + } + _, err = checkUpMetric.WriteTo(writer) + if err != nil { + return err + } + + // Write HELP and TYPE comments for pebble_perform_check_count + _, err = fmt.Fprintf(writer, "# HELP pebble_perform_check_count Number of times the perform check has run\n") + if err != nil { + return err + } + _, err = fmt.Fprintf(writer, "# TYPE pebble_perform_check_count counter\n") + if err != nil { + return err + } + performCheckCountMetric := metrics.Metric{ + Name: "pebble_perform_check_count", + Value: c.PerformCheckCount, + LabelPairs: labels, + } + _, err = performCheckCountMetric.WriteTo(writer) + if err != nil { + return err + } + + // Write HELP and TYPE comments for pebble_recover_check_count + _, err = fmt.Fprintf(writer, "# HELP pebble_recover_check_count Number of times the perform check has run\n") + if err != nil { + return err + } + _, err = fmt.Fprintf(writer, "# TYPE pebble_recover_check_count counter\n") + if err != nil { + return err + } + recoverCheckCountMetric := metrics.Metric{ + Name: "pebble_recover_check_count", + Value: c.PerformCheckCount, + LabelPairs: labels, + } + _, err = recoverCheckCountMetric.WriteTo(writer) + if err != nil { + return err + } + return nil +} + +// Metrics collects and writes metrics for all checks to the provided writer. +func (m *CheckManager) Metrics(writer io.Writer) error { + infos, err := m.Checks() + if err != nil { + return err + } + + for _, info := range infos { + err := info.Metrics(writer) + if err != nil { + return err + } + } + + return nil +} From e49419dc01030fd2514a190680f644fbcdd8fe81 Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Mon, 10 Feb 2025 17:57:32 +0800 Subject: [PATCH 20/41] chore: refactor according to review and add more unit tests --- client/identities.go | 2 +- internals/daemon/daemon.go | 4 +- internals/overlord/state/identities.go | 44 ++++++++++++--------- internals/overlord/state/identities_test.go | 19 ++++++++- 4 files changed, 46 insertions(+), 23 deletions(-) diff --git a/client/identities.go b/client/identities.go index fa439977e..2f6033330 100644 --- a/client/identities.go +++ b/client/identities.go @@ -26,7 +26,7 @@ type Identity struct { Access IdentityAccess `json:"access" yaml:"access"` // One or more of the following type-specific configuration fields must be - // non-nil (currently the only types are "local" and "basic"). + // non-nil. Local *LocalIdentity `json:"local,omitempty" yaml:"local,omitempty"` Basic *BasicIdentity `json:"basic,omitempty" yaml:"basic,omitempty"` } diff --git a/internals/daemon/daemon.go b/internals/daemon/daemon.go index 24bd6a0b6..6427209b6 100644 --- a/internals/daemon/daemon.go +++ b/internals/daemon/daemon.go @@ -218,8 +218,8 @@ func (c *Command) ServeHTTP(w http.ResponseWriter, r *http.Request) { // not good: https://github.com/canonical/pebble/pull/369 var user *UserState if _, isOpen := access.(OpenAccess); !isOpen { - basicAuthUsername, basicAuthPassword, _ := r.BasicAuth() - user, err = userFromRequest(c.d.state, r, ucred, basicAuthUsername, basicAuthPassword) + username, password, _ := r.BasicAuth() + user, err = userFromRequest(c.d.state, r, ucred, username, password) if err != nil { Forbidden("forbidden").ServeHTTP(w, r) return diff --git a/internals/overlord/state/identities.go b/internals/overlord/state/identities.go index d1e98543a..37a919952 100644 --- a/internals/overlord/state/identities.go +++ b/internals/overlord/state/identities.go @@ -31,7 +31,7 @@ type Identity struct { Access IdentityAccess // One or more of the following type-specific configuration fields must be - // non-nil (currently the only types are "local" and "basic"). + // non-nil. Local *LocalIdentity Basic *BasicIdentity } @@ -55,20 +55,29 @@ type LocalIdentity struct { // BasicIdentity holds identity configuration specific to the "basic" type // (for HTTP basic authentication). type BasicIdentity struct { - Password string // Note: In a real application, store a password hash, not the plaintext password. + Password string // Note: this is the sha512-crypt hashed password. } +// This is used to ensure we send a well-formed identity Name. +var identityNameRegexp = regexp.MustCompile(`^[a-zA-Z][a-zA-Z0-9_\-]*$`) + // validate checks that the identity is valid, returning an error if not. -func (d *Identity) validate() error { +func (d *Identity) validate(name string) error { if d == nil { return errors.New("identity must not be nil") } - if d.Name != "" { - invalidChars := regexp.MustCompile(`[^a-zA-Z0-9_\-]`) - if invalidChars.MatchString(d.Name) { - return fmt.Errorf("identity name %q contains invalid characters (only alphanumeric, underscore, and hyphen allowed)", d.Name) - } + if !identityNameRegexp.MatchString(name) { + return fmt.Errorf("identity name %q invalid: must start with an alphabetic character and only contain alphanumeric characters, underscore, and hyphen", d.Name) + } + + return d.validateExcludingName() +} + +// validateExcludingName checks that the identity is valid, returning an error if not. +func (d *Identity) validateExcludingName() error { + if d == nil { + return errors.New("identity must not be nil") } switch d.Access { @@ -86,7 +95,7 @@ func (d *Identity) validate() error { return nil case d.Basic != nil: if d.Basic.Password == "" { - return errors.New("basic identity must specify password") + return errors.New("basic identity must specify password (hashed)") } return nil @@ -121,7 +130,7 @@ func (d *Identity) MarshalJSON() ([]byte, error) { ai.Local = &apiLocalIdentity{UserID: &d.Local.UserID} } if d.Basic != nil { - ai.Basic = &apiBasicIdentity{Password: d.Basic.Password} + ai.Basic = &apiBasicIdentity{Password: "*****"} } return json.Marshal(ai) } @@ -144,12 +153,13 @@ func (d *Identity) UnmarshalJSON(data []byte) error { identity.Local = &LocalIdentity{UserID: *ai.Local.UserID} case ai.Basic != nil: if ai.Basic.Password == "" { - return errors.New("basic identity must specify password") + return errors.New("basic identity must specify password (hashed)") } identity.Basic = &BasicIdentity{Password: ai.Basic.Password} } + // Perform additional validation using the local Identity type. - err = identity.validate() + err = identity.validateExcludingName() if err != nil { return err } @@ -169,13 +179,11 @@ func (s *State) AddIdentities(identities map[string]*Identity) error { if _, ok := s.identities[name]; ok { existing = append(existing, name) } - if identity != nil { - identity.Name = name - } - err := identity.validate() + err := identity.validate(name) if err != nil { return fmt.Errorf("identity %q invalid: %w", name, err) } + identity.Name = name } if len(existing) > 0 { sort.Strings(existing) @@ -209,7 +217,7 @@ func (s *State) UpdateIdentities(identities map[string]*Identity) error { if _, ok := s.identities[name]; !ok { missing = append(missing, name) } - err := identity.validate() + err := identity.validate(name) if err != nil { return fmt.Errorf("identity %q invalid: %w", name, err) } @@ -243,7 +251,7 @@ func (s *State) ReplaceIdentities(identities map[string]*Identity) error { for name, identity := range identities { if identity != nil { - err := identity.validate() + err := identity.validate(name) if err != nil { return fmt.Errorf("identity %q invalid: %w", name, err) } diff --git a/internals/overlord/state/identities_test.go b/internals/overlord/state/identities_test.go index e28f9eba1..0ab3c33e4 100644 --- a/internals/overlord/state/identities_test.go +++ b/internals/overlord/state/identities_test.go @@ -68,7 +68,7 @@ func (s *identitiesSuite) TestMarshalAPI(c *C) { "nancy": { "access": "metrics", "basic": { - "password": "hash" + "password": "*****" } } }`[1:]) @@ -127,7 +127,7 @@ func (s *identitiesSuite) TestUnmarshalAPIErrors(c *C) { error: `local identity must specify user-id`, }, { data: `{"invalid-access": {"access": "metrics", "basic": {}}}`, - error: `basic identity must specify password`, + error: `basic identity must specify password \(hashed\)`, }, { data: `{"invalid-access": {"access": "foo", "local": {"user-id": 42}}}`, error: `invalid access value "foo", must be "admin", "read", "metrics", or "untrusted"`, @@ -647,4 +647,19 @@ func (s *identitiesSuite) TestIdentityFromInputs(c *C) { identity = st.IdentityFromInputs(nil, "nancy", "test") c.Assert(identity, NotNil) c.Check(identity.Name, Equals, "nancy") + + identity = st.IdentityFromInputs(nil, "nancy", "") + c.Assert(identity, IsNil) + + identity = st.IdentityFromInputs(nil, "", "test") + c.Assert(identity, IsNil) + + identity = st.IdentityFromInputs(nil, "nancy-wrong-username", "test") + c.Assert(identity, IsNil) + + identity = st.IdentityFromInputs(nil, "nancy", "wrong-password") + c.Assert(identity, IsNil) + + identity = st.IdentityFromInputs(nil, "nancy-wrong-username", "wrong-password") + c.Assert(identity, IsNil) } From 8ebebb80264b792cc5feb736744a128cf9aa03a9 Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Tue, 11 Feb 2025 11:48:35 +0800 Subject: [PATCH 21/41] chore: refactor metrics, add open telemetry writer --- internals/daemon/api_metrics.go | 18 +++-- internals/daemon/daemon.go | 15 ---- internals/metrics/metrics.go | 76 ++++++++++++++----- internals/overlord/checkstate/handlers.go | 4 +- internals/overlord/checkstate/manager.go | 92 ++++++++--------------- internals/overlord/servstate/handlers.go | 57 ++++---------- internals/overlord/servstate/manager.go | 7 +- 7 files changed, 122 insertions(+), 147 deletions(-) diff --git a/internals/daemon/api_metrics.go b/internals/daemon/api_metrics.go index db829e3cb..fa51e475d 100644 --- a/internals/daemon/api_metrics.go +++ b/internals/daemon/api_metrics.go @@ -15,9 +15,10 @@ package daemon import ( - "bytes" "net/http" + "github.com/canonical/pebble/internals/logger" + "github.com/canonical/pebble/internals/metrics" "github.com/canonical/pebble/internals/overlord/checkstate" "github.com/canonical/pebble/internals/overlord/servstate" ) @@ -36,18 +37,19 @@ type metricsResponse struct { } func (r metricsResponse) ServeHTTP(w http.ResponseWriter, req *http.Request) { - var buffer bytes.Buffer + openTelemetryWriter := metrics.NewOpenTelemetryWriter(w) - err := r.svcMgr.Metrics(&buffer) + err := r.svcMgr.WriteMetrics(openTelemetryWriter) if err != nil { - w.WriteHeader(http.StatusInternalServerError) + logger.Noticef("Cannot write to HTTP response: %v", err.Error()) + http.Error(w, "# internal server error", http.StatusInternalServerError) return } - err = r.chkMgr.Metrics(&buffer) + + err = r.chkMgr.WriteMetrics(openTelemetryWriter) if err != nil { - w.WriteHeader(http.StatusInternalServerError) + logger.Noticef("Cannot write to HTTP response: %v", err.Error()) + http.Error(w, "# internal server error", http.StatusInternalServerError) return } - w.WriteHeader(http.StatusOK) - w.Write([]byte(buffer.String())) } diff --git a/internals/daemon/daemon.go b/internals/daemon/daemon.go index 5ee934f11..2b86b1569 100644 --- a/internals/daemon/daemon.go +++ b/internals/daemon/daemon.go @@ -374,21 +374,6 @@ func (d *Daemon) Init() error { logger.Noticef("Started daemon.") - // registry := metrics.GetRegistry() - // myCounter := registry.NewCounterVec("my_counter", "Total number of something processed.", []string{"operation", "status"}) - // myGauge := registry.NewGaugeVec("my_gauge", "Current value of something.", []string{"sensor"}) - // // Goroutine to update metrics randomly - // go func() { - // for { - // myCounter.WithLabelValues("read", "success").Inc() - // myCounter.WithLabelValues("write", "success").Add(2) - // myCounter.WithLabelValues("read", "failed").Inc() - // myGauge.WithLabelValues("temperature").Set(20.0 + rand.Float64()*10.0) - - // time.Sleep(time.Duration(rand.Intn(5)+1) * time.Second) // Random sleep between 1 and 5 seconds - // } - // }() - return nil } diff --git a/internals/metrics/metrics.go b/internals/metrics/metrics.go index 977ec9b07..78cbe0326 100644 --- a/internals/metrics/metrics.go +++ b/internals/metrics/metrics.go @@ -17,34 +17,74 @@ package metrics import ( "fmt" "io" - "sort" "strings" ) +type MetricType int + +const ( + TypeCounterInt MetricType = iota + TypeGaugeInt +) + // Metric represents a single metric. type Metric struct { Name string - Value interface{} - LabelPairs []string + Type MetricType + ValueInt64 int64 + Comment string + Labels []Label +} + +// Label represents a label for metrics. +type Label struct { + key string + value string +} + +// NewLabel creates a new Label with key and value. +func NewLabel(key, value string) Label { + return Label{key, value} +} + +type Writer interface { + Write(Metric) error +} + +// OpenTelemetryWriter implements the Writer interface and formats metrics +// in OpenTelemetryWriter exposition format. +type OpenTelemetryWriter struct { + w io.Writer +} + +// NewOpenTelemetryWriter creates a new OpenTelemetryWriter. +func NewOpenTelemetryWriter(w io.Writer) *OpenTelemetryWriter { + return &OpenTelemetryWriter{w: w} } -// WriteTo writes the metric in OpenMetrics format. -func (m *Metric) WriteTo(w io.Writer) (n int64, err error) { - labelStr := "" - if len(m.LabelPairs) > 0 { - sort.Strings(m.LabelPairs) - labelStr = "{" + strings.Join(m.LabelPairs, ",") + "}" +func (otw *OpenTelemetryWriter) Write(m Metric) error { + var metricType string + switch m.Type { + case TypeCounterInt: + metricType = "counter" + case TypeGaugeInt: + metricType = "gauge" + } + + _, err := fmt.Fprintf(otw.w, "# HELP %s %s\n", m.Name, m.Comment) + if err != nil { + return err + } + _, err = fmt.Fprintf(otw.w, "# TYPE %s %s\n", m.Name, metricType) + if err != nil { + return err } - var written int - switch v := m.Value.(type) { - case int64: - written, err = fmt.Fprintf(w, "%s%s %d\n", m.Name, labelStr, v) - case float64: - written, err = fmt.Fprintf(w, "%s%s %.2f\n", m.Name, labelStr, v) // Format float appropriately - default: - written, err = fmt.Fprintf(w, "%s%s %v\n", m.Name, labelStr, m.Value) + labels := make([]string, len(m.Labels)) + for i, label := range m.Labels { + labels[i] = fmt.Sprintf("%s=%s", label.key, label.value) } - return int64(written), err + _, err = fmt.Fprintf(otw.w, "%s{%s} %d\n", m.Name, strings.Join(labels, ","), m.ValueInt64) + return err } diff --git a/internals/overlord/checkstate/handlers.go b/internals/overlord/checkstate/handlers.go index cc3e256c8..383bc699f 100644 --- a/internals/overlord/checkstate/handlers.go +++ b/internals/overlord/checkstate/handlers.go @@ -47,7 +47,7 @@ func (m *CheckManager) doPerformCheck(task *state.Task, tomb *tombpkg.Tomb) erro select { case <-ticker.C: err := runCheck(tomb.Context(nil), chk, config.Timeout.Value) - m.incCheckInfoPerformCheckCount(config) + m.incPerformCheckCount(config) if !tomb.Alive() { return checkStopped(config.Name, task.Kind(), tomb.Err()) } @@ -130,7 +130,7 @@ func (m *CheckManager) doRecoverCheck(task *state.Task, tomb *tombpkg.Tomb) erro select { case <-ticker.C: err := runCheck(tomb.Context(nil), chk, config.Timeout.Value) - m.incCheckInfoRecoverCheckCount(config) + m.incRecoverCheckCount(config) if !tomb.Alive() { return checkStopped(config.Name, task.Kind(), tomb.Err()) } diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index 6e158cbd0..80a96c978 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -17,7 +17,6 @@ package checkstate import ( "context" "fmt" - "io" "reflect" "sort" "sync" @@ -331,21 +330,21 @@ func (m *CheckManager) updateCheckInfo(config *plan.Check, changeID string, fail } } -func (m *CheckManager) incCheckInfoPerformCheckCount(config *plan.Check) { +func (m *CheckManager) incPerformCheckCount(config *plan.Check) { m.checksLock.Lock() defer m.checksLock.Unlock() info := m.checks[config.Name] - info.PerformCheckCount += 1 + info.performCheckCount += 1 m.checks[config.Name] = info } -func (m *CheckManager) incCheckInfoRecoverCheckCount(config *plan.Check) { +func (m *CheckManager) incRecoverCheckCount(config *plan.Check) { m.checksLock.Lock() defer m.checksLock.Unlock() info := m.checks[config.Name] - info.RecoverCheckCount += 1 + info.recoverCheckCount += 1 m.checks[config.Name] = info } @@ -364,8 +363,8 @@ type CheckInfo struct { Failures int Threshold int ChangeID string - PerformCheckCount int64 - RecoverCheckCount int64 + performCheckCount int64 + recoverCheckCount int64 } type CheckStatus string @@ -379,81 +378,54 @@ type checker interface { check(ctx context.Context) error } -func (c *CheckInfo) Metrics(writer io.Writer) error { - labels := []string{fmt.Sprintf("check=%s", c.Name)} - - // Write HELP and TYPE comments for pebble_check_up - _, err := fmt.Fprintf(writer, "# HELP pebble_check_up Number of times the perform check has run\n") - if err != nil { - return err - } - _, err = fmt.Fprintf(writer, "# TYPE pebble_check_up counter\n") - if err != nil { - return err - } +func (c *CheckInfo) writeMetrics(writer metrics.Writer) error { checkStatus := 0 if c.Status == CheckStatusUp { checkStatus = 1 } - checkUpMetric := metrics.Metric{ + err := writer.Write(metrics.Metric{ Name: "pebble_check_up", - Value: checkStatus, - LabelPairs: labels, - } - _, err = checkUpMetric.WriteTo(writer) + Type: metrics.TypeGaugeInt, + ValueInt64: int64(checkStatus), + Comment: "Whether the health check is up (1) or not (0)", + Labels: []metrics.Label{metrics.NewLabel("check", c.Name)}, + }) if err != nil { return err } - // Write HELP and TYPE comments for pebble_perform_check_count - _, err = fmt.Fprintf(writer, "# HELP pebble_perform_check_count Number of times the perform check has run\n") - if err != nil { - return err - } - _, err = fmt.Fprintf(writer, "# TYPE pebble_perform_check_count counter\n") - if err != nil { - return err - } - performCheckCountMetric := metrics.Metric{ + err = writer.Write(metrics.Metric{ Name: "pebble_perform_check_count", - Value: c.PerformCheckCount, - LabelPairs: labels, - } - _, err = performCheckCountMetric.WriteTo(writer) + Type: metrics.TypeCounterInt, + ValueInt64: c.performCheckCount, + Comment: "Number of times the perform-check has run", + Labels: []metrics.Label{metrics.NewLabel("check", c.Name)}, + }) if err != nil { return err } - // Write HELP and TYPE comments for pebble_recover_check_count - _, err = fmt.Fprintf(writer, "# HELP pebble_recover_check_count Number of times the perform check has run\n") - if err != nil { - return err - } - _, err = fmt.Fprintf(writer, "# TYPE pebble_recover_check_count counter\n") - if err != nil { - return err - } - recoverCheckCountMetric := metrics.Metric{ + err = writer.Write(metrics.Metric{ Name: "pebble_recover_check_count", - Value: c.PerformCheckCount, - LabelPairs: labels, - } - _, err = recoverCheckCountMetric.WriteTo(writer) + Type: metrics.TypeCounterInt, + ValueInt64: c.recoverCheckCount, + Comment: "Number of times the recover-check has run", + Labels: []metrics.Label{metrics.NewLabel("check", c.Name)}, + }) if err != nil { return err } + return nil } -// Metrics collects and writes metrics for all checks to the provided writer. -func (m *CheckManager) Metrics(writer io.Writer) error { - infos, err := m.Checks() - if err != nil { - return err - } +// WriteMetrics collects and writes metrics for all checks to the provided writer. +func (m *CheckManager) WriteMetrics(writer metrics.Writer) error { + m.checksLock.Lock() + defer m.checksLock.Unlock() - for _, info := range infos { - err := info.Metrics(writer) + for _, info := range m.checks { + err := info.writeMetrics(writer) if err != nil { return err } diff --git a/internals/overlord/servstate/handlers.go b/internals/overlord/servstate/handlers.go index e3efc5775..b2d4a9186 100644 --- a/internals/overlord/servstate/handlers.go +++ b/internals/overlord/servstate/handlers.go @@ -290,7 +290,7 @@ func (s *serviceData) transition(state serviceState) { // transitionRestarting changes the service's state and also sets the restarting flag. func (s *serviceData) transitionRestarting(state serviceState, restarting bool) { - // Update current-since time if derived status is changing. + // Update current-s_e time if derived status is changing. oldStatus := stateToStatus(s.state) newStatus := stateToStatus(state) if oldStatus != newStatus { @@ -458,7 +458,7 @@ func (s *serviceData) startInternal() error { // Pass buffer reference to logMgr to start log forwarding s.manager.logMgr.ServiceStarted(s.config, s.logs) - s.IncStartCount() + s.startCount.Add(1) return nil } @@ -828,41 +828,15 @@ func (s *serviceData) checkFailed(action plan.ServiceAction) { } } -// IncStartCount increments the startCount for the service. -func (d *serviceData) IncStartCount() { - d.startCount.Add(1) -} - -// Metrics writes the service's metrics. -func (d *serviceData) Metrics(writer io.Writer) error { - labels := []string{fmt.Sprintf("service_name=%s", d.config.Name)} - - // Write HELP and TYPE comments for pebble_service_start_count - _, err := fmt.Fprintf(writer, "# HELP pebble_service_start_count Number of times the service has started\n") - if err != nil { - return err - } - _, err = fmt.Fprintf(writer, "# TYPE pebble_service_start_count counter\n") - if err != nil { - return err - } - - startCountMetric := metrics.Metric{ +// writeMetrics writes the service's metrics. +func (d *serviceData) writeMetrics(writer metrics.Writer) error { + err := writer.Write(metrics.Metric{ Name: "pebble_service_start_count", - Value: d.startCount.Load(), - LabelPairs: labels, - } - _, err = startCountMetric.WriteTo(writer) - if err != nil { - return err - } - - // Write HELP and TYPE comments for pebble_service_active - _, err = fmt.Fprintf(writer, "# HELP pebble_service_active Indicates if the service is currently active (1) or not (0)\n") - if err != nil { - return err - } - _, err = fmt.Fprintf(writer, "# TYPE pebble_service_active gauge\n") + Type: metrics.TypeCounterInt, + ValueInt64: d.startCount.Load(), + Comment: "Number of times the service has started", + Labels: []metrics.Label{metrics.NewLabel("service", d.config.Name)}, + }) if err != nil { return err } @@ -871,12 +845,13 @@ func (d *serviceData) Metrics(writer io.Writer) error { if stateToStatus(d.state) == StatusActive { active = 1 } - activeMetric := metrics.Metric{ + err = writer.Write(metrics.Metric{ Name: "pebble_service_active", - Value: active, - LabelPairs: labels, - } - _, err = activeMetric.WriteTo(writer) + Type: metrics.TypeGaugeInt, + ValueInt64: int64(active), + Comment: "Indicates if the service is currently active (1) or not (0)", + Labels: []metrics.Label{metrics.NewLabel("service", d.config.Name)}, + }) if err != nil { return err } diff --git a/internals/overlord/servstate/manager.go b/internals/overlord/servstate/manager.go index e350ac002..fb4c1347f 100644 --- a/internals/overlord/servstate/manager.go +++ b/internals/overlord/servstate/manager.go @@ -9,6 +9,7 @@ import ( "sync" "time" + "github.com/canonical/pebble/internals/metrics" "github.com/canonical/pebble/internals/overlord/restart" "github.com/canonical/pebble/internals/overlord/state" "github.com/canonical/pebble/internals/plan" @@ -334,13 +335,13 @@ func (m *ServiceManager) CheckFailed(name string) { } } -// Metrics collects and writes metrics for all services to the provided writer. -func (m *ServiceManager) Metrics(writer io.Writer) error { +// WriteMetrics collects and writes metrics for all services to the provided writer. +func (m *ServiceManager) WriteMetrics(writer metrics.Writer) error { m.servicesLock.Lock() defer m.servicesLock.Unlock() for _, service := range m.services { - err := service.Metrics(writer) + err := service.writeMetrics(writer) if err != nil { return err } From a1db1a6aa35df8b3d67dd76988a947cf44ca4d3a Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Wed, 12 Feb 2025 07:02:47 +0800 Subject: [PATCH 22/41] chore: refactor according to review, fix check counter reset issue --- internals/overlord/checkstate/handlers.go | 2 +- internals/overlord/checkstate/manager.go | 54 ++++++++++++------- internals/overlord/checkstate/manager_test.go | 1 - 3 files changed, 37 insertions(+), 20 deletions(-) diff --git a/internals/overlord/checkstate/handlers.go b/internals/overlord/checkstate/handlers.go index 383bc699f..3c4b9a5a3 100644 --- a/internals/overlord/checkstate/handlers.go +++ b/internals/overlord/checkstate/handlers.go @@ -47,7 +47,6 @@ func (m *CheckManager) doPerformCheck(task *state.Task, tomb *tombpkg.Tomb) erro select { case <-ticker.C: err := runCheck(tomb.Context(nil), chk, config.Timeout.Value) - m.incPerformCheckCount(config) if !tomb.Alive() { return checkStopped(config.Name, task.Kind(), tomb.Err()) } @@ -92,6 +91,7 @@ func (m *CheckManager) doPerformCheck(task *state.Task, tomb *tombpkg.Tomb) erro task.Set(checkDetailsAttr, &details) m.state.Unlock() } + m.incPerformCheckCount(config) case <-tomb.Dying(): return checkStopped(config.Name, task.Kind(), tomb.Err()) diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index 0da867381..648170626 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -47,7 +47,7 @@ type CheckManager struct { failureHandlers []FailureFunc checksLock sync.Mutex - checks map[string]CheckInfo + checks map[string]*CheckInfo } // FailureFunc is the type of function called when a failure action is triggered. @@ -57,7 +57,7 @@ type FailureFunc func(name string) func NewManager(s *state.State, runner *state.TaskRunner, planMgr *planstate.PlanManager) *CheckManager { manager := &CheckManager{ state: s, - checks: make(map[string]CheckInfo), + checks: make(map[string]*CheckInfo), planMgr: planMgr, } @@ -324,8 +324,17 @@ func (m *CheckManager) Checks() ([]*CheckInfo, error) { infos := make([]*CheckInfo, 0, len(m.checks)) for _, info := range m.checks { - info := info // take the address of a new variable each time - infos = append(infos, &info) + copied := &CheckInfo{ + Name: info.Name, + Level: info.Level, + Startup: info.Startup, + Status: info.Status, + Failures: info.Failures, + Threshold: info.Threshold, + ChangeID: info.ChangeID, + } + infos = append(infos, copied) + } sort.Slice(infos, func(i, j int) bool { return infos[i].Name < infos[j].Name @@ -347,15 +356,24 @@ func (m *CheckManager) updateCheckInfo(config *plan.Check, changeID string, fail if startup == plan.CheckStartupUnknown { startup = plan.CheckStartupEnabled } - m.checks[config.Name] = CheckInfo{ - Name: config.Name, - Level: config.Level, - Startup: startup, - Status: status, - Failures: failures, - Threshold: config.Threshold, - ChangeID: changeID, + var performCheckCount int64 + var recoverCheckCount int64 + if check, ok := m.checks[config.Name]; ok { + performCheckCount = check.PerformCheckCount + recoverCheckCount = check.RecoverCheckCount + } + checkInfo := &CheckInfo{ + Name: config.Name, + Level: config.Level, + Startup: startup, + Status: status, + Failures: failures, + Threshold: config.Threshold, + ChangeID: changeID, + PerformCheckCount: performCheckCount, + RecoverCheckCount: recoverCheckCount, } + m.checks[config.Name] = checkInfo } func (m *CheckManager) incPerformCheckCount(config *plan.Check) { @@ -363,7 +381,7 @@ func (m *CheckManager) incPerformCheckCount(config *plan.Check) { defer m.checksLock.Unlock() info := m.checks[config.Name] - info.performCheckCount += 1 + info.PerformCheckCount += 1 m.checks[config.Name] = info } @@ -372,7 +390,7 @@ func (m *CheckManager) incRecoverCheckCount(config *plan.Check) { defer m.checksLock.Unlock() info := m.checks[config.Name] - info.recoverCheckCount += 1 + info.RecoverCheckCount += 1 m.checks[config.Name] = info } @@ -392,8 +410,8 @@ type CheckInfo struct { Failures int Threshold int ChangeID string - performCheckCount int64 - recoverCheckCount int64 + PerformCheckCount int64 + RecoverCheckCount int64 } type CheckStatus string @@ -427,7 +445,7 @@ func (c *CheckInfo) writeMetrics(writer metrics.Writer) error { err = writer.Write(metrics.Metric{ Name: "pebble_perform_check_count", Type: metrics.TypeCounterInt, - ValueInt64: c.performCheckCount, + ValueInt64: c.PerformCheckCount, Comment: "Number of times the perform-check has run", Labels: []metrics.Label{metrics.NewLabel("check", c.Name)}, }) @@ -438,7 +456,7 @@ func (c *CheckInfo) writeMetrics(writer metrics.Writer) error { err = writer.Write(metrics.Metric{ Name: "pebble_recover_check_count", Type: metrics.TypeCounterInt, - ValueInt64: c.recoverCheckCount, + ValueInt64: c.RecoverCheckCount, Comment: "Number of times the recover-check has run", Labels: []metrics.Label{metrics.NewLabel("check", c.Name)}, }) diff --git a/internals/overlord/checkstate/manager_test.go b/internals/overlord/checkstate/manager_test.go index 9c50926d4..c59445651 100644 --- a/internals/overlord/checkstate/manager_test.go +++ b/internals/overlord/checkstate/manager_test.go @@ -451,7 +451,6 @@ func (s *ManagerSuite) TestPlanChangedServiceContext(c *C) { }, } s.manager.PlanChanged(origPlan) - waitChecks(c, s.manager, []*checkstate.CheckInfo{ {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3}, {Name: "chk2", Startup: "enabled", Status: "up", Threshold: 3}, From c527344b13cd3995b0ca1f24973872c27d760b29 Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Wed, 12 Feb 2025 09:45:35 +0800 Subject: [PATCH 23/41] chore: add a test for check metrics --- internals/overlord/checkstate/manager.go | 51 ++++++++++--------- internals/overlord/checkstate/manager_test.go | 51 +++++++++++++++++++ 2 files changed, 79 insertions(+), 23 deletions(-) diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index 11dc94528..beb78e767 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -325,13 +325,15 @@ func (m *CheckManager) Checks() ([]*CheckInfo, error) { infos := make([]*CheckInfo, 0, len(m.checks)) for _, info := range m.checks { copied := &CheckInfo{ - Name: info.Name, - Level: info.Level, - Startup: info.Startup, - Status: info.Status, - Failures: info.Failures, - Threshold: info.Threshold, - ChangeID: info.ChangeID, + Name: info.Name, + Level: info.Level, + Startup: info.Startup, + Status: info.Status, + Failures: info.Failures, + Threshold: info.Threshold, + ChangeID: info.ChangeID, + PerformCheckCount: info.PerformCheckCount, + RecoverCheckCount: info.RecoverCheckCount, } infos = append(infos, copied) @@ -356,24 +358,27 @@ func (m *CheckManager) updateCheckInfo(config *plan.Check, changeID string, fail if startup == plan.CheckStartupUnknown { startup = plan.CheckStartupEnabled } - var performCheckCount int64 - var recoverCheckCount int64 + if check, ok := m.checks[config.Name]; ok { - performCheckCount = check.PerformCheckCount - recoverCheckCount = check.RecoverCheckCount - } - checkInfo := &CheckInfo{ - Name: config.Name, - Level: config.Level, - Startup: startup, - Status: status, - Failures: failures, - Threshold: config.Threshold, - ChangeID: changeID, - PerformCheckCount: performCheckCount, - RecoverCheckCount: recoverCheckCount, + check.Level = config.Level + check.Startup = startup + check.Status = status + check.Failures = failures + check.Threshold = config.Threshold + check.ChangeID = changeID + } else { + m.checks[config.Name] = &CheckInfo{ + Name: config.Name, + Level: config.Level, + Startup: startup, + Status: status, + Failures: failures, + Threshold: config.Threshold, + ChangeID: changeID, + PerformCheckCount: 0, + RecoverCheckCount: 0, + } } - m.checks[config.Name] = checkInfo } func (m *CheckManager) incPerformCheckCount(config *plan.Check) { diff --git a/internals/overlord/checkstate/manager_test.go b/internals/overlord/checkstate/manager_test.go index c59445651..08773061d 100644 --- a/internals/overlord/checkstate/manager_test.go +++ b/internals/overlord/checkstate/manager_test.go @@ -868,3 +868,54 @@ func (s *ManagerSuite) TestReplan(c *C) { c.Assert(status, Matches, "Do.*") c.Assert(change.Kind(), Equals, "perform-check") } + +func (s *ManagerSuite) TestMetrics(c *C) { + origLayer := &plan.Layer{ + Checks: map[string]*plan.Check{ + "chk1": { + Name: "chk1", + Override: "replace", + Period: plan.OptionalDuration{Value: 10 * time.Millisecond}, + Timeout: plan.OptionalDuration{Value: time.Second}, + Threshold: 3, + Exec: &plan.ExecCheck{Command: "echo chk1"}, + Startup: plan.CheckStartupDisabled, + }, + }, + } + err := s.planMgr.AppendLayer(origLayer, false) + c.Assert(err, IsNil) + + // Ensure and wait for the counter to increase. + s.manager.PlanChanged(&plan.Plan{ + Checks: map[string]*plan.Check{ + "chk1": { + Name: "chk1", + Override: "replace", + Period: plan.OptionalDuration{Value: 10 * time.Millisecond}, + Timeout: plan.OptionalDuration{Value: time.Second}, + Threshold: 3, + Exec: &plan.ExecCheck{Command: "echo chk1"}, + Startup: plan.CheckStartupEnabled, + }, + }, + }) + waitChecks(c, s.manager, []*checkstate.CheckInfo{ + {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3, PerformCheckCount: 2, RecoverCheckCount: 0}, + }) + checks, err := s.manager.Checks() + c.Assert(err, IsNil) + c.Assert(checks[0].PerformCheckCount, Equals, int64(2)) + + // Test updateCheckInfo (called by StopChecks) doesn't reset metrics. + changed, err := s.manager.StopChecks([]string{"chk1"}) + waitChecks(c, s.manager, []*checkstate.CheckInfo{ + {Name: "chk1", Startup: "disabled", Status: "inactive", Threshold: 3, PerformCheckCount: 2, RecoverCheckCount: 0}, + }) + c.Assert(err, IsNil) + c.Assert(changed, DeepEquals, []string{"chk1"}) + checks, err = s.manager.Checks() + c.Assert(err, IsNil) + c.Assert(checks[0].PerformCheckCount, Equals, int64(2)) + c.Assert(checks[0].RecoverCheckCount, Equals, int64(0)) +} From 54762999bdd3b264b86b4139e046fc8668f439d2 Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Wed, 12 Feb 2025 19:54:10 +0800 Subject: [PATCH 24/41] test: add tests for open telemetry writer --- internals/metrics/metrics_test.go | 107 ++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 internals/metrics/metrics_test.go diff --git a/internals/metrics/metrics_test.go b/internals/metrics/metrics_test.go new file mode 100644 index 000000000..4afefc58d --- /dev/null +++ b/internals/metrics/metrics_test.go @@ -0,0 +1,107 @@ +// Copyright (c) 2025 Canonical Ltd +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 3 as +// published by the Free Software Foundation. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package metrics_test + +import ( + "bytes" + "testing" + + "github.com/canonical/pebble/internals/metrics" +) + +func TestOpenTelemetryWriter(t *testing.T) { + testCases := []struct { + name string + metric metrics.Metric + expected string + }{ + { + name: "CounterInt", + metric: metrics.Metric{ + Name: "my_counter", + Type: metrics.TypeCounterInt, + ValueInt64: 42, + Comment: "A simple counter", + Labels: []metrics.Label{ + metrics.NewLabel("key1", "value1"), + metrics.NewLabel("key2", "value2"), + }, + }, + expected: `# HELP my_counter A simple counter +# TYPE my_counter counter +my_counter{key1=value1,key2=value2} 42 +`, + }, + { + name: "GaugeInt", + metric: metrics.Metric{ + Name: "my_gauge", + Type: metrics.TypeGaugeInt, + ValueInt64: 1, + Comment: "A simple gauge", + Labels: []metrics.Label{}, // Test with no labels + }, + expected: `# HELP my_gauge A simple gauge +# TYPE my_gauge gauge +my_gauge{} 1 +`, + }, + { + name: "NoComment", // Test without comment + metric: metrics.Metric{ + Name: "no_comment_metric", + Type: metrics.TypeCounterInt, + ValueInt64: 42, + Labels: []metrics.Label{metrics.NewLabel("env", "prod")}, + }, + expected: `# HELP no_comment_metric +# TYPE no_comment_metric counter +no_comment_metric{env=prod} 42 +`, + }, + + { + name: "SpecialCharacters", // Test with special characters in labels + metric: metrics.Metric{ + Name: "special_chars", + Type: metrics.TypeGaugeInt, + ValueInt64: 42, + Comment: "Metric with special characters", + Labels: []metrics.Label{ + metrics.NewLabel("key_with_underscore", "value_with_underscore"), + metrics.NewLabel("key-with-dash", "value-with-dash"), + }, + }, + expected: `# HELP special_chars Metric with special characters +# TYPE special_chars gauge +special_chars{key_with_underscore=value_with_underscore,key-with-dash=value-with-dash} 42 +`, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + buf := &bytes.Buffer{} + writer := metrics.NewOpenTelemetryWriter(buf) + err := writer.Write(tc.metric) + if err != nil { + t.Fatalf("Write failed: %v", err) + } + if buf.String() != tc.expected { + t.Errorf("Output mismatch:\nExpected:\n%s\nGot:\n%s", tc.expected, buf.String()) + } + }) + } +} From 9e5b65a5c7d493f15779e14a20637efa4e42c9be Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Wed, 12 Feb 2025 19:54:42 +0800 Subject: [PATCH 25/41] test: service metrics --- internals/overlord/servstate/manager_test.go | 47 ++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/internals/overlord/servstate/manager_test.go b/internals/overlord/servstate/manager_test.go index d5f38d90a..b325a4411 100644 --- a/internals/overlord/servstate/manager_test.go +++ b/internals/overlord/servstate/manager_test.go @@ -35,6 +35,7 @@ import ( . "gopkg.in/check.v1" "github.com/canonical/pebble/internals/logger" + "github.com/canonical/pebble/internals/metrics" "github.com/canonical/pebble/internals/overlord/checkstate" "github.com/canonical/pebble/internals/overlord/restart" "github.com/canonical/pebble/internals/overlord/servstate" @@ -2093,3 +2094,49 @@ func waitChecks(c *C, checkMgr *checkstate.CheckManager, f func(checks []*checks c.Fatal("timed out waiting for checks to settle") return nil } + +func (s *S) TestMetrics(c *C) { + s.newServiceManager(c) + s.planAddLayer(c, testPlanLayer) + s.planChanged(c) + + s.startTestServices(c, true) + if c.Failed() { + return + } + buf := new(bytes.Buffer) + openTelemetryWriter := metrics.NewOpenTelemetryWriter(buf) + s.manager.WriteMetrics(openTelemetryWriter) + metrics := buf.String() + c.Assert(metrics, testutil.Contains, "pebble_service_start_count{service=test1} 1") + c.Assert(metrics, testutil.Contains, "pebble_service_active{service=test1} 1") + c.Assert(metrics, testutil.Contains, "pebble_service_start_count{service=test2} 1") + c.Assert(metrics, testutil.Contains, "pebble_service_active{service=test2} 1") + + s.stopTestServices(c) + s.manager.WriteMetrics(openTelemetryWriter) + metrics = buf.String() + c.Assert(metrics, testutil.Contains, "pebble_service_start_count{service=test1} 1") + c.Assert(metrics, testutil.Contains, "pebble_service_active{service=test1} 0") + c.Assert(metrics, testutil.Contains, "pebble_service_start_count{service=test2} 1") + c.Assert(metrics, testutil.Contains, "pebble_service_active{service=test2} 0") + + s.startTestServices(c, true) + if c.Failed() { + return + } + s.manager.WriteMetrics(openTelemetryWriter) + metrics = buf.String() + c.Assert(metrics, testutil.Contains, "pebble_service_start_count{service=test1} 2") + c.Assert(metrics, testutil.Contains, "pebble_service_active{service=test1} 1") + c.Assert(metrics, testutil.Contains, "pebble_service_start_count{service=test2} 2") + c.Assert(metrics, testutil.Contains, "pebble_service_active{service=test2} 1") + + s.stopTestServices(c) + s.manager.WriteMetrics(openTelemetryWriter) + metrics = buf.String() + c.Assert(metrics, testutil.Contains, "pebble_service_start_count{service=test1} 2") + c.Assert(metrics, testutil.Contains, "pebble_service_active{service=test1} 0") + c.Assert(metrics, testutil.Contains, "pebble_service_start_count{service=test2} 2") + c.Assert(metrics, testutil.Contains, "pebble_service_active{service=test2} 0") +} From d678766e60547401e1acc70590066a506b09b8e5 Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Wed, 12 Feb 2025 20:16:46 +0800 Subject: [PATCH 26/41] chore: update tests --- internals/daemon/api_metrics.go | 2 +- internals/metrics/metrics_test.go | 25 ++++++++++++------------ internals/overlord/checkstate/manager.go | 8 ++------ 3 files changed, 15 insertions(+), 20 deletions(-) diff --git a/internals/daemon/api_metrics.go b/internals/daemon/api_metrics.go index fa51e475d..7fd35b9d1 100644 --- a/internals/daemon/api_metrics.go +++ b/internals/daemon/api_metrics.go @@ -1,4 +1,4 @@ -// Copyright (c) 2024 Canonical Ltd +// Copyright (c) 2025 Canonical Ltd // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License version 3 as diff --git a/internals/metrics/metrics_test.go b/internals/metrics/metrics_test.go index 4afefc58d..932432d01 100644 --- a/internals/metrics/metrics_test.go +++ b/internals/metrics/metrics_test.go @@ -16,12 +16,17 @@ package metrics_test import ( "bytes" - "testing" "github.com/canonical/pebble/internals/metrics" + + . "gopkg.in/check.v1" ) -func TestOpenTelemetryWriter(t *testing.T) { +type OpenTelemetryWriterSuite struct{} + +var _ = Suite(&OpenTelemetryWriterSuite{}) + +func (s *OpenTelemetryWriterSuite) TestOpenTelemetryWriter(c *C) { testCases := []struct { name string metric metrics.Metric @@ -92,16 +97,10 @@ special_chars{key_with_underscore=value_with_underscore,key-with-dash=value-with } for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - buf := &bytes.Buffer{} - writer := metrics.NewOpenTelemetryWriter(buf) - err := writer.Write(tc.metric) - if err != nil { - t.Fatalf("Write failed: %v", err) - } - if buf.String() != tc.expected { - t.Errorf("Output mismatch:\nExpected:\n%s\nGot:\n%s", tc.expected, buf.String()) - } - }) + buf := &bytes.Buffer{} + writer := metrics.NewOpenTelemetryWriter(buf) + err := writer.Write(tc.metric) + c.Assert(err, IsNil) + c.Assert(buf.String(), Equals, tc.expected) } } diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index beb78e767..0fd177653 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -385,18 +385,14 @@ func (m *CheckManager) incPerformCheckCount(config *plan.Check) { m.checksLock.Lock() defer m.checksLock.Unlock() - info := m.checks[config.Name] - info.PerformCheckCount += 1 - m.checks[config.Name] = info + m.checks[config.Name].PerformCheckCount += 1 } func (m *CheckManager) incRecoverCheckCount(config *plan.Check) { m.checksLock.Lock() defer m.checksLock.Unlock() - info := m.checks[config.Name] - info.RecoverCheckCount += 1 - m.checks[config.Name] = info + m.checks[config.Name].RecoverCheckCount += 1 } func (m *CheckManager) deleteCheckInfo(name string) { From c99fa5359aa51d06ef938d1bd5b8be0156a498f9 Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Wed, 12 Feb 2025 20:23:23 +0800 Subject: [PATCH 27/41] chore: fix linting --- internals/metrics/metrics_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internals/metrics/metrics_test.go b/internals/metrics/metrics_test.go index 932432d01..558156755 100644 --- a/internals/metrics/metrics_test.go +++ b/internals/metrics/metrics_test.go @@ -17,9 +17,9 @@ package metrics_test import ( "bytes" - "github.com/canonical/pebble/internals/metrics" - . "gopkg.in/check.v1" + + "github.com/canonical/pebble/internals/metrics" ) type OpenTelemetryWriterSuite struct{} From c8f3aba74e3c061097e28b600beee904bafcdce1 Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Thu, 13 Feb 2025 10:48:15 +0800 Subject: [PATCH 28/41] chore: prioritize basic type identity, add tests --- internals/overlord/state/identities.go | 66 +++++++++++++++------ internals/overlord/state/identities_test.go | 30 ++++++++++ 2 files changed, 77 insertions(+), 19 deletions(-) diff --git a/internals/overlord/state/identities.go b/internals/overlord/state/identities.go index 37a919952..de2454823 100644 --- a/internals/overlord/state/identities.go +++ b/internals/overlord/state/identities.go @@ -82,6 +82,10 @@ func (d *Identity) validateExcludingName() error { switch d.Access { case AdminAccess, ReadAccess, MetricsAccess, UntrustedAccess: + // Check basic type access level. + if d.Basic != nil && d.Access != MetricsAccess { + return fmt.Errorf("basic identity can only have %q access, got %q", MetricsAccess, d.Access) + } case "": return fmt.Errorf("access value must be specified (%q, %q, %q, or %q)", AdminAccess, ReadAccess, MetricsAccess, UntrustedAccess) @@ -90,18 +94,21 @@ func (d *Identity) validateExcludingName() error { d.Access, AdminAccess, ReadAccess, MetricsAccess, UntrustedAccess) } - switch { - case d.Local != nil: - return nil - case d.Basic != nil: + gotType := false + if d.Local != nil { + gotType = true + } + if d.Basic != nil { if d.Basic.Password == "" { return errors.New("basic identity must specify password (hashed)") } - - return nil - default: + gotType = true + } + if !gotType { return errors.New(`identity must have at least one type ("local" or "basic")`) } + + return nil } // apiIdentity exists so the default JSON marshalling of an Identity (used @@ -145,13 +152,14 @@ func (d *Identity) UnmarshalJSON(data []byte) error { identity := Identity{ Access: IdentityAccess(ai.Access), } - switch { - case ai.Local != nil: + + if ai.Local != nil { if ai.Local.UserID == nil { return errors.New("local identity must specify user-id") } identity.Local = &LocalIdentity{UserID: *ai.Local.UserID} - case ai.Basic != nil: + } + if ai.Basic != nil { if ai.Basic.Password == "" { return errors.New("basic identity must specify password (hashed)") } @@ -316,23 +324,43 @@ func (s *State) Identities() map[string]*Identity { // IdentityFromInputs returns an identity with the given inputs, or nil // if there is none. +// +// Identity priority: +// 1. If both username and password are provided, the function attempts to +// match a basic type identity. The userID is ignored in this case. If +// a matching username is found but the password verification fails, nil +// is returned immediately. +// 2. If username and password are not both provided, the function attempts to +// match a local type identity using the userID. +// +// If no matching identity is found for the given inputs, nil is returned. func (s *State) IdentityFromInputs(userID *uint32, username, password string) *Identity { s.reading() - for _, identity := range s.identities { - switch { - case identity.Local != nil && userID != nil: - if identity.Local.UserID == *userID { - return identity + switch { + case username != "" || password != "": + // Prioritize username/password if provided, because they come from HTTP + // Authorization header, a per-request, client controlled property. If set + // by the client, it's intentional, so it should have a higher priority. + for _, identity := range s.identities { + if identity.Basic != nil && identity.Name == username { + crypt := sha512_crypt.New() + err := crypt.Verify(identity.Basic.Password, []byte(password)) + if err == nil { + return identity + } else { + return nil + } } - case identity.Basic != nil && username != "" && identity.Name == username && password != "": - crypt := sha512_crypt.New() - err := crypt.Verify(identity.Basic.Password, []byte(password)) - if err == nil { + } + case userID != nil: + for _, identity := range s.identities { + if identity.Local != nil && identity.Local.UserID == *userID { return identity } } } + return nil } diff --git a/internals/overlord/state/identities_test.go b/internals/overlord/state/identities_test.go index 0ab3c33e4..9fe41ae68 100644 --- a/internals/overlord/state/identities_test.go +++ b/internals/overlord/state/identities_test.go @@ -134,6 +134,9 @@ func (s *identitiesSuite) TestUnmarshalAPIErrors(c *C) { }, { data: `{"invalid-access": {"local": {"user-id": 42}}}`, error: `access value must be specified \("admin", "read", "metrics", or "untrusted"\)`, + }, { + data: `{"invalid-access": {"access": "admin", "basic": {"password": "hash"}}}`, + error: `basic identity can only have \"metrics\" access, got \"admin\"`, }} for _, test := range tests { c.Logf("Input data: %s", test.data) @@ -662,4 +665,31 @@ func (s *identitiesSuite) TestIdentityFromInputs(c *C) { identity = st.IdentityFromInputs(nil, "nancy-wrong-username", "wrong-password") c.Assert(identity, IsNil) + + // userID, username, password all provided, two matching identities, prioritize basic type. + userID = 42 + identity = st.IdentityFromInputs(&userID, "nancy", "test") + c.Assert(identity, NotNil) + c.Check(identity.Name, Equals, "nancy") + + // userID, username, password all provided, invalid username/password, userID ignored. + userID = 42 + identity = st.IdentityFromInputs(&userID, "nancy", "") + c.Assert(identity, IsNil) + + userID = 42 + identity = st.IdentityFromInputs(&userID, "", "test") + c.Assert(identity, IsNil) + + userID = 42 + identity = st.IdentityFromInputs(&userID, "nancy-wrong-username", "test") + c.Assert(identity, IsNil) + + userID = 42 + identity = st.IdentityFromInputs(&userID, "nancy", "wrong-password") + c.Assert(identity, IsNil) + + userID = 42 + identity = st.IdentityFromInputs(&userID, "nancy-wrong-username", "wrong-password") + c.Assert(identity, IsNil) } From af5e0b22e530018423b216618a0a9076533c7c80 Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Thu, 13 Feb 2025 11:03:13 +0800 Subject: [PATCH 29/41] test: add more tests --- internals/overlord/state/identities_test.go | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/internals/overlord/state/identities_test.go b/internals/overlord/state/identities_test.go index 9fe41ae68..50bcb1a32 100644 --- a/internals/overlord/state/identities_test.go +++ b/internals/overlord/state/identities_test.go @@ -300,6 +300,15 @@ func (s *identitiesSuite) TestAddIdentities(c *C) { }) c.Assert(err, ErrorMatches, `identity "bill" invalid: invalid access value "bar", must be "admin", "read", "metrics", or "untrusted"`) + // Basic type identity only allow metrics access. + err = st.AddIdentities(map[string]*state.Identity{ + "bill": { + Access: "admin", + Basic: &state.BasicIdentity{Password: "hash"}, + }, + }) + c.Assert(err, ErrorMatches, `identity "bill" invalid: basic identity can only have "metrics" access, got "admin"`) + // Must have at least one type. err = st.AddIdentities(map[string]*state.Identity{ "bill": { @@ -308,6 +317,16 @@ func (s *identitiesSuite) TestAddIdentities(c *C) { }) c.Assert(err, ErrorMatches, `identity "bill" invalid: identity must have at least one type \(\"local\" or \"basic\"\)`) + // May have two types. + err = st.AddIdentities(map[string]*state.Identity{ + "peter": { + Access: state.MetricsAccess, + Basic: &state.BasicIdentity{Password: "hash"}, + Local: &state.LocalIdentity{UserID: 1001}, + }, + }) + c.Assert(err, IsNil) + // Ensure user IDs are unique with existing users. err = st.AddIdentities(map[string]*state.Identity{ "bill": { From edccbacce352b819116c283e07fe7d48ba26ae31 Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Thu, 13 Feb 2025 11:06:04 +0800 Subject: [PATCH 30/41] chore: prioritize basic type --- internals/daemon/daemon.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/internals/daemon/daemon.go b/internals/daemon/daemon.go index 6427209b6..5bc441ad1 100644 --- a/internals/daemon/daemon.go +++ b/internals/daemon/daemon.go @@ -161,11 +161,11 @@ func userFromRequest(st *state.State, r *http.Request, ucred *Ucrednet, username // No identity that matches these inputs (for now, just UID). return nil, nil } - - if identity.Local != nil { - return &UserState{Access: identity.Access, UID: userID}, nil - } else if identity.Basic != nil { + if identity.Basic != nil { + // Prioritize basic type and ignore UID in this case. return &UserState{Access: identity.Access}, nil + } else if identity.Local != nil { + return &UserState{Access: identity.Access, UID: userID}, nil } return nil, nil } From 83c1717e9f1f077637c76dac8bfef284e3714082 Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Fri, 14 Feb 2025 11:31:22 +0800 Subject: [PATCH 31/41] chore: fix some of the comments according to review --- internals/metrics/metrics.go | 40 +++++--- internals/metrics/metrics_test.go | 32 ++++--- internals/overlord/checkstate/manager.go | 59 +++++------- internals/overlord/servstate/handlers.go | 4 +- internals/overlord/servstate/manager.go | 9 +- internals/overlord/servstate/manager_test.go | 99 ++++++++++++++------ 6 files changed, 149 insertions(+), 94 deletions(-) diff --git a/internals/metrics/metrics.go b/internals/metrics/metrics.go index 78cbe0326..e72821bbc 100644 --- a/internals/metrics/metrics.go +++ b/internals/metrics/metrics.go @@ -27,6 +27,17 @@ const ( TypeGaugeInt ) +func (mt MetricType) String() string { + switch mt { + case TypeCounterInt: + return "counter" + case TypeGaugeInt: + return "gauge" + default: + panic("invalid metric type") + } +} + // Metric represents a single metric. type Metric struct { Name string @@ -63,28 +74,27 @@ func NewOpenTelemetryWriter(w io.Writer) *OpenTelemetryWriter { } func (otw *OpenTelemetryWriter) Write(m Metric) error { - var metricType string - switch m.Type { - case TypeCounterInt: - metricType = "counter" - case TypeGaugeInt: - metricType = "gauge" + if m.Comment != "" { + _, err := fmt.Fprintf(otw.w, "# HELP %s %s\n", m.Name, m.Comment) + if err != nil { + return err + } } - _, err := fmt.Fprintf(otw.w, "# HELP %s %s\n", m.Name, m.Comment) - if err != nil { - return err - } - _, err = fmt.Fprintf(otw.w, "# TYPE %s %s\n", m.Name, metricType) + _, err := fmt.Fprintf(otw.w, "# TYPE %s %s\n", m.Name, m.Type.String()) if err != nil { return err } - labels := make([]string, len(m.Labels)) - for i, label := range m.Labels { - labels[i] = fmt.Sprintf("%s=%s", label.key, label.value) + var labelsStr string + if len(m.Labels) > 0 { + labels := make([]string, len(m.Labels)) + for i, label := range m.Labels { + labels[i] = fmt.Sprintf("%s=%q", label.key, label.value) // Use %q to quote values + } + labelsStr = fmt.Sprintf("{%s}", strings.Join(labels, ",")) } - _, err = fmt.Fprintf(otw.w, "%s{%s} %d\n", m.Name, strings.Join(labels, ","), m.ValueInt64) + _, err = fmt.Fprintf(otw.w, "%s%s %d\n", m.Name, labelsStr, m.ValueInt64) return err } diff --git a/internals/metrics/metrics_test.go b/internals/metrics/metrics_test.go index 558156755..c454792e5 100644 --- a/internals/metrics/metrics_test.go +++ b/internals/metrics/metrics_test.go @@ -16,12 +16,17 @@ package metrics_test import ( "bytes" + "testing" . "gopkg.in/check.v1" "github.com/canonical/pebble/internals/metrics" ) +func Test(t *testing.T) { + TestingT(t) +} + type OpenTelemetryWriterSuite struct{} var _ = Suite(&OpenTelemetryWriterSuite{}) @@ -44,10 +49,11 @@ func (s *OpenTelemetryWriterSuite) TestOpenTelemetryWriter(c *C) { metrics.NewLabel("key2", "value2"), }, }, - expected: `# HELP my_counter A simple counter + expected: ` +# HELP my_counter A simple counter # TYPE my_counter counter -my_counter{key1=value1,key2=value2} 42 -`, +my_counter{key1="value1",key2="value2"} 42 +`[1:], }, { name: "GaugeInt", @@ -58,10 +64,11 @@ my_counter{key1=value1,key2=value2} 42 Comment: "A simple gauge", Labels: []metrics.Label{}, // Test with no labels }, - expected: `# HELP my_gauge A simple gauge + expected: ` +# HELP my_gauge A simple gauge # TYPE my_gauge gauge -my_gauge{} 1 -`, +my_gauge 1 +`[1:], }, { name: "NoComment", // Test without comment @@ -71,10 +78,10 @@ my_gauge{} 1 ValueInt64: 42, Labels: []metrics.Label{metrics.NewLabel("env", "prod")}, }, - expected: `# HELP no_comment_metric + expected: ` # TYPE no_comment_metric counter -no_comment_metric{env=prod} 42 -`, +no_comment_metric{env="prod"} 42 +`[1:], }, { @@ -89,10 +96,11 @@ no_comment_metric{env=prod} 42 metrics.NewLabel("key-with-dash", "value-with-dash"), }, }, - expected: `# HELP special_chars Metric with special characters + expected: ` +# HELP special_chars Metric with special characters # TYPE special_chars gauge -special_chars{key_with_underscore=value_with_underscore,key-with-dash=value-with-dash} 42 -`, +special_chars{key_with_underscore="value_with_underscore",key-with-dash="value-with-dash"} 42 +`[1:], }, } diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index 0fd177653..07bbfc63f 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -324,19 +324,8 @@ func (m *CheckManager) Checks() ([]*CheckInfo, error) { infos := make([]*CheckInfo, 0, len(m.checks)) for _, info := range m.checks { - copied := &CheckInfo{ - Name: info.Name, - Level: info.Level, - Startup: info.Startup, - Status: info.Status, - Failures: info.Failures, - Threshold: info.Threshold, - ChangeID: info.ChangeID, - PerformCheckCount: info.PerformCheckCount, - RecoverCheckCount: info.RecoverCheckCount, - } - infos = append(infos, copied) - + copy := *info + infos = append(infos, ©) } sort.Slice(infos, func(i, j int) bool { return infos[i].Name < infos[j].Name @@ -359,26 +348,17 @@ func (m *CheckManager) updateCheckInfo(config *plan.Check, changeID string, fail startup = plan.CheckStartupEnabled } - if check, ok := m.checks[config.Name]; ok { - check.Level = config.Level - check.Startup = startup - check.Status = status - check.Failures = failures - check.Threshold = config.Threshold - check.ChangeID = changeID - } else { - m.checks[config.Name] = &CheckInfo{ - Name: config.Name, - Level: config.Level, - Startup: startup, - Status: status, - Failures: failures, - Threshold: config.Threshold, - ChangeID: changeID, - PerformCheckCount: 0, - RecoverCheckCount: 0, - } + check, ok := m.checks[config.Name] + if !ok { + check = &CheckInfo{Name: config.Name} + m.checks[config.Name] = check } + check.Level = config.Level + check.Startup = startup + check.Status = status + check.Failures = failures + check.Threshold = config.Threshold + check.ChangeID = changeID } func (m *CheckManager) incPerformCheckCount(config *plan.Check) { @@ -428,14 +408,14 @@ type checker interface { } func (c *CheckInfo) writeMetrics(writer metrics.Writer) error { - checkStatus := 0 + checkUp := int64(0) if c.Status == CheckStatusUp { - checkStatus = 1 + checkUp = 1 } err := writer.Write(metrics.Metric{ Name: "pebble_check_up", Type: metrics.TypeGaugeInt, - ValueInt64: int64(checkStatus), + ValueInt64: checkUp, Comment: "Whether the health check is up (1) or not (0)", Labels: []metrics.Label{metrics.NewLabel("check", c.Name)}, }) @@ -473,7 +453,14 @@ func (m *CheckManager) WriteMetrics(writer metrics.Writer) error { m.checksLock.Lock() defer m.checksLock.Unlock() - for _, info := range m.checks { + names := make([]string, 0, len(m.checks)) + for name := range m.checks { + names = append(names, name) + } + sort.Strings(names) + + for _, name := range names { + info := m.checks[name] err := info.writeMetrics(writer) if err != nil { return err diff --git a/internals/overlord/servstate/handlers.go b/internals/overlord/servstate/handlers.go index b2d4a9186..efe20c2b0 100644 --- a/internals/overlord/servstate/handlers.go +++ b/internals/overlord/servstate/handlers.go @@ -290,7 +290,7 @@ func (s *serviceData) transition(state serviceState) { // transitionRestarting changes the service's state and also sets the restarting flag. func (s *serviceData) transitionRestarting(state serviceState, restarting bool) { - // Update current-s_e time if derived status is changing. + // Update current-since time if derived status is changing. oldStatus := stateToStatus(s.state) newStatus := stateToStatus(state) if oldStatus != newStatus { @@ -849,7 +849,7 @@ func (d *serviceData) writeMetrics(writer metrics.Writer) error { Name: "pebble_service_active", Type: metrics.TypeGaugeInt, ValueInt64: int64(active), - Comment: "Indicates if the service is currently active (1) or not (0)", + Comment: "Whether the service is currently active (1) or not (0)", Labels: []metrics.Label{metrics.NewLabel("service", d.config.Name)}, }) if err != nil { diff --git a/internals/overlord/servstate/manager.go b/internals/overlord/servstate/manager.go index fb4c1347f..19ad88e65 100644 --- a/internals/overlord/servstate/manager.go +++ b/internals/overlord/servstate/manager.go @@ -340,7 +340,14 @@ func (m *ServiceManager) WriteMetrics(writer metrics.Writer) error { m.servicesLock.Lock() defer m.servicesLock.Unlock() - for _, service := range m.services { + names := make([]string, 0, len(m.services)) + for name := range m.services { + names = append(names, name) + } + sort.Strings(names) + + for _, name := range names { + service := m.services[name] err := service.writeMetrics(writer) if err != nil { return err diff --git a/internals/overlord/servstate/manager_test.go b/internals/overlord/servstate/manager_test.go index b325a4411..98677bff5 100644 --- a/internals/overlord/servstate/manager_test.go +++ b/internals/overlord/servstate/manager_test.go @@ -2105,38 +2105,81 @@ func (s *S) TestMetrics(c *C) { return } buf := new(bytes.Buffer) - openTelemetryWriter := metrics.NewOpenTelemetryWriter(buf) - s.manager.WriteMetrics(openTelemetryWriter) - metrics := buf.String() - c.Assert(metrics, testutil.Contains, "pebble_service_start_count{service=test1} 1") - c.Assert(metrics, testutil.Contains, "pebble_service_active{service=test1} 1") - c.Assert(metrics, testutil.Contains, "pebble_service_start_count{service=test2} 1") - c.Assert(metrics, testutil.Contains, "pebble_service_active{service=test2} 1") - + writer := metrics.NewOpenTelemetryWriter(buf) + s.manager.WriteMetrics(writer) + expected := ` +# HELP pebble_service_start_count Number of times the service has started +# TYPE pebble_service_start_count counter +pebble_service_start_count{service="test1"} 1 +# HELP pebble_service_active Whether the service is currently active (1) or not (0) +# TYPE pebble_service_active gauge +pebble_service_active{service="test1"} 1 +# HELP pebble_service_start_count Number of times the service has started +# TYPE pebble_service_start_count counter +pebble_service_start_count{service="test2"} 1 +# HELP pebble_service_active Whether the service is currently active (1) or not (0) +# TYPE pebble_service_active gauge +pebble_service_active{service="test2"} 1 +`[1:] + c.Assert(buf.String(), Equals, expected) + + buf.Reset() s.stopTestServices(c) - s.manager.WriteMetrics(openTelemetryWriter) - metrics = buf.String() - c.Assert(metrics, testutil.Contains, "pebble_service_start_count{service=test1} 1") - c.Assert(metrics, testutil.Contains, "pebble_service_active{service=test1} 0") - c.Assert(metrics, testutil.Contains, "pebble_service_start_count{service=test2} 1") - c.Assert(metrics, testutil.Contains, "pebble_service_active{service=test2} 0") - + s.manager.WriteMetrics(writer) + expected = ` +# HELP pebble_service_start_count Number of times the service has started +# TYPE pebble_service_start_count counter +pebble_service_start_count{service="test1"} 1 +# HELP pebble_service_active Whether the service is currently active (1) or not (0) +# TYPE pebble_service_active gauge +pebble_service_active{service="test1"} 0 +# HELP pebble_service_start_count Number of times the service has started +# TYPE pebble_service_start_count counter +pebble_service_start_count{service="test2"} 1 +# HELP pebble_service_active Whether the service is currently active (1) or not (0) +# TYPE pebble_service_active gauge +pebble_service_active{service="test2"} 0 +`[1:] + c.Assert(buf.String(), Equals, expected) + + buf.Reset() s.startTestServices(c, true) if c.Failed() { return } - s.manager.WriteMetrics(openTelemetryWriter) - metrics = buf.String() - c.Assert(metrics, testutil.Contains, "pebble_service_start_count{service=test1} 2") - c.Assert(metrics, testutil.Contains, "pebble_service_active{service=test1} 1") - c.Assert(metrics, testutil.Contains, "pebble_service_start_count{service=test2} 2") - c.Assert(metrics, testutil.Contains, "pebble_service_active{service=test2} 1") - + s.manager.WriteMetrics(writer) + expected = ` +# HELP pebble_service_start_count Number of times the service has started +# TYPE pebble_service_start_count counter +pebble_service_start_count{service="test1"} 2 +# HELP pebble_service_active Whether the service is currently active (1) or not (0) +# TYPE pebble_service_active gauge +pebble_service_active{service="test1"} 1 +# HELP pebble_service_start_count Number of times the service has started +# TYPE pebble_service_start_count counter +pebble_service_start_count{service="test2"} 2 +# HELP pebble_service_active Whether the service is currently active (1) or not (0) +# TYPE pebble_service_active gauge +pebble_service_active{service="test2"} 1 +`[1:] + c.Assert(buf.String(), Equals, expected) + + buf.Reset() s.stopTestServices(c) - s.manager.WriteMetrics(openTelemetryWriter) - metrics = buf.String() - c.Assert(metrics, testutil.Contains, "pebble_service_start_count{service=test1} 2") - c.Assert(metrics, testutil.Contains, "pebble_service_active{service=test1} 0") - c.Assert(metrics, testutil.Contains, "pebble_service_start_count{service=test2} 2") - c.Assert(metrics, testutil.Contains, "pebble_service_active{service=test2} 0") + s.manager.WriteMetrics(writer) + expected = ` +# HELP pebble_service_start_count Number of times the service has started +# TYPE pebble_service_start_count counter +pebble_service_start_count{service="test1"} 2 +# HELP pebble_service_active Whether the service is currently active (1) or not (0) +# TYPE pebble_service_active gauge +pebble_service_active{service="test1"} 0 +# HELP pebble_service_start_count Number of times the service has started +# TYPE pebble_service_start_count counter +pebble_service_start_count{service="test2"} 2 +# HELP pebble_service_active Whether the service is currently active (1) or not (0) +# TYPE pebble_service_active gauge +pebble_service_active{service="test2"} 0 +`[1:] + c.Assert(buf.String(), Equals, expected) } From 467d8fa5959a47f736b6dc64836f67c37fe01707 Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Fri, 14 Feb 2025 12:59:09 +0800 Subject: [PATCH 32/41] chore: unexport check metrics and update tests --- internals/overlord/checkstate/handlers.go | 2 +- internals/overlord/checkstate/manager.go | 18 ++- internals/overlord/checkstate/manager_test.go | 129 ++++++++++++++---- 3 files changed, 116 insertions(+), 33 deletions(-) diff --git a/internals/overlord/checkstate/handlers.go b/internals/overlord/checkstate/handlers.go index 3c4b9a5a3..383bc699f 100644 --- a/internals/overlord/checkstate/handlers.go +++ b/internals/overlord/checkstate/handlers.go @@ -47,6 +47,7 @@ func (m *CheckManager) doPerformCheck(task *state.Task, tomb *tombpkg.Tomb) erro select { case <-ticker.C: err := runCheck(tomb.Context(nil), chk, config.Timeout.Value) + m.incPerformCheckCount(config) if !tomb.Alive() { return checkStopped(config.Name, task.Kind(), tomb.Err()) } @@ -91,7 +92,6 @@ func (m *CheckManager) doPerformCheck(task *state.Task, tomb *tombpkg.Tomb) erro task.Set(checkDetailsAttr, &details) m.state.Unlock() } - m.incPerformCheckCount(config) case <-tomb.Dying(): return checkStopped(config.Name, task.Kind(), tomb.Err()) diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index 07bbfc63f..0a57d9d4a 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -365,14 +365,20 @@ func (m *CheckManager) incPerformCheckCount(config *plan.Check) { m.checksLock.Lock() defer m.checksLock.Unlock() - m.checks[config.Name].PerformCheckCount += 1 + check, ok := m.checks[config.Name] + if !ok { + check = &CheckInfo{Name: config.Name} + m.checks[config.Name] = check + } + + m.checks[config.Name].performCheckCount += 1 } func (m *CheckManager) incRecoverCheckCount(config *plan.Check) { m.checksLock.Lock() defer m.checksLock.Unlock() - m.checks[config.Name].RecoverCheckCount += 1 + m.checks[config.Name].recoverCheckCount += 1 } func (m *CheckManager) deleteCheckInfo(name string) { @@ -391,8 +397,8 @@ type CheckInfo struct { Failures int Threshold int ChangeID string - PerformCheckCount int64 - RecoverCheckCount int64 + performCheckCount int64 + recoverCheckCount int64 } type CheckStatus string @@ -426,7 +432,7 @@ func (c *CheckInfo) writeMetrics(writer metrics.Writer) error { err = writer.Write(metrics.Metric{ Name: "pebble_perform_check_count", Type: metrics.TypeCounterInt, - ValueInt64: c.PerformCheckCount, + ValueInt64: c.performCheckCount, Comment: "Number of times the perform-check has run", Labels: []metrics.Label{metrics.NewLabel("check", c.Name)}, }) @@ -437,7 +443,7 @@ func (c *CheckInfo) writeMetrics(writer metrics.Writer) error { err = writer.Write(metrics.Metric{ Name: "pebble_recover_check_count", Type: metrics.TypeCounterInt, - ValueInt64: c.RecoverCheckCount, + ValueInt64: c.recoverCheckCount, Comment: "Number of times the recover-check has run", Labels: []metrics.Label{metrics.NewLabel("check", c.Name)}, }) diff --git a/internals/overlord/checkstate/manager_test.go b/internals/overlord/checkstate/manager_test.go index 08773061d..ad54c24b7 100644 --- a/internals/overlord/checkstate/manager_test.go +++ b/internals/overlord/checkstate/manager_test.go @@ -15,6 +15,7 @@ package checkstate_test import ( + "bytes" "errors" "fmt" "os" @@ -28,6 +29,7 @@ import ( . "gopkg.in/check.v1" "github.com/canonical/pebble/internals/logger" + "github.com/canonical/pebble/internals/metrics" "github.com/canonical/pebble/internals/overlord" "github.com/canonical/pebble/internals/overlord/checkstate" "github.com/canonical/pebble/internals/overlord/planstate" @@ -869,8 +871,11 @@ func (s *ManagerSuite) TestReplan(c *C) { c.Assert(change.Kind(), Equals, "perform-check") } -func (s *ManagerSuite) TestMetrics(c *C) { - origLayer := &plan.Layer{ +func (s *ManagerSuite) TestMetricsPerformCheck(c *C) { + tempDir := c.MkDir() + tempFile := filepath.Join(tempDir, "file.txt") + command := fmt.Sprintf(`/bin/sh -c 'echo -n x >>%s'`, tempFile) + s.manager.PlanChanged(&plan.Plan{ Checks: map[string]*plan.Check{ "chk1": { Name: "chk1", @@ -878,44 +883,116 @@ func (s *ManagerSuite) TestMetrics(c *C) { Period: plan.OptionalDuration{Value: 10 * time.Millisecond}, Timeout: plan.OptionalDuration{Value: time.Second}, Threshold: 3, - Exec: &plan.ExecCheck{Command: "echo chk1"}, - Startup: plan.CheckStartupDisabled, + Exec: &plan.ExecCheck{Command: command}, + Startup: plan.CheckStartupEnabled, }, }, + }) + + // Wait for check to run at least twice + for i := 0; ; i++ { + if i >= 1000 { + c.Fatalf("failed waiting for check to run") + } + b, _ := os.ReadFile(tempFile) + if len(b) >= 2 { + break + } + time.Sleep(time.Millisecond) } - err := s.planMgr.AppendLayer(origLayer, false) - c.Assert(err, IsNil) - // Ensure and wait for the counter to increase. + buf := new(bytes.Buffer) + writer := metrics.NewOpenTelemetryWriter(buf) + s.manager.WriteMetrics(writer) + expected := ` +# HELP pebble_check_up Whether the health check is up (1) or not (0) +# TYPE pebble_check_up gauge +pebble_check_up{check="chk1"} 1 +# HELP pebble_perform_check_count Number of times the perform-check has run +# TYPE pebble_perform_check_count counter +pebble_perform_check_count{check="chk1"} 2 +# HELP pebble_recover_check_count Number of times the recover-check has run +# TYPE pebble_recover_check_count counter +pebble_recover_check_count{check="chk1"} 0 +`[1:] + c.Assert(buf.String(), Equals, expected) +} + +func (s *ManagerSuite) TestMetricsRecoverCheck(c *C) { + testPath := c.MkDir() + "/test" + err := os.WriteFile(testPath, nil, 0o644) + c.Assert(err, IsNil) s.manager.PlanChanged(&plan.Plan{ Checks: map[string]*plan.Check{ "chk1": { Name: "chk1", Override: "replace", - Period: plan.OptionalDuration{Value: 10 * time.Millisecond}, - Timeout: plan.OptionalDuration{Value: time.Second}, + Period: plan.OptionalDuration{Value: 20 * time.Millisecond}, + Timeout: plan.OptionalDuration{Value: 100 * time.Millisecond}, Threshold: 3, - Exec: &plan.ExecCheck{Command: "echo chk1"}, - Startup: plan.CheckStartupEnabled, + Exec: &plan.ExecCheck{ + Command: fmt.Sprintf(`/bin/sh -c 'echo details >/dev/stderr; [ ! -f %s ]'`, testPath), + }, }, }, }) - waitChecks(c, s.manager, []*checkstate.CheckInfo{ - {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3, PerformCheckCount: 2, RecoverCheckCount: 0}, + + // After 2 failures, check is still up, perform-check counter is 2. + waitCheck(c, s.manager, "chk1", func(check *checkstate.CheckInfo) bool { + return check.Failures == 2 }) - checks, err := s.manager.Checks() - c.Assert(err, IsNil) - c.Assert(checks[0].PerformCheckCount, Equals, int64(2)) - // Test updateCheckInfo (called by StopChecks) doesn't reset metrics. - changed, err := s.manager.StopChecks([]string{"chk1"}) - waitChecks(c, s.manager, []*checkstate.CheckInfo{ - {Name: "chk1", Startup: "disabled", Status: "inactive", Threshold: 3, PerformCheckCount: 2, RecoverCheckCount: 0}, + buf := new(bytes.Buffer) + writer := metrics.NewOpenTelemetryWriter(buf) + s.manager.WriteMetrics(writer) + expected := ` +# HELP pebble_check_up Whether the health check is up (1) or not (0) +# TYPE pebble_check_up gauge +pebble_check_up{check="chk1"} 1 +# HELP pebble_perform_check_count Number of times the perform-check has run +# TYPE pebble_perform_check_count counter +pebble_perform_check_count{check="chk1"} 2 +# HELP pebble_recover_check_count Number of times the recover-check has run +# TYPE pebble_recover_check_count counter +pebble_recover_check_count{check="chk1"} 0 +`[1:] + c.Assert(buf.String(), Equals, expected) + + // After 3 failures, check is down, perform-check counter is 3, recover-check counter is 0. + waitCheck(c, s.manager, "chk1", func(check *checkstate.CheckInfo) bool { + return check.Failures == 3 + }) + buf.Reset() + s.manager.WriteMetrics(writer) + expected = ` +# HELP pebble_check_up Whether the health check is up (1) or not (0) +# TYPE pebble_check_up gauge +pebble_check_up{check="chk1"} 0 +# HELP pebble_perform_check_count Number of times the perform-check has run +# TYPE pebble_perform_check_count counter +pebble_perform_check_count{check="chk1"} 3 +# HELP pebble_recover_check_count Number of times the recover-check has run +# TYPE pebble_recover_check_count counter +pebble_recover_check_count{check="chk1"} 0 +`[1:] + c.Assert(buf.String(), Equals, expected) + + // After 4 failures, check is down, perform-check counter is 3, recover-check counter is 1. + waitCheck(c, s.manager, "chk1", func(check *checkstate.CheckInfo) bool { + return check.Failures == 4 }) - c.Assert(err, IsNil) - c.Assert(changed, DeepEquals, []string{"chk1"}) - checks, err = s.manager.Checks() - c.Assert(err, IsNil) - c.Assert(checks[0].PerformCheckCount, Equals, int64(2)) - c.Assert(checks[0].RecoverCheckCount, Equals, int64(0)) + buf.Reset() + s.manager.WriteMetrics(writer) + expected = ` +# HELP pebble_check_up Whether the health check is up (1) or not (0) +# TYPE pebble_check_up gauge +pebble_check_up{check="chk1"} 0 +# HELP pebble_perform_check_count Number of times the perform-check has run +# TYPE pebble_perform_check_count counter +pebble_perform_check_count{check="chk1"} 3 +# HELP pebble_recover_check_count Number of times the recover-check has run +# TYPE pebble_recover_check_count counter +pebble_recover_check_count{check="chk1"} 1 +`[1:] + c.Assert(buf.String(), Equals, expected) } From 8bc87f17dfeab0da505427e08e1f28cb78ca7db7 Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Fri, 14 Feb 2025 18:37:08 +0800 Subject: [PATCH 33/41] chore: use buffer in api metrics --- internals/daemon/api_metrics.go | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/internals/daemon/api_metrics.go b/internals/daemon/api_metrics.go index 7fd35b9d1..d2eb7bba8 100644 --- a/internals/daemon/api_metrics.go +++ b/internals/daemon/api_metrics.go @@ -15,6 +15,7 @@ package daemon import ( + "bytes" "net/http" "github.com/canonical/pebble/internals/logger" @@ -37,18 +38,26 @@ type metricsResponse struct { } func (r metricsResponse) ServeHTTP(w http.ResponseWriter, req *http.Request) { - openTelemetryWriter := metrics.NewOpenTelemetryWriter(w) + var buf bytes.Buffer + metricsWriter := metrics.NewOpenTelemetryWriter(&buf) - err := r.svcMgr.WriteMetrics(openTelemetryWriter) + err := r.svcMgr.WriteMetrics(metricsWriter) if err != nil { - logger.Noticef("Cannot write to HTTP response: %v", err.Error()) + logger.Noticef("Cannot write service metrics: %v", err) http.Error(w, "# internal server error", http.StatusInternalServerError) return } - err = r.chkMgr.WriteMetrics(openTelemetryWriter) + err = r.chkMgr.WriteMetrics(metricsWriter) if err != nil { - logger.Noticef("Cannot write to HTTP response: %v", err.Error()) + logger.Noticef("Cannot write check metrics: %v", err) + http.Error(w, "# internal server error", http.StatusInternalServerError) + return + } + + _, err = buf.WriteTo(w) + if err != nil { + logger.Noticef("Cannot write to HTTP response: %v", err) http.Error(w, "# internal server error", http.StatusInternalServerError) return } From 6963e5e09e4f7eef08e2ac9480ce9da7246dcbe6 Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Fri, 14 Feb 2025 18:37:47 +0800 Subject: [PATCH 34/41] chore: use wtier for metrics labels --- internals/metrics/metrics.go | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/internals/metrics/metrics.go b/internals/metrics/metrics.go index e72821bbc..5f711af5b 100644 --- a/internals/metrics/metrics.go +++ b/internals/metrics/metrics.go @@ -17,7 +17,6 @@ package metrics import ( "fmt" "io" - "strings" ) type MetricType int @@ -86,15 +85,17 @@ func (otw *OpenTelemetryWriter) Write(m Metric) error { return err } - var labelsStr string + io.WriteString(otw.w, m.Name) if len(m.Labels) > 0 { - labels := make([]string, len(m.Labels)) + io.WriteString(otw.w, "{") for i, label := range m.Labels { - labels[i] = fmt.Sprintf("%s=%q", label.key, label.value) // Use %q to quote values + if i > 0 { + io.WriteString(otw.w, ",") + } + fmt.Fprintf(otw.w, "%s=%q", label.key, label.value) // Use %q to quote values. } - labelsStr = fmt.Sprintf("{%s}", strings.Join(labels, ",")) + io.WriteString(otw.w, "}") } - - _, err = fmt.Fprintf(otw.w, "%s%s %d\n", m.Name, labelsStr, m.ValueInt64) + fmt.Fprintf(otw.w, " %d\n", m.ValueInt64) return err } From cfa73a55363c2403b92961784870055eeab0ab6f Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Fri, 14 Feb 2025 18:39:29 +0800 Subject: [PATCH 35/41] test: add test for api metrics --- internals/daemon/api_metrics_test.go | 77 ++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 internals/daemon/api_metrics_test.go diff --git a/internals/daemon/api_metrics_test.go b/internals/daemon/api_metrics_test.go new file mode 100644 index 000000000..2a54e6b78 --- /dev/null +++ b/internals/daemon/api_metrics_test.go @@ -0,0 +1,77 @@ +// Copyright (c) 2025 Canonical Ltd +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 3 as +// published by the Free Software Foundation. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package daemon + +import ( + "bytes" + "net/http" + "net/http/httptest" + "time" + + "github.com/canonical/pebble/internals/overlord/servstate" + . "gopkg.in/check.v1" +) + +func (s *apiSuite) TestMetrics(c *C) { + writeTestLayer(s.pebbleDir, ` +services: + test1: + override: replace + command: sleep 10 +`) + d := s.daemon(c) + d.overlord.Loop() + + // Start test service. + payload := bytes.NewBufferString(`{"action": "start", "services": ["test1"]}`) + req, err := http.NewRequest("POST", "/v1/services", payload) + c.Assert(err, IsNil) + rsp := v1PostServices(apiCmd("/v1/services"), req, nil).(*resp) + rec := httptest.NewRecorder() + rsp.ServeHTTP(rec, req) + c.Check(rec.Result().StatusCode, Equals, 202) + + // Wait for it to be running. + serviceMgr := d.overlord.ServiceManager() + for i := 0; ; i++ { + if i > 50 { + c.Fatalf("timed out waiting for service to start") + } + services, err := serviceMgr.Services([]string{"test1"}) + c.Assert(err, IsNil) + if len(services) == 1 && services[0].Current == servstate.StatusActive { + break + } + time.Sleep(5 * time.Millisecond) + } + + // Get metrics. + metricsCmd := apiCmd("/v1/metrics") + metricsReq, err := http.NewRequest("GET", "/v1/metrics", nil) + c.Assert(err, IsNil) + metricsRec := httptest.NewRecorder() + metricsRsp := v1GetMetrics(metricsCmd, metricsReq, nil).(metricsResponse) + metricsRsp.ServeHTTP(metricsRec, metricsReq) + c.Check(metricsRec.Code, Equals, 200) + expected := ` +# HELP pebble_service_start_count Number of times the service has started +# TYPE pebble_service_start_count counter +pebble_service_start_count{service="test1"} 1 +# HELP pebble_service_active Whether the service is currently active (1) or not (0) +# TYPE pebble_service_active gauge +pebble_service_active{service="test1"} 1 +`[1:] + c.Assert(metricsRec.Body.String(), Equals, expected) +} From f37f94f2358a25e4b178d7dda6c89b2f3ca79541 Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Fri, 14 Feb 2025 18:56:38 +0800 Subject: [PATCH 36/41] chore: fix linting --- internals/daemon/api_metrics_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internals/daemon/api_metrics_test.go b/internals/daemon/api_metrics_test.go index 2a54e6b78..2f86e39ce 100644 --- a/internals/daemon/api_metrics_test.go +++ b/internals/daemon/api_metrics_test.go @@ -20,8 +20,9 @@ import ( "net/http/httptest" "time" - "github.com/canonical/pebble/internals/overlord/servstate" . "gopkg.in/check.v1" + + "github.com/canonical/pebble/internals/overlord/servstate" ) func (s *apiSuite) TestMetrics(c *C) { From 7369fe32d33b6e38bce3d5b5767f6b1f80dadf73 Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Fri, 14 Feb 2025 11:01:59 +0000 Subject: [PATCH 37/41] chore: remove unnecessary changes --- internals/daemon/daemon.go | 1 - 1 file changed, 1 deletion(-) diff --git a/internals/daemon/daemon.go b/internals/daemon/daemon.go index 2b86b1569..24bd6a0b6 100644 --- a/internals/daemon/daemon.go +++ b/internals/daemon/daemon.go @@ -373,7 +373,6 @@ func (d *Daemon) Init() error { } logger.Noticef("Started daemon.") - return nil } From 136ae069463772aa6c1fbce5c970f50068a5ae19 Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Mon, 17 Feb 2025 09:47:07 +0800 Subject: [PATCH 38/41] chore: refactor according to review --- internals/metrics/metrics.go | 39 +++++++++++++++++++----- internals/overlord/checkstate/manager.go | 27 ++++++++-------- 2 files changed, 45 insertions(+), 21 deletions(-) diff --git a/internals/metrics/metrics.go b/internals/metrics/metrics.go index 5f711af5b..22d9532c5 100644 --- a/internals/metrics/metrics.go +++ b/internals/metrics/metrics.go @@ -33,7 +33,7 @@ func (mt MetricType) String() string { case TypeGaugeInt: return "gauge" default: - panic("invalid metric type") + panic(fmt.Sprintf("invalid metric type %d", mt)) } } @@ -85,17 +85,40 @@ func (otw *OpenTelemetryWriter) Write(m Metric) error { return err } - io.WriteString(otw.w, m.Name) + _, err = io.WriteString(otw.w, m.Name) + if err != nil { + return err + } + if len(m.Labels) > 0 { - io.WriteString(otw.w, "{") + _, err = io.WriteString(otw.w, "{") + if err != nil { + return err + } + for i, label := range m.Labels { if i > 0 { - io.WriteString(otw.w, ",") + _, err = io.WriteString(otw.w, ",") + if err != nil { + return err + } + } + _, err = fmt.Fprintf(otw.w, "%s=%q", label.key, label.value) // Use %q to quote values. + if err != nil { + return err } - fmt.Fprintf(otw.w, "%s=%q", label.key, label.value) // Use %q to quote values. } - io.WriteString(otw.w, "}") + + _, err = io.WriteString(otw.w, "}") + if err != nil { + return err + } } - fmt.Fprintf(otw.w, " %d\n", m.ValueInt64) - return err + + _, err = fmt.Fprintf(otw.w, " %d\n", m.ValueInt64) + if err != nil { + return err + } + + return nil } diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index 0a57d9d4a..8b0c68ade 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -333,6 +333,15 @@ func (m *CheckManager) Checks() ([]*CheckInfo, error) { return infos, nil } +func (m *CheckManager) ensureCheck(name string) *CheckInfo { + check, ok := m.checks[name] + if !ok { + check = &CheckInfo{Name: name} + m.checks[name] = check + } + return check +} + func (m *CheckManager) updateCheckInfo(config *plan.Check, changeID string, failures int) { m.checksLock.Lock() defer m.checksLock.Unlock() @@ -348,11 +357,7 @@ func (m *CheckManager) updateCheckInfo(config *plan.Check, changeID string, fail startup = plan.CheckStartupEnabled } - check, ok := m.checks[config.Name] - if !ok { - check = &CheckInfo{Name: config.Name} - m.checks[config.Name] = check - } + check := m.ensureCheck(config.Name) check.Level = config.Level check.Startup = startup check.Status = status @@ -365,20 +370,16 @@ func (m *CheckManager) incPerformCheckCount(config *plan.Check) { m.checksLock.Lock() defer m.checksLock.Unlock() - check, ok := m.checks[config.Name] - if !ok { - check = &CheckInfo{Name: config.Name} - m.checks[config.Name] = check - } - - m.checks[config.Name].performCheckCount += 1 + check := m.ensureCheck(config.Name) + check.performCheckCount += 1 } func (m *CheckManager) incRecoverCheckCount(config *plan.Check) { m.checksLock.Lock() defer m.checksLock.Unlock() - m.checks[config.Name].recoverCheckCount += 1 + check := m.ensureCheck(config.Name) + check.recoverCheckCount += 1 } func (m *CheckManager) deleteCheckInfo(name string) { From 13aa3b1f0fd9150d9764d23357a02dd3dbeb2ad4 Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Tue, 18 Feb 2025 09:55:17 +0800 Subject: [PATCH 39/41] chore: create internal checkData --- internals/overlord/checkstate/handlers.go | 8 +- internals/overlord/checkstate/manager.go | 107 +++++++++++++--------- 2 files changed, 67 insertions(+), 48 deletions(-) diff --git a/internals/overlord/checkstate/handlers.go b/internals/overlord/checkstate/handlers.go index 383bc699f..05cb85647 100644 --- a/internals/overlord/checkstate/handlers.go +++ b/internals/overlord/checkstate/handlers.go @@ -58,9 +58,9 @@ func (m *CheckManager) doPerformCheck(task *state.Task, tomb *tombpkg.Tomb) erro atThreshold := details.Failures >= config.Threshold if !atThreshold { // Update number of failures in check info. In threshold - // case, check info will be updated with new change ID by + // case, check data will be updated with new change ID by // changeStatusChanged. - m.updateCheckInfo(config, changeID, details.Failures) + m.updateCheckData(config, changeID, details.Failures) } m.state.Lock() @@ -84,7 +84,7 @@ func (m *CheckManager) doPerformCheck(task *state.Task, tomb *tombpkg.Tomb) erro return err } } else if details.Failures > 0 { - m.updateCheckInfo(config, changeID, 0) + m.updateCheckData(config, changeID, 0) m.state.Lock() task.Logf("succeeded after %s", pluralise(details.Failures, "failure", "failures")) @@ -136,7 +136,7 @@ func (m *CheckManager) doRecoverCheck(task *state.Task, tomb *tombpkg.Tomb) erro } if err != nil { details.Failures++ - m.updateCheckInfo(config, changeID, details.Failures) + m.updateCheckData(config, changeID, details.Failures) m.state.Lock() task.Set(checkDetailsAttr, &details) diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index 8b0c68ade..627d54fd0 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -47,7 +47,7 @@ type CheckManager struct { failureHandlers []FailureFunc checksLock sync.Mutex - checks map[string]*CheckInfo + checks map[string]*checkData } // FailureFunc is the type of function called when a failure action is triggered. @@ -57,7 +57,7 @@ type FailureFunc func(name string) func NewManager(s *state.State, runner *state.TaskRunner, planMgr *planstate.PlanManager) *CheckManager { manager := &CheckManager{ state: s, - checks: make(map[string]*CheckInfo), + checks: make(map[string]*checkData), planMgr: planMgr, } @@ -155,7 +155,7 @@ func (m *CheckManager) PlanChanged(newPlan *plan.Plan) { newOrModified[details.Name] = true } change.Abort() - m.deleteCheckInfo(details.Name) + m.deleteCheckData(details.Name) shouldEnsure = true } } @@ -169,7 +169,7 @@ func (m *CheckManager) PlanChanged(newPlan *plan.Plan) { } else { // Check is new and should be inactive - no need to start it, // but we need to add it to the list of existing checks. - m.updateCheckInfo(config, "", 0) + m.updateCheckData(config, "", 0) } } } @@ -179,7 +179,7 @@ func (m *CheckManager) PlanChanged(newPlan *plan.Plan) { if newOrModified[config.Name] { merged := mergeServiceContext(newPlan, config) changeID := performCheckChange(m.state, merged) - m.updateCheckInfo(config, changeID, 0) + m.updateCheckData(config, changeID, 0) shouldEnsure = true } } @@ -198,7 +198,7 @@ func (m *CheckManager) changeStatusChanged(change *state.Change, old, new state. } config := m.state.Cached(performConfigKey{change.ID()}).(*plan.Check) // panic if key not present (always should be) changeID := recoverCheckChange(m.state, config, details.Failures) - m.updateCheckInfo(config, changeID, details.Failures) + m.updateCheckData(config, changeID, details.Failures) shouldEnsure = true case change.Kind() == recoverCheckKind && new == state.DoneStatus: @@ -208,7 +208,7 @@ func (m *CheckManager) changeStatusChanged(change *state.Change, old, new state. } config := m.state.Cached(recoverConfigKey{change.ID()}).(*plan.Check) // panic if key not present (always should be) changeID := performCheckChange(m.state, config) - m.updateCheckInfo(config, changeID, 0) + m.updateCheckData(config, changeID, 0) shouldEnsure = true } @@ -323,9 +323,17 @@ func (m *CheckManager) Checks() ([]*CheckInfo, error) { defer m.checksLock.Unlock() infos := make([]*CheckInfo, 0, len(m.checks)) - for _, info := range m.checks { - copy := *info - infos = append(infos, ©) + for _, checkData := range m.checks { + info := CheckInfo{ + Name: checkData.name, + Level: checkData.level, + Startup: checkData.startup, + Status: checkData.status, + Failures: checkData.failures, + Threshold: checkData.threshold, + ChangeID: checkData.changeID, + } + infos = append(infos, &info) } sort.Slice(infos, func(i, j int) bool { return infos[i].Name < infos[j].Name @@ -333,16 +341,16 @@ func (m *CheckManager) Checks() ([]*CheckInfo, error) { return infos, nil } -func (m *CheckManager) ensureCheck(name string) *CheckInfo { +func (m *CheckManager) ensureCheck(name string) *checkData { check, ok := m.checks[name] if !ok { - check = &CheckInfo{Name: name} + check = &checkData{name: name} m.checks[name] = check } return check } -func (m *CheckManager) updateCheckInfo(config *plan.Check, changeID string, failures int) { +func (m *CheckManager) updateCheckData(config *plan.Check, changeID string, failures int) { m.checksLock.Lock() defer m.checksLock.Unlock() @@ -358,12 +366,12 @@ func (m *CheckManager) updateCheckInfo(config *plan.Check, changeID string, fail } check := m.ensureCheck(config.Name) - check.Level = config.Level - check.Startup = startup - check.Status = status - check.Failures = failures - check.Threshold = config.Threshold - check.ChangeID = changeID + check.level = config.Level + check.startup = startup + check.status = status + check.failures = failures + check.threshold = config.Threshold + check.changeID = changeID } func (m *CheckManager) incPerformCheckCount(config *plan.Check) { @@ -382,7 +390,7 @@ func (m *CheckManager) incRecoverCheckCount(config *plan.Check) { check.recoverCheckCount += 1 } -func (m *CheckManager) deleteCheckInfo(name string) { +func (m *CheckManager) deleteCheckData(name string) { m.checksLock.Lock() defer m.checksLock.Unlock() @@ -391,13 +399,24 @@ func (m *CheckManager) deleteCheckInfo(name string) { // CheckInfo provides status information about a single check. type CheckInfo struct { - Name string - Level plan.CheckLevel - Startup plan.CheckStartup - Status CheckStatus - Failures int - Threshold int - ChangeID string + Name string + Level plan.CheckLevel + Startup plan.CheckStartup + Status CheckStatus + Failures int + Threshold int + ChangeID string +} + +// checkData holds the metrics and other data for a a single check. +type checkData struct { + name string + level plan.CheckLevel + startup plan.CheckStartup + status CheckStatus + failures int + threshold int + changeID string performCheckCount int64 recoverCheckCount int64 } @@ -414,9 +433,9 @@ type checker interface { check(ctx context.Context) error } -func (c *CheckInfo) writeMetrics(writer metrics.Writer) error { +func (c *checkData) writeMetrics(writer metrics.Writer) error { checkUp := int64(0) - if c.Status == CheckStatusUp { + if c.status == CheckStatusUp { checkUp = 1 } err := writer.Write(metrics.Metric{ @@ -424,7 +443,7 @@ func (c *CheckInfo) writeMetrics(writer metrics.Writer) error { Type: metrics.TypeGaugeInt, ValueInt64: checkUp, Comment: "Whether the health check is up (1) or not (0)", - Labels: []metrics.Label{metrics.NewLabel("check", c.Name)}, + Labels: []metrics.Label{metrics.NewLabel("check", c.name)}, }) if err != nil { return err @@ -435,7 +454,7 @@ func (c *CheckInfo) writeMetrics(writer metrics.Writer) error { Type: metrics.TypeCounterInt, ValueInt64: c.performCheckCount, Comment: "Number of times the perform-check has run", - Labels: []metrics.Label{metrics.NewLabel("check", c.Name)}, + Labels: []metrics.Label{metrics.NewLabel("check", c.name)}, }) if err != nil { return err @@ -446,7 +465,7 @@ func (c *CheckInfo) writeMetrics(writer metrics.Writer) error { Type: metrics.TypeCounterInt, ValueInt64: c.recoverCheckCount, Comment: "Number of times the recover-check has run", - Labels: []metrics.Label{metrics.NewLabel("check", c.Name)}, + Labels: []metrics.Label{metrics.NewLabel("check", c.name)}, }) if err != nil { return err @@ -510,21 +529,21 @@ func (m *CheckManager) StartChecks(checks []string) (started []string, err error for _, name := range checks { check := currentPlan.Checks[name] // We know this is ok because we checked it above. m.checksLock.Lock() - info, ok := m.checks[name] + checkData, ok := m.checks[name] m.checksLock.Unlock() if !ok { // This will be rare: either a PlanChanged is running and this is - // between the info being removed and then being added back, or the + // between the checkData being removed and then being added back, or the // check is new to the plan and PlanChanged hasn't finished yet. logger.Noticef("check %s is in the plan but not known to the manager", name) continue } // If the check is already running, skip it. - if info.ChangeID != "" { + if checkData.changeID != "" { continue } changeID := performCheckChange(m.state, check) - m.updateCheckInfo(check, changeID, 0) + m.updateCheckData(check, changeID, 0) started = append(started, check.Name) } @@ -553,7 +572,7 @@ func (m *CheckManager) StopChecks(checks []string) (stopped []string, err error) for _, name := range checks { check := currentPlan.Checks[name] // We know this is ok because we checked it above. m.checksLock.Lock() - info, ok := m.checks[name] + checkData, ok := m.checks[name] m.checksLock.Unlock() if !ok { // This will be rare: either a PlanChanged is running and this is @@ -563,10 +582,10 @@ func (m *CheckManager) StopChecks(checks []string) (stopped []string, err error) continue } // If the check is not running, skip it. - if info.ChangeID == "" { + if checkData.changeID == "" { continue } - change := m.state.Change(info.ChangeID) + change := m.state.Change(checkData.changeID) if change != nil { change.Abort() change.SetStatus(state.AbortStatus) @@ -576,7 +595,7 @@ func (m *CheckManager) StopChecks(checks []string) (stopped []string, err error) // same, so that people can inspect what the state of the check was when // it was stopped. The status of the check will be "inactive", but the // failure count combined with the threshold will give the full picture. - m.updateCheckInfo(check, "", info.Failures) + m.updateCheckData(check, "", checkData.failures) } return stopped, nil @@ -591,11 +610,11 @@ func (m *CheckManager) Replan() { for _, check := range currentPlan.Checks { m.checksLock.Lock() - info, ok := m.checks[check.Name] + checkData, ok := m.checks[check.Name] m.checksLock.Unlock() if !ok { // This will be rare: either a PlanChanged is running and this is - // between the info being removed and then being added back, or the + // between the checkData being removed and then being added back, or the // check is new to the plan and PlanChanged hasn't finished yet. logger.Noticef("check %s is in the plan but not known to the manager", check.Name) continue @@ -604,10 +623,10 @@ func (m *CheckManager) Replan() { continue } // If the check is already running, skip it. - if info.ChangeID != "" { + if checkData.changeID != "" { continue } changeID := performCheckChange(m.state, check) - m.updateCheckInfo(check, changeID, 0) + m.updateCheckData(check, changeID, 0) } } From 5fa2d92d65fb48a4b5d9885990f1b1ab5432a3d9 Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Tue, 18 Feb 2025 12:16:55 +0800 Subject: [PATCH 40/41] chore: change check metrics to success count and failure count --- internals/overlord/checkstate/handlers.go | 25 ++++--- internals/overlord/checkstate/manager.go | 38 +++++----- internals/overlord/checkstate/manager_test.go | 69 +++++-------------- 3 files changed, 50 insertions(+), 82 deletions(-) diff --git a/internals/overlord/checkstate/handlers.go b/internals/overlord/checkstate/handlers.go index 05cb85647..c4ed1bb0f 100644 --- a/internals/overlord/checkstate/handlers.go +++ b/internals/overlord/checkstate/handlers.go @@ -47,15 +47,16 @@ func (m *CheckManager) doPerformCheck(task *state.Task, tomb *tombpkg.Tomb) erro select { case <-ticker.C: err := runCheck(tomb.Context(nil), chk, config.Timeout.Value) - m.incPerformCheckCount(config) if !tomb.Alive() { return checkStopped(config.Name, task.Kind(), tomb.Err()) } if err != nil { + m.incFailureCount(config) // Record check failure and perform any action if the threshold // is reached (for example, restarting a service). details.Failures++ atThreshold := details.Failures >= config.Threshold + if !atThreshold { // Update number of failures in check info. In threshold // case, check data will be updated with new change ID by @@ -83,14 +84,17 @@ func (m *CheckManager) doPerformCheck(task *state.Task, tomb *tombpkg.Tomb) erro // and logs the error to the task log. return err } - } else if details.Failures > 0 { - m.updateCheckData(config, changeID, 0) - - m.state.Lock() - task.Logf("succeeded after %s", pluralise(details.Failures, "failure", "failures")) - details.Failures = 0 - task.Set(checkDetailsAttr, &details) - m.state.Unlock() + } else { + m.incSuccessCount(config) + if details.Failures > 0 { + m.updateCheckData(config, changeID, 0) + + m.state.Lock() + task.Logf("succeeded after %s", pluralise(details.Failures, "failure", "failures")) + details.Failures = 0 + task.Set(checkDetailsAttr, &details) + m.state.Unlock() + } } case <-tomb.Dying(): @@ -130,11 +134,11 @@ func (m *CheckManager) doRecoverCheck(task *state.Task, tomb *tombpkg.Tomb) erro select { case <-ticker.C: err := runCheck(tomb.Context(nil), chk, config.Timeout.Value) - m.incRecoverCheckCount(config) if !tomb.Alive() { return checkStopped(config.Name, task.Kind(), tomb.Err()) } if err != nil { + m.incFailureCount(config) details.Failures++ m.updateCheckData(config, changeID, details.Failures) @@ -149,6 +153,7 @@ func (m *CheckManager) doRecoverCheck(task *state.Task, tomb *tombpkg.Tomb) erro // Check succeeded, switch to performing a succeeding check. // Check info will be updated with new change ID by changeStatusChanged. + m.incSuccessCount(config) details.Failures = 0 // not strictly needed, but just to be safe details.Proceed = true m.state.Lock() diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index 627d54fd0..4e09f31ae 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -374,20 +374,20 @@ func (m *CheckManager) updateCheckData(config *plan.Check, changeID string, fail check.changeID = changeID } -func (m *CheckManager) incPerformCheckCount(config *plan.Check) { +func (m *CheckManager) incSuccessCount(config *plan.Check) { m.checksLock.Lock() defer m.checksLock.Unlock() check := m.ensureCheck(config.Name) - check.performCheckCount += 1 + check.successCount += 1 } -func (m *CheckManager) incRecoverCheckCount(config *plan.Check) { +func (m *CheckManager) incFailureCount(config *plan.Check) { m.checksLock.Lock() defer m.checksLock.Unlock() check := m.ensureCheck(config.Name) - check.recoverCheckCount += 1 + check.failureCount += 1 } func (m *CheckManager) deleteCheckData(name string) { @@ -410,15 +410,15 @@ type CheckInfo struct { // checkData holds the metrics and other data for a a single check. type checkData struct { - name string - level plan.CheckLevel - startup plan.CheckStartup - status CheckStatus - failures int - threshold int - changeID string - performCheckCount int64 - recoverCheckCount int64 + name string + level plan.CheckLevel + startup plan.CheckStartup + status CheckStatus + failures int + threshold int + changeID string + successCount int64 + failureCount int64 } type CheckStatus string @@ -450,10 +450,10 @@ func (c *checkData) writeMetrics(writer metrics.Writer) error { } err = writer.Write(metrics.Metric{ - Name: "pebble_perform_check_count", + Name: "pebble_check_success_count", Type: metrics.TypeCounterInt, - ValueInt64: c.performCheckCount, - Comment: "Number of times the perform-check has run", + ValueInt64: c.successCount, + Comment: "Number of times the check has succeeded", Labels: []metrics.Label{metrics.NewLabel("check", c.name)}, }) if err != nil { @@ -461,10 +461,10 @@ func (c *checkData) writeMetrics(writer metrics.Writer) error { } err = writer.Write(metrics.Metric{ - Name: "pebble_recover_check_count", + Name: "pebble_check_failure_count", Type: metrics.TypeCounterInt, - ValueInt64: c.recoverCheckCount, - Comment: "Number of times the recover-check has run", + ValueInt64: c.failureCount, + Comment: "Number of times the check has failed", Labels: []metrics.Label{metrics.NewLabel("check", c.name)}, }) if err != nil { diff --git a/internals/overlord/checkstate/manager_test.go b/internals/overlord/checkstate/manager_test.go index ad54c24b7..da6f4ad1d 100644 --- a/internals/overlord/checkstate/manager_test.go +++ b/internals/overlord/checkstate/manager_test.go @@ -871,7 +871,7 @@ func (s *ManagerSuite) TestReplan(c *C) { c.Assert(change.Kind(), Equals, "perform-check") } -func (s *ManagerSuite) TestMetricsPerformCheck(c *C) { +func (s *ManagerSuite) TestMetricsCheckSuccess(c *C) { tempDir := c.MkDir() tempFile := filepath.Join(tempDir, "file.txt") command := fmt.Sprintf(`/bin/sh -c 'echo -n x >>%s'`, tempFile) @@ -908,17 +908,17 @@ func (s *ManagerSuite) TestMetricsPerformCheck(c *C) { # HELP pebble_check_up Whether the health check is up (1) or not (0) # TYPE pebble_check_up gauge pebble_check_up{check="chk1"} 1 -# HELP pebble_perform_check_count Number of times the perform-check has run -# TYPE pebble_perform_check_count counter -pebble_perform_check_count{check="chk1"} 2 -# HELP pebble_recover_check_count Number of times the recover-check has run -# TYPE pebble_recover_check_count counter -pebble_recover_check_count{check="chk1"} 0 +# HELP pebble_check_success_count Number of times the check has succeeded +# TYPE pebble_check_success_count counter +pebble_check_success_count{check="chk1"} 2 +# HELP pebble_check_failure_count Number of times the check has failed +# TYPE pebble_check_failure_count counter +pebble_check_failure_count{check="chk1"} 0 `[1:] c.Assert(buf.String(), Equals, expected) } -func (s *ManagerSuite) TestMetricsRecoverCheck(c *C) { +func (s *ManagerSuite) TestMetricsCheckFailure(c *C) { testPath := c.MkDir() + "/test" err := os.WriteFile(testPath, nil, 0o644) c.Assert(err, IsNil) @@ -937,7 +937,8 @@ func (s *ManagerSuite) TestMetricsRecoverCheck(c *C) { }, }) - // After 2 failures, check is still up, perform-check counter is 2. + // After 2 failures, check is still up, pebble_check_success_count counter is 0, + // pebble_check_failure_count is 2. waitCheck(c, s.manager, "chk1", func(check *checkstate.CheckInfo) bool { return check.Failures == 2 }) @@ -949,50 +950,12 @@ func (s *ManagerSuite) TestMetricsRecoverCheck(c *C) { # HELP pebble_check_up Whether the health check is up (1) or not (0) # TYPE pebble_check_up gauge pebble_check_up{check="chk1"} 1 -# HELP pebble_perform_check_count Number of times the perform-check has run -# TYPE pebble_perform_check_count counter -pebble_perform_check_count{check="chk1"} 2 -# HELP pebble_recover_check_count Number of times the recover-check has run -# TYPE pebble_recover_check_count counter -pebble_recover_check_count{check="chk1"} 0 -`[1:] - c.Assert(buf.String(), Equals, expected) - - // After 3 failures, check is down, perform-check counter is 3, recover-check counter is 0. - waitCheck(c, s.manager, "chk1", func(check *checkstate.CheckInfo) bool { - return check.Failures == 3 - }) - buf.Reset() - s.manager.WriteMetrics(writer) - expected = ` -# HELP pebble_check_up Whether the health check is up (1) or not (0) -# TYPE pebble_check_up gauge -pebble_check_up{check="chk1"} 0 -# HELP pebble_perform_check_count Number of times the perform-check has run -# TYPE pebble_perform_check_count counter -pebble_perform_check_count{check="chk1"} 3 -# HELP pebble_recover_check_count Number of times the recover-check has run -# TYPE pebble_recover_check_count counter -pebble_recover_check_count{check="chk1"} 0 -`[1:] - c.Assert(buf.String(), Equals, expected) - - // After 4 failures, check is down, perform-check counter is 3, recover-check counter is 1. - waitCheck(c, s.manager, "chk1", func(check *checkstate.CheckInfo) bool { - return check.Failures == 4 - }) - buf.Reset() - s.manager.WriteMetrics(writer) - expected = ` -# HELP pebble_check_up Whether the health check is up (1) or not (0) -# TYPE pebble_check_up gauge -pebble_check_up{check="chk1"} 0 -# HELP pebble_perform_check_count Number of times the perform-check has run -# TYPE pebble_perform_check_count counter -pebble_perform_check_count{check="chk1"} 3 -# HELP pebble_recover_check_count Number of times the recover-check has run -# TYPE pebble_recover_check_count counter -pebble_recover_check_count{check="chk1"} 1 +# HELP pebble_check_success_count Number of times the check has succeeded +# TYPE pebble_check_success_count counter +pebble_check_success_count{check="chk1"} 0 +# HELP pebble_check_failure_count Number of times the check has failed +# TYPE pebble_check_failure_count counter +pebble_check_failure_count{check="chk1"} 2 `[1:] c.Assert(buf.String(), Equals, expected) } From 45f206fffd13469974061730c9833200b415bc2c Mon Sep 17 00:00:00 2001 From: Tiexin Guo Date: Tue, 18 Feb 2025 12:19:55 +0800 Subject: [PATCH 41/41] chore: revert unnecessary change --- internals/overlord/checkstate/handlers.go | 1 - 1 file changed, 1 deletion(-) diff --git a/internals/overlord/checkstate/handlers.go b/internals/overlord/checkstate/handlers.go index c4ed1bb0f..77c5a2766 100644 --- a/internals/overlord/checkstate/handlers.go +++ b/internals/overlord/checkstate/handlers.go @@ -56,7 +56,6 @@ func (m *CheckManager) doPerformCheck(task *state.Task, tomb *tombpkg.Tomb) erro // is reached (for example, restarting a service). details.Failures++ atThreshold := details.Failures >= config.Threshold - if !atThreshold { // Update number of failures in check info. In threshold // case, check data will be updated with new change ID by