Skip to content

Commit

Permalink
Add support for OTEL tracing
Browse files Browse the repository at this point in the history
`crictl` now features 3 new CLI parameters:

- `--enable-tracing`: Enable OpenTelemetry tracing. (default: false)
- `--tracing-endpoint`: Address of the gRPC tracing collector to which spans will be sent. (default: "127.0.0.1:4317")
- `--tracing-sampling-rate-per-million`: Number of samples to collect per million OpenTelemetry spans. Set to 1000000 or -1 to always sample. (default: -1)

The tracer provider will be created on startup and the `Shutdown()`
invocation will ensure that all spans are processed before exiting the
binary.

The `hack/tracing` directory contains scripts for local testing:

```
> ./hack/tracing/start
…
Everything is ready, open http://localhost:16686 to access jaeger
```

Now run `crictl` with `--enable-tracing`:

```
> sudo ./build/bin/linux/amd64/crictl --enable-tracing ps
```

Jaeger should then show the collected traces and spans for the 3 RPCs
`ListContainers`, `ImageFsInfo` and `Version`.

Signed-off-by: Sascha Grunert <[email protected]>
  • Loading branch information
saschagrunert committed Feb 9, 2024
1 parent 052273d commit f6e57a2
Show file tree
Hide file tree
Showing 175 changed files with 14,195 additions and 8,597 deletions.
70 changes: 64 additions & 6 deletions cmd/crictl/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package main

import (
"context"
"fmt"
"os"
"runtime"
Expand All @@ -25,11 +26,15 @@ import (

"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
"go.opentelemetry.io/otel/trace"
"go.opentelemetry.io/otel/trace/noop"

internalapi "k8s.io/cri-api/pkg/apis"
"k8s.io/kubernetes/pkg/kubelet/cri/remote"

"github.com/kubernetes-sigs/cri-tools/pkg/common"
"github.com/kubernetes-sigs/cri-tools/pkg/tracing"
"github.com/kubernetes-sigs/cri-tools/pkg/version"
)

Expand All @@ -53,9 +58,11 @@ var (
PullImageOnCreate bool
// DisablePullOnRun disable pulling image on run requests
DisablePullOnRun bool
// tracerProvider is the global OpenTelemetry tracing instance.
tracerProvider *sdktrace.TracerProvider
)

func getRuntimeService(context *cli.Context, timeout time.Duration) (res internalapi.RuntimeService, err error) {
func getRuntimeService(_ *cli.Context, timeout time.Duration) (res internalapi.RuntimeService, err error) {
if RuntimeEndpointIsSet && RuntimeEndpoint == "" {
return nil, fmt.Errorf("--runtime-endpoint is not set")
}
Expand All @@ -67,6 +74,13 @@ func getRuntimeService(context *cli.Context, timeout time.Duration) (res interna
t = timeout
}

// Use the noop tracer provider and not tracerProvider directly, otherwise
// we'll panic in the unary call interceptor
var tp trace.TracerProvider = noop.NewTracerProvider()
if tracerProvider != nil {
tp = tracerProvider
}

// If no EP set then use the default endpoint types
if !RuntimeEndpointIsSet {
logrus.Warningf("runtime connect using default endpoints: %v. "+
Expand All @@ -79,7 +93,7 @@ func getRuntimeService(context *cli.Context, timeout time.Duration) (res interna
for _, endPoint := range defaultRuntimeEndpoints {
logrus.Debugf("Connect using endpoint %q with %q timeout", endPoint, t)

res, err = remote.NewRemoteRuntimeService(endPoint, t, nil)
res, err = remote.NewRemoteRuntimeService(endPoint, t, tp)
if err != nil {
logrus.Error(err)
continue
Expand All @@ -90,10 +104,10 @@ func getRuntimeService(context *cli.Context, timeout time.Duration) (res interna
}
return res, err
}
return remote.NewRemoteRuntimeService(RuntimeEndpoint, t, nil)
return remote.NewRemoteRuntimeService(RuntimeEndpoint, t, tp)
}

func getImageService(context *cli.Context) (res internalapi.ImageManagerService, err error) {
func getImageService(*cli.Context) (res internalapi.ImageManagerService, err error) {
if ImageEndpoint == "" {
if RuntimeEndpointIsSet && RuntimeEndpoint == "" {
return nil, fmt.Errorf("--image-endpoint is not set")
Expand All @@ -103,6 +117,14 @@ func getImageService(context *cli.Context) (res internalapi.ImageManagerService,
}

logrus.Debugf("get image connection")

// Use the noop tracer provider and not tracerProvider directly, otherwise
// we'll panic in the unary call interceptor
var tp trace.TracerProvider = noop.NewTracerProvider()
if tracerProvider != nil {
tp = tracerProvider
}

// If no EP set then use the default endpoint types
if !ImageEndpointIsSet {
logrus.Warningf("image connect using default endpoints: %v. "+
Expand All @@ -115,7 +137,7 @@ func getImageService(context *cli.Context) (res internalapi.ImageManagerService,
for _, endPoint := range defaultRuntimeEndpoints {
logrus.Debugf("Connect using endpoint %q with %q timeout", endPoint, Timeout)

res, err = remote.NewRemoteImageService(endPoint, Timeout, nil)
res, err = remote.NewRemoteImageService(endPoint, Timeout, tp)
if err != nil {
logrus.Error(err)
continue
Expand All @@ -126,7 +148,7 @@ func getImageService(context *cli.Context) (res internalapi.ImageManagerService,
}
return res, err
}
return remote.NewRemoteImageService(ImageEndpoint, Timeout, nil)
return remote.NewRemoteImageService(ImageEndpoint, Timeout, tp)
}

func getTimeout(timeDuration time.Duration) time.Duration {
Expand Down Expand Up @@ -220,6 +242,20 @@ func main() {
Aliases: []string{"D"},
Usage: "Enable debug mode",
},
&cli.BoolFlag{
Name: "enable-tracing",
Usage: "Enable OpenTelemetry tracing.",
},
&cli.IntFlag{
Name: "tracing-sampling-rate-per-million",
Usage: "Number of samples to collect per million OpenTelemetry spans. Set to 1000000 or -1 to always sample.",
Value: -1,
},
&cli.StringFlag{
Name: "tracing-endpoint",
Usage: "Address to which the gRPC tracing collector will send spans to.",
Value: "127.0.0.1:4317",
},
}

app.Before = func(context *cli.Context) (err error) {
Expand Down Expand Up @@ -290,6 +326,19 @@ func main() {
if Debug {
logrus.SetLevel(logrus.DebugLevel)
}

// Configure tracing if enabled
if context.IsSet("enable-tracing") {
tracerProvider, err = tracing.Init(
context.Context,
context.String("tracing-endpoint"),
context.Int("tracing-sampling-rate-per-million"),
)
if err != nil {
return fmt.Errorf("init tracing: %w", err)
}
}

return nil
}
// sort all flags
Expand All @@ -301,4 +350,13 @@ func main() {
if err := app.Run(os.Args); err != nil {
logrus.Fatal(err)
}

// Ensure that all spans are processed.
if tracerProvider != nil {
ctx, cancel := context.WithTimeout(context.Background(), Timeout)
defer cancel()
if err := tracerProvider.Shutdown(ctx); err != nil {
logrus.Errorf("Unable to shutdown tracer provider: %v", err)
}
}
}
26 changes: 13 additions & 13 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ require (
github.com/opencontainers/selinux v1.11.0
github.com/sirupsen/logrus v1.9.3
github.com/urfave/cli/v2 v2.27.1
go.opentelemetry.io/otel v1.22.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.22.0
go.opentelemetry.io/otel/sdk v1.22.0
go.opentelemetry.io/otel/trace v1.22.0
golang.org/x/net v0.20.0
golang.org/x/sys v0.17.0
golang.org/x/term v0.17.0
Expand Down Expand Up @@ -44,7 +48,7 @@ require (
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
github.com/felixge/httpsnoop v1.0.3 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/go-logr/logr v1.3.0 // indirect
github.com/go-logr/logr v1.4.1 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-openapi/jsonpointer v0.19.6 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect
Expand Down Expand Up @@ -77,22 +81,18 @@ require (
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.42.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.44.0 // indirect
go.opentelemetry.io/otel v1.19.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0 // indirect
go.opentelemetry.io/otel/metric v1.19.0 // indirect
go.opentelemetry.io/otel/sdk v1.19.0 // indirect
go.opentelemetry.io/otel/trace v1.19.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.22.0 // indirect
go.opentelemetry.io/otel/metric v1.22.0 // indirect
go.opentelemetry.io/proto/otlp v1.0.0 // indirect
golang.org/x/mod v0.14.0 // indirect
golang.org/x/oauth2 v0.10.0 // indirect
golang.org/x/oauth2 v0.13.0 // indirect
golang.org/x/time v0.3.0 // indirect
golang.org/x/tools v0.16.1 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20230726155614-23370e0ffb3e // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect
google.golang.org/grpc v1.58.3 // indirect
google.golang.org/protobuf v1.31.0 // indirect
google.golang.org/appengine v1.6.8 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20231002182017-d307bd883b97 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20231002182017-d307bd883b97 // indirect
google.golang.org/grpc v1.60.1 // indirect
google.golang.org/protobuf v1.32.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
k8s.io/apiextensions-apiserver v0.0.0 // indirect
Expand Down
Loading

0 comments on commit f6e57a2

Please sign in to comment.