-
Notifications
You must be signed in to change notification settings - Fork 25
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
WIP: Add initial support for opentelemetry tracing in maestro server #253
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
package common | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"os" | ||
"time" | ||
|
||
"go.opentelemetry.io/contrib/exporters/autoexport" | ||
"go.opentelemetry.io/otel" | ||
"go.opentelemetry.io/otel/propagation" | ||
"go.opentelemetry.io/otel/sdk/resource" | ||
tracesdk "go.opentelemetry.io/otel/sdk/trace" | ||
semconv "go.opentelemetry.io/otel/semconv/v1.25.0" | ||
|
||
errors "github.com/zgalor/weberr" | ||
|
||
"github.com/openshift-online/maestro/pkg/constants" | ||
"github.com/openshift-online/maestro/pkg/logger" | ||
) | ||
|
||
// Without a specific configuration, a noop tracer is used by default. | ||
// At least two environment variables must be configured to enable trace export: | ||
// - name: OTEL_EXPORTER_OTLP_ENDPOINT | ||
// value: http(s)://<service>.<namespace>:4318 | ||
// - name: OTEL_TRACES_EXPORTER | ||
// value: otlp | ||
func InstallOpenTelemetryTracer(ctx context.Context, log logger.OCMLogger) (func(context.Context) error, error) { | ||
log.Info("initializing OpenTelemetry tracer") | ||
|
||
exp, err := autoexport.NewSpanExporter(ctx, autoexport.WithFallbackSpanExporter(newNoopFactory)) | ||
if err != nil { | ||
return nil, errors.Errorf("failed to create OTEL exporter: %s", err) | ||
} | ||
|
||
resources, err := resource.New(context.Background(), | ||
resource.WithAttributes( | ||
semconv.ServiceNameKey.String(constants.DefaultSourceID), | ||
), | ||
resource.WithHost(), | ||
) | ||
if err != nil { | ||
return nil, errors.Errorf("failed to initialize trace resources: %s", err) | ||
} | ||
|
||
tp := tracesdk.NewTracerProvider( | ||
tracesdk.WithBatcher(exp), | ||
tracesdk.WithResource(resources), | ||
) | ||
otel.SetTracerProvider(tp) | ||
|
||
shutdown := func(ctx context.Context) error { | ||
ctx, cancel := context.WithTimeout(ctx, 5*time.Second) | ||
defer cancel() | ||
return tp.Shutdown(ctx) | ||
} | ||
|
||
propagator := propagation.NewCompositeTextMapPropagator(propagation.Baggage{}, propagation.TraceContext{}) | ||
otel.SetTextMapPropagator(propagator) | ||
|
||
otel.SetErrorHandler(otelErrorHandlerFunc(func(err error) { | ||
log.Error(fmt.Sprintf("OpenTelemetry.ErrorHandler: %v", err)) | ||
})) | ||
|
||
return shutdown, nil | ||
} | ||
|
||
// TracingEnabled returns true if the environment variable OTEL_TRACES_EXPORTER | ||
// to configure the OpenTelemetry Exporter is defined. | ||
func TracingEnabled() bool { | ||
_, ok := os.LookupEnv("OTEL_TRACES_EXPORTER") | ||
return ok | ||
} | ||
|
||
type otelErrorHandlerFunc func(error) | ||
|
||
// Handle implements otel.ErrorHandler | ||
func (f otelErrorHandlerFunc) Handle(err error) { | ||
f(err) | ||
} | ||
|
||
func newNoopFactory(_ context.Context) (tracesdk.SpanExporter, error) { | ||
return &noopSpanExporter{}, nil | ||
} | ||
|
||
var _ tracesdk.SpanExporter = noopSpanExporter{} | ||
|
||
// noopSpanExporter is an implementation of trace.SpanExporter that performs no operations. | ||
type noopSpanExporter struct{} | ||
|
||
// ExportSpans is part of trace.SpanExporter interface. | ||
func (e noopSpanExporter) ExportSpans(ctx context.Context, spans []tracesdk.ReadOnlySpan) error { | ||
return nil | ||
} | ||
|
||
// Shutdown is part of trace.SpanExporter interface. | ||
func (e noopSpanExporter) Shutdown(ctx context.Context) error { | ||
return nil | ||
} |
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -2,20 +2,23 @@ package servecmd | |||||||||
|
||||||||||
import ( | ||||||||||
"context" | ||||||||||
"fmt" | ||||||||||
"os" | ||||||||||
"os/signal" | ||||||||||
"syscall" | ||||||||||
|
||||||||||
"github.com/spf13/cobra" | ||||||||||
"k8s.io/klog/v2" | ||||||||||
|
||||||||||
"github.com/openshift-online/maestro/cmd/maestro/common" | ||||||||||
"github.com/openshift-online/maestro/cmd/maestro/environments" | ||||||||||
"github.com/openshift-online/maestro/cmd/maestro/server" | ||||||||||
"github.com/openshift-online/maestro/pkg/config" | ||||||||||
"github.com/openshift-online/maestro/pkg/controllers" | ||||||||||
"github.com/openshift-online/maestro/pkg/db" | ||||||||||
"github.com/openshift-online/maestro/pkg/dispatcher" | ||||||||||
"github.com/openshift-online/maestro/pkg/event" | ||||||||||
"github.com/openshift-online/maestro/pkg/logger" | ||||||||||
) | ||||||||||
|
||||||||||
func NewServerCommand() *cobra.Command { | ||||||||||
|
@@ -76,6 +79,20 @@ func runServer(cmd *cobra.Command, args []string) { | |||||||||
|
||||||||||
ctx, cancel := context.WithCancel(context.Background()) | ||||||||||
|
||||||||||
tracingShutdown := func(context.Context) error { return nil } | ||||||||||
log := logger.NewOCMLogger(ctx) | ||||||||||
if common.TracingEnabled() { | ||||||||||
tracingShutdown, err = common.InstallOpenTelemetryTracer(ctx, log) | ||||||||||
if err != nil { | ||||||||||
log.Error(fmt.Sprintf("Can't initialize OpenTelemetry trace provider: %v", err)) | ||||||||||
os.Exit(1) | ||||||||||
} | ||||||||||
} | ||||||||||
if err != nil { | ||||||||||
log.Error(fmt.Sprintf("Can't initialize OpenTelemetry trace provider: %v", err)) | ||||||||||
os.Exit(1) | ||||||||||
} | ||||||||||
Comment on lines
+91
to
+94
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it seems duplicated code?
Suggested change
|
||||||||||
|
||||||||||
stopCh := make(chan os.Signal, 1) | ||||||||||
signal.Notify(stopCh, syscall.SIGINT, syscall.SIGTERM) | ||||||||||
go func() { | ||||||||||
|
@@ -89,6 +106,10 @@ func runServer(cmd *cobra.Command, args []string) { | |||||||||
if err := metricsServer.Stop(); err != nil { | ||||||||||
klog.Errorf("Failed to stop metrics server, %v", err) | ||||||||||
} | ||||||||||
|
||||||||||
if tracingShutdown != nil && tracingShutdown(ctx) != nil { | ||||||||||
log.Warning(fmt.Sprintf("OpenTelemetry trace provider failed to shutdown: %v", err)) | ||||||||||
} | ||||||||||
}() | ||||||||||
|
||||||||||
// Start the event broadcaster | ||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,8 +15,10 @@ import ( | |
gorillahandlers "github.com/gorilla/handlers" | ||
sdk "github.com/openshift-online/ocm-sdk-go" | ||
"github.com/openshift-online/ocm-sdk-go/authentication" | ||
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" | ||
"k8s.io/klog/v2" | ||
|
||
"github.com/openshift-online/maestro/cmd/maestro/common" | ||
"github.com/openshift-online/maestro/cmd/maestro/environments" | ||
"github.com/openshift-online/maestro/data/generated/openapi" | ||
"github.com/openshift-online/maestro/pkg/errors" | ||
|
@@ -119,6 +121,16 @@ func NewAPIServer(eventBroadcaster *event.EventBroadcaster) Server { | |
)(mainHandler) | ||
|
||
mainHandler = removeTrailingSlash(mainHandler) | ||
mainHandler = traceAttributeMiddleware(mainHandler) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Like we did for CS, we can add more attributes to the span in a follow-up PR to keep things simple. |
||
if common.TracingEnabled() { | ||
mainHandler = otelhttp.NewHandler(mainHandler, "apiserver", | ||
otelhttp.WithSpanNameFormatter( | ||
func(operation string, r *http.Request) string { | ||
return fmt.Sprintf("%s %s %s", operation, "HTTP", r.Method) | ||
}, | ||
), | ||
) | ||
} | ||
|
||
s.httpServer = &http.Server{ | ||
Addr: env().Config.HTTPServer.Hostname + ":" + env().Config.HTTPServer.BindPort, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,7 +5,9 @@ import ( | |
|
||
gorillahandlers "github.com/gorilla/handlers" | ||
"github.com/gorilla/mux" | ||
"go.opentelemetry.io/contrib/instrumentation/github.com/gorilla/mux/otelmux" | ||
|
||
"github.com/openshift-online/maestro/cmd/maestro/common" | ||
"github.com/openshift-online/maestro/cmd/maestro/server/logging" | ||
"github.com/openshift-online/maestro/pkg/api" | ||
"github.com/openshift-online/maestro/pkg/auth" | ||
|
@@ -47,6 +49,10 @@ func (s *apiServer) routes() *mux.Router { | |
mainRouter := mux.NewRouter() | ||
mainRouter.NotFoundHandler = http.HandlerFunc(api.SendNotFound) | ||
|
||
if common.TracingEnabled() { | ||
mainRouter.Use(otelmux.Middleware("serve")) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Lets use proto + method here as spanname. |
||
} | ||
|
||
// Operation ID middleware sets a relatively unique operation ID in the context of each request for debugging purposes | ||
mainRouter.Use(logger.OperationIDMiddleware) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
package server | ||
|
||
import ( | ||
"net/http" | ||
|
||
"go.opentelemetry.io/otel/attribute" | ||
"go.opentelemetry.io/otel/baggage" | ||
"go.opentelemetry.io/otel/trace" | ||
|
||
"github.com/openshift-online/maestro/pkg/constants" | ||
"github.com/openshift-online/maestro/pkg/logger" | ||
) | ||
|
||
// traceAttributeMiddleware is currently only relevant for the correlation of | ||
// requests by the ARO-HCP resource provider frontend. | ||
// | ||
// The middleware extracts correlation data transferred in the baggage and sets | ||
// it as an attribute in the currently active span. | ||
// This middleware has no effect if tracing is deactivated or if there is no | ||
// data in the transferred baggage. | ||
func traceAttributeMiddleware(h http.Handler) http.Handler { | ||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { | ||
ctx := logger.WithOpID(r.Context()) | ||
b := baggage.FromContext(ctx) | ||
attrs := []attribute.KeyValue{} | ||
bvalues := []string{constants.ClusterServiceClusterID, constants.AROCorrelationID, constants.AROClientRequestID, constants.ARORequestID, string(logger.OpIDKey)} | ||
for _, k := range bvalues { | ||
if v := b.Member(k).Value(); v != "" { | ||
attrs = append(attrs, attribute.String(k, b.Member(k).Value())) | ||
} | ||
} | ||
|
||
if len(attrs) > 0 { | ||
trace.SpanFromContext(ctx).SetAttributes(attrs...) | ||
} | ||
h.ServeHTTP(w, r) | ||
}) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe we should place the initialization to a place where it can be reused by the frontend too. Wdyt?
E.g. here: https://github.com/Azure/ARO-HCP/tree/main/internal
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agreed but should it be somewhere else instead of "internal" package? Internal pkg usually means it is not exportable or should not be exportable.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
github.com/openshift-online/ocm-common might be the best option since it's already imported by https://gitlab.cee.redhat.com/service/uhc-clusters-service/ too. And it should be ok to import it in https://github.com/Azure/ARO-HCP/ too.