Skip to content

Commit

Permalink
Add initial support for opentelemetry tracing in maestro server
Browse files Browse the repository at this point in the history
Signed-off-by: Vu Dinh <vudinh@outlook.com>
  • Loading branch information
dinhxuanvu committed Feb 19, 2025
1 parent 373563d commit 5ae6b39
Show file tree
Hide file tree
Showing 7 changed files with 192 additions and 0 deletions.
99 changes: 99 additions & 0 deletions cmd/maestro/common/otlp_sdk.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package common

import (
"context"
"fmt"
"os"
"time"

"go.opentelemetry.io/contrib/exporters/autoexport"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/propagation"
"go.opentelemetry.io/otel/sdk/resource"
tracesdk "go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.25.0"

errors "github.com/zgalor/weberr"

"github.com/openshift-online/maestro/pkg/constants"
"github.com/openshift-online/maestro/pkg/logger"
)

// Without a specific configuration, a noop tracer is used by default.
// At least two environment variables must be configured to enable trace export:
// - name: OTEL_EXPORTER_OTLP_ENDPOINT
// value: http(s)://<service>.<namespace>:4318
// - name: OTEL_TRACES_EXPORTER
// value: otlp
func InstallOpenTelemetryTracer(ctx context.Context, log logger.OCMLogger) (func(context.Context) error, error) {
log.Info("initializing OpenTelemetry tracer")

exp, err := autoexport.NewSpanExporter(ctx, autoexport.WithFallbackSpanExporter(newNoopFactory))
if err != nil {
return nil, errors.Errorf("failed to create OTEL exporter: %s", err)
}

resources, err := resource.New(context.Background(),
resource.WithAttributes(
semconv.ServiceNameKey.String(constants.DefaultSourceID),
),
resource.WithHost(),
)
if err != nil {
return nil, errors.Errorf("failed to initialize trace resources: %s", err)
}

tp := tracesdk.NewTracerProvider(
tracesdk.WithBatcher(exp),
tracesdk.WithResource(resources),
)
otel.SetTracerProvider(tp)

shutdown := func(ctx context.Context) error {
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
defer cancel()
return tp.Shutdown(ctx)
}

propagator := propagation.NewCompositeTextMapPropagator(propagation.Baggage{}, propagation.TraceContext{})
otel.SetTextMapPropagator(propagator)

otel.SetErrorHandler(otelErrorHandlerFunc(func(err error) {
log.Error(fmt.Sprintf("OpenTelemetry.ErrorHandler: %v", err))
}))

return shutdown, nil
}

// TracingEnabled returns true if the environment variable OTEL_TRACES_EXPORTER
// to configure the OpenTelemetry Exporter is defined.
func TracingEnabled() bool {
_, ok := os.LookupEnv("OTEL_TRACES_EXPORTER")
return ok
}

type otelErrorHandlerFunc func(error)

// Handle implements otel.ErrorHandler
func (f otelErrorHandlerFunc) Handle(err error) {
f(err)
}

func newNoopFactory(_ context.Context) (tracesdk.SpanExporter, error) {
return &noopSpanExporter{}, nil
}

var _ tracesdk.SpanExporter = noopSpanExporter{}

// noopSpanExporter is an implementation of trace.SpanExporter that performs no operations.
type noopSpanExporter struct{}

// ExportSpans is part of trace.SpanExporter interface.
func (e noopSpanExporter) ExportSpans(ctx context.Context, spans []tracesdk.ReadOnlySpan) error {
return nil
}

// Shutdown is part of trace.SpanExporter interface.
func (e noopSpanExporter) Shutdown(ctx context.Context) error {
return nil
}
21 changes: 21 additions & 0 deletions cmd/maestro/servecmd/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,23 @@ package servecmd

import (
"context"
"fmt"
"os"
"os/signal"
"syscall"

"github.com/spf13/cobra"
"k8s.io/klog/v2"

"github.com/openshift-online/maestro/cmd/maestro/common"
"github.com/openshift-online/maestro/cmd/maestro/environments"
"github.com/openshift-online/maestro/cmd/maestro/server"
"github.com/openshift-online/maestro/pkg/config"
"github.com/openshift-online/maestro/pkg/controllers"
"github.com/openshift-online/maestro/pkg/db"
"github.com/openshift-online/maestro/pkg/dispatcher"
"github.com/openshift-online/maestro/pkg/event"
"github.com/openshift-online/maestro/pkg/logger"
)

func NewServerCommand() *cobra.Command {
Expand Down Expand Up @@ -76,6 +79,20 @@ func runServer(cmd *cobra.Command, args []string) {

ctx, cancel := context.WithCancel(context.Background())

tracingShutdown := func(context.Context) error { return nil }
log := logger.NewOCMLogger(ctx)
if common.TracingEnabled() {
tracingShutdown, err = common.InstallOpenTelemetryTracer(ctx, log)
if err != nil {
log.Error(fmt.Sprintf("Can't initialize OpenTelemetry trace provider: %v", err))
os.Exit(1)
}
}
if err != nil {
log.Error(fmt.Sprintf("Can't initialize OpenTelemetry trace provider: %v", err))
os.Exit(1)
}

stopCh := make(chan os.Signal, 1)
signal.Notify(stopCh, syscall.SIGINT, syscall.SIGTERM)
go func() {
Expand All @@ -89,6 +106,10 @@ func runServer(cmd *cobra.Command, args []string) {
if err := metricsServer.Stop(); err != nil {
klog.Errorf("Failed to stop metrics server, %v", err)
}

if tracingShutdown != nil && tracingShutdown(ctx) != nil {
log.Warning(fmt.Sprintf("OpenTelemetry trace provider failed to shutdown: %v", err))
}
}()

// Start the event broadcaster
Expand Down
12 changes: 12 additions & 0 deletions cmd/maestro/server/api_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@ import (
gorillahandlers "github.com/gorilla/handlers"
sdk "github.com/openshift-online/ocm-sdk-go"
"github.com/openshift-online/ocm-sdk-go/authentication"
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
"k8s.io/klog/v2"

"github.com/openshift-online/maestro/cmd/maestro/common"
"github.com/openshift-online/maestro/cmd/maestro/environments"
"github.com/openshift-online/maestro/data/generated/openapi"
"github.com/openshift-online/maestro/pkg/errors"
Expand Down Expand Up @@ -119,6 +121,16 @@ func NewAPIServer(eventBroadcaster *event.EventBroadcaster) Server {
)(mainHandler)

mainHandler = removeTrailingSlash(mainHandler)
mainHandler = traceAttributeMiddleware(mainHandler)
if common.TracingEnabled() {
mainHandler = otelhttp.NewHandler(mainHandler, "apiserver",
otelhttp.WithSpanNameFormatter(
func(operation string, r *http.Request) string {
return fmt.Sprintf("%s %s %s", operation, "HTTP", r.Method)
},
),
)
}

s.httpServer = &http.Server{
Addr: env().Config.HTTPServer.Hostname + ":" + env().Config.HTTPServer.BindPort,
Expand Down
6 changes: 6 additions & 0 deletions cmd/maestro/server/routes.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ import (

gorillahandlers "github.com/gorilla/handlers"
"github.com/gorilla/mux"
"go.opentelemetry.io/contrib/instrumentation/github.com/gorilla/mux/otelmux"

"github.com/openshift-online/maestro/cmd/maestro/common"
"github.com/openshift-online/maestro/cmd/maestro/server/logging"
"github.com/openshift-online/maestro/pkg/api"
"github.com/openshift-online/maestro/pkg/auth"
Expand Down Expand Up @@ -47,6 +49,10 @@ func (s *apiServer) routes() *mux.Router {
mainRouter := mux.NewRouter()
mainRouter.NotFoundHandler = http.HandlerFunc(api.SendNotFound)

if common.TracingEnabled() {
mainRouter.Use(otelmux.Middleware("serve"))
}

// Operation ID middleware sets a relatively unique operation ID in the context of each request for debugging purposes
mainRouter.Use(logger.OperationIDMiddleware)

Expand Down
38 changes: 38 additions & 0 deletions cmd/maestro/server/trace_handler.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package server

import (
"net/http"

"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/baggage"
"go.opentelemetry.io/otel/trace"

"github.com/openshift-online/maestro/pkg/constants"
"github.com/openshift-online/maestro/pkg/logger"
)

// traceAttributeMiddleware is currently only relevant for the correlation of
// requests by the ARO-HCP resource provider frontend.
//
// The middleware extracts correlation data transferred in the baggage and sets
// it as an attribute in the currently active span.
// This middleware has no effect if tracing is deactivated or if there is no
// data in the transferred baggage.
func traceAttributeMiddleware(h http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
ctx := logger.WithOpID(r.Context())
b := baggage.FromContext(ctx)
attrs := []attribute.KeyValue{}
bvalues := []string{constants.ClusterServiceClusterID, constants.AROCorrelationID, constants.AROClientRequestID, constants.ARORequestID, string(logger.OpIDKey)}
for _, k := range bvalues {
if v := b.Member(k).Value(); v != "" {
attrs = append(attrs, attribute.String(k, b.Member(k).Value()))
}
}

if len(attrs) > 0 {
trace.SpanFromContext(ctx).SetAttributes(attrs...)
}
h.ServeHTTP(w, r)
})
}
7 changes: 7 additions & 0 deletions pkg/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,11 @@ const (
// MinTokenLifeThreshold defines the minimum remaining lifetime (in seconds) of the access token before
// it should be refreshed.
MinTokenLifeThreshold = 60.0

// Tracing IDs
// TODO: May need to move to another repo so they can be shareable
ClusterServiceClusterID = "cs.cluster.id"
AROCorrelationID = "aro.correlation.id"
AROClientRequestID = "aro.client.request.id"
ARORequestID = "aro.request.id"
)
9 changes: 9 additions & 0 deletions pkg/logger/operationid_middleware.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ import (
"context"
"net/http"

"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"

"github.com/getsentry/sentry-go"
"github.com/segmentio/ksuid"
)
Expand All @@ -17,9 +20,11 @@ const OpIDHeader OperationIDKey = "X-Operation-ID"
func OperationIDMiddleware(handler http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
ctx := WithOpID(r.Context())
span := trace.SpanFromContext(ctx)

opID, ok := ctx.Value(OpIDKey).(string)
if ok && len(opID) > 0 {
span.SetAttributes(operationIDAttribute(opID))
w.Header().Set(string(OpIDHeader), opID)
}

Expand Down Expand Up @@ -49,3 +54,7 @@ func GetOperationID(ctx context.Context) string {
}
return ""
}

func operationIDAttribute(id string) attribute.KeyValue {
return attribute.String(string(OpIDKey), id)
}

0 comments on commit 5ae6b39

Please sign in to comment.