diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index 0172b16..2442438 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -13,6 +13,7 @@ import,github.com/containerd/log,Apache-2.0,unknown import,github.com/containerd/platforms,Apache-2.0,unknown import,github.com/containerd/ttrpc,Apache-2.0,unknown import,github.com/containerd/typeurl/v2,Apache-2.0,unknown +import,github.com/cpuguy83/go-md2man/v2/md2man,MIT,Copyright (c) 2014 Brian Goff import,github.com/davecgh/go-spew/spew,ISC,Copyright (c) 2012-2016 Dave Collins import,github.com/distribution/reference,Apache-2.0,unknown import,github.com/docker/docker,Apache-2.0,"Copyright 2013-2018 Docker, Inc. | Copyright 2012-2017 Docker, Inc." @@ -63,11 +64,13 @@ import,github.com/opencontainers/go-digest,Apache-2.0,"Copyright 2019, 2020 OCI import,github.com/opencontainers/image-spec,Apache-2.0,Copyright 2016 The Linux Foundation. import,github.com/opencontainers/runtime-spec/specs-go,Apache-2.0,Copyright 2015 The Linux Foundation. 
import,github.com/opencontainers/selinux,Apache-2.0,unknown -import,github.com/peterbourgon/ff/v3,Apache-2.0,unknown import,github.com/pkg/errors,BSD-2-Clause,"Copyright (c) 2015, Dave Cheney " +import,github.com/russross/blackfriday/v2,BSD-2-Clause,unknown import,github.com/sirupsen/logrus,MIT,Copyright (c) 2014 Simon Eskildsen import,github.com/tklauser/numcpus,Apache-2.0,unknown +import,github.com/urfave/cli/v3,MIT,Copyright (c) 2022 urfave/cli maintainers import,github.com/x448/float16,MIT,Copyright (c) 2019 Montgomery Edwards⁴⁴⁸ and Faye Amacker +import,github.com/xrash/smetrics,MIT,Copyright (C) 2016 Felipe da Cunha Gonçalves import,github.com/zeebo/xxh3,BSD-2-Clause,"Copyright (c) 2012-2014, Yann Collet | Copyright (c) 2019, Jeff Wendling" import,go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp,Apache-2.0,unknown import,go.opentelemetry.io/otel,Apache-2.0,unknown diff --git a/cli_flags.go b/cli_flags.go index 7784fbe..7112e12 100644 --- a/cli_flags.go +++ b/cli_flags.go @@ -1,24 +1,19 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Apache License 2.0. - * See the file "LICENSE" for details. - */ - // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2024 Datadog, Inc. package main import ( - "flag" + "context" "fmt" "os" - "strconv" + "runtime" "time" + "github.com/DataDog/dd-otel-host-profiler/version" cebpf "github.com/cilium/ebpf" - "github.com/peterbourgon/ff/v3" log "github.com/sirupsen/logrus" + "github.com/urfave/cli/v3" "github.com/open-telemetry/opentelemetry-ebpf-profiler/tracer" ) @@ -40,164 +35,283 @@ const ( maxArgMapScaleFactor = 8 ) -// Help strings for command line arguments -var ( - noKernelVersionCheckHelp = "Disable checking kernel version for eBPF support. " + - "Use at your own risk, to run the agent on older kernels with backported eBPF features."
- collAgentAddrHelp = "The Datadog agent URL in the format of http://host:port." - copyrightHelp = "Show copyright and short license text." - verboseModeHelp = "Enable verbose logging and debugging capabilities." - tracersHelp = "Comma-separated list of interpreter tracers to include." - mapScaleFactorHelp = fmt.Sprintf("Scaling factor for eBPF map sizes. "+ - "Every increase by 1 doubles the map size. Increase if you see eBPF map size errors. "+ - "Default is %d corresponding to 4GB of executable address space, max is %d.", - defaultArgMapScaleFactor, maxArgMapScaleFactor) - bpfVerifierLogLevelHelp = "Log level of the eBPF verifier output (0,1,2). Default is 0." - bpfVerifierLogSizeHelp = "Size in bytes that will be allocated for the eBPF " + - "verifier output. Only takes effect if bpf-log-level > 0." - versionHelp = "Show version." - probabilisticThresholdHelp = fmt.Sprintf("If set to a value between 1 and %d will enable "+ - "probabilistic profiling: "+ - "every probabilistic-interval a random number between 0 and %d is "+ - "chosen. If the given probabilistic-threshold is greater than this "+ - "random number, the agent will collect profiles from this system for "+ - "the duration of the interval.", - tracer.ProbabilisticThresholdMax-1, tracer.ProbabilisticThresholdMax-1) - probabilisticIntervalHelp = "Time interval for which probabilistic profiling will be " + - "enabled or disabled." - pprofHelp = "Listening address (e.g. localhost:6060) to serve pprof information." - samplesPerSecondHelp = "Set the frequency (in Hz) of stack trace sampling." - reporterIntervalHelp = "Set the reporter's interval in seconds." - monitorIntervalHelp = "Set the monitor interval in seconds." - clockSyncIntervalHelp = "Set the sync interval with the realtime clock. " + - "If zero, monotonic-realtime clock sync will be performed once, " + - "on agent startup, but not periodically." 
- sendErrorFramesHelp = "Send error frames (devfiler only, breaks Kibana)" - saveCPUProfileHelp = "Save CPU pprof profile to `cpu.pprof`." - tagsHelp = "User-specified tags separated by ';'." - serviceHelp = "Service name." - nodeHelp = "The name of the node that the profiler is running on. " + - "If on Kubernetes, this must match the Kubernetes node name." -) - type arguments struct { - bpfVerifierLogLevel uint - bpfVerifierLogSize int + bpfVerifierLogLevel uint64 + bpfVerifierLogSize uint64 collAgentAddr string copyright bool - mapScaleFactor uint + mapScaleFactor uint64 monitorInterval time.Duration clockSyncInterval time.Duration noKernelVersionCheck bool node string - pprofAddr string probabilisticInterval time.Duration - probabilisticThreshold uint + probabilisticThreshold uint64 reporterInterval time.Duration - samplesPerSecond int - saveCPUProfile bool + samplesPerSecond uint64 + cpuProfileDump string sendErrorFrames bool serviceName string - symbolUpload bool + serviceVersion string + environment string + uploadSymbols bool + uploadDynamicSymbols bool + uploadSymbolsDryRun bool tags string timeline bool tracers string verboseMode bool - version bool + ddAPIKey string + ddAPPKey string + ddSite string - fs *flag.FlagSet + cmd *cli.Command } -// Package-scope variable, so that conditionally compiled other components can refer -// to the same flagset. - func parseArgs() (*arguments, error) { var args arguments - var err error - - fs := flag.NewFlagSet("dd-otel-host-profiler", flag.ExitOnError) - - // Please keep the parameters ordered alphabetically in the source-code. 
- fs.UintVar(&args.bpfVerifierLogLevel, "bpf-log-level", 0, bpfVerifierLogLevelHelp) - fs.IntVar(&args.bpfVerifierLogSize, "bpf-log-size", cebpf.DefaultVerifierLogSize, - bpfVerifierLogSizeHelp) - - fs.StringVar(&args.collAgentAddr, "collection-agent", defaultArgCollAgentAddr, - collAgentAddrHelp) - - fs.BoolVar(&args.copyright, "copyright", false, copyrightHelp) - fs.UintVar(&args.mapScaleFactor, "map-scale-factor", - defaultArgMapScaleFactor, mapScaleFactorHelp) - - fs.DurationVar(&args.monitorInterval, "monitor-interval", defaultArgMonitorInterval, - monitorIntervalHelp) - - fs.DurationVar(&args.clockSyncInterval, "clock-sync-interval", defaultClockSyncInterval, - clockSyncIntervalHelp) - - fs.BoolVar(&args.noKernelVersionCheck, "no-kernel-version-check", false, - noKernelVersionCheckHelp) - - fs.StringVar(&args.pprofAddr, "pprof", "", pprofHelp) + versionInfo := version.GetVersionInfo() - fs.DurationVar(&args.probabilisticInterval, "probabilistic-interval", - defaultProbabilisticInterval, probabilisticIntervalHelp) - fs.UintVar(&args.probabilisticThreshold, "probabilistic-threshold", - defaultProbabilisticThreshold, probabilisticThresholdHelp) - - fs.DurationVar(&args.reporterInterval, "reporter-interval", defaultArgReporterInterval, - reporterIntervalHelp) - - fs.IntVar(&args.samplesPerSecond, "samples-per-second", defaultArgSamplesPerSecond, - samplesPerSecondHelp) - fs.BoolVar(&args.timeline, "timeline", false, "Enable timeline feature.") - - fs.BoolVar(&args.sendErrorFrames, "send-error-frames", defaultArgSendErrorFrames, - sendErrorFramesHelp) - - fs.StringVar(&args.tracers, "t", "all", "Shorthand for -tracers.") - fs.StringVar(&args.tracers, "tracers", "all", tracersHelp) - - fs.BoolVar(&args.verboseMode, "v", false, "Shorthand for -verbose.") - fs.BoolVar(&args.verboseMode, "verbose", false, verboseModeHelp) - fs.BoolVar(&args.version, "version", false, versionHelp) - - fs.StringVar(&args.tags, "tags", "", tagsHelp) - fs.BoolVar(&args.saveCPUProfile, 
"save-cpuprofile", false, - saveCPUProfileHelp) - fs.StringVar(&args.serviceName, "service", "dd-otel-host-profiler", serviceHelp) + cli.VersionPrinter = func(_ *cli.Command) { + fmt.Printf("dd-otel-host-profiler, version %s (revision: %s, date: %s), arch: %v\n", + versionInfo.Version, versionInfo.VcsRevision, versionInfo.VcsTime, runtime.GOARCH) + } - fs.StringVar(&args.node, "node", "", nodeHelp) + cli.VersionFlag = &cli.BoolFlag{ + Name: "version", + Usage: "print the version", + } - fs.Usage = func() { - fs.PrintDefaults() + app := cli.Command{ + Name: "dd-otel-host-profiler", + Usage: "Datadog OpenTelemetry host profiler", + Copyright: copyright, + Version: versionInfo.Version, + Flags: []cli.Flag{ + &cli.UintFlag{ + Name: "bpf-log-level", + Value: 0, + Usage: "Log level of the eBPF verifier output (0,1,2).", + Destination: &args.bpfVerifierLogLevel, + }, + &cli.UintFlag{ + Name: "bpf-log-size", + Value: cebpf.DefaultVerifierLogSize, + Usage: "Size in bytes that will be allocated for the eBPF verifier output. 
" + + "Only takes effect if bpf-log-level > 0.", + Destination: &args.bpfVerifierLogSize, + }, + &cli.StringFlag{ + Name: "url", + Aliases: []string{"U"}, + Value: defaultArgCollAgentAddr, + Usage: "The Datadog agent URL in the format of http://host:port.", + Sources: cli.EnvVars("DD_TRACE_AGENT_URL"), + Destination: &args.collAgentAddr, + }, + &cli.StringFlag{ + Name: "service", + Aliases: []string{"S"}, + Value: "dd-otel-host-profiler", + Usage: "Service name.", + Sources: cli.EnvVars("DD_SERVICE"), + Destination: &args.serviceName, + }, + &cli.StringFlag{ + Name: "environment", + Aliases: []string{"E"}, + Usage: "The name of the environment to use in the Datadog UI.", + Sources: cli.EnvVars("DD_ENV"), + Destination: &args.environment, + }, + &cli.StringFlag{ + Name: "service-version", + Aliases: []string{"V"}, + Usage: "Version of the service being profiled.", + Destination: &args.serviceVersion, + }, + &cli.StringFlag{ + Name: "tags", + Usage: "User-specified tags separated by ',': key1:value1,key2:value2.", + Sources: cli.EnvVars("DD_TAGS"), + Destination: &args.tags, + }, + &cli.UintFlag{ + Name: "map-scale-factor", + Value: defaultArgMapScaleFactor, + Usage: fmt.Sprintf("Scaling factor for eBPF map sizes. "+ + "Every increase by 1 doubles the map size. Increase if you see eBPF map size errors. "+ + "Default is %d corresponding to 4GB of executable address space, max is %d.", + defaultArgMapScaleFactor, maxArgMapScaleFactor), + Destination: &args.mapScaleFactor, + }, + &cli.DurationFlag{ + Name: "monitor-interval", + Value: defaultArgMonitorInterval, + Usage: "Set the monitor interval in seconds.", + Destination: &args.monitorInterval, + }, + &cli.DurationFlag{ + Name: "clock-sync-interval", + Value: defaultClockSyncInterval, + Usage: "Set the sync interval with the realtime clock. 
" + + "If zero, monotonic-realtime clock sync will be performed once, " + + "on agent startup, but not periodically.", + Destination: &args.clockSyncInterval, + }, + &cli.BoolFlag{ + Name: "no-kernel-version-check", + Value: false, + Usage: "Disable checking kernel version for eBPF support. " + + "Use at your own risk, to run the agent on older kernels with backported eBPF features.", + Destination: &args.noKernelVersionCheck, + }, + &cli.DurationFlag{ + Name: "probabilistic-interval", + Value: defaultProbabilisticInterval, + Usage: "Time interval for which probabilistic profiling will be enabled or disabled.", + Destination: &args.probabilisticInterval, + }, + &cli.UintFlag{ + Name: "probabilistic-threshold", + Value: defaultProbabilisticThreshold, + Usage: fmt.Sprintf("If set to a value between 1 and %d will enable probabilistic profiling: "+ + "every probabilistic-interval a random number between 0 and %d is chosen. "+ + "If the given probabilistic-threshold is greater than this "+ + "random number, the agent will collect profiles from this system for the duration of the interval.", + tracer.ProbabilisticThresholdMax-1, tracer.ProbabilisticThresholdMax-1), + Destination: &args.probabilisticThreshold, + }, + &cli.DurationFlag{ + Name: "reporter-interval", + Value: defaultArgReporterInterval, + Usage: "Set the reporter's interval in seconds.", + Destination: &args.reporterInterval, + }, + &cli.UintFlag{ + Name: "samples-per-second", + Value: defaultArgSamplesPerSecond, + Usage: "Set the frequency (in Hz) of stack trace sampling.", + Destination: &args.samplesPerSecond, + }, + &cli.BoolFlag{ + Name: "timeline", + Value: false, + Usage: "Enable timeline feature.", + Destination: &args.timeline, + }, + &cli.BoolFlag{ + Name: "send-error-frames", + Value: defaultArgSendErrorFrames, + Usage: "Send error frames", + Destination: &args.sendErrorFrames, + }, + &cli.StringFlag{ + Name: "tracers", + Aliases: []string{"t"}, + Value: "all", + Usage: "Comma-separated list of 
interpreter tracers to include.", + Destination: &args.tracers, + }, + &cli.BoolFlag{ + Name: "verbose", + Aliases: []string{"v"}, + Value: false, + Usage: "Enable verbose logging and debugging capabilities.", + Destination: &args.verboseMode, + }, + &cli.StringFlag{ + Name: "dump-cpuprofile", + Usage: "Dump CPU pprof profile to `FILE`.", + Destination: &args.cpuProfileDump, + }, + &cli.StringFlag{ + Name: "node", + Usage: "The name of the node that the profiler is running on. " + + "If on Kubernetes, this must match the Kubernetes node name.", + Destination: &args.node, + }, + &cli.BoolFlag{ + Name: "upload-symbols", + Value: false, + Usage: "Enable local symbol upload.", + Hidden: true, + Sources: cli.EnvVars("DD_PROFILING_EXPERIMENTAL_UPLOAD_SYMBOLS"), + Destination: &args.uploadSymbols, + }, + &cli.BoolWithInverseFlag{ + BoolFlag: &cli.BoolFlag{ + Name: "upload-dynamic-symbols", + Usage: "Enable dynamic symbols upload.", + // Cannot set default value to true because it fails at runtime with: + // "Failure to parse arguments: cannot set both flags `--upload-dynamic-symbols` and `--no-upload-dynamic-symbols`" + // Value: true, + DefaultText: "true", + Hidden: true, + Sources: cli.EnvVars("DD_PROFILING_EXPERIMENTAL_UPLOAD_DYNAMIC_SYMBOLS"), + }, + }, + &cli.BoolFlag{ + Name: "upload-symbols-dry-run", + Value: false, + Usage: "Local symbol upload dry-run.", + Hidden: true, + Sources: cli.EnvVars("DD_PROFILING_EXPERIMENTAL_UPLOAD_SYMBOLS_DRY_RUN"), + Destination: &args.uploadSymbolsDryRun, + }, + &cli.StringFlag{ + Name: "api-key", + Usage: "Datadog API key.", + Hidden: true, + Sources: cli.EnvVars("DD_API_KEY"), + Destination: &args.ddAPIKey, + }, + &cli.StringFlag{ + Name: "app-key", + Usage: "Datadog APP key.", + Hidden: true, + Sources: cli.EnvVars("DD_APP_KEY"), + Destination: &args.ddAPPKey, + }, + &cli.StringFlag{ + Name: "dd-site", + Value: "datadoghq.com", + Usage: "Datadog site.", + Hidden: true, + Sources: cli.EnvVars("DD_SITE"), + Destination: 
&args.ddSite, + }, + }, + Action: func(_ context.Context, cmd *cli.Command) error { + if cmd.IsSet("upload-dynamic-symbols") { + args.uploadDynamicSymbols = cmd.Bool("upload-dynamic-symbols") + } else { + cmd.Set("upload-dynamic-symbols", "true") + args.uploadDynamicSymbols = true + } + args.cmd = cmd + return nil + }, } - args.fs = fs + if err := app.Run(context.Background(), os.Args); err != nil { + return nil, err + } - symbolUpload := os.Getenv("DD_EXPERIMENTAL_LOCAL_SYMBOL_UPLOAD") - if symbolUpload != "" { - args.symbolUpload, err = strconv.ParseBool(symbolUpload) - if err != nil { - args.symbolUpload = false - log.Warnf("Failed to parse DD_EXPERIMENTAL_LOCAL_SYMBOL_UPLOAD=%v: %v", symbolUpload, err) - } + if args.cmd == nil { + return nil, nil } - return &args, ff.Parse(fs, os.Args[1:], - ff.WithEnvVarPrefix("DD_OTEL_HOST_PROFILER"), - ff.WithConfigFileFlag("config"), - ff.WithConfigFileParser(ff.PlainParser), - // This will ignore configuration file (only) options that the current HA - // does not recognize. 
- ff.WithIgnoreUndefined(true), - ff.WithAllowMissingConfigFile(true), - ) + return &args, nil } func (args *arguments) dump() { log.Debug("Config:") - args.fs.VisitAll(func(f *flag.Flag) { - log.Debug(fmt.Sprintf("%s: %v", f.Name, f.Value)) - }) + for _, f := range args.cmd.Flags { + setStr := "default" + if args.cmd.IsSet(f.Names()[0]) { + setStr = "set" + } + log.Debugf("%s: \"%v\" [%s]", f.Names()[0], args.cmd.Value(f.Names()[0]), setStr) + } } diff --git a/go.mod b/go.mod index 0799f09..70d51b0 100644 --- a/go.mod +++ b/go.mod @@ -12,10 +12,10 @@ require ( github.com/google/pprof v0.0.0-20240829160300-da1f7e9f2b25 github.com/jsimonetti/rtnetlink v1.4.2 github.com/open-telemetry/opentelemetry-ebpf-profiler v0.0.0-20240918090752-0a8979a41728 - github.com/peterbourgon/ff/v3 v3.4.0 github.com/sirupsen/logrus v1.9.3 github.com/stretchr/testify v1.9.0 github.com/tklauser/numcpus v0.8.0 + github.com/urfave/cli/v3 v3.0.0-alpha9 github.com/zeebo/xxh3 v1.0.2 golang.org/x/sys v0.21.0 k8s.io/api v0.31.0 @@ -88,6 +88,7 @@ require ( github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/x448/float16 v0.8.4 // indirect + github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect go.opencensus.io v0.24.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0 // indirect go.opentelemetry.io/otel v1.27.0 // indirect diff --git a/go.sum b/go.sum index 3dc2b39..842ba59 100644 --- a/go.sum +++ b/go.sum @@ -197,8 +197,6 @@ github.com/opencontainers/runtime-spec v1.1.0 h1:HHUyrt9mwHUjtasSbXSMvs4cyFxh+Bl github.com/opencontainers/runtime-spec v1.1.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU= github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec= -github.com/peterbourgon/ff/v3 v3.4.0 
h1:QBvM/rizZM1cB0p0lGMdmR7HxZeI/ZrBWB4DqLkMUBc= -github.com/peterbourgon/ff/v3 v3.4.0/go.mod h1:zjJVUhx+twciwfDl0zBcFzl4dW8axCRyXE/eKY9RztQ= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -225,8 +223,12 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/tklauser/numcpus v0.8.0 h1:Mx4Wwe/FjZLeQsK/6kt2EOepwwSl7SmJrK5bV/dXYgY= github.com/tklauser/numcpus v0.8.0/go.mod h1:ZJZlAY+dmR4eut8epnzf0u/VwodKmryxR8txiloSqBE= +github.com/urfave/cli/v3 v3.0.0-alpha9 h1:P0RMy5fQm1AslQS+XCmy9UknDXctOmG/q/FZkUFnJSo= +github.com/urfave/cli/v3 v3.0.0-alpha9/go.mod h1:0kK/RUFHyh+yIKSfWxwheGndfnrvYSmYFVeKCh03ZUc= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= +github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= diff --git a/helpers.go b/helpers.go index c40ad7a..62ed87b 100644 --- a/helpers.go +++ b/helpers.go @@ -274,7 +274,7 @@ func enterNamespace(pid int, nsType string) (int, error) { } // ValidateTags parses and validates user-specified tags. -// Each tag must match ValidTagRegex with ';' used as a separator. +// Each tag must match ValidTagRegex with ',' used as a separator. 
// Tags that can't be validated are dropped. // The empty string is returned if no tags can be validated. func ValidateTags(tags string) reporter.Tags { @@ -282,7 +282,7 @@ func ValidateTags(tags string) reporter.Tags { return nil } - splitTags := strings.Split(tags, ";") + splitTags := strings.Split(tags, ",") validatedTags := make(reporter.Tags, 0, len(splitTags)) for _, tag := range splitTags { @@ -296,3 +296,15 @@ func ValidateTags(tags string) reporter.Tags { return validatedTags } + +func AddTagsFromArgs(tags *reporter.Tags, args arguments) { + if args.environment != "" { + *tags = append(*tags, reporter.MakeTag("env", args.environment)) + } + if args.serviceName != "" { + *tags = append(*tags, reporter.MakeTag("service", args.serviceName)) + } + if args.serviceVersion != "" { + *tags = append(*tags, reporter.MakeTag("version", args.serviceVersion)) + } +} diff --git a/main.go b/main.go index b8760a8..7064b29 100644 --- a/main.go +++ b/main.go @@ -12,7 +12,6 @@ package main import ( "context" "fmt" - "net/http" "os" "os/signal" "runtime" @@ -45,7 +44,7 @@ import ( ) // Short copyright / license text for eBPF code -var copyright = `Copyright 2024 Datadog, Inc. +const copyright = `Copyright 2024 Datadog, Inc. For the eBPF code loaded by Universal Profiling Agent into the kernel, the following license applies (GPLv2 only). 
You can obtain a copy of the GPLv2 code at: @@ -98,17 +97,16 @@ func mainWithExitCode() exitCode { return parseError("Failure to parse arguments: %v", err) } + if args == nil { + return exitSuccess + } + if args.copyright { fmt.Print(copyright) return exitSuccess } versionInfo := version.GetVersionInfo() - if args.version { - fmt.Printf("dd-otel-host-profiler, version %s (revision: %s, date: %s), arch: %v\n", - versionInfo.Version, versionInfo.VcsRevision, versionInfo.VcsTime, runtime.GOARCH) - return exitSuccess - } if args.verboseMode { log.SetLevel(log.DebugLevel) @@ -125,15 +123,6 @@ func mainWithExitCode() exitCode { unix.SIGINT, unix.SIGTERM, unix.SIGABRT) defer mainCancel() - if args.pprofAddr != "" { - go func() { - //nolint:gosec - if err = http.ListenAndServe(args.pprofAddr, nil); err != nil { - log.Errorf("Serving pprof on %s failed: %s", args.pprofAddr, err) - } - }() - } - log.Infof("Starting Datadog OTEL host profiler %s (revision: %s, date: %s), arch: %v", versionInfo.Version, versionInfo.VcsRevision, versionInfo.VcsTime, runtime.GOARCH) @@ -151,7 +140,7 @@ func mainWithExitCode() exitCode { } traceHandlerCacheSize := - traceCacheSize(args.monitorInterval, args.samplesPerSecond, uint16(presentCores)) + traceCacheSize(args.monitorInterval, int(args.samplesPerSecond), uint16(presentCores)) intervals := times.New(args.reporterInterval, args.monitorInterval, args.probabilisticInterval) @@ -186,6 +175,9 @@ func mainWithExitCode() exitCode { validatedTags := ValidateTags(args.tags) log.Debugf("Validated tags: %s", validatedTags) + // Add tags from the arguments + AddTagsFromArgs(&validatedTags, *args) + containerMetadataProvider, err := containermetadata.NewContainerMetadataProvider(mainCtx, args.node, intervals.MonitorInterval()) if err != nil { @@ -193,19 +185,26 @@ func mainWithExitCode() exitCode { } rep, err := reporter.Start(mainCtx, &reporter.Config{ - CollAgentAddr: args.collAgentAddr, - Name: args.serviceName, + AgentURL: args.collAgentAddr, 
Version: versionInfo.Version, ReportInterval: intervals.ReportInterval(), CacheSize: traceHandlerCacheSize, - SamplesPerSecond: args.samplesPerSecond, + SamplesPerSecond: int(args.samplesPerSecond), KernelVersion: kernelVersion, HostName: hostname, IPAddress: sourceIP, - SaveCPUProfile: args.saveCPUProfile, + CPUProfileDump: args.cpuProfileDump, Tags: validatedTags, Timeline: args.timeline, - UploadSymbols: args.symbolUpload, + SymbolUploaderConfig: reporter.SymbolUploaderConfig{ + Enabled: args.uploadSymbols, + UploadDynamicSymbols: args.uploadDynamicSymbols, + DryRun: args.uploadSymbolsDryRun, + DDAPIKey: args.ddAPIKey, + DDAPPKey: args.ddAPPKey, + DDSite: args.ddSite, + Version: args.serviceVersion, + }, }, containerMetadataProvider) if err != nil { return failure("Failed to start reporting: %v", err) @@ -222,13 +221,13 @@ func mainWithExitCode() exitCode { Intervals: intervals, IncludeTracers: includeTracers, FilterErrorFrames: !args.sendErrorFrames, - SamplesPerSecond: args.samplesPerSecond, + SamplesPerSecond: int(args.samplesPerSecond), MapScaleFactor: int(args.mapScaleFactor), KernelVersionCheck: !args.noKernelVersionCheck, BPFVerifierLogLevel: uint32(args.bpfVerifierLogLevel), - BPFVerifierLogSize: args.bpfVerifierLogSize, + BPFVerifierLogSize: int(args.bpfVerifierLogSize), ProbabilisticInterval: args.probabilisticInterval, - ProbabilisticThreshold: args.probabilisticThreshold, + ProbabilisticThreshold: uint(args.probabilisticThreshold), }) if err != nil { return failure("Failed to load eBPF tracer: %v", err) diff --git a/reporter/config.go b/reporter/config.go index 75a1dc4..1bb2127 100644 --- a/reporter/config.go +++ b/reporter/config.go @@ -24,12 +24,10 @@ func MakeTag(key, value string) Tag { } type Config struct { - // Name defines the name of the agent. - Name string // Version defines the version of the agent. Version string - // CollAgentAddr defines the destination of the backend connection. 
- CollAgentAddr string + // AgentURL defines the destination of the backend connection. + AgentURL string // CacheSize defines the size of the reporter caches. CacheSize uint32 // samplesPerSecond defines the number of samples per second. @@ -44,12 +42,29 @@ type Config struct { IPAddress string // ReportInterval defines the interval at which the agent reports data to the collection agent. ReportInterval time.Duration - // SaveCPUProfile defines whether the agent should dump a pprof CPU profile on disk. - SaveCPUProfile bool + // CPUProfileDump defines a file where the agent should dump pprof CPU profile. + CPUProfileDump string // Tags is a list of tags to be sent to the collection agent. Tags Tags // Whether to include timestamps on samples for the timeline feature Timeline bool - // SymbolUpload defines whether the agent should upload debug symbols to the backend. - UploadSymbols bool + // SymbolUploaderConfig defines the configuration for the symbol uploader. + SymbolUploaderConfig SymbolUploaderConfig +} + +type SymbolUploaderConfig struct { + // Enabled defines whether the agent should upload debug symbols to the backend. + Enabled bool + // UploadDynamicSymbols defines whether the agent should upload dynamic symbols to the backend. + UploadDynamicSymbols bool + // DryRun defines whether the agent should upload debug symbols to the backend in dry-run mode. + DryRun bool + // DataDog API key + DDAPIKey string + // DataDog APP key + DDAPPKey string + // DDSite is the site to upload symbols to. + DDSite string + // Version is the version of the profiler. + Version string } diff --git a/reporter/datadog_reporter.go b/reporter/datadog_reporter.go index be70235..3ca3510 100644 --- a/reporter/datadog_reporter.go +++ b/reporter/datadog_reporter.go @@ -87,10 +87,7 @@ type processMetadata struct { // DatadogReporter receives and transforms information to be OTLP/profiles compliant. type DatadogReporter struct { - // name is the ScopeProfile's name. 
- name string - - // version is the ScopeProfile's version. + // profiler version version string // stopSignal is the stop signal for shutting down all background tasks. @@ -136,8 +133,8 @@ type DatadogReporter struct { // agentAddr is the address of the Datadog agent. agentAddr string - // saveCPUProfile defines whether the agent should dump a pprof CPU profile on disk. - saveCPUProfile bool + // cpuProfilerDump defines a file where the agent should dump pprof CPU profile. + cpuProfilerDump string // tags is a list of tags alongside the profile. tags Tags @@ -148,6 +145,9 @@ type DatadogReporter struct { symbolUploader *DatadogSymbolUploader containerMetadataProvider containermetadata.Provider + + // profileSeq is the sequence number of the profile (ie. number of profiles uploaded until now). + profileSeq uint64 } // ReportTraceEvent enqueues reported trace events for the Datadog reporter. @@ -326,9 +326,9 @@ func Start(mainCtx context.Context, cfg *Config, p containermetadata.Provider) ( } var symbolUploader *DatadogSymbolUploader - if cfg.UploadSymbols { + if cfg.SymbolUploaderConfig.Enabled { log.Infof("Enabling Datadog local symbol upload") - symbolUploader, err = NewDatadogSymbolUploader(cfg.Version) + symbolUploader, err = NewDatadogSymbolUploader(cfg.SymbolUploaderConfig) if err != nil { log.Errorf( "Failed to create Datadog symbol uploader, symbol upload will be disabled: %v", @@ -337,7 +337,6 @@ func Start(mainCtx context.Context, cfg *Config, p containermetadata.Provider) ( } r := &DatadogReporter{ - name: cfg.Name, version: cfg.Version, kernelVersion: cfg.KernelVersion, hostName: cfg.HostName, @@ -352,11 +351,12 @@ func Start(mainCtx context.Context, cfg *Config, p containermetadata.Provider) ( containerMetadataProvider: p, traceEvents: xsync.NewRWMutex(map[traceAndMetaKey]*traceFramesCounts{}), processes: processes, - agentAddr: cfg.CollAgentAddr, - saveCPUProfile: cfg.SaveCPUProfile, + agentAddr: cfg.AgentURL, + cpuProfilerDump: cfg.CPUProfileDump, 
symbolUploader: symbolUploader, tags: cfg.Tags, timeline: cfg.Timeline, + profileSeq: 0, } // Create a child context for reporting features @@ -415,9 +415,9 @@ func (r *DatadogReporter) reportProfile(ctx context.Context) error { return fmt.Errorf("failed to compress profile: %w", err) } - if r.saveCPUProfile { - // write profile to cpu.pprof - f, err := os.Create("cpu.pprof") + if r.cpuProfilerDump != "" { + // write profile to disk + f, err := os.Create(r.cpuProfilerDump) if err != nil { return err } @@ -433,9 +433,15 @@ func (r *DatadogReporter) reportProfile(ctx context.Context) error { tags = append(tags, Tag{Key: "ddprof.custom_ctx", Value: attr}) } // The profiler_name tag allows us to differentiate the source of the profiles. - tags = append(tags, MakeTag("runtime", "native"), MakeTag("remote_symbols", "yes"), - MakeTag("profiler_name", profilerName), MakeTag("profiler_version", r.version), - MakeTag("cpu_arch", runtime.GOARCH), MakeTag("service", r.name)) + tags = append(tags, + MakeTag("runtime", "native"), + MakeTag("remote_symbols", "yes"), + MakeTag("profiler_name", profilerName), + MakeTag("profiler_version", r.version), + MakeTag("cpu_arch", runtime.GOARCH), + MakeTag("profile_seq", fmt.Sprintf("%d", r.profileSeq))) + + r.profileSeq++ log.Infof("Tags: %v", tags.String()) profilingURL, err := url.JoinPath(r.agentAddr, profilingEndPoint) diff --git a/reporter/symbol_uploader.go b/reporter/symbol_uploader.go index 46ecb00..41354d8 100644 --- a/reporter/symbol_uploader.go +++ b/reporter/symbol_uploader.go @@ -22,7 +22,6 @@ import ( "path/filepath" "runtime" "slices" - "strconv" "strings" "sync" "time" @@ -74,57 +73,46 @@ type DatadogSymbolUploader struct { symbolQuerier *DatadogSymbolQuerier } -func NewDatadogSymbolUploader(version string) (*DatadogSymbolUploader, error) { +func NewDatadogSymbolUploader(cfg SymbolUploaderConfig) (*DatadogSymbolUploader, error) { err := exec.Command("objcopy", "--version").Run() if err != nil { return nil, 
fmt.Errorf("objcopy is not available: %w", err) } - ddAPIKey := os.Getenv("DD_API_KEY") - if ddAPIKey == "" { + if cfg.DDAPIKey == "" { return nil, errors.New("DD_API_KEY is not set") } - ddAPPKey := os.Getenv("DD_APP_KEY") - if ddAPPKey == "" { + if cfg.DDAPPKey == "" { return nil, errors.New("DD_APP_KEY is not set") } - ddSite := os.Getenv("DD_SITE") - if ddSite == "" { + if cfg.DDSite == "" { return nil, errors.New("DD_SITE is not set") } - intakeURL, err := url.JoinPath("https://sourcemap-intake."+ddSite, sourceMapEndpoint) + intakeURL, err := url.JoinPath("https://sourcemap-intake."+cfg.DDSite, sourceMapEndpoint) if err != nil { return nil, fmt.Errorf("failed to parse URL: %w", err) } - dryRun, _ := strconv.ParseBool(os.Getenv("DD_EXPERIMENTAL_LOCAL_SYMBOL_UPLOAD_DRY_RUN")) - - uploadDynamicSymbols := true - b, err := strconv.ParseBool(os.Getenv("DD_EXPERIMENTAL_LOCAL_SYMBOL_UPLOAD_DYNAMIC_SYMBOLS")) - if err == nil { - uploadDynamicSymbols = b - } - uploadCache, err := lru.NewSynced[libpf.FileID, struct{}](uploadCacheSize, libpf.FileID.Hash32) if err != nil { return nil, fmt.Errorf("failed to create cache: %w", err) } - symbolQuerier, err := NewDatadogSymbolQuerier(ddSite, ddAPIKey, ddAPPKey) + symbolQuerier, err := NewDatadogSymbolQuerier(cfg.DDSite, cfg.DDAPIKey, cfg.DDAPPKey) if err != nil { return nil, fmt.Errorf("failed to create Datadog symbol querier: %w", err) } return &DatadogSymbolUploader{ - ddAPIKey: ddAPIKey, - ddAPPKey: ddAPPKey, + ddAPIKey: cfg.DDAPIKey, + ddAPPKey: cfg.DDAPPKey, intakeURL: intakeURL, - version: version, - dryRun: dryRun, - uploadDynamicSymbols: uploadDynamicSymbols, + version: cfg.Version, + dryRun: cfg.DryRun, + uploadDynamicSymbols: cfg.UploadDynamicSymbols, workerCount: uploadWorkerCount, client: &http.Client{Timeout: uploadTimeout}, uploadCache: uploadCache,