diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index fc4cf79..125236f 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -12,7 +12,7 @@ jobs: build: strategy: matrix: - go-version: [ 1.15.x ] + go-version: [ 1.16.x ] os: [ ubuntu-latest ] runs-on: ${{ matrix.os }} steps: diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index 071bbed..19700a1 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -9,7 +9,7 @@ jobs: publish: strategy: matrix: - go-version: [ 1.15.x ] + go-version: [ 1.16.x ] os: [ ubuntu-latest ] runs-on: ${{ matrix.os }} steps: diff --git a/README.md b/README.md index 5c37de4..8ccacdc 100644 --- a/README.md +++ b/README.md @@ -98,12 +98,21 @@ The watchdog can be configured through environmental variables. You must always | `content_type` | Force a specific Content-Type response for all responses | | `write_timeout` | HTTP timeout for writing a response body from your function (in seconds) | | `read_timeout` | HTTP timeout for reading the payload from the client caller (in seconds) | +| `healthcheck_interval` | Interval (in seconds) for HTTP healthcheck by container orchestrator i.e. kubelet. Used for graceful shutdowns. | | `suppress_lock` | The watchdog will attempt to write a lockfile to /tmp/ for swarm healthchecks - set this to true to disable behaviour. | | `exec_timeout` | Hard timeout for process exec'd for each incoming request (in seconds). Disabled if set to 0 | | `write_debug` | Write all output, error messages, and additional information to the logs. Default is false | | `combine_output` | True by default - combines stdout/stderr in function response, when set to false `stderr` is written to the container logs and stdout is used for function response | | `max_inflight` | Limit the maximum number of requests in flight | +## Metrics + +| Name | Description | Type | +|---------------------------------|-------------------------|------------------------| +| http_requests_total | Total number of requests | Counter | +| http_request_duration_seconds | Duration of requests | Histogram | +| http_requests_in_flight | Number of requests in-flight | Gauge | + ## Advanced / tuning ### (New) of-watchdog and HTTP mode diff --git a/main.go b/main.go index 8e16a65..bc7f991 100644 --- a/main.go +++ b/main.go @@ -21,6 +21,7 @@ import ( "github.com/openfaas/classic-watchdog/metrics" "github.com/openfaas/classic-watchdog/types" + "github.com/prometheus/client_golang/prometheus/testutil" ) var ( @@ -67,6 +68,7 @@ func main() { readTimeout := config.readTimeout writeTimeout := config.writeTimeout + healthcheckInterval := config.healthcheckInterval s := &http.Server{ Addr: fmt.Sprintf(":%d", config.port), @@ -77,10 +79,11 @@ func main() { httpMetrics := metrics.NewHttp() - log.Printf("Timeouts: read: %s, write: %s hard: %s.\n", + log.Printf("Timeouts: read: %s write: %s hard: %s health: %s.\n", readTimeout, writeTimeout, - config.execTimeout) + config.execTimeout, + healthcheckInterval) log.Printf("Listening on port: %d\n", config.port) http.HandleFunc("/_/health", makeHealthHandler()) @@ -93,24 +96,14 @@ func main() { go metricsServer.Serve(cancel) - shutdownTimeout := config.writeTimeout - listenUntilShutdown(shutdownTimeout, s, config.suppressLock) -} - -func markUnhealthy() error { - atomic.StoreInt32(&acceptingConnections, 0) - - path := filepath.Join(os.TempDir(), ".lock") - log.Printf("Removing lock-file : %s\n", path) - removeErr := os.Remove(path) - return removeErr + listenUntilShutdown(s, healthcheckInterval, writeTimeout, config.suppressLock, &httpMetrics) } // listenUntilShutdown will listen for HTTP requests until SIGTERM // is sent at which point the code will wait `shutdownTimeout` before // closing off connections and a futher `shutdownTimeout` before // exiting -func listenUntilShutdown(shutdownTimeout time.Duration, s *http.Server, suppressLock bool) { +func listenUntilShutdown(s *http.Server, healthcheckInterval time.Duration, writeTimeout time.Duration, suppressLock bool, httpMetrics *metrics.Http) { idleConnsClosed := make(chan struct{}) go func() { @@ -119,24 +112,29 @@ func listenUntilShutdown(shutdownTimeout time.Duration, s *http.Server, suppress <-sig - log.Printf("SIGTERM received.. shutting down server in %s\n", shutdownTimeout.String()) + log.Printf("SIGTERM: no new connections in %s\n", healthcheckInterval.String()) - healthErr := markUnhealthy() - - if healthErr != nil { - log.Printf("Unable to mark unhealthy during shutdown: %s\n", healthErr.Error()) + if err := markUnhealthy(); err != nil { + log.Printf("Unable to mark server as unhealthy: %s\n", err.Error()) } - <-time.Tick(shutdownTimeout) + <-time.Tick(healthcheckInterval) + + connections := int64(testutil.ToFloat64(httpMetrics.InFlight)) + log.Printf("No new connections allowed, draining: %d requests\n", connections) - if err := s.Shutdown(context.Background()); err != nil { - // Error from closing listeners, or context timeout: + // The maximum time to wait for active connections whilst shutting down is + // equivalent to the maximum execution time i.e. writeTimeout. + ctx, cancel := context.WithTimeout(context.Background(), writeTimeout) + defer cancel() + + if err := s.Shutdown(ctx); err != nil { log.Printf("Error in Shutdown: %v", err) } - log.Printf("No new connections allowed. Exiting in: %s\n", shutdownTimeout.String()) + connections = int64(testutil.ToFloat64(httpMetrics.InFlight)) - <-time.Tick(shutdownTimeout) + log.Printf("Exiting. Active connections: %d\n", connections) close(idleConnsClosed) }() @@ -164,6 +162,15 @@ func listenUntilShutdown(shutdownTimeout time.Duration, s *http.Server, suppress <-idleConnsClosed } +func markUnhealthy() error { + atomic.StoreInt32(&acceptingConnections, 0) + + path := filepath.Join(os.TempDir(), ".lock") + log.Printf("Removing lock-file : %s\n", path) + removeErr := os.Remove(path) + return removeErr +} + func printVersion() { sha := "unknown" if len(GitCommit) > 0 { diff --git a/metrics/http.go b/metrics/http.go index ac62f02..d0cab14 100644 --- a/metrics/http.go +++ b/metrics/http.go @@ -8,10 +8,11 @@ import ( type Http struct { RequestsTotal *prometheus.CounterVec RequestDurationHistogram *prometheus.HistogramVec + InFlight prometheus.Gauge } func NewHttp() Http { - return Http{ + h := Http{ RequestsTotal: promauto.NewCounterVec(prometheus.CounterOpts{ Subsystem: "http", Name: "requests_total", @@ -23,5 +24,14 @@ func NewHttp() Http { Help: "Seconds spent serving HTTP requests.", Buckets: prometheus.DefBuckets, }, []string{"code", "method"}), + InFlight: promauto.NewGauge(prometheus.GaugeOpts{ + Subsystem: "http", + Name: "requests_in_flight", + Help: "total HTTP requests in-flight", + }), } + + // Default to 0 for queries during graceful shutdown. + h.InFlight.Set(0) + return h } diff --git a/metrics/metrics.go b/metrics/metrics.go index 18f48b5..e5d3286 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -48,18 +48,22 @@ func (m *MetricsServer) Serve(cancel chan bool) { }() go func() { - select { - case <-cancel: - log.Printf("metrics server shutdown\n") - - m.s.Shutdown(context.Background()) - } + <-cancel + log.Printf("metrics server shutdown\n") + m.s.Shutdown(context.Background()) }() } // InstrumentHandler returns a handler which records HTTP requests // as they are made func InstrumentHandler(next http.Handler, _http Http) http.HandlerFunc { - return promhttp.InstrumentHandlerCounter(_http.RequestsTotal, - promhttp.InstrumentHandlerDuration(_http.RequestDurationHistogram, next)) + return func(w http.ResponseWriter, r *http.Request) { + then := promhttp.InstrumentHandlerCounter(_http.RequestsTotal, + promhttp.InstrumentHandlerDuration(_http.RequestDurationHistogram, next)) + + _http.InFlight.Inc() + defer _http.InFlight.Dec() + + then(w, r) + } } diff --git a/readconfig.go b/readconfig.go index fc9321e..8da3959 100644 --- a/readconfig.go +++ b/readconfig.go @@ -66,6 +66,7 @@ func (ReadConfig) Read(hasEnv HasEnv) WatchdogConfig { cfg.readTimeout = parseIntOrDurationValue(hasEnv.Getenv("read_timeout"), time.Second*5) cfg.writeTimeout = parseIntOrDurationValue(hasEnv.Getenv("write_timeout"), time.Second*5) + cfg.healthcheckInterval = parseIntOrDurationValue(hasEnv.Getenv("healthcheck_interval"), cfg.writeTimeout) cfg.execTimeout = parseIntOrDurationValue(hasEnv.Getenv("exec_timeout"), time.Second*0) cfg.port = parseIntValue(hasEnv.Getenv("port"), 8080) @@ -106,6 +107,10 @@ type WatchdogConfig struct { // HTTP write timeout writeTimeout time.Duration + // healthcheckInterval is the interval that an external service runs its health checks to + // detect health and remove the watchdog from its pool of endpoints + healthcheckInterval time.Duration + // faasProcess is the process to exec faasProcess string diff --git a/vendor/github.com/prometheus/client_golang/prometheus/testutil/lint.go b/vendor/github.com/prometheus/client_golang/prometheus/testutil/lint.go new file mode 100644 index 0000000..7681877 --- /dev/null +++ b/vendor/github.com/prometheus/client_golang/prometheus/testutil/lint.go @@ -0,0 +1,46 @@ +// Copyright 2020 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package testutil + +import ( + "fmt" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil/promlint" +) + +// CollectAndLint registers the provided Collector with a newly created pedantic +// Registry. It then calls GatherAndLint with that Registry and with the +// provided metricNames. +func CollectAndLint(c prometheus.Collector, metricNames ...string) ([]promlint.Problem, error) { + reg := prometheus.NewPedanticRegistry() + if err := reg.Register(c); err != nil { + return nil, fmt.Errorf("registering collector failed: %s", err) + } + return GatherAndLint(reg, metricNames...) +} + +// GatherAndLint gathers all metrics from the provided Gatherer and checks them +// with the linter in the promlint package. If any metricNames are provided, +// only metrics with those names are checked. +func GatherAndLint(g prometheus.Gatherer, metricNames ...string) ([]promlint.Problem, error) { + got, err := g.Gather() + if err != nil { + return nil, fmt.Errorf("gathering metrics failed: %s", err) + } + if metricNames != nil { + got = filterMetrics(got, metricNames) + } + return promlint.NewWithMetricFamilies(got).Lint() +} diff --git a/vendor/github.com/prometheus/client_golang/prometheus/testutil/promlint/promlint.go b/vendor/github.com/prometheus/client_golang/prometheus/testutil/promlint/promlint.go new file mode 100644 index 0000000..ec80617 --- /dev/null +++ b/vendor/github.com/prometheus/client_golang/prometheus/testutil/promlint/promlint.go @@ -0,0 +1,386 @@ +// Copyright 2020 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package promlint provides a linter for Prometheus metrics. +package promlint + +import ( + "fmt" + "io" + "regexp" + "sort" + "strings" + + "github.com/prometheus/common/expfmt" + + dto "github.com/prometheus/client_model/go" +) + +// A Linter is a Prometheus metrics linter. It identifies issues with metric +// names, types, and metadata, and reports them to the caller. +type Linter struct { + // The linter will read metrics in the Prometheus text format from r and + // then lint it, _and_ it will lint the metrics provided directly as + // MetricFamily proto messages in mfs. Note, however, that the current + // constructor functions New and NewWithMetricFamilies only ever set one + // of them. + r io.Reader + mfs []*dto.MetricFamily +} + +// A Problem is an issue detected by a Linter. +type Problem struct { + // The name of the metric indicated by this Problem. + Metric string + + // A description of the issue for this Problem. + Text string +} + +// newProblem is helper function to create a Problem. +func newProblem(mf *dto.MetricFamily, text string) Problem { + return Problem{ + Metric: mf.GetName(), + Text: text, + } +} + +// New creates a new Linter that reads an input stream of Prometheus metrics in +// the Prometheus text exposition format. +func New(r io.Reader) *Linter { + return &Linter{ + r: r, + } +} + +// NewWithMetricFamilies creates a new Linter that reads from a slice of +// MetricFamily protobuf messages. +func NewWithMetricFamilies(mfs []*dto.MetricFamily) *Linter { + return &Linter{ + mfs: mfs, + } +} + +// Lint performs a linting pass, returning a slice of Problems indicating any +// issues found in the metrics stream. The slice is sorted by metric name +// and issue description. +func (l *Linter) Lint() ([]Problem, error) { + var problems []Problem + + if l.r != nil { + d := expfmt.NewDecoder(l.r, expfmt.FmtText) + + mf := &dto.MetricFamily{} + for { + if err := d.Decode(mf); err != nil { + if err == io.EOF { + break + } + + return nil, err + } + + problems = append(problems, lint(mf)...) + } + } + for _, mf := range l.mfs { + problems = append(problems, lint(mf)...) + } + + // Ensure deterministic output. + sort.SliceStable(problems, func(i, j int) bool { + if problems[i].Metric == problems[j].Metric { + return problems[i].Text < problems[j].Text + } + return problems[i].Metric < problems[j].Metric + }) + + return problems, nil +} + +// lint is the entry point for linting a single metric. +func lint(mf *dto.MetricFamily) []Problem { + fns := []func(mf *dto.MetricFamily) []Problem{ + lintHelp, + lintMetricUnits, + lintCounter, + lintHistogramSummaryReserved, + lintMetricTypeInName, + lintReservedChars, + lintCamelCase, + lintUnitAbbreviations, + } + + var problems []Problem + for _, fn := range fns { + problems = append(problems, fn(mf)...) + } + + // TODO(mdlayher): lint rules for specific metrics types. + return problems +} + +// lintHelp detects issues related to the help text for a metric. +func lintHelp(mf *dto.MetricFamily) []Problem { + var problems []Problem + + // Expect all metrics to have help text available. + if mf.Help == nil { + problems = append(problems, newProblem(mf, "no help text")) + } + + return problems +} + +// lintMetricUnits detects issues with metric unit names. +func lintMetricUnits(mf *dto.MetricFamily) []Problem { + var problems []Problem + + unit, base, ok := metricUnits(*mf.Name) + if !ok { + // No known units detected. + return nil + } + + // Unit is already a base unit. + if unit == base { + return nil + } + + problems = append(problems, newProblem(mf, fmt.Sprintf("use base unit %q instead of %q", base, unit))) + + return problems +} + +// lintCounter detects issues specific to counters, as well as patterns that should +// only be used with counters. +func lintCounter(mf *dto.MetricFamily) []Problem { + var problems []Problem + + isCounter := mf.GetType() == dto.MetricType_COUNTER + isUntyped := mf.GetType() == dto.MetricType_UNTYPED + hasTotalSuffix := strings.HasSuffix(mf.GetName(), "_total") + + switch { + case isCounter && !hasTotalSuffix: + problems = append(problems, newProblem(mf, `counter metrics should have "_total" suffix`)) + case !isUntyped && !isCounter && hasTotalSuffix: + problems = append(problems, newProblem(mf, `non-counter metrics should not have "_total" suffix`)) + } + + return problems +} + +// lintHistogramSummaryReserved detects when other types of metrics use names or labels +// reserved for use by histograms and/or summaries. +func lintHistogramSummaryReserved(mf *dto.MetricFamily) []Problem { + // These rules do not apply to untyped metrics. + t := mf.GetType() + if t == dto.MetricType_UNTYPED { + return nil + } + + var problems []Problem + + isHistogram := t == dto.MetricType_HISTOGRAM + isSummary := t == dto.MetricType_SUMMARY + + n := mf.GetName() + + if !isHistogram && strings.HasSuffix(n, "_bucket") { + problems = append(problems, newProblem(mf, `non-histogram metrics should not have "_bucket" suffix`)) + } + if !isHistogram && !isSummary && strings.HasSuffix(n, "_count") { + problems = append(problems, newProblem(mf, `non-histogram and non-summary metrics should not have "_count" suffix`)) + } + if !isHistogram && !isSummary && strings.HasSuffix(n, "_sum") { + problems = append(problems, newProblem(mf, `non-histogram and non-summary metrics should not have "_sum" suffix`)) + } + + for _, m := range mf.GetMetric() { + for _, l := range m.GetLabel() { + ln := l.GetName() + + if !isHistogram && ln == "le" { + problems = append(problems, newProblem(mf, `non-histogram metrics should not have "le" label`)) + } + if !isSummary && ln == "quantile" { + problems = append(problems, newProblem(mf, `non-summary metrics should not have "quantile" label`)) + } + } + } + + return problems +} + +// lintMetricTypeInName detects when metric types are included in the metric name. +func lintMetricTypeInName(mf *dto.MetricFamily) []Problem { + var problems []Problem + n := strings.ToLower(mf.GetName()) + + for i, t := range dto.MetricType_name { + if i == int32(dto.MetricType_UNTYPED) { + continue + } + + typename := strings.ToLower(t) + if strings.Contains(n, "_"+typename+"_") || strings.HasSuffix(n, "_"+typename) { + problems = append(problems, newProblem(mf, fmt.Sprintf(`metric name should not include type '%s'`, typename))) + } + } + return problems +} + +// lintReservedChars detects colons in metric names. +func lintReservedChars(mf *dto.MetricFamily) []Problem { + var problems []Problem + if strings.Contains(mf.GetName(), ":") { + problems = append(problems, newProblem(mf, "metric names should not contain ':'")) + } + return problems +} + +var camelCase = regexp.MustCompile(`[a-z][A-Z]`) + +// lintCamelCase detects metric names and label names written in camelCase. +func lintCamelCase(mf *dto.MetricFamily) []Problem { + var problems []Problem + if camelCase.FindString(mf.GetName()) != "" { + problems = append(problems, newProblem(mf, "metric names should be written in 'snake_case' not 'camelCase'")) + } + + for _, m := range mf.GetMetric() { + for _, l := range m.GetLabel() { + if camelCase.FindString(l.GetName()) != "" { + problems = append(problems, newProblem(mf, "label names should be written in 'snake_case' not 'camelCase'")) + } + } + } + return problems +} + +// lintUnitAbbreviations detects abbreviated units in the metric name. +func lintUnitAbbreviations(mf *dto.MetricFamily) []Problem { + var problems []Problem + n := strings.ToLower(mf.GetName()) + for _, s := range unitAbbreviations { + if strings.Contains(n, "_"+s+"_") || strings.HasSuffix(n, "_"+s) { + problems = append(problems, newProblem(mf, "metric names should not contain abbreviated units")) + } + } + return problems +} + +// metricUnits attempts to detect known unit types used as part of a metric name, +// e.g. "foo_bytes_total" or "bar_baz_milligrams". +func metricUnits(m string) (unit string, base string, ok bool) { + ss := strings.Split(m, "_") + + for unit, base := range units { + // Also check for "no prefix". + for _, p := range append(unitPrefixes, "") { + for _, s := range ss { + // Attempt to explicitly match a known unit with a known prefix, + // as some words may look like "units" when matching suffix. + // + // As an example, "thermometers" should not match "meters", but + // "kilometers" should. + if s == p+unit { + return p + unit, base, true + } + } + } + } + + return "", "", false +} + +// Units and their possible prefixes recognized by this library. More can be +// added over time as needed. +var ( + // map a unit to the appropriate base unit. + units = map[string]string{ + // Base units. + "amperes": "amperes", + "bytes": "bytes", + "celsius": "celsius", // Also allow Celsius because it is common in typical Prometheus use cases. + "grams": "grams", + "joules": "joules", + "kelvin": "kelvin", // SI base unit, used in special cases (e.g. color temperature, scientific measurements). + "meters": "meters", // Both American and international spelling permitted. + "metres": "metres", + "seconds": "seconds", + "volts": "volts", + + // Non base units. + // Time. + "minutes": "seconds", + "hours": "seconds", + "days": "seconds", + "weeks": "seconds", + // Temperature. + "kelvins": "kelvin", + "fahrenheit": "celsius", + "rankine": "celsius", + // Length. + "inches": "meters", + "yards": "meters", + "miles": "meters", + // Bytes. + "bits": "bytes", + // Energy. + "calories": "joules", + // Mass. + "pounds": "grams", + "ounces": "grams", + } + + unitPrefixes = []string{ + "pico", + "nano", + "micro", + "milli", + "centi", + "deci", + "deca", + "hecto", + "kilo", + "kibi", + "mega", + "mibi", + "giga", + "gibi", + "tera", + "tebi", + "peta", + "pebi", + } + + // Common abbreviations that we'd like to discourage. + unitAbbreviations = []string{ + "s", + "ms", + "us", + "ns", + "sec", + "b", + "kb", + "mb", + "gb", + "tb", + "pb", + "m", + "h", + "d", + } +) diff --git a/vendor/github.com/prometheus/client_golang/prometheus/testutil/testutil.go b/vendor/github.com/prometheus/client_golang/prometheus/testutil/testutil.go new file mode 100644 index 0000000..9af60ce --- /dev/null +++ b/vendor/github.com/prometheus/client_golang/prometheus/testutil/testutil.go @@ -0,0 +1,230 @@ +// Copyright 2018 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package testutil provides helpers to test code using the prometheus package +// of client_golang. +// +// While writing unit tests to verify correct instrumentation of your code, it's +// a common mistake to mostly test the instrumentation library instead of your +// own code. Rather than verifying that a prometheus.Counter's value has changed +// as expected or that it shows up in the exposition after registration, it is +// in general more robust and more faithful to the concept of unit tests to use +// mock implementations of the prometheus.Counter and prometheus.Registerer +// interfaces that simply assert that the Add or Register methods have been +// called with the expected arguments. However, this might be overkill in simple +// scenarios. The ToFloat64 function is provided for simple inspection of a +// single-value metric, but it has to be used with caution. +// +// End-to-end tests to verify all or larger parts of the metrics exposition can +// be implemented with the CollectAndCompare or GatherAndCompare functions. The +// most appropriate use is not so much testing instrumentation of your code, but +// testing custom prometheus.Collector implementations and in particular whole +// exporters, i.e. programs that retrieve telemetry data from a 3rd party source +// and convert it into Prometheus metrics. +// +// In a similar pattern, CollectAndLint and GatherAndLint can be used to detect +// metrics that have issues with their name, type, or metadata without being +// necessarily invalid, e.g. a counter with a name missing the “_total” suffix. +package testutil + +import ( + "bytes" + "fmt" + "io" + + "github.com/prometheus/common/expfmt" + + dto "github.com/prometheus/client_model/go" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/internal" +) + +// ToFloat64 collects all Metrics from the provided Collector. It expects that +// this results in exactly one Metric being collected, which must be a Gauge, +// Counter, or Untyped. In all other cases, ToFloat64 panics. ToFloat64 returns +// the value of the collected Metric. +// +// The Collector provided is typically a simple instance of Gauge or Counter, or +// – less commonly – a GaugeVec or CounterVec with exactly one element. But any +// Collector fulfilling the prerequisites described above will do. +// +// Use this function with caution. It is computationally very expensive and thus +// not suited at all to read values from Metrics in regular code. This is really +// only for testing purposes, and even for testing, other approaches are often +// more appropriate (see this package's documentation). +// +// A clear anti-pattern would be to use a metric type from the prometheus +// package to track values that are also needed for something else than the +// exposition of Prometheus metrics. For example, you would like to track the +// number of items in a queue because your code should reject queuing further +// items if a certain limit is reached. It is tempting to track the number of +// items in a prometheus.Gauge, as it is then easily available as a metric for +// exposition, too. However, then you would need to call ToFloat64 in your +// regular code, potentially quite often. The recommended way is to track the +// number of items conventionally (in the way you would have done it without +// considering Prometheus metrics) and then expose the number with a +// prometheus.GaugeFunc. +func ToFloat64(c prometheus.Collector) float64 { + var ( + m prometheus.Metric + mCount int + mChan = make(chan prometheus.Metric) + done = make(chan struct{}) + ) + + go func() { + for m = range mChan { + mCount++ + } + close(done) + }() + + c.Collect(mChan) + close(mChan) + <-done + + if mCount != 1 { + panic(fmt.Errorf("collected %d metrics instead of exactly 1", mCount)) + } + + pb := &dto.Metric{} + m.Write(pb) + if pb.Gauge != nil { + return pb.Gauge.GetValue() + } + if pb.Counter != nil { + return pb.Counter.GetValue() + } + if pb.Untyped != nil { + return pb.Untyped.GetValue() + } + panic(fmt.Errorf("collected a non-gauge/counter/untyped metric: %s", pb)) +} + +// CollectAndCount registers the provided Collector with a newly created +// pedantic Registry. It then calls GatherAndCount with that Registry and with +// the provided metricNames. In the unlikely case that the registration or the +// gathering fails, this function panics. (This is inconsistent with the other +// CollectAnd… functions in this package and has historical reasons. Changing +// the function signature would be a breaking change and will therefore only +// happen with the next major version bump.) +func CollectAndCount(c prometheus.Collector, metricNames ...string) int { + reg := prometheus.NewPedanticRegistry() + if err := reg.Register(c); err != nil { + panic(fmt.Errorf("registering collector failed: %s", err)) + } + result, err := GatherAndCount(reg, metricNames...) + if err != nil { + panic(err) + } + return result +} + +// GatherAndCount gathers all metrics from the provided Gatherer and counts +// them. It returns the number of metric children in all gathered metric +// families together. If any metricNames are provided, only metrics with those +// names are counted. +func GatherAndCount(g prometheus.Gatherer, metricNames ...string) (int, error) { + got, err := g.Gather() + if err != nil { + return 0, fmt.Errorf("gathering metrics failed: %s", err) + } + if metricNames != nil { + got = filterMetrics(got, metricNames) + } + + result := 0 + for _, mf := range got { + result += len(mf.GetMetric()) + } + return result, nil +} + +// CollectAndCompare registers the provided Collector with a newly created +// pedantic Registry. It then calls GatherAndCompare with that Registry and with +// the provided metricNames. +func CollectAndCompare(c prometheus.Collector, expected io.Reader, metricNames ...string) error { + reg := prometheus.NewPedanticRegistry() + if err := reg.Register(c); err != nil { + return fmt.Errorf("registering collector failed: %s", err) + } + return GatherAndCompare(reg, expected, metricNames...) +} + +// GatherAndCompare gathers all metrics from the provided Gatherer and compares +// it to an expected output read from the provided Reader in the Prometheus text +// exposition format. If any metricNames are provided, only metrics with those +// names are compared. +func GatherAndCompare(g prometheus.Gatherer, expected io.Reader, metricNames ...string) error { + got, err := g.Gather() + if err != nil { + return fmt.Errorf("gathering metrics failed: %s", err) + } + if metricNames != nil { + got = filterMetrics(got, metricNames) + } + var tp expfmt.TextParser + wantRaw, err := tp.TextToMetricFamilies(expected) + if err != nil { + return fmt.Errorf("parsing expected metrics failed: %s", err) + } + want := internal.NormalizeMetricFamilies(wantRaw) + + return compare(got, want) +} + +// compare encodes both provided slices of metric families into the text format, +// compares their string message, and returns an error if they do not match. +// The error contains the encoded text of both the desired and the actual +// result. +func compare(got, want []*dto.MetricFamily) error { + var gotBuf, wantBuf bytes.Buffer + enc := expfmt.NewEncoder(&gotBuf, expfmt.FmtText) + for _, mf := range got { + if err := enc.Encode(mf); err != nil { + return fmt.Errorf("encoding gathered metrics failed: %s", err) + } + } + enc = expfmt.NewEncoder(&wantBuf, expfmt.FmtText) + for _, mf := range want { + if err := enc.Encode(mf); err != nil { + return fmt.Errorf("encoding expected metrics failed: %s", err) + } + } + + if wantBuf.String() != gotBuf.String() { + return fmt.Errorf(` +metric output does not match expectation; want: + +%s +got: + +%s`, wantBuf.String(), gotBuf.String()) + + } + return nil +} + +func filterMetrics(metrics []*dto.MetricFamily, names []string) []*dto.MetricFamily { + var filtered []*dto.MetricFamily + for _, m := range metrics { + for _, name := range names { + if m.GetName() == name { + filtered = append(filtered, m) + break + } + } + } + return filtered +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 95a8268..ab5bbf5 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -19,6 +19,8 @@ github.com/prometheus/client_golang/prometheus github.com/prometheus/client_golang/prometheus/internal github.com/prometheus/client_golang/prometheus/promauto github.com/prometheus/client_golang/prometheus/promhttp +github.com/prometheus/client_golang/prometheus/testutil +github.com/prometheus/client_golang/prometheus/testutil/promlint # github.com/prometheus/client_model v0.2.0 github.com/prometheus/client_model/go # github.com/prometheus/common v0.15.0