From c0830d498413938df86fd7adba38c130235a72fe Mon Sep 17 00:00:00 2001 From: Mahe Tardy Date: Tue, 13 Feb 2024 20:30:21 +0100 Subject: [PATCH] pkg/metrics: add new gauge metrics for loaded tracing policies This adds a few gauges to indicate which state the loaded tracing policies are in. These metric values are generated at collection time by listing the tracing policies through the internal sensor manager. Signed-off-by: Mahe Tardy --- pkg/metrics/metricsconfig/initmetrics.go | 2 + .../policystatemetrics/policystatemetrics.go | 85 +++++++++++++++++++ .../policystatusmetrics_test.go | 51 +++++++++++ 3 files changed, 138 insertions(+) create mode 100644 pkg/metrics/policystatemetrics/policystatemetrics.go create mode 100644 pkg/metrics/policystatemetrics/policystatusmetrics_test.go diff --git a/pkg/metrics/metricsconfig/initmetrics.go b/pkg/metrics/metricsconfig/initmetrics.go index 4109597f899..46024c41005 100644 --- a/pkg/metrics/metricsconfig/initmetrics.go +++ b/pkg/metrics/metricsconfig/initmetrics.go @@ -13,6 +13,7 @@ import ( "github.com/cilium/tetragon/pkg/metrics/mapmetrics" "github.com/cilium/tetragon/pkg/metrics/opcodemetrics" "github.com/cilium/tetragon/pkg/metrics/policyfiltermetrics" + "github.com/cilium/tetragon/pkg/metrics/policystatemetrics" "github.com/cilium/tetragon/pkg/metrics/processexecmetrics" "github.com/cilium/tetragon/pkg/metrics/ratelimitmetrics" "github.com/cilium/tetragon/pkg/metrics/ringbufmetrics" @@ -43,6 +44,7 @@ func InitAllMetrics(registry *prometheus.Registry) { observer.InitMetrics(registry) tracing.InitMetrics(registry) ratelimitmetrics.InitMetrics(registry) + policystatemetrics.InitMetrics(registry) // register BPF collectors registry.MustRegister(mapmetrics.NewBPFCollector( diff --git a/pkg/metrics/policystatemetrics/policystatemetrics.go b/pkg/metrics/policystatemetrics/policystatemetrics.go new file mode 100644 index 00000000000..d64d946070c --- /dev/null +++ b/pkg/metrics/policystatemetrics/policystatemetrics.go @@ -0,0 +1,85 @@ +// 
SPDX-License-Identifier: Apache-2.0 +// Copyright Authors of Tetragon + +package policystatemetrics + +import ( + "context" + "strings" + + "github.com/cilium/tetragon/api/v1/tetragon" + "github.com/cilium/tetragon/pkg/logger" + "github.com/cilium/tetragon/pkg/metrics/consts" + "github.com/cilium/tetragon/pkg/observer" + "github.com/cilium/tetragon/pkg/sensors" + "github.com/prometheus/client_golang/prometheus" +) + +type policyStateCollector struct { + descriptor *prometheus.Desc + sensorManager *sensors.Manager +} + +func InitMetrics(registry *prometheus.Registry) { + registry.MustRegister(newPolicyStateCollector(observer.GetSensorManager())) +} + +// This metric collector converts the output of ListTracingPolicies into a few +// gauges metrics on collection. Thus, it needs a sensor manager to query. +func newPolicyStateCollector(sensorManager *sensors.Manager) *policyStateCollector { + return &policyStateCollector{ + descriptor: prometheus.NewDesc( + prometheus.BuildFQName(consts.MetricsNamespace, "", "tracingpolicy_loaded"), + "The number of loaded tracing policy by state.", + []string{"state"}, nil, + ), + sensorManager: sensorManager, + } +} + +func (c *policyStateCollector) Describe(ch chan<- *prometheus.Desc) { + ch <- c.descriptor +} + +func (c *policyStateCollector) Collect(ch chan<- prometheus.Metric) { + if c.sensorManager == nil { + logger.GetLogger().Debug("failed retrieving the sensor manager: manager is nil") + return + } + list, err := c.sensorManager.ListTracingPolicies(context.Background()) + if err != nil { + logger.GetLogger().WithError(err).Warn("error listing tracing policies to collect policies state") + return + } + + counters := map[tetragon.TracingPolicyState]int{} + for _, policy := range list.Policies { + state := policy.State + counters[state]++ + } + + ch <- prometheus.MustNewConstMetric( + c.descriptor, + prometheus.GaugeValue, + float64(counters[tetragon.TracingPolicyState_LOAD_ERROR]), + 
strings.ToLower(tetragon.TracingPolicyState_LOAD_ERROR.String()), + ) + ch <- prometheus.MustNewConstMetric( + c.descriptor, + prometheus.GaugeValue, + float64(counters[tetragon.TracingPolicyState_ERROR]), + strings.ToLower(tetragon.TracingPolicyState_ERROR.String()), + ) + ch <- prometheus.MustNewConstMetric( + c.descriptor, + prometheus.GaugeValue, + float64(counters[tetragon.TracingPolicyState_DISABLED]), + strings.ToLower(tetragon.TracingPolicyState_DISABLED.String()), + ) + ch <- prometheus.MustNewConstMetric( + c.descriptor, + prometheus.GaugeValue, + float64(counters[tetragon.TracingPolicyState_ENABLED]), + strings.ToLower(tetragon.TracingPolicyState_ENABLED.String()), + ) +} diff --git a/pkg/metrics/policystatemetrics/policystatusmetrics_test.go b/pkg/metrics/policystatemetrics/policystatusmetrics_test.go new file mode 100644 index 00000000000..0673297f029 --- /dev/null +++ b/pkg/metrics/policystatemetrics/policystatusmetrics_test.go @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Authors of Tetragon + +package policystatemetrics + +import ( + "context" + "fmt" + "io" + "strings" + "testing" + + tus "github.com/cilium/tetragon/pkg/testutils/sensors" + "github.com/cilium/tetragon/pkg/tracingpolicy" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/assert" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func Test_policyStatusCollector_Collect(t *testing.T) { + expectedMetrics := func(disabled, enabled, err, load_error int) io.Reader { + return strings.NewReader(fmt.Sprintf(`# HELP tetragon_tracingpolicy_loaded The number of loaded tracing policy by state. 
+# TYPE tetragon_tracingpolicy_loaded gauge +tetragon_tracingpolicy_loaded{state="disabled"} %d +tetragon_tracingpolicy_loaded{state="enabled"} %d +tetragon_tracingpolicy_loaded{state="error"} %d +tetragon_tracingpolicy_loaded{state="load_error"} %d +`, disabled, enabled, err, load_error)) + } + + reg := prometheus.NewRegistry() + manager := tus.GetTestSensorManager(context.TODO(), t).Manager + + collector := newPolicyStateCollector(manager) + reg.Register(collector) + + err := manager.AddTracingPolicy(context.TODO(), &tracingpolicy.GenericTracingPolicy{ + Metadata: v1.ObjectMeta{ + Name: "pizza", + }, + }) + assert.NoError(t, err) + err = testutil.CollectAndCompare(collector, expectedMetrics(0, 1, 0, 0)) + assert.NoError(t, err) + + err = manager.DisableTracingPolicy(context.TODO(), "pizza") + assert.NoError(t, err) + err = testutil.CollectAndCompare(collector, expectedMetrics(1, 0, 0, 0)) + assert.NoError(t, err) +}