diff --git a/contrib/upgrade-notes/latest.md b/contrib/upgrade-notes/latest.md index 11e36bd1570..21fabe08d8d 100644 --- a/contrib/upgrade-notes/latest.md +++ b/contrib/upgrade-notes/latest.md @@ -60,3 +60,5 @@ tetragon: `tetragon_event_cache_fetch_failures_total{entry_type=""}`. * `tetragon_event_cache_accesses_total` metric is renamed to `tetragon_event_cache_inserts_total`. * `tetragon_event_cache_retries_total` metric is renamed to `tetragon_event_cache_fetch_retries_total`. +* `tetragon_errors_total{type="event_missing_process_info"}` metric is replaced by + `tetragon_events_missing_process_info_total`. diff --git a/docs/content/en/docs/reference/metrics.md b/docs/content/en/docs/reference/metrics.md index 61a013a839d..1297aa9cb2f 100644 --- a/docs/content/en/docs/reference/metrics.md +++ b/docs/content/en/docs/reference/metrics.md @@ -51,7 +51,7 @@ The total number of Tetragon errors. For internal use only. | label | values | | ----- | ------ | -| `type ` | `event_finalize_process_info_failed, event_missing_process_info, handler_error, process_metadata_username_failed, process_metadata_username_ignored_not_in_host_namespaces, process_pid_tid_mismatch` | +| `type ` | `event_finalize_process_info_failed, handler_error, process_metadata_username_failed, process_metadata_username_ignored_not_in_host_namespaces, process_pid_tid_mismatch` | ### `tetragon_event_cache_entries` @@ -99,6 +99,10 @@ Total number of events exported Timestamp of the most recent event to be exported +### `tetragon_events_missing_process_info_total` + +Number of events missing process info. + ### `tetragon_flags_total` The total number of Tetragon flags. For internal use only. diff --git a/pkg/metrics/errormetrics/errormetrics.go b/pkg/metrics/errormetrics/errormetrics.go index 1967066effb..ee7a722321f 100644 --- a/pkg/metrics/errormetrics/errormetrics.go +++ b/pkg/metrics/errormetrics/errormetrics.go @@ -17,8 +17,6 @@ type ErrorType int const ( // Tid and Pid mismatch that could affect BPF and user space caching logic ProcessPidTidMismatch ErrorType = iota - // An event is missing process info. - EventMissingProcessInfo // An error occurred in an event handler. HandlerError // An event finalizer on Process failed @@ -32,7 +30,6 @@ const ( var errorTypeLabelValues = map[ErrorType]string{ ProcessPidTidMismatch: "process_pid_tid_mismatch", - EventMissingProcessInfo: "event_missing_process_info", HandlerError: "handler_error", EventFinalizeProcessInfoFailed: "event_finalize_process_info_failed", ProcessMetadataUsernameFailed: "process_metadata_username_failed", diff --git a/pkg/metrics/eventmetrics/eventmetrics.go b/pkg/metrics/eventmetrics/eventmetrics.go index d599112302c..b26503259b5 100644 --- a/pkg/metrics/eventmetrics/eventmetrics.go +++ b/pkg/metrics/eventmetrics/eventmetrics.go @@ -14,7 +14,6 @@ import ( "github.com/cilium/tetragon/pkg/logger" "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" - "github.com/cilium/tetragon/pkg/metrics/errormetrics" "github.com/cilium/tetragon/pkg/metrics/syscallmetrics" "github.com/cilium/tetragon/pkg/option" "github.com/cilium/tetragon/pkg/reader/exec" @@ -68,12 +67,21 @@ var ( Help: "Policy events calls observed.", ConstLabels: nil, }, []string{"policy", "hook"}) + + missingProcessInfo = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: consts.MetricsNamespace, + Name: "events_missing_process_info_total", + Help: "Number of events missing process info.", + }) ) func RegisterHealthMetrics(group metrics.Group) { - group.MustRegister(FlagCount) - group.MustRegister(NotifyOverflowedEvents) - group.MustRegister(NewBPFCollector()) + group.MustRegister( + FlagCount, + NotifyOverflowedEvents, + NewBPFCollector(), + missingProcessInfo, + ) } func InitHealthMetrics() { @@ -110,7 +118,7 @@ func GetProcessInfo(process *tetragon.Process) (binary, pod, workload, namespace pod = process.Pod.Name } } else { - errormetrics.ErrorTotalInc(errormetrics.EventMissingProcessInfo) + missingProcessInfo.Inc() } return binary, pod, workload, namespace }