Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add missed probes metrics #1941

Merged
merged 8 commits into from
Aug 6, 2024
6 changes: 3 additions & 3 deletions cmd/tetragon/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,9 +284,6 @@ func tetragonExecute() error {
return fmt.Errorf("Failed to move old tetragon base directory: %w", err)
}

// we need file system mounts setup above before we detect features
log.Info("BPF detected features: ", bpf.LogFeatures())

if option.Config.PprofAddr != "" {
go func() {
if err := servePprof(option.Config.PprofAddr); err != nil {
Expand Down Expand Up @@ -393,6 +390,9 @@ func tetragonExecute() error {
return err
}

// needs BTF, so calling it after InitCachedBTF
log.Info("BPF detected features: ", bpf.LogFeatures())

if err := observer.InitDataCache(option.Config.DataCacheSize); err != nil {
return err
}
Expand Down
18 changes: 18 additions & 0 deletions docs/content/en/docs/reference/metrics.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

128 changes: 119 additions & 9 deletions pkg/bpf/detect.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ import (

"github.com/cilium/ebpf"
"github.com/cilium/ebpf/asm"
ebtf "github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/features"
"github.com/cilium/ebpf/link"
"github.com/cilium/tetragon/pkg/arch"
"github.com/cilium/tetragon/pkg/btf"
"github.com/cilium/tetragon/pkg/logger"
"golang.org/x/sys/unix"
)
Expand All @@ -30,13 +32,15 @@ type Feature struct {
}

// Cached feature-detection results, one Feature value per probed capability.
var (
kprobeMulti Feature
uprobeMulti Feature
buildid Feature
modifyReturn Feature
modifyReturnSyscall Feature
linkPin Feature
lsm Feature
kprobeMulti Feature
uprobeMulti Feature
buildid Feature
modifyReturn Feature
modifyReturnSyscall Feature
linkPin Feature
lsm Feature
// missedStatsKprobe caches whether bpf_link_info::perf_event::kprobe has a
// "missed" field. Its init member also guards detection of
// missedStatsKprobeMulti (see detectMissedStatsOnce).
missedStatsKprobe Feature
// missedStatsKprobeMulti caches whether bpf_link_info::kprobe_multi has a
// "missed" field.
missedStatsKprobeMulti Feature
)

func HasOverrideHelper() bool {
Expand Down Expand Up @@ -311,9 +315,115 @@ func HasLinkPin() bool {
return linkPin.detected
}

// detectMissedStats inspects the BTF spec obtained from btf.NewBTF() and
// reports whether struct bpf_link_info carries a "missed" counter for
// perf-event kprobe links and for kprobe_multi links.
//
// The first return value is true when bpf_link_info::perf_event::kprobe has a
// "missed" member; the second is true when bpf_link_info::kprobe_multi has
// one. If BTF cannot be loaded, bpf_link_info is not found, or the type layout
// is not the expected one, the affected result is false.
func detectMissedStats() (bool, bool) {
	spec, err := btf.NewBTF()
	if err != nil {
		return false, false
	}

	var linkInfo *ebtf.Struct
	if err := spec.TypeByName("bpf_link_info", &linkInfo); err != nil {
		return false, false
	}

	// The per-link-type union is looked up by position: it must be the
	// fourth member of bpf_link_info.
	if len(linkInfo.Members) < 4 {
		return false, false
	}
	perLinkType, ok := linkInfo.Members[3].Type.(*ebtf.Union)
	if !ok {
		return false, false
	}

	// structHasMissed reports whether t is a struct with a member named
	// "missed".
	structHasMissed := func(t ebtf.Type) bool {
		st, isStruct := t.(*ebtf.Struct)
		if !isStruct {
			return false
		}
		for i := range st.Members {
			if st.Members[i].Name == "missed" {
				return true
			}
		}
		return false
	}

	// perfEventHasMissed walks bpf_link_info::perf_event -> (second member,
	// a union) -> kprobe and checks that struct for a "missed" member.
	perfEventHasMissed := func(t ebtf.Type) bool {
		perfEvent, isStruct := t.(*ebtf.Struct)
		if !isStruct || len(perfEvent.Members) < 2 {
			return false
		}
		variants, isUnion := perfEvent.Members[1].Type.(*ebtf.Union)
		if !isUnion {
			return false
		}
		for _, variant := range variants.Members {
			if variant.Name == "kprobe" {
				return structHasMissed(variant.Type)
			}
		}
		return false
	}

	var perfKprobe, multiKprobe bool
	for _, member := range perLinkType.Members {
		switch member.Name {
		case "kprobe_multi":
			multiKprobe = structHasMissed(member.Type)
		case "perf_event":
			perfKprobe = perfEventHasMissed(member.Type)
		}
	}

	return perfKprobe, multiKprobe
}

// detectMissedStatsOnce runs detectMissedStats through missedStatsKprobe.init
// and stores both results. A single guard covers both features, so
// missedStatsKprobeMulti.init is never used.
// NOTE(review): init is presumably a sync.Once — confirm against Feature.
func detectMissedStatsOnce() {
	missedStatsKprobe.init.Do(func() {
		missedStatsKprobe.detected, missedStatsKprobeMulti.detected = detectMissedStats()
	})
}

// HasMissedStatsPerfEvent reports whether bpf_link_info::perf_event::kprobe
// has a "missed" field, i.e. whether missed counts can be read for
// perf-event based kprobe links. Detection is triggered on first use via
// detectMissedStatsOnce and the stored result is returned.
func HasMissedStatsPerfEvent() bool {
detectMissedStatsOnce()
return missedStatsKprobe.detected
}

// HasMissedStatsKprobeMulti reports whether bpf_link_info::kprobe_multi has
// a "missed" field, i.e. whether missed counts can be read for kprobe_multi
// links. Detection is triggered on first use via detectMissedStatsOnce and
// the stored result is returned.
func HasMissedStatsKprobeMulti() bool {
detectMissedStatsOnce()
return missedStatsKprobeMulti.detected
}

// LogFeatures returns a single-line, comma-separated summary of the BPF
// feature-detection results, suitable for logging. Each Has* helper is
// called to obtain the value that is printed.
func LogFeatures() string {
return fmt.Sprintf("override_return: %t, buildid: %t, kprobe_multi: %t, uprobe_multi %t, fmodret: %t, fmodret_syscall: %t, signal: %t, large: %t, link_pin: %t, lsm: %t",
return fmt.Sprintf("override_return: %t, buildid: %t, kprobe_multi: %t, uprobe_multi %t, fmodret: %t, fmodret_syscall: %t, signal: %t, large: %t, link_pin: %t, lsm: %t, missed_stats_kprobe_multi: %t, missed_stats_kprobe: %t",
HasOverrideHelper(), HasBuildId(), HasKprobeMulti(), HasUprobeMulti(),
HasModifyReturn(), HasModifyReturnSyscall(), HasSignalHelper(), HasProgramLargeSize(),
HasLinkPin(), HasLSMPrograms())
HasLinkPin(), HasLSMPrograms(), HasMissedStatsKprobeMulti(), HasMissedStatsPerfEvent())
}
83 changes: 83 additions & 0 deletions pkg/metrics/kprobemetrics/collector.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Tetragon

package kprobemetrics

import (
"github.com/cilium/ebpf/link"
"github.com/cilium/tetragon/pkg/bpf"
"github.com/cilium/tetragon/pkg/metrics"
"github.com/cilium/tetragon/pkg/sensors"
"github.com/cilium/tetragon/pkg/sensors/program"
"github.com/prometheus/client_golang/prometheus"
"golang.org/x/sys/unix"
)

// NewBPFCollector returns a Prometheus collector for the missed-probe
// metrics MissedLink and MissedProg. Samples are produced by collect;
// collectForDocs supplies example samples for generated documentation.
func NewBPFCollector() prometheus.Collector {
	missedMetrics := metrics.CustomMetrics{MissedLink, MissedProg}
	return metrics.NewCustomCollector(missedMetrics, collect, collectForDocs)
}

// collectLink sends one MissedLink sample for the link attached to load,
// labelled with the program's policy and attach point.
//
// Nothing is sent when the program has no link, when the link info cannot be
// read, or when the running kernel lacks the "missed" field for the link's
// type. For link types without missed-stat handling (perf-event links that
// are not kprobe/kretprobe, and any other link type) a sample with value 0
// is still sent.
func collectLink(ch chan<- prometheus.Metric, load *program.Program) {
	if load.Link == nil {
		return
	}

	linkInfo, err := load.Link.Info()
	if err != nil {
		return
	}

	var count uint64

	switch linkInfo.Type {
	case link.KprobeMultiType:
		if !bpf.HasMissedStatsKprobeMulti() {
			return
		}
		multiInfo := linkInfo.KprobeMulti()
		// The second result of Missed is deliberately ignored; the count is
		// used as returned.
		count, _ = multiInfo.Missed()
	case link.PerfEventType:
		if !bpf.HasMissedStatsPerfEvent() {
			return
		}
		perfInfo := linkInfo.PerfEvent()
		if perfInfo.Type == unix.BPF_PERF_EVENT_KPROBE || perfInfo.Type == unix.BPF_PERF_EVENT_KRETPROBE {
			kprobeInfo := perfInfo.Kprobe()
			count, _ = kprobeInfo.Missed()
		}
	}

	ch <- MissedLink.MustMetric(float64(count), load.Policy, load.Attach)
}

// collectProg sends one MissedProg sample carrying the recursion-miss count
// of load's BPF program, labelled with the program's policy and attach
// point. Nothing is sent when the program info cannot be read.
func collectProg(ch chan<- prometheus.Metric, load *program.Program) {
	progInfo, err := load.Prog.Info()
	if err != nil {
		return
	}

	// The second result of RecursionMisses is deliberately ignored; the
	// count is used as returned.
	recursionMisses, _ := progInfo.RecursionMisses()
	sample := MissedProg.MustMetric(float64(recursionMisses), load.Policy, load.Attach)
	ch <- sample
}

// collect emits the link-level and program-level missed metrics for every
// program returned by sensors.AllPrograms, link first and then program.
func collect(ch chan<- prometheus.Metric) {
	for _, loaded := range sensors.AllPrograms() {
		collectLink(ch, loaded)
		collectProg(ch, loaded)
	}
}

// collectForDocs emits one zero-valued example sample per missed metric,
// using fixed example label values, for documentation generation.
func collectForDocs(ch chan<- prometheus.Metric) {
	const (
		examplePolicy = "monitor_panic"
		exampleAttach = "sys_panic"
	)

	ch <- MissedLink.MustMetric(0, examplePolicy, exampleAttach)
	ch <- MissedProg.MustMetric(0, examplePolicy, exampleAttach)
}
29 changes: 29 additions & 0 deletions pkg/metrics/kprobemetrics/missed.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Tetragon

package kprobemetrics

import (
"github.com/cilium/tetragon/pkg/metrics"
"github.com/cilium/tetragon/pkg/metrics/consts"
)

var (
	// MissedLink counts missed probes as reported by the BPF link,
	// labelled by policy and attach point.
	MissedLink = metrics.MustNewCustomCounter(metrics.NewOpts(
		consts.MetricsNamespace, "", "missed_link_probes_total",
		"The total number of Tetragon probe missed by link.",
		nil, nil, []metrics.UnconstrainedLabel{
			{Name: "policy", ExampleValue: "monitor_panic"},
			{Name: "attach", ExampleValue: "sys_panic"},
		},
	))

	// MissedProg counts missed probes as reported by the BPF program,
	// labelled by policy and attach point.
	MissedProg = metrics.MustNewCustomCounter(metrics.NewOpts(
		consts.MetricsNamespace, "", "missed_prog_probes_total",
		"The total number of Tetragon probe missed by program.",
		nil, nil, []metrics.UnconstrainedLabel{
			{Name: "policy", ExampleValue: "monitor_panic"},
			{Name: "attach", ExampleValue: "sys_panic"},
		},
	))
)
4 changes: 3 additions & 1 deletion pkg/metricsconfig/healthmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ var (

// GetHealthGroup returns the health metrics group. The group is created
// inside healthMetricsOnce.Do and the same healthMetrics value is returned
// on every call.
func GetHealthGroup() metrics.Group {
healthMetricsOnce.Do(func() {
healthMetrics = metrics.NewMetricsGroup(true)
healthMetrics = metrics.NewMetricsGroup(false)
})
return healthMetrics
}
Expand Down Expand Up @@ -102,4 +102,6 @@ func registerHealthMetrics(group metrics.Group) {
group.MustRegister(policystatemetrics.NewPolicyStateCollector())
// gRPC metrics
group.MustRegister(grpcmetrics.NewServerMetrics())
// missed metrics
group.MustRegister(kprobemetrics.NewBPFCollector())
}
16 changes: 9 additions & 7 deletions pkg/sensors/base/base.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,45 +21,47 @@ const (
)

var (
basePolicy = "__base__"

Execve = program.Builder(
config.ExecObj(),
"sched/sched_process_exec",
"tracepoint/sys_execve",
"event_execve",
"execve",
)
).SetPolicy(basePolicy)

ExecveBprmCommit = program.Builder(
"bpf_execve_bprm_commit_creds.o",
"security_bprm_committing_creds",
"kprobe/security_bprm_committing_creds",
"tg_kp_bprm_committing_creds",
"kprobe",
)
).SetPolicy(basePolicy)

Exit = program.Builder(
"bpf_exit.o",
"acct_process",
"kprobe/acct_process",
"event_exit",
"kprobe",
)
).SetPolicy(basePolicy)

Fork = program.Builder(
"bpf_fork.o",
"wake_up_new_task",
"kprobe/wake_up_new_task",
"kprobe_pid_clear",
"kprobe",
)
).SetPolicy(basePolicy)

CgroupRmdir = program.Builder(
"bpf_cgroup.o",
"cgroup/cgroup_rmdir",
"raw_tracepoint/cgroup_rmdir",
"tg_cgroup_rmdir",
"raw_tracepoint",
)
).SetPolicy(basePolicy)

/* Event Ring map */
TCPMonMap = program.MapBuilder("tcpmon_map", Execve)
Expand All @@ -84,12 +86,12 @@ var (
MatchBinariesSetMap = program.MapBuilder(mbset.MapName, Execve)

sensor = sensors.Sensor{
Name: "__base__",
Name: basePolicy,
}
sensorInit sync.Once

sensorTest = sensors.Sensor{
Name: "__base__",
Name: basePolicy,
}
sensorTestInit sync.Once
)
Expand Down
Loading
Loading