From 6554bbbd286360509777f8224e4ad4ca15dcbc9a Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 21 Jun 2024 14:29:52 -0700 Subject: [PATCH] tetragon: Add debug interface to track cgroups to namespace mappings Debugging BPF and some kernel functions I want to understand cgroup to namespace/workload/kind mappings at event side. This patch maintains a stable mapping between cgroups and human readable namespaces. The end goal is to filter out noisy namespaces from execs which will be follow up series. This is minimally useful as is. To support this just extend the use of namespace filters from kprobe and tracepoints into a more general space where we can hook selectors. Next steps we can push namespace filters into other sensor types e.g. loader. Signed-off-by: John Fastabend --- bpf/process/bpf_execve_event.c | 2 + bpf/process/policy_filter.h | 10 ++- pkg/policyfilter/disabled.go | 8 ++- pkg/policyfilter/namespace.go | 83 +++++++++++++++++++++ pkg/policyfilter/policyfilter.go | 8 ++- pkg/policyfilter/rthooks/rthooks.go | 8 ++- pkg/policyfilter/state.go | 92 ++++++++++++++++++++++-- pkg/policyfilter/state_test.go | 12 ++-- pkg/sensors/tracing/policyfilter_test.go | 4 +- 9 files changed, 207 insertions(+), 20 deletions(-) create mode 100644 pkg/policyfilter/namespace.go diff --git a/bpf/process/bpf_execve_event.c b/bpf/process/bpf_execve_event.c index 0d05447a17c..f190c3498ac 100644 --- a/bpf/process/bpf_execve_event.c +++ b/bpf/process/bpf_execve_event.c @@ -11,6 +11,8 @@ #include "bpf_helpers.h" #include "bpf_rate.h" +#include "policy_filter.h" + char _license[] __attribute__((section("license"), used)) = "Dual BSD/GPL"; struct { diff --git a/bpf/process/policy_filter.h b/bpf/process/policy_filter.h index 04fd2abbcca..61563b6953a 100644 --- a/bpf/process/policy_filter.h +++ b/bpf/process/policy_filter.h @@ -6,7 +6,15 @@ #include "bpf_tracing.h" -#define POLICY_FILTER_MAX_POLICIES 128 +#define POLICY_FILTER_MAX_POLICIES 128 +#define POLICY_FILTER_MAX_NAMESPACES 1024 + +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, POLICY_FILTER_MAX_NAMESPACES); + __uint(key_size, sizeof(u64)); + __uint(value_size, sizeof(u64)); +} tg_cgroup_namespace_map SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); diff --git a/pkg/policyfilter/disabled.go b/pkg/policyfilter/disabled.go index 3282af4e31f..b882cf3605a 100644 --- a/pkg/policyfilter/disabled.go +++ b/pkg/policyfilter/disabled.go @@ -31,12 +31,12 @@ func (s *disabled) DelPolicy(polID PolicyID) error { return fmt.Errorf("policyfilter is disabled") } -func (s *disabled) AddPodContainer(podID PodID, namespace string, podLabels labels.Labels, +func (s *disabled) AddPodContainer(podID PodID, namespace, workload, kind string, podLabels labels.Labels, containerID string, cgID CgroupID, containerName string) error { return nil } -func (s *disabled) UpdatePod(podID PodID, namespace string, podLabels labels.Labels, +func (s *disabled) UpdatePod(podID PodID, namespace, workload, kind string, podLabels labels.Labels, containerIDs []string, containerNames []string) error { return nil } @@ -55,3 +55,7 @@ func (s *disabled) RegisterPodHandlers(podInformer cache.SharedIndexInformer) { func (s *disabled) Close() error { return nil } + +func (s *disabled) GetNsId(stateID StateID) (*NSID, bool) { + return nil, false +} diff --git a/pkg/policyfilter/namespace.go b/pkg/policyfilter/namespace.go new file mode 100644 index 00000000000..a73a87f8cf6 --- /dev/null +++ b/pkg/policyfilter/namespace.go @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Authors of Tetragon + +package policyfilter + +import ( + "fmt" + "os" + "path" + "path/filepath" + + "github.com/cilium/ebpf" + "github.com/cilium/tetragon/pkg/bpf" + "github.com/cilium/tetragon/pkg/option" + "github.com/cilium/tetragon/pkg/sensors/exec/config" + lru "github.com/hashicorp/golang-lru/v2" +) + +const ( + CgrpNsMapName = "tg_cgroup_namespace_map" + namespaceCacheSize = 1024 +) + +type NSID struct { + Namespace string + Workload string + Kind string +} + +// NamespaceMap is a simple wrapper for ebpf.Map so that we can write methods for it +type NamespaceMap struct { + cgroupIdMap *ebpf.Map + nsIdMap *lru.Cache[StateID, NSID] + nsNameMap *lru.Cache[NSID, StateID] + id StateID +} + +// newNamespaceMap returns a new namespace mapping. The namespace map consists of +// two pieces. First a cgroup to ID map. The ID is useful for BPF so we can avoid +// strings in BPF side. Then a stable ID to namespace mapping. +func newNamespaceMap() (*NamespaceMap, error) { + idCache, err := lru.New[StateID, NSID](namespaceCacheSize) + if err != nil { + return nil, fmt.Errorf("create namespace ID cache failed") + } + nameCache, err := lru.New[NSID, StateID](namespaceCacheSize) + if err != nil { + return nil, fmt.Errorf("create namespace name cache failed") + } + + objName := config.ExecObj() + objPath := path.Join(option.Config.HubbleLib, objName) + spec, err := ebpf.LoadCollectionSpec(objPath) + if err != nil { + return nil, fmt.Errorf("loading spec for %s failed: %w", objPath, err) + } + nsMapSpec, ok := spec.Maps[CgrpNsMapName] + if !ok { + return nil, fmt.Errorf("%s not found in %s", CgrpNsMapName, objPath) + } + + ret, err := ebpf.NewMap(nsMapSpec) + if err != nil { + return nil, err + } + + mapDir := bpf.MapPrefixPath() + pinPath := filepath.Join(mapDir, CgrpNsMapName) + os.Remove(pinPath) + os.Mkdir(mapDir, os.ModeDir) + err = ret.Pin(pinPath) + if err != nil { + ret.Close() + return nil, fmt.Errorf("failed to pin Namespace map in %s: %w", pinPath, err) + } + + return &NamespaceMap{ + cgroupIdMap: ret, + nsIdMap: idCache, + nsNameMap: nameCache, + id: 1, + }, err +} diff --git a/pkg/policyfilter/policyfilter.go b/pkg/policyfilter/policyfilter.go index 71128481188..ba3c8f2400e 100644 --- a/pkg/policyfilter/policyfilter.go +++ b/pkg/policyfilter/policyfilter.go @@ -98,12 +98,12 @@ type State interface { // AddPodContainer informs policyfilter about a new container and its cgroup id in a pod. // The pod might or might not have been encountered before. // This method is intended to update policyfilter state from container hooks - AddPodContainer(podID PodID, namespace string, podLabels labels.Labels, + AddPodContainer(podID PodID, namespace, workload, kind string, podLabels labels.Labels, containerID string, cgID CgroupID, containerName string) error // UpdatePod updates the pod state for a pod, where containerIDs contains all the container ids for the given pod. // This method is intended to be used from k8s watchers (where no cgroup information is available) - UpdatePod(podID PodID, namespace string, podLabels labels.Labels, + UpdatePod(podID PodID, namespace, workload, kind string, podLabels labels.Labels, containerIDs []string, containerNames []string) error // DelPodContainer informs policyfilter that a container was deleted from a pod @@ -111,6 +111,10 @@ type State interface { // DelPod informs policyfilter that a pod has been deleted DelPod(podID PodID) error + // Report opaque cgroup ID to nsId mapping. This method is intended to allow inspecting + // and reporting the state of the system to subsystems and tooling. + GetNsId(stateID StateID) (*NSID, bool) + // RegisterPodHandlers can be used to register appropriate pod handlers to a pod informer // that for keeping the policy filter state up-to-date. RegisterPodHandlers(podInformer cache.SharedIndexInformer) diff --git a/pkg/policyfilter/rthooks/rthooks.go b/pkg/policyfilter/rthooks/rthooks.go index 4a91b79151e..44fa3c1f297 100644 --- a/pkg/policyfilter/rthooks/rthooks.go +++ b/pkg/policyfilter/rthooks/rthooks.go @@ -12,6 +12,7 @@ import ( "github.com/cilium/tetragon/pkg/logger" "github.com/cilium/tetragon/pkg/metrics/policyfiltermetrics" "github.com/cilium/tetragon/pkg/policyfilter" + "github.com/cilium/tetragon/pkg/process" "github.com/cilium/tetragon/pkg/rthooks" "github.com/google/uuid" "github.com/sirupsen/logrus" @@ -93,6 +94,9 @@ func createContainerHook(_ context.Context, arg *rthooks.CreateContainerArg) err } namespace := pod.ObjectMeta.Namespace + workloadMeta, workloadKind := process.GetWorkloadMetaFromPod(pod) + workload := workloadMeta.Name + kind := workloadKind.Kind containerName := arg.Req.ContainerName if containerName == "" { @@ -103,12 +107,14 @@ func createContainerHook(_ context.Context, arg *rthooks.CreateContainerArg) err log.WithFields(logrus.Fields{ "pod-id": podID, "namespace": namespace, + "workload": workload, + "workload-kind": kind, "container-id": containerID, "cgroup-id": cgID, "container-name": containerName, }).Trace("policyfilter: add pod container") cgid := policyfilter.CgroupID(cgID) - err = pfState.AddPodContainer(policyfilter.PodID(podID), namespace, pod.Labels, containerID, cgid, containerName) + err = pfState.AddPodContainer(policyfilter.PodID(podID), namespace, workload, kind, pod.Labels, containerID, cgid, containerName) policyfiltermetrics.OpInc(policyfiltermetrics.RTHooksSubsys, policyfiltermetrics.AddContainerOperation, policyfilter.ErrorLabel(err)) if err != nil { diff --git a/pkg/policyfilter/state.go b/pkg/policyfilter/state.go index f6c24eab6ef..55936d59d37 100644 --- a/pkg/policyfilter/state.go +++ b/pkg/policyfilter/state.go @@ -8,11 +8,13 @@ import ( "sync" slimv1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/apis/meta/v1" + "github.com/cilium/ebpf" "github.com/cilium/tetragon/pkg/cgroups/fsscan" "github.com/cilium/tetragon/pkg/labels" "github.com/cilium/tetragon/pkg/logger" "github.com/cilium/tetragon/pkg/metrics/policyfiltermetrics" "github.com/cilium/tetragon/pkg/podhooks" + "github.com/cilium/tetragon/pkg/process" "github.com/google/uuid" "github.com/sirupsen/logrus" @@ -76,6 +78,7 @@ const ( type PolicyID uint32 type PodID uuid.UUID type CgroupID uint64 +type StateID uint64 const ( // we reserve 0 as a special value to indicate no filtering @@ -100,6 +103,8 @@ type podInfo struct { id PodID namespace string labels labels.Labels + workload string + kind string containers []containerInfo // cache of matched policies @@ -249,6 +254,9 @@ type state struct { // polify filters (outer) map handle pfMap PfMap + // global policy map handle + nsMap *NamespaceMap + cgidFinder cgidFinder } @@ -279,6 +287,11 @@ func newState( return nil, err } + ret.nsMap, err = newNamespaceMap() + if err != nil { + return nil, err + } + return ret, nil } @@ -294,12 +307,18 @@ func (m *state) updatePodHandler(pod *v1.Pod) error { } namespace := pod.Namespace - err = m.UpdatePod(PodID(podID), namespace, pod.Labels, containerIDs, containerNames) + workloadMeta, kindMeta := process.GetWorkloadMetaFromPod(pod) + workload := workloadMeta.Name + kind := kindMeta.Kind + + err = m.UpdatePod(PodID(podID), namespace, workload, kind, pod.Labels, containerIDs, containerNames) if err != nil { m.log.WithError(err).WithFields(logrus.Fields{ "pod-id": podID, "container-ids": containerIDs, "namespace": namespace, + "workload": workload, + "kind": kind, }).Warn("policyfilter, UpdatePod failed") return err } @@ -494,6 +513,55 @@ func cgIDPointerStr(p *CgroupID) string { return fmt.Sprintf("%d", *p) } +// addCgroupIDs add cgroups ids to the policy map +// todo: use batch operations when supported +func (m *state) addCgroupIDs(cinfo []containerInfo, pod *podInfo) error { + nsmap := m.nsMap + + for _, c := range cinfo { + key := NSID{ + Namespace: pod.namespace, + Workload: pod.workload, + Kind: pod.kind, + } + id, ok := nsmap.nsNameMap.Get(key) + if ok { + if err := nsmap.cgroupIdMap.Update(&c.cgID, id, ebpf.UpdateAny); err != nil { + logger.GetLogger().WithError(err).Warn("Unable to assign cgroup to existing namespace") + } + continue + } + logger.GetLogger().WithField("cgrp", c).WithField("pod", pod).WithField("id", nsmap.id).Warn("update cgroupid map") + + // If this is a new namespace we create a new map entry and bind it to a stable id. + if err := nsmap.cgroupIdMap.Update(&c.cgID, nsmap.id, ebpf.UpdateAny); err != nil { + logger.GetLogger().WithError(err).WithFields(logrus.Fields{ + "cgid": c.cgID, + "id": nsmap.id, + "ns": c.name, + }).Warn("Unable to insert cgroup id map") + continue + } + if ok := nsmap.nsIdMap.Add(nsmap.id, key); ok { + logger.GetLogger().WithFields(logrus.Fields{ + "cgid": c.cgID, + "id": nsmap.id, + "ns": c.name, + }).Info("Id to namespace map caused eviction") + } + if ok := nsmap.nsNameMap.Add(key, nsmap.id); ok { + logger.GetLogger().WithFields(logrus.Fields{ + "cgid": c.cgID, + "id": nsmap.id, + "ns": c.name, + }).Info("Namespace to Id map caused eviction") + } + nsmap.id++ + } + + return nil +} + // addPodContainers adds a list of containers (ids) to a pod. // It will update the state for all containers that do not exist. // It takes an optional argument of a list of cgroup ids (one per container). If this list is empty, @@ -554,6 +622,8 @@ func (m *state) addPodContainers(pod *podInfo, containerIDs []string, "containers-info": cinfo, }).Info("addPodContainers: container(s) added") + m.addCgroupIDs(cinfo, pod) + // update matching policy maps for _, policyID := range pod.matchedPolicies { pol := m.findPolicy(policyID) @@ -579,10 +649,12 @@ func (m *state) addPodContainers(pod *podInfo, containerIDs []string, } } -func (m *state) addNewPod(podID PodID, namespace string, podLabels labels.Labels) *podInfo { +func (m *state) addNewPod(podID PodID, namespace, workload, kind string, podLabels labels.Labels) *podInfo { m.pods = append(m.pods, podInfo{ id: podID, namespace: namespace, + workload: workload, + kind: kind, labels: podLabels, containers: nil, }) @@ -600,17 +672,18 @@ func (m *state) addNewPod(podID PodID, namespace string, podLabels labels.Labels // if the cgroup id of the container is known, cgID is not nil and it contains its value. // // The pod might or might not have been encountered before. -func (m *state) AddPodContainer(podID PodID, namespace string, podLabels labels.Labels, +func (m *state) AddPodContainer(podID PodID, namespace, workload, kind string, podLabels labels.Labels, containerID string, cgID CgroupID, containerName string) error { m.mu.Lock() defer m.mu.Unlock() pod := m.findPod(podID) if pod == nil { - pod = m.addNewPod(podID, namespace, podLabels) + pod = m.addNewPod(podID, namespace, workload, kind, podLabels) m.debugLogWithCallers(4).WithFields(logrus.Fields{ "pod-id": podID, "namespace": namespace, + "workload": workload, "container-id": containerID, "cgroup-id": cgID, "container-name": containerName, @@ -802,7 +875,7 @@ func (pod *podInfo) containerDiff(newContainerIDs []string) ([]string, []string) // - add the ones that do not exist in the current state // // It is intended to be used from k8s watchers (where no cgroup information is available) -func (m *state) UpdatePod(podID PodID, namespace string, podLabels labels.Labels, +func (m *state) UpdatePod(podID PodID, namespace, workload, kind string, podLabels labels.Labels, containerIDs []string, containerNames []string) error { m.mu.Lock() defer m.mu.Unlock() @@ -816,7 +889,7 @@ func (m *state) UpdatePod(podID PodID, namespace string, podLabels labels.Labels pod := m.findPod(podID) if pod == nil { - pod = m.addNewPod(podID, namespace, podLabels) + pod = m.addNewPod(podID, namespace, workload, kind, podLabels) dlog.Info("UpdatePod: added pod") } else if pod.namespace != namespace { // sanity check: old and new namespace should match @@ -865,3 +938,10 @@ func (m *state) UpdatePod(podID PodID, namespace string, podLabels labels.Labels m.addPodContainers(pod, addIDs, nil, addContainerNames) return nil } + +func (m *state) GetNsId(stateID StateID) (*NSID, bool) { + if ns, ok := m.nsMap.nsIdMap.Get(stateID); ok { + return &ns, ok + } + return nil, false +} diff --git a/pkg/policyfilter/state_test.go b/pkg/policyfilter/state_test.go index 5550b49dcad..bc3805702ab 100644 --- a/pkg/policyfilter/state_test.go +++ b/pkg/policyfilter/state_test.go @@ -26,27 +26,27 @@ func TestState(t *testing.T) { pod1 := PodID(uuid.New()) cgidi1 := CgroupID(2001) - err = s.AddPodContainer(pod1, "ns2", nil, "cont1", cgidi1, "main1") + err = s.AddPodContainer(pod1, "ns2", "wl2", "kind2", nil, "cont1", cgidi1, "main1") require.NoError(t, err) cgidi2 := CgroupID(2002) - err = s.AddPodContainer(pod1, "ns2", nil, "cont2", cgidi2, "main2") + err = s.AddPodContainer(pod1, "ns2", "wl2", "kind2", nil, "cont2", cgidi2, "main2") require.NoError(t, err) pod2 := PodID(uuid.New()) cgidi3 := CgroupID(1001) - err = s.AddPodContainer(pod2, "ns1", nil, "cont3", cgidi3, "main3") + err = s.AddPodContainer(pod2, "ns1", "wl1", "kind1", nil, "cont3", cgidi3, "main3") require.NoError(t, err) cgidi4 := CgroupID(3001) pod3 := PodID(uuid.New()) - err = s.AddPodContainer(pod3, "ns3", nil, "cont4", cgidi4, "main4") + err = s.AddPodContainer(pod3, "ns3", "wl3", "kind3", nil, "cont4", cgidi4, "main4") require.NoError(t, err) pod4 := PodID(uuid.New()) cgidi5 := CgroupID(3002) - err = s.AddPodContainer(pod4, "ns3", nil, "cont5", cgidi5, "main5") + err = s.AddPodContainer(pod4, "ns3", "wl3", "kind3", nil, "cont5", cgidi5, "main5") require.NoError(t, err) cgidi6 := CgroupID(3003) - err = s.AddPodContainer(pod4, "ns3", nil, "cont6", cgidi6, "main6") + err = s.AddPodContainer(pod4, "ns3", "wl3", "kind3", nil, "cont6", cgidi6, "main6") require.NoError(t, err) requirePfmEqualTo(t, s.pfMap, map[uint64][]uint64{ diff --git a/pkg/sensors/tracing/policyfilter_test.go b/pkg/sensors/tracing/policyfilter_test.go index 76b1291fe6b..25705632a32 100644 --- a/pkg/sensors/tracing/policyfilter_test.go +++ b/pkg/sensors/tracing/policyfilter_test.go @@ -269,10 +269,10 @@ func TestNamespacedPolicies(t *testing.T) { podId1 := uuid.New() podId2 := uuid.New() require.NoError(t, err) - err = pfState.AddPodContainer(policyfilter.PodID(podId1), "ns1", nil, + err = pfState.AddPodContainer(policyfilter.PodID(podId1), "ns1", "wl1", "kind1", nil, "pod1-container1", cgID1, "container-name1") require.NoError(t, err) - err = pfState.AddPodContainer(policyfilter.PodID(podId2), "ns2", nil, + err = pfState.AddPodContainer(policyfilter.PodID(podId2), "ns2", "wl2", "kind2", nil, "pod1-container2", cgID2, "container-name2") require.NoError(t, err)