Skip to content

Commit

Permalink
tetragon: Add debug interface to track cgroups to namespace mappings
Browse files Browse the repository at this point in the history
Debugging BPF and some kernel functions I want to understand cgroup to
namespace/workload/kind mappings at event side. This patch maintains a
stable mapping between cgroups and human readable namespaces. The end
goal is to filter out noisy namespaces from execs which will be follow
up series. This is minimally useful as is.

To support this just extend the use of namespace filters from kprobe
and tracepoints into a more general space where we can hook selectors.

Next steps we can push namespace filters into other sensor types e.g.
loader.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
  • Loading branch information
jrfastab committed Jun 21, 2024
1 parent ebba5ea commit 6554bbb
Show file tree
Hide file tree
Showing 9 changed files with 207 additions and 20 deletions.
2 changes: 2 additions & 0 deletions bpf/process/bpf_execve_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#include "bpf_helpers.h"
#include "bpf_rate.h"

#include "policy_filter.h"

char _license[] __attribute__((section("license"), used)) = "Dual BSD/GPL";

struct {
Expand Down
10 changes: 9 additions & 1 deletion bpf/process/policy_filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,15 @@

#include "bpf_tracing.h"

#define POLICY_FILTER_MAX_POLICIES 128
#define POLICY_FILTER_MAX_POLICIES 128
#define POLICY_FILTER_MAX_NAMESPACES 1024

struct {
__uint(type, BPF_MAP_TYPE_LRU_HASH);
__uint(max_entries, POLICY_FILTER_MAX_NAMESPACES);
__uint(key_size, sizeof(u64));
__uint(value_size, sizeof(u64));
} tg_cgroup_namespace_map SEC(".maps");

struct {
__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
Expand Down
8 changes: 6 additions & 2 deletions pkg/policyfilter/disabled.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,12 @@ func (s *disabled) DelPolicy(polID PolicyID) error {
return fmt.Errorf("policyfilter is disabled")
}

func (s *disabled) AddPodContainer(podID PodID, namespace string, podLabels labels.Labels,
func (s *disabled) AddPodContainer(podID PodID, namespace, workload, kind string, podLabels labels.Labels,
containerID string, cgID CgroupID, containerName string) error {
return nil
}

func (s *disabled) UpdatePod(podID PodID, namespace string, podLabels labels.Labels,
func (s *disabled) UpdatePod(podID PodID, namespace, workload, kind string, podLabels labels.Labels,
containerIDs []string, containerNames []string) error {
return nil
}
Expand All @@ -55,3 +55,7 @@ func (s *disabled) RegisterPodHandlers(podInformer cache.SharedIndexInformer) {
func (s *disabled) Close() error {
return nil
}

func (s *disabled) GetNsId(stateID StateID) (*NSID, bool) {
return nil, false
}
83 changes: 83 additions & 0 deletions pkg/policyfilter/namespace.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Tetragon

package policyfilter

import (
"fmt"
"os"
"path"
"path/filepath"

"github.com/cilium/ebpf"
"github.com/cilium/tetragon/pkg/bpf"
"github.com/cilium/tetragon/pkg/option"
"github.com/cilium/tetragon/pkg/sensors/exec/config"
lru "github.com/hashicorp/golang-lru/v2"
)

const (
CgrpNsMapName = "tg_cgroup_namespace_map"
namespaceCacheSize = 1024
)

type NSID struct {
Namespace string
Workload string
Kind string
}

// NamespaceMap is a simple wrapper for ebpf.Map so that we can write methods for it
type NamespaceMap struct {
cgroupIdMap *ebpf.Map
nsIdMap *lru.Cache[StateID, NSID]
nsNameMap *lru.Cache[NSID, StateID]
id StateID
}

// newNamespaceMap returns a new namespace mapping. The namespace map consists of
// two pieces. First a cgroup to ID map. The ID is useful for BPF so we can avoid
// strings in BPF side. Then a stable ID to namespace mapping.
func newNamespaceMap() (*NamespaceMap, error) {
idCache, err := lru.New[StateID, NSID](namespaceCacheSize)
if err != nil {
return nil, fmt.Errorf("create namespace ID cache failed")
}
nameCache, err := lru.New[NSID, StateID](namespaceCacheSize)
if err != nil {
return nil, fmt.Errorf("create namespace name cache failed")
}

objName := config.ExecObj()
objPath := path.Join(option.Config.HubbleLib, objName)
spec, err := ebpf.LoadCollectionSpec(objPath)
if err != nil {
return nil, fmt.Errorf("loading spec for %s failed: %w", objPath, err)
}
nsMapSpec, ok := spec.Maps[CgrpNsMapName]
if !ok {
return nil, fmt.Errorf("%s not found in %s", CgrpNsMapName, objPath)
}

ret, err := ebpf.NewMap(nsMapSpec)
if err != nil {
return nil, err
}

mapDir := bpf.MapPrefixPath()
pinPath := filepath.Join(mapDir, CgrpNsMapName)
os.Remove(pinPath)
os.Mkdir(mapDir, os.ModeDir)
err = ret.Pin(pinPath)
if err != nil {
ret.Close()
return nil, fmt.Errorf("failed to pin Namespace map in %s: %w", pinPath, err)
}

return &NamespaceMap{
cgroupIdMap: ret,
nsIdMap: idCache,
nsNameMap: nameCache,
id: 1,
}, err
}
8 changes: 6 additions & 2 deletions pkg/policyfilter/policyfilter.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,19 +98,23 @@ type State interface {
// AddPodContainer informs policyfilter about a new container and its cgroup id in a pod.
// The pod might or might not have been encountered before.
// This method is intended to update policyfilter state from container hooks
AddPodContainer(podID PodID, namespace string, podLabels labels.Labels,
AddPodContainer(podID PodID, namespace, workload, kind string, podLabels labels.Labels,
containerID string, cgID CgroupID, containerName string) error

// UpdatePod updates the pod state for a pod, where containerIDs contains all the container ids for the given pod.
// This method is intended to be used from k8s watchers (where no cgroup information is available)
UpdatePod(podID PodID, namespace string, podLabels labels.Labels,
UpdatePod(podID PodID, namespace, workload, kind string, podLabels labels.Labels,
containerIDs []string, containerNames []string) error

// DelPodContainer informs policyfilter that a container was deleted from a pod
DelPodContainer(podID PodID, containerID string) error
// DelPod informs policyfilter that a pod has been deleted
DelPod(podID PodID) error

// Report opaque cgroup ID to nsId mapping. This method is intended to allow inspecting
// and reporting the state of the system to subsystems and tooling.
GetNsId(stateID StateID) (*NSID, bool)

// RegisterPodHandlers can be used to register appropriate pod handlers to a pod informer
// that for keeping the policy filter state up-to-date.
RegisterPodHandlers(podInformer cache.SharedIndexInformer)
Expand Down
8 changes: 7 additions & 1 deletion pkg/policyfilter/rthooks/rthooks.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/cilium/tetragon/pkg/logger"
"github.com/cilium/tetragon/pkg/metrics/policyfiltermetrics"
"github.com/cilium/tetragon/pkg/policyfilter"
"github.com/cilium/tetragon/pkg/process"
"github.com/cilium/tetragon/pkg/rthooks"
"github.com/google/uuid"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -93,6 +94,9 @@ func createContainerHook(_ context.Context, arg *rthooks.CreateContainerArg) err
}

namespace := pod.ObjectMeta.Namespace
workloadMeta, workloadKind := process.GetWorkloadMetaFromPod(pod)
workload := workloadMeta.Name
kind := workloadKind.Kind

containerName := arg.Req.ContainerName
if containerName == "" {
Expand All @@ -103,12 +107,14 @@ func createContainerHook(_ context.Context, arg *rthooks.CreateContainerArg) err
log.WithFields(logrus.Fields{
"pod-id": podID,
"namespace": namespace,
"workload": workload,
"workload-kind": kind,
"container-id": containerID,
"cgroup-id": cgID,
"container-name": containerName,
}).Trace("policyfilter: add pod container")
cgid := policyfilter.CgroupID(cgID)
err = pfState.AddPodContainer(policyfilter.PodID(podID), namespace, pod.Labels, containerID, cgid, containerName)
err = pfState.AddPodContainer(policyfilter.PodID(podID), namespace, workload, kind, pod.Labels, containerID, cgid, containerName)
policyfiltermetrics.OpInc(policyfiltermetrics.RTHooksSubsys, policyfiltermetrics.AddContainerOperation, policyfilter.ErrorLabel(err))

if err != nil {
Expand Down
92 changes: 86 additions & 6 deletions pkg/policyfilter/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@ import (
"sync"

slimv1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/apis/meta/v1"
"github.com/cilium/ebpf"
"github.com/cilium/tetragon/pkg/cgroups/fsscan"
"github.com/cilium/tetragon/pkg/labels"
"github.com/cilium/tetragon/pkg/logger"
"github.com/cilium/tetragon/pkg/metrics/policyfiltermetrics"
"github.com/cilium/tetragon/pkg/podhooks"
"github.com/cilium/tetragon/pkg/process"

"github.com/google/uuid"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -76,6 +78,7 @@ const (
type PolicyID uint32
type PodID uuid.UUID
type CgroupID uint64
type StateID uint64

const (
// we reserve 0 as a special value to indicate no filtering
Expand All @@ -100,6 +103,8 @@ type podInfo struct {
id PodID
namespace string
labels labels.Labels
workload string
kind string
containers []containerInfo

// cache of matched policies
Expand Down Expand Up @@ -249,6 +254,9 @@ type state struct {
// polify filters (outer) map handle
pfMap PfMap

// global policy map handle
nsMap *NamespaceMap

cgidFinder cgidFinder
}

Expand Down Expand Up @@ -279,6 +287,11 @@ func newState(
return nil, err
}

ret.nsMap, err = newNamespaceMap()
if err != nil {
return nil, err
}

return ret, nil
}

Expand All @@ -294,12 +307,18 @@ func (m *state) updatePodHandler(pod *v1.Pod) error {
}

namespace := pod.Namespace
err = m.UpdatePod(PodID(podID), namespace, pod.Labels, containerIDs, containerNames)
workloadMeta, kindMeta := process.GetWorkloadMetaFromPod(pod)
workload := workloadMeta.Name
kind := kindMeta.Kind

err = m.UpdatePod(PodID(podID), namespace, workload, kind, pod.Labels, containerIDs, containerNames)
if err != nil {
m.log.WithError(err).WithFields(logrus.Fields{
"pod-id": podID,
"container-ids": containerIDs,
"namespace": namespace,
"workload": workload,
"kind": kind,
}).Warn("policyfilter, UpdatePod failed")
return err
}
Expand Down Expand Up @@ -494,6 +513,55 @@ func cgIDPointerStr(p *CgroupID) string {
return fmt.Sprintf("%d", *p)
}

// addCgroupIDs add cgroups ids to the policy map
// todo: use batch operations when supported
func (m *state) addCgroupIDs(cinfo []containerInfo, pod *podInfo) error {
nsmap := m.nsMap

for _, c := range cinfo {
key := NSID{
Namespace: pod.namespace,
Workload: pod.workload,
Kind: pod.kind,
}
id, ok := nsmap.nsNameMap.Get(key)
if ok {
if err := nsmap.cgroupIdMap.Update(&c.cgID, id, ebpf.UpdateAny); err != nil {
logger.GetLogger().WithError(err).Warn("Unable to assign cgroup to existing namespace")
}
continue
}
logger.GetLogger().WithField("cgrp", c).WithField("pod", pod).WithField("id", nsmap.id).Warn("update cgroupid map")

// If this is a new namespace we create a new map entry and bind it to a stable id.
if err := nsmap.cgroupIdMap.Update(&c.cgID, nsmap.id, ebpf.UpdateAny); err != nil {
logger.GetLogger().WithError(err).WithFields(logrus.Fields{
"cgid": c.cgID,
"id": nsmap.id,
"ns": c.name,
}).Warn("Unable to insert cgroup id map")
continue
}
if ok := nsmap.nsIdMap.Add(nsmap.id, key); ok {
logger.GetLogger().WithFields(logrus.Fields{
"cgid": c.cgID,
"id": nsmap.id,
"ns": c.name,
}).Info("Id to namespace map caused eviction")
}
if ok := nsmap.nsNameMap.Add(key, nsmap.id); ok {
logger.GetLogger().WithFields(logrus.Fields{
"cgid": c.cgID,
"id": nsmap.id,
"ns": c.name,
}).Info("Namespace to Id map caused eviction")
}
nsmap.id++
}

return nil
}

// addPodContainers adds a list of containers (ids) to a pod.
// It will update the state for all containers that do not exist.
// It takes an optional argument of a list of cgroup ids (one per container). If this list is empty,
Expand Down Expand Up @@ -554,6 +622,8 @@ func (m *state) addPodContainers(pod *podInfo, containerIDs []string,
"containers-info": cinfo,
}).Info("addPodContainers: container(s) added")

m.addCgroupIDs(cinfo, pod)

// update matching policy maps
for _, policyID := range pod.matchedPolicies {
pol := m.findPolicy(policyID)
Expand All @@ -579,10 +649,12 @@ func (m *state) addPodContainers(pod *podInfo, containerIDs []string,
}
}

func (m *state) addNewPod(podID PodID, namespace string, podLabels labels.Labels) *podInfo {
func (m *state) addNewPod(podID PodID, namespace, workload, kind string, podLabels labels.Labels) *podInfo {
m.pods = append(m.pods, podInfo{
id: podID,
namespace: namespace,
workload: workload,
kind: kind,
labels: podLabels,
containers: nil,
})
Expand All @@ -600,17 +672,18 @@ func (m *state) addNewPod(podID PodID, namespace string, podLabels labels.Labels
// if the cgroup id of the container is known, cgID is not nil and it contains its value.
//
// The pod might or might not have been encountered before.
func (m *state) AddPodContainer(podID PodID, namespace string, podLabels labels.Labels,
func (m *state) AddPodContainer(podID PodID, namespace, workload, kind string, podLabels labels.Labels,
containerID string, cgID CgroupID, containerName string) error {
m.mu.Lock()
defer m.mu.Unlock()

pod := m.findPod(podID)
if pod == nil {
pod = m.addNewPod(podID, namespace, podLabels)
pod = m.addNewPod(podID, namespace, workload, kind, podLabels)
m.debugLogWithCallers(4).WithFields(logrus.Fields{
"pod-id": podID,
"namespace": namespace,
"workload": workload,
"container-id": containerID,
"cgroup-id": cgID,
"container-name": containerName,
Expand Down Expand Up @@ -802,7 +875,7 @@ func (pod *podInfo) containerDiff(newContainerIDs []string) ([]string, []string)
// - add the ones that do not exist in the current state
//
// It is intended to be used from k8s watchers (where no cgroup information is available)
func (m *state) UpdatePod(podID PodID, namespace string, podLabels labels.Labels,
func (m *state) UpdatePod(podID PodID, namespace, workload, kind string, podLabels labels.Labels,
containerIDs []string, containerNames []string) error {
m.mu.Lock()
defer m.mu.Unlock()
Expand All @@ -816,7 +889,7 @@ func (m *state) UpdatePod(podID PodID, namespace string, podLabels labels.Labels

pod := m.findPod(podID)
if pod == nil {
pod = m.addNewPod(podID, namespace, podLabels)
pod = m.addNewPod(podID, namespace, workload, kind, podLabels)
dlog.Info("UpdatePod: added pod")
} else if pod.namespace != namespace {
// sanity check: old and new namespace should match
Expand Down Expand Up @@ -865,3 +938,10 @@ func (m *state) UpdatePod(podID PodID, namespace string, podLabels labels.Labels
m.addPodContainers(pod, addIDs, nil, addContainerNames)
return nil
}

func (m *state) GetNsId(stateID StateID) (*NSID, bool) {
if ns, ok := m.nsMap.nsIdMap.Get(stateID); ok {
return &ns, ok
}
return nil, false
}
Loading

0 comments on commit 6554bbb

Please sign in to comment.