Skip to content

Commit

Permalink
[bugfix] Fix clone event caching due to missing pod info
Browse files Browse the repository at this point in the history
[upstream commit 20bba35]

The eventcache API provides 2 handlers.

These are:
RetryInternal -> called to set up process information
Retry -> called to set up pod information

In the case of clone events, we used to have an empty implementation of
the Retry handler. This results in an issue with missing pod information
which is described in detail here: #2902

This patch provides a proper Retry implementation that also handles those
cases.

FIXES: #2902

Signed-off-by: Anastasios Papagiannis <tasos.papagiannnis@gmail.com>
  • Loading branch information
tpapagian committed Sep 24, 2024
1 parent 1c72fcb commit d6a85cc
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 8 deletions.
23 changes: 19 additions & 4 deletions pkg/grpc/exec/exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -300,17 +300,32 @@ func (msg *MsgCloneEventUnix) Notify() bool {
}

func (msg *MsgCloneEventUnix) RetryInternal(_ notify.Event, _ uint64) (*process.ProcessInternal, error) {
return nil, process.AddCloneEvent(&msg.MsgCloneEvent)
return process.AddCloneEvent(&msg.MsgCloneEvent)
}

func (msg *MsgCloneEventUnix) Retry(_ *process.ProcessInternal, _ notify.Event) error {
func (msg *MsgCloneEventUnix) Retry(internal *process.ProcessInternal, _ notify.Event) error {
proc := internal.UnsafeGetProcess()
if option.Config.EnableK8s && proc.Docker != "" && proc.Pod == nil {
podInfo := process.GetPodInfo(proc.Docker, proc.Binary, proc.Arguments, msg.NSPID)
if podInfo == nil {
eventcachemetrics.EventCacheRetries(eventcachemetrics.PodInfo).Inc()
return eventcache.ErrFailedToGetPodInfo
}
internal.AddPodInfo(podInfo)
}
return nil
}

func (msg *MsgCloneEventUnix) HandleMessage() *tetragon.GetEventsResponse {
if err := process.AddCloneEvent(&msg.MsgCloneEvent); err != nil {
ec := eventcache.Get()
ec := eventcache.Get()
if internal, err := process.AddCloneEvent(&msg.MsgCloneEvent); err == nil {
if ec != nil && ec.Needed(internal.UnsafeGetProcess()) {
// adding to the cache due to missing pod info
ec.Add(internal, nil, msg.MsgCloneEvent.Common.Ktime, msg.MsgCloneEvent.Ktime, msg)
}
} else {
if ec != nil {
// adding to the cache due to missing parent
ec.Add(nil, nil, msg.MsgCloneEvent.Common.Ktime, msg.MsgCloneEvent.Ktime, msg)
}
}
Expand Down
8 changes: 4 additions & 4 deletions pkg/process/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,7 @@ func AddExecEvent(event *tetragonAPI.MsgExecveEventUnix) *ProcessInternal {
}

// AddCloneEvent adds a new process into the cache from a CloneEvent
func AddCloneEvent(event *tetragonAPI.MsgCloneEvent) error {
func AddCloneEvent(event *tetragonAPI.MsgCloneEvent) (*ProcessInternal, error) {
parentExecId := GetProcessID(event.Parent.Pid, event.Parent.Ktime)
parent, err := Get(parentExecId)
if err != nil {
Expand All @@ -490,17 +490,17 @@ func AddCloneEvent(event *tetragonAPI.MsgCloneEvent) error {
"event.parent.pid": event.Parent.Pid,
"event.parent.exec_id": parentExecId,
}).WithError(err).Debug("CloneEvent: parent process not found in cache")
return err
return nil, err
}

proc, err := initProcessInternalClone(event, parent, parentExecId)
if err != nil {
return err
return nil, err
}

parent.RefInc()
procCache.add(proc)
return nil
return proc, nil
}

func Get(execId string) (*ProcessInternal, error) {
Expand Down

0 comments on commit d6a85cc

Please sign in to comment.