From 6ebe0487666d9e73e461a605edaf7dcb72d6b1a8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 15 Feb 2025 21:41:36 +0000 Subject: [PATCH 1/7] tetragon: Add unknown process flag Adding new 'unknown' value for flags field in the message Process. It will be used in following changes for process without details. Signed-off-by: Jiri Olsa --- api/v1/README.md | 2 +- api/v1/tetragon/tetragon.pb.go | 2 ++ api/v1/tetragon/tetragon.proto | 2 ++ .../github.com/cilium/tetragon/api/v1/tetragon/tetragon.pb.go | 2 ++ .../github.com/cilium/tetragon/api/v1/tetragon/tetragon.proto | 2 ++ .../github.com/cilium/tetragon/api/v1/tetragon/tetragon.pb.go | 2 ++ .../github.com/cilium/tetragon/api/v1/tetragon/tetragon.proto | 2 ++ 7 files changed, 13 insertions(+), 1 deletion(-) diff --git a/api/v1/README.md b/api/v1/README.md index 7a363d9676f..e17e3bf633e 100644 --- a/api/v1/README.md +++ b/api/v1/README.md @@ -950,7 +950,7 @@ found. | cwd | [string](#string) | | Current working directory of the process. | | binary | [string](#string) | | Absolute path of the executed binary. | | arguments | [string](#string) | | Arguments passed to the binary at execution. | -| flags | [string](#string) | | Flags are for debugging purposes only and should not be considered a reliable source of information. They hold various information about which syscalls generated events, use of internal Tetragon buffers, errors and more. - `execve` This event is generated by an execve syscall for a new process. See procFs for the other option. A correctly formatted event should either set execve or procFS (described next). - `procFS` This event is generated from a proc interface. This happens at Tetragon init when existing processes are being loaded into Tetragon event buffer. All events should have either execve or procFS set. - `truncFilename` Indicates a truncated processes filename because the buffer size is too small to contain the process filename. Consider increasing buffer size to avoid this. 
- `truncArgs` Indicates truncated the processes arguments because the buffer size was too small to contain all exec args. Consider increasing buffer size to avoid this. - `taskWalk` Primarily useful for debugging. Indicates a walked process hierarchy to find a parent process in the Tetragon buffer. This may happen when we did not receive an exec event for the immediate parent of a process. Typically means we are looking at a fork that in turn did another fork we don't currently track fork events exactly and instead push an event with the original parent exec data. This flag can provide this insight into the event if needed. - `miss` An error flag indicating we could not find parent info in the Tetragon event buffer. If this is set it should be reported to Tetragon developers for debugging. Tetragon will do its best to recover information about the process from available kernel data structures instead of using cached info in this case. However, args will not be available. - `needsAUID` An internal flag for Tetragon to indicate the audit has not yet been resolved. The BPF hooks look at this flag to determine if probing the audit system is necessary. - `errorFilename` An error flag indicating an error happened while reading the filename. If this is set it should be reported to Tetragon developers for debugging. - `errorArgs` An error flag indicating an error happened while reading the process args. If this is set it should be reported to Tetragon developers for debugging - `needsCWD` An internal flag for Tetragon to indicate the current working directory has not yet been resolved. The Tetragon hooks look at this flag to determine if probing the CWD is necessary. - `noCWDSupport` Indicates that CWD is removed from the event because the buffer size is too small. Consider increasing buffer size to avoid this. - `rootCWD` Indicates that CWD is the root directory. This is necessary to inform readers the CWD is not in the event buffer and is '/' instead. 
- `errorCWD` An error flag indicating an error occurred while reading the CWD of a process. If this is set it should be reported to Tetragon developers for debugging. - `clone` Indicates the process issued a clone before exec*. This is the general flow to exec* a new process, however its possible to replace the current process with a new process by doing an exec* without a clone. In this case the flag will be omitted and the same PID will be used by the kernel for both the old process and the newly exec'd process. | +| flags | [string](#string) | | Flags are for debugging purposes only and should not be considered a reliable source of information. They hold various information about which syscalls generated events, use of internal Tetragon buffers, errors and more. - `execve` This event is generated by an execve syscall for a new process. See procFs for the other option. A correctly formatted event should either set execve or procFS (described next). - `procFS` This event is generated from a proc interface. This happens at Tetragon init when existing processes are being loaded into Tetragon event buffer. All events should have either execve or procFS set. - `truncFilename` Indicates a truncated processes filename because the buffer size is too small to contain the process filename. Consider increasing buffer size to avoid this. - `truncArgs` Indicates truncated the processes arguments because the buffer size was too small to contain all exec args. Consider increasing buffer size to avoid this. - `taskWalk` Primarily useful for debugging. Indicates a walked process hierarchy to find a parent process in the Tetragon buffer. This may happen when we did not receive an exec event for the immediate parent of a process. Typically means we are looking at a fork that in turn did another fork we don't currently track fork events exactly and instead push an event with the original parent exec data. This flag can provide this insight into the event if needed. 
- `miss` An error flag indicating we could not find parent info in the Tetragon event buffer. If this is set it should be reported to Tetragon developers for debugging. Tetragon will do its best to recover information about the process from available kernel data structures instead of using cached info in this case. However, args will not be available. - `needsAUID` An internal flag for Tetragon to indicate the audit has not yet been resolved. The BPF hooks look at this flag to determine if probing the audit system is necessary. - `errorFilename` An error flag indicating an error happened while reading the filename. If this is set it should be reported to Tetragon developers for debugging. - `errorArgs` An error flag indicating an error happened while reading the process args. If this is set it should be reported to Tetragon developers for debugging - `needsCWD` An internal flag for Tetragon to indicate the current working directory has not yet been resolved. The Tetragon hooks look at this flag to determine if probing the CWD is necessary. - `noCWDSupport` Indicates that CWD is removed from the event because the buffer size is too small. Consider increasing buffer size to avoid this. - `rootCWD` Indicates that CWD is the root directory. This is necessary to inform readers the CWD is not in the event buffer and is '/' instead. - `errorCWD` An error flag indicating an error occurred while reading the CWD of a process. If this is set it should be reported to Tetragon developers for debugging. - `clone` Indicates the process issued a clone before exec*. This is the general flow to exec* a new process, however its possible to replace the current process with a new process by doing an exec* without a clone. In this case the flag will be omitted and the same PID will be used by the kernel for both the old process and the newly exec'd process. - `unknown` Indicates the process was not found in the process cache and contains just pid and start time. 
| | start_time | [google.protobuf.Timestamp](#google-protobuf-Timestamp) | | Start time of the execution. | | auid | [google.protobuf.UInt32Value](#google-protobuf-UInt32Value) | | Audit user ID, this ID is assigned to a user upon login and is inherited by every process even when the user's identity changes. For example, by switching user accounts with su - john. | | pod | [Pod](#tetragon-Pod) | | Information about the the Kubernetes Pod where the event originated. | diff --git a/api/v1/tetragon/tetragon.pb.go b/api/v1/tetragon/tetragon.pb.go index 4190cc342e8..f73103bb064 100644 --- a/api/v1/tetragon/tetragon.pb.go +++ b/api/v1/tetragon/tetragon.pb.go @@ -1300,6 +1300,8 @@ type Process struct { // current process with a new process by doing an exec* without a clone. In // this case the flag will be omitted and the same PID will be used by the // kernel for both the old process and the newly exec'd process. + // - `unknown` Indicates the process was not found in the process cache + // and contains just pid and start time. Flags string `protobuf:"bytes,7,opt,name=flags,proto3" json:"flags,omitempty"` // Start time of the execution. StartTime *timestamppb.Timestamp `protobuf:"bytes,8,opt,name=start_time,json=startTime,proto3" json:"start_time,omitempty"` diff --git a/api/v1/tetragon/tetragon.proto b/api/v1/tetragon/tetragon.proto index bdb9d8e46eb..9496974e372 100644 --- a/api/v1/tetragon/tetragon.proto +++ b/api/v1/tetragon/tetragon.proto @@ -244,6 +244,8 @@ message Process { // current process with a new process by doing an exec* without a clone. In // this case the flag will be omitted and the same PID will be used by the // kernel for both the old process and the newly exec'd process. + // - `unknown` Indicates the process was not found in the process cache + // and contains just pid and start time. string flags = 7; // Start time of the execution. 
google.protobuf.Timestamp start_time = 8; diff --git a/contrib/tetragon-rthooks/vendor/github.com/cilium/tetragon/api/v1/tetragon/tetragon.pb.go b/contrib/tetragon-rthooks/vendor/github.com/cilium/tetragon/api/v1/tetragon/tetragon.pb.go index 4190cc342e8..f73103bb064 100644 --- a/contrib/tetragon-rthooks/vendor/github.com/cilium/tetragon/api/v1/tetragon/tetragon.pb.go +++ b/contrib/tetragon-rthooks/vendor/github.com/cilium/tetragon/api/v1/tetragon/tetragon.pb.go @@ -1300,6 +1300,8 @@ type Process struct { // current process with a new process by doing an exec* without a clone. In // this case the flag will be omitted and the same PID will be used by the // kernel for both the old process and the newly exec'd process. + // - `unknown` Indicates the process was not found in the process cache + // and contains just pid and start time. Flags string `protobuf:"bytes,7,opt,name=flags,proto3" json:"flags,omitempty"` // Start time of the execution. StartTime *timestamppb.Timestamp `protobuf:"bytes,8,opt,name=start_time,json=startTime,proto3" json:"start_time,omitempty"` diff --git a/contrib/tetragon-rthooks/vendor/github.com/cilium/tetragon/api/v1/tetragon/tetragon.proto b/contrib/tetragon-rthooks/vendor/github.com/cilium/tetragon/api/v1/tetragon/tetragon.proto index bdb9d8e46eb..9496974e372 100644 --- a/contrib/tetragon-rthooks/vendor/github.com/cilium/tetragon/api/v1/tetragon/tetragon.proto +++ b/contrib/tetragon-rthooks/vendor/github.com/cilium/tetragon/api/v1/tetragon/tetragon.proto @@ -244,6 +244,8 @@ message Process { // current process with a new process by doing an exec* without a clone. In // this case the flag will be omitted and the same PID will be used by the // kernel for both the old process and the newly exec'd process. + // - `unknown` Indicates the process was not found in the process cache + // and contains just pid and start time. string flags = 7; // Start time of the execution. 
google.protobuf.Timestamp start_time = 8; diff --git a/vendor/github.com/cilium/tetragon/api/v1/tetragon/tetragon.pb.go b/vendor/github.com/cilium/tetragon/api/v1/tetragon/tetragon.pb.go index 4190cc342e8..f73103bb064 100644 --- a/vendor/github.com/cilium/tetragon/api/v1/tetragon/tetragon.pb.go +++ b/vendor/github.com/cilium/tetragon/api/v1/tetragon/tetragon.pb.go @@ -1300,6 +1300,8 @@ type Process struct { // current process with a new process by doing an exec* without a clone. In // this case the flag will be omitted and the same PID will be used by the // kernel for both the old process and the newly exec'd process. + // - `unknown` Indicates the process was not found in the process cache + // and contains just pid and start time. Flags string `protobuf:"bytes,7,opt,name=flags,proto3" json:"flags,omitempty"` // Start time of the execution. StartTime *timestamppb.Timestamp `protobuf:"bytes,8,opt,name=start_time,json=startTime,proto3" json:"start_time,omitempty"` diff --git a/vendor/github.com/cilium/tetragon/api/v1/tetragon/tetragon.proto b/vendor/github.com/cilium/tetragon/api/v1/tetragon/tetragon.proto index bdb9d8e46eb..9496974e372 100644 --- a/vendor/github.com/cilium/tetragon/api/v1/tetragon/tetragon.proto +++ b/vendor/github.com/cilium/tetragon/api/v1/tetragon/tetragon.proto @@ -244,6 +244,8 @@ message Process { // current process with a new process by doing an exec* without a clone. In // this case the flag will be omitted and the same PID will be used by the // kernel for both the old process and the newly exec'd process. + // - `unknown` Indicates the process was not found in the process cache + // and contains just pid and start time. string flags = 7; // Start time of the execution. 
google.protobuf.Timestamp start_time = 8; From 485eaa8b1b4f26c12e92cf905365efa31da93446 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 20 Jan 2025 18:02:22 +0000 Subject: [PATCH 2/7] tetragon: Move d_path into bpf_d_path.h header Moving d_path into bpf_d_path.h header so it can be easily used in following changes from other places. Signed-off-by: Jiri Olsa --- bpf/lib/bpf_d_path.h | 340 ++++++++++++++++++++++++++++++++ bpf/lib/bpf_task.h | 13 -- bpf/process/bpf_process_event.h | 319 +----------------------------- 3 files changed, 342 insertions(+), 330 deletions(-) create mode 100644 bpf/lib/bpf_d_path.h diff --git a/bpf/lib/bpf_d_path.h b/bpf/lib/bpf_d_path.h new file mode 100644 index 00000000000..2a80d975c35 --- /dev/null +++ b/bpf/lib/bpf_d_path.h @@ -0,0 +1,340 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright Authors of Cilium */ + +#ifndef __D_PATH__ +#define __D_PATH__ + +#include "bpf_helpers.h" + +#define ENAMETOOLONG 36 /* File name too long */ + +#define MAX_BUF_LEN 4096 + +/* __d_path_local flags */ +// #define UNRESOLVED_MOUNT_POINTS 0x01 // (deprecated) +// this error is returned by __d_path_local in the following cases: +// - the path walk did not conclude (too many dentry) +// - the path was too long to fit in the buffer +#define UNRESOLVED_PATH_COMPONENTS 0x02 + +#ifdef __LARGE_BPF_PROG +#define PROBE_CWD_READ_ITERATIONS 128 +#else +#define PROBE_CWD_READ_ITERATIONS 11 +#endif + +#define offsetof_btf(s, memb) ((size_t)((char *)_(&((s *)0)->memb) - (char *)0)) + +#define container_of_btf(ptr, type, member) \ + ({ \ + void *__mptr = (void *)(ptr); \ + ((type *)(__mptr - offsetof_btf(type, member))); \ + }) +struct buffer_heap_map_value { + // Buffer need a bit more space here because of the verifier. In + // prepend_name unit tests, the verifier figures out that MAX_BUF_LEN is + // enough and that the buffer_offset will not overflow, but in the real + // use-case it looks like it's forgetting about that. 
+ unsigned char buf[MAX_BUF_LEN + 256]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct buffer_heap_map_value); +} buffer_heap_map SEC(".maps"); + +FUNC_INLINE struct mount *real_mount(struct vfsmount *mnt) +{ + return container_of_btf(mnt, struct mount, mnt); +} + +FUNC_INLINE bool IS_ROOT(struct dentry *dentry) +{ + struct dentry *d_parent; + + probe_read(&d_parent, sizeof(d_parent), _(&dentry->d_parent)); + return (dentry == d_parent); +} + +FUNC_INLINE bool hlist_bl_unhashed(const struct hlist_bl_node *h) +{ + struct hlist_bl_node **pprev; + + probe_read(&pprev, sizeof(pprev), _(&h->pprev)); + return !pprev; +} + +FUNC_INLINE int d_unhashed(struct dentry *dentry) +{ + return hlist_bl_unhashed(_(&dentry->d_hash)); +} + +FUNC_INLINE int d_unlinked(struct dentry *dentry) +{ + return d_unhashed(dentry) && !IS_ROOT(dentry); +} + +/* + * Only called from path_with_deleted function before any path traversals. + * In the current scenarios, always buflen will be 256 and namelen 10. + * For this reason I will never return -ENAMETOOLONG. + */ +FUNC_INLINE int +prepend(char **buffer, int *buflen, const char *str, int namelen) +{ + *buflen -= namelen; + if (*buflen < 0) // will never happen - check function comment + return -ENAMETOOLONG; + *buffer -= namelen; + memcpy(*buffer, str, namelen); + return 0; +} + +struct cwd_read_data { + struct dentry *root_dentry; + struct vfsmount *root_mnt; + char *bf; + struct dentry *dentry; + struct vfsmount *vfsmnt; + struct mount *mnt; + char *bptr; + int blen; + bool resolved; +}; + +FUNC_INLINE int +prepend_name(char *buf, char **bufptr, int *buflen, const char *name, u32 namelen) +{ + // contains 1 if the buffer is large enough to contain the whole name and a slash prefix + bool write_slash = 1; + + u64 buffer_offset = (u64)(*bufptr) - (u64)buf; + + // Change name and namelen to fit in the buffer. 
+ // We prefer to store the part of it that fits rather than discard it. + if (namelen >= *buflen) { + name += namelen - *buflen; + namelen = *buflen; + write_slash = 0; + } + + *buflen -= (namelen + write_slash); + + if (namelen + write_slash > buffer_offset) + return -ENAMETOOLONG; + + buffer_offset -= (namelen + write_slash); + + // This will never happen. buffer_offset is the diff of the initial buffer pointer + // with the current buffer pointer. This will be at max 4096 bytes (similar to the initial + // size). + // Needed to bound that for probe_read call. + if (buffer_offset >= MAX_BUF_LEN) + return -ENAMETOOLONG; + + if (write_slash) + buf[buffer_offset] = '/'; + + // This ensures that namelen is < 256, which is aligned with kernel's max dentry name length + // that is 255 (https://elixir.bootlin.com/linux/v5.10/source/include/uapi/linux/limits.h#L12). + // Needed to bound that for probe_read call. + asm volatile("%[namelen] &= 0xff;\n" + : [namelen] "+r"(namelen)); + probe_read(buf + buffer_offset + write_slash, namelen * sizeof(char), name); + + *bufptr = buf + buffer_offset; + return write_slash ? 0 : -ENAMETOOLONG; +} + +FUNC_INLINE long cwd_read(struct cwd_read_data *data) +{ + struct qstr d_name; + struct dentry *parent; + struct dentry *vfsmnt_mnt_root; + struct dentry *dentry = data->dentry; + struct vfsmount *vfsmnt = data->vfsmnt; + struct mount *mnt = data->mnt; + int error; + + if (!(dentry != data->root_dentry || vfsmnt != data->root_mnt)) { + data->resolved = + true; // resolved all path components successfully + return 1; + } + + probe_read(&vfsmnt_mnt_root, sizeof(vfsmnt_mnt_root), + _(&vfsmnt->mnt_root)); + if (dentry == vfsmnt_mnt_root || IS_ROOT(dentry)) { + struct mount *parent; + + probe_read(&parent, sizeof(parent), _(&mnt->mnt_parent)); + + /* Global root? 
*/ + if (data->mnt != parent) { + probe_read(&data->dentry, sizeof(data->dentry), + _(&mnt->mnt_mountpoint)); + data->mnt = parent; + data->vfsmnt = _(&parent->mnt); + return 0; + } + // resolved all path components successfully + data->resolved = true; + return 1; + } + probe_read(&parent, sizeof(parent), _(&dentry->d_parent)); + probe_read(&d_name, sizeof(d_name), _(&dentry->d_name)); + error = prepend_name(data->bf, &data->bptr, &data->blen, + (const char *)d_name.name, d_name.len); + // This will happen where the dentry name does not fit in the buffer. + // We will stop the loop with resolved == false and later we will + // set the proper value in error before function return. + if (error) + return 1; + + data->dentry = parent; + return 0; +} + +#ifdef __V61_BPF_PROG +static long cwd_read_v61(__u32 index, void *data) +{ + return cwd_read(data); +} +#endif + +FUNC_INLINE int +prepend_path(const struct path *path, const struct path *root, char *bf, + char **buffer, int *buflen) +{ + struct cwd_read_data data = { + .bf = bf, + .bptr = *buffer, + .blen = *buflen, + }; + int error = 0; + + probe_read(&data.root_dentry, sizeof(data.root_dentry), + _(&root->dentry)); + probe_read(&data.root_mnt, sizeof(data.root_mnt), _(&root->mnt)); + probe_read(&data.dentry, sizeof(data.dentry), _(&path->dentry)); + probe_read(&data.vfsmnt, sizeof(data.vfsmnt), _(&path->mnt)); + data.mnt = real_mount(data.vfsmnt); + +#ifndef __V61_BPF_PROG +#pragma unroll + for (int i = 0; i < PROBE_CWD_READ_ITERATIONS; ++i) { + if (cwd_read(&data)) + break; + } +#else + loop(PROBE_CWD_READ_ITERATIONS, cwd_read_v61, (void *)&data, 0); +#endif /* __V61_BPF_PROG */ + + if (data.bptr == *buffer) { + *buflen = 0; + return 0; + } + if (!data.resolved) + error = UNRESOLVED_PATH_COMPONENTS; + *buffer = data.bptr; + *buflen = data.blen; + return error; +} + +FUNC_INLINE int +path_with_deleted(const struct path *path, const struct path *root, char *bf, + char **buf, int *buflen) +{ + struct dentry *dentry; + 
+ probe_read(&dentry, sizeof(dentry), _(&path->dentry)); + if (d_unlinked(dentry)) { + int error = prepend(buf, buflen, " (deleted)", 10); + + if (error) // will never happen as prepend will never return a value != 0 + return error; + } + return prepend_path(path, root, bf, buf, buflen); +} + +/* + * This function returns the path of a dentry and works in a similar + * way to Linux d_path function (https://elixir.bootlin.com/linux/v5.10/source/fs/d_path.c#L262). + * + * Input variables: + * - 'path' is a pointer to a dentry path that we want to resolve + * - 'buf' is the buffer where the path will be stored (this should be always the value of 'buffer_heap_map' map) + * - 'buflen' is the available buffer size to store the path (MAX_BUF_LEN when called from d_path_local) + * + * Input buffer layout: + * <-- buflen --> + * ----------------------------- + * | | + * ----------------------------- + * ^ + * | + * buf + * + * + * Output variables: + * - 'buf' is where the path is stored (>= compared to the input argument) + * - 'buflen' the space left in the buffer in front of the stored path (0 <= buflen < input buflen). Will not be negative. If buflen == 0 nothing is written to the buffer. + * - 'error' 0 in case of success or UNRESOLVED_PATH_COMPONENTS in the case where the path is larger than the provided buffer. + * + * Output buffer layout: + * <-- buflen --> + * ----------------------------- + * | /etc/passwd| + * ----------------------------- + * ^ + * | + * buf + * + * ps. 
The size of the path will be (initial value of buflen) - (return value of buflen) if (buflen != 0) + */ +FUNC_INLINE char * +__d_path_local(const struct path *path, char *buf, int *buflen, int *error) +{ + char *res = buf + *buflen; + struct task_struct *task; + struct fs_struct *fs; + + task = (struct task_struct *)get_current_task(); + probe_read(&fs, sizeof(fs), _(&task->fs)); + *error = path_with_deleted(path, _(&fs->root), buf, &res, buflen); + return res; +} + +/* + * Entry point to the codepath used for path resolution. + * + * This function allocates a buffer from 'buffer_heap_map' map and calls + * __d_path_local. After __d_path_local returns, it also does the appropriate + * calculations on the buffer size (check __d_path_local comment). + * + * Returns the buffer where the path is stored. 'buflen' is the size of the + * resolved path (0 <= buflen <= MAX_BUF_LEN), never negative. If buflen == 0 + * nothing is written to the buffer (the returned pointer is still valid). + * 'error' is 0 in case of success or UNRESOLVED_PATH_COMPONENTS in the case + * where the path walk did not finish or the path did not fit in the buffer. 
+ */ +FUNC_INLINE char * +d_path_local(const struct path *path, int *buflen, int *error) +{ + int zero = 0; + char *buffer = 0; + + buffer = map_lookup_elem(&buffer_heap_map, &zero); + if (!buffer) + return 0; + + *buflen = MAX_BUF_LEN; + buffer = __d_path_local(path, buffer, buflen, error); + if (*buflen > 0) + *buflen = MAX_BUF_LEN - *buflen; + + return buffer; +} +#endif /* __D_PATH__ */ diff --git a/bpf/lib/bpf_task.h b/bpf/lib/bpf_task.h index e1554fb2c7e..757b501a6cb 100644 --- a/bpf/lib/bpf_task.h +++ b/bpf/lib/bpf_task.h @@ -9,19 +9,6 @@ #include "generic.h" #include "vmlinux.h" -/* __d_path_local flags */ -// #define UNRESOLVED_MOUNT_POINTS 0x01 // (deprecated) -// this error is returned by __d_path_local in the following cases: -// - the path walk did not conclude (too many dentry) -// - the path was too long to fit in the buffer -#define UNRESOLVED_PATH_COMPONENTS 0x02 - -#ifdef __LARGE_BPF_PROG -#define PROBE_CWD_READ_ITERATIONS 128 -#else -#define PROBE_CWD_READ_ITERATIONS 11 -#endif - FUNC_INLINE struct task_struct *get_parent(struct task_struct *t) { struct task_struct *task; diff --git a/bpf/process/bpf_process_event.h b/bpf/process/bpf_process_event.h index baf767657d3..5bbf8633506 100644 --- a/bpf/process/bpf_process_event.h +++ b/bpf/process/bpf_process_event.h @@ -8,27 +8,11 @@ #include "bpf_cgroup.h" #include "bpf_cred.h" -#include "cgroup/cgtracker.h" +#include "bpf_d_path.h" -#define ENAMETOOLONG 36 /* File name too long */ +#include "cgroup/cgtracker.h" #define MATCH_BINARIES_PATH_MAX_LENGTH 256 -#define MAX_BUF_LEN 4096 - -struct buffer_heap_map_value { - // Buffer need a bit more space here because of the verifier. In - // prepend_name unit tests, the verifier figures out that MAX_BUF_LEN is - // enough and that the buffer_offset will not overflow, but in the real - // use-case it looks like it's forgetting about that. 
- unsigned char buf[MAX_BUF_LEN + 256]; -}; - -struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __uint(max_entries, 1); - __type(key, int); - __type(value, struct buffer_heap_map_value); -} buffer_heap_map SEC(".maps"); FUNC_INLINE __u64 __get_auid(struct task_struct *t) { @@ -65,305 +49,6 @@ FUNC_INLINE __u32 get_auid(void) return __get_auid(task); } -#define offsetof_btf(s, memb) ((size_t)((char *)_(&((s *)0)->memb) - (char *)0)) - -#define container_of_btf(ptr, type, member) \ - ({ \ - void *__mptr = (void *)(ptr); \ - ((type *)(__mptr - offsetof_btf(type, member))); \ - }) - -FUNC_INLINE struct mount *real_mount(struct vfsmount *mnt) -{ - return container_of_btf(mnt, struct mount, mnt); -} - -FUNC_INLINE bool IS_ROOT(struct dentry *dentry) -{ - struct dentry *d_parent; - - probe_read(&d_parent, sizeof(d_parent), _(&dentry->d_parent)); - return (dentry == d_parent); -} - -FUNC_INLINE bool hlist_bl_unhashed(const struct hlist_bl_node *h) -{ - struct hlist_bl_node **pprev; - - probe_read(&pprev, sizeof(pprev), _(&h->pprev)); - return !pprev; -} - -FUNC_INLINE int d_unhashed(struct dentry *dentry) -{ - return hlist_bl_unhashed(_(&dentry->d_hash)); -} - -FUNC_INLINE int d_unlinked(struct dentry *dentry) -{ - return d_unhashed(dentry) && !IS_ROOT(dentry); -} - -FUNC_INLINE int -prepend_name(char *buf, char **bufptr, int *buflen, const char *name, u32 namelen) -{ - // contains 1 if the buffer is large enough to contain the whole name and a slash prefix - bool write_slash = 1; - - u64 buffer_offset = (u64)(*bufptr) - (u64)buf; - - // Change name and namelen to fit in the buffer. - // We prefer to store the part of it that fits rather than discard it. - if (namelen >= *buflen) { - name += namelen - *buflen; - namelen = *buflen; - write_slash = 0; - } - - *buflen -= (namelen + write_slash); - - if (namelen + write_slash > buffer_offset) - return -ENAMETOOLONG; - - buffer_offset -= (namelen + write_slash); - - // This will never happen. 
buffer_offset is the diff of the initial buffer pointer - // with the current buffer pointer. This will be at max 4096 bytes (similar to the initial - // size). - // Needed to bound that for probe_read call. - if (buffer_offset >= MAX_BUF_LEN) - return -ENAMETOOLONG; - - if (write_slash) - buf[buffer_offset] = '/'; - - // This ensures that namelen is < 256, which is aligned with kernel's max dentry name length - // that is 255 (https://elixir.bootlin.com/linux/v5.10/source/include/uapi/linux/limits.h#L12). - // Needed to bound that for probe_read call. - asm volatile("%[namelen] &= 0xff;\n" - : [namelen] "+r"(namelen)); - probe_read(buf + buffer_offset + write_slash, namelen * sizeof(char), name); - - *bufptr = buf + buffer_offset; - return write_slash ? 0 : -ENAMETOOLONG; -} - -/* - * Only called from path_with_deleted function before any path traversals. - * In the current scenarios, always buflen will be 256 and namelen 10. - * For this reason I will never return -ENAMETOOLONG. - */ -FUNC_INLINE int -prepend(char **buffer, int *buflen, const char *str, int namelen) -{ - *buflen -= namelen; - if (*buflen < 0) // will never happen - check function comment - return -ENAMETOOLONG; - *buffer -= namelen; - memcpy(*buffer, str, namelen); - return 0; -} - -struct cwd_read_data { - struct dentry *root_dentry; - struct vfsmount *root_mnt; - char *bf; - struct dentry *dentry; - struct vfsmount *vfsmnt; - struct mount *mnt; - char *bptr; - int blen; - bool resolved; -}; - -FUNC_INLINE long cwd_read(struct cwd_read_data *data) -{ - struct qstr d_name; - struct dentry *parent; - struct dentry *vfsmnt_mnt_root; - struct dentry *dentry = data->dentry; - struct vfsmount *vfsmnt = data->vfsmnt; - struct mount *mnt = data->mnt; - int error; - - if (!(dentry != data->root_dentry || vfsmnt != data->root_mnt)) { - data->resolved = - true; // resolved all path components successfully - return 1; - } - - probe_read(&vfsmnt_mnt_root, sizeof(vfsmnt_mnt_root), - _(&vfsmnt->mnt_root)); - 
if (dentry == vfsmnt_mnt_root || IS_ROOT(dentry)) { - struct mount *parent; - - probe_read(&parent, sizeof(parent), _(&mnt->mnt_parent)); - - /* Global root? */ - if (data->mnt != parent) { - probe_read(&data->dentry, sizeof(data->dentry), - _(&mnt->mnt_mountpoint)); - data->mnt = parent; - data->vfsmnt = _(&parent->mnt); - return 0; - } - // resolved all path components successfully - data->resolved = true; - return 1; - } - probe_read(&parent, sizeof(parent), _(&dentry->d_parent)); - probe_read(&d_name, sizeof(d_name), _(&dentry->d_name)); - error = prepend_name(data->bf, &data->bptr, &data->blen, - (const char *)d_name.name, d_name.len); - // This will happen where the dentry name does not fit in the buffer. - // We will stop the loop with resolved == false and later we will - // set the proper value in error before function return. - if (error) - return 1; - - data->dentry = parent; - return 0; -} - -#ifdef __V61_BPF_PROG -static long cwd_read_v61(__u32 index, void *data) -{ - return cwd_read(data); -} -#endif -FUNC_INLINE int -prepend_path(const struct path *path, const struct path *root, char *bf, - char **buffer, int *buflen) -{ - struct cwd_read_data data = { - .bf = bf, - .bptr = *buffer, - .blen = *buflen, - }; - int error = 0; - - probe_read(&data.root_dentry, sizeof(data.root_dentry), - _(&root->dentry)); - probe_read(&data.root_mnt, sizeof(data.root_mnt), _(&root->mnt)); - probe_read(&data.dentry, sizeof(data.dentry), _(&path->dentry)); - probe_read(&data.vfsmnt, sizeof(data.vfsmnt), _(&path->mnt)); - data.mnt = real_mount(data.vfsmnt); - -#ifndef __V61_BPF_PROG -#pragma unroll - for (int i = 0; i < PROBE_CWD_READ_ITERATIONS; ++i) { - if (cwd_read(&data)) - break; - } -#else - loop(PROBE_CWD_READ_ITERATIONS, cwd_read_v61, (void *)&data, 0); -#endif /* __V61_BPF_PROG */ - - if (data.bptr == *buffer) { - *buflen = 0; - return 0; - } - if (!data.resolved) - error = UNRESOLVED_PATH_COMPONENTS; - *buffer = data.bptr; - *buflen = data.blen; - return error; 
-} - -FUNC_INLINE int -path_with_deleted(const struct path *path, const struct path *root, char *bf, - char **buf, int *buflen) -{ - struct dentry *dentry; - - probe_read(&dentry, sizeof(dentry), _(&path->dentry)); - if (d_unlinked(dentry)) { - int error = prepend(buf, buflen, " (deleted)", 10); - if (error) // will never happen as prepend will never return a value != 0 - return error; - } - return prepend_path(path, root, bf, buf, buflen); -} - -/* - * This function returns the path of a dentry and works in a similar - * way to Linux d_path function (https://elixir.bootlin.com/linux/v5.10/source/fs/d_path.c#L262). - * - * Input variables: - * - 'path' is a pointer to a dentry path that we want to resolve - * - 'buf' is the buffer where the path will be stored (this should be always the value of 'buffer_heap_map' map) - * - 'buflen' is the available buffer size to store the path (now 256 in all cases, maybe we can increase that further) - * - * Input buffer layout: - * <-- buflen --> - * ----------------------------- - * | | - * ----------------------------- - * ^ - * | - * buf - * - * - * Output variables: - * - 'buf' is where the path is stored (>= compared to the input argument) - * - 'buflen' the size of the resolved path (0 < buflen <= 256). Will not be negative. If buflen == 0 nothing is written to the buffer. - * - 'error' 0 in case of success or UNRESOLVED_PATH_COMPONENTS in the case where the path is larger than the provided buffer. - * - * Output buffer layout: - * <-- buflen --> - * ----------------------------- - * | /etc/passwd| - * ----------------------------- - * ^ - * | - * buf - * - * ps. 
The size of the path will be (initial value of buflen) - (return value of buflen) if (buflen != 0) - */ -FUNC_INLINE char * -__d_path_local(const struct path *path, char *buf, int *buflen, int *error) -{ - char *res = buf + *buflen; - struct task_struct *task; - struct fs_struct *fs; - - task = (struct task_struct *)get_current_task(); - probe_read(&fs, sizeof(fs), _(&task->fs)); - *error = path_with_deleted(path, _(&fs->root), buf, &res, buflen); - return res; -} - -/* - * Entry point to the codepath used for path resolution. - * - * This function allocates a buffer from 'buffer_heap_map' map and calls - * __d_path_local. After __d_path_local returns, it also does the appropriate - * calculations on the buffer size (check __d_path_local comment). - * - * Returns the buffer where the path is stored. 'buflen' is the size of the - * resolved path (0 < buflen <= 256) and will not be negative. If buflen == 0 - * nothing is written to the buffer (still the value to the buffer is valid). - * 'error' is 0 in case of success or UNRESOLVED_PATH_COMPONENTS in the case - * where the path is larger than the provided buffer. - */ -FUNC_INLINE char * -d_path_local(const struct path *path, int *buflen, int *error) -{ - int zero = 0; - char *buffer = 0; - - buffer = map_lookup_elem(&buffer_heap_map, &zero); - if (!buffer) - return 0; - - *buflen = MAX_BUF_LEN; - buffer = __d_path_local(path, buffer, buflen, error); - if (*buflen > 0) - *buflen = MAX_BUF_LEN - *buflen; - - return buffer; -} - FUNC_INLINE __u32 getcwd(struct msg_process *curr, __u32 offset, __u32 proc_pid) { From a39e447d98908a862d7c09891067d0ccf3e007fd Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 21 Jan 2025 06:51:15 +0000 Subject: [PATCH 3/7] tetragon: Move read_exe into process.h header Moving read_exe into process.h header so it can be easily used in following changes from other places. 
Signed-off-by: Jiri Olsa --- bpf/lib/process.h | 45 ++++++++++++++++++++++++++++++++++ bpf/process/bpf_execve_event.c | 45 ---------------------------------- 2 files changed, 45 insertions(+), 45 deletions(-) diff --git a/bpf/lib/process.h b/bpf/lib/process.h index 240c8145eb4..ad89e9e5a5e 100644 --- a/bpf/lib/process.h +++ b/bpf/lib/process.h @@ -7,6 +7,7 @@ #include "bpf_event.h" #include "bpf_helpers.h" #include "bpf_cred.h" +#include "bpf_d_path.h" #include "../process/string_maps.h" /* Applying 'packed' attribute to structs causes clang to write to the @@ -620,4 +621,48 @@ perf_event_output_metric(void *ctx, u8 msg_op, void *map, u64 flags, void *data, perf_event_output_update_error_metric(msg_op, err); } +/** + * read_exe() Reads the path from the backing executable file of the current + * process. + * + * The executable file of a process can change using the prctl() system call + * and PR_SET_MM_EXE_FILE. Thus, this function should only be used under the + * execve path since the executable file is locked and usually there is only + * one remaining thread at its exit path. + */ +#ifdef __LARGE_BPF_PROG +FUNC_INLINE __u32 +read_exe(struct task_struct *task, struct heap_exe *exe) +{ + struct file *file = BPF_CORE_READ(task, mm, exe_file); + struct path *path = __builtin_preserve_access_index(&file->f_path); + __u64 offset = 0; + __u64 revlen = STRING_POSTFIX_MAX_LENGTH - 1; + + // we need to walk the complete 4096 len dentry in order to have an accurate + // matching on the prefix operators, even if we only keep a subset of that + char *buffer; + + buffer = d_path_local(path, (int *)&exe->len, (int *)&exe->error); + if (!buffer) + return 0; + + if (exe->len > STRING_POSTFIX_MAX_LENGTH - 1) + offset = exe->len - (STRING_POSTFIX_MAX_LENGTH - 1); + else + revlen = exe->len; + // buffer used by d_path_local can contain up to MAX_BUF_LEN i.e. 
4096 we + // only keep the first 255 chars for our needs (we sacrifice one char to the + // verifier for the > 0 check) + if (exe->len > BINARY_PATH_MAX_LEN - 1) + exe->len = BINARY_PATH_MAX_LEN - 1; + asm volatile("%[len] &= 0xff;\n" + : [len] "+r"(exe->len)); + probe_read(exe->buf, exe->len, buffer); + if (revlen < STRING_POSTFIX_MAX_LENGTH) + probe_read(exe->end, revlen, (char *)(buffer + offset)); + + return exe->len; +} +#endif #endif //_PROCESS__ diff --git a/bpf/process/bpf_execve_event.c b/bpf/process/bpf_execve_event.c index f5053754b9d..c9b7fa0f557 100644 --- a/bpf/process/bpf_execve_event.c +++ b/bpf/process/bpf_execve_event.c @@ -181,51 +181,6 @@ read_execve_shared_info(void *ctx, struct msg_process *p, __u64 pid) execve_joined_info_map_clear(pid); } -/** - * read_exe() Reads the path from the backing executable file of the current - * process. - * - * The executable file of a process can change using the prctl() system call - * and PR_SET_MM_EXE_FILE. Thus, this function should only be used under the - * execve path since the executable file is locked and usually there is only - * one remaining thread at its exit path. - */ -#ifdef __LARGE_BPF_PROG -FUNC_INLINE __u32 -read_exe(struct task_struct *task, struct heap_exe *exe) -{ - struct file *file = BPF_CORE_READ(task, mm, exe_file); - struct path *path = __builtin_preserve_access_index(&file->f_path); - __u64 offset = 0; - __u64 revlen = STRING_POSTFIX_MAX_LENGTH - 1; - - // we need to walk the complete 4096 len dentry in order to have an accurate - // matching on the prefix operators, even if we only keep a subset of that - char *buffer; - - buffer = d_path_local(path, (int *)&exe->len, (int *)&exe->error); - if (!buffer) - return 0; - - if (exe->len > STRING_POSTFIX_MAX_LENGTH - 1) - offset = exe->len - (STRING_POSTFIX_MAX_LENGTH - 1); - else - revlen = exe->len; - // buffer used by d_path_local can contain up to MAX_BUF_LEN i.e. 
4096 we - // only keep the first 255 chars for our needs (we sacrifice one char to the - // verifier for the > 0 check) - if (exe->len > BINARY_PATH_MAX_LEN - 1) - exe->len = BINARY_PATH_MAX_LEN - 1; - asm volatile("%[len] &= 0xff;\n" - : [len] "+r"(exe->len)); - probe_read(exe->buf, exe->len, buffer); - if (revlen < STRING_POSTFIX_MAX_LENGTH) - probe_read(exe->end, revlen, (char *)(buffer + offset)); - - return exe->len; -} -#endif - __attribute__((section("tracepoint/sys_execve"), used)) int event_execve(struct trace_event_raw_sched_process_exec *ctx) { From 86ea9c53e4a87281f21dc47ca10c87f06a13f5a2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 21 Jan 2025 08:55:41 +0000 Subject: [PATCH 4/7] tetragon: Set flags early in processing Setting flags early in the processing so in following changes the filter tail call can change it (which is executed before current flags setup code). Signed-off-by: Jiri Olsa --- bpf/process/bpf_generic_tracepoint.c | 1 + bpf/process/generic_calls.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/bpf/process/bpf_generic_tracepoint.c b/bpf/process/bpf_generic_tracepoint.c index abbfbe092f2..27c75a126c2 100644 --- a/bpf/process/bpf_generic_tracepoint.c +++ b/bpf/process/bpf_generic_tracepoint.c @@ -226,6 +226,7 @@ generic_tracepoint_event(struct generic_tracepoint_event_arg *ctx) #ifdef __CAP_CHANGES_FILTER msg->sel.match_cap = 0; #endif + msg->common.flags = 0; tail_call(ctx, &tp_calls, TAIL_CALL_FILTER); return 0; } diff --git a/bpf/process/generic_calls.h b/bpf/process/generic_calls.h index c2dfafa24c8..d9bf48c1d8e 100644 --- a/bpf/process/generic_calls.h +++ b/bpf/process/generic_calls.h @@ -56,6 +56,7 @@ generic_start_process_filter(void *ctx, struct bpf_map_def *calls) #endif msg->lsm.post = false; + msg->common.flags = 0; /* Tail call into filters. 
*/ tail_call(ctx, calls, TAIL_CALL_FILTER); @@ -168,7 +169,6 @@ generic_process_init(struct msg_generic_kprobe *e, u8 op, struct event_config *c { e->common.op = op; - e->common.flags = 0; e->common.pad[0] = 0; e->common.pad[1] = 0; e->common.size = 0; @@ -579,7 +579,7 @@ FUNC_INLINE int generic_retkprobe(void *ctx, struct bpf_map_def *calls, unsigned enter = event_find_curr(&ppid, &walker); e->common.op = MSG_OP_GENERIC_KPROBE; - e->common.flags |= MSG_COMMON_FLAG_RETURN; + e->common.flags = MSG_COMMON_FLAG_RETURN; e->common.pad[0] = 0; e->common.pad[1] = 0; e->common.size = size; From b8177f19e76fd1af0d6c6c2e248f3a05b23c3b4d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 15 Feb 2025 20:34:34 +0000 Subject: [PATCH 5/7] tetragon: Lookup unknown process when execve_map search fails Adding new filter logic that triggers when process is not found in execve_map (likely due to exhausted capacity). In this case we currently exit the bpf program and that might leave some crucial actions not executed (like sigkill). We add new event_find_curr_probe call to gather process info in filter context and use it to execute filters. The resulting kprobe event will have minimal process data with 'unknown' flags field. 
Signed-off-by: Jiri Olsa --- bpf/lib/common.h | 1 + bpf/lib/generic.h | 2 ++ bpf/process/bpf_process_event.h | 27 +++++++++++++++++++++++ bpf/process/generic_calls.h | 8 +++++-- pkg/api/processapi/processapi.go | 1 + pkg/grpc/tracing/tracing.go | 30 +++++++++++++++++--------- pkg/sensors/tracing/tracepoint_test.go | 9 +++++--- 7 files changed, 63 insertions(+), 15 deletions(-) diff --git a/bpf/lib/common.h b/bpf/lib/common.h index d4712c78091..ad8a3355ae5 100644 --- a/bpf/lib/common.h +++ b/bpf/lib/common.h @@ -8,6 +8,7 @@ #define MSG_COMMON_FLAG_KERNEL_STACKTRACE BIT(1) #define MSG_COMMON_FLAG_USER_STACKTRACE BIT(2) #define MSG_COMMON_FLAG_IMA_HASH BIT(3) +#define MSG_COMMON_FLAG_PROCESS_NOT_FOUND BIT(4) /* Msg Layout */ struct msg_common { diff --git a/bpf/lib/generic.h b/bpf/lib/generic.h index bf785b11bdb..27a36dbfbdb 100644 --- a/bpf/lib/generic.h +++ b/bpf/lib/generic.h @@ -62,6 +62,8 @@ struct msg_generic_kprobe { bool post; // true if event needs to be posted } lsm; }; + struct execve_map_value curr; + struct heap_exe exe; }; FUNC_INLINE size_t generic_kprobe_common_size(void) diff --git a/bpf/process/bpf_process_event.h b/bpf/process/bpf_process_event.h index 5bbf8633506..c6e1fe7fa48 100644 --- a/bpf/process/bpf_process_event.h +++ b/bpf/process/bpf_process_event.h @@ -288,4 +288,31 @@ set_in_init_tree(struct execve_map_value *curr, struct execve_map_value *parent) DEBUG("%s: nspid=1", __func__); } } + +#ifdef __LARGE_BPF_PROG +FUNC_INLINE struct execve_map_value * +event_find_curr_probe(struct msg_generic_kprobe *msg) +{ + struct task_struct *task = (struct task_struct *)get_current_task(); + struct execve_map_value *curr; + + curr = &msg->curr; + curr->key.pid = BPF_CORE_READ(task, tgid); + curr->key.ktime = ktime_get_ns(); + curr->nspid = get_task_pid_vnr_by_task(task); + + get_current_subj_caps(&curr->caps, task); + get_namespaces(&curr->ns, task); + set_in_init_tree(curr, NULL); + + read_exe((struct task_struct *)get_current_task(), &msg->exe); + 
return curr; +} +#else +FUNC_INLINE struct execve_map_value * +event_find_curr_probe(struct msg_generic_kprobe *msg) +{ + return NULL; +} +#endif #endif diff --git a/bpf/process/generic_calls.h b/bpf/process/generic_calls.h index d9bf48c1d8e..235a9cbc157 100644 --- a/bpf/process/generic_calls.h +++ b/bpf/process/generic_calls.h @@ -625,8 +625,12 @@ FUNC_INLINE int generic_process_filter(void) return 0; enter = event_find_curr(&ppid, &walker); - if (!enter) - return PFILTER_CURR_NOT_FOUND; + if (!enter) { + enter = event_find_curr_probe(msg); + if (!enter) + return PFILTER_CURR_NOT_FOUND; + msg->common.flags |= MSG_COMMON_FLAG_PROCESS_NOT_FOUND; + } f = map_lookup_elem(&filter_map, &msg->idx); if (!f) diff --git a/pkg/api/processapi/processapi.go b/pkg/api/processapi/processapi.go index 4d35ef5d026..9bdb7ef7c0b 100644 --- a/pkg/api/processapi/processapi.go +++ b/pkg/api/processapi/processapi.go @@ -44,6 +44,7 @@ const ( MSG_COMMON_FLAG_KERNEL_STACKTRACE = 0x2 MSG_COMMON_FLAG_USER_STACKTRACE = 0x4 MSG_COMMON_FLAG_IMA_HASH = 0x8 + MSG_COMMON_FLAG_PROCESS_NOT_FOUND = 0x10 BINARY_PATH_MAX_LEN = 256 MAX_ARG_LENGTH = 256 diff --git a/pkg/grpc/tracing/tracing.go b/pkg/grpc/tracing/tracing.go index eda9e6b310b..618f79acd6e 100644 --- a/pkg/grpc/tracing/tracing.go +++ b/pkg/grpc/tracing/tracing.go @@ -6,6 +6,7 @@ package tracing import ( "encoding/hex" "fmt" + "strings" "github.com/cilium/tetragon/pkg/reader/kernel" "golang.org/x/sys/unix" @@ -31,15 +32,20 @@ import ( "google.golang.org/protobuf/types/known/wrapperspb" ) -func getProcessParent(key *processapi.MsgExecveKey) (*process.ProcessInternal, *process.ProcessInternal, *tetragon.Process, *tetragon.Process) { +func getProcessParent(key *processapi.MsgExecveKey, flags uint8) (*process.ProcessInternal, *process.ProcessInternal, *tetragon.Process, *tetragon.Process) { var tetragonParent, tetragonProcess *tetragon.Process + unknown := flags&processapi.MSG_COMMON_FLAG_PROCESS_NOT_FOUND != 0 + proc, parent := 
process.GetParentProcessInternal(key.Pid, key.Ktime) if proc == nil { tetragonProcess = &tetragon.Process{ Pid: &wrapperspb.UInt32Value{Value: key.Pid}, StartTime: ktime.ToProto(key.Ktime), } + if unknown { + tetragonProcess.Flags = "unknown" + } } else { tetragonProcess = proc.UnsafeGetProcess() if err := proc.AnnotateProcess(option.Config.EnableProcessCred, option.Config.EnableProcessNs); err != nil { @@ -54,6 +60,10 @@ func getProcessParent(key *processapi.MsgExecveKey) (*process.ProcessInternal, * return proc, parent, tetragonProcess, tetragonParent } +func isUnknown(proc *tetragon.Process) bool { + return strings.Contains(proc.Flags, "unknown") +} + func kprobeAction(act uint64) tetragon.KprobeAction { switch act { case tracingapi.ActionPost: @@ -316,7 +326,7 @@ func GetProcessKprobe(event *MsgGenericKprobeUnix) *tetragon.ProcessKprobe { var tetragonArgs []*tetragon.KprobeArgument var tetragonReturnArg *tetragon.KprobeArgument - proc, parent, tetragonProcess, tetragonParent := getProcessParent(&event.Msg.ProcessKey) + proc, parent, tetragonProcess, tetragonParent := getProcessParent(&event.Msg.ProcessKey, event.Msg.Common.Flags) // Set the ancestors only if --enable-process-kprobe-ancestors flag is set. 
if option.Config.EnableProcessKprobeAncestors && proc.NeededAncestors() { @@ -407,7 +417,7 @@ func GetProcessKprobe(event *MsgGenericKprobeUnix) *tetragon.ProcessKprobe { return nil } - if ec := eventcache.Get(); ec != nil && + if ec := eventcache.Get(); ec != nil && !isUnknown(tetragonProcess) && (ec.Needed(tetragonProcess) || (tetragonProcess.Pid.Value > 1 && ec.Needed(tetragonParent)) || (option.Config.EnableProcessKprobeAncestors && ec.NeededAncestors(parent, ancestors))) { @@ -469,7 +479,7 @@ func (msg *MsgGenericTracepointUnix) HandleMessage() *tetragon.GetEventsResponse var ancestors []*process.ProcessInternal var tetragonAncestors []*tetragon.Process - proc, parent, tetragonProcess, tetragonParent := getProcessParent(&msg.Msg.ProcessKey) + proc, parent, tetragonProcess, tetragonParent := getProcessParent(&msg.Msg.ProcessKey, msg.Msg.Common.Flags) // Set the ancestors only if --enable-process-tracepoint-ancestors flag is set. if option.Config.EnableProcessTracepointAncestors && proc.NeededAncestors() { @@ -590,7 +600,7 @@ func (msg *MsgGenericTracepointUnix) HandleMessage() *tetragon.GetEventsResponse return nil } - if ec := eventcache.Get(); ec != nil && + if ec := eventcache.Get(); ec != nil && !isUnknown(tetragonProcess) && (ec.Needed(tetragonProcess) || (tetragonProcess.Pid.Value > 1 && ec.Needed(tetragonParent)) || (option.Config.EnableProcessTracepointAncestors && ec.NeededAncestors(parent, ancestors))) { @@ -700,7 +710,7 @@ func (event *ProcessLoaderNotify) SetAncestors([]*tetragon.Process) { } func GetProcessLoader(msg *MsgProcessLoaderUnix) *tetragon.ProcessLoader { - _, _, tetragonProcess, _ := getProcessParent(&msg.Msg.ProcessKey) + _, _, tetragonProcess, _ := getProcessParent(&msg.Msg.ProcessKey, 0) notifyEvent := &ProcessLoaderNotify{ ProcessLoader: tetragon.ProcessLoader{ @@ -788,7 +798,7 @@ func GetProcessUprobe(event *MsgGenericUprobeUnix) *tetragon.ProcessUprobe { var tetragonAncestors []*tetragon.Process var tetragonArgs 
[]*tetragon.KprobeArgument - proc, parent, tetragonProcess, tetragonParent := getProcessParent(&event.Msg.ProcessKey) + proc, parent, tetragonProcess, tetragonParent := getProcessParent(&event.Msg.ProcessKey, event.Msg.Common.Flags) // Set the ancestors only if --enable-process-uprobe-ancestors flag is set. if option.Config.EnableProcessUprobeAncestors && proc.NeededAncestors() { @@ -819,7 +829,7 @@ func GetProcessUprobe(event *MsgGenericUprobeUnix) *tetragon.ProcessUprobe { return nil } - if ec := eventcache.Get(); ec != nil && + if ec := eventcache.Get(); ec != nil && !isUnknown(tetragonProcess) && (ec.Needed(tetragonProcess) || (tetragonProcess.Pid.Value > 1 && ec.Needed(tetragonParent)) || (option.Config.EnableProcessUprobeAncestors && ec.NeededAncestors(parent, ancestors))) { @@ -912,7 +922,7 @@ func GetProcessLsm(event *MsgGenericLsmUnix) *tetragon.ProcessLsm { var tetragonAncestors []*tetragon.Process var tetragonArgs []*tetragon.KprobeArgument - proc, parent, tetragonProcess, tetragonParent := getProcessParent(&event.Msg.ProcessKey) + proc, parent, tetragonProcess, tetragonParent := getProcessParent(&event.Msg.ProcessKey, event.Msg.Common.Flags) // Set the ancestors only if --enable-process-lsm-ancestors flag is set. 
if option.Config.EnableProcessLsmAncestors && proc.NeededAncestors() { @@ -962,7 +972,7 @@ func GetProcessLsm(event *MsgGenericLsmUnix) *tetragon.ProcessLsm { return nil } - if ec := eventcache.Get(); ec != nil && + if ec := eventcache.Get(); ec != nil && !isUnknown(tetragonProcess) && (ec.Needed(tetragonProcess) || (tetragonProcess.Pid.Value > 1 && ec.Needed(tetragonParent)) || (option.Config.EnableProcessLsmAncestors && ec.NeededAncestors(parent, ancestors))) { diff --git a/pkg/sensors/tracing/tracepoint_test.go b/pkg/sensors/tracing/tracepoint_test.go index 57f6e965421..f8ec3c6def6 100644 --- a/pkg/sensors/tracing/tracepoint_test.go +++ b/pkg/sensors/tracing/tracepoint_test.go @@ -443,9 +443,6 @@ func TestLoadTracepointSensor(t *testing.T) { // all but generic_tracepoint_output tus.SensorMap{Name: "tp_calls", Progs: []uint{0, 1, 2, 3, 4}}, - // only generic_tracepoint_event* - tus.SensorMap{Name: "buffer_heap_map", Progs: []uint{2}}, - // all but generic_tracepoint_event,generic_tracepoint_filter tus.SensorMap{Name: "retprobe_map", Progs: []uint{1, 2}}, @@ -462,9 +459,15 @@ func TestLoadTracepointSensor(t *testing.T) { if kernels.EnableLargeProgs() { // shared with base sensor sensorMaps = append(sensorMaps, tus.SensorMap{Name: "execve_map", Progs: []uint{3, 4, 5}}) + + // generic_tracepoint_event*,generic_tracepoint_filter + sensorMaps = append(sensorMaps, tus.SensorMap{Name: "buffer_heap_map", Progs: []uint{2, 3}}) } else { // shared with base sensor sensorMaps = append(sensorMaps, tus.SensorMap{Name: "execve_map", Progs: []uint{3}}) + + // only generic_tracepoint_event* + sensorMaps = append(sensorMaps, tus.SensorMap{Name: "buffer_heap_map", Progs: []uint{2}}) } readHook := ` From ca93b4420e6d8551e0faee61cb4dc66a49f08661 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 15 Feb 2025 21:14:02 +0000 Subject: [PATCH 6/7] tetragon: Add test for unknown process kprobe kill Adding test for unknown process kprobe kill. 
Signed-off-by: Jiri Olsa --- pkg/sensors/tracing/kprobe_sigkill_test.go | 36 ++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/pkg/sensors/tracing/kprobe_sigkill_test.go b/pkg/sensors/tracing/kprobe_sigkill_test.go index d945fe9d2b4..b594910c4dc 100644 --- a/pkg/sensors/tracing/kprobe_sigkill_test.go +++ b/pkg/sensors/tracing/kprobe_sigkill_test.go @@ -19,6 +19,7 @@ import ( lc "github.com/cilium/tetragon/pkg/matchers/listmatcher" sm "github.com/cilium/tetragon/pkg/matchers/stringmatcher" "github.com/cilium/tetragon/pkg/observer/observertesthelper" + "github.com/cilium/tetragon/pkg/option" "github.com/cilium/tetragon/pkg/testutils" tus "github.com/cilium/tetragon/pkg/testutils/sensors" "github.com/stretchr/testify/assert" @@ -113,6 +114,41 @@ func TestKprobeSigkill(t *testing.T) { testSigkill(t, makeSpecFile, checker) } +func TestKprobeSigkillExecveMap1(t *testing.T) { + if !kernels.MinKernelVersion("5.3.0") { + t.Skip("sigkill requires at least 5.3.0 version") + } + + // makeSpecFile creates a new spec file based on the template, and the provided arguments + makeSpecFile := func(pid string) string { + data := map[string]string{ + "MatchedPID": pid, + "NamespacePID": "false", + } + specName, err := testutils.GetSpecFromTemplate("sigkill.yaml.tmpl", data) + if err != nil { + t.Fatal(err) + } + return specName + } + + kpChecker := ec.NewProcessKprobeChecker(""). + WithFunctionName(sm.Full(arch.AddSyscallPrefixTestHelper(t, "sys_lseek"))). + WithArgs(ec.NewKprobeArgumentListMatcher(). + WithOperator(lc.Ordered). + WithValues( + ec.NewKprobeArgumentChecker().WithIntArg(5555), + )). + WithAction(tetragon.KprobeAction_KPROBE_ACTION_SIGKILL). 
+ WithProcess(ec.NewProcessChecker().WithFlags(sm.Full("unknown"))) + + checker := ec.NewUnorderedEventChecker(kpChecker) + + option.Config.ExecveMapEntries = 1 + testSigkill(t, makeSpecFile, checker) + option.Config.ExecveMapEntries = 0 +} + func TestReturnKprobeSigkill(t *testing.T) { if !kernels.MinKernelVersion("5.3.0") { t.Skip("sigkill requires at least 5.3.0 version") From f6e583522ae9d1bbed962fc6b433de644a343768 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 15 Feb 2025 22:05:14 +0000 Subject: [PATCH 7/7] tetragon: Add test for unknown process tracepoint kill Adding test for unknown process tracepoint kill. Signed-off-by: Jiri Olsa --- pkg/sensors/tracing/kprobe_sigkill_test.go | 37 +++++++++++++++++++++ testdata/specs/sigkill_tracepoint.yaml.tmpl | 26 +++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 testdata/specs/sigkill_tracepoint.yaml.tmpl diff --git a/pkg/sensors/tracing/kprobe_sigkill_test.go b/pkg/sensors/tracing/kprobe_sigkill_test.go index b594910c4dc..4ef5b939696 100644 --- a/pkg/sensors/tracing/kprobe_sigkill_test.go +++ b/pkg/sensors/tracing/kprobe_sigkill_test.go @@ -18,6 +18,7 @@ import ( "github.com/cilium/tetragon/pkg/kernels" lc "github.com/cilium/tetragon/pkg/matchers/listmatcher" sm "github.com/cilium/tetragon/pkg/matchers/stringmatcher" + smatcher "github.com/cilium/tetragon/pkg/matchers/stringmatcher" "github.com/cilium/tetragon/pkg/observer/observertesthelper" "github.com/cilium/tetragon/pkg/option" "github.com/cilium/tetragon/pkg/testutils" @@ -149,6 +150,42 @@ func TestKprobeSigkillExecveMap1(t *testing.T) { option.Config.ExecveMapEntries = 0 } +func TestTracepointSigkillExecveMap1(t *testing.T) { + if !kernels.MinKernelVersion("5.3.0") { + t.Skip("sigkill requires at least 5.3.0 version") + } + + // makeSpecFile creates a new spec file based on the template, and the provided arguments + makeSpecFile := func(pid string) string { + data := map[string]string{ + "MatchedPID": pid, + "NamespacePID": "false", + } + 
specName, err := testutils.GetSpecFromTemplate("sigkill_tracepoint.yaml.tmpl", data) + if err != nil { + t.Fatal(err) + } + return specName + } + + kpChecker := ec.NewProcessTracepointChecker(""). + WithSubsys(smatcher.Full("syscalls")). + WithEvent(smatcher.Full("sys_enter_lseek")). + WithArgs(ec.NewKprobeArgumentListMatcher(). + WithOperator(lc.Ordered). + WithValues( + ec.NewKprobeArgumentChecker().WithIntArg(int32(5555)), + )). + WithAction(tetragon.KprobeAction_KPROBE_ACTION_SIGKILL). + WithProcess(ec.NewProcessChecker().WithFlags(sm.Full("unknown"))) + + checker := ec.NewUnorderedEventChecker(kpChecker) + + option.Config.ExecveMapEntries = 1 + testSigkill(t, makeSpecFile, checker) + option.Config.ExecveMapEntries = 0 +} + func TestReturnKprobeSigkill(t *testing.T) { if !kernels.MinKernelVersion("5.3.0") { t.Skip("sigkill requires at least 5.3.0 version") diff --git a/testdata/specs/sigkill_tracepoint.yaml.tmpl b/testdata/specs/sigkill_tracepoint.yaml.tmpl new file mode 100644 index 00000000000..ab8890a5089 --- /dev/null +++ b/testdata/specs/sigkill_tracepoint.yaml.tmpl @@ -0,0 +1,26 @@ +# test for the tracepoint sigkill action +apiVersion: cilium.io/v1alpha1 +kind: TracingPolicy +metadata: + name: "sigkilltest" +spec: + tracepoints: + - subsystem: "syscalls" + event: "sys_enter_lseek" + args: + # whence argument + - index: 7 + type: "int32" + selectors: + - matchPIDs: + - operator: In + values: + - {{.MatchedPID}} + isNamespacePID: {{.NamespacePID}} + matchArgs: + - index: 7 + operator: Equal + values: + - 5555 # magic value, see also sigkill-tester + matchActions: + - action: Sigkill