From 94022ca182d0649039b7308c522ab3bd0ef15909 Mon Sep 17 00:00:00 2001
From: Mahe Tardy
Date: Thu, 5 Sep 2024 21:15:02 +0200
Subject: [PATCH] bugtool: add memory cgroup stats

Retrieve the memory cgroup and copy memory stat files. Some of these
stats should be found in the metrics.

Signed-off-by: Mahe Tardy
---
 pkg/bugtool/bugtool.go      | 158 ++++++++++++++++++++++++++++
 pkg/bugtool/bugtool_test.go | 205 +++++++++++++++++++++++++++++++++++-
 2 files changed, 362 insertions(+), 1 deletion(-)

diff --git a/pkg/bugtool/bugtool.go b/pkg/bugtool/bugtool.go
index 8ebf97d1c19..a3742213d4e 100644
--- a/pkg/bugtool/bugtool.go
+++ b/pkg/bugtool/bugtool.go
@@ -7,6 +7,7 @@ package bugtool
 
 import (
 	"archive/tar"
+	"bufio"
 	"bytes"
 	"compress/gzip"
 	"context"
@@ -259,6 +260,7 @@ func doBugtool(info *InitInfo, outFname string) error {
 	si.dumpPolicyFilterMap(tarWriter)
 	si.addGrpcInfo(tarWriter)
 	si.addPmapOut(tarWriter)
+	si.addMemCgroupStats(tarWriter)
 
 	return nil
 }
@@ -619,3 +621,159 @@ func (s bugtoolInfo) addPmapOut(tarWriter *tar.Writer) error {
 	s.execCmd(tarWriter, "pmap.out", pmap, "-x", fmt.Sprintf("%d", s.info.PID))
 	return nil
 }
+
+// findCgroupMountPath scans r, which is expected to hold /proc/mounts
+// formatted content, and returns the mount point of the first matching cgroup
+// filesystem. When unified is true the first cgroup2 mount is returned and
+// controller is ignored; otherwise the first cgroup (v1) mount whose mount
+// point ends with controller is returned. It returns an error when reading r
+// fails or when no matching mount is found.
+func findCgroupMountPath(r io.Reader, unified bool, controller string) (string, error) {
+	cgroupName := "cgroup"
+	if unified {
+		cgroupName = "cgroup2"
+	}
+
+	scanner := bufio.NewScanner(r)
+	for scanner.Scan() {
+		line := scanner.Text()
+		fields := strings.Fields(line)
+		if len(fields) >= 3 && (fields[2] == cgroupName) {
+			if unified || strings.HasSuffix(fields[1], controller) {
+				return fields[1], nil
+			}
+		}
+	}
+
+	if err := scanner.Err(); err != nil {
+		return "", fmt.Errorf("error reading /proc/mounts: %w", err)
+	}
+
+	return "", fmt.Errorf("cgroup filesystem not found")
+}
+
+// FindCgroupMountPath returns the mount point of the cgroup filesystem listed
+// in /proc/mounts: the cgroup2 mount when unified is true, otherwise the cgroup
+// v1 mount whose mount point ends with controller.
+func FindCgroupMountPath(unified bool, controller string) (string, error) {
+	file, err := os.Open("/proc/mounts")
+	if err != nil {
+		return "", fmt.Errorf("failed to open /proc/mounts: %w", err)
+	}
+	defer file.Close()
+	return findCgroupMountPath(file, unified, controller)
+}
+
+// findMemoryCgroupPath parses r, which is expected to hold /proc/$PID/cgroup
+// formatted content, and returns the cgroup path of the memory controller
+// along with whether that path belongs to the unified (v2) hierarchy. A v1
+// "memory" entry always wins over the v2 entry, so hybrid setups are reported
+// as v1. The returned path is empty when no entry matches.
+func findMemoryCgroupPath(r io.Reader) (bool, string, error) {
+	var unified bool
+	var memoryCgroupPath string
+
+	scanner := bufio.NewScanner(r)
+	for scanner.Scan() {
+		line := scanner.Text()
+
+		// '/proc/$PID/cgroup' lists a process's cgroup membership. If legacy cgroup is
+		// in use in the system, this file may contain multiple lines, one for each
+		// hierarchy. The entry for cgroup v2 is always in the format '0::$PATH'.
+		if strings.HasPrefix(line, "0::/") {
+			unified = true
+			memoryCgroupPath = strings.TrimPrefix(line, "0::")
+
+			// we don't break here because we want to consider cases in which
+			// cgroup v2 line is before other cgroup v1 lines and we want to
+			// consider hybrid as v1, not sure it can happen in real life
+			continue
+		}
+
+		// Parsing for cgroup v1, consider hybrid as v1
+		parts := strings.SplitN(line, ":", 3)
+		if len(parts) == 3 {
+			if parts[1] == "memory" {
+				unified = false
+				memoryCgroupPath = parts[2]
+				break
+			}
+		}
+	}
+
+	if err := scanner.Err(); err != nil {
+		return false, "", fmt.Errorf("failed reading /proc/self/cgroup: %w", err)
+	}
+
+	return unified, memoryCgroupPath, nil
+}
+
+// FindMemoryCgroupPath reads /proc/self/cgroup and returns the memory cgroup
+// path of the current process along with whether it belongs to the unified
+// (v2) hierarchy.
+func FindMemoryCgroupPath() (unified bool, memoryCgroupPath string, err error) {
+	file, err := os.Open("/proc/self/cgroup")
+	if err != nil {
+		return false, "", fmt.Errorf("failed to open /proc/self/cgroup: %w", err)
+	}
+	defer file.Close()
+	return findMemoryCgroupPath(file)
+}
+
+// addMemCgroupStats copies the memory cgroup stat files of the current process
+// into the archive. It fails only when the cgroup path or mount point cannot
+// be determined; individual files are added on a best-effort basis and a
+// failure to add one of them is only logged.
+func (s bugtoolInfo) addMemCgroupStats(tarWriter *tar.Writer) error {
+	unifiedCgroup, memoryCgroupPath, err := FindMemoryCgroupPath()
+	if err != nil {
+		s.multiLog.WithError(err).Warn("failed finding the memory cgroup path")
+		return fmt.Errorf("failed to find memory cgroup path: %w", err)
+	}
+
+	cgroupMountPath, err := FindCgroupMountPath(unifiedCgroup, "memory")
+	if err != nil {
+		s.multiLog.WithError(err).Warn("failed to find cgroup mount path")
+		return fmt.Errorf("failed to find cgroup mount path: %w", err)
+	}
+
+	cgroupPath := filepath.Join(cgroupMountPath, memoryCgroupPath)
+
+	// can't use s.tarAddFile here unfortunately because it is using io.Copy
+	// based on the size retrieved from the stat of the file, and cgroup fs
+	// files have size equal to 0
+	readAndWrite := func(cgroupBasePath string, file string) error {
+		buf, err := os.ReadFile(filepath.Join(cgroupBasePath, file))
+		if err != nil {
+			s.multiLog.WithError(err).WithField("file", file).Warn("failed to read cgroup file")
+			return fmt.Errorf("failed to read file %s: %w", file, err)
+		}
+		err = s.tarAddBuff(tarWriter, file, bytes.NewBuffer(buf))
+		if err != nil {
+			s.multiLog.WithError(err).WithField("file", file).Warn("failed to add cgroup file")
+			return fmt.Errorf("failed to add buffer: %w", err)
+		}
+		s.multiLog.WithField("file", file).Info("cgroup file added")
+		return nil
+	}
+
+	if unifiedCgroup {
+		readAndWrite(cgroupPath, "memory.current")
+		readAndWrite(cgroupPath, "memory.stat")
+	} else {
+		err := readAndWrite(cgroupPath, "memory.usage_in_bytes")
+		if err != nil {
+			// Before cgroup namespace, /proc/pid/cgroup mapping was broken, so
+			// Docker back in the days mounted the cgroup hierarchy flat in the
+			// containerfs. For compatibility, it still does that for cgroup v1.
+			// See more https://lewisgaul.co.uk/blog/coding/2022/05/13/cgroups-intro/#cgroups-and-containers
+			cgroupPath = cgroupMountPath
+			s.multiLog.WithField("cgroupPath", cgroupPath).Info("retrying to read cgroup file from a different legacy path")
+			readAndWrite(cgroupPath, "memory.usage_in_bytes")
+		}
+		readAndWrite(cgroupPath, "memory.kmem.usage_in_bytes")
+		readAndWrite(cgroupPath, "memory.stat")
+	}
+
+	return nil
+}
diff --git a/pkg/bugtool/bugtool_test.go b/pkg/bugtool/bugtool_test.go
index c6ae818377a..3f3390da3a0 100644
--- a/pkg/bugtool/bugtool_test.go
+++ b/pkg/bugtool/bugtool_test.go
@@ -4,8 +4,10 @@
 package bugtool
 
 import (
+	"io"
 	"os"
 	"reflect"
+	"strings"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
@@ -37,8 +39,209 @@ func TestSaveAndLoad(t *testing.T) {
 	}
 
 	if !reflect.DeepEqual(&info1, info2) {
-		t.Errorf("mismatching structures: %s vs %s", info1, info2)
+		t.Errorf("mismatching structures: %v vs %v", info1, info2)
 	}
 
 	t.Log("Success")
 }
+
+func Test_findCgroupMountPath(t *testing.T) {
+	const cgroupMountsHybrid = `tmpfs /sys/fs/cgroup tmpfs ro,nosuid,nodev,noexec,mode=755 0 0
+cgroup2 /sys/fs/cgroup/unified cgroup2 rw,nosuid,nodev,noexec,relatime,nsdelegate 0 0
+cgroup /sys/fs/cgroup/systemd cgroup rw,nosuid,nodev,noexec,relatime,xattr,name=systemd 0 0
+pstore /sys/fs/pstore pstore rw,nosuid,nodev,noexec,relatime 0 0
+efivarfs /sys/firmware/efi/efivars efivarfs rw,nosuid,nodev,noexec,relatime 0 0
+none /sys/fs/bpf bpf rw,nosuid,nodev,noexec,relatime,mode=700 0 0
+cgroup /sys/fs/cgroup/hugetlb cgroup rw,nosuid,nodev,noexec,relatime,hugetlb 0 0
+cgroup /sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory 0 0
+cgroup /sys/fs/cgroup/perf_event cgroup rw,nosuid,nodev,noexec,relatime,perf_event 0 0
+cgroup /sys/fs/cgroup/net_cls,net_prio cgroup rw,nosuid,nodev,noexec,relatime,net_cls,net_prio 0 0
+cgroup /sys/fs/cgroup/devices cgroup rw,nosuid,nodev,noexec,relatime,devices 0 0
+cgroup /sys/fs/cgroup/cpu,cpuacct cgroup rw,nosuid,nodev,noexec,relatime,cpu,cpuacct 0 0
+cgroup /sys/fs/cgroup/freezer cgroup rw,nosuid,nodev,noexec,relatime,freezer 0 0
+cgroup /sys/fs/cgroup/rdma cgroup rw,nosuid,nodev,noexec,relatime,rdma 0 0
+cgroup /sys/fs/cgroup/blkio cgroup rw,nosuid,nodev,noexec,relatime,blkio 0 0
+cgroup /sys/fs/cgroup/pids cgroup rw,nosuid,nodev,noexec,relatime,pids 0 0
+cgroup /sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset 0 0
+systemd-1 /proc/sys/fs/binfmt_misc autofs rw,relatime,fd=28,pgrp=1,timeout=0,minproto=5,maxproto=5,direct,pipe_ino=351 0 0`
+
+	const cgroupMountsLegacy = `tmpfs /sys/fs/cgroup tmpfs ro,nosuid,nodev,noexec,mode=755 0 0
+efivarfs /sys/firmware/efi/efivars efivarfs rw,nosuid,nodev,noexec,relatime 0 0
+none /sys/fs/bpf bpf rw,nosuid,nodev,noexec,relatime,mode=700 0 0
+cgroup /sys/fs/cgroup/hugetlb cgroup rw,nosuid,nodev,noexec,relatime,hugetlb 0 0
+cgroup /sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory 0 0
+cgroup /sys/fs/cgroup/perf_event cgroup rw,nosuid,nodev,noexec,relatime,perf_event 0 0
+cgroup /sys/fs/cgroup/net_cls,net_prio cgroup rw,nosuid,nodev,noexec,relatime,net_cls,net_prio 0 0
+cgroup /sys/fs/cgroup/devices cgroup rw,nosuid,nodev,noexec,relatime,devices 0 0
+cgroup /sys/fs/cgroup/cpu,cpuacct cgroup rw,nosuid,nodev,noexec,relatime,cpu,cpuacct 0 0
+cgroup /sys/fs/cgroup/freezer cgroup rw,nosuid,nodev,noexec,relatime,freezer 0 0
+cgroup /sys/fs/cgroup/rdma cgroup rw,nosuid,nodev,noexec,relatime,rdma 0 0
+cgroup /sys/fs/cgroup/blkio cgroup rw,nosuid,nodev,noexec,relatime,blkio 0 0
+cgroup /sys/fs/cgroup/pids cgroup rw,nosuid,nodev,noexec,relatime,pids 0 0
+cgroup /sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset 0 0
+systemd-1 /proc/sys/fs/binfmt_misc autofs rw,relatime,fd=28,pgrp=1,timeout=0,minproto=5,maxproto=5,direct,pipe_ino=351 0 0`
+
+	const cgroupMountsUnified = `tmpfs /dev/shm tmpfs rw,nosuid,nodev,inode64 0 0
+tmpfs /run/lock tmpfs rw,nosuid,nodev,noexec,relatime,size=5120k,inode64 0 0
+cgroup2 /sys/fs/cgroup cgroup2 rw,nosuid,nodev,noexec,relatime 0 0
+pstore /sys/fs/pstore pstore rw,nosuid,nodev,noexec,relatime 0 0
+bpf /sys/fs/bpf bpf rw,nosuid,nodev,noexec,relatime,mode=700 0 0`
+
+	type args struct {
+		r          io.Reader
+		unified    bool
+		controller string
+	}
+	tests := []struct {
+		name    string
+		args    args
+		want    string
+		wantErr bool
+	}{
+		{
+			"cgroupv1_hybrid",
+			args{strings.NewReader(cgroupMountsHybrid), false, "memory"},
+			"/sys/fs/cgroup/memory",
+			false,
+		},
+		{
+			"cgroupv2_hybrid",
+			args{strings.NewReader(cgroupMountsHybrid), true, ""},
+			"/sys/fs/cgroup/unified",
+			false,
+		},
+		{
+			"cgroupv2",
+			args{strings.NewReader(cgroupMountsUnified), true, ""},
+			"/sys/fs/cgroup",
+			false,
+		},
+		{
+			"cgroupv1",
+			args{strings.NewReader(cgroupMountsLegacy), false, "freezer"},
+			"/sys/fs/cgroup/freezer",
+			false,
+		},
+		{
+			"cgroupv2_missing_legacy",
+			args{strings.NewReader(cgroupMountsLegacy), true, ""},
+			"",
+			true,
+		},
+		{
+			"cgroupv1_missing_unified",
+			args{strings.NewReader(cgroupMountsUnified), false, "devices"},
+			"",
+			true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := findCgroupMountPath(tt.args.r, tt.args.unified, tt.args.controller)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("findCgroupMountPath() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+			if got != tt.want {
+				t.Errorf("findCgroupMountPath() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func Test_findMemoryCgroupPath(t *testing.T) {
+	tests := []struct {
+		name    string
+		args    io.Reader
+		want    bool
+		want1   string
+		wantErr bool
+	}{
+		{
+			"cgroupv2",
+			strings.NewReader("0::/user.slice/user-501.slice/session-92.scope"),
+			true,
+			"/user.slice/user-501.slice/session-92.scope",
+			false,
+		},
+		{
+			"cgroupv1",
+			strings.NewReader(`2:cpuset:/
+11:pids:/user.slice/user-501.slice/session-2.scope
+10:blkio:/user.slice
+9:rdma:/
+8:freezer:/
+7:cpu,cpuacct:/user.slice
+6:devices:/user.slice
+5:net_cls,net_prio:/
+4:perf_event:/
+3:memory:/user.slice/user-501.slice/session-2.scope
+2:hugetlb:/
+1:name=systemd:/user.slice/user-501.slice/session-2.scope`),
+			false,
+			"/user.slice/user-501.slice/session-2.scope",
+			false,
+		},
+		{
+			"cgroupv1_hybrid",
+			strings.NewReader(`2:cpuset:/
+11:pids:/user.slice/user-501.slice/session-2.scope
+10:blkio:/user.slice
+9:rdma:/
+8:freezer:/
+7:cpu,cpuacct:/user.slice
+6:devices:/user.slice
+5:net_cls,net_prio:/
+4:perf_event:/
+3:memory:/user.slice/user-501.slice/session-2.scope
+2:hugetlb:/
+1:name=systemd:/user.slice/user-501.slice/session-2.scope
+0::/user.slice/user-501.slice/session-3.scope`),
+			false,
+			"/user.slice/user-501.slice/session-2.scope",
+			false,
+		},
+		{
+			"cgroupv1_hybrid_v2_first",
+			// this situation is artificial and I'm not sure it can happen in real life
+			strings.NewReader(`2:cpuset:/
+0::/user.slice/user-501.slice/session-3.scope
+11:pids:/user.slice/user-501.slice/session-2.scope
+10:blkio:/user.slice
+9:rdma:/
+8:freezer:/
+7:cpu,cpuacct:/user.slice
+6:devices:/user.slice
+5:net_cls,net_prio:/
+4:perf_event:/
+3:memory:/user.slice/user-501.slice/session-2.scope
+2:hugetlb:/
+1:name=systemd:/user.slice/user-501.slice/session-2.scope`),
+			false,
+			"/user.slice/user-501.slice/session-2.scope",
+			false,
+		},
+		{
+			"empty",
+			strings.NewReader(""),
+			false,
+			"",
+			false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, got1, err := findMemoryCgroupPath(tt.args)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("findMemoryCgroupPath() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+			if got != tt.want {
+				t.Errorf("findMemoryCgroupPath() got = %v, want %v", got, tt.want)
+			}
+			if got1 != tt.want1 {
+				t.Errorf("findMemoryCgroupPath() got1 = %v, want %v", got1, tt.want1)
+			}
+		})
+	}
+}