Skip to content

Commit

Permalink
bugtool: add memory cgroup stats
Browse files Browse the repository at this point in the history
Retrieve the memory cgroup and copy memory stat files. Some of these
stats should be found in the metrics.

Signed-off-by: Mahe Tardy <mahe.tardy@gmail.com>
  • Loading branch information
mtardy committed Sep 9, 2024
1 parent bfba5e9 commit 94022ca
Show file tree
Hide file tree
Showing 2 changed files with 340 additions and 1 deletion.
136 changes: 136 additions & 0 deletions pkg/bugtool/bugtool.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package bugtool

import (
"archive/tar"
"bufio"
"bytes"
"compress/gzip"
"context"
Expand Down Expand Up @@ -259,6 +260,7 @@ func doBugtool(info *InitInfo, outFname string) error {
si.dumpPolicyFilterMap(tarWriter)
si.addGrpcInfo(tarWriter)
si.addPmapOut(tarWriter)
si.addMemCgroupStats(tarWriter)
return nil
}

Expand Down Expand Up @@ -619,3 +621,137 @@ func (s bugtoolInfo) addPmapOut(tarWriter *tar.Writer) error {
s.execCmd(tarWriter, "pmap.out", pmap, "-x", fmt.Sprintf("%d", s.info.PID))
return nil
}

// findCgroupMountPath scans mount information in /proc/mounts format from r
// and returns the mount point of the cgroup filesystem. When unified is set,
// it matches the first cgroup2 mount; otherwise it matches the cgroup v1
// mount whose mount point ends with the given controller name (e.g.
// "memory", matching "/sys/fs/cgroup/memory").
func findCgroupMountPath(r io.Reader, unified bool, controller string) (string, error) {
	// /proc/mounts reports "cgroup" for v1 per-controller mounts and
	// "cgroup2" for the unified hierarchy
	cgroupName := "cgroup"
	if unified {
		cgroupName = "cgroup2"
	}

	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		// fields: device mountpoint fstype options dump pass
		fields := strings.Fields(scanner.Text())
		if len(fields) < 3 || fields[2] != cgroupName {
			continue
		}
		// for v2 there is a single hierarchy, any cgroup2 mount matches;
		// for v1 each controller has its own mount point suffix
		if unified || strings.HasSuffix(fields[1], controller) {
			return fields[1], nil
		}
	}

	if err := scanner.Err(); err != nil {
		return "", fmt.Errorf("error reading /proc/mounts: %w", err)
	}

	return "", fmt.Errorf("cgroup filesystem not found")
}

// FindCgroupMountPath returns the mount point of the cgroup filesystem by
// parsing /proc/mounts. With unified set it looks for a cgroup2 mount,
// otherwise for the cgroup v1 mount of the given controller.
func FindCgroupMountPath(unified bool, controller string) (string, error) {
	mounts, err := os.Open("/proc/mounts")
	if err != nil {
		return "", fmt.Errorf("failed to open /proc/mounts: %v", err)
	}
	defer mounts.Close()
	return findCgroupMountPath(mounts, unified, controller)
}

// findMemoryCgroupPath parses /proc/<pid>/cgroup content from r and returns
// whether the process runs on the unified (cgroup v2) hierarchy and the path
// of its memory cgroup. On hybrid systems exposing both v1 and v2 entries,
// the v1 memory hierarchy takes precedence. An empty input yields
// (false, "", nil).
func findMemoryCgroupPath(r io.Reader) (bool, string, error) {
	unified := false
	memoryCgroupPath := ""

	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		line := scanner.Text()

		// '/proc/$PID/cgroup' lists a process's cgroup membership, one
		// line per hierarchy on legacy systems. The cgroup v2 entry is
		// always formatted '0::$PATH'. Keep scanning after a match: a
		// later v1 memory entry (hybrid setup) must win, even in the
		// unlikely case where the v2 line comes first.
		if strings.HasPrefix(line, "0::/") {
			unified = true
			memoryCgroupPath = strings.TrimPrefix(line, "0::")
			continue
		}

		// cgroup v1 entries are 'ID:CONTROLLERS:PATH'; stop at the
		// memory controller, treating hybrid setups as v1
		if parts := strings.SplitN(line, ":", 3); len(parts) == 3 && parts[1] == "memory" {
			unified = false
			memoryCgroupPath = parts[2]
			break
		}
	}

	if err := scanner.Err(); err != nil {
		return false, "", fmt.Errorf("failed reading /proc/self/cgroup: %w", err)
	}

	return unified, memoryCgroupPath, nil
}

// FindMemoryCgroupPath reads /proc/self/cgroup and reports whether the
// current process is on the unified (cgroup v2) hierarchy along with the
// path of its memory cgroup within that hierarchy.
func FindMemoryCgroupPath() (unified bool, memoryCgroupPath string, err error) {
	f, err := os.Open("/proc/self/cgroup")
	if err != nil {
		return false, "", fmt.Errorf("failed to open /proc/self/cgroup: %w", err)
	}
	defer f.Close()
	return findMemoryCgroupPath(f)
}

// addMemCgroupStats locates the memory cgroup of the current process and
// copies its memory statistics files (usage counters and memory.stat) into
// the tarball. Both cgroup v1 and v2 (unified) hierarchies are supported,
// with a fallback for legacy container layouts where the v1 hierarchy is
// mounted flat in the container filesystem. Individual file failures are
// logged but do not abort the collection (best-effort).
func (s bugtoolInfo) addMemCgroupStats(tarWriter *tar.Writer) error {
	unifiedCgroup, memoryCgroupPath, err := FindMemoryCgroupPath()
	if err != nil {
		s.multiLog.WithError(err).Warn("failed finding the memory cgroup path")
		return fmt.Errorf("failed to find memory cgroup path: %w", err)
	}

	cgroupMountPath, err := FindCgroupMountPath(unifiedCgroup, "memory")
	if err != nil {
		s.multiLog.WithError(err).Warn("failed to find cgroup mount path")
		return fmt.Errorf("failed to find cgroup mount path: %w", err)
	}

	cgroupPath := filepath.Join(cgroupMountPath, memoryCgroupPath)

	// can't use s.tarAddFile here unfortunately because it is using io.Copy
	// based on the size retrieved from the stat of the file, and cgroup fs
	// files have size equal to 0
	readAndWrite := func(cgroupBasePath string, file string) error {
		buf, err := os.ReadFile(filepath.Join(cgroupBasePath, file))
		if err != nil {
			s.multiLog.WithError(err).WithField("file", file).Warn("failed to read cgroup file")
			return fmt.Errorf("failed to read file %s: %w", file, err)
		}
		// BUG FIX: the error check was previously inverted (logging
		// success and returning an error when tarAddBuff succeeded, and
		// returning nil when it failed)
		if err := s.tarAddBuff(tarWriter, file, bytes.NewBuffer(buf)); err != nil {
			s.multiLog.WithError(err).WithField("file", file).Warn("failed to add cgroup file to the archive")
			return fmt.Errorf("failed to add buffer: %w", err)
		}
		s.multiLog.WithField("file", file).Info("cgroup file added")
		return nil
	}

	if unifiedCgroup {
		readAndWrite(cgroupPath, "memory.current")
		readAndWrite(cgroupPath, "memory.stat")
	} else {
		if err := readAndWrite(cgroupPath, "memory.usage_in_bytes"); err != nil {
			// Before cgroup namespace, /proc/pid/cgroup mapping was broken, so
			// Docker back in the days mounted the cgroup hierarchy flat in the
			// containerfs. For compatibility, it still does that for cgroup v1.
			// See more https://lewisgaul.co.uk/blog/coding/2022/05/13/cgroups-intro/#cgroups-and-containers
			cgroupPath = cgroupMountPath
			s.multiLog.WithField("cgroupPath", cgroupPath).Info("retrying to read cgroup file from a different legacy path")
			readAndWrite(cgroupPath, "memory.usage_in_bytes")
		}
		readAndWrite(cgroupPath, "memory.kmem.usage_in_bytes")
		readAndWrite(cgroupPath, "memory.stat")
	}

	return nil
}
205 changes: 204 additions & 1 deletion pkg/bugtool/bugtool_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
package bugtool

import (
"io"
"os"
"reflect"
"strings"
"testing"

"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -37,8 +39,209 @@ func TestSaveAndLoad(t *testing.T) {
}

if !reflect.DeepEqual(&info1, info2) {
t.Errorf("mismatching structures: %s vs %s", info1, info2)
t.Errorf("mismatching structures: %v vs %v", info1, info2)
}

t.Log("Success")
}

// Test_findCgroupMountPath checks that findCgroupMountPath extracts the
// correct cgroup filesystem mount point from /proc/mounts content, for
// hybrid (v1 + v2), legacy (v1 only) and unified (v2 only) systems, and
// that it errors when the requested hierarchy is absent.
func Test_findCgroupMountPath(t *testing.T) {
	// hybrid layout: cgroup2 mounted at /sys/fs/cgroup/unified alongside
	// the per-controller cgroup v1 mounts
	const cgroupMountsHybrid = `tmpfs /sys/fs/cgroup tmpfs ro,nosuid,nodev,noexec,mode=755 0 0
cgroup2 /sys/fs/cgroup/unified cgroup2 rw,nosuid,nodev,noexec,relatime,nsdelegate 0 0
cgroup /sys/fs/cgroup/systemd cgroup rw,nosuid,nodev,noexec,relatime,xattr,name=systemd 0 0
pstore /sys/fs/pstore pstore rw,nosuid,nodev,noexec,relatime 0 0
efivarfs /sys/firmware/efi/efivars efivarfs rw,nosuid,nodev,noexec,relatime 0 0
none /sys/fs/bpf bpf rw,nosuid,nodev,noexec,relatime,mode=700 0 0
cgroup /sys/fs/cgroup/hugetlb cgroup rw,nosuid,nodev,noexec,relatime,hugetlb 0 0
cgroup /sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory 0 0
cgroup /sys/fs/cgroup/perf_event cgroup rw,nosuid,nodev,noexec,relatime,perf_event 0 0
cgroup /sys/fs/cgroup/net_cls,net_prio cgroup rw,nosuid,nodev,noexec,relatime,net_cls,net_prio 0 0
cgroup /sys/fs/cgroup/devices cgroup rw,nosuid,nodev,noexec,relatime,devices 0 0
cgroup /sys/fs/cgroup/cpu,cpuacct cgroup rw,nosuid,nodev,noexec,relatime,cpu,cpuacct 0 0
cgroup /sys/fs/cgroup/freezer cgroup rw,nosuid,nodev,noexec,relatime,freezer 0 0
cgroup /sys/fs/cgroup/rdma cgroup rw,nosuid,nodev,noexec,relatime,rdma 0 0
cgroup /sys/fs/cgroup/blkio cgroup rw,nosuid,nodev,noexec,relatime,blkio 0 0
cgroup /sys/fs/cgroup/pids cgroup rw,nosuid,nodev,noexec,relatime,pids 0 0
cgroup /sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset 0 0
systemd-1 /proc/sys/fs/binfmt_misc autofs rw,relatime,fd=28,pgrp=1,timeout=0,minproto=5,maxproto=5,direct,pipe_ino=351 0 0`

	// legacy layout: only cgroup v1 per-controller mounts, no cgroup2
	const cgroupMountsLegacy = `tmpfs /sys/fs/cgroup tmpfs ro,nosuid,nodev,noexec,mode=755 0 0
efivarfs /sys/firmware/efi/efivars efivarfs rw,nosuid,nodev,noexec,relatime 0 0
none /sys/fs/bpf bpf rw,nosuid,nodev,noexec,relatime,mode=700 0 0
cgroup /sys/fs/cgroup/hugetlb cgroup rw,nosuid,nodev,noexec,relatime,hugetlb 0 0
cgroup /sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory 0 0
cgroup /sys/fs/cgroup/perf_event cgroup rw,nosuid,nodev,noexec,relatime,perf_event 0 0
cgroup /sys/fs/cgroup/net_cls,net_prio cgroup rw,nosuid,nodev,noexec,relatime,net_cls,net_prio 0 0
cgroup /sys/fs/cgroup/devices cgroup rw,nosuid,nodev,noexec,relatime,devices 0 0
cgroup /sys/fs/cgroup/cpu,cpuacct cgroup rw,nosuid,nodev,noexec,relatime,cpu,cpuacct 0 0
cgroup /sys/fs/cgroup/freezer cgroup rw,nosuid,nodev,noexec,relatime,freezer 0 0
cgroup /sys/fs/cgroup/rdma cgroup rw,nosuid,nodev,noexec,relatime,rdma 0 0
cgroup /sys/fs/cgroup/blkio cgroup rw,nosuid,nodev,noexec,relatime,blkio 0 0
cgroup /sys/fs/cgroup/pids cgroup rw,nosuid,nodev,noexec,relatime,pids 0 0
cgroup /sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset 0 0
systemd-1 /proc/sys/fs/binfmt_misc autofs rw,relatime,fd=28,pgrp=1,timeout=0,minproto=5,maxproto=5,direct,pipe_ino=351 0 0`

	// unified layout: a single cgroup2 mount at /sys/fs/cgroup
	const cgroupMountsUnified = `tmpfs /dev/shm tmpfs rw,nosuid,nodev,inode64 0 0
tmpfs /run/lock tmpfs rw,nosuid,nodev,noexec,relatime,size=5120k,inode64 0 0
cgroup2 /sys/fs/cgroup cgroup2 rw,nosuid,nodev,noexec,relatime 0 0
pstore /sys/fs/pstore pstore rw,nosuid,nodev,noexec,relatime 0 0
bpf /sys/fs/bpf bpf rw,nosuid,nodev,noexec,relatime,mode=700 0 0`

	type args struct {
		r          io.Reader // /proc/mounts content
		unified    bool      // look for cgroup2 instead of cgroup v1
		controller string    // v1 controller name, ignored when unified
	}
	tests := []struct {
		name    string
		args    args
		want    string // expected mount point
		wantErr bool
	}{
		{
			"cgroupv1_hybrid",
			args{strings.NewReader(cgroupMountsHybrid), false, "memory"},
			"/sys/fs/cgroup/memory",
			false,
		},
		{
			"cgroupv2_hybrid",
			args{strings.NewReader(cgroupMountsHybrid), true, ""},
			"/sys/fs/cgroup/unified",
			false,
		},
		{
			"cgroupv2",
			args{strings.NewReader(cgroupMountsUnified), true, ""},
			"/sys/fs/cgroup",
			false,
		},
		{
			"cgroupv1",
			args{strings.NewReader(cgroupMountsLegacy), false, "freezer"},
			"/sys/fs/cgroup/freezer",
			false,
		},
		{
			// asking for cgroup2 on a v1-only system must fail
			"cgroupv2_missing_legacy",
			args{strings.NewReader(cgroupMountsLegacy), true, ""},
			"",
			true,
		},
		{
			// asking for a v1 controller on a v2-only system must fail
			"cgroupv1_missing_unified",
			args{strings.NewReader(cgroupMountsUnified), false, "devices"},
			"",
			true,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := findCgroupMountPath(tt.args.r, tt.args.unified, tt.args.controller)
			if (err != nil) != tt.wantErr {
				t.Errorf("findCgroupMountPath() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if got != tt.want {
				t.Errorf("findCgroupMountPath() = %v, want %v", got, tt.want)
			}
		})
	}
}

// Test_findMemoryCgroupPath checks the parsing of /proc/self/cgroup content
// for cgroup v2, cgroup v1 and hybrid hierarchies, including the artificial
// case where the v2 entry precedes the v1 entries, and the empty-input case.
func Test_findMemoryCgroupPath(t *testing.T) {
	tests := []struct {
		name    string
		args    io.Reader // /proc/self/cgroup content
		want    bool      // expected unified (cgroup v2) flag
		want1   string    // expected memory cgroup path
		wantErr bool
	}{
		{
			"cgroupv2",
			strings.NewReader("0::/user.slice/user-501.slice/session-92.scope"),
			true,
			"/user.slice/user-501.slice/session-92.scope",
			false,
		},
		{
			"cgroupv1",
			strings.NewReader(`2:cpuset:/
11:pids:/user.slice/user-501.slice/session-2.scope
10:blkio:/user.slice
9:rdma:/
8:freezer:/
7:cpu,cpuacct:/user.slice
6:devices:/user.slice
5:net_cls,net_prio:/
4:perf_event:/
3:memory:/user.slice/user-501.slice/session-2.scope
2:hugetlb:/
1:name=systemd:/user.slice/user-501.slice/session-2.scope`),
			false,
			"/user.slice/user-501.slice/session-2.scope",
			false,
		},
		{
			// hybrid: both v1 and v2 entries present, v1 memory wins
			"cgroupv1_hybrid",
			strings.NewReader(`2:cpuset:/
11:pids:/user.slice/user-501.slice/session-2.scope
10:blkio:/user.slice
9:rdma:/
8:freezer:/
7:cpu,cpuacct:/user.slice
6:devices:/user.slice
5:net_cls,net_prio:/
4:perf_event:/
3:memory:/user.slice/user-501.slice/session-2.scope
2:hugetlb:/
1:name=systemd:/user.slice/user-501.slice/session-2.scope
0::/user.slice/user-501.slice/session-3.scope`),
			false,
			"/user.slice/user-501.slice/session-2.scope",
			false,
		},
		{
			// renamed from a duplicate "cgroupv1_hybrid" subtest name
			"cgroupv1_hybrid_v2_entry_first",
			// this situation is artificial and I'm not sure it can happen in real life
			strings.NewReader(`2:cpuset:/
0::/user.slice/user-501.slice/session-3.scope
11:pids:/user.slice/user-501.slice/session-2.scope
10:blkio:/user.slice
9:rdma:/
8:freezer:/
7:cpu,cpuacct:/user.slice
6:devices:/user.slice
5:net_cls,net_prio:/
4:perf_event:/
3:memory:/user.slice/user-501.slice/session-2.scope
2:hugetlb:/
1:name=systemd:/user.slice/user-501.slice/session-2.scope`),
			false,
			"/user.slice/user-501.slice/session-2.scope",
			false,
		},
		{
			// empty input is not an error: no hierarchy found
			"empty",
			strings.NewReader(""),
			false,
			"",
			false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, got1, err := findMemoryCgroupPath(tt.args)
			if (err != nil) != tt.wantErr {
				t.Errorf("findMemoryCgroupPath() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if got != tt.want {
				t.Errorf("findMemoryCgroupPath() got = %v, want %v", got, tt.want)
			}
			if got1 != tt.want1 {
				t.Errorf("findMemoryCgroupPath() got1 = %v, want %v", got1, tt.want1)
			}
		})
	}
}

0 comments on commit 94022ca

Please sign in to comment.