Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[cpu/windows] Switch to performance counters #192

Merged
merged 30 commits into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
bd97f54
inital commit, add pdh
VihasMakwana Nov 12, 2024
5b43db1
chore: windows pdh update
VihasMakwana Nov 18, 2024
fdb61e0
chore: windows pdh update
VihasMakwana Nov 18, 2024
f67904a
chore: windows pdh update
VihasMakwana Nov 18, 2024
76d90ab
chore: fix CI
VihasMakwana Nov 22, 2024
1521ccb
go.mod
VihasMakwana Nov 22, 2024
67086a2
lint
VihasMakwana Nov 22, 2024
f1c2689
chore: remove fallback
VihasMakwana Nov 22, 2024
3b4b69a
lint
VihasMakwana Nov 22, 2024
695ac10
chore: remove redundant
VihasMakwana Nov 22, 2024
1bf1d83
go.mod
VihasMakwana Nov 22, 2024
a5781e0
chore: memory improvements
VihasMakwana Nov 22, 2024
ea78b1d
chore: use rawCounterArray
VihasMakwana Nov 23, 2024
7955534
chore: clean up
VihasMakwana Nov 25, 2024
7692d3a
go.mod
VihasMakwana Nov 26, 2024
6150435
chore: error handling
VihasMakwana Nov 26, 2024
f0d2015
chore: test
VihasMakwana Nov 26, 2024
45028a5
lint
VihasMakwana Nov 26, 2024
ffce161
go.sum
VihasMakwana Nov 26, 2024
d7c3ff5
chore: add flag
VihasMakwana Nov 26, 2024
c7b6e03
chore: use init()
VihasMakwana Nov 29, 2024
35a0aa4
chore: comments
VihasMakwana Nov 29, 2024
491ed8f
move away from init
VihasMakwana Dec 3, 2024
3d0e745
Merge branch 'main' into pdh-windows-integrated
VihasMakwana Dec 3, 2024
eb83e60
chore: merge conflicts
VihasMakwana Dec 3, 2024
43fb0a3
fix tests
VihasMakwana Dec 5, 2024
8d35e55
fix: goimports
VihasMakwana Dec 5, 2024
22f8e5d
rename WithWindowsPerformanceCounter
VihasMakwana Dec 5, 2024
2f6a551
chore: improve readability
VihasMakwana Dec 6, 2024
d51bfa4
chore: update go.mod
VihasMakwana Dec 9, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ go 1.22.8

require (
github.com/docker/docker v26.1.5+incompatible
github.com/elastic/elastic-agent-libs v0.9.13
github.com/elastic/elastic-agent-libs v0.17.4-0.20241126154321-6ed75416832d
github.com/elastic/go-licenser v0.4.2
github.com/elastic/go-structform v0.0.9
github.com/elastic/go-sysinfo v1.14.1
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKoh
github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec=
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/elastic/elastic-agent-libs v0.9.13 h1:D1rh1s67zlkDWmixWQaNWzn+qy6DafIDPTQnLpBNBUA=
github.com/elastic/elastic-agent-libs v0.9.13/go.mod h1:G9ljFvDE+muOOOQBf2eRituF0fE4suGkv25rfjTwY+c=
github.com/elastic/elastic-agent-libs v0.17.4-0.20241126154321-6ed75416832d h1:nY8LSeTYU1uSDAAg7WwGH/cALgdovAXLdIzV25Ky0Bo=
github.com/elastic/elastic-agent-libs v0.17.4-0.20241126154321-6ed75416832d/go.mod h1:5CR02awPrBr+tfmjBBK+JI+dMmHNQjpVY24J0wjbC7M=
github.com/elastic/go-licenser v0.4.2 h1:bPbGm8bUd8rxzSswFOqvQh1dAkKGkgAmrPxbUi+Y9+A=
github.com/elastic/go-licenser v0.4.2/go.mod h1:W8eH6FaZDR8fQGm+7FnVa7MxI1b/6dAqxz+zPB8nm5c=
github.com/elastic/go-structform v0.0.9 h1:HpcS7xljL4kSyUfDJ8cXTJC6rU5ChL1wYb6cx3HLD+o=
Expand Down
51 changes: 51 additions & 0 deletions metric/cpu/metric_windows_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//go:build windows

package cpu

import (
"testing"

"github.com/stretchr/testify/require"
)

func TestCounterLength(t *testing.T) {
query, err := buildQuery()
mauri870 marked this conversation as resolved.
Show resolved Hide resolved
require.NoError(t, err)
require.NoError(t, query.CollectData())

kernelRawData, err := query.GetRawCounterArray(totalKernelTimeCounter, true)
require.NoError(t, err)

idleRawData, err := query.GetRawCounterArray(totalIdleTimeCounter, true)
require.NoError(t, err)

userRawData, err := query.GetRawCounterArray(totalUserTimeCounter, true)
require.NoError(t, err)

require.Equal(t, len(kernelRawData), len(idleRawData))
require.Equal(t, len(userRawData), len(idleRawData))

for i := 0; i < len(userRawData); i++ {
require.Equal(t, userRawData[i].InstanceName, kernelRawData[i].InstanceName, "InstanceName should be equal")
}
for i := 0; i < len(kernelRawData); i++ {
require.Equal(t, kernelRawData[i].InstanceName, idleRawData[i].InstanceName, "InstanceName should be equal")
}
}
21 changes: 16 additions & 5 deletions metric/cpu/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,18 @@ The below code implements a "metrics tracker" that gives us the ability to
calculate CPU percentages, as we average usage across a time period.
*/

// option collects the settings that OptionFunc values can switch on.
type option struct {
// usePerformanceCounter, when true, makes the Windows Get read CPU times
// from performance counters instead of calling defaultGet.
usePerformanceCounter bool
}

// OptionFunc mutates an option value. Fetch, FetchCores and Get accept a
// variadic list of these; the Windows Get applies each one in order to a
// zero-valued option before deciding how to collect metrics.
type OptionFunc func(*option)

// WithPerformanceCounter returns an OptionFunc that sets usePerformanceCounter
// to true. Of the Get implementations in this change, only the Windows one
// reads the option; the AIX, Darwin, OpenBSD and procfs variants discard it.
func WithPerformanceCounter() OptionFunc {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume performance counters is just a windows thing? If so, we could add a comment at the top of this function that states that it only applies to windows.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup. That's better

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Putting Windows in the option name would be even more obvious.

Also, consider that we are probably going to want to make this the default once we are satisfied with it. Probably just deleting the option would work once we do that but something to consider for the near future.

return func(o *option) {
o.usePerformanceCounter = true
}
}

// Monitor is used to monitor the overall CPU usage of the system over time.
type Monitor struct {
lastSample CPUMetrics
Expand All @@ -98,8 +110,8 @@ func New(hostfs resolve.Resolver) *Monitor {

// Fetch collects a new sample of the CPU usage metrics.
// This will overwrite the currently stored samples.
func (m *Monitor) Fetch() (Metrics, error) {
metric, err := Get(m.Hostfs)
func (m *Monitor) Fetch(opts ...OptionFunc) (Metrics, error) {
metric, err := Get(m.Hostfs, opts...)
if err != nil {
return Metrics{}, fmt.Errorf("error fetching CPU metrics: %w", err)
}
Expand All @@ -112,9 +124,8 @@ func (m *Monitor) Fetch() (Metrics, error) {

// FetchCores collects a new sample of CPU usage metrics per-core
// This will overwrite the currently stored samples.
func (m *Monitor) FetchCores() ([]Metrics, error) {

metric, err := Get(m.Hostfs)
func (m *Monitor) FetchCores(opts ...OptionFunc) ([]Metrics, error) {
metric, err := Get(m.Hostfs, opts...)
if err != nil {
return nil, fmt.Errorf("error fetching CPU metrics: %w", err)
}
Expand Down
2 changes: 1 addition & 1 deletion metric/cpu/metrics_aix.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ func tick2msec(val uint64) uint64 {
}

// Get returns a metrics object for CPU data
func Get(_ resolve.Resolver) (CPUMetrics, error) {
func Get(_ resolve.Resolver, _ ...OptionFunc) (CPUMetrics, error) {

totals, err := getCPUTotals()
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion metric/cpu/metrics_darwin.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import (
)

// Get is the Darwin implementation of Get
func Get(_ resolve.Resolver) (CPUMetrics, error) {
func Get(_ resolve.Resolver, _ ...OptionFunc) (CPUMetrics, error) {
// We're using the gopsutil library here.
// The code used by both gosigar and go-sysinfo appears to be
// the same code as gopsutil, including copy-pasted comments.
Expand Down
2 changes: 1 addition & 1 deletion metric/cpu/metrics_openbsd.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ import (
)

// Get is the OpenBSD implementation of get
func Get(_ resolve.Resolver) (CPUMetrics, error) {
func Get(_ resolve.Resolver, _ ...OptionFunc) (CPUMetrics, error) {

// see man 2 sysctl
loadGlobal := [C.CPUSTATES]C.long{
Expand Down
2 changes: 1 addition & 1 deletion metric/cpu/metrics_procfs_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import (
)

// Get returns a metrics object for CPU data
func Get(procfs resolve.Resolver) (CPUMetrics, error) {
func Get(procfs resolve.Resolver, _ ...OptionFunc) (CPUMetrics, error) {
path := procfs.ResolveHostFS("/proc/stat")
fd, err := os.Open(path)
defer func() {
Expand Down
86 changes: 85 additions & 1 deletion metric/cpu/metrics_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,97 @@ import (
"fmt"
"time"

"github.com/elastic/elastic-agent-libs/helpers/windows/pdh"
"github.com/elastic/elastic-agent-libs/opt"
"github.com/elastic/elastic-agent-system-metrics/metric/system/resolve"
"github.com/elastic/gosigar/sys/windows"
)

var (
// processorInformationCounter is the counter path template. The first %s is
// the instance and the second is the counter name.
processorInformationCounter = "\\Processor Information(%s)\\%s"
VihasMakwana marked this conversation as resolved.
Show resolved Hide resolved
// The three paths below use the "*" wildcard as the instance and differ only
// in the counter name: privileged (kernel), idle and user time.
totalKernelTimeCounter = fmt.Sprintf(processorInformationCounter, "*", "% Privileged Time")
totalIdleTimeCounter = fmt.Sprintf(processorInformationCounter, "*", "% Idle Time")
totalUserTimeCounter = fmt.Sprintf(processorInformationCounter, "*", "% User Time")
)

// query is built once, when the package-level variables are initialized, and
// is reused by every Get call that asks for performance counters. qError holds
// the error from buildQuery, if any; Get returns it only on the
// performance-counter path.
var query, qError = buildQuery()

// Get fetches Windows CPU system times
func Get(_ resolve.Resolver) (CPUMetrics, error) {
// Get fetches Windows CPU system times.
//
// Without options it delegates to defaultGet. When an option sets
// usePerformanceCounter, the idle, privileged (kernel) and user times are read
// per instance from the package-level PDH query, and the totals are the sums
// of those per-instance values.
//
// An error is returned when the query could not be built, when collecting or
// reading a counter fails, or when the three counters do not report the same
// number of instances. The returned CPUMetrics is the zero value on error.
func Get(_ resolve.Resolver, opts ...OptionFunc) (CPUMetrics, error) {
	op := option{}
	for _, o := range opts {
		o(&op)
	}
	if !op.usePerformanceCounter {
		return defaultGet()
	}
	if qError != nil {
		return CPUMetrics{}, qError
	}

	if err := query.CollectData(); err != nil {
		return CPUMetrics{}, err
	}

	kernelRawData, err := query.GetRawCounterArray(totalKernelTimeCounter, true)
	if err != nil {
		return CPUMetrics{}, fmt.Errorf("error calling GetRawCounterArray for kernel counter: %w", err)
	}
	idleRawData, err := query.GetRawCounterArray(totalIdleTimeCounter, true)
	if err != nil {
		return CPUMetrics{}, fmt.Errorf("error calling GetRawCounterArray for idle counter: %w", err)
	}
	userRawData, err := query.GetRawCounterArray(totalUserTimeCounter, true)
	if err != nil {
		return CPUMetrics{}, fmt.Errorf("error calling GetRawCounterArray for user counter: %w", err)
	}

	// The loop below indexes all three arrays in lockstep, so a shorter kernel
	// or idle array would panic. Fail with an error instead.
	if len(kernelRawData) != len(userRawData) || len(idleRawData) != len(userRawData) {
		return CPUMetrics{}, fmt.Errorf(
			"mismatched counter array lengths: kernel=%d, idle=%d, user=%d",
			len(kernelRawData), len(idleRawData), len(userRawData),
		)
	}

	globalMetrics := CPUMetrics{}
	globalMetrics.list = make([]CPU, len(userRawData))

	var idle, kernel, user time.Duration
	for i := range globalMetrics.list {
		// Index i is assumed to refer to the same instance in all three arrays;
		// TestCounterLength checks the instance names line up.

		// Values returned by the counters are in 100-ns intervals: multiply by
		// 100 to get nanoseconds, then divide down to milliseconds.
		idleTime := time.Duration(idleRawData[i].RawValue.FirstValue*100) / time.Millisecond
		kernelTime := time.Duration(kernelRawData[i].RawValue.FirstValue*100) / time.Millisecond
		userTime := time.Duration(userRawData[i].RawValue.FirstValue*100) / time.Millisecond

		globalMetrics.list[i].Idle = opt.UintWith(uint64(idleTime))
		globalMetrics.list[i].Sys = opt.UintWith(uint64(kernelTime))
		globalMetrics.list[i].User = opt.UintWith(uint64(userTime))

		// Accumulate the per-instance times into the system-wide totals.
		idle += idleTime
		kernel += kernelTime
		user += userTime
	}

	globalMetrics.totals.Idle = opt.UintWith(uint64(idle))
	globalMetrics.totals.Sys = opt.UintWith(uint64(kernel))
	globalMetrics.totals.User = opt.UintWith(uint64(user))

	return globalMetrics, nil
}

// buildQuery opens a PDH query and registers the kernel, user and idle time
// counters on it, in that order.
//
// On success the caller owns the returned query. If registering any counter
// fails, the query opened here is closed before returning so its handle is not
// leaked, and a zero pdh.Query is returned with the error.
func buildQuery() (pdh.Query, error) {
	var q pdh.Query
	if err := q.Open(); err != nil {
		return q, fmt.Errorf("failed to open query: %w", err)
	}

	counters := [...]struct{ label, path string }{
		{"kernel", totalKernelTimeCounter},
		{"user", totalUserTimeCounter},
		{"idle", totalIdleTimeCounter},
	}
	for _, c := range counters {
		if err := q.AddCounter(c.path, "", "", true, true); err != nil {
			// Best-effort cleanup: the AddCounter failure is the error worth
			// reporting, so a secondary Close error is deliberately dropped.
			_ = q.Close()
			return pdh.Query{}, fmt.Errorf("error calling AddCounter for %s counter: %w", c.label, err)
		}
	}
	return q, nil
}

func defaultGet() (CPUMetrics, error) {
idle, kernel, user, err := windows.GetSystemTimes()
if err != nil {
return CPUMetrics{}, fmt.Errorf("call to GetSystemTimes failed: %w", err)
Expand Down
Loading