Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: better rolling latency #2426

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion fixtures/datasources/mssql_pass.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ metadata:
spec:
schedule: "@every 5m"
mssql:
- url: "server=mssql.canaries.svc.cluster.local;user id=$(username);password=$(password);port=1433;database=master"
- url: "server=mssql.canaries.svc.cluster.local;user id=$(username);password=$(password);port=1433;database=master;TrustServerCertificate=True"
name: mssql pass
username:
value: sa
Expand Down
8 changes: 1 addition & 7 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ require (
go.opentelemetry.io/otel/trace v1.29.0
golang.org/x/net v0.32.0
golang.org/x/sync v0.10.0
gonum.org/v1/gonum v0.15.1
google.golang.org/genproto v0.0.0-20240903143218-8af14fe29dc1
google.golang.org/grpc v1.66.2
gopkg.in/flanksource/yaml.v3 v3.2.3
Expand Down Expand Up @@ -131,11 +132,9 @@ require (
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.27.0 // indirect
github.com/aws/aws-sdk-go-v2/service/sts v1.31.0 // indirect
github.com/aws/smithy-go v1.21.0 // indirect
github.com/bahlo/generic-list-go v0.2.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/blang/semver/v4 v4.0.0 // indirect
github.com/bmatcuk/doublestar/v4 v4.7.1 // indirect
github.com/buger/jsonparser v1.1.1 // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
github.com/cert-manager/cert-manager v1.16.1 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
Expand Down Expand Up @@ -214,7 +213,6 @@ require (
github.com/hirochachacha/go-smb2 v1.1.0 // indirect
github.com/imdario/mergo v0.3.16 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/invopop/jsonschema v0.12.0 // indirect
github.com/itchyny/gojq v0.12.17 // indirect
github.com/itchyny/timefmt-go v0.1.6 // indirect
github.com/jackc/chunkreader/v2 v2.0.1 // indirect
Expand Down Expand Up @@ -297,15 +295,11 @@ require (
github.com/valyala/fasttemplate v1.2.2 // indirect
github.com/vmihailenco/msgpack/v5 v5.4.1 // indirect
github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect
github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect
github.com/x448/float16 v0.8.4 // indirect
github.com/xanzy/ssh-agent v0.3.3 // indirect
github.com/xdg-go/pbkdf2 v1.0.0 // indirect
github.com/xdg-go/scram v1.1.2 // indirect
github.com/xdg-go/stringprep v1.0.4 // indirect
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f // indirect
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
github.com/xeipuuv/gojsonschema v1.2.0 // indirect
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
github.com/xlab/treeprint v1.2.0 // indirect
github.com/youmark/pkcs8 v0.0.0-20240424034433-3c2c7870ae76 // indirect
Expand Down
16 changes: 2 additions & 14 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -767,8 +767,6 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.31.0/go.mod h1:yMWe0F+XG0DkRZK5ODZhG
github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA=
github.com/aws/smithy-go v1.21.0 h1:H7L8dtDRk0P1Qm6y0ji7MCYMQObJ5R9CRpyPhRUkLYA=
github.com/aws/smithy-go v1.21.0/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg=
github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk=
github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
Expand All @@ -782,8 +780,6 @@ github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl
github.com/boombuler/barcode v1.0.1/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8=
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
github.com/bwesterb/go-ristretto v1.2.3/go.mod h1:fUIoIZaG73pV5biE2Blr2xEzDoMj7NFEuV9ekS419A0=
github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500 h1:6lhrsTEnloDPXyeZBvSYvQf8u86jbKehZPVDDlkgDl4=
github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500/go.mod h1:S/7n9copUssQ56c7aAgHqftWO4LTf4xY6CGWt8Bc+3M=
Expand Down Expand Up @@ -1213,8 +1209,6 @@ github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/invopop/jsonschema v0.12.0 h1:6ovsNSuvn9wEQVOyc72aycBMVQFKz7cPdMJn10CvzRI=
github.com/invopop/jsonschema v0.12.0/go.mod h1:ffZ5Km5SWWRAIN6wbDXItl95euhFz2uON45H2qjYt+0=
github.com/itchyny/gojq v0.12.13/go.mod h1:JzwzAqenfhrPUuwbmEz3nu3JQmFLlQTQMUcOdnu/Sf4=
github.com/itchyny/gojq v0.12.17 h1:8av8eGduDb5+rvEdaOO+zQUjA04MS0m3Ps8HiD+fceg=
github.com/itchyny/gojq v0.12.17/go.mod h1:WBrEMkgAfAGO1LUcGOckBl5O726KPp+OlkKug0I/FEY=
Expand Down Expand Up @@ -1676,8 +1670,6 @@ github.com/vmihailenco/msgpack/v5 v5.4.1 h1:cQriyiUvjTwOHg8QZaPihLWeRAAVoCpE00IU
github.com/vmihailenco/msgpack/v5 v5.4.1/go.mod h1:GaZTsDaehaPpQVyxrf5mtQlH+pc21PIudVV/E3rRQok=
github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g=
github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds=
github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc=
github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw=
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM=
Expand All @@ -1688,12 +1680,6 @@ github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY=
github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4=
github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8=
github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f h1:J9EGpcZtP0E/raorCMxlFGSTBrsSlaDGf3jU/qvAE2c=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0=
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74=
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
github.com/xhit/go-str2duration v1.2.0/go.mod h1:3cPSlfZlUHVlneIVfePFWcJZsuwf+P1v2SRTV4cUmp4=
github.com/xhit/go-str2duration/v2 v2.1.0/go.mod h1:ohY8p+0f07DiV6Em5LKB0s2YpLtXVyJfNt1+BlmyAsU=
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo=
Expand Down Expand Up @@ -2251,6 +2237,8 @@ gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJ
gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0=
gonum.org/v1/gonum v0.9.3/go.mod h1:TZumC3NeyVQskjXqmyWt4S3bINhy7B4eYwW69EbyX+0=
gonum.org/v1/gonum v0.11.0/go.mod h1:fSG4YDCxxUZQJ7rKsQrj0gMOg00Il0Z96/qMA4bVQhA=
gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0=
gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o=
gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw=
gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc=
gonum.org/v1/plot v0.9.0/go.mod h1:3Pcqqmp6RHvJI72kgb8fThyUnav364FOsdDo2aGW5lY=
Expand Down
83 changes: 77 additions & 6 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@ package metrics

import (
"slices"
"sync"
"time"

"github.com/asecurityteam/rolling"
v1 "github.com/flanksource/canary-checker/api/v1"
"github.com/flanksource/canary-checker/pkg"
"github.com/flanksource/canary-checker/pkg/runner"
"gonum.org/v1/gonum/stat"

"github.com/flanksource/duty/context"
"github.com/flanksource/duty/types"
Expand Down Expand Up @@ -156,7 +158,9 @@ func GetMetrics(key string) (uptime types.Uptime, latency types.Latency) {

lat, ok := latencies.Get(key)
if ok {
latency = types.Latency{Rolling1H: lat.(*rolling.TimePolicy).Reduce(rolling.Percentile(95))}
lb := lat.(*LatencyBuffer)
p95, _ := lb.GetStats()
latency = types.Latency{Rolling1H: p95}
}
return
}
Expand Down Expand Up @@ -194,7 +198,8 @@ func Record(
severity := canary.Spec.Severity
// We are recording aggreated metrics at the canary level, not the individual check level
key := canary.GetCheckID(result.Check.GetName())
var fail, pass, latency *rolling.TimePolicy
var fail, pass *rolling.TimePolicy
var latency *LatencyBuffer

_fail, ok := failed.Get(key)
if !ok {
Expand All @@ -214,10 +219,10 @@ func Record(

_latencyV, ok := latencies.Get(key)
if !ok {
latency = rolling.NewTimePolicy(rolling.NewWindow(3600), time.Second)
latency = NewLatencyBuffer(2*time.Minute, 1*time.Hour)
latencies.Set(key, latency)
} else {
latency = _latencyV.(*rolling.TimePolicy)
latency = _latencyV.(*LatencyBuffer)
}

var additionalLabels []string
Expand All @@ -238,7 +243,7 @@ func Record(

if result.Duration > 0 {
RequestLatency.WithLabelValues(checkMetricLabels...).Observe(float64(result.Duration))
latency.Append(float64(result.Duration))
latency.Add(float64(result.Duration))
}

gaugeLabels := append([]string{key, checkType, canaryName, canaryNamespace, name}, v1.AdditionalCheckMetricLabels...)
Expand Down Expand Up @@ -293,7 +298,8 @@ func Record(

_uptime = types.Uptime{Passed: int(pass.Reduce(rolling.Sum)), Failed: int(fail.Reduce(rolling.Sum))}
if latency != nil {
_latency = types.Latency{Rolling1H: latency.Reduce(rolling.Percentile(95))}
p95, _ := latency.GetStats()
_latency = types.Latency{Rolling1H: p95}
} else {
_latency = types.Latency{}
}
Expand Down Expand Up @@ -405,3 +411,68 @@ func UnregisterGauge(ctx context.Context, checkIDs []string) {
Gauge.DeletePartialMatch(prometheus.Labels{"key": checkID})
}
}

type LatencyBuffer struct {
buffers [][]float64
current int
bufferSize int
maxBuffers int
interval time.Duration
lastRotation time.Time
mu sync.RWMutex
}

func NewLatencyBuffer(bufferInterval time.Duration, totalDuration time.Duration) *LatencyBuffer {
maxBuffers := int(totalDuration / bufferInterval)
return &LatencyBuffer{
buffers: make([][]float64, maxBuffers),
current: 0,
bufferSize: 1000,
maxBuffers: maxBuffers,
interval: bufferInterval,
lastRotation: time.Now(),
}
}

func (lb *LatencyBuffer) Add(latency float64) {
lb.mu.Lock()
defer lb.mu.Unlock()

now := time.Now()
intervalsPassed := int(now.Sub(lb.lastRotation) / lb.interval)

if intervalsPassed > 0 {
// Rotate the buffer for each interval that has passed
for i := 0; i < intervalsPassed; i++ {
lb.current = (lb.current + 1) % lb.maxBuffers
lb.buffers[lb.current] = make([]float64, 0, lb.bufferSize)
}
lb.lastRotation = lb.lastRotation.Add(time.Duration(intervalsPassed) * lb.interval)
}

if lb.buffers[lb.current] == nil {
lb.buffers[lb.current] = make([]float64, 0, lb.bufferSize)
}
lb.buffers[lb.current] = append(lb.buffers[lb.current], latency)
}

func (lb *LatencyBuffer) GetStats() (p95, p99 float64) {
lb.mu.RLock()
defer lb.mu.RUnlock()

var allLatencies []float64
for _, buffer := range lb.buffers {
if buffer != nil {
allLatencies = append(allLatencies, buffer...)
}
}

if len(allLatencies) == 0 {
return 0, 0
}

slices.Sort(allLatencies)
p95 = stat.Quantile(0.95, stat.Empirical, allLatencies, nil)
p99 = stat.Quantile(0.99, stat.Empirical, allLatencies, nil)
return
}
Loading