Skip to content

Commit

Permalink
fix: metrics for status and duration (#40)
Browse files: browse the repository at this point in the history
* fix: metrics for status and duration

* rollback old samples

* fix unit tests

* unit test all

Signed-off-by: ibraheem Al Saady <ibraheem@kuptan.io>
Co-authored-by: ibraheem Al Saady <ibraheem@kuptan.io>
  • Loading branch information
IbraheemAlSaady authored on Aug 12, 2022
1 parent cdd3382 commit dd9dfb2
Show file tree
Hide file tree
Showing 6 changed files with 15 additions and 39 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ build: generate fmt vet ## Build manager binary.

.PHONY: run
run: manifests generate fmt vet ## Run a controller from your host.
go run ./main.go
go run ./main.go --requeue-job-watch=5s

.PHONY: docker-build
docker-build: test ## Build docker image with the manager.
Expand Down
1 change: 0 additions & 1 deletion api/v1alpha1/terraform_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ const (
RunStarted TerraformRunStatus = "Started"
RunRunning TerraformRunStatus = "Running"
RunCompleted TerraformRunStatus = "Completed"
RunDestroyed TerraformRunStatus = "Destroyed"
RunFailed TerraformRunStatus = "Failed"
RunWaitingForDependency TerraformRunStatus = "WaitingForDependency"
RunDeleted TerraformRunStatus = "Deleted"
Expand Down
13 changes: 0 additions & 13 deletions config/samples/role-terraform-runner.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,3 @@ rules:
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["create", "update", "watch", "get"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: terraform-runner
subjects:
- kind: ServiceAccount
name: terraform-runner # name of your service account
namespace: default # this is the namespace your service account is in
roleRef: # referring to your ClusterRole
kind: ClusterRole
name: terraform-runner
apiGroup: rbac.authorization.k8s.io
26 changes: 8 additions & 18 deletions controllers/terraform_controller_operation.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,14 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
)

var (
dateFormat string = "2006-01-02 15:04:05"
)

func (r *TerraformReconciler) updateRunStatus(ctx context.Context, run *v1alpha1.Terraform, status v1alpha1.TerraformRunStatus) {
run.Status.RunStatus = status

if status != v1alpha1.RunStarted {
if status == v1alpha1.RunCompleted || status == v1alpha1.RunFailed {
run.Status.CompletionTime = time.Now().Format(time.UnixDate)
r.MetricsRecorder.RecordStatus(run.Name, run.Namespace, status)
}

if status == v1alpha1.RunCompleted || status == v1alpha1.RunDestroyed {
run.Status.CompletionTime = time.Now().Format(dateFormat)
}

if err := r.Status().Update(ctx, run); err != nil {
r.Log.Error(err, "failed to update status")
}
Expand All @@ -38,7 +31,6 @@ func (r *TerraformReconciler) handleRunCreate(ctx context.Context, run *v1alpha1
dependencies, err := r.checkDependencies(ctx, *run)

run.Status.ObservedGeneration = run.Generation
run.Status.StartedTime = time.Now().Format(dateFormat)

if err != nil {
if !run.IsWaiting() {
Expand Down Expand Up @@ -72,6 +64,8 @@ func (r *TerraformReconciler) handleRunCreate(ctx context.Context, run *v1alpha1
}

run.Status.OutputSecretName = run.GetOutputSecretName()
run.Status.StartedTime = time.Now().Format(time.UnixDate)

r.updateRunStatus(ctx, run, v1alpha1.RunStarted)

return ctrl.Result{}, nil
Expand Down Expand Up @@ -105,17 +99,13 @@ func (r *TerraformReconciler) handleRunJobWatch(ctx context.Context, run *v1alph
return ctrl.Result{}, err
}

startTime, err := time.Parse(dateFormat, run.Status.StartedTime)
startTime, err := time.Parse(time.UnixDate, run.Status.StartedTime)

if err != nil {
r.Log.Error(err, "failed to parse status started at to time")
r.Log.Error(err, "failed to parse workflow start time")
}

defer r.MetricsRecorder.RecordDuration(
run.Name,
run.Namespace,
startTime,
)
defer r.MetricsRecorder.RecordDuration(run.Name, run.Namespace, startTime)

// job hasn't started
if job.Status.Active == 0 && job.Status.Succeeded == 0 && job.Status.Failed == 0 {
Expand All @@ -135,7 +125,7 @@ func (r *TerraformReconciler) handleRunJobWatch(ctx context.Context, run *v1alph

// job is successful
if job.Status.Succeeded > 0 {
r.Log.Info("terraform run job completed successfully, performing a cleanup on resources")
r.Log.Info("terraform run job completed successfully")

if run.Spec.DeleteCompletedJobs {
r.Log.Info("deleting completed job")
Expand Down
6 changes: 3 additions & 3 deletions internal/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func NewRecorder() RecorderInterface {
Name: "tfo_workflow_status",
Help: "The current status of a Terraform workflow/run resource reconciliation.",
},
[]string{"name", "namespace", "status"},
[]string{"name", "namespace"},
),
durationHistogram: prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Expand Down Expand Up @@ -70,11 +70,11 @@ func (r *Recorder) RecordTotal(name string, namespace string) {
func (r *Recorder) RecordStatus(name string, namespace string, status v1alpha1.TerraformRunStatus) {
var value float64

if status == v1alpha1.RunDeleted {
if status == v1alpha1.RunFailed {
value = 1
}

r.statusGauge.WithLabelValues(name, namespace, string(status)).Set(value)
r.statusGauge.WithLabelValues(name, namespace).Set(value)
}

// RecordDuration records the duration since start for the given ref.
Expand Down
6 changes: 3 additions & 3 deletions internal/metrics/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ var _ = Describe("Metrics Recorder", func() {
})

Context("Recording Status", func() {
It("should record the status metric", func() {
It("should record the completed status", func() {
rec.RecordStatus(name, namespace, v1alpha1.RunCompleted)

var (
Expand All @@ -59,8 +59,8 @@ var _ = Describe("Metrics Recorder", func() {
Expect(metricFamilies[0].Metric[0].Gauge.Value).To(Equal(&value))
})

It("should record the deleted status", func() {
rec.RecordStatus(name, namespace, v1alpha1.RunDeleted)
It("should record the failed status", func() {
rec.RecordStatus(name+"failed", namespace, v1alpha1.RunFailed)

var (
value float64 = 1.0
Expand Down

0 comments on commit dd9dfb2

Please sign in to comment.