diff --git a/.gitignore b/.gitignore index b997e51..7612327 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ *.so *.dylib testbin/* +active-monitor-controller # Temporary or metadata files *.yaml-e diff --git a/Dockerfile-local b/Dockerfile-local new file mode 100644 index 0000000..38aa473 --- /dev/null +++ b/Dockerfile-local @@ -0,0 +1,6 @@ +# Use distroless as minimal base image to package the manager binary +# Refer to https://github.com/GoogleContainerTools/distroless for more details +FROM gcr.io/distroless/static:latest +WORKDIR / +COPY active-monitor-controller . +ENTRYPOINT [ "/active-monitor-controller" ] diff --git a/Makefile b/Makefile index b459599..de305e1 100644 --- a/Makefile +++ b/Makefile @@ -72,6 +72,10 @@ test: manifests generate fmt vet envtest ## Run tests. build: manifests generate fmt vet ## Build manager binary. go build -o bin/manager cmd/main.go +.PHONY: build-amd64 +build-amd64: manifests generate fmt vet ## Build a static linux/amd64 manager binary (consumed by Dockerfile-local). + CGO_ENABLED=0 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -a -o active-monitor-controller cmd/main.go + .PHONY: run run: manifests generate fmt vet ## Run a controller from your host. go run ./cmd/main.go @@ -83,6 +87,10 @@ run: manifests generate fmt vet ## Run a controller from your host. docker-build: ## Build docker image with the manager. $(CONTAINER_TOOL) build -t ${IMG} . +.PHONY: docker-build-local +docker-build-local: ## Build docker image from the prebuilt binary using Dockerfile-local (run build-amd64 first). + $(CONTAINER_TOOL) build -t ${IMG} -f Dockerfile-local . + .PHONY: docker-push docker-push: ## Push docker image with the manager. 
$(CONTAINER_TOOL) push ${IMG} diff --git a/internal/controllers/healthcheck_controller.go b/internal/controllers/healthcheck_controller.go index 00dac58..cf97c4d 100644 --- a/internal/controllers/healthcheck_controller.go +++ b/internal/controllers/healthcheck_controller.go @@ -67,6 +67,8 @@ const ( PodGCOnPodCompletion = "OnPodCompletion" WfManagedByLabelKey = "workflows.argoproj.io/managed-by" WfManagedByValue = "active-monitor" + + TenMinsRequeueAfter = 10 * time.Minute ) var ( @@ -191,6 +193,8 @@ func (r *HealthCheckReconciler) processHealthCheck(ctx context.Context, log logr finishedAtTime = healthCheck.Status.FinishedAt.Time.Unix() } + log.Info("FinishedAtTime", "finishedAtTime", finishedAtTime) + // workflows can be paused by setting repeatAfterSec to <= 0 and not specifying the schedule for cron. if hcSpec.RepeatAfterSec <= 0 && hcSpec.Schedule.Cron == "" { log.Info("Workflow will be skipped due to repeatAfterSec value", "repeatAfterSec", hcSpec.RepeatAfterSec) @@ -217,8 +221,8 @@ func (r *HealthCheckReconciler) processHealthCheck(ctx context.Context, log logr // we need to update the spec so have to healthCheck.Spec.RepeatAfterSec instead of local variable hcSpec healthCheck.Spec.RepeatAfterSec = int(schedule.Next(time.Now()).Sub(time.Now())/time.Second) + 1 log.Info("spec.RepeatAfterSec value is set", "RepeatAfterSec", healthCheck.Spec.RepeatAfterSec) - } else if int(time.Now().Unix()-finishedAtTime) < hcSpec.RepeatAfterSec { - log.Info("Workflow already executed", "finishedAtTime", finishedAtTime) + } else if int(time.Now().Unix()-finishedAtTime) < hcSpec.RepeatAfterSec && r.RepeatTimersByName[healthCheck.GetName()] != nil { + log.Info("Workflow already executed and a repeat timer is already registered in RepeatTimersByName", "finishedAtTime", finishedAtTime) return ctrl.Result{}, nil } @@ -421,18 +425,25 @@ func (r *HealthCheckReconciler) deleteRBACForWorkflow(ctx context.Context, log l // this function exists to assist with how a function 
called by the timer.AfterFunc() method operates to call a // function which takes parameters, it is easiest to build this closure which holds access to the parameters we need. // the helper returns a function object taking no parameters directly, this is what we want to give AfterFunc -func (r *HealthCheckReconciler) createSubmitWorkflowHelper(ctx context.Context, log logr.Logger, wfNamespace string, hc *activemonitorv1alpha1.HealthCheck) func() { +func (r *HealthCheckReconciler) createSubmitWorkflowHelper(ctx context.Context, log logr.Logger, wfNamespace string, prevHealthCheck *activemonitorv1alpha1.HealthCheck) func() { return func() { log.Info("Creating and Submitting Workflow...") - wfName, err := r.createSubmitWorkflow(ctx, log, hc) + + healthCheckNew := &activemonitorv1alpha1.HealthCheck{} + if err := r.Get(ctx, client.ObjectKey{Name: prevHealthCheck.Name, Namespace: prevHealthCheck.Namespace}, healthCheckNew); err != nil { + log.Error(err, "Error getting healthcheck resource") + return + } + + wfName, err := r.createSubmitWorkflow(ctx, log, healthCheckNew) if err != nil { log.Error(err, "Error creating or submitting workflow") - r.Recorder.Event(hc, v1.EventTypeWarning, "Warning", "Error creating or submitting workflow") + r.Recorder.Event(healthCheckNew, v1.EventTypeWarning, "Warning", "Error creating or submitting workflow") } - err = r.watchWorkflowReschedule(ctx, ctrl.Request{}, log, wfNamespace, wfName, hc) + err = r.watchWorkflowReschedule(ctx, ctrl.Request{}, log, wfNamespace, wfName, healthCheckNew) if err != nil { log.Error(err, "Error watching or rescheduling workflow") - r.Recorder.Event(hc, v1.EventTypeWarning, "Warning", "Error watching or rescheduling workflow") + r.Recorder.Event(healthCheckNew, v1.EventTypeWarning, "Warning", "Error watching or rescheduling workflow") } } } @@ -651,13 +662,19 @@ func (r *HealthCheckReconciler) watchWorkflowReschedule(ctx context.Context, req } break } + log.V(1).Info("Waiting for workflow to 
complete", "namespace", wfNamespace, "name", wfName) } + log.V(1).Info("Waiting for workflow to complete", "namespace", wfNamespace, "name", wfName) } + log.V(1).Info("Exited workflow wait loop", "namespace", wfNamespace, "name", wfName) + // since the workflow has taken an unknown duration of time to complete, it's possible that its parent // healthcheck may no longer exist; ensure that it still does before attempting to update it and reschedule // see: https://book.kubebuilder.io/reference/using-finalizers.html if hc.ObjectMeta.DeletionTimestamp.IsZero() { + log.V(1).Info("Rescheduling healthcheck", "namespace", wfNamespace) + // since the underlying workflow has completed, we update the healthcheck accordingly err := r.updateHealthCheckStatus(ctx, log, hc) if err != nil {