From 46e0517fdceec58047cedca7545dbdb66ab9296a Mon Sep 17 00:00:00 2001 From: Ivan Ilichev Date: Mon, 26 Oct 2020 12:11:35 -0400 Subject: [PATCH] [datadog] Refactor liveness and readiness probes (#69) * [datadog] Refactor liveness and readiness probes * Address review feedback --- charts/datadog/CHANGELOG.md | 7 +++++ charts/datadog/Chart.yaml | 2 +- charts/datadog/README.md | 3 ++- charts/datadog/templates/NOTES.txt | 23 ++++++++++++++++ charts/datadog/templates/_helpers.tpl | 27 +++++++++++++++++++ .../agent-clusterchecks-deployment.yaml | 9 ++++--- .../templates/cluster-agent-deployment.yaml | 13 ++++----- charts/datadog/templates/container-agent.yaml | 13 +++++---- .../templates/container-trace-agent.yaml | 4 +-- charts/datadog/values.yaml | 9 +++---- 10 files changed, 84 insertions(+), 26 deletions(-) diff --git a/charts/datadog/CHANGELOG.md b/charts/datadog/CHANGELOG.md index 43b616fab..bbc57db9b 100644 --- a/charts/datadog/CHANGELOG.md +++ b/charts/datadog/CHANGELOG.md @@ -1,5 +1,12 @@ # Datadog changelog +## 2.4.30 + +* Refactor liveness and readiness probes with helpers to allow user overrides with other types of probes or disabling + probes entirely. +* Introduce `clusterChecksRunner.healthPort` default setting. +* Use health port defaults instead of hardcoded values. 
+ ## 2.4.29 * Add `common-env-vars` to `system-probe` container diff --git a/charts/datadog/Chart.yaml b/charts/datadog/Chart.yaml index 29816c405..7c938f18d 100644 --- a/charts/datadog/Chart.yaml +++ b/charts/datadog/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v1 name: datadog -version: 2.4.29 +version: 2.4.30 appVersion: "7" description: Datadog Agent keywords: diff --git a/charts/datadog/README.md b/charts/datadog/README.md index 2f1c73935..ae4e06af6 100644 --- a/charts/datadog/README.md +++ b/charts/datadog/README.md @@ -1,6 +1,6 @@ # Datadog -![Version: 2.4.29](https://img.shields.io/badge/Version-2.4.29-informational?style=flat-square) ![AppVersion: 7](https://img.shields.io/badge/AppVersion-7-informational?style=flat-square) +![Version: 2.4.30](https://img.shields.io/badge/Version-2.4.30-informational?style=flat-square) ![AppVersion: 7](https://img.shields.io/badge/AppVersion-7-informational?style=flat-square) [Datadog](https://www.datadoghq.com/) is a hosted infrastructure monitoring platform. This chart adds the Datadog Agent to all nodes in your cluster via a DaemonSet. It also optionally depends on the [kube-state-metrics chart](https://github.com/kubernetes/charts/tree/master/stable/kube-state-metrics). For more information about monitoring Kubernetes with Datadog, please refer to the [Datadog documentation website](https://docs.datadoghq.com/agent/basic_agent_usage/kubernetes/). @@ -402,6 +402,7 @@ helm install --name \ | clusterChecksRunner.dnsConfig | object | `{}` | specify dns configuration options for datadog cluster agent containers e.g ndots | | clusterChecksRunner.enabled | bool | `false` | If true, deploys agent dedicated for running the Cluster Checks instead of running in the Daemonset's agents. 
| | clusterChecksRunner.env | list | `[]` | Environment variables specific to Cluster Checks Runner | +| clusterChecksRunner.healthPort | int | `5555` | Port number to use in the Cluster Checks Runner for the healthz endpoint | | clusterChecksRunner.image.pullPolicy | string | `"IfNotPresent"` | Datadog Agent image pull policy | | clusterChecksRunner.image.pullSecrets | list | `[]` | Datadog Agent repository pullSecret (ex: specify docker registry credentials) | | clusterChecksRunner.image.repository | string | `"datadog/agent"` | Datadog Agent image repository to use | diff --git a/charts/datadog/templates/NOTES.txt b/charts/datadog/templates/NOTES.txt index 72864aad0..2e8fc3ccb 100644 --- a/charts/datadog/templates/NOTES.txt +++ b/charts/datadog/templates/NOTES.txt @@ -93,6 +93,29 @@ Cluster Agent liveness probe port ({{ $liveness.port }}) is different from the c Cluster Agent readiness probe port ({{ $readiness.port }}) is different from the configured health port ({{ $healthPort }}). {{- end }} {{- end }} + {{- if and .Values.datadog.clusterChecks.enabled .Values.clusterChecksRunner.enabled }} + {{- $healthPort := .Values.clusterChecksRunner.healthPort }} + {{- with $liveness := .Values.clusterChecksRunner.livenessProbe.httpGet }} + {{- if and $liveness.port (ne $healthPort $liveness.port) }} + +##################################################################################### +#### ERROR: Cluster Checks Runner liveness probe misconfiguration #### +##################################################################################### + +Cluster Checks Runner liveness probe port ({{ $liveness.port }}) is different from the configured health port ({{ $healthPort }}). 
+ {{- end }} + {{- end }} + {{- with $readiness := .Values.clusterChecksRunner.readinessProbe.httpGet }} + {{- if and $readiness.port (ne $healthPort $readiness.port) }} + +##################################################################################### +#### ERROR: Cluster Checks Runner readiness probe misconfiguration #### +##################################################################################### + +Cluster Checks Runner readiness probe port ({{ $readiness.port }}) is different from the configured health port ({{ $healthPort }}). + {{- end }} + {{- end }} + {{- end }} {{- end }} {{- if .Values.datadog.apm.enabled }} {{- $apmPort := .Values.datadog.apm.port }} diff --git a/charts/datadog/templates/_helpers.tpl b/charts/datadog/templates/_helpers.tpl index b2de2e780..13bc4a5b6 100644 --- a/charts/datadog/templates/_helpers.tpl +++ b/charts/datadog/templates/_helpers.tpl @@ -150,3 +150,30 @@ true false {{- end -}} {{- end -}} + +{{/* +Renders a probe: `settings` as-is when it sets httpGet, tcpSocket or exec, otherwise `settings` merged with an HTTP GET handler. +Expects a map with `port` (default handler port), `path` (default handler URI) and `settings` (user probe settings). +*/}} +{{- define "probe.http" -}} +{{- if not (or .settings.exec .settings.tcpSocket .settings.httpGet) -}} +{{- $defaults := dict "httpGet" (dict "path" .path "port" .port "scheme" "HTTP") -}} +{{ toYaml (merge $defaults .settings) }} +{{- else -}} +{{ toYaml .settings }} +{{- end -}} +{{- end -}} + +{{/* +Returns probe definition based on user settings and default TCP socket port. +Accepts a map with `port` (default port) and `settings` (probe settings). 
+*/}} +{{- define "probe.tcp" -}} +{{- if not (or .settings.exec .settings.tcpSocket .settings.httpGet) -}} +{{- $defaults := dict "tcpSocket" (dict "port" .port) -}} +{{ toYaml (merge $defaults .settings) }} +{{- else -}} +{{ toYaml .settings }} +{{- end -}} +{{- end -}} + diff --git a/charts/datadog/templates/agent-clusterchecks-deployment.yaml b/charts/datadog/templates/agent-clusterchecks-deployment.yaml index 09029f5a6..89e3bc031 100644 --- a/charts/datadog/templates/agent-clusterchecks-deployment.yaml +++ b/charts/datadog/templates/agent-clusterchecks-deployment.yaml @@ -101,7 +101,8 @@ spec: - name: DD_EXTRA_CONFIG_PROVIDERS value: "clusterchecks" - name: DD_HEALTH_PORT - value: "5555" + {{- $healthPort := .Values.clusterChecksRunner.healthPort }} + value: {{ $healthPort | quote }} # Cluster checks - name: DD_CLUSTER_AGENT_KUBERNETES_SERVICE_NAME value: {{ template "datadog.fullname" . }}-cluster-agent @@ -155,9 +156,11 @@ spec: {{ toYaml .Values.clusterChecksRunner.volumeMounts | indent 10 }} {{- end }} livenessProbe: -{{ toYaml .Values.clusterChecksRunner.livenessProbe | indent 10 }} +{{- $live := .Values.clusterChecksRunner.livenessProbe }} +{{ include "probe.http" (dict "settings" $live "path" "/live" "port" $healthPort) | indent 10 }} readinessProbe: -{{ toYaml .Values.clusterChecksRunner.readinessProbe | indent 10 }} +{{- $ready := .Values.clusterChecksRunner.readinessProbe }} +{{ include "probe.http" (dict "settings" $ready "path" "/ready" "port" $healthPort) | indent 10 }} volumes: - name: installinfo configMap: diff --git a/charts/datadog/templates/cluster-agent-deployment.yaml b/charts/datadog/templates/cluster-agent-deployment.yaml index 3c3bd84a6..9c8d68fbf 100644 --- a/charts/datadog/templates/cluster-agent-deployment.yaml +++ b/charts/datadog/templates/cluster-agent-deployment.yaml @@ -103,7 +103,8 @@ spec: {{- end }} env: - name: DD_HEALTH_PORT - value: {{ .Values.clusterAgent.healthPort | quote }} + {{- $healthPort := 
.Values.clusterAgent.healthPort }} + value: {{ $healthPort | quote }} - name: DD_API_KEY valueFrom: secretKeyRef: @@ -200,12 +201,12 @@ spec: {{ toYaml .Values.clusterAgent.env | indent 10 }} {{- end }} livenessProbe: -{{ $defaultLive := dict "httpGet" (dict "port" .Values.clusterAgent.healthPort "path" "/live" "scheme" "HTTP") }} -{{ toYaml (mergeOverwrite $defaultLive .Values.clusterAgent.livenessProbe) | indent 10 }} +{{- $live := .Values.clusterAgent.livenessProbe }} +{{ include "probe.http" (dict "path" "/live" "port" $healthPort "settings" $live) | indent 10 }} readinessProbe: -{{ $defaultReady := dict "httpGet" (dict "port" .Values.clusterAgent.healthPort "path" "/ready" "scheme" "HTTP") }} -{{ toYaml (mergeOverwrite $defaultReady .Values.clusterAgent.readinessProbe) | indent 10 }} - volumeMounts: +{{- $ready := .Values.clusterAgent.readinessProbe }} +{{ include "probe.http" (dict "path" "/ready" "port" $healthPort "settings" $ready) | indent 10 }} + volumeMounts: - name: installinfo subPath: install_info {{- if eq .Values.targetSystem "windows" }} diff --git a/charts/datadog/templates/container-agent.yaml b/charts/datadog/templates/container-agent.yaml index 0da6ba038..524feebd5 100644 --- a/charts/datadog/templates/container-agent.yaml +++ b/charts/datadog/templates/container-agent.yaml @@ -68,10 +68,9 @@ value: {{ (default false (or .Values.datadog.logs.containerCollectAll .Values.datadog.logsConfigContainerCollectAll)) | quote}} - name: DD_LOGS_CONFIG_K8S_CONTAINER_USE_FILE value: {{ .Values.datadog.logs.containerCollectUsingFiles | quote }} - {{- if not .Values.datadog.livenessProbe }} - name: DD_HEALTH_PORT - value: "5555" - {{- end }} + {{- $healthPort := .Values.agents.containers.agent.healthPort }} + value: {{ $healthPort | quote }} {{- if .Values.datadog.dogstatsd.useSocketVolume }} - name: DD_DOGSTATSD_SOCKET value: {{ .Values.datadog.dogstatsd.socketPath | quote }} @@ -165,9 +164,9 @@ {{ toYaml .Values.agents.volumeMounts | indent 4 }} {{- end }} 
livenessProbe: -{{ $defaultLive := dict "httpGet" (dict "port" .Values.agents.containers.agent.healthPort "path" "/live" "scheme" "HTTP") }} -{{ toYaml (mergeOverwrite $defaultLive .Values.agents.containers.agent.livenessProbe) | indent 4 }} +{{- $live := .Values.agents.containers.agent.livenessProbe }} +{{ include "probe.http" (dict "path" "/live" "port" $healthPort "settings" $live) | indent 4 }} readinessProbe: -{{ $defaultReady := dict "httpGet" (dict "port" .Values.agents.containers.agent.healthPort "path" "/ready" "scheme" "HTTP") }} -{{ toYaml (mergeOverwrite $defaultReady .Values.agents.containers.agent.readinessProbe) | indent 4 }} +{{- $ready := .Values.agents.containers.agent.readinessProbe }} +{{ include "probe.http" (dict "path" "/ready" "port" $healthPort "settings" $ready) | indent 4 }} {{- end -}} diff --git a/charts/datadog/templates/container-trace-agent.yaml b/charts/datadog/templates/container-trace-agent.yaml index 5bd4d6f8b..1521d3980 100644 --- a/charts/datadog/templates/container-trace-agent.yaml +++ b/charts/datadog/templates/container-trace-agent.yaml @@ -59,6 +59,6 @@ mountPath: {{ (dir .Values.datadog.apm.socketPath) }} {{- end }} livenessProbe: -{{ $defaultLive := dict "tcpSocket" (dict "port" .Values.datadog.apm.port) }} -{{ toYaml (mergeOverwrite $defaultLive .Values.agents.containers.traceAgent.livenessProbe) | indent 4 }} +{{- $live := .Values.agents.containers.traceAgent.livenessProbe }} +{{ include "probe.tcp" (dict "port" .Values.datadog.apm.port "settings" $live ) | indent 4 }} {{- end -}} diff --git a/charts/datadog/values.yaml b/charts/datadog/values.yaml index 77a97929b..824c1eb4b 100644 --- a/charts/datadog/values.yaml +++ b/charts/datadog/values.yaml @@ -901,6 +901,9 @@ clusterChecksRunner: # tolerations: [] + # clusterChecksRunner.healthPort -- Port number to use in the Cluster Checks Runner for the healthz endpoint + healthPort: 5555 + # clusterChecksRunner.livenessProbe -- Override default agent liveness probe settings # 
@default -- Every 15s / 6 KO / 1 OK ## In case of issues with the probe, you can disable it with the @@ -911,9 +914,6 @@ clusterChecksRunner: # command: ["/bin/true"] # livenessProbe: - httpGet: - path: /live - port: 5555 initialDelaySeconds: 15 periodSeconds: 15 timeoutSeconds: 5 @@ -930,9 +930,6 @@ clusterChecksRunner: # command: ["/bin/true"] # readinessProbe: - httpGet: - path: /ready - port: 5555 initialDelaySeconds: 15 periodSeconds: 15 timeoutSeconds: 5