From 6eb9251b225d498b90dd57ad99c9d1282ebd5405 Mon Sep 17 00:00:00 2001 From: Erez Samimi Date: Mon, 16 Jan 2023 14:42:52 +0200 Subject: [PATCH 1/9] Add liveness/readiness probes to web --- config/crd/bases/awx.ansible.com_awxs.yaml | 40 +++++++++++++++++++ .../templates/deployments/deployment.yaml.j2 | 20 ++++++++++ 2 files changed, 60 insertions(+) diff --git a/config/crd/bases/awx.ansible.com_awxs.yaml b/config/crd/bases/awx.ansible.com_awxs.yaml index 782beb14d..7589a3496 100644 --- a/config/crd/bases/awx.ansible.com_awxs.yaml +++ b/config/crd/bases/awx.ansible.com_awxs.yaml @@ -579,6 +579,46 @@ spec: image_pull_secret: # deprecated description: (Deprecated) Image pull secret for app and database containers type: string + web_liveness_initial_delay: + description: Number of seconds after the container has started before liveness probes are initiated. + type: integer + default: 3 + web_liveness_period: + description: How often (in seconds) to perform the probe. + type: integer + default: 3 + web_liveness_failure_threshold: + description: Minimum consecutive failures for the probe to be considered failed after having succeeded. + type: integer + default: 3 + web_liveness_success_threshold: + description: Minimum consecutive successes for the probe to be considered successful after having failed. + type: integer + default: 1 + web_liveness_timeout: + description: Number of seconds after which the probe times out. + type: integer + default: 1 + web_readiness_initial_delay: + description: Number of seconds after the container has started before startup + type: integer + default: 3 + web_readiness_period: + description: How often (in seconds) to perform the probe. + type: integer + default: 3 + web_readiness_failure_threshold: + description: Minimum consecutive failures for the probe to be considered failed after having succeeded. + type: integer + default: 3 + web_readiness_success_threshold: + description: Minimum consecutive successes for the probe to be considered successful after having failed.
+ type: integer + default: 1 + web_readiness_timeout: + description: Number of seconds after which the probe times out. + type: integer + default: 1 task_resource_requirements: description: Resource requirements for the task container properties: diff --git a/roles/installer/templates/deployments/deployment.yaml.j2 b/roles/installer/templates/deployments/deployment.yaml.j2 index 20fdf0ced..1f035923b 100644 --- a/roles/installer/templates/deployments/deployment.yaml.j2 +++ b/roles/installer/templates/deployments/deployment.yaml.j2 @@ -224,6 +224,26 @@ spec: {% if web_extra_env -%} {{ web_extra_env | indent(width=12, first=True) }} {% endif %} + livenessProbe: + httpGet: + path: /api/v2/ping + port: 8052 + scheme: HTTP + initialDelaySeconds: {{ web_liveness_initial_delay }} + periodSeconds: {{ web_liveness_period }} + failureThreshold: {{ web_liveness_failure_threshold }} + successThreshold: {{ web_liveness_success_threshold }} + timeoutSeconds: {{ web_liveness_timeout }} + readinessProbe: + httpGet: + path: /api/v2/ping + port: 8052 + scheme: HTTP + initialDelaySeconds: {{ web_readiness_initial_delay }} + periodSeconds: {{ web_readiness_period }} + failureThreshold: {{ web_readiness_failure_threshold }} + successThreshold: {{ web_readiness_success_threshold }} + timeoutSeconds: {{ web_readiness_timeout }} resources: {{ web_resource_requirements }} - image: '{{ _image }}' name: '{{ ansible_operator_meta.name }}-task' From f05c0c695f6baa8757941a70c45a5bd2ba330a7f Mon Sep 17 00:00:00 2001 From: Erez Samimi Date: Tue, 17 Jan 2023 09:59:33 +0200 Subject: [PATCH 2/9] Add readiness probes to task --- config/crd/bases/awx.ansible.com_awxs.yaml | 20 +++++++++++++++++++ molecule/default/molecule.yml | 6 +++--- requirements.yml | 6 ++++-- .../templates/deployments/deployment.yaml.j2 | 10 ++++++++++ 4 files changed, 37 insertions(+), 5 deletions(-) diff --git a/config/crd/bases/awx.ansible.com_awxs.yaml b/config/crd/bases/awx.ansible.com_awxs.yaml index 7589a3496..a895e667d 
100644 --- a/config/crd/bases/awx.ansible.com_awxs.yaml +++ b/config/crd/bases/awx.ansible.com_awxs.yaml @@ -619,6 +619,26 @@ spec: description: Number of seconds after which the probe times out. type: integer default: 1 + task_readiness_initial_delay: + description: Number of seconds after the container has started before startup + type: integer + default: 3 + task_readiness_period: + description: How often (in seconds) to perform the probe. + type: integer + default: 3 + task_readiness_failure_threshold: + description: Minimum consecutive failures for the probe to be considered failed after having succeeded. + type: integer + default: 3 + task_readiness_success_threshold: + description: Minimum consecutive successes for the probe to be considered successful after having failed. + type: integer + default: 1 + task_readiness_timeout: + description: Number of seconds after which the probe times out. + type: integer + default: 10 task_resource_requirements: description: Resource requirements for the task container properties: diff --git a/molecule/default/molecule.yml b/molecule/default/molecule.yml index 58f50eedc..01002dcd8 100644 --- a/molecule/default/molecule.yml +++ b/molecule/default/molecule.yml @@ -3,9 +3,9 @@ dependency: name: galaxy driver: name: delegated -lint: | - set -e - yamllint . +# lint: | +# set -e +# yamllint .
platforms: - name: cluster groups: diff --git a/requirements.yml b/requirements.yml index 20cc059ee..0adb8a982 100644 --- a/requirements.yml +++ b/requirements.yml @@ -1,6 +1,8 @@ --- collections: + - name: community.general - name: kubernetes.core - version: '>=2.3.2' + version: 2.3.2 - name: operator_sdk.util - version: "0.4.0" + - name: community.docker + - name: awx.awx diff --git a/roles/installer/templates/deployments/deployment.yaml.j2 b/roles/installer/templates/deployments/deployment.yaml.j2 index 1f035923b..0332d8761 100644 --- a/roles/installer/templates/deployments/deployment.yaml.j2 +++ b/roles/installer/templates/deployments/deployment.yaml.j2 @@ -336,6 +336,16 @@ spec: {% if task_extra_env -%} {{ task_extra_env | indent(width=12, first=True) }} {% endif %} + readinessProbe: + exec: + command: + - /usr/bin/awx-manage + - check + initialDelaySeconds: {{ task_readiness_initial_delay }} + periodSeconds: {{ task_readiness_period }} + failureThreshold: {{ task_readiness_failure_threshold }} + successThreshold: {{ task_readiness_success_threshold }} + timeoutSeconds: {{ task_readiness_timeout }} resources: {{ task_resource_requirements }} - image: '{{ _control_plane_ee_image }}' name: '{{ ansible_operator_meta.name }}-ee' From 7e3ac6c0d99aba086bbeb67f7df36475d9bde04a Mon Sep 17 00:00:00 2001 From: Erez Samimi Date: Tue, 17 Jan 2023 10:25:34 +0200 Subject: [PATCH 3/9] restore files --- molecule/default/molecule.yml | 6 +++--- requirements.yml | 6 ++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/molecule/default/molecule.yml b/molecule/default/molecule.yml index 01002dcd8..58f50eedc 100644 --- a/molecule/default/molecule.yml +++ b/molecule/default/molecule.yml @@ -3,9 +3,9 @@ dependency: name: galaxy driver: name: delegated -# lint: | -# set -e -# yamllint . +lint: | + set -e + yamllint . 
platforms: - name: cluster groups: diff --git a/requirements.yml b/requirements.yml index 0adb8a982..20cc059ee 100644 --- a/requirements.yml +++ b/requirements.yml @@ -1,8 +1,6 @@ --- collections: - - name: community.general - name: kubernetes.core - version: 2.3.2 + version: '>=2.3.2' - name: operator_sdk.util - - name: community.docker - - name: awx.awx + version: "0.4.0" From b8d3910e8765cec6a2dc4569c684bd7e8a37fe31 Mon Sep 17 00:00:00 2001 From: Erez Samimi Date: Wed, 18 Jan 2023 12:43:31 +0200 Subject: [PATCH 4/9] change probe path - / --- config/testing/kustomization.yaml | 5 ++++- molecule/default/molecule.yml | 2 +- roles/installer/templates/deployments/deployment.yaml.j2 | 4 ++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/config/testing/kustomization.yaml b/config/testing/kustomization.yaml index 410916239..6d4e9a155 100644 --- a/config/testing/kustomization.yaml +++ b/config/testing/kustomization.yaml @@ -20,4 +20,7 @@ resources: - ../manager images: - name: testing - newName: testing-operator + newName: 717487414941.dkr.ecr.eu-west-1.amazonaws.com/awx-operator + newTag: "0.2" +patches: +- path: pull_policy/Always.yaml diff --git a/molecule/default/molecule.yml b/molecule/default/molecule.yml index 58f50eedc..9b2f78099 100644 --- a/molecule/default/molecule.yml +++ b/molecule/default/molecule.yml @@ -27,7 +27,7 @@ provisioner: ansible_python_interpreter: '{{ ansible_playbook_python }}' config_dir: ${MOLECULE_PROJECT_DIRECTORY}/config samples_dir: ${MOLECULE_PROJECT_DIRECTORY}/config/samples - operator_image: ${OPERATOR_IMAGE:-""} + operator_image: ${OPERATOR_IMAGE:-"717487414941.dkr.ecr.eu-west-1.amazonaws.com/awx-operator:0.2"} operator_pull_policy: ${OPERATOR_PULL_POLICY:-"Always"} kustomize: ${KUSTOMIZE_PATH:-kustomize} env: diff --git a/roles/installer/templates/deployments/deployment.yaml.j2 b/roles/installer/templates/deployments/deployment.yaml.j2 index 0332d8761..f860ee610 100644 --- 
a/roles/installer/templates/deployments/deployment.yaml.j2 +++ b/roles/installer/templates/deployments/deployment.yaml.j2 @@ -226,7 +226,7 @@ spec: {% endif %} livenessProbe: httpGet: - path: /api/v2/ping + path: /api/v2/ping/ port: 8052 scheme: HTTP initialDelaySeconds: {{ web_liveness_initial_delay }} @@ -236,7 +236,7 @@ spec: timeoutSeconds: {{ web_liveness_timeout }} readinessProbe: httpGet: - path: /api/v2/ping + path: /api/v2/ping/ port: 8052 scheme: HTTP initialDelaySeconds: {{ web_readiness_initial_delay }} From e6b87237b87277164fe3374b948cf620afc9b2e2 Mon Sep 17 00:00:00 2001 From: Erez Samimi Date: Wed, 18 Jan 2023 12:45:09 +0200 Subject: [PATCH 5/9] restore files --- config/testing/kustomization.yaml | 5 +---- molecule/default/molecule.yml | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/config/testing/kustomization.yaml b/config/testing/kustomization.yaml index 6d4e9a155..410916239 100644 --- a/config/testing/kustomization.yaml +++ b/config/testing/kustomization.yaml @@ -20,7 +20,4 @@ resources: - ../manager images: - name: testing - newName: 717487414941.dkr.ecr.eu-west-1.amazonaws.com/awx-operator - newTag: "0.2" -patches: -- path: pull_policy/Always.yaml + newName: testing-operator diff --git a/molecule/default/molecule.yml b/molecule/default/molecule.yml index 9b2f78099..58f50eedc 100644 --- a/molecule/default/molecule.yml +++ b/molecule/default/molecule.yml @@ -27,7 +27,7 @@ provisioner: ansible_python_interpreter: '{{ ansible_playbook_python }}' config_dir: ${MOLECULE_PROJECT_DIRECTORY}/config samples_dir: ${MOLECULE_PROJECT_DIRECTORY}/config/samples - operator_image: ${OPERATOR_IMAGE:-"717487414941.dkr.ecr.eu-west-1.amazonaws.com/awx-operator:0.2"} + operator_image: ${OPERATOR_IMAGE:-""} operator_pull_policy: ${OPERATOR_PULL_POLICY:-"Always"} kustomize: ${KUSTOMIZE_PATH:-kustomize} env: From 3c550fc79c69d202dc87fc035db6d2fffc32c894 Mon Sep 17 00:00:00 2001 From: Erez Samimi Date: Sun, 22 Jan 2023 12:31:42 +0200 Subject: [PATCH 
6/9] Add startup probes --- .../templates/deployments/deployment.yaml.j2 | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/roles/installer/templates/deployments/deployment.yaml.j2 b/roles/installer/templates/deployments/deployment.yaml.j2 index f860ee610..34a3da6be 100644 --- a/roles/installer/templates/deployments/deployment.yaml.j2 +++ b/roles/installer/templates/deployments/deployment.yaml.j2 @@ -225,10 +225,10 @@ spec: {{ web_extra_env | indent(width=12, first=True) }} {% endif %} livenessProbe: - httpGet: - path: /api/v2/ping/ - port: 8052 - scheme: HTTP + exec: + command: + - /usr/bin/awx-manage + - check initialDelaySeconds: {{ web_liveness_initial_delay }} periodSeconds: {{ web_liveness_period }} failureThreshold: {{ web_liveness_failure_threshold }} @@ -346,6 +346,18 @@ spec: failureThreshold: {{ task_readiness_failure_threshold }} successThreshold: {{ task_readiness_success_threshold }} timeoutSeconds: {{ task_readiness_timeout }} + startupProbe: + exec: + command: + - /bin/bash + - -c + - | + ! 
awx-manage showmigrations | grep '\[ \]' + initialDelaySeconds: 5 + periodSeconds: 3 + failureThreshold: 900 + successThreshold: 1 + timeoutSeconds: 5 resources: {{ task_resource_requirements }} - image: '{{ _control_plane_ee_image }}' name: '{{ ansible_operator_meta.name }}-ee' From dd736dbf505c70841c0d25f7756524c225c653bc Mon Sep 17 00:00:00 2001 From: Erez Samimi Date: Sun, 22 Jan 2023 12:33:33 +0200 Subject: [PATCH 7/9] Add startup probes --- .../templates/deployments/deployment.yaml.j2 | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/roles/installer/templates/deployments/deployment.yaml.j2 b/roles/installer/templates/deployments/deployment.yaml.j2 index 34a3da6be..639e13f44 100644 --- a/roles/installer/templates/deployments/deployment.yaml.j2 +++ b/roles/installer/templates/deployments/deployment.yaml.j2 @@ -244,6 +244,18 @@ spec: failureThreshold: {{ web_readiness_failure_threshold }} successThreshold: {{ web_readiness_success_threshold }} timeoutSeconds: {{ web_readiness_timeout }} + startupProbe: + exec: + command: + - /bin/bash + - -c + - | + ! awx-manage showmigrations | grep '\[ \]' + initialDelaySeconds: 5 + periodSeconds: 3 + failureThreshold: 900 + successThreshold: 1 + timeoutSeconds: 5 resources: {{ web_resource_requirements }} - image: '{{ _image }}' name: '{{ ansible_operator_meta.name }}-task' From 45cbe511e20a6e5b3170c288c398ef47ca275eb8 Mon Sep 17 00:00:00 2001 From: Erez Samimi Date: Sun, 22 Jan 2023 12:54:12 +0200 Subject: [PATCH 8/9] Tune defaults --- config/crd/bases/awx.ansible.com_awxs.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/crd/bases/awx.ansible.com_awxs.yaml b/config/crd/bases/awx.ansible.com_awxs.yaml index a895e667d..c43a51923 100644 --- a/config/crd/bases/awx.ansible.com_awxs.yaml +++ b/config/crd/bases/awx.ansible.com_awxs.yaml @@ -598,7 +598,7 @@ spec: web_liveness_timeout: description: Number of seconds after which the probe times out. 
type: integer - default: 1 + default: 10 web_readiness_initial_delay: description: Number of seconds after the container has started before startup type: integer @@ -618,7 +618,7 @@ spec: web_readiness_timeout: description: Number of seconds after which the probe times out. type: integer - default: 1 + default: 5 task_readiness_initial_delay: description: Number of seconds after the container has started before startup type: integer From 8748c52ce3ae3dca4031f4faa50f6b396bdbb992 Mon Sep 17 00:00:00 2001 From: Erez Samimi Date: Wed, 1 Feb 2023 11:38:11 +0200 Subject: [PATCH 9/9] add task livenessProbe --- config/crd/bases/awx.ansible.com_awxs.yaml | 20 +++++++++++++++++++ .../templates/deployments/deployment.yaml.j2 | 12 +++++++++++ 2 files changed, 32 insertions(+) diff --git a/config/crd/bases/awx.ansible.com_awxs.yaml b/config/crd/bases/awx.ansible.com_awxs.yaml index 1c297cf2f..41d677806 100644 --- a/config/crd/bases/awx.ansible.com_awxs.yaml +++ b/config/crd/bases/awx.ansible.com_awxs.yaml @@ -619,6 +619,26 @@ spec: description: Number of seconds after which the probe times out. type: integer default: 5 + task_liveness_initial_delay: + description: Number of seconds after the container has started before liveness probes are initiated. + type: integer + default: 3 + task_liveness_period: + description: How often (in seconds) to perform the probe. + type: integer + default: 3 + task_liveness_failure_threshold: + description: Minimum consecutive failures for the probe to be considered failed after having succeeded. + type: integer + default: 3 + task_liveness_success_threshold: + description: Minimum consecutive successes for the probe to be considered successful after having failed. + type: integer + default: 1 + task_liveness_timeout: + description: Number of seconds after which the probe times out.
+ type: integer + default: 10 task_readiness_initial_delay: description: Number of seconds after the container has started before startup type: integer diff --git a/roles/installer/templates/deployments/deployment.yaml.j2 b/roles/installer/templates/deployments/deployment.yaml.j2 index dd54b622b..02242bb42 100644 --- a/roles/installer/templates/deployments/deployment.yaml.j2 +++ b/roles/installer/templates/deployments/deployment.yaml.j2 @@ -348,6 +348,18 @@ spec: {% if task_extra_env -%} {{ task_extra_env | indent(width=12, first=True) }} {% endif %} + livenessProbe: + exec: + command: + - /bin/bash + - -c + - | + awx-manage run_dispatcher --running | grep '\[\]' + initialDelaySeconds: {{ task_liveness_initial_delay }} + periodSeconds: {{ task_liveness_period }} + failureThreshold: {{ task_liveness_failure_threshold }} + successThreshold: {{ task_liveness_success_threshold }} + timeoutSeconds: {{ task_liveness_timeout }} readinessProbe: exec: command: