From b3eae638825d2f9cc3f20ad08a78e33d4a61862e Mon Sep 17 00:00:00 2001 From: Tomislav Plavcic Date: Wed, 9 Aug 2023 17:03:30 +0200 Subject: [PATCH 01/10] K8SPS-73 - Add operator-self-healing test --- e2e-tests/conf/chaos-network-loss.yml | 15 ++ e2e-tests/conf/chaos-pod-failure.yml | 13 ++ e2e-tests/conf/chaos-pod-kill.yml | 11 ++ e2e-tests/functions | 137 +++++++++++++++--- e2e-tests/run-distro.csv | 1 + e2e-tests/run-minikube.csv | 1 + e2e-tests/run-pr.csv | 1 + e2e-tests/run-release.csv | 1 + .../operator-self-healing/00-assert.yaml | 26 ++++ .../00-deploy-operator.yaml | 14 ++ .../operator-self-healing/01-assert.yaml | 27 ++++ .../01-deploy-chaos-mesh.yaml | 11 ++ .../operator-self-healing/02-assert.yaml | 60 ++++++++ .../02-create-cluster.yaml | 21 +++ .../operator-self-healing/03-write-data.yaml | 16 ++ .../operator-self-healing/04-assert.yaml | 10 ++ .../04-read-from-primary.yaml | 13 ++ .../operator-self-healing/05-assert.yaml | 26 ++++ .../operator-self-healing/05-kill-pod.yaml | 19 +++ .../operator-self-healing/06-assert.yaml | 60 ++++++++ .../operator-self-healing/06-scale-up.yaml | 21 +++ .../operator-self-healing/07-assert.yaml | 26 ++++ .../07-network-loss.yaml | 12 ++ .../operator-self-healing/08-assert.yaml | 60 ++++++++ .../operator-self-healing/08-scale-down.yaml | 21 +++ .../operator-self-healing/09-assert.yaml | 26 ++++ .../operator-self-healing/09-pod-failure.yaml | 12 ++ .../operator-self-healing/10-assert.yaml | 60 ++++++++ .../operator-self-healing/10-scale-up.yaml | 21 +++ .../11-destroy-chaos-mesh.yaml | 12 ++ .../12-drop-finalizer.yaml | 5 + 31 files changed, 738 insertions(+), 21 deletions(-) create mode 100644 e2e-tests/conf/chaos-network-loss.yml create mode 100644 e2e-tests/conf/chaos-pod-failure.yml create mode 100644 e2e-tests/conf/chaos-pod-kill.yml create mode 100644 e2e-tests/tests/operator-self-healing/00-assert.yaml create mode 100644 e2e-tests/tests/operator-self-healing/00-deploy-operator.yaml create mode 100644 
e2e-tests/tests/operator-self-healing/01-assert.yaml create mode 100644 e2e-tests/tests/operator-self-healing/01-deploy-chaos-mesh.yaml create mode 100644 e2e-tests/tests/operator-self-healing/02-assert.yaml create mode 100644 e2e-tests/tests/operator-self-healing/02-create-cluster.yaml create mode 100644 e2e-tests/tests/operator-self-healing/03-write-data.yaml create mode 100644 e2e-tests/tests/operator-self-healing/04-assert.yaml create mode 100644 e2e-tests/tests/operator-self-healing/04-read-from-primary.yaml create mode 100644 e2e-tests/tests/operator-self-healing/05-assert.yaml create mode 100644 e2e-tests/tests/operator-self-healing/05-kill-pod.yaml create mode 100644 e2e-tests/tests/operator-self-healing/06-assert.yaml create mode 100644 e2e-tests/tests/operator-self-healing/06-scale-up.yaml create mode 100644 e2e-tests/tests/operator-self-healing/07-assert.yaml create mode 100644 e2e-tests/tests/operator-self-healing/07-network-loss.yaml create mode 100644 e2e-tests/tests/operator-self-healing/08-assert.yaml create mode 100644 e2e-tests/tests/operator-self-healing/08-scale-down.yaml create mode 100644 e2e-tests/tests/operator-self-healing/09-assert.yaml create mode 100644 e2e-tests/tests/operator-self-healing/09-pod-failure.yaml create mode 100644 e2e-tests/tests/operator-self-healing/10-assert.yaml create mode 100644 e2e-tests/tests/operator-self-healing/10-scale-up.yaml create mode 100644 e2e-tests/tests/operator-self-healing/11-destroy-chaos-mesh.yaml create mode 100644 e2e-tests/tests/operator-self-healing/12-drop-finalizer.yaml diff --git a/e2e-tests/conf/chaos-network-loss.yml b/e2e-tests/conf/chaos-network-loss.yml new file mode 100644 index 000000000..c4cbb2db1 --- /dev/null +++ b/e2e-tests/conf/chaos-network-loss.yml @@ -0,0 +1,15 @@ +apiVersion: chaos-mesh.org/v1alpha1 +kind: NetworkChaos +metadata: + name: network-loss-example +spec: + action: loss + mode: one + selector: + pods: + test-namespace: + - pod-name + loss: + loss: "100" + 
correlation: "100" + duration: "60s" diff --git a/e2e-tests/conf/chaos-pod-failure.yml b/e2e-tests/conf/chaos-pod-failure.yml new file mode 100644 index 000000000..3e4630609 --- /dev/null +++ b/e2e-tests/conf/chaos-pod-failure.yml @@ -0,0 +1,13 @@ +apiVersion: chaos-mesh.org/v1alpha1 +kind: PodChaos +metadata: + name: pod-failure-example +spec: + action: pod-failure + mode: one + value: "" + duration: "60s" + selector: + pods: + test-namespace: + - pod-name diff --git a/e2e-tests/conf/chaos-pod-kill.yml b/e2e-tests/conf/chaos-pod-kill.yml new file mode 100644 index 000000000..edf885d75 --- /dev/null +++ b/e2e-tests/conf/chaos-pod-kill.yml @@ -0,0 +1,11 @@ +apiVersion: chaos-mesh.org/v1alpha1 +kind: PodChaos +metadata: + name: pod-kill-example +spec: + action: pod-kill + mode: one + selector: + pods: + test-namespace: + - pod-name diff --git a/e2e-tests/functions b/e2e-tests/functions index 89bfae726..86e106d9a 100755 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -50,7 +50,7 @@ deploy_pmm_server() { --set platform="${platform}" \ "https://percona-charts.storage.googleapis.com/pmm-server-${PMM_SERVER_VERSION}.tgz" fi - SERVICE="postgres" + local SERVICE="postgres" until kubectl -n "${NAMESPACE}" exec monitoring-0 -- bash -c "pgrep -x $SERVICE >/dev/null"; do echo "Retry $retry" sleep 5 @@ -63,13 +63,13 @@ deploy_pmm_server() { } get_pmm_api_key() { - ADMIN_PASSWORD=$(kubectl -n "${NAMESPACE}" exec monitoring-0 -- bash -c "printenv | grep ADMIN_PASSWORD | cut -d '=' -f2") + local ADMIN_PASSWORD=$(kubectl -n "${NAMESPACE}" exec monitoring-0 -- bash -c "printenv | grep ADMIN_PASSWORD | cut -d '=' -f2") echo $(curl --insecure -X POST -H "Content-Type: application/json" -d '{"name":"operator", "role": "Admin"}' "https://admin:$ADMIN_PASSWORD@"$(get_service_ip monitoring-service)"/graph/api/auth/keys" | jq .key) } deploy_minio() { - accessKey="$(kubectl -n "${NAMESPACE}" get secret minio-secret -o jsonpath='{.data.AWS_ACCESS_KEY_ID}' | base64 -d)" - 
secretKey="$(kubectl -n "${NAMESPACE}" get secret minio-secret -o jsonpath='{.data.AWS_SECRET_ACCESS_KEY}' | base64 -d)" + local accessKey="$(kubectl -n "${NAMESPACE}" get secret minio-secret -o jsonpath='{.data.AWS_ACCESS_KEY_ID}' | base64 -d)" + local secretKey="$(kubectl -n "${NAMESPACE}" get secret minio-secret -o jsonpath='{.data.AWS_SECRET_ACCESS_KEY}' | base64 -d)" helm uninstall -n "${NAMESPACE}" minio-service || : helm repo remove minio || : @@ -299,6 +299,7 @@ get_mysql_users() { get_service_ip() { local service=$1 + while (kubectl get service/$service -n "${NAMESPACE}" -o 'jsonpath={.spec.type}' 2>&1 || :) | grep -q NotFound; do sleep 1 done @@ -379,16 +380,43 @@ wait_pod() { set -o xtrace } +wait_deployment() { + local name=$1 + local target_namespace=${2:-"$namespace"} + + sleep 10 + set +o xtrace + retry=0 + echo -n $name + until [ -n "$(kubectl -n ${target_namespace} get deployment $name -o jsonpath='{.status.replicas}')" \ + -a "$(kubectl -n ${target_namespace} get deployment $name -o jsonpath='{.status.replicas}')" \ + == "$(kubectl -n ${target_namespace} get deployment $name -o jsonpath='{.status.readyReplicas}')" ]; do + sleep 1 + echo -n . + let retry+=1 + if [ $retry -ge 360 ]; then + kubectl logs $(get_operator_pod) -c operator \ + | grep -v 'level=info' \ + | grep -v 'level=debug' \ + | tail -100 + echo max retry count $retry reached. 
something went wrong with operator or kubernetes cluster + exit 1 + fi + done + echo + set -o xtrace +} + check_auto_tuning() { - RAM_SIZE=$1 - RDS_MEM_INSTANCE=12582880 - CUSTOM_INNODB_SIZE=$2 - CUSTOM_CONNECTIONS=$3 + local RAM_SIZE=$1 + local RDS_MEM_INSTANCE=12582880 + local CUSTOM_INNODB_SIZE=$2 + local CUSTOM_CONNECTIONS=$3 - INNODB_SIZE=$(run_mysql \ + local INNODB_SIZE=$(run_mysql \ 'SELECT @@innodb_buffer_pool_size;' \ "-h $(get_haproxy_svc "$(get_cluster_name)") -uroot -proot_password") - CONNECTIONS=$(run_mysql \ + local CONNECTIONS=$(run_mysql \ 'SELECT @@max_connections;' \ "-h $(get_haproxy_svc "$(get_cluster_name)") -uroot -proot_password") @@ -451,9 +479,8 @@ get_primary_from_haproxy() { verify_certificate_sans() { local certificate=$1 local expected_sans=$2 - - have=$(mktemp) - want=$(mktemp) + local have=$(mktemp) + local want=$(mktemp) kubectl -n "${NAMESPACE}" get certificate "${certificate}" -o jsonpath='{.spec.dnsNames}' | jq '.' >"${have}" echo "${expected_sans}" | jq '.' 
>"${want}" @@ -462,21 +489,19 @@ verify_certificate_sans() { } check_passwords_leak() { - - secrets=$(kubectl get secrets -o json | jq -r '.items[].data | to_entries | .[] | select(.key | (endswith(".crt") or endswith(".key") or endswith(".pub") or endswith(".pem") or endswith(".p12")) | not) | .value') - - passwords="$(for i in $secrets; do base64 -d <<< $i; echo; done) $secrets" - pods=$(kubectl -n "${NAMESPACE}" get pods -o name | awk -F "/" '{print $2}') + local secrets=$(kubectl get secrets -o json | jq -r '.items[].data | to_entries | .[] | select(.key | (endswith(".crt") or endswith(".key") or endswith(".pub") or endswith(".pem") or endswith(".p12")) | not) | .value') + local passwords="$(for i in $secrets; do base64 -d <<< $i; echo; done) $secrets" + local pods=$(kubectl -n "${NAMESPACE}" get pods -o name | awk -F "/" '{print $2}') collect_logs() { NS=$1 for p in $pods; do - containers=$(kubectl -n "$NS" get pod $p -o jsonpath='{.spec.containers[*].name}') + local containers=$(kubectl -n "$NS" get pod $p -o jsonpath='{.spec.containers[*].name}') for c in $containers; do kubectl -n "$NS" logs $p -c $c >${TEMP_DIR}/logs_output-$p-$c.txt echo logs saved in: ${TEMP_DIR}/logs_output-$p-$c.txt for pass in $passwords; do - count=$(grep -c --fixed-strings -- "$pass" ${TEMP_DIR}/logs_output-$p-$c.txt || :) + local count=$(grep -c --fixed-strings -- "$pass" ${TEMP_DIR}/logs_output-$p-$c.txt || :) if [[ $count != 0 ]]; then echo leaked passwords are found in log ${TEMP_DIR}/logs_output-$p-$c.txt false @@ -489,7 +514,77 @@ check_passwords_leak() { collect_logs $NAMESPACE if [ -n "$OPERATOR_NS" ]; then - pods=$(kubectl -n "${OPERATOR_NS}" get pods -o name | awk -F "/" '{print $2}') + local pods=$(kubectl -n "${OPERATOR_NS}" get pods -o name | awk -F "/" '{print $2}') collect_logs $OPERATOR_NS fi } + +deploy_chaos_mesh() { + destroy_chaos_mesh + + helm repo add chaos-mesh https://charts.chaos-mesh.org + helm install chaos-mesh chaos-mesh/chaos-mesh 
--namespace=${NAMESPACE} --set chaosDaemon.runtime=containerd --set chaosDaemon.socketPath=/run/containerd/containerd.sock --set dashboard.create=false --version 2.5.1 + sleep 10 +} + +destroy_chaos_mesh() { + local chaos_mesh_ns=$(helm list --all-namespaces --filter chaos-mesh | tail -n1 | awk -F' ' '{print $2}' | sed 's/NAMESPACE//') + + for i in $(kubectl api-resources | grep chaos-mesh | awk '{print $1}'); do timeout 30 kubectl delete ${i} --all --all-namespaces || :; done + if [ -n "${chaos_mesh_ns}" ]; then + helm uninstall chaos-mesh --namespace ${chaos_mesh_ns} || : + fi + timeout 30 kubectl delete crd $(kubectl get crd | grep 'chaos-mesh.org' | awk '{print $1}') || : + timeout 30 kubectl delete clusterrolebinding $(kubectl get clusterrolebinding | grep 'chaos-mesh' | awk '{print $1}') || : + timeout 30 kubectl delete clusterrole $(kubectl get clusterrole | grep 'chaos-mesh' | awk '{print $1}') || : + timeout 30 kubectl delete MutatingWebhookConfiguration $(kubectl get MutatingWebhookConfiguration | grep 'chaos-mesh' | awk '{print $1}') || : + timeout 30 kubectl delete ValidatingWebhookConfiguration $(kubectl get ValidatingWebhookConfiguration | grep 'chaos-mesh' | awk '{print $1}') || : + timeout 30 kubectl delete ValidatingWebhookConfiguration $(kubectl get ValidatingWebhookConfiguration | grep 'validate-auth' | awk '{print $1}') || : +} + +kill_pods() { + local ns=$1 + local selector=$2 + local pod_label=$3 + local label_value=$4 + + if [ "${selector}" == "pod" ]; then + yq eval ' + .metadata.name = "chaos-pod-kill-'${RANDOM}'" | + del(.spec.selector.pods.test-namespace) | + .spec.selector.pods.'${ns}'[0] = "'${pod_label}'"' ${TESTS_CONFIG_DIR}/chaos-pod-kill.yml \ + | kubectl apply --namespace ${ns} -f - + elif [ "${selector}" == "label" ]; then + yq eval ' + .metadata.name = "chaos-kill-label-'${RANDOM}'" | + .spec.mode = "all" | + del(.spec.selector.pods) | + .spec.selector.labelSelectors."'${pod_label}'" = "'${label_value}'"' 
${TESTS_CONFIG_DIR}/chaos-pod-kill.yml \ + | kubectl apply --namespace ${ns} -f - + fi + sleep 5 +} + +failure_pod() { + local ns=$1 + local pod=$2 + + yq eval ' + .metadata.name = "chaos-pod-failure-'${RANDOM}'" | + del(.spec.selector.pods.test-namespace) | + .spec.selector.pods.'${ns}'[0] = "'${pod}'"' ${TESTS_CONFIG_DIR}/chaos-pod-failure.yml \ + | kubectl apply --namespace ${ns} -f - + sleep 5 +} + +network_loss() { + local ns=$1 + local pod=$2 + + yq eval ' + .metadata.name = "chaos-pod-network-loss-'${RANDOM}'" | + del(.spec.selector.pods.test-namespace) | + .spec.selector.pods.'${ns}'[0] = "'${pod}'"' ${TESTS_CONFIG_DIR}/chaos-network-loss.yml \ + | kubectl apply --namespace ${ns} -f - + sleep 5 +} diff --git a/e2e-tests/run-distro.csv b/e2e-tests/run-distro.csv index 96e82793c..0f69e9dcd 100644 --- a/e2e-tests/run-distro.csv +++ b/e2e-tests/run-distro.csv @@ -12,6 +12,7 @@ haproxy init-deploy monitoring one-pod +operator-self-healing scaling service-per-pod sidecars diff --git a/e2e-tests/run-minikube.csv b/e2e-tests/run-minikube.csv index abe434b0c..ecd33a8b8 100644 --- a/e2e-tests/run-minikube.csv +++ b/e2e-tests/run-minikube.csv @@ -11,6 +11,7 @@ gr-tls-cert-manager haproxy init-deploy one-pod +operator-self-healing sidecars tls-cert-manager users diff --git a/e2e-tests/run-pr.csv b/e2e-tests/run-pr.csv index 23e41493f..29f70600d 100644 --- a/e2e-tests/run-pr.csv +++ b/e2e-tests/run-pr.csv @@ -16,6 +16,7 @@ init-deploy limits monitoring one-pod +operator-self-healing scaling service-per-pod sidecars diff --git a/e2e-tests/run-release.csv b/e2e-tests/run-release.csv index d34853d19..b894ac90e 100644 --- a/e2e-tests/run-release.csv +++ b/e2e-tests/run-release.csv @@ -15,6 +15,7 @@ init-deploy limits monitoring one-pod +operator-self-healing scaling service-per-pod sidecars diff --git a/e2e-tests/tests/operator-self-healing/00-assert.yaml b/e2e-tests/tests/operator-self-healing/00-assert.yaml new file mode 100644 index 000000000..d9146fe1b --- /dev/null +++ 
b/e2e-tests/tests/operator-self-healing/00-assert.yaml @@ -0,0 +1,26 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: perconaservermysqls.ps.percona.com +spec: + group: ps.percona.com + names: + kind: PerconaServerMySQL + listKind: PerconaServerMySQLList + plural: perconaservermysqls + shortNames: + - ps + singular: perconaservermysql + scope: Namespaced +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +metadata: + name: check-operator-deploy-status +timeout: 120 +commands: + - script: kubectl assert exist-enhanced deployment percona-server-mysql-operator -n ${OPERATOR_NS:-$NAMESPACE} --field-selector status.readyReplicas=1 diff --git a/e2e-tests/tests/operator-self-healing/00-deploy-operator.yaml b/e2e-tests/tests/operator-self-healing/00-deploy-operator.yaml new file mode 100644 index 000000000..67307fe5d --- /dev/null +++ b/e2e-tests/tests/operator-self-healing/00-deploy-operator.yaml @@ -0,0 +1,14 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 10 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + deploy_operator + deploy_non_tls_cluster_secrets + deploy_tls_cluster_secrets + deploy_client diff --git a/e2e-tests/tests/operator-self-healing/01-assert.yaml b/e2e-tests/tests/operator-self-healing/01-assert.yaml new file mode 100644 index 000000000..9caa36184 --- /dev/null +++ b/e2e-tests/tests/operator-self-healing/01-assert.yaml @@ -0,0 +1,27 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chaos-controller-manager +spec: + replicas: 3 +status: + availableReplicas: 3 + readyReplicas: 3 + replicas: 3 + updatedReplicas: 3 +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: chaos-daemon +status: + currentNumberScheduled: 3 + desiredNumberScheduled: 3 + numberAvailable: 3 + numberMisscheduled: 0 + numberReady: 3 + 
updatedNumberScheduled: 3 diff --git a/e2e-tests/tests/operator-self-healing/01-deploy-chaos-mesh.yaml b/e2e-tests/tests/operator-self-healing/01-deploy-chaos-mesh.yaml new file mode 100644 index 000000000..2fcde5027 --- /dev/null +++ b/e2e-tests/tests/operator-self-healing/01-deploy-chaos-mesh.yaml @@ -0,0 +1,11 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 10 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + deploy_chaos_mesh diff --git a/e2e-tests/tests/operator-self-healing/02-assert.yaml b/e2e-tests/tests/operator-self-healing/02-assert.yaml new file mode 100644 index 000000000..143e658f6 --- /dev/null +++ b/e2e-tests/tests/operator-self-healing/02-assert.yaml @@ -0,0 +1,60 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 420 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: operator-self-healing-mysql +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: operator-self-healing-orc +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: operator-self-healing-haproxy +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +apiVersion: ps.percona.com/v1alpha1 +kind: PerconaServerMySQL +metadata: + name: operator-self-healing + finalizers: + - delete-mysql-pods-in-order +status: + haproxy: + ready: 3 + size: 3 + state: ready + mysql: + ready: 3 + size: 3 + state: ready + orchestrator: + ready: 3 + size: 3 + state: ready + state: ready diff --git a/e2e-tests/tests/operator-self-healing/02-create-cluster.yaml b/e2e-tests/tests/operator-self-healing/02-create-cluster.yaml new file mode 100644 index 000000000..3dd3dea43 --- /dev/null +++ 
b/e2e-tests/tests/operator-self-healing/02-create-cluster.yaml @@ -0,0 +1,21 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 10 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + get_cr \ + | yq eval '.spec.mysql.clusterType="async"' - \ + | yq eval '.spec.mysql.size=3' - \ + | yq eval '.spec.mysql.affinity.antiAffinityTopologyKey="none"' - \ + | yq eval '.spec.proxy.haproxy.enabled=true' - \ + | yq eval '.spec.proxy.haproxy.size=3' - \ + | yq eval '.spec.proxy.haproxy.affinity.antiAffinityTopologyKey="none"' - \ + | yq eval '.spec.orchestrator.enabled=true' - \ + | yq eval '.spec.orchestrator.size=3' - \ + | yq eval '.spec.orchestrator.affinity.antiAffinityTopologyKey="none"' - \ + | kubectl -n "${NAMESPACE}" apply -f - diff --git a/e2e-tests/tests/operator-self-healing/03-write-data.yaml b/e2e-tests/tests/operator-self-healing/03-write-data.yaml new file mode 100644 index 000000000..bc82e7920 --- /dev/null +++ b/e2e-tests/tests/operator-self-healing/03-write-data.yaml @@ -0,0 +1,16 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + run_mysql \ + "CREATE DATABASE IF NOT EXISTS myDB; CREATE TABLE IF NOT EXISTS myDB.myTable (id int PRIMARY KEY)" \ + "-h $(get_haproxy_svc $(get_cluster_name)) -uroot -proot_password" + + run_mysql \ + "INSERT myDB.myTable (id) VALUES (100500)" \ + "-h $(get_haproxy_svc $(get_cluster_name)) -uroot -proot_password" diff --git a/e2e-tests/tests/operator-self-healing/04-assert.yaml b/e2e-tests/tests/operator-self-healing/04-assert.yaml new file mode 100644 index 000000000..8a8037060 --- /dev/null +++ b/e2e-tests/tests/operator-self-healing/04-assert.yaml @@ -0,0 +1,10 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 30 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 04-read-from-primary +data: + data: "100500" diff --git 
a/e2e-tests/tests/operator-self-healing/04-read-from-primary.yaml b/e2e-tests/tests/operator-self-healing/04-read-from-primary.yaml new file mode 100644 index 000000000..274332522 --- /dev/null +++ b/e2e-tests/tests/operator-self-healing/04-read-from-primary.yaml @@ -0,0 +1,13 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 30 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + data=$(run_mysql "SELECT * FROM myDB.myTable" "-h $(get_haproxy_svc $(get_cluster_name)) -uroot -proot_password") + + kubectl create configmap -n "${NAMESPACE}" 04-read-from-primary --from-literal=data="${data}" diff --git a/e2e-tests/tests/operator-self-healing/05-assert.yaml b/e2e-tests/tests/operator-self-healing/05-assert.yaml new file mode 100644 index 000000000..d9146fe1b --- /dev/null +++ b/e2e-tests/tests/operator-self-healing/05-assert.yaml @@ -0,0 +1,26 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: perconaservermysqls.ps.percona.com +spec: + group: ps.percona.com + names: + kind: PerconaServerMySQL + listKind: PerconaServerMySQLList + plural: perconaservermysqls + shortNames: + - ps + singular: perconaservermysql + scope: Namespaced +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +metadata: + name: check-operator-deploy-status +timeout: 120 +commands: + - script: kubectl assert exist-enhanced deployment percona-server-mysql-operator -n ${OPERATOR_NS:-$NAMESPACE} --field-selector status.readyReplicas=1 diff --git a/e2e-tests/tests/operator-self-healing/05-kill-pod.yaml b/e2e-tests/tests/operator-self-healing/05-kill-pod.yaml new file mode 100644 index 000000000..8304e98cc --- /dev/null +++ b/e2e-tests/tests/operator-self-healing/05-kill-pod.yaml @@ -0,0 +1,19 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 30 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + 
init_pod=$(get_operator_pod) + kill_pods "${OPERATOR_NS:-$NAMESPACE}" "pod" "$init_pod" + sleep 10 # wait a bit for pod to be killed + wait_deployment percona-server-mysql-operator "${OPERATOR_NS:-$NAMESPACE}" + + if [ "$init_pod" == "$(get_operator_pod)" ]; then + echo "operator pod was not killed! something went wrong." + exit 1 + fi diff --git a/e2e-tests/tests/operator-self-healing/06-assert.yaml b/e2e-tests/tests/operator-self-healing/06-assert.yaml new file mode 100644 index 000000000..f5a696df9 --- /dev/null +++ b/e2e-tests/tests/operator-self-healing/06-assert.yaml @@ -0,0 +1,60 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 420 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: operator-self-healing-mysql +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: operator-self-healing-orc +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: operator-self-healing-haproxy +status: + observedGeneration: 2 + replicas: 5 + readyReplicas: 5 + currentReplicas: 5 + updatedReplicas: 5 + collisionCount: 0 +--- +apiVersion: ps.percona.com/v1alpha1 +kind: PerconaServerMySQL +metadata: + name: operator-self-healing + finalizers: + - delete-mysql-pods-in-order +status: + haproxy: + ready: 5 + size: 5 + state: ready + mysql: + ready: 3 + size: 3 + state: ready + orchestrator: + ready: 3 + size: 3 + state: ready + state: ready diff --git a/e2e-tests/tests/operator-self-healing/06-scale-up.yaml b/e2e-tests/tests/operator-self-healing/06-scale-up.yaml new file mode 100644 index 000000000..aaa7cdb70 --- /dev/null +++ b/e2e-tests/tests/operator-self-healing/06-scale-up.yaml @@ -0,0 +1,21 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 10 +commands: + - script: |- + set 
-o errexit + set -o xtrace + + source ../../functions + + get_cr \ + | yq eval '.spec.mysql.clusterType="async"' - \ + | yq eval '.spec.mysql.size=3' - \ + | yq eval '.spec.mysql.affinity.antiAffinityTopologyKey="none"' - \ + | yq eval '.spec.proxy.haproxy.enabled=true' - \ + | yq eval '.spec.proxy.haproxy.size=5' - \ + | yq eval '.spec.proxy.haproxy.affinity.antiAffinityTopologyKey="none"' - \ + | yq eval '.spec.orchestrator.enabled=true' - \ + | yq eval '.spec.orchestrator.size=3' - \ + | yq eval '.spec.orchestrator.affinity.antiAffinityTopologyKey="none"' - \ + | kubectl -n "${NAMESPACE}" apply -f - diff --git a/e2e-tests/tests/operator-self-healing/07-assert.yaml b/e2e-tests/tests/operator-self-healing/07-assert.yaml new file mode 100644 index 000000000..d9146fe1b --- /dev/null +++ b/e2e-tests/tests/operator-self-healing/07-assert.yaml @@ -0,0 +1,26 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: perconaservermysqls.ps.percona.com +spec: + group: ps.percona.com + names: + kind: PerconaServerMySQL + listKind: PerconaServerMySQLList + plural: perconaservermysqls + shortNames: + - ps + singular: perconaservermysql + scope: Namespaced +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +metadata: + name: check-operator-deploy-status +timeout: 120 +commands: + - script: kubectl assert exist-enhanced deployment percona-server-mysql-operator -n ${OPERATOR_NS:-$NAMESPACE} --field-selector status.readyReplicas=1 diff --git a/e2e-tests/tests/operator-self-healing/07-network-loss.yaml b/e2e-tests/tests/operator-self-healing/07-network-loss.yaml new file mode 100644 index 000000000..b19885cdd --- /dev/null +++ b/e2e-tests/tests/operator-self-healing/07-network-loss.yaml @@ -0,0 +1,12 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 30 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + network_loss 
"${OPERATOR_NS:-$NAMESPACE}" "$(get_operator_pod)" + sleep 30 # wait for network loss to happen diff --git a/e2e-tests/tests/operator-self-healing/08-assert.yaml b/e2e-tests/tests/operator-self-healing/08-assert.yaml new file mode 100644 index 000000000..b4e8f08af --- /dev/null +++ b/e2e-tests/tests/operator-self-healing/08-assert.yaml @@ -0,0 +1,60 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: operator-self-healing-mysql +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: operator-self-healing-orc +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: operator-self-healing-haproxy +status: + observedGeneration: 3 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +apiVersion: ps.percona.com/v1alpha1 +kind: PerconaServerMySQL +metadata: + name: operator-self-healing + finalizers: + - delete-mysql-pods-in-order +status: + haproxy: + ready: 3 + size: 3 + state: ready + mysql: + ready: 3 + size: 3 + state: ready + orchestrator: + ready: 3 + size: 3 + state: ready + state: ready diff --git a/e2e-tests/tests/operator-self-healing/08-scale-down.yaml b/e2e-tests/tests/operator-self-healing/08-scale-down.yaml new file mode 100644 index 000000000..3dd3dea43 --- /dev/null +++ b/e2e-tests/tests/operator-self-healing/08-scale-down.yaml @@ -0,0 +1,21 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 10 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + get_cr \ + | yq eval '.spec.mysql.clusterType="async"' - \ + | yq eval '.spec.mysql.size=3' - \ + | yq eval '.spec.mysql.affinity.antiAffinityTopologyKey="none"' - \ + | yq eval 
'.spec.proxy.haproxy.enabled=true' - \ + | yq eval '.spec.proxy.haproxy.size=3' - \ + | yq eval '.spec.proxy.haproxy.affinity.antiAffinityTopologyKey="none"' - \ + | yq eval '.spec.orchestrator.enabled=true' - \ + | yq eval '.spec.orchestrator.size=3' - \ + | yq eval '.spec.orchestrator.affinity.antiAffinityTopologyKey="none"' - \ + | kubectl -n "${NAMESPACE}" apply -f - diff --git a/e2e-tests/tests/operator-self-healing/09-assert.yaml b/e2e-tests/tests/operator-self-healing/09-assert.yaml new file mode 100644 index 000000000..d9146fe1b --- /dev/null +++ b/e2e-tests/tests/operator-self-healing/09-assert.yaml @@ -0,0 +1,26 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: perconaservermysqls.ps.percona.com +spec: + group: ps.percona.com + names: + kind: PerconaServerMySQL + listKind: PerconaServerMySQLList + plural: perconaservermysqls + shortNames: + - ps + singular: perconaservermysql + scope: Namespaced +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +metadata: + name: check-operator-deploy-status +timeout: 120 +commands: + - script: kubectl assert exist-enhanced deployment percona-server-mysql-operator -n ${OPERATOR_NS:-$NAMESPACE} --field-selector status.readyReplicas=1 diff --git a/e2e-tests/tests/operator-self-healing/09-pod-failure.yaml b/e2e-tests/tests/operator-self-healing/09-pod-failure.yaml new file mode 100644 index 000000000..e37ba4dd8 --- /dev/null +++ b/e2e-tests/tests/operator-self-healing/09-pod-failure.yaml @@ -0,0 +1,12 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 30 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + failure_pod "${OPERATOR_NS:-$NAMESPACE}" "$(get_operator_pod)" + sleep 30 # wait for pod failure to happen diff --git a/e2e-tests/tests/operator-self-healing/10-assert.yaml b/e2e-tests/tests/operator-self-healing/10-assert.yaml new file mode 100644 index 
000000000..c3121eccc --- /dev/null +++ b/e2e-tests/tests/operator-self-healing/10-assert.yaml @@ -0,0 +1,60 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 420 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: operator-self-healing-mysql +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: operator-self-healing-orc +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: operator-self-healing-haproxy +status: + observedGeneration: 4 + replicas: 5 + readyReplicas: 5 + currentReplicas: 5 + updatedReplicas: 5 + collisionCount: 0 +--- +apiVersion: ps.percona.com/v1alpha1 +kind: PerconaServerMySQL +metadata: + name: operator-self-healing + finalizers: + - delete-mysql-pods-in-order +status: + haproxy: + ready: 5 + size: 5 + state: ready + mysql: + ready: 3 + size: 3 + state: ready + orchestrator: + ready: 3 + size: 3 + state: ready + state: ready diff --git a/e2e-tests/tests/operator-self-healing/10-scale-up.yaml b/e2e-tests/tests/operator-self-healing/10-scale-up.yaml new file mode 100644 index 000000000..aaa7cdb70 --- /dev/null +++ b/e2e-tests/tests/operator-self-healing/10-scale-up.yaml @@ -0,0 +1,21 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 10 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + get_cr \ + | yq eval '.spec.mysql.clusterType="async"' - \ + | yq eval '.spec.mysql.size=3' - \ + | yq eval '.spec.mysql.affinity.antiAffinityTopologyKey="none"' - \ + | yq eval '.spec.proxy.haproxy.enabled=true' - \ + | yq eval '.spec.proxy.haproxy.size=5' - \ + | yq eval '.spec.proxy.haproxy.affinity.antiAffinityTopologyKey="none"' - \ + | yq eval '.spec.orchestrator.enabled=true' - \ + | yq eval 
'.spec.orchestrator.size=3' - \ + | yq eval '.spec.orchestrator.affinity.antiAffinityTopologyKey="none"' - \ + | kubectl -n "${NAMESPACE}" apply -f - diff --git a/e2e-tests/tests/operator-self-healing/11-destroy-chaos-mesh.yaml b/e2e-tests/tests/operator-self-healing/11-destroy-chaos-mesh.yaml new file mode 100644 index 000000000..3f0cbc6b8 --- /dev/null +++ b/e2e-tests/tests/operator-self-healing/11-destroy-chaos-mesh.yaml @@ -0,0 +1,12 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 120 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + destroy_chaos_mesh + timeout: 120 diff --git a/e2e-tests/tests/operator-self-healing/12-drop-finalizer.yaml b/e2e-tests/tests/operator-self-healing/12-drop-finalizer.yaml new file mode 100644 index 000000000..73ab6351b --- /dev/null +++ b/e2e-tests/tests/operator-self-healing/12-drop-finalizer.yaml @@ -0,0 +1,5 @@ +apiVersion: ps.percona.com/v1alpha1 +kind: PerconaServerMySQL +metadata: + name: operator-self-healing + finalizers: [] From c07ad533ee47addf205f8f70b26fc18b08f59ea9 Mon Sep 17 00:00:00 2001 From: Tomislav Plavcic Date: Thu, 10 Aug 2023 18:17:10 +0200 Subject: [PATCH 02/10] K8SPS-73 - Add self-healing test --- e2e-tests/functions | 11 ++- e2e-tests/run-distro.csv | 1 + e2e-tests/run-minikube.csv | 1 + e2e-tests/run-pr.csv | 1 + e2e-tests/run-release.csv | 1 + e2e-tests/tests/self-healing/00-assert.yaml | 26 ++++++ .../self-healing/00-deploy-operator.yaml | 14 +++ e2e-tests/tests/self-healing/01-assert.yaml | 27 ++++++ .../self-healing/01-deploy-chaos-mesh.yaml | 11 +++ e2e-tests/tests/self-healing/02-assert.yaml | 60 +++++++++++++ .../tests/self-healing/02-create-cluster.yaml | 21 +++++ .../tests/self-healing/03-write-data.yaml | 16 ++++ e2e-tests/tests/self-healing/04-assert.yaml | 10 +++ .../self-healing/04-read-from-primary.yaml | 13 +++ e2e-tests/tests/self-healing/05-assert.yaml | 79 +++++++++++++++++ .../tests/self-healing/05-kill-primary.yaml | 18 ++++ 
.../tests/self-healing/06-write-data.yaml | 12 +++ e2e-tests/tests/self-healing/07-assert.yaml | 30 +++++++ .../self-healing/07-read-from-replicas.yaml | 15 ++++ e2e-tests/tests/self-healing/08-assert.yaml | 82 ++++++++++++++++++ .../self-healing/08-failure-primary.yaml | 12 +++ .../tests/self-healing/09-write-data.yaml | 12 +++ e2e-tests/tests/self-healing/10-assert.yaml | 33 +++++++ .../self-healing/10-read-from-replicas.yaml | 15 ++++ e2e-tests/tests/self-healing/11-assert.yaml | 86 +++++++++++++++++++ .../self-healing/11-network-loss-primary.yaml | 13 +++ .../tests/self-healing/12-write-data.yaml | 12 +++ e2e-tests/tests/self-healing/13-assert.yaml | 36 ++++++++ .../self-healing/13-read-from-replicas.yaml | 15 ++++ e2e-tests/tests/self-healing/14-assert.yaml | 79 +++++++++++++++++ .../tests/self-healing/14-cluster-crash.yaml | 12 +++ .../tests/self-healing/15-write-data.yaml | 12 +++ e2e-tests/tests/self-healing/16-assert.yaml | 39 +++++++++ .../self-healing/16-read-from-replicas.yaml | 15 ++++ .../self-healing/17-destroy-chaos-mesh.yaml | 12 +++ .../tests/self-healing/18-drop-finalizer.yaml | 5 ++ 36 files changed, 853 insertions(+), 4 deletions(-) create mode 100644 e2e-tests/tests/self-healing/00-assert.yaml create mode 100644 e2e-tests/tests/self-healing/00-deploy-operator.yaml create mode 100644 e2e-tests/tests/self-healing/01-assert.yaml create mode 100644 e2e-tests/tests/self-healing/01-deploy-chaos-mesh.yaml create mode 100644 e2e-tests/tests/self-healing/02-assert.yaml create mode 100644 e2e-tests/tests/self-healing/02-create-cluster.yaml create mode 100644 e2e-tests/tests/self-healing/03-write-data.yaml create mode 100644 e2e-tests/tests/self-healing/04-assert.yaml create mode 100644 e2e-tests/tests/self-healing/04-read-from-primary.yaml create mode 100644 e2e-tests/tests/self-healing/05-assert.yaml create mode 100644 e2e-tests/tests/self-healing/05-kill-primary.yaml create mode 100644 e2e-tests/tests/self-healing/06-write-data.yaml create mode 100644 
e2e-tests/tests/self-healing/07-assert.yaml create mode 100644 e2e-tests/tests/self-healing/07-read-from-replicas.yaml create mode 100644 e2e-tests/tests/self-healing/08-assert.yaml create mode 100644 e2e-tests/tests/self-healing/08-failure-primary.yaml create mode 100644 e2e-tests/tests/self-healing/09-write-data.yaml create mode 100644 e2e-tests/tests/self-healing/10-assert.yaml create mode 100644 e2e-tests/tests/self-healing/10-read-from-replicas.yaml create mode 100644 e2e-tests/tests/self-healing/11-assert.yaml create mode 100644 e2e-tests/tests/self-healing/11-network-loss-primary.yaml create mode 100644 e2e-tests/tests/self-healing/12-write-data.yaml create mode 100644 e2e-tests/tests/self-healing/13-assert.yaml create mode 100644 e2e-tests/tests/self-healing/13-read-from-replicas.yaml create mode 100644 e2e-tests/tests/self-healing/14-assert.yaml create mode 100644 e2e-tests/tests/self-healing/14-cluster-crash.yaml create mode 100644 e2e-tests/tests/self-healing/15-write-data.yaml create mode 100644 e2e-tests/tests/self-healing/16-assert.yaml create mode 100644 e2e-tests/tests/self-healing/16-read-from-replicas.yaml create mode 100644 e2e-tests/tests/self-healing/17-destroy-chaos-mesh.yaml create mode 100644 e2e-tests/tests/self-healing/18-drop-finalizer.yaml diff --git a/e2e-tests/functions b/e2e-tests/functions index 86e106d9a..117989929 100755 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -547,16 +547,17 @@ kill_pods() { local selector=$2 local pod_label=$3 local label_value=$4 + local chaos_suffix=$5 if [ "${selector}" == "pod" ]; then yq eval ' - .metadata.name = "chaos-pod-kill-'${RANDOM}'" | + .metadata.name = "chaos-pod-kill-'${chaos_suffix}'" | del(.spec.selector.pods.test-namespace) | .spec.selector.pods.'${ns}'[0] = "'${pod_label}'"' ${TESTS_CONFIG_DIR}/chaos-pod-kill.yml \ | kubectl apply --namespace ${ns} -f - elif [ "${selector}" == "label" ]; then yq eval ' - .metadata.name = "chaos-kill-label-'${RANDOM}'" | + .metadata.name = 
"chaos-kill-label-'${chaos_suffix}'" | .spec.mode = "all" | del(.spec.selector.pods) | .spec.selector.labelSelectors."'${pod_label}'" = "'${label_value}'"' ${TESTS_CONFIG_DIR}/chaos-pod-kill.yml \ @@ -568,9 +569,10 @@ kill_pods() { failure_pod() { local ns=$1 local pod=$2 + local chaos_suffix=$3 yq eval ' - .metadata.name = "chaos-pod-failure-'${RANDOM}'" | + .metadata.name = "chaos-pod-failure-'${chaos_suffix}'" | del(.spec.selector.pods.test-namespace) | .spec.selector.pods.'${ns}'[0] = "'${pod}'"' ${TESTS_CONFIG_DIR}/chaos-pod-failure.yml \ | kubectl apply --namespace ${ns} -f - @@ -580,9 +582,10 @@ failure_pod() { network_loss() { local ns=$1 local pod=$2 + local chaos_suffix=$3 yq eval ' - .metadata.name = "chaos-pod-network-loss-'${RANDOM}'" | + .metadata.name = "chaos-pod-network-loss-'${chaos_suffix}'" | del(.spec.selector.pods.test-namespace) | .spec.selector.pods.'${ns}'[0] = "'${pod}'"' ${TESTS_CONFIG_DIR}/chaos-network-loss.yml \ | kubectl apply --namespace ${ns} -f - diff --git a/e2e-tests/run-distro.csv b/e2e-tests/run-distro.csv index 0f69e9dcd..bf954881e 100644 --- a/e2e-tests/run-distro.csv +++ b/e2e-tests/run-distro.csv @@ -14,6 +14,7 @@ monitoring one-pod operator-self-healing scaling +self-healing service-per-pod sidecars tls-cert-manager diff --git a/e2e-tests/run-minikube.csv b/e2e-tests/run-minikube.csv index ecd33a8b8..c6c9b46a4 100644 --- a/e2e-tests/run-minikube.csv +++ b/e2e-tests/run-minikube.csv @@ -12,6 +12,7 @@ haproxy init-deploy one-pod operator-self-healing +self-healing sidecars tls-cert-manager users diff --git a/e2e-tests/run-pr.csv b/e2e-tests/run-pr.csv index 29f70600d..828a2afd7 100644 --- a/e2e-tests/run-pr.csv +++ b/e2e-tests/run-pr.csv @@ -18,6 +18,7 @@ monitoring one-pod operator-self-healing scaling +self-healing service-per-pod sidecars tls-cert-manager diff --git a/e2e-tests/run-release.csv b/e2e-tests/run-release.csv index b894ac90e..c7adf1e91 100644 --- a/e2e-tests/run-release.csv +++ b/e2e-tests/run-release.csv @@ 
-17,6 +17,7 @@ monitoring one-pod operator-self-healing scaling +self-healing service-per-pod sidecars tls-cert-manager diff --git a/e2e-tests/tests/self-healing/00-assert.yaml b/e2e-tests/tests/self-healing/00-assert.yaml new file mode 100644 index 000000000..d9146fe1b --- /dev/null +++ b/e2e-tests/tests/self-healing/00-assert.yaml @@ -0,0 +1,26 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: perconaservermysqls.ps.percona.com +spec: + group: ps.percona.com + names: + kind: PerconaServerMySQL + listKind: PerconaServerMySQLList + plural: perconaservermysqls + shortNames: + - ps + singular: perconaservermysql + scope: Namespaced +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +metadata: + name: check-operator-deploy-status +timeout: 120 +commands: + - script: kubectl assert exist-enhanced deployment percona-server-mysql-operator -n ${OPERATOR_NS:-$NAMESPACE} --field-selector status.readyReplicas=1 diff --git a/e2e-tests/tests/self-healing/00-deploy-operator.yaml b/e2e-tests/tests/self-healing/00-deploy-operator.yaml new file mode 100644 index 000000000..67307fe5d --- /dev/null +++ b/e2e-tests/tests/self-healing/00-deploy-operator.yaml @@ -0,0 +1,14 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 10 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + deploy_operator + deploy_non_tls_cluster_secrets + deploy_tls_cluster_secrets + deploy_client diff --git a/e2e-tests/tests/self-healing/01-assert.yaml b/e2e-tests/tests/self-healing/01-assert.yaml new file mode 100644 index 000000000..9caa36184 --- /dev/null +++ b/e2e-tests/tests/self-healing/01-assert.yaml @@ -0,0 +1,27 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chaos-controller-manager +spec: + replicas: 3 +status: + availableReplicas: 3 + readyReplicas: 3 + replicas: 3 
+ updatedReplicas: 3 +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: chaos-daemon +status: + currentNumberScheduled: 3 + desiredNumberScheduled: 3 + numberAvailable: 3 + numberMisscheduled: 0 + numberReady: 3 + updatedNumberScheduled: 3 diff --git a/e2e-tests/tests/self-healing/01-deploy-chaos-mesh.yaml b/e2e-tests/tests/self-healing/01-deploy-chaos-mesh.yaml new file mode 100644 index 000000000..2fcde5027 --- /dev/null +++ b/e2e-tests/tests/self-healing/01-deploy-chaos-mesh.yaml @@ -0,0 +1,11 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 10 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + deploy_chaos_mesh diff --git a/e2e-tests/tests/self-healing/02-assert.yaml b/e2e-tests/tests/self-healing/02-assert.yaml new file mode 100644 index 000000000..b689bcc78 --- /dev/null +++ b/e2e-tests/tests/self-healing/02-assert.yaml @@ -0,0 +1,60 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 420 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: self-healing-mysql +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: self-healing-orc +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: self-healing-haproxy +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +apiVersion: ps.percona.com/v1alpha1 +kind: PerconaServerMySQL +metadata: + name: self-healing + finalizers: + - delete-mysql-pods-in-order +status: + haproxy: + ready: 3 + size: 3 + state: ready + mysql: + ready: 3 + size: 3 + state: ready + orchestrator: + ready: 3 + size: 3 + state: ready + state: ready diff --git a/e2e-tests/tests/self-healing/02-create-cluster.yaml 
b/e2e-tests/tests/self-healing/02-create-cluster.yaml new file mode 100644 index 000000000..3dd3dea43 --- /dev/null +++ b/e2e-tests/tests/self-healing/02-create-cluster.yaml @@ -0,0 +1,21 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 10 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + get_cr \ + | yq eval '.spec.mysql.clusterType="async"' - \ + | yq eval '.spec.mysql.size=3' - \ + | yq eval '.spec.mysql.affinity.antiAffinityTopologyKey="none"' - \ + | yq eval '.spec.proxy.haproxy.enabled=true' - \ + | yq eval '.spec.proxy.haproxy.size=3' - \ + | yq eval '.spec.proxy.haproxy.affinity.antiAffinityTopologyKey="none"' - \ + | yq eval '.spec.orchestrator.enabled=true' - \ + | yq eval '.spec.orchestrator.size=3' - \ + | yq eval '.spec.orchestrator.affinity.antiAffinityTopologyKey="none"' - \ + | kubectl -n "${NAMESPACE}" apply -f - diff --git a/e2e-tests/tests/self-healing/03-write-data.yaml b/e2e-tests/tests/self-healing/03-write-data.yaml new file mode 100644 index 000000000..bc82e7920 --- /dev/null +++ b/e2e-tests/tests/self-healing/03-write-data.yaml @@ -0,0 +1,16 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + run_mysql \ + "CREATE DATABASE IF NOT EXISTS myDB; CREATE TABLE IF NOT EXISTS myDB.myTable (id int PRIMARY KEY)" \ + "-h $(get_haproxy_svc $(get_cluster_name)) -uroot -proot_password" + + run_mysql \ + "INSERT myDB.myTable (id) VALUES (100500)" \ + "-h $(get_haproxy_svc $(get_cluster_name)) -uroot -proot_password" diff --git a/e2e-tests/tests/self-healing/04-assert.yaml b/e2e-tests/tests/self-healing/04-assert.yaml new file mode 100644 index 000000000..8a8037060 --- /dev/null +++ b/e2e-tests/tests/self-healing/04-assert.yaml @@ -0,0 +1,10 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 30 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 04-read-from-primary +data: + data: "100500" diff --git 
a/e2e-tests/tests/self-healing/04-read-from-primary.yaml b/e2e-tests/tests/self-healing/04-read-from-primary.yaml new file mode 100644 index 000000000..274332522 --- /dev/null +++ b/e2e-tests/tests/self-healing/04-read-from-primary.yaml @@ -0,0 +1,13 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 30 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + data=$(run_mysql "SELECT * FROM myDB.myTable" "-h $(get_haproxy_svc $(get_cluster_name)) -uroot -proot_password") + + kubectl create configmap -n "${NAMESPACE}" 04-read-from-primary --from-literal=data="${data}" diff --git a/e2e-tests/tests/self-healing/05-assert.yaml b/e2e-tests/tests/self-healing/05-assert.yaml new file mode 100644 index 000000000..c33cb0248 --- /dev/null +++ b/e2e-tests/tests/self-healing/05-assert.yaml @@ -0,0 +1,79 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: self-healing-mysql +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: self-healing-orc +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: self-healing-haproxy +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +apiVersion: ps.percona.com/v1alpha1 +kind: PerconaServerMySQL +metadata: + name: self-healing + finalizers: + - delete-mysql-pods-in-order +status: + haproxy: + ready: 3 + size: 3 + state: ready + mysql: + ready: 3 + size: 3 + state: ready + orchestrator: + ready: 3 + size: 3 + state: ready + state: ready +--- +apiVersion: chaos-mesh.org/v1alpha1 +kind: PodChaos +metadata: + name: chaos-pod-kill-primary +spec: + action: pod-kill + mode: one 
+status: + experiment: + containerRecords: + - events: + - operation: Apply + type: Succeeded + injectedCount: 1 + phase: Injected + recoveredCount: 0 + selectorKey: . + desiredPhase: Run diff --git a/e2e-tests/tests/self-healing/05-kill-primary.yaml b/e2e-tests/tests/self-healing/05-kill-primary.yaml new file mode 100644 index 000000000..1dc699052 --- /dev/null +++ b/e2e-tests/tests/self-healing/05-kill-primary.yaml @@ -0,0 +1,18 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 30 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + init_pod="$(get_primary_from_haproxy ${test_name}-haproxy-0)" + kill_pods "${NAMESPACE}" "pod" "$init_pod" "" "primary" + sleep 10 # wait a bit for pod to be killed + + if [ "$init_pod" == "$(get_primary_from_haproxy ${test_name}-haproxy-0)" ]; then + echo "primary pod was not killed! something went wrong." + exit 1 + fi diff --git a/e2e-tests/tests/self-healing/06-write-data.yaml b/e2e-tests/tests/self-healing/06-write-data.yaml new file mode 100644 index 000000000..dada31537 --- /dev/null +++ b/e2e-tests/tests/self-healing/06-write-data.yaml @@ -0,0 +1,12 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + run_mysql \ + "INSERT myDB.myTable (id) VALUES (100501)" \ + "-h $(get_haproxy_svc $(get_cluster_name)) -uroot -proot_password" diff --git a/e2e-tests/tests/self-healing/07-assert.yaml b/e2e-tests/tests/self-healing/07-assert.yaml new file mode 100644 index 000000000..d5acf9414 --- /dev/null +++ b/e2e-tests/tests/self-healing/07-assert.yaml @@ -0,0 +1,30 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 30 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 07-read-from-replicas-0 +data: + data: |- + 100500 + 100501 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 07-read-from-replicas-1 +data: + data: |- + 100500 + 100501 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + 
name: 07-read-from-replicas-2 +data: + data: |- + 100500 + 100501 diff --git a/e2e-tests/tests/self-healing/07-read-from-replicas.yaml b/e2e-tests/tests/self-healing/07-read-from-replicas.yaml new file mode 100644 index 000000000..30f4d2649 --- /dev/null +++ b/e2e-tests/tests/self-healing/07-read-from-replicas.yaml @@ -0,0 +1,15 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 30 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + for i in 0 1 2; do + host=$(get_mysql_headless_fqdn $(get_cluster_name) $i) + data=$(run_mysql "SELECT * FROM myDB.myTable" "-h ${host} -uroot -proot_password") + kubectl create configmap -n "${NAMESPACE}" 07-read-from-replicas-${i} --from-literal=data="${data}" + done diff --git a/e2e-tests/tests/self-healing/08-assert.yaml b/e2e-tests/tests/self-healing/08-assert.yaml new file mode 100644 index 000000000..31bcaec8d --- /dev/null +++ b/e2e-tests/tests/self-healing/08-assert.yaml @@ -0,0 +1,82 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: self-healing-mysql +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: self-healing-orc +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: self-healing-haproxy +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +apiVersion: ps.percona.com/v1alpha1 +kind: PerconaServerMySQL +metadata: + name: self-healing + finalizers: + - delete-mysql-pods-in-order +status: + haproxy: + ready: 3 + size: 3 + state: ready + mysql: + ready: 3 + size: 3 + state: ready + orchestrator: + ready: 3 + size: 3 + state: ready + state: ready +--- 
+apiVersion: chaos-mesh.org/v1alpha1 +kind: PodChaos +metadata: + name: chaos-pod-failure-primary +spec: + action: pod-failure + duration: 60s + mode: one +status: + experiment: + containerRecords: + - events: + - operation: Apply + type: Succeeded + - operation: Recover + type: Succeeded + injectedCount: 1 + phase: Not Injected + recoveredCount: 1 + selectorKey: . + desiredPhase: Stop diff --git a/e2e-tests/tests/self-healing/08-failure-primary.yaml b/e2e-tests/tests/self-healing/08-failure-primary.yaml new file mode 100644 index 000000000..b159a17c4 --- /dev/null +++ b/e2e-tests/tests/self-healing/08-failure-primary.yaml @@ -0,0 +1,12 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 30 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + failure_pod "${NAMESPACE}" "$(get_primary_from_haproxy ${test_name}-haproxy-0)" "primary" + sleep 10 # wait a bit for pod to be killed diff --git a/e2e-tests/tests/self-healing/09-write-data.yaml b/e2e-tests/tests/self-healing/09-write-data.yaml new file mode 100644 index 000000000..b84c28860 --- /dev/null +++ b/e2e-tests/tests/self-healing/09-write-data.yaml @@ -0,0 +1,12 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + run_mysql \ + "INSERT myDB.myTable (id) VALUES (100502)" \ + "-h $(get_haproxy_svc $(get_cluster_name)) -uroot -proot_password" diff --git a/e2e-tests/tests/self-healing/10-assert.yaml b/e2e-tests/tests/self-healing/10-assert.yaml new file mode 100644 index 000000000..2f9ba0826 --- /dev/null +++ b/e2e-tests/tests/self-healing/10-assert.yaml @@ -0,0 +1,33 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 30 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 10-read-from-replicas-0 +data: + data: |- + 100500 + 100501 + 100502 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 10-read-from-replicas-1 +data: + data: |- + 100500 + 100501 + 100502 +--- +kind: 
ConfigMap +apiVersion: v1 +metadata: + name: 10-read-from-replicas-2 +data: + data: |- + 100500 + 100501 + 100502 diff --git a/e2e-tests/tests/self-healing/10-read-from-replicas.yaml b/e2e-tests/tests/self-healing/10-read-from-replicas.yaml new file mode 100644 index 000000000..55a419ab7 --- /dev/null +++ b/e2e-tests/tests/self-healing/10-read-from-replicas.yaml @@ -0,0 +1,15 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 30 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + for i in 0 1 2; do + host=$(get_mysql_headless_fqdn $(get_cluster_name) $i) + data=$(run_mysql "SELECT * FROM myDB.myTable" "-h ${host} -uroot -proot_password") + kubectl create configmap -n "${NAMESPACE}" 10-read-from-replicas-${i} --from-literal=data="${data}" + done diff --git a/e2e-tests/tests/self-healing/11-assert.yaml b/e2e-tests/tests/self-healing/11-assert.yaml new file mode 100644 index 000000000..a870f75bf --- /dev/null +++ b/e2e-tests/tests/self-healing/11-assert.yaml @@ -0,0 +1,86 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: self-healing-mysql +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: self-healing-orc +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: self-healing-haproxy +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +apiVersion: ps.percona.com/v1alpha1 +kind: PerconaServerMySQL +metadata: + name: self-healing + finalizers: + - delete-mysql-pods-in-order +status: + haproxy: + ready: 3 + size: 3 + state: ready + mysql: + ready: 3 + size: 3 + state: ready + orchestrator: + 
ready: 3 + size: 3 + state: ready + state: ready +--- +apiVersion: chaos-mesh.org/v1alpha1 +kind: NetworkChaos +metadata: + name: chaos-pod-network-loss-primary +spec: + action: loss + direction: to + duration: 60s + loss: + correlation: "100" + loss: "100" + mode: one +status: + experiment: + containerRecords: + - events: + - operation: Apply + type: Succeeded + - operation: Recover + type: Succeeded + injectedCount: 1 + phase: Not Injected + recoveredCount: 1 + selectorKey: . + desiredPhase: Stop diff --git a/e2e-tests/tests/self-healing/11-network-loss-primary.yaml b/e2e-tests/tests/self-healing/11-network-loss-primary.yaml new file mode 100644 index 000000000..599a9d82e --- /dev/null +++ b/e2e-tests/tests/self-healing/11-network-loss-primary.yaml @@ -0,0 +1,13 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 90 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + network_loss "${NAMESPACE}" "$(get_primary_from_haproxy ${test_name}-haproxy-0)" "primary" + sleep 30 # wait for new master to get elected + timeout: 90 diff --git a/e2e-tests/tests/self-healing/12-write-data.yaml b/e2e-tests/tests/self-healing/12-write-data.yaml new file mode 100644 index 000000000..3dce72866 --- /dev/null +++ b/e2e-tests/tests/self-healing/12-write-data.yaml @@ -0,0 +1,12 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + run_mysql \ + "INSERT myDB.myTable (id) VALUES (100503)" \ + "-h $(get_haproxy_svc $(get_cluster_name)) -uroot -proot_password" diff --git a/e2e-tests/tests/self-healing/13-assert.yaml b/e2e-tests/tests/self-healing/13-assert.yaml new file mode 100644 index 000000000..7d2f48cde --- /dev/null +++ b/e2e-tests/tests/self-healing/13-assert.yaml @@ -0,0 +1,36 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 30 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 13-read-from-replicas-0 +data: + data: |2- + 100500 + 
100501 + 100502 + 100503 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 13-read-from-replicas-1 +data: + data: |2- + 100500 + 100501 + 100502 + 100503 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 13-read-from-replicas-2 +data: + data: |2- + 100500 + 100501 + 100502 + 100503 diff --git a/e2e-tests/tests/self-healing/13-read-from-replicas.yaml b/e2e-tests/tests/self-healing/13-read-from-replicas.yaml new file mode 100644 index 000000000..77b99efc0 --- /dev/null +++ b/e2e-tests/tests/self-healing/13-read-from-replicas.yaml @@ -0,0 +1,15 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 30 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + for i in 0 1 2; do + host=$(get_mysql_headless_fqdn $(get_cluster_name) $i) + data=$(run_mysql "SELECT * FROM myDB.myTable" "-h ${host} -uroot -proot_password") + kubectl create configmap -n "${NAMESPACE}" 13-read-from-replicas-${i} --from-literal=data="${data}" + done diff --git a/e2e-tests/tests/self-healing/14-assert.yaml b/e2e-tests/tests/self-healing/14-assert.yaml new file mode 100644 index 000000000..bb14e7f81 --- /dev/null +++ b/e2e-tests/tests/self-healing/14-assert.yaml @@ -0,0 +1,79 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: self-healing-mysql +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: self-healing-orc +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: self-healing-haproxy +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +apiVersion: ps.percona.com/v1alpha1 +kind: PerconaServerMySQL +metadata: + name: 
self-healing + finalizers: + - delete-mysql-pods-in-order +status: + haproxy: + ready: 3 + size: 3 + state: ready + mysql: + ready: 3 + size: 3 + state: ready + orchestrator: + ready: 3 + size: 3 + state: ready + state: ready +--- +apiVersion: chaos-mesh.org/v1alpha1 +kind: PodChaos +metadata: + name: chaos-kill-label-cluster-crash +spec: + action: pod-kill + mode: all +status: + experiment: + containerRecords: + - events: + - operation: Apply + type: Succeeded + injectedCount: 1 + phase: Injected + recoveredCount: 0 + selectorKey: . + desiredPhase: Run diff --git a/e2e-tests/tests/self-healing/14-cluster-crash.yaml b/e2e-tests/tests/self-healing/14-cluster-crash.yaml new file mode 100644 index 000000000..f4890c894 --- /dev/null +++ b/e2e-tests/tests/self-healing/14-cluster-crash.yaml @@ -0,0 +1,12 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 30 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + kill_pods "${NAMESPACE}" "label" "app.kubernetes.io/instance" "self-healing" "cluster-crash" + sleep 30 # wait for crash diff --git a/e2e-tests/tests/self-healing/15-write-data.yaml b/e2e-tests/tests/self-healing/15-write-data.yaml new file mode 100644 index 000000000..9c5791f6d --- /dev/null +++ b/e2e-tests/tests/self-healing/15-write-data.yaml @@ -0,0 +1,12 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + run_mysql \ + "INSERT myDB.myTable (id) VALUES (100504)" \ + "-h $(get_haproxy_svc $(get_cluster_name)) -uroot -proot_password" diff --git a/e2e-tests/tests/self-healing/16-assert.yaml b/e2e-tests/tests/self-healing/16-assert.yaml new file mode 100644 index 000000000..36febd696 --- /dev/null +++ b/e2e-tests/tests/self-healing/16-assert.yaml @@ -0,0 +1,39 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 30 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 16-read-from-replicas-0 +data: + data: |2- + 100500 + 
100501 + 100502 + 100503 + 100504 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 16-read-from-replicas-1 +data: + data: |2- + 100500 + 100501 + 100502 + 100503 + 100504 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 16-read-from-replicas-2 +data: + data: |2- + 100500 + 100501 + 100502 + 100503 + 100504 diff --git a/e2e-tests/tests/self-healing/16-read-from-replicas.yaml b/e2e-tests/tests/self-healing/16-read-from-replicas.yaml new file mode 100644 index 000000000..ab4cfa84c --- /dev/null +++ b/e2e-tests/tests/self-healing/16-read-from-replicas.yaml @@ -0,0 +1,15 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 30 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + for i in 0 1 2; do + host=$(get_mysql_headless_fqdn $(get_cluster_name) $i) + data=$(run_mysql "SELECT * FROM myDB.myTable" "-h ${host} -uroot -proot_password") + kubectl create configmap -n "${NAMESPACE}" 16-read-from-replicas-${i} --from-literal=data="${data}" + done diff --git a/e2e-tests/tests/self-healing/17-destroy-chaos-mesh.yaml b/e2e-tests/tests/self-healing/17-destroy-chaos-mesh.yaml new file mode 100644 index 000000000..3f0cbc6b8 --- /dev/null +++ b/e2e-tests/tests/self-healing/17-destroy-chaos-mesh.yaml @@ -0,0 +1,12 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 120 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + destroy_chaos_mesh + timeout: 120 diff --git a/e2e-tests/tests/self-healing/18-drop-finalizer.yaml b/e2e-tests/tests/self-healing/18-drop-finalizer.yaml new file mode 100644 index 000000000..38885e7ef --- /dev/null +++ b/e2e-tests/tests/self-healing/18-drop-finalizer.yaml @@ -0,0 +1,5 @@ +apiVersion: ps.percona.com/v1alpha1 +kind: PerconaServerMySQL +metadata: + name: self-healing + finalizers: [] From 85fe299cad47ed770a084e0936349497d47c4f34 Mon Sep 17 00:00:00 2001 From: Tomislav Plavcic Date: Mon, 14 Aug 2023 18:46:33 +0200 Subject: [PATCH 03/10] K8SPS-73 - 
Add gr-self-healing test --- Jenkinsfile | 1 + e2e-tests/functions | 4 + e2e-tests/run-distro.csv | 1 + e2e-tests/run-minikube.csv | 1 + e2e-tests/run-pr.csv | 1 + e2e-tests/run-release.csv | 1 + .../tests/gr-self-healing/00-assert.yaml | 26 +++ .../gr-self-healing/00-deploy-operator.yaml | 14 ++ .../tests/gr-self-healing/01-assert.yaml | 27 +++ .../gr-self-healing/01-deploy-chaos-mesh.yaml | 11 ++ .../tests/gr-self-healing/02-assert.yaml | 144 +++++++++++++++ .../gr-self-healing/02-create-cluster.yaml | 20 +++ .../tests/gr-self-healing/03-write-data.yaml | 16 ++ .../tests/gr-self-healing/04-assert.yaml | 10 ++ .../gr-self-healing/04-read-from-primary.yaml | 13 ++ .../tests/gr-self-healing/05-assert.yaml | 163 +++++++++++++++++ .../gr-self-healing/05-kill-primary.yaml | 18 ++ .../tests/gr-self-healing/06-write-data.yaml | 12 ++ .../tests/gr-self-healing/07-assert.yaml | 30 ++++ .../07-read-from-replicas.yaml | 15 ++ .../tests/gr-self-healing/08-assert.yaml | 166 +++++++++++++++++ .../gr-self-healing/08-failure-primary.yaml | 12 ++ .../tests/gr-self-healing/09-write-data.yaml | 12 ++ .../tests/gr-self-healing/10-assert.yaml | 33 ++++ .../10-read-from-replicas.yaml | 15 ++ .../tests/gr-self-healing/11-assert.yaml | 170 ++++++++++++++++++ .../11-network-loss-primary.yaml | 13 ++ .../tests/gr-self-healing/12-write-data.yaml | 12 ++ .../tests/gr-self-healing/13-assert.yaml | 36 ++++ .../13-read-from-replicas.yaml | 15 ++ .../tests/gr-self-healing/14-assert.yaml | 163 +++++++++++++++++ .../gr-self-healing/14-cluster-crash.yaml | 12 ++ .../tests/gr-self-healing/15-write-data.yaml | 12 ++ .../tests/gr-self-healing/16-assert.yaml | 39 ++++ .../16-read-from-replicas.yaml | 15 ++ .../17-destroy-chaos-mesh.yaml | 12 ++ .../gr-self-healing/18-drop-finalizer.yaml | 5 + e2e-tests/tests/self-healing/08-assert.yaml | 2 +- 38 files changed, 1271 insertions(+), 1 deletion(-) create mode 100644 e2e-tests/tests/gr-self-healing/00-assert.yaml create mode 100644 
e2e-tests/tests/gr-self-healing/00-deploy-operator.yaml create mode 100644 e2e-tests/tests/gr-self-healing/01-assert.yaml create mode 100644 e2e-tests/tests/gr-self-healing/01-deploy-chaos-mesh.yaml create mode 100644 e2e-tests/tests/gr-self-healing/02-assert.yaml create mode 100644 e2e-tests/tests/gr-self-healing/02-create-cluster.yaml create mode 100644 e2e-tests/tests/gr-self-healing/03-write-data.yaml create mode 100644 e2e-tests/tests/gr-self-healing/04-assert.yaml create mode 100644 e2e-tests/tests/gr-self-healing/04-read-from-primary.yaml create mode 100644 e2e-tests/tests/gr-self-healing/05-assert.yaml create mode 100644 e2e-tests/tests/gr-self-healing/05-kill-primary.yaml create mode 100644 e2e-tests/tests/gr-self-healing/06-write-data.yaml create mode 100644 e2e-tests/tests/gr-self-healing/07-assert.yaml create mode 100644 e2e-tests/tests/gr-self-healing/07-read-from-replicas.yaml create mode 100644 e2e-tests/tests/gr-self-healing/08-assert.yaml create mode 100644 e2e-tests/tests/gr-self-healing/08-failure-primary.yaml create mode 100644 e2e-tests/tests/gr-self-healing/09-write-data.yaml create mode 100644 e2e-tests/tests/gr-self-healing/10-assert.yaml create mode 100644 e2e-tests/tests/gr-self-healing/10-read-from-replicas.yaml create mode 100644 e2e-tests/tests/gr-self-healing/11-assert.yaml create mode 100644 e2e-tests/tests/gr-self-healing/11-network-loss-primary.yaml create mode 100644 e2e-tests/tests/gr-self-healing/12-write-data.yaml create mode 100644 e2e-tests/tests/gr-self-healing/13-assert.yaml create mode 100644 e2e-tests/tests/gr-self-healing/13-read-from-replicas.yaml create mode 100644 e2e-tests/tests/gr-self-healing/14-assert.yaml create mode 100644 e2e-tests/tests/gr-self-healing/14-cluster-crash.yaml create mode 100644 e2e-tests/tests/gr-self-healing/15-write-data.yaml create mode 100644 e2e-tests/tests/gr-self-healing/16-assert.yaml create mode 100644 e2e-tests/tests/gr-self-healing/16-read-from-replicas.yaml create mode 100644 
e2e-tests/tests/gr-self-healing/17-destroy-chaos-mesh.yaml create mode 100644 e2e-tests/tests/gr-self-healing/18-drop-finalizer.yaml diff --git a/Jenkinsfile b/Jenkinsfile index 7d7355783..50a8ed719 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -275,6 +275,7 @@ void prepareNode() { # v0.15.0 kuttl version kubectl krew install --manifest-url https://raw.githubusercontent.com/kubernetes-sigs/krew-index/a67f31ecb2e62f15149ca66d096357050f07b77d/plugins/kuttl.yaml printf "%s is installed" "$(kubectl kuttl --version)" + kubectl krew install assert ''' } diff --git a/e2e-tests/functions b/e2e-tests/functions index 117989929..2358a4b5c 100755 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -476,6 +476,10 @@ get_primary_from_haproxy() { run_mysql "SHOW VARIABLES LIKE '%hostname%';" "-h ${haproxy_pod_ip} -P3306 -uroot -proot_password" | awk '{print $2}' } +get_primary_from_group_replication() { + run_mysql "SELECT MEMBER_HOST FROM performance_schema.replication_group_members where MEMBER_ROLE='PRIMARY';" "-h $(get_mysql_router_service $(get_cluster_name)) -P 6446 -uroot -proot_password" | cut -d'.' 
-f1 +} + verify_certificate_sans() { local certificate=$1 local expected_sans=$2 diff --git a/e2e-tests/run-distro.csv b/e2e-tests/run-distro.csv index bf954881e..64cca21da 100644 --- a/e2e-tests/run-distro.csv +++ b/e2e-tests/run-distro.csv @@ -7,6 +7,7 @@ gr-haproxy gr-init-deploy gr-one-pod gr-scaling +gr-self-healing gr-tls-cert-manager haproxy init-deploy diff --git a/e2e-tests/run-minikube.csv b/e2e-tests/run-minikube.csv index c6c9b46a4..afec27fa2 100644 --- a/e2e-tests/run-minikube.csv +++ b/e2e-tests/run-minikube.csv @@ -7,6 +7,7 @@ gr-haproxy gr-init-deploy gr-one-pod gr-scaling +gr-self-healing gr-tls-cert-manager haproxy init-deploy diff --git a/e2e-tests/run-pr.csv b/e2e-tests/run-pr.csv index 828a2afd7..0c3b7ba8b 100644 --- a/e2e-tests/run-pr.csv +++ b/e2e-tests/run-pr.csv @@ -10,6 +10,7 @@ gr-ignore-annotations gr-init-deploy gr-one-pod gr-scaling +gr-self-healing gr-tls-cert-manager haproxy init-deploy diff --git a/e2e-tests/run-release.csv b/e2e-tests/run-release.csv index c7adf1e91..dbba00417 100644 --- a/e2e-tests/run-release.csv +++ b/e2e-tests/run-release.csv @@ -9,6 +9,7 @@ gr-ignore-annotations gr-init-deploy gr-one-pod gr-scaling +gr-self-healing gr-tls-cert-manager haproxy init-deploy diff --git a/e2e-tests/tests/gr-self-healing/00-assert.yaml b/e2e-tests/tests/gr-self-healing/00-assert.yaml new file mode 100644 index 000000000..d9146fe1b --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/00-assert.yaml @@ -0,0 +1,26 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: perconaservermysqls.ps.percona.com +spec: + group: ps.percona.com + names: + kind: PerconaServerMySQL + listKind: PerconaServerMySQLList + plural: perconaservermysqls + shortNames: + - ps + singular: perconaservermysql + scope: Namespaced +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +metadata: + name: check-operator-deploy-status +timeout: 120 +commands: + - 
script: kubectl assert exist-enhanced deployment percona-server-mysql-operator -n ${OPERATOR_NS:-$NAMESPACE} --field-selector status.readyReplicas=1 diff --git a/e2e-tests/tests/gr-self-healing/00-deploy-operator.yaml b/e2e-tests/tests/gr-self-healing/00-deploy-operator.yaml new file mode 100644 index 000000000..67307fe5d --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/00-deploy-operator.yaml @@ -0,0 +1,14 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 10 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + deploy_operator + deploy_non_tls_cluster_secrets + deploy_tls_cluster_secrets + deploy_client diff --git a/e2e-tests/tests/gr-self-healing/01-assert.yaml b/e2e-tests/tests/gr-self-healing/01-assert.yaml new file mode 100644 index 000000000..9caa36184 --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/01-assert.yaml @@ -0,0 +1,27 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chaos-controller-manager +spec: + replicas: 3 +status: + availableReplicas: 3 + readyReplicas: 3 + replicas: 3 + updatedReplicas: 3 +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: chaos-daemon +status: + currentNumberScheduled: 3 + desiredNumberScheduled: 3 + numberAvailable: 3 + numberMisscheduled: 0 + numberReady: 3 + updatedNumberScheduled: 3 diff --git a/e2e-tests/tests/gr-self-healing/01-deploy-chaos-mesh.yaml b/e2e-tests/tests/gr-self-healing/01-deploy-chaos-mesh.yaml new file mode 100644 index 000000000..2fcde5027 --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/01-deploy-chaos-mesh.yaml @@ -0,0 +1,11 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 10 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + deploy_chaos_mesh diff --git a/e2e-tests/tests/gr-self-healing/02-assert.yaml b/e2e-tests/tests/gr-self-healing/02-assert.yaml new file mode 100644 index 000000000..85cf66437 --- 
/dev/null +++ b/e2e-tests/tests/gr-self-healing/02-assert.yaml @@ -0,0 +1,144 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 420 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: gr-self-healing-mysql +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: gr-self-healing-router +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + updatedReplicas: 3 +--- +apiVersion: ps.percona.com/v1alpha1 +kind: PerconaServerMySQL +metadata: + name: gr-self-healing + finalizers: + - delete-mysql-pods-in-order +status: + mysql: + ready: 3 + size: 3 + state: ready + router: + ready: 3 + size: 3 + state: ready +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: mysql + app.kubernetes.io/instance: gr-self-healing + app.kubernetes.io/managed-by: percona-server-operator + app.kubernetes.io/name: percona-server + app.kubernetes.io/part-of: percona-server + name: gr-self-healing-mysql + ownerReferences: + - apiVersion: ps.percona.com/v1alpha1 + blockOwnerDeletion: true + controller: true + kind: PerconaServerMySQL + name: gr-self-healing +spec: + clusterIP: None + ports: + - name: mysql + port: 3306 + protocol: TCP + targetPort: 3306 + - name: mysql-admin + port: 33062 + protocol: TCP + targetPort: 33062 + - name: mysqlx + port: 33060 + protocol: TCP + targetPort: 33060 + - name: http + port: 6033 + protocol: TCP + targetPort: 6033 + - name: mysql-gr + port: 33061 + protocol: TCP + targetPort: 33061 + selector: + app.kubernetes.io/component: mysql + app.kubernetes.io/instance: gr-self-healing + app.kubernetes.io/managed-by: percona-server-operator + app.kubernetes.io/name: percona-server + app.kubernetes.io/part-of: percona-server + sessionAffinity: None + type: ClusterIP +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: router + 
app.kubernetes.io/instance: gr-self-healing + app.kubernetes.io/managed-by: percona-server-operator + app.kubernetes.io/name: percona-server + app.kubernetes.io/part-of: percona-server + name: gr-self-healing-router + ownerReferences: + - apiVersion: ps.percona.com/v1alpha1 + blockOwnerDeletion: true + controller: true + kind: PerconaServerMySQL + name: gr-self-healing +spec: + ports: + - name: http + port: 8443 + protocol: TCP + targetPort: 8443 + - name: rw-default + port: 3306 + protocol: TCP + targetPort: 6446 + - name: read-write + port: 6446 + protocol: TCP + targetPort: 6446 + - name: read-only + port: 6447 + protocol: TCP + targetPort: 6447 + - name: x-read-write + port: 6448 + protocol: TCP + targetPort: 6448 + - name: x-read-only + port: 6449 + protocol: TCP + targetPort: 6449 + - name: rw-admin + port: 33062 + protocol: TCP + targetPort: 33062 + selector: + app.kubernetes.io/component: router + app.kubernetes.io/instance: gr-self-healing + app.kubernetes.io/managed-by: percona-server-operator + app.kubernetes.io/name: percona-server + app.kubernetes.io/part-of: percona-server + sessionAffinity: None + type: ClusterIP diff --git a/e2e-tests/tests/gr-self-healing/02-create-cluster.yaml b/e2e-tests/tests/gr-self-healing/02-create-cluster.yaml new file mode 100644 index 000000000..4f0779353 --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/02-create-cluster.yaml @@ -0,0 +1,20 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 10 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + get_cr \ + | yq eval '.spec.mysql.clusterType="group-replication"' - \ + | yq eval '.spec.mysql.size=3' - \ + | yq eval '.spec.mysql.affinity.antiAffinityTopologyKey="none"' - \ + | yq eval '.spec.proxy.haproxy.enabled=false' - \ + | yq eval '.spec.proxy.router.enabled=true' - \ + | yq eval '.spec.proxy.router.size=3' - \ + | yq eval '.spec.proxy.router.affinity.antiAffinityTopologyKey="none"' - \ + | yq eval 
'.spec.orchestrator.enabled=false' - \ + | kubectl -n "${NAMESPACE}" apply -f - diff --git a/e2e-tests/tests/gr-self-healing/03-write-data.yaml b/e2e-tests/tests/gr-self-healing/03-write-data.yaml new file mode 100644 index 000000000..1bbd291a9 --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/03-write-data.yaml @@ -0,0 +1,16 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + run_mysql \ + "CREATE DATABASE IF NOT EXISTS myDB; CREATE TABLE IF NOT EXISTS myDB.myTable (id int PRIMARY KEY)" \ + "-h $(get_mysql_router_service $(get_cluster_name)) -P 6446 -uroot -proot_password" + + run_mysql \ + "INSERT myDB.myTable (id) VALUES (100500)" \ + "-h $(get_mysql_router_service $(get_cluster_name)) -P 6446 -uroot -proot_password" diff --git a/e2e-tests/tests/gr-self-healing/04-assert.yaml b/e2e-tests/tests/gr-self-healing/04-assert.yaml new file mode 100644 index 000000000..8a8037060 --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/04-assert.yaml @@ -0,0 +1,10 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 30 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 04-read-from-primary +data: + data: "100500" diff --git a/e2e-tests/tests/gr-self-healing/04-read-from-primary.yaml b/e2e-tests/tests/gr-self-healing/04-read-from-primary.yaml new file mode 100644 index 000000000..68e1755aa --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/04-read-from-primary.yaml @@ -0,0 +1,13 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 30 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + data=$(run_mysql "SELECT * FROM myDB.myTable" "-h $(get_mysql_router_service $(get_cluster_name)) -P 6446 -uroot -proot_password") + + kubectl create configmap -n "${NAMESPACE}" 04-read-from-primary --from-literal=data="${data}" diff --git a/e2e-tests/tests/gr-self-healing/05-assert.yaml b/e2e-tests/tests/gr-self-healing/05-assert.yaml new file 
mode 100644 index 000000000..e107d4585 --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/05-assert.yaml @@ -0,0 +1,163 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: gr-self-healing-mysql +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: gr-self-healing-router +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + updatedReplicas: 3 +--- +apiVersion: ps.percona.com/v1alpha1 +kind: PerconaServerMySQL +metadata: + name: gr-self-healing + finalizers: + - delete-mysql-pods-in-order +status: + mysql: + ready: 3 + size: 3 + state: ready + router: + ready: 3 + size: 3 + state: ready +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: mysql + app.kubernetes.io/instance: gr-self-healing + app.kubernetes.io/managed-by: percona-server-operator + app.kubernetes.io/name: percona-server + app.kubernetes.io/part-of: percona-server + name: gr-self-healing-mysql + ownerReferences: + - apiVersion: ps.percona.com/v1alpha1 + blockOwnerDeletion: true + controller: true + kind: PerconaServerMySQL + name: gr-self-healing +spec: + clusterIP: None + ports: + - name: mysql + port: 3306 + protocol: TCP + targetPort: 3306 + - name: mysql-admin + port: 33062 + protocol: TCP + targetPort: 33062 + - name: mysqlx + port: 33060 + protocol: TCP + targetPort: 33060 + - name: http + port: 6033 + protocol: TCP + targetPort: 6033 + - name: mysql-gr + port: 33061 + protocol: TCP + targetPort: 33061 + selector: + app.kubernetes.io/component: mysql + app.kubernetes.io/instance: gr-self-healing + app.kubernetes.io/managed-by: percona-server-operator + app.kubernetes.io/name: percona-server + app.kubernetes.io/part-of: percona-server + sessionAffinity: None + type: ClusterIP +--- +apiVersion: v1 +kind: Service +metadata: + labels: + 
app.kubernetes.io/component: router + app.kubernetes.io/instance: gr-self-healing + app.kubernetes.io/managed-by: percona-server-operator + app.kubernetes.io/name: percona-server + app.kubernetes.io/part-of: percona-server + name: gr-self-healing-router + ownerReferences: + - apiVersion: ps.percona.com/v1alpha1 + blockOwnerDeletion: true + controller: true + kind: PerconaServerMySQL + name: gr-self-healing +spec: + ports: + - name: http + port: 8443 + protocol: TCP + targetPort: 8443 + - name: rw-default + port: 3306 + protocol: TCP + targetPort: 6446 + - name: read-write + port: 6446 + protocol: TCP + targetPort: 6446 + - name: read-only + port: 6447 + protocol: TCP + targetPort: 6447 + - name: x-read-write + port: 6448 + protocol: TCP + targetPort: 6448 + - name: x-read-only + port: 6449 + protocol: TCP + targetPort: 6449 + - name: rw-admin + port: 33062 + protocol: TCP + targetPort: 33062 + selector: + app.kubernetes.io/component: router + app.kubernetes.io/instance: gr-self-healing + app.kubernetes.io/managed-by: percona-server-operator + app.kubernetes.io/name: percona-server + app.kubernetes.io/part-of: percona-server + sessionAffinity: None + type: ClusterIP +--- +apiVersion: chaos-mesh.org/v1alpha1 +kind: PodChaos +metadata: + name: chaos-pod-kill-primary +spec: + action: pod-kill + mode: one +status: + experiment: + containerRecords: + - events: + - operation: Apply + type: Succeeded + injectedCount: 1 + phase: Injected + recoveredCount: 0 + selectorKey: . 
+ desiredPhase: Run diff --git a/e2e-tests/tests/gr-self-healing/05-kill-primary.yaml b/e2e-tests/tests/gr-self-healing/05-kill-primary.yaml new file mode 100644 index 000000000..255f6a0b6 --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/05-kill-primary.yaml @@ -0,0 +1,18 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 30 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + init_pod="$(get_primary_from_group_replication)" + kill_pods "${NAMESPACE}" "pod" "$init_pod" "" "primary" + sleep 10 # wait a bit for pod to be killed + + if [ "$init_pod" == "$(get_primary_from_group_replication)" ]; then + echo "primary pod was not killed! something went wrong." + exit 1 + fi diff --git a/e2e-tests/tests/gr-self-healing/06-write-data.yaml b/e2e-tests/tests/gr-self-healing/06-write-data.yaml new file mode 100644 index 000000000..a8da85c54 --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/06-write-data.yaml @@ -0,0 +1,12 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + run_mysql \ + "INSERT myDB.myTable (id) VALUES (100501)" \ + "-h $(get_mysql_router_service $(get_cluster_name)) -P 6446 -uroot -proot_password" diff --git a/e2e-tests/tests/gr-self-healing/07-assert.yaml b/e2e-tests/tests/gr-self-healing/07-assert.yaml new file mode 100644 index 000000000..d5acf9414 --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/07-assert.yaml @@ -0,0 +1,30 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 30 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 07-read-from-replicas-0 +data: + data: |- + 100500 + 100501 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 07-read-from-replicas-1 +data: + data: |- + 100500 + 100501 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 07-read-from-replicas-2 +data: + data: |- + 100500 + 100501 diff --git a/e2e-tests/tests/gr-self-healing/07-read-from-replicas.yaml 
b/e2e-tests/tests/gr-self-healing/07-read-from-replicas.yaml new file mode 100644 index 000000000..30f4d2649 --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/07-read-from-replicas.yaml @@ -0,0 +1,15 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 30 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + for i in 0 1 2; do + host=$(get_mysql_headless_fqdn $(get_cluster_name) $i) + data=$(run_mysql "SELECT * FROM myDB.myTable" "-h ${host} -uroot -proot_password") + kubectl create configmap -n "${NAMESPACE}" 07-read-from-replicas-${i} --from-literal=data="${data}" + done diff --git a/e2e-tests/tests/gr-self-healing/08-assert.yaml b/e2e-tests/tests/gr-self-healing/08-assert.yaml new file mode 100644 index 000000000..eb55354ae --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/08-assert.yaml @@ -0,0 +1,166 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: gr-self-healing-mysql +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: gr-self-healing-router +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + updatedReplicas: 3 +--- +apiVersion: ps.percona.com/v1alpha1 +kind: PerconaServerMySQL +metadata: + name: gr-self-healing + finalizers: + - delete-mysql-pods-in-order +status: + mysql: + ready: 3 + size: 3 + state: ready + router: + ready: 3 + size: 3 + state: ready +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: mysql + app.kubernetes.io/instance: gr-self-healing + app.kubernetes.io/managed-by: percona-server-operator + app.kubernetes.io/name: percona-server + app.kubernetes.io/part-of: percona-server + name: gr-self-healing-mysql + ownerReferences: + - apiVersion: ps.percona.com/v1alpha1 + blockOwnerDeletion: true + controller: true + kind: 
PerconaServerMySQL + name: gr-self-healing +spec: + clusterIP: None + ports: + - name: mysql + port: 3306 + protocol: TCP + targetPort: 3306 + - name: mysql-admin + port: 33062 + protocol: TCP + targetPort: 33062 + - name: mysqlx + port: 33060 + protocol: TCP + targetPort: 33060 + - name: http + port: 6033 + protocol: TCP + targetPort: 6033 + - name: mysql-gr + port: 33061 + protocol: TCP + targetPort: 33061 + selector: + app.kubernetes.io/component: mysql + app.kubernetes.io/instance: gr-self-healing + app.kubernetes.io/managed-by: percona-server-operator + app.kubernetes.io/name: percona-server + app.kubernetes.io/part-of: percona-server + sessionAffinity: None + type: ClusterIP +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: router + app.kubernetes.io/instance: gr-self-healing + app.kubernetes.io/managed-by: percona-server-operator + app.kubernetes.io/name: percona-server + app.kubernetes.io/part-of: percona-server + name: gr-self-healing-router + ownerReferences: + - apiVersion: ps.percona.com/v1alpha1 + blockOwnerDeletion: true + controller: true + kind: PerconaServerMySQL + name: gr-self-healing +spec: + ports: + - name: http + port: 8443 + protocol: TCP + targetPort: 8443 + - name: rw-default + port: 3306 + protocol: TCP + targetPort: 6446 + - name: read-write + port: 6446 + protocol: TCP + targetPort: 6446 + - name: read-only + port: 6447 + protocol: TCP + targetPort: 6447 + - name: x-read-write + port: 6448 + protocol: TCP + targetPort: 6448 + - name: x-read-only + port: 6449 + protocol: TCP + targetPort: 6449 + - name: rw-admin + port: 33062 + protocol: TCP + targetPort: 33062 + selector: + app.kubernetes.io/component: router + app.kubernetes.io/instance: gr-self-healing + app.kubernetes.io/managed-by: percona-server-operator + app.kubernetes.io/name: percona-server + app.kubernetes.io/part-of: percona-server + sessionAffinity: None + type: ClusterIP +--- +apiVersion: chaos-mesh.org/v1alpha1 +kind: PodChaos 
+metadata: + name: chaos-pod-failure-primary +spec: + action: pod-failure + duration: 60s + mode: one +status: + experiment: + containerRecords: + - events: + - operation: Apply + type: Succeeded + - operation: Recover + type: Succeeded + injectedCount: 1 + phase: Not Injected + recoveredCount: 1 + selectorKey: . + desiredPhase: Stop diff --git a/e2e-tests/tests/gr-self-healing/08-failure-primary.yaml b/e2e-tests/tests/gr-self-healing/08-failure-primary.yaml new file mode 100644 index 000000000..59d48c526 --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/08-failure-primary.yaml @@ -0,0 +1,12 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 30 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + failure_pod "${NAMESPACE}" "$(get_primary_from_group_replication)" "primary" + sleep 10 # wait a bit for pod failure to be injected diff --git a/e2e-tests/tests/gr-self-healing/09-write-data.yaml b/e2e-tests/tests/gr-self-healing/09-write-data.yaml new file mode 100644 index 000000000..f3bb7eb48 --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/09-write-data.yaml @@ -0,0 +1,12 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + run_mysql \ + "INSERT myDB.myTable (id) VALUES (100502)" \ + "-h $(get_mysql_router_service $(get_cluster_name)) -P 6446 -uroot -proot_password" diff --git a/e2e-tests/tests/gr-self-healing/10-assert.yaml b/e2e-tests/tests/gr-self-healing/10-assert.yaml new file mode 100644 index 000000000..2f9ba0826 --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/10-assert.yaml @@ -0,0 +1,33 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 30 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 10-read-from-replicas-0 +data: + data: |- + 100500 + 100501 + 100502 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 10-read-from-replicas-1 +data: + data: |- + 100500 + 100501 + 100502 +--- +kind: ConfigMap 
+apiVersion: v1 +metadata: + name: 10-read-from-replicas-2 +data: + data: |- + 100500 + 100501 + 100502 diff --git a/e2e-tests/tests/gr-self-healing/10-read-from-replicas.yaml b/e2e-tests/tests/gr-self-healing/10-read-from-replicas.yaml new file mode 100644 index 000000000..55a419ab7 --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/10-read-from-replicas.yaml @@ -0,0 +1,15 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 30 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + for i in 0 1 2; do + host=$(get_mysql_headless_fqdn $(get_cluster_name) $i) + data=$(run_mysql "SELECT * FROM myDB.myTable" "-h ${host} -uroot -proot_password") + kubectl create configmap -n "${NAMESPACE}" 10-read-from-replicas-${i} --from-literal=data="${data}" + done diff --git a/e2e-tests/tests/gr-self-healing/11-assert.yaml b/e2e-tests/tests/gr-self-healing/11-assert.yaml new file mode 100644 index 000000000..972daa107 --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/11-assert.yaml @@ -0,0 +1,170 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: gr-self-healing-mysql +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: gr-self-healing-router +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + updatedReplicas: 3 +--- +apiVersion: ps.percona.com/v1alpha1 +kind: PerconaServerMySQL +metadata: + name: gr-self-healing + finalizers: + - delete-mysql-pods-in-order +status: + mysql: + ready: 3 + size: 3 + state: ready + router: + ready: 3 + size: 3 + state: ready +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: mysql + app.kubernetes.io/instance: gr-self-healing + app.kubernetes.io/managed-by: percona-server-operator + app.kubernetes.io/name: percona-server + 
app.kubernetes.io/part-of: percona-server + name: gr-self-healing-mysql + ownerReferences: + - apiVersion: ps.percona.com/v1alpha1 + blockOwnerDeletion: true + controller: true + kind: PerconaServerMySQL + name: gr-self-healing +spec: + clusterIP: None + ports: + - name: mysql + port: 3306 + protocol: TCP + targetPort: 3306 + - name: mysql-admin + port: 33062 + protocol: TCP + targetPort: 33062 + - name: mysqlx + port: 33060 + protocol: TCP + targetPort: 33060 + - name: http + port: 6033 + protocol: TCP + targetPort: 6033 + - name: mysql-gr + port: 33061 + protocol: TCP + targetPort: 33061 + selector: + app.kubernetes.io/component: mysql + app.kubernetes.io/instance: gr-self-healing + app.kubernetes.io/managed-by: percona-server-operator + app.kubernetes.io/name: percona-server + app.kubernetes.io/part-of: percona-server + sessionAffinity: None + type: ClusterIP +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: router + app.kubernetes.io/instance: gr-self-healing + app.kubernetes.io/managed-by: percona-server-operator + app.kubernetes.io/name: percona-server + app.kubernetes.io/part-of: percona-server + name: gr-self-healing-router + ownerReferences: + - apiVersion: ps.percona.com/v1alpha1 + blockOwnerDeletion: true + controller: true + kind: PerconaServerMySQL + name: gr-self-healing +spec: + ports: + - name: http + port: 8443 + protocol: TCP + targetPort: 8443 + - name: rw-default + port: 3306 + protocol: TCP + targetPort: 6446 + - name: read-write + port: 6446 + protocol: TCP + targetPort: 6446 + - name: read-only + port: 6447 + protocol: TCP + targetPort: 6447 + - name: x-read-write + port: 6448 + protocol: TCP + targetPort: 6448 + - name: x-read-only + port: 6449 + protocol: TCP + targetPort: 6449 + - name: rw-admin + port: 33062 + protocol: TCP + targetPort: 33062 + selector: + app.kubernetes.io/component: router + app.kubernetes.io/instance: gr-self-healing + app.kubernetes.io/managed-by: percona-server-operator + 
app.kubernetes.io/name: percona-server + app.kubernetes.io/part-of: percona-server + sessionAffinity: None + type: ClusterIP +--- +apiVersion: chaos-mesh.org/v1alpha1 +kind: NetworkChaos +metadata: + name: chaos-pod-network-loss-primary +spec: + action: loss + direction: to + duration: 60s + loss: + correlation: "100" + loss: "100" + mode: one +status: + experiment: + containerRecords: + - events: + - operation: Apply + type: Succeeded + - operation: Recover + type: Succeeded + injectedCount: 1 + phase: Not Injected + recoveredCount: 1 + selectorKey: . + desiredPhase: Stop diff --git a/e2e-tests/tests/gr-self-healing/11-network-loss-primary.yaml b/e2e-tests/tests/gr-self-healing/11-network-loss-primary.yaml new file mode 100644 index 000000000..f63192c0e --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/11-network-loss-primary.yaml @@ -0,0 +1,13 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 90 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + network_loss "${NAMESPACE}" "$(get_primary_from_group_replication)" "primary" + sleep 30 # wait for a new primary to be elected + timeout: 90 diff --git a/e2e-tests/tests/gr-self-healing/12-write-data.yaml b/e2e-tests/tests/gr-self-healing/12-write-data.yaml new file mode 100644 index 000000000..a683b8542 --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/12-write-data.yaml @@ -0,0 +1,12 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + run_mysql \ + "INSERT myDB.myTable (id) VALUES (100503)" \ + "-h $(get_mysql_router_service $(get_cluster_name)) -P 6446 -uroot -proot_password" diff --git a/e2e-tests/tests/gr-self-healing/13-assert.yaml b/e2e-tests/tests/gr-self-healing/13-assert.yaml new file mode 100644 index 000000000..7d2f48cde --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/13-assert.yaml @@ -0,0 +1,36 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 30 +--- 
+kind: ConfigMap +apiVersion: v1 +metadata: + name: 13-read-from-replicas-0 +data: + data: |2- + 100500 + 100501 + 100502 + 100503 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 13-read-from-replicas-1 +data: + data: |2- + 100500 + 100501 + 100502 + 100503 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 13-read-from-replicas-2 +data: + data: |2- + 100500 + 100501 + 100502 + 100503 diff --git a/e2e-tests/tests/gr-self-healing/13-read-from-replicas.yaml b/e2e-tests/tests/gr-self-healing/13-read-from-replicas.yaml new file mode 100644 index 000000000..77b99efc0 --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/13-read-from-replicas.yaml @@ -0,0 +1,15 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 30 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + for i in 0 1 2; do + host=$(get_mysql_headless_fqdn $(get_cluster_name) $i) + data=$(run_mysql "SELECT * FROM myDB.myTable" "-h ${host} -uroot -proot_password") + kubectl create configmap -n "${NAMESPACE}" 13-read-from-replicas-${i} --from-literal=data="${data}" + done diff --git a/e2e-tests/tests/gr-self-healing/14-assert.yaml b/e2e-tests/tests/gr-self-healing/14-assert.yaml new file mode 100644 index 000000000..aa4b1dd8d --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/14-assert.yaml @@ -0,0 +1,163 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 420 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: gr-self-healing-mysql +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + currentReplicas: 3 + updatedReplicas: 3 + collisionCount: 0 +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: gr-self-healing-router +status: + observedGeneration: 1 + replicas: 3 + readyReplicas: 3 + updatedReplicas: 3 +--- +apiVersion: ps.percona.com/v1alpha1 +kind: PerconaServerMySQL +metadata: + name: gr-self-healing + finalizers: + - delete-mysql-pods-in-order +status: + mysql: + ready: 3 + size: 3 + state: ready + 
router: + ready: 3 + size: 3 + state: ready +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: mysql + app.kubernetes.io/instance: gr-self-healing + app.kubernetes.io/managed-by: percona-server-operator + app.kubernetes.io/name: percona-server + app.kubernetes.io/part-of: percona-server + name: gr-self-healing-mysql + ownerReferences: + - apiVersion: ps.percona.com/v1alpha1 + blockOwnerDeletion: true + controller: true + kind: PerconaServerMySQL + name: gr-self-healing +spec: + clusterIP: None + ports: + - name: mysql + port: 3306 + protocol: TCP + targetPort: 3306 + - name: mysql-admin + port: 33062 + protocol: TCP + targetPort: 33062 + - name: mysqlx + port: 33060 + protocol: TCP + targetPort: 33060 + - name: http + port: 6033 + protocol: TCP + targetPort: 6033 + - name: mysql-gr + port: 33061 + protocol: TCP + targetPort: 33061 + selector: + app.kubernetes.io/component: mysql + app.kubernetes.io/instance: gr-self-healing + app.kubernetes.io/managed-by: percona-server-operator + app.kubernetes.io/name: percona-server + app.kubernetes.io/part-of: percona-server + sessionAffinity: None + type: ClusterIP +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: router + app.kubernetes.io/instance: gr-self-healing + app.kubernetes.io/managed-by: percona-server-operator + app.kubernetes.io/name: percona-server + app.kubernetes.io/part-of: percona-server + name: gr-self-healing-router + ownerReferences: + - apiVersion: ps.percona.com/v1alpha1 + blockOwnerDeletion: true + controller: true + kind: PerconaServerMySQL + name: gr-self-healing +spec: + ports: + - name: http + port: 8443 + protocol: TCP + targetPort: 8443 + - name: rw-default + port: 3306 + protocol: TCP + targetPort: 6446 + - name: read-write + port: 6446 + protocol: TCP + targetPort: 6446 + - name: read-only + port: 6447 + protocol: TCP + targetPort: 6447 + - name: x-read-write + port: 6448 + protocol: TCP + targetPort: 6448 + - name: 
x-read-only + port: 6449 + protocol: TCP + targetPort: 6449 + - name: rw-admin + port: 33062 + protocol: TCP + targetPort: 33062 + selector: + app.kubernetes.io/component: router + app.kubernetes.io/instance: gr-self-healing + app.kubernetes.io/managed-by: percona-server-operator + app.kubernetes.io/name: percona-server + app.kubernetes.io/part-of: percona-server + sessionAffinity: None + type: ClusterIP +--- +apiVersion: chaos-mesh.org/v1alpha1 +kind: PodChaos +metadata: + name: chaos-kill-label-cluster-crash +spec: + action: pod-kill + mode: all +status: + experiment: + containerRecords: + - events: + - operation: Apply + type: Succeeded + injectedCount: 1 + phase: Injected + recoveredCount: 0 + selectorKey: . + desiredPhase: Run diff --git a/e2e-tests/tests/gr-self-healing/14-cluster-crash.yaml b/e2e-tests/tests/gr-self-healing/14-cluster-crash.yaml new file mode 100644 index 000000000..23a786193 --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/14-cluster-crash.yaml @@ -0,0 +1,12 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 30 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + kill_pods "${NAMESPACE}" "label" "app.kubernetes.io/instance" "gr-self-healing" "cluster-crash" + sleep 30 # wait for crash diff --git a/e2e-tests/tests/gr-self-healing/15-write-data.yaml b/e2e-tests/tests/gr-self-healing/15-write-data.yaml new file mode 100644 index 000000000..9152934e0 --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/15-write-data.yaml @@ -0,0 +1,12 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + run_mysql \ + "INSERT myDB.myTable (id) VALUES (100504)" \ + "-h $(get_mysql_router_service $(get_cluster_name)) -P 6446 -uroot -proot_password" diff --git a/e2e-tests/tests/gr-self-healing/16-assert.yaml b/e2e-tests/tests/gr-self-healing/16-assert.yaml new file mode 100644 index 000000000..36febd696 --- /dev/null +++ 
b/e2e-tests/tests/gr-self-healing/16-assert.yaml @@ -0,0 +1,39 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 30 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 16-read-from-replicas-0 +data: + data: |2- + 100500 + 100501 + 100502 + 100503 + 100504 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 16-read-from-replicas-1 +data: + data: |2- + 100500 + 100501 + 100502 + 100503 + 100504 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 16-read-from-replicas-2 +data: + data: |2- + 100500 + 100501 + 100502 + 100503 + 100504 diff --git a/e2e-tests/tests/gr-self-healing/16-read-from-replicas.yaml b/e2e-tests/tests/gr-self-healing/16-read-from-replicas.yaml new file mode 100644 index 000000000..ab4cfa84c --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/16-read-from-replicas.yaml @@ -0,0 +1,15 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 30 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + for i in 0 1 2; do + host=$(get_mysql_headless_fqdn $(get_cluster_name) $i) + data=$(run_mysql "SELECT * FROM myDB.myTable" "-h ${host} -uroot -proot_password") + kubectl create configmap -n "${NAMESPACE}" 16-read-from-replicas-${i} --from-literal=data="${data}" + done diff --git a/e2e-tests/tests/gr-self-healing/17-destroy-chaos-mesh.yaml b/e2e-tests/tests/gr-self-healing/17-destroy-chaos-mesh.yaml new file mode 100644 index 000000000..3f0cbc6b8 --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/17-destroy-chaos-mesh.yaml @@ -0,0 +1,12 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 120 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + destroy_chaos_mesh + timeout: 120 diff --git a/e2e-tests/tests/gr-self-healing/18-drop-finalizer.yaml b/e2e-tests/tests/gr-self-healing/18-drop-finalizer.yaml new file mode 100644 index 000000000..98952bc22 --- /dev/null +++ b/e2e-tests/tests/gr-self-healing/18-drop-finalizer.yaml @@ -0,0 +1,5 @@ +apiVersion: 
ps.percona.com/v1alpha1 +kind: PerconaServerMySQL +metadata: + name: gr-self-healing + finalizers: [] diff --git a/e2e-tests/tests/self-healing/08-assert.yaml b/e2e-tests/tests/self-healing/08-assert.yaml index 31bcaec8d..32a988aa5 100644 --- a/e2e-tests/tests/self-healing/08-assert.yaml +++ b/e2e-tests/tests/self-healing/08-assert.yaml @@ -1,6 +1,6 @@ apiVersion: kuttl.dev/v1beta1 kind: TestAssert -timeout: 120 +timeout: 180 --- kind: StatefulSet apiVersion: apps/v1 From 279757517577193f9b5342f4ec7027dd4de8053f Mon Sep 17 00:00:00 2001 From: Tomislav Plavcic Date: Wed, 16 Aug 2023 17:50:05 +0200 Subject: [PATCH 04/10] Some fixes for self-healing tests --- .../tests/gr-self-healing/14-assert.yaml | 37 +++++++++++- .../tests/gr-self-healing/16-assert.yaml | 6 +- e2e-tests/tests/self-healing/13-assert.yaml | 6 +- e2e-tests/tests/self-healing/14-assert.yaml | 58 ++++++++++++++++++- e2e-tests/tests/self-healing/16-assert.yaml | 6 +- 5 files changed, 102 insertions(+), 11 deletions(-) diff --git a/e2e-tests/tests/gr-self-healing/14-assert.yaml b/e2e-tests/tests/gr-self-healing/14-assert.yaml index aa4b1dd8d..497b0571d 100644 --- a/e2e-tests/tests/gr-self-healing/14-assert.yaml +++ b/e2e-tests/tests/gr-self-healing/14-assert.yaml @@ -1,6 +1,6 @@ apiVersion: kuttl.dev/v1beta1 kind: TestAssert -timeout: 420 +timeout: 480 --- kind: StatefulSet apiVersion: apps/v1 @@ -153,6 +153,41 @@ spec: status: experiment: containerRecords: + - events: + - operation: Apply + type: Succeeded + injectedCount: 1 + phase: Injected + recoveredCount: 0 + selectorKey: . + - events: + - operation: Apply + type: Succeeded + injectedCount: 1 + phase: Injected + recoveredCount: 0 + selectorKey: . + - events: + - operation: Apply + type: Succeeded + injectedCount: 1 + phase: Injected + recoveredCount: 0 + selectorKey: . + - events: + - operation: Apply + type: Succeeded + injectedCount: 1 + phase: Injected + recoveredCount: 0 + selectorKey: . 
+ - events: + - operation: Apply + type: Succeeded + injectedCount: 1 + phase: Injected + recoveredCount: 0 + selectorKey: . - events: - operation: Apply type: Succeeded diff --git a/e2e-tests/tests/gr-self-healing/16-assert.yaml b/e2e-tests/tests/gr-self-healing/16-assert.yaml index 36febd696..5f278c285 100644 --- a/e2e-tests/tests/gr-self-healing/16-assert.yaml +++ b/e2e-tests/tests/gr-self-healing/16-assert.yaml @@ -7,7 +7,7 @@ apiVersion: v1 metadata: name: 16-read-from-replicas-0 data: - data: |2- + data: |- 100500 100501 100502 @@ -19,7 +19,7 @@ apiVersion: v1 metadata: name: 16-read-from-replicas-1 data: - data: |2- + data: |- 100500 100501 100502 @@ -31,7 +31,7 @@ apiVersion: v1 metadata: name: 16-read-from-replicas-2 data: - data: |2- + data: |- 100500 100501 100502 diff --git a/e2e-tests/tests/self-healing/13-assert.yaml b/e2e-tests/tests/self-healing/13-assert.yaml index 7d2f48cde..b21c4894e 100644 --- a/e2e-tests/tests/self-healing/13-assert.yaml +++ b/e2e-tests/tests/self-healing/13-assert.yaml @@ -7,7 +7,7 @@ apiVersion: v1 metadata: name: 13-read-from-replicas-0 data: - data: |2- + data: |- 100500 100501 100502 @@ -18,7 +18,7 @@ apiVersion: v1 metadata: name: 13-read-from-replicas-1 data: - data: |2- + data: |- 100500 100501 100502 @@ -29,7 +29,7 @@ apiVersion: v1 metadata: name: 13-read-from-replicas-2 data: - data: |2- + data: |- 100500 100501 100502 diff --git a/e2e-tests/tests/self-healing/14-assert.yaml b/e2e-tests/tests/self-healing/14-assert.yaml index bb14e7f81..fe47f563f 100644 --- a/e2e-tests/tests/self-healing/14-assert.yaml +++ b/e2e-tests/tests/self-healing/14-assert.yaml @@ -1,6 +1,6 @@ apiVersion: kuttl.dev/v1beta1 kind: TestAssert -timeout: 120 +timeout: 480 --- kind: StatefulSet apiVersion: apps/v1 @@ -69,6 +69,62 @@ spec: status: experiment: containerRecords: + - events: + - operation: Apply + type: Succeeded + injectedCount: 1 + phase: Injected + recoveredCount: 0 + selectorKey: . 
+ - events: + - operation: Apply + type: Succeeded + injectedCount: 1 + phase: Injected + recoveredCount: 0 + selectorKey: . + - events: + - operation: Apply + type: Succeeded + injectedCount: 1 + phase: Injected + recoveredCount: 0 + selectorKey: . + - events: + - operation: Apply + type: Succeeded + injectedCount: 1 + phase: Injected + recoveredCount: 0 + selectorKey: . + - events: + - operation: Apply + type: Succeeded + injectedCount: 1 + phase: Injected + recoveredCount: 0 + selectorKey: . + - events: + - operation: Apply + type: Succeeded + injectedCount: 1 + phase: Injected + recoveredCount: 0 + selectorKey: . + - events: + - operation: Apply + type: Succeeded + injectedCount: 1 + phase: Injected + recoveredCount: 0 + selectorKey: . + - events: + - operation: Apply + type: Succeeded + injectedCount: 1 + phase: Injected + recoveredCount: 0 + selectorKey: . - events: - operation: Apply type: Succeeded diff --git a/e2e-tests/tests/self-healing/16-assert.yaml b/e2e-tests/tests/self-healing/16-assert.yaml index 36febd696..5f278c285 100644 --- a/e2e-tests/tests/self-healing/16-assert.yaml +++ b/e2e-tests/tests/self-healing/16-assert.yaml @@ -7,7 +7,7 @@ apiVersion: v1 metadata: name: 16-read-from-replicas-0 data: - data: |2- + data: |- 100500 100501 100502 @@ -19,7 +19,7 @@ apiVersion: v1 metadata: name: 16-read-from-replicas-1 data: - data: |2- + data: |- 100500 100501 100502 @@ -31,7 +31,7 @@ apiVersion: v1 metadata: name: 16-read-from-replicas-2 data: - data: |2- + data: |- 100500 100501 100502 From af9302ca23ee1108dec97ba2ac6f1aa037b82909 Mon Sep 17 00:00:00 2001 From: Tomislav Plavcic Date: Wed, 16 Aug 2023 17:53:54 +0200 Subject: [PATCH 05/10] Remove OPERATOR_NS from Jenkinsfile since not supported yet --- Jenkinsfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 50a8ed719..a2e68a6cb 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -288,7 +288,6 @@ pipeline { environment { CLOUDSDK_CORE_DISABLE_PROMPTS = 1 CLEAN_NAMESPACE = 1 
- OPERATOR_NS = 'ps-operator' GIT_SHORT_COMMIT = sh(script: 'git rev-parse --short HEAD', , returnStdout: true).trim() VERSION = "${env.GIT_BRANCH}-${env.GIT_SHORT_COMMIT}" CLUSTER_NAME = sh(script: "echo jen-ps-${env.CHANGE_ID}-${GIT_SHORT_COMMIT}-${env.BUILD_NUMBER} | tr '[:upper:]' '[:lower:]'", , returnStdout: true).trim() From 49c3d93be1f8c9d4f41a052f0497c84966ad75fa Mon Sep 17 00:00:00 2001 From: Tomislav Plavcic Date: Thu, 17 Aug 2023 10:53:48 +0200 Subject: [PATCH 06/10] Fix operator-self-healing test --- e2e-tests/tests/operator-self-healing/05-kill-pod.yaml | 2 +- e2e-tests/tests/operator-self-healing/07-network-loss.yaml | 2 +- e2e-tests/tests/operator-self-healing/09-pod-failure.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/e2e-tests/tests/operator-self-healing/05-kill-pod.yaml b/e2e-tests/tests/operator-self-healing/05-kill-pod.yaml index 8304e98cc..7151998d3 100644 --- a/e2e-tests/tests/operator-self-healing/05-kill-pod.yaml +++ b/e2e-tests/tests/operator-self-healing/05-kill-pod.yaml @@ -9,7 +9,7 @@ commands: source ../../functions init_pod=$(get_operator_pod) - kill_pods "${OPERATOR_NS:-$NAMESPACE}" "pod" "$init_pod" + kill_pods "${OPERATOR_NS:-$NAMESPACE}" "pod" "$init_pod" "" "operator" sleep 10 # wait a bit for pod to be killed wait_deployment percona-server-mysql-operator "${OPERATOR_NS:-$NAMESPACE}" diff --git a/e2e-tests/tests/operator-self-healing/07-network-loss.yaml b/e2e-tests/tests/operator-self-healing/07-network-loss.yaml index b19885cdd..44fb0c5b1 100644 --- a/e2e-tests/tests/operator-self-healing/07-network-loss.yaml +++ b/e2e-tests/tests/operator-self-healing/07-network-loss.yaml @@ -8,5 +8,5 @@ commands: source ../../functions - network_loss "${OPERATOR_NS:-$NAMESPACE}" "$(get_operator_pod)" + network_loss "${OPERATOR_NS:-$NAMESPACE}" "$(get_operator_pod)" "operator" sleep 30 # wait for network loss to happen diff --git a/e2e-tests/tests/operator-self-healing/09-pod-failure.yaml 
b/e2e-tests/tests/operator-self-healing/09-pod-failure.yaml index e37ba4dd8..1abeeb648 100644 --- a/e2e-tests/tests/operator-self-healing/09-pod-failure.yaml +++ b/e2e-tests/tests/operator-self-healing/09-pod-failure.yaml @@ -8,5 +8,5 @@ commands: source ../../functions - failure_pod "${OPERATOR_NS:-$NAMESPACE}" "$(get_operator_pod)" + failure_pod "${OPERATOR_NS:-$NAMESPACE}" "$(get_operator_pod)" "operator" sleep 30 # wait for pod failure to happen From c12f42ecdc439ef31800278d7467f824e309568c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ege=20G=C3=BCne=C5=9F?= Date: Tue, 23 Jul 2024 11:43:46 +0300 Subject: [PATCH 07/10] fix assertions --- e2e-tests/tests/self-healing/01-deploy-chaos-mesh.yaml | 2 +- e2e-tests/tests/self-healing/02-assert.yaml | 2 +- e2e-tests/tests/self-healing/05-assert.yaml | 2 +- e2e-tests/tests/self-healing/08-assert.yaml | 2 +- e2e-tests/tests/self-healing/11-assert.yaml | 2 +- e2e-tests/tests/self-healing/14-assert.yaml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/e2e-tests/tests/self-healing/01-deploy-chaos-mesh.yaml b/e2e-tests/tests/self-healing/01-deploy-chaos-mesh.yaml index 2fcde5027..e5638625e 100644 --- a/e2e-tests/tests/self-healing/01-deploy-chaos-mesh.yaml +++ b/e2e-tests/tests/self-healing/01-deploy-chaos-mesh.yaml @@ -1,6 +1,5 @@ apiVersion: kuttl.dev/v1beta1 kind: TestStep -timeout: 10 commands: - script: |- set -o errexit @@ -9,3 +8,4 @@ commands: source ../../functions deploy_chaos_mesh + timeout: 120 diff --git a/e2e-tests/tests/self-healing/02-assert.yaml b/e2e-tests/tests/self-healing/02-assert.yaml index b689bcc78..f2da509cf 100644 --- a/e2e-tests/tests/self-healing/02-assert.yaml +++ b/e2e-tests/tests/self-healing/02-assert.yaml @@ -43,7 +43,7 @@ kind: PerconaServerMySQL metadata: name: self-healing finalizers: - - delete-mysql-pods-in-order + - percona.com/delete-mysql-pods-in-order status: haproxy: ready: 3 diff --git a/e2e-tests/tests/self-healing/05-assert.yaml 
b/e2e-tests/tests/self-healing/05-assert.yaml index c33cb0248..7e8ec76e7 100644 --- a/e2e-tests/tests/self-healing/05-assert.yaml +++ b/e2e-tests/tests/self-healing/05-assert.yaml @@ -43,7 +43,7 @@ kind: PerconaServerMySQL metadata: name: self-healing finalizers: - - delete-mysql-pods-in-order + - percona.com/delete-mysql-pods-in-order status: haproxy: ready: 3 diff --git a/e2e-tests/tests/self-healing/08-assert.yaml b/e2e-tests/tests/self-healing/08-assert.yaml index 32a988aa5..41a5dd87d 100644 --- a/e2e-tests/tests/self-healing/08-assert.yaml +++ b/e2e-tests/tests/self-healing/08-assert.yaml @@ -43,7 +43,7 @@ kind: PerconaServerMySQL metadata: name: self-healing finalizers: - - delete-mysql-pods-in-order + - percona.com/delete-mysql-pods-in-order status: haproxy: ready: 3 diff --git a/e2e-tests/tests/self-healing/11-assert.yaml b/e2e-tests/tests/self-healing/11-assert.yaml index a870f75bf..c75f54535 100644 --- a/e2e-tests/tests/self-healing/11-assert.yaml +++ b/e2e-tests/tests/self-healing/11-assert.yaml @@ -43,7 +43,7 @@ kind: PerconaServerMySQL metadata: name: self-healing finalizers: - - delete-mysql-pods-in-order + - percona.com/delete-mysql-pods-in-order status: haproxy: ready: 3 diff --git a/e2e-tests/tests/self-healing/14-assert.yaml b/e2e-tests/tests/self-healing/14-assert.yaml index fe47f563f..aea139b02 100644 --- a/e2e-tests/tests/self-healing/14-assert.yaml +++ b/e2e-tests/tests/self-healing/14-assert.yaml @@ -43,7 +43,7 @@ kind: PerconaServerMySQL metadata: name: self-healing finalizers: - - delete-mysql-pods-in-order + - percona.com/delete-mysql-pods-in-order status: haproxy: ready: 3 From 3a6cbf777a99be144d29b7974baa77df8de89942 Mon Sep 17 00:00:00 2001 From: Inel Pandzic Date: Wed, 7 Aug 2024 11:31:32 +0200 Subject: [PATCH 08/10] Update e2e-tests/functions Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- e2e-tests/functions | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/e2e-tests/functions b/e2e-tests/functions index 0efb94037..0ef0ad8f4 100755 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -6,8 +6,8 @@ test_name=$(basename "$(pwd)") source "${ROOT_REPO}/e2e-tests/vars.sh" init_temp_dir() { - rm -rf "$TEMP_DIR" - mkdir -p "$TEMP_DIR" + rm -rf "$TEMP_DIR" + mkdir -p "$TEMP_DIR" } create_namespace() { From 072fbf1ecc16ac9bb87639522734307aacf2287c Mon Sep 17 00:00:00 2001 From: Inel Pandzic Date: Wed, 7 Aug 2024 11:31:42 +0200 Subject: [PATCH 09/10] Update e2e-tests/functions Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- e2e-tests/functions | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/e2e-tests/functions b/e2e-tests/functions index 0ef0ad8f4..6291b3348 100755 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -11,25 +11,25 @@ init_temp_dir() { } create_namespace() { - local namespace=$1 - - if [[ $OPENSHIFT ]]; then - set -o pipefail - if [[ $OPERATOR_NS ]] && (oc get project "$OPERATOR_NS" -o json >/dev/null 2>&1 | jq -r '.metadata.name' >/dev/null 2>&1); then - oc delete --grace-period=0 --force=true project "$namespace" && sleep 120 || : - else - oc delete project "$namespace" && sleep 40 || : - fi - wait_for_delete "project/$namespace" - - oc new-project "$namespace" - oc project "$namespace" - oc adm policy add-scc-to-user hostaccess -z default || : - else - kubectl delete namespace $namespace --ignore-not-found || : - kubectl wait --for=delete namespace "$namespace" || : - kubectl create namespace $namespace - fi + local namespace=$1 + + if [[ $OPENSHIFT ]]; then + set -o pipefail + if [[ $OPERATOR_NS ]] && (oc get project "$OPERATOR_NS" -o json >/dev/null 2>&1 | jq -r '.metadata.name' >/dev/null 2>&1); then + oc delete --grace-period=0 --force=true project "$namespace" && sleep 120 || : + else + oc delete project "$namespace" && sleep 40 || : + fi + wait_for_delete "project/$namespace" + + oc 
new-project "$namespace" + oc project "$namespace" + oc adm policy add-scc-to-user hostaccess -z default || : + else + kubectl delete namespace $namespace --ignore-not-found || : + kubectl wait --for=delete namespace "$namespace" || : + kubectl create namespace $namespace + fi } deploy_operator() { From 6555938fda5090d01f82352e5e73f432f1316314 Mon Sep 17 00:00:00 2001 From: Inel Pandzic Date: Wed, 7 Aug 2024 11:31:53 +0200 Subject: [PATCH 10/10] Update e2e-tests/functions Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- e2e-tests/functions | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/e2e-tests/functions b/e2e-tests/functions index 6291b3348..1af3ce57a 100755 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -33,7 +33,7 @@ create_namespace() { } deploy_operator() { - destroy_operator + destroy_operator if [[ $OPERATOR_NS ]]; then create_namespace "${OPERATOR_NS}"