From 5a5461d5e182dee0095f80333e6b18f6e7af113a Mon Sep 17 00:00:00 2001 From: Andrew Lavery Date: Tue, 21 May 2024 00:01:33 +0800 Subject: [PATCH] feat: wait for coredns and metrics-server, mk2 (#624) * Revert "Revert "feat: wait for coredns and metrics-server" (#623)" This reverts commit 965a58b77dded13f030ab6b06fdad54a13dc31ec. * only wait for metrics server, not coredns * watch for k8s infra status in the background * improve error messages when coredns or metrics server fail to become healthy * fix fmt * move k8s health warnings prints to defer func * store both errors in response channel at once --- cmd/embedded-cluster/restore.go | 13 ++++++++ pkg/addons/applier.go | 45 +++++-------------------- pkg/kubeutils/kubeutils.go | 58 +++++++++++++++++++++++++++++---- 3 files changed, 74 insertions(+), 42 deletions(-) diff --git a/cmd/embedded-cluster/restore.go b/cmd/embedded-cluster/restore.go index a380cd05e..fd95b5d0e 100644 --- a/cmd/embedded-cluster/restore.go +++ b/cmd/embedded-cluster/restore.go @@ -684,6 +684,19 @@ var restoreCommand = &cli.Command{ if err := waitForK0s(); err != nil { return fmt.Errorf("unable to wait for node: %w", err) } + + kcli, err := kubeutils.KubeClient() + if err != nil { + return fmt.Errorf("unable to create kube client: %w", err) + } + errCh := kubeutils.WaitForKubernetes(c.Context, kcli) + defer func() { + for len(errCh) > 0 { + err := <-errCh + logrus.Error(fmt.Errorf("the Kubernetes Infrastructure failed to become ready: %w", err)) + } + }() + logrus.Debugf("running outro") if err := runOutroForRestore(c); err != nil { return fmt.Errorf("unable to run outro: %w", err) diff --git a/pkg/addons/applier.go b/pkg/addons/applier.go index d4c8465a4..ef3c45562 100644 --- a/pkg/addons/applier.go +++ b/pkg/addons/applier.go @@ -6,8 +6,6 @@ package addons import ( "context" "fmt" - "time" - "github.com/k0sproject/k0s/pkg/apis/k0s/v1beta1" k0sconfig "github.com/k0sproject/k0s/pkg/apis/k0s/v1beta1" embeddedclusterv1beta1 
"github.com/replicatedhq/embedded-cluster-kinds/apis/v1beta1" @@ -15,7 +13,6 @@ import ( kotsv1beta1 "github.com/replicatedhq/kotskinds/apis/kots/v1beta1" "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2" "github.com/sirupsen/logrus" - corev1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" "github.com/replicatedhq/embedded-cluster/pkg/addons/adminconsole" @@ -62,6 +59,15 @@ func (a *Applier) Outro(ctx context.Context) error { if err != nil { return fmt.Errorf("unable to load addons: %w", err) } + + errCh := kubeutils.WaitForKubernetes(ctx, kcli) + defer func() { + for len(errCh) > 0 { + err := <-errCh + logrus.Error(fmt.Errorf("the Kubernetes Infrastructure failed to become ready: %w", err)) + } + }() + for _, addon := range addons { if err := addon.Outro(ctx, kcli); err != nil { return err @@ -319,39 +325,6 @@ func (a *Applier) Versions(additionalCharts []v1beta1.Chart) (map[string]string, return versions, nil } -// waitForKubernetes waits until we manage to make a successful connection to the -// Kubernetes API server. 
-func (a *Applier) waitForKubernetes(ctx context.Context) error { - loading := spinner.Start() - defer func() { - loading.Closef("Kubernetes API server is ready") - }() - kcli, err := kubeutils.KubeClient() - if err != nil { - return fmt.Errorf("unable to create kubernetes client: %w", err) - } - ticker := time.NewTicker(3 * time.Second) - defer ticker.Stop() - counter := 1 - loading.Infof("1/n Waiting for Kubernetes API server to be ready") - for { - select { - case <-ticker.C: - case <-ctx.Done(): - return ctx.Err() - } - counter++ - if err := kcli.List(ctx, &corev1.NamespaceList{}); err != nil { - loading.Infof( - "%d/n Waiting for Kubernetes API server to be ready.", - counter, - ) - continue - } - return nil - } -} - func spinForInstallation(ctx context.Context, cli client.Client) error { installSpin := spinner.Start() installSpin.Infof("Waiting for additional components to be ready") diff --git a/pkg/kubeutils/kubeutils.go b/pkg/kubeutils/kubeutils.go index 88189e272..53ba772ea 100644 --- a/pkg/kubeutils/kubeutils.go +++ b/pkg/kubeutils/kubeutils.go @@ -40,7 +40,11 @@ func WaitForNamespace(ctx context.Context, cli client.Client, ns string) error { return ready, nil }, ); err != nil { - return fmt.Errorf("timed out waiting for namespace %s: %v", ns, lasterr) + if lasterr != nil { + return fmt.Errorf("timed out waiting for namespace %s: %v", ns, lasterr) + } else { + return fmt.Errorf("timed out waiting for namespace %s", ns) + } } return nil @@ -60,7 +64,11 @@ func WaitForDeployment(ctx context.Context, cli client.Client, ns, name string) return ready, nil }, ); err != nil { - return fmt.Errorf("timed out waiting for %s to deploy: %v", name, lasterr) + if lasterr != nil { + return fmt.Errorf("timed out waiting for %s to deploy: %v", name, lasterr) + } else { + return fmt.Errorf("timed out waiting for %s to deploy", name) + } } return nil } @@ -79,7 +87,11 @@ func WaitForDaemonset(ctx context.Context, cli client.Client, ns, name string) e return ready, nil }, 
); err != nil { - return fmt.Errorf("timed out waiting for %s to deploy: %v", name, lasterr) + if lasterr != nil { + return fmt.Errorf("timed out waiting for %s to deploy: %v", name, lasterr) + } else { + return fmt.Errorf("timed out waiting for %s to deploy", name) + } } return nil } @@ -98,7 +110,11 @@ func WaitForService(ctx context.Context, cli client.Client, ns, name string) err return svc.Spec.ClusterIP != "", nil }, ); err != nil { - return fmt.Errorf("timed out waiting for service %s to have an IP: %v", name, lasterr) + if lasterr != nil { + return fmt.Errorf("timed out waiting for service %s to have an IP: %v", name, lasterr) + } else { + return fmt.Errorf("timed out waiting for service %s to have an IP", name) + } } return nil } @@ -153,7 +169,11 @@ func WaitForInstallation(ctx context.Context, cli client.Client, writer *spinner }, ); err != nil { if wait.Interrupted(err) { - return fmt.Errorf("timed out waiting for the installation to finish: %v", lasterr) + if lasterr != nil { + return fmt.Errorf("timed out waiting for the installation to finish: %v", lasterr) + } else { + return fmt.Errorf("timed out waiting for the installation to finish") + } } return fmt.Errorf("error waiting for installation: %v", err) } @@ -211,7 +231,11 @@ func WaitForNodes(ctx context.Context, cli client.Client) error { return readynodes == len(nodes.Items), nil }, ); err != nil { - return fmt.Errorf("timed out waiting for nodes to be ready: %v", lasterr) + if lasterr != nil { + return fmt.Errorf("timed out waiting for nodes to be ready: %v", lasterr) + } else { + return fmt.Errorf("timed out waiting for nodes to be ready") + } } return nil } @@ -262,3 +286,25 @@ func IsDaemonsetReady(ctx context.Context, cli client.Client, ns, name string) ( } return false, nil } + +// WaitForKubernetes waits for coredns and metrics-server to be ready in kube-system, and returns an error channel. +// if either of them fails to become healthy, an error is returned via the channel. 
+func WaitForKubernetes(ctx context.Context, cli client.Client) <-chan error {
+	// One buffer slot per watcher: each goroutine sends at most once, so a
+	// send never blocks even when nobody is receiving.
+	failures := make(chan error, 2)
+
+	go func() {
+		if err := WaitForDeployment(ctx, cli, "kube-system", "coredns"); err != nil {
+			failures <- fmt.Errorf("CoreDNS failed to become healthy: %w", err)
+		}
+	}()
+	go func() {
+		if err := WaitForDeployment(ctx, cli, "kube-system", "metrics-server"); err != nil {
+			failures <- fmt.Errorf("Metrics Server failed to become healthy: %w", err)
+		}
+	}()
+
+	// The channel is never closed; a successful wait sends nothing on it.
+	return failures
+}