Skip to content

Commit

Permalink
feat: wait for coredns and metrics-server, mk2 (#624)
Browse files Browse the repository at this point in the history
* Revert "Revert "feat: wait for coredns and metrics-server" (#623)"

This reverts commit 965a58b.

* only wait for metrics server, not coredns

* watch for k8s infra status in the background

* improve error messages when coredns or metrics server fail to become healthy

* fix fmt

* move k8s health warnings prints to defer func

* store both errors in response channel at once
  • Loading branch information
laverya authored May 20, 2024
1 parent ccea8de commit 5a5461d
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 42 deletions.
13 changes: 13 additions & 0 deletions cmd/embedded-cluster/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -684,6 +684,19 @@ var restoreCommand = &cli.Command{
if err := waitForK0s(); err != nil {
return fmt.Errorf("unable to wait for node: %w", err)
}

kcli, err := kubeutils.KubeClient()
if err != nil {
return fmt.Errorf("unable to create kube client: %w", err)
}
errCh := kubeutils.WaitForKubernetes(c.Context, kcli)
defer func() {
for len(errCh) > 0 {
err := <-errCh
logrus.Error(fmt.Errorf("the Kubernetes Infrastructure failed to become ready: %w", err))
}
}()

logrus.Debugf("running outro")
if err := runOutroForRestore(c); err != nil {
return fmt.Errorf("unable to run outro: %w", err)
Expand Down
45 changes: 9 additions & 36 deletions pkg/addons/applier.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,13 @@ package addons
import (
"context"
"fmt"
"time"

"github.com/k0sproject/k0s/pkg/apis/k0s/v1beta1"
k0sconfig "github.com/k0sproject/k0s/pkg/apis/k0s/v1beta1"
embeddedclusterv1beta1 "github.com/replicatedhq/embedded-cluster-kinds/apis/v1beta1"
"github.com/replicatedhq/embedded-cluster-kinds/types"
kotsv1beta1 "github.com/replicatedhq/kotskinds/apis/kots/v1beta1"
"github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
"github.com/sirupsen/logrus"
corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/replicatedhq/embedded-cluster/pkg/addons/adminconsole"
Expand Down Expand Up @@ -62,6 +59,15 @@ func (a *Applier) Outro(ctx context.Context) error {
if err != nil {
return fmt.Errorf("unable to load addons: %w", err)
}

errCh := kubeutils.WaitForKubernetes(ctx, kcli)
defer func() {
for len(errCh) > 0 {
err := <-errCh
logrus.Error(fmt.Errorf("the Kubernetes Infrastructure failed to become ready: %w", err))
}
}()

for _, addon := range addons {
if err := addon.Outro(ctx, kcli); err != nil {
return err
Expand Down Expand Up @@ -319,39 +325,6 @@ func (a *Applier) Versions(additionalCharts []v1beta1.Chart) (map[string]string,
return versions, nil
}

// waitForKubernetes blocks until a namespace list request against the
// Kubernetes API server succeeds, retrying on a three second ticker.
//
// It returns nil after the first successful list, a wrapped error if the kube
// client cannot be created, and ctx.Err() if ctx is done before a request
// succeeds. Progress is reported through a spinner as "<attempt>/n" messages.
func (a *Applier) waitForKubernetes(ctx context.Context) error {
	progress := spinner.Start()
	// The spinner is closed with the "ready" message on every return path,
	// including the error returns below.
	defer func() {
		progress.Closef("Kubernetes API server is ready")
	}()

	kcli, err := kubeutils.KubeClient()
	if err != nil {
		return fmt.Errorf("unable to create kubernetes client: %w", err)
	}

	retry := time.NewTicker(3 * time.Second)
	defer retry.Stop()

	progress.Infof("1/n Waiting for Kubernetes API server to be ready")

	// The initial message above is reported as attempt 1, so the first real
	// request is numbered 2.
	for attempt := 2; ; attempt++ {
		// Wait for the next tick before every request, bailing out as soon as
		// the context is cancelled.
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-retry.C:
		}

		listErr := kcli.List(ctx, &corev1.NamespaceList{})
		if listErr == nil {
			return nil
		}
		progress.Infof(
			"%d/n Waiting for Kubernetes API server to be ready.",
			attempt,
		)
	}
}

func spinForInstallation(ctx context.Context, cli client.Client) error {
installSpin := spinner.Start()
installSpin.Infof("Waiting for additional components to be ready")
Expand Down
58 changes: 52 additions & 6 deletions pkg/kubeutils/kubeutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,11 @@ func WaitForNamespace(ctx context.Context, cli client.Client, ns string) error {
return ready, nil
},
); err != nil {
return fmt.Errorf("timed out waiting for namespace %s: %v", ns, lasterr)
if lasterr != nil {
return fmt.Errorf("timed out waiting for namespace %s: %v", ns, lasterr)
} else {
return fmt.Errorf("timed out waiting for namespace %s", ns)
}
}
return nil

Expand All @@ -60,7 +64,11 @@ func WaitForDeployment(ctx context.Context, cli client.Client, ns, name string)
return ready, nil
},
); err != nil {
return fmt.Errorf("timed out waiting for %s to deploy: %v", name, lasterr)
if lasterr != nil {
return fmt.Errorf("timed out waiting for %s to deploy: %v", name, lasterr)
} else {
return fmt.Errorf("timed out waiting for %s to deploy", name)
}
}
return nil
}
Expand All @@ -79,7 +87,11 @@ func WaitForDaemonset(ctx context.Context, cli client.Client, ns, name string) e
return ready, nil
},
); err != nil {
return fmt.Errorf("timed out waiting for %s to deploy: %v", name, lasterr)
if lasterr != nil {
return fmt.Errorf("timed out waiting for %s to deploy: %v", name, lasterr)
} else {
return fmt.Errorf("timed out waiting for %s to deploy", name)
}
}
return nil
}
Expand All @@ -98,7 +110,11 @@ func WaitForService(ctx context.Context, cli client.Client, ns, name string) err
return svc.Spec.ClusterIP != "", nil
},
); err != nil {
return fmt.Errorf("timed out waiting for service %s to have an IP: %v", name, lasterr)
if lasterr != nil {
return fmt.Errorf("timed out waiting for service %s to have an IP: %v", name, lasterr)
} else {
return fmt.Errorf("timed out waiting for service %s to have an IP", name)
}
}
return nil
}
Expand Down Expand Up @@ -153,7 +169,11 @@ func WaitForInstallation(ctx context.Context, cli client.Client, writer *spinner
},
); err != nil {
if wait.Interrupted(err) {
return fmt.Errorf("timed out waiting for the installation to finish: %v", lasterr)
if lasterr != nil {
return fmt.Errorf("timed out waiting for the installation to finish: %v", lasterr)
} else {
return fmt.Errorf("timed out waiting for the installation to finish")
}
}
return fmt.Errorf("error waiting for installation: %v", err)
}
Expand Down Expand Up @@ -211,7 +231,11 @@ func WaitForNodes(ctx context.Context, cli client.Client) error {
return readynodes == len(nodes.Items), nil
},
); err != nil {
return fmt.Errorf("timed out waiting for nodes to be ready: %v", lasterr)
if lasterr != nil {
return fmt.Errorf("timed out waiting for nodes to be ready: %v", lasterr)
} else {
return fmt.Errorf("timed out waiting for nodes to be ready")
}
}
return nil
}
Expand Down Expand Up @@ -262,3 +286,25 @@ func IsDaemonsetReady(ctx context.Context, cli client.Client, ns, name string) (
}
return false, nil
}

// WaitForKubernetes starts background readiness checks for the coredns and
// metrics-server deployments in the kube-system namespace and returns
// immediately with a receive-only channel on which failures are reported.
//
// Each deployment is checked in its own goroutine through WaitForDeployment.
// A deployment for which WaitForDeployment returns an error produces exactly
// one wrapped error on the channel; a successful wait produces nothing. The
// channel has one buffer slot per deployment, so the goroutines never block
// on send, and it is never closed.
func WaitForKubernetes(ctx context.Context, cli client.Client) <-chan error {
	const namespace = "kube-system"

	// One buffer slot for each of the two deployments checked below.
	failures := make(chan error, 2)

	// check waits for a single deployment and reports a failure on the shared
	// channel; label is the component name used in the error text.
	check := func(deployment, label string) {
		if waitErr := WaitForDeployment(ctx, cli, namespace, deployment); waitErr != nil {
			failures <- fmt.Errorf("%s failed to become healthy: %w", label, waitErr)
		}
	}

	go check("coredns", "CoreDNS")
	go check("metrics-server", "Metrics Server")

	return failures
}

0 comments on commit 5a5461d

Please sign in to comment.