Skip to content

Commit

Permalink
conditionally migrate
Browse files Browse the repository at this point in the history
  • Loading branch information
emosbaugh committed Jan 28, 2025
1 parent 3eecef3 commit 46934e7
Show file tree
Hide file tree
Showing 2 changed files with 207 additions and 4 deletions.
202 changes: 201 additions & 1 deletion operator/controllers/installation_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ import (

apv1b2 "github.com/k0sproject/k0s/pkg/apis/autopilot/v1beta2"
k0shelm "github.com/k0sproject/k0s/pkg/apis/helm/v1beta1"
k0sv1beta1 "github.com/k0sproject/k0s/pkg/apis/k0s/v1beta1"
apcore "github.com/k0sproject/k0s/pkg/autopilot/controller/plans/core"
"github.com/replicatedhq/embedded-cluster/pkg/kubeutils"
"github.com/replicatedhq/embedded-cluster/pkg/runtimeconfig"
batchv1 "k8s.io/api/batch/v1"
Expand All @@ -42,8 +44,14 @@ import (
"sigs.k8s.io/controller-runtime/pkg/handler"

"github.com/replicatedhq/embedded-cluster/kinds/apis/v1beta1"
ectypes "github.com/replicatedhq/embedded-cluster/kinds/types"
"github.com/replicatedhq/embedded-cluster/operator/pkg/autopilot"
"github.com/replicatedhq/embedded-cluster/operator/pkg/charts"
"github.com/replicatedhq/embedded-cluster/operator/pkg/k8sutil"
"github.com/replicatedhq/embedded-cluster/operator/pkg/metrics"
"github.com/replicatedhq/embedded-cluster/operator/pkg/openebs"
"github.com/replicatedhq/embedded-cluster/operator/pkg/registry"
"github.com/replicatedhq/embedded-cluster/operator/pkg/upgrade"
"github.com/replicatedhq/embedded-cluster/operator/pkg/util"
)

Expand Down Expand Up @@ -255,6 +263,169 @@ func (r *InstallationReconciler) ReconcileOpenebs(ctx context.Context, in *v1bet
return nil
}

// ReconcileRegistry runs the registry data migration for installations that are both
// airgapped and highly available. A nil installation, or one that is not airgapped or
// not HA, is left untouched and nil is returned.
//
// The k0s ClusterConfig object is read first; failing to read it aborts the reconcile
// with an error. If the migration itself fails, the installation status is written back
// on a best-effort basis (an update failure is only logged) and the migration error is
// returned wrapped.
func (r *InstallationReconciler) ReconcileRegistry(ctx context.Context, in *v1beta1.Installation) error {
	// nothing to do without an installation object.
	if in == nil {
		return nil
	}
	// the registry migration only applies to airgapped HA installations.
	if !(in.Spec.AirGap && in.Spec.HighAvailability) {
		return nil
	}

	logger := ctrl.LoggerFrom(ctx)

	// make sure the current k0s cluster config can be read before migrating.
	cfgKey := client.ObjectKey{Namespace: "kube-system", Name: "k0s"}
	clusterConfig := k0sv1beta1.ClusterConfig{}
	if getErr := r.Get(ctx, cfgKey, &clusterConfig); getErr != nil {
		return fmt.Errorf("failed to get cluster config: %w", getErr)
	}

	migrateErr := registry.MigrateRegistryData(ctx, in, r.Client)
	if migrateErr == nil {
		return nil
	}

	// the migration failed: still try to save the installation status, but never let
	// a failure to do so mask the migration error.
	if updateErr := r.Status().Update(ctx, in); updateErr != nil {
		logger.Error(updateErr, "Failed to update installation status")
	}
	return fmt.Errorf("failed to migrate registry data: %w", migrateErr)
}

// ReconcileHAStatus sets the HA condition (HAConditionType) on the installation status.
// The condition is only modified in memory; this function does not persist the status.
//
// The condition is set to false, with a reason naming the first failing check, when:
//   - high availability is not enabled in the spec ("HANotEnabled");
//   - for airgap installations, the "seaweedfs" chart is not healthy ("SeaweedFSNotReady"),
//     the registry has not been migrated ("RegistryNotMigrated"), or the "docker-registry"
//     chart is not healthy ("RegistryNotReady");
//   - the "admin-console" chart is not healthy ("AdminConsoleNotReady");
//   - the installation state is not Installed ("InstallationNotReady").
//
// When every check passes the condition is set to true with reason "HAReady". An error
// is returned only if one of the health or migration lookups fails. A nil installation
// is a no-op.
func (r *InstallationReconciler) ReconcileHAStatus(ctx context.Context, in *v1beta1.Installation) error {
	if in == nil {
		return nil
	}

	// setHA records the HA condition with the given status and reason for the
	// generation of the installation currently being reconciled.
	setHA := func(status metav1.ConditionStatus, reason string) {
		in.Status.SetCondition(metav1.Condition{
			Type:               HAConditionType,
			Status:             status,
			Reason:             reason,
			ObservedGeneration: in.Generation,
		})
	}

	if !in.Spec.HighAvailability {
		setHA(metav1.ConditionFalse, "HANotEnabled")
		return nil
	}

	// the checks below are evaluated in order and stop at the first one that fails.
	if in.Spec.AirGap {
		seaweedHealthy, err := k8sutil.GetChartHealth(ctx, r.Client, "seaweedfs")
		switch {
		case err != nil:
			return fmt.Errorf("failed to check seaweedfs readiness: %w", err)
		case !seaweedHealthy:
			setHA(metav1.ConditionFalse, "SeaweedFSNotReady")
			return nil
		}

		migrated, err := registry.HasRegistryMigrated(ctx, r.Client)
		switch {
		case err != nil:
			return fmt.Errorf("failed to check registry migration status: %w", err)
		case !migrated:
			setHA(metav1.ConditionFalse, "RegistryNotMigrated")
			return nil
		}

		registryHealthy, err := k8sutil.GetChartHealth(ctx, r.Client, "docker-registry")
		switch {
		case err != nil:
			return fmt.Errorf("failed to check docker-registry readiness: %w", err)
		case !registryHealthy:
			setHA(metav1.ConditionFalse, "RegistryNotReady")
			return nil
		}
	}

	consoleHealthy, err := k8sutil.GetChartHealth(ctx, r.Client, "admin-console")
	switch {
	case err != nil:
		return fmt.Errorf("failed to check admin-console readiness: %w", err)
	case !consoleHealthy:
		setHA(metav1.ConditionFalse, "AdminConsoleNotReady")
		return nil
	}

	if in.Status.State != v1beta1.InstallationStateInstalled {
		setHA(metav1.ConditionFalse, "InstallationNotReady")
		return nil
	}

	setHA(metav1.ConditionTrue, "HAReady")
	return nil
}

// SetStateBasedOnPlan translates the state of the autopilot Plan into an installation
// state, using autopilot.ReasonForState(plan) as the reason:
//
//   - empty plan state: Enqueued;
//   - schedulable / schedulable-wait: Installing;
//   - completed: KubernetesInstalled (a "K0sUpgradeComplete" event is recorded);
//   - any of the known failure states: Failed (a "K0sUpgradeFailed" event is recorded);
//   - anything else: Failed (a "K0sUpgradeUnknownState" event is recorded).
//
// desiredVersion is only used in the event messages. For now nothing fancy is reported;
// a summary of how many nodes have been upgraded and how many are still pending could
// be considered in the future.
func (r *InstallationReconciler) SetStateBasedOnPlan(in *v1beta1.Installation, plan apv1b2.Plan, desiredVersion string) {
	reason := autopilot.ReasonForState(plan)
	planState := plan.Status.State

	switch planState {
	case "":
		// the plan has not reported any state yet.
		in.Status.SetState(v1beta1.InstallationStateEnqueued, reason, nil)

	case apcore.PlanSchedulable, apcore.PlanSchedulableWait:
		in.Status.SetState(v1beta1.InstallationStateInstalling, reason, nil)

	case apcore.PlanCompleted:
		r.Recorder.Eventf(in, corev1.EventTypeNormal, "K0sUpgradeComplete", "Upgrade of k0s to %s completed", desiredVersion)
		in.Status.SetState(v1beta1.InstallationStateKubernetesInstalled, reason, nil)

	case apcore.PlanIncompleteTargets,
		apcore.PlanInconsistentTargets,
		apcore.PlanRestricted,
		apcore.PlanWarning,
		apcore.PlanMissingSignalNode,
		apcore.PlanApplyFailed:
		// NOTE(review): failures are recorded as Normal events; confirm whether
		// Warning would be more appropriate before changing it.
		r.Recorder.Eventf(in, corev1.EventTypeNormal, "K0sUpgradeFailed", "Upgrade of k0s to %s failed (%q)", desiredVersion, planState)
		in.Status.SetState(v1beta1.InstallationStateFailed, reason, nil)

	default:
		// a state this operator does not know about is treated as a failure.
		r.Recorder.Eventf(in, corev1.EventTypeNormal, "K0sUpgradeUnknownState", "Upgrade of k0s to %s has an unknown state %q", desiredVersion, planState)
		in.Status.SetState(v1beta1.InstallationStateFailed, reason, nil)
	}
}

// StartAutopilotUpgrade creates an autopilot plan to upgrade to version specified in spec.config.version.
//
// This method is a thin wrapper: it forwards the context, the reconciler's client, the
// installation and the release metadata to upgrade.StartAutopilotUpgrade and returns its
// error unchanged. The plan creation itself lives in the upgrade package and is not
// visible here — NOTE(review): confirm the details of the created plan against that package.
func (r *InstallationReconciler) StartAutopilotUpgrade(ctx context.Context, in *v1beta1.Installation, meta *ectypes.ReleaseMetadata) error {
return upgrade.StartAutopilotUpgrade(ctx, r.Client, in, meta)
}

// CoalesceInstallations goes through all the installation objects and make sure that the
// status of the newest one is coherent with whole cluster status. Returns the newest
// installation object.
Expand Down Expand Up @@ -425,7 +596,12 @@ func (r *InstallationReconciler) Reconcile(ctx context.Context, req ctrl.Request
// parse the config otherwise we risk moving on with a reconcile
// using an erroneous config.
if err := r.ReadClusterConfigSpecFromSecret(ctx, in); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to read cluster config from secret: %w", err)
in.Status.SetState(v1beta1.InstallationStateFailed, err.Error(), nil)
if err := r.Status().Update(ctx, in); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to update installation status: %w", err)
}
r.DisableOldInstallations(ctx, items)
return ctrl.Result{}, fmt.Errorf("failed to update installation status: %w", err)
}

// verify if a new node has been added, removed or changed.
Expand All @@ -444,6 +620,25 @@ func (r *InstallationReconciler) Reconcile(ctx context.Context, req ctrl.Request
return ctrl.Result{}, fmt.Errorf("failed to reconcile openebs: %w", err)
}

// reconcile helm chart dependencies including secrets.
if err := r.ReconcileRegistry(ctx, in); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to pre-reconcile helm charts: %w", err)
}

// reconcile the add-ons (k0s helm extensions).
log.Info("Reconciling helm charts")
ev, err := charts.ReconcileHelmCharts(ctx, r.Client, in)
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to reconcile helm charts: %w", err)
}
if ev != nil {
r.Recorder.Event(in, corev1.EventTypeNormal, ev.Reason, ev.Message)
}

if err := r.ReconcileHAStatus(ctx, in); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to reconcile HA status: %w", err)
}

// save the installation status. nothing more to do with it.
if err := r.Status().Update(ctx, in.DeepCopy()); err != nil {
if k8serrors.IsConflict(err) {
Expand All @@ -452,6 +647,11 @@ func (r *InstallationReconciler) Reconcile(ctx context.Context, req ctrl.Request
return ctrl.Result{}, fmt.Errorf("failed to update installation status: %w", err)
}

// now that the status has been updated we can flag all older installation
// objects as obsolete. these are not necessary anymore and are kept only
// for historic reasons.
r.DisableOldInstallations(ctx, items)

// if we are not in an airgap environment this is the time to call back to
// replicated and inform the status of this installation.
if !in.Spec.AirGap {
Expand Down
9 changes: 6 additions & 3 deletions operator/pkg/cli/upgrade_job.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package cli

import (
"fmt"
"os"
"time"

ecv1beta1 "github.com/replicatedhq/embedded-cluster/kinds/apis/v1beta1"
Expand Down Expand Up @@ -54,9 +55,11 @@ func UpgradeJobCmd() *cobra.Command {
fmt.Println(fmt.Sprintf(format, args...))
}

err = migratev2.Run(ctx, logf, cli, installation)
if err != nil {
return fmt.Errorf("failed to run v2 migration: %w", err)
if os.Getenv("MIGRATE_V2") == "true" {
err := migratev2.Run(ctx, logf, cli, installation)
if err != nil {
return fmt.Errorf("failed to run v2 migration: %w", err)
}
}

err = upgrade.Upgrade(ctx, cli, installation)
Expand Down

0 comments on commit 46934e7

Please sign in to comment.