Skip to content

Commit

Permalink
Do deployment rollout when capabilities change
Browse files Browse the repository at this point in the history
This patch changes how the pods exit when the capabilities have
changed. Instead of all the replicas exiting at the same time, the
exits are managed by causing the pods' deployment to roll out a new
set of pods.

This ensures that at least one pod is always online to respond to
webhook requests.

This patch also ensures the capabilities controllers are run only
on the leader.
  • Loading branch information
akutz committed Feb 10, 2025
1 parent 035929e commit c9069e8
Show file tree
Hide file tree
Showing 24 changed files with 626 additions and 162 deletions.
4 changes: 3 additions & 1 deletion .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,9 @@ linters-settings:
- alias: pkgctx
pkg: github.com/vmware-tanzu/vm-operator/pkg/context
- alias: pkgerr
pkg: github.com/vmware-tanzu/vm-operator/pkg/pkgerr
pkg: github.com/vmware-tanzu/vm-operator/pkg/errors
- alias: pkgexit
pkg: github.com/vmware-tanzu/vm-operator/pkg/exit
- alias: ctxop
pkg: github.com/vmware-tanzu/vm-operator/pkg/context/operation
- alias: pkgmgr
Expand Down
2 changes: 2 additions & 0 deletions config/default/manager_pod_info_patch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ spec:
containers:
- name: manager
env:
# The literal DEPLOYMENT_NAME value below is a placeholder: the DEPLOYMENT_NAME
# replacement in config/replacements/kustomization.yaml targets this env var's
# value field.
- name: DEPLOYMENT_NAME
value: DEPLOYMENT_NAME
- name: POD_NAMESPACE
valueFrom:
fieldRef:
Expand Down
1 change: 0 additions & 1 deletion config/default/manager_webhook_patch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ metadata:
spec:
template:
spec:
# There's an assumption in kustomization.yaml that manager is container[0]
containers:
- name: manager
env:
Expand Down
2 changes: 0 additions & 2 deletions config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@ spec:
name: vmoperator-controller-manager
spec:
containers:
# There's an assumption in manager_webhook_patch.yaml that manager is container[0]
# If new containers are added, please bear this in mind
- command:
- /manager
image: controller:latest
Expand Down
7 changes: 7 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,13 @@ rules:
- get
- list
- watch
# Allows getting and patching Deployments. RBAC apiGroups takes the API group
# name only ("apps"), never group/version; "apps/v1" matches no resources.
# NOTE(review): if this file is generated from a +kubebuilder:rbac marker, the
# marker's groups= value needs the same fix — confirm at the marker's source.
- apiGroups:
- apps
resources:
- deployments
verbs:
- get
- patch
- apiGroups:
- cns.vmware.com
resources:
Expand Down
15 changes: 15 additions & 0 deletions config/replacements/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -330,3 +330,18 @@ replacements:
version: v1
kind: CustomResourceDefinition
name: virtualmachinesetresourcepolicies.vmoperator.vmware.com

# DEPLOYMENT_NAME
# Copies metadata.name of the controller-manager Deployment (apps/v1, namespace
# "system") into the value of the DEPLOYMENT_NAME env var on the "manager"
# container of the controller-manager Deployment.
- source:
fieldPath: metadata.name
group: apps
version: v1
kind: Deployment
namespace: system
name: controller-manager
targets:
- select:
kind: Deployment
name: controller-manager
fieldPaths:
- spec.template.spec.containers.[name=manager].env.[name=DEPLOYMENT_NAME].value
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Copyright (c) 2024 Broadcom. All Rights Reserved.
// Broadcom Confidential. The term "Broadcom" refers to Broadcom Inc.
// and/or its subsidiaries.
// © Broadcom. All Rights Reserved.
// The term Broadcom refers to Broadcom Inc. and/or its subsidiaries.
// SPDX-License-Identifier: Apache-2.0

package capability

Expand All @@ -20,14 +20,13 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/manager"

"github.com/vmware-tanzu/vm-operator/controllers/infra/capability/exit"
pkgcfg "github.com/vmware-tanzu/vm-operator/pkg/config"
"github.com/vmware-tanzu/vm-operator/pkg/config/capabilities"
pkgctx "github.com/vmware-tanzu/vm-operator/pkg/context"
pkgexit "github.com/vmware-tanzu/vm-operator/pkg/exit"
pkgmgr "github.com/vmware-tanzu/vm-operator/pkg/manager"
"github.com/vmware-tanzu/vm-operator/pkg/record"
kubeutil "github.com/vmware-tanzu/vm-operator/pkg/util/kube"
"github.com/vmware-tanzu/vm-operator/pkg/util/ptr"
)

// AddToManager adds this package's controller to the provided manager.
Expand All @@ -50,17 +49,18 @@ func AddToManager(ctx *pkgctx.ControllerManagerContext, mgr manager.Manager) err

r := NewReconciler(
ctx,
mgr.GetClient(),
cache,
ctrl.Log.WithName("controllers").WithName(controllerName),
record.New(mgr.GetEventRecorderFor(controllerNameLong)),
mgr.Elected(),
)

// This controller previously also ran on the non-leader (webhook) pods so
// capabilities updates were reflected there. With NeedLeaderElection no
// longer set to false, it runs only on the leader.
c, err := controller.New(controllerName, mgr, controller.Options{
Reconciler: r,
MaxConcurrentReconciles: 1,
NeedLeaderElection: ptr.To(false),
})
if err != nil {
return err
Expand All @@ -87,23 +87,29 @@ func AddToManager(ctx *pkgctx.ControllerManagerContext, mgr manager.Manager) err

func NewReconciler(
ctx context.Context,
client ctrlclient.Reader,
client ctrlclient.Client,
reader ctrlclient.Reader,
logger logr.Logger,
recorder record.Recorder) *Reconciler {
recorder record.Recorder,
elected <-chan struct{}) *Reconciler {

return &Reconciler{
Context: ctx,
Client: client,
Reader: reader,
Logger: logger,
Recorder: recorder,
Elected: elected,
}
}

type Reconciler struct {
Context context.Context
Client ctrlclient.Reader
Client ctrlclient.Client
Reader ctrlclient.Reader
Logger logr.Logger
Recorder record.Recorder
Elected <-chan struct{}
}

// +kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list;watch
Expand All @@ -115,13 +121,20 @@ func (r *Reconciler) Reconcile(
ctx = pkgcfg.JoinContext(ctx, r.Context)

var obj corev1.ConfigMap
if err := r.Client.Get(ctx, req.NamespacedName, &obj); err != nil {
if err := r.Reader.Get(ctx, req.NamespacedName, &obj); err != nil {
return ctrl.Result{}, ctrlclient.IgnoreNotFound(err)
}

if capabilities.UpdateCapabilitiesFeatures(ctx, obj) {
r.Logger.Info("killing pod due to changed capabilities")
exit.Exit()
if capabilities.WouldUpdateCapabilitiesFeatures(ctx, obj) {
if err := pkgexit.Exit(
logr.NewContext(ctx, r.Logger),
"capabilities have changed",
r.Client,
r.Elected); err != nil {

r.Logger.Error(err, "Failed to exit due to capability change")
return ctrl.Result{}, err
}
}

return ctrl.Result{}, nil
Expand Down
Original file line number Diff line number Diff line change
@@ -1,30 +1,20 @@
// Copyright (c) 2024 Broadcom. All Rights Reserved.
// Broadcom Confidential. The term "Broadcom" refers to Broadcom Inc.
// and/or its subsidiaries.
// © Broadcom. All Rights Reserved.
// The term Broadcom refers to Broadcom Inc. and/or its subsidiaries.
// SPDX-License-Identifier: Apache-2.0

package capability_test

import (
"sync/atomic"
"testing"

. "github.com/onsi/ginkgo/v2"

capability "github.com/vmware-tanzu/vm-operator/controllers/infra/capability/configmap"
"github.com/vmware-tanzu/vm-operator/controllers/infra/capability/exit"
pkgcfg "github.com/vmware-tanzu/vm-operator/pkg/config"
"github.com/vmware-tanzu/vm-operator/pkg/manager"
"github.com/vmware-tanzu/vm-operator/test/builder"
)

var numExits int32

func init() {
exit.Exit = func() {
atomic.AddInt32(&numExits, 1)
}
}

var suite = builder.NewTestSuiteForControllerWithContext(
pkgcfg.UpdateContext(
pkgcfg.NewContextWithDefaultConfig(),
Expand Down
Loading

0 comments on commit c9069e8

Please sign in to comment.