diff --git a/charts/nebula-operator/templates/controller-manager-deployment.yaml b/charts/nebula-operator/templates/controller-manager-deployment.yaml
index 9896a76d..66540e95 100644
--- a/charts/nebula-operator/templates/controller-manager-deployment.yaml
+++ b/charts/nebula-operator/templates/controller-manager-deployment.yaml
@@ -33,6 +33,9 @@ spec:
             {{- if .Values.watchNamespaces }}
             - --watch-namespaces={{ .Values.watchNamespaces }}
             {{- end }}
+            {{- if .Values.nebulaObjectSelector }}
+            - --nebula-object-selector={{ .Values.nebulaObjectSelector }}
+            {{- end }}
             - --sync-period={{ .Values.syncPeriod }}
             - --concurrent-nebulacluster-syncs={{ .Values.concurrentNebulaClusterSyncs }}
             - --concurrent-nebularestore-syncs={{ .Values.concurrentNebulaRestoreSyncs }}
diff --git a/charts/nebula-operator/values.yaml b/charts/nebula-operator/values.yaml
index 7da8e769..959b4a3f 100644
--- a/charts/nebula-operator/values.yaml
+++ b/charts/nebula-operator/values.yaml
@@ -98,6 +98,9 @@ syncPeriod: 0h30m0s
 # e.g. ns1,ns2,ns3
 watchNamespaces: ""
 
+# nebula object selector (label query) to filter on, supports '=', '==', and '!=' (e.g. key1=value1,key2=value2).
+nebulaObjectSelector: ""
+
 # The number of NebulaCluster objects that are allowed to sync concurrently. (default 5)
 concurrentNebulaClusterSyncs: 5
 
diff --git a/pkg/controller/nebularestore/nebula_restore_control.go b/pkg/controller/nebularestore/nebula_restore_control.go
index dc851161..7a522a43 100644
--- a/pkg/controller/nebularestore/nebula_restore_control.go
+++ b/pkg/controller/nebularestore/nebula_restore_control.go
@@ -17,8 +17,6 @@ limitations under the License.
 package nebularestore
 
 import (
-	"fmt"
-
 	corev1 "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/klog/v2"
@@ -79,14 +77,13 @@ func (c *defaultRestoreControl) UpdateNebulaRestore(nr *v1alpha1.NebulaRestore)
 			return nil
 		}
 		for _, pod := range pods {
-			// TODO get pod failed details
 			if pod.Status.Phase == corev1.PodFailed {
 				klog.Infof("NebulaCluster [%s/%s] has failed pod %s.", ns, name, pod.Name)
 				if err := c.clientSet.NebulaRestore().UpdateNebulaRestoreStatus(nr, &v1alpha1.RestoreCondition{
 					Type:    v1alpha1.RestoreFailed,
 					Status:  corev1.ConditionTrue,
 					Reason:  "PodFailed",
-					Message: fmt.Sprintf("Pod %s has failed", pod.Name),
+					Message: getPodTerminateReason(pod),
 				}, &kube.RestoreUpdateStatus{
 					ConditionType: v1alpha1.RestoreFailed,
 				}); err != nil {
diff --git a/pkg/controller/nebularestore/nebula_restore_manager.go b/pkg/controller/nebularestore/nebula_restore_manager.go
index 4a1ccf28..e12cffc8 100644
--- a/pkg/controller/nebularestore/nebula_restore_manager.go
+++ b/pkg/controller/nebularestore/nebula_restore_manager.go
@@ -151,6 +151,7 @@ func (rm *restoreManager) syncRestoreProcess(nr *v1alpha1.NebulaRestore) error {
 	if err != nil {
 		return err
 	}
+	defer restoreAgent.agentMgr.Close()
 
 	if err := rm.loadCluster(original, restored, restoreAgent, options); err != nil {
 		return err
@@ -760,7 +761,6 @@ func (rm *restoreManager) endpointsConnected(restoreAgent *RestoreAgent, endpoin
 	}
 	agent, err := restoreAgent.agentMgr.GetAgent(host)
 	if err != nil {
-		klog.Error(err)
 		return false
 	}
 	resp, err := agent.HealthCheck(&pb.HealthCheckRequest{})
@@ -802,9 +802,23 @@ func (rm *restoreManager) getRestoredName(nr *v1alpha1.NebulaRestore) (string, e
 			return "", err
 		}
 
-		klog.Infof("generate restored nebula cluster name successfully")
+		klog.Infof("generate [%s/%s] restored nebula cluster name successfully", nr.Namespace, nr.Name)
 		return genName, nil
 	}
 
 	return nr.Status.ClusterName, nil
 }
+
+func getPodTerminateReason(pod corev1.Pod) string {
+	for _, cs := range pod.Status.InitContainerStatuses {
+		if cs.State.Terminated != nil {
+			return cs.State.Terminated.String()
+		}
+	}
+	for _, cs := range pod.Status.ContainerStatuses {
+		if cs.State.Terminated != nil {
+			return cs.State.Terminated.String()
+		}
+	}
+	return ""
+}
diff --git a/pkg/nebula/agent.go b/pkg/nebula/agent.go
index 2b883f84..26415b97 100644
--- a/pkg/nebula/agent.go
+++ b/pkg/nebula/agent.go
@@ -70,3 +70,10 @@ func (a *AgentManager) GetAgent(agentAddr *nebula.HostAddr) (*Agent, error) {
 	a.agents[agentAddr.Host] = agent
 	return agent, nil
 }
+
+func (a *AgentManager) Close() {
+	for addr, agent := range a.agents {
+		agent.Close()
+		delete(a.agents, addr)
+	}
+}
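
For reference, the new nebulaObjectSelector value is passed through to the controller-manager verbatim as --nebula-object-selector. A minimal sketch of how it might be set in a values override, using illustrative label names that are not part of this change:

    # my-values.yaml (hypothetical labels; any label query using '=', '==' or '!=' should work)
    nebulaObjectSelector: "managed-by=nebula-operator,env!=staging"

With a non-empty selector the operator would presumably reconcile only the Nebula objects whose labels match the query; the default empty string keeps the existing behavior of watching everything.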