From 3a6336f72bb07f8eb8aaed999ec7da3230eedbe6 Mon Sep 17 00:00:00 2001 From: Marco Iorio Date: Mon, 28 Oct 2024 11:03:09 +0100 Subject: [PATCH] cilium-cli: account for opt out labels in node to node encryption tests Node to node encryption can be disabled on specific nodes (control plane ones by default) to prevent e.g., losing connectivity to the Kubernetes API server if keys need to be rotated. Let's modify the corresponding Cilium CLI test to take these into account, so that the test doesn't unexpectedly fail due to unexpected leaks if either of the selected pods is running on an excluded node. Specifically, the pod selection logic is modified to exclude the ones running on nodes with node to node encryption disabled, returning a clear error if no valid pod is found. Additionally, an extra node affinity term is configured when node to node encryption is enabled to prefer nodes node not being part of the control plane, whenever possible. Signed-off-by: Marco Iorio --- cilium-cli/connectivity/check/deployment.go | 29 +++++++ cilium-cli/connectivity/check/features.go | 1 + cilium-cli/connectivity/tests/encryption.go | 94 +++++++++++++++++++-- 3 files changed, 115 insertions(+), 9 deletions(-) diff --git a/cilium-cli/connectivity/check/deployment.go b/cilium-cli/connectivity/check/deployment.go index 79b24a330c0b7..70a1828f71ca0 100644 --- a/cilium-cli/connectivity/check/deployment.go +++ b/cilium-cli/connectivity/check/deployment.go @@ -395,6 +395,30 @@ func (ct *ConnectivityTest) ingresses() map[string]string { return ingresses } +// maybeNodeToNodeEncryptionAffinity returns a node affinity term to prefer nodes +// not being part of the control plane when node to node encryption is enabled, +// because they are excluded by default from node to node encryption. This logic +// is currently suboptimal as it only accounts for the default selector, for the +// sake of simplicity, but it should cover all common use cases. +func (ct *ConnectivityTest) maybeNodeToNodeEncryptionAffinity() *corev1.NodeAffinity { + encryptNode, _ := ct.Feature(features.EncryptionNode) + if !encryptNode.Enabled || encryptNode.Mode == "" { + return nil + } + + return &corev1.NodeAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.PreferredSchedulingTerm{{ + Weight: 100, + Preference: corev1.NodeSelectorTerm{ + MatchExpressions: []corev1.NodeSelectorRequirement{{ + Key: "node-role.kubernetes.io/control-plane", + Operator: corev1.NodeSelectorOpDoesNotExist, + }}, + }, + }}, + } +} + // deploy ensures the test Namespace, Services and Deployments are running on the cluster. func (ct *ConnectivityTest) deploy(ctx context.Context) error { var err error @@ -633,6 +657,7 @@ func (ct *ConnectivityTest) deploy(ctx context.Context) error { }, }, }, + NodeAffinity: ct.maybeNodeToNodeEncryptionAffinity(), }, ReadinessProbe: newLocalReadinessProbe(containerPort, "/"), }, ct.params.DNSTestServerImage) @@ -655,6 +680,7 @@ func (ct *ConnectivityTest) deploy(ctx context.Context) error { Image: ct.params.CurlImage, Command: []string{"/usr/bin/pause"}, Annotations: ct.params.DeploymentAnnotations.Match(clientDeploymentName), + Affinity: &corev1.Affinity{NodeAffinity: ct.maybeNodeToNodeEncryptionAffinity()}, NodeSelector: ct.params.NodeSelector, }) _, err = ct.clients.src.CreateServiceAccount(ctx, ct.params.TestNamespace, k8s.NewServiceAccount(clientDeploymentName), metav1.CreateOptions{}) @@ -691,6 +717,7 @@ func (ct *ConnectivityTest) deploy(ctx context.Context) error { }, }, }, + NodeAffinity: ct.maybeNodeToNodeEncryptionAffinity(), }, NodeSelector: ct.params.NodeSelector, }) @@ -729,6 +756,7 @@ func (ct *ConnectivityTest) deploy(ctx context.Context) error { }, }, }, + NodeAffinity: ct.maybeNodeToNodeEncryptionAffinity(), }, NodeSelector: ct.params.NodeSelector, }) @@ -832,6 +860,7 @@ func (ct *ConnectivityTest) deploy(ctx context.Context) error { }, }, }, + NodeAffinity: ct.maybeNodeToNodeEncryptionAffinity(), }, NodeSelector: ct.params.NodeSelector, ReadinessProbe: newLocalReadinessProbe(containerPort, "/"), diff --git a/cilium-cli/connectivity/check/features.go b/cilium-cli/connectivity/check/features.go index 30063d0c2ee3d..a5ed68c62ccfc 100644 --- a/cilium-cli/connectivity/check/features.go +++ b/cilium-cli/connectivity/check/features.go @@ -66,6 +66,7 @@ func (ct *ConnectivityTest) extractFeaturesFromRuntimeConfig(ctx context.Context result[features.EncryptionNode] = features.Status{ Enabled: cfg.EncryptNode, + Mode: cfg.NodeEncryptionOptOutLabelsString, } isFeatureKNPEnabled, err := ct.isFeatureKNPEnabled(cfg.EnableK8sNetworkPolicy) diff --git a/cilium-cli/connectivity/tests/encryption.go b/cilium-cli/connectivity/tests/encryption.go index 787c524ace7d9..f42a4c0fbaa31 100644 --- a/cilium-cli/connectivity/tests/encryption.go +++ b/cilium-cli/connectivity/tests/encryption.go @@ -6,12 +6,18 @@ package tests import ( "context" "fmt" + "maps" + "slices" "strings" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/sets" + "github.com/cilium/cilium/cilium-cli/connectivity/check" "github.com/cilium/cilium/cilium-cli/connectivity/sniff" "github.com/cilium/cilium/cilium-cli/utils/features" "github.com/cilium/cilium/pkg/defaults" + ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2" "github.com/cilium/cilium/pkg/versioncheck" ) @@ -324,6 +330,65 @@ func testNoTrafficLeak(ctx context.Context, t *check.Test, s check.Scenario, } } +func nodeToNodeEncTestPods(nodes map[check.NodeIdentity]*ciliumv2.CiliumNode, excludeSelector labels.Selector, clients, servers []check.Pod) (client, server *check.Pod) { + nodeKey := func(pod *check.Pod) check.NodeIdentity { + if pod != nil { + return check.NodeIdentity{Cluster: pod.K8sClient.ClusterName(), Name: pod.NodeName()} + } + return check.NodeIdentity{} + } + + acceptableNodes := func(pods []check.Pod) sets.Set[check.NodeIdentity] { + keys := sets.New[check.NodeIdentity]() + for _, pod := range pods { + node := nodes[nodeKey(&pod)] + if node == nil { + continue + } + + if excludeSelector.Matches(labels.Set(node.Labels)) { + continue + } + + keys.Insert(nodeKey(&pod)) + } + return keys + } + + getRandomPod := func(pods []check.Pod, nodes sets.Set[check.NodeIdentity]) *check.Pod { + for _, pod := range pods { + if nodes.Has(nodeKey(&pod)) { + return &pod + } + } + + return nil + } + + clientNodes := acceptableNodes(clients) + serverNodes := acceptableNodes(servers) + + // Prefer selecting a client (server) running on a node which does not + // host a server (client) as well, to maximize the possibilities of finding + // a valid combination. + clientNodesOnly := clientNodes.Difference(serverNodes) + serverNodesOnly := serverNodes.Difference(clientNodes) + + client = getRandomPod(clients, clientNodesOnly) + if client == nil { + client = getRandomPod(clients, clientNodes) + } + + server = getRandomPod(servers, serverNodesOnly) + if server == nil { + // Make sure to not pick a server hosted on the same node of the client. + serverNodes.Delete(nodeKey(client)) + server = getRandomPod(servers, serverNodes) + } + + return client, server +} + func NodeToNodeEncryption(reqs ...features.Requirement) check.Scenario { return &nodeToNodeEncryption{reqs} } @@ -335,17 +400,28 @@ func (s *nodeToNodeEncryption) Name() string { } func (s *nodeToNodeEncryption) Run(ctx context.Context, t *check.Test) { - client := t.Context().RandomClientPod() - - var server check.Pod - for _, pod := range t.Context().EchoPods() { - // Make sure that the server pod is on another node than client - if pod.Pod.Status.HostIP != client.Pod.Status.HostIP { - server = pod - break + ct := t.Context() + encryptNode, _ := ct.Feature(features.EncryptionNode) + + // Node to node encryption can be disabled on specific nodes (e.g., + // control plane ones) to prevent e.g., losing connectivity to the + // Kubernetes API Server. Let's take that into account when selecting + // the target pods/nodes. + excludeNodes := labels.Nothing() + if encryptNode.Enabled { + var err error + if excludeNodes, err = labels.Parse(encryptNode.Mode); err != nil { + t.Fatalf("unable to parse label selector %s: %s", encryptNode.Mode, err) } } + client, server := nodeToNodeEncTestPods(ct.CiliumNodes(), excludeNodes, + slices.Collect(maps.Values(ct.ClientPods())), + slices.Collect(maps.Values(ct.EchoPods()))) + if client == nil || server == nil { + t.Fatal("Could not find matching pods: is node to node encryption disabled on all nodes hosting test pods?") + } + // clientHost is a pod running on the same node as the client pod, just in // the host netns. clientHost := t.Context().HostNetNSPodsByNode()[client.Pod.Spec.NodeName] @@ -386,6 +462,6 @@ func (s *nodeToNodeEncryption) Run(ctx context.Context, t *check.Test) { if onlyPodToPodWGWithTunnel { hostToPodAssertNoLeaks = true } - testNoTrafficLeak(ctx, t, s, &clientHost, &server, &clientHost, &serverHost, requestHTTP, ipFam, hostToPodAssertNoLeaks, false, wgEncap) + testNoTrafficLeak(ctx, t, s, &clientHost, server, &clientHost, &serverHost, requestHTTP, ipFam, hostToPodAssertNoLeaks, false, wgEncap) }) }