Skip to content

Commit 2bc767d

Browse files
committed
[Windows] CNI Server installs OpenFlow entries after PortStatus message is received
Signed-off-by: Wenying Dong <wenyingd@vmware.com>
1 parent 71d57f1 commit 2bc767d

26 files changed

+1227
-136
lines changed

cmd/antrea-agent/agent.go

+1
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,7 @@ func run(o *Options) error {
600600
o.config.CNISocket,
601601
o.config.HostProcPathPrefix,
602602
nodeConfig,
603+
localPodInformer.Get(),
603604
k8sClient,
604605
routeClient,
605606
isChaining,

go.mod

+4-5
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ module antrea.io/antrea
33
go 1.23.0
44

55
require (
6-
antrea.io/libOpenflow v0.14.0
7-
antrea.io/ofnet v0.12.0
6+
antrea.io/libOpenflow v0.15.0
7+
antrea.io/ofnet v0.14.0
88
github.com/ClickHouse/clickhouse-go/v2 v2.6.1
99
github.com/DATA-DOG/go-sqlmock v1.5.2
1010
github.com/Mellanox/sriovnet v1.1.0
@@ -113,10 +113,9 @@ require (
113113
github.com/aws/smithy-go v1.12.1 // indirect
114114
github.com/beorn7/perks v1.0.1 // indirect
115115
github.com/blang/semver/v4 v4.0.0 // indirect
116-
github.com/cenk/hub v1.0.1 // indirect
117116
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
118-
github.com/cenkalti/hub v1.0.1 // indirect
119-
github.com/cenkalti/rpc2 v0.0.0-20180727162946-9642ea02d0aa // indirect
117+
github.com/cenkalti/hub v1.0.2 // indirect
118+
github.com/cenkalti/rpc2 v1.0.3 // indirect
120119
github.com/cespare/xxhash/v2 v2.3.0 // indirect
121120
github.com/chai2010/gettext-go v1.0.2 // indirect
122121
github.com/containerd/cgroups v1.1.0 // indirect

go.sum

+8-10
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
antrea.io/libOpenflow v0.14.0 h1:6MS1E52nGQyz/AJ8j3OrotgAc/1ubef5vc5i8ytIxFE=
2-
antrea.io/libOpenflow v0.14.0/go.mod h1:U8YR0ithWrjwLdUUhyeCsYnlKg/mEFjG5CbPNt1V+j0=
3-
antrea.io/ofnet v0.12.0 h1:pgygAsEZJUPK/kGmeuIesDh5hoGLpYeavSLdG/Dp8Ao=
4-
antrea.io/ofnet v0.12.0/go.mod h1:MB3qaInEimj+T8qtpBcIQK+EqeNiY1S/WbUdGk+TzNg=
1+
antrea.io/libOpenflow v0.15.0 h1:wGk+IVCf8piGZgC4+lbf4qfGrJG5ikzfq5Y1T5LzqmI=
2+
antrea.io/libOpenflow v0.15.0/go.mod h1:Mq1JEjYrb6eTVA7qjZRj9plVTKcsLM8wnQ87sLLYuiQ=
3+
antrea.io/ofnet v0.14.0 h1:BGOqg5DdRkvxpBqyoEgWmvGd4EvpacdU/Py1s6qOvSc=
4+
antrea.io/ofnet v0.14.0/go.mod h1:W5JPYFFcRM7tLwsItgmsKqIhtW/QofyIeNsUIecFaBo=
55
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
66
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
77
cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU=
@@ -114,14 +114,12 @@ github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdn
114114
github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
115115
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
116116
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
117-
github.com/cenk/hub v1.0.1 h1:RBwXNOF4a8KjD8BJ08XqN8KbrqaGiQLDrgvUGJSHuPA=
118-
github.com/cenk/hub v1.0.1/go.mod h1:rJM1LNAW0ppT8FMMuPK6c2NP/R2nH/UthtuRySSaf6Y=
119117
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
120118
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
121-
github.com/cenkalti/hub v1.0.1 h1:UMtjc6dHSaOQTO15SVA50MBIR9zQwvsukQupDrkIRtg=
122-
github.com/cenkalti/hub v1.0.1/go.mod h1:tcYwtS3a2d9NO/0xDXVJWx3IedurUjYCqFCmpi0lpHs=
123-
github.com/cenkalti/rpc2 v0.0.0-20180727162946-9642ea02d0aa h1:t+iWhuJE2aropY4uxKMVbyP+IJ29o422f7YAd73aTjg=
124-
github.com/cenkalti/rpc2 v0.0.0-20180727162946-9642ea02d0aa/go.mod h1:v2npkhrXyk5BCnkNIiPdRI23Uq6uWPUQGL2hnRcRr/M=
119+
github.com/cenkalti/hub v1.0.2 h1:Nqv9TNaA9boeO2wQFW8o87BY3zKthtnzXmWGmJqhAV8=
120+
github.com/cenkalti/hub v1.0.2/go.mod h1:8LAFAZcCasb83vfxatMUnZHRoQcffho2ELpHb+kaTJU=
121+
github.com/cenkalti/rpc2 v1.0.3 h1:OkMsNP/sP9seN1VRCLqhX1xkVGHPoLwWS6fZR14Ji/k=
122+
github.com/cenkalti/rpc2 v1.0.3/go.mod h1:2yfU5b86vOr16+iY1jN3MvT6Kxc9Nf8j5iZWwUf7iaw=
125123
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
126124
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
127125
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=

pkg/agent/cniserver/pod_configuration.go

+142-7
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,19 @@ import (
2020
"net"
2121
"strings"
2222
"sync"
23+
"time"
2324

2425
current "github.com/containernetworking/cni/pkg/types/100"
2526
"github.com/containernetworking/cni/pkg/version"
2627
corev1 "k8s.io/api/core/v1"
2728
"k8s.io/apimachinery/pkg/util/sets"
29+
k8swait "k8s.io/apimachinery/pkg/util/wait"
30+
clientset "k8s.io/client-go/kubernetes"
31+
"k8s.io/client-go/kubernetes/scheme"
32+
typedv1 "k8s.io/client-go/kubernetes/typed/core/v1"
33+
v1 "k8s.io/client-go/listers/core/v1"
34+
"k8s.io/client-go/tools/record"
35+
"k8s.io/client-go/util/workqueue"
2836
"k8s.io/klog/v2"
2937

3038
"antrea.io/antrea/pkg/agent/cniserver/ipam"
@@ -61,6 +69,10 @@ const (
6169

6270
var (
6371
getNSPath = util.GetNSPath
72+
// retryInterval is the interval to re-install Pod OpenFlow entries if any error happened.
73+
// Note, using a variable rather than constant for retryInterval because we may use a shorter time in the
74+
// test code.
75+
retryInterval = 5 * time.Second
6476
)
6577

6678
type podConfigurator struct {
@@ -76,6 +88,13 @@ type podConfigurator struct {
7688
// isSecondaryNetwork is true if this instance of podConfigurator is used to configure
7789
// Pod secondary network interfaces.
7890
isSecondaryNetwork bool
91+
podIfMonitor *podIfaceMonitor
92+
93+
eventBroadcaster record.EventBroadcaster
94+
record record.EventRecorder
95+
podLister v1.PodLister
96+
kubeClient clientset.Interface
97+
queue workqueue.TypedDelayingInterface[string]
7998
}
8099

81100
func newPodConfigurator(
@@ -88,11 +107,23 @@ func newPodConfigurator(
88107
isOvsHardwareOffloadEnabled bool,
89108
disableTXChecksumOffload bool,
90109
podUpdateNotifier channel.Notifier,
110+
podLister v1.PodLister,
91111
) (*podConfigurator, error) {
92112
ifConfigurator, err := newInterfaceConfigurator(ovsDatapathType, isOvsHardwareOffloadEnabled, disableTXChecksumOffload)
93113
if err != nil {
94114
return nil, err
95115
}
116+
queue := workqueue.NewTypedDelayingQueueWithConfig[string](
117+
workqueue.TypedDelayingQueueConfig[string]{
118+
Name: "podMonitor",
119+
},
120+
)
121+
eventBroadcaster := record.NewBroadcaster()
122+
recorder := eventBroadcaster.NewRecorder(
123+
scheme.Scheme,
124+
corev1.EventSource{Component: "AntreaAgentPodConfigurator"},
125+
)
126+
ifMonitor := newPodInterfaceMonitor(ofClient, ifaceStore, queue)
96127
return &podConfigurator{
97128
ovsBridgeClient: ovsBridgeClient,
98129
ofClient: ofClient,
@@ -101,6 +132,10 @@ func newPodConfigurator(
101132
gatewayMAC: gatewayMAC,
102133
ifConfigurator: ifConfigurator,
103134
podUpdateNotifier: podUpdateNotifier,
135+
podIfMonitor: ifMonitor,
136+
record: recorder,
137+
podLister: podLister,
138+
queue: queue,
104139
}, nil
105140
}
106141

@@ -166,13 +201,13 @@ func getContainerIPsString(ips []net.IP) string {
166201
// not created for a Pod interface.
167202
func ParseOVSPortInterfaceConfig(portData *ovsconfig.OVSPortData, portConfig *interfacestore.OVSPortConfig) *interfacestore.InterfaceConfig {
168203
if portData.ExternalIDs == nil {
169-
klog.V(2).Infof("OVS port %s has no external_ids", portData.Name)
204+
klog.V(2).InfoS("OVS port has no external_ids", "port", portData.Name)
170205
return nil
171206
}
172207

173208
containerID, found := portData.ExternalIDs[ovsExternalIDContainerID]
174209
if !found {
175-
klog.V(2).Infof("OVS port %s has no %s in external_ids", portData.Name, ovsExternalIDContainerID)
210+
klog.V(2).InfoS("OVS port has no containerID in external_ids", "port", portData.Name)
176211
return nil
177212
}
178213

@@ -187,8 +222,7 @@ func ParseOVSPortInterfaceConfig(portData *ovsconfig.OVSPortData, portConfig *in
187222

188223
containerMAC, err := net.ParseMAC(portData.ExternalIDs[ovsExternalIDMAC])
189224
if err != nil {
190-
klog.Errorf("Failed to parse MAC address from OVS external config %s: %v",
191-
portData.ExternalIDs[ovsExternalIDMAC], err)
225+
klog.ErrorS(err, "Failed to parse MAC address from OVS external config")
192226
}
193227
podName, _ := portData.ExternalIDs[ovsExternalIDPodName]
194228
podNamespace, _ := portData.ExternalIDs[ovsExternalIDPodNamespace]
@@ -279,7 +313,7 @@ func (pc *podConfigurator) createOVSPort(ovsPortName string, ovsAttachInfo map[s
279313
func (pc *podConfigurator) removeInterfaces(containerID string) error {
280314
containerConfig, found := pc.ifaceStore.GetContainerInterface(containerID)
281315
if !found {
282-
klog.V(2).Infof("Did not find the port for container %s in local cache", containerID)
316+
klog.V(2).InfoS("Did not find the port for container in local cache", "container", containerID)
283317
return nil
284318
}
285319

@@ -498,7 +532,7 @@ func (pc *podConfigurator) reconcile(pods []corev1.Pod, containerAccess *contain
498532
// disconnectInterfaceFromOVS disconnects an existing interface from ovs br-int.
499533
func (pc *podConfigurator) disconnectInterfaceFromOVS(containerConfig *interfacestore.InterfaceConfig) error {
500534
containerID := containerConfig.ContainerID
501-
klog.V(2).Infof("Deleting Openflow entries for container %s", containerID)
535+
klog.V(2).InfoS("Deleting Openflow entries for container", "container", containerID)
502536
if !pc.isSecondaryNetwork {
503537
if err := pc.ofClient.UninstallPodFlows(containerConfig.InterfaceName); err != nil {
504538
return fmt.Errorf("failed to delete Openflow entries for container %s: %v", containerID, err)
@@ -513,6 +547,12 @@ func (pc *podConfigurator) disconnectInterfaceFromOVS(containerConfig *interface
513547
if err := pc.ovsBridgeClient.DeletePort(containerConfig.PortUUID); err != nil {
514548
return fmt.Errorf("failed to delete OVS port for container %s interface %s: %v", containerID, containerConfig.InterfaceName, err)
515549
}
550+
551+
// Remove unready Pod info from local cache when Pod is deleted. This is called only on Windows.
552+
if pc.podIfMonitor != nil {
553+
pc.podIfMonitor.deleteUnreadyPod(containerConfig.InterfaceName)
554+
}
555+
516556
// Remove container configuration from cache.
517557
pc.ifaceStore.DeleteInterface(containerConfig)
518558
if !pc.isSecondaryNetwork {
@@ -558,7 +598,7 @@ func (pc *podConfigurator) connectInterceptedInterface(
558598
func (pc *podConfigurator) disconnectInterceptedInterface(podName, podNamespace, containerID string) error {
559599
containerConfig, found := pc.ifaceStore.GetContainerInterface(containerID)
560600
if !found {
561-
klog.V(2).Infof("Did not find the port for container %s in local cache", containerID)
601+
klog.V(2).InfoS("Did not find the port for container in local cache", "container", containerID)
562602
return nil
563603
}
564604
for _, ip := range containerConfig.IPs {
@@ -570,3 +610,98 @@ func (pc *podConfigurator) disconnectInterceptedInterface(podName, podNamespace,
570610
return pc.disconnectInterfaceFromOVS(containerConfig)
571611
// TODO recover pre-connect state? repatch vethpair to original bridge etc ?? to make first CNI happy??
572612
}
613+
614+
func (pc *podConfigurator) Run(stopCh <-chan struct{}) {
615+
pc.eventBroadcaster.StartStructuredLogging(0)
616+
pc.eventBroadcaster.StartRecordingToSink(&typedv1.EventSinkImpl{
617+
Interface: pc.kubeClient.CoreV1().Events(""),
618+
})
619+
defer pc.eventBroadcaster.Shutdown()
620+
go k8swait.Until(pc.worker, time.Second, stopCh)
621+
pc.podIfMonitor.Run(stopCh)
622+
}
623+
624+
func (pc *podConfigurator) processNextWorkItem() bool {
625+
key, quit := pc.queue.Get()
626+
if quit {
627+
return false
628+
}
629+
defer pc.queue.Done(key)
630+
631+
if err := pc.updateUnreadyPod(key); err != nil {
632+
// Put the item back on the workqueue to handle any transient errors.
633+
pc.queue.AddAfter(key, retryInterval)
634+
}
635+
return true
636+
}
637+
638+
// worker is a long-running function that will continually call the processNextWorkItem function in
639+
// order to read and process a message on the workqueue.
640+
func (pc *podConfigurator) worker() {
641+
for pc.processNextWorkItem() {
642+
}
643+
}
644+
645+
func (pc *podConfigurator) updateUnreadyPod(ovsPort string) error {
646+
if !pc.podIfMonitor.unreadyInterfaceExists(ovsPort) {
647+
klog.InfoS("Interface does not exist in un-ready state", "name", ovsPort)
648+
return nil
649+
}
650+
651+
ifConfig, found := pc.ifaceStore.GetInterfaceByName(ovsPort)
652+
if !found {
653+
klog.InfoS("Interface config is not found in local cache, remove from unready cache", "name", ovsPort)
654+
pc.podIfMonitor.deleteUnreadyPod(ovsPort)
655+
return nil
656+
}
657+
658+
if ifConfig.OFPort == 0 {
659+
// Add Pod not-ready event if the pod flows are not successfully installed, and the OpenFlow port is not allocated.
660+
// Returns error so that we can have a retry after 5s.
661+
_ = pc.processPodEvents(ifConfig, false)
662+
return fmt.Errorf("pod's OpenFlow port is not ready yet")
663+
}
664+
665+
if !pc.podIfMonitor.interfaceFlowsInstalled(ovsPort) {
666+
// Install OpenFlow entries for the Pod.
667+
klog.V(2).InfoS("Setting up Openflow entries for OVS port", "port", ovsPort)
668+
if err := pc.ofClient.InstallPodFlows(ovsPort, ifConfig.IPs, ifConfig.MAC, uint32(ifConfig.OFPort), ifConfig.VLANID, nil); err != nil {
669+
// Add Pod not-ready event if the pod flows installation are failed.
670+
// Returns error so that we can have a retry after 5s.
671+
_ = pc.processPodEvents(ifConfig, false)
672+
return fmt.Errorf("failed to add Openflow entries for OVS port %s: %v", ovsPort, err)
673+
}
674+
675+
// Notify the Pod update event to required components.
676+
event := agenttypes.PodUpdate{
677+
PodName: ifConfig.PodName,
678+
PodNamespace: ifConfig.PodNamespace,
679+
IsAdd: true,
680+
ContainerID: ifConfig.ContainerID,
681+
}
682+
pc.podUpdateNotifier.Notify(event)
683+
}
684+
685+
if err := pc.processPodEvents(ifConfig, true); err != nil {
686+
return err
687+
}
688+
pc.podIfMonitor.deleteUnreadyPod(ovsPort)
689+
return nil
690+
}
691+
692+
func (pc *podConfigurator) processPodEvents(ifConfig *interfacestore.InterfaceConfig, installed bool) error {
693+
pod, err := pc.podLister.Pods(ifConfig.PodNamespace).Get(ifConfig.PodName)
694+
if err != nil {
695+
klog.ErrorS(err, "Failed to get Pod, retrying", "Pod", klog.KRef(ifConfig.PodNamespace, ifConfig.PodName))
696+
return err
697+
}
698+
699+
if installed {
700+
// Add normal event to record Pod network is ready.
701+
pc.record.Eventf(pod, corev1.EventTypeNormal, "NetworkIsReady", "Installed Pod '%s/%s' network forwarding rules", ifConfig.PodNamespace, ifConfig.PodName)
702+
return nil
703+
}
704+
705+
pc.record.Eventf(pod, corev1.EventTypeWarning, "NetworkNotReady", "Pod '%s/%s' network forwarding rules not installed", ifConfig.PodNamespace, ifConfig.PodName)
706+
return nil
707+
}

pkg/agent/cniserver/pod_configuration_linux_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -687,7 +687,7 @@ func createPodConfigurator(controller *gomock.Controller, testIfaceConfigurator
687687
mockOFClient = openflowtest.NewMockClient(controller)
688688
ifaceStore = interfacestore.NewInterfaceStore()
689689
mockRoute = routetest.NewMockInterface(controller)
690-
configurator, _ := newPodConfigurator(mockOVSBridgeClient, mockOFClient, mockRoute, ifaceStore, gwMAC, "system", false, false, channel.NewSubscribableChannel("PodUpdate", 100))
690+
configurator, _ := newPodConfigurator(mockOVSBridgeClient, mockOFClient, mockRoute, ifaceStore, gwMAC, "system", false, false, channel.NewSubscribableChannel("PodUpdate", 100), nil)
691691
configurator.ifConfigurator = testIfaceConfigurator
692692
return configurator
693693
}

0 commit comments

Comments
 (0)