Skip to content

Commit

Permalink
fix: Adding a defer func to connecttoTelemetryservice() to prevent CN…
Browse files Browse the repository at this point in the history
…I fro… (#1800)

* Adding a defer func to ConnectCNIToTelemetryService() to prevent CNI from getting stuck in case of telemetry service failure.

* fix: addressing the comments for telemetry defer function.

* fix: addressing the comments for telemetry defer func.
  • Loading branch information
behzad-mir authored Feb 11, 2023
1 parent a3682cf commit 879b644
Showing 1 changed file with 25 additions and 23 deletions.
48 changes: 25 additions & 23 deletions telemetry/telemetrybuffer.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (
"github.com/Azure/azure-container-networking/common"
"github.com/Azure/azure-container-networking/log"
"github.com/Azure/azure-container-networking/platform"
"github.com/pkg/errors"
)

// TelemetryConfig - telemetry config read by telemetry service
Expand Down Expand Up @@ -328,34 +327,37 @@ func (tb *TelemetryBuffer) ConnectCNIToTelemetryService(telemetryNumRetries, tel
path, dir := getTelemetryServiceDirectory()
args := []string{"-d", dir}
for attempt := 0; attempt < 2; attempt++ {
if err := tb.Connect(); err != nil {
log.Logf("Connection to telemetry socket failed: %v", err)
if runtime.GOOS == "windows" {
if err = netPlugin.LockKeyValueStore(); err != nil {
log.Logf("lock acquire error: %v", err)
return errors.Wrap(err, "lock acquire error")
}
}
if err = tb.Cleanup(FdName); err != nil {
return errors.Wrap(err, "cleanup failed")
}
if err = StartTelemetryService(path, args); err != nil {
return errors.Wrap(err, "StartTelemetryService failed")
tb.startAndConnectTelemetryService(telemetryNumRetries, telemetryWaitTimeInMilliseconds, netPlugin, path, args)
}
return nil
}

// startAndConnectTelemetryService is called by ConnectCNIToTelemetryService() once per attempt of its retry loop.
// It is a separate function so that its deferred unlock runs at the end of each attempt instead of when the calling loop's function returns.
func (tb *TelemetryBuffer) startAndConnectTelemetryService(telemetryNumRetries, telemetryWaitTimeInMilliseconds int, netPlugin *cni.Plugin, path string, args []string) {
if err := tb.Connect(); err != nil {
log.Logf("Connection to telemetry socket failed: %v", err)
if runtime.GOOS == "windows" {
if err = netPlugin.LockKeyValueStore(); err != nil {
log.Logf("lock acquire error: %v", err)
}
WaitForTelemetrySocket(telemetryNumRetries, time.Duration(telemetryWaitTimeInMilliseconds))
if runtime.GOOS == "windows" {
defer func() {
if err = netPlugin.UnLockKeyValueStore(); err != nil {
log.Logf("failed to relinquish lock error: %v", err)
return errors.Wrap(err, "failed to relinquish lock error")
}
}
} else {
tb.Connected = true
log.Logf("Connected to telemetry service")
return nil
}()
}
if err = tb.Cleanup(FdName); err != nil {
log.Logf("cleanup failed: %v", err)
}
if err = StartTelemetryService(path, args); err != nil {
log.Logf("StartTelemetryService failed: %v", err)
}
WaitForTelemetrySocket(telemetryNumRetries, time.Duration(telemetryWaitTimeInMilliseconds))
} else {
tb.Connected = true
log.Logf("Connected to telemetry service")
}
return nil
}

func getTelemetryServiceDirectory() (path string, dir string) {
Expand Down

0 comments on commit 879b644

Please sign in to comment.