Skip to content

Commit fce9903

Browse files
committed
clean up
1 parent ff3467a commit fce9903

File tree

1 file changed

+22
-15
lines changed

1 file changed

+22
-15
lines changed

kubernetes/src/main/java/com/linecorp/armeria/client/kubernetes/endpoints/KubernetesEndpointGroup.java

+22 -15
Original file line number | Diff line number | Diff line change
@@ -319,18 +319,19 @@ private boolean doStart(boolean initial) {
319319
// Initialize the endpoints.
320320
maybeUpdateEndpoints();
321321

322-
logger.info("[{}/{}] Watching the service, nodes and pods...", namespace, serviceName);
323322
watchService(service.getMetadata().getResourceVersion());
324323
watchNode(nodes.getMetadata().getResourceVersion());
325324
watchPod(pods.getMetadata().getResourceVersion());
326325
} catch (Exception e) {
327-
logger.warn("[{}/{}] Failed to start {}.", namespace, serviceName, this, e);
326+
logger.warn("[{}/{}] Failed to start {}. (initial: {})", namespace, serviceName, this, initial, e);
328327
if (initial) {
329328
failInit(e);
329+
// Do not retry if the initialization fails since the error is likely to be persistent.
330+
return false;
330331
} else {
331-
scheduleRestartWithBackoff(numStartFailures++);
332+
scheduleRestartWithBackoff(++numStartFailures);
333+
return true;
332334
}
333-
return false;
334335
}
335336

336337
if (closed) {
@@ -345,7 +346,8 @@ private boolean doStart(boolean initial) {
345346
}
346347

347348
private void watchService(String resourceVersion) {
348-
logger.info("[{}/{}] Start the service watcher...", namespace, serviceName);
349+
logger.info("[{}/{}] Start the service watcher... (resource version: {})", namespace, serviceName,
350+
resourceVersion);
349351
serviceWatch = doWatchService(resourceVersion);
350352
logger.info("[{}/{}] Service watcher is started.", namespace, serviceName);
351353
}
@@ -391,7 +393,7 @@ public void onClose(WatcherException cause) {
391393
logger.warn("[{}/{}] Service watcher is closed.", namespace, serviceName, cause);
392394

393395
// Immediately retry on the first failure.
394-
scheduleRestartWithBackoff(numServiceFailures++);
396+
scheduleRestartWithBackoff(++numServiceFailures);
395397
}
396398

397399
@Override
@@ -437,7 +439,8 @@ private boolean updateService(Service service) {
437439
}
438440

439441
private void watchPod(String resourceVersion) {
440-
logger.info("[{}/{}] Start the pod watcher...", namespace, serviceName);
442+
logger.info("[{}/{}] Start the pod watcher... (resource version: {})", namespace, serviceName,
443+
resourceVersion);
441444
podWatch = doWatchPod(resourceVersion);
442445
logger.info("[{}/{}] Pod watcher is started.", namespace, serviceName);
443446
}
@@ -464,7 +467,7 @@ public void onClose(WatcherException cause) {
464467
}
465468

466469
logger.warn("[{}/{}] Pod watcher is closed.", namespace, serviceName, cause);
467-
scheduleRestartWithBackoff(numPodFailures++);
470+
scheduleRestartWithBackoff(++numPodFailures);
468471
}
469472

470473
@Override
@@ -515,7 +518,8 @@ private boolean updatePod(Action action, Pod resource) {
515518
}
516519

517520
private void watchNode(String resourceVersion) {
518-
logger.info("[{}/{}] Start the node watcher...", namespace, serviceName);
521+
logger.info("[{}/{}] Start the node watcher... (resource version: {})", namespace, serviceName,
522+
resourceVersion);
519523
nodeWatch = doWatchNode(resourceVersion);
520524
logger.info("[{}/{}] Node watcher is started.", namespace, serviceName);
521525
}
@@ -544,7 +548,7 @@ public void onClose(WatcherException cause) {
544548
return;
545549
}
546550
logger.warn("[{}/{}] Node watcher is closed.", namespace, serviceName, cause);
547-
scheduleRestartWithBackoff(numNodeFailures++);
551+
scheduleRestartWithBackoff(++numNodeFailures);
548552
}
549553

550554
@Override
@@ -562,8 +566,8 @@ private boolean updateNode(Action action, Node node) {
562566
}
563567

564568
final String nodeName = node.getMetadata().getName();
565-
logger.debug("[{}/{}] Node event received. action: {}, node: {}",
566-
namespace, serviceName, action, nodeName);
569+
logger.debug("[{}/{}] Node event received. action: {}, node: {}, resource version: {}",
570+
namespace, serviceName, action, nodeName, node.getMetadata().getResourceVersion());
567571
switch (action) {
568572
case ADDED:
569573
case MODIFIED:
@@ -596,6 +600,8 @@ private ScheduledFuture<?> scheduleJob(Runnable job, long delayMillis) {
596600

597601
private void scheduleRestartWithBackoff(int numFailures) {
598602
final long delayMillis = delayMillis(numFailures);
603+
logger.info("[{}/{}] Reconnecting to the Kubernetes API in {} ms (numFailures: {})",
604+
namespace, serviceName, delayMillis, numFailures);
599605
scheduleRestart(delayMillis);
600606
}
601607

@@ -631,11 +637,12 @@ private Runnable safeRunnable(Runnable job) {
631637
};
632638
}
633639

634-
private static long delayMillis(int numAttempts) {
635-
if (numAttempts == 0) {
640+
private static long delayMillis(int numFailures) {
641+
if (numFailures == 1) {
642+
// Retry immediately on the first failure.
636643
return 0;
637644
}
638-
return Backoff.ofDefault().nextDelayMillis(numAttempts);
645+
return Backoff.ofDefault().nextDelayMillis(numFailures - 1);
639646
}
640647

641648
private void maybeUpdateEndpoints() {

0 commit comments

Comments
 (0)