Madan Jampani
Committed by Gerrit Code Review

ONOS-2015: Randomly choose retry delay to avoid update contention

Change-Id: I35db8126bb8daa832aecd5696094e5b884853bdd
...@@ -7,6 +7,7 @@ import com.google.common.collect.Lists; ...@@ -7,6 +7,7 @@ import com.google.common.collect.Lists;
7 import com.google.common.collect.Maps; 7 import com.google.common.collect.Maps;
8 import com.google.common.collect.Sets; 8 import com.google.common.collect.Sets;
9 9
10 +import org.apache.commons.lang.math.RandomUtils;
10 import org.apache.felix.scr.annotations.Activate; 11 import org.apache.felix.scr.annotations.Activate;
11 import org.apache.felix.scr.annotations.Component; 12 import org.apache.felix.scr.annotations.Component;
12 import org.apache.felix.scr.annotations.Deactivate; 13 import org.apache.felix.scr.annotations.Deactivate;
...@@ -102,7 +103,8 @@ public class DistributedLeadershipManager implements LeadershipService { ...@@ -102,7 +103,8 @@ public class DistributedLeadershipManager implements LeadershipService {
102 private Set<String> activeTopics = Sets.newConcurrentHashSet(); 103 private Set<String> activeTopics = Sets.newConcurrentHashSet();
103 private Map<String, CompletableFuture<Leadership>> pendingFutures = Maps.newConcurrentMap(); 104 private Map<String, CompletableFuture<Leadership>> pendingFutures = Maps.newConcurrentMap();
104 105
105 - private static final int ELECTION_JOIN_ATTEMPT_INTERVAL_SEC = 2; 106 + // The actual delay is randomly chosen from the interval [0, WAIT_BEFORE_RETRY_MILLIS)
107 + private static final int WAIT_BEFORE_RETRY_MILLIS = 150;
106 private static final int DELAY_BETWEEN_LEADER_LOCK_ATTEMPTS_SEC = 2; 108 private static final int DELAY_BETWEEN_LEADER_LOCK_ATTEMPTS_SEC = 2;
107 private static final int LEADERSHIP_STATUS_UPDATE_INTERVAL_SEC = 2; 109 private static final int LEADERSHIP_STATUS_UPDATE_INTERVAL_SEC = 2;
108 private static final int DELAY_BETWEEN_STALE_LEADERSHIP_PURGE_ATTEMPTS_SEC = 2; 110 private static final int DELAY_BETWEEN_STALE_LEADERSHIP_PURGE_ATTEMPTS_SEC = 2;
...@@ -302,7 +304,7 @@ public class DistributedLeadershipManager implements LeadershipService { ...@@ -302,7 +304,7 @@ public class DistributedLeadershipManager implements LeadershipService {
302 newCandidates.version(), 304 newCandidates.version(),
303 newCandidates.creationTime()))); 305 newCandidates.creationTime())));
304 } else { 306 } else {
305 - log.warn("Failed to withdraw from candidates list for {}. Will retry", path); 307 + log.debug("Failed to withdraw from candidates list for {}. Will retry", path);
306 retryWithdraw(path, future); 308 retryWithdraw(path, future);
307 } 309 }
308 } catch (Exception e) { 310 } catch (Exception e) {
...@@ -480,15 +482,15 @@ public class DistributedLeadershipManager implements LeadershipService { ...@@ -480,15 +482,15 @@ public class DistributedLeadershipManager implements LeadershipService {
480 private void rerunForLeadership(String path, CompletableFuture<Leadership> future) { 482 private void rerunForLeadership(String path, CompletableFuture<Leadership> future) {
481 lockExecutor.schedule( 483 lockExecutor.schedule(
482 () -> doRunForLeadership(path, future), 484 () -> doRunForLeadership(path, future),
483 - ELECTION_JOIN_ATTEMPT_INTERVAL_SEC, 485 + RandomUtils.nextInt(WAIT_BEFORE_RETRY_MILLIS),
484 - TimeUnit.SECONDS); 486 + TimeUnit.MILLISECONDS);
485 } 487 }
486 488
487 private void retryWithdraw(String path, CompletableFuture<Void> future) { 489 private void retryWithdraw(String path, CompletableFuture<Void> future) {
488 lockExecutor.schedule( 490 lockExecutor.schedule(
489 () -> doWithdraw(path, future), 491 () -> doWithdraw(path, future),
490 - DELAY_BETWEEN_LEADER_LOCK_ATTEMPTS_SEC, 492 + RandomUtils.nextInt(WAIT_BEFORE_RETRY_MILLIS),
491 - TimeUnit.SECONDS); 493 + TimeUnit.MILLISECONDS);
492 } 494 }
493 495
494 private void scheduleStaleLeadershipPurge(int afterDelaySec) { 496 private void scheduleStaleLeadershipPurge(int afterDelaySec) {
......