Yuta HIGUCHI

revisiting DeviceManager role handling

Change-Id: If7765b38e2eda99ca210316429a8e65d482c2791
...@@ -17,6 +17,7 @@ package org.onlab.onos.net.device.impl; ...@@ -17,6 +17,7 @@ package org.onlab.onos.net.device.impl;
17 17
18 import static com.google.common.base.Preconditions.checkNotNull; 18 import static com.google.common.base.Preconditions.checkNotNull;
19 import static org.onlab.onos.net.device.DeviceEvent.Type.DEVICE_MASTERSHIP_CHANGED; 19 import static org.onlab.onos.net.device.DeviceEvent.Type.DEVICE_MASTERSHIP_CHANGED;
20 +import static org.onlab.onos.net.MastershipRole.*;
20 import static org.slf4j.LoggerFactory.getLogger; 21 import static org.slf4j.LoggerFactory.getLogger;
21 22
22 import java.util.List; 23 import java.util.List;
...@@ -29,7 +30,6 @@ import org.apache.felix.scr.annotations.ReferenceCardinality; ...@@ -29,7 +30,6 @@ import org.apache.felix.scr.annotations.ReferenceCardinality;
29 import org.apache.felix.scr.annotations.Service; 30 import org.apache.felix.scr.annotations.Service;
30 import org.onlab.onos.cluster.ClusterService; 31 import org.onlab.onos.cluster.ClusterService;
31 import org.onlab.onos.cluster.NodeId; 32 import org.onlab.onos.cluster.NodeId;
32 -import org.onlab.onos.cluster.RoleInfo;
33 import org.onlab.onos.event.AbstractListenerRegistry; 33 import org.onlab.onos.event.AbstractListenerRegistry;
34 import org.onlab.onos.event.EventDeliveryService; 34 import org.onlab.onos.event.EventDeliveryService;
35 import org.onlab.onos.mastership.MastershipEvent; 35 import org.onlab.onos.mastership.MastershipEvent;
...@@ -59,8 +59,6 @@ import org.onlab.onos.net.provider.AbstractProviderRegistry; ...@@ -59,8 +59,6 @@ import org.onlab.onos.net.provider.AbstractProviderRegistry;
59 import org.onlab.onos.net.provider.AbstractProviderService; 59 import org.onlab.onos.net.provider.AbstractProviderService;
60 import org.slf4j.Logger; 60 import org.slf4j.Logger;
61 61
62 -import com.google.common.collect.HashMultimap;
63 -
64 /** 62 /**
65 * Provides implementation of the device SB & NB APIs. 63 * Provides implementation of the device SB & NB APIs.
66 */ 64 */
...@@ -160,31 +158,6 @@ public class DeviceManager ...@@ -160,31 +158,6 @@ public class DeviceManager
160 return store.isAvailable(deviceId); 158 return store.isAvailable(deviceId);
161 } 159 }
162 160
163 - // Applies the specified role to the device; ignores NONE
164 - private void applyRole(DeviceId deviceId, MastershipRole newRole) {
165 - if (newRole.equals(MastershipRole.NONE)) {
166 - return;
167 - }
168 -
169 - Device device = store.getDevice(deviceId);
170 - // FIXME: Device might not be there yet. (eventual consistent)
171 - if (device == null) {
172 - return;
173 - }
174 -
175 - DeviceProvider provider = getProvider(device.providerId());
176 - if (provider != null) {
177 - provider.roleChanged(device, newRole);
178 - // only trigger event when request was sent to provider
179 - // TODO: consider removing this from Device event type?
180 - post(new DeviceEvent(DEVICE_MASTERSHIP_CHANGED, device));
181 -
182 - if (newRole.equals(MastershipRole.MASTER)) {
183 - provider.triggerProbe(device);
184 - }
185 - }
186 - }
187 -
188 // Check a device for control channel connectivity. 161 // Check a device for control channel connectivity.
189 private boolean isReachable(Device device) { 162 private boolean isReachable(Device device) {
190 // FIXME: Device might not be there yet. (eventual consistent) 163 // FIXME: Device might not be there yet. (eventual consistent)
...@@ -234,6 +207,38 @@ public class DeviceManager ...@@ -234,6 +207,38 @@ public class DeviceManager
234 super(provider); 207 super(provider);
235 } 208 }
236 209
210 + /**
211 + * Apply role in reaction to provider event.
212 + *
213 + * @param deviceId device identifier
214 + * @param newRole new role to apply to the device
215 + * @return true if the request was sent to provider
216 + */
217 + private boolean applyRole(DeviceId deviceId, MastershipRole newRole) {
218 +
219 + if (newRole.equals(MastershipRole.NONE)) {
220 + //no-op
221 + return true;
222 + }
223 +
224 + DeviceProvider provider = provider();
225 + if (provider == null) {
226 + log.warn("Provider for {} was not found. Cannot apply role {}", deviceId, newRole);
227 + return false;
228 + }
229 + // FIXME roleChanged should take DeviceId instead of Device
230 + Device device = store.getDevice(deviceId);
231 + if (device == null) {
232 + log.warn("{} was not there. Cannot apply role {}", deviceId, newRole);
233 + return false;
234 + }
235 + provider.roleChanged(device, newRole);
236 +
237 + // not triggering prove when triggered by provider service
238 + return true;
239 + }
240 +
241 +
237 @Override 242 @Override
238 public void deviceConnected(DeviceId deviceId, 243 public void deviceConnected(DeviceId deviceId,
239 DeviceDescription deviceDescription) { 244 DeviceDescription deviceDescription) {
...@@ -242,27 +247,16 @@ public class DeviceManager ...@@ -242,27 +247,16 @@ public class DeviceManager
242 checkValidity(); 247 checkValidity();
243 248
244 log.info("Device {} connected", deviceId); 249 log.info("Device {} connected", deviceId);
245 - // check my Role
246 - MastershipRole role = mastershipService.requestRoleFor(deviceId);
247 - if (role != MastershipRole.MASTER) {
248 - // TODO: Do we need to explicitly tell the Provider that
249 - // this instance is no longer the MASTER? probably not
250 -// Device device = getDevice(deviceId);
251 -// if (device != null) {
252 -// // FIXME roleChanged should take DeviceId instead of Device
253 -// provider().roleChanged(device, role);
254 -// }
255 - return;
256 - }
257 - MastershipTerm term = termService.getMastershipTerm(deviceId);
258 -
259 final NodeId myNodeId = clusterService.getLocalNode().id(); 250 final NodeId myNodeId = clusterService.getLocalNode().id();
251 +
252 + // check my Role
253 + mastershipService.requestRoleFor(deviceId);
254 + final MastershipTerm term = termService.getMastershipTerm(deviceId);
260 if (!myNodeId.equals(term.master())) { 255 if (!myNodeId.equals(term.master())) {
261 - // lost mastership after requestRole told this instance was MASTER.
262 log.info("Role of this node is STANDBY for {}", deviceId); 256 log.info("Role of this node is STANDBY for {}", deviceId);
263 // TODO: Do we need to explicitly tell the Provider that 257 // TODO: Do we need to explicitly tell the Provider that
264 - // this instance is no longer the MASTER? 258 + // this instance is not the MASTER
265 - //applyRole(deviceId, MastershipRole.STANDBY); 259 + applyRole(deviceId, MastershipRole.STANDBY);
266 return; 260 return;
267 } 261 }
268 log.info("Role of this node is MASTER for {}", deviceId); 262 log.info("Role of this node is MASTER for {}", deviceId);
...@@ -273,24 +267,15 @@ public class DeviceManager ...@@ -273,24 +267,15 @@ public class DeviceManager
273 DeviceEvent event = store.createOrUpdateDevice(provider().id(), 267 DeviceEvent event = store.createOrUpdateDevice(provider().id(),
274 deviceId, deviceDescription); 268 deviceId, deviceDescription);
275 269
270 + applyRole(deviceId, MastershipRole.MASTER);
271 +
276 // If there was a change of any kind, tell the provider 272 // If there was a change of any kind, tell the provider
277 // that this instance is the master. 273 // that this instance is the master.
278 - // Note: event can be null, if mastership was lost between
279 - // roleRequest and store update calls.
280 if (event != null) { 274 if (event != null) {
281 - // TODO: Check switch reconnected case. Is it assured that 275 + log.trace("event: {} {}", event.type(), event);
282 - // event will never be null?
283 - // Could there be a situation MastershipService told this
284 - // instance is the new Master, but
285 - // event returned from the store is null?
286 -
287 - // FIXME: 1st argument should be deviceId, to allow setting
288 - // certain roles even if the store returned null.
289 - log.info("event: {} {}", event.type(), event);
290 - provider().roleChanged(event.subject(), role);
291 post(event); 276 post(event);
292 } else { 277 } else {
293 - log.info("No event to publish"); 278 + post(new DeviceEvent(DEVICE_MASTERSHIP_CHANGED, store.getDevice(deviceId)));
294 } 279 }
295 } 280 }
296 281
...@@ -441,118 +426,133 @@ public class DeviceManager ...@@ -441,118 +426,133 @@ public class DeviceManager
441 // Intercepts mastership events 426 // Intercepts mastership events
442 private class InternalMastershipListener implements MastershipListener { 427 private class InternalMastershipListener implements MastershipListener {
443 428
444 - // random cache size 429 + // Applies the specified role to the device; ignores NONE
445 - private final int cacheSize = 5; 430 + /**
446 - // temporarily stores term number + events to check for duplicates. A hack. 431 + * Apply role in reaction to mastership event.
447 - private HashMultimap<Integer, RoleInfo> eventCache = 432 + *
448 - HashMultimap.create(); 433 + * @param deviceId device identifier
434 + * @param newRole new role to apply to the device
435 + * @return true if the request was sent to provider
436 + */
437 + private boolean applyRole(DeviceId deviceId, MastershipRole newRole) {
438 + if (newRole.equals(MastershipRole.NONE)) {
439 + //no-op
440 + return true;
441 + }
442 +
443 + Device device = store.getDevice(deviceId);
444 + // FIXME: Device might not be there yet. (eventual consistent)
445 + if (device == null) {
446 + log.warn("{} was not there. Cannot apply role {}", deviceId, newRole);
447 + return false;
448 + }
449 +
450 + DeviceProvider provider = getProvider(device.providerId());
451 + if (provider == null) {
452 + log.warn("Provider for {} was not found. Cannot apply role {}", deviceId, newRole);
453 + return false;
454 + }
455 + // FIXME roleChanged should take DeviceId instead of Device
456 + provider.roleChanged(device, newRole);
457 +
458 + if (newRole.equals(MastershipRole.MASTER)) {
459 + // only trigger event when request was sent to provider
460 + // TODO: consider removing this from Device event type?
461 + post(new DeviceEvent(DEVICE_MASTERSHIP_CHANGED, device));
462 +
463 + provider.triggerProbe(device);
464 + }
465 + return true;
466 + }
449 467
450 @Override 468 @Override
451 public void event(MastershipEvent event) { 469 public void event(MastershipEvent event) {
470 +
471 + if (event.type() != MastershipEvent.Type.MASTER_CHANGED) {
472 + // Don't care if backup list changed.
473 + return;
474 + }
475 +
452 final DeviceId did = event.subject(); 476 final DeviceId did = event.subject();
453 final NodeId myNodeId = clusterService.getLocalNode().id(); 477 final NodeId myNodeId = clusterService.getLocalNode().id();
454 478
479 + // myRole suggested by MastershipService
480 + MastershipRole myNextRole;
455 if (myNodeId.equals(event.roleInfo().master())) { 481 if (myNodeId.equals(event.roleInfo().master())) {
482 + // confirm latest info
456 MastershipTerm term = termService.getMastershipTerm(did); 483 MastershipTerm term = termService.getMastershipTerm(did);
457 - 484 + final boolean iHaveControl = myNodeId.equals(term.master());
458 - // TODO duplicate suppression should probably occur in the MastershipManager 485 + if (iHaveControl) {
459 - // itself, so listeners that can't deal with duplicates don't have to 486 + deviceClockProviderService.setMastershipTerm(did, term);
460 - // so this check themselves. 487 + myNextRole = MASTER;
461 -// if (checkDuplicate(event.roleInfo(), term.termNumber())) { 488 + } else {
462 -// return; 489 + myNextRole = STANDBY;
463 -// }
464 -
465 - if (!myNodeId.equals(term.master())) {
466 - // something went wrong in consistency, let go
467 - log.warn("Mastership has changed after this event."
468 - + "Term Service suggests {} for {}", term, did);
469 - // FIXME: Is it possible to let go of MASTER role
470 - // but remain on STANDBY list?
471 - mastershipService.relinquishMastership(did);
472 - applyRole(did, MastershipRole.STANDBY);
473 - return;
474 } 490 }
491 + } else if (event.roleInfo().backups().contains(myNodeId)) {
492 + myNextRole = STANDBY;
493 + } else {
494 + myNextRole = NONE;
495 + }
475 496
476 - // only set the new term if I am the master
477 - deviceClockProviderService.setMastershipTerm(did, term);
478 497
479 - // if the device is null here, we are the first master to claim the 498 + final Device device = getDevice(did);
480 - // device. No worries, the DeviceManager will create one soon. 499 + final boolean isReachable = isReachable(device);
481 - Device device = getDevice(did); 500 + if (!isReachable) {
482 - if ((device != null) && !isAvailable(did)) { 501 + // device is not connected to this node
483 - if (!isReachable(device)) { 502 + if (myNextRole != NONE) {
484 - log.warn("Device {} has disconnected after this event", did); 503 + log.warn("Node was instructed to be {} role for {}, "
485 - mastershipService.relinquishMastership(did); 504 + + "but this node cannot reach the device. "
486 - return; 505 + + "Relinquishing role. ",
487 - } 506 + myNextRole, did);
488 - //flag the device as online. Is there a better way to do this?
489 - DeviceEvent devEvent =
490 - store.createOrUpdateDevice(device.providerId(), did,
491 - new DefaultDeviceDescription(
492 - did.uri(), device.type(), device.manufacturer(),
493 - device.hwVersion(), device.swVersion(),
494 - device.serialNumber(), device.chassisId()));
495 - post(devEvent);
496 - }
497 - applyRole(did, MastershipRole.MASTER);
498 - } else if (event.roleInfo().backups().contains(myNodeId)) {
499 - if (!isReachable(getDevice(did))) {
500 - log.warn("Device {} has disconnected after this event", did);
501 mastershipService.relinquishMastership(did); 507 mastershipService.relinquishMastership(did);
502 - return;
503 - }
504 - applyRole(did, MastershipRole.STANDBY);
505 - } else {
506 - // Event suggests that this Node has no connection to this Device
507 - // confirm.
508 - final Device device = getDevice(did);
509 - if (!isReachable(device)) {
510 - // not connection to device, as expected
511 - return;
512 } 508 }
513 - // connection seems to exist 509 + return;
514 - log.info("Detected mastership info mismatch, requesting Role"); 510 + }
511 +
512 + // device is connected to this node:
513 +
514 + if (myNextRole == NONE) {
515 mastershipService.requestRoleFor(did); 515 mastershipService.requestRoleFor(did);
516 - final MastershipTerm term = termService.getMastershipTerm(did); 516 + MastershipTerm term = termService.getMastershipTerm(did);
517 if (myNodeId.equals(term.master())) { 517 if (myNodeId.equals(term.master())) {
518 - // became MASTER 518 + myNextRole = MASTER;
519 - // TODO: consider slicing out method for applying MASTER role 519 + } else {
520 - deviceClockProviderService.setMastershipTerm(did, term); 520 + myNextRole = STANDBY;
521 + }
522 + }
521 523
524 + switch (myNextRole) {
525 + case MASTER:
526 + if ((device != null) && !isAvailable(did)) {
522 //flag the device as online. Is there a better way to do this? 527 //flag the device as online. Is there a better way to do this?
528 + DefaultDeviceDescription deviceDescription
529 + = new DefaultDeviceDescription(did.uri(),
530 + device.type(),
531 + device.manufacturer(),
532 + device.hwVersion(),
533 + device.swVersion(),
534 + device.serialNumber(),
535 + device.chassisId());
523 DeviceEvent devEvent = 536 DeviceEvent devEvent =
524 store.createOrUpdateDevice(device.providerId(), did, 537 store.createOrUpdateDevice(device.providerId(), did,
525 - new DefaultDeviceDescription( 538 + deviceDescription);
526 - did.uri(), device.type(), device.manufacturer(),
527 - device.hwVersion(), device.swVersion(),
528 - device.serialNumber(), device.chassisId()));
529 - applyRole(did, MastershipRole.MASTER);
530 post(devEvent); 539 post(devEvent);
531 - } else {
532 - applyRole(did, MastershipRole.STANDBY);
533 } 540 }
541 + // TODO: should apply role only if there is mismatch
542 + log.info("Applying role {} to {}", myNextRole, did);
543 + applyRole(did, MASTER);
544 + break;
545 + case STANDBY:
546 + log.info("Applying role {} to {}", myNextRole, did);
547 + applyRole(did, STANDBY);
548 + break;
549 + case NONE:
550 + default:
551 + // should never reach here
552 + log.error("You didn't see anything. I did not exist.");
553 + break;
534 } 554 }
535 } 555 }
536 -
537 - // checks for duplicate event, returning true if one is found.
538 - private boolean checkDuplicate(RoleInfo roleInfo, int term) {
539 - // turning off duplicate check
540 - return false;
541 -// synchronized (eventCache) {
542 -// if (eventCache.get(term).contains(roleInfo)) {
543 -// log.info("duplicate event detected; ignoring");
544 -// return true;
545 -// } else {
546 -// eventCache.put(term, roleInfo);
547 -// // purge by-term oldest entries to keep the cache size under limit
548 -// if (eventCache.size() > cacheSize) {
549 -// eventCache.removeAll(term - cacheSize);
550 -// }
551 -// return false;
552 -// }
553 -// }
554 - }
555 -
556 } 556 }
557 557
558 // Store delegate to re-post events emitted from the store. 558 // Store delegate to re-post events emitted from the store.
......