revisiting DeviceManager role handling
Change-Id: If7765b38e2eda99ca210316429a8e65d482c2791
Showing
1 changed file
with
145 additions
and
145 deletions
... | @@ -17,6 +17,7 @@ package org.onlab.onos.net.device.impl; | ... | @@ -17,6 +17,7 @@ package org.onlab.onos.net.device.impl; |
17 | 17 | ||
18 | import static com.google.common.base.Preconditions.checkNotNull; | 18 | import static com.google.common.base.Preconditions.checkNotNull; |
19 | import static org.onlab.onos.net.device.DeviceEvent.Type.DEVICE_MASTERSHIP_CHANGED; | 19 | import static org.onlab.onos.net.device.DeviceEvent.Type.DEVICE_MASTERSHIP_CHANGED; |
20 | +import static org.onlab.onos.net.MastershipRole.*; | ||
20 | import static org.slf4j.LoggerFactory.getLogger; | 21 | import static org.slf4j.LoggerFactory.getLogger; |
21 | 22 | ||
22 | import java.util.List; | 23 | import java.util.List; |
... | @@ -29,7 +30,6 @@ import org.apache.felix.scr.annotations.ReferenceCardinality; | ... | @@ -29,7 +30,6 @@ import org.apache.felix.scr.annotations.ReferenceCardinality; |
29 | import org.apache.felix.scr.annotations.Service; | 30 | import org.apache.felix.scr.annotations.Service; |
30 | import org.onlab.onos.cluster.ClusterService; | 31 | import org.onlab.onos.cluster.ClusterService; |
31 | import org.onlab.onos.cluster.NodeId; | 32 | import org.onlab.onos.cluster.NodeId; |
32 | -import org.onlab.onos.cluster.RoleInfo; | ||
33 | import org.onlab.onos.event.AbstractListenerRegistry; | 33 | import org.onlab.onos.event.AbstractListenerRegistry; |
34 | import org.onlab.onos.event.EventDeliveryService; | 34 | import org.onlab.onos.event.EventDeliveryService; |
35 | import org.onlab.onos.mastership.MastershipEvent; | 35 | import org.onlab.onos.mastership.MastershipEvent; |
... | @@ -59,8 +59,6 @@ import org.onlab.onos.net.provider.AbstractProviderRegistry; | ... | @@ -59,8 +59,6 @@ import org.onlab.onos.net.provider.AbstractProviderRegistry; |
59 | import org.onlab.onos.net.provider.AbstractProviderService; | 59 | import org.onlab.onos.net.provider.AbstractProviderService; |
60 | import org.slf4j.Logger; | 60 | import org.slf4j.Logger; |
61 | 61 | ||
62 | -import com.google.common.collect.HashMultimap; | ||
63 | - | ||
64 | /** | 62 | /** |
65 | * Provides implementation of the device SB & NB APIs. | 63 | * Provides implementation of the device SB & NB APIs. |
66 | */ | 64 | */ |
... | @@ -160,31 +158,6 @@ public class DeviceManager | ... | @@ -160,31 +158,6 @@ public class DeviceManager |
160 | return store.isAvailable(deviceId); | 158 | return store.isAvailable(deviceId); |
161 | } | 159 | } |
162 | 160 | ||
163 | - // Applies the specified role to the device; ignores NONE | ||
164 | - private void applyRole(DeviceId deviceId, MastershipRole newRole) { | ||
165 | - if (newRole.equals(MastershipRole.NONE)) { | ||
166 | - return; | ||
167 | - } | ||
168 | - | ||
169 | - Device device = store.getDevice(deviceId); | ||
170 | - // FIXME: Device might not be there yet. (eventual consistent) | ||
171 | - if (device == null) { | ||
172 | - return; | ||
173 | - } | ||
174 | - | ||
175 | - DeviceProvider provider = getProvider(device.providerId()); | ||
176 | - if (provider != null) { | ||
177 | - provider.roleChanged(device, newRole); | ||
178 | - // only trigger event when request was sent to provider | ||
179 | - // TODO: consider removing this from Device event type? | ||
180 | - post(new DeviceEvent(DEVICE_MASTERSHIP_CHANGED, device)); | ||
181 | - | ||
182 | - if (newRole.equals(MastershipRole.MASTER)) { | ||
183 | - provider.triggerProbe(device); | ||
184 | - } | ||
185 | - } | ||
186 | - } | ||
187 | - | ||
188 | // Check a device for control channel connectivity. | 161 | // Check a device for control channel connectivity. |
189 | private boolean isReachable(Device device) { | 162 | private boolean isReachable(Device device) { |
190 | // FIXME: Device might not be there yet. (eventual consistent) | 163 | // FIXME: Device might not be there yet. (eventual consistent) |
... | @@ -234,6 +207,38 @@ public class DeviceManager | ... | @@ -234,6 +207,38 @@ public class DeviceManager |
234 | super(provider); | 207 | super(provider); |
235 | } | 208 | } |
236 | 209 | ||
210 | + /** | ||
211 | + * Apply role in reaction to provider event. | ||
212 | + * | ||
213 | + * @param deviceId device identifier | ||
214 | + * @param newRole new role to apply to the device | ||
215 | + * @return true if the request was sent to provider | ||
216 | + */ | ||
217 | + private boolean applyRole(DeviceId deviceId, MastershipRole newRole) { | ||
218 | + | ||
219 | + if (newRole.equals(MastershipRole.NONE)) { | ||
220 | + //no-op | ||
221 | + return true; | ||
222 | + } | ||
223 | + | ||
224 | + DeviceProvider provider = provider(); | ||
225 | + if (provider == null) { | ||
226 | + log.warn("Provider for {} was not found. Cannot apply role {}", deviceId, newRole); | ||
227 | + return false; | ||
228 | + } | ||
229 | + // FIXME roleChanged should take DeviceId instead of Device | ||
230 | + Device device = store.getDevice(deviceId); | ||
231 | + if (device == null) { | ||
232 | + log.warn("{} was not there. Cannot apply role {}", deviceId, newRole); | ||
233 | + return false; | ||
234 | + } | ||
235 | + provider.roleChanged(device, newRole); | ||
236 | + | ||
237 | + // not triggering prove when triggered by provider service | ||
238 | + return true; | ||
239 | + } | ||
240 | + | ||
241 | + | ||
237 | @Override | 242 | @Override |
238 | public void deviceConnected(DeviceId deviceId, | 243 | public void deviceConnected(DeviceId deviceId, |
239 | DeviceDescription deviceDescription) { | 244 | DeviceDescription deviceDescription) { |
... | @@ -242,27 +247,16 @@ public class DeviceManager | ... | @@ -242,27 +247,16 @@ public class DeviceManager |
242 | checkValidity(); | 247 | checkValidity(); |
243 | 248 | ||
244 | log.info("Device {} connected", deviceId); | 249 | log.info("Device {} connected", deviceId); |
245 | - // check my Role | ||
246 | - MastershipRole role = mastershipService.requestRoleFor(deviceId); | ||
247 | - if (role != MastershipRole.MASTER) { | ||
248 | - // TODO: Do we need to explicitly tell the Provider that | ||
249 | - // this instance is no longer the MASTER? probably not | ||
250 | -// Device device = getDevice(deviceId); | ||
251 | -// if (device != null) { | ||
252 | -// // FIXME roleChanged should take DeviceId instead of Device | ||
253 | -// provider().roleChanged(device, role); | ||
254 | -// } | ||
255 | - return; | ||
256 | - } | ||
257 | - MastershipTerm term = termService.getMastershipTerm(deviceId); | ||
258 | - | ||
259 | final NodeId myNodeId = clusterService.getLocalNode().id(); | 250 | final NodeId myNodeId = clusterService.getLocalNode().id(); |
251 | + | ||
252 | + // check my Role | ||
253 | + mastershipService.requestRoleFor(deviceId); | ||
254 | + final MastershipTerm term = termService.getMastershipTerm(deviceId); | ||
260 | if (!myNodeId.equals(term.master())) { | 255 | if (!myNodeId.equals(term.master())) { |
261 | - // lost mastership after requestRole told this instance was MASTER. | ||
262 | log.info("Role of this node is STANDBY for {}", deviceId); | 256 | log.info("Role of this node is STANDBY for {}", deviceId); |
263 | // TODO: Do we need to explicitly tell the Provider that | 257 | // TODO: Do we need to explicitly tell the Provider that |
264 | - // this instance is no longer the MASTER? | 258 | + // this instance is not the MASTER |
265 | - //applyRole(deviceId, MastershipRole.STANDBY); | 259 | + applyRole(deviceId, MastershipRole.STANDBY); |
266 | return; | 260 | return; |
267 | } | 261 | } |
268 | log.info("Role of this node is MASTER for {}", deviceId); | 262 | log.info("Role of this node is MASTER for {}", deviceId); |
... | @@ -273,24 +267,15 @@ public class DeviceManager | ... | @@ -273,24 +267,15 @@ public class DeviceManager |
273 | DeviceEvent event = store.createOrUpdateDevice(provider().id(), | 267 | DeviceEvent event = store.createOrUpdateDevice(provider().id(), |
274 | deviceId, deviceDescription); | 268 | deviceId, deviceDescription); |
275 | 269 | ||
270 | + applyRole(deviceId, MastershipRole.MASTER); | ||
271 | + | ||
276 | // If there was a change of any kind, tell the provider | 272 | // If there was a change of any kind, tell the provider |
277 | // that this instance is the master. | 273 | // that this instance is the master. |
278 | - // Note: event can be null, if mastership was lost between | ||
279 | - // roleRequest and store update calls. | ||
280 | if (event != null) { | 274 | if (event != null) { |
281 | - // TODO: Check switch reconnected case. Is it assured that | 275 | + log.trace("event: {} {}", event.type(), event); |
282 | - // event will never be null? | ||
283 | - // Could there be a situation MastershipService told this | ||
284 | - // instance is the new Master, but | ||
285 | - // event returned from the store is null? | ||
286 | - | ||
287 | - // FIXME: 1st argument should be deviceId, to allow setting | ||
288 | - // certain roles even if the store returned null. | ||
289 | - log.info("event: {} {}", event.type(), event); | ||
290 | - provider().roleChanged(event.subject(), role); | ||
291 | post(event); | 276 | post(event); |
292 | } else { | 277 | } else { |
293 | - log.info("No event to publish"); | 278 | + post(new DeviceEvent(DEVICE_MASTERSHIP_CHANGED, store.getDevice(deviceId))); |
294 | } | 279 | } |
295 | } | 280 | } |
296 | 281 | ||
... | @@ -441,118 +426,133 @@ public class DeviceManager | ... | @@ -441,118 +426,133 @@ public class DeviceManager |
441 | // Intercepts mastership events | 426 | // Intercepts mastership events |
442 | private class InternalMastershipListener implements MastershipListener { | 427 | private class InternalMastershipListener implements MastershipListener { |
443 | 428 | ||
444 | - // random cache size | 429 | + // Applies the specified role to the device; ignores NONE |
445 | - private final int cacheSize = 5; | 430 | + /** |
446 | - // temporarily stores term number + events to check for duplicates. A hack. | 431 | + * Apply role in reaction to mastership event. |
447 | - private HashMultimap<Integer, RoleInfo> eventCache = | 432 | + * |
448 | - HashMultimap.create(); | 433 | + * @param deviceId device identifier |
434 | + * @param newRole new role to apply to the device | ||
435 | + * @return true if the request was sent to provider | ||
436 | + */ | ||
437 | + private boolean applyRole(DeviceId deviceId, MastershipRole newRole) { | ||
438 | + if (newRole.equals(MastershipRole.NONE)) { | ||
439 | + //no-op | ||
440 | + return true; | ||
441 | + } | ||
449 | 442 | ||
450 | - @Override | 443 | + Device device = store.getDevice(deviceId); |
451 | - public void event(MastershipEvent event) { | 444 | + // FIXME: Device might not be there yet. (eventual consistent) |
452 | - final DeviceId did = event.subject(); | 445 | + if (device == null) { |
453 | - final NodeId myNodeId = clusterService.getLocalNode().id(); | 446 | + log.warn("{} was not there. Cannot apply role {}", deviceId, newRole); |
447 | + return false; | ||
448 | + } | ||
454 | 449 | ||
455 | - if (myNodeId.equals(event.roleInfo().master())) { | 450 | + DeviceProvider provider = getProvider(device.providerId()); |
456 | - MastershipTerm term = termService.getMastershipTerm(did); | 451 | + if (provider == null) { |
452 | + log.warn("Provider for {} was not found. Cannot apply role {}", deviceId, newRole); | ||
453 | + return false; | ||
454 | + } | ||
455 | + // FIXME roleChanged should take DeviceId instead of Device | ||
456 | + provider.roleChanged(device, newRole); | ||
457 | 457 | ||
458 | - // TODO duplicate suppression should probably occur in the MastershipManager | 458 | + if (newRole.equals(MastershipRole.MASTER)) { |
459 | - // itself, so listeners that can't deal with duplicates don't have to | 459 | + // only trigger event when request was sent to provider |
460 | - // so this check themselves. | 460 | + // TODO: consider removing this from Device event type? |
461 | -// if (checkDuplicate(event.roleInfo(), term.termNumber())) { | 461 | + post(new DeviceEvent(DEVICE_MASTERSHIP_CHANGED, device)); |
462 | -// return; | ||
463 | -// } | ||
464 | 462 | ||
465 | - if (!myNodeId.equals(term.master())) { | 463 | + provider.triggerProbe(device); |
466 | - // something went wrong in consistency, let go | 464 | + } |
467 | - log.warn("Mastership has changed after this event." | 465 | + return true; |
468 | - + "Term Service suggests {} for {}", term, did); | ||
469 | - // FIXME: Is it possible to let go of MASTER role | ||
470 | - // but remain on STANDBY list? | ||
471 | - mastershipService.relinquishMastership(did); | ||
472 | - applyRole(did, MastershipRole.STANDBY); | ||
473 | - return; | ||
474 | } | 466 | } |
475 | 467 | ||
476 | - // only set the new term if I am the master | 468 | + @Override |
477 | - deviceClockProviderService.setMastershipTerm(did, term); | 469 | + public void event(MastershipEvent event) { |
478 | 470 | ||
479 | - // if the device is null here, we are the first master to claim the | 471 | + if (event.type() != MastershipEvent.Type.MASTER_CHANGED) { |
480 | - // device. No worries, the DeviceManager will create one soon. | 472 | + // Don't care if backup list changed. |
481 | - Device device = getDevice(did); | ||
482 | - if ((device != null) && !isAvailable(did)) { | ||
483 | - if (!isReachable(device)) { | ||
484 | - log.warn("Device {} has disconnected after this event", did); | ||
485 | - mastershipService.relinquishMastership(did); | ||
486 | return; | 473 | return; |
487 | } | 474 | } |
488 | - //flag the device as online. Is there a better way to do this? | 475 | + |
489 | - DeviceEvent devEvent = | 476 | + final DeviceId did = event.subject(); |
490 | - store.createOrUpdateDevice(device.providerId(), did, | 477 | + final NodeId myNodeId = clusterService.getLocalNode().id(); |
491 | - new DefaultDeviceDescription( | 478 | + |
492 | - did.uri(), device.type(), device.manufacturer(), | 479 | + // myRole suggested by MastershipService |
493 | - device.hwVersion(), device.swVersion(), | 480 | + MastershipRole myNextRole; |
494 | - device.serialNumber(), device.chassisId())); | 481 | + if (myNodeId.equals(event.roleInfo().master())) { |
495 | - post(devEvent); | 482 | + // confirm latest info |
483 | + MastershipTerm term = termService.getMastershipTerm(did); | ||
484 | + final boolean iHaveControl = myNodeId.equals(term.master()); | ||
485 | + if (iHaveControl) { | ||
486 | + deviceClockProviderService.setMastershipTerm(did, term); | ||
487 | + myNextRole = MASTER; | ||
488 | + } else { | ||
489 | + myNextRole = STANDBY; | ||
496 | } | 490 | } |
497 | - applyRole(did, MastershipRole.MASTER); | ||
498 | } else if (event.roleInfo().backups().contains(myNodeId)) { | 491 | } else if (event.roleInfo().backups().contains(myNodeId)) { |
499 | - if (!isReachable(getDevice(did))) { | 492 | + myNextRole = STANDBY; |
500 | - log.warn("Device {} has disconnected after this event", did); | ||
501 | - mastershipService.relinquishMastership(did); | ||
502 | - return; | ||
503 | - } | ||
504 | - applyRole(did, MastershipRole.STANDBY); | ||
505 | } else { | 493 | } else { |
506 | - // Event suggests that this Node has no connection to this Device | 494 | + myNextRole = NONE; |
507 | - // confirm. | 495 | + } |
496 | + | ||
497 | + | ||
508 | final Device device = getDevice(did); | 498 | final Device device = getDevice(did); |
509 | - if (!isReachable(device)) { | 499 | + final boolean isReachable = isReachable(device); |
510 | - // not connection to device, as expected | 500 | + if (!isReachable) { |
501 | + // device is not connected to this node | ||
502 | + if (myNextRole != NONE) { | ||
503 | + log.warn("Node was instructed to be {} role for {}, " | ||
504 | + + "but this node cannot reach the device. " | ||
505 | + + "Relinquishing role. ", | ||
506 | + myNextRole, did); | ||
507 | + mastershipService.relinquishMastership(did); | ||
508 | + } | ||
511 | return; | 509 | return; |
512 | } | 510 | } |
513 | - // connection seems to exist | 511 | + |
514 | - log.info("Detected mastership info mismatch, requesting Role"); | 512 | + // device is connected to this node: |
513 | + | ||
514 | + if (myNextRole == NONE) { | ||
515 | mastershipService.requestRoleFor(did); | 515 | mastershipService.requestRoleFor(did); |
516 | - final MastershipTerm term = termService.getMastershipTerm(did); | 516 | + MastershipTerm term = termService.getMastershipTerm(did); |
517 | if (myNodeId.equals(term.master())) { | 517 | if (myNodeId.equals(term.master())) { |
518 | - // became MASTER | 518 | + myNextRole = MASTER; |
519 | - // TODO: consider slicing out method for applying MASTER role | 519 | + } else { |
520 | - deviceClockProviderService.setMastershipTerm(did, term); | 520 | + myNextRole = STANDBY; |
521 | + } | ||
522 | + } | ||
521 | 523 | ||
524 | + switch (myNextRole) { | ||
525 | + case MASTER: | ||
526 | + if ((device != null) && !isAvailable(did)) { | ||
522 | //flag the device as online. Is there a better way to do this? | 527 | //flag the device as online. Is there a better way to do this? |
528 | + DefaultDeviceDescription deviceDescription | ||
529 | + = new DefaultDeviceDescription(did.uri(), | ||
530 | + device.type(), | ||
531 | + device.manufacturer(), | ||
532 | + device.hwVersion(), | ||
533 | + device.swVersion(), | ||
534 | + device.serialNumber(), | ||
535 | + device.chassisId()); | ||
523 | DeviceEvent devEvent = | 536 | DeviceEvent devEvent = |
524 | store.createOrUpdateDevice(device.providerId(), did, | 537 | store.createOrUpdateDevice(device.providerId(), did, |
525 | - new DefaultDeviceDescription( | 538 | + deviceDescription); |
526 | - did.uri(), device.type(), device.manufacturer(), | ||
527 | - device.hwVersion(), device.swVersion(), | ||
528 | - device.serialNumber(), device.chassisId())); | ||
529 | - applyRole(did, MastershipRole.MASTER); | ||
530 | post(devEvent); | 539 | post(devEvent); |
531 | - } else { | ||
532 | - applyRole(did, MastershipRole.STANDBY); | ||
533 | } | 540 | } |
541 | + // TODO: should apply role only if there is mismatch | ||
542 | + log.info("Applying role {} to {}", myNextRole, did); | ||
543 | + applyRole(did, MASTER); | ||
544 | + break; | ||
545 | + case STANDBY: | ||
546 | + log.info("Applying role {} to {}", myNextRole, did); | ||
547 | + applyRole(did, STANDBY); | ||
548 | + break; | ||
549 | + case NONE: | ||
550 | + default: | ||
551 | + // should never reach here | ||
552 | + log.error("You didn't see anything. I did not exist."); | ||
553 | + break; | ||
534 | } | 554 | } |
535 | } | 555 | } |
536 | - | ||
537 | - // checks for duplicate event, returning true if one is found. | ||
538 | - private boolean checkDuplicate(RoleInfo roleInfo, int term) { | ||
539 | - // turning off duplicate check | ||
540 | - return false; | ||
541 | -// synchronized (eventCache) { | ||
542 | -// if (eventCache.get(term).contains(roleInfo)) { | ||
543 | -// log.info("duplicate event detected; ignoring"); | ||
544 | -// return true; | ||
545 | -// } else { | ||
546 | -// eventCache.put(term, roleInfo); | ||
547 | -// // purge by-term oldest entries to keep the cache size under limit | ||
548 | -// if (eventCache.size() > cacheSize) { | ||
549 | -// eventCache.removeAll(term - cacheSize); | ||
550 | -// } | ||
551 | -// return false; | ||
552 | -// } | ||
553 | -// } | ||
554 | - } | ||
555 | - | ||
556 | } | 556 | } |
557 | 557 | ||
558 | // Store delegate to re-post events emitted from the store. | 558 | // Store delegate to re-post events emitted from the store. | ... | ... |
-
Please register or login to post a comment