Yuta HIGUCHI

revisiting DeviceManager role handling

Change-Id: If7765b38e2eda99ca210316429a8e65d482c2791
......@@ -17,6 +17,7 @@ package org.onlab.onos.net.device.impl;
import static com.google.common.base.Preconditions.checkNotNull;
import static org.onlab.onos.net.device.DeviceEvent.Type.DEVICE_MASTERSHIP_CHANGED;
import static org.onlab.onos.net.MastershipRole.*;
import static org.slf4j.LoggerFactory.getLogger;
import java.util.List;
......@@ -29,7 +30,6 @@ import org.apache.felix.scr.annotations.ReferenceCardinality;
import org.apache.felix.scr.annotations.Service;
import org.onlab.onos.cluster.ClusterService;
import org.onlab.onos.cluster.NodeId;
import org.onlab.onos.cluster.RoleInfo;
import org.onlab.onos.event.AbstractListenerRegistry;
import org.onlab.onos.event.EventDeliveryService;
import org.onlab.onos.mastership.MastershipEvent;
......@@ -59,8 +59,6 @@ import org.onlab.onos.net.provider.AbstractProviderRegistry;
import org.onlab.onos.net.provider.AbstractProviderService;
import org.slf4j.Logger;
import com.google.common.collect.HashMultimap;
/**
* Provides implementation of the device SB & NB APIs.
*/
......@@ -160,31 +158,6 @@ public class DeviceManager
return store.isAvailable(deviceId);
}
// Applies the specified role to the device; ignores NONE
private void applyRole(DeviceId deviceId, MastershipRole newRole) {
if (newRole.equals(MastershipRole.NONE)) {
return;
}
Device device = store.getDevice(deviceId);
// FIXME: Device might not be there yet. (eventual consistent)
if (device == null) {
return;
}
DeviceProvider provider = getProvider(device.providerId());
if (provider != null) {
provider.roleChanged(device, newRole);
// only trigger event when request was sent to provider
// TODO: consider removing this from Device event type?
post(new DeviceEvent(DEVICE_MASTERSHIP_CHANGED, device));
if (newRole.equals(MastershipRole.MASTER)) {
provider.triggerProbe(device);
}
}
}
// Check a device for control channel connectivity.
private boolean isReachable(Device device) {
// FIXME: Device might not be there yet. (eventual consistent)
......@@ -234,6 +207,38 @@ public class DeviceManager
super(provider);
}
/**
* Apply role in reaction to provider event.
*
* @param deviceId device identifier
* @param newRole new role to apply to the device
* @return true if the request was sent to provider
*/
private boolean applyRole(DeviceId deviceId, MastershipRole newRole) {
if (newRole.equals(MastershipRole.NONE)) {
//no-op
return true;
}
DeviceProvider provider = provider();
if (provider == null) {
log.warn("Provider for {} was not found. Cannot apply role {}", deviceId, newRole);
return false;
}
// FIXME roleChanged should take DeviceId instead of Device
Device device = store.getDevice(deviceId);
if (device == null) {
log.warn("{} was not there. Cannot apply role {}", deviceId, newRole);
return false;
}
provider.roleChanged(device, newRole);
// not triggering prove when triggered by provider service
return true;
}
@Override
public void deviceConnected(DeviceId deviceId,
DeviceDescription deviceDescription) {
......@@ -242,27 +247,16 @@ public class DeviceManager
checkValidity();
log.info("Device {} connected", deviceId);
// check my Role
MastershipRole role = mastershipService.requestRoleFor(deviceId);
if (role != MastershipRole.MASTER) {
// TODO: Do we need to explicitly tell the Provider that
// this instance is no longer the MASTER? probably not
// Device device = getDevice(deviceId);
// if (device != null) {
// // FIXME roleChanged should take DeviceId instead of Device
// provider().roleChanged(device, role);
// }
return;
}
MastershipTerm term = termService.getMastershipTerm(deviceId);
final NodeId myNodeId = clusterService.getLocalNode().id();
// check my Role
mastershipService.requestRoleFor(deviceId);
final MastershipTerm term = termService.getMastershipTerm(deviceId);
if (!myNodeId.equals(term.master())) {
// lost mastership after requestRole told this instance was MASTER.
log.info("Role of this node is STANDBY for {}", deviceId);
// TODO: Do we need to explicitly tell the Provider that
// this instance is no longer the MASTER?
//applyRole(deviceId, MastershipRole.STANDBY);
// this instance is not the MASTER
applyRole(deviceId, MastershipRole.STANDBY);
return;
}
log.info("Role of this node is MASTER for {}", deviceId);
......@@ -273,24 +267,15 @@ public class DeviceManager
DeviceEvent event = store.createOrUpdateDevice(provider().id(),
deviceId, deviceDescription);
applyRole(deviceId, MastershipRole.MASTER);
// If there was a change of any kind, tell the provider
// that this instance is the master.
// Note: event can be null, if mastership was lost between
// roleRequest and store update calls.
if (event != null) {
// TODO: Check switch reconnected case. Is it assured that
// event will never be null?
// Could there be a situation MastershipService told this
// instance is the new Master, but
// event returned from the store is null?
// FIXME: 1st argument should be deviceId, to allow setting
// certain roles even if the store returned null.
log.info("event: {} {}", event.type(), event);
provider().roleChanged(event.subject(), role);
log.trace("event: {} {}", event.type(), event);
post(event);
} else {
log.info("No event to publish");
post(new DeviceEvent(DEVICE_MASTERSHIP_CHANGED, store.getDevice(deviceId)));
}
}
......@@ -441,118 +426,133 @@ public class DeviceManager
// Intercepts mastership events
private class InternalMastershipListener implements MastershipListener {
// random cache size
private final int cacheSize = 5;
// temporarily stores term number + events to check for duplicates. A hack.
private HashMultimap<Integer, RoleInfo> eventCache =
HashMultimap.create();
// Applies the specified role to the device; ignores NONE
/**
* Apply role in reaction to mastership event.
*
* @param deviceId device identifier
* @param newRole new role to apply to the device
* @return true if the request was sent to provider
*/
private boolean applyRole(DeviceId deviceId, MastershipRole newRole) {
if (newRole.equals(MastershipRole.NONE)) {
//no-op
return true;
}
@Override
public void event(MastershipEvent event) {
final DeviceId did = event.subject();
final NodeId myNodeId = clusterService.getLocalNode().id();
Device device = store.getDevice(deviceId);
// FIXME: Device might not be there yet. (eventual consistent)
if (device == null) {
log.warn("{} was not there. Cannot apply role {}", deviceId, newRole);
return false;
}
if (myNodeId.equals(event.roleInfo().master())) {
MastershipTerm term = termService.getMastershipTerm(did);
DeviceProvider provider = getProvider(device.providerId());
if (provider == null) {
log.warn("Provider for {} was not found. Cannot apply role {}", deviceId, newRole);
return false;
}
// FIXME roleChanged should take DeviceId instead of Device
provider.roleChanged(device, newRole);
// TODO duplicate suppression should probably occur in the MastershipManager
// itself, so listeners that can't deal with duplicates don't have to
// so this check themselves.
// if (checkDuplicate(event.roleInfo(), term.termNumber())) {
// return;
// }
if (newRole.equals(MastershipRole.MASTER)) {
// only trigger event when request was sent to provider
// TODO: consider removing this from Device event type?
post(new DeviceEvent(DEVICE_MASTERSHIP_CHANGED, device));
if (!myNodeId.equals(term.master())) {
// something went wrong in consistency, let go
log.warn("Mastership has changed after this event."
+ "Term Service suggests {} for {}", term, did);
// FIXME: Is it possible to let go of MASTER role
// but remain on STANDBY list?
mastershipService.relinquishMastership(did);
applyRole(did, MastershipRole.STANDBY);
return;
provider.triggerProbe(device);
}
return true;
}
// only set the new term if I am the master
deviceClockProviderService.setMastershipTerm(did, term);
@Override
public void event(MastershipEvent event) {
// if the device is null here, we are the first master to claim the
// device. No worries, the DeviceManager will create one soon.
Device device = getDevice(did);
if ((device != null) && !isAvailable(did)) {
if (!isReachable(device)) {
log.warn("Device {} has disconnected after this event", did);
mastershipService.relinquishMastership(did);
if (event.type() != MastershipEvent.Type.MASTER_CHANGED) {
// Don't care if backup list changed.
return;
}
//flag the device as online. Is there a better way to do this?
DeviceEvent devEvent =
store.createOrUpdateDevice(device.providerId(), did,
new DefaultDeviceDescription(
did.uri(), device.type(), device.manufacturer(),
device.hwVersion(), device.swVersion(),
device.serialNumber(), device.chassisId()));
post(devEvent);
final DeviceId did = event.subject();
final NodeId myNodeId = clusterService.getLocalNode().id();
// myRole suggested by MastershipService
MastershipRole myNextRole;
if (myNodeId.equals(event.roleInfo().master())) {
// confirm latest info
MastershipTerm term = termService.getMastershipTerm(did);
final boolean iHaveControl = myNodeId.equals(term.master());
if (iHaveControl) {
deviceClockProviderService.setMastershipTerm(did, term);
myNextRole = MASTER;
} else {
myNextRole = STANDBY;
}
applyRole(did, MastershipRole.MASTER);
} else if (event.roleInfo().backups().contains(myNodeId)) {
if (!isReachable(getDevice(did))) {
log.warn("Device {} has disconnected after this event", did);
mastershipService.relinquishMastership(did);
return;
}
applyRole(did, MastershipRole.STANDBY);
myNextRole = STANDBY;
} else {
// Event suggests that this Node has no connection to this Device
// confirm.
myNextRole = NONE;
}
final Device device = getDevice(did);
if (!isReachable(device)) {
// not connection to device, as expected
final boolean isReachable = isReachable(device);
if (!isReachable) {
// device is not connected to this node
if (myNextRole != NONE) {
log.warn("Node was instructed to be {} role for {}, "
+ "but this node cannot reach the device. "
+ "Relinquishing role. ",
myNextRole, did);
mastershipService.relinquishMastership(did);
}
return;
}
// connection seems to exist
log.info("Detected mastership info mismatch, requesting Role");
// device is connected to this node:
if (myNextRole == NONE) {
mastershipService.requestRoleFor(did);
final MastershipTerm term = termService.getMastershipTerm(did);
MastershipTerm term = termService.getMastershipTerm(did);
if (myNodeId.equals(term.master())) {
// became MASTER
// TODO: consider slicing out method for applying MASTER role
deviceClockProviderService.setMastershipTerm(did, term);
myNextRole = MASTER;
} else {
myNextRole = STANDBY;
}
}
switch (myNextRole) {
case MASTER:
if ((device != null) && !isAvailable(did)) {
//flag the device as online. Is there a better way to do this?
DefaultDeviceDescription deviceDescription
= new DefaultDeviceDescription(did.uri(),
device.type(),
device.manufacturer(),
device.hwVersion(),
device.swVersion(),
device.serialNumber(),
device.chassisId());
DeviceEvent devEvent =
store.createOrUpdateDevice(device.providerId(), did,
new DefaultDeviceDescription(
did.uri(), device.type(), device.manufacturer(),
device.hwVersion(), device.swVersion(),
device.serialNumber(), device.chassisId()));
applyRole(did, MastershipRole.MASTER);
deviceDescription);
post(devEvent);
} else {
applyRole(did, MastershipRole.STANDBY);
}
// TODO: should apply role only if there is mismatch
log.info("Applying role {} to {}", myNextRole, did);
applyRole(did, MASTER);
break;
case STANDBY:
log.info("Applying role {} to {}", myNextRole, did);
applyRole(did, STANDBY);
break;
case NONE:
default:
// should never reach here
log.error("You didn't see anything. I did not exist.");
break;
}
}
// checks for duplicate event, returning true if one is found.
private boolean checkDuplicate(RoleInfo roleInfo, int term) {
// turning off duplicate check
return false;
// synchronized (eventCache) {
// if (eventCache.get(term).contains(roleInfo)) {
// log.info("duplicate event detected; ignoring");
// return true;
// } else {
// eventCache.put(term, roleInfo);
// // purge by-term oldest entries to keep the cache size under limit
// if (eventCache.size() > cacheSize) {
// eventCache.removeAll(term - cacheSize);
// }
// return false;
// }
// }
}
}
// Store delegate to re-post events emitted from the store.
......