Madan Jampani
Committed by Gerrit Code Review

Cluster scaling enhancements

	- Updated ConfigFileBasedClusterMetadataProvider to handle both file and http protocols.
	- Server open logic updated to handle joining an existing cluster.

Change-Id: Idbaa39733c7bf814510c94c4b21e3714b3f97f8f
...@@ -43,6 +43,9 @@ public class PartitionsListCommand extends AbstractShellCommand { ...@@ -43,6 +43,9 @@ public class PartitionsListCommand extends AbstractShellCommand {
43 * @param partitionInfo partition descriptions 43 * @param partitionInfo partition descriptions
44 */ 44 */
45 private void displayPartitions(List<PartitionInfo> partitionInfo) { 45 private void displayPartitions(List<PartitionInfo> partitionInfo) {
46 + if (partitionInfo.isEmpty()) {
47 + return;
48 + }
46 print("----------------------------------------------------------"); 49 print("----------------------------------------------------------");
47 print(FMT, "Name", "Term", "Members", ""); 50 print(FMT, "Name", "Term", "Members", "");
48 print("----------------------------------------------------------"); 51 print("----------------------------------------------------------");
......
...@@ -19,10 +19,10 @@ import static com.google.common.base.Preconditions.checkNotNull; ...@@ -19,10 +19,10 @@ import static com.google.common.base.Preconditions.checkNotNull;
19 import static org.slf4j.LoggerFactory.getLogger; 19 import static org.slf4j.LoggerFactory.getLogger;
20 20
21 import java.net.InetAddress; 21 import java.net.InetAddress;
22 +import java.net.MalformedURLException;
22 import java.net.NetworkInterface; 23 import java.net.NetworkInterface;
23 import java.net.SocketException; 24 import java.net.SocketException;
24 -import java.net.URI; 25 +import java.net.URL;
25 -import java.net.URISyntaxException;
26 import java.util.Collection; 26 import java.util.Collection;
27 import java.util.Enumeration; 27 import java.util.Enumeration;
28 28
...@@ -47,8 +47,6 @@ import org.onosproject.net.provider.AbstractProviderService; ...@@ -47,8 +47,6 @@ import org.onosproject.net.provider.AbstractProviderService;
47 import org.onosproject.store.service.Versioned; 47 import org.onosproject.store.service.Versioned;
48 import org.slf4j.Logger; 48 import org.slf4j.Logger;
49 49
50 -import com.google.common.base.Throwables;
51 -
52 /** 50 /**
53 * Implementation of ClusterMetadataService. 51 * Implementation of ClusterMetadataService.
54 */ 52 */
...@@ -126,11 +124,15 @@ public class ClusterMetadataManager ...@@ -126,11 +124,15 @@ public class ClusterMetadataManager
126 * @return primary cluster metadata provider 124 * @return primary cluster metadata provider
127 */ 125 */
128 private ClusterMetadataProvider getPrimaryProvider() { 126 private ClusterMetadataProvider getPrimaryProvider() {
127 + String metadataUri = System.getProperty("onos.cluster.metadata.uri");
129 try { 128 try {
130 - URI uri = new URI(System.getProperty("onos.cluster.metadata.uri", "config:///cluster.json")); 129 + String protocol = metadataUri == null ? null : new URL(metadataUri).getProtocol();
131 - return getProvider(uri.getScheme()); 130 + if (protocol != null && (!protocol.equals("file") && !protocol.equals("http"))) {
132 - } catch (URISyntaxException e) { 131 + return getProvider(protocol);
133 - Throwables.propagate(e); 132 + }
133 + // file provider supports both "file" and "http" uris
134 + return getProvider("file");
135 + } catch (MalformedURLException e) {
134 return null; 136 return null;
135 } 137 }
136 } 138 }
......
...@@ -19,16 +19,14 @@ import static org.onlab.util.Tools.groupedThreads; ...@@ -19,16 +19,14 @@ import static org.onlab.util.Tools.groupedThreads;
19 import static org.slf4j.LoggerFactory.getLogger; 19 import static org.slf4j.LoggerFactory.getLogger;
20 20
21 import java.io.File; 21 import java.io.File;
22 +import java.io.FileInputStream;
22 import java.io.IOException; 23 import java.io.IOException;
23 -import java.nio.file.FileSystems; 24 +import java.net.URL;
24 -import java.nio.file.Path; 25 +import java.net.URLConnection;
25 -import java.nio.file.StandardWatchEventKinds;
26 -import java.nio.file.WatchEvent;
27 -import java.nio.file.WatchKey;
28 -import java.nio.file.WatchService;
29 import java.util.Set; 26 import java.util.Set;
30 -import java.util.concurrent.ExecutorService;
31 import java.util.concurrent.Executors; 27 import java.util.concurrent.Executors;
28 +import java.util.concurrent.ScheduledExecutorService;
29 +import java.util.concurrent.TimeUnit;
32 import java.util.concurrent.atomic.AtomicReference; 30 import java.util.concurrent.atomic.AtomicReference;
33 31
34 import org.apache.felix.scr.annotations.Activate; 32 import org.apache.felix.scr.annotations.Activate;
...@@ -87,11 +85,12 @@ public class ConfigFileBasedClusterMetadataProvider implements ClusterMetadataPr ...@@ -87,11 +85,12 @@ public class ConfigFileBasedClusterMetadataProvider implements ClusterMetadataPr
87 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY) 85 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
88 protected ClusterMetadataProviderRegistry providerRegistry; 86 protected ClusterMetadataProviderRegistry providerRegistry;
89 87
90 - private static final ProviderId PROVIDER_ID = new ProviderId("config", "none"); 88 + private static final ProviderId PROVIDER_ID = new ProviderId("file", "none");
91 private final AtomicReference<Versioned<ClusterMetadata>> cachedMetadata = new AtomicReference<>(); 89 private final AtomicReference<Versioned<ClusterMetadata>> cachedMetadata = new AtomicReference<>();
92 - private final ExecutorService configFileChangeDetector = 90 + private final ScheduledExecutorService configFileChangeDetector =
93 - Executors.newSingleThreadExecutor(groupedThreads("onos/cluster/metadata/config-watcher", "")); 91 + Executors.newSingleThreadScheduledExecutor(groupedThreads("onos/cluster/metadata/config-watcher", ""));
94 92
93 + private String metadataUrl;
95 private ObjectMapper mapper; 94 private ObjectMapper mapper;
96 private ClusterMetadataProviderService providerService; 95 private ClusterMetadataProviderService providerService;
97 96
...@@ -108,14 +107,8 @@ public class ConfigFileBasedClusterMetadataProvider implements ClusterMetadataPr ...@@ -108,14 +107,8 @@ public class ConfigFileBasedClusterMetadataProvider implements ClusterMetadataPr
108 module.addDeserializer(PartitionId.class, new PartitionIdDeserializer()); 107 module.addDeserializer(PartitionId.class, new PartitionIdDeserializer());
109 mapper.registerModule(module); 108 mapper.registerModule(module);
110 providerService = providerRegistry.register(this); 109 providerService = providerRegistry.register(this);
111 - configFileChangeDetector.execute(() -> { 110 + metadataUrl = System.getProperty("onos.cluster.metadata.uri", "file://" + CONFIG_DIR + "/" + CONFIG_FILE);
112 - try { 111 + configFileChangeDetector.scheduleWithFixedDelay(() -> watchUrl(metadataUrl), 100, 500, TimeUnit.MILLISECONDS);
113 - watchConfigFile();
114 - } catch (IOException e) {
115 - log.warn("Failure in setting up a watch for config "
116 - + "file updates. updates to {} will be ignored", CONFIG_FILE, e);
117 - }
118 - });
119 log.info("Started"); 112 log.info("Started");
120 } 113 }
121 114
...@@ -136,7 +129,7 @@ public class ConfigFileBasedClusterMetadataProvider implements ClusterMetadataPr ...@@ -136,7 +129,7 @@ public class ConfigFileBasedClusterMetadataProvider implements ClusterMetadataPr
136 checkState(isAvailable()); 129 checkState(isAvailable());
137 synchronized (this) { 130 synchronized (this) {
138 if (cachedMetadata.get() == null) { 131 if (cachedMetadata.get() == null) {
139 - cachedMetadata.set(fetchMetadata()); 132 + cachedMetadata.set(fetchMetadata(metadataUrl));
140 } 133 }
141 return cachedMetadata.get(); 134 return cachedMetadata.get();
142 } 135 }
...@@ -170,23 +163,45 @@ public class ConfigFileBasedClusterMetadataProvider implements ClusterMetadataPr ...@@ -170,23 +163,45 @@ public class ConfigFileBasedClusterMetadataProvider implements ClusterMetadataPr
170 163
171 @Override 164 @Override
172 public boolean isAvailable() { 165 public boolean isAvailable() {
173 - return CONFIG_FILE.exists(); 166 + try {
167 + URL url = new URL(metadataUrl);
168 + if (url.getProtocol().equals("file")) {
169 + File file = new File(metadataUrl.replaceFirst("file://", ""));
170 + return file.exists();
171 + } else if (url.getProtocol().equals("http")) {
172 + url.openStream();
173 + return true;
174 + } else {
175 + // Unsupported protocol
176 + return false;
177 + }
178 + } catch (Exception e) {
179 + return false;
180 + }
174 } 181 }
175 182
176 - private Versioned<ClusterMetadata> fetchMetadata() { 183 + private Versioned<ClusterMetadata> fetchMetadata(String metadataUrl) {
177 - ClusterMetadata metadata = null;
178 - long version = 0;
179 try { 184 try {
180 - metadata = mapper.readValue(CONFIG_FILE, ClusterMetadata.class); 185 + URL url = new URL(metadataUrl);
181 - version = CONFIG_FILE.lastModified(); 186 + ClusterMetadata metadata = null;
187 + long version = 0;
188 + if (url.getProtocol().equals("file")) {
189 + File file = new File(metadataUrl.replaceFirst("file://", ""));
190 + version = file.lastModified();
191 + metadata = mapper.readValue(new FileInputStream(file), ClusterMetadata.class);
192 + } else if (url.getProtocol().equals("http")) {
193 + URLConnection conn = url.openConnection();
194 + version = conn.getLastModified();
195 + metadata = mapper.readValue(conn.getInputStream(), ClusterMetadata.class);
196 + }
197 + return new Versioned<>(new ClusterMetadata(PROVIDER_ID,
198 + metadata.getName(),
199 + Sets.newHashSet(metadata.getNodes()),
200 + Sets.newHashSet(metadata.getPartitions())),
201 + version);
182 } catch (IOException e) { 202 } catch (IOException e) {
183 - Throwables.propagate(e); 203 + throw Throwables.propagate(e);
184 } 204 }
185 - return new Versioned<>(new ClusterMetadata(PROVIDER_ID,
186 - metadata.getName(),
187 - Sets.newHashSet(metadata.getNodes()),
188 - Sets.newHashSet(metadata.getPartitions())),
189 - version);
190 } 205 }
191 206
192 private static class PartitionDeserializer extends JsonDeserializer<Partition> { 207 private static class PartitionDeserializer extends JsonDeserializer<Partition> {
...@@ -256,32 +271,16 @@ public class ConfigFileBasedClusterMetadataProvider implements ClusterMetadataPr ...@@ -256,32 +271,16 @@ public class ConfigFileBasedClusterMetadataProvider implements ClusterMetadataPr
256 } 271 }
257 272
258 /** 273 /**
259 - * Monitors the config file for any updates and notifies providerService accordingly. 274 + * Monitors the metadata url for any updates and notifies providerService accordingly.
260 * @throws IOException 275 * @throws IOException
261 */ 276 */
262 - private void watchConfigFile() throws IOException { 277 + private void watchUrl(String metadataUrl) {
263 - WatchService watcher = FileSystems.getDefault().newWatchService(); 278 + // TODO: We are merely polling the url.
264 - Path configFilePath = FileSystems.getDefault().getPath(CONFIG_DIR); 279 + // This can be easily addressed for files. For http urls we need to move to a push style protocol.
265 - configFilePath.register(watcher, StandardWatchEventKinds.ENTRY_MODIFY); 280 + Versioned<ClusterMetadata> latestMetadata = fetchMetadata(metadataUrl);
266 - while (true) { 281 + if (cachedMetadata.get() != null && cachedMetadata.get().version() < latestMetadata.version()) {
267 - try { 282 + cachedMetadata.set(latestMetadata);
268 - final WatchKey watchKey = watcher.take(); 283 + providerService.clusterMetadataChanged(latestMetadata);
269 - for (WatchEvent<?> event : watchKey.pollEvents()) {
270 - final Path changed = (Path) event.context();
271 - log.info("{} was updated", changed);
272 - // TODO: Fix concurrency issues
273 - Versioned<ClusterMetadata> latestMetadata = fetchMetadata();
274 - cachedMetadata.set(latestMetadata);
275 - providerService.clusterMetadataChanged(latestMetadata);
276 - }
277 - if (!watchKey.reset()) {
278 - log.debug("WatchKey has been unregistered");
279 - break;
280 - }
281 - } catch (InterruptedException e) {
282 - Thread.currentThread().interrupt();
283 - break;
284 - }
285 } 284 }
286 } 285 }
287 } 286 }
...\ No newline at end of file ...\ No newline at end of file
......
...@@ -23,8 +23,10 @@ import io.atomix.variables.DistributedLong; ...@@ -23,8 +23,10 @@ import io.atomix.variables.DistributedLong;
23 import java.io.File; 23 import java.io.File;
24 import java.util.Collection; 24 import java.util.Collection;
25 import java.util.Optional; 25 import java.util.Optional;
26 +import java.util.Set;
26 import java.util.concurrent.CompletableFuture; 27 import java.util.concurrent.CompletableFuture;
27 import java.util.concurrent.atomic.AtomicBoolean; 28 import java.util.concurrent.atomic.AtomicBoolean;
29 +import java.util.stream.Collectors;
28 30
29 import org.onosproject.cluster.ClusterService; 31 import org.onosproject.cluster.ClusterService;
30 import org.onosproject.cluster.ControllerNode; 32 import org.onosproject.cluster.ControllerNode;
...@@ -83,7 +85,9 @@ public class StoragePartition implements Managed<StoragePartition> { ...@@ -83,7 +85,9 @@ public class StoragePartition implements Managed<StoragePartition> {
83 85
84 @Override 86 @Override
85 public CompletableFuture<Void> open() { 87 public CompletableFuture<Void> open() {
86 - openServer(); 88 + if (partition.getMembers().contains(localNodeId)) {
89 + openServer();
90 + }
87 return openClient().thenAccept(v -> isOpened.set(true)) 91 return openClient().thenAccept(v -> isOpened.set(true))
88 .thenApply(v -> null); 92 .thenApply(v -> null);
89 } 93 }
...@@ -120,6 +124,10 @@ public class StoragePartition implements Managed<StoragePartition> { ...@@ -120,6 +124,10 @@ public class StoragePartition implements Managed<StoragePartition> {
120 return Collections2.transform(partition.getMembers(), this::toAddress); 124 return Collections2.transform(partition.getMembers(), this::toAddress);
121 } 125 }
122 126
127 + /**
128 + * Attempts to rejoin the partition.
129 + * @return future that is completed after the operation is complete
130 + */
123 private CompletableFuture<Void> openServer() { 131 private CompletableFuture<Void> openServer() {
124 if (!partition.getMembers().contains(localNodeId) || server != null) { 132 if (!partition.getMembers().contains(localNodeId) || server != null) {
125 return CompletableFuture.completedFuture(null); 133 return CompletableFuture.completedFuture(null);
...@@ -135,6 +143,26 @@ public class StoragePartition implements Managed<StoragePartition> { ...@@ -135,6 +143,26 @@ public class StoragePartition implements Managed<StoragePartition> {
135 return server.open().thenRun(() -> this.server = server); 143 return server.open().thenRun(() -> this.server = server);
136 } 144 }
137 145
146 + /**
147 + * Attempts to join the partition as a new member.
148 + * @return future that is completed after the operation is complete
149 + */
150 + private CompletableFuture<Void> joinCluster() {
151 + Set<NodeId> otherMembers = partition.getMembers()
152 + .stream()
153 + .filter(nodeId -> !nodeId.equals(localNodeId))
154 + .collect(Collectors.toSet());
155 + StoragePartitionServer server = new StoragePartitionServer(toAddress(localNodeId),
156 + this,
157 + serializer,
158 + () -> new CopycatTransport(CopycatTransport.Mode.SERVER,
159 + partition.getId(),
160 + messagingService),
161 + RESOURCE_TYPES,
162 + logFolder);
163 + return server.join(Collections2.transform(otherMembers, this::toAddress)).thenRun(() -> this.server = server);
164 + }
165 +
138 private CompletableFuture<StoragePartitionClient> openClient() { 166 private CompletableFuture<StoragePartitionClient> openClient() {
139 client = new StoragePartitionClient(this, 167 client = new StoragePartitionClient(this,
140 serializer, 168 serializer,
...@@ -149,7 +177,7 @@ public class StoragePartition implements Managed<StoragePartition> { ...@@ -149,7 +177,7 @@ public class StoragePartition implements Managed<StoragePartition> {
149 * Closes the partition server if it was previously opened. 177 * Closes the partition server if it was previously opened.
150 * @return future that is completed when the operation completes 178 * @return future that is completed when the operation completes
151 */ 179 */
152 - public CompletableFuture<Void> closeServer() { 180 + public CompletableFuture<Void> leaveCluster() {
153 return server != null ? server.closeAndExit() : CompletableFuture.completedFuture(null); 181 return server != null ? server.closeAndExit() : CompletableFuture.completedFuture(null);
154 } 182 }
155 183
...@@ -181,15 +209,21 @@ public class StoragePartition implements Managed<StoragePartition> { ...@@ -181,15 +209,21 @@ public class StoragePartition implements Managed<StoragePartition> {
181 * @return partition info 209 * @return partition info
182 */ 210 */
183 public Optional<PartitionInfo> info() { 211 public Optional<PartitionInfo> info() {
184 - return server != null ? Optional.of(server.info()) : Optional.empty(); 212 + return server != null && !server.isClosed() ? Optional.of(server.info()) : Optional.empty();
185 } 213 }
186 214
187 - public void onUpdate(Partition partition) { 215 + public void onUpdate(Partition newValue) {
188 - this.partition = partition; 216 + if (partition.getMembers().contains(localNodeId) && newValue.getMembers().contains(localNodeId)) {
217 + return;
218 + }
219 + if (!partition.getMembers().contains(localNodeId) && !newValue.getMembers().contains(localNodeId)) {
220 + return;
221 + }
222 + this.partition = newValue;
189 if (partition.getMembers().contains(localNodeId)) { 223 if (partition.getMembers().contains(localNodeId)) {
190 - openServer(); 224 + joinCluster();
191 } else if (!partition.getMembers().contains(localNodeId)) { 225 } else if (!partition.getMembers().contains(localNodeId)) {
192 - closeServer(); 226 + leaveCluster();
193 } 227 }
194 } 228 }
195 } 229 }
......
...@@ -77,7 +77,7 @@ public class StoragePartitionServer implements Managed<StoragePartitionServer> { ...@@ -77,7 +77,7 @@ public class StoragePartitionServer implements Managed<StoragePartitionServer> {
77 return CompletableFuture.completedFuture(null); 77 return CompletableFuture.completedFuture(null);
78 } 78 }
79 synchronized (this) { 79 synchronized (this) {
80 - server = buildServer(); 80 + server = buildServer(partition.getMemberAddresses());
81 } 81 }
82 serverOpenFuture = server.open(); 82 serverOpenFuture = server.open();
83 } else { 83 } else {
...@@ -109,12 +109,12 @@ public class StoragePartitionServer implements Managed<StoragePartitionServer> { ...@@ -109,12 +109,12 @@ public class StoragePartitionServer implements Managed<StoragePartitionServer> {
109 return server.close(); 109 return server.close();
110 } 110 }
111 111
112 - private CopycatServer buildServer() { 112 + private CopycatServer buildServer(Collection<Address> clusterMembers) {
113 ResourceTypeResolver resourceResolver = new ServiceLoaderResourceResolver(); 113 ResourceTypeResolver resourceResolver = new ServiceLoaderResourceResolver();
114 ResourceRegistry registry = new ResourceRegistry(); 114 ResourceRegistry registry = new ResourceRegistry();
115 resourceTypes.forEach(registry::register); 115 resourceTypes.forEach(registry::register);
116 resourceResolver.resolve(registry); 116 resourceResolver.resolve(registry);
117 - CopycatServer server = CopycatServer.builder(localAddress, partition.getMemberAddresses()) 117 + CopycatServer server = CopycatServer.builder(localAddress, clusterMembers)
118 .withName("partition-" + partition.getId()) 118 .withName("partition-" + partition.getId())
119 .withSerializer(serializer.clone()) 119 .withSerializer(serializer.clone())
120 .withTransport(transport.get()) 120 .withTransport(transport.get())
...@@ -130,6 +130,18 @@ public class StoragePartitionServer implements Managed<StoragePartitionServer> { ...@@ -130,6 +130,18 @@ public class StoragePartitionServer implements Managed<StoragePartitionServer> {
130 return server; 130 return server;
131 } 131 }
132 132
133 + public CompletableFuture<Void> join(Collection<Address> otherMembers) {
134 + server = buildServer(otherMembers);
135 +
136 + return server.open().whenComplete((r, e) -> {
137 + if (e == null) {
138 + log.info("Successfully joined partition {}", partition.getId());
139 + } else {
140 + log.info("Failed to join partition {}", partition.getId(), e);
141 + }
142 + }).thenApply(v -> null);
143 + }
144 +
133 @Override 145 @Override
134 public boolean isOpen() { 146 public boolean isOpen() {
135 return server.isOpen(); 147 return server.isOpen();
......