Brian O'Connor

Updating IntentCleanup to check for stalled *_REQ and *ING intents.

Change-Id: Ibe06ee99463bb8230acf9751da4fb1012859b0ea
...@@ -43,10 +43,11 @@ public interface IntentStore extends Store<IntentEvent, IntentStoreDelegate> { ...@@ -43,10 +43,11 @@ public interface IntentStore extends Store<IntentEvent, IntentStoreDelegate> {
43 * Returns an iterable of all intent data objects in the store. 43 * Returns an iterable of all intent data objects in the store.
44 * 44 *
45 * @param localOnly should only intents for which this instance is master 45 * @param localOnly should only intents for which this instance is master
46 - * should be returned 46 + * be returned
47 + * @param olderThan specified duration in milliseconds (0 for "now")
47 * @return iterable of all intent data objects 48 * @return iterable of all intent data objects
48 */ 49 */
49 - Iterable<IntentData> getIntentData(boolean localOnly); 50 + Iterable<IntentData> getIntentData(boolean localOnly, long olderThan);
50 51
51 /** 52 /**
52 * Returns the state of the specified intent. 53 * Returns the state of the specified intent.
...@@ -119,4 +120,22 @@ public interface IntentStore extends Store<IntentEvent, IntentStoreDelegate> { ...@@ -119,4 +120,22 @@ public interface IntentStore extends Store<IntentEvent, IntentStoreDelegate> {
119 * @return pending intents 120 * @return pending intents
120 */ 121 */
121 Iterable<Intent> getPending(); 122 Iterable<Intent> getPending();
123 +
124 + /**
125 + * Returns the intent data objects that are pending processing.
126 + *
127 + * @return pending intent data objects
128 + */
129 + Iterable<IntentData> getPendingData();
130 +
131 + /**
132 + * Returns the intent data objects that are pending processing for longer
133 + * than the specified duration.
134 + *
135 + * @param localOnly should only intents for which this instance is master
136 + * be returned
137 + * @param olderThan specified duration in milliseconds (0 for "now")
138 + * @return pending intent data objects
139 + */
140 + Iterable<IntentData> getPendingData(boolean localOnly, long olderThan);
122 } 141 }
......
...@@ -28,6 +28,7 @@ import org.onosproject.net.intent.IntentEvent; ...@@ -28,6 +28,7 @@ import org.onosproject.net.intent.IntentEvent;
28 import org.onosproject.net.intent.IntentListener; 28 import org.onosproject.net.intent.IntentListener;
29 import org.onosproject.net.intent.IntentService; 29 import org.onosproject.net.intent.IntentService;
30 import org.onosproject.net.intent.IntentStore; 30 import org.onosproject.net.intent.IntentStore;
31 +import org.onosproject.net.intent.Key;
31 import org.osgi.service.component.ComponentContext; 32 import org.osgi.service.component.ComponentContext;
32 import org.slf4j.Logger; 33 import org.slf4j.Logger;
33 34
...@@ -41,12 +42,16 @@ import static com.google.common.base.Strings.isNullOrEmpty; ...@@ -41,12 +42,16 @@ import static com.google.common.base.Strings.isNullOrEmpty;
41 import static java.util.concurrent.Executors.newSingleThreadExecutor; 42 import static java.util.concurrent.Executors.newSingleThreadExecutor;
42 import static org.onlab.util.Tools.get; 43 import static org.onlab.util.Tools.get;
43 import static org.onlab.util.Tools.groupedThreads; 44 import static org.onlab.util.Tools.groupedThreads;
44 -import static org.onosproject.net.intent.IntentState.CORRUPT;
45 import static org.slf4j.LoggerFactory.getLogger; 45 import static org.slf4j.LoggerFactory.getLogger;
46 46
47 /** 47 /**
48 - * FIXME Class to cleanup Intents in CORRUPT state. 48 + * This component cleans up intents that have encountered errors or otherwise
49 - * FIXME move this to its own file eventually (but need executor for now) 49 + * stalled during installation or withdrawal.
50 + * <p>
51 + * It periodically polls (based on configured period) for pending and CORRUPT
52 + * intents from the store and retries. It also listens for CORRUPT event
53 + * notifications, which signify errors in processing, and retries.
54 + * </p>
50 */ 55 */
51 @Component(immediate = true) 56 @Component(immediate = true)
52 public class IntentCleanup implements Runnable, IntentListener { 57 public class IntentCleanup implements Runnable, IntentListener {
...@@ -58,6 +63,7 @@ public class IntentCleanup implements Runnable, IntentListener { ...@@ -58,6 +63,7 @@ public class IntentCleanup implements Runnable, IntentListener {
58 @Property(name = "period", intValue = DEFAULT_PERIOD, 63 @Property(name = "period", intValue = DEFAULT_PERIOD,
59 label = "Frequency in ms between cleanup runs") 64 label = "Frequency in ms between cleanup runs")
60 protected int period = DEFAULT_PERIOD; 65 protected int period = DEFAULT_PERIOD;
66 + private long periodMs;
61 67
62 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY) 68 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
63 protected IntentService service; 69 protected IntentService service;
...@@ -126,7 +132,7 @@ public class IntentCleanup implements Runnable, IntentListener { ...@@ -126,7 +132,7 @@ public class IntentCleanup implements Runnable, IntentListener {
126 } 132 }
127 }; 133 };
128 134
129 - long periodMs = period * 1000; //convert to ms 135 + periodMs = period * 1_000; //convert to ms
130 timer.scheduleAtFixedRate(timerTask, periodMs, periodMs); 136 timer.scheduleAtFixedRate(timerTask, periodMs, periodMs);
131 } 137 }
132 138
...@@ -140,41 +146,79 @@ public class IntentCleanup implements Runnable, IntentListener { ...@@ -140,41 +146,79 @@ public class IntentCleanup implements Runnable, IntentListener {
140 } 146 }
141 } 147 }
142 148
149 + private void resubmitCorrupt(IntentData intentData, boolean checkThreshold) {
150 + //TODO we might want to give up when retry count exceeds a threshold
151 + // FIXME drop this if we exceed retry threshold
152 +
153 +
154 + switch (intentData.request()) {
155 + case INSTALL_REQ:
156 + service.submit(intentData.intent());
157 + break;
158 + case WITHDRAW_REQ:
159 + service.withdraw(intentData.intent());
160 + break;
161 + default:
162 + //TODO this is an error, might want to log it
163 + break;
164 + }
165 + }
166 +
167 + private void resubmitPendingRequest(IntentData intentData) {
168 + switch (intentData.request()) {
169 + case INSTALL_REQ:
170 + service.submit(intentData.intent());
171 + break;
172 + case WITHDRAW_REQ:
173 + service.withdraw(intentData.intent());
174 + break;
175 + default:
176 + //TODO this is an error (or could be purge), might want to log it
177 + break;
178 + }
179 + }
180 +
143 /** 181 /**
144 - * Iterate through CORRUPT intents and re-submit/withdraw. 182 + * Iterate through CORRUPT intents and re-submit/withdraw appropriately.
145 * 183 *
146 - * FIXME we want to eventually count number of retries per intent and give up
147 - * FIXME we probably also want to look at intents that have been stuck
148 - * in *_REQ or *ING for "too long".
149 */ 184 */
150 private void cleanup() { 185 private void cleanup() {
151 - int count = 0; 186 + int corruptCount = 0, stuckCount = 0, pendingCount = 0;
152 - for (IntentData intentData : store.getIntentData(true)) { 187 + for (IntentData intentData : store.getIntentData(true, periodMs)) {
153 - if (intentData.state() == CORRUPT) { 188 + switch (intentData.state()) {
154 - switch (intentData.request()) { 189 + case CORRUPT:
155 - case INSTALL_REQ: 190 + resubmitCorrupt(intentData, false);
156 - service.submit(intentData.intent()); 191 + corruptCount++;
157 - count++; 192 + case INSTALLING: //FALLTHROUGH
158 - break; 193 + case WITHDRAWING:
159 - case WITHDRAW_REQ: 194 + resubmitPendingRequest(intentData);
160 - service.withdraw(intentData.intent()); 195 + stuckCount++;
161 - count++; 196 + default:
162 - break; 197 + //NOOP
163 - default: 198 + break;
164 - //TODO this is an error
165 - break;
166 - }
167 } 199 }
168 } 200 }
169 - log.debug("Intent cleanup ran and resubmitted {} intents", count); 201 +
202 + for (IntentData intentData : store.getPendingData(true, periodMs)) {
203 + //TODO should we do age check here, or in the store?
204 + resubmitPendingRequest(intentData);
205 + stuckCount++;
206 + }
207 +
208 + log.debug("Intent cleanup ran and resubmitted {} corrupt, {} stuck, and {} pending intents",
209 + corruptCount, stuckCount, pendingCount);
170 } 210 }
171 211
172 @Override 212 @Override
173 public void event(IntentEvent event) { 213 public void event(IntentEvent event) {
214 + // fast path for CORRUPT intents, retry on event notification
215 + //TODO we might consider using the timer to back off for subsequent retries
174 if (event.type() == IntentEvent.Type.CORRUPT) { 216 if (event.type() == IntentEvent.Type.CORRUPT) {
175 - // FIXME drop this if we exceed retry threshold 217 + Key key = event.subject().key();
176 - // just run the whole cleanup script for now 218 + if (store.isMaster(key)) {
177 - executor.submit(this); 219 + IntentData data = store.getIntentData(event.subject().key());
220 + resubmitCorrupt(data, true);
221 + }
178 } 222 }
179 } 223 }
180 } 224 }
......
...@@ -36,6 +36,10 @@ public class WallClockTimestamp implements Timestamp { ...@@ -36,6 +36,10 @@ public class WallClockTimestamp implements Timestamp {
36 unixTimestamp = System.currentTimeMillis(); 36 unixTimestamp = System.currentTimeMillis();
37 } 37 }
38 38
39 + public WallClockTimestamp(long timestamp) {
40 + unixTimestamp = timestamp;
41 + }
42 +
39 @Override 43 @Override
40 public int compareTo(Timestamp o) { 44 public int compareTo(Timestamp o) {
41 checkArgument(o instanceof WallClockTimestamp, 45 checkArgument(o instanceof WallClockTimestamp,
......
...@@ -132,10 +132,13 @@ public class GossipIntentStore ...@@ -132,10 +132,13 @@ public class GossipIntentStore
132 } 132 }
133 133
134 @Override 134 @Override
135 - public Iterable<IntentData> getIntentData(boolean localOnly) { 135 + public Iterable<IntentData> getIntentData(boolean localOnly, long olderThan) {
136 - if (localOnly) { 136 + if (localOnly || olderThan > 0) {
137 + long now = System.currentTimeMillis();
138 + final WallClockTimestamp time = new WallClockTimestamp(now - olderThan);
137 return currentMap.values().stream() 139 return currentMap.values().stream()
138 - .filter(data -> isMaster(data.key())) 140 + .filter(data -> data.version().isOlderThan(time) &&
141 + (!localOnly || isMaster(data.key())))
139 .collect(Collectors.toList()); 142 .collect(Collectors.toList());
140 } 143 }
141 return currentMap.values(); 144 return currentMap.values();
...@@ -261,6 +264,21 @@ public class GossipIntentStore ...@@ -261,6 +264,21 @@ public class GossipIntentStore
261 .collect(Collectors.toList()); 264 .collect(Collectors.toList());
262 } 265 }
263 266
267 + @Override
268 + public Iterable<IntentData> getPendingData() {
269 + return pendingMap.values();
270 + }
271 +
272 + @Override
273 + public Iterable<IntentData> getPendingData(boolean localOnly, long olderThan) {
274 + long now = System.currentTimeMillis();
275 + final WallClockTimestamp time = new WallClockTimestamp(now - olderThan);
276 + return pendingMap.values().stream()
277 + .filter(data -> data.version().isOlderThan(time) &&
278 + (!localOnly || isMaster(data.key())))
279 + .collect(Collectors.toList());
280 + }
281 +
264 private void notifyDelegateIfNotNull(IntentEvent event) { 282 private void notifyDelegateIfNotNull(IntentEvent event) {
265 if (event != null) { 283 if (event != null) {
266 notifyDelegate(event); 284 notifyDelegate(event);
......
...@@ -36,7 +36,7 @@ import java.util.Map; ...@@ -36,7 +36,7 @@ import java.util.Map;
36 import java.util.stream.Collectors; 36 import java.util.stream.Collectors;
37 37
38 import static com.google.common.base.Preconditions.checkNotNull; 38 import static com.google.common.base.Preconditions.checkNotNull;
39 -import static org.onosproject.net.intent.IntentState.*; 39 +import static org.onosproject.net.intent.IntentState.PURGE_REQ;
40 import static org.slf4j.LoggerFactory.getLogger; 40 import static org.slf4j.LoggerFactory.getLogger;
41 41
42 /** 42 /**
...@@ -76,11 +76,15 @@ public class SimpleIntentStore ...@@ -76,11 +76,15 @@ public class SimpleIntentStore
76 } 76 }
77 77
78 @Override 78 @Override
79 - public Iterable<IntentData> getIntentData(boolean localOnly) { 79 + public Iterable<IntentData> getIntentData(boolean localOnly, long olderThan) {
80 - if (localOnly) { 80 + if (localOnly || olderThan > 0) {
81 - return current.values().stream() 81 + long older = System.nanoTime() - olderThan * 1_000_000; //convert ms to ns
82 - .filter(data -> isMaster(data.key())) 82 + final SystemClockTimestamp time = new SystemClockTimestamp(older);
83 + return pending.values().stream()
84 + .filter(data -> data.version().isOlderThan(time) &&
85 + (!localOnly || isMaster(data.key())))
83 .collect(Collectors.toList()); 86 .collect(Collectors.toList());
87 +
84 } 88 }
85 return Lists.newArrayList(current.values()); 89 return Lists.newArrayList(current.values());
86 } 90 }
...@@ -191,4 +195,19 @@ public class SimpleIntentStore ...@@ -191,4 +195,19 @@ public class SimpleIntentStore
191 .map(IntentData::intent) 195 .map(IntentData::intent)
192 .collect(Collectors.toList()); 196 .collect(Collectors.toList());
193 } 197 }
198 +
199 + @Override
200 + public Iterable<IntentData> getPendingData() {
201 + return Lists.newArrayList(pending.values());
202 + }
203 +
204 + @Override
205 + public Iterable<IntentData> getPendingData(boolean localOnly, long olderThan) {
206 + long older = System.nanoTime() - olderThan * 1_000_000; //convert ms to ns
207 + final SystemClockTimestamp time = new SystemClockTimestamp(older);
208 + return pending.values().stream()
209 + .filter(data -> data.version().isOlderThan(time) &&
210 + (!localOnly || isMaster(data.key())))
211 + .collect(Collectors.toList());
212 + }
194 } 213 }
......
...@@ -29,10 +29,14 @@ import static com.google.common.base.Preconditions.checkArgument; ...@@ -29,10 +29,14 @@ import static com.google.common.base.Preconditions.checkArgument;
29 */ 29 */
30 public class SystemClockTimestamp implements Timestamp { 30 public class SystemClockTimestamp implements Timestamp {
31 31
32 - private final long unixTimestamp; 32 + private final long nanoTimestamp;
33 33
34 public SystemClockTimestamp() { 34 public SystemClockTimestamp() {
35 - unixTimestamp = System.nanoTime(); 35 + nanoTimestamp = System.nanoTime();
36 + }
37 +
38 + public SystemClockTimestamp(long timestamp) {
39 + nanoTimestamp = timestamp;
36 } 40 }
37 41
38 @Override 42 @Override
...@@ -42,12 +46,12 @@ public class SystemClockTimestamp implements Timestamp { ...@@ -42,12 +46,12 @@ public class SystemClockTimestamp implements Timestamp {
42 SystemClockTimestamp that = (SystemClockTimestamp) o; 46 SystemClockTimestamp that = (SystemClockTimestamp) o;
43 47
44 return ComparisonChain.start() 48 return ComparisonChain.start()
45 - .compare(this.unixTimestamp, that.unixTimestamp) 49 + .compare(this.nanoTimestamp, that.nanoTimestamp)
46 .result(); 50 .result();
47 } 51 }
48 @Override 52 @Override
49 public int hashCode() { 53 public int hashCode() {
50 - return Objects.hash(unixTimestamp); 54 + return Objects.hash(nanoTimestamp);
51 } 55 }
52 56
53 @Override 57 @Override
...@@ -59,17 +63,21 @@ public class SystemClockTimestamp implements Timestamp { ...@@ -59,17 +63,21 @@ public class SystemClockTimestamp implements Timestamp {
59 return false; 63 return false;
60 } 64 }
61 SystemClockTimestamp that = (SystemClockTimestamp) obj; 65 SystemClockTimestamp that = (SystemClockTimestamp) obj;
62 - return Objects.equals(this.unixTimestamp, that.unixTimestamp); 66 + return Objects.equals(this.nanoTimestamp, that.nanoTimestamp);
63 } 67 }
64 68
65 @Override 69 @Override
66 public String toString() { 70 public String toString() {
67 return MoreObjects.toStringHelper(getClass()) 71 return MoreObjects.toStringHelper(getClass())
68 - .add("unixTimestamp", unixTimestamp) 72 + .add("nanoTimestamp", nanoTimestamp)
69 .toString(); 73 .toString();
70 } 74 }
71 75
76 + public long nanoTimestamp() {
77 + return nanoTimestamp;
78 + }
79 +
72 public long systemTimestamp() { 80 public long systemTimestamp() {
73 - return unixTimestamp; 81 + return nanoTimestamp / 1_000_000; // convert ns to ms
74 } 82 }
75 } 83 }
......