Updating IntentCleanup to check for stalled *_REQ and *ING intents.
Change-Id: Ibe06ee99463bb8230acf9751da4fb1012859b0ea
Showing
6 changed files
with
157 additions
and
45 deletions
... | @@ -43,10 +43,11 @@ public interface IntentStore extends Store<IntentEvent, IntentStoreDelegate> { | ... | @@ -43,10 +43,11 @@ public interface IntentStore extends Store<IntentEvent, IntentStoreDelegate> { |
43 | * Returns an iterable of all intent data objects in the store. | 43 | * Returns an iterable of all intent data objects in the store. |
44 | * | 44 | * |
45 | * @param localOnly should only intents for which this instance is master | 45 | * @param localOnly should only intents for which this instance is master |
46 | - * should be returned | 46 | + * be returned |
47 | + * @param olderThan specified duration in milliseconds (0 for "now") | ||
47 | * @return iterable of all intent data objects | 48 | * @return iterable of all intent data objects |
48 | */ | 49 | */ |
49 | - Iterable<IntentData> getIntentData(boolean localOnly); | 50 | + Iterable<IntentData> getIntentData(boolean localOnly, long olderThan); |
50 | 51 | ||
51 | /** | 52 | /** |
52 | * Returns the state of the specified intent. | 53 | * Returns the state of the specified intent. |
... | @@ -119,4 +120,22 @@ public interface IntentStore extends Store<IntentEvent, IntentStoreDelegate> { | ... | @@ -119,4 +120,22 @@ public interface IntentStore extends Store<IntentEvent, IntentStoreDelegate> { |
119 | * @return pending intents | 120 | * @return pending intents |
120 | */ | 121 | */ |
121 | Iterable<Intent> getPending(); | 122 | Iterable<Intent> getPending(); |
123 | + | ||
124 | + /** | ||
125 | + * Returns the intent data objects that are pending processing. | ||
126 | + * | ||
127 | + * @return pending intent data objects | ||
128 | + */ | ||
129 | + Iterable<IntentData> getPendingData(); | ||
130 | + | ||
131 | + /** | ||
132 | + * Returns the intent data objects that are pending processing for longer | ||
133 | + * than the specified duration. | ||
134 | + * | ||
135 | + * @param localOnly should only intents for which this instance is master | ||
136 | + * be returned | ||
137 | + * @param olderThan specified duration in milliseconds (0 for "now") | ||
138 | + * @return pending intent data objects | ||
139 | + */ | ||
140 | + Iterable<IntentData> getPendingData(boolean localOnly, long olderThan); | ||
122 | } | 141 | } | ... | ... |
... | @@ -28,6 +28,7 @@ import org.onosproject.net.intent.IntentEvent; | ... | @@ -28,6 +28,7 @@ import org.onosproject.net.intent.IntentEvent; |
28 | import org.onosproject.net.intent.IntentListener; | 28 | import org.onosproject.net.intent.IntentListener; |
29 | import org.onosproject.net.intent.IntentService; | 29 | import org.onosproject.net.intent.IntentService; |
30 | import org.onosproject.net.intent.IntentStore; | 30 | import org.onosproject.net.intent.IntentStore; |
31 | +import org.onosproject.net.intent.Key; | ||
31 | import org.osgi.service.component.ComponentContext; | 32 | import org.osgi.service.component.ComponentContext; |
32 | import org.slf4j.Logger; | 33 | import org.slf4j.Logger; |
33 | 34 | ||
... | @@ -41,12 +42,16 @@ import static com.google.common.base.Strings.isNullOrEmpty; | ... | @@ -41,12 +42,16 @@ import static com.google.common.base.Strings.isNullOrEmpty; |
41 | import static java.util.concurrent.Executors.newSingleThreadExecutor; | 42 | import static java.util.concurrent.Executors.newSingleThreadExecutor; |
42 | import static org.onlab.util.Tools.get; | 43 | import static org.onlab.util.Tools.get; |
43 | import static org.onlab.util.Tools.groupedThreads; | 44 | import static org.onlab.util.Tools.groupedThreads; |
44 | -import static org.onosproject.net.intent.IntentState.CORRUPT; | ||
45 | import static org.slf4j.LoggerFactory.getLogger; | 45 | import static org.slf4j.LoggerFactory.getLogger; |
46 | 46 | ||
47 | /** | 47 | /** |
48 | - * FIXME Class to cleanup Intents in CORRUPT state. | 48 | + * This component cleans up intents that have encountered errors or otherwise |
49 | - * FIXME move this to its own file eventually (but need executor for now) | 49 | + * stalled during installation or withdrawal. |
50 | + * <p> | ||
51 | + * It periodically polls (based on configured period) for pending and CORRUPT | ||
52 | + * intents from the store and retries. It also listens for CORRUPT event | ||
53 | + * notifications, which signify errors in processing, and retries. | ||
54 | + * </p> | ||
50 | */ | 55 | */ |
51 | @Component(immediate = true) | 56 | @Component(immediate = true) |
52 | public class IntentCleanup implements Runnable, IntentListener { | 57 | public class IntentCleanup implements Runnable, IntentListener { |
... | @@ -58,6 +63,7 @@ public class IntentCleanup implements Runnable, IntentListener { | ... | @@ -58,6 +63,7 @@ public class IntentCleanup implements Runnable, IntentListener { |
58 | @Property(name = "period", intValue = DEFAULT_PERIOD, | 63 | @Property(name = "period", intValue = DEFAULT_PERIOD, |
59 | label = "Frequency in ms between cleanup runs") | 64 | label = "Frequency in ms between cleanup runs") |
60 | protected int period = DEFAULT_PERIOD; | 65 | protected int period = DEFAULT_PERIOD; |
66 | + private long periodMs; | ||
61 | 67 | ||
62 | @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY) | 68 | @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY) |
63 | protected IntentService service; | 69 | protected IntentService service; |
... | @@ -126,7 +132,7 @@ public class IntentCleanup implements Runnable, IntentListener { | ... | @@ -126,7 +132,7 @@ public class IntentCleanup implements Runnable, IntentListener { |
126 | } | 132 | } |
127 | }; | 133 | }; |
128 | 134 | ||
129 | - long periodMs = period * 1000; //convert to ms | 135 | + periodMs = period * 1_000; //convert to ms |
130 | timer.scheduleAtFixedRate(timerTask, periodMs, periodMs); | 136 | timer.scheduleAtFixedRate(timerTask, periodMs, periodMs); |
131 | } | 137 | } |
132 | 138 | ||
... | @@ -140,41 +146,79 @@ public class IntentCleanup implements Runnable, IntentListener { | ... | @@ -140,41 +146,79 @@ public class IntentCleanup implements Runnable, IntentListener { |
140 | } | 146 | } |
141 | } | 147 | } |
142 | 148 | ||
149 | + private void resubmitCorrupt(IntentData intentData, boolean checkThreshold) { | ||
150 | + //TODO we might want to give up when retry count exceeds a threshold | ||
151 | + // FIXME drop this if we exceed retry threshold | ||
152 | + | ||
153 | + | ||
154 | + switch (intentData.request()) { | ||
155 | + case INSTALL_REQ: | ||
156 | + service.submit(intentData.intent()); | ||
157 | + break; | ||
158 | + case WITHDRAW_REQ: | ||
159 | + service.withdraw(intentData.intent()); | ||
160 | + break; | ||
161 | + default: | ||
162 | + //TODO this is an error, might want to log it | ||
163 | + break; | ||
164 | + } | ||
165 | + } | ||
166 | + | ||
167 | + private void resubmitPendingRequest(IntentData intentData) { | ||
168 | + switch (intentData.request()) { | ||
169 | + case INSTALL_REQ: | ||
170 | + service.submit(intentData.intent()); | ||
171 | + break; | ||
172 | + case WITHDRAW_REQ: | ||
173 | + service.withdraw(intentData.intent()); | ||
174 | + break; | ||
175 | + default: | ||
176 | + //TODO this is an error (or could be purge), might want to log it | ||
177 | + break; | ||
178 | + } | ||
179 | + } | ||
180 | + | ||
143 | /** | 181 | /** |
144 | - * Iterate through CORRUPT intents and re-submit/withdraw. | 182 | + * Iterate through CORRUPT intents and re-submit/withdraw appropriately. |
145 | * | 183 | * |
146 | - * FIXME we want to eventually count number of retries per intent and give up | ||
147 | - * FIXME we probably also want to look at intents that have been stuck | ||
148 | - * in *_REQ or *ING for "too long". | ||
149 | */ | 184 | */ |
150 | private void cleanup() { | 185 | private void cleanup() { |
151 | - int count = 0; | 186 | + int corruptCount = 0, stuckCount = 0, pendingCount = 0; |
152 | - for (IntentData intentData : store.getIntentData(true)) { | 187 | + for (IntentData intentData : store.getIntentData(true, periodMs)) { |
153 | - if (intentData.state() == CORRUPT) { | 188 | + switch (intentData.state()) { |
154 | - switch (intentData.request()) { | 189 | + case CORRUPT: |
155 | - case INSTALL_REQ: | 190 | + resubmitCorrupt(intentData, false); |
156 | - service.submit(intentData.intent()); | 191 | + corruptCount++; |
157 | - count++; | 192 | + case INSTALLING: //FALLTHROUGH |
158 | - break; | 193 | + case WITHDRAWING: |
159 | - case WITHDRAW_REQ: | 194 | + resubmitPendingRequest(intentData); |
160 | - service.withdraw(intentData.intent()); | 195 | + stuckCount++; |
161 | - count++; | 196 | + default: |
162 | - break; | 197 | + //NOOP |
163 | - default: | 198 | + break; |
164 | - //TODO this is an error | ||
165 | - break; | ||
166 | - } | ||
167 | } | 199 | } |
168 | } | 200 | } |
169 | - log.debug("Intent cleanup ran and resubmitted {} intents", count); | 201 | + |
202 | + for (IntentData intentData : store.getPendingData(true, periodMs)) { | ||
203 | + //TODO should we do age check here, or in the store? | ||
204 | + resubmitPendingRequest(intentData); | ||
205 | + stuckCount++; | ||
206 | + } | ||
207 | + | ||
208 | + log.debug("Intent cleanup ran and resubmitted {} corrupt, {} stuck, and {} pending intents", | ||
209 | + corruptCount, stuckCount, pendingCount); | ||
170 | } | 210 | } |
171 | 211 | ||
172 | @Override | 212 | @Override |
173 | public void event(IntentEvent event) { | 213 | public void event(IntentEvent event) { |
214 | + // fast path for CORRUPT intents, retry on event notification | ||
215 | + //TODO we might consider using the timer to back off for subsequent retries | ||
174 | if (event.type() == IntentEvent.Type.CORRUPT) { | 216 | if (event.type() == IntentEvent.Type.CORRUPT) { |
175 | - // FIXME drop this if we exceed retry threshold | 217 | + Key key = event.subject().key(); |
176 | - // just run the whole cleanup script for now | 218 | + if (store.isMaster(key)) { |
177 | - executor.submit(this); | 219 | + IntentData data = store.getIntentData(event.subject().key()); |
220 | + resubmitCorrupt(data, true); | ||
221 | + } | ||
178 | } | 222 | } |
179 | } | 223 | } |
180 | } | 224 | } | ... | ... |
... | @@ -36,6 +36,10 @@ public class WallClockTimestamp implements Timestamp { | ... | @@ -36,6 +36,10 @@ public class WallClockTimestamp implements Timestamp { |
36 | unixTimestamp = System.currentTimeMillis(); | 36 | unixTimestamp = System.currentTimeMillis(); |
37 | } | 37 | } |
38 | 38 | ||
39 | + public WallClockTimestamp(long timestamp) { | ||
40 | + unixTimestamp = timestamp; | ||
41 | + } | ||
42 | + | ||
39 | @Override | 43 | @Override |
40 | public int compareTo(Timestamp o) { | 44 | public int compareTo(Timestamp o) { |
41 | checkArgument(o instanceof WallClockTimestamp, | 45 | checkArgument(o instanceof WallClockTimestamp, | ... | ... |
... | @@ -132,10 +132,13 @@ public class GossipIntentStore | ... | @@ -132,10 +132,13 @@ public class GossipIntentStore |
132 | } | 132 | } |
133 | 133 | ||
134 | @Override | 134 | @Override |
135 | - public Iterable<IntentData> getIntentData(boolean localOnly) { | 135 | + public Iterable<IntentData> getIntentData(boolean localOnly, long olderThan) { |
136 | - if (localOnly) { | 136 | + if (localOnly || olderThan > 0) { |
137 | + long now = System.currentTimeMillis(); | ||
138 | + final WallClockTimestamp time = new WallClockTimestamp(now - olderThan); | ||
137 | return currentMap.values().stream() | 139 | return currentMap.values().stream() |
138 | - .filter(data -> isMaster(data.key())) | 140 | + .filter(data -> data.version().isOlderThan(time) && |
141 | + (!localOnly || isMaster(data.key()))) | ||
139 | .collect(Collectors.toList()); | 142 | .collect(Collectors.toList()); |
140 | } | 143 | } |
141 | return currentMap.values(); | 144 | return currentMap.values(); |
... | @@ -261,6 +264,21 @@ public class GossipIntentStore | ... | @@ -261,6 +264,21 @@ public class GossipIntentStore |
261 | .collect(Collectors.toList()); | 264 | .collect(Collectors.toList()); |
262 | } | 265 | } |
263 | 266 | ||
267 | + @Override | ||
268 | + public Iterable<IntentData> getPendingData() { | ||
269 | + return pendingMap.values(); | ||
270 | + } | ||
271 | + | ||
272 | + @Override | ||
273 | + public Iterable<IntentData> getPendingData(boolean localOnly, long olderThan) { | ||
274 | + long now = System.currentTimeMillis(); | ||
275 | + final WallClockTimestamp time = new WallClockTimestamp(now - olderThan); | ||
276 | + return pendingMap.values().stream() | ||
277 | + .filter(data -> data.version().isOlderThan(time) && | ||
278 | + (!localOnly || isMaster(data.key()))) | ||
279 | + .collect(Collectors.toList()); | ||
280 | + } | ||
281 | + | ||
264 | private void notifyDelegateIfNotNull(IntentEvent event) { | 282 | private void notifyDelegateIfNotNull(IntentEvent event) { |
265 | if (event != null) { | 283 | if (event != null) { |
266 | notifyDelegate(event); | 284 | notifyDelegate(event); | ... | ... |
... | @@ -36,7 +36,7 @@ import java.util.Map; | ... | @@ -36,7 +36,7 @@ import java.util.Map; |
36 | import java.util.stream.Collectors; | 36 | import java.util.stream.Collectors; |
37 | 37 | ||
38 | import static com.google.common.base.Preconditions.checkNotNull; | 38 | import static com.google.common.base.Preconditions.checkNotNull; |
39 | -import static org.onosproject.net.intent.IntentState.*; | 39 | +import static org.onosproject.net.intent.IntentState.PURGE_REQ; |
40 | import static org.slf4j.LoggerFactory.getLogger; | 40 | import static org.slf4j.LoggerFactory.getLogger; |
41 | 41 | ||
42 | /** | 42 | /** |
... | @@ -76,11 +76,15 @@ public class SimpleIntentStore | ... | @@ -76,11 +76,15 @@ public class SimpleIntentStore |
76 | } | 76 | } |
77 | 77 | ||
78 | @Override | 78 | @Override |
79 | - public Iterable<IntentData> getIntentData(boolean localOnly) { | 79 | + public Iterable<IntentData> getIntentData(boolean localOnly, long olderThan) { |
80 | - if (localOnly) { | 80 | + if (localOnly || olderThan > 0) { |
81 | - return current.values().stream() | 81 | + long older = System.nanoTime() - olderThan * 1_000_000; //convert ms to ns |
82 | - .filter(data -> isMaster(data.key())) | 82 | + final SystemClockTimestamp time = new SystemClockTimestamp(older); |
83 | + return pending.values().stream() | ||
84 | + .filter(data -> data.version().isOlderThan(time) && | ||
85 | + (!localOnly || isMaster(data.key()))) | ||
83 | .collect(Collectors.toList()); | 86 | .collect(Collectors.toList()); |
87 | + | ||
84 | } | 88 | } |
85 | return Lists.newArrayList(current.values()); | 89 | return Lists.newArrayList(current.values()); |
86 | } | 90 | } |
... | @@ -191,4 +195,19 @@ public class SimpleIntentStore | ... | @@ -191,4 +195,19 @@ public class SimpleIntentStore |
191 | .map(IntentData::intent) | 195 | .map(IntentData::intent) |
192 | .collect(Collectors.toList()); | 196 | .collect(Collectors.toList()); |
193 | } | 197 | } |
198 | + | ||
199 | + @Override | ||
200 | + public Iterable<IntentData> getPendingData() { | ||
201 | + return Lists.newArrayList(pending.values()); | ||
202 | + } | ||
203 | + | ||
204 | + @Override | ||
205 | + public Iterable<IntentData> getPendingData(boolean localOnly, long olderThan) { | ||
206 | + long older = System.nanoTime() - olderThan * 1_000_000; //convert ms to ns | ||
207 | + final SystemClockTimestamp time = new SystemClockTimestamp(older); | ||
208 | + return pending.values().stream() | ||
209 | + .filter(data -> data.version().isOlderThan(time) && | ||
210 | + (!localOnly || isMaster(data.key()))) | ||
211 | + .collect(Collectors.toList()); | ||
212 | + } | ||
194 | } | 213 | } | ... | ... |
... | @@ -29,10 +29,14 @@ import static com.google.common.base.Preconditions.checkArgument; | ... | @@ -29,10 +29,14 @@ import static com.google.common.base.Preconditions.checkArgument; |
29 | */ | 29 | */ |
30 | public class SystemClockTimestamp implements Timestamp { | 30 | public class SystemClockTimestamp implements Timestamp { |
31 | 31 | ||
32 | - private final long unixTimestamp; | 32 | + private final long nanoTimestamp; |
33 | 33 | ||
34 | public SystemClockTimestamp() { | 34 | public SystemClockTimestamp() { |
35 | - unixTimestamp = System.nanoTime(); | 35 | + nanoTimestamp = System.nanoTime(); |
36 | + } | ||
37 | + | ||
38 | + public SystemClockTimestamp(long timestamp) { | ||
39 | + nanoTimestamp = timestamp; | ||
36 | } | 40 | } |
37 | 41 | ||
38 | @Override | 42 | @Override |
... | @@ -42,12 +46,12 @@ public class SystemClockTimestamp implements Timestamp { | ... | @@ -42,12 +46,12 @@ public class SystemClockTimestamp implements Timestamp { |
42 | SystemClockTimestamp that = (SystemClockTimestamp) o; | 46 | SystemClockTimestamp that = (SystemClockTimestamp) o; |
43 | 47 | ||
44 | return ComparisonChain.start() | 48 | return ComparisonChain.start() |
45 | - .compare(this.unixTimestamp, that.unixTimestamp) | 49 | + .compare(this.nanoTimestamp, that.nanoTimestamp) |
46 | .result(); | 50 | .result(); |
47 | } | 51 | } |
48 | @Override | 52 | @Override |
49 | public int hashCode() { | 53 | public int hashCode() { |
50 | - return Objects.hash(unixTimestamp); | 54 | + return Objects.hash(nanoTimestamp); |
51 | } | 55 | } |
52 | 56 | ||
53 | @Override | 57 | @Override |
... | @@ -59,17 +63,21 @@ public class SystemClockTimestamp implements Timestamp { | ... | @@ -59,17 +63,21 @@ public class SystemClockTimestamp implements Timestamp { |
59 | return false; | 63 | return false; |
60 | } | 64 | } |
61 | SystemClockTimestamp that = (SystemClockTimestamp) obj; | 65 | SystemClockTimestamp that = (SystemClockTimestamp) obj; |
62 | - return Objects.equals(this.unixTimestamp, that.unixTimestamp); | 66 | + return Objects.equals(this.nanoTimestamp, that.nanoTimestamp); |
63 | } | 67 | } |
64 | 68 | ||
65 | @Override | 69 | @Override |
66 | public String toString() { | 70 | public String toString() { |
67 | return MoreObjects.toStringHelper(getClass()) | 71 | return MoreObjects.toStringHelper(getClass()) |
68 | - .add("unixTimestamp", unixTimestamp) | 72 | + .add("nanoTimestamp", nanoTimestamp) |
69 | .toString(); | 73 | .toString(); |
70 | } | 74 | } |
71 | 75 | ||
76 | + public long nanoTimestamp() { | ||
77 | + return nanoTimestamp; | ||
78 | + } | ||
79 | + | ||
72 | public long systemTimestamp() { | 80 | public long systemTimestamp() { |
73 | - return unixTimestamp; | 81 | + return nanoTimestamp / 1_000_000; // convert ns to ms |
74 | } | 82 | } |
75 | } | 83 | } | ... | ... |
-
Please register or login to post a comment