hc
2024-05-10 61598093bbdd283a7edc367d900f223070ead8d2
kernel/kernel/irq/timings.c
....@@ -1,14 +1,17 @@
11 // SPDX-License-Identifier: GPL-2.0
22 // Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
3
+#define pr_fmt(fmt) "irq_timings: " fmt
34
45 #include <linux/kernel.h>
56 #include <linux/percpu.h>
67 #include <linux/slab.h>
78 #include <linux/static_key.h>
9
+#include <linux/init.h>
810 #include <linux/interrupt.h>
911 #include <linux/idr.h>
1012 #include <linux/irq.h>
1113 #include <linux/math64.h>
14
+#include <linux/log2.h>
1215
1316 #include <trace/events/irq.h>
1417
....@@ -17,16 +20,6 @@
1720 DEFINE_STATIC_KEY_FALSE(irq_timing_enabled);
1821
1922 DEFINE_PER_CPU(struct irq_timings, irq_timings);
20
-
21
-struct irqt_stat {
22
- u64 next_evt;
23
- u64 last_ts;
24
- u64 variance;
25
- u32 avg;
26
- u32 nr_samples;
27
- int anomalies;
28
- int valid;
29
-};
3023
3124 static DEFINE_IDR(irqt_stats);
3225
....@@ -40,75 +33,447 @@
4033 static_branch_disable(&irq_timing_enabled);
4134 }
4235
43
-/**
44
- * irqs_update - update the irq timing statistics with a new timestamp
36
+/*
37
+ * The main goal of this algorithm is to predict the next interrupt
38
+ * occurrence on the current CPU.
4539 *
46
- * @irqs: an irqt_stat struct pointer
47
- * @ts: the new timestamp
40
+ * Currently, the interrupt timings are stored in a circular array
41
+ * buffer every time there is an interrupt, as a tuple: the interrupt
42
+ * number and the associated timestamp when the event occurred <irq,
43
+ * timestamp>.
4844 *
49
- * The statistics are computed online, in other words, the code is
50
- * designed to compute the statistics on a stream of values rather
51
- * than doing multiple passes on the values to compute the average,
52
- * then the variance. The integer division introduces a loss of
53
- * precision but with an acceptable error margin regarding the results
54
- * we would have with the double floating precision: we are dealing
55
- * with nanosec, so big numbers, consequently the mantisse is
56
- * negligeable, especially when converting the time in usec
57
- * afterwards.
45
+ * For every interrupt occurring in a short period of time, we can
46
+ * measure the elapsed time between the occurrences for the same
47
+ * interrupt and we end up with a suite of intervals. The experience
48
+ * showed the interrupts are often coming following a periodic
49
+ * pattern.
5850 *
59
- * The computation happens at idle time. When the CPU is not idle, the
60
- * interrupts' timestamps are stored in the circular buffer, when the
61
- * CPU goes idle and this routine is called, all the buffer's values
62
- * are injected in the statistical model continuying to extend the
63
- * statistics from the previous busy-idle cycle.
51
+ * The objective of the algorithm is to find out this periodic pattern
52
+ * in a fastest way and use its period to predict the next irq event.
6453 *
65
- * The observations showed a device will trigger a burst of periodic
66
- * interrupts followed by one or two peaks of longer time, for
67
- * instance when a SD card device flushes its cache, then the periodic
68
- * intervals occur again. A one second inactivity period resets the
69
- * stats, that gives us the certitude the statistical values won't
70
- * exceed 1x10^9, thus the computation won't overflow.
54
+ * When the next interrupt event is requested, we are in the situation
55
+ * where the interrupts are disabled and the circular buffer
56
+ * containing the timings is filled with the events which happened
57
+ * after the previous next-interrupt-event request.
7158 *
72
- * Basically, the purpose of the algorithm is to watch the periodic
73
- * interrupts and eliminate the peaks.
59
+ * At this point, we read the circular buffer and we fill the irq
60
+ * related statistics structure. After this step, the circular array
61
+ * containing the timings is empty because all the values are
62
+ * dispatched in their corresponding buffers.
7463 *
75
- * An interrupt is considered periodically stable if the interval of
76
- * its occurences follow the normal distribution, thus the values
77
- * comply with:
64
+ * Now for each interrupt, we can predict the next event by using the
65
+ * suffix array, log interval and exponential moving average
7866 *
79
- * avg - 3 x stddev < value < avg + 3 x stddev
67
+ * 1. Suffix array
8068 *
81
- * Which can be simplified to:
69
+ * Suffix array is an array of all the suffixes of a string. It is
70
+ * widely used as a data structure for compression, text search, ...
71
+ * For instance for the word 'banana', the suffixes will be: 'banana'
72
+ * 'anana' 'nana' 'ana' 'na' 'a'
8273 *
83
- * -3 x stddev < value - avg < 3 x stddev
74
+ * Usually, the suffix array is sorted but for our purpose it is
75
+ * not necessary and won't provide any improvement in the context of
76
+ * the solved problem where we clearly define the boundaries of the
77
+ * search by a max period and min period.
8478 *
85
- * abs(value - avg) < 3 x stddev
79
+ * The suffix array will build a suite of intervals of different
80
+ * length and will look for the repetition of each suite. If the suite
81
+ * is repeating then we have the period because it is the length of
82
+ * the suite whatever its position in the buffer.
8683 *
87
- * In order to save a costly square root computation, we use the
88
- * variance. For the record, stddev = sqrt(variance). The equation
89
- * above becomes:
84
+ * 2. Log interval
9085 *
91
- * abs(value - avg) < 3 x sqrt(variance)
86
+ * We saw the irq timings allow to compute the interval of the
87
+ * occurrences for a specific interrupt. We can reasonibly assume the
88
+ * longer is the interval, the higher is the error for the next event
89
+ * and we can consider storing those interval values into an array
90
+ * where each slot in the array correspond to an interval at the power
91
+ * of 2 of the index. For example, index 12 will contain values
92
+ * between 2^11 and 2^12.
9293 *
93
- * And finally we square it:
94
+ * At the end we have an array of values where at each index defines a
95
+ * [2^index - 1, 2 ^ index] interval values allowing to store a large
96
+ * number of values inside a small array.
9497 *
95
- * (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2
98
+ * For example, if we have the value 1123, then we store it at
99
+ * ilog2(1123) = 10 index value.
96100 *
97
- * (value - avg) x (value - avg) < 9 x variance
101
+ * Storing those value at the specific index is done by computing an
102
+ * exponential moving average for this specific slot. For instance,
103
+ * for values 1800, 1123, 1453, ... fall under the same slot (10) and
104
+ * the exponential moving average is computed every time a new value
105
+ * is stored at this slot.
98106 *
99
- * Statistically speaking, any values out of this interval is
100
- * considered as an anomaly and is discarded. However, a normal
101
- * distribution appears when the number of samples is 30 (it is the
102
- * rule of thumb in statistics, cf. "30 samples" on Internet). When
103
- * there are three consecutive anomalies, the statistics are resetted.
107
+ * 3. Exponential Moving Average
104108 *
109
+ * The EMA is largely used to track a signal for stocks or as a low
110
+ * pass filter. The magic of the formula, is it is very simple and the
111
+ * reactivity of the average can be tuned with the factors called
112
+ * alpha.
113
+ *
114
+ * The higher the alphas are, the faster the average respond to the
115
+ * signal change. In our case, if a slot in the array is a big
116
+ * interval, we can have numbers with a big difference between
117
+ * them. The impact of those differences in the average computation
118
+ * can be tuned by changing the alpha value.
119
+ *
120
+ *
121
+ * -- The algorithm --
122
+ *
123
+ * We saw the different processing above, now let's see how they are
124
+ * used together.
125
+ *
126
+ * For each interrupt:
127
+ * For each interval:
128
+ * Compute the index = ilog2(interval)
129
+ * Compute a new_ema(buffer[index], interval)
130
+ * Store the index in a circular buffer
131
+ *
132
+ * Compute the suffix array of the indexes
133
+ *
134
+ * For each suffix:
135
+ * If the suffix is reverse-found 3 times
136
+ * Return suffix
137
+ *
138
+ * Return Not found
139
+ *
140
+ * However we can not have endless suffix array to be build, it won't
141
+ * make sense and it will add an extra overhead, so we can restrict
142
+ * this to a maximum suffix length of 5 and a minimum suffix length of
143
+ * 2. The experience showed 5 is the majority of the maximum pattern
144
+ * period found for different devices.
145
+ *
146
+ * The result is a pattern finding less than 1us for an interrupt.
147
+ *
148
+ * Example based on real values:
149
+ *
150
+ * Example 1 : MMC write/read interrupt interval:
151
+ *
152
+ * 223947, 1240, 1384, 1386, 1386,
153
+ * 217416, 1236, 1384, 1386, 1387,
154
+ * 214719, 1241, 1386, 1387, 1384,
155
+ * 213696, 1234, 1384, 1386, 1388,
156
+ * 219904, 1240, 1385, 1389, 1385,
157
+ * 212240, 1240, 1386, 1386, 1386,
158
+ * 214415, 1236, 1384, 1386, 1387,
159
+ * 214276, 1234, 1384, 1388, ?
160
+ *
161
+ * For each element, apply ilog2(value)
162
+ *
163
+ * 15, 8, 8, 8, 8,
164
+ * 15, 8, 8, 8, 8,
165
+ * 15, 8, 8, 8, 8,
166
+ * 15, 8, 8, 8, 8,
167
+ * 15, 8, 8, 8, 8,
168
+ * 15, 8, 8, 8, 8,
169
+ * 15, 8, 8, 8, 8,
170
+ * 15, 8, 8, 8, ?
171
+ *
172
+ * Max period of 5, we take the last (max_period * 3) 15 elements as
173
+ * we can be confident if the pattern repeats itself three times it is
174
+ * a repeating pattern.
175
+ *
176
+ * 8,
177
+ * 15, 8, 8, 8, 8,
178
+ * 15, 8, 8, 8, 8,
179
+ * 15, 8, 8, 8, ?
180
+ *
181
+ * Suffixes are:
182
+ *
183
+ * 1) 8, 15, 8, 8, 8 <- max period
184
+ * 2) 8, 15, 8, 8
185
+ * 3) 8, 15, 8
186
+ * 4) 8, 15 <- min period
187
+ *
188
+ * From there we search the repeating pattern for each suffix.
189
+ *
190
+ * buffer: 8, 15, 8, 8, 8, 8, 15, 8, 8, 8, 8, 15, 8, 8, 8
191
+ * | | | | | | | | | | | | | | |
192
+ * 8, 15, 8, 8, 8 | | | | | | | | | |
193
+ * 8, 15, 8, 8, 8 | | | | |
194
+ * 8, 15, 8, 8, 8
195
+ *
196
+ * When moving the suffix, we found exactly 3 matches.
197
+ *
198
+ * The first suffix with period 5 is repeating.
199
+ *
200
+ * The next event is (3 * max_period) % suffix_period
201
+ *
202
+ * In this example, the result 0, so the next event is suffix[0] => 8
203
+ *
204
+ * However, 8 is the index in the array of exponential moving average
205
+ * which was calculated on the fly when storing the values, so the
206
+ * interval is ema[8] = 1366
207
+ *
208
+ *
209
+ * Example 2:
210
+ *
211
+ * 4, 3, 5, 100,
212
+ * 3, 3, 5, 117,
213
+ * 4, 4, 5, 112,
214
+ * 4, 3, 4, 110,
215
+ * 3, 5, 3, 117,
216
+ * 4, 4, 5, 112,
217
+ * 4, 3, 4, 110,
218
+ * 3, 4, 5, 112,
219
+ * 4, 3, 4, 110
220
+ *
221
+ * ilog2
222
+ *
223
+ * 0, 0, 0, 4,
224
+ * 0, 0, 0, 4,
225
+ * 0, 0, 0, 4,
226
+ * 0, 0, 0, 4,
227
+ * 0, 0, 0, 4,
228
+ * 0, 0, 0, 4,
229
+ * 0, 0, 0, 4,
230
+ * 0, 0, 0, 4,
231
+ * 0, 0, 0, 4
232
+ *
233
+ * Max period 5:
234
+ * 0, 0, 4,
235
+ * 0, 0, 0, 4,
236
+ * 0, 0, 0, 4,
237
+ * 0, 0, 0, 4
238
+ *
239
+ * Suffixes:
240
+ *
241
+ * 1) 0, 0, 4, 0, 0
242
+ * 2) 0, 0, 4, 0
243
+ * 3) 0, 0, 4
244
+ * 4) 0, 0
245
+ *
246
+ * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4
247
+ * | | | | | | X
248
+ * 0, 0, 4, 0, 0, | X
249
+ * 0, 0
250
+ *
251
+ * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4
252
+ * | | | | | | | | | | | | | | |
253
+ * 0, 0, 4, 0, | | | | | | | | | | |
254
+ * 0, 0, 4, 0, | | | | | | |
255
+ * 0, 0, 4, 0, | | |
256
+ * 0 0 4
257
+ *
258
+ * Pattern is found 3 times, the remaining is 1 which results from
259
+ * (max_period * 3) % suffix_period. This value is the index in the
260
+ * suffix arrays. The suffix array for a period 4 has the value 4
261
+ * at index 1.
105262 */
106
-static void irqs_update(struct irqt_stat *irqs, u64 ts)
263
+#define EMA_ALPHA_VAL 64
264
+#define EMA_ALPHA_SHIFT 7
265
+
266
+#define PREDICTION_PERIOD_MIN 3
267
+#define PREDICTION_PERIOD_MAX 5
268
+#define PREDICTION_FACTOR 4
269
+#define PREDICTION_MAX 10 /* 2 ^ PREDICTION_MAX useconds */
270
+#define PREDICTION_BUFFER_SIZE 16 /* slots for EMAs, hardly more than 16 */
271
+
272
+/*
273
+ * Number of elements in the circular buffer: If it happens it was
274
+ * flushed before, then the number of elements could be smaller than
275
+ * IRQ_TIMINGS_SIZE, so the count is used, otherwise the array size is
276
+ * used as we wrapped. The index begins from zero when we did not
277
+ * wrap. That could be done in a nicer way with the proper circular
278
+ * array structure type but with the cost of extra computation in the
279
+ * interrupt handler hot path. We choose efficiency.
280
+ */
281
+#define for_each_irqts(i, irqts) \
282
+ for (i = irqts->count < IRQ_TIMINGS_SIZE ? \
283
+ 0 : irqts->count & IRQ_TIMINGS_MASK, \
284
+ irqts->count = min(IRQ_TIMINGS_SIZE, \
285
+ irqts->count); \
286
+ irqts->count > 0; irqts->count--, \
287
+ i = (i + 1) & IRQ_TIMINGS_MASK)
288
+
289
+struct irqt_stat {
290
+ u64 last_ts;
291
+ u64 ema_time[PREDICTION_BUFFER_SIZE];
292
+ int timings[IRQ_TIMINGS_SIZE];
293
+ int circ_timings[IRQ_TIMINGS_SIZE];
294
+ int count;
295
+};
296
+
297
+/*
298
+ * Exponential moving average computation
299
+ */
300
+static u64 irq_timings_ema_new(u64 value, u64 ema_old)
301
+{
302
+ s64 diff;
303
+
304
+ if (unlikely(!ema_old))
305
+ return value;
306
+
307
+ diff = (value - ema_old) * EMA_ALPHA_VAL;
308
+ /*
309
+ * We can use a s64 type variable to be added with the u64
310
+ * ema_old variable as this one will never have its topmost
311
+ * bit set, it will be always smaller than 2^63 nanosec
312
+ * interrupt interval (292 years).
313
+ */
314
+ return ema_old + (diff >> EMA_ALPHA_SHIFT);
315
+}
316
+
317
+static int irq_timings_next_event_index(int *buffer, size_t len, int period_max)
318
+{
319
+ int period;
320
+
321
+ /*
322
+ * Move the beginning pointer to the end minus the max period x 3.
323
+ * We are at the point we can begin searching the pattern
324
+ */
325
+ buffer = &buffer[len - (period_max * 3)];
326
+
327
+ /* Adjust the length to the maximum allowed period x 3 */
328
+ len = period_max * 3;
329
+
330
+ /*
331
+ * The buffer contains the suite of intervals, in a ilog2
332
+ * basis, we are looking for a repetition. We point the
333
+ * beginning of the search three times the length of the
334
+ * period beginning at the end of the buffer. We do that for
335
+ * each suffix.
336
+ */
337
+ for (period = period_max; period >= PREDICTION_PERIOD_MIN; period--) {
338
+
339
+ /*
340
+ * The first comparison always succeed because the
341
+ * suffix is deduced from the first n-period bytes of
342
+ * the buffer and we compare the initial suffix with
343
+ * itself, so we can skip the first iteration.
344
+ */
345
+ int idx = period;
346
+ size_t size = period;
347
+
348
+ /*
349
+ * We look if the suite with period 'i' repeat
350
+ * itself. If it is truncated at the end, as it
351
+ * repeats we can use the period to find out the next
352
+ * element with the modulo.
353
+ */
354
+ while (!memcmp(buffer, &buffer[idx], size * sizeof(int))) {
355
+
356
+ /*
357
+ * Move the index in a period basis
358
+ */
359
+ idx += size;
360
+
361
+ /*
362
+ * If this condition is reached, all previous
363
+ * memcmp were successful, so the period is
364
+ * found.
365
+ */
366
+ if (idx == len)
367
+ return buffer[len % period];
368
+
369
+ /*
370
+ * If the remaining elements to compare are
371
+ * smaller than the period, readjust the size
372
+ * of the comparison for the last iteration.
373
+ */
374
+ if (len - idx < period)
375
+ size = len - idx;
376
+ }
377
+ }
378
+
379
+ return -1;
380
+}
381
+
382
+static u64 __irq_timings_next_event(struct irqt_stat *irqs, int irq, u64 now)
383
+{
384
+ int index, i, period_max, count, start, min = INT_MAX;
385
+
386
+ if ((now - irqs->last_ts) >= NSEC_PER_SEC) {
387
+ irqs->count = irqs->last_ts = 0;
388
+ return U64_MAX;
389
+ }
390
+
391
+ /*
392
+ * As we want to find three times the repetition, we need a
393
+ * number of intervals greater or equal to three times the
394
+ * maximum period, otherwise we truncate the max period.
395
+ */
396
+ period_max = irqs->count > (3 * PREDICTION_PERIOD_MAX) ?
397
+ PREDICTION_PERIOD_MAX : irqs->count / 3;
398
+
399
+ /*
400
+ * If we don't have enough irq timings for this prediction,
401
+ * just bail out.
402
+ */
403
+ if (period_max <= PREDICTION_PERIOD_MIN)
404
+ return U64_MAX;
405
+
406
+ /*
407
+ * 'count' will depends if the circular buffer wrapped or not
408
+ */
409
+ count = irqs->count < IRQ_TIMINGS_SIZE ?
410
+ irqs->count : IRQ_TIMINGS_SIZE;
411
+
412
+ start = irqs->count < IRQ_TIMINGS_SIZE ?
413
+ 0 : (irqs->count & IRQ_TIMINGS_MASK);
414
+
415
+ /*
416
+ * Copy the content of the circular buffer into another buffer
417
+ * in order to linearize the buffer instead of dealing with
418
+ * wrapping indexes and shifted array which will be prone to
419
+ * error and extremelly difficult to debug.
420
+ */
421
+ for (i = 0; i < count; i++) {
422
+ int index = (start + i) & IRQ_TIMINGS_MASK;
423
+
424
+ irqs->timings[i] = irqs->circ_timings[index];
425
+ min = min_t(int, irqs->timings[i], min);
426
+ }
427
+
428
+ index = irq_timings_next_event_index(irqs->timings, count, period_max);
429
+ if (index < 0)
430
+ return irqs->last_ts + irqs->ema_time[min];
431
+
432
+ return irqs->last_ts + irqs->ema_time[index];
433
+}
434
+
435
+static __always_inline int irq_timings_interval_index(u64 interval)
436
+{
437
+ /*
438
+ * The PREDICTION_FACTOR increase the interval size for the
439
+ * array of exponential average.
440
+ */
441
+ u64 interval_us = (interval >> 10) / PREDICTION_FACTOR;
442
+
443
+ return likely(interval_us) ? ilog2(interval_us) : 0;
444
+}
445
+
446
+static __always_inline void __irq_timings_store(int irq, struct irqt_stat *irqs,
447
+ u64 interval)
448
+{
449
+ int index;
450
+
451
+ /*
452
+ * Get the index in the ema table for this interrupt.
453
+ */
454
+ index = irq_timings_interval_index(interval);
455
+
456
+ if (index > PREDICTION_BUFFER_SIZE - 1) {
457
+ irqs->count = 0;
458
+ return;
459
+ }
460
+
461
+ /*
462
+ * Store the index as an element of the pattern in another
463
+ * circular array.
464
+ */
465
+ irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index;
466
+
467
+ irqs->ema_time[index] = irq_timings_ema_new(interval,
468
+ irqs->ema_time[index]);
469
+
470
+ irqs->count++;
471
+}
472
+
473
+static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts)
107474 {
108475 u64 old_ts = irqs->last_ts;
109
- u64 variance = 0;
110476 u64 interval;
111
- s64 diff;
112477
113478 /*
114479 * The timestamps are absolute time values, we need to compute
....@@ -125,7 +490,7 @@
125490
126491 /*
127492 * The interrupt triggered more than one second apart, that
128
- * ends the sequence as predictible for our purpose. In this
493
+ * ends the sequence as predictable for our purpose. In this
129494 * case, assume we have the beginning of a sequence and the
130495 * timestamp is the first value. As it is impossible to
131496 * predict anything at this point, return.
....@@ -135,87 +500,11 @@
135500 * want as we need another timestamp to compute an interval.
136501 */
137502 if (interval >= NSEC_PER_SEC) {
138
- memset(irqs, 0, sizeof(*irqs));
139
- irqs->last_ts = ts;
503
+ irqs->count = 0;
140504 return;
141505 }
142506
143
- /*
144
- * Pre-compute the delta with the average as the result is
145
- * used several times in this function.
146
- */
147
- diff = interval - irqs->avg;
148
-
149
- /*
150
- * Increment the number of samples.
151
- */
152
- irqs->nr_samples++;
153
-
154
- /*
155
- * Online variance divided by the number of elements if there
156
- * is more than one sample. Normally the formula is division
157
- * by nr_samples - 1 but we assume the number of element will be
158
- * more than 32 and dividing by 32 instead of 31 is enough
159
- * precise.
160
- */
161
- if (likely(irqs->nr_samples > 1))
162
- variance = irqs->variance >> IRQ_TIMINGS_SHIFT;
163
-
164
- /*
165
- * The rule of thumb in statistics for the normal distribution
166
- * is having at least 30 samples in order to have the model to
167
- * apply. Values outside the interval are considered as an
168
- * anomaly.
169
- */
170
- if ((irqs->nr_samples >= 30) && ((diff * diff) > (9 * variance))) {
171
- /*
172
- * After three consecutive anomalies, we reset the
173
- * stats as it is no longer stable enough.
174
- */
175
- if (irqs->anomalies++ >= 3) {
176
- memset(irqs, 0, sizeof(*irqs));
177
- irqs->last_ts = ts;
178
- return;
179
- }
180
- } else {
181
- /*
182
- * The anomalies must be consecutives, so at this
183
- * point, we reset the anomalies counter.
184
- */
185
- irqs->anomalies = 0;
186
- }
187
-
188
- /*
189
- * The interrupt is considered stable enough to try to predict
190
- * the next event on it.
191
- */
192
- irqs->valid = 1;
193
-
194
- /*
195
- * Online average algorithm:
196
- *
197
- * new_average = average + ((value - average) / count)
198
- *
199
- * The variance computation depends on the new average
200
- * to be computed here first.
201
- *
202
- */
203
- irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT);
204
-
205
- /*
206
- * Online variance algorithm:
207
- *
208
- * new_variance = variance + (value - average) x (value - new_average)
209
- *
210
- * Warning: irqs->avg is updated with the line above, hence
211
- * 'interval - irqs->avg' is no longer equal to 'diff'
212
- */
213
- irqs->variance = irqs->variance + (diff * (interval - irqs->avg));
214
-
215
- /*
216
- * Update the next event
217
- */
218
- irqs->next_evt = ts + irqs->avg;
507
+ __irq_timings_store(irq, irqs, interval);
219508 }
220509
221510 /**
....@@ -259,6 +548,9 @@
259548 */
260549 lockdep_assert_irqs_disabled();
261550
551
+ if (!irqts->count)
552
+ return next_evt;
553
+
262554 /*
263555 * Number of elements in the circular buffer: If it happens it
264556 * was flushed before, then the number of elements could be
....@@ -269,21 +561,15 @@
269561 * type but with the cost of extra computation in the
270562 * interrupt handler hot path. We choose efficiency.
271563 *
272
- * Inject measured irq/timestamp to the statistical model
273
- * while decrementing the counter because we consume the data
274
- * from our circular buffer.
564
+ * Inject measured irq/timestamp to the pattern prediction
565
+ * model while decrementing the counter because we consume the
566
+ * data from our circular buffer.
275567 */
276
- for (i = irqts->count & IRQ_TIMINGS_MASK,
277
- irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count);
278
- irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) {
279
-
568
+ for_each_irqts(i, irqts) {
280569 irq = irq_timing_decode(irqts->values[i], &ts);
281
-
282570 s = idr_find(&irqt_stats, irq);
283
- if (s) {
284
- irqs = this_cpu_ptr(s);
285
- irqs_update(irqs, ts);
286
- }
571
+ if (s)
572
+ irq_timings_store(irq, this_cpu_ptr(s), ts);
287573 }
288574
289575 /*
....@@ -294,26 +580,12 @@
294580
295581 irqs = this_cpu_ptr(s);
296582
297
- if (!irqs->valid)
298
- continue;
583
+ ts = __irq_timings_next_event(irqs, i, now);
584
+ if (ts <= now)
585
+ return now;
299586
300
- if (irqs->next_evt <= now) {
301
- irq = i;
302
- next_evt = now;
303
-
304
- /*
305
- * This interrupt mustn't use in the future
306
- * until new events occur and update the
307
- * statistics.
308
- */
309
- irqs->valid = 0;
310
- break;
311
- }
312
-
313
- if (irqs->next_evt < next_evt) {
314
- irq = i;
315
- next_evt = irqs->next_evt;
316
- }
587
+ if (ts < next_evt)
588
+ next_evt = ts;
317589 }
318590
319591 return next_evt;
....@@ -337,7 +609,7 @@
337609
338610 /*
339611 * Some platforms can have the same private interrupt per cpu,
340
- * so this function may be be called several times with the
612
+ * so this function may be called several times with the
341613 * same interrupt number. Just bail out in case the per cpu
342614 * stat structure is already allocated.
343615 */
....@@ -360,3 +632,327 @@
360632
361633 return 0;
362634 }
635
+
636
+#ifdef CONFIG_TEST_IRQ_TIMINGS
637
+struct timings_intervals {
638
+ u64 *intervals;
639
+ size_t count;
640
+};
641
+
642
+/*
643
+ * Intervals are given in nanosecond base
644
+ */
645
+static u64 intervals0[] __initdata = {
646
+ 10000, 50000, 200000, 500000,
647
+ 10000, 50000, 200000, 500000,
648
+ 10000, 50000, 200000, 500000,
649
+ 10000, 50000, 200000, 500000,
650
+ 10000, 50000, 200000, 500000,
651
+ 10000, 50000, 200000, 500000,
652
+ 10000, 50000, 200000, 500000,
653
+ 10000, 50000, 200000, 500000,
654
+ 10000, 50000, 200000,
655
+};
656
+
657
+static u64 intervals1[] __initdata = {
658
+ 223947000, 1240000, 1384000, 1386000, 1386000,
659
+ 217416000, 1236000, 1384000, 1386000, 1387000,
660
+ 214719000, 1241000, 1386000, 1387000, 1384000,
661
+ 213696000, 1234000, 1384000, 1386000, 1388000,
662
+ 219904000, 1240000, 1385000, 1389000, 1385000,
663
+ 212240000, 1240000, 1386000, 1386000, 1386000,
664
+ 214415000, 1236000, 1384000, 1386000, 1387000,
665
+ 214276000, 1234000,
666
+};
667
+
668
+static u64 intervals2[] __initdata = {
669
+ 4000, 3000, 5000, 100000,
670
+ 3000, 3000, 5000, 117000,
671
+ 4000, 4000, 5000, 112000,
672
+ 4000, 3000, 4000, 110000,
673
+ 3000, 5000, 3000, 117000,
674
+ 4000, 4000, 5000, 112000,
675
+ 4000, 3000, 4000, 110000,
676
+ 3000, 4000, 5000, 112000,
677
+ 4000,
678
+};
679
+
680
+static u64 intervals3[] __initdata = {
681
+ 1385000, 212240000, 1240000,
682
+ 1386000, 214415000, 1236000,
683
+ 1384000, 214276000, 1234000,
684
+ 1386000, 214415000, 1236000,
685
+ 1385000, 212240000, 1240000,
686
+ 1386000, 214415000, 1236000,
687
+ 1384000, 214276000, 1234000,
688
+ 1386000, 214415000, 1236000,
689
+ 1385000, 212240000, 1240000,
690
+};
691
+
692
+static u64 intervals4[] __initdata = {
693
+ 10000, 50000, 10000, 50000,
694
+ 10000, 50000, 10000, 50000,
695
+ 10000, 50000, 10000, 50000,
696
+ 10000, 50000, 10000, 50000,
697
+ 10000, 50000, 10000, 50000,
698
+ 10000, 50000, 10000, 50000,
699
+ 10000, 50000, 10000, 50000,
700
+ 10000, 50000, 10000, 50000,
701
+ 10000,
702
+};
703
+
704
+static struct timings_intervals tis[] __initdata = {
705
+ { intervals0, ARRAY_SIZE(intervals0) },
706
+ { intervals1, ARRAY_SIZE(intervals1) },
707
+ { intervals2, ARRAY_SIZE(intervals2) },
708
+ { intervals3, ARRAY_SIZE(intervals3) },
709
+ { intervals4, ARRAY_SIZE(intervals4) },
710
+};
711
+
712
+static int __init irq_timings_test_next_index(struct timings_intervals *ti)
713
+{
714
+ int _buffer[IRQ_TIMINGS_SIZE];
715
+ int buffer[IRQ_TIMINGS_SIZE];
716
+ int index, start, i, count, period_max;
717
+
718
+ count = ti->count - 1;
719
+
720
+ period_max = count > (3 * PREDICTION_PERIOD_MAX) ?
721
+ PREDICTION_PERIOD_MAX : count / 3;
722
+
723
+ /*
724
+ * Inject all values except the last one which will be used
725
+ * to compare with the next index result.
726
+ */
727
+ pr_debug("index suite: ");
728
+
729
+ for (i = 0; i < count; i++) {
730
+ index = irq_timings_interval_index(ti->intervals[i]);
731
+ _buffer[i & IRQ_TIMINGS_MASK] = index;
732
+ pr_cont("%d ", index);
733
+ }
734
+
735
+ start = count < IRQ_TIMINGS_SIZE ? 0 :
736
+ count & IRQ_TIMINGS_MASK;
737
+
738
+ count = min_t(int, count, IRQ_TIMINGS_SIZE);
739
+
740
+ for (i = 0; i < count; i++) {
741
+ int index = (start + i) & IRQ_TIMINGS_MASK;
742
+ buffer[i] = _buffer[index];
743
+ }
744
+
745
+ index = irq_timings_next_event_index(buffer, count, period_max);
746
+ i = irq_timings_interval_index(ti->intervals[ti->count - 1]);
747
+
748
+ if (index != i) {
749
+ pr_err("Expected (%d) and computed (%d) next indexes differ\n",
750
+ i, index);
751
+ return -EINVAL;
752
+ }
753
+
754
+ return 0;
755
+}
756
+
757
+static int __init irq_timings_next_index_selftest(void)
758
+{
759
+ int i, ret;
760
+
761
+ for (i = 0; i < ARRAY_SIZE(tis); i++) {
762
+
763
+ pr_info("---> Injecting intervals number #%d (count=%zd)\n",
764
+ i, tis[i].count);
765
+
766
+ ret = irq_timings_test_next_index(&tis[i]);
767
+ if (ret)
768
+ break;
769
+ }
770
+
771
+ return ret;
772
+}
773
+
774
+static int __init irq_timings_test_irqs(struct timings_intervals *ti)
775
+{
776
+ struct irqt_stat __percpu *s;
777
+ struct irqt_stat *irqs;
778
+ int i, index, ret, irq = 0xACE5;
779
+
780
+ ret = irq_timings_alloc(irq);
781
+ if (ret) {
782
+ pr_err("Failed to allocate irq timings\n");
783
+ return ret;
784
+ }
785
+
786
+ s = idr_find(&irqt_stats, irq);
787
+ if (!s) {
788
+ ret = -EIDRM;
789
+ goto out;
790
+ }
791
+
792
+ irqs = this_cpu_ptr(s);
793
+
794
+ for (i = 0; i < ti->count; i++) {
795
+
796
+ index = irq_timings_interval_index(ti->intervals[i]);
797
+ pr_debug("%d: interval=%llu ema_index=%d\n",
798
+ i, ti->intervals[i], index);
799
+
800
+ __irq_timings_store(irq, irqs, ti->intervals[i]);
801
+ if (irqs->circ_timings[i & IRQ_TIMINGS_MASK] != index) {
802
+ ret = -EBADSLT;
803
+ pr_err("Failed to store in the circular buffer\n");
804
+ goto out;
805
+ }
806
+ }
807
+
808
+ if (irqs->count != ti->count) {
809
+ ret = -ERANGE;
810
+ pr_err("Count differs\n");
811
+ goto out;
812
+ }
813
+
814
+ ret = 0;
815
+out:
816
+ irq_timings_free(irq);
817
+
818
+ return ret;
819
+}
820
+
821
+static int __init irq_timings_irqs_selftest(void)
822
+{
823
+ int i, ret;
824
+
825
+ for (i = 0; i < ARRAY_SIZE(tis); i++) {
826
+ pr_info("---> Injecting intervals number #%d (count=%zd)\n",
827
+ i, tis[i].count);
828
+ ret = irq_timings_test_irqs(&tis[i]);
829
+ if (ret)
830
+ break;
831
+ }
832
+
833
+ return ret;
834
+}
835
+
836
+static int __init irq_timings_test_irqts(struct irq_timings *irqts,
837
+ unsigned count)
838
+{
839
+ int start = count >= IRQ_TIMINGS_SIZE ? count - IRQ_TIMINGS_SIZE : 0;
840
+ int i, irq, oirq = 0xBEEF;
841
+ u64 ots = 0xDEAD, ts;
842
+
843
+ /*
844
+ * Fill the circular buffer by using the dedicated function.
845
+ */
846
+ for (i = 0; i < count; i++) {
847
+ pr_debug("%d: index=%d, ts=%llX irq=%X\n",
848
+ i, i & IRQ_TIMINGS_MASK, ots + i, oirq + i);
849
+
850
+ irq_timings_push(ots + i, oirq + i);
851
+ }
852
+
853
+ /*
854
+ * Compute the first elements values after the index wrapped
855
+ * up or not.
856
+ */
857
+ ots += start;
858
+ oirq += start;
859
+
860
+ /*
861
+ * Test the circular buffer count is correct.
862
+ */
863
+ pr_debug("---> Checking timings array count (%d) is right\n", count);
864
+ if (WARN_ON(irqts->count != count))
865
+ return -EINVAL;
866
+
867
+ /*
868
+ * Test the macro allowing to browse all the irqts.
869
+ */
870
+ pr_debug("---> Checking the for_each_irqts() macro\n");
871
+ for_each_irqts(i, irqts) {
872
+
873
+ irq = irq_timing_decode(irqts->values[i], &ts);
874
+
875
+ pr_debug("index=%d, ts=%llX / %llX, irq=%X / %X\n",
876
+ i, ts, ots, irq, oirq);
877
+
878
+ if (WARN_ON(ts != ots || irq != oirq))
879
+ return -EINVAL;
880
+
881
+ ots++; oirq++;
882
+ }
883
+
884
+ /*
885
+ * The circular buffer should have be flushed when browsed
886
+ * with for_each_irqts
887
+ */
888
+ pr_debug("---> Checking timings array is empty after browsing it\n");
889
+ if (WARN_ON(irqts->count))
890
+ return -EINVAL;
891
+
892
+ return 0;
893
+}
894
+
895
+static int __init irq_timings_irqts_selftest(void)
896
+{
897
+ struct irq_timings *irqts = this_cpu_ptr(&irq_timings);
898
+ int i, ret;
899
+
900
+ /*
901
+ * Test the circular buffer with different number of
902
+ * elements. The purpose is to test at the limits (empty, half
903
+ * full, full, wrapped with the cursor at the boundaries,
904
+ * wrapped several times, etc ...
905
+ */
906
+ int count[] = { 0,
907
+ IRQ_TIMINGS_SIZE >> 1,
908
+ IRQ_TIMINGS_SIZE,
909
+ IRQ_TIMINGS_SIZE + (IRQ_TIMINGS_SIZE >> 1),
910
+ 2 * IRQ_TIMINGS_SIZE,
911
+ (2 * IRQ_TIMINGS_SIZE) + 3,
912
+ };
913
+
914
+ for (i = 0; i < ARRAY_SIZE(count); i++) {
915
+
916
+ pr_info("---> Checking the timings with %d/%d values\n",
917
+ count[i], IRQ_TIMINGS_SIZE);
918
+
919
+ ret = irq_timings_test_irqts(irqts, count[i]);
920
+ if (ret)
921
+ break;
922
+ }
923
+
924
+ return ret;
925
+}
926
+
927
+static int __init irq_timings_selftest(void)
928
+{
929
+ int ret;
930
+
931
+ pr_info("------------------- selftest start -----------------\n");
932
+
933
+ /*
934
+ * At this point, we don't except any subsystem to use the irq
935
+ * timings but us, so it should not be enabled.
936
+ */
937
+ if (static_branch_unlikely(&irq_timing_enabled)) {
938
+ pr_warn("irq timings already initialized, skipping selftest\n");
939
+ return 0;
940
+ }
941
+
942
+ ret = irq_timings_irqts_selftest();
943
+ if (ret)
944
+ goto out;
945
+
946
+ ret = irq_timings_irqs_selftest();
947
+ if (ret)
948
+ goto out;
949
+
950
+ ret = irq_timings_next_index_selftest();
951
+out:
952
+ pr_info("---------- selftest end with %s -----------\n",
953
+ ret ? "failure" : "success");
954
+
955
+ return ret;
956
+}
957
+early_initcall(irq_timings_selftest);
958
+#endif