| .. | .. | 
|---|
| 1 | 1 |  // SPDX-License-Identifier: GPL-2.0 | 
|---|
| 2 | 2 |  // Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> | 
|---|
 | 3 | +#define pr_fmt(fmt) "irq_timings: " fmt  | 
|---|
| 3 | 4 |   | 
|---|
| 4 | 5 |  #include <linux/kernel.h> | 
|---|
| 5 | 6 |  #include <linux/percpu.h> | 
|---|
| 6 | 7 |  #include <linux/slab.h> | 
|---|
| 7 | 8 |  #include <linux/static_key.h> | 
|---|
 | 9 | +#include <linux/init.h>  | 
|---|
| 8 | 10 |  #include <linux/interrupt.h> | 
|---|
| 9 | 11 |  #include <linux/idr.h> | 
|---|
| 10 | 12 |  #include <linux/irq.h> | 
|---|
| 11 | 13 |  #include <linux/math64.h> | 
|---|
 | 14 | +#include <linux/log2.h>  | 
|---|
| 12 | 15 |   | 
|---|
| 13 | 16 |  #include <trace/events/irq.h> | 
|---|
| 14 | 17 |   | 
|---|
| .. | .. | 
|---|
| 17 | 20 |  DEFINE_STATIC_KEY_FALSE(irq_timing_enabled); | 
|---|
| 18 | 21 |   | 
|---|
| 19 | 22 |  DEFINE_PER_CPU(struct irq_timings, irq_timings); | 
|---|
| 20 |  | -  | 
|---|
| 21 |  | -struct irqt_stat {  | 
|---|
| 22 |  | -	u64	next_evt;  | 
|---|
| 23 |  | -	u64	last_ts;  | 
|---|
| 24 |  | -	u64	variance;  | 
|---|
| 25 |  | -	u32	avg;  | 
|---|
| 26 |  | -	u32	nr_samples;  | 
|---|
| 27 |  | -	int	anomalies;  | 
|---|
| 28 |  | -	int	valid;  | 
|---|
| 29 |  | -};  | 
|---|
| 30 | 23 |   | 
|---|
| 31 | 24 |  static DEFINE_IDR(irqt_stats); | 
|---|
| 32 | 25 |   | 
|---|
| .. | .. | 
|---|
| 40 | 33 |  	static_branch_disable(&irq_timing_enabled); | 
|---|
| 41 | 34 |  } | 
|---|
| 42 | 35 |   | 
|---|
| 43 |  | -/**  | 
|---|
| 44 |  | - * irqs_update - update the irq timing statistics with a new timestamp  | 
|---|
 | 36 | +/*  | 
|---|
 | 37 | + * The main goal of this algorithm is to predict the next interrupt  | 
|---|
 | 38 | + * occurrence on the current CPU.  | 
|---|
| 45 | 39 |   * | 
|---|
| 46 |  | - * @irqs: an irqt_stat struct pointer  | 
|---|
| 47 |  | - * @ts: the new timestamp  | 
|---|
 | 40 | + * Currently, the interrupt timings are stored in a circular array  | 
|---|
 | 41 | + * buffer every time there is an interrupt, as a tuple: the interrupt  | 
|---|
 | 42 | + * number and the associated timestamp when the event occurred <irq,  | 
|---|
 | 43 | + * timestamp>.  | 
|---|
| 48 | 44 |   * | 
|---|
| 49 |  | - * The statistics are computed online, in other words, the code is  | 
|---|
| 50 |  | - * designed to compute the statistics on a stream of values rather  | 
|---|
| 51 |  | - * than doing multiple passes on the values to compute the average,  | 
|---|
| 52 |  | - * then the variance. The integer division introduces a loss of  | 
|---|
| 53 |  | - * precision but with an acceptable error margin regarding the results  | 
|---|
| 54 |  | - * we would have with the double floating precision: we are dealing  | 
|---|
| 55 |  | - * with nanosec, so big numbers, consequently the mantisse is  | 
|---|
| 56 |  | - * negligeable, especially when converting the time in usec  | 
|---|
| 57 |  | - * afterwards.  | 
|---|
 | 45 | + * For every interrupt occurring in a short period of time, we can  | 
|---|
 | 46 | + * measure the elapsed time between the occurrences for the same  | 
|---|
 | 47 | + * interrupt and we end up with a suite of intervals. The experience  | 
|---|
 | 48 | + * showed the interrupts are often coming following a periodic  | 
|---|
 | 49 | + * pattern.  | 
|---|
| 58 | 50 |   * | 
|---|
| 59 |  | - * The computation happens at idle time. When the CPU is not idle, the  | 
|---|
| 60 |  | - * interrupts' timestamps are stored in the circular buffer, when the  | 
|---|
| 61 |  | - * CPU goes idle and this routine is called, all the buffer's values  | 
|---|
| 62 |  | - * are injected in the statistical model continuying to extend the  | 
|---|
| 63 |  | - * statistics from the previous busy-idle cycle.  | 
|---|
 | 51 | + * The objective of the algorithm is to find out this periodic pattern  | 
|---|
 | 52 | + * in a fastest way and use its period to predict the next irq event.  | 
|---|
| 64 | 53 |   * | 
|---|
| 65 |  | - * The observations showed a device will trigger a burst of periodic  | 
|---|
| 66 |  | - * interrupts followed by one or two peaks of longer time, for  | 
|---|
| 67 |  | - * instance when a SD card device flushes its cache, then the periodic  | 
|---|
| 68 |  | - * intervals occur again. A one second inactivity period resets the  | 
|---|
| 69 |  | - * stats, that gives us the certitude the statistical values won't  | 
|---|
| 70 |  | - * exceed 1x10^9, thus the computation won't overflow.  | 
|---|
 | 54 | + * When the next interrupt event is requested, we are in the situation  | 
|---|
 | 55 | + * where the interrupts are disabled and the circular buffer  | 
|---|
 | 56 | + * containing the timings is filled with the events which happened  | 
|---|
 | 57 | + * after the previous next-interrupt-event request.  | 
|---|
| 71 | 58 |   * | 
|---|
| 72 |  | - * Basically, the purpose of the algorithm is to watch the periodic  | 
|---|
| 73 |  | - * interrupts and eliminate the peaks.  | 
|---|
 | 59 | + * At this point, we read the circular buffer and we fill the irq  | 
|---|
 | 60 | + * related statistics structure. After this step, the circular array  | 
|---|
 | 61 | + * containing the timings is empty because all the values are  | 
|---|
 | 62 | + * dispatched in their corresponding buffers.  | 
|---|
| 74 | 63 |   * | 
|---|
| 75 |  | - * An interrupt is considered periodically stable if the interval of  | 
|---|
| 76 |  | - * its occurences follow the normal distribution, thus the values  | 
|---|
| 77 |  | - * comply with:  | 
|---|
 | 64 | + * Now for each interrupt, we can predict the next event by using the  | 
|---|
 | 65 | + * suffix array, log interval and exponential moving average  | 
|---|
| 78 | 66 |   * | 
|---|
| 79 |  | - *      avg - 3 x stddev < value < avg + 3 x stddev  | 
|---|
 | 67 | + * 1. Suffix array  | 
|---|
| 80 | 68 |   * | 
|---|
| 81 |  | - * Which can be simplified to:  | 
|---|
 | 69 | + * Suffix array is an array of all the suffixes of a string. It is  | 
|---|
 | 70 | + * widely used as a data structure for compression, text search, ...  | 
|---|
 | 71 | + * For instance for the word 'banana', the suffixes will be: 'banana'  | 
|---|
 | 72 | + * 'anana' 'nana' 'ana' 'na' 'a'  | 
|---|
| 82 | 73 |   * | 
|---|
| 83 |  | - *      -3 x stddev < value - avg < 3 x stddev  | 
|---|
 | 74 | + * Usually, the suffix array is sorted but for our purpose it is  | 
|---|
 | 75 | + * not necessary and won't provide any improvement in the context of  | 
|---|
 | 76 | + * the solved problem where we clearly define the boundaries of the  | 
|---|
 | 77 | + * search by a max period and min period.  | 
|---|
| 84 | 78 |   * | 
|---|
| 85 |  | - *      abs(value - avg) < 3 x stddev  | 
|---|
 | 79 | + * The suffix array will build a suite of intervals of different  | 
|---|
 | 80 | + * length and will look for the repetition of each suite. If the suite  | 
|---|
 | 81 | + * is repeating then we have the period because it is the length of  | 
|---|
 | 82 | + * the suite whatever its position in the buffer.  | 
|---|
| 86 | 83 |   * | 
|---|
| 87 |  | - * In order to save a costly square root computation, we use the  | 
|---|
| 88 |  | - * variance. For the record, stddev = sqrt(variance). The equation  | 
|---|
| 89 |  | - * above becomes:  | 
|---|
 | 84 | + * 2. Log interval  | 
|---|
| 90 | 85 |   * | 
|---|
| 91 |  | - *      abs(value - avg) < 3 x sqrt(variance)  | 
|---|
 | 86 | + * We saw the irq timings allow to compute the interval of the  | 
|---|
 | 87 | + * occurrences for a specific interrupt. We can reasonibly assume the  | 
|---|
 | 88 | + * longer is the interval, the higher is the error for the next event  | 
|---|
 | 89 | + * and we can consider storing those interval values into an array  | 
|---|
 | 90 | + * where each slot in the array correspond to an interval at the power  | 
|---|
 | 91 | + * of 2 of the index. For example, index 12 will contain values  | 
|---|
 | 92 | + * between 2^11 and 2^12.  | 
|---|
| 92 | 93 |   * | 
|---|
| 93 |  | - * And finally we square it:  | 
|---|
 | 94 | + * At the end we have an array of values where at each index defines a  | 
|---|
 | 95 | + * [2^index - 1, 2 ^ index] interval values allowing to store a large  | 
|---|
 | 96 | + * number of values inside a small array.  | 
|---|
| 94 | 97 |   * | 
|---|
| 95 |  | - *      (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2  | 
|---|
 | 98 | + * For example, if we have the value 1123, then we store it at  | 
|---|
 | 99 | + * ilog2(1123) = 10 index value.  | 
|---|
| 96 | 100 |   * | 
|---|
| 97 |  | - *      (value - avg) x (value - avg) < 9 x variance  | 
|---|
 | 101 | + * Storing those value at the specific index is done by computing an  | 
|---|
 | 102 | + * exponential moving average for this specific slot. For instance,  | 
|---|
 | 103 | + * for values 1800, 1123, 1453, ... fall under the same slot (10) and  | 
|---|
 | 104 | + * the exponential moving average is computed every time a new value  | 
|---|
 | 105 | + * is stored at this slot.  | 
|---|
| 98 | 106 |   * | 
|---|
| 99 |  | - * Statistically speaking, any values out of this interval is  | 
|---|
| 100 |  | - * considered as an anomaly and is discarded. However, a normal  | 
|---|
| 101 |  | - * distribution appears when the number of samples is 30 (it is the  | 
|---|
| 102 |  | - * rule of thumb in statistics, cf. "30 samples" on Internet). When  | 
|---|
| 103 |  | - * there are three consecutive anomalies, the statistics are resetted.  | 
|---|
 | 107 | + * 3. Exponential Moving Average  | 
|---|
| 104 | 108 |   * | 
|---|
 | 109 | + * The EMA is largely used to track a signal for stocks or as a low  | 
|---|
 | 110 | + * pass filter. The magic of the formula, is it is very simple and the  | 
|---|
 | 111 | + * reactivity of the average can be tuned with the factors called  | 
|---|
 | 112 | + * alpha.  | 
|---|
 | 113 | + *  | 
|---|
 | 114 | + * The higher the alphas are, the faster the average respond to the  | 
|---|
 | 115 | + * signal change. In our case, if a slot in the array is a big  | 
|---|
 | 116 | + * interval, we can have numbers with a big difference between  | 
|---|
 | 117 | + * them. The impact of those differences in the average computation  | 
|---|
 | 118 | + * can be tuned by changing the alpha value.  | 
|---|
 | 119 | + *  | 
|---|
 | 120 | + *  | 
|---|
 | 121 | + *  -- The algorithm --  | 
|---|
 | 122 | + *  | 
|---|
 | 123 | + * We saw the different processing above, now let's see how they are  | 
|---|
 | 124 | + * used together.  | 
|---|
 | 125 | + *  | 
|---|
 | 126 | + * For each interrupt:  | 
|---|
 | 127 | + *	For each interval:  | 
|---|
 | 128 | + *		Compute the index = ilog2(interval)  | 
|---|
 | 129 | + *		Compute a new_ema(buffer[index], interval)  | 
|---|
 | 130 | + *		Store the index in a circular buffer  | 
|---|
 | 131 | + *  | 
|---|
 | 132 | + *	Compute the suffix array of the indexes  | 
|---|
 | 133 | + *  | 
|---|
 | 134 | + *	For each suffix:  | 
|---|
 | 135 | + *		If the suffix is reverse-found 3 times  | 
|---|
 | 136 | + *			Return suffix  | 
|---|
 | 137 | + *  | 
|---|
 | 138 | + *	Return Not found  | 
|---|
 | 139 | + *  | 
|---|
 | 140 | + * However we can not have endless suffix array to be build, it won't  | 
|---|
 | 141 | + * make sense and it will add an extra overhead, so we can restrict  | 
|---|
 | 142 | + * this to a maximum suffix length of 5 and a minimum suffix length of  | 
|---|
 | 143 | + * 2. The experience showed 5 is the majority of the maximum pattern  | 
|---|
 | 144 | + * period found for different devices.  | 
|---|
 | 145 | + *  | 
|---|
 | 146 | + * The result is a pattern finding less than 1us for an interrupt.  | 
|---|
 | 147 | + *  | 
|---|
 | 148 | + * Example based on real values:  | 
|---|
 | 149 | + *  | 
|---|
 | 150 | + * Example 1 : MMC write/read interrupt interval:  | 
|---|
 | 151 | + *  | 
|---|
 | 152 | + *	223947, 1240, 1384, 1386, 1386,  | 
|---|
 | 153 | + *	217416, 1236, 1384, 1386, 1387,  | 
|---|
 | 154 | + *	214719, 1241, 1386, 1387, 1384,  | 
|---|
 | 155 | + *	213696, 1234, 1384, 1386, 1388,  | 
|---|
 | 156 | + *	219904, 1240, 1385, 1389, 1385,  | 
|---|
 | 157 | + *	212240, 1240, 1386, 1386, 1386,  | 
|---|
 | 158 | + *	214415, 1236, 1384, 1386, 1387,  | 
|---|
 | 159 | + *	214276, 1234, 1384, 1388, ?  | 
|---|
 | 160 | + *  | 
|---|
 | 161 | + * For each element, apply ilog2(value)  | 
|---|
 | 162 | + *  | 
|---|
 | 163 | + *	15, 8, 8, 8, 8,  | 
|---|
 | 164 | + *	15, 8, 8, 8, 8,  | 
|---|
 | 165 | + *	15, 8, 8, 8, 8,  | 
|---|
 | 166 | + *	15, 8, 8, 8, 8,  | 
|---|
 | 167 | + *	15, 8, 8, 8, 8,  | 
|---|
 | 168 | + *	15, 8, 8, 8, 8,  | 
|---|
 | 169 | + *	15, 8, 8, 8, 8,  | 
|---|
 | 170 | + *	15, 8, 8, 8, ?  | 
|---|
 | 171 | + *  | 
|---|
 | 172 | + * Max period of 5, we take the last (max_period * 3) 15 elements as  | 
|---|
 | 173 | + * we can be confident if the pattern repeats itself three times it is  | 
|---|
 | 174 | + * a repeating pattern.  | 
|---|
 | 175 | + *  | 
|---|
 | 176 | + *	             8,  | 
|---|
 | 177 | + *	15, 8, 8, 8, 8,  | 
|---|
 | 178 | + *	15, 8, 8, 8, 8,  | 
|---|
 | 179 | + *	15, 8, 8, 8, ?  | 
|---|
 | 180 | + *  | 
|---|
 | 181 | + * Suffixes are:  | 
|---|
 | 182 | + *  | 
|---|
 | 183 | + *  1) 8, 15, 8, 8, 8  <- max period  | 
|---|
 | 184 | + *  2) 8, 15, 8, 8  | 
|---|
 | 185 | + *  3) 8, 15, 8  | 
|---|
 | 186 | + *  4) 8, 15           <- min period  | 
|---|
 | 187 | + *  | 
|---|
 | 188 | + * From there we search the repeating pattern for each suffix.  | 
|---|
 | 189 | + *  | 
|---|
 | 190 | + * buffer: 8, 15, 8, 8, 8, 8, 15, 8, 8, 8, 8, 15, 8, 8, 8  | 
|---|
 | 191 | + *         |   |  |  |  |  |   |  |  |  |  |   |  |  |  |  | 
|---|
 | 192 | + *         8, 15, 8, 8, 8  |   |  |  |  |  |   |  |  |  |  | 
|---|
 | 193 | + *                         8, 15, 8, 8, 8  |   |  |  |  |  | 
|---|
 | 194 | + *                                         8, 15, 8, 8, 8  | 
|---|
 | 195 | + *  | 
|---|
 | 196 | + * When moving the suffix, we found exactly 3 matches.  | 
|---|
 | 197 | + *  | 
|---|
 | 198 | + * The first suffix with period 5 is repeating.  | 
|---|
 | 199 | + *  | 
|---|
 | 200 | + * The next event is (3 * max_period) % suffix_period  | 
|---|
 | 201 | + *  | 
|---|
 | 202 | + * In this example, the result 0, so the next event is suffix[0] => 8  | 
|---|
 | 203 | + *  | 
|---|
 | 204 | + * However, 8 is the index in the array of exponential moving average  | 
|---|
 | 205 | + * which was calculated on the fly when storing the values, so the  | 
|---|
 | 206 | + * interval is ema[8] = 1366  | 
|---|
 | 207 | + *  | 
|---|
 | 208 | + *  | 
|---|
 | 209 | + * Example 2:  | 
|---|
 | 210 | + *  | 
|---|
 | 211 | + *	4, 3, 5, 100,  | 
|---|
 | 212 | + *	3, 3, 5, 117,  | 
|---|
 | 213 | + *	4, 4, 5, 112,  | 
|---|
 | 214 | + *	4, 3, 4, 110,  | 
|---|
 | 215 | + *	3, 5, 3, 117,  | 
|---|
 | 216 | + *	4, 4, 5, 112,  | 
|---|
 | 217 | + *	4, 3, 4, 110,  | 
|---|
 | 218 | + *	3, 4, 5, 112,  | 
|---|
 | 219 | + *	4, 3, 4, 110  | 
|---|
 | 220 | + *  | 
|---|
 | 221 | + * ilog2  | 
|---|
 | 222 | + *  | 
|---|
 | 223 | + *	0, 0, 0, 4,  | 
|---|
 | 224 | + *	0, 0, 0, 4,  | 
|---|
 | 225 | + *	0, 0, 0, 4,  | 
|---|
 | 226 | + *	0, 0, 0, 4,  | 
|---|
 | 227 | + *	0, 0, 0, 4,  | 
|---|
 | 228 | + *	0, 0, 0, 4,  | 
|---|
 | 229 | + *	0, 0, 0, 4,  | 
|---|
 | 230 | + *	0, 0, 0, 4,  | 
|---|
 | 231 | + *	0, 0, 0, 4  | 
|---|
 | 232 | + *  | 
|---|
 | 233 | + * Max period 5:  | 
|---|
 | 234 | + *	   0, 0, 4,  | 
|---|
 | 235 | + *	0, 0, 0, 4,  | 
|---|
 | 236 | + *	0, 0, 0, 4,  | 
|---|
 | 237 | + *	0, 0, 0, 4  | 
|---|
 | 238 | + *  | 
|---|
 | 239 | + * Suffixes:  | 
|---|
 | 240 | + *  | 
|---|
 | 241 | + *  1) 0, 0, 4, 0, 0  | 
|---|
 | 242 | + *  2) 0, 0, 4, 0  | 
|---|
 | 243 | + *  3) 0, 0, 4  | 
|---|
 | 244 | + *  4) 0, 0  | 
|---|
 | 245 | + *  | 
|---|
 | 246 | + * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4  | 
|---|
 | 247 | + *         |  |  |  |  |  |  X  | 
|---|
 | 248 | + *         0, 0, 4, 0, 0, |  X  | 
|---|
 | 249 | + *                        0, 0  | 
|---|
 | 250 | + *  | 
|---|
 | 251 | + * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4  | 
|---|
 | 252 | + *         |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  | 
|---|
 | 253 | + *         0, 0, 4, 0, |  |  |  |  |  |  |  |  |  |  |  | 
|---|
 | 254 | + *                     0, 0, 4, 0, |  |  |  |  |  |  |  | 
|---|
 | 255 | + *                                 0, 0, 4, 0, |  |  |  | 
|---|
 | 256 | + *                                             0  0  4  | 
|---|
 | 257 | + *  | 
|---|
 | 258 | + * Pattern is found 3 times, the remaining is 1 which results from  | 
|---|
 | 259 | + * (max_period * 3) % suffix_period. This value is the index in the  | 
|---|
 | 260 | + * suffix arrays. The suffix array for a period 4 has the value 4  | 
|---|
 | 261 | + * at index 1.  | 
|---|
| 105 | 262 |   */ | 
|---|
| 106 |  | -static void irqs_update(struct irqt_stat *irqs, u64 ts)  | 
|---|
 | 263 | +#define EMA_ALPHA_VAL		64  | 
|---|
 | 264 | +#define EMA_ALPHA_SHIFT		7  | 
|---|
 | 265 | +  | 
|---|
 | 266 | +#define PREDICTION_PERIOD_MIN	3  | 
|---|
 | 267 | +#define PREDICTION_PERIOD_MAX	5  | 
|---|
 | 268 | +#define PREDICTION_FACTOR	4  | 
|---|
 | 269 | +#define PREDICTION_MAX		10 /* 2 ^ PREDICTION_MAX useconds */  | 
|---|
 | 270 | +#define PREDICTION_BUFFER_SIZE	16 /* slots for EMAs, hardly more than 16 */  | 
|---|
 | 271 | +  | 
|---|
 | 272 | +/*  | 
|---|
 | 273 | + * Number of elements in the circular buffer: If it happens it was  | 
|---|
 | 274 | + * flushed before, then the number of elements could be smaller than  | 
|---|
 | 275 | + * IRQ_TIMINGS_SIZE, so the count is used, otherwise the array size is  | 
|---|
 | 276 | + * used as we wrapped. The index begins from zero when we did not  | 
|---|
 | 277 | + * wrap. That could be done in a nicer way with the proper circular  | 
|---|
 | 278 | + * array structure type but with the cost of extra computation in the  | 
|---|
 | 279 | + * interrupt handler hot path. We choose efficiency.  | 
|---|
 | 280 | + */  | 
|---|
 | 281 | +#define for_each_irqts(i, irqts)					\  | 
|---|
 | 282 | +	for (i = irqts->count < IRQ_TIMINGS_SIZE ?			\  | 
|---|
 | 283 | +		     0 : irqts->count & IRQ_TIMINGS_MASK,		\  | 
|---|
 | 284 | +		     irqts->count = min(IRQ_TIMINGS_SIZE,		\  | 
|---|
 | 285 | +					irqts->count);			\  | 
|---|
 | 286 | +	     irqts->count > 0; irqts->count--,				\  | 
|---|
 | 287 | +		     i = (i + 1) & IRQ_TIMINGS_MASK)  | 
|---|
 | 288 | +  | 
|---|
 | 289 | +struct irqt_stat {  | 
|---|
 | 290 | +	u64	last_ts;  | 
|---|
 | 291 | +	u64	ema_time[PREDICTION_BUFFER_SIZE];  | 
|---|
 | 292 | +	int	timings[IRQ_TIMINGS_SIZE];  | 
|---|
 | 293 | +	int	circ_timings[IRQ_TIMINGS_SIZE];  | 
|---|
 | 294 | +	int	count;  | 
|---|
 | 295 | +};  | 
|---|
 | 296 | +  | 
|---|
 | 297 | +/*  | 
|---|
 | 298 | + * Exponential moving average computation  | 
|---|
 | 299 | + */  | 
|---|
 | 300 | +static u64 irq_timings_ema_new(u64 value, u64 ema_old)  | 
|---|
 | 301 | +{  | 
|---|
 | 302 | +	s64 diff;  | 
|---|
 | 303 | +  | 
|---|
 | 304 | +	if (unlikely(!ema_old))  | 
|---|
 | 305 | +		return value;  | 
|---|
 | 306 | +  | 
|---|
 | 307 | +	diff = (value - ema_old) * EMA_ALPHA_VAL;  | 
|---|
 | 308 | +	/*  | 
|---|
 | 309 | +	 * We can use a s64 type variable to be added with the u64  | 
|---|
 | 310 | +	 * ema_old variable as this one will never have its topmost  | 
|---|
 | 311 | +	 * bit set, it will be always smaller than 2^63 nanosec  | 
|---|
 | 312 | +	 * interrupt interval (292 years).  | 
|---|
 | 313 | +	 */  | 
|---|
 | 314 | +	return ema_old + (diff >> EMA_ALPHA_SHIFT);  | 
|---|
 | 315 | +}  | 
|---|
 | 316 | +  | 
|---|
 | 317 | +static int irq_timings_next_event_index(int *buffer, size_t len, int period_max)  | 
|---|
 | 318 | +{  | 
|---|
 | 319 | +	int period;  | 
|---|
 | 320 | +  | 
|---|
 | 321 | +	/*  | 
|---|
 | 322 | +	 * Move the beginning pointer to the end minus the max period x 3.  | 
|---|
 | 323 | +	 * We are at the point we can begin searching the pattern  | 
|---|
 | 324 | +	 */  | 
|---|
 | 325 | +	buffer = &buffer[len - (period_max * 3)];  | 
|---|
 | 326 | +  | 
|---|
 | 327 | +	/* Adjust the length to the maximum allowed period x 3 */  | 
|---|
 | 328 | +	len = period_max * 3;  | 
|---|
 | 329 | +  | 
|---|
 | 330 | +	/*  | 
|---|
 | 331 | +	 * The buffer contains the suite of intervals, in a ilog2  | 
|---|
 | 332 | +	 * basis, we are looking for a repetition. We point the  | 
|---|
 | 333 | +	 * beginning of the search three times the length of the  | 
|---|
 | 334 | +	 * period beginning at the end of the buffer. We do that for  | 
|---|
 | 335 | +	 * each suffix.  | 
|---|
 | 336 | +	 */  | 
|---|
 | 337 | +	for (period = period_max; period >= PREDICTION_PERIOD_MIN; period--) {  | 
|---|
 | 338 | +  | 
|---|
 | 339 | +		/*  | 
|---|
 | 340 | +		 * The first comparison always succeed because the  | 
|---|
 | 341 | +		 * suffix is deduced from the first n-period bytes of  | 
|---|
 | 342 | +		 * the buffer and we compare the initial suffix with  | 
|---|
 | 343 | +		 * itself, so we can skip the first iteration.  | 
|---|
 | 344 | +		 */  | 
|---|
 | 345 | +		int idx = period;  | 
|---|
 | 346 | +		size_t size = period;  | 
|---|
 | 347 | +  | 
|---|
 | 348 | +		/*  | 
|---|
 | 349 | +		 * We look if the suite with period 'i' repeat  | 
|---|
 | 350 | +		 * itself. If it is truncated at the end, as it  | 
|---|
 | 351 | +		 * repeats we can use the period to find out the next  | 
|---|
 | 352 | +		 * element with the modulo.  | 
|---|
 | 353 | +		 */  | 
|---|
 | 354 | +		while (!memcmp(buffer, &buffer[idx], size * sizeof(int))) {  | 
|---|
 | 355 | +  | 
|---|
 | 356 | +			/*  | 
|---|
 | 357 | +			 * Move the index in a period basis  | 
|---|
 | 358 | +			 */  | 
|---|
 | 359 | +			idx += size;  | 
|---|
 | 360 | +  | 
|---|
 | 361 | +			/*  | 
|---|
 | 362 | +			 * If this condition is reached, all previous  | 
|---|
 | 363 | +			 * memcmp were successful, so the period is  | 
|---|
 | 364 | +			 * found.  | 
|---|
 | 365 | +			 */  | 
|---|
 | 366 | +			if (idx == len)  | 
|---|
 | 367 | +				return buffer[len % period];  | 
|---|
 | 368 | +  | 
|---|
 | 369 | +			/*  | 
|---|
 | 370 | +			 * If the remaining elements to compare are  | 
|---|
 | 371 | +			 * smaller than the period, readjust the size  | 
|---|
 | 372 | +			 * of the comparison for the last iteration.  | 
|---|
 | 373 | +			 */  | 
|---|
 | 374 | +			if (len - idx < period)  | 
|---|
 | 375 | +				size = len - idx;  | 
|---|
 | 376 | +		}  | 
|---|
 | 377 | +	}  | 
|---|
 | 378 | +  | 
|---|
 | 379 | +	return -1;  | 
|---|
 | 380 | +}  | 
|---|
 | 381 | +  | 
|---|
 | 382 | +static u64 __irq_timings_next_event(struct irqt_stat *irqs, int irq, u64 now)  | 
|---|
 | 383 | +{  | 
|---|
 | 384 | +	int index, i, period_max, count, start, min = INT_MAX;  | 
|---|
 | 385 | +  | 
|---|
 | 386 | +	if ((now - irqs->last_ts) >= NSEC_PER_SEC) {  | 
|---|
 | 387 | +		irqs->count = irqs->last_ts = 0;  | 
|---|
 | 388 | +		return U64_MAX;  | 
|---|
 | 389 | +	}  | 
|---|
 | 390 | +  | 
|---|
 | 391 | +	/*  | 
|---|
 | 392 | +	 * As we want to find three times the repetition, we need a  | 
|---|
 | 393 | +	 * number of intervals greater or equal to three times the  | 
|---|
 | 394 | +	 * maximum period, otherwise we truncate the max period.  | 
|---|
 | 395 | +	 */  | 
|---|
 | 396 | +	period_max = irqs->count > (3 * PREDICTION_PERIOD_MAX) ?  | 
|---|
 | 397 | +		PREDICTION_PERIOD_MAX : irqs->count / 3;  | 
|---|
 | 398 | +  | 
|---|
 | 399 | +	/*  | 
|---|
 | 400 | +	 * If we don't have enough irq timings for this prediction,  | 
|---|
 | 401 | +	 * just bail out.  | 
|---|
 | 402 | +	 */  | 
|---|
 | 403 | +	if (period_max <= PREDICTION_PERIOD_MIN)  | 
|---|
 | 404 | +		return U64_MAX;  | 
|---|
 | 405 | +  | 
|---|
 | 406 | +	/*  | 
|---|
 | 407 | +	 * 'count' will depends if the circular buffer wrapped or not  | 
|---|
 | 408 | +	 */  | 
|---|
 | 409 | +	count = irqs->count < IRQ_TIMINGS_SIZE ?  | 
|---|
 | 410 | +		irqs->count : IRQ_TIMINGS_SIZE;  | 
|---|
 | 411 | +  | 
|---|
 | 412 | +	start = irqs->count < IRQ_TIMINGS_SIZE ?  | 
|---|
 | 413 | +		0 : (irqs->count & IRQ_TIMINGS_MASK);  | 
|---|
 | 414 | +  | 
|---|
 | 415 | +	/*  | 
|---|
 | 416 | +	 * Copy the content of the circular buffer into another buffer  | 
|---|
 | 417 | +	 * in order to linearize the buffer instead of dealing with  | 
|---|
 | 418 | +	 * wrapping indexes and shifted array which will be prone to  | 
|---|
 | 419 | +	 * error and extremelly difficult to debug.  | 
|---|
 | 420 | +	 */  | 
|---|
 | 421 | +	for (i = 0; i < count; i++) {  | 
|---|
 | 422 | +		int index = (start + i) & IRQ_TIMINGS_MASK;  | 
|---|
 | 423 | +  | 
|---|
 | 424 | +		irqs->timings[i] = irqs->circ_timings[index];  | 
|---|
 | 425 | +		min = min_t(int, irqs->timings[i], min);  | 
|---|
 | 426 | +	}  | 
|---|
 | 427 | +  | 
|---|
 | 428 | +	index = irq_timings_next_event_index(irqs->timings, count, period_max);  | 
|---|
 | 429 | +	if (index < 0)  | 
|---|
 | 430 | +		return irqs->last_ts + irqs->ema_time[min];  | 
|---|
 | 431 | +  | 
|---|
 | 432 | +	return irqs->last_ts + irqs->ema_time[index];  | 
|---|
 | 433 | +}  | 
|---|
 | 434 | +  | 
|---|
 | 435 | +static __always_inline int irq_timings_interval_index(u64 interval)  | 
|---|
 | 436 | +{  | 
|---|
 | 437 | +	/*  | 
|---|
 | 438 | +	 * The PREDICTION_FACTOR increase the interval size for the  | 
|---|
 | 439 | +	 * array of exponential average.  | 
|---|
 | 440 | +	 */  | 
|---|
 | 441 | +	u64 interval_us = (interval >> 10) / PREDICTION_FACTOR;  | 
|---|
 | 442 | +  | 
|---|
 | 443 | +	return likely(interval_us) ? ilog2(interval_us) : 0;  | 
|---|
 | 444 | +}  | 
|---|
 | 445 | +  | 
|---|
 | 446 | +static __always_inline void __irq_timings_store(int irq, struct irqt_stat *irqs,  | 
|---|
 | 447 | +						u64 interval)  | 
|---|
 | 448 | +{  | 
|---|
 | 449 | +	int index;  | 
|---|
 | 450 | +  | 
|---|
 | 451 | +	/*  | 
|---|
 | 452 | +	 * Get the index in the ema table for this interrupt.  | 
|---|
 | 453 | +	 */  | 
|---|
 | 454 | +	index = irq_timings_interval_index(interval);  | 
|---|
 | 455 | +  | 
|---|
 | 456 | +	if (index > PREDICTION_BUFFER_SIZE - 1) {  | 
|---|
 | 457 | +		irqs->count = 0;  | 
|---|
 | 458 | +		return;  | 
|---|
 | 459 | +	}  | 
|---|
 | 460 | +  | 
|---|
 | 461 | +	/*  | 
|---|
 | 462 | +	 * Store the index as an element of the pattern in another  | 
|---|
 | 463 | +	 * circular array.  | 
|---|
 | 464 | +	 */  | 
|---|
 | 465 | +	irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index;  | 
|---|
 | 466 | +  | 
|---|
 | 467 | +	irqs->ema_time[index] = irq_timings_ema_new(interval,  | 
|---|
 | 468 | +						    irqs->ema_time[index]);  | 
|---|
 | 469 | +  | 
|---|
 | 470 | +	irqs->count++;  | 
|---|
 | 471 | +}  | 
|---|
 | 472 | +  | 
|---|
 | 473 | +static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts)  | 
|---|
| 107 | 474 |  { | 
|---|
| 108 | 475 |  	u64 old_ts = irqs->last_ts; | 
|---|
| 109 |  | -	u64 variance = 0;  | 
|---|
| 110 | 476 |  	u64 interval; | 
|---|
| 111 |  | -	s64 diff;  | 
|---|
| 112 | 477 |   | 
|---|
| 113 | 478 |  	/* | 
|---|
| 114 | 479 |  	 * The timestamps are absolute time values, we need to compute | 
|---|
| .. | .. | 
|---|
| 135 | 500 |  	 * want as we need another timestamp to compute an interval. | 
|---|
| 136 | 501 |  	 */ | 
|---|
| 137 | 502 |  	if (interval >= NSEC_PER_SEC) { | 
|---|
| 138 |  | -		memset(irqs, 0, sizeof(*irqs));  | 
|---|
| 139 |  | -		irqs->last_ts = ts;  | 
|---|
 | 503 | +		irqs->count = 0;  | 
|---|
| 140 | 504 |  		return; | 
|---|
| 141 | 505 |  	} | 
|---|
| 142 | 506 |   | 
|---|
| 143 |  | -	/*  | 
|---|
| 144 |  | -	 * Pre-compute the delta with the average as the result is  | 
|---|
| 145 |  | -	 * used several times in this function.  | 
|---|
| 146 |  | -	 */  | 
|---|
| 147 |  | -	diff = interval - irqs->avg;  | 
|---|
| 148 |  | -  | 
|---|
| 149 |  | -	/*  | 
|---|
| 150 |  | -	 * Increment the number of samples.  | 
|---|
| 151 |  | -	 */  | 
|---|
| 152 |  | -	irqs->nr_samples++;  | 
|---|
| 153 |  | -  | 
|---|
| 154 |  | -	/*  | 
|---|
| 155 |  | -	 * Online variance divided by the number of elements if there  | 
|---|
| 156 |  | -	 * is more than one sample.  Normally the formula is division  | 
|---|
| 157 |  | -	 * by nr_samples - 1 but we assume the number of element will be  | 
|---|
| 158 |  | -	 * more than 32 and dividing by 32 instead of 31 is enough  | 
|---|
| 159 |  | -	 * precise.  | 
|---|
| 160 |  | -	 */  | 
|---|
| 161 |  | -	if (likely(irqs->nr_samples > 1))  | 
|---|
| 162 |  | -		variance = irqs->variance >> IRQ_TIMINGS_SHIFT;  | 
|---|
| 163 |  | -  | 
|---|
| 164 |  | -	/*  | 
|---|
| 165 |  | -	 * The rule of thumb in statistics for the normal distribution  | 
|---|
| 166 |  | -	 * is having at least 30 samples in order to have the model to  | 
|---|
| 167 |  | -	 * apply. Values outside the interval are considered as an  | 
|---|
| 168 |  | -	 * anomaly.  | 
|---|
| 169 |  | -	 */  | 
|---|
| 170 |  | -	if ((irqs->nr_samples >= 30) && ((diff * diff) > (9 * variance))) {  | 
|---|
| 171 |  | -		/*  | 
|---|
| 172 |  | -		 * After three consecutive anomalies, we reset the  | 
|---|
| 173 |  | -		 * stats as it is no longer stable enough.  | 
|---|
| 174 |  | -		 */  | 
|---|
| 175 |  | -		if (irqs->anomalies++ >= 3) {  | 
|---|
| 176 |  | -			memset(irqs, 0, sizeof(*irqs));  | 
|---|
| 177 |  | -			irqs->last_ts = ts;  | 
|---|
| 178 |  | -			return;  | 
|---|
| 179 |  | -		}  | 
|---|
| 180 |  | -	} else {  | 
|---|
| 181 |  | -		/*  | 
|---|
| 182 |  | -		 * The anomalies must be consecutives, so at this  | 
|---|
| 183 |  | -		 * point, we reset the anomalies counter.  | 
|---|
| 184 |  | -		 */  | 
|---|
| 185 |  | -		irqs->anomalies = 0;  | 
|---|
| 186 |  | -	}  | 
|---|
| 187 |  | -  | 
|---|
| 188 |  | -	/*  | 
|---|
| 189 |  | -	 * The interrupt is considered stable enough to try to predict  | 
|---|
| 190 |  | -	 * the next event on it.  | 
|---|
| 191 |  | -	 */  | 
|---|
| 192 |  | -	irqs->valid = 1;  | 
|---|
| 193 |  | -  | 
|---|
| 194 |  | -	/*  | 
|---|
| 195 |  | -	 * Online average algorithm:  | 
|---|
| 196 |  | -	 *  | 
|---|
| 197 |  | -	 *  new_average = average + ((value - average) / count)  | 
|---|
| 198 |  | -	 *  | 
|---|
| 199 |  | -	 * The variance computation depends on the new average  | 
|---|
| 200 |  | -	 * to be computed here first.  | 
|---|
| 201 |  | -	 *  | 
|---|
| 202 |  | -	 */  | 
|---|
| 203 |  | -	irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT);  | 
|---|
| 204 |  | -  | 
|---|
| 205 |  | -	/*  | 
|---|
| 206 |  | -	 * Online variance algorithm:  | 
|---|
| 207 |  | -	 *  | 
|---|
| 208 |  | -	 *  new_variance = variance + (value - average) x (value - new_average)  | 
|---|
| 209 |  | -	 *  | 
|---|
| 210 |  | -	 * Warning: irqs->avg is updated with the line above, hence  | 
|---|
| 211 |  | -	 * 'interval - irqs->avg' is no longer equal to 'diff'  | 
|---|
| 212 |  | -	 */  | 
|---|
| 213 |  | -	irqs->variance = irqs->variance + (diff * (interval - irqs->avg));  | 
|---|
| 214 |  | -  | 
|---|
| 215 |  | -	/*  | 
|---|
| 216 |  | -	 * Update the next event  | 
|---|
| 217 |  | -	 */  | 
|---|
| 218 |  | -	irqs->next_evt = ts + irqs->avg;  | 
|---|
 | 507 | +	__irq_timings_store(irq, irqs, interval);  | 
|---|
| 219 | 508 |  } | 
|---|
| 220 | 509 |   | 
|---|
| 221 | 510 |  /** | 
|---|
| .. | .. | 
|---|
| 259 | 548 |  	 */ | 
|---|
| 260 | 549 |  	lockdep_assert_irqs_disabled(); | 
|---|
| 261 | 550 |   | 
|---|
 | 551 | +	if (!irqts->count)  | 
|---|
 | 552 | +		return next_evt;  | 
|---|
 | 553 | +  | 
|---|
| 262 | 554 |  	/* | 
|---|
| 263 | 555 |  	 * Number of elements in the circular buffer: If it happens it | 
|---|
| 264 | 556 |  	 * was flushed before, then the number of elements could be | 
|---|
| .. | .. | 
|---|
| 269 | 561 |  	 * type but with the cost of extra computation in the | 
|---|
| 270 | 562 |  	 * interrupt handler hot path. We choose efficiency. | 
|---|
| 271 | 563 |  	 * | 
|---|
| 272 |  | -	 * Inject measured irq/timestamp to the statistical model  | 
|---|
| 273 |  | -	 * while decrementing the counter because we consume the data  | 
|---|
| 274 |  | -	 * from our circular buffer.  | 
|---|
 | 564 | +	 * Inject measured irq/timestamp to the pattern prediction  | 
|---|
 | 565 | +	 * model while decrementing the counter because we consume the  | 
|---|
 | 566 | +	 * data from our circular buffer.  | 
|---|
| 275 | 567 |  	 */ | 
|---|
| 276 |  | -	for (i = irqts->count & IRQ_TIMINGS_MASK,  | 
|---|
| 277 |  | -		     irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count);  | 
|---|
| 278 |  | -	     irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) {  | 
|---|
| 279 |  | -  | 
|---|
 | 568 | +	for_each_irqts(i, irqts) {  | 
|---|
| 280 | 569 |  		irq = irq_timing_decode(irqts->values[i], &ts); | 
|---|
| 281 |  | -  | 
|---|
| 282 | 570 |  		s = idr_find(&irqt_stats, irq); | 
|---|
| 283 |  | -		if (s) {  | 
|---|
| 284 |  | -			irqs = this_cpu_ptr(s);  | 
|---|
| 285 |  | -			irqs_update(irqs, ts);  | 
|---|
| 286 |  | -		}  | 
|---|
 | 571 | +		if (s)  | 
|---|
 | 572 | +			irq_timings_store(irq, this_cpu_ptr(s), ts);  | 
|---|
| 287 | 573 |  	} | 
|---|
| 288 | 574 |   | 
|---|
| 289 | 575 |  	/* | 
|---|
| .. | .. | 
|---|
| 294 | 580 |   | 
|---|
| 295 | 581 |  		irqs = this_cpu_ptr(s); | 
|---|
| 296 | 582 |   | 
|---|
| 297 |  | -		if (!irqs->valid)  | 
|---|
| 298 |  | -			continue;  | 
|---|
 | 583 | +		ts = __irq_timings_next_event(irqs, i, now);  | 
|---|
 | 584 | +		if (ts <= now)  | 
|---|
 | 585 | +			return now;  | 
|---|
| 299 | 586 |   | 
|---|
| 300 |  | -		if (irqs->next_evt <= now) {  | 
|---|
| 301 |  | -			irq = i;  | 
|---|
| 302 |  | -			next_evt = now;  | 
|---|
| 303 |  | -  | 
|---|
| 304 |  | -			/*  | 
|---|
| 305 |  | -			 * This interrupt mustn't use in the future  | 
|---|
| 306 |  | -			 * until new events occur and update the  | 
|---|
| 307 |  | -			 * statistics.  | 
|---|
| 308 |  | -			 */  | 
|---|
| 309 |  | -			irqs->valid = 0;  | 
|---|
| 310 |  | -			break;  | 
|---|
| 311 |  | -		}  | 
|---|
| 312 |  | -  | 
|---|
| 313 |  | -		if (irqs->next_evt < next_evt) {  | 
|---|
| 314 |  | -			irq = i;  | 
|---|
| 315 |  | -			next_evt = irqs->next_evt;  | 
|---|
| 316 |  | -		}  | 
|---|
 | 587 | +		if (ts < next_evt)  | 
|---|
 | 588 | +			next_evt = ts;  | 
|---|
| 317 | 589 |  	} | 
|---|
| 318 | 590 |   | 
|---|
| 319 | 591 |  	return next_evt; | 
|---|
| .. | .. | 
|---|
| 337 | 609 |   | 
|---|
| 338 | 610 |  	/* | 
|---|
| 339 | 611 |  	 * Some platforms can have the same private interrupt per cpu, | 
|---|
| 340 |  | -	 * so this function may be be called several times with the  | 
|---|
 | 612 | +	 * so this function may be called several times with the  | 
|---|
| 341 | 613 |  	 * same interrupt number. Just bail out in case the per cpu | 
|---|
| 342 | 614 |  	 * stat structure is already allocated. | 
|---|
| 343 | 615 |  	 */ | 
|---|
| .. | .. | 
|---|
| 360 | 632 |   | 
|---|
| 361 | 633 |  	return 0; | 
|---|
| 362 | 634 |  } | 
|---|
 | 635 | +  | 
|---|
 | 636 | +#ifdef CONFIG_TEST_IRQ_TIMINGS  | 
|---|
 | 637 | +struct timings_intervals {  | 
|---|
 | 638 | +	u64 *intervals;  | 
|---|
 | 639 | +	size_t count;  | 
|---|
 | 640 | +};  | 
|---|
 | 641 | +  | 
|---|
 | 642 | +/*  | 
|---|
 | 643 | + * Intervals are given in nanosecond base  | 
|---|
 | 644 | + */  | 
|---|
 | 645 | +static u64 intervals0[] __initdata = {  | 
|---|
 | 646 | +	10000, 50000, 200000, 500000,  | 
|---|
 | 647 | +	10000, 50000, 200000, 500000,  | 
|---|
 | 648 | +	10000, 50000, 200000, 500000,  | 
|---|
 | 649 | +	10000, 50000, 200000, 500000,  | 
|---|
 | 650 | +	10000, 50000, 200000, 500000,  | 
|---|
 | 651 | +	10000, 50000, 200000, 500000,  | 
|---|
 | 652 | +	10000, 50000, 200000, 500000,  | 
|---|
 | 653 | +	10000, 50000, 200000, 500000,  | 
|---|
 | 654 | +	10000, 50000, 200000,  | 
|---|
 | 655 | +};  | 
|---|
 | 656 | +  | 
|---|
 | 657 | +static u64 intervals1[] __initdata = {  | 
|---|
 | 658 | +	223947000, 1240000, 1384000, 1386000, 1386000,  | 
|---|
 | 659 | +	217416000, 1236000, 1384000, 1386000, 1387000,  | 
|---|
 | 660 | +	214719000, 1241000, 1386000, 1387000, 1384000,  | 
|---|
 | 661 | +	213696000, 1234000, 1384000, 1386000, 1388000,  | 
|---|
 | 662 | +	219904000, 1240000, 1385000, 1389000, 1385000,  | 
|---|
 | 663 | +	212240000, 1240000, 1386000, 1386000, 1386000,  | 
|---|
 | 664 | +	214415000, 1236000, 1384000, 1386000, 1387000,  | 
|---|
 | 665 | +	214276000, 1234000,  | 
|---|
 | 666 | +};  | 
|---|
 | 667 | +  | 
|---|
 | 668 | +static u64 intervals2[] __initdata = {  | 
|---|
 | 669 | +	4000, 3000, 5000, 100000,  | 
|---|
 | 670 | +	3000, 3000, 5000, 117000,  | 
|---|
 | 671 | +	4000, 4000, 5000, 112000,  | 
|---|
 | 672 | +	4000, 3000, 4000, 110000,  | 
|---|
 | 673 | +	3000, 5000, 3000, 117000,  | 
|---|
 | 674 | +	4000, 4000, 5000, 112000,  | 
|---|
 | 675 | +	4000, 3000, 4000, 110000,  | 
|---|
 | 676 | +	3000, 4000, 5000, 112000,  | 
|---|
 | 677 | +	4000,  | 
|---|
 | 678 | +};  | 
|---|
 | 679 | +  | 
|---|
 | 680 | +static u64 intervals3[] __initdata = {  | 
|---|
 | 681 | +	1385000, 212240000, 1240000,  | 
|---|
 | 682 | +	1386000, 214415000, 1236000,  | 
|---|
 | 683 | +	1384000, 214276000, 1234000,  | 
|---|
 | 684 | +	1386000, 214415000, 1236000,  | 
|---|
 | 685 | +	1385000, 212240000, 1240000,  | 
|---|
 | 686 | +	1386000, 214415000, 1236000,  | 
|---|
 | 687 | +	1384000, 214276000, 1234000,  | 
|---|
 | 688 | +	1386000, 214415000, 1236000,  | 
|---|
 | 689 | +	1385000, 212240000, 1240000,  | 
|---|
 | 690 | +};  | 
|---|
 | 691 | +  | 
|---|
 | 692 | +static u64 intervals4[] __initdata = {  | 
|---|
 | 693 | +	10000, 50000, 10000, 50000,  | 
|---|
 | 694 | +	10000, 50000, 10000, 50000,  | 
|---|
 | 695 | +	10000, 50000, 10000, 50000,  | 
|---|
 | 696 | +	10000, 50000, 10000, 50000,  | 
|---|
 | 697 | +	10000, 50000, 10000, 50000,  | 
|---|
 | 698 | +	10000, 50000, 10000, 50000,  | 
|---|
 | 699 | +	10000, 50000, 10000, 50000,  | 
|---|
 | 700 | +	10000, 50000, 10000, 50000,  | 
|---|
 | 701 | +	10000,  | 
|---|
 | 702 | +};  | 
|---|
 | 703 | +  | 
|---|
 | 704 | +static struct timings_intervals tis[] __initdata = {  | 
|---|
 | 705 | +	{ intervals0, ARRAY_SIZE(intervals0) },  | 
|---|
 | 706 | +	{ intervals1, ARRAY_SIZE(intervals1) },  | 
|---|
 | 707 | +	{ intervals2, ARRAY_SIZE(intervals2) },  | 
|---|
 | 708 | +	{ intervals3, ARRAY_SIZE(intervals3) },  | 
|---|
 | 709 | +	{ intervals4, ARRAY_SIZE(intervals4) },  | 
|---|
 | 710 | +};  | 
|---|
 | 711 | +  | 
|---|
 | 712 | +static int __init irq_timings_test_next_index(struct timings_intervals *ti)  | 
|---|
 | 713 | +{  | 
|---|
 | 714 | +	int _buffer[IRQ_TIMINGS_SIZE];  | 
|---|
 | 715 | +	int buffer[IRQ_TIMINGS_SIZE];  | 
|---|
 | 716 | +	int index, start, i, count, period_max;  | 
|---|
 | 717 | +  | 
|---|
 | 718 | +	count = ti->count - 1;  | 
|---|
 | 719 | +  | 
|---|
 | 720 | +	period_max = count > (3 * PREDICTION_PERIOD_MAX) ?  | 
|---|
 | 721 | +		PREDICTION_PERIOD_MAX : count / 3;  | 
|---|
 | 722 | +  | 
|---|
 | 723 | +	/*  | 
|---|
 | 724 | +	 * Inject all values except the last one which will be used  | 
|---|
 | 725 | +	 * to compare with the next index result.  | 
|---|
 | 726 | +	 */  | 
|---|
 | 727 | +	pr_debug("index suite: ");  | 
|---|
 | 728 | +  | 
|---|
 | 729 | +	for (i = 0; i < count; i++) {  | 
|---|
 | 730 | +		index = irq_timings_interval_index(ti->intervals[i]);  | 
|---|
 | 731 | +		_buffer[i & IRQ_TIMINGS_MASK] = index;  | 
|---|
 | 732 | +		pr_cont("%d ", index);  | 
|---|
 | 733 | +	}  | 
|---|
 | 734 | +  | 
|---|
 | 735 | +	start = count < IRQ_TIMINGS_SIZE ? 0 :  | 
|---|
 | 736 | +		count & IRQ_TIMINGS_MASK;  | 
|---|
 | 737 | +  | 
|---|
 | 738 | +	count = min_t(int, count, IRQ_TIMINGS_SIZE);  | 
|---|
 | 739 | +  | 
|---|
 | 740 | +	for (i = 0; i < count; i++) {  | 
|---|
 | 741 | +		int index = (start + i) & IRQ_TIMINGS_MASK;  | 
|---|
 | 742 | +		buffer[i] = _buffer[index];  | 
|---|
 | 743 | +	}  | 
|---|
 | 744 | +  | 
|---|
 | 745 | +	index = irq_timings_next_event_index(buffer, count, period_max);  | 
|---|
 | 746 | +	i = irq_timings_interval_index(ti->intervals[ti->count - 1]);  | 
|---|
 | 747 | +  | 
|---|
 | 748 | +	if (index != i) {  | 
|---|
 | 749 | +		pr_err("Expected (%d) and computed (%d) next indexes differ\n",  | 
|---|
 | 750 | +		       i, index);  | 
|---|
 | 751 | +		return -EINVAL;  | 
|---|
 | 752 | +	}  | 
|---|
 | 753 | +  | 
|---|
 | 754 | +	return 0;  | 
|---|
 | 755 | +}  | 
|---|
 | 756 | +  | 
|---|
 | 757 | +static int __init irq_timings_next_index_selftest(void)  | 
|---|
 | 758 | +{  | 
|---|
 | 759 | +	int i, ret;  | 
|---|
 | 760 | +  | 
|---|
 | 761 | +	for (i = 0; i < ARRAY_SIZE(tis); i++) {  | 
|---|
 | 762 | +  | 
|---|
 | 763 | +		pr_info("---> Injecting intervals number #%d (count=%zd)\n",  | 
|---|
 | 764 | +			i, tis[i].count);  | 
|---|
 | 765 | +  | 
|---|
 | 766 | +		ret = irq_timings_test_next_index(&tis[i]);  | 
|---|
 | 767 | +		if (ret)  | 
|---|
 | 768 | +			break;  | 
|---|
 | 769 | +	}  | 
|---|
 | 770 | +  | 
|---|
 | 771 | +	return ret;  | 
|---|
 | 772 | +}  | 
|---|
 | 773 | +  | 
|---|
 | 774 | +static int __init irq_timings_test_irqs(struct timings_intervals *ti)  | 
|---|
 | 775 | +{  | 
|---|
 | 776 | +	struct irqt_stat __percpu *s;  | 
|---|
 | 777 | +	struct irqt_stat *irqs;  | 
|---|
 | 778 | +	int i, index, ret, irq = 0xACE5;  | 
|---|
 | 779 | +  | 
|---|
 | 780 | +	ret = irq_timings_alloc(irq);  | 
|---|
 | 781 | +	if (ret) {  | 
|---|
 | 782 | +		pr_err("Failed to allocate irq timings\n");  | 
|---|
 | 783 | +		return ret;  | 
|---|
 | 784 | +	}  | 
|---|
 | 785 | +  | 
|---|
 | 786 | +	s = idr_find(&irqt_stats, irq);  | 
|---|
 | 787 | +	if (!s) {  | 
|---|
 | 788 | +		ret = -EIDRM;  | 
|---|
 | 789 | +		goto out;  | 
|---|
 | 790 | +	}  | 
|---|
 | 791 | +  | 
|---|
 | 792 | +	irqs = this_cpu_ptr(s);  | 
|---|
 | 793 | +  | 
|---|
 | 794 | +	for (i = 0; i < ti->count; i++) {  | 
|---|
 | 795 | +  | 
|---|
 | 796 | +		index = irq_timings_interval_index(ti->intervals[i]);  | 
|---|
 | 797 | +		pr_debug("%d: interval=%llu ema_index=%d\n",  | 
|---|
 | 798 | +			 i, ti->intervals[i], index);  | 
|---|
 | 799 | +  | 
|---|
 | 800 | +		__irq_timings_store(irq, irqs, ti->intervals[i]);  | 
|---|
 | 801 | +		if (irqs->circ_timings[i & IRQ_TIMINGS_MASK] != index) {  | 
|---|
 | 802 | +			ret = -EBADSLT;  | 
|---|
 | 803 | +			pr_err("Failed to store in the circular buffer\n");  | 
|---|
 | 804 | +			goto out;  | 
|---|
 | 805 | +		}  | 
|---|
 | 806 | +	}  | 
|---|
 | 807 | +  | 
|---|
 | 808 | +	if (irqs->count != ti->count) {  | 
|---|
 | 809 | +		ret = -ERANGE;  | 
|---|
 | 810 | +		pr_err("Count differs\n");  | 
|---|
 | 811 | +		goto out;  | 
|---|
 | 812 | +	}  | 
|---|
 | 813 | +  | 
|---|
 | 814 | +	ret = 0;  | 
|---|
 | 815 | +out:  | 
|---|
 | 816 | +	irq_timings_free(irq);  | 
|---|
 | 817 | +  | 
|---|
 | 818 | +	return ret;  | 
|---|
 | 819 | +}  | 
|---|
 | 820 | +  | 
|---|
 | 821 | +static int __init irq_timings_irqs_selftest(void)  | 
|---|
 | 822 | +{  | 
|---|
 | 823 | +	int i, ret;  | 
|---|
 | 824 | +  | 
|---|
 | 825 | +	for (i = 0; i < ARRAY_SIZE(tis); i++) {  | 
|---|
 | 826 | +		pr_info("---> Injecting intervals number #%d (count=%zd)\n",  | 
|---|
 | 827 | +			i, tis[i].count);  | 
|---|
 | 828 | +		ret = irq_timings_test_irqs(&tis[i]);  | 
|---|
 | 829 | +		if (ret)  | 
|---|
 | 830 | +			break;  | 
|---|
 | 831 | +	}  | 
|---|
 | 832 | +  | 
|---|
 | 833 | +	return ret;  | 
|---|
 | 834 | +}  | 
|---|
 | 835 | +  | 
|---|
 | 836 | +static int __init irq_timings_test_irqts(struct irq_timings *irqts,  | 
|---|
 | 837 | +					 unsigned count)  | 
|---|
 | 838 | +{  | 
|---|
 | 839 | +	int start = count >= IRQ_TIMINGS_SIZE ? count - IRQ_TIMINGS_SIZE : 0;  | 
|---|
 | 840 | +	int i, irq, oirq = 0xBEEF;  | 
|---|
 | 841 | +	u64 ots = 0xDEAD, ts;  | 
|---|
 | 842 | +  | 
|---|
 | 843 | +	/*  | 
|---|
 | 844 | +	 * Fill the circular buffer by using the dedicated function.  | 
|---|
 | 845 | +	 */  | 
|---|
 | 846 | +	for (i = 0; i < count; i++) {  | 
|---|
 | 847 | +		pr_debug("%d: index=%d, ts=%llX irq=%X\n",  | 
|---|
 | 848 | +			 i, i & IRQ_TIMINGS_MASK, ots + i, oirq + i);  | 
|---|
 | 849 | +  | 
|---|
 | 850 | +		irq_timings_push(ots + i, oirq + i);  | 
|---|
 | 851 | +	}  | 
|---|
 | 852 | +  | 
|---|
 | 853 | +	/*  | 
|---|
 | 854 | +	 * Compute the first elements values after the index wrapped  | 
|---|
 | 855 | +	 * up or not.  | 
|---|
 | 856 | +	 */  | 
|---|
 | 857 | +	ots += start;  | 
|---|
 | 858 | +	oirq += start;  | 
|---|
 | 859 | +  | 
|---|
 | 860 | +	/*  | 
|---|
 | 861 | +	 * Test the circular buffer count is correct.  | 
|---|
 | 862 | +	 */  | 
|---|
 | 863 | +	pr_debug("---> Checking timings array count (%d) is right\n", count);  | 
|---|
 | 864 | +	if (WARN_ON(irqts->count != count))  | 
|---|
 | 865 | +		return -EINVAL;  | 
|---|
 | 866 | +  | 
|---|
 | 867 | +	/*  | 
|---|
 | 868 | +	 * Test the macro allowing to browse all the irqts.  | 
|---|
 | 869 | +	 */  | 
|---|
 | 870 | +	pr_debug("---> Checking the for_each_irqts() macro\n");  | 
|---|
 | 871 | +	for_each_irqts(i, irqts) {  | 
|---|
 | 872 | +  | 
|---|
 | 873 | +		irq = irq_timing_decode(irqts->values[i], &ts);  | 
|---|
 | 874 | +  | 
|---|
 | 875 | +		pr_debug("index=%d, ts=%llX / %llX, irq=%X / %X\n",  | 
|---|
 | 876 | +			 i, ts, ots, irq, oirq);  | 
|---|
 | 877 | +  | 
|---|
 | 878 | +		if (WARN_ON(ts != ots || irq != oirq))  | 
|---|
 | 879 | +			return -EINVAL;  | 
|---|
 | 880 | +  | 
|---|
 | 881 | +		ots++; oirq++;  | 
|---|
 | 882 | +	}  | 
|---|
 | 883 | +  | 
|---|
 | 884 | +	/*  | 
|---|
 | 885 | +	 * The circular buffer should have be flushed when browsed  | 
|---|
 | 886 | +	 * with for_each_irqts  | 
|---|
 | 887 | +	 */  | 
|---|
 | 888 | +	pr_debug("---> Checking timings array is empty after browsing it\n");  | 
|---|
 | 889 | +	if (WARN_ON(irqts->count))  | 
|---|
 | 890 | +		return -EINVAL;  | 
|---|
 | 891 | +  | 
|---|
 | 892 | +	return 0;  | 
|---|
 | 893 | +}  | 
|---|
 | 894 | +  | 
|---|
 | 895 | +static int __init irq_timings_irqts_selftest(void)  | 
|---|
 | 896 | +{  | 
|---|
 | 897 | +	struct irq_timings *irqts = this_cpu_ptr(&irq_timings);  | 
|---|
 | 898 | +	int i, ret;  | 
|---|
 | 899 | +  | 
|---|
 | 900 | +	/*  | 
|---|
 | 901 | +	 * Test the circular buffer with different number of  | 
|---|
 | 902 | +	 * elements. The purpose is to test at the limits (empty, half  | 
|---|
 | 903 | +	 * full, full, wrapped with the cursor at the boundaries,  | 
|---|
 | 904 | +	 * wrapped several times, etc ...  | 
|---|
 | 905 | +	 */  | 
|---|
 | 906 | +	int count[] = { 0,  | 
|---|
 | 907 | +			IRQ_TIMINGS_SIZE >> 1,  | 
|---|
 | 908 | +			IRQ_TIMINGS_SIZE,  | 
|---|
 | 909 | +			IRQ_TIMINGS_SIZE + (IRQ_TIMINGS_SIZE >> 1),  | 
|---|
 | 910 | +			2 * IRQ_TIMINGS_SIZE,  | 
|---|
 | 911 | +			(2 * IRQ_TIMINGS_SIZE) + 3,  | 
|---|
 | 912 | +	};  | 
|---|
 | 913 | +  | 
|---|
 | 914 | +	for (i = 0; i < ARRAY_SIZE(count); i++) {  | 
|---|
 | 915 | +  | 
|---|
 | 916 | +		pr_info("---> Checking the timings with %d/%d values\n",  | 
|---|
 | 917 | +			count[i], IRQ_TIMINGS_SIZE);  | 
|---|
 | 918 | +  | 
|---|
 | 919 | +		ret = irq_timings_test_irqts(irqts, count[i]);  | 
|---|
 | 920 | +		if (ret)  | 
|---|
 | 921 | +			break;  | 
|---|
 | 922 | +	}  | 
|---|
 | 923 | +  | 
|---|
 | 924 | +	return ret;  | 
|---|
 | 925 | +}  | 
|---|
 | 926 | +  | 
|---|
 | 927 | +static int __init irq_timings_selftest(void)  | 
|---|
 | 928 | +{  | 
|---|
 | 929 | +	int ret;  | 
|---|
 | 930 | +  | 
|---|
 | 931 | +	pr_info("------------------- selftest start -----------------\n");  | 
|---|
 | 932 | +  | 
|---|
 | 933 | +	/*  | 
|---|
 | 934 | +	 * At this point, we don't except any subsystem to use the irq  | 
|---|
 | 935 | +	 * timings but us, so it should not be enabled.  | 
|---|
 | 936 | +	 */  | 
|---|
 | 937 | +	if (static_branch_unlikely(&irq_timing_enabled)) {  | 
|---|
 | 938 | +		pr_warn("irq timings already initialized, skipping selftest\n");  | 
|---|
 | 939 | +		return 0;  | 
|---|
 | 940 | +	}  | 
|---|
 | 941 | +  | 
|---|
 | 942 | +	ret = irq_timings_irqts_selftest();  | 
|---|
 | 943 | +	if (ret)  | 
|---|
 | 944 | +		goto out;  | 
|---|
 | 945 | +  | 
|---|
 | 946 | +	ret = irq_timings_irqs_selftest();  | 
|---|
 | 947 | +	if (ret)  | 
|---|
 | 948 | +		goto out;  | 
|---|
 | 949 | +  | 
|---|
 | 950 | +	ret = irq_timings_next_index_selftest();  | 
|---|
 | 951 | +out:  | 
|---|
 | 952 | +	pr_info("---------- selftest end with %s -----------\n",  | 
|---|
 | 953 | +		ret ? "failure" : "success");  | 
|---|
 | 954 | +  | 
|---|
 | 955 | +	return ret;  | 
|---|
 | 956 | +}  | 
|---|
 | 957 | +early_initcall(irq_timings_selftest);  | 
|---|
 | 958 | +#endif  | 
|---|