~hc/RK356X_SDK_RELEASE.git

..	..	@@ -1,14 +1,17 @@
1	1	// SPDX-License-Identifier: GPL-2.0
2	2	// Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
	3	+#define pr_fmt(fmt) "irq_timings: " fmt
3	4
4	5	#include <linux/kernel.h>
5	6	#include <linux/percpu.h>
6	7	#include <linux/slab.h>
7	8	#include <linux/static_key.h>
	9	+#include <linux/init.h>
8	10	#include <linux/interrupt.h>
9	11	#include <linux/idr.h>
10	12	#include <linux/irq.h>
11	13	#include <linux/math64.h>
	14	+#include <linux/log2.h>
12	15
13	16	#include <trace/events/irq.h>
14	17
..	..	@@ -17,16 +20,6 @@
17	20	DEFINE_STATIC_KEY_FALSE(irq_timing_enabled);
18	21
19	22	DEFINE_PER_CPU(struct irq_timings, irq_timings);
20		-
21		-struct irqt_stat {
22		- u64 next_evt;
23		- u64 last_ts;
24		- u64 variance;
25		- u32 avg;
26		- u32 nr_samples;
27		- int anomalies;
28		- int valid;
29		-};
30	23
31	24	static DEFINE_IDR(irqt_stats);
32	25
..	..	@@ -40,75 +33,447 @@
40	33	static_branch_disable(&irq_timing_enabled);
41	34	}
42	35
43		-/**
44		- * irqs_update - update the irq timing statistics with a new timestamp
	36	+/*
	37	+ * The main goal of this algorithm is to predict the next interrupt
	38	+ * occurrence on the current CPU.
45	39	*
46		- * @irqs: an irqt_stat struct pointer
47		- * @ts: the new timestamp
	40	+ * Currently, the interrupt timings are stored in a circular array
	41	+ * buffer every time there is an interrupt, as a tuple: the interrupt
	42	+ * number and the associated timestamp when the event occurred <irq,
	43	+ * timestamp>.
48	44	*
49		- * The statistics are computed online, in other words, the code is
50		- * designed to compute the statistics on a stream of values rather
51		- * than doing multiple passes on the values to compute the average,
52		- * then the variance. The integer division introduces a loss of
53		- * precision but with an acceptable error margin regarding the results
54		- * we would have with the double floating precision: we are dealing
55		- * with nanosec, so big numbers, consequently the mantisse is
56		- * negligeable, especially when converting the time in usec
57		- * afterwards.
	45	+ * For every interrupt occurring in a short period of time, we can
	46	+ * measure the elapsed time between the occurrences for the same
	47	+ * interrupt and we end up with a suite of intervals. The experience
	48	+ * showed the interrupts are often coming following a periodic
	49	+ * pattern.
58	50	*
59		- * The computation happens at idle time. When the CPU is not idle, the
60		- * interrupts' timestamps are stored in the circular buffer, when the
61		- * CPU goes idle and this routine is called, all the buffer's values
62		- * are injected in the statistical model continuying to extend the
63		- * statistics from the previous busy-idle cycle.
	51	+ * The objective of the algorithm is to find out this periodic pattern
	52	+ * in a fastest way and use its period to predict the next irq event.
64	53	*
65		- * The observations showed a device will trigger a burst of periodic
66		- * interrupts followed by one or two peaks of longer time, for
67		- * instance when a SD card device flushes its cache, then the periodic
68		- * intervals occur again. A one second inactivity period resets the
69		- * stats, that gives us the certitude the statistical values won't
70		- * exceed 1x10^9, thus the computation won't overflow.
	54	+ * When the next interrupt event is requested, we are in the situation
	55	+ * where the interrupts are disabled and the circular buffer
	56	+ * containing the timings is filled with the events which happened
	57	+ * after the previous next-interrupt-event request.
71	58	*
72		- * Basically, the purpose of the algorithm is to watch the periodic
73		- * interrupts and eliminate the peaks.
	59	+ * At this point, we read the circular buffer and we fill the irq
	60	+ * related statistics structure. After this step, the circular array
	61	+ * containing the timings is empty because all the values are
	62	+ * dispatched in their corresponding buffers.
74	63	*
75		- * An interrupt is considered periodically stable if the interval of
76		- * its occurences follow the normal distribution, thus the values
77		- * comply with:
	64	+ * Now for each interrupt, we can predict the next event by using the
	65	+ * suffix array, log interval and exponential moving average
78	66	*
79		- * avg - 3 x stddev < value < avg + 3 x stddev
	67	+ * 1. Suffix array
80	68	*
81		- * Which can be simplified to:
	69	+ * Suffix array is an array of all the suffixes of a string. It is
	70	+ * widely used as a data structure for compression, text search, ...
	71	+ * For instance for the word 'banana', the suffixes will be: 'banana'
	72	+ * 'anana' 'nana' 'ana' 'na' 'a'
82	73	*
83		- * -3 x stddev < value - avg < 3 x stddev
	74	+ * Usually, the suffix array is sorted but for our purpose it is
	75	+ * not necessary and won't provide any improvement in the context of
	76	+ * the solved problem where we clearly define the boundaries of the
	77	+ * search by a max period and min period.
84	78	*
85		- * abs(value - avg) < 3 x stddev
	79	+ * The suffix array will build a suite of intervals of different
	80	+ * length and will look for the repetition of each suite. If the suite
	81	+ * is repeating then we have the period because it is the length of
	82	+ * the suite whatever its position in the buffer.
86	83	*
87		- * In order to save a costly square root computation, we use the
88		- * variance. For the record, stddev = sqrt(variance). The equation
89		- * above becomes:
	84	+ * 2. Log interval
90	85	*
91		- * abs(value - avg) < 3 x sqrt(variance)
	86	+ * We saw the irq timings allow to compute the interval of the
	87	+ * occurrences for a specific interrupt. We can reasonibly assume the
	88	+ * longer is the interval, the higher is the error for the next event
	89	+ * and we can consider storing those interval values into an array
	90	+ * where each slot in the array correspond to an interval at the power
	91	+ * of 2 of the index. For example, index 12 will contain values
	92	+ * between 2^11 and 2^12.
92	93	*
93		- * And finally we square it:
	94	+ * At the end we have an array of values where at each index defines a
	95	+ * [2^index - 1, 2 ^ index] interval values allowing to store a large
	96	+ * number of values inside a small array.
94	97	*
95		- * (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2
	98	+ * For example, if we have the value 1123, then we store it at
	99	+ * ilog2(1123) = 10 index value.
96	100	*
97		- * (value - avg) x (value - avg) < 9 x variance
	101	+ * Storing those value at the specific index is done by computing an
	102	+ * exponential moving average for this specific slot. For instance,
	103	+ * for values 1800, 1123, 1453, ... fall under the same slot (10) and
	104	+ * the exponential moving average is computed every time a new value
	105	+ * is stored at this slot.
98	106	*
99		- * Statistically speaking, any values out of this interval is
100		- * considered as an anomaly and is discarded. However, a normal
101		- * distribution appears when the number of samples is 30 (it is the
102		- * rule of thumb in statistics, cf. "30 samples" on Internet). When
103		- * there are three consecutive anomalies, the statistics are resetted.
	107	+ * 3. Exponential Moving Average
104	108	*
	109	+ * The EMA is largely used to track a signal for stocks or as a low
	110	+ * pass filter. The magic of the formula, is it is very simple and the
	111	+ * reactivity of the average can be tuned with the factors called
	112	+ * alpha.
	113	+ *
	114	+ * The higher the alphas are, the faster the average respond to the
	115	+ * signal change. In our case, if a slot in the array is a big
	116	+ * interval, we can have numbers with a big difference between
	117	+ * them. The impact of those differences in the average computation
	118	+ * can be tuned by changing the alpha value.
	119	+ *
	120	+ *
	121	+ * -- The algorithm --
	122	+ *
	123	+ * We saw the different processing above, now let's see how they are
	124	+ * used together.
	125	+ *
	126	+ * For each interrupt:
	127	+ * For each interval:
	128	+ * Compute the index = ilog2(interval)
	129	+ * Compute a new_ema(buffer[index], interval)
	130	+ * Store the index in a circular buffer
	131	+ *
	132	+ * Compute the suffix array of the indexes
	133	+ *
	134	+ * For each suffix:
	135	+ * If the suffix is reverse-found 3 times
	136	+ * Return suffix
	137	+ *
	138	+ * Return Not found
	139	+ *
	140	+ * However we can not have endless suffix array to be build, it won't
	141	+ * make sense and it will add an extra overhead, so we can restrict
	142	+ * this to a maximum suffix length of 5 and a minimum suffix length of
	143	+ * 2. The experience showed 5 is the majority of the maximum pattern
	144	+ * period found for different devices.
	145	+ *
	146	+ * The result is a pattern finding less than 1us for an interrupt.
	147	+ *
	148	+ * Example based on real values:
	149	+ *
	150	+ * Example 1 : MMC write/read interrupt interval:
	151	+ *
	152	+ * 223947, 1240, 1384, 1386, 1386,
	153	+ * 217416, 1236, 1384, 1386, 1387,
	154	+ * 214719, 1241, 1386, 1387, 1384,
	155	+ * 213696, 1234, 1384, 1386, 1388,
	156	+ * 219904, 1240, 1385, 1389, 1385,
	157	+ * 212240, 1240, 1386, 1386, 1386,
	158	+ * 214415, 1236, 1384, 1386, 1387,
	159	+ * 214276, 1234, 1384, 1388, ?
	160	+ *
	161	+ * For each element, apply ilog2(value)
	162	+ *
	163	+ * 15, 8, 8, 8, 8,
	164	+ * 15, 8, 8, 8, 8,
	165	+ * 15, 8, 8, 8, 8,
	166	+ * 15, 8, 8, 8, 8,
	167	+ * 15, 8, 8, 8, 8,
	168	+ * 15, 8, 8, 8, 8,
	169	+ * 15, 8, 8, 8, 8,
	170	+ * 15, 8, 8, 8, ?
	171	+ *
	172	+ * Max period of 5, we take the last (max_period * 3) 15 elements as
	173	+ * we can be confident if the pattern repeats itself three times it is
	174	+ * a repeating pattern.
	175	+ *
	176	+ * 8,
	177	+ * 15, 8, 8, 8, 8,
	178	+ * 15, 8, 8, 8, 8,
	179	+ * 15, 8, 8, 8, ?
	180	+ *
	181	+ * Suffixes are:
	182	+ *
	183	+ * 1) 8, 15, 8, 8, 8 <- max period
	184	+ * 2) 8, 15, 8, 8
	185	+ * 3) 8, 15, 8
	186	+ * 4) 8, 15 <- min period
	187	+ *
	188	+ * From there we search the repeating pattern for each suffix.
	189	+ *
	190	+ * buffer: 8, 15, 8, 8, 8, 8, 15, 8, 8, 8, 8, 15, 8, 8, 8
	191	+ * \| \| \| \| \| \| \| \| \| \| \| \| \| \| \|
	192	+ * 8, 15, 8, 8, 8 \| \| \| \| \| \| \| \| \| \|
	193	+ * 8, 15, 8, 8, 8 \| \| \| \| \|
	194	+ * 8, 15, 8, 8, 8
	195	+ *
	196	+ * When moving the suffix, we found exactly 3 matches.
	197	+ *
	198	+ * The first suffix with period 5 is repeating.
	199	+ *
	200	+ * The next event is (3 * max_period) % suffix_period
	201	+ *
	202	+ * In this example, the result 0, so the next event is suffix[0] => 8
	203	+ *
	204	+ * However, 8 is the index in the array of exponential moving average
	205	+ * which was calculated on the fly when storing the values, so the
	206	+ * interval is ema[8] = 1366
	207	+ *
	208	+ *
	209	+ * Example 2:
	210	+ *
	211	+ * 4, 3, 5, 100,
	212	+ * 3, 3, 5, 117,
	213	+ * 4, 4, 5, 112,
	214	+ * 4, 3, 4, 110,
	215	+ * 3, 5, 3, 117,
	216	+ * 4, 4, 5, 112,
	217	+ * 4, 3, 4, 110,
	218	+ * 3, 4, 5, 112,
	219	+ * 4, 3, 4, 110
	220	+ *
	221	+ * ilog2
	222	+ *
	223	+ * 0, 0, 0, 4,
	224	+ * 0, 0, 0, 4,
	225	+ * 0, 0, 0, 4,
	226	+ * 0, 0, 0, 4,
	227	+ * 0, 0, 0, 4,
	228	+ * 0, 0, 0, 4,
	229	+ * 0, 0, 0, 4,
	230	+ * 0, 0, 0, 4,
	231	+ * 0, 0, 0, 4
	232	+ *
	233	+ * Max period 5:
	234	+ * 0, 0, 4,
	235	+ * 0, 0, 0, 4,
	236	+ * 0, 0, 0, 4,
	237	+ * 0, 0, 0, 4
	238	+ *
	239	+ * Suffixes:
	240	+ *
	241	+ * 1) 0, 0, 4, 0, 0
	242	+ * 2) 0, 0, 4, 0
	243	+ * 3) 0, 0, 4
	244	+ * 4) 0, 0
	245	+ *
	246	+ * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4
	247	+ * \| \| \| \| \| \| X
	248	+ * 0, 0, 4, 0, 0, \| X
	249	+ * 0, 0
	250	+ *
	251	+ * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4
	252	+ * \| \| \| \| \| \| \| \| \| \| \| \| \| \| \|
	253	+ * 0, 0, 4, 0, \| \| \| \| \| \| \| \| \| \| \|
	254	+ * 0, 0, 4, 0, \| \| \| \| \| \| \|
	255	+ * 0, 0, 4, 0, \| \| \|
	256	+ * 0 0 4
	257	+ *
	258	+ * Pattern is found 3 times, the remaining is 1 which results from
	259	+ * (max_period * 3) % suffix_period. This value is the index in the
	260	+ * suffix arrays. The suffix array for a period 4 has the value 4
	261	+ * at index 1.
105	262	*/
106		-static void irqs_update(struct irqt_stat *irqs, u64 ts)
	263	+#define EMA_ALPHA_VAL 64
	264	+#define EMA_ALPHA_SHIFT 7
	265	+
	266	+#define PREDICTION_PERIOD_MIN 3
	267	+#define PREDICTION_PERIOD_MAX 5
	268	+#define PREDICTION_FACTOR 4
	269	+#define PREDICTION_MAX 10 /* 2 ^ PREDICTION_MAX useconds */
	270	+#define PREDICTION_BUFFER_SIZE 16 /* slots for EMAs, hardly more than 16 */
	271	+
	272	+/*
	273	+ * Number of elements in the circular buffer: If it happens it was
	274	+ * flushed before, then the number of elements could be smaller than
	275	+ * IRQ_TIMINGS_SIZE, so the count is used, otherwise the array size is
	276	+ * used as we wrapped. The index begins from zero when we did not
	277	+ * wrap. That could be done in a nicer way with the proper circular
	278	+ * array structure type but with the cost of extra computation in the
	279	+ * interrupt handler hot path. We choose efficiency.
	280	+ */
	281	+#define for_each_irqts(i, irqts) \
	282	+ for (i = irqts->count < IRQ_TIMINGS_SIZE ? \
	283	+ 0 : irqts->count & IRQ_TIMINGS_MASK, \
	284	+ irqts->count = min(IRQ_TIMINGS_SIZE, \
	285	+ irqts->count); \
	286	+ irqts->count > 0; irqts->count--, \
	287	+ i = (i + 1) & IRQ_TIMINGS_MASK)
	288	+
	289	+struct irqt_stat {
	290	+ u64 last_ts;
	291	+ u64 ema_time[PREDICTION_BUFFER_SIZE];
	292	+ int timings[IRQ_TIMINGS_SIZE];
	293	+ int circ_timings[IRQ_TIMINGS_SIZE];
	294	+ int count;
	295	+};
	296	+
	297	+/*
	298	+ * Exponential moving average computation
	299	+ */
	300	+static u64 irq_timings_ema_new(u64 value, u64 ema_old)
	301	+{
	302	+ s64 diff;
	303	+
	304	+ if (unlikely(!ema_old))
	305	+ return value;
	306	+
	307	+ diff = (value - ema_old) * EMA_ALPHA_VAL;
	308	+ /*
	309	+ * We can use a s64 type variable to be added with the u64
	310	+ * ema_old variable as this one will never have its topmost
	311	+ * bit set, it will be always smaller than 2^63 nanosec
	312	+ * interrupt interval (292 years).
	313	+ */
	314	+ return ema_old + (diff >> EMA_ALPHA_SHIFT);
	315	+}
	316	+
	317	+static int irq_timings_next_event_index(int *buffer, size_t len, int period_max)
	318	+{
	319	+ int period;
	320	+
	321	+ /*
	322	+ * Move the beginning pointer to the end minus the max period x 3.
	323	+ * We are at the point we can begin searching the pattern
	324	+ */
	325	+ buffer = &buffer[len - (period_max * 3)];
	326	+
	327	+ /* Adjust the length to the maximum allowed period x 3 */
	328	+ len = period_max * 3;
	329	+
	330	+ /*
	331	+ * The buffer contains the suite of intervals, in a ilog2
	332	+ * basis, we are looking for a repetition. We point the
	333	+ * beginning of the search three times the length of the
	334	+ * period beginning at the end of the buffer. We do that for
	335	+ * each suffix.
	336	+ */
	337	+ for (period = period_max; period >= PREDICTION_PERIOD_MIN; period--) {
	338	+
	339	+ /*
	340	+ * The first comparison always succeed because the
	341	+ * suffix is deduced from the first n-period bytes of
	342	+ * the buffer and we compare the initial suffix with
	343	+ * itself, so we can skip the first iteration.
	344	+ */
	345	+ int idx = period;
	346	+ size_t size = period;
	347	+
	348	+ /*
	349	+ * We look if the suite with period 'i' repeat
	350	+ * itself. If it is truncated at the end, as it
	351	+ * repeats we can use the period to find out the next
	352	+ * element with the modulo.
	353	+ */
	354	+ while (!memcmp(buffer, &buffer[idx], size * sizeof(int))) {
	355	+
	356	+ /*
	357	+ * Move the index in a period basis
	358	+ */
	359	+ idx += size;
	360	+
	361	+ /*
	362	+ * If this condition is reached, all previous
	363	+ * memcmp were successful, so the period is
	364	+ * found.
	365	+ */
	366	+ if (idx == len)
	367	+ return buffer[len % period];
	368	+
	369	+ /*
	370	+ * If the remaining elements to compare are
	371	+ * smaller than the period, readjust the size
	372	+ * of the comparison for the last iteration.
	373	+ */
	374	+ if (len - idx < period)
	375	+ size = len - idx;
	376	+ }
	377	+ }
	378	+
	379	+ return -1;
	380	+}
	381	+
	382	+static u64 __irq_timings_next_event(struct irqt_stat *irqs, int irq, u64 now)
	383	+{
	384	+ int index, i, period_max, count, start, min = INT_MAX;
	385	+
	386	+ if ((now - irqs->last_ts) >= NSEC_PER_SEC) {
	387	+ irqs->count = irqs->last_ts = 0;
	388	+ return U64_MAX;
	389	+ }
	390	+
	391	+ /*
	392	+ * As we want to find three times the repetition, we need a
	393	+ * number of intervals greater or equal to three times the
	394	+ * maximum period, otherwise we truncate the max period.
	395	+ */
	396	+ period_max = irqs->count > (3 * PREDICTION_PERIOD_MAX) ?
	397	+ PREDICTION_PERIOD_MAX : irqs->count / 3;
	398	+
	399	+ /*
	400	+ * If we don't have enough irq timings for this prediction,
	401	+ * just bail out.
	402	+ */
	403	+ if (period_max <= PREDICTION_PERIOD_MIN)
	404	+ return U64_MAX;
	405	+
	406	+ /*
	407	+ * 'count' will depends if the circular buffer wrapped or not
	408	+ */
	409	+ count = irqs->count < IRQ_TIMINGS_SIZE ?
	410	+ irqs->count : IRQ_TIMINGS_SIZE;
	411	+
	412	+ start = irqs->count < IRQ_TIMINGS_SIZE ?
	413	+ 0 : (irqs->count & IRQ_TIMINGS_MASK);
	414	+
	415	+ /*
	416	+ * Copy the content of the circular buffer into another buffer
	417	+ * in order to linearize the buffer instead of dealing with
	418	+ * wrapping indexes and shifted array which will be prone to
	419	+ * error and extremelly difficult to debug.
	420	+ */
	421	+ for (i = 0; i < count; i++) {
	422	+ int index = (start + i) & IRQ_TIMINGS_MASK;
	423	+
	424	+ irqs->timings[i] = irqs->circ_timings[index];
	425	+ min = min_t(int, irqs->timings[i], min);
	426	+ }
	427	+
	428	+ index = irq_timings_next_event_index(irqs->timings, count, period_max);
	429	+ if (index < 0)
	430	+ return irqs->last_ts + irqs->ema_time[min];
	431	+
	432	+ return irqs->last_ts + irqs->ema_time[index];
	433	+}
	434	+
	435	+static __always_inline int irq_timings_interval_index(u64 interval)
	436	+{
	437	+ /*
	438	+ * The PREDICTION_FACTOR increase the interval size for the
	439	+ * array of exponential average.
	440	+ */
	441	+ u64 interval_us = (interval >> 10) / PREDICTION_FACTOR;
	442	+
	443	+ return likely(interval_us) ? ilog2(interval_us) : 0;
	444	+}
	445	+
	446	+static __always_inline void __irq_timings_store(int irq, struct irqt_stat *irqs,
	447	+ u64 interval)
	448	+{
	449	+ int index;
	450	+
	451	+ /*
	452	+ * Get the index in the ema table for this interrupt.
	453	+ */
	454	+ index = irq_timings_interval_index(interval);
	455	+
	456	+ if (index > PREDICTION_BUFFER_SIZE - 1) {
	457	+ irqs->count = 0;
	458	+ return;
	459	+ }
	460	+
	461	+ /*
	462	+ * Store the index as an element of the pattern in another
	463	+ * circular array.
	464	+ */
	465	+ irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index;
	466	+
	467	+ irqs->ema_time[index] = irq_timings_ema_new(interval,
	468	+ irqs->ema_time[index]);
	469	+
	470	+ irqs->count++;
	471	+}
	472	+
	473	+static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts)
107	474	{
108	475	u64 old_ts = irqs->last_ts;
109		- u64 variance = 0;
110	476	u64 interval;
111		- s64 diff;
112	477
113	478	/*
114	479	* The timestamps are absolute time values, we need to compute
..	..	@@ -125,7 +490,7 @@
125	490
126	491	/*
127	492	* The interrupt triggered more than one second apart, that
128		- * ends the sequence as predictible for our purpose. In this
	493	+ * ends the sequence as predictable for our purpose. In this
129	494	* case, assume we have the beginning of a sequence and the
130	495	* timestamp is the first value. As it is impossible to
131	496	* predict anything at this point, return.
..	..	@@ -135,87 +500,11 @@
135	500	* want as we need another timestamp to compute an interval.
136	501	*/
137	502	if (interval >= NSEC_PER_SEC) {
138		- memset(irqs, 0, sizeof(*irqs));
139		- irqs->last_ts = ts;
	503	+ irqs->count = 0;
140	504	return;
141	505	}
142	506
143		- /*
144		- * Pre-compute the delta with the average as the result is
145		- * used several times in this function.
146		- */
147		- diff = interval - irqs->avg;
148		-
149		- /*
150		- * Increment the number of samples.
151		- */
152		- irqs->nr_samples++;
153		-
154		- /*
155		- * Online variance divided by the number of elements if there
156		- * is more than one sample. Normally the formula is division
157		- * by nr_samples - 1 but we assume the number of element will be
158		- * more than 32 and dividing by 32 instead of 31 is enough
159		- * precise.
160		- */
161		- if (likely(irqs->nr_samples > 1))
162		- variance = irqs->variance >> IRQ_TIMINGS_SHIFT;
163		-
164		- /*
165		- * The rule of thumb in statistics for the normal distribution
166		- * is having at least 30 samples in order to have the model to
167		- * apply. Values outside the interval are considered as an
168		- * anomaly.
169		- */
170		- if ((irqs->nr_samples >= 30) && ((diff * diff) > (9 * variance))) {
171		- /*
172		- * After three consecutive anomalies, we reset the
173		- * stats as it is no longer stable enough.
174		- */
175		- if (irqs->anomalies++ >= 3) {
176		- memset(irqs, 0, sizeof(*irqs));
177		- irqs->last_ts = ts;
178		- return;
179		- }
180		- } else {
181		- /*
182		- * The anomalies must be consecutives, so at this
183		- * point, we reset the anomalies counter.
184		- */
185		- irqs->anomalies = 0;
186		- }
187		-
188		- /*
189		- * The interrupt is considered stable enough to try to predict
190		- * the next event on it.
191		- */
192		- irqs->valid = 1;
193		-
194		- /*
195		- * Online average algorithm:
196		- *
197		- * new_average = average + ((value - average) / count)
198		- *
199		- * The variance computation depends on the new average
200		- * to be computed here first.
201		- *
202		- */
203		- irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT);
204		-
205		- /*
206		- * Online variance algorithm:
207		- *
208		- * new_variance = variance + (value - average) x (value - new_average)
209		- *
210		- * Warning: irqs->avg is updated with the line above, hence
211		- * 'interval - irqs->avg' is no longer equal to 'diff'
212		- */
213		- irqs->variance = irqs->variance + (diff * (interval - irqs->avg));
214		-
215		- /*
216		- * Update the next event
217		- */
218		- irqs->next_evt = ts + irqs->avg;
	507	+ __irq_timings_store(irq, irqs, interval);
219	508	}
220	509
221	510	/**
..	..	@@ -259,6 +548,9 @@
259	548	*/
260	549	lockdep_assert_irqs_disabled();
261	550
	551	+ if (!irqts->count)
	552	+ return next_evt;
	553	+
262	554	/*
263	555	* Number of elements in the circular buffer: If it happens it
264	556	* was flushed before, then the number of elements could be
..	..	@@ -269,21 +561,15 @@
269	561	* type but with the cost of extra computation in the
270	562	* interrupt handler hot path. We choose efficiency.
271	563	*
272		- * Inject measured irq/timestamp to the statistical model
273		- * while decrementing the counter because we consume the data
274		- * from our circular buffer.
	564	+ * Inject measured irq/timestamp to the pattern prediction
	565	+ * model while decrementing the counter because we consume the
	566	+ * data from our circular buffer.
275	567	*/
276		- for (i = irqts->count & IRQ_TIMINGS_MASK,
277		- irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count);
278		- irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) {
279		-
	568	+ for_each_irqts(i, irqts) {
280	569	irq = irq_timing_decode(irqts->values[i], &ts);
281		-
282	570	s = idr_find(&irqt_stats, irq);
283		- if (s) {
284		- irqs = this_cpu_ptr(s);
285		- irqs_update(irqs, ts);
286		- }
	571	+ if (s)
	572	+ irq_timings_store(irq, this_cpu_ptr(s), ts);
287	573	}
288	574
289	575	/*
..	..	@@ -294,26 +580,12 @@
294	580
295	581	irqs = this_cpu_ptr(s);
296	582
297		- if (!irqs->valid)
298		- continue;
	583	+ ts = __irq_timings_next_event(irqs, i, now);
	584	+ if (ts <= now)
	585	+ return now;
299	586
300		- if (irqs->next_evt <= now) {
301		- irq = i;
302		- next_evt = now;
303		-
304		- /*
305		- * This interrupt mustn't use in the future
306		- * until new events occur and update the
307		- * statistics.
308		- */
309		- irqs->valid = 0;
310		- break;
311		- }
312		-
313		- if (irqs->next_evt < next_evt) {
314		- irq = i;
315		- next_evt = irqs->next_evt;
316		- }
	587	+ if (ts < next_evt)
	588	+ next_evt = ts;
317	589	}
318	590
319	591	return next_evt;
..	..	@@ -337,7 +609,7 @@
337	609
338	610	/*
339	611	* Some platforms can have the same private interrupt per cpu,
340		- * so this function may be be called several times with the
	612	+ * so this function may be called several times with the
341	613	* same interrupt number. Just bail out in case the per cpu
342	614	* stat structure is already allocated.
343	615	*/
..	..	@@ -360,3 +632,327 @@
360	632
361	633	return 0;
362	634	}
	635	+
	636	+#ifdef CONFIG_TEST_IRQ_TIMINGS
	637	+struct timings_intervals {
	638	+ u64 *intervals;
	639	+ size_t count;
	640	+};
	641	+
	642	+/*
	643	+ * Intervals are given in nanosecond base
	644	+ */
	645	+static u64 intervals0[] __initdata = {
	646	+ 10000, 50000, 200000, 500000,
	647	+ 10000, 50000, 200000, 500000,
	648	+ 10000, 50000, 200000, 500000,
	649	+ 10000, 50000, 200000, 500000,
	650	+ 10000, 50000, 200000, 500000,
	651	+ 10000, 50000, 200000, 500000,
	652	+ 10000, 50000, 200000, 500000,
	653	+ 10000, 50000, 200000, 500000,
	654	+ 10000, 50000, 200000,
	655	+};
	656	+
	657	+static u64 intervals1[] __initdata = {
	658	+ 223947000, 1240000, 1384000, 1386000, 1386000,
	659	+ 217416000, 1236000, 1384000, 1386000, 1387000,
	660	+ 214719000, 1241000, 1386000, 1387000, 1384000,
	661	+ 213696000, 1234000, 1384000, 1386000, 1388000,
	662	+ 219904000, 1240000, 1385000, 1389000, 1385000,
	663	+ 212240000, 1240000, 1386000, 1386000, 1386000,
	664	+ 214415000, 1236000, 1384000, 1386000, 1387000,
	665	+ 214276000, 1234000,
	666	+};
	667	+
	668	+static u64 intervals2[] __initdata = {
	669	+ 4000, 3000, 5000, 100000,
	670	+ 3000, 3000, 5000, 117000,
	671	+ 4000, 4000, 5000, 112000,
	672	+ 4000, 3000, 4000, 110000,
	673	+ 3000, 5000, 3000, 117000,
	674	+ 4000, 4000, 5000, 112000,
	675	+ 4000, 3000, 4000, 110000,
	676	+ 3000, 4000, 5000, 112000,
	677	+ 4000,
	678	+};
	679	+
	680	+static u64 intervals3[] __initdata = {
	681	+ 1385000, 212240000, 1240000,
	682	+ 1386000, 214415000, 1236000,
	683	+ 1384000, 214276000, 1234000,
	684	+ 1386000, 214415000, 1236000,
	685	+ 1385000, 212240000, 1240000,
	686	+ 1386000, 214415000, 1236000,
	687	+ 1384000, 214276000, 1234000,
	688	+ 1386000, 214415000, 1236000,
	689	+ 1385000, 212240000, 1240000,
	690	+};
	691	+
	692	+static u64 intervals4[] __initdata = {
	693	+ 10000, 50000, 10000, 50000,
	694	+ 10000, 50000, 10000, 50000,
	695	+ 10000, 50000, 10000, 50000,
	696	+ 10000, 50000, 10000, 50000,
	697	+ 10000, 50000, 10000, 50000,
	698	+ 10000, 50000, 10000, 50000,
	699	+ 10000, 50000, 10000, 50000,
	700	+ 10000, 50000, 10000, 50000,
	701	+ 10000,
	702	+};
	703	+
	704	+static struct timings_intervals tis[] __initdata = {
	705	+ { intervals0, ARRAY_SIZE(intervals0) },
	706	+ { intervals1, ARRAY_SIZE(intervals1) },
	707	+ { intervals2, ARRAY_SIZE(intervals2) },
	708	+ { intervals3, ARRAY_SIZE(intervals3) },
	709	+ { intervals4, ARRAY_SIZE(intervals4) },
	710	+};
	711	+
	712	+static int __init irq_timings_test_next_index(struct timings_intervals *ti)
	713	+{
	714	+ int _buffer[IRQ_TIMINGS_SIZE];
	715	+ int buffer[IRQ_TIMINGS_SIZE];
	716	+ int index, start, i, count, period_max;
	717	+
	718	+ count = ti->count - 1;
	719	+
	720	+ period_max = count > (3 * PREDICTION_PERIOD_MAX) ?
	721	+ PREDICTION_PERIOD_MAX : count / 3;
	722	+
	723	+ /*
	724	+ * Inject all values except the last one which will be used
	725	+ * to compare with the next index result.
	726	+ */
	727	+ pr_debug("index suite: ");
	728	+
	729	+ for (i = 0; i < count; i++) {
	730	+ index = irq_timings_interval_index(ti->intervals[i]);
	731	+ _buffer[i & IRQ_TIMINGS_MASK] = index;
	732	+ pr_cont("%d ", index);
	733	+ }
	734	+
	735	+ start = count < IRQ_TIMINGS_SIZE ? 0 :
	736	+ count & IRQ_TIMINGS_MASK;
	737	+
	738	+ count = min_t(int, count, IRQ_TIMINGS_SIZE);
	739	+
	740	+ for (i = 0; i < count; i++) {
	741	+ int index = (start + i) & IRQ_TIMINGS_MASK;
	742	+ buffer[i] = _buffer[index];
	743	+ }
	744	+
	745	+ index = irq_timings_next_event_index(buffer, count, period_max);
	746	+ i = irq_timings_interval_index(ti->intervals[ti->count - 1]);
	747	+
	748	+ if (index != i) {
	749	+ pr_err("Expected (%d) and computed (%d) next indexes differ\n",
	750	+ i, index);
	751	+ return -EINVAL;
	752	+ }
	753	+
	754	+ return 0;
	755	+}
	756	+
	757	+static int __init irq_timings_next_index_selftest(void)
	758	+{
	759	+ int i, ret;
	760	+
	761	+ for (i = 0; i < ARRAY_SIZE(tis); i++) {
	762	+
	763	+ pr_info("---> Injecting intervals number #%d (count=%zd)\n",
	764	+ i, tis[i].count);
	765	+
	766	+ ret = irq_timings_test_next_index(&tis[i]);
	767	+ if (ret)
	768	+ break;
	769	+ }
	770	+
	771	+ return ret;
	772	+}
	773	+
	774	+static int __init irq_timings_test_irqs(struct timings_intervals *ti)
	775	+{
	776	+ struct irqt_stat __percpu *s;
	777	+ struct irqt_stat *irqs;
	778	+ int i, index, ret, irq = 0xACE5;
	779	+
	780	+ ret = irq_timings_alloc(irq);
	781	+ if (ret) {
	782	+ pr_err("Failed to allocate irq timings\n");
	783	+ return ret;
	784	+ }
	785	+
	786	+ s = idr_find(&irqt_stats, irq);
	787	+ if (!s) {
	788	+ ret = -EIDRM;
	789	+ goto out;
	790	+ }
	791	+
	792	+ irqs = this_cpu_ptr(s);
	793	+
	794	+ for (i = 0; i < ti->count; i++) {
	795	+
	796	+ index = irq_timings_interval_index(ti->intervals[i]);
	797	+ pr_debug("%d: interval=%llu ema_index=%d\n",
	798	+ i, ti->intervals[i], index);
	799	+
	800	+ __irq_timings_store(irq, irqs, ti->intervals[i]);
	801	+ if (irqs->circ_timings[i & IRQ_TIMINGS_MASK] != index) {
	802	+ ret = -EBADSLT;
	803	+ pr_err("Failed to store in the circular buffer\n");
	804	+ goto out;
	805	+ }
	806	+ }
	807	+
	808	+ if (irqs->count != ti->count) {
	809	+ ret = -ERANGE;
	810	+ pr_err("Count differs\n");
	811	+ goto out;
	812	+ }
	813	+
	814	+ ret = 0;
	815	+out:
	816	+ irq_timings_free(irq);
	817	+
	818	+ return ret;
	819	+}
	820	+
	821	+static int __init irq_timings_irqs_selftest(void)
	822	+{
	823	+ int i, ret;
	824	+
	825	+ for (i = 0; i < ARRAY_SIZE(tis); i++) {
	826	+ pr_info("---> Injecting intervals number #%d (count=%zd)\n",
	827	+ i, tis[i].count);
	828	+ ret = irq_timings_test_irqs(&tis[i]);
	829	+ if (ret)
	830	+ break;
	831	+ }
	832	+
	833	+ return ret;
	834	+}
	835	+
	836	+static int __init irq_timings_test_irqts(struct irq_timings *irqts,
	837	+ unsigned count)
	838	+{
	839	+ int start = count >= IRQ_TIMINGS_SIZE ? count - IRQ_TIMINGS_SIZE : 0;
	840	+ int i, irq, oirq = 0xBEEF;
	841	+ u64 ots = 0xDEAD, ts;
	842	+
	843	+ /*
	844	+ * Fill the circular buffer by using the dedicated function.
	845	+ */
	846	+ for (i = 0; i < count; i++) {
	847	+ pr_debug("%d: index=%d, ts=%llX irq=%X\n",
	848	+ i, i & IRQ_TIMINGS_MASK, ots + i, oirq + i);
	849	+
	850	+ irq_timings_push(ots + i, oirq + i);
	851	+ }
	852	+
	853	+ /*
	854	+ * Compute the first elements values after the index wrapped
	855	+ * up or not.
	856	+ */
	857	+ ots += start;
	858	+ oirq += start;
	859	+
	860	+ /*
	861	+ * Test the circular buffer count is correct.
	862	+ */
	863	+ pr_debug("---> Checking timings array count (%d) is right\n", count);
	864	+ if (WARN_ON(irqts->count != count))
	865	+ return -EINVAL;
	866	+
	867	+ /*
	868	+ * Test the macro allowing to browse all the irqts.
	869	+ */
	870	+ pr_debug("---> Checking the for_each_irqts() macro\n");
	871	+ for_each_irqts(i, irqts) {
	872	+
	873	+ irq = irq_timing_decode(irqts->values[i], &ts);
	874	+
	875	+ pr_debug("index=%d, ts=%llX / %llX, irq=%X / %X\n",
	876	+ i, ts, ots, irq, oirq);
	877	+
	878	+ if (WARN_ON(ts != ots \|\| irq != oirq))
	879	+ return -EINVAL;
	880	+
	881	+ ots++; oirq++;
	882	+ }
	883	+
	884	+ /*
	885	+ * The circular buffer should have be flushed when browsed
	886	+ * with for_each_irqts
	887	+ */
	888	+ pr_debug("---> Checking timings array is empty after browsing it\n");
	889	+ if (WARN_ON(irqts->count))
	890	+ return -EINVAL;
	891	+
	892	+ return 0;
	893	+}
	894	+
	895	+static int __init irq_timings_irqts_selftest(void)
	896	+{
	897	+ struct irq_timings *irqts = this_cpu_ptr(&irq_timings);
	898	+ int i, ret;
	899	+
	900	+ /*
	901	+ * Test the circular buffer with different number of
	902	+ * elements. The purpose is to test at the limits (empty, half
	903	+ * full, full, wrapped with the cursor at the boundaries,
	904	+ * wrapped several times, etc ...
	905	+ */
	906	+ int count[] = { 0,
	907	+ IRQ_TIMINGS_SIZE >> 1,
	908	+ IRQ_TIMINGS_SIZE,
	909	+ IRQ_TIMINGS_SIZE + (IRQ_TIMINGS_SIZE >> 1),
	910	+ 2 * IRQ_TIMINGS_SIZE,
	911	+ (2 * IRQ_TIMINGS_SIZE) + 3,
	912	+ };
	913	+
	914	+ for (i = 0; i < ARRAY_SIZE(count); i++) {
	915	+
	916	+ pr_info("---> Checking the timings with %d/%d values\n",
	917	+ count[i], IRQ_TIMINGS_SIZE);
	918	+
	919	+ ret = irq_timings_test_irqts(irqts, count[i]);
	920	+ if (ret)
	921	+ break;
	922	+ }
	923	+
	924	+ return ret;
	925	+}
	926	+
	927	+static int __init irq_timings_selftest(void)
	928	+{
	929	+ int ret;
	930	+
	931	+ pr_info("------------------- selftest start -----------------\n");
	932	+
	933	+ /*
	934	+ * At this point, we don't except any subsystem to use the irq
	935	+ * timings but us, so it should not be enabled.
	936	+ */
	937	+ if (static_branch_unlikely(&irq_timing_enabled)) {
	938	+ pr_warn("irq timings already initialized, skipping selftest\n");
	939	+ return 0;
	940	+ }
	941	+
	942	+ ret = irq_timings_irqts_selftest();
	943	+ if (ret)
	944	+ goto out;
	945	+
	946	+ ret = irq_timings_irqs_selftest();
	947	+ if (ret)
	948	+ goto out;
	949	+
	950	+ ret = irq_timings_next_index_selftest();
	951	+out:
	952	+ pr_info("---------- selftest end with %s -----------\n",
	953	+ ret ? "failure" : "success");
	954	+
	955	+ return ret;
	956	+}
	957	+early_initcall(irq_timings_selftest);
	958	+#endif