2024-05-10 61598093bbdd283a7edc367d900f223070ead8d2
kernel/kernel/sched/pelt.c
@@ -28,7 +28,80 @@
 #include "sched.h"
 #include "pelt.h"
 
-#include <trace/events/sched.h>
+int pelt_load_avg_period = PELT32_LOAD_AVG_PERIOD;
+int sysctl_sched_pelt_period = PELT32_LOAD_AVG_PERIOD;
+int pelt_load_avg_max = PELT32_LOAD_AVG_MAX;
+const u32 *pelt_runnable_avg_yN_inv = pelt32_runnable_avg_yN_inv;
+
+int get_pelt_halflife(void)
+{
+	return pelt_load_avg_period;
+}
+EXPORT_SYMBOL_GPL(get_pelt_halflife);
+
+static int __set_pelt_halflife(void *data)
+{
+	int rc = 0;
+	int num = *(int *)data;
+
+	switch (num) {
+	case PELT8_LOAD_AVG_PERIOD:
+		pelt_load_avg_period = PELT8_LOAD_AVG_PERIOD;
+		pelt_load_avg_max = PELT8_LOAD_AVG_MAX;
+		pelt_runnable_avg_yN_inv = pelt8_runnable_avg_yN_inv;
+		pr_info("PELT half life is set to %dms\n", num);
+		break;
+	case PELT32_LOAD_AVG_PERIOD:
+		pelt_load_avg_period = PELT32_LOAD_AVG_PERIOD;
+		pelt_load_avg_max = PELT32_LOAD_AVG_MAX;
+		pelt_runnable_avg_yN_inv = pelt32_runnable_avg_yN_inv;
+		pr_info("PELT half life is set to %dms\n", num);
+		break;
+	default:
+		rc = -EINVAL;
+		pr_err("Failed to set PELT half life to %dms, the current value is %dms\n",
+		       num, pelt_load_avg_period);
+	}
+
+	sysctl_sched_pelt_period = pelt_load_avg_period;
+
+	return rc;
+}
+
+int set_pelt_halflife(int num)
+{
+	return stop_machine(__set_pelt_halflife, &num, NULL);
+}
+EXPORT_SYMBOL_GPL(set_pelt_halflife);
+
+int sched_pelt_period_update_handler(struct ctl_table *table, int write,
+				     void *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+	if (ret || !write)
+		return ret;
+
+	set_pelt_halflife(sysctl_sched_pelt_period);
+
+	return 0;
+}
+
+static int __init set_pelt(char *str)
+{
+	int rc, num;
+
+	rc = kstrtoint(str, 0, &num);
+	if (rc) {
+		pr_err("%s: kstrtoint failed. rc=%d\n", __func__, rc);
+		return 0;
+	}
+
+	__set_pelt_halflife(&num);
+	return rc;
+}
+
+early_param("pelt", set_pelt);
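sched_pelt_period_update_handler() above is only half of the sysctl plumbing: the ctl_table entry that points at it is registered elsewhere in this tree. A minimal sketch of what that registration could look like, assuming a /proc/sys/kernel/sched_pelt_period knob (the table name and its placement are assumptions, not part of this patch):

static struct ctl_table sched_pelt_sysctls[] = {
	{
		.procname	= "sched_pelt_period",
		.data		= &sysctl_sched_pelt_period,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= sched_pelt_period_update_handler,
	},
	{}
};

Before user space is up, the same switch is reachable through the early_param() hook, e.g. booting with pelt=8 selects the 8 ms half-life; note the boot path calls __set_pelt_halflife() directly rather than through stop_machine().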
 
 /*
  * Approximate:
@@ -56,7 +129,7 @@
 		local_n %= LOAD_AVG_PERIOD;
 	}
 
-	val = mul_u64_u32_shr(val, runnable_avg_yN_inv[local_n], 32);
+	val = mul_u64_u32_shr(val, pelt_runnable_avg_yN_inv[local_n], 32);
 	return val;
 }
 
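decay_load() computes val * y^n, where y is calibrated so that y^LOAD_AVG_PERIOD == 0.5, and pelt_runnable_avg_yN_inv[n] caches y^n scaled by 2^32 so that mul_u64_u32_shr() can apply it in fixed point. A floating-point reference model of what the fixed-point code approximates (userspace-only illustration, not kernel code):

#include <math.h>
#include <stdint.h>

/* decay_load(val, n) ~= val * y^n, with y^period == 0.5; period is the
 * half-life in 1024us segments: 32 by default, 8 with the PELT8 table. */
static uint64_t decay_load_ref(uint64_t val, unsigned int n,
			       unsigned int period)
{
	return (uint64_t)((double)val * pow(0.5, (double)n / period));
}

For example, decay_load_ref(1024, 32, 32) == 512: a contribution one full half-life old counts half.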
@@ -82,8 +155,6 @@
 
 	return c1 + c2 + c3;
 }
-
-#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
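The c1 + c2 + c3 returned above is the usual PELT decomposition of the elapsed time: with p whole 1024 us periods elapsed, d1 the unfinished tail of the previous period and d3 the head of the current one,

	contrib = d1 * y^p + 1024 * \Sum_{n=1}^{p-1} y^n + d3

where the middle geometric series is evaluated in closed form as LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, p) - 1024, using the identity 1024 * \Sum_{n>=0} y^n == LOAD_AVG_MAX.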
 
 /*
  * Accumulate the three separate parts of the sum; d1 the remainder
@@ -121,23 +192,35 @@
 	 */
 	if (periods) {
 		sa->load_sum = decay_load(sa->load_sum, periods);
-		sa->runnable_load_sum =
-			decay_load(sa->runnable_load_sum, periods);
+		sa->runnable_sum =
+			decay_load(sa->runnable_sum, periods);
 		sa->util_sum = decay_load((u64)(sa->util_sum), periods);
 
 		/*
 		 * Step 2
 		 */
 		delta %= 1024;
-		contrib = __accumulate_pelt_segments(periods,
-				1024 - sa->period_contrib, delta);
+		if (load) {
+			/*
+			 * This relies on the:
+			 *
+			 *   if (!load)
+			 *     runnable = running = 0;
+			 *
+			 * clause from ___update_load_sum(); this results in
+			 * the below usage of @contrib to disappear entirely,
+			 * so no point in calculating it.
+			 */
+			contrib = __accumulate_pelt_segments(periods,
+					1024 - sa->period_contrib, delta);
+		}
 	}
 	sa->period_contrib = delta;
 
 	if (load)
 		sa->load_sum += load * contrib;
 	if (runnable)
-		sa->runnable_load_sum += runnable * contrib;
+		sa->runnable_sum += runnable * contrib << SCHED_CAPACITY_SHIFT;
 	if (running)
 		sa->util_sum += contrib << SCHED_CAPACITY_SHIFT;
 
@@ -205,7 +288,9 @@
 	 * This means that weight will be 0 but not running for a sched_entity
 	 * but also for a cfs_rq if the latter becomes idle. As an example,
 	 * this happens during idle_balance() which calls
-	 * update_blocked_averages()
+	 * update_blocked_averages().
+	 *
+	 * Also see the comment in accumulate_sum().
 	 */
 	if (!load)
 		runnable = running = 0;
@@ -223,16 +308,40 @@
 	return 1;
 }
 
+/*
+ * When syncing *_avg with *_sum, we must take into account the current
+ * position in the PELT segment otherwise the remaining part of the segment
+ * will be considered as idle time whereas it's not yet elapsed and this will
+ * generate unwanted oscillation in the range [1002..1024[.
+ *
+ * The max value of *_sum varies with the position in the time segment and is
+ * equal to:
+ *
+ *   LOAD_AVG_MAX*y + sa->period_contrib
+ *
+ * which can be simplified into:
+ *
+ *   LOAD_AVG_MAX - 1024 + sa->period_contrib
+ *
+ * because LOAD_AVG_MAX*y == LOAD_AVG_MAX-1024
+ *
+ * The same care must be taken when a sched entity is added, updated or
+ * removed from a cfs_rq and we need to update sched_avg. Scheduler entities
+ * and the cfs rq, to which they are attached, have the same position in the
+ * time segment because they use the same clock. This means that we can use
+ * the period_contrib of cfs_rq when updating the sched_avg of a sched_entity
+ * if it's more convenient.
+ */
 static __always_inline void
-___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runnable)
+___update_load_avg(struct sched_avg *sa, unsigned long load)
 {
-	u32 divider = LOAD_AVG_MAX - 1024 + sa->period_contrib;
+	u32 divider = get_pelt_divider(sa);
 
 	/*
 	 * Step 2: update *_avg.
 	 */
 	sa->load_avg = div_u64(load * sa->load_sum, divider);
-	sa->runnable_load_avg = div_u64(runnable * sa->runnable_load_sum, divider);
+	sa->runnable_avg = div_u64(sa->runnable_sum, divider);
 	WRITE_ONCE(sa->util_avg, sa->util_sum / divider);
 }
 
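get_pelt_divider() is introduced on the header side of this patch, which is not part of this hunk. Judging from the open-coded expression it replaces and the comment above it, a sketch consistent with the now-variable half-life would be (an assumption about pelt.h, not quoted from it):

static inline u32 get_pelt_divider(struct sched_avg *avg)
{
	/* maximum attainable *_sum at this position in the segment */
	return pelt_load_avg_max - 1024 + avg->period_contrib;
}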
@@ -240,56 +349,48 @@
  * sched_entity:
  *
  *   task:
- *     se_runnable() == se_weight()
+ *     se_weight()   = se->load.weight
+ *     se_runnable() = !!on_rq
  *
  *   group: [ see update_cfs_group() ]
  *     se_weight()   = tg->weight * grq->load_avg / tg->load_avg
- *     se_runnable() = se_weight(se) * grq->runnable_load_avg / grq->load_avg
+ *     se_runnable() = grq->h_nr_running
  *
- *   load_sum := runnable_sum
- *   load_avg = se_weight(se) * runnable_avg
+ *   runnable_sum = se_runnable() * runnable = grq->runnable_sum
+ *   runnable_avg = runnable_sum
  *
- *   runnable_load_sum := runnable_sum
- *   runnable_load_avg = se_runnable(se) * runnable_avg
- *
- *   XXX collapse load_sum and runnable_load_sum
+ *   load_sum := runnable
+ *   load_avg = se_weight(se) * load_sum
 *
 * cfq_rq:
 *
+ *   runnable_sum = \Sum se->avg.runnable_sum
+ *   runnable_avg = \Sum se->avg.runnable_avg
+ *
 *   load_sum = \Sum se_weight(se) * se->avg.load_sum
 *   load_avg = \Sum se->avg.load_avg
- *
- *   runnable_load_sum = \Sum se_runnable(se) * se->avg.runnable_load_sum
- *   runnable_load_avg = \Sum se->avg.runable_load_avg
 */
 
 int __update_load_avg_blocked_se(u64 now, struct sched_entity *se)
 {
 	if (___update_load_sum(now, &se->avg, 0, 0, 0)) {
-		___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
-
-		trace_sched_load_se(se);
-
+		___update_load_avg(&se->avg, se_weight(se));
+		trace_pelt_se_tp(se);
 		return 1;
 	}
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(__update_load_avg_blocked_se);
 
 int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	if (___update_load_sum(now, &se->avg, !!se->on_rq, !!se->on_rq,
-#ifdef CONFIG_ROCKCHIP_SCHED_PERFORMANCE_BIAS
-			       (sysctl_sched_performance_bias && se->on_rq) || (cfs_rq->curr == se))) {
-#else
+	if (___update_load_sum(now, &se->avg, !!se->on_rq, se_runnable(se),
 				cfs_rq->curr == se)) {
-#endif
 
-		___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
+		___update_load_avg(&se->avg, se_weight(se));
 		cfs_se_util_change(&se->avg);
-
-		trace_sched_load_se(se);
-
+		trace_pelt_se_tp(se);
 		return 1;
 	}
 
@@ -300,13 +401,11 @@
 {
 	if (___update_load_sum(now, &cfs_rq->avg,
 				scale_load_down(cfs_rq->load.weight),
-				scale_load_down(cfs_rq->runnable_weight),
+				cfs_rq->h_nr_running,
 				cfs_rq->curr != NULL)) {
 
-		___update_load_avg(&cfs_rq->avg, 1, 1);
-
-		trace_sched_load_cfs_rq(cfs_rq);
-
+		___update_load_avg(&cfs_rq->avg, 1);
+		trace_pelt_cfs_tp(cfs_rq);
 		return 1;
 	}
 
@@ -318,9 +417,9 @@
 *
 *   util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked
 *   util_sum = cpu_scale * load_sum
- *   runnable_load_sum = load_sum
+ *   runnable_sum = util_sum
 *
- *   load_avg and runnable_load_avg are not supported and meaningless.
+ *   load_avg and runnable_avg are not supported and meaningless.
 *
 */
 
@@ -331,10 +430,8 @@
 				running,
 				running)) {
 
-		___update_load_avg(&rq->avg_rt, 1, 1);
-
-		trace_sched_load_rt_rq(rq);
-
+		___update_load_avg(&rq->avg_rt, 1);
+		trace_pelt_rt_tp(rq);
 		return 1;
 	}
 
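For context, mainline feeds this from the scheduler tick and update_blocked_averages() with the PELT-adjusted clock, roughly as below (a sketch; the call sites live outside this file):

	update_rt_rq_load_avg(rq_clock_pelt(rq), rq,
			      rq->curr->sched_class == &rt_sched_class);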
@@ -346,7 +443,9 @@
 *
 *   util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked
 *   util_sum = cpu_scale * load_sum
- *   runnable_load_sum = load_sum
+ *   runnable_sum = util_sum
+ *
+ *   load_avg and runnable_avg are not supported and meaningless.
 *
 */
 
@@ -357,12 +456,44 @@
 				running,
 				running)) {
 
-		___update_load_avg(&rq->avg_dl, 1, 1);
+		___update_load_avg(&rq->avg_dl, 1);
+		trace_pelt_dl_tp(rq);
 		return 1;
 	}
 
 	return 0;
 }
+
+#ifdef CONFIG_SCHED_THERMAL_PRESSURE
+/*
+ * thermal:
+ *
+ *   load_sum = \Sum se->avg.load_sum but se->avg.load_sum is not tracked
+ *
+ *   util_avg and runnable_avg are not supported and meaningless.
+ *
+ * Unlike rt/dl utilization tracking, which tracks the time spent by a cpu
+ * running a rt/dl task through util_avg, the average thermal pressure is
+ * tracked through load_avg. This is because the thermal pressure signal is
+ * time-weighted "delta" capacity, unlike util_avg which is binary.
+ *
+ *   "delta capacity" = actual capacity -
+ *                      capped capacity of a cpu due to a thermal event.
+ */
+
+int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity)
+{
+	if (___update_load_sum(now, &rq->avg_thermal,
+			       capacity,
+			       capacity,
+			       capacity)) {
+		___update_load_avg(&rq->avg_thermal, 1);
+		trace_pelt_thermal_tp(rq);
+		return 1;
+	}
+
+	return 0;
+}
+#endif
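update_thermal_load_avg() expects the time-weighted capped-capacity delta described in the comment, not a utilization. A sketch of the expected caller, modeled on the mainline tick path (the actual wiring lives outside this file):

static void example_update_thermal(struct rq *rq)
{
	u64 thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq));

	update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure);
}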
 
 #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
 /*
@@ -370,7 +501,9 @@
 *
 *   util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked
 *   util_sum = cpu_scale * load_sum
- *   runnable_load_sum = load_sum
+ *   runnable_sum = util_sum
+ *
+ *   load_avg and runnable_avg are not supported and meaningless.
 *
 */
 
@@ -384,7 +517,7 @@
 	 * reflect the real amount of computation
 	 */
 	running = cap_scale(running, arch_scale_freq_capacity(cpu_of(rq)));
-	running = cap_scale(running, arch_scale_cpu_capacity(NULL, cpu_of(rq)));
+	running = cap_scale(running, arch_scale_cpu_capacity(cpu_of(rq)));
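cap_scale(v, s), now presumably provided by pelt.h after its removal from this file above, is (v * s) >> SCHED_CAPACITY_SHIFT, i.e. v scaled by s/1024. The two applications therefore compose: an irq delta is first scaled by the current frequency relative to the maximum (arch_scale_freq_capacity()) and then by this CPU's capacity relative to the biggest CPU (arch_scale_cpu_capacity()), making irq pressure comparable across CPUs and frequencies. For instance, at half frequency (512) on a mid-size core (768), running becomes running * 512/1024 * 768/1024 = 0.375 * running.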
 
 	/*
 	 * We know the time that has been used by interrupt since last update
@@ -406,9 +539,58 @@
 				1,
 				1);
 
-	if (ret)
-		___update_load_avg(&rq->avg_irq, 1, 1);
+	if (ret) {
+		___update_load_avg(&rq->avg_irq, 1);
+		trace_pelt_irq_tp(rq);
+	}
 
 	return ret;
 }
 #endif
+
+#include <trace/hooks/sched.h>
+DEFINE_PER_CPU(u64, clock_task_mult);
+
+unsigned int sysctl_sched_pelt_multiplier = 1;
+__read_mostly unsigned int sched_pelt_lshift;
+
+int sched_pelt_multiplier(struct ctl_table *table, int write, void *buffer,
+			  size_t *lenp, loff_t *ppos)
+{
+	static DEFINE_MUTEX(mutex);
+	unsigned int old;
+	int ret;
+
+	mutex_lock(&mutex);
+
+	old = sysctl_sched_pelt_multiplier;
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	if (ret)
+		goto undo;
+	if (!write)
+		goto done;
+
+	trace_android_vh_sched_pelt_multiplier(old, sysctl_sched_pelt_multiplier, &ret);
+	if (ret)
+		goto undo;
+
+	switch (sysctl_sched_pelt_multiplier) {
+	case 1:
+		fallthrough;
+	case 2:
+		fallthrough;
+	case 4:
+		WRITE_ONCE(sched_pelt_lshift,
+			   sysctl_sched_pelt_multiplier >> 1);
+		goto done;
+	default:
+		ret = -EINVAL;
+	}
+
+undo:
+	sysctl_sched_pelt_multiplier = old;
+done:
+	mutex_unlock(&mutex);
+
+	return ret;
+}
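The accepted multipliers map onto a left shift of PELT time: 1 >> 1 == 0, 2 >> 1 == 1 and 4 >> 1 == 2, so writing m to the sysctl makes the signal ramp m times faster, i.e. an effective half-life of 32 ms, 16 ms or 8 ms with the default table. How the per-cpu clock_task_mult is advanced using sched_pelt_lshift is not visible in this hunk; a hypothetical sketch of that consumer (every name beyond clock_task_mult and sched_pelt_lshift is illustrative, the real update presumably lives in the core clock path):

static inline void advance_clock_task_mult(struct rq *rq, u64 delta)
{
	/* stretch task-clock deltas by 1x/2x/4x before PELT sees them */
	delta <<= READ_ONCE(sched_pelt_lshift);
	per_cpu(clock_task_mult, cpu_of(rq)) += delta;
}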