.. | ..
28 | 28 | #include "sched.h"
29 | 29 | #include "pelt.h"
30 | 30 |
31 | | -#include <trace/events/sched.h>
| 31 | +int pelt_load_avg_period = PELT32_LOAD_AVG_PERIOD;
| 32 | +int sysctl_sched_pelt_period = PELT32_LOAD_AVG_PERIOD;
| 33 | +int pelt_load_avg_max = PELT32_LOAD_AVG_MAX;
| 34 | +const u32 *pelt_runnable_avg_yN_inv = pelt32_runnable_avg_yN_inv;
| 35 | +
| 36 | +int get_pelt_halflife(void)
| 37 | +{
| 38 | +        return pelt_load_avg_period;
| 39 | +}
| 40 | +EXPORT_SYMBOL_GPL(get_pelt_halflife);
| 41 | +
| 42 | +static int __set_pelt_halflife(void *data)
| 43 | +{
| 44 | +        int rc = 0;
| 45 | +        int num = *(int *)data;
| 46 | +
| 47 | +        switch (num) {
| 48 | +        case PELT8_LOAD_AVG_PERIOD:
| 49 | +                pelt_load_avg_period = PELT8_LOAD_AVG_PERIOD;
| 50 | +                pelt_load_avg_max = PELT8_LOAD_AVG_MAX;
| 51 | +                pelt_runnable_avg_yN_inv = pelt8_runnable_avg_yN_inv;
| 52 | +                pr_info("PELT half life is set to %dms\n", num);
| 53 | +                break;
| 54 | +        case PELT32_LOAD_AVG_PERIOD:
| 55 | +                pelt_load_avg_period = PELT32_LOAD_AVG_PERIOD;
| 56 | +                pelt_load_avg_max = PELT32_LOAD_AVG_MAX;
| 57 | +                pelt_runnable_avg_yN_inv = pelt32_runnable_avg_yN_inv;
| 58 | +                pr_info("PELT half life is set to %dms\n", num);
| 59 | +                break;
| 60 | +        default:
| 61 | +                rc = -EINVAL;
| 62 | +                pr_err("Failed to set PELT half life to %dms, the current value is %dms\n",
| 63 | +                       num, pelt_load_avg_period);
| 64 | +        }
| 65 | +
| 66 | +        sysctl_sched_pelt_period = pelt_load_avg_period;
| 67 | +
| 68 | +        return rc;
| 69 | +}
| 70 | +
| 71 | +int set_pelt_halflife(int num)
| 72 | +{
| 73 | +        return stop_machine(__set_pelt_halflife, &num, NULL);
| 74 | +}
| 75 | +EXPORT_SYMBOL_GPL(set_pelt_halflife);
| 76 | +
| 77 | +int sched_pelt_period_update_handler(struct ctl_table *table, int write,
| 78 | +                                     void *buffer, size_t *lenp, loff_t *ppos)
| 79 | +{
| 80 | +        int ret = proc_dointvec(table, write, buffer, lenp, ppos);
| 81 | +
| 82 | +        if (ret || !write)
| 83 | +                return ret;
| 84 | +
| 85 | +        set_pelt_halflife(sysctl_sched_pelt_period);
| 86 | +
| 87 | +        return 0;
| 88 | +}
| 89 | +
| 90 | +static int __init set_pelt(char *str)
| 91 | +{
| 92 | +        int rc, num;
| 93 | +
| 94 | +        rc = kstrtoint(str, 0, &num);
| 95 | +        if (rc) {
| 96 | +                pr_err("%s: kstrtoint failed. rc=%d\n", __func__, rc);
| 97 | +                return 0;
| 98 | +        }
| 99 | +
| 100 | +        __set_pelt_halflife(&num);
| 101 | +        return rc;
| 102 | +}
| 103 | +
| 104 | +early_param("pelt", set_pelt);
32 | 105 |
33 | 106 | /*
34 | 107 |  * Approximate:
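The block above makes the PELT half-life switchable at run time: only 8 ms and 32 ms are accepted (anything else returns -EINVAL and keeps the current period), the tables are swapped under `stop_machine()` so no PELT update races with a half-swapped table, and `early_param("pelt", set_pelt)` lets the same choice be made from the kernel command line with `pelt=8` or `pelt=32`. For readers wondering where the `PELT8_*`/`PELT32_*` constants and the `pelt*_runnable_avg_yN_inv[]` tables come from, here is a minimal userspace sketch modelled on the kernel's `Documentation/scheduler/sched-pelt.c` generator; the macro names used in this patch are assumed to be produced by something equivalent:

```c
/* gcc -O2 -o pelt-gen pelt-gen.c -lm */
#include <math.h>
#include <stdio.h>

int main(void)
{
        const int halflife = 32;                        /* use 8 for the faster table */
        const double y = pow(0.5, 1.0 / halflife);      /* chosen so y^halflife == 0.5 */
        unsigned long long y_inv = (unsigned long long)(((1ULL << 32) - 1) * y);
        unsigned long long max = 1024, last = 0;
        int n;

        /* runnable_avg_yN_inv[n] is y^n scaled up by 2^32 */
        for (n = 0; n < halflife; n++)
                printf("0x%08x%s", (unsigned int)(((1ULL << 32) - 1) * pow(y, n)),
                       (n % 6 == 5) ? ",\n" : ", ");

        /* LOAD_AVG_MAX: fixed point of max = 1024 + y*max in the same 32.32 arithmetic */
        while (last != max) {
                last = max;
                max = 1024 + ((max * y_inv) >> 32);
        }
        printf("\nLOAD_AVG_MAX = %llu\n", max);         /* 47742 for the 32 ms half-life */
        return 0;
}
```

A shorter half-life makes the util/load signals ramp up and decay faster at the cost of more noise, which is presumably why `get_pelt_halflife()`/`set_pelt_halflife()` are exported for vendor modules.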
.. | .. |
56 | 129 |                 local_n %= LOAD_AVG_PERIOD;
57 | 130 |         }
58 | 131 |
59 | | -        val = mul_u64_u32_shr(val, runnable_avg_yN_inv[local_n], 32);
| 132 | +        val = mul_u64_u32_shr(val, pelt_runnable_avg_yN_inv[local_n], 32);
60 | 133 |         return val;
61 | 134 | }
62 | 135 |
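With the table now reached through the `pelt_runnable_avg_yN_inv` pointer, `decay_load()` is arithmetically unchanged. For reference, once the elided part of the function has folded whole half-lives by shifting `val` right, the remaining exponent satisfies `n < LOAD_AVG_PERIOD` and the visible line computes:

$$\mathrm{decay\_load}(v, n) \;=\; \big(v \cdot \mathrm{yN\_inv}[n]\big) \gg 32 \;\approx\; v\,y^{\,n}, \qquad \mathrm{yN\_inv}[n] = \big\lfloor y^{\,n} \cdot 2^{32} \big\rfloor,\quad y^{\mathrm{LOAD\_AVG\_PERIOD}} = \tfrac{1}{2}.$$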
.. | .. |
82 | 155 |
83 | 156 |         return c1 + c2 + c3;
84 | 157 | }
85 | | -
86 | | -#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
87 | 158 |
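`cap_scale()` is only dropped from this file, not from the scheduler: `update_irq_load_avg()` further down still uses it, so the macro has evidently been moved into one of the shared headers included at the top rather than deleted. For reference, the definition being removed scales a value by a capacity expressed relative to SCHED_CAPACITY_SCALE (1024):

```c
#define cap_scale(v, s)         ((v)*(s) >> SCHED_CAPACITY_SHIFT)
```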
88 | 159 | /*
89 | 160 |  * Accumulate the three separate parts of the sum; d1 the remainder
.. | .. |
121 | 192 |          */
122 | 193 |         if (periods) {
123 | 194 |                 sa->load_sum = decay_load(sa->load_sum, periods);
124 | | -                sa->runnable_load_sum =
125 | | -                        decay_load(sa->runnable_load_sum, periods);
| 195 | +                sa->runnable_sum =
| 196 | +                        decay_load(sa->runnable_sum, periods);
126 | 197 |                 sa->util_sum = decay_load((u64)(sa->util_sum), periods);
127 | 198 |
128 | 199 |                 /*
129 | 200 |                  * Step 2
130 | 201 |                  */
131 | 202 |                 delta %= 1024;
132 | | -                contrib = __accumulate_pelt_segments(periods,
133 | | -                                1024 - sa->period_contrib, delta);
| 203 | +                if (load) {
| 204 | +                        /*
| 205 | +                         * This relies on the:
| 206 | +                         *
| 207 | +                         *   if (!load)
| 208 | +                         *           runnable = running = 0;
| 209 | +                         *
| 210 | +                         * clause from ___update_load_sum(); this results in
| 211 | +                         * the below usage of @contrib disappearing entirely,
| 212 | +                         * so no point in calculating it.
| 213 | +                         */
| 214 | +                        contrib = __accumulate_pelt_segments(periods,
| 215 | +                                        1024 - sa->period_contrib, delta);
| 216 | +                }
134 | 217 |         }
135 | 218 |         sa->period_contrib = delta;
136 | 219 |
137 | 220 |         if (load)
138 | 221 |                 sa->load_sum += load * contrib;
139 | 222 |         if (runnable)
140 | | -                sa->runnable_load_sum += runnable * contrib;
| 223 | +                sa->runnable_sum += runnable * contrib << SCHED_CAPACITY_SHIFT;
141 | 224 |         if (running)
142 | 225 |                 sa->util_sum += contrib << SCHED_CAPACITY_SHIFT;
143 | 226 |
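The `contrib` computed inside the new `if (load)` guard is the usual three-segment accumulation described in the (elided) comment above `accumulate_sum()`: `d1` is the unfinished tail of the last period, the middle term covers the `p - 1` complete periods, and `d3` is the head of the current one. Skipping it when `load == 0` is safe because `___update_load_sum()` then forces `runnable` and `running` to zero as well, so `contrib` would only ever be multiplied into zero. For reference, with `u` the prior sum:

$$u' \;=\; (u + d_1)\,y^{\,p} \;+\; 1024 \sum_{n=1}^{p-1} y^{\,n} \;+\; d_3, \qquad 1024 \sum_{n=1}^{p-1} y^{\,n} \;=\; \mathrm{LOAD\_AVG\_MAX} - \mathrm{decay\_load}(\mathrm{LOAD\_AVG\_MAX}, p) - 1024,$$

which is exactly what `__accumulate_pelt_segments()` returns as `c1 + c2 + c3` above.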
.. | .. |
205 | 288 |          * This means that weight will be 0 but not running for a sched_entity
206 | 289 |          * but also for a cfs_rq if the latter becomes idle. As an example,
207 | 290 |          * this happens during idle_balance() which calls
208 | | -         * update_blocked_averages()
| 291 | +         * update_blocked_averages().
| 292 | +         *
| 293 | +         * Also see the comment in accumulate_sum().
209 | 294 |          */
210 | 295 |         if (!load)
211 | 296 |                 runnable = running = 0;
.. | .. |
223 | 308 |         return 1;
224 | 309 | }
225 | 310 |
| 311 | +/*
| 312 | + * When syncing *_avg with *_sum, we must take into account the current
| 313 | + * position in the PELT segment, otherwise the remaining part of the segment
| 314 | + * will be considered as idle time whereas it's not yet elapsed and this will
| 315 | + * generate unwanted oscillation in the range [1002..1024[.
| 316 | + *
| 317 | + * The max value of *_sum varies with the position in the time segment and is
| 318 | + * equal to:
| 319 | + *
| 320 | + *   LOAD_AVG_MAX*y + sa->period_contrib
| 321 | + *
| 322 | + * which can be simplified into:
| 323 | + *
| 324 | + *   LOAD_AVG_MAX - 1024 + sa->period_contrib
| 325 | + *
| 326 | + * because LOAD_AVG_MAX*y == LOAD_AVG_MAX-1024
| 327 | + *
| 328 | + * The same care must be taken when a sched entity is added, updated or
| 329 | + * removed from a cfs_rq and we need to update sched_avg. Scheduler entities
| 330 | + * and the cfs_rq to which they are attached have the same position in the
| 331 | + * time segment because they use the same clock. This means that we can use
| 332 | + * the period_contrib of cfs_rq when updating the sched_avg of a sched_entity
| 333 | + * if it's more convenient.
| 334 | + */
226 | 335 | static __always_inline void
227 | | -___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runnable)
| 336 | +___update_load_avg(struct sched_avg *sa, unsigned long load)
228 | 337 | {
229 | | -        u32 divider = LOAD_AVG_MAX - 1024 + sa->period_contrib;
| 338 | +        u32 divider = get_pelt_divider(sa);
230 | 339 |
231 | 340 |         /*
232 | 341 |          * Step 2: update *_avg.
233 | 342 |          */
234 | 343 |         sa->load_avg = div_u64(load * sa->load_sum, divider);
235 | | -        sa->runnable_load_avg = div_u64(runnable * sa->runnable_load_sum, divider);
| 344 | +        sa->runnable_avg = div_u64(sa->runnable_sum, divider);
236 | 345 |         WRITE_ONCE(sa->util_avg, sa->util_sum / divider);
237 | 346 | }
238 | 347 |
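`get_pelt_divider()` itself is not part of this hunk; in mainline it lives in kernel/sched/pelt.h and is simply the old inline expression factored out so that every `*_sum`-to-`*_avg` conversion uses the same divisor. A sketch under that assumption (the `PELT_MIN_DIVIDER` spelling is how later kernels name the constant part; earlier ones open-code `LOAD_AVG_MAX - 1024`):

```c
#define PELT_MIN_DIVIDER        (LOAD_AVG_MAX - 1024)

static inline u32 get_pelt_divider(struct sched_avg *avg)
{
        /*
         * LOAD_AVG_MAX is the fixed point of max = 1024 + y*max (the fully
         * saturated geometric sum), so decaying it by one period leaves
         * LOAD_AVG_MAX*y == LOAD_AVG_MAX - 1024.  Adding back the part of
         * the current period that has already elapsed (period_contrib)
         * gives the true attainable maximum of *_sum right now, which is
         * the correct divisor for the *_avg computation above.
         */
        return PELT_MIN_DIVIDER + avg->period_contrib;
}
```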
.. | .. |
240 | 349 |  * sched_entity:
241 | 350 |  *
242 | 351 |  *   task:
243 | | - *     se_runnable() == se_weight()
| 352 | + *     se_weight()   = se->load.weight
| 353 | + *     se_runnable() = !!on_rq
244 | 354 |  *
245 | 355 |  *   group: [ see update_cfs_group() ]
246 | 356 |  *     se_weight()   = tg->weight * grq->load_avg / tg->load_avg
247 | | - *     se_runnable() = se_weight(se) * grq->runnable_load_avg / grq->load_avg
| 357 | + *     se_runnable() = grq->h_nr_running
248 | 358 |  *
249 | | - *   load_sum := runnable_sum
250 | | - *   load_avg = se_weight(se) * runnable_avg
| 359 | + *   runnable_sum = se_runnable() * runnable = grq->runnable_sum
| 360 | + *   runnable_avg = runnable_sum
251 | 361 |  *
252 | | - *   runnable_load_sum := runnable_sum
253 | | - *   runnable_load_avg = se_runnable(se) * runnable_avg
254 | | - *
255 | | - *   XXX collapse load_sum and runnable_load_sum
| 362 | + *   load_sum := runnable
| 363 | + *   load_avg = se_weight(se) * load_sum
256 | 364 |  *
257 | 365 |  * cfs_rq:
258 | 366 |  *
| 367 | + *   runnable_sum = \Sum se->avg.runnable_sum
| 368 | + *   runnable_avg = \Sum se->avg.runnable_avg
| 369 | + *
259 | 370 |  *   load_sum = \Sum se_weight(se) * se->avg.load_sum
260 | 371 |  *   load_avg = \Sum se->avg.load_avg
261 | | - *
262 | | - *   runnable_load_sum = \Sum se_runnable(se) * se->avg.runnable_load_sum
263 | | - *   runnable_load_avg = \Sum se->avg.runable_load_avg
264 | 372 |  */
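The helpers named in this comment are defined elsewhere in the scheduler; the sketch below merely restates what the comment says they evaluate to and is not a copy of any particular kernel version (mainline, for instance, routes the group case through `se->runnable_weight`, which the enqueue/dequeue paths keep equal to `grq->h_nr_running`):

```c
/* Hypothetical restatement of the accounting rules documented above. */
static inline unsigned long se_weight(struct sched_entity *se)
{
        return scale_load_down(se->load.weight);
}

static inline unsigned long se_runnable(struct sched_entity *se)
{
        if (entity_is_task(se))
                return !!se->on_rq;             /* a task is runnable or it is not */

        return se->my_q->h_nr_running;          /* a group contributes its runnable tasks */
}
```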
265 | 373 |
266 | 374 | int __update_load_avg_blocked_se(u64 now, struct sched_entity *se)
267 | 375 | {
268 | 376 |         if (___update_load_sum(now, &se->avg, 0, 0, 0)) {
269 | | -                ___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
270 | | -
271 | | -                trace_sched_load_se(se);
272 | | -
| 377 | +                ___update_load_avg(&se->avg, se_weight(se));
| 378 | +                trace_pelt_se_tp(se);
273 | 379 |                 return 1;
274 | 380 |         }
275 | 381 |
276 | 382 |         return 0;
277 | 383 | }
| 384 | +EXPORT_SYMBOL_GPL(__update_load_avg_blocked_se);
278 | 385 |
279 | 386 | int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se)
280 | 387 | {
281 | | -        if (___update_load_sum(now, &se->avg, !!se->on_rq, !!se->on_rq,
282 | | -#ifdef CONFIG_ROCKCHIP_SCHED_PERFORMANCE_BIAS
283 | | -                        (sysctl_sched_performance_bias && se->on_rq) || (cfs_rq->curr == se))) {
284 | | -#else
| 388 | +        if (___update_load_sum(now, &se->avg, !!se->on_rq, se_runnable(se),
285 | 389 |                         cfs_rq->curr == se)) {
286 | | -#endif
287 | 390 |
288 | | -                ___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
| 391 | +                ___update_load_avg(&se->avg, se_weight(se));
289 | 392 |                 cfs_se_util_change(&se->avg);
290 | | -
291 | | -                trace_sched_load_se(se);
292 | | -
| 393 | +                trace_pelt_se_tp(se);
293 | 394 |                 return 1;
294 | 395 |         }
295 | 396 |
.. | .. |
300 | 401 | {
301 | 402 |         if (___update_load_sum(now, &cfs_rq->avg,
302 | 403 |                         scale_load_down(cfs_rq->load.weight),
303 | | -                        scale_load_down(cfs_rq->runnable_weight),
| 404 | +                        cfs_rq->h_nr_running,
304 | 405 |                         cfs_rq->curr != NULL)) {
305 | 406 |
306 | | -                ___update_load_avg(&cfs_rq->avg, 1, 1);
307 | | -
308 | | -                trace_sched_load_cfs_rq(cfs_rq);
309 | | -
| 407 | +                ___update_load_avg(&cfs_rq->avg, 1);
| 408 | +                trace_pelt_cfs_tp(cfs_rq);
310 | 409 |                 return 1;
311 | 410 |         }
312 | 411 |
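With `h_nr_running` as the runnable input and the `<< SCHED_CAPACITY_SHIFT` scaling added in `accumulate_sum()`, the cfs_rq runnable signal now converges to the average number of runnable tasks expressed in capacity units: a queue that stays busy with N runnable tasks saturates `runnable_sum` at roughly N * 1024 * divider, so after the division in `___update_load_avg()`:

$$\mathrm{cfs\_rq\!\rightarrow\!avg.runnable\_avg} \;\approx\; N \cdot 1024, \qquad N = \texttt{h\_nr\_running}.$$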
.. | .. |
318 | 417 |  *
319 | 418 |  *   util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked
320 | 419 |  *   util_sum = cpu_scale * load_sum
321 | | - *   runnable_load_sum = load_sum
| 420 | + *   runnable_sum = util_sum
322 | 421 |  *
323 | | - *   load_avg and runnable_load_avg are not supported and meaningless.
| 422 | + *   load_avg and runnable_avg are not supported and meaningless.
324 | 423 |  *
325 | 424 |  */
326 | 425 |
.. | .. |
331 | 430 |                         running,
332 | 431 |                         running)) {
333 | 432 |
334 | | -                ___update_load_avg(&rq->avg_rt, 1, 1);
335 | | -
336 | | -                trace_sched_load_rt_rq(rq);
337 | | -
| 433 | +                ___update_load_avg(&rq->avg_rt, 1);
| 434 | +                trace_pelt_rt_tp(rq);
338 | 435 |                 return 1;
339 | 436 |         }
340 | 437 |
.. | .. |
346 | 443 |  *
347 | 444 |  *   util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked
348 | 445 |  *   util_sum = cpu_scale * load_sum
349 | | - *   runnable_load_sum = load_sum
| 446 | + *   runnable_sum = util_sum
| 447 | + *
| 448 | + *   load_avg and runnable_avg are not supported and meaningless.
350 | 449 |  *
351 | 450 |  */
352 | 451 |
.. | .. |
357 | 456 |                         running,
358 | 457 |                         running)) {
359 | 458 |
360 | | -                ___update_load_avg(&rq->avg_dl, 1, 1);
| 459 | +                ___update_load_avg(&rq->avg_dl, 1);
| 460 | +                trace_pelt_dl_tp(rq);
361 | 461 |                 return 1;
362 | 462 |         }
363 | 463 |
364 | 464 |         return 0;
365 | 465 | }
| 466 | +
| 467 | +#ifdef CONFIG_SCHED_THERMAL_PRESSURE
| 468 | +/*
| 469 | + * thermal:
| 470 | + *
| 471 | + *   load_sum = \Sum se->avg.load_sum but se->avg.load_sum is not tracked
| 472 | + *
| 473 | + *   util_avg and runnable_avg are not supported and meaningless.
| 474 | + *
| 475 | + * Unlike rt/dl utilization tracking that track time spent by a cpu
| 476 | + * running a rt/dl task through util_avg, the average thermal pressure is
| 477 | + * tracked through load_avg. This is because the thermal pressure signal is
| 478 | + * time weighted "delta" capacity unlike util_avg which is binary.
| 479 | + * "delta capacity" = actual capacity -
| 480 | + *                    capped capacity of a cpu due to a thermal event.
| 481 | + */
| 482 | +
| 483 | +int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity)
| 484 | +{
| 485 | +        if (___update_load_sum(now, &rq->avg_thermal,
| 486 | +                                capacity,
| 487 | +                                capacity,
| 488 | +                                capacity)) {
| 489 | +                ___update_load_avg(&rq->avg_thermal, 1);
| 490 | +                trace_pelt_thermal_tp(rq);
| 491 | +                return 1;
| 492 | +        }
| 493 | +
| 494 | +        return 0;
| 495 | +}
| 496 | +#endif
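`update_thermal_load_avg()` has no caller in this file; in mainline the fair-class housekeeping path feeds it the capacity lost to thermal capping, roughly as sketched below. The wrapper name is made up for illustration and the exact clock/helper names can differ between kernel versions:

```c
/*
 * Sketch of the mainline call site (fair.c, __update_blocked_others()):
 * the "capacity" argument is the capacity lost to thermal capping, so
 * rq->avg_thermal.load_avg tracks the average amount of capping over time.
 */
static bool update_others_thermal(struct rq *rq)
{
        unsigned long thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq));

        return update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure);
}
```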
366 | 497 |
367 | 498 | #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
368 | 499 | /*
.. | .. |
370 | 501 |  *
371 | 502 |  *   util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked
372 | 503 |  *   util_sum = cpu_scale * load_sum
373 | | - *   runnable_load_sum = load_sum
| 504 | + *   runnable_sum = util_sum
| 505 | + *
| 506 | + *   load_avg and runnable_avg are not supported and meaningless.
374 | 507 |  *
375 | 508 |  */
376 | 509 |
.. | .. |
384 | 517 |          * reflect the real amount of computation
385 | 518 |          */
386 | 519 |         running = cap_scale(running, arch_scale_freq_capacity(cpu_of(rq)));
387 | | -        running = cap_scale(running, arch_scale_cpu_capacity(NULL, cpu_of(rq)));
| 520 | +        running = cap_scale(running, arch_scale_cpu_capacity(cpu_of(rq)));
388 | 521 |
389 | 522 |         /*
390 | 523 |          * We know the time that has been used by interrupt since last update
.. | .. |
406 | 539 |                                 1,
407 | 540 |                                 1);
408 | 541 |
409 | | -        if (ret)
410 | | -                ___update_load_avg(&rq->avg_irq, 1, 1);
| 542 | +        if (ret) {
| 543 | +                ___update_load_avg(&rq->avg_irq, 1);
| 544 | +                trace_pelt_irq_tp(rq);
| 545 | +        }
411 | 546 |
412 | 547 |         return ret;
413 | 548 | }
414 | 549 | #endif
| 550 | +
| 551 | +#include <trace/hooks/sched.h>
| 552 | +DEFINE_PER_CPU(u64, clock_task_mult);
| 553 | +
| 554 | +unsigned int sysctl_sched_pelt_multiplier = 1;
| 555 | +__read_mostly unsigned int sched_pelt_lshift;
| 556 | +
| 557 | +int sched_pelt_multiplier(struct ctl_table *table, int write, void *buffer,
| 558 | +                          size_t *lenp, loff_t *ppos)
| 559 | +{
| 560 | +        static DEFINE_MUTEX(mutex);
| 561 | +        unsigned int old;
| 562 | +        int ret;
| 563 | +
| 564 | +        mutex_lock(&mutex);
| 565 | +
| 566 | +        old = sysctl_sched_pelt_multiplier;
| 567 | +        ret = proc_dointvec(table, write, buffer, lenp, ppos);
| 568 | +        if (ret)
| 569 | +                goto undo;
| 570 | +        if (!write)
| 571 | +                goto done;
| 572 | +
| 573 | +        trace_android_vh_sched_pelt_multiplier(old, sysctl_sched_pelt_multiplier, &ret);
| 574 | +        if (ret)
| 575 | +                goto undo;
| 576 | +
| 577 | +        switch (sysctl_sched_pelt_multiplier) {
| 578 | +        case 1:
| 579 | +                fallthrough;
| 580 | +        case 2:
| 581 | +                fallthrough;
| 582 | +        case 4:
| 583 | +                WRITE_ONCE(sched_pelt_lshift,
| 584 | +                           sysctl_sched_pelt_multiplier >> 1);
| 585 | +                goto done;
| 586 | +        default:
| 587 | +                ret = -EINVAL;
| 588 | +        }
| 589 | +
| 590 | +undo:
| 591 | +        sysctl_sched_pelt_multiplier = old;
| 592 | +done:
| 593 | +        mutex_unlock(&mutex);
| 594 | +
| 595 | +        return ret;
| 596 | +}
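The sysctl handler above only records the choice: writing 1, 2 or 4 stores `sysctl_sched_pelt_multiplier >> 1` (i.e. 0, 1 or 2) in `sched_pelt_lshift`, and any other value is rejected and rolled back under the mutex. The per-CPU `clock_task_mult` declared here is the clock that actually consumes the shift elsewhere in the vendor tree; the net effect is that PELT sees time passing 2x or 4x faster, so signals ramp up and decay in half or a quarter of the usual time without swapping the y^n tables. A hypothetical sketch of such a consumer (names and placement are assumptions, not part of this hunk):

```c
/* Hypothetical consumer: advance the multiplied task clock that PELT samples. */
static inline void update_rq_clock_task_mult(struct rq *rq, s64 delta)
{
        delta <<= READ_ONCE(sched_pelt_lshift);         /* 1x, 2x or 4x time */
        per_cpu(clock_task_mult, cpu_of(rq)) += delta;
}
```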
---|