forked from ~ljy/RK356X_SDK_RELEASE

Author: hc
Commit: b22da3d8526a935aa31e086e63f60ff3246cb61c (2023-12-09)
File: kernel/kernel/sched/cputime.c
@@ -1,8 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Simple CPU accounting cgroup controller
  */
 #include <linux/cpufreq_times.h>
 #include "sched.h"
+#include <trace/hooks/sched.h>
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 
@@ -18,6 +20,7 @@
  * compromise in place of having locks on each irq in account_system_time.
  */
 DEFINE_PER_CPU(struct irqtime, cpu_irqtime);
+EXPORT_PER_CPU_SYMBOL_GPL(cpu_irqtime);
 
 static int sched_clock_irqtime;
 
@@ -44,12 +47,13 @@
 }
 
 /*
- * Called before incrementing preempt_count on {soft,}irq_enter
+ * Called after incrementing preempt_count on {soft,}irq_enter
  * and before decrementing preempt_count on {soft,}irq_exit.
  */
-void irqtime_account_irq(struct task_struct *curr)
+void irqtime_account_irq(struct task_struct *curr, unsigned int offset)
 {
 	struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
+	unsigned int pc;
 	s64 delta;
 	int cpu;
 
@@ -59,6 +63,7 @@
 	cpu = smp_processor_id();
 	delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
 	irqtime->irq_start_time += delta;
+	pc = irq_count() - offset;
 
 	/*
 	 * We do not account for softirq time from ksoftirqd here.
@@ -66,10 +71,12 @@
 	 * in that case, so as not to confuse scheduler with a special task
 	 * that do not consume any time, but still wants to run.
 	 */
-	if (hardirq_count())
+	if (pc & HARDIRQ_MASK)
 		irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
-	else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
+	else if ((pc & SOFTIRQ_OFFSET) && curr != this_cpu_ksoftirqd())
 		irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
+
+	trace_android_rvh_account_irq(curr, cpu, delta);
 }
 EXPORT_SYMBOL_GPL(irqtime_account_irq);
 
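Note on the reworked hunk above: irqtime_account_irq() now receives the preempt_count offset that the current {soft,}hardirq entry has already added, and classifies the accumulated delta by masking irq_count() - offset, i.e. against the context that was running before this entry. A minimal sketch of how entry/exit paths could pass that offset; the wrapper names below are illustrative assumptions, not part of this diff:

static inline void example_account_hardirq_enter(struct task_struct *tsk)
{
	/*
	 * Assumed caller: preempt_count already includes this hardirq, so
	 * subtracting HARDIRQ_OFFSET classifies the pending delta against
	 * whatever was interrupted (task, softirq, or an outer hardirq).
	 */
	irqtime_account_irq(tsk, HARDIRQ_OFFSET);
}

static inline void example_account_hardirq_exit(struct task_struct *tsk)
{
	/*
	 * Assumed caller: on exit the hardirq offset has not been removed
	 * yet; passing 0 leaves HARDIRQ_MASK set in pc, so the handler's
	 * own runtime is charged to CPUTIME_IRQ.
	 */
	irqtime_account_irq(tsk, 0);
}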
@@ -361,7 +368,7 @@
  * softirq as those do not count in task exec_runtime any more.
  */
 static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
-					 struct rq *rq, int ticks)
+					 int ticks)
 {
 	u64 other, cputime = TICK_NSEC * ticks;
 
@@ -387,65 +394,59 @@
 		account_system_index_time(p, cputime, CPUTIME_SOFTIRQ);
 	} else if (user_tick) {
 		account_user_time(p, cputime);
-	} else if (p == rq->idle) {
+	} else if (p == this_rq()->idle) {
 		account_idle_time(cputime);
 	} else if (p->flags & PF_VCPU) { /* System time or guest time */
 		account_guest_time(p, cputime);
 	} else {
 		account_system_index_time(p, cputime, CPUTIME_SYSTEM);
 	}
+	trace_android_vh_irqtime_account_process_tick(p, this_rq(), user_tick, ticks);
 }
 
 static void irqtime_account_idle_ticks(int ticks)
 {
-	struct rq *rq = this_rq();
-
-	irqtime_account_process_tick(current, 0, rq, ticks);
+	irqtime_account_process_tick(current, 0, ticks);
 }
 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
 static inline void irqtime_account_idle_ticks(int ticks) { }
 static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
-						struct rq *rq, int nr_ticks) { }
+						int nr_ticks) { }
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
 
 /*
  * Use precise platform statistics if available:
  */
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+
 # ifndef __ARCH_HAS_VTIME_TASK_SWITCH
-void vtime_common_task_switch(struct task_struct *prev)
+void vtime_task_switch(struct task_struct *prev)
 {
 	if (is_idle_task(prev))
 		vtime_account_idle(prev);
 	else
-		vtime_account_system(prev);
+		vtime_account_kernel(prev);
 
 	vtime_flush(prev);
 	arch_vtime_task_switch(prev);
 }
 # endif
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 
-
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-/*
- * Archs that account the whole time spent in the idle task
- * (outside irq) as idle time can rely on this and just implement
- * vtime_account_system() and vtime_account_idle(). Archs that
- * have other meaning of the idle time (s390 only includes the
- * time spent by the CPU when it's in low power mode) must override
- * vtime_account().
- */
-#ifndef __ARCH_HAS_VTIME_ACCOUNT
-void vtime_account_irq_enter(struct task_struct *tsk)
+void vtime_account_irq(struct task_struct *tsk, unsigned int offset)
 {
-	if (!in_interrupt() && is_idle_task(tsk))
+	unsigned int pc = irq_count() - offset;
+
+	if (pc & HARDIRQ_OFFSET) {
+		vtime_account_hardirq(tsk);
+	} else if (pc & SOFTIRQ_OFFSET) {
+		vtime_account_softirq(tsk);
+	} else if (!IS_ENABLED(CONFIG_HAVE_VIRT_CPU_ACCOUNTING_IDLE) &&
+		   is_idle_task(tsk)) {
 		vtime_account_idle(tsk);
-	else
-		vtime_account_system(tsk);
+	} else {
+		vtime_account_kernel(tsk);
+	}
 }
-EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
-#endif /* __ARCH_HAS_VTIME_ACCOUNT */
 
 void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
 		    u64 *ut, u64 *st)
@@ -470,6 +471,7 @@
 	*ut = cputime.utime;
 	*st = cputime.stime;
 }
+EXPORT_SYMBOL_GPL(thread_group_cputime_adjusted);
 
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE: */
 
@@ -481,13 +483,13 @@
 void account_process_tick(struct task_struct *p, int user_tick)
 {
 	u64 cputime, steal;
-	struct rq *rq = this_rq();
 
-	if (vtime_accounting_cpu_enabled())
+	if (vtime_accounting_enabled_this_cpu())
 		return;
+	trace_android_vh_account_task_time(p, this_rq(), user_tick);
 
 	if (sched_clock_irqtime) {
-		irqtime_account_process_tick(p, user_tick, rq, 1);
+		irqtime_account_process_tick(p, user_tick, 1);
 		return;
 	}
 
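The trace_android_vh_*/trace_android_rvh_* calls added in the hunks above are Android GKI vendor hooks declared in include/trace/hooks/sched.h, which the first hunk pulls in. A rough sketch of how a vendor module could attach to one of them, assuming the usual DECLARE_HOOK-generated registration helper and that the probe's first argument is the tracepoint's private data pointer (both are assumptions about the Android tree, not shown in this diff):

#include <trace/hooks/sched.h>

/* Assumed probe signature: void *data first, then the hook's arguments. */
static void example_account_task_time(void *data, struct task_struct *p,
				      struct rq *rq, int user_tick)
{
	/* e.g. feed a vendor-specific per-task accounting tracker here */
}

static int __init example_hook_init(void)
{
	/* Assumed helper name, generated from the hook declaration. */
	return register_trace_android_vh_account_task_time(
					example_account_task_time, NULL);
}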
@@ -501,7 +503,7 @@
 
 	if (user_tick)
 		account_user_time(p, cputime);
-	else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
+	else if ((p != this_rq()->idle) || (irq_count() != HARDIRQ_OFFSET))
 		account_system_time(p, HARDIRQ_OFFSET, cputime);
 	else
 		account_idle_time(cputime);
@@ -528,50 +530,6 @@
 
 	cputime -= steal;
 	account_idle_time(cputime);
-}
-
-/*
- * Perform (stime * rtime) / total, but avoid multiplication overflow by
- * loosing precision when the numbers are big.
- */
-static u64 scale_stime(u64 stime, u64 rtime, u64 total)
-{
-	u64 scaled;
-
-	for (;;) {
-		/* Make sure "rtime" is the bigger of stime/rtime */
-		if (stime > rtime)
-			swap(rtime, stime);
-
-		/* Make sure 'total' fits in 32 bits */
-		if (total >> 32)
-			goto drop_precision;
-
-		/* Does rtime (and thus stime) fit in 32 bits? */
-		if (!(rtime >> 32))
-			break;
-
-		/* Can we just balance rtime/stime rather than dropping bits? */
-		if (stime >> 31)
-			goto drop_precision;
-
-		/* We can grow stime and shrink rtime and try to make them both fit */
-		stime <<= 1;
-		rtime >>= 1;
-		continue;
-
-drop_precision:
-		/* We drop from rtime, it has more bits than stime */
-		rtime >>= 1;
-		total >>= 1;
-	}
-
-	/*
-	 * Make sure gcc understands that this is a 32x32->64 multiply,
-	 * followed by a 64/32->64 divide.
-	 */
-	scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total);
-	return scaled;
 }
 
 /*
@@ -633,7 +591,7 @@
 		goto update;
 	}
 
-	stime = scale_stime(stime, rtime, stime + utime);
+	stime = mul_u64_u64_div_u64(stime, rtime, stime + utime);
 
 update:
 	/*
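For context on the two hunks above: the open-coded scale_stime() loop approximated stime * rtime / (stime + utime) by shifting precision out of the operands until a 32x32->64 multiply became safe; it is replaced by the generic mul_u64_u64_div_u64() helper, which computes the same ratio from a full-width intermediate product. A standalone sketch of the equivalent arithmetic (not kernel code), using a 128-bit temporary where the compiler provides one:

#include <stdint.h>

/* stime rescaled so that utime + stime matches the measured runtime rtime:
 * result = stime * rtime / (stime + utime), with no overflow in the product. */
static uint64_t example_scale_stime(uint64_t stime, uint64_t rtime, uint64_t utime)
{
	unsigned __int128 prod = (unsigned __int128)stime * rtime;

	return (uint64_t)(prod / (stime + utime));
}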
@@ -684,6 +642,8 @@
 	thread_group_cputime(p, &cputime);
 	cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
 }
+EXPORT_SYMBOL_GPL(thread_group_cputime_adjusted);
+
 #endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
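This hunk and the earlier one add EXPORT_SYMBOL_GPL(thread_group_cputime_adjusted) under both accounting configurations. A minimal sketch of what that makes available to a GPL module; thread_group_cputime_adjusted() and pr_info() are existing kernel APIs, the wrapper function is illustrative only:

/* Illustrative only: report a process's adjusted CPU times in nanoseconds,
 * the same values procfs derives for utime/stime. */
static void example_report_cputime(struct task_struct *p)
{
	u64 ut, st;

	thread_group_cputime_adjusted(p, &ut, &st);
	pr_info("%s: utime=%llu ns stime=%llu ns\n", p->comm, ut, st);
}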
@@ -717,8 +677,8 @@
 	return delta - other;
 }
 
-static void __vtime_account_system(struct task_struct *tsk,
-				   struct vtime *vtime)
+static void vtime_account_system(struct task_struct *tsk,
+				 struct vtime *vtime)
 {
 	vtime->stime += get_vtime_delta(vtime);
 	if (vtime->stime >= TICK_NSEC) {
@@ -737,7 +697,17 @@
 	}
 }
 
-void vtime_account_system(struct task_struct *tsk)
+static void __vtime_account_kernel(struct task_struct *tsk,
+				   struct vtime *vtime)
+{
+	/* We might have scheduled out from guest path */
+	if (vtime->state == VTIME_GUEST)
+		vtime_account_guest(tsk, vtime);
+	else
+		vtime_account_system(tsk, vtime);
+}
+
+void vtime_account_kernel(struct task_struct *tsk)
 {
 	struct vtime *vtime = &tsk->vtime;
 
@@ -745,11 +715,7 @@
 		return;
 
 	write_seqcount_begin(&vtime->seqcount);
-	/* We might have scheduled out from guest path */
-	if (tsk->flags & PF_VCPU)
-		vtime_account_guest(tsk, vtime);
-	else
-		__vtime_account_system(tsk, vtime);
+	__vtime_account_kernel(tsk, vtime);
 	write_seqcount_end(&vtime->seqcount);
 }
 
@@ -758,7 +724,7 @@
 	struct vtime *vtime = &tsk->vtime;
 
 	write_seqcount_begin(&vtime->seqcount);
-	__vtime_account_system(tsk, vtime);
+	vtime_account_system(tsk, vtime);
 	vtime->state = VTIME_USER;
 	write_seqcount_end(&vtime->seqcount);
 }
@@ -788,8 +754,9 @@
 	 * that can thus safely catch up with a tickless delta.
 	 */
 	write_seqcount_begin(&vtime->seqcount);
-	__vtime_account_system(tsk, vtime);
+	vtime_account_system(tsk, vtime);
 	tsk->flags |= PF_VCPU;
+	vtime->state = VTIME_GUEST;
 	write_seqcount_end(&vtime->seqcount);
 }
 EXPORT_SYMBOL_GPL(vtime_guest_enter);
@@ -801,6 +768,7 @@
 	write_seqcount_begin(&vtime->seqcount);
 	vtime_account_guest(tsk, vtime);
 	tsk->flags &= ~PF_VCPU;
+	vtime->state = VTIME_SYS;
 	write_seqcount_end(&vtime->seqcount);
 }
 EXPORT_SYMBOL_GPL(vtime_guest_exit);
@@ -810,19 +778,30 @@
 	account_idle_time(get_vtime_delta(&tsk->vtime));
 }
 
-void arch_vtime_task_switch(struct task_struct *prev)
+void vtime_task_switch_generic(struct task_struct *prev)
 {
 	struct vtime *vtime = &prev->vtime;
 
 	write_seqcount_begin(&vtime->seqcount);
+	if (vtime->state == VTIME_IDLE)
+		vtime_account_idle(prev);
+	else
+		__vtime_account_kernel(prev, vtime);
 	vtime->state = VTIME_INACTIVE;
+	vtime->cpu = -1;
 	write_seqcount_end(&vtime->seqcount);
 
 	vtime = &current->vtime;
 
 	write_seqcount_begin(&vtime->seqcount);
-	vtime->state = VTIME_SYS;
+	if (is_idle_task(current))
+		vtime->state = VTIME_IDLE;
+	else if (current->flags & PF_VCPU)
+		vtime->state = VTIME_GUEST;
+	else
+		vtime->state = VTIME_SYS;
 	vtime->starttime = sched_clock();
+	vtime->cpu = smp_processor_id();
 	write_seqcount_end(&vtime->seqcount);
 }
 
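The task-switch and idle paths above, and the "state < VTIME_SYS" checks further down, rely on an ordered set of vtime states plus the new vtime->cpu field. As a reading aid, a sketch of the state values this code assumes; the authoritative definition is enum vtime_state in include/linux/sched.h of the matching kernel and may differ in detail:

/* Sketch for orientation only; see enum vtime_state in <linux/sched.h>. */
enum example_vtime_state {
	EXAMPLE_VTIME_INACTIVE = 0,	/* task sleeping, or vtime off on that CPU */
	EXAMPLE_VTIME_IDLE,		/* running the idle task */
	EXAMPLE_VTIME_SYS,		/* running in kernel space */
	EXAMPLE_VTIME_USER,		/* running in user space */
	EXAMPLE_VTIME_GUEST,		/* running guest code (PF_VCPU) */
};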
@@ -833,8 +812,9 @@
 
 	local_irq_save(flags);
 	write_seqcount_begin(&vtime->seqcount);
-	vtime->state = VTIME_SYS;
+	vtime->state = VTIME_IDLE;
 	vtime->starttime = sched_clock();
+	vtime->cpu = cpu;
 	write_seqcount_end(&vtime->seqcount);
 	local_irq_restore(flags);
 }
852832 seq = read_seqcount_begin(&vtime->seqcount);
853833
854834 gtime = t->gtime;
855
- if (vtime->state == VTIME_SYS && t->flags & PF_VCPU)
835
+ if (vtime->state == VTIME_GUEST)
856836 gtime += vtime->gtime + vtime_delta(vtime);
857837
858838 } while (read_seqcount_retry(&vtime->seqcount, seq));
....@@ -883,20 +863,233 @@
883863 *utime = t->utime;
884864 *stime = t->stime;
885865
886
- /* Task is sleeping, nothing to add */
887
- if (vtime->state == VTIME_INACTIVE || is_idle_task(t))
866
+ /* Task is sleeping or idle, nothing to add */
867
+ if (vtime->state < VTIME_SYS)
888868 continue;
889869
890870 delta = vtime_delta(vtime);
891871
892872 /*
893
- * Task runs either in user or kernel space, add pending nohz time to
894
- * the right place.
873
+ * Task runs either in user (including guest) or kernel space,
874
+ * add pending nohz time to the right place.
895875 */
896
- if (vtime->state == VTIME_USER || t->flags & PF_VCPU)
897
- *utime += vtime->utime + delta;
898
- else if (vtime->state == VTIME_SYS)
876
+ if (vtime->state == VTIME_SYS)
899877 *stime += vtime->stime + delta;
878
+ else
879
+ *utime += vtime->utime + delta;
900880 } while (read_seqcount_retry(&vtime->seqcount, seq));
901881 }
882
+
883
+static int vtime_state_fetch(struct vtime *vtime, int cpu)
884
+{
885
+ int state = READ_ONCE(vtime->state);
886
+
887
+ /*
888
+ * We raced against a context switch, fetch the
889
+ * kcpustat task again.
890
+ */
891
+ if (vtime->cpu != cpu && vtime->cpu != -1)
892
+ return -EAGAIN;
893
+
894
+ /*
895
+ * Two possible things here:
896
+ * 1) We are seeing the scheduling out task (prev) or any past one.
897
+ * 2) We are seeing the scheduling in task (next) but it hasn't
898
+ * passed though vtime_task_switch() yet so the pending
899
+ * cputime of the prev task may not be flushed yet.
900
+ *
901
+ * Case 1) is ok but 2) is not. So wait for a safe VTIME state.
902
+ */
903
+ if (state == VTIME_INACTIVE)
904
+ return -EAGAIN;
905
+
906
+ return state;
907
+}
908
+
909
+static u64 kcpustat_user_vtime(struct vtime *vtime)
910
+{
911
+ if (vtime->state == VTIME_USER)
912
+ return vtime->utime + vtime_delta(vtime);
913
+ else if (vtime->state == VTIME_GUEST)
914
+ return vtime->gtime + vtime_delta(vtime);
915
+ return 0;
916
+}
917
+
918
+static int kcpustat_field_vtime(u64 *cpustat,
919
+ struct task_struct *tsk,
920
+ enum cpu_usage_stat usage,
921
+ int cpu, u64 *val)
922
+{
923
+ struct vtime *vtime = &tsk->vtime;
924
+ unsigned int seq;
925
+
926
+ do {
927
+ int state;
928
+
929
+ seq = read_seqcount_begin(&vtime->seqcount);
930
+
931
+ state = vtime_state_fetch(vtime, cpu);
932
+ if (state < 0)
933
+ return state;
934
+
935
+ *val = cpustat[usage];
936
+
937
+ /*
938
+ * Nice VS unnice cputime accounting may be inaccurate if
939
+ * the nice value has changed since the last vtime update.
940
+ * But proper fix would involve interrupting target on nice
941
+ * updates which is a no go on nohz_full (although the scheduler
942
+ * may still interrupt the target if rescheduling is needed...)
943
+ */
944
+ switch (usage) {
945
+ case CPUTIME_SYSTEM:
946
+ if (state == VTIME_SYS)
947
+ *val += vtime->stime + vtime_delta(vtime);
948
+ break;
949
+ case CPUTIME_USER:
950
+ if (task_nice(tsk) <= 0)
951
+ *val += kcpustat_user_vtime(vtime);
952
+ break;
953
+ case CPUTIME_NICE:
954
+ if (task_nice(tsk) > 0)
955
+ *val += kcpustat_user_vtime(vtime);
956
+ break;
957
+ case CPUTIME_GUEST:
958
+ if (state == VTIME_GUEST && task_nice(tsk) <= 0)
959
+ *val += vtime->gtime + vtime_delta(vtime);
960
+ break;
961
+ case CPUTIME_GUEST_NICE:
962
+ if (state == VTIME_GUEST && task_nice(tsk) > 0)
963
+ *val += vtime->gtime + vtime_delta(vtime);
964
+ break;
965
+ default:
966
+ break;
967
+ }
968
+ } while (read_seqcount_retry(&vtime->seqcount, seq));
969
+
970
+ return 0;
971
+}
972
+
973
+u64 kcpustat_field(struct kernel_cpustat *kcpustat,
974
+ enum cpu_usage_stat usage, int cpu)
975
+{
976
+ u64 *cpustat = kcpustat->cpustat;
977
+ u64 val = cpustat[usage];
978
+ struct rq *rq;
979
+ int err;
980
+
981
+ if (!vtime_accounting_enabled_cpu(cpu))
982
+ return val;
983
+
984
+ rq = cpu_rq(cpu);
985
+
986
+ for (;;) {
987
+ struct task_struct *curr;
988
+
989
+ rcu_read_lock();
990
+ curr = rcu_dereference(rq->curr);
991
+ if (WARN_ON_ONCE(!curr)) {
992
+ rcu_read_unlock();
993
+ return cpustat[usage];
994
+ }
995
+
996
+ err = kcpustat_field_vtime(cpustat, curr, usage, cpu, &val);
997
+ rcu_read_unlock();
998
+
999
+ if (!err)
1000
+ return val;
1001
+
1002
+ cpu_relax();
1003
+ }
1004
+}
1005
+EXPORT_SYMBOL_GPL(kcpustat_field);
1006
+
1007
+static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst,
1008
+ const struct kernel_cpustat *src,
1009
+ struct task_struct *tsk, int cpu)
1010
+{
1011
+ struct vtime *vtime = &tsk->vtime;
1012
+ unsigned int seq;
1013
+
1014
+ do {
1015
+ u64 *cpustat;
1016
+ u64 delta;
1017
+ int state;
1018
+
1019
+ seq = read_seqcount_begin(&vtime->seqcount);
1020
+
1021
+ state = vtime_state_fetch(vtime, cpu);
1022
+ if (state < 0)
1023
+ return state;
1024
+
1025
+ *dst = *src;
1026
+ cpustat = dst->cpustat;
1027
+
1028
+ /* Task is sleeping, dead or idle, nothing to add */
1029
+ if (state < VTIME_SYS)
1030
+ continue;
1031
+
1032
+ delta = vtime_delta(vtime);
1033
+
1034
+ /*
1035
+ * Task runs either in user (including guest) or kernel space,
1036
+ * add pending nohz time to the right place.
1037
+ */
1038
+ if (state == VTIME_SYS) {
1039
+ cpustat[CPUTIME_SYSTEM] += vtime->stime + delta;
1040
+ } else if (state == VTIME_USER) {
1041
+ if (task_nice(tsk) > 0)
1042
+ cpustat[CPUTIME_NICE] += vtime->utime + delta;
1043
+ else
1044
+ cpustat[CPUTIME_USER] += vtime->utime + delta;
1045
+ } else {
1046
+ WARN_ON_ONCE(state != VTIME_GUEST);
1047
+ if (task_nice(tsk) > 0) {
1048
+ cpustat[CPUTIME_GUEST_NICE] += vtime->gtime + delta;
1049
+ cpustat[CPUTIME_NICE] += vtime->gtime + delta;
1050
+ } else {
1051
+ cpustat[CPUTIME_GUEST] += vtime->gtime + delta;
1052
+ cpustat[CPUTIME_USER] += vtime->gtime + delta;
1053
+ }
1054
+ }
1055
+ } while (read_seqcount_retry(&vtime->seqcount, seq));
1056
+
1057
+ return 0;
1058
+}
1059
+
1060
+void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu)
1061
+{
1062
+ const struct kernel_cpustat *src = &kcpustat_cpu(cpu);
1063
+ struct rq *rq;
1064
+ int err;
1065
+
1066
+ if (!vtime_accounting_enabled_cpu(cpu)) {
1067
+ *dst = *src;
1068
+ return;
1069
+ }
1070
+
1071
+ rq = cpu_rq(cpu);
1072
+
1073
+ for (;;) {
1074
+ struct task_struct *curr;
1075
+
1076
+ rcu_read_lock();
1077
+ curr = rcu_dereference(rq->curr);
1078
+ if (WARN_ON_ONCE(!curr)) {
1079
+ rcu_read_unlock();
1080
+ *dst = *src;
1081
+ return;
1082
+ }
1083
+
1084
+ err = kcpustat_cpu_fetch_vtime(dst, src, curr, cpu);
1085
+ rcu_read_unlock();
1086
+
1087
+ if (!err)
1088
+ return;
1089
+
1090
+ cpu_relax();
1091
+ }
1092
+}
1093
+EXPORT_SYMBOL_GPL(kcpustat_cpu_fetch);
1094
+
9021095 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
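Finally, a short usage sketch for the nohz_full-aware readers added in the last hunk. kcpustat_cpu() and nsec_to_clock_t() are existing kernel helpers; the wrapper function is illustrative only:

/* Illustrative only: fetch one cpustat field for a CPU, with the vtime of
 * the currently running task folded in, as /proc/stat-style readers need. */
static u64 example_cpu_user_clock_t(int cpu)
{
	u64 user_ns = kcpustat_field(&kcpustat_cpu(cpu), CPUTIME_USER, cpu);

	return nsec_to_clock_t(user_ns);
}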