2024-05-10 61598093bbdd283a7edc367d900f223070ead8d2
kernel/kernel/sched/cputime.c
@@ -1,8 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Simple CPU accounting cgroup controller
  */
 #include <linux/cpufreq_times.h>
 #include "sched.h"
+#include <trace/hooks/sched.h>
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 
@@ -18,6 +20,7 @@
  * compromise in place of having locks on each irq in account_system_time.
  */
 DEFINE_PER_CPU(struct irqtime, cpu_irqtime);
+EXPORT_PER_CPU_SYMBOL_GPL(cpu_irqtime);
 
 static int sched_clock_irqtime;
 
@@ -70,6 +73,8 @@
 		irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
 	else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
 		irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
+
+	trace_android_rvh_account_irq(curr, cpu, delta);
 }
 EXPORT_SYMBOL_GPL(irqtime_account_irq);
 
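The hunk above exports cpu_irqtime and invokes the restricted vendor hook trace_android_rvh_account_irq() so a vendor module can observe per-CPU irq time. A minimal sketch of such a consumer follows; the registration helper register_trace_android_rvh_account_irq() and the handler prototype are assumptions about the DECLARE_RESTRICTED_HOOK() machinery in the Android common kernel's <trace/hooks/sched.h>, not something defined by this diff.

#include <linux/module.h>
#include <linux/sched.h>
#include <trace/hooks/sched.h>

/* Hypothetical handler; the (void *data, curr, cpu, delta) shape mirrors the
 * trace_android_rvh_account_irq(curr, cpu, delta) call site above. */
static void vendor_account_irq(void *data, struct task_struct *curr,
			       int cpu, s64 delta)
{
	/* e.g. fold delta into a vendor-private per-CPU irq-time counter */
}

static int __init vendor_irqtime_init(void)
{
	/* Restricted vendor hooks cannot be unregistered, so registration is
	 * done once at init and the module is never unloaded. Helper name is
	 * an assumption about the Android tree. */
	return register_trace_android_rvh_account_irq(vendor_account_irq, NULL);
}
module_init(vendor_irqtime_init);
MODULE_LICENSE("GPL");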
@@ -361,7 +366,7 @@
  * softirq as those do not count in task exec_runtime any more.
  */
 static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
-					 struct rq *rq, int ticks)
+					 int ticks)
 {
 	u64 other, cputime = TICK_NSEC * ticks;
 
@@ -387,51 +392,48 @@
 		account_system_index_time(p, cputime, CPUTIME_SOFTIRQ);
 	} else if (user_tick) {
 		account_user_time(p, cputime);
-	} else if (p == rq->idle) {
+	} else if (p == this_rq()->idle) {
 		account_idle_time(cputime);
 	} else if (p->flags & PF_VCPU) { /* System time or guest time */
 		account_guest_time(p, cputime);
 	} else {
 		account_system_index_time(p, cputime, CPUTIME_SYSTEM);
 	}
+	trace_android_vh_irqtime_account_process_tick(p, this_rq(), user_tick, ticks);
 }
 
 static void irqtime_account_idle_ticks(int ticks)
 {
-	struct rq *rq = this_rq();
-
-	irqtime_account_process_tick(current, 0, rq, ticks);
+	irqtime_account_process_tick(current, 0, ticks);
 }
 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
 static inline void irqtime_account_idle_ticks(int ticks) { }
 static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
-						struct rq *rq, int nr_ticks) { }
+						int nr_ticks) { }
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
 
 /*
  * Use precise platform statistics if available:
  */
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+
 # ifndef __ARCH_HAS_VTIME_TASK_SWITCH
-void vtime_common_task_switch(struct task_struct *prev)
+void vtime_task_switch(struct task_struct *prev)
 {
 	if (is_idle_task(prev))
 		vtime_account_idle(prev);
 	else
-		vtime_account_system(prev);
+		vtime_account_kernel(prev);
 
 	vtime_flush(prev);
 	arch_vtime_task_switch(prev);
 }
 # endif
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 
-
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /*
  * Archs that account the whole time spent in the idle task
  * (outside irq) as idle time can rely on this and just implement
- * vtime_account_system() and vtime_account_idle(). Archs that
+ * vtime_account_kernel() and vtime_account_idle(). Archs that
  * have other meaning of the idle time (s390 only includes the
  * time spent by the CPU when it's in low power mode) must override
  * vtime_account().
@@ -442,7 +444,7 @@
 	if (!in_interrupt() && is_idle_task(tsk))
 		vtime_account_idle(tsk);
 	else
-		vtime_account_system(tsk);
+		vtime_account_kernel(tsk);
 }
 EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
 #endif /* __ARCH_HAS_VTIME_ACCOUNT */
@@ -470,6 +472,7 @@
 	*ut = cputime.utime;
 	*st = cputime.stime;
 }
+EXPORT_SYMBOL_GPL(thread_group_cputime_adjusted);
 
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE: */
 
@@ -481,13 +484,13 @@
 void account_process_tick(struct task_struct *p, int user_tick)
 {
 	u64 cputime, steal;
-	struct rq *rq = this_rq();
 
-	if (vtime_accounting_cpu_enabled())
+	if (vtime_accounting_enabled_this_cpu())
 		return;
+	trace_android_vh_account_task_time(p, this_rq(), user_tick);
 
 	if (sched_clock_irqtime) {
-		irqtime_account_process_tick(p, user_tick, rq, 1);
+		irqtime_account_process_tick(p, user_tick, 1);
 		return;
 	}
 
@@ -501,7 +504,7 @@
 
 	if (user_tick)
 		account_user_time(p, cputime);
-	else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
+	else if ((p != this_rq()->idle) || (irq_count() != HARDIRQ_OFFSET))
 		account_system_time(p, HARDIRQ_OFFSET, cputime);
 	else
 		account_idle_time(cputime);
@@ -528,50 +531,6 @@
 
 	cputime -= steal;
 	account_idle_time(cputime);
-}
-
-/*
- * Perform (stime * rtime) / total, but avoid multiplication overflow by
- * loosing precision when the numbers are big.
- */
-static u64 scale_stime(u64 stime, u64 rtime, u64 total)
-{
-	u64 scaled;
-
-	for (;;) {
-		/* Make sure "rtime" is the bigger of stime/rtime */
-		if (stime > rtime)
-			swap(rtime, stime);
-
-		/* Make sure 'total' fits in 32 bits */
-		if (total >> 32)
-			goto drop_precision;
-
-		/* Does rtime (and thus stime) fit in 32 bits? */
-		if (!(rtime >> 32))
-			break;
-
-		/* Can we just balance rtime/stime rather than dropping bits? */
-		if (stime >> 31)
-			goto drop_precision;
-
-		/* We can grow stime and shrink rtime and try to make them both fit */
-		stime <<= 1;
-		rtime >>= 1;
-		continue;
-
-drop_precision:
-		/* We drop from rtime, it has more bits than stime */
-		rtime >>= 1;
-		total >>= 1;
-	}
-
-	/*
-	 * Make sure gcc understands that this is a 32x32->64 multiply,
-	 * followed by a 64/32->64 divide.
-	 */
-	scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total);
-	return scaled;
 }
 
 /*
@@ -633,7 +592,7 @@
 		goto update;
 	}
 
-	stime = scale_stime(stime, rtime, stime + utime);
+	stime = mul_u64_u64_div_u64(stime, rtime, stime + utime);
 
 update:
 	/*
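The removed scale_stime() loop approximated stime * rtime / (stime + utime) by shifting precision away until the operands fit in 32 bits; the replacement mul_u64_u64_div_u64() performs the same division with a full 64x64->128-bit intermediate where the hardware or compiler allows it. A small userspace sketch of the arithmetic, with local names and assuming a compiler that provides __uint128_t:

#include <stdint.h>
#include <stdio.h>

/* Same math as mul_u64_u64_div_u64(stime, rtime, total) when 128-bit
 * arithmetic is available: no precision is dropped before the divide. */
static uint64_t scale_stime_sketch(uint64_t stime, uint64_t rtime, uint64_t total)
{
	return (uint64_t)(((__uint128_t)stime * rtime) / total);
}

int main(void)
{
	/* Values large enough that a plain 64-bit multiply would overflow. */
	uint64_t stime = 3ULL << 40, utime = 1ULL << 40;
	uint64_t rtime = 5ULL << 40;

	printf("scaled stime = %llu\n",
	       (unsigned long long)scale_stime_sketch(stime, rtime, stime + utime));
	return 0;
}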
@@ -684,6 +643,8 @@
 	thread_group_cputime(p, &cputime);
 	cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
 }
+EXPORT_SYMBOL_GPL(thread_group_cputime_adjusted);
+
 #endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
@@ -717,8 +678,8 @@
 	return delta - other;
 }
 
-static void __vtime_account_system(struct task_struct *tsk,
-				   struct vtime *vtime)
+static void vtime_account_system(struct task_struct *tsk,
+				 struct vtime *vtime)
 {
 	vtime->stime += get_vtime_delta(vtime);
 	if (vtime->stime >= TICK_NSEC) {
@@ -737,7 +698,17 @@
 	}
 }
 
-void vtime_account_system(struct task_struct *tsk)
+static void __vtime_account_kernel(struct task_struct *tsk,
+				   struct vtime *vtime)
+{
+	/* We might have scheduled out from guest path */
+	if (vtime->state == VTIME_GUEST)
+		vtime_account_guest(tsk, vtime);
+	else
+		vtime_account_system(tsk, vtime);
+}
+
+void vtime_account_kernel(struct task_struct *tsk)
 {
 	struct vtime *vtime = &tsk->vtime;
 
@@ -745,11 +716,7 @@
 		return;
 
 	write_seqcount_begin(&vtime->seqcount);
-	/* We might have scheduled out from guest path */
-	if (tsk->flags & PF_VCPU)
-		vtime_account_guest(tsk, vtime);
-	else
-		__vtime_account_system(tsk, vtime);
+	__vtime_account_kernel(tsk, vtime);
 	write_seqcount_end(&vtime->seqcount);
 }
 
@@ -758,7 +725,7 @@
 	struct vtime *vtime = &tsk->vtime;
 
 	write_seqcount_begin(&vtime->seqcount);
-	__vtime_account_system(tsk, vtime);
+	vtime_account_system(tsk, vtime);
 	vtime->state = VTIME_USER;
 	write_seqcount_end(&vtime->seqcount);
 }
@@ -788,8 +755,9 @@
 	 * that can thus safely catch up with a tickless delta.
 	 */
 	write_seqcount_begin(&vtime->seqcount);
-	__vtime_account_system(tsk, vtime);
+	vtime_account_system(tsk, vtime);
 	tsk->flags |= PF_VCPU;
+	vtime->state = VTIME_GUEST;
 	write_seqcount_end(&vtime->seqcount);
 }
 EXPORT_SYMBOL_GPL(vtime_guest_enter);
@@ -801,6 +769,7 @@
 	write_seqcount_begin(&vtime->seqcount);
 	vtime_account_guest(tsk, vtime);
 	tsk->flags &= ~PF_VCPU;
+	vtime->state = VTIME_SYS;
 	write_seqcount_end(&vtime->seqcount);
 }
 EXPORT_SYMBOL_GPL(vtime_guest_exit);
@@ -810,19 +779,30 @@
 	account_idle_time(get_vtime_delta(&tsk->vtime));
 }
 
-void arch_vtime_task_switch(struct task_struct *prev)
+void vtime_task_switch_generic(struct task_struct *prev)
 {
 	struct vtime *vtime = &prev->vtime;
 
 	write_seqcount_begin(&vtime->seqcount);
+	if (vtime->state == VTIME_IDLE)
+		vtime_account_idle(prev);
+	else
+		__vtime_account_kernel(prev, vtime);
 	vtime->state = VTIME_INACTIVE;
+	vtime->cpu = -1;
 	write_seqcount_end(&vtime->seqcount);
 
 	vtime = &current->vtime;
 
 	write_seqcount_begin(&vtime->seqcount);
-	vtime->state = VTIME_SYS;
+	if (is_idle_task(current))
+		vtime->state = VTIME_IDLE;
+	else if (current->flags & PF_VCPU)
+		vtime->state = VTIME_GUEST;
+	else
+		vtime->state = VTIME_SYS;
 	vtime->starttime = sched_clock();
+	vtime->cpu = smp_processor_id();
 	write_seqcount_end(&vtime->seqcount);
 }
 
@@ -833,8 +813,9 @@
 
 	local_irq_save(flags);
 	write_seqcount_begin(&vtime->seqcount);
-	vtime->state = VTIME_SYS;
+	vtime->state = VTIME_IDLE;
 	vtime->starttime = sched_clock();
+	vtime->cpu = cpu;
 	write_seqcount_end(&vtime->seqcount);
 	local_irq_restore(flags);
 }
@@ -852,7 +833,7 @@
 		seq = read_seqcount_begin(&vtime->seqcount);
 
 		gtime = t->gtime;
-		if (vtime->state == VTIME_SYS && t->flags & PF_VCPU)
+		if (vtime->state == VTIME_GUEST)
 			gtime += vtime->gtime + vtime_delta(vtime);
 
 	} while (read_seqcount_retry(&vtime->seqcount, seq));
@@ -883,20 +864,233 @@
 		*utime = t->utime;
 		*stime = t->stime;
 
-		/* Task is sleeping, nothing to add */
-		if (vtime->state == VTIME_INACTIVE || is_idle_task(t))
+		/* Task is sleeping or idle, nothing to add */
+		if (vtime->state < VTIME_SYS)
 			continue;
 
 		delta = vtime_delta(vtime);
 
 		/*
-		 * Task runs either in user or kernel space, add pending nohz time to
-		 * the right place.
+		 * Task runs either in user (including guest) or kernel space,
+		 * add pending nohz time to the right place.
 		 */
-		if (vtime->state == VTIME_USER || t->flags & PF_VCPU)
-			*utime += vtime->utime + delta;
-		else if (vtime->state == VTIME_SYS)
+		if (vtime->state == VTIME_SYS)
 			*stime += vtime->stime + delta;
+		else
+			*utime += vtime->utime + delta;
 	} while (read_seqcount_retry(&vtime->seqcount, seq));
 }
+
+static int vtime_state_fetch(struct vtime *vtime, int cpu)
+{
+	int state = READ_ONCE(vtime->state);
+
+	/*
+	 * We raced against a context switch, fetch the
+	 * kcpustat task again.
+	 */
+	if (vtime->cpu != cpu && vtime->cpu != -1)
+		return -EAGAIN;
+
+	/*
+	 * Two possible things here:
+	 * 1) We are seeing the scheduling out task (prev) or any past one.
+	 * 2) We are seeing the scheduling in task (next) but it hasn't
+	 *    passed though vtime_task_switch() yet so the pending
+	 *    cputime of the prev task may not be flushed yet.
+	 *
+	 * Case 1) is ok but 2) is not. So wait for a safe VTIME state.
+	 */
+	if (state == VTIME_INACTIVE)
+		return -EAGAIN;
+
+	return state;
+}
+
+static u64 kcpustat_user_vtime(struct vtime *vtime)
+{
+	if (vtime->state == VTIME_USER)
+		return vtime->utime + vtime_delta(vtime);
+	else if (vtime->state == VTIME_GUEST)
+		return vtime->gtime + vtime_delta(vtime);
+	return 0;
+}
+
+static int kcpustat_field_vtime(u64 *cpustat,
+				struct task_struct *tsk,
+				enum cpu_usage_stat usage,
+				int cpu, u64 *val)
+{
+	struct vtime *vtime = &tsk->vtime;
+	unsigned int seq;
+
+	do {
+		int state;
+
+		seq = read_seqcount_begin(&vtime->seqcount);
+
+		state = vtime_state_fetch(vtime, cpu);
+		if (state < 0)
+			return state;
+
+		*val = cpustat[usage];
+
+		/*
+		 * Nice VS unnice cputime accounting may be inaccurate if
+		 * the nice value has changed since the last vtime update.
+		 * But proper fix would involve interrupting target on nice
+		 * updates which is a no go on nohz_full (although the scheduler
+		 * may still interrupt the target if rescheduling is needed...)
+		 */
+		switch (usage) {
+		case CPUTIME_SYSTEM:
+			if (state == VTIME_SYS)
+				*val += vtime->stime + vtime_delta(vtime);
+			break;
+		case CPUTIME_USER:
+			if (task_nice(tsk) <= 0)
+				*val += kcpustat_user_vtime(vtime);
+			break;
+		case CPUTIME_NICE:
+			if (task_nice(tsk) > 0)
+				*val += kcpustat_user_vtime(vtime);
+			break;
+		case CPUTIME_GUEST:
+			if (state == VTIME_GUEST && task_nice(tsk) <= 0)
+				*val += vtime->gtime + vtime_delta(vtime);
+			break;
+		case CPUTIME_GUEST_NICE:
+			if (state == VTIME_GUEST && task_nice(tsk) > 0)
+				*val += vtime->gtime + vtime_delta(vtime);
+			break;
+		default:
+			break;
+		}
+	} while (read_seqcount_retry(&vtime->seqcount, seq));
+
+	return 0;
+}
+
+u64 kcpustat_field(struct kernel_cpustat *kcpustat,
+		   enum cpu_usage_stat usage, int cpu)
+{
+	u64 *cpustat = kcpustat->cpustat;
+	u64 val = cpustat[usage];
+	struct rq *rq;
+	int err;
+
+	if (!vtime_accounting_enabled_cpu(cpu))
+		return val;
+
+	rq = cpu_rq(cpu);
+
+	for (;;) {
+		struct task_struct *curr;
+
+		rcu_read_lock();
+		curr = rcu_dereference(rq->curr);
+		if (WARN_ON_ONCE(!curr)) {
+			rcu_read_unlock();
+			return cpustat[usage];
+		}
+
+		err = kcpustat_field_vtime(cpustat, curr, usage, cpu, &val);
+		rcu_read_unlock();
+
+		if (!err)
+			return val;
+
+		cpu_relax();
+	}
+}
+EXPORT_SYMBOL_GPL(kcpustat_field);
+
+static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst,
+				    const struct kernel_cpustat *src,
+				    struct task_struct *tsk, int cpu)
+{
+	struct vtime *vtime = &tsk->vtime;
+	unsigned int seq;
+
+	do {
+		u64 *cpustat;
+		u64 delta;
+		int state;
+
+		seq = read_seqcount_begin(&vtime->seqcount);
+
+		state = vtime_state_fetch(vtime, cpu);
+		if (state < 0)
+			return state;
+
+		*dst = *src;
+		cpustat = dst->cpustat;
+
+		/* Task is sleeping, dead or idle, nothing to add */
+		if (state < VTIME_SYS)
+			continue;
+
+		delta = vtime_delta(vtime);
+
+		/*
+		 * Task runs either in user (including guest) or kernel space,
+		 * add pending nohz time to the right place.
+		 */
+		if (state == VTIME_SYS) {
+			cpustat[CPUTIME_SYSTEM] += vtime->stime + delta;
+		} else if (state == VTIME_USER) {
+			if (task_nice(tsk) > 0)
+				cpustat[CPUTIME_NICE] += vtime->utime + delta;
+			else
+				cpustat[CPUTIME_USER] += vtime->utime + delta;
+		} else {
+			WARN_ON_ONCE(state != VTIME_GUEST);
+			if (task_nice(tsk) > 0) {
+				cpustat[CPUTIME_GUEST_NICE] += vtime->gtime + delta;
+				cpustat[CPUTIME_NICE] += vtime->gtime + delta;
+			} else {
+				cpustat[CPUTIME_GUEST] += vtime->gtime + delta;
+				cpustat[CPUTIME_USER] += vtime->gtime + delta;
+			}
+		}
+	} while (read_seqcount_retry(&vtime->seqcount, seq));
+
+	return 0;
+}
+
+void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu)
+{
+	const struct kernel_cpustat *src = &kcpustat_cpu(cpu);
+	struct rq *rq;
+	int err;
+
+	if (!vtime_accounting_enabled_cpu(cpu)) {
+		*dst = *src;
+		return;
+	}
+
+	rq = cpu_rq(cpu);
+
+	for (;;) {
+		struct task_struct *curr;
+
+		rcu_read_lock();
+		curr = rcu_dereference(rq->curr);
+		if (WARN_ON_ONCE(!curr)) {
+			rcu_read_unlock();
+			*dst = *src;
+			return;
+		}
+
+		err = kcpustat_cpu_fetch_vtime(dst, src, curr, cpu);
+		rcu_read_unlock();
+
+		if (!err)
+			return;
+
+		cpu_relax();
+	}
+}
+EXPORT_SYMBOL_GPL(kcpustat_cpu_fetch);
+
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
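The kcpustat_field() and kcpustat_cpu_fetch() accessors added in the last hunk let readers such as the /proc files fold in the pending, not-yet-flushed vtime delta of the task currently running on a nohz_full CPU. A minimal sketch of a kernel-side consumer; the helper name below is illustrative and not taken from this diff, while kcpustat_field() and kcpustat_cpu() are the interfaces shown above.

#include <linux/kernel_stat.h>
#include <linux/cpumask.h>

/* Illustrative only: sum user time across CPUs through the vtime-aware
 * accessor instead of reading kcpustat_cpu(cpu).cpustat[] directly, so
 * tickless CPUs report up-to-date values. */
static u64 total_user_time_ns(void)
{
	u64 user = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		user += kcpustat_field(&kcpustat_cpu(cpu), CPUTIME_USER, cpu);

	return user;
}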