From e636c8d336489bf3eed5878299e6cc045bbad077 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Tue, 20 Feb 2024 01:17:29 +0000
Subject: [PATCH] debug lk

---
 kernel/include/trace/events/sched.h | 514 +++++++++------------------------------------------------
 1 file changed, 81 insertions(+), 433 deletions(-)

diff --git a/kernel/include/trace/events/sched.h b/kernel/include/trace/events/sched.h
index 14efeb2..bcd7f1f 100644
--- a/kernel/include/trace/events/sched.h
+++ b/kernel/include/trace/events/sched.h
@@ -91,7 +91,7 @@
 
 /*
  * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING.
- * It it not always called from the waking context.
+ * It is not always called from the waking context.
  */
 DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
 	     TP_PROTO(struct task_struct *p),
@@ -198,6 +198,7 @@
 		__field(	int,	prio			)
 		__field(	int,	orig_cpu		)
 		__field(	int,	dest_cpu		)
+		__field(	int,	running			)
 	),
 
 	TP_fast_assign(
@@ -206,11 +207,13 @@
 		__entry->prio		= p->prio; /* XXX SCHED_DEADLINE */
 		__entry->orig_cpu	= task_cpu(p);
 		__entry->dest_cpu	= dest_cpu;
+		__entry->running	= (p->state == TASK_RUNNING);
 	),
 
-	TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d",
+	TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d running=%d",
 		  __entry->comm, __entry->pid, __entry->prio,
-		  __entry->orig_cpu, __entry->dest_cpu)
+		  __entry->orig_cpu, __entry->dest_cpu,
+		  __entry->running)
 );
 
 DECLARE_EVENT_CLASS(sched_process_template,
@@ -241,7 +244,6 @@
 DEFINE_EVENT(sched_process_template, sched_process_free,
 	     TP_PROTO(struct task_struct *p),
 	     TP_ARGS(p));
-
 
 /*
  * Tracepoint for a task exiting:
@@ -336,11 +338,20 @@
 		  __entry->pid, __entry->old_pid)
 );
 
+
+#ifdef CONFIG_SCHEDSTATS
+#define DEFINE_EVENT_SCHEDSTAT DEFINE_EVENT
+#define DECLARE_EVENT_CLASS_SCHEDSTAT DECLARE_EVENT_CLASS
+#else
+#define DEFINE_EVENT_SCHEDSTAT DEFINE_EVENT_NOP
+#define DECLARE_EVENT_CLASS_SCHEDSTAT DECLARE_EVENT_CLASS_NOP
+#endif
+
 /*
  * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
  *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
  */
-DECLARE_EVENT_CLASS(sched_stat_template,
+DECLARE_EVENT_CLASS_SCHEDSTAT(sched_stat_template,
 
 	TP_PROTO(struct task_struct *tsk, u64 delay),
@@ -363,12 +374,11 @@
 			(unsigned long long)__entry->delay)
 );
 
-
 /*
  * Tracepoint for accounting wait time (time the task is runnable
  * but not actually running due to scheduler contention).
  */
-DEFINE_EVENT(sched_stat_template, sched_stat_wait,
+DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_wait,
 	     TP_PROTO(struct task_struct *tsk, u64 delay),
 	     TP_ARGS(tsk, delay));
 
@@ -376,7 +386,7 @@
  * Tracepoint for accounting sleep time (time the task is not runnable,
  * including iowait, see below).
  */
-DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
+DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_sleep,
 	     TP_PROTO(struct task_struct *tsk, u64 delay),
 	     TP_ARGS(tsk, delay));
 
@@ -384,14 +394,14 @@
  * Tracepoint for accounting iowait time (time the task is not runnable
  * due to waiting on IO to complete).
  */
-DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
+DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_iowait,
 	     TP_PROTO(struct task_struct *tsk, u64 delay),
 	     TP_ARGS(tsk, delay));
 
 /*
  * Tracepoint for accounting blocked time (time the task is in uninterruptible).
  */
-DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
+DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_blocked,
 	     TP_PROTO(struct task_struct *tsk, u64 delay),
 	     TP_ARGS(tsk, delay));
 
@@ -412,7 +422,7 @@
 
 	TP_fast_assign(
 		__entry->pid	= tsk->pid;
-		__entry->caller = (void*)get_wchan(tsk);
+		__entry->caller = (void *)get_wchan(tsk);
 		__entry->io_wait = tsk->in_iowait;
 	),
 
@@ -504,7 +514,11 @@
 );
 #endif /* CONFIG_DETECT_HUNG_TASK */
 
-DECLARE_EVENT_CLASS(sched_move_task_template,
+/*
+ * Tracks migration of tasks from one runqueue to another. Can be used to
+ * detect if automatic NUMA balancing is bouncing between nodes.
+ */
+TRACE_EVENT(sched_move_numa,
 
 	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
 
@@ -536,23 +550,7 @@
 		  __entry->dst_cpu, __entry->dst_nid)
 );
 
-/*
- * Tracks migration of tasks from one runqueue to another. Can be used to
- * detect if automatic NUMA balancing is bouncing between nodes
- */
-DEFINE_EVENT(sched_move_task_template, sched_move_numa,
-	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
-
-	TP_ARGS(tsk, src_cpu, dst_cpu)
-);
-
-DEFINE_EVENT(sched_move_task_template, sched_stick_numa,
-	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
-
-	TP_ARGS(tsk, src_cpu, dst_cpu)
-);
-
-TRACE_EVENT(sched_swap_numa,
+DECLARE_EVENT_CLASS(sched_numa_pair_template,
 
 	TP_PROTO(struct task_struct *src_tsk, int src_cpu,
 		 struct task_struct *dst_tsk, int dst_cpu),
@@ -578,11 +576,11 @@
 		__entry->src_ngid	= task_numa_group_id(src_tsk);
 		__entry->src_cpu	= src_cpu;
 		__entry->src_nid	= cpu_to_node(src_cpu);
-		__entry->dst_pid	= task_pid_nr(dst_tsk);
-		__entry->dst_tgid	= task_tgid_nr(dst_tsk);
-		__entry->dst_ngid	= task_numa_group_id(dst_tsk);
+		__entry->dst_pid	= dst_tsk ? task_pid_nr(dst_tsk) : 0;
+		__entry->dst_tgid	= dst_tsk ? task_tgid_nr(dst_tsk) : 0;
+		__entry->dst_ngid	= dst_tsk ? task_numa_group_id(dst_tsk) : 0;
 		__entry->dst_cpu	= dst_cpu;
-		__entry->dst_nid	= cpu_to_node(dst_cpu);
+		__entry->dst_nid	= dst_cpu >= 0 ? cpu_to_node(dst_cpu) : -1;
 	),
 
 	TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d",
@@ -591,6 +589,23 @@
 		  __entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
 		  __entry->dst_cpu, __entry->dst_nid)
 );
+
+DEFINE_EVENT(sched_numa_pair_template, sched_stick_numa,
+
+	TP_PROTO(struct task_struct *src_tsk, int src_cpu,
+		 struct task_struct *dst_tsk, int dst_cpu),
+
+	TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu)
+);
+
+DEFINE_EVENT(sched_numa_pair_template, sched_swap_numa,
+
+	TP_PROTO(struct task_struct *src_tsk, int src_cpu,
+		 struct task_struct *dst_tsk, int dst_cpu),
+
+	TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu)
+);
+
 
 /*
  * Tracepoint for waking a polling cpu without an IPI.
@@ -612,423 +627,56 @@
 	TP_printk("cpu=%d", __entry->cpu)
 );
 
-#ifdef CONFIG_SMP
-#ifdef CREATE_TRACE_POINTS
-static inline
-int __trace_sched_cpu(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-#ifdef CONFIG_FAIR_GROUP_SCHED
-	struct rq *rq = cfs_rq ? cfs_rq->rq : NULL;
-#else
-	struct rq *rq = cfs_rq ? container_of(cfs_rq, struct rq, cfs) : NULL;
-#endif
-	return rq ? cpu_of(rq)
-		  : task_cpu((container_of(se, struct task_struct, se)));
-}
-
-static inline
-int __trace_sched_path(struct cfs_rq *cfs_rq, char *path, int len)
-{
-#ifdef CONFIG_FAIR_GROUP_SCHED
-	int l = path ? len : 0;
-
-	if (cfs_rq && task_group_is_autogroup(cfs_rq->tg))
-		return autogroup_path(cfs_rq->tg, path, l) + 1;
-	else if (cfs_rq && cfs_rq->tg->css.cgroup)
-		return cgroup_path(cfs_rq->tg->css.cgroup, path, l) + 1;
-#endif
-	if (path)
-		strcpy(path, "(null)");
-
-	return strlen("(null)");
-}
-
-static inline
-struct cfs_rq *__trace_sched_group_cfs_rq(struct sched_entity *se)
-{
-#ifdef CONFIG_FAIR_GROUP_SCHED
-	return se->my_q;
-#else
-	return NULL;
-#endif
-}
-#endif /* CREATE_TRACE_POINTS */
-
 /*
- * Tracepoint for cfs_rq load tracking:
+ * Following tracepoints are not exported in tracefs and provide hooking
+ * mechanisms only for testing and debugging purposes.
+ *
+ * Postfixed with _tp to make them easily identifiable in the code.
  */
-TRACE_EVENT(sched_load_cfs_rq,
-
+DECLARE_TRACE(pelt_cfs_tp,
 	TP_PROTO(struct cfs_rq *cfs_rq),
+	TP_ARGS(cfs_rq));
 
-	TP_ARGS(cfs_rq),
-
-	TP_STRUCT__entry(
-		__field(	int,		cpu			)
-		__dynamic_array(char,		path,
-				__trace_sched_path(cfs_rq, NULL, 0)	)
-		__field(	unsigned long,	load			)
-		__field(	unsigned long,	rbl_load		)
-		__field(	unsigned long,	util			)
-	),
-
-	TP_fast_assign(
-		__entry->cpu	= __trace_sched_cpu(cfs_rq, NULL);
-		__trace_sched_path(cfs_rq, __get_dynamic_array(path),
-				   __get_dynamic_array_len(path));
-		__entry->load	= cfs_rq->avg.load_avg;
-		__entry->rbl_load = cfs_rq->avg.runnable_load_avg;
-		__entry->util	= cfs_rq->avg.util_avg;
-	),
-
-	TP_printk("cpu=%d path=%s load=%lu rbl_load=%lu util=%lu",
-		  __entry->cpu, __get_str(path), __entry->load,
-		  __entry->rbl_load,__entry->util)
-);
-
-/*
- * Tracepoint for rt_rq load tracking:
- */
-struct rq;
-TRACE_EVENT(sched_load_rt_rq,
-
+DECLARE_TRACE(pelt_rt_tp,
 	TP_PROTO(struct rq *rq),
+	TP_ARGS(rq));
 
-	TP_ARGS(rq),
+DECLARE_TRACE(pelt_dl_tp,
+	TP_PROTO(struct rq *rq),
+	TP_ARGS(rq));
 
-	TP_STRUCT__entry(
-		__field(	int,		cpu			)
-		__field(	unsigned long,	util			)
-	),
+DECLARE_TRACE(pelt_thermal_tp,
+	TP_PROTO(struct rq *rq),
+	TP_ARGS(rq));
 
-	TP_fast_assign(
-		__entry->cpu	= rq->cpu;
-		__entry->util	= rq->avg_rt.util_avg;
-	),
+DECLARE_TRACE(pelt_irq_tp,
+	TP_PROTO(struct rq *rq),
+	TP_ARGS(rq));
 
-	TP_printk("cpu=%d util=%lu", __entry->cpu,
-		  __entry->util)
-);
-
-/*
- * Tracepoint for sched_entity load tracking:
- */
-TRACE_EVENT(sched_load_se,
-
+DECLARE_TRACE(pelt_se_tp,
 	TP_PROTO(struct sched_entity *se),
+	TP_ARGS(se));
 
-	TP_ARGS(se),
+DECLARE_TRACE(sched_cpu_capacity_tp,
+	TP_PROTO(struct rq *rq),
+	TP_ARGS(rq));
 
-	TP_STRUCT__entry(
-		__field(	int,		cpu			)
-		__dynamic_array(char,		path,
-			__trace_sched_path(__trace_sched_group_cfs_rq(se), NULL, 0) )
-		__array(	char,		comm,	TASK_COMM_LEN	)
-		__field(	pid_t,		pid			)
-		__field(	unsigned long,	load			)
-		__field(	unsigned long,	rbl_load		)
-		__field(	unsigned long,	util			)
-	),
+DECLARE_TRACE(sched_overutilized_tp,
+	TP_PROTO(struct root_domain *rd, bool overutilized),
+	TP_ARGS(rd, overutilized));
 
-	TP_fast_assign(
-		struct cfs_rq *gcfs_rq = __trace_sched_group_cfs_rq(se);
-		struct task_struct *p = gcfs_rq ? NULL
-				: container_of(se, struct task_struct, se);
-
-		__entry->cpu = __trace_sched_cpu(gcfs_rq, se);
-		__trace_sched_path(gcfs_rq, __get_dynamic_array(path),
-				   __get_dynamic_array_len(path));
-		memcpy(__entry->comm, p ? p->comm : "(null)",
-		       p ? TASK_COMM_LEN : sizeof("(null)"));
-		__entry->pid = p ? p->pid : -1;
-		__entry->load = se->avg.load_avg;
-		__entry->rbl_load = se->avg.runnable_load_avg;
-		__entry->util = se->avg.util_avg;
-	),
-
-	TP_printk("cpu=%d path=%s comm=%s pid=%d load=%lu rbl_load=%lu util=%lu",
-		  __entry->cpu, __get_str(path), __entry->comm, __entry->pid,
-		  __entry->load, __entry->rbl_load, __entry->util)
-);
-
-/*
- * Tracepoint for task_group load tracking:
- */
-#ifdef CONFIG_FAIR_GROUP_SCHED
-TRACE_EVENT(sched_load_tg,
-
+DECLARE_TRACE(sched_util_est_cfs_tp,
 	TP_PROTO(struct cfs_rq *cfs_rq),
+	TP_ARGS(cfs_rq));
 
-	TP_ARGS(cfs_rq),
+DECLARE_TRACE(sched_util_est_se_tp,
+	TP_PROTO(struct sched_entity *se),
+	TP_ARGS(se));
 
-	TP_STRUCT__entry(
-		__field(	int,	cpu			)
-		__dynamic_array(char,	path,
-				__trace_sched_path(cfs_rq, NULL, 0) )
-		__field(	long,	load			)
-	),
+DECLARE_TRACE(sched_update_nr_running_tp,
+	TP_PROTO(struct rq *rq, int change),
+	TP_ARGS(rq, change));
 
-	TP_fast_assign(
-		__entry->cpu = cfs_rq->rq->cpu;
-		__trace_sched_path(cfs_rq, __get_dynamic_array(path),
-				   __get_dynamic_array_len(path));
-		__entry->load = atomic_long_read(&cfs_rq->tg->load_avg);
-	),
-
-	TP_printk("cpu=%d path=%s load=%ld", __entry->cpu, __get_str(path),
-		  __entry->load)
-);
-#endif /* CONFIG_FAIR_GROUP_SCHED */
-
-/*
- * Tracepoint for tasks' estimated utilization.
- */
-TRACE_EVENT(sched_util_est_task,
-
-	TP_PROTO(struct task_struct *tsk, struct sched_avg *avg),
-
-	TP_ARGS(tsk, avg),
-
-	TP_STRUCT__entry(
-		__array( char, comm, TASK_COMM_LEN )
-		__field( pid_t, pid )
-		__field( int, cpu )
-		__field( unsigned int, util_avg )
-		__field( unsigned int, est_enqueued )
-		__field( unsigned int, est_ewma )
-
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
-		__entry->pid = tsk->pid;
-		__entry->cpu = task_cpu(tsk);
-		__entry->util_avg = avg->util_avg;
-		__entry->est_enqueued = avg->util_est.enqueued;
-		__entry->est_ewma = avg->util_est.ewma;
-	),
-
-	TP_printk("comm=%s pid=%d cpu=%d util_avg=%u util_est_ewma=%u util_est_enqueued=%u",
-		  __entry->comm,
-		  __entry->pid,
-		  __entry->cpu,
-		  __entry->util_avg,
-		  __entry->est_ewma,
-		  __entry->est_enqueued)
-);
-
-/*
- * Tracepoint for root cfs_rq's estimated utilization.
- */
-TRACE_EVENT(sched_util_est_cpu,
-
-	TP_PROTO(int cpu, struct cfs_rq *cfs_rq),
-
-	TP_ARGS(cpu, cfs_rq),
-
-	TP_STRUCT__entry(
-		__field( int, cpu )
-		__field( unsigned int, util_avg )
-		__field( unsigned int, util_est_enqueued )
-	),
-
-	TP_fast_assign(
-		__entry->cpu = cpu;
-		__entry->util_avg = cfs_rq->avg.util_avg;
-		__entry->util_est_enqueued = cfs_rq->avg.util_est.enqueued;
-	),
-
-	TP_printk("cpu=%d util_avg=%u util_est_enqueued=%u",
-		  __entry->cpu,
-		  __entry->util_avg,
-		  __entry->util_est_enqueued)
-);
-
-/*
- * Tracepoint for find_best_target
- */
-TRACE_EVENT(sched_find_best_target,
-
-	TP_PROTO(struct task_struct *tsk, bool prefer_idle,
-		 unsigned long min_util, int best_idle, int best_active,
-		 int target, int backup),
-
-	TP_ARGS(tsk, prefer_idle, min_util, best_idle,
-		best_active, target, backup),
-
-	TP_STRUCT__entry(
-		__array( char, comm, TASK_COMM_LEN )
-		__field( pid_t, pid )
-		__field( unsigned long, min_util )
-		__field( bool, prefer_idle )
-		__field( int, best_idle )
-		__field( int, best_active )
-		__field( int, target )
-		__field( int, backup )
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
-		__entry->pid = tsk->pid;
-		__entry->min_util = min_util;
-		__entry->prefer_idle = prefer_idle;
-		__entry->best_idle = best_idle;
-		__entry->best_active = best_active;
-		__entry->target = target;
-		__entry->backup = backup;
-	),
-
-	TP_printk("pid=%d comm=%s prefer_idle=%d "
-		  "best_idle=%d best_active=%d target=%d backup=%d",
-		  __entry->pid, __entry->comm, __entry->prefer_idle,
-		  __entry->best_idle, __entry->best_active,
-		  __entry->target, __entry->backup)
-);
-
-/*
- * Tracepoint for accounting CPU boosted utilization
- */
-TRACE_EVENT(sched_boost_cpu,
-
-	TP_PROTO(int cpu, unsigned long util, long margin),
-
-	TP_ARGS(cpu, util, margin),
-
-	TP_STRUCT__entry(
-		__field( int, cpu )
-		__field( unsigned long, util )
-		__field(long, margin )
-	),
-
-	TP_fast_assign(
-		__entry->cpu = cpu;
-		__entry->util = util;
-		__entry->margin = margin;
-	),
-
-	TP_printk("cpu=%d util=%lu margin=%ld",
-		  __entry->cpu,
-		  __entry->util,
-		  __entry->margin)
-);
-
-/*
- * Tracepoint for schedtune_tasks_update
- */
-TRACE_EVENT(sched_tune_tasks_update,
-
-	TP_PROTO(struct task_struct *tsk, int cpu, int tasks, int idx,
-		 int boost, int max_boost, u64 group_ts),
-
-	TP_ARGS(tsk, cpu, tasks, idx, boost, max_boost, group_ts),
-
-	TP_STRUCT__entry(
-		__array( char, comm, TASK_COMM_LEN )
-		__field( pid_t, pid )
-		__field( int, cpu )
-		__field( int, tasks )
-		__field( int, idx )
-		__field( int, boost )
-		__field( int, max_boost )
-		__field( u64, group_ts )
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
-		__entry->pid = tsk->pid;
-		__entry->cpu = cpu;
-		__entry->tasks = tasks;
-		__entry->idx = idx;
-		__entry->boost = boost;
-		__entry->max_boost = max_boost;
-		__entry->group_ts = group_ts;
-	),
-
-	TP_printk("pid=%d comm=%s "
-		  "cpu=%d tasks=%d idx=%d boost=%d max_boost=%d timeout=%llu",
-		  __entry->pid, __entry->comm,
-		  __entry->cpu, __entry->tasks, __entry->idx,
-		  __entry->boost, __entry->max_boost,
-		  __entry->group_ts)
-);
-
-/*
- * Tracepoint for schedtune_boostgroup_update
- */
-TRACE_EVENT(sched_tune_boostgroup_update,
-
-	TP_PROTO(int cpu, int variation, int max_boost),
-
-	TP_ARGS(cpu, variation, max_boost),
-
-	TP_STRUCT__entry(
-		__field( int, cpu )
-		__field( int, variation )
-		__field( int, max_boost )
-	),
-
-	TP_fast_assign(
-		__entry->cpu = cpu;
-		__entry->variation = variation;
-		__entry->max_boost = max_boost;
-	),
-
TP_printk("cpu=%d variation=%d max_boost=%d", - __entry->cpu, __entry->variation, __entry->max_boost) -); - -/* - * Tracepoint for accounting task boosted utilization - */ -TRACE_EVENT(sched_boost_task, - - TP_PROTO(struct task_struct *tsk, unsigned long util, long margin), - - TP_ARGS(tsk, util, margin), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( unsigned long, util ) - __field( long, margin ) - - ), - - TP_fast_assign( - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); - __entry->pid = tsk->pid; - __entry->util = util; - __entry->margin = margin; - ), - - TP_printk("comm=%s pid=%d util=%lu margin=%ld", - __entry->comm, __entry->pid, - __entry->util, - __entry->margin) -); - -/* - * Tracepoint for system overutilized flag -*/ -TRACE_EVENT(sched_overutilized, - - TP_PROTO(int overutilized), - - TP_ARGS(overutilized), - - TP_STRUCT__entry( - __field( int, overutilized ) - ), - - TP_fast_assign( - __entry->overutilized = overutilized; - ), - - TP_printk("overutilized=%d", - __entry->overutilized) -); - -#endif /* CONFIG_SMP */ #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ -- Gitblit v1.6.2