From 61598093bbdd283a7edc367d900f223070ead8d2 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Fri, 10 May 2024 07:43:03 +0000
Subject: [PATCH] add ax88772C AX88772C_eeprom_tools

---
 kernel/kernel/sched/rt.c | 345 ++++++++++++++++++++++++++++++++++++---------------------
 1 files changed, 219 insertions(+), 126 deletions(-)

diff --git a/kernel/kernel/sched/rt.c b/kernel/kernel/sched/rt.c
index 5e85960..de4a16e 100644
--- a/kernel/kernel/sched/rt.c
+++ b/kernel/kernel/sched/rt.c
@@ -7,8 +7,12 @@
 
 #include "pelt.h"
 
+#include <trace/hooks/sched.h>
+
 int sched_rr_timeslice = RR_TIMESLICE;
 int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
+/* More than 4 hours if BW_SHIFT equals 20. */
+static const u64 max_rt_runtime = MAX_BW;
 
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
 
@@ -45,8 +49,8 @@
 
 	raw_spin_lock_init(&rt_b->rt_runtime_lock);
 
-	hrtimer_init(&rt_b->rt_period_timer,
-			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL_HARD);
 	rt_b->rt_period_timer.function = sched_rt_period_timer;
 }
 
@@ -64,7 +68,8 @@
 		 * to update the period.
 		 */
 		hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
-		hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED);
+		hrtimer_start_expires(&rt_b->rt_period_timer,
+				      HRTIMER_MODE_ABS_PINNED_HARD);
 	}
 	raw_spin_unlock(&rt_b->rt_runtime_lock);
 }
@@ -434,7 +439,7 @@
 #endif /* CONFIG_SMP */
 
 static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
-static void dequeue_top_rt_rq(struct rt_rq *rt_rq);
+static void dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count);
 
 static inline int on_rt_rq(struct sched_rt_entity *rt_se)
 {
@@ -555,7 +560,7 @@
 	rt_se = rt_rq->tg->rt_se[cpu];
 
 	if (!rt_se) {
-		dequeue_top_rt_rq(rt_rq);
+		dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
 		/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
 		cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
 	}
@@ -641,7 +646,7 @@
 
 static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 {
-	dequeue_top_rt_rq(rt_rq);
+	dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
 }
 
 static inline int rt_rq_throttled(struct rt_rq *rt_rq)
@@ -973,6 +978,13 @@
 		if (likely(rt_b->rt_runtime)) {
 			rt_rq->rt_throttled = 1;
 			printk_deferred_once("sched: RT throttling activated\n");
+
+			trace_android_vh_dump_throttled_rt_tasks(
+				raw_smp_processor_id(),
+				rq_clock(rq_of_rt_rq(rt_rq)),
+				sched_rt_period(rt_rq),
+				runtime,
+				hrtimer_get_expires_ns(&rt_b->rt_period_timer));
 		} else {
 			/*
 			 * In case we did anyway, make it go away,
@@ -1019,6 +1031,8 @@
 	curr->se.exec_start = now;
 	cgroup_account_cputime(curr, delta_exec);
 
+	trace_android_vh_sched_stat_runtime_rt(curr, delta_exec);
+
 	if (!rt_bandwidth_enabled())
 		return;
 
@@ -1040,7 +1054,7 @@
 }
 
 static void
-dequeue_top_rt_rq(struct rt_rq *rt_rq)
+dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count)
 {
 	struct rq *rq = rq_of_rt_rq(rt_rq);
 
@@ -1051,7 +1065,7 @@
 
 	BUG_ON(!rq->nr_running);
 
-	sub_nr_running(rq, rt_rq->rt_nr_running);
+	sub_nr_running(rq, count);
 	rt_rq->rt_queued = 0;
 }
 
@@ -1330,18 +1344,21 @@
 static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
 {
 	struct sched_rt_entity *back = NULL;
+	unsigned int rt_nr_running;
 
 	for_each_sched_rt_entity(rt_se) {
 		rt_se->back = back;
 		back = rt_se;
 	}
 
-	dequeue_top_rt_rq(rt_rq_of_se(back));
+	rt_nr_running = rt_rq_of_se(back)->rt_nr_running;
 
 	for (rt_se = back; rt_se; rt_se = rt_se->back) {
 		if (on_rt_rq(rt_se))
 			__dequeue_rt_entity(rt_se, flags);
 	}
+
+	dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running);
 }
 
 static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
@@ -1369,6 +1386,27 @@
 		enqueue_top_rt_rq(&rq->rt);
 }
 
+#ifdef CONFIG_SMP
+static inline bool should_honor_rt_sync(struct rq *rq, struct task_struct *p,
+					bool sync)
+{
+	/*
+	 * If the waker is CFS, then an RT sync wakeup would preempt the waker
+	 * and force it to run for a likely small time after the RT wakee is
+	 * done. So, only honor RT sync wakeups from RT wakers.
+	 */
+	return sync && task_has_rt_policy(rq->curr) &&
+	       p->prio <= rq->rt.highest_prio.next &&
+	       rq->rt.rt_nr_running <= 2;
+}
+#else
+static inline bool should_honor_rt_sync(struct rq *rq, struct task_struct *p,
+					bool sync)
+{
+	return 0;
+}
+#endif
+
 /*
  * Adding/removing a task to/from a priority array:
  */
@@ -1376,23 +1414,21 @@
 enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 {
 	struct sched_rt_entity *rt_se = &p->rt;
-
-	schedtune_enqueue_task(p, cpu_of(rq));
+	bool sync = !!(flags & ENQUEUE_WAKEUP_SYNC);
 
 	if (flags & ENQUEUE_WAKEUP)
 		rt_se->timeout = 0;
 
 	enqueue_rt_entity(rt_se, flags);
 
-	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
+	if (!task_current(rq, p) && p->nr_cpus_allowed > 1 &&
+	    !should_honor_rt_sync(rq, p, sync))
 		enqueue_pushable_task(rq, p);
 }
 
 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 {
 	struct sched_rt_entity *rt_se = &p->rt;
-
-	schedtune_dequeue_task(p, cpu_of(rq));
 
 	update_curr_rt(rq);
 	dequeue_rt_entity(rt_se, flags);
@@ -1437,13 +1473,43 @@
 #ifdef CONFIG_SMP
 static int find_lowest_rq(struct task_struct *task);
 
+#ifdef CONFIG_RT_SOFTINT_OPTIMIZATION
+/*
+ * Return whether the task on the given cpu is currently non-preemptible
+ * while handling a potentially long softint, or if the task is likely
+ * to block preemptions soon because it is a ksoftirq thread that is
+ * handling slow softints.
+ */
+bool
+task_may_not_preempt(struct task_struct *task, int cpu)
+{
+	__u32 softirqs = per_cpu(active_softirqs, cpu) |
+			 __IRQ_STAT(cpu, __softirq_pending);
+
+	struct task_struct *cpu_ksoftirqd = per_cpu(ksoftirqd, cpu);
+	return ((softirqs & LONG_SOFTIRQ_MASK) &&
+		(task == cpu_ksoftirqd ||
+		 task_thread_info(task)->preempt_count & SOFTIRQ_MASK));
+}
+EXPORT_SYMBOL_GPL(task_may_not_preempt);
+#endif /* CONFIG_RT_SOFTINT_OPTIMIZATION */
+
 static int
-select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags,
-		  int sibling_count_hint)
+select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
 {
 	struct task_struct *curr;
 	struct rq *rq;
+	struct rq *this_cpu_rq;
 	bool test;
+	int target_cpu = -1;
+	bool may_not_preempt;
+	bool sync = !!(flags & WF_SYNC);
+	int this_cpu;
+
+	trace_android_rvh_select_task_rq_rt(p, cpu, sd_flag,
+					    flags, &target_cpu);
+	if (target_cpu >= 0)
+		return target_cpu;
 
 	/* For anything but wake ups, just return the task_cpu */
 	if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
@@ -1453,9 +1519,16 @@
 
 	rcu_read_lock();
 	curr = READ_ONCE(rq->curr); /* unlocked access */
+	this_cpu = smp_processor_id();
+	this_cpu_rq = cpu_rq(this_cpu);
 
 	/*
-	 * If the current task on @p's runqueue is an RT task, then
+	 * If the current task on @p's runqueue is a softirq task,
+	 * it may run without preemption for a time that is
+	 * ill-suited for a waiting RT task. Therefore, try to
+	 * wake this RT task on another runqueue.
+	 *
+	 * Also, if the current task on @p's runqueue is an RT task, then
 	 * try to see if we can wake this RT task up on another
 	 * runqueue. Otherwise simply start this RT task
 	 * on its current runqueue.
@@ -1480,9 +1553,21 @@
 	 * requirement of the task - which is only important on heterogeneous
 	 * systems like big.LITTLE.
 	 */
-	test = curr &&
-	       unlikely(rt_task(curr)) &&
-	       (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio);
+	may_not_preempt = task_may_not_preempt(curr, cpu);
+	test = (curr && (may_not_preempt ||
+		(unlikely(rt_task(curr)) &&
+		 (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio))));
+
+	if (IS_ENABLED(CONFIG_ROCKCHIP_PERFORMANCE))
+		test |= rockchip_perf_misfit_rt(cpu);
+	/*
+	 * Respect the sync flag as long as the task can run on this CPU.
+	 */
+	if (should_honor_rt_sync(this_cpu_rq, p, sync) &&
+	    cpumask_test_cpu(this_cpu, p->cpus_ptr)) {
+		cpu = this_cpu;
+		goto out_unlock;
+	}
 
 	if (test || !rt_task_fits_capacity(p, cpu)) {
 		int target = find_lowest_rq(p);
@@ -1495,11 +1580,14 @@
 			goto out_unlock;
 
 		/*
-		 * Don't bother moving it if the destination CPU is
+		 * If cpu is non-preemptible, prefer remote cpu
+		 * even if it's running a higher-prio task.
+		 * Otherwise: Don't bother moving it if the destination CPU is
 		 * not running a lower priority task.
 		 */
 		if (target != -1 &&
-		    p->prio < cpu_rq(target)->rt.highest_prio.curr)
+		    (may_not_preempt ||
+		     p->prio < cpu_rq(target)->rt.highest_prio.curr))
 			cpu = target;
 	}
 
@@ -1537,6 +1625,26 @@
 		resched_curr(rq);
 }
 
+static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+{
+	if (!on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
+		int done = 0;
+
+		/*
+		 * This is OK, because current is on_cpu, which avoids it being
+		 * picked for load-balance and preemption/IRQs are still
+		 * disabled avoiding further scheduler activity on it and we've
+		 * not yet started the picking loop.
+		 */
+		rq_unpin_lock(rq, rf);
+		trace_android_rvh_sched_balance_rt(rq, p, &done);
+		if (!done)
+			pull_rt_task(rq);
+		rq_repin_lock(rq, rf);
+	}
+
+	return sched_stop_runnable(rq) || sched_dl_runnable(rq) || sched_rt_runnable(rq);
+}
 #endif /* CONFIG_SMP */
 
 /*
@@ -1567,8 +1675,28 @@
 #endif
 }
 
-static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
-						   struct rt_rq *rt_rq)
+static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool first)
+{
+	p->se.exec_start = rq_clock_task(rq);
+
+	/* The running task is never eligible for pushing */
+	dequeue_pushable_task(rq, p);
+
+	if (!first)
+		return;
+
+	/*
+	 * If prev task was rt, put_prev_task() has already updated the
+	 * utilization. We only care of the case where we start to schedule a
+	 * rt task
+	 */
+	if (rq->curr->sched_class != &rt_sched_class)
+		update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
+
+	rt_queue_push_tasks(rq);
+}
+
+static struct sched_rt_entity *pick_next_rt_entity(struct rt_rq *rt_rq)
 {
 	struct rt_prio_array *array = &rt_rq->active;
 	struct sched_rt_entity *next = NULL;
@@ -1579,6 +1707,8 @@
 	BUG_ON(idx >= MAX_RT_PRIO);
 
 	queue = array->queue + idx;
+	if (SCHED_WARN_ON(list_empty(queue)))
+		return NULL;
 	next = list_entry(queue->next, struct sched_rt_entity, run_list);
 
 	return next;
@@ -1587,74 +1717,27 @@
 static struct task_struct *_pick_next_task_rt(struct rq *rq)
 {
 	struct sched_rt_entity *rt_se;
-	struct task_struct *p;
 	struct rt_rq *rt_rq = &rq->rt;
 
 	do {
-		rt_se = pick_next_rt_entity(rq, rt_rq);
-		BUG_ON(!rt_se);
+		rt_se = pick_next_rt_entity(rt_rq);
+		if (unlikely(!rt_se))
+			return NULL;
 		rt_rq = group_rt_rq(rt_se);
 	} while (rt_rq);
 
-	p = rt_task_of(rt_se);
-	p->se.exec_start = rq_clock_task(rq);
-
-	return p;
+	return rt_task_of(rt_se);
 }
 
-static struct task_struct *
-pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+static struct task_struct *pick_next_task_rt(struct rq *rq)
 {
 	struct task_struct *p;
-	struct rt_rq *rt_rq = &rq->rt;
 
-	if (need_pull_rt_task(rq, prev)) {
-		/*
-		 * This is OK, because current is on_cpu, which avoids it being
-		 * picked for load-balance and preemption/IRQs are still
-		 * disabled avoiding further scheduler activity on it and we're
-		 * being very careful to re-start the picking loop.
-		 */
-		rq_unpin_lock(rq, rf);
-		pull_rt_task(rq);
-		rq_repin_lock(rq, rf);
-		/*
-		 * pull_rt_task() can drop (and re-acquire) rq->lock; this
-		 * means a dl or stop task can slip in, in which case we need
-		 * to re-start task selection.
-		 */
-		if (unlikely((rq->stop && task_on_rq_queued(rq->stop)) ||
-			     rq->dl.dl_nr_running))
-			return RETRY_TASK;
-	}
-
-	/*
-	 * We may dequeue prev's rt_rq in put_prev_task().
-	 * So, we update time before rt_nr_running check.
-	 */
-	if (prev->sched_class == &rt_sched_class)
-		update_curr_rt(rq);
-
-	if (!rt_rq->rt_queued)
+	if (!sched_rt_runnable(rq))
 		return NULL;
 
-	put_prev_task(rq, prev);
-
 	p = _pick_next_task_rt(rq);
-
-	/* The running task is never eligible for pushing */
-	dequeue_pushable_task(rq, p);
-
-	rt_queue_push_tasks(rq);
-
-	/*
-	 * If prev task was rt, put_prev_task() has already updated the
-	 * utilization. We only care of the case where we start to schedule a
-	 * rt task
-	 */
-	if (rq->curr->sched_class != &rt_sched_class)
-		update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
-
+	set_next_task_rt(rq, p, true);
 	return p;
 }
 
@@ -1680,7 +1763,7 @@
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
 	if (!task_running(rq, p) &&
-	    cpumask_test_cpu(cpu, &p->cpus_allowed))
+	    cpumask_test_cpu(cpu, p->cpus_ptr))
 		return 1;
 
 	return 0;
@@ -1690,7 +1773,7 @@
  * Return the highest pushable rq's task, which is suitable to be executed
  * on the CPU, NULL otherwise
  */
-static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
+struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
 {
 	struct plist_head *head = &rq->rt.pushable_tasks;
 	struct task_struct *p;
@@ -1705,6 +1788,7 @@
 
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(pick_highest_pushable_task);
 
 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
 
@@ -1713,7 +1797,7 @@
 	struct sched_domain *sd;
 	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
 	int this_cpu = smp_processor_id();
-	int cpu = task_cpu(task);
+	int cpu = -1;
 	int ret;
 
 	/* Make sure the mask is initialized first */
@@ -1738,9 +1822,17 @@
 						  task, lowest_mask);
 	}
 
+	trace_android_rvh_find_lowest_rq(task, lowest_mask, ret, &cpu);
+	if (cpu >= 0)
+		return cpu;
+
 	if (!ret)
 		return -1; /* No targets found */
 
+	cpu = task_cpu(task);
+
+	if (IS_ENABLED(CONFIG_ROCKCHIP_PERFORMANCE))
+		cpu = rockchip_perf_select_rt_cpu(cpu, lowest_mask);
 	/*
 	 * At this point we have built a mask of CPUs representing the
 	 * lowest priority tasks in the system. Now we want to elect
@@ -1833,7 +1925,7 @@
 			 * Also make sure that it wasn't scheduled on its rq.
 			 */
 			if (unlikely(task_rq(task) != rq ||
-				     !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) ||
+				     !cpumask_test_cpu(lowest_rq->cpu, task->cpus_ptr) ||
 				     task_running(rq, task) ||
 				     !rt_task(task) ||
 				     !task_on_rq_queued(task))) {
@@ -1895,10 +1987,8 @@
 		return 0;
 
 retry:
-	if (unlikely(next_task == rq->curr)) {
-		WARN_ON(1);
+	if (WARN_ON(next_task == rq->curr))
 		return 0;
-	}
 
 	/*
 	 * It's possible that the next_task slipped in of
@@ -2315,13 +2405,20 @@
 static void switched_to_rt(struct rq *rq, struct task_struct *p)
 {
 	/*
-	 * If we are already running, then there's nothing
-	 * that needs to be done. But if we are not running
-	 * we may need to preempt the current running task.
-	 * If that current running task is also an RT task
+	 * If we are running, update the avg_rt tracking, as the running time
+	 * will now on be accounted into the latter.
+	 */
+	if (task_current(rq, p)) {
+		update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
+		return;
+	}
+
+	/*
+	 * If we are not running we may need to preempt the current
+	 * running task. If that current running task is also an RT task
 	 * then see if we can move to another run queue.
 	 */
-	if (task_on_rq_queued(p) && rq->curr != p) {
+	if (task_on_rq_queued(p)) {
 #ifdef CONFIG_SMP
 		if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
 			rt_queue_push_tasks(rq);
@@ -2390,8 +2487,10 @@
 		}
 
 		next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
-		if (p->rt.timeout > next)
-			p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
+		if (p->rt.timeout > next) {
+			posix_cputimers_rt_watchdog(&p->posix_cputimers,
+						    p->se.sum_exec_runtime);
+		}
 	}
 }
 #else
@@ -2440,16 +2539,6 @@
 	}
 }
 
-static void set_curr_task_rt(struct rq *rq)
-{
-	struct task_struct *p = rq->curr;
-
-	p->se.exec_start = rq_clock_task(rq);
-
-	/* The running task is never eligible for pushing */
-	dequeue_pushable_task(rq, p);
-}
-
 static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
 {
 	/*
@@ -2461,8 +2550,8 @@
 	return 0;
 }
 
-const struct sched_class rt_sched_class = {
-	.next = &fair_sched_class,
+const struct sched_class rt_sched_class
+	__section("__rt_sched_class") = {
 	.enqueue_task = enqueue_task_rt,
 	.dequeue_task = dequeue_task_rt,
 	.yield_task = yield_task_rt,
@@ -2471,10 +2560,11 @@
 
 	.pick_next_task = pick_next_task_rt,
 	.put_prev_task = put_prev_task_rt,
+	.set_next_task = set_next_task_rt,
 
 #ifdef CONFIG_SMP
+	.balance = balance_rt,
 	.select_task_rq = select_task_rq_rt,
-	.set_cpus_allowed = set_cpus_allowed_common,
 	.rq_online = rq_online_rt,
 	.rq_offline = rq_offline_rt,
@@ -2482,7 +2572,6 @@
 	.switched_from = switched_from_rt,
 #endif
 
-	.set_curr_task = set_curr_task_rt,
 	.task_tick = task_tick_rt,
 
 	.get_rr_interval = get_rr_interval_rt,
@@ -2503,10 +2592,11 @@
  */
 static DEFINE_MUTEX(rt_constraints_mutex);
 
-/* Must be called with tasklist_lock held */
 static inline int tg_has_rt_tasks(struct task_group *tg)
 {
-	struct task_struct *g, *p;
+	struct task_struct *task;
+	struct css_task_iter it;
+	int ret = 0;
 
 	/*
 	 * Autogroups do not have RT tasks; see autogroup_create().
@@ -2514,12 +2604,12 @@
 	if (task_group_is_autogroup(tg))
 		return 0;
 
-	for_each_process_thread(g, p) {
-		if (rt_task(p) && task_group(p) == tg)
-			return 1;
-	}
+	css_task_iter_start(&tg->css, 0, &it);
+	while (!ret && (task = css_task_iter_next(&it)))
+		ret |= rt_task(task);
+	css_task_iter_end(&it);
 
-	return 0;
+	return ret;
 }
 
 struct rt_schedulable_data {
@@ -2550,9 +2640,10 @@
 		return -EINVAL;
 
 	/*
-	 * Ensure we don't starve existing RT tasks.
+	 * Ensure we don't starve existing RT tasks if runtime turns zero.
 	 */
-	if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
+	if (rt_bandwidth_enabled() && !runtime &&
+	    tg->rt_bandwidth.rt_runtime && tg_has_rt_tasks(tg))
 		return -EBUSY;
 
 	total = to_ratio(period, runtime);
@@ -2617,8 +2708,13 @@
 	if (rt_period == 0)
 		return -EINVAL;
 
+	/*
+	 * Bound quota to defend quota against overflow during bandwidth shift.
+	 */
+	if (rt_runtime != RUNTIME_INF && rt_runtime > max_rt_runtime)
+		return -EINVAL;
+
 	mutex_lock(&rt_constraints_mutex);
-	read_lock(&tasklist_lock);
 	err = __rt_schedulable(tg, rt_period, rt_runtime);
 	if (err)
 		goto unlock;
@@ -2636,7 +2732,6 @@
 	}
 	raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
 unlock:
-	read_unlock(&tasklist_lock);
 	mutex_unlock(&rt_constraints_mutex);
 
 	return err;
@@ -2695,9 +2790,7 @@
 	int ret = 0;
 
 	mutex_lock(&rt_constraints_mutex);
-	read_lock(&tasklist_lock);
 	ret = __rt_schedulable(NULL, 0, 0);
-	read_unlock(&tasklist_lock);
 	mutex_unlock(&rt_constraints_mutex);
 
 	return ret;
@@ -2738,7 +2831,9 @@
 		return -EINVAL;
 
 	if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
-		(sysctl_sched_rt_runtime > sysctl_sched_rt_period))
+		((sysctl_sched_rt_runtime > sysctl_sched_rt_period) ||
+		 ((u64)sysctl_sched_rt_runtime *
+			NSEC_PER_USEC > max_rt_runtime)))
 		return -EINVAL;
 
 	return 0;
@@ -2754,9 +2849,8 @@
 	raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
 }
 
-int sched_rt_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos)
+int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos)
 {
 	int old_period, old_runtime;
 	static DEFINE_MUTEX(mutex);
@@ -2794,9 +2888,8 @@
 	return ret;
 }
 
-int sched_rr_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos)
+int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos)
 {
 	int ret;
 	static DEFINE_MUTEX(mutex);
-- 
Gitblit v1.6.2