2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/include/linux/sched.h
....@@ -18,25 +18,31 @@
1818 #include <linux/mutex.h>
1919 #include <linux/plist.h>
2020 #include <linux/hrtimer.h>
21
+#include <linux/irqflags.h>
2122 #include <linux/seccomp.h>
2223 #include <linux/nodemask.h>
2324 #include <linux/rcupdate.h>
25
+#include <linux/refcount.h>
2426 #include <linux/resource.h>
2527 #include <linux/latencytop.h>
2628 #include <linux/sched/prio.h>
29
+#include <linux/sched/types.h>
2730 #include <linux/signal_types.h>
2831 #include <linux/mm_types_task.h>
29
-#include <linux/mm_event.h>
3032 #include <linux/task_io_accounting.h>
33
+#include <linux/posix-timers.h>
3134 #include <linux/rseq.h>
35
+#include <linux/seqlock.h>
36
+#include <linux/kcsan.h>
37
+#include <linux/android_vendor.h>
3238 #include <linux/android_kabi.h>
33
-#include <asm/kmap_types.h>
3439
3540 /* task_struct member predeclarations (sorted alphabetically): */
3641 struct audit_context;
3742 struct backing_dev_info;
3843 struct bio_list;
3944 struct blk_plug;
45
+struct capture_control;
4046 struct cfs_rq;
4147 struct fs_struct;
4248 struct futex_pi_state;
....@@ -50,6 +56,8 @@
5056 struct rcu_node;
5157 struct reclaim_state;
5258 struct robust_list_head;
59
+struct root_domain;
60
+struct rq;
5361 struct sched_attr;
5462 struct sched_param;
5563 struct seq_file;
....@@ -57,6 +65,7 @@
5765 struct signal_struct;
5866 struct task_delay_info;
5967 struct task_group;
68
+struct io_uring_task;
6069
6170 /*
6271 * Task state bitmask. NOTE! These bits are also
....@@ -106,10 +115,6 @@
106115
107116 #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0)
108117
109
-#define task_contributes_to_load(task) ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
110
- (task->flags & PF_FROZEN) == 0 && \
111
- (task->state & TASK_NOLOAD) == 0)
112
-
113118 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
114119
115120 /*
....@@ -145,7 +150,6 @@
145150 current->state = (state_value); \
146151 raw_spin_unlock_irqrestore(&current->pi_lock, flags); \
147152 } while (0)
148
-
149153 #else
150154 /*
151155 * set_current_state() includes a barrier so that the write of current->state
....@@ -154,31 +158,31 @@
154158 *
155159 * for (;;) {
156160 * set_current_state(TASK_UNINTERRUPTIBLE);
157
- * if (!need_sleep)
158
- * break;
161
+ * if (CONDITION)
162
+ * break;
159163 *
160164 * schedule();
161165 * }
162166 * __set_current_state(TASK_RUNNING);
163167 *
164168 * If the caller does not need such serialisation (because, for instance, the
165
- * condition test and condition change and wakeup are under the same lock) then
169
+ * CONDITION test and condition change and wakeup are under the same lock) then
166170 * use __set_current_state().
167171 *
168172 * The above is typically ordered against the wakeup, which does:
169173 *
170
- * need_sleep = false;
174
+ * CONDITION = 1;
171175 * wake_up_state(p, TASK_UNINTERRUPTIBLE);
172176 *
173
- * where wake_up_state() executes a full memory barrier before accessing the
174
- * task state.
177
+ * where wake_up_state()/try_to_wake_up() executes a full memory barrier before
178
+ * accessing p->state.
175179 *
176180 * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is,
177181 * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
178182 * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
179183 *
180184 * However, with slightly different timing the wakeup TASK_RUNNING store can
181
- * also collide with the TASK_UNINTERRUPTIBLE store. Loosing that store is not
185
+ * also collide with the TASK_UNINTERRUPTIBLE store. Losing that store is not
182186 * a problem either because that will result in one extra go around the loop
183187 * and our @cond test will save the day.
184188 *
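
Written out as ordinary kernel C, the sleep/wakeup pairing that the comment above describes looks roughly like the sketch below; example_cond, example_sleeper() and example_waker() are illustrative placeholders standing in for CONDITION and its users, not code from this header.

	static bool example_cond;

	static void example_sleeper(void)
	{
		for (;;) {
			set_current_state(TASK_UNINTERRUPTIBLE);
			if (example_cond)		/* the CONDITION test */
				break;
			schedule();
		}
		__set_current_state(TASK_RUNNING);
	}

	static void example_waker(struct task_struct *p)
	{
		example_cond = true;			/* CONDITION = 1 */
		wake_up_state(p, TASK_UNINTERRUPTIBLE);	/* full barrier before reading p->state */
	}

Because set_current_state() includes the barrier mentioned above, the waker either observes the sleeper already in TASK_UNINTERRUPTIBLE or the sleeper observes example_cond set before it calls schedule(), which is exactly the serialisation the comment is describing.
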
....@@ -223,13 +227,12 @@
223227 extern long schedule_timeout_idle(long timeout);
224228 asmlinkage void schedule(void);
225229 extern void schedule_preempt_disabled(void);
230
+asmlinkage void preempt_schedule_irq(void);
226231
227232 extern int __must_check io_schedule_prepare(void);
228233 extern void io_schedule_finish(int token);
229234 extern long io_schedule_timeout(long timeout);
230235 extern void io_schedule(void);
231
-
232
-int cpu_nr_pinned(int cpu);
233236
234237 /**
235238 * struct prev_cputime - snapshot of system and user cputime
....@@ -248,40 +251,24 @@
248251 #endif
249252 };
250253
251
-/**
252
- * struct task_cputime - collected CPU time counts
253
- * @utime: time spent in user mode, in nanoseconds
254
- * @stime: time spent in kernel mode, in nanoseconds
255
- * @sum_exec_runtime: total time spent on the CPU, in nanoseconds
256
- *
257
- * This structure groups together three kinds of CPU time that are tracked for
258
- * threads and thread groups. Most things considering CPU time want to group
259
- * these counts together and treat all three of them in parallel.
260
- */
261
-struct task_cputime {
262
- u64 utime;
263
- u64 stime;
264
- unsigned long long sum_exec_runtime;
265
-};
266
-
267
-/* Alternate field names when used on cache expirations: */
268
-#define virt_exp utime
269
-#define prof_exp stime
270
-#define sched_exp sum_exec_runtime
271
-
272254 enum vtime_state {
273255 /* Task is sleeping or running in a CPU with VTIME inactive: */
274256 VTIME_INACTIVE = 0,
275
- /* Task runs in userspace in a CPU with VTIME active: */
276
- VTIME_USER,
257
+ /* Task is idle */
258
+ VTIME_IDLE,
277259 /* Task runs in kernelspace in a CPU with VTIME active: */
278260 VTIME_SYS,
261
+ /* Task runs in userspace in a CPU with VTIME active: */
262
+ VTIME_USER,
263
+ /* Task runs as guests in a CPU with VTIME active: */
264
+ VTIME_GUEST,
279265 };
280266
281267 struct vtime {
282268 seqcount_t seqcount;
283269 unsigned long long starttime;
284270 enum vtime_state state;
271
+ unsigned int cpu;
285272 u64 utime;
286273 u64 stime;
287274 u64 gtime;
....@@ -298,6 +285,11 @@
298285 UCLAMP_MAX,
299286 UCLAMP_CNT
300287 };
288
+
289
+#ifdef CONFIG_SMP
290
+extern struct root_domain def_root_domain;
291
+extern struct mutex sched_domains_mutex;
292
+#endif
301293
302294 struct sched_info {
303295 #ifdef CONFIG_SCHED_INFO
....@@ -360,36 +352,46 @@
360352 * Only for tasks we track a moving average of the past instantaneous
361353 * estimated utilization. This allows to absorb sporadic drops in utilization
362354 * of an otherwise almost periodic task.
355
+ *
356
+ * The UTIL_AVG_UNCHANGED flag is used to synchronize util_est with util_avg
357
+ * updates. When a task is dequeued, its util_est should not be updated if its
358
+ * util_avg has not been updated in the meantime.
359
+ * This information is mapped into the MSB bit of util_est.enqueued at dequeue
360
+ * time. Since max value of util_est.enqueued for a task is 1024 (PELT util_avg
361
+ * for a task) it is safe to use MSB.
363362 */
364363 struct util_est {
365364 unsigned int enqueued;
366365 unsigned int ewma;
367366 #define UTIL_EST_WEIGHT_SHIFT 2
367
+#define UTIL_AVG_UNCHANGED 0x80000000
368368 } __attribute__((__aligned__(sizeof(u64))));
369369
370370 /*
371
- * The load_avg/util_avg accumulates an infinite geometric series
372
- * (see __update_load_avg() in kernel/sched/fair.c).
371
+ * The load/runnable/util_avg accumulates an infinite geometric series
372
+ * (see __update_load_avg_cfs_rq() in kernel/sched/pelt.c).
373373 *
374374 * [load_avg definition]
375375 *
376376 * load_avg = runnable% * scale_load_down(load)
377377 *
378
- * where runnable% is the time ratio that a sched_entity is runnable.
379
- * For cfs_rq, it is the aggregated load_avg of all runnable and
380
- * blocked sched_entities.
378
+ * [runnable_avg definition]
379
+ *
380
+ * runnable_avg = runnable% * SCHED_CAPACITY_SCALE
381381 *
382382 * [util_avg definition]
383383 *
384384 * util_avg = running% * SCHED_CAPACITY_SCALE
385385 *
386
- * where running% is the time ratio that a sched_entity is running on
387
- * a CPU. For cfs_rq, it is the aggregated util_avg of all runnable
388
- * and blocked sched_entities.
386
+ * where runnable% is the time ratio that a sched_entity is runnable and
387
+ * running% the time ratio that a sched_entity is running.
389388 *
390
- * load_avg and util_avg don't direcly factor frequency scaling and CPU
391
- * capacity scaling. The scaling is done through the rq_clock_pelt that
392
- * is used for computing those signals (see update_rq_clock_pelt())
389
+ * For cfs_rq, they are the aggregated values of all runnable and blocked
390
+ * sched_entities.
391
+ *
392
+ * The load/runnable/util_avg doesn't directly factor frequency scaling and CPU
393
+ * capacity scaling. The scaling is done through the rq_clock_pelt that is used
394
+ * for computing those signals (see update_rq_clock_pelt())
393395 *
394396 * N.B., the above ratios (runnable% and running%) themselves are in the
395397 * range of [0, 1]. To do fixed point arithmetics, we therefore scale them
....@@ -413,11 +415,11 @@
413415 struct sched_avg {
414416 u64 last_update_time;
415417 u64 load_sum;
416
- u64 runnable_load_sum;
418
+ u64 runnable_sum;
417419 u32 util_sum;
418420 u32 period_contrib;
419421 unsigned long load_avg;
420
- unsigned long runnable_load_avg;
422
+ unsigned long runnable_avg;
421423 unsigned long util_avg;
422424 struct util_est util_est;
423425 } ____cacheline_aligned;
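
As a concrete reading of the definitions above (a worked example, assuming SCHED_CAPACITY_SCALE has its usual value of 1024): a task that is actually running for about 25% of the time converges to util_avg ~= 0.25 * 1024 = 256; if it spends another 25% of the time waiting on a runqueue it is runnable 50% of the time, giving runnable_avg ~= 0.50 * 1024 = 512, while load_avg weights that same 50% by scale_load_down(load) instead of SCHED_CAPACITY_SCALE. The frequency and CPU-capacity scaling mentioned above is not applied to these ratios directly; it is folded in through rq_clock_pelt when the sums are accumulated.
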
....@@ -461,7 +463,6 @@
461463 struct sched_entity {
462464 /* For load-balancing: */
463465 struct load_weight load;
464
- unsigned long runnable_weight;
465466 struct rb_node run_node;
466467 struct list_head group_node;
467468 unsigned int on_rq;
....@@ -482,6 +483,8 @@
482483 struct cfs_rq *cfs_rq;
483484 /* rq "owned" by this entity/group: */
484485 struct cfs_rq *my_q;
486
+ /* cached value of my_q->h_nr_running */
487
+ unsigned long runnable_weight;
485488 #endif
486489
487490 #ifdef CONFIG_SMP
....@@ -539,7 +542,7 @@
539542
540543 /*
541544 * Actual scheduling parameters. Initialized with the values above,
542
- * they are continously updated during task execution. Note that
545
+ * they are continuously updated during task execution. Note that
543546 * the remaining runtime could be < 0 in case we are in overrun.
544547 */
545548 s64 runtime; /* Remaining runtime for this instance */
....@@ -552,10 +555,6 @@
552555 * @dl_throttled tells if we exhausted the runtime. If so, the
553556 * task has to wait for a replenishment to be performed at the
554557 * next firing of dl_timer.
555
- *
556
- * @dl_boosted tells if we are boosted due to DI. If so we are
557
- * outside bandwidth enforcement mechanism (but only until we
558
- * exit the critical section);
559558 *
560559 * @dl_yielded tells if task gave up the CPU before consuming
561560 * all its available runtime during the last job.
....@@ -571,7 +570,6 @@
571570 * overruns.
572571 */
573572 unsigned int dl_throttled : 1;
574
- unsigned int dl_boosted : 1;
575573 unsigned int dl_yielded : 1;
576574 unsigned int dl_non_contending : 1;
577575 unsigned int dl_overrun : 1;
....@@ -590,6 +588,15 @@
590588 * time.
591589 */
592590 struct hrtimer inactive_timer;
591
+
592
+#ifdef CONFIG_RT_MUTEXES
593
+ /*
594
+ * Priority Inheritance. When a DEADLINE scheduling entity is boosted
595
+ * pi_se points to the donor, otherwise points to the dl_se it belongs
596
+ * to (the original one/itself).
597
+ */
598
+ struct sched_dl_entity *pi_se;
599
+#endif
593600 };
594601
595602 #ifdef CONFIG_UCLAMP_TASK
....@@ -631,10 +638,8 @@
631638 struct {
632639 u8 blocked;
633640 u8 need_qs;
634
- u8 exp_need_qs;
635
-
636
- /* Otherwise the compiler can store garbage here: */
637
- u8 pad;
641
+ u8 exp_hint; /* Hint for performance. */
642
+ u8 need_mb; /* Readers need smp_mb(). */
638643 } b; /* Bits. */
639644 u32 s; /* Set of bits. */
640645 };
....@@ -648,6 +653,13 @@
648653
649654 struct wake_q_node {
650655 struct wake_q_node *next;
656
+};
657
+
658
+struct kmap_ctrl {
659
+#ifdef CONFIG_KMAP_LOCAL
660
+ int idx;
661
+ pte_t pteval[KM_MAX_IDX];
662
+#endif
651663 };
652664
653665 struct task_struct {
....@@ -670,14 +682,14 @@
670682 randomized_struct_fields_start
671683
672684 void *stack;
673
- atomic_t usage;
685
+ refcount_t usage;
674686 /* Per task flags (PF_*), defined further below: */
675687 unsigned int flags;
676688 unsigned int ptrace;
677689
678690 #ifdef CONFIG_SMP
679
- struct llist_node wake_entry;
680691 int on_cpu;
692
+ struct __call_single_node wake_entry;
681693 #ifdef CONFIG_THREAD_INFO_IN_TASK
682694 /* Current CPU: */
683695 unsigned int cpu;
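
The atomic_t -> refcount_t conversion of ->usage above means task lifetime is now managed through the saturating refcount API; the reference helpers live elsewhere (in <linux/sched/task.h>), but they amount to roughly the sketch below. The benefit of refcount_t is that overflow and underflow saturate and warn instead of silently wrapping.

	/* Rough shape of the task reference helpers built on ->usage. */
	static inline struct task_struct *get_task_struct(struct task_struct *t)
	{
		refcount_inc(&t->usage);
		return t;
	}

	extern void __put_task_struct(struct task_struct *t);

	static inline void put_task_struct(struct task_struct *t)
	{
		if (refcount_dec_and_test(&t->usage))
			__put_task_struct(t);
	}
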
....@@ -706,23 +718,26 @@
706718 const struct sched_class *sched_class;
707719 struct sched_entity se;
708720 struct sched_rt_entity rt;
709
-
710
- /* task boost vendor fields */
711
- u64 last_sleep_ts;
712
- int boost;
713
- u64 boost_period;
714
- u64 boost_expires;
715
-
716721 #ifdef CONFIG_CGROUP_SCHED
717722 struct task_group *sched_task_group;
718723 #endif
719724 struct sched_dl_entity dl;
720725
721726 #ifdef CONFIG_UCLAMP_TASK
722
- /* Clamp values requested for a scheduling entity */
727
+ /*
728
+ * Clamp values requested for a scheduling entity.
729
+ * Must be updated with task_rq_lock() held.
730
+ */
723731 struct uclamp_se uclamp_req[UCLAMP_CNT];
724
- /* Effective clamp values used for a scheduling entity */
732
+ /*
733
+ * Effective clamp values used for a scheduling entity.
734
+ * Must be updated with task_rq_lock() held.
735
+ */
725736 struct uclamp_se uclamp[UCLAMP_CNT];
737
+#endif
738
+
739
+#ifdef CONFIG_HOTPLUG_CPU
740
+ struct list_head percpu_kthread_node;
726741 #endif
727742
728743 #ifdef CONFIG_PREEMPT_NOTIFIERS
....@@ -736,24 +751,13 @@
736751
737752 unsigned int policy;
738753 int nr_cpus_allowed;
739
-// cpumask_t cpus_allowed;
740
- cpumask_t cpus_requested;
741754 const cpumask_t *cpus_ptr;
742755 cpumask_t cpus_mask;
743
-#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE)
744
- int migrate_disable;
745
- bool migrate_disable_scheduled;
746
-# ifdef CONFIG_SCHED_DEBUG
747
- int pinned_on_cpu;
748
-# endif
749
-#elif !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE)
750
-# ifdef CONFIG_SCHED_DEBUG
751
- int migrate_disable;
752
-# endif
756
+ void *migration_pending;
757
+#ifdef CONFIG_SMP
758
+ unsigned short migration_disabled;
753759 #endif
754
-#ifdef CONFIG_PREEMPT_RT_FULL
755
- int sleeping_lock;
756
-#endif
760
+ unsigned short migration_flags;
757761
758762 #ifdef CONFIG_PREEMPT_RCU
759763 int rcu_read_lock_nesting;
....@@ -769,6 +773,14 @@
769773 int rcu_tasks_idle_cpu;
770774 struct list_head rcu_tasks_holdout_list;
771775 #endif /* #ifdef CONFIG_TASKS_RCU */
776
+
777
+#ifdef CONFIG_TASKS_TRACE_RCU
778
+ int trc_reader_nesting;
779
+ int trc_ipi_to_cpu;
780
+ union rcu_special trc_reader_special;
781
+ bool trc_reader_checked;
782
+ struct list_head trc_holdout_list;
783
+#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
772784
773785 struct sched_info sched_info;
774786
....@@ -802,7 +814,6 @@
802814 unsigned sched_reset_on_fork:1;
803815 unsigned sched_contributes_to_load:1;
804816 unsigned sched_migrated:1;
805
- unsigned sched_remote_wakeup:1;
806817 #ifdef CONFIG_PSI
807818 unsigned sched_psi_wake_requeue:1;
808819 #endif
....@@ -812,6 +823,21 @@
812823
813824 /* Unserialized, strictly 'current' */
814825
826
+ /*
827
+ * This field must not be in the scheduler word above due to wakelist
828
+ * queueing no longer being serialized by p->on_cpu. However:
829
+ *
830
+ * p->XXX = X; ttwu()
831
+ * schedule() if (p->on_rq && ..) // false
832
+ * smp_mb__after_spinlock(); if (smp_load_acquire(&p->on_cpu) && //true
833
+ * deactivate_task() ttwu_queue_wakelist())
834
+ * p->on_rq = 0; p->sched_remote_wakeup = Y;
835
+ *
836
+ * guarantees all stores of 'current' are visible before
837
+ * ->sched_remote_wakeup gets used, so it can be in this word.
838
+ */
839
+ unsigned sched_remote_wakeup:1;
840
+
815841 /* Bit to tell LSMs we're in execve(): */
816842 unsigned in_execve:1;
817843 unsigned in_iowait:1;
....@@ -820,9 +846,6 @@
820846 #endif
821847 #ifdef CONFIG_MEMCG
822848 unsigned in_user_fault:1;
823
-#ifdef CONFIG_MEMCG_KMEM
824
- unsigned memcg_kmem_skip_account:1;
825
-#endif
826849 #endif
827850 #ifdef CONFIG_COMPAT_BRK
828851 unsigned brk_randomized:1;
....@@ -830,10 +853,19 @@
830853 #ifdef CONFIG_CGROUPS
831854 /* disallow userland-initiated cgroup migration */
832855 unsigned no_cgroup_migration:1;
856
+ /* task is frozen/stopped (used by the cgroup freezer) */
857
+ unsigned frozen:1;
833858 #endif
834859 #ifdef CONFIG_BLK_CGROUP
835
- /* to be used once the psi infrastructure lands upstream. */
836860 unsigned use_memdelay:1;
861
+#endif
862
+#ifdef CONFIG_PSI
863
+ /* Stalled due to lack of memory */
864
+ unsigned in_memstall:1;
865
+#endif
866
+#ifdef CONFIG_EVENTFD
867
+ /* Recursion prevention for eventfd_signal() */
868
+ unsigned in_eventfd_signal:1;
837869 #endif
838870
839871 unsigned long atomic_flags; /* Flags requiring atomic access. */
....@@ -916,18 +948,17 @@
916948 u64 start_time;
917949
918950 /* Boot based time in nsecs: */
919
- u64 real_start_time;
951
+ u64 start_boottime;
920952
921953 /* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */
922954 unsigned long min_flt;
923955 unsigned long maj_flt;
924956
925
-#ifdef CONFIG_POSIX_TIMERS
926
- struct task_cputime cputime_expires;
927
- struct list_head cpu_timers[3];
928
-#ifdef CONFIG_PREEMPT_RT_BASE
929
- struct task_struct *posix_timer_list;
930
-#endif
957
+ /* Empty if CONFIG_POSIX_CPUTIMERS=n */
958
+ struct posix_cputimers posix_cputimers;
959
+
960
+#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
961
+ struct posix_cputimers_work posix_cputimers_work;
931962 #endif
932963
933964 /* Process credentials: */
....@@ -940,6 +971,11 @@
940971
941972 /* Effective (overridable) subjective task credentials (COW): */
942973 const struct cred __rcu *cred;
974
+
975
+#ifdef CONFIG_KEYS
976
+ /* Cached requested key. */
977
+ struct key *cached_requested_key;
978
+#endif
943979
944980 /*
945981 * executable name, excluding path.
....@@ -966,22 +1002,25 @@
9661002 /* Open file information: */
9671003 struct files_struct *files;
9681004
1005
+#ifdef CONFIG_IO_URING
1006
+ struct io_uring_task *io_uring;
1007
+#endif
1008
+
9691009 /* Namespaces: */
9701010 struct nsproxy *nsproxy;
9711011
9721012 /* Signal handlers: */
9731013 struct signal_struct *signal;
974
- struct sighand_struct *sighand;
1014
+ struct sighand_struct __rcu *sighand;
9751015 struct sigqueue *sigqueue_cache;
976
-
9771016 sigset_t blocked;
9781017 sigset_t real_blocked;
9791018 /* Restored if set_restore_sigmask() was used: */
9801019 sigset_t saved_sigmask;
9811020 struct sigpending pending;
982
-#ifdef CONFIG_PREEMPT_RT_FULL
1021
+#ifdef CONFIG_PREEMPT_RT
9831022 /* TODO: move me into ->restart_block ? */
984
- struct siginfo forced_info;
1023
+ struct kernel_siginfo forced_info;
9851024 #endif
9861025 unsigned long sas_ss_sp;
9871026 size_t sas_ss_size;
....@@ -989,8 +1028,10 @@
9891028
9901029 struct callback_head *task_works;
9911030
992
- struct audit_context *audit_context;
1031
+#ifdef CONFIG_AUDIT
9931032 #ifdef CONFIG_AUDITSYSCALL
1033
+ struct audit_context *audit_context;
1034
+#endif
9941035 kuid_t loginuid;
9951036 unsigned int sessionid;
9961037 #endif
....@@ -1008,6 +1049,7 @@
10081049
10091050 struct wake_q_node wake_q;
10101051 struct wake_q_node wake_q_sleeper;
1052
+ int wake_q_count;
10111053
10121054 #ifdef CONFIG_RT_MUTEXES
10131055 /* PI waiters blocked on a rt_mutex held by this task: */
....@@ -1017,29 +1059,26 @@
10171059 /* Deadlock detection and priority inheritance handling: */
10181060 struct rt_mutex_waiter *pi_blocked_on;
10191061 #endif
1020
-#ifdef CONFIG_MM_EVENT_STAT
1021
- struct mm_event_task mm_event[MM_TYPE_NUM];
1022
- unsigned long next_period;
1023
-#endif
1062
+
10241063 #ifdef CONFIG_DEBUG_MUTEXES
10251064 /* Mutex deadlock detection: */
10261065 struct mutex_waiter *blocked_on;
10271066 #endif
10281067
1068
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
1069
+ int non_block_count;
1070
+#endif
1071
+
10291072 #ifdef CONFIG_TRACE_IRQFLAGS
1030
- unsigned int irq_events;
1031
- unsigned long hardirq_enable_ip;
1032
- unsigned long hardirq_disable_ip;
1033
- unsigned int hardirq_enable_event;
1034
- unsigned int hardirq_disable_event;
1035
- int hardirqs_enabled;
1036
- int hardirq_context;
1037
- unsigned long softirq_disable_ip;
1038
- unsigned long softirq_enable_ip;
1039
- unsigned int softirq_disable_event;
1040
- unsigned int softirq_enable_event;
1073
+ struct irqtrace_events irqtrace;
1074
+ unsigned int hardirq_threaded;
1075
+ u64 hardirq_chain_key;
10411076 int softirqs_enabled;
10421077 int softirq_context;
1078
+ int irq_config;
1079
+#endif
1080
+#ifdef CONFIG_PREEMPT_RT
1081
+ int softirq_disable_cnt;
10431082 #endif
10441083
10451084 #ifdef CONFIG_LOCKDEP
....@@ -1050,7 +1089,7 @@
10501089 struct held_lock held_locks[MAX_LOCK_DEPTH];
10511090 #endif
10521091
1053
-#ifdef CONFIG_UBSAN
1092
+#if defined(CONFIG_UBSAN) && !defined(CONFIG_UBSAN_TRAP)
10541093 unsigned int in_ubsan;
10551094 #endif
10561095
....@@ -1072,9 +1111,12 @@
10721111
10731112 struct io_context *io_context;
10741113
1114
+#ifdef CONFIG_COMPACTION
1115
+ struct capture_control *capture_control;
1116
+#endif
10751117 /* Ptrace state: */
10761118 unsigned long ptrace_message;
1077
- siginfo_t *last_siginfo;
1119
+ kernel_siginfo_t *last_siginfo;
10781120
10791121 struct task_io_accounting ioac;
10801122 #ifdef CONFIG_PSI
....@@ -1093,7 +1135,7 @@
10931135 /* Protected by ->alloc_lock: */
10941136 nodemask_t mems_allowed;
10951137 /* Seqence number to catch updates: */
1096
- seqcount_t mems_allowed_seq;
1138
+ seqcount_spinlock_t mems_allowed_seq;
10971139 int cpuset_mem_spread_rotor;
10981140 int cpuset_slab_spread_rotor;
10991141 #endif
....@@ -1103,7 +1145,7 @@
11031145 /* cg_list protected by css_set_lock and tsk->alloc_lock: */
11041146 struct list_head cg_list;
11051147 #endif
1106
-#ifdef CONFIG_INTEL_RDT
1148
+#ifdef CONFIG_X86_CPU_RESCTRL
11071149 u32 closid;
11081150 u32 rmid;
11091151 #endif
....@@ -1114,6 +1156,8 @@
11141156 #endif
11151157 struct list_head pi_state_list;
11161158 struct futex_pi_state *pi_state_cache;
1159
+ struct mutex futex_exit_mutex;
1160
+ unsigned int futex_state;
11171161 #endif
11181162 #ifdef CONFIG_PERF_EVENTS
11191163 struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
....@@ -1181,7 +1225,6 @@
11811225
11821226 #ifdef CONFIG_RSEQ
11831227 struct rseq __user *rseq;
1184
- u32 rseq_len;
11851228 u32 rseq_sig;
11861229 /*
11871230 * RmW on rseq_event_mask must be performed atomically
....@@ -1192,7 +1235,10 @@
11921235
11931236 struct tlbflush_unmap_batch tlb_ubc;
11941237
1195
- struct rcu_head rcu;
1238
+ union {
1239
+ refcount_t rcu_users;
1240
+ struct rcu_head rcu;
1241
+ };
11961242
11971243 /* Cache last used pipe for splice(): */
11981244 struct pipe_inode_info *splice_pipe;
....@@ -1227,8 +1273,19 @@
12271273 u64 timer_slack_ns;
12281274 u64 default_timer_slack_ns;
12291275
1230
-#ifdef CONFIG_KASAN
1276
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
12311277 unsigned int kasan_depth;
1278
+#endif
1279
+
1280
+#ifdef CONFIG_KCSAN
1281
+ struct kcsan_ctx kcsan_ctx;
1282
+#ifdef CONFIG_TRACE_IRQFLAGS
1283
+ struct irqtrace_events kcsan_save_irqtrace;
1284
+#endif
1285
+#endif
1286
+
1287
+#if IS_ENABLED(CONFIG_KUNIT)
1288
+ struct kunit *kunit_test;
12321289 #endif
12331290
12341291 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
....@@ -1280,6 +1337,9 @@
12801337
12811338 /* KCOV sequence number: */
12821339 int kcov_sequence;
1340
+
1341
+ /* Collect coverage from softirq context: */
1342
+ unsigned int kcov_softirq;
12831343 #endif
12841344
12851345 #ifdef CONFIG_MEMCG
....@@ -1305,22 +1365,9 @@
13051365 unsigned int sequential_io;
13061366 unsigned int sequential_io_avg;
13071367 #endif
1308
-#ifdef CONFIG_PREEMPT_RT_BASE
1309
- struct rcu_head put_rcu;
1310
- int softirq_nestcnt;
1311
- unsigned int softirqs_raised;
1312
-#endif
1313
-#ifdef CONFIG_PREEMPT_RT_FULL
1314
-# if defined CONFIG_HIGHMEM || defined CONFIG_X86_32
1315
- int kmap_idx;
1316
- pte_t kmap_pte[KM_TYPE_NR];
1317
-# endif
1318
-#endif
1368
+ struct kmap_ctrl kmap_ctrl;
13191369 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
13201370 unsigned long task_state_change;
1321
-#endif
1322
-#ifdef CONFIG_PREEMPT_RT_FULL
1323
- int xmit_recursion;
13241371 #endif
13251372 int pagefault_disabled;
13261373 #ifdef CONFIG_MMU
....@@ -1331,7 +1378,7 @@
13311378 #endif
13321379 #ifdef CONFIG_THREAD_INFO_IN_TASK
13331380 /* A live task holds one reference: */
1334
- atomic_t stack_refcount;
1381
+ refcount_t stack_refcount;
13351382 #endif
13361383 #ifdef CONFIG_LIVEPATCH
13371384 int patch_state;
....@@ -1340,27 +1387,33 @@
13401387 /* Used by LSM modules for access restriction: */
13411388 void *security;
13421389 #endif
1343
- /* task is frozen/stopped (used by the cgroup freezer) */
1344
- ANDROID_KABI_USE(1, unsigned frozen:1);
13451390
1346
- /* 095444fad7e3 ("futex: Replace PF_EXITPIDONE with a state") */
1347
- ANDROID_KABI_USE(2, unsigned int futex_state);
1391
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
1392
+ unsigned long lowest_stack;
1393
+ unsigned long prev_lowest_stack;
1394
+#endif
13481395
1349
- /*
1350
- * f9b0c6c556db ("futex: Add mutex around futex exit")
1351
- * A struct mutex takes 32 bytes, or 4 64bit entries, so pick off
1352
- * 4 of the reserved members, and replace them with a struct mutex.
1353
- * Do the GENKSYMS hack to work around the CRC issues
1354
- */
1355
-#ifdef __GENKSYMS__
1396
+#ifdef CONFIG_X86_MCE
1397
+ void __user *mce_vaddr;
1398
+ __u64 mce_kflags;
1399
+ u64 mce_addr;
1400
+ __u64 mce_ripv : 1,
1401
+ mce_whole_page : 1,
1402
+ __mce_reserved : 62;
1403
+ struct callback_head mce_kill_me;
1404
+ int mce_count;
1405
+#endif
1406
+ ANDROID_VENDOR_DATA_ARRAY(1, 64);
1407
+ ANDROID_OEM_DATA_ARRAY(1, 32);
1408
+
1409
+ /* PF_IO_WORKER */
1410
+ ANDROID_KABI_USE(1, void *pf_io_worker);
1411
+
1412
+ ANDROID_KABI_RESERVE(2);
13561413 ANDROID_KABI_RESERVE(3);
13571414 ANDROID_KABI_RESERVE(4);
13581415 ANDROID_KABI_RESERVE(5);
13591416 ANDROID_KABI_RESERVE(6);
1360
-#else
1361
- struct mutex futex_exit_mutex;
1362
-#endif
1363
-
13641417 ANDROID_KABI_RESERVE(7);
13651418 ANDROID_KABI_RESERVE(8);
13661419
....@@ -1538,10 +1591,10 @@
15381591 /*
15391592 * Per process flags
15401593 */
1541
-#define PF_IN_SOFTIRQ 0x00000001 /* Task is serving softirq */
1594
+#define PF_VCPU 0x00000001 /* I'm a virtual CPU */
15421595 #define PF_IDLE 0x00000002 /* I am an IDLE thread */
15431596 #define PF_EXITING 0x00000004 /* Getting shut down */
1544
-#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
1597
+#define PF_IO_WORKER 0x00000010 /* Task is an IO worker */
15451598 #define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
15461599 #define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */
15471600 #define PF_MCE_PROCESS 0x00000080 /* Process policy on mce errors */
....@@ -1556,14 +1609,14 @@
15561609 #define PF_KSWAPD 0x00020000 /* I am kswapd */
15571610 #define PF_MEMALLOC_NOFS 0x00040000 /* All allocation requests will inherit GFP_NOFS */
15581611 #define PF_MEMALLOC_NOIO 0x00080000 /* All allocation requests will inherit GFP_NOIO */
1559
-#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */
1612
+#define PF_LOCAL_THROTTLE 0x00100000 /* Throttle writes only against the bdi I write to,
1613
+ * I am cleaning dirty pages from some other bdi. */
15601614 #define PF_KTHREAD 0x00200000 /* I am a kernel thread */
15611615 #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */
15621616 #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
1563
-#define PF_MEMSTALL 0x01000000 /* Stalled due to lack of memory */
1564
-#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */
1617
+#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
15651618 #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
1566
-#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
1619
+#define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */
15671620 #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
15681621 #define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */
15691622
....@@ -1613,6 +1666,7 @@
16131666 #define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/
16141667 #define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */
16151668 #define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */
1669
+#define PFA_SPEC_SSB_NOEXEC 7 /* Speculative Store Bypass clear on execve() */
16161670
16171671 #define TASK_PFA_TEST(name, func) \
16181672 static inline bool task_##func(struct task_struct *p) \
....@@ -1641,6 +1695,10 @@
16411695 TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
16421696 TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
16431697
1698
+TASK_PFA_TEST(SPEC_SSB_NOEXEC, spec_ssb_noexec)
1699
+TASK_PFA_SET(SPEC_SSB_NOEXEC, spec_ssb_noexec)
1700
+TASK_PFA_CLEAR(SPEC_SSB_NOEXEC, spec_ssb_noexec)
1701
+
16441702 TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
16451703 TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
16461704
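
Each TASK_PFA_TEST()/TASK_PFA_SET()/TASK_PFA_CLEAR() line above stamps out a small accessor for the corresponding PFA_* bit in task_struct::atomic_flags; for the new PFA_SPEC_SSB_NOEXEC entry the generated helpers come out roughly as:

	static inline bool task_spec_ssb_noexec(struct task_struct *p)
	{
		return test_bit(PFA_SPEC_SSB_NOEXEC, &p->atomic_flags);
	}

	static inline void task_set_spec_ssb_noexec(struct task_struct *p)
	{
		set_bit(PFA_SPEC_SSB_NOEXEC, &p->atomic_flags);
	}

	static inline void task_clear_spec_ssb_noexec(struct task_struct *p)
	{
		clear_bit(PFA_SPEC_SSB_NOEXEC, &p->atomic_flags);
	}
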
....@@ -1659,10 +1717,21 @@
16591717 }
16601718
16611719 extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
1662
-extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed);
1720
+extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_effective_cpus);
1721
+
1722
+#ifdef CONFIG_RT_SOFTINT_OPTIMIZATION
1723
+extern bool cpupri_check_rt(void);
1724
+#else
1725
+static inline bool cpupri_check_rt(void)
1726
+{
1727
+ return false;
1728
+}
1729
+#endif
1730
+
16631731 #ifdef CONFIG_SMP
16641732 extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
16651733 extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
1734
+extern void force_compatible_cpus_allowed_ptr(struct task_struct *p);
16661735 #else
16671736 static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
16681737 {
....@@ -1673,10 +1742,6 @@
16731742 return -EINVAL;
16741743 return 0;
16751744 }
1676
-#endif
1677
-
1678
-#ifndef cpu_relax_yield
1679
-#define cpu_relax_yield() cpu_relax()
16801745 #endif
16811746
16821747 extern int yield_to(struct task_struct *p, bool preempt);
....@@ -1700,6 +1765,9 @@
17001765 extern int available_idle_cpu(int cpu);
17011766 extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
17021767 extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
1768
+extern void sched_set_fifo(struct task_struct *p);
1769
+extern void sched_set_fifo_low(struct task_struct *p);
1770
+extern void sched_set_normal(struct task_struct *p, int nice);
17031771 extern int sched_setattr(struct task_struct *, const struct sched_attr *);
17041772 extern int sched_setattr_nocheck(struct task_struct *, const struct sched_attr *);
17051773 extern struct task_struct *idle_task(int cpu);
....@@ -1710,7 +1778,7 @@
17101778 *
17111779 * Return: 1 if @p is an idle task. 0 otherwise.
17121780 */
1713
-static inline bool is_idle_task(const struct task_struct *p)
1781
+static __always_inline bool is_idle_task(const struct task_struct *p)
17141782 {
17151783 return !!(p->flags & PF_IDLE);
17161784 }
....@@ -1789,7 +1857,15 @@
17891857 })
17901858
17911859 #ifdef CONFIG_SMP
1792
-void scheduler_ipi(void);
1860
+static __always_inline void scheduler_ipi(void)
1861
+{
1862
+ /*
1863
+ * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
1864
+ * TIF_NEED_RESCHED remotely (for the first time) will also send
1865
+ * this IPI.
1866
+ */
1867
+ preempt_fold_need_resched();
1868
+}
17931869 extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
17941870 #else
17951871 static inline void scheduler_ipi(void) { }
....@@ -1891,7 +1967,7 @@
18911967 {
18921968 if (task->state & (__TASK_STOPPED | __TASK_TRACED))
18931969 return true;
1894
-#ifdef CONFIG_PREEMPT_RT_FULL
1970
+#ifdef CONFIG_PREEMPT_RT
18951971 if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED))
18961972 return true;
18971973 #endif
....@@ -1902,7 +1978,7 @@
19021978 {
19031979 bool traced_stopped;
19041980
1905
-#ifdef CONFIG_PREEMPT_RT_FULL
1981
+#ifdef CONFIG_PREEMPT_RT
19061982 unsigned long flags;
19071983
19081984 raw_spin_lock_irqsave(&task->pi_lock, flags);
....@@ -1920,7 +1996,7 @@
19201996
19211997 if (task->state & __TASK_TRACED)
19221998 return true;
1923
-#ifdef CONFIG_PREEMPT_RT_FULL
1999
+#ifdef CONFIG_PREEMPT_RT
19242000 /* in case the task is sleeping on tasklist_lock */
19252001 raw_spin_lock_irq(&task->pi_lock);
19262002 if (task->state & __TASK_TRACED)
....@@ -1938,7 +2014,7 @@
19382014 * value indicates whether a reschedule was done in fact.
19392015 * cond_resched_lock() will drop the spinlock before scheduling,
19402016 */
1941
-#ifndef CONFIG_PREEMPT
2017
+#ifndef CONFIG_PREEMPTION
19422018 extern int _cond_resched(void);
19432019 #else
19442020 static inline int _cond_resched(void) { return 0; }
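
In practice cond_resched() and cond_resched_lock() are sprinkled into long-running kernel loops as voluntary preemption points; a minimal sketch of both forms (the functions and the table being cleared are placeholders):

	static void example_zero_table(unsigned long *table, size_t nents)
	{
		size_t i;

		for (i = 0; i < nents; i++) {
			table[i] = 0;
			cond_resched();		/* reschedule here if needed */
		}
	}

	static void example_zero_table_locked(spinlock_t *lock,
					      unsigned long *table, size_t nents)
	{
		size_t i;

		spin_lock(lock);
		for (i = 0; i < nents; i++) {
			table[i] = 0;
			/* Drops and re-takes @lock if a reschedule is due. */
			cond_resched_lock(lock);
		}
		spin_unlock(lock);
	}
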
....@@ -1967,12 +2043,12 @@
19672043
19682044 /*
19692045 * Does a critical section need to be broken due to another
1970
- * task waiting?: (technically does not depend on CONFIG_PREEMPT,
2046
+ * task waiting?: (technically does not depend on CONFIG_PREEMPTION,
19712047 * but a general need for low latency)
19722048 */
19732049 static inline int spin_needbreak(spinlock_t *lock)
19742050 {
1975
-#ifdef CONFIG_PREEMPT
2051
+#ifdef CONFIG_PREEMPTION
19762052 return spin_is_contended(lock);
19772053 #else
19782054 return 0;
....@@ -1983,23 +2059,6 @@
19832059 {
19842060 return unlikely(tif_need_resched());
19852061 }
1986
-
1987
-#ifdef CONFIG_PREEMPT_RT_FULL
1988
-static inline void sleeping_lock_inc(void)
1989
-{
1990
- current->sleeping_lock++;
1991
-}
1992
-
1993
-static inline void sleeping_lock_dec(void)
1994
-{
1995
- current->sleeping_lock--;
1996
-}
1997
-
1998
-#else
1999
-
2000
-static inline void sleeping_lock_inc(void) { }
2001
-static inline void sleeping_lock_dec(void) { }
2002
-#endif
20032062
20042063 /*
20052064 * Wrappers for p->thread_info->cpu access. No-op on UP.
....@@ -2039,7 +2098,10 @@
20392098 * running or not.
20402099 */
20412100 #ifndef vcpu_is_preempted
2042
-# define vcpu_is_preempted(cpu) false
2101
+static inline bool vcpu_is_preempted(int cpu)
2102
+{
2103
+ return false;
2104
+}
20432105 #endif
20442106
20452107 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
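
Architectures that can detect vCPU preemption override vcpu_is_preempted(); its typical consumer is optimistic-spinning code that stops busy-waiting once the lock owner's vCPU has been scheduled out. A hedged sketch of that pattern (the function is illustrative, not code from this header):

	/* Spin while the owner is on a CPU; bail out once spinning is pointless. */
	static bool example_spin_on_owner(struct task_struct *owner)
	{
		while (READ_ONCE(owner->on_cpu)) {
			if (need_resched() || vcpu_is_preempted(task_cpu(owner)))
				return false;	/* give up and block instead */
			cpu_relax();
		}
		return true;
	}
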
....@@ -2113,12 +2175,10 @@
21132175 {
21142176 if (clone_flags & CLONE_VM) {
21152177 t->rseq = NULL;
2116
- t->rseq_len = 0;
21172178 t->rseq_sig = 0;
21182179 t->rseq_event_mask = 0;
21192180 } else {
21202181 t->rseq = current->rseq;
2121
- t->rseq_len = current->rseq_len;
21222182 t->rseq_sig = current->rseq_sig;
21232183 t->rseq_event_mask = current->rseq_event_mask;
21242184 }
....@@ -2127,7 +2187,6 @@
21272187 static inline void rseq_execve(struct task_struct *t)
21282188 {
21292189 t->rseq = NULL;
2130
- t->rseq_len = 0;
21312190 t->rseq_sig = 0;
21322191 t->rseq_event_mask = 0;
21332192 }
....@@ -2172,6 +2231,18 @@
21722231
21732232 #endif
21742233
2175
-extern struct task_struct *takedown_cpu_task;
2234
+const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq);
2235
+char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len);
2236
+int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq);
2237
+
2238
+const struct sched_avg *sched_trace_rq_avg_rt(struct rq *rq);
2239
+const struct sched_avg *sched_trace_rq_avg_dl(struct rq *rq);
2240
+const struct sched_avg *sched_trace_rq_avg_irq(struct rq *rq);
2241
+
2242
+int sched_trace_rq_cpu(struct rq *rq);
2243
+int sched_trace_rq_cpu_capacity(struct rq *rq);
2244
+int sched_trace_rq_nr_running(struct rq *rq);
2245
+
2246
+const struct cpumask *sched_trace_rd_span(struct root_domain *rd);
21762247
21772248 #endif
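
The sched_trace_*() accessors declared above exist so tracing consumers (for example a module attached to the scheduler's PELT tracepoints) can read rq/cfs_rq state without including private scheduler headers. A minimal sketch of such a consumer, with a hypothetical probe function; only the accessor calls and the signatures shown above are taken from this header:

	static void probe_pelt_cfs(void *data, struct cfs_rq *cfs_rq)
	{
		const struct sched_avg *avg = sched_trace_cfs_rq_avg(cfs_rq);
		char path[64];

		if (!avg)
			return;

		sched_trace_cfs_rq_path(cfs_rq, path, sizeof(path));
		trace_printk("cpu=%d path=%s util_avg=%lu\n",
			     sched_trace_cfs_rq_cpu(cfs_rq), path, avg->util_avg);
	}
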