| .. | .. | 
|---|
| 3 | 3 |   * Implement CPU time clocks for the POSIX clock interface. | 
|---|
| 4 | 4 |   */ | 
|---|
| 5 | 5 |   | 
|---|
| 6 |  | -#include <uapi/linux/sched/types.h>  | 
|---|
| 7 | 6 |  #include <linux/sched/signal.h> | 
|---|
| 8 | 7 |  #include <linux/sched/cputime.h> | 
|---|
| 9 |  | -#include <linux/sched/rt.h>  | 
|---|
| 10 | 8 |  #include <linux/posix-timers.h> | 
|---|
| 11 | 9 |  #include <linux/errno.h> | 
|---|
| 12 | 10 |  #include <linux/math64.h> | 
|---|
| .. | .. | 
|---|
| 17 | 15 |  #include <linux/workqueue.h> | 
|---|
| 18 | 16 |  #include <linux/compat.h> | 
|---|
| 19 | 17 |  #include <linux/sched/deadline.h> | 
|---|
| 20 |  | -#include <linux/smpboot.h>  | 
|---|
| 21 | 18 |   | 
|---|
| 22 | 19 |  #include "posix-timers.h" | 
|---|
| 23 | 20 |   | 
|---|
| 24 | 21 |  static void posix_cpu_timer_rearm(struct k_itimer *timer); | 
|---|
| 25 | 22 |   | 
|---|
 | 23 | +void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit)  | 
|---|
 | 24 | +{  | 
|---|
 | 25 | +	posix_cputimers_init(pct);  | 
|---|
 | 26 | +	if (cpu_limit != RLIM_INFINITY) {  | 
|---|
 | 27 | +		pct->bases[CPUCLOCK_PROF].nextevt = cpu_limit * NSEC_PER_SEC;  | 
|---|
 | 28 | +		pct->timers_active = true;  | 
|---|
 | 29 | +	}  | 
|---|
 | 30 | +}  | 
|---|
 | 31 | +  | 
|---|
| 26 | 32 |  /* | 
|---|
| 27 | 33 |   * Called after updating RLIMIT_CPU to run cpu timer and update | 
|---|
| 28 |  | - * tsk->signal->cputime_expires expiration cache if necessary. Needs  | 
|---|
| 29 |  | - * siglock protection since other code may update expiration cache as  | 
|---|
| 30 |  | - * well.  | 
|---|
 | 34 | + * tsk->signal->posix_cputimers.bases[clock].nextevt expiration cache if  | 
|---|
 | 35 | + * necessary. Needs siglock protection since other code may update the  | 
|---|
 | 36 | + * expiration cache as well.  | 
|---|
| 31 | 37 |   */ | 
|---|
| 32 | 38 |  void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new) | 
|---|
| 33 | 39 |  { | 
|---|
| .. | .. | 
|---|
| 38 | 44 |  	spin_unlock_irq(&task->sighand->siglock); | 
|---|
| 39 | 45 |  } | 
|---|
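
For context on what feeds this path from userspace: the expiry cache seeded by posix_cputimers_group_init() and refreshed by update_rlimit_cpu() is what ultimately drives SIGXCPU delivery for RLIMIT_CPU. Below is a minimal userspace sketch (standard POSIX API only, not part of this patch; the handler name is illustrative) that lowers the soft limit and spins until the resulting SIGXCPU arrives.

```c
#include <signal.h>
#include <stdio.h>
#include <sys/resource.h>

static volatile sig_atomic_t got_xcpu;

static void on_xcpu(int sig)
{
	(void)sig;
	got_xcpu = 1;
}

int main(void)
{
	struct rlimit rl;

	signal(SIGXCPU, on_xcpu);
	getrlimit(RLIMIT_CPU, &rl);
	rl.rlim_cur = 1;		/* soft limit: one second of CPU time */
	setrlimit(RLIMIT_CPU, &rl);

	while (!got_xcpu)		/* burn CPU until the soft limit fires */
		;

	puts("got SIGXCPU after ~1s of CPU time");
	return 0;
}
```
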
| 40 | 46 |   | 
|---|
| 41 |  | -static int check_clock(const clockid_t which_clock)  | 
|---|
 | 47 | +/*  | 
|---|
 | 48 | + * Functions for validating access to tasks.  | 
|---|
 | 49 | + */  | 
|---|
 | 50 | +static struct pid *pid_for_clock(const clockid_t clock, bool gettime)  | 
|---|
| 42 | 51 |  { | 
|---|
| 43 |  | -	int error = 0;  | 
|---|
| 44 |  | -	struct task_struct *p;  | 
|---|
| 45 |  | -	const pid_t pid = CPUCLOCK_PID(which_clock);  | 
|---|
 | 52 | +	const bool thread = !!CPUCLOCK_PERTHREAD(clock);  | 
|---|
 | 53 | +	const pid_t upid = CPUCLOCK_PID(clock);  | 
|---|
 | 54 | +	struct pid *pid;  | 
|---|
| 46 | 55 |   | 
|---|
| 47 |  | -	if (CPUCLOCK_WHICH(which_clock) >= CPUCLOCK_MAX)  | 
|---|
| 48 |  | -		return -EINVAL;  | 
|---|
 | 56 | +	if (CPUCLOCK_WHICH(clock) >= CPUCLOCK_MAX)  | 
|---|
 | 57 | +		return NULL;  | 
|---|
| 49 | 58 |   | 
|---|
| 50 |  | -	if (pid == 0)  | 
|---|
| 51 |  | -		return 0;  | 
|---|
 | 59 | +	/*  | 
|---|
 | 60 | +	 * If the encoded PID is 0, then the timer is targeted at current  | 
|---|
 | 61 | +	 * or the process to which current belongs.  | 
|---|
 | 62 | +	 */  | 
|---|
 | 63 | +	if (upid == 0)  | 
|---|
 | 64 | +		return thread ? task_pid(current) : task_tgid(current);  | 
|---|
 | 65 | +  | 
|---|
 | 66 | +	pid = find_vpid(upid);  | 
|---|
 | 67 | +	if (!pid)  | 
|---|
 | 68 | +		return NULL;  | 
|---|
 | 69 | +  | 
|---|
 | 70 | +	if (thread) {  | 
|---|
 | 71 | +		struct task_struct *tsk = pid_task(pid, PIDTYPE_PID);  | 
|---|
 | 72 | +		return (tsk && same_thread_group(tsk, current)) ? pid : NULL;  | 
|---|
 | 73 | +	}  | 
|---|
 | 74 | +  | 
|---|
 | 75 | +	/*  | 
|---|
 | 76 | + * For clock_gettime(PROCESS) allow finding the process with  | 
|---|
 | 77 | + * the pid of the current task.  The code needs the tgid  | 
|---|
 | 78 | +	 * of the process so that pid_task(pid, PIDTYPE_TGID) can be  | 
|---|
 | 79 | +	 * used to find the process.  | 
|---|
 | 80 | +	 */  | 
|---|
 | 81 | +	if (gettime && (pid == task_pid(current)))  | 
|---|
 | 82 | +		return task_tgid(current);  | 
|---|
 | 83 | +  | 
|---|
 | 84 | +	/*  | 
|---|
 | 85 | + * For process clocks, require that the pid identifies a process.  | 
|---|
 | 86 | +	 */  | 
|---|
 | 87 | +	return pid_has_task(pid, PIDTYPE_TGID) ? pid : NULL;  | 
|---|
 | 88 | +}  | 
|---|
 | 89 | +  | 
|---|
 | 90 | +static inline int validate_clock_permissions(const clockid_t clock)  | 
|---|
 | 91 | +{  | 
|---|
 | 92 | +	int ret;  | 
|---|
| 52 | 93 |   | 
|---|
| 53 | 94 |  	rcu_read_lock(); | 
|---|
| 54 |  | -	p = find_task_by_vpid(pid);  | 
|---|
| 55 |  | -	if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ?  | 
|---|
| 56 |  | -		   same_thread_group(p, current) : has_group_leader_pid(p))) {  | 
|---|
| 57 |  | -		error = -EINVAL;  | 
|---|
| 58 |  | -	}  | 
|---|
 | 95 | +	ret = pid_for_clock(clock, false) ? 0 : -EINVAL;  | 
|---|
| 59 | 96 |  	rcu_read_unlock(); | 
|---|
| 60 | 97 |   | 
|---|
| 61 |  | -	return error;  | 
|---|
 | 98 | +	return ret;  | 
|---|
 | 99 | +}  | 
|---|
 | 100 | +  | 
|---|
 | 101 | +static inline enum pid_type clock_pid_type(const clockid_t clock)  | 
|---|
 | 102 | +{  | 
|---|
 | 103 | +	return CPUCLOCK_PERTHREAD(clock) ? PIDTYPE_PID : PIDTYPE_TGID;  | 
|---|
 | 104 | +}  | 
|---|
 | 105 | +  | 
|---|
 | 106 | +static inline struct task_struct *cpu_timer_task_rcu(struct k_itimer *timer)  | 
|---|
 | 107 | +{  | 
|---|
 | 108 | +	return pid_task(timer->it.cpu.pid, clock_pid_type(timer->it_clock));  | 
|---|
| 62 | 109 |  } | 
|---|
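
The helpers above resolve a clockid either to a thread (PIDTYPE_PID) or to a process (PIDTYPE_TGID). The same split is visible from userspace through the per-thread and per-process CPU-time clocks; a small sketch (standard POSIX threads API, outside this patch) reads the calling thread's clock, which pid_for_clock() accepts because the target is in the caller's thread group.

```c
#include <pthread.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
	clockid_t cid;
	struct timespec ts;

	/* CPU-time clock of the calling thread (a per-thread CPU clock) */
	if (pthread_getcpuclockid(pthread_self(), &cid) != 0)
		return 1;
	if (clock_gettime(cid, &ts) != 0)
		return 1;

	printf("thread CPU time: %ld.%09ld s\n", (long)ts.tv_sec, (long)ts.tv_nsec);
	return 0;
}
```
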
| 63 | 110 |   | 
|---|
| 64 | 111 |  /* | 
|---|
| 65 | 112 |   * Update expiry time from increment, and increase overrun count, | 
|---|
| 66 | 113 |   * given the current clock sample. | 
|---|
| 67 | 114 |   */ | 
|---|
| 68 |  | -static void bump_cpu_timer(struct k_itimer *timer, u64 now)  | 
|---|
 | 115 | +static u64 bump_cpu_timer(struct k_itimer *timer, u64 now)  | 
|---|
| 69 | 116 |  { | 
|---|
 | 117 | +	u64 delta, incr, expires = timer->it.cpu.node.expires;  | 
|---|
| 70 | 118 |  	int i; | 
|---|
| 71 |  | -	u64 delta, incr;  | 
|---|
| 72 | 119 |   | 
|---|
| 73 |  | -	if (timer->it.cpu.incr == 0)  | 
|---|
| 74 |  | -		return;  | 
|---|
 | 120 | +	if (!timer->it_interval)  | 
|---|
 | 121 | +		return expires;  | 
|---|
| 75 | 122 |   | 
|---|
| 76 |  | -	if (now < timer->it.cpu.expires)  | 
|---|
| 77 |  | -		return;  | 
|---|
 | 123 | +	if (now < expires)  | 
|---|
 | 124 | +		return expires;  | 
|---|
| 78 | 125 |   | 
|---|
| 79 |  | -	incr = timer->it.cpu.incr;  | 
|---|
| 80 |  | -	delta = now + incr - timer->it.cpu.expires;  | 
|---|
 | 126 | +	incr = timer->it_interval;  | 
|---|
 | 127 | +	delta = now + incr - expires;  | 
|---|
| 81 | 128 |   | 
|---|
| 82 | 129 |  	/* Don't use (incr*2 < delta), incr*2 might overflow. */ | 
|---|
| 83 | 130 |  	for (i = 0; incr < delta - incr; i++) | 
|---|
| .. | .. | 
|---|
| 87 | 134 |  		if (delta < incr) | 
|---|
| 88 | 135 |  			continue; | 
|---|
| 89 | 136 |   | 
|---|
| 90 |  | -		timer->it.cpu.expires += incr;  | 
|---|
 | 137 | +		timer->it.cpu.node.expires += incr;  | 
|---|
| 91 | 138 |  		timer->it_overrun += 1LL << i; | 
|---|
| 92 | 139 |  		delta -= incr; | 
|---|
| 93 | 140 |  	} | 
|---|
 | 141 | +	return timer->it.cpu.node.expires;  | 
|---|
| 94 | 142 |  } | 
|---|
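
The doubling loop above advances the expiry by whole intervals in O(log n) steps rather than one interval at a time. Here is a standalone re-implementation on plain integers (an illustrative sketch, not kernel code) traced with expires = 100, interval = 10, now = 175; it ends with expires = 180 and 8 recorded overruns.

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t expires = 100, incr = 10, now = 175;
	uint64_t delta = now + incr - expires;
	long long overrun = 0;
	int i;

	/* Grow incr to the largest power-of-two multiple still needed;
	 * written this way to avoid overflowing incr * 2. */
	for (i = 0; incr < delta - incr; i++)
		incr = incr << 1;

	/* Walk back down, consuming delta in power-of-two chunks. */
	for (; i >= 0; incr >>= 1, i--) {
		if (delta < incr)
			continue;
		expires += incr;
		overrun += 1LL << i;
		delta -= incr;
	}

	/* Prints: expires=180 overrun=8 */
	printf("expires=%llu overrun=%lld\n",
	       (unsigned long long)expires, overrun);
	return 0;
}
```
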
| 95 | 143 |   | 
|---|
| 96 |  | -/**  | 
|---|
| 97 |  | - * task_cputime_zero - Check a task_cputime struct for all zero fields.  | 
|---|
| 98 |  | - *  | 
|---|
| 99 |  | - * @cputime:	The struct to compare.  | 
|---|
| 100 |  | - *  | 
|---|
| 101 |  | - * Checks @cputime to see if all fields are zero.  Returns true if all fields  | 
|---|
| 102 |  | - * are zero, false if any field is nonzero.  | 
|---|
| 103 |  | - */  | 
|---|
| 104 |  | -static inline int task_cputime_zero(const struct task_cputime *cputime)  | 
|---|
 | 144 | +/* Check whether all cache entries contain U64_MAX, i.e. eternal expiry time */  | 
|---|
 | 145 | +static inline bool expiry_cache_is_inactive(const struct posix_cputimers *pct)  | 
|---|
| 105 | 146 |  { | 
|---|
| 106 |  | -	if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime)  | 
|---|
| 107 |  | -		return 1;  | 
|---|
| 108 |  | -	return 0;  | 
|---|
| 109 |  | -}  | 
|---|
| 110 |  | -  | 
|---|
| 111 |  | -static inline u64 prof_ticks(struct task_struct *p)  | 
|---|
| 112 |  | -{  | 
|---|
| 113 |  | -	u64 utime, stime;  | 
|---|
| 114 |  | -  | 
|---|
| 115 |  | -	task_cputime(p, &utime, &stime);  | 
|---|
| 116 |  | -  | 
|---|
| 117 |  | -	return utime + stime;  | 
|---|
| 118 |  | -}  | 
|---|
| 119 |  | -static inline u64 virt_ticks(struct task_struct *p)  | 
|---|
| 120 |  | -{  | 
|---|
| 121 |  | -	u64 utime, stime;  | 
|---|
| 122 |  | -  | 
|---|
| 123 |  | -	task_cputime(p, &utime, &stime);  | 
|---|
| 124 |  | -  | 
|---|
| 125 |  | -	return utime;  | 
|---|
 | 147 | +	return !(~pct->bases[CPUCLOCK_PROF].nextevt |  | 
|---|
 | 148 | +		 ~pct->bases[CPUCLOCK_VIRT].nextevt |  | 
|---|
 | 149 | +		 ~pct->bases[CPUCLOCK_SCHED].nextevt);  | 
|---|
| 126 | 150 |  } | 
|---|
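
The negated-OR expression above is a branch-free way of asking "are all three nextevt entries equal to U64_MAX?". A tiny standalone check of the same idiom (illustrative only; the helper name is made up):

```c
#include <assert.h>
#include <stdint.h>

/* ~x is zero only when x == UINT64_MAX, so OR-ing the complements and
 * negating the result tests "all values are UINT64_MAX" in one expression. */
static int all_infinite(uint64_t a, uint64_t b, uint64_t c)
{
	return !(~a | ~b | ~c);
}

int main(void)
{
	assert(all_infinite(UINT64_MAX, UINT64_MAX, UINT64_MAX));
	assert(!all_infinite(UINT64_MAX, 42, UINT64_MAX));
	return 0;
}
```
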
| 127 | 151 |   | 
|---|
| 128 | 152 |  static int | 
|---|
| 129 | 153 |  posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp) | 
|---|
| 130 | 154 |  { | 
|---|
| 131 |  | -	int error = check_clock(which_clock);  | 
|---|
 | 155 | +	int error = validate_clock_permissions(which_clock);  | 
|---|
 | 156 | +  | 
|---|
| 132 | 157 |  	if (!error) { | 
|---|
| 133 | 158 |  		tp->tv_sec = 0; | 
|---|
| 134 | 159 |  		tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ); | 
|---|
| .. | .. | 
|---|
| 145 | 170 |  } | 
|---|
| 146 | 171 |   | 
|---|
| 147 | 172 |  static int | 
|---|
| 148 |  | -posix_cpu_clock_set(const clockid_t which_clock, const struct timespec64 *tp)  | 
|---|
 | 173 | +posix_cpu_clock_set(const clockid_t clock, const struct timespec64 *tp)  | 
|---|
| 149 | 174 |  { | 
|---|
 | 175 | +	int error = validate_clock_permissions(clock);  | 
|---|
 | 176 | +  | 
|---|
| 150 | 177 |  	/* | 
|---|
| 151 | 178 |  	 * You can never reset a CPU clock, but we check for other errors | 
|---|
| 152 | 179 |  	 * in the call before failing with EPERM. | 
|---|
| 153 | 180 |  	 */ | 
|---|
| 154 |  | -	int error = check_clock(which_clock);  | 
|---|
| 155 |  | -	if (error == 0) {  | 
|---|
| 156 |  | -		error = -EPERM;  | 
|---|
| 157 |  | -	}  | 
|---|
| 158 |  | -	return error;  | 
|---|
 | 181 | +	return error ? : -EPERM;  | 
|---|
| 159 | 182 |  } | 
|---|
| 160 | 183 |   | 
|---|
| 161 |  | -  | 
|---|
| 162 | 184 |  /* | 
|---|
| 163 |  | - * Sample a per-thread clock for the given task.  | 
|---|
 | 185 | + * Sample a per-thread clock for the given task. clkid is validated.  | 
|---|
| 164 | 186 |   */ | 
|---|
| 165 |  | -static int cpu_clock_sample(const clockid_t which_clock,  | 
|---|
| 166 |  | -			    struct task_struct *p, u64 *sample)  | 
|---|
 | 187 | +static u64 cpu_clock_sample(const clockid_t clkid, struct task_struct *p)  | 
|---|
| 167 | 188 |  { | 
|---|
| 168 |  | -	switch (CPUCLOCK_WHICH(which_clock)) {  | 
|---|
| 169 |  | -	default:  | 
|---|
| 170 |  | -		return -EINVAL;  | 
|---|
 | 189 | +	u64 utime, stime;  | 
|---|
 | 190 | +  | 
|---|
 | 191 | +	if (clkid == CPUCLOCK_SCHED)  | 
|---|
 | 192 | +		return task_sched_runtime(p);  | 
|---|
 | 193 | +  | 
|---|
 | 194 | +	task_cputime(p, &utime, &stime);  | 
|---|
 | 195 | +  | 
|---|
 | 196 | +	switch (clkid) {  | 
|---|
| 171 | 197 |  	case CPUCLOCK_PROF: | 
|---|
| 172 |  | -		*sample = prof_ticks(p);  | 
|---|
| 173 |  | -		break;  | 
|---|
 | 198 | +		return utime + stime;  | 
|---|
| 174 | 199 |  	case CPUCLOCK_VIRT: | 
|---|
| 175 |  | -		*sample = virt_ticks(p);  | 
|---|
| 176 |  | -		break;  | 
|---|
| 177 |  | -	case CPUCLOCK_SCHED:  | 
|---|
| 178 |  | -		*sample = task_sched_runtime(p);  | 
|---|
| 179 |  | -		break;  | 
|---|
 | 200 | +		return utime;  | 
|---|
 | 201 | +	default:  | 
|---|
 | 202 | +		WARN_ON_ONCE(1);  | 
|---|
| 180 | 203 |  	} | 
|---|
| 181 | 204 |  	return 0; | 
|---|
 | 205 | +}  | 
|---|
 | 206 | +  | 
|---|
 | 207 | +static inline void store_samples(u64 *samples, u64 stime, u64 utime, u64 rtime)  | 
|---|
 | 208 | +{  | 
|---|
 | 209 | +	samples[CPUCLOCK_PROF] = stime + utime;  | 
|---|
 | 210 | +	samples[CPUCLOCK_VIRT] = utime;  | 
|---|
 | 211 | +	samples[CPUCLOCK_SCHED] = rtime;  | 
|---|
 | 212 | +}  | 
|---|
 | 213 | +  | 
|---|
 | 214 | +static void task_sample_cputime(struct task_struct *p, u64 *samples)  | 
|---|
 | 215 | +{  | 
|---|
 | 216 | +	u64 stime, utime;  | 
|---|
 | 217 | +  | 
|---|
 | 218 | +	task_cputime(p, &utime, &stime);  | 
|---|
 | 219 | +	store_samples(samples, stime, utime, p->se.sum_exec_runtime);  | 
|---|
 | 220 | +}  | 
|---|
 | 221 | +  | 
|---|
 | 222 | +static void proc_sample_cputime_atomic(struct task_cputime_atomic *at,  | 
|---|
 | 223 | +				       u64 *samples)  | 
|---|
 | 224 | +{  | 
|---|
 | 225 | +	u64 stime, utime, rtime;  | 
|---|
 | 226 | +  | 
|---|
 | 227 | +	utime = atomic64_read(&at->utime);  | 
|---|
 | 228 | +	stime = atomic64_read(&at->stime);  | 
|---|
 | 229 | +	rtime = atomic64_read(&at->sum_exec_runtime);  | 
|---|
 | 230 | +	store_samples(samples, stime, utime, rtime);  | 
|---|
| 182 | 231 |  } | 
|---|
| 183 | 232 |   | 
|---|
| 184 | 233 |  /* | 
|---|
| .. | .. | 
|---|
| 196 | 245 |  	} | 
|---|
| 197 | 246 |  } | 
|---|
| 198 | 247 |   | 
|---|
| 199 |  | -static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, struct task_cputime *sum)  | 
|---|
 | 248 | +static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic,  | 
|---|
 | 249 | +			      struct task_cputime *sum)  | 
|---|
| 200 | 250 |  { | 
|---|
| 201 | 251 |  	__update_gt_cputime(&cputime_atomic->utime, sum->utime); | 
|---|
| 202 | 252 |  	__update_gt_cputime(&cputime_atomic->stime, sum->stime); | 
|---|
| 203 | 253 |  	__update_gt_cputime(&cputime_atomic->sum_exec_runtime, sum->sum_exec_runtime); | 
|---|
| 204 | 254 |  } | 
|---|
| 205 | 255 |   | 
|---|
| 206 |  | -/* Sample task_cputime_atomic values in "atomic_timers", store results in "times". */  | 
|---|
| 207 |  | -static inline void sample_cputime_atomic(struct task_cputime *times,  | 
|---|
| 208 |  | -					 struct task_cputime_atomic *atomic_times)  | 
|---|
| 209 |  | -{  | 
|---|
| 210 |  | -	times->utime = atomic64_read(&atomic_times->utime);  | 
|---|
| 211 |  | -	times->stime = atomic64_read(&atomic_times->stime);  | 
|---|
| 212 |  | -	times->sum_exec_runtime = atomic64_read(&atomic_times->sum_exec_runtime);  | 
|---|
| 213 |  | -}  | 
|---|
| 214 |  | -  | 
|---|
| 215 |  | -void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)  | 
|---|
 | 256 | +/**  | 
|---|
 | 257 | + * thread_group_sample_cputime - Sample cputime for a given task  | 
|---|
 | 258 | + * @tsk:	Task for which cputime needs to be sampled  | 
|---|
 | 259 | + * @samples:	Storage for time samples  | 
|---|
 | 260 | + *  | 
|---|
 | 261 | + * Called from sys_getitimer() to calculate the expiry time of an active  | 
|---|
 | 262 | + * timer. That means group cputime accounting is already active. Called  | 
|---|
 | 263 | + * with task sighand lock held.  | 
|---|
 | 264 | + *  | 
|---|
 | 265 | + * Updates @samples with an up-to-date sample of the thread group cputimes.  | 
|---|
 | 266 | + */  | 
|---|
 | 267 | +void thread_group_sample_cputime(struct task_struct *tsk, u64 *samples)  | 
|---|
| 216 | 268 |  { | 
|---|
| 217 | 269 |  	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; | 
|---|
| 218 |  | -	struct task_cputime sum;  | 
|---|
 | 270 | +	struct posix_cputimers *pct = &tsk->signal->posix_cputimers;  | 
|---|
 | 271 | +  | 
|---|
 | 272 | +	WARN_ON_ONCE(!pct->timers_active);  | 
|---|
 | 273 | +  | 
|---|
 | 274 | +	proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);  | 
|---|
 | 275 | +}  | 
|---|
 | 276 | +  | 
|---|
 | 277 | +/**  | 
|---|
 | 278 | + * thread_group_start_cputime - Start cputime and return a sample  | 
|---|
 | 279 | + * @tsk:	Task for which cputime needs to be started  | 
|---|
 | 280 | + * @samples:	Storage for time samples  | 
|---|
 | 281 | + *  | 
|---|
 | 282 | + * The thread group cputime accounting is avoided when there are no posix  | 
|---|
 | 283 | + * CPU timers armed. Before starting a timer it's required to check whether  | 
|---|
 | 284 | + * the time accounting is active. If not, a full update of the atomic  | 
|---|
 | 285 | + * accounting store needs to be done and the accounting enabled.  | 
|---|
 | 286 | + *  | 
|---|
 | 287 | + * Updates @times with an uptodate sample of the thread group cputimes.  | 
|---|
 | 288 | + */  | 
|---|
 | 289 | +static void thread_group_start_cputime(struct task_struct *tsk, u64 *samples)  | 
|---|
 | 290 | +{  | 
|---|
 | 291 | +	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;  | 
|---|
 | 292 | +	struct posix_cputimers *pct = &tsk->signal->posix_cputimers;  | 
|---|
| 219 | 293 |   | 
|---|
| 220 | 294 |  	/* Check if cputimer isn't running. This is accessed without locking. */ | 
|---|
| 221 |  | -	if (!READ_ONCE(cputimer->running)) {  | 
|---|
 | 295 | +	if (!READ_ONCE(pct->timers_active)) {  | 
|---|
 | 296 | +		struct task_cputime sum;  | 
|---|
 | 297 | +  | 
|---|
| 222 | 298 |  		/* | 
|---|
| 223 | 299 |  		 * The POSIX timer interface allows for absolute time expiry | 
|---|
| 224 | 300 |  		 * values through the TIMER_ABSTIME flag, therefore we have | 
|---|
| .. | .. | 
|---|
| 228 | 304 |  		update_gt_cputime(&cputimer->cputime_atomic, &sum); | 
|---|
| 229 | 305 |   | 
|---|
| 230 | 306 |  		/* | 
|---|
| 231 |  | -		 * We're setting cputimer->running without a lock. Ensure  | 
|---|
| 232 |  | -		 * this only gets written to in one operation. We set  | 
|---|
| 233 |  | -		 * running after update_gt_cputime() as a small optimization,  | 
|---|
| 234 |  | -		 * but barriers are not required because update_gt_cputime()  | 
|---|
 | 307 | +		 * We're setting timers_active without a lock. Ensure this  | 
|---|
 | 308 | +		 * only gets written to in one operation. We set it after  | 
|---|
 | 309 | +		 * update_gt_cputime() as a small optimization, but  | 
|---|
 | 310 | +		 * barriers are not required because update_gt_cputime()  | 
|---|
| 235 | 311 |  		 * can handle concurrent updates. | 
|---|
| 236 | 312 |  		 */ | 
|---|
| 237 |  | -		WRITE_ONCE(cputimer->running, true);  | 
|---|
 | 313 | +		WRITE_ONCE(pct->timers_active, true);  | 
|---|
| 238 | 314 |  	} | 
|---|
| 239 |  | -	sample_cputime_atomic(times, &cputimer->cputime_atomic);  | 
|---|
 | 315 | +	proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);  | 
|---|
 | 316 | +}  | 
|---|
 | 317 | +  | 
|---|
 | 318 | +static void __thread_group_cputime(struct task_struct *tsk, u64 *samples)  | 
|---|
 | 319 | +{  | 
|---|
 | 320 | +	struct task_cputime ct;  | 
|---|
 | 321 | +  | 
|---|
 | 322 | +	thread_group_cputime(tsk, &ct);  | 
|---|
 | 323 | +	store_samples(samples, ct.stime, ct.utime, ct.sum_exec_runtime);  | 
|---|
| 240 | 324 |  } | 
|---|
| 241 | 325 |   | 
|---|
| 242 | 326 |  /* | 
|---|
| 243 |  | - * Sample a process (thread group) clock for the given group_leader task.  | 
|---|
| 244 |  | - * Must be called with task sighand lock held for safe while_each_thread()  | 
|---|
| 245 |  | - * traversal.  | 
|---|
 | 327 | + * Sample a process (thread group) clock for the given task clkid. If the  | 
|---|
 | 328 | + * group's cputime accounting is already enabled, read the atomic  | 
|---|
 | 329 | + * store. Otherwise a full update is required.  clkid is already validated.  | 
|---|
| 246 | 330 |   */ | 
|---|
| 247 |  | -static int cpu_clock_sample_group(const clockid_t which_clock,  | 
|---|
| 248 |  | -				  struct task_struct *p,  | 
|---|
| 249 |  | -				  u64 *sample)  | 
|---|
 | 331 | +static u64 cpu_clock_sample_group(const clockid_t clkid, struct task_struct *p,  | 
|---|
 | 332 | +				  bool start)  | 
|---|
| 250 | 333 |  { | 
|---|
| 251 |  | -	struct task_cputime cputime;  | 
|---|
 | 334 | +	struct thread_group_cputimer *cputimer = &p->signal->cputimer;  | 
|---|
 | 335 | +	struct posix_cputimers *pct = &p->signal->posix_cputimers;  | 
|---|
 | 336 | +	u64 samples[CPUCLOCK_MAX];  | 
|---|
| 252 | 337 |   | 
|---|
| 253 |  | -	switch (CPUCLOCK_WHICH(which_clock)) {  | 
|---|
| 254 |  | -	default:  | 
|---|
| 255 |  | -		return -EINVAL;  | 
|---|
| 256 |  | -	case CPUCLOCK_PROF:  | 
|---|
| 257 |  | -		thread_group_cputime(p, &cputime);  | 
|---|
| 258 |  | -		*sample = cputime.utime + cputime.stime;  | 
|---|
| 259 |  | -		break;  | 
|---|
| 260 |  | -	case CPUCLOCK_VIRT:  | 
|---|
| 261 |  | -		thread_group_cputime(p, &cputime);  | 
|---|
| 262 |  | -		*sample = cputime.utime;  | 
|---|
| 263 |  | -		break;  | 
|---|
| 264 |  | -	case CPUCLOCK_SCHED:  | 
|---|
| 265 |  | -		thread_group_cputime(p, &cputime);  | 
|---|
| 266 |  | -		*sample = cputime.sum_exec_runtime;  | 
|---|
| 267 |  | -		break;  | 
|---|
| 268 |  | -	}  | 
|---|
| 269 |  | -	return 0;  | 
|---|
| 270 |  | -}  | 
|---|
| 271 |  | -  | 
|---|
| 272 |  | -static int posix_cpu_clock_get_task(struct task_struct *tsk,  | 
|---|
| 273 |  | -				    const clockid_t which_clock,  | 
|---|
| 274 |  | -				    struct timespec64 *tp)  | 
|---|
| 275 |  | -{  | 
|---|
| 276 |  | -	int err = -EINVAL;  | 
|---|
| 277 |  | -	u64 rtn;  | 
|---|
| 278 |  | -  | 
|---|
| 279 |  | -	if (CPUCLOCK_PERTHREAD(which_clock)) {  | 
|---|
| 280 |  | -		if (same_thread_group(tsk, current))  | 
|---|
| 281 |  | -			err = cpu_clock_sample(which_clock, tsk, &rtn);  | 
|---|
 | 338 | +	if (!READ_ONCE(pct->timers_active)) {  | 
|---|
 | 339 | +		if (start)  | 
|---|
 | 340 | +			thread_group_start_cputime(p, samples);  | 
|---|
 | 341 | +		else  | 
|---|
 | 342 | +			__thread_group_cputime(p, samples);  | 
|---|
| 282 | 343 |  	} else { | 
|---|
| 283 |  | -		if (tsk == current || thread_group_leader(tsk))  | 
|---|
| 284 |  | -			err = cpu_clock_sample_group(which_clock, tsk, &rtn);  | 
|---|
 | 344 | +		proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);  | 
|---|
| 285 | 345 |  	} | 
|---|
| 286 | 346 |   | 
|---|
| 287 |  | -	if (!err)  | 
|---|
| 288 |  | -		*tp = ns_to_timespec64(rtn);  | 
|---|
| 289 |  | -  | 
|---|
| 290 |  | -	return err;  | 
|---|
 | 347 | +	return samples[clkid];  | 
|---|
| 291 | 348 |  } | 
|---|
| 292 | 349 |   | 
|---|
| 293 |  | -  | 
|---|
| 294 |  | -static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec64 *tp)  | 
|---|
 | 350 | +static int posix_cpu_clock_get(const clockid_t clock, struct timespec64 *tp)  | 
|---|
| 295 | 351 |  { | 
|---|
| 296 |  | -	const pid_t pid = CPUCLOCK_PID(which_clock);  | 
|---|
| 297 |  | -	int err = -EINVAL;  | 
|---|
 | 352 | +	const clockid_t clkid = CPUCLOCK_WHICH(clock);  | 
|---|
 | 353 | +	struct task_struct *tsk;  | 
|---|
 | 354 | +	u64 t;  | 
|---|
| 298 | 355 |   | 
|---|
| 299 |  | -	if (pid == 0) {  | 
|---|
| 300 |  | -		/*  | 
|---|
| 301 |  | -		 * Special case constant value for our own clocks.  | 
|---|
| 302 |  | -		 * We don't have to do any lookup to find ourselves.  | 
|---|
| 303 |  | -		 */  | 
|---|
| 304 |  | -		err = posix_cpu_clock_get_task(current, which_clock, tp);  | 
|---|
| 305 |  | -	} else {  | 
|---|
| 306 |  | -		/*  | 
|---|
| 307 |  | -		 * Find the given PID, and validate that the caller  | 
|---|
| 308 |  | -		 * should be able to see it.  | 
|---|
| 309 |  | -		 */  | 
|---|
| 310 |  | -		struct task_struct *p;  | 
|---|
| 311 |  | -		rcu_read_lock();  | 
|---|
| 312 |  | -		p = find_task_by_vpid(pid);  | 
|---|
| 313 |  | -		if (p)  | 
|---|
| 314 |  | -			err = posix_cpu_clock_get_task(p, which_clock, tp);  | 
|---|
 | 356 | +	rcu_read_lock();  | 
|---|
 | 357 | +	tsk = pid_task(pid_for_clock(clock, true), clock_pid_type(clock));  | 
|---|
 | 358 | +	if (!tsk) {  | 
|---|
| 315 | 359 |  		rcu_read_unlock(); | 
|---|
 | 360 | +		return -EINVAL;  | 
|---|
| 316 | 361 |  	} | 
|---|
| 317 | 362 |   | 
|---|
| 318 |  | -	return err;  | 
|---|
 | 363 | +	if (CPUCLOCK_PERTHREAD(clock))  | 
|---|
 | 364 | +		t = cpu_clock_sample(clkid, tsk);  | 
|---|
 | 365 | +	else  | 
|---|
 | 366 | +		t = cpu_clock_sample_group(clkid, tsk, false);  | 
|---|
 | 367 | +	rcu_read_unlock();  | 
|---|
 | 368 | +  | 
|---|
 | 369 | +	*tp = ns_to_timespec64(t);  | 
|---|
 | 370 | +	return 0;  | 
|---|
| 319 | 371 |  } | 
|---|
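
This is the kernel side of clock_gettime() on CPU-time clockids. A minimal userspace counterpart (standard POSIX API, outside this patch) obtains the caller's process clock and reads it; pid_for_clock(..., true) resolves such a request to the caller's tgid as described above.

```c
#include <stdio.h>
#include <time.h>
#include <unistd.h>

int main(void)
{
	clockid_t cid;
	struct timespec ts;

	/* Process-wide CPU-time clock of this process */
	if (clock_getcpuclockid(getpid(), &cid) != 0)
		return 1;
	if (clock_gettime(cid, &ts) != 0)
		return 1;

	printf("process CPU time: %ld.%09ld s\n", (long)ts.tv_sec, (long)ts.tv_nsec);
	return 0;
}
```
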
| 320 | 372 |   | 
|---|
| 321 | 373 |  /* | 
|---|
| .. | .. | 
|---|
| 325 | 377 |   */ | 
|---|
| 326 | 378 |  static int posix_cpu_timer_create(struct k_itimer *new_timer) | 
|---|
| 327 | 379 |  { | 
|---|
| 328 |  | -	int ret = 0;  | 
|---|
| 329 |  | -	const pid_t pid = CPUCLOCK_PID(new_timer->it_clock);  | 
|---|
| 330 |  | -	struct task_struct *p;  | 
|---|
| 331 |  | -  | 
|---|
| 332 |  | -	if (CPUCLOCK_WHICH(new_timer->it_clock) >= CPUCLOCK_MAX)  | 
|---|
| 333 |  | -		return -EINVAL;  | 
|---|
| 334 |  | -  | 
|---|
| 335 |  | -	new_timer->kclock = &clock_posix_cpu;  | 
|---|
| 336 |  | -  | 
|---|
| 337 |  | -	INIT_LIST_HEAD(&new_timer->it.cpu.entry);  | 
|---|
 | 380 | +	static struct lock_class_key posix_cpu_timers_key;  | 
|---|
 | 381 | +	struct pid *pid;  | 
|---|
| 338 | 382 |   | 
|---|
| 339 | 383 |  	rcu_read_lock(); | 
|---|
| 340 |  | -	if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {  | 
|---|
| 341 |  | -		if (pid == 0) {  | 
|---|
| 342 |  | -			p = current;  | 
|---|
| 343 |  | -		} else {  | 
|---|
| 344 |  | -			p = find_task_by_vpid(pid);  | 
|---|
| 345 |  | -			if (p && !same_thread_group(p, current))  | 
|---|
| 346 |  | -				p = NULL;  | 
|---|
| 347 |  | -		}  | 
|---|
| 348 |  | -	} else {  | 
|---|
| 349 |  | -		if (pid == 0) {  | 
|---|
| 350 |  | -			p = current->group_leader;  | 
|---|
| 351 |  | -		} else {  | 
|---|
| 352 |  | -			p = find_task_by_vpid(pid);  | 
|---|
| 353 |  | -			if (p && !has_group_leader_pid(p))  | 
|---|
| 354 |  | -				p = NULL;  | 
|---|
| 355 |  | -		}  | 
|---|
 | 384 | +	pid = pid_for_clock(new_timer->it_clock, false);  | 
|---|
 | 385 | +	if (!pid) {  | 
|---|
 | 386 | +		rcu_read_unlock();  | 
|---|
 | 387 | +		return -EINVAL;  | 
|---|
| 356 | 388 |  	} | 
|---|
| 357 |  | -	new_timer->it.cpu.task = p;  | 
|---|
| 358 |  | -	if (p) {  | 
|---|
| 359 |  | -		get_task_struct(p);  | 
|---|
| 360 |  | -	} else {  | 
|---|
| 361 |  | -		ret = -EINVAL;  | 
|---|
| 362 |  | -	}  | 
|---|
| 363 |  | -	rcu_read_unlock();  | 
|---|
| 364 | 389 |   | 
|---|
| 365 |  | -	return ret;  | 
|---|
 | 390 | +	/*  | 
|---|
 | 391 | +	 * If posix timer expiry is handled in task work context then  | 
|---|
 | 392 | +	 * timer::it_lock can be taken without disabling interrupts as all  | 
|---|
 | 393 | + * other locking happens in task context. This requires a separate  | 
|---|
 | 394 | + * lock class key, otherwise regular posix timer expiry would record  | 
|---|
 | 395 | +	 * the lock class being taken in interrupt context and generate a  | 
|---|
 | 396 | +	 * false positive warning.  | 
|---|
 | 397 | +	 */  | 
|---|
 | 398 | +	if (IS_ENABLED(CONFIG_POSIX_CPU_TIMERS_TASK_WORK))  | 
|---|
 | 399 | +		lockdep_set_class(&new_timer->it_lock, &posix_cpu_timers_key);  | 
|---|
 | 400 | +  | 
|---|
 | 401 | +	new_timer->kclock = &clock_posix_cpu;  | 
|---|
 | 402 | +	timerqueue_init(&new_timer->it.cpu.node);  | 
|---|
 | 403 | +	new_timer->it.cpu.pid = get_pid(pid);  | 
|---|
 | 404 | +	rcu_read_unlock();  | 
|---|
 | 405 | +	return 0;  | 
|---|
| 366 | 406 |  } | 
|---|
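
For reference, the creation path above is reached through the regular POSIX timer syscalls whenever the clockid is a CPU-time clock. A hedged userspace sketch (standard timer_create()/timer_settime() API, not part of this patch; link with -lrt on older glibc, handler name illustrative) arming a one-shot timer on the process CPU clock:

```c
#include <signal.h>
#include <stdio.h>
#include <time.h>

static volatile sig_atomic_t fired;

static void on_alarm(int sig)
{
	(void)sig;
	fired = 1;
}

int main(void)
{
	struct sigevent sev = {
		.sigev_notify = SIGEV_SIGNAL,
		.sigev_signo  = SIGALRM,
	};
	struct itimerspec its = {
		.it_value.tv_sec = 1,	/* one-shot after 1s of consumed CPU time */
	};
	timer_t tid;

	signal(SIGALRM, on_alarm);

	/* A CPU-time clockid routes timer_create() to the CPU timer clock ops */
	if (timer_create(CLOCK_PROCESS_CPUTIME_ID, &sev, &tid) != 0) {
		perror("timer_create");
		return 1;
	}
	timer_settime(tid, 0, &its, NULL);

	while (!fired)		/* burn CPU so the process CPU clock advances */
		;

	puts("CPU-time timer fired");
	timer_delete(tid);
	return 0;
}
```
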
| 367 | 407 |   | 
|---|
| 368 | 408 |  /* | 
|---|
| .. | .. | 
|---|
| 373 | 413 |   */ | 
|---|
| 374 | 414 |  static int posix_cpu_timer_del(struct k_itimer *timer) | 
|---|
| 375 | 415 |  { | 
|---|
| 376 |  | -	int ret = 0;  | 
|---|
| 377 |  | -	unsigned long flags;  | 
|---|
 | 416 | +	struct cpu_timer *ctmr = &timer->it.cpu;  | 
|---|
| 378 | 417 |  	struct sighand_struct *sighand; | 
|---|
| 379 |  | -	struct task_struct *p = timer->it.cpu.task;  | 
|---|
 | 418 | +	struct task_struct *p;  | 
|---|
 | 419 | +	unsigned long flags;  | 
|---|
 | 420 | +	int ret = 0;  | 
|---|
| 380 | 421 |   | 
|---|
| 381 |  | -	if (WARN_ON_ONCE(!p))  | 
|---|
| 382 |  | -		return -EINVAL;  | 
|---|
 | 422 | +	rcu_read_lock();  | 
|---|
 | 423 | +	p = cpu_timer_task_rcu(timer);  | 
|---|
 | 424 | +	if (!p)  | 
|---|
 | 425 | +		goto out;  | 
|---|
| 383 | 426 |   | 
|---|
| 384 | 427 |  	/* | 
|---|
| 385 | 428 |  	 * Protect against sighand release/switch in exit/exec and process/ | 
|---|
| .. | .. | 
|---|
| 388 | 431 |  	sighand = lock_task_sighand(p, &flags); | 
|---|
| 389 | 432 |  	if (unlikely(sighand == NULL)) { | 
|---|
| 390 | 433 |  		/* | 
|---|
| 391 |  | -		 * We raced with the reaping of the task.  | 
|---|
| 392 |  | -		 * The deletion should have cleared us off the list.  | 
|---|
 | 434 | +		 * This raced with the reaping of the task. The exit cleanup  | 
|---|
 | 435 | +		 * should have removed this timer from the timer queue.  | 
|---|
| 393 | 436 |  		 */ | 
|---|
| 394 |  | -		WARN_ON_ONCE(!list_empty(&timer->it.cpu.entry));  | 
|---|
 | 437 | +		WARN_ON_ONCE(ctmr->head || timerqueue_node_queued(&ctmr->node));  | 
|---|
| 395 | 438 |  	} else { | 
|---|
| 396 | 439 |  		if (timer->it.cpu.firing) | 
|---|
| 397 | 440 |  			ret = TIMER_RETRY; | 
|---|
| 398 | 441 |  		else | 
|---|
| 399 |  | -			list_del(&timer->it.cpu.entry);  | 
|---|
 | 442 | +			cpu_timer_dequeue(ctmr);  | 
|---|
| 400 | 443 |   | 
|---|
| 401 | 444 |  		unlock_task_sighand(p, &flags); | 
|---|
| 402 | 445 |  	} | 
|---|
| 403 | 446 |   | 
|---|
 | 447 | +out:  | 
|---|
 | 448 | +	rcu_read_unlock();  | 
|---|
| 404 | 449 |  	if (!ret) | 
|---|
| 405 |  | -		put_task_struct(p);  | 
|---|
 | 450 | +		put_pid(ctmr->pid);  | 
|---|
| 406 | 451 |   | 
|---|
| 407 | 452 |  	return ret; | 
|---|
| 408 | 453 |  } | 
|---|
| 409 | 454 |   | 
|---|
| 410 |  | -static void cleanup_timers_list(struct list_head *head)  | 
|---|
 | 455 | +static void cleanup_timerqueue(struct timerqueue_head *head)  | 
|---|
| 411 | 456 |  { | 
|---|
| 412 |  | -	struct cpu_timer_list *timer, *next;  | 
|---|
 | 457 | +	struct timerqueue_node *node;  | 
|---|
 | 458 | +	struct cpu_timer *ctmr;  | 
|---|
| 413 | 459 |   | 
|---|
| 414 |  | -	list_for_each_entry_safe(timer, next, head, entry)  | 
|---|
| 415 |  | -		list_del_init(&timer->entry);  | 
|---|
 | 460 | +	while ((node = timerqueue_getnext(head))) {  | 
|---|
 | 461 | +		timerqueue_del(head, node);  | 
|---|
 | 462 | +		ctmr = container_of(node, struct cpu_timer, node);  | 
|---|
 | 463 | +		ctmr->head = NULL;  | 
|---|
 | 464 | +	}  | 
|---|
| 416 | 465 |  } | 
|---|
| 417 | 466 |   | 
|---|
| 418 | 467 |  /* | 
|---|
| 419 |  | - * Clean out CPU timers still ticking when a thread exited.  The task  | 
|---|
| 420 |  | - * pointer is cleared, and the expiry time is replaced with the residual  | 
|---|
| 421 |  | - * time for later timer_gettime calls to return.  | 
|---|
 | 468 | + * Clean out CPU timers which are still armed when a thread exits. The  | 
|---|
 | 469 | + * timers are only removed from the list. No other updates are done. The  | 
|---|
 | 470 | + * corresponding posix timers are still accessible, but cannot be rearmed.  | 
|---|
 | 471 | + *  | 
|---|
| 422 | 472 |   * This must be called with the siglock held. | 
|---|
| 423 | 473 |   */ | 
|---|
| 424 |  | -static void cleanup_timers(struct list_head *head)  | 
|---|
 | 474 | +static void cleanup_timers(struct posix_cputimers *pct)  | 
|---|
| 425 | 475 |  { | 
|---|
| 426 |  | -	cleanup_timers_list(head);  | 
|---|
| 427 |  | -	cleanup_timers_list(++head);  | 
|---|
| 428 |  | -	cleanup_timers_list(++head);  | 
|---|
 | 476 | +	cleanup_timerqueue(&pct->bases[CPUCLOCK_PROF].tqhead);  | 
|---|
 | 477 | +	cleanup_timerqueue(&pct->bases[CPUCLOCK_VIRT].tqhead);  | 
|---|
 | 478 | +	cleanup_timerqueue(&pct->bases[CPUCLOCK_SCHED].tqhead);  | 
|---|
| 429 | 479 |  } | 
|---|
| 430 | 480 |   | 
|---|
| 431 | 481 |  /* | 
|---|
| .. | .. | 
|---|
| 435 | 485 |   */ | 
|---|
| 436 | 486 |  void posix_cpu_timers_exit(struct task_struct *tsk) | 
|---|
| 437 | 487 |  { | 
|---|
| 438 |  | -	cleanup_timers(tsk->cpu_timers);  | 
|---|
 | 488 | +	cleanup_timers(&tsk->posix_cputimers);  | 
|---|
| 439 | 489 |  } | 
|---|
| 440 | 490 |  void posix_cpu_timers_exit_group(struct task_struct *tsk) | 
|---|
| 441 | 491 |  { | 
|---|
| 442 |  | -	cleanup_timers(tsk->signal->cpu_timers);  | 
|---|
| 443 |  | -}  | 
|---|
| 444 |  | -  | 
|---|
| 445 |  | -static inline int expires_gt(u64 expires, u64 new_exp)  | 
|---|
| 446 |  | -{  | 
|---|
| 447 |  | -	return expires == 0 || expires > new_exp;  | 
|---|
 | 492 | +	cleanup_timers(&tsk->signal->posix_cputimers);  | 
|---|
| 448 | 493 |  } | 
|---|
| 449 | 494 |   | 
|---|
| 450 | 495 |  /* | 
|---|
| 451 | 496 |   * Insert the timer on the appropriate list before any timers that | 
|---|
| 452 | 497 |   * expire later.  This must be called with the sighand lock held. | 
|---|
| 453 | 498 |   */ | 
|---|
| 454 |  | -static void arm_timer(struct k_itimer *timer)  | 
|---|
 | 499 | +static void arm_timer(struct k_itimer *timer, struct task_struct *p)  | 
|---|
| 455 | 500 |  { | 
|---|
| 456 |  | -	struct task_struct *p = timer->it.cpu.task;  | 
|---|
| 457 |  | -	struct list_head *head, *listpos;  | 
|---|
| 458 |  | -	struct task_cputime *cputime_expires;  | 
|---|
| 459 |  | -	struct cpu_timer_list *const nt = &timer->it.cpu;  | 
|---|
| 460 |  | -	struct cpu_timer_list *next;  | 
|---|
 | 501 | +	int clkidx = CPUCLOCK_WHICH(timer->it_clock);  | 
|---|
 | 502 | +	struct cpu_timer *ctmr = &timer->it.cpu;  | 
|---|
 | 503 | +	u64 newexp = cpu_timer_getexpires(ctmr);  | 
|---|
 | 504 | +	struct posix_cputimer_base *base;  | 
|---|
| 461 | 505 |   | 
|---|
| 462 |  | -	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {  | 
|---|
| 463 |  | -		head = p->cpu_timers;  | 
|---|
| 464 |  | -		cputime_expires = &p->cputime_expires;  | 
|---|
| 465 |  | -	} else {  | 
|---|
| 466 |  | -		head = p->signal->cpu_timers;  | 
|---|
| 467 |  | -		cputime_expires = &p->signal->cputime_expires;  | 
|---|
| 468 |  | -	}  | 
|---|
| 469 |  | -	head += CPUCLOCK_WHICH(timer->it_clock);  | 
|---|
 | 506 | +	if (CPUCLOCK_PERTHREAD(timer->it_clock))  | 
|---|
 | 507 | +		base = p->posix_cputimers.bases + clkidx;  | 
|---|
 | 508 | +	else  | 
|---|
 | 509 | +		base = p->signal->posix_cputimers.bases + clkidx;  | 
|---|
| 470 | 510 |   | 
|---|
| 471 |  | -	listpos = head;  | 
|---|
| 472 |  | -	list_for_each_entry(next, head, entry) {  | 
|---|
| 473 |  | -		if (nt->expires < next->expires)  | 
|---|
| 474 |  | -			break;  | 
|---|
| 475 |  | -		listpos = &next->entry;  | 
|---|
| 476 |  | -	}  | 
|---|
| 477 |  | -	list_add(&nt->entry, listpos);  | 
|---|
 | 511 | +	if (!cpu_timer_enqueue(&base->tqhead, ctmr))  | 
|---|
 | 512 | +		return;  | 
|---|
| 478 | 513 |   | 
|---|
| 479 |  | -	if (listpos == head) {  | 
|---|
| 480 |  | -		u64 exp = nt->expires;  | 
|---|
 | 514 | +	/*  | 
|---|
 | 515 | +	 * We are the new earliest-expiring POSIX 1.b timer, hence  | 
|---|
 | 516 | +	 * need to update expiration cache. Take into account that  | 
|---|
 | 517 | +	 * for process timers we share expiration cache with itimers  | 
|---|
 | 518 | +	 * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.  | 
|---|
 | 519 | +	 */  | 
|---|
 | 520 | +	if (newexp < base->nextevt)  | 
|---|
 | 521 | +		base->nextevt = newexp;  | 
|---|
| 481 | 522 |   | 
|---|
| 482 |  | -		/*  | 
|---|
| 483 |  | -		 * We are the new earliest-expiring POSIX 1.b timer, hence  | 
|---|
| 484 |  | -		 * need to update expiration cache. Take into account that  | 
|---|
| 485 |  | -		 * for process timers we share expiration cache with itimers  | 
|---|
| 486 |  | -		 * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.  | 
|---|
| 487 |  | -		 */  | 
|---|
| 488 |  | -  | 
|---|
| 489 |  | -		switch (CPUCLOCK_WHICH(timer->it_clock)) {  | 
|---|
| 490 |  | -		case CPUCLOCK_PROF:  | 
|---|
| 491 |  | -			if (expires_gt(cputime_expires->prof_exp, exp))  | 
|---|
| 492 |  | -				cputime_expires->prof_exp = exp;  | 
|---|
| 493 |  | -			break;  | 
|---|
| 494 |  | -		case CPUCLOCK_VIRT:  | 
|---|
| 495 |  | -			if (expires_gt(cputime_expires->virt_exp, exp))  | 
|---|
| 496 |  | -				cputime_expires->virt_exp = exp;  | 
|---|
| 497 |  | -			break;  | 
|---|
| 498 |  | -		case CPUCLOCK_SCHED:  | 
|---|
| 499 |  | -			if (expires_gt(cputime_expires->sched_exp, exp))  | 
|---|
| 500 |  | -				cputime_expires->sched_exp = exp;  | 
|---|
| 501 |  | -			break;  | 
|---|
| 502 |  | -		}  | 
|---|
| 503 |  | -		if (CPUCLOCK_PERTHREAD(timer->it_clock))  | 
|---|
| 504 |  | -			tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);  | 
|---|
| 505 |  | -		else  | 
|---|
| 506 |  | -			tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);  | 
|---|
| 507 |  | -	}  | 
|---|
 | 523 | +	if (CPUCLOCK_PERTHREAD(timer->it_clock))  | 
|---|
 | 524 | +		tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);  | 
|---|
 | 525 | +	else  | 
|---|
 | 526 | +		tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);  | 
|---|
| 508 | 527 |  } | 
|---|
| 509 | 528 |   | 
|---|
| 510 | 529 |  /* | 
|---|
| .. | .. | 
|---|
| 512 | 531 |   */ | 
|---|
| 513 | 532 |  static void cpu_timer_fire(struct k_itimer *timer) | 
|---|
| 514 | 533 |  { | 
|---|
 | 534 | +	struct cpu_timer *ctmr = &timer->it.cpu;  | 
|---|
 | 535 | +  | 
|---|
| 515 | 536 |  	if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { | 
|---|
| 516 | 537 |  		/* | 
|---|
| 517 | 538 |  		 * User don't want any signal. | 
|---|
| 518 | 539 |  		 */ | 
|---|
| 519 |  | -		timer->it.cpu.expires = 0;  | 
|---|
 | 540 | +		cpu_timer_setexpires(ctmr, 0);  | 
|---|
| 520 | 541 |  	} else if (unlikely(timer->sigq == NULL)) { | 
|---|
| 521 | 542 |  		/* | 
|---|
| 522 | 543 |  		 * This a special case for clock_nanosleep, | 
|---|
| 523 | 544 |  		 * not a normal timer from sys_timer_create. | 
|---|
| 524 | 545 |  		 */ | 
|---|
| 525 | 546 |  		wake_up_process(timer->it_process); | 
|---|
| 526 |  | -		timer->it.cpu.expires = 0;  | 
|---|
| 527 |  | -	} else if (timer->it.cpu.incr == 0) {  | 
|---|
 | 547 | +		cpu_timer_setexpires(ctmr, 0);  | 
|---|
 | 548 | +	} else if (!timer->it_interval) {  | 
|---|
| 528 | 549 |  		/* | 
|---|
| 529 | 550 |  		 * One-shot timer.  Clear it as soon as it's fired. | 
|---|
| 530 | 551 |  		 */ | 
|---|
| 531 | 552 |  		posix_timer_event(timer, 0); | 
|---|
| 532 |  | -		timer->it.cpu.expires = 0;  | 
|---|
 | 553 | +		cpu_timer_setexpires(ctmr, 0);  | 
|---|
| 533 | 554 |  	} else if (posix_timer_event(timer, ++timer->it_requeue_pending)) { | 
|---|
| 534 | 555 |  		/* | 
|---|
| 535 | 556 |  		 * The signal did not get queued because the signal | 
|---|
| .. | .. | 
|---|
| 543 | 564 |  } | 
|---|
| 544 | 565 |   | 
|---|
| 545 | 566 |  /* | 
|---|
| 546 |  | - * Sample a process (thread group) timer for the given group_leader task.  | 
|---|
| 547 |  | - * Must be called with task sighand lock held for safe while_each_thread()  | 
|---|
| 548 |  | - * traversal.  | 
|---|
| 549 |  | - */  | 
|---|
| 550 |  | -static int cpu_timer_sample_group(const clockid_t which_clock,  | 
|---|
| 551 |  | -				  struct task_struct *p, u64 *sample)  | 
|---|
| 552 |  | -{  | 
|---|
| 553 |  | -	struct task_cputime cputime;  | 
|---|
| 554 |  | -  | 
|---|
| 555 |  | -	thread_group_cputimer(p, &cputime);  | 
|---|
| 556 |  | -	switch (CPUCLOCK_WHICH(which_clock)) {  | 
|---|
| 557 |  | -	default:  | 
|---|
| 558 |  | -		return -EINVAL;  | 
|---|
| 559 |  | -	case CPUCLOCK_PROF:  | 
|---|
| 560 |  | -		*sample = cputime.utime + cputime.stime;  | 
|---|
| 561 |  | -		break;  | 
|---|
| 562 |  | -	case CPUCLOCK_VIRT:  | 
|---|
| 563 |  | -		*sample = cputime.utime;  | 
|---|
| 564 |  | -		break;  | 
|---|
| 565 |  | -	case CPUCLOCK_SCHED:  | 
|---|
| 566 |  | -		*sample = cputime.sum_exec_runtime;  | 
|---|
| 567 |  | -		break;  | 
|---|
| 568 |  | -	}  | 
|---|
| 569 |  | -	return 0;  | 
|---|
| 570 |  | -}  | 
|---|
| 571 |  | -  | 
|---|
| 572 |  | -/*  | 
|---|
| 573 | 567 |   * Guts of sys_timer_settime for CPU timers. | 
|---|
| 574 | 568 |   * This is called with the timer locked and interrupts disabled. | 
|---|
| 575 | 569 |   * If we return TIMER_RETRY, it's necessary to release the timer's lock | 
|---|
| .. | .. | 
|---|
| 578 | 572 |  static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, | 
|---|
| 579 | 573 |  			       struct itimerspec64 *new, struct itimerspec64 *old) | 
|---|
| 580 | 574 |  { | 
|---|
| 581 |  | -	unsigned long flags;  | 
|---|
| 582 |  | -	struct sighand_struct *sighand;  | 
|---|
| 583 |  | -	struct task_struct *p = timer->it.cpu.task;  | 
|---|
 | 575 | +	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);  | 
|---|
| 584 | 576 |  	u64 old_expires, new_expires, old_incr, val; | 
|---|
| 585 |  | -	int ret;  | 
|---|
 | 577 | +	struct cpu_timer *ctmr = &timer->it.cpu;  | 
|---|
 | 578 | +	struct sighand_struct *sighand;  | 
|---|
 | 579 | +	struct task_struct *p;  | 
|---|
 | 580 | +	unsigned long flags;  | 
|---|
 | 581 | +	int ret = 0;  | 
|---|
| 586 | 582 |   | 
|---|
| 587 |  | -	if (WARN_ON_ONCE(!p))  | 
|---|
| 588 |  | -		return -EINVAL;  | 
|---|
 | 583 | +	rcu_read_lock();  | 
|---|
 | 584 | +	p = cpu_timer_task_rcu(timer);  | 
|---|
 | 585 | +	if (!p) {  | 
|---|
 | 586 | +		/*  | 
|---|
 | 587 | +		 * If p has just been reaped, we can no  | 
|---|
 | 588 | +		 * longer get any information about it at all.  | 
|---|
 | 589 | +		 */  | 
|---|
 | 590 | +		rcu_read_unlock();  | 
|---|
 | 591 | +		return -ESRCH;  | 
|---|
 | 592 | +	}  | 
|---|
| 589 | 593 |   | 
|---|
| 590 | 594 |  	/* | 
|---|
| 591 | 595 |  	 * Use the to_ktime conversion because that clamps the maximum | 
|---|
| .. | .. | 
|---|
| 603 | 607 |  	 * longer get any information about it at all. | 
|---|
| 604 | 608 |  	 */ | 
|---|
| 605 | 609 |  	if (unlikely(sighand == NULL)) { | 
|---|
 | 610 | +		rcu_read_unlock();  | 
|---|
| 606 | 611 |  		return -ESRCH; | 
|---|
| 607 | 612 |  	} | 
|---|
| 608 | 613 |   | 
|---|
| 609 | 614 |  	/* | 
|---|
| 610 | 615 |  	 * Disarm any old timer after extracting its expiry time. | 
|---|
| 611 | 616 |  	 */ | 
|---|
 | 617 | +	old_incr = timer->it_interval;  | 
|---|
 | 618 | +	old_expires = cpu_timer_getexpires(ctmr);  | 
|---|
| 612 | 619 |   | 
|---|
| 613 |  | -	ret = 0;  | 
|---|
| 614 |  | -	old_incr = timer->it.cpu.incr;  | 
|---|
| 615 |  | -	old_expires = timer->it.cpu.expires;  | 
|---|
| 616 | 620 |  	if (unlikely(timer->it.cpu.firing)) { | 
|---|
| 617 | 621 |  		timer->it.cpu.firing = -1; | 
|---|
| 618 | 622 |  		ret = TIMER_RETRY; | 
|---|
| 619 |  | -	} else  | 
|---|
| 620 |  | -		list_del_init(&timer->it.cpu.entry);  | 
|---|
 | 623 | +	} else {  | 
|---|
 | 624 | +		cpu_timer_dequeue(ctmr);  | 
|---|
 | 625 | +	}  | 
|---|
| 621 | 626 |   | 
|---|
| 622 | 627 |  	/* | 
|---|
| 623 | 628 |  	 * We need to sample the current value to convert the new | 
|---|
| .. | .. | 
|---|
| 627 | 632 |  	 * times (in arm_timer).  With an absolute time, we must | 
|---|
| 628 | 633 |  	 * check if it's already passed.  In short, we need a sample. | 
|---|
| 629 | 634 |  	 */ | 
|---|
| 630 |  | -	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {  | 
|---|
| 631 |  | -		cpu_clock_sample(timer->it_clock, p, &val);  | 
|---|
| 632 |  | -	} else {  | 
|---|
| 633 |  | -		cpu_timer_sample_group(timer->it_clock, p, &val);  | 
|---|
| 634 |  | -	}  | 
|---|
 | 635 | +	if (CPUCLOCK_PERTHREAD(timer->it_clock))  | 
|---|
 | 636 | +		val = cpu_clock_sample(clkid, p);  | 
|---|
 | 637 | +	else  | 
|---|
 | 638 | +		val = cpu_clock_sample_group(clkid, p, true);  | 
|---|
| 635 | 639 |   | 
|---|
| 636 | 640 |  	if (old) { | 
|---|
| 637 | 641 |  		if (old_expires == 0) { | 
|---|
| .. | .. | 
|---|
| 639 | 643 |  			old->it_value.tv_nsec = 0; | 
|---|
| 640 | 644 |  		} else { | 
|---|
| 641 | 645 |  			/* | 
|---|
| 642 |  | -			 * Update the timer in case it has  | 
|---|
| 643 |  | -			 * overrun already.  If it has,  | 
|---|
| 644 |  | -			 * we'll report it as having overrun  | 
|---|
| 645 |  | -			 * and with the next reloaded timer  | 
|---|
| 646 |  | -			 * already ticking, though we are  | 
|---|
| 647 |  | -			 * swallowing that pending  | 
|---|
| 648 |  | -			 * notification here to install the  | 
|---|
| 649 |  | -			 * new setting.  | 
|---|
 | 646 | +			 * Update the timer in case it has overrun already.  | 
|---|
 | 647 | +			 * If it has, we'll report it as having overrun and  | 
|---|
 | 648 | +			 * with the next reloaded timer already ticking,  | 
|---|
 | 649 | +			 * though we are swallowing that pending  | 
|---|
 | 650 | +			 * notification here to install the new setting.  | 
|---|
| 650 | 651 |  			 */ | 
|---|
| 651 |  | -			bump_cpu_timer(timer, val);  | 
|---|
| 652 |  | -			if (val < timer->it.cpu.expires) {  | 
|---|
| 653 |  | -				old_expires = timer->it.cpu.expires - val;  | 
|---|
 | 652 | +			u64 exp = bump_cpu_timer(timer, val);  | 
|---|
 | 653 | +  | 
|---|
 | 654 | +			if (val < exp) {  | 
|---|
 | 655 | +				old_expires = exp - val;  | 
|---|
| 654 | 656 |  				old->it_value = ns_to_timespec64(old_expires); | 
|---|
| 655 | 657 |  			} else { | 
|---|
| 656 | 658 |  				old->it_value.tv_nsec = 1; | 
|---|
| .. | .. | 
|---|
| 679 | 681 |  	 * For a timer with no notification action, we don't actually | 
|---|
| 680 | 682 |  	 * arm the timer (we'll just fake it for timer_gettime). | 
|---|
| 681 | 683 |  	 */ | 
|---|
| 682 |  | -	timer->it.cpu.expires = new_expires;  | 
|---|
 | 684 | +	cpu_timer_setexpires(ctmr, new_expires);  | 
|---|
| 683 | 685 |  	if (new_expires != 0 && val < new_expires) { | 
|---|
| 684 |  | -		arm_timer(timer);  | 
|---|
 | 686 | +		arm_timer(timer, p);  | 
|---|
| 685 | 687 |  	} | 
|---|
| 686 | 688 |   | 
|---|
| 687 | 689 |  	unlock_task_sighand(p, &flags); | 
|---|
| .. | .. | 
|---|
| 689 | 691 |  	 * Install the new reload setting, and | 
|---|
| 690 | 692 |  	 * set up the signal and overrun bookkeeping. | 
|---|
| 691 | 693 |  	 */ | 
|---|
| 692 |  | -	timer->it.cpu.incr = timespec64_to_ns(&new->it_interval);  | 
|---|
| 693 |  | -	timer->it_interval = ns_to_ktime(timer->it.cpu.incr);  | 
|---|
 | 694 | +	timer->it_interval = timespec64_to_ktime(new->it_interval);  | 
|---|
| 694 | 695 |   | 
|---|
| 695 | 696 |  	/* | 
|---|
| 696 | 697 |  	 * This acts as a modification timestamp for the timer, | 
|---|
| .. | .. | 
|---|
| 713 | 714 |   | 
|---|
| 714 | 715 |  	ret = 0; | 
|---|
| 715 | 716 |   out: | 
|---|
 | 717 | +	rcu_read_unlock();  | 
|---|
| 716 | 718 |  	if (old) | 
|---|
| 717 | 719 |  		old->it_interval = ns_to_timespec64(old_incr); | 
|---|
| 718 | 720 |   | 
|---|
| .. | .. | 
|---|
| 721 | 723 |   | 
|---|
| 722 | 724 |  static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp) | 
|---|
| 723 | 725 |  { | 
|---|
| 724 |  | -	struct task_struct *p = timer->it.cpu.task;  | 
|---|
| 725 |  | -	u64 now;  | 
|---|
 | 726 | +	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);  | 
|---|
 | 727 | +	struct cpu_timer *ctmr = &timer->it.cpu;  | 
|---|
 | 728 | +	u64 now, expires = cpu_timer_getexpires(ctmr);  | 
|---|
 | 729 | +	struct task_struct *p;  | 
|---|
| 726 | 730 |   | 
|---|
| 727 |  | -	if (WARN_ON_ONCE(!p))  | 
|---|
| 728 |  | -		return;  | 
|---|
 | 731 | +	rcu_read_lock();  | 
|---|
 | 732 | +	p = cpu_timer_task_rcu(timer);  | 
|---|
 | 733 | +	if (!p)  | 
|---|
 | 734 | +		goto out;  | 
|---|
| 729 | 735 |   | 
|---|
| 730 | 736 |  	/* | 
|---|
| 731 | 737 |  	 * Easy part: convert the reload time. | 
|---|
| 732 | 738 |  	 */ | 
|---|
| 733 |  | -	itp->it_interval = ns_to_timespec64(timer->it.cpu.incr);  | 
|---|
 | 739 | +	itp->it_interval = ktime_to_timespec64(timer->it_interval);  | 
|---|
| 734 | 740 |   | 
|---|
| 735 |  | -	if (!timer->it.cpu.expires)  | 
|---|
| 736 |  | -		return;  | 
|---|
 | 741 | +	if (!expires)  | 
|---|
 | 742 | +		goto out;  | 
|---|
| 737 | 743 |   | 
|---|
| 738 | 744 |  	/* | 
|---|
| 739 | 745 |  	 * Sample the clock to take the difference with the expiry time. | 
|---|
| 740 | 746 |  	 */ | 
|---|
| 741 |  | -	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {  | 
|---|
| 742 |  | -		cpu_clock_sample(timer->it_clock, p, &now);  | 
|---|
| 743 |  | -	} else {  | 
|---|
| 744 |  | -		struct sighand_struct *sighand;  | 
|---|
| 745 |  | -		unsigned long flags;  | 
|---|
 | 747 | +	if (CPUCLOCK_PERTHREAD(timer->it_clock))  | 
|---|
 | 748 | +		now = cpu_clock_sample(clkid, p);  | 
|---|
 | 749 | +	else  | 
|---|
 | 750 | +		now = cpu_clock_sample_group(clkid, p, false);  | 
|---|
| 746 | 751 |   | 
|---|
| 747 |  | -		/*  | 
|---|
| 748 |  | -		 * Protect against sighand release/switch in exit/exec and  | 
|---|
| 749 |  | -		 * also make timer sampling safe if it ends up calling  | 
|---|
| 750 |  | -		 * thread_group_cputime().  | 
|---|
| 751 |  | -		 */  | 
|---|
| 752 |  | -		sighand = lock_task_sighand(p, &flags);  | 
|---|
| 753 |  | -		if (unlikely(sighand == NULL)) {  | 
|---|
| 754 |  | -			/*  | 
|---|
| 755 |  | -			 * The process has been reaped.  | 
|---|
| 756 |  | -			 * We can't even collect a sample any more.  | 
|---|
| 757 |  | -			 * Call the timer disarmed, nothing else to do.  | 
|---|
| 758 |  | -			 */  | 
|---|
| 759 |  | -			timer->it.cpu.expires = 0;  | 
|---|
| 760 |  | -			return;  | 
|---|
| 761 |  | -		} else {  | 
|---|
| 762 |  | -			cpu_timer_sample_group(timer->it_clock, p, &now);  | 
|---|
| 763 |  | -			unlock_task_sighand(p, &flags);  | 
|---|
| 764 |  | -		}  | 
|---|
| 765 |  | -	}  | 
|---|
| 766 |  | -  | 
|---|
| 767 |  | -	if (now < timer->it.cpu.expires) {  | 
|---|
| 768 |  | -		itp->it_value = ns_to_timespec64(timer->it.cpu.expires - now);  | 
|---|
 | 752 | +	if (now < expires) {  | 
|---|
 | 753 | +		itp->it_value = ns_to_timespec64(expires - now);  | 
|---|
| 769 | 754 |  	} else { | 
|---|
| 770 | 755 |  		/* | 
|---|
| 771 | 756 |  		 * The timer should have expired already, but the firing | 
|---|
| .. | .. | 
|---|
| 774 | 759 |  		itp->it_value.tv_nsec = 1; | 
|---|
| 775 | 760 |  		itp->it_value.tv_sec = 0; | 
|---|
| 776 | 761 |  	} | 
|---|
 | 762 | +out:  | 
|---|
 | 763 | +	rcu_read_unlock();  | 
|---|
| 777 | 764 |  } | 
|---|
| 778 | 765 |   | 
|---|
| 779 |  | -static unsigned long long  | 
|---|
| 780 |  | -check_timers_list(struct list_head *timers,  | 
|---|
| 781 |  | -		  struct list_head *firing,  | 
|---|
| 782 |  | -		  unsigned long long curr)  | 
|---|
 | 766 | +#define MAX_COLLECTED	20  | 
|---|
 | 767 | +  | 
|---|
 | 768 | +static u64 collect_timerqueue(struct timerqueue_head *head,  | 
|---|
 | 769 | +			      struct list_head *firing, u64 now)  | 
|---|
| 783 | 770 |  { | 
|---|
| 784 |  | -	int maxfire = 20;  | 
|---|
 | 771 | +	struct timerqueue_node *next;  | 
|---|
 | 772 | +	int i = 0;  | 
|---|
| 785 | 773 |   | 
|---|
| 786 |  | -	while (!list_empty(timers)) {  | 
|---|
| 787 |  | -		struct cpu_timer_list *t;  | 
|---|
 | 774 | +	while ((next = timerqueue_getnext(head))) {  | 
|---|
 | 775 | +		struct cpu_timer *ctmr;  | 
|---|
 | 776 | +		u64 expires;  | 
|---|
| 788 | 777 |   | 
|---|
| 789 |  | -		t = list_first_entry(timers, struct cpu_timer_list, entry);  | 
|---|
 | 778 | +		ctmr = container_of(next, struct cpu_timer, node);  | 
|---|
 | 779 | +		expires = cpu_timer_getexpires(ctmr);  | 
|---|
 | 780 | +		/* Limit the number of timers to expire at once */  | 
|---|
 | 781 | +		if (++i == MAX_COLLECTED || now < expires)  | 
|---|
 | 782 | +			return expires;  | 
|---|
| 790 | 783 |   | 
|---|
| 791 |  | -		if (!--maxfire || curr < t->expires)  | 
|---|
| 792 |  | -			return t->expires;  | 
|---|
| 793 |  | -  | 
|---|
| 794 |  | -		t->firing = 1;  | 
|---|
| 795 |  | -		t->firing_cpu = smp_processor_id();  | 
|---|
| 796 |  | -		list_move_tail(&t->entry, firing);  | 
|---|
 | 784 | +		ctmr->firing = 1;  | 
|---|
 | 785 | +		cpu_timer_dequeue(ctmr);  | 
|---|
 | 786 | +		list_add_tail(&ctmr->elist, firing);  | 
|---|
| 797 | 787 |  	} | 
|---|
| 798 | 788 |   | 
|---|
| 799 |  | -	return 0;  | 
|---|
 | 789 | +	return U64_MAX;  | 
|---|
 | 790 | +}  | 
|---|
 | 791 | +  | 
|---|
 | 792 | +static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples,  | 
|---|
 | 793 | +				    struct list_head *firing)  | 
|---|
 | 794 | +{  | 
|---|
 | 795 | +	struct posix_cputimer_base *base = pct->bases;  | 
|---|
 | 796 | +	int i;  | 
|---|
 | 797 | +  | 
|---|
 | 798 | +	for (i = 0; i < CPUCLOCK_MAX; i++, base++) {  | 
|---|
 | 799 | +		base->nextevt = collect_timerqueue(&base->tqhead, firing,  | 
|---|
 | 800 | +						    samples[i]);  | 
|---|
 | 801 | +	}  | 
|---|
| 800 | 802 |  } | 
|---|
| 801 | 803 |   | 
|---|
| 802 | 804 |  static inline void check_dl_overrun(struct task_struct *tsk) | 
|---|
| .. | .. | 
|---|
| 807 | 809 |  	} | 
|---|
| 808 | 810 |  } | 
|---|
| 809 | 811 |   | 
|---|
 | 812 | +static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard)  | 
|---|
 | 813 | +{  | 
|---|
 | 814 | +	if (time < limit)  | 
|---|
 | 815 | +		return false;  | 
|---|
 | 816 | +  | 
|---|
 | 817 | +	if (print_fatal_signals) {  | 
|---|
 | 818 | +		pr_info("%s Watchdog Timeout (%s): %s[%d]\n",  | 
|---|
 | 819 | +			rt ? "RT" : "CPU", hard ? "hard" : "soft",  | 
|---|
 | 820 | +			current->comm, task_pid_nr(current));  | 
|---|
 | 821 | +	}  | 
|---|
 | 822 | +	__group_send_sig_info(signo, SEND_SIG_PRIV, current);  | 
|---|
 | 823 | +	return true;  | 
|---|
 | 824 | +}  | 
|---|
 | 825 | +  | 
|---|
| 810 | 826 |  /* | 
|---|
| 811 | 827 |   * Check for any per-thread CPU timers that have fired and move them off | 
|---|
| 812 | 828 |   * the tsk->cpu_timers[N] list onto the firing list.  Here we update the | 
|---|
| .. | .. | 
|---|
| 815 | 831 |  static void check_thread_timers(struct task_struct *tsk, | 
|---|
| 816 | 832 |  				struct list_head *firing) | 
|---|
| 817 | 833 |  { | 
|---|
| 818 |  | -	struct list_head *timers = tsk->cpu_timers;  | 
|---|
| 819 |  | -	struct task_cputime *tsk_expires = &tsk->cputime_expires;  | 
|---|
| 820 |  | -	u64 expires;  | 
|---|
 | 834 | +	struct posix_cputimers *pct = &tsk->posix_cputimers;  | 
|---|
 | 835 | +	u64 samples[CPUCLOCK_MAX];  | 
|---|
| 821 | 836 |  	unsigned long soft; | 
|---|
| 822 | 837 |   | 
|---|
| 823 | 838 |  	if (dl_task(tsk)) | 
|---|
| 824 | 839 |  		check_dl_overrun(tsk); | 
|---|
| 825 | 840 |   | 
|---|
| 826 |  | -	/*  | 
|---|
| 827 |  | -	 * If cputime_expires is zero, then there are no active  | 
|---|
| 828 |  | -	 * per thread CPU timers.  | 
|---|
| 829 |  | -	 */  | 
|---|
| 830 |  | -	if (task_cputime_zero(&tsk->cputime_expires))  | 
|---|
 | 841 | +	if (expiry_cache_is_inactive(pct))  | 
|---|
| 831 | 842 |  		return; | 
|---|
| 832 | 843 |   | 
|---|
| 833 |  | -	expires = check_timers_list(timers, firing, prof_ticks(tsk));  | 
|---|
| 834 |  | -	tsk_expires->prof_exp = expires;  | 
|---|
| 835 |  | -  | 
|---|
| 836 |  | -	expires = check_timers_list(++timers, firing, virt_ticks(tsk));  | 
|---|
| 837 |  | -	tsk_expires->virt_exp = expires;  | 
|---|
| 838 |  | -  | 
|---|
| 839 |  | -	tsk_expires->sched_exp = check_timers_list(++timers, firing,  | 
|---|
| 840 |  | -						   tsk->se.sum_exec_runtime);  | 
|---|
 | 844 | +	task_sample_cputime(tsk, samples);  | 
|---|
 | 845 | +	collect_posix_cputimers(pct, samples, firing);  | 
|---|
| 841 | 846 |   | 
|---|
| 842 | 847 |  	/* | 
|---|
| 843 | 848 |  	 * Check for the special case thread timers. | 
|---|
| 844 | 849 |  	 */ | 
|---|
| 845 | 850 |  	soft = task_rlimit(tsk, RLIMIT_RTTIME); | 
|---|
| 846 | 851 |  	if (soft != RLIM_INFINITY) { | 
|---|
 | 852 | +		/* Task RT timeout is accounted in jiffies. RTTIME is usec */  | 
|---|
 | 853 | +		unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ);  | 
|---|
| 847 | 854 |  		unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); | 
|---|
| 848 | 855 |   | 
|---|
 | 856 | +		/* At the hard limit, send SIGKILL. No further action. */  | 
|---|
| 849 | 857 |  		if (hard != RLIM_INFINITY && | 
|---|
| 850 |  | -		    tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {  | 
|---|
| 851 |  | -			/*  | 
|---|
| 852 |  | -			 * At the hard limit, we just die.  | 
|---|
| 853 |  | -			 * No need to calculate anything else now.  | 
|---|
| 854 |  | -			 */  | 
|---|
| 855 |  | -			if (print_fatal_signals) {  | 
|---|
| 856 |  | -				pr_info("CPU Watchdog Timeout (hard): %s[%d]\n",  | 
|---|
| 857 |  | -					tsk->comm, task_pid_nr(tsk));  | 
|---|
| 858 |  | -			}  | 
|---|
| 859 |  | -			__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);  | 
|---|
 | 858 | +		    check_rlimit(rttime, hard, SIGKILL, true, true))  | 
|---|
| 860 | 859 |  			return; | 
|---|
| 861 |  | -		}  | 
|---|
| 862 |  | -		if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) {  | 
|---|
| 863 |  | -			/*  | 
|---|
| 864 |  | -			 * At the soft limit, send a SIGXCPU every second.  | 
|---|
| 865 |  | -			 */  | 
|---|
| 866 |  | -			if (soft < hard) {  | 
|---|
| 867 |  | -				soft += USEC_PER_SEC;  | 
|---|
| 868 |  | -				tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur =  | 
|---|
| 869 |  | -					soft;  | 
|---|
| 870 |  | -			}  | 
|---|
| 871 |  | -			if (print_fatal_signals) {  | 
|---|
| 872 |  | -				pr_info("RT Watchdog Timeout (soft): %s[%d]\n",  | 
|---|
| 873 |  | -					tsk->comm, task_pid_nr(tsk));  | 
|---|
| 874 |  | -			}  | 
|---|
| 875 |  | -			__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);  | 
|---|
 | 860 | +  | 
|---|
 | 861 | +		/* At the soft limit, send a SIGXCPU every second */  | 
|---|
 | 862 | +		if (check_rlimit(rttime, soft, SIGXCPU, true, false)) {  | 
|---|
 | 863 | +			soft += USEC_PER_SEC;  | 
|---|
 | 864 | +			tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur = soft;  | 
|---|
| 876 | 865 |  		} | 
|---|
| 877 | 866 |  	} | 
|---|
| 878 |  | -	if (task_cputime_zero(tsk_expires))  | 
|---|
 | 867 | +  | 
|---|
 | 868 | +	if (expiry_cache_is_inactive(pct))  | 
|---|
| 879 | 869 |  		tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER); | 
|---|
| 880 | 870 |  } | 
|---|
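
The rewritten per-thread check converts the consumed RT time (`tsk->rt.timeout`, accounted in jiffies) into microseconds once and compares it directly against the RLIMIT_RTTIME values, instead of dividing the limit by the tick length as the removed code did. A minimal user-space sketch of that arithmetic, with an illustrative HZ value and hypothetical numbers (not taken from the patch):

```c
#include <stdint.h>
#include <stdio.h>

#define USEC_PER_SEC	1000000ULL
#define HZ		250ULL			/* illustrative tick rate */

int main(void)
{
	uint64_t timeout_jiffies = 1500;	/* hypothetical rt.timeout */
	uint64_t soft = 5 * USEC_PER_SEC;	/* RLIMIT_RTTIME soft: 5s */
	uint64_t hard = 8 * USEC_PER_SEC;	/* RLIMIT_RTTIME hard: 8s */

	/* Jiffies -> usec, as in the new check_thread_timers() path */
	uint64_t rttime = timeout_jiffies * (USEC_PER_SEC / HZ);

	printf("rttime = %llu usec: soft %s, hard %s\n",
	       (unsigned long long)rttime,
	       rttime >= soft ? "hit" : "ok",
	       rttime >= hard ? "hit" : "ok");
	return 0;
}
```
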
| 881 | 871 |   | 
|---|
| 882 | 872 |  static inline void stop_process_timers(struct signal_struct *sig) | 
|---|
| 883 | 873 |  { | 
|---|
| 884 |  | -	struct thread_group_cputimer *cputimer = &sig->cputimer;  | 
|---|
 | 874 | +	struct posix_cputimers *pct = &sig->posix_cputimers;  | 
|---|
| 885 | 875 |   | 
|---|
| 886 |  | -	/* Turn off cputimer->running. This is done without locking. */  | 
|---|
| 887 |  | -	WRITE_ONCE(cputimer->running, false);  | 
|---|
 | 876 | +	/* Turn off the active flag. This is done without locking. */  | 
|---|
 | 877 | +	WRITE_ONCE(pct->timers_active, false);  | 
|---|
| 888 | 878 |  	tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER); | 
|---|
| 889 | 879 |  } | 
|---|
| 890 | 880 |   | 
|---|
| .. | .. | 
|---|
| 906 | 896 |  		__group_send_sig_info(signo, SEND_SIG_PRIV, tsk); | 
|---|
| 907 | 897 |  	} | 
|---|
| 908 | 898 |   | 
|---|
| 909 |  | -	if (it->expires && (!*expires || it->expires < *expires))  | 
|---|
 | 899 | +	if (it->expires && it->expires < *expires)  | 
|---|
| 910 | 900 |  		*expires = it->expires; | 
|---|
| 911 | 901 |  } | 
|---|
| 912 | 902 |   | 
|---|
| .. | .. | 
|---|
| 919 | 909 |  				 struct list_head *firing) | 
|---|
| 920 | 910 |  { | 
|---|
| 921 | 911 |  	struct signal_struct *const sig = tsk->signal; | 
|---|
| 922 |  | -	u64 utime, ptime, virt_expires, prof_expires;  | 
|---|
| 923 |  | -	u64 sum_sched_runtime, sched_expires;  | 
|---|
| 924 |  | -	struct list_head *timers = sig->cpu_timers;  | 
|---|
| 925 |  | -	struct task_cputime cputime;  | 
|---|
 | 912 | +	struct posix_cputimers *pct = &sig->posix_cputimers;  | 
|---|
 | 913 | +	u64 samples[CPUCLOCK_MAX];  | 
|---|
| 926 | 914 |  	unsigned long soft; | 
|---|
| 927 | 915 |   | 
|---|
| 928 |  | -	if (dl_task(tsk))  | 
|---|
| 929 |  | -		check_dl_overrun(tsk);  | 
|---|
| 930 |  | -  | 
|---|
| 931 | 916 |  	/* | 
|---|
| 932 |  | -	 * If cputimer is not running, then there are no active  | 
|---|
| 933 |  | -	 * process wide timers (POSIX 1.b, itimers, RLIMIT_CPU).  | 
|---|
 | 917 | +	 * If there are no active process wide timers (POSIX 1.b, itimers,  | 
|---|
 | 918 | +	 * RLIMIT_CPU), nothing to check. Also skip the process wide timer  | 
|---|
 | 919 | +	 * processing when there is already another task handling them.  | 
|---|
| 934 | 920 |  	 */ | 
|---|
| 935 |  | -	if (!READ_ONCE(tsk->signal->cputimer.running))  | 
|---|
 | 921 | +	if (!READ_ONCE(pct->timers_active) || pct->expiry_active)  | 
|---|
| 936 | 922 |  		return; | 
|---|
| 937 | 923 |   | 
|---|
| 938 |  | -        /*  | 
|---|
 | 924 | +	/*  | 
|---|
| 939 | 925 |  	 * Signify that a thread is checking for process timers. | 
|---|
| 940 | 926 |  	 * Write access to this field is protected by the sighand lock. | 
|---|
| 941 | 927 |  	 */ | 
|---|
| 942 |  | -	sig->cputimer.checking_timer = true;  | 
|---|
 | 928 | +	pct->expiry_active = true;  | 
|---|
| 943 | 929 |   | 
|---|
| 944 | 930 |  	/* | 
|---|
| 945 |  | -	 * Collect the current process totals.  | 
|---|
 | 931 | +	 * Collect the current process totals. Group accounting is active  | 
|---|
 | 932 | +	 * so the sample can be taken directly.  | 
|---|
| 946 | 933 |  	 */ | 
|---|
| 947 |  | -	thread_group_cputimer(tsk, &cputime);  | 
|---|
| 948 |  | -	utime = cputime.utime;  | 
|---|
| 949 |  | -	ptime = utime + cputime.stime;  | 
|---|
| 950 |  | -	sum_sched_runtime = cputime.sum_exec_runtime;  | 
|---|
| 951 |  | -  | 
|---|
| 952 |  | -	prof_expires = check_timers_list(timers, firing, ptime);  | 
|---|
| 953 |  | -	virt_expires = check_timers_list(++timers, firing, utime);  | 
|---|
| 954 |  | -	sched_expires = check_timers_list(++timers, firing, sum_sched_runtime);  | 
|---|
 | 934 | +	proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic, samples);  | 
|---|
 | 935 | +	collect_posix_cputimers(pct, samples, firing);  | 
|---|
| 955 | 936 |   | 
|---|
| 956 | 937 |  	/* | 
|---|
| 957 | 938 |  	 * Check for the special case process timers. | 
|---|
| 958 | 939 |  	 */ | 
|---|
| 959 |  | -	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], &prof_expires, ptime,  | 
|---|
| 960 |  | -			 SIGPROF);  | 
|---|
| 961 |  | -	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime,  | 
|---|
| 962 |  | -			 SIGVTALRM);  | 
|---|
 | 940 | +	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF],  | 
|---|
 | 941 | +			 &pct->bases[CPUCLOCK_PROF].nextevt,  | 
|---|
 | 942 | +			 samples[CPUCLOCK_PROF], SIGPROF);  | 
|---|
 | 943 | +	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT],  | 
|---|
 | 944 | +			 &pct->bases[CPUCLOCK_VIRT].nextevt,  | 
|---|
 | 945 | +			 samples[CPUCLOCK_VIRT], SIGVTALRM);  | 
|---|
 | 946 | +  | 
|---|
| 963 | 947 |  	soft = task_rlimit(tsk, RLIMIT_CPU); | 
|---|
| 964 | 948 |  	if (soft != RLIM_INFINITY) { | 
|---|
| 965 |  | -		unsigned long psecs = div_u64(ptime, NSEC_PER_SEC);  | 
|---|
 | 949 | +		/* RLIMIT_CPU is in seconds. Samples are nanoseconds */  | 
|---|
| 966 | 950 |  		unsigned long hard = task_rlimit_max(tsk, RLIMIT_CPU); | 
|---|
| 967 |  | -		u64 x;  | 
|---|
| 968 |  | -		if (psecs >= hard) {  | 
|---|
| 969 |  | -			/*  | 
|---|
| 970 |  | -			 * At the hard limit, we just die.  | 
|---|
| 971 |  | -			 * No need to calculate anything else now.  | 
|---|
| 972 |  | -			 */  | 
|---|
| 973 |  | -			if (print_fatal_signals) {  | 
|---|
| 974 |  | -				pr_info("RT Watchdog Timeout (hard): %s[%d]\n",  | 
|---|
| 975 |  | -					tsk->comm, task_pid_nr(tsk));  | 
|---|
| 976 |  | -			}  | 
|---|
| 977 |  | -			__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);  | 
|---|
 | 951 | +		u64 ptime = samples[CPUCLOCK_PROF];  | 
|---|
 | 952 | +		u64 softns = (u64)soft * NSEC_PER_SEC;  | 
|---|
 | 953 | +		u64 hardns = (u64)hard * NSEC_PER_SEC;  | 
|---|
 | 954 | +  | 
|---|
 | 955 | +		/* At the hard limit, send SIGKILL. No further action. */  | 
|---|
 | 956 | +		if (hard != RLIM_INFINITY &&  | 
|---|
 | 957 | +		    check_rlimit(ptime, hardns, SIGKILL, false, true))  | 
|---|
| 978 | 958 |  			return; | 
|---|
 | 959 | +  | 
|---|
 | 960 | +		/* At the soft limit, send a SIGXCPU every second */  | 
|---|
 | 961 | +		if (check_rlimit(ptime, softns, SIGXCPU, false, false)) {  | 
|---|
 | 962 | +			sig->rlim[RLIMIT_CPU].rlim_cur = soft + 1;  | 
|---|
 | 963 | +			softns += NSEC_PER_SEC;  | 
|---|
| 979 | 964 |  		} | 
|---|
| 980 |  | -		if (psecs >= soft) {  | 
|---|
| 981 |  | -			/*  | 
|---|
| 982 |  | -			 * At the soft limit, send a SIGXCPU every second.  | 
|---|
| 983 |  | -			 */  | 
|---|
| 984 |  | -			if (print_fatal_signals) {  | 
|---|
| 985 |  | -				pr_info("CPU Watchdog Timeout (soft): %s[%d]\n",  | 
|---|
| 986 |  | -					tsk->comm, task_pid_nr(tsk));  | 
|---|
| 987 |  | -			}  | 
|---|
| 988 |  | -			__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);  | 
|---|
| 989 |  | -			if (soft < hard) {  | 
|---|
| 990 |  | -				soft++;  | 
|---|
| 991 |  | -				sig->rlim[RLIMIT_CPU].rlim_cur = soft;  | 
|---|
| 992 |  | -			}  | 
|---|
| 993 |  | -		}  | 
|---|
| 994 |  | -		x = soft * NSEC_PER_SEC;  | 
|---|
| 995 |  | -		if (!prof_expires || x < prof_expires)  | 
|---|
| 996 |  | -			prof_expires = x;  | 
|---|
 | 965 | +  | 
|---|
 | 966 | +		/* Update the expiry cache */  | 
|---|
 | 967 | +		if (softns < pct->bases[CPUCLOCK_PROF].nextevt)  | 
|---|
 | 968 | +			pct->bases[CPUCLOCK_PROF].nextevt = softns;  | 
|---|
| 997 | 969 |  	} | 
|---|
| 998 | 970 |   | 
|---|
| 999 |  | -	sig->cputime_expires.prof_exp = prof_expires;  | 
|---|
| 1000 |  | -	sig->cputime_expires.virt_exp = virt_expires;  | 
|---|
| 1001 |  | -	sig->cputime_expires.sched_exp = sched_expires;  | 
|---|
| 1002 |  | -	if (task_cputime_zero(&sig->cputime_expires))  | 
|---|
 | 971 | +	if (expiry_cache_is_inactive(pct))  | 
|---|
| 1003 | 972 |  		stop_process_timers(sig); | 
|---|
| 1004 | 973 |   | 
|---|
| 1005 |  | -	sig->cputimer.checking_timer = false;  | 
|---|
 | 974 | +	pct->expiry_active = false;  | 
|---|
| 1006 | 975 |  } | 
|---|
| 1007 | 976 |   | 
|---|
| 1008 | 977 |  /* | 
|---|
| .. | .. | 
|---|
| 1011 | 980 |   */ | 
|---|
| 1012 | 981 |  static void posix_cpu_timer_rearm(struct k_itimer *timer) | 
|---|
| 1013 | 982 |  { | 
|---|
| 1014 |  | -	struct task_struct *p = timer->it.cpu.task;  | 
|---|
 | 983 | +	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);  | 
|---|
 | 984 | +	struct task_struct *p;  | 
|---|
| 1015 | 985 |  	struct sighand_struct *sighand; | 
|---|
| 1016 | 986 |  	unsigned long flags; | 
|---|
| 1017 | 987 |  	u64 now; | 
|---|
| 1018 | 988 |   | 
|---|
| 1019 |  | -	if (WARN_ON_ONCE(!p))  | 
|---|
| 1020 |  | -		return;  | 
|---|
 | 989 | +	rcu_read_lock();  | 
|---|
 | 990 | +	p = cpu_timer_task_rcu(timer);  | 
|---|
 | 991 | +	if (!p)  | 
|---|
 | 992 | +		goto out;  | 
|---|
 | 993 | +  | 
|---|
 | 994 | +	/* Protect timer list r/w in arm_timer() */  | 
|---|
 | 995 | +	sighand = lock_task_sighand(p, &flags);  | 
|---|
 | 996 | +	if (unlikely(sighand == NULL))  | 
|---|
 | 997 | +		goto out;  | 
|---|
| 1021 | 998 |   | 
|---|
| 1022 | 999 |  	/* | 
|---|
| 1023 | 1000 |  	 * Fetch the current sample and update the timer's expiry time. | 
|---|
| 1024 | 1001 |  	 */ | 
|---|
| 1025 |  | -	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {  | 
|---|
| 1026 |  | -		cpu_clock_sample(timer->it_clock, p, &now);  | 
|---|
| 1027 |  | -		bump_cpu_timer(timer, now);  | 
|---|
| 1028 |  | -		if (unlikely(p->exit_state))  | 
|---|
| 1029 |  | -			return;  | 
|---|
 | 1002 | +	if (CPUCLOCK_PERTHREAD(timer->it_clock))  | 
|---|
 | 1003 | +		now = cpu_clock_sample(clkid, p);  | 
|---|
 | 1004 | +	else  | 
|---|
 | 1005 | +		now = cpu_clock_sample_group(clkid, p, true);  | 
|---|
| 1030 | 1006 |   | 
|---|
| 1031 |  | -		/* Protect timer list r/w in arm_timer() */  | 
|---|
| 1032 |  | -		sighand = lock_task_sighand(p, &flags);  | 
|---|
| 1033 |  | -		if (!sighand)  | 
|---|
| 1034 |  | -			return;  | 
|---|
| 1035 |  | -	} else {  | 
|---|
| 1036 |  | -		/*  | 
|---|
| 1037 |  | -		 * Protect arm_timer() and timer sampling in case of call to  | 
|---|
| 1038 |  | -		 * thread_group_cputime().  | 
|---|
| 1039 |  | -		 */  | 
|---|
| 1040 |  | -		sighand = lock_task_sighand(p, &flags);  | 
|---|
| 1041 |  | -		if (unlikely(sighand == NULL)) {  | 
|---|
| 1042 |  | -			/*  | 
|---|
| 1043 |  | -			 * The process has been reaped.  | 
|---|
| 1044 |  | -			 * We can't even collect a sample any more.  | 
|---|
| 1045 |  | -			 */  | 
|---|
| 1046 |  | -			timer->it.cpu.expires = 0;  | 
|---|
| 1047 |  | -			return;  | 
|---|
| 1048 |  | -		} else if (unlikely(p->exit_state) && thread_group_empty(p)) {  | 
|---|
| 1049 |  | -			/* If the process is dying, no need to rearm */  | 
|---|
| 1050 |  | -			goto unlock;  | 
|---|
| 1051 |  | -		}  | 
|---|
| 1052 |  | -		cpu_timer_sample_group(timer->it_clock, p, &now);  | 
|---|
| 1053 |  | -		bump_cpu_timer(timer, now);  | 
|---|
| 1054 |  | -		/* Leave the sighand locked for the call below.  */  | 
|---|
| 1055 |  | -	}  | 
|---|
 | 1007 | +	bump_cpu_timer(timer, now);  | 
|---|
| 1056 | 1008 |   | 
|---|
| 1057 | 1009 |  	/* | 
|---|
| 1058 | 1010 |  	 * Now re-arm for the new expiry time. | 
|---|
| 1059 | 1011 |  	 */ | 
|---|
| 1060 |  | -	arm_timer(timer);  | 
|---|
| 1061 |  | -unlock:  | 
|---|
 | 1012 | +	arm_timer(timer, p);  | 
|---|
| 1062 | 1013 |  	unlock_task_sighand(p, &flags); | 
|---|
 | 1014 | +out:  | 
|---|
 | 1015 | +	rcu_read_unlock();  | 
|---|
| 1063 | 1016 |  } | 
|---|
| 1064 | 1017 |   | 
|---|
| 1065 | 1018 |  /** | 
|---|
| 1066 |  | - * task_cputime_expired - Compare two task_cputime entities.  | 
|---|
 | 1019 | + * task_cputimers_expired - Check whether posix CPU timers are expired  | 
|---|
| 1067 | 1020 |   * | 
|---|
| 1068 |  | - * @sample:	The task_cputime structure to be checked for expiration.  | 
|---|
| 1069 |  | - * @expires:	Expiration times, against which @sample will be checked.  | 
|---|
 | 1021 | + * @samples:	Array of current samples for the CPUCLOCK clocks  | 
|---|
 | 1022 | + * @pct:	Pointer to a posix_cputimers container  | 
|---|
| 1070 | 1023 |   * | 
|---|
| 1071 |  | - * Checks @sample against @expires to see if any field of @sample has expired.  | 
|---|
| 1072 |  | - * Returns true if any field of the former is greater than the corresponding  | 
|---|
| 1073 |  | - * field of the latter if the latter field is set.  Otherwise returns false.  | 
|---|
 | 1024 | + * Returns true if any member of @samples is greater than the corresponding  | 
|---|
 | 1025 | + * member of @pct->bases[CLK].nextevt. False otherwise.  | 
|---|
| 1074 | 1026 |   */ | 
|---|
| 1075 |  | -static inline int task_cputime_expired(const struct task_cputime *sample,  | 
|---|
| 1076 |  | -					const struct task_cputime *expires)  | 
|---|
 | 1027 | +static inline bool  | 
|---|
 | 1028 | +task_cputimers_expired(const u64 *samples, struct posix_cputimers *pct)  | 
|---|
| 1077 | 1029 |  { | 
|---|
| 1078 |  | -	if (expires->utime && sample->utime >= expires->utime)  | 
|---|
| 1079 |  | -		return 1;  | 
|---|
| 1080 |  | -	if (expires->stime && sample->utime + sample->stime >= expires->stime)  | 
|---|
| 1081 |  | -		return 1;  | 
|---|
| 1082 |  | -	if (expires->sum_exec_runtime != 0 &&  | 
|---|
| 1083 |  | -	    sample->sum_exec_runtime >= expires->sum_exec_runtime)  | 
|---|
| 1084 |  | -		return 1;  | 
|---|
| 1085 |  | -	return 0;  | 
|---|
 | 1030 | +	int i;  | 
|---|
 | 1031 | +  | 
|---|
 | 1032 | +	for (i = 0; i < CPUCLOCK_MAX; i++) {  | 
|---|
 | 1033 | +		if (samples[i] >= pct->bases[i].nextevt)  | 
|---|
 | 1034 | +			return true;  | 
|---|
 | 1035 | +	}  | 
|---|
 | 1036 | +	return false;  | 
|---|
| 1086 | 1037 |  } | 
|---|
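
Both the thread and the process path now express expiry as "sample[clock] >= bases[clock].nextevt" over a fixed-size array indexed by CPUCLOCK_PROF/VIRT/SCHED. Below is a standalone sketch of that comparison; the convention that an unarmed base keeps its nextevt at U64_MAX (so it can never look expired) is an assumption of this sketch, not spelled out in the hunk:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum { CPUCLOCK_PROF, CPUCLOCK_VIRT, CPUCLOCK_SCHED, CPUCLOCK_MAX };

struct base {
	uint64_t nextevt;	/* earliest expiry in ns; UINT64_MAX = nothing armed (assumed) */
};

static bool cputimers_expired(const uint64_t samples[CPUCLOCK_MAX],
			      const struct base bases[CPUCLOCK_MAX])
{
	for (int i = 0; i < CPUCLOCK_MAX; i++) {
		if (samples[i] >= bases[i].nextevt)
			return true;
	}
	return false;
}

int main(void)
{
	struct base bases[CPUCLOCK_MAX] = {
		[CPUCLOCK_PROF]  = { 3000000000ULL },	/* timer armed at 3s of PROF time */
		[CPUCLOCK_VIRT]  = { UINT64_MAX },	/* nothing armed */
		[CPUCLOCK_SCHED] = { UINT64_MAX },	/* nothing armed */
	};
	/* Current consumption: PROF 3.5s, VIRT 2s, SCHED 4s */
	uint64_t samples[CPUCLOCK_MAX] = { 3500000000ULL, 2000000000ULL, 4000000000ULL };

	printf("expired: %s\n", cputimers_expired(samples, bases) ? "yes" : "no");
	return 0;
}
```
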
| 1087 | 1038 |   | 
|---|
| 1088 | 1039 |  /** | 
|---|
| .. | .. | 
|---|
| 1095 | 1046 |   * timers and compare them with the corresponding expiration times.  Return | 
|---|
| 1096 | 1047 |   * true if a timer has expired, else return false. | 
|---|
| 1097 | 1048 |   */ | 
|---|
| 1098 |  | -static inline int fastpath_timer_check(struct task_struct *tsk)  | 
|---|
 | 1049 | +static inline bool fastpath_timer_check(struct task_struct *tsk)  | 
|---|
| 1099 | 1050 |  { | 
|---|
 | 1051 | +	struct posix_cputimers *pct = &tsk->posix_cputimers;  | 
|---|
| 1100 | 1052 |  	struct signal_struct *sig; | 
|---|
| 1101 | 1053 |   | 
|---|
| 1102 |  | -	if (!task_cputime_zero(&tsk->cputime_expires)) {  | 
|---|
| 1103 |  | -		struct task_cputime task_sample;  | 
|---|
 | 1054 | +	if (!expiry_cache_is_inactive(pct)) {  | 
|---|
 | 1055 | +		u64 samples[CPUCLOCK_MAX];  | 
|---|
| 1104 | 1056 |   | 
|---|
| 1105 |  | -		task_cputime(tsk, &task_sample.utime, &task_sample.stime);  | 
|---|
| 1106 |  | -		task_sample.sum_exec_runtime = tsk->se.sum_exec_runtime;  | 
|---|
| 1107 |  | -		if (task_cputime_expired(&task_sample, &tsk->cputime_expires))  | 
|---|
| 1108 |  | -			return 1;  | 
|---|
 | 1057 | +		task_sample_cputime(tsk, samples);  | 
|---|
 | 1058 | +		if (task_cputimers_expired(samples, pct))  | 
|---|
 | 1059 | +			return true;  | 
|---|
| 1109 | 1060 |  	} | 
|---|
| 1110 | 1061 |   | 
|---|
| 1111 | 1062 |  	sig = tsk->signal; | 
|---|
 | 1063 | +	pct = &sig->posix_cputimers;  | 
|---|
| 1112 | 1064 |  	/* | 
|---|
| 1113 |  | -	 * Check if thread group timers expired when the cputimer is  | 
|---|
| 1114 |  | -	 * running and no other thread in the group is already checking  | 
|---|
| 1115 |  | -	 * for thread group cputimers. These fields are read without the  | 
|---|
| 1116 |  | -	 * sighand lock. However, this is fine because this is meant to  | 
|---|
| 1117 |  | -	 * be a fastpath heuristic to determine whether we should try to  | 
|---|
| 1118 |  | -	 * acquire the sighand lock to check/handle timers.  | 
|---|
 | 1065 | +	 * Check if thread group timers expired when timers are active and  | 
|---|
 | 1066 | +	 * no other thread in the group is already handling expiry for  | 
|---|
 | 1067 | +	 * thread group cputimers. These fields are read without the  | 
|---|
 | 1068 | +	 * sighand lock. However, this is fine because this is meant to be  | 
|---|
 | 1069 | +	 * a fastpath heuristic to determine whether we should try to  | 
|---|
 | 1070 | +	 * acquire the sighand lock to handle timer expiry.  | 
|---|
| 1119 | 1071 |  	 * | 
|---|
| 1120 |  | -	 * In the worst case scenario, if 'running' or 'checking_timer' gets  | 
|---|
| 1121 |  | -	 * set but the current thread doesn't see the change yet, we'll wait  | 
|---|
| 1122 |  | -	 * until the next thread in the group gets a scheduler interrupt to  | 
|---|
| 1123 |  | -	 * handle the timer. This isn't an issue in practice because these  | 
|---|
| 1124 |  | -	 * types of delays with signals actually getting sent are expected.  | 
|---|
 | 1072 | +	 * In the worst case scenario, if concurrently timers_active is set  | 
|---|
 | 1073 | +	 * or expiry_active is cleared, but the current thread doesn't see  | 
|---|
 | 1074 | +	 * the change yet, the timer checks are delayed until the next  | 
|---|
 | 1075 | +	 * thread in the group gets a scheduler interrupt to handle the  | 
|---|
 | 1076 | +	 * timer. This isn't an issue in practice because these types of  | 
|---|
 | 1077 | +	 * delays with signals actually getting sent are expected.  | 
|---|
| 1125 | 1078 |  	 */ | 
|---|
| 1126 |  | -	if (READ_ONCE(sig->cputimer.running) &&  | 
|---|
| 1127 |  | -	    !READ_ONCE(sig->cputimer.checking_timer)) {  | 
|---|
| 1128 |  | -		struct task_cputime group_sample;  | 
|---|
 | 1079 | +	if (READ_ONCE(pct->timers_active) && !READ_ONCE(pct->expiry_active)) {  | 
|---|
 | 1080 | +		u64 samples[CPUCLOCK_MAX];  | 
|---|
| 1129 | 1081 |   | 
|---|
| 1130 |  | -		sample_cputime_atomic(&group_sample, &sig->cputimer.cputime_atomic);  | 
|---|
 | 1082 | +		proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic,  | 
|---|
 | 1083 | +					   samples);  | 
|---|
| 1131 | 1084 |   | 
|---|
| 1132 |  | -		if (task_cputime_expired(&group_sample, &sig->cputime_expires))  | 
|---|
| 1133 |  | -			return 1;  | 
|---|
 | 1085 | +		if (task_cputimers_expired(samples, pct))  | 
|---|
 | 1086 | +			return true;  | 
|---|
| 1134 | 1087 |  	} | 
|---|
| 1135 | 1088 |   | 
|---|
| 1136 | 1089 |  	if (dl_task(tsk) && tsk->dl.dl_overrun) | 
|---|
| 1137 |  | -		return 1;  | 
|---|
 | 1090 | +		return true;  | 
|---|
| 1138 | 1091 |   | 
|---|
| 1139 |  | -	return 0;  | 
|---|
 | 1092 | +	return false;  | 
|---|
| 1140 | 1093 |  } | 
|---|
| 1141 | 1094 |   | 
|---|
| 1142 |  | -static DEFINE_PER_CPU(spinlock_t, cpu_timer_expiry_lock) = __SPIN_LOCK_UNLOCKED(cpu_timer_expiry_lock);  | 
|---|
 | 1095 | +static void handle_posix_cpu_timers(struct task_struct *tsk);  | 
|---|
| 1143 | 1096 |   | 
|---|
| 1144 |  | -void cpu_timers_grab_expiry_lock(struct k_itimer *timer)  | 
|---|
 | 1097 | +#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK  | 
|---|
 | 1098 | +static void posix_cpu_timers_work(struct callback_head *work)  | 
|---|
| 1145 | 1099 |  { | 
|---|
| 1146 |  | -	int cpu = timer->it.cpu.firing_cpu;  | 
|---|
| 1147 |  | -  | 
|---|
| 1148 |  | -	if (cpu >= 0) {  | 
|---|
| 1149 |  | -		spinlock_t *expiry_lock = per_cpu_ptr(&cpu_timer_expiry_lock, cpu);  | 
|---|
| 1150 |  | -  | 
|---|
| 1151 |  | -		spin_lock_irq(expiry_lock);  | 
|---|
| 1152 |  | -		spin_unlock_irq(expiry_lock);  | 
|---|
| 1153 |  | -	}  | 
|---|
 | 1100 | +	handle_posix_cpu_timers(current);  | 
|---|
| 1154 | 1101 |  } | 
|---|
| 1155 | 1102 |   | 
|---|
| 1156 | 1103 |  /* | 
|---|
| 1157 |  | - * This is called from the timer interrupt handler.  The irq handler has  | 
|---|
| 1158 |  | - * already updated our counts.  We need to check if any timers fire now.  | 
|---|
| 1159 |  | - * Interrupts are disabled.  | 
|---|
 | 1104 | + * Clear existing posix CPU timers task work.  | 
|---|
| 1160 | 1105 |   */ | 
|---|
| 1161 |  | -static void __run_posix_cpu_timers(struct task_struct *tsk)  | 
|---|
 | 1106 | +void clear_posix_cputimers_work(struct task_struct *p)  | 
|---|
| 1162 | 1107 |  { | 
|---|
| 1163 |  | -	LIST_HEAD(firing);  | 
|---|
| 1164 |  | -	struct k_itimer *timer, *next;  | 
|---|
| 1165 |  | -	unsigned long flags;  | 
|---|
| 1166 |  | -	spinlock_t *expiry_lock;  | 
|---|
 | 1108 | +	/*  | 
|---|
 | 1109 | +	 * A copied work entry from the old task is not meaningful, clear it.  | 
|---|
 | 1110 | +	 * N.B. init_task_work will not do this.  | 
|---|
 | 1111 | +	 */  | 
|---|
 | 1112 | +	memset(&p->posix_cputimers_work.work, 0,  | 
|---|
 | 1113 | +	       sizeof(p->posix_cputimers_work.work));  | 
|---|
 | 1114 | +	init_task_work(&p->posix_cputimers_work.work,  | 
|---|
 | 1115 | +		       posix_cpu_timers_work);  | 
|---|
 | 1116 | +	p->posix_cputimers_work.scheduled = false;  | 
|---|
 | 1117 | +}  | 
|---|
 | 1118 | +  | 
|---|
 | 1119 | +/*  | 
|---|
 | 1120 | + * Initialize posix CPU timers task work in init task. Out of line to  | 
|---|
 | 1121 | + * keep the callback static and to avoid header recursion hell.  | 
|---|
 | 1122 | + */  | 
|---|
 | 1123 | +void __init posix_cputimers_init_work(void)  | 
|---|
 | 1124 | +{  | 
|---|
 | 1125 | +	clear_posix_cputimers_work(current);  | 
|---|
 | 1126 | +}  | 
|---|
 | 1127 | +  | 
|---|
 | 1128 | +/*  | 
|---|
 | 1129 | + * Note: All operations on tsk->posix_cputimers_work.scheduled happen either  | 
|---|
 | 1130 | + * in hard interrupt context or in task context with interrupts  | 
|---|
 | 1131 | + * disabled. Aside from that, the writer/reader interaction is always in the  | 
|---|
 | 1132 | + * context of the current task, which means they are strict per CPU.  | 
|---|
 | 1133 | + */  | 
|---|
 | 1134 | +static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk)  | 
|---|
 | 1135 | +{  | 
|---|
 | 1136 | +	return tsk->posix_cputimers_work.scheduled;  | 
|---|
 | 1137 | +}  | 
|---|
 | 1138 | +  | 
|---|
 | 1139 | +static inline void __run_posix_cpu_timers(struct task_struct *tsk)  | 
|---|
 | 1140 | +{  | 
|---|
 | 1141 | +	if (WARN_ON_ONCE(tsk->posix_cputimers_work.scheduled))  | 
|---|
 | 1142 | +		return;  | 
|---|
 | 1143 | +  | 
|---|
 | 1144 | +	/* Schedule task work to actually expire the timers */  | 
|---|
 | 1145 | +	tsk->posix_cputimers_work.scheduled = true;  | 
|---|
 | 1146 | +	task_work_add(tsk, &tsk->posix_cputimers_work.work, TWA_RESUME);  | 
|---|
 | 1147 | +}  | 
|---|
 | 1148 | +  | 
|---|
 | 1149 | +static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk,  | 
|---|
 | 1150 | +						unsigned long start)  | 
|---|
 | 1151 | +{  | 
|---|
 | 1152 | +	bool ret = true;  | 
|---|
| 1167 | 1153 |   | 
|---|
| 1168 | 1154 |  	/* | 
|---|
| 1169 |  | -	 * The fast path checks that there are no expired thread or thread  | 
|---|
| 1170 |  | -	 * group timers.  If that's so, just return.  | 
|---|
 | 1155 | +	 * On !RT kernels interrupts are disabled while collecting expired  | 
|---|
 | 1156 | +	 * timers, so no tick can happen and the fast path check can be  | 
|---|
 | 1157 | +	 * reenabled without further checks.  | 
|---|
| 1171 | 1158 |  	 */ | 
|---|
| 1172 |  | -	if (!fastpath_timer_check(tsk))  | 
|---|
| 1173 |  | -		return;  | 
|---|
| 1174 |  | -  | 
|---|
| 1175 |  | -	expiry_lock = this_cpu_ptr(&cpu_timer_expiry_lock);  | 
|---|
| 1176 |  | -	spin_lock(expiry_lock);  | 
|---|
| 1177 |  | -  | 
|---|
| 1178 |  | -	if (!lock_task_sighand(tsk, &flags)) {  | 
|---|
| 1179 |  | -		spin_unlock(expiry_lock);  | 
|---|
| 1180 |  | -		return;  | 
|---|
 | 1159 | +	if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {  | 
|---|
 | 1160 | +		tsk->posix_cputimers_work.scheduled = false;  | 
|---|
 | 1161 | +		return true;  | 
|---|
| 1181 | 1162 |  	} | 
|---|
 | 1163 | +  | 
|---|
| 1182 | 1164 |  	/* | 
|---|
| 1183 |  | -	 * Here we take off tsk->signal->cpu_timers[N] and  | 
|---|
| 1184 |  | -	 * tsk->cpu_timers[N] all the timers that are firing, and  | 
|---|
| 1185 |  | -	 * put them on the firing list.  | 
|---|
 | 1165 | +	 * On RT enabled kernels ticks can happen while the expired timers  | 
|---|
 | 1166 | +	 * are collected under sighand lock. But any tick which observes  | 
|---|
 | 1167 | +	 * the CPUTIMERS_WORK_SCHEDULED bit set, does not run the fastpath  | 
|---|
 | 1168 | +	 * checks. So reenabling the tick work has to be done carefully:  | 
|---|
 | 1169 | +	 *  | 
|---|
 | 1170 | +	 * Disable interrupts and run the fast path check if jiffies have  | 
|---|
 | 1171 | +	 * advanced since the collection of expired timers started. If  | 
|---|
 | 1172 | +	 * jiffies have not advanced or the fast path check did not find  | 
|---|
 | 1173 | +	 * newly expired timers, reenable the fast path check in the timer  | 
|---|
 | 1174 | +	 * interrupt. If there are newly expired timers, return false and  | 
|---|
 | 1175 | +	 * let the collection loop repeat.  | 
|---|
| 1186 | 1176 |  	 */ | 
|---|
| 1187 |  | -	check_thread_timers(tsk, &firing);  | 
|---|
 | 1177 | +	local_irq_disable();  | 
|---|
 | 1178 | +	if (start != jiffies && fastpath_timer_check(tsk))  | 
|---|
 | 1179 | +		ret = false;  | 
|---|
 | 1180 | +	else  | 
|---|
 | 1181 | +		tsk->posix_cputimers_work.scheduled = false;  | 
|---|
 | 1182 | +	local_irq_enable();  | 
|---|
| 1188 | 1183 |   | 
|---|
| 1189 |  | -	check_process_timers(tsk, &firing);  | 
|---|
 | 1184 | +	return ret;  | 
|---|
 | 1185 | +}  | 
|---|
 | 1186 | +#else /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */  | 
|---|
 | 1187 | +static inline void __run_posix_cpu_timers(struct task_struct *tsk)  | 
|---|
 | 1188 | +{  | 
|---|
 | 1189 | +	lockdep_posixtimer_enter();  | 
|---|
 | 1190 | +	handle_posix_cpu_timers(tsk);  | 
|---|
 | 1191 | +	lockdep_posixtimer_exit();  | 
|---|
 | 1192 | +}  | 
|---|
 | 1193 | +  | 
|---|
 | 1194 | +static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk)  | 
|---|
 | 1195 | +{  | 
|---|
 | 1196 | +	return false;  | 
|---|
 | 1197 | +}  | 
|---|
 | 1198 | +  | 
|---|
 | 1199 | +static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk,  | 
|---|
 | 1200 | +						unsigned long start)  | 
|---|
 | 1201 | +{  | 
|---|
 | 1202 | +	return true;  | 
|---|
 | 1203 | +}  | 
|---|
 | 1204 | +#endif /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */  | 
|---|
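
With CONFIG_POSIX_CPU_TIMERS_TASK_WORK the tick only marks the expiry work as scheduled and queues it via task_work_add(); handle_posix_cpu_timers() then runs in task context on the way back to user space. The per-task bookkeeping the code above relies on is essentially a callback head plus a "scheduled" flag; a simplified sketch of the assumed layout (the authoritative definition lives in the scheduler headers, not in this patch):

```c
/*
 * Simplified sketch (assumption, not quoted from the patch): the
 * per-task state used by posix_cpu_timers_work_scheduled() and
 * __run_posix_cpu_timers() above.
 */
struct posix_cputimers_work {
	struct callback_head	work;		/* queued with task_work_add(..., TWA_RESUME) */
	unsigned int		scheduled;	/* set in irq context, cleared in task context */
};
```
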
 | 1205 | +  | 
|---|
 | 1206 | +static void handle_posix_cpu_timers(struct task_struct *tsk)  | 
|---|
 | 1207 | +{  | 
|---|
 | 1208 | +	struct k_itimer *timer, *next;  | 
|---|
 | 1209 | +	unsigned long flags, start;  | 
|---|
 | 1210 | +	LIST_HEAD(firing);  | 
|---|
 | 1211 | +  | 
|---|
 | 1212 | +	if (!lock_task_sighand(tsk, &flags))  | 
|---|
 | 1213 | +		return;  | 
|---|
 | 1214 | +  | 
|---|
 | 1215 | +	do {  | 
|---|
 | 1216 | +		/*  | 
|---|
 | 1217 | +		 * On RT locking sighand lock does not disable interrupts,  | 
|---|
 | 1218 | +		 * so this needs to be careful vs. ticks. Store the current  | 
|---|
 | 1219 | +		 * jiffies value.  | 
|---|
 | 1220 | +		 */  | 
|---|
 | 1221 | +		start = READ_ONCE(jiffies);  | 
|---|
 | 1222 | +		barrier();  | 
|---|
 | 1223 | +  | 
|---|
 | 1224 | +		/*  | 
|---|
 | 1225 | +		 * Here we take off tsk->signal->cpu_timers[N] and  | 
|---|
 | 1226 | +		 * tsk->cpu_timers[N] all the timers that are firing, and  | 
|---|
 | 1227 | +		 * put them on the firing list.  | 
|---|
 | 1228 | +		 */  | 
|---|
 | 1229 | +		check_thread_timers(tsk, &firing);  | 
|---|
 | 1230 | +  | 
|---|
 | 1231 | +		check_process_timers(tsk, &firing);  | 
|---|
 | 1232 | +  | 
|---|
 | 1233 | +		/*  | 
|---|
 | 1234 | +		 * The above timer checks have updated the expiry cache and  | 
|---|
 | 1235 | +		 * because nothing can have queued or modified timers after  | 
|---|
 | 1236 | +		 * sighand lock was taken above, it is guaranteed to be  | 
|---|
 | 1237 | +		 * consistent. So the next timer interrupt fastpath check  | 
|---|
 | 1238 | +		 * will find valid data.  | 
|---|
 | 1239 | +		 *  | 
|---|
 | 1240 | +		 * If timer expiry runs in the timer interrupt context then  | 
|---|
 | 1241 | +		 * the loop is not relevant as timers will be directly  | 
|---|
 | 1242 | +		 * expired in interrupt context. The stub function below  | 
|---|
 | 1243 | +		 * always returns true, which allows the compiler to  | 
|---|
 | 1244 | +		 * optimize the loop out.  | 
|---|
 | 1245 | +		 *  | 
|---|
 | 1246 | +		 * If timer expiry is deferred to task work context then  | 
|---|
 | 1247 | +		 * the following rules apply:  | 
|---|
 | 1248 | +		 *  | 
|---|
 | 1249 | +		 * - On !RT kernels no tick can have happened on this CPU  | 
|---|
 | 1250 | +		 *   after sighand lock was acquired because interrupts are  | 
|---|
 | 1251 | +		 *   disabled. So reenabling task work before dropping  | 
|---|
 | 1252 | +		 *   sighand lock and reenabling interrupts is race free.  | 
|---|
 | 1253 | +		 *  | 
|---|
 | 1254 | +		 * - On RT kernels ticks might have happened but the tick  | 
|---|
 | 1255 | +		 *   work ignored posix CPU timer handling because the  | 
|---|
 | 1256 | +		 *   CPUTIMERS_WORK_SCHEDULED bit is set. Reenabling work  | 
|---|
 | 1257 | +		 *   must be done very carefully including a check whether  | 
|---|
 | 1258 | +		 *   ticks have happened since the start of the timer  | 
|---|
 | 1259 | +		 *   expiry checks. posix_cpu_timers_enable_work() takes  | 
|---|
 | 1260 | +		 *   care of that and eventually lets the expiry checks  | 
|---|
 | 1261 | +		 *   run again.  | 
|---|
 | 1262 | +		 */  | 
|---|
 | 1263 | +	} while (!posix_cpu_timers_enable_work(tsk, start));  | 
|---|
| 1190 | 1264 |   | 
|---|
| 1191 | 1265 |  	/* | 
|---|
| 1192 |  | -	 * We must release these locks before taking any timer's lock.  | 
|---|
 | 1266 | +	 * We must release sighand lock before taking any timer's lock.  | 
|---|
| 1193 | 1267 |  	 * There is a potential race with timer deletion here, as the | 
|---|
| 1194 | 1268 |  	 * siglock now protects our private firing list.  We have set | 
|---|
| 1195 | 1269 |  	 * the firing flag in each timer, so that a deletion attempt | 
|---|
| .. | .. | 
|---|
| 1204 | 1278 |  	 * each timer's lock before clearing its firing flag, so no | 
|---|
| 1205 | 1279 |  	 * timer call will interfere. | 
|---|
| 1206 | 1280 |  	 */ | 
|---|
| 1207 |  | -	list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) {  | 
|---|
 | 1281 | +	list_for_each_entry_safe(timer, next, &firing, it.cpu.elist) {  | 
|---|
| 1208 | 1282 |  		int cpu_firing; | 
|---|
| 1209 | 1283 |   | 
|---|
 | 1284 | +		/*  | 
|---|
 | 1285 | +		 * spin_lock() is sufficient here even independent of the  | 
|---|
 | 1286 | +		 * expiry context. If expiry happens in hard interrupt  | 
|---|
 | 1287 | +		 * context it's obvious. For task work context it's safe  | 
|---|
 | 1288 | +		 * because all other operations on timer::it_lock happen in  | 
|---|
 | 1289 | +		 * task context (syscall or exit).  | 
|---|
 | 1290 | +		 */  | 
|---|
| 1210 | 1291 |  		spin_lock(&timer->it_lock); | 
|---|
| 1211 |  | -		list_del_init(&timer->it.cpu.entry);  | 
|---|
 | 1292 | +		list_del_init(&timer->it.cpu.elist);  | 
|---|
| 1212 | 1293 |  		cpu_firing = timer->it.cpu.firing; | 
|---|
| 1213 | 1294 |  		timer->it.cpu.firing = 0; | 
|---|
| 1214 |  | -		timer->it.cpu.firing_cpu = -1;  | 
|---|
| 1215 | 1295 |  		/* | 
|---|
| 1216 | 1296 |  		 * The firing flag is -1 if we collided with a reset | 
|---|
| 1217 | 1297 |  		 * of the timer, which already reported this | 
|---|
| .. | .. | 
|---|
| 1221 | 1301 |  			cpu_timer_fire(timer); | 
|---|
| 1222 | 1302 |  		spin_unlock(&timer->it_lock); | 
|---|
| 1223 | 1303 |  	} | 
|---|
| 1224 |  | -	spin_unlock(expiry_lock);  | 
|---|
| 1225 | 1304 |  } | 
|---|
| 1226 | 1305 |   | 
|---|
| 1227 |  | -#ifdef CONFIG_PREEMPT_RT_BASE  | 
|---|
| 1228 |  | -#include <linux/kthread.h>  | 
|---|
| 1229 |  | -#include <linux/cpu.h>  | 
|---|
| 1230 |  | -DEFINE_PER_CPU(struct task_struct *, posix_timer_task);  | 
|---|
| 1231 |  | -DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist);  | 
|---|
| 1232 |  | -DEFINE_PER_CPU(bool, posix_timer_th_active);  | 
|---|
| 1233 |  | -  | 
|---|
| 1234 |  | -static void posix_cpu_kthread_fn(unsigned int cpu)  | 
|---|
 | 1306 | +/*  | 
|---|
 | 1307 | + * This is called from the timer interrupt handler.  The irq handler has  | 
|---|
 | 1308 | + * already updated our counts.  We need to check if any timers fire now.  | 
|---|
 | 1309 | + * Interrupts are disabled.  | 
|---|
 | 1310 | + */  | 
|---|
 | 1311 | +void run_posix_cpu_timers(void)  | 
|---|
| 1235 | 1312 |  { | 
|---|
| 1236 |  | -	struct task_struct *tsk = NULL;  | 
|---|
| 1237 |  | -	struct task_struct *next = NULL;  | 
|---|
 | 1313 | +	struct task_struct *tsk = current;  | 
|---|
| 1238 | 1314 |   | 
|---|
| 1239 |  | -	BUG_ON(per_cpu(posix_timer_task, cpu) != current);  | 
|---|
| 1240 |  | -  | 
|---|
| 1241 |  | -	/* grab task list */  | 
|---|
| 1242 |  | -	raw_local_irq_disable();  | 
|---|
| 1243 |  | -	tsk = per_cpu(posix_timer_tasklist, cpu);  | 
|---|
| 1244 |  | -	per_cpu(posix_timer_tasklist, cpu) = NULL;  | 
|---|
| 1245 |  | -	raw_local_irq_enable();  | 
|---|
| 1246 |  | -  | 
|---|
| 1247 |  | -	/* its possible the list is empty, just return */  | 
|---|
| 1248 |  | -	if (!tsk)  | 
|---|
| 1249 |  | -		return;  | 
|---|
| 1250 |  | -  | 
|---|
| 1251 |  | -	/* Process task list */  | 
|---|
| 1252 |  | -	while (1) {  | 
|---|
| 1253 |  | -		/* save next */  | 
|---|
| 1254 |  | -		next = tsk->posix_timer_list;  | 
|---|
| 1255 |  | -  | 
|---|
| 1256 |  | -		/* run the task timers, clear its ptr and  | 
|---|
| 1257 |  | -		 * unreference it  | 
|---|
| 1258 |  | -		 */  | 
|---|
| 1259 |  | -		__run_posix_cpu_timers(tsk);  | 
|---|
| 1260 |  | -		tsk->posix_timer_list = NULL;  | 
|---|
| 1261 |  | -		put_task_struct(tsk);  | 
|---|
| 1262 |  | -  | 
|---|
| 1263 |  | -		/* check if this is the last on the list */  | 
|---|
| 1264 |  | -		if (next == tsk)  | 
|---|
| 1265 |  | -			break;  | 
|---|
| 1266 |  | -		tsk = next;  | 
|---|
| 1267 |  | -	}  | 
|---|
| 1268 |  | -}  | 
|---|
| 1269 |  | -  | 
|---|
| 1270 |  | -static inline int __fastpath_timer_check(struct task_struct *tsk)  | 
|---|
| 1271 |  | -{  | 
|---|
| 1272 |  | -	/* tsk == current, ensure it is safe to use ->signal/sighand */  | 
|---|
| 1273 |  | -	if (unlikely(tsk->exit_state))  | 
|---|
| 1274 |  | -		return 0;  | 
|---|
| 1275 |  | -  | 
|---|
| 1276 |  | -	if (!task_cputime_zero(&tsk->cputime_expires))  | 
|---|
| 1277 |  | -			return 1;  | 
|---|
| 1278 |  | -  | 
|---|
| 1279 |  | -	if (!task_cputime_zero(&tsk->signal->cputime_expires))  | 
|---|
| 1280 |  | -			return 1;  | 
|---|
| 1281 |  | -  | 
|---|
| 1282 |  | -	return 0;  | 
|---|
| 1283 |  | -}  | 
|---|
| 1284 |  | -  | 
|---|
| 1285 |  | -void run_posix_cpu_timers(struct task_struct *tsk)  | 
|---|
| 1286 |  | -{  | 
|---|
| 1287 |  | -	unsigned int cpu = smp_processor_id();  | 
|---|
| 1288 |  | -	struct task_struct *tasklist;  | 
|---|
| 1289 |  | -  | 
|---|
| 1290 |  | -	BUG_ON(!irqs_disabled());  | 
|---|
| 1291 |  | -  | 
|---|
| 1292 |  | -	if (per_cpu(posix_timer_th_active, cpu) != true)  | 
|---|
| 1293 |  | -		return;  | 
|---|
| 1294 |  | -  | 
|---|
| 1295 |  | -	/* get per-cpu references */  | 
|---|
| 1296 |  | -	tasklist = per_cpu(posix_timer_tasklist, cpu);  | 
|---|
| 1297 |  | -  | 
|---|
| 1298 |  | -	/* check to see if we're already queued */  | 
|---|
| 1299 |  | -	if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) {  | 
|---|
| 1300 |  | -		get_task_struct(tsk);  | 
|---|
| 1301 |  | -		if (tasklist) {  | 
|---|
| 1302 |  | -			tsk->posix_timer_list = tasklist;  | 
|---|
| 1303 |  | -		} else {  | 
|---|
| 1304 |  | -			/*  | 
|---|
| 1305 |  | -			 * The list is terminated by a self-pointing  | 
|---|
| 1306 |  | -			 * task_struct  | 
|---|
| 1307 |  | -			 */  | 
|---|
| 1308 |  | -			tsk->posix_timer_list = tsk;  | 
|---|
| 1309 |  | -		}  | 
|---|
| 1310 |  | -		per_cpu(posix_timer_tasklist, cpu) = tsk;  | 
|---|
| 1311 |  | -  | 
|---|
| 1312 |  | -		wake_up_process(per_cpu(posix_timer_task, cpu));  | 
|---|
| 1313 |  | -	}  | 
|---|
| 1314 |  | -}  | 
|---|
| 1315 |  | -  | 
|---|
| 1316 |  | -static int posix_cpu_kthread_should_run(unsigned int cpu)  | 
|---|
| 1317 |  | -{  | 
|---|
| 1318 |  | -	return __this_cpu_read(posix_timer_tasklist) != NULL;  | 
|---|
| 1319 |  | -}  | 
|---|
| 1320 |  | -  | 
|---|
| 1321 |  | -static void posix_cpu_kthread_park(unsigned int cpu)  | 
|---|
| 1322 |  | -{  | 
|---|
| 1323 |  | -	this_cpu_write(posix_timer_th_active, false);  | 
|---|
| 1324 |  | -}  | 
|---|
| 1325 |  | -  | 
|---|
| 1326 |  | -static void posix_cpu_kthread_unpark(unsigned int cpu)  | 
|---|
| 1327 |  | -{  | 
|---|
| 1328 |  | -	this_cpu_write(posix_timer_th_active, true);  | 
|---|
| 1329 |  | -}  | 
|---|
| 1330 |  | -  | 
|---|
| 1331 |  | -static void posix_cpu_kthread_setup(unsigned int cpu)  | 
|---|
| 1332 |  | -{  | 
|---|
| 1333 |  | -	struct sched_param sp;  | 
|---|
| 1334 |  | -  | 
|---|
| 1335 |  | -	sp.sched_priority = MAX_RT_PRIO - 1;  | 
|---|
| 1336 |  | -	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);  | 
|---|
| 1337 |  | -	posix_cpu_kthread_unpark(cpu);  | 
|---|
| 1338 |  | -}  | 
|---|
| 1339 |  | -  | 
|---|
| 1340 |  | -static struct smp_hotplug_thread posix_cpu_thread = {  | 
|---|
| 1341 |  | -	.store			= &posix_timer_task,  | 
|---|
| 1342 |  | -	.thread_should_run	= posix_cpu_kthread_should_run,  | 
|---|
| 1343 |  | -	.thread_fn		= posix_cpu_kthread_fn,  | 
|---|
| 1344 |  | -	.thread_comm		= "posixcputmr/%u",  | 
|---|
| 1345 |  | -	.setup			= posix_cpu_kthread_setup,  | 
|---|
| 1346 |  | -	.park			= posix_cpu_kthread_park,  | 
|---|
| 1347 |  | -	.unpark			= posix_cpu_kthread_unpark,  | 
|---|
| 1348 |  | -};  | 
|---|
| 1349 |  | -  | 
|---|
| 1350 |  | -static int __init posix_cpu_thread_init(void)  | 
|---|
| 1351 |  | -{  | 
|---|
| 1352 |  | -	/* Start one for boot CPU. */  | 
|---|
| 1353 |  | -	unsigned long cpu;  | 
|---|
| 1354 |  | -	int ret;  | 
|---|
| 1355 |  | -  | 
|---|
| 1356 |  | -	/* init the per-cpu posix_timer_tasklets */  | 
|---|
| 1357 |  | -	for_each_possible_cpu(cpu)  | 
|---|
| 1358 |  | -		per_cpu(posix_timer_tasklist, cpu) = NULL;  | 
|---|
| 1359 |  | -  | 
|---|
| 1360 |  | -	ret = smpboot_register_percpu_thread(&posix_cpu_thread);  | 
|---|
| 1361 |  | -	WARN_ON(ret);  | 
|---|
| 1362 |  | -  | 
|---|
| 1363 |  | -	return 0;  | 
|---|
| 1364 |  | -}  | 
|---|
| 1365 |  | -early_initcall(posix_cpu_thread_init);  | 
|---|
| 1366 |  | -#else /* CONFIG_PREEMPT_RT_BASE */  | 
|---|
| 1367 |  | -void run_posix_cpu_timers(struct task_struct *tsk)  | 
|---|
| 1368 |  | -{  | 
|---|
| 1369 | 1315 |  	lockdep_assert_irqs_disabled(); | 
|---|
 | 1316 | +  | 
|---|
 | 1317 | +	/*  | 
|---|
 | 1318 | +	 * If the actual expiry is deferred to task work context and the  | 
|---|
 | 1319 | +	 * work is already scheduled, there is no point in doing anything here.  | 
|---|
 | 1320 | +	 */  | 
|---|
 | 1321 | +	if (posix_cpu_timers_work_scheduled(tsk))  | 
|---|
 | 1322 | +		return;  | 
|---|
 | 1323 | +  | 
|---|
 | 1324 | +	/*  | 
|---|
 | 1325 | +	 * The fast path checks that there are no expired thread or thread  | 
|---|
 | 1326 | +	 * group timers.  If that's so, just return.  | 
|---|
 | 1327 | +	 */  | 
|---|
 | 1328 | +	if (!fastpath_timer_check(tsk))  | 
|---|
 | 1329 | +		return;  | 
|---|
 | 1330 | +  | 
|---|
| 1370 | 1331 |  	__run_posix_cpu_timers(tsk); | 
|---|
| 1371 | 1332 |  } | 
|---|
| 1372 |  | -#endif /* CONFIG_PREEMPT_RT_BASE */  | 
|---|
| 1373 | 1333 |   | 
|---|
| 1374 | 1334 |  /* | 
|---|
| 1375 | 1335 |   * Set one of the process-wide special case CPU timers or RLIMIT_CPU. | 
|---|
| 1376 | 1336 |   * The tsk->sighand->siglock must be held by the caller. | 
|---|
| 1377 | 1337 |   */ | 
|---|
| 1378 |  | -void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,  | 
|---|
 | 1338 | +void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid,  | 
|---|
| 1379 | 1339 |  			   u64 *newval, u64 *oldval) | 
|---|
| 1380 | 1340 |  { | 
|---|
| 1381 |  | -	u64 now;  | 
|---|
| 1382 |  | -	int ret;  | 
|---|
 | 1341 | +	u64 now, *nextevt;  | 
|---|
| 1383 | 1342 |   | 
|---|
| 1384 |  | -	if (WARN_ON_ONCE(clock_idx >= CPUCLOCK_SCHED))  | 
|---|
 | 1343 | +	if (WARN_ON_ONCE(clkid >= CPUCLOCK_SCHED))  | 
|---|
| 1385 | 1344 |  		return; | 
|---|
| 1386 | 1345 |   | 
|---|
| 1387 |  | -	ret = cpu_timer_sample_group(clock_idx, tsk, &now);  | 
|---|
 | 1346 | +	nextevt = &tsk->signal->posix_cputimers.bases[clkid].nextevt;  | 
|---|
 | 1347 | +	now = cpu_clock_sample_group(clkid, tsk, true);  | 
|---|
| 1388 | 1348 |   | 
|---|
| 1389 |  | -	if (oldval && ret != -EINVAL) {  | 
|---|
 | 1349 | +	if (oldval) {  | 
|---|
| 1390 | 1350 |  		/* | 
|---|
| 1391 | 1351 |  		 * We are setting itimer. The *oldval is absolute and we update | 
|---|
| 1392 | 1352 |  		 * it to be relative, *newval argument is relative and we update | 
|---|
| .. | .. | 
|---|
| 1407 | 1367 |  	} | 
|---|
| 1408 | 1368 |   | 
|---|
| 1409 | 1369 |  	/* | 
|---|
| 1410 |  | -	 * Update expiration cache if we are the earliest timer, or eventually  | 
|---|
| 1411 |  | -	 * RLIMIT_CPU limit is earlier than prof_exp cpu timer expire.  | 
|---|
 | 1370 | +	 * Update expiration cache if this is the earliest timer. CPUCLOCK_PROF  | 
|---|
 | 1371 | +	 * expiry cache is also used by RLIMIT_CPU!  | 
|---|
| 1412 | 1372 |  	 */ | 
|---|
| 1413 |  | -	switch (clock_idx) {  | 
|---|
| 1414 |  | -	case CPUCLOCK_PROF:  | 
|---|
| 1415 |  | -		if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))  | 
|---|
| 1416 |  | -			tsk->signal->cputime_expires.prof_exp = *newval;  | 
|---|
| 1417 |  | -		break;  | 
|---|
| 1418 |  | -	case CPUCLOCK_VIRT:  | 
|---|
| 1419 |  | -		if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))  | 
|---|
| 1420 |  | -			tsk->signal->cputime_expires.virt_exp = *newval;  | 
|---|
| 1421 |  | -		break;  | 
|---|
| 1422 |  | -	}  | 
|---|
 | 1373 | +	if (*newval < *nextevt)  | 
|---|
 | 1374 | +		*nextevt = *newval;  | 
|---|
| 1423 | 1375 |   | 
|---|
| 1424 | 1376 |  	tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER); | 
|---|
| 1425 | 1377 |  } | 
|---|
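
set_process_cpu_timer() can drop the per-clock switch because the expiry cache no longer uses 0 for "no timer armed": with the cache presumably initialised to U64_MAX, a plain "keep the minimum" update is correct for both the PROF and VIRT bases. A minimal standalone sketch of that invariant (the U64_MAX default is an assumption of this sketch):

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Unarmed cache entry: "infinitely far away" instead of 0 */
	uint64_t nextevt = UINT64_MAX;
	uint64_t newval  = 7ULL * 1000000000ULL;	/* new itimer expiry: 7s in ns */

	/*
	 * The removed code needed an expires_gt() check per clock; with
	 * the U64_MAX convention a plain minimum suffices.
	 */
	if (newval < nextevt)
		nextevt = newval;

	printf("nextevt = %llu ns\n", (unsigned long long)nextevt);
	return 0;
}
```
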
| .. | .. | 
|---|
| 1441 | 1393 |  	timer.it_overrun = -1; | 
|---|
| 1442 | 1394 |  	error = posix_cpu_timer_create(&timer); | 
|---|
| 1443 | 1395 |  	timer.it_process = current; | 
|---|
 | 1396 | +  | 
|---|
| 1444 | 1397 |  	if (!error) { | 
|---|
| 1445 | 1398 |  		static struct itimerspec64 zero_it; | 
|---|
| 1446 | 1399 |  		struct restart_block *restart; | 
|---|
| .. | .. | 
|---|
| 1456 | 1409 |  		} | 
|---|
| 1457 | 1410 |   | 
|---|
| 1458 | 1411 |  		while (!signal_pending(current)) { | 
|---|
| 1459 |  | -			if (timer.it.cpu.expires == 0) {  | 
|---|
 | 1412 | +			if (!cpu_timer_getexpires(&timer.it.cpu)) {  | 
|---|
| 1460 | 1413 |  				/* | 
|---|
| 1461 | 1414 |  				 * Our timer fired and was reset, below | 
|---|
| 1462 | 1415 |  				 * deletion can not fail. | 
|---|
| .. | .. | 
|---|
| 1478 | 1431 |  		/* | 
|---|
| 1479 | 1432 |  		 * We were interrupted by a signal. | 
|---|
| 1480 | 1433 |  		 */ | 
|---|
| 1481 |  | -		expires = timer.it.cpu.expires;  | 
|---|
 | 1434 | +		expires = cpu_timer_getexpires(&timer.it.cpu);  | 
|---|
| 1482 | 1435 |  		error = posix_cpu_timer_set(&timer, 0, &zero_it, &it); | 
|---|
| 1483 | 1436 |  		if (!error) { | 
|---|
| 1484 | 1437 |  			/* | 
|---|
| .. | .. | 
|---|
| 1489 | 1442 |  		spin_unlock_irq(&timer.it_lock); | 
|---|
| 1490 | 1443 |   | 
|---|
| 1491 | 1444 |  		while (error == TIMER_RETRY) { | 
|---|
| 1492 |  | -  | 
|---|
| 1493 |  | -			cpu_timers_grab_expiry_lock(&timer);  | 
|---|
| 1494 | 1445 |  			/* | 
|---|
| 1495 | 1446 |  			 * We need to handle case when timer was or is in the | 
|---|
| 1496 | 1447 |  			 * middle of firing. In other cases we already freed | 
|---|
| .. | .. | 
|---|
| 1600 | 1551 |  } | 
|---|
| 1601 | 1552 |   | 
|---|
| 1602 | 1553 |  const struct k_clock clock_posix_cpu = { | 
|---|
| 1603 |  | -	.clock_getres	= posix_cpu_clock_getres,  | 
|---|
| 1604 |  | -	.clock_set	= posix_cpu_clock_set,  | 
|---|
| 1605 |  | -	.clock_get	= posix_cpu_clock_get,  | 
|---|
| 1606 |  | -	.timer_create	= posix_cpu_timer_create,  | 
|---|
| 1607 |  | -	.nsleep		= posix_cpu_nsleep,  | 
|---|
| 1608 |  | -	.timer_set	= posix_cpu_timer_set,  | 
|---|
| 1609 |  | -	.timer_del	= posix_cpu_timer_del,  | 
|---|
| 1610 |  | -	.timer_get	= posix_cpu_timer_get,  | 
|---|
| 1611 |  | -	.timer_rearm	= posix_cpu_timer_rearm,  | 
|---|
 | 1554 | +	.clock_getres		= posix_cpu_clock_getres,  | 
|---|
 | 1555 | +	.clock_set		= posix_cpu_clock_set,  | 
|---|
 | 1556 | +	.clock_get_timespec	= posix_cpu_clock_get,  | 
|---|
 | 1557 | +	.timer_create		= posix_cpu_timer_create,  | 
|---|
 | 1558 | +	.nsleep			= posix_cpu_nsleep,  | 
|---|
 | 1559 | +	.timer_set		= posix_cpu_timer_set,  | 
|---|
 | 1560 | +	.timer_del		= posix_cpu_timer_del,  | 
|---|
 | 1561 | +	.timer_get		= posix_cpu_timer_get,  | 
|---|
 | 1562 | +	.timer_rearm		= posix_cpu_timer_rearm,  | 
|---|
| 1612 | 1563 |  }; | 
|---|
| 1613 | 1564 |   | 
|---|
| 1614 | 1565 |  const struct k_clock clock_process = { | 
|---|
| 1615 |  | -	.clock_getres	= process_cpu_clock_getres,  | 
|---|
| 1616 |  | -	.clock_get	= process_cpu_clock_get,  | 
|---|
| 1617 |  | -	.timer_create	= process_cpu_timer_create,  | 
|---|
| 1618 |  | -	.nsleep		= process_cpu_nsleep,  | 
|---|
 | 1566 | +	.clock_getres		= process_cpu_clock_getres,  | 
|---|
 | 1567 | +	.clock_get_timespec	= process_cpu_clock_get,  | 
|---|
 | 1568 | +	.timer_create		= process_cpu_timer_create,  | 
|---|
 | 1569 | +	.nsleep			= process_cpu_nsleep,  | 
|---|
| 1619 | 1570 |  }; | 
|---|
| 1620 | 1571 |   | 
|---|
| 1621 | 1572 |  const struct k_clock clock_thread = { | 
|---|
| 1622 |  | -	.clock_getres	= thread_cpu_clock_getres,  | 
|---|
| 1623 |  | -	.clock_get	= thread_cpu_clock_get,  | 
|---|
| 1624 |  | -	.timer_create	= thread_cpu_timer_create,  | 
|---|
 | 1573 | +	.clock_getres		= thread_cpu_clock_getres,  | 
|---|
 | 1574 | +	.clock_get_timespec	= thread_cpu_clock_get,  | 
|---|
 | 1575 | +	.timer_create		= thread_cpu_timer_create,  | 
|---|
| 1625 | 1576 |  }; | 
|---|