.. | ..
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later
1 | 2 | /*
2 | 3 | * Copyright (C) 2010-2017 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3 | 4 | *
4 | 5 | * membarrier system call
5 | | - *
6 | | - * This program is free software; you can redistribute it and/or modify
7 | | - * it under the terms of the GNU General Public License as published by
8 | | - * the Free Software Foundation; either version 2 of the License, or
9 | | - * (at your option) any later version.
10 | | - *
11 | | - * This program is distributed in the hope that it will be useful,
12 | | - * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | | - * GNU General Public License for more details.
15 | 6 | */
16 | 7 | #include "sched.h"
17 | 8 |
.. | ..
27 | 18 | #define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK 0
28 | 19 | #endif
29 | 20 |
| 21 | +#ifdef CONFIG_RSEQ
| 22 | +#define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK \
| 23 | + (MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ \
| 24 | + | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ)
| 25 | +#else
| 26 | +#define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK 0
| 27 | +#endif
| 28 | +
30 | 29 | #define MEMBARRIER_CMD_BITMASK \
31 | 30 | (MEMBARRIER_CMD_GLOBAL | MEMBARRIER_CMD_GLOBAL_EXPEDITED \
32 | 31 | | MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED \
33 | 32 | | MEMBARRIER_CMD_PRIVATE_EXPEDITED \
34 | 33 | | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED \
35 | | - | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK)
| 34 | + | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK \
| 35 | + | MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK)
36 | 36 |
37 | 37 | static void ipi_mb(void *info)
38 | 38 | {
39 | 39 | smp_mb(); /* IPIs should be serializing but paranoid. */
40 | 40 | }
41 | 41 |
| 42 | +static void ipi_sync_core(void *info)
| 43 | +{
| 44 | + /*
| 45 | + * The smp_mb() in membarrier after all the IPIs is supposed to
| 46 | + * ensure that memory on remote CPUs that occur before the IPI
| 47 | + * become visible to membarrier()'s caller -- see scenario B in
| 48 | + * the big comment at the top of this file.
| 49 | + *
| 50 | + * A sync_core() would provide this guarantee, but
| 51 | + * sync_core_before_usermode() might end up being deferred until
| 52 | + * after membarrier()'s smp_mb().
| 53 | + */
| 54 | + smp_mb(); /* IPIs should be serializing but paranoid. */
| 55 | +
| 56 | + sync_core_before_usermode();
| 57 | +}
| 58 | +
| 59 | +static void ipi_rseq(void *info)
| 60 | +{
| 61 | + /*
| 62 | + * Ensure that all stores done by the calling thread are visible
| 63 | + * to the current task before the current task resumes. We could
| 64 | + * probably optimize this away on most architectures, but by the
| 65 | + * time we've already sent an IPI, the cost of the extra smp_mb()
| 66 | + * is negligible.
| 67 | + */
| 68 | + smp_mb();
| 69 | + rseq_preempt(current);
| 70 | +}
| 71 | +
| 72 | +static void ipi_sync_rq_state(void *info)
| 73 | +{
| 74 | + struct mm_struct *mm = (struct mm_struct *) info;
| 75 | +
| 76 | + if (current->mm != mm)
| 77 | + return;
| 78 | + this_cpu_write(runqueues.membarrier_state,
| 79 | + atomic_read(&mm->membarrier_state));
| 80 | + /*
| 81 | + * Issue a memory barrier after setting
| 82 | + * MEMBARRIER_STATE_GLOBAL_EXPEDITED in the current runqueue to
| 83 | + * guarantee that no memory access following registration is reordered
| 84 | + * before registration.
| 85 | + */
| 86 | + smp_mb();
| 87 | +}
| 88 | +
| 89 | +void membarrier_exec_mmap(struct mm_struct *mm)
| 90 | +{
| 91 | + /*
| 92 | + * Issue a memory barrier before clearing membarrier_state to
| 93 | + * guarantee that no memory access prior to exec is reordered after
| 94 | + * clearing this state.
| 95 | + */
| 96 | + smp_mb();
| 97 | + atomic_set(&mm->membarrier_state, 0);
| 98 | + /*
| 99 | + * Keep the runqueue membarrier_state in sync with this mm
| 100 | + * membarrier_state.
| 101 | + */
| 102 | + this_cpu_write(runqueues.membarrier_state, 0);
| 103 | +}
| 104 | +
42 | 105 | static int membarrier_global_expedited(void)
43 | 106 | {
44 | 107 | int cpu;
45 | | - bool fallback = false;
46 | 108 | cpumask_var_t tmpmask;
47 | 109 |
48 | 110 | if (num_online_cpus() == 1)
.. | ..
54 | 116 | */
55 | 117 | smp_mb(); /* system call entry is not a mb. */
56 | 118 |
57 | | - /*
58 | | - * Expedited membarrier commands guarantee that they won't
59 | | - * block, hence the GFP_NOWAIT allocation flag and fallback
60 | | - * implementation.
61 | | - */
62 | | - if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
63 | | - /* Fallback for OOM. */
64 | | - fallback = true;
65 | | - }
| 119 | + if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
| 120 | + return -ENOMEM;
66 | 121 |
67 | 122 | cpus_read_lock();
| 123 | + rcu_read_lock();
68 | 124 | for_each_online_cpu(cpu) {
69 | 125 | struct task_struct *p;
70 | 126 |
.. | ..
79 | 135 | if (cpu == raw_smp_processor_id())
80 | 136 | continue;
81 | 137 |
82 | | - rcu_read_lock();
83 | | - p = task_rcu_dereference(&cpu_rq(cpu)->curr);
84 | | - if (p && p->mm && (atomic_read(&p->mm->membarrier_state) &
85 | | - MEMBARRIER_STATE_GLOBAL_EXPEDITED)) {
86 | | - if (!fallback)
87 | | - __cpumask_set_cpu(cpu, tmpmask);
88 | | - else
89 | | - smp_call_function_single(cpu, ipi_mb, NULL, 1);
90 | | - }
91 | | - rcu_read_unlock();
| 138 | + if (!(READ_ONCE(cpu_rq(cpu)->membarrier_state) &
| 139 | + MEMBARRIER_STATE_GLOBAL_EXPEDITED))
| 140 | + continue;
| 141 | +
| 142 | + /*
| 143 | + * Skip the CPU if it runs a kernel thread. The scheduler
| 144 | + * leaves the prior task mm in place as an optimization when
| 145 | + * scheduling a kthread.
| 146 | + */
| 147 | + p = rcu_dereference(cpu_rq(cpu)->curr);
| 148 | + if (p->flags & PF_KTHREAD)
| 149 | + continue;
| 150 | +
| 151 | + __cpumask_set_cpu(cpu, tmpmask);
92 | 152 | }
93 | | - if (!fallback) {
94 | | - preempt_disable();
95 | | - smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
96 | | - preempt_enable();
97 | | - free_cpumask_var(tmpmask);
98 | | - }
| 153 | + rcu_read_unlock();
| 154 | +
| 155 | + preempt_disable();
| 156 | + smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
| 157 | + preempt_enable();
| 158 | +
| 159 | + free_cpumask_var(tmpmask);
99 | 160 | cpus_read_unlock();
100 | 161 |
101 | 162 | /*
.. | ..
107 | 168 | return 0;
108 | 169 | }
109 | 170 |
110 | | -static int membarrier_private_expedited(int flags)
| 171 | +static int membarrier_private_expedited(int flags, int cpu_id)
111 | 172 | {
112 | | - int cpu;
113 | | - bool fallback = false;
114 | 173 | cpumask_var_t tmpmask;
| 174 | + struct mm_struct *mm = current->mm;
| 175 | + smp_call_func_t ipi_func = ipi_mb;
115 | 176 |
116 | | - if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
| 177 | + if (flags == MEMBARRIER_FLAG_SYNC_CORE) {
117 | 178 | if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
118 | 179 | return -EINVAL;
119 | | - if (!(atomic_read(&current->mm->membarrier_state) &
| 180 | + if (!(atomic_read(&mm->membarrier_state) &
120 | 181 | MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY))
121 | 182 | return -EPERM;
| 183 | + ipi_func = ipi_sync_core;
| 184 | + } else if (flags == MEMBARRIER_FLAG_RSEQ) {
| 185 | + if (!IS_ENABLED(CONFIG_RSEQ))
| 186 | + return -EINVAL;
| 187 | + if (!(atomic_read(&mm->membarrier_state) &
| 188 | + MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY))
| 189 | + return -EPERM;
| 190 | + ipi_func = ipi_rseq;
122 | 191 | } else {
123 | | - if (!(atomic_read(&current->mm->membarrier_state) &
| 192 | + WARN_ON_ONCE(flags);
| 193 | + if (!(atomic_read(&mm->membarrier_state) &
124 | 194 | MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
125 | 195 | return -EPERM;
126 | 196 | }
127 | 197 |
128 | | - if (num_online_cpus() == 1)
| 198 | + if (flags != MEMBARRIER_FLAG_SYNC_CORE &&
| 199 | + (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1))
129 | 200 | return 0;
130 | 201 |
131 | 202 | /*
.. | ..
134 | 205 | */
135 | 206 | smp_mb(); /* system call entry is not a mb. */
136 | 207 |
137 | | - /*
138 | | - * Expedited membarrier commands guarantee that they won't
139 | | - * block, hence the GFP_NOWAIT allocation flag and fallback
140 | | - * implementation.
141 | | - */
142 | | - if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
143 | | - /* Fallback for OOM. */
144 | | - fallback = true;
145 | | - }
| 208 | + if (cpu_id < 0 && !zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
| 209 | + return -ENOMEM;
146 | 210 |
147 | 211 | cpus_read_lock();
148 | | - for_each_online_cpu(cpu) {
| 212 | +
| 213 | + if (cpu_id >= 0) {
149 | 214 | struct task_struct *p;
150 | 215 |
151 | | - /*
152 | | - * Skipping the current CPU is OK even through we can be
153 | | - * migrated at any point. The current CPU, at the point
154 | | - * where we read raw_smp_processor_id(), is ensured to
155 | | - * be in program order with respect to the caller
156 | | - * thread. Therefore, we can skip this CPU from the
157 | | - * iteration.
158 | | - */
159 | | - if (cpu == raw_smp_processor_id())
160 | | - continue;
| 216 | + if (cpu_id >= nr_cpu_ids || !cpu_online(cpu_id))
| 217 | + goto out;
161 | 218 | rcu_read_lock();
162 | | - p = task_rcu_dereference(&cpu_rq(cpu)->curr);
163 | | - if (p && p->mm == current->mm) {
164 | | - if (!fallback)
| 219 | + p = rcu_dereference(cpu_rq(cpu_id)->curr);
| 220 | + if (!p || p->mm != mm) {
| 221 | + rcu_read_unlock();
| 222 | + goto out;
| 223 | + }
| 224 | + rcu_read_unlock();
| 225 | + } else {
| 226 | + int cpu;
| 227 | +
| 228 | + rcu_read_lock();
| 229 | + for_each_online_cpu(cpu) {
| 230 | + struct task_struct *p;
| 231 | +
| 232 | + p = rcu_dereference(cpu_rq(cpu)->curr);
| 233 | + if (p && p->mm == mm)
165 | 234 | __cpumask_set_cpu(cpu, tmpmask);
166 | | - else
167 | | - smp_call_function_single(cpu, ipi_mb, NULL, 1);
168 | 235 | }
169 | 236 | rcu_read_unlock();
170 | 237 | }
171 | | - if (!fallback) {
172 | | - preempt_disable();
173 | | - smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
174 | | - preempt_enable();
175 | | - free_cpumask_var(tmpmask);
| 238 | +
| 239 | + if (cpu_id >= 0) {
| 240 | + /*
| 241 | + * smp_call_function_single() will call ipi_func() if cpu_id
| 242 | + * is the calling CPU.
| 243 | + */
| 244 | + smp_call_function_single(cpu_id, ipi_func, NULL, 1);
| 245 | + } else {
| 246 | + /*
| 247 | + * For regular membarrier, we can save a few cycles by
| 248 | + * skipping the current cpu -- we're about to do smp_mb()
| 249 | + * below, and if we migrate to a different cpu, this cpu
| 250 | + * and the new cpu will execute a full barrier in the
| 251 | + * scheduler.
| 252 | + *
| 253 | + * For SYNC_CORE, we do need a barrier on the current cpu --
| 254 | + * otherwise, if we are migrated and replaced by a different
| 255 | + * task in the same mm just before, during, or after
| 256 | + * membarrier, we will end up with some thread in the mm
| 257 | + * running without a core sync.
| 258 | + *
| 259 | + * For RSEQ, don't rseq_preempt() the caller. User code
| 260 | + * is not supposed to issue syscalls at all from inside an
| 261 | + * rseq critical section.
| 262 | + */
| 263 | + if (flags != MEMBARRIER_FLAG_SYNC_CORE) {
| 264 | + preempt_disable();
| 265 | + smp_call_function_many(tmpmask, ipi_func, NULL, true);
| 266 | + preempt_enable();
| 267 | + } else {
| 268 | + on_each_cpu_mask(tmpmask, ipi_func, NULL, true);
| 269 | + }
176 | 270 | }
| 271 | +
| 272 | +out:
| 273 | + if (cpu_id < 0)
| 274 | + free_cpumask_var(tmpmask);
177 | 275 | cpus_read_unlock();
178 | 276 |
179 | 277 | /*
.. | ..
182 | 280 | * rq->curr modification in scheduler.
183 | 281 | */
184 | 282 | smp_mb(); /* exit from system call is not a mb */
| 283 | +
| 284 | + return 0;
| 285 | +}
| 286 | +
| 287 | +static int sync_runqueues_membarrier_state(struct mm_struct *mm)
| 288 | +{
| 289 | + int membarrier_state = atomic_read(&mm->membarrier_state);
| 290 | + cpumask_var_t tmpmask;
| 291 | + int cpu;
| 292 | +
| 293 | + if (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1) {
| 294 | + this_cpu_write(runqueues.membarrier_state, membarrier_state);
| 295 | +
| 296 | + /*
| 297 | + * For single mm user, we can simply issue a memory barrier
| 298 | + * after setting MEMBARRIER_STATE_GLOBAL_EXPEDITED in the
| 299 | + * mm and in the current runqueue to guarantee that no memory
| 300 | + * access following registration is reordered before
| 301 | + * registration.
| 302 | + */
| 303 | + smp_mb();
| 304 | + return 0;
| 305 | + }
| 306 | +
| 307 | + if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
| 308 | + return -ENOMEM;
| 309 | +
| 310 | + /*
| 311 | + * For mm with multiple users, we need to ensure all future
| 312 | + * scheduler executions will observe @mm's new membarrier
| 313 | + * state.
| 314 | + */
| 315 | + synchronize_rcu();
| 316 | +
| 317 | + /*
| 318 | + * For each cpu runqueue, if the task's mm match @mm, ensure that all
| 319 | + * @mm's membarrier state set bits are also set in the runqueue's
| 320 | + * membarrier state. This ensures that a runqueue scheduling
| 321 | + * between threads which are users of @mm has its membarrier state
| 322 | + * updated.
| 323 | + */
| 324 | + cpus_read_lock();
| 325 | + rcu_read_lock();
| 326 | + for_each_online_cpu(cpu) {
| 327 | + struct rq *rq = cpu_rq(cpu);
| 328 | + struct task_struct *p;
| 329 | +
| 330 | + p = rcu_dereference(rq->curr);
| 331 | + if (p && p->mm == mm)
| 332 | + __cpumask_set_cpu(cpu, tmpmask);
| 333 | + }
| 334 | + rcu_read_unlock();
| 335 | +
| 336 | + on_each_cpu_mask(tmpmask, ipi_sync_rq_state, mm, true);
| 337 | +
| 338 | + free_cpumask_var(tmpmask);
| 339 | + cpus_read_unlock();
185 | 340 |
186 | 341 | return 0;
187 | 342 | }
.. | ..
190 | 345 | {
191 | 346 | struct task_struct *p = current;
192 | 347 | struct mm_struct *mm = p->mm;
| 348 | + int ret;
193 | 349 |
194 | 350 | if (atomic_read(&mm->membarrier_state) &
195 | 351 | MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY)
196 | 352 | return 0;
197 | 353 | atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED, &mm->membarrier_state);
198 | | - if (atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1) {
199 | | - /*
200 | | - * For single mm user, single threaded process, we can
201 | | - * simply issue a memory barrier after setting
202 | | - * MEMBARRIER_STATE_GLOBAL_EXPEDITED to guarantee that
203 | | - * no memory access following registration is reordered
204 | | - * before registration.
205 | | - */
206 | | - smp_mb();
207 | | - } else {
208 | | - /*
209 | | - * For multi-mm user threads, we need to ensure all
210 | | - * future scheduler executions will observe the new
211 | | - * thread flag state for this mm.
212 | | - */
213 | | - synchronize_sched();
214 | | - }
| 354 | + ret = sync_runqueues_membarrier_state(mm);
| 355 | + if (ret)
| 356 | + return ret;
215 | 357 | atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY,
216 | 358 | &mm->membarrier_state);
217 | 359 |
.. | ..
222 | 364 | {
223 | 365 | struct task_struct *p = current;
224 | 366 | struct mm_struct *mm = p->mm;
225 | | - int state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY;
| 367 | + int ready_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
| 368 | + set_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED,
| 369 | + ret;
226 | 370 |
227 | | - if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
| 371 | + if (flags == MEMBARRIER_FLAG_SYNC_CORE) {
228 | 372 | if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
229 | 373 | return -EINVAL;
230 | | - state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
| 374 | + ready_state =
| 375 | + MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
| 376 | + } else if (flags == MEMBARRIER_FLAG_RSEQ) {
| 377 | + if (!IS_ENABLED(CONFIG_RSEQ))
| 378 | + return -EINVAL;
| 379 | + ready_state =
| 380 | + MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY;
| 381 | + } else {
| 382 | + WARN_ON_ONCE(flags);
231 | 383 | }
232 | 384 |
233 | 385 | /*
.. | ..
235 | 387 | * groups, which use the same mm. (CLONE_VM but not
236 | 388 | * CLONE_THREAD).
237 | 389 | */
238 | | - if ((atomic_read(&mm->membarrier_state) & state) == state)
| 390 | + if ((atomic_read(&mm->membarrier_state) & ready_state) == ready_state)
239 | 391 | return 0;
240 | | - atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED, &mm->membarrier_state);
241 | 392 | if (flags & MEMBARRIER_FLAG_SYNC_CORE)
242 | | - atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE,
243 | | - &mm->membarrier_state);
244 | | - if (!(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)) {
245 | | - /*
246 | | - * Ensure all future scheduler executions will observe the
247 | | - * new thread flag state for this process.
248 | | - */
249 | | - synchronize_sched();
250 | | - }
251 | | - atomic_or(state, &mm->membarrier_state);
| 393 | + set_state |= MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE;
| 394 | + if (flags & MEMBARRIER_FLAG_RSEQ)
| 395 | + set_state |= MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ;
| 396 | + atomic_or(set_state, &mm->membarrier_state);
| 397 | + ret = sync_runqueues_membarrier_state(mm);
| 398 | + if (ret)
| 399 | + return ret;
| 400 | + atomic_or(ready_state, &mm->membarrier_state);
252 | 401 |
253 | 402 | return 0;
254 | 403 | }
255 | 404 |
256 | 405 | /**
257 | 406 | * sys_membarrier - issue memory barriers on a set of threads
258 | | - * @cmd: Takes command values defined in enum membarrier_cmd.
259 | | - * @flags: Currently needs to be 0. For future extensions.
| 407 | + * @cmd: Takes command values defined in enum membarrier_cmd.
| 408 | + * @flags: Currently needs to be 0 for all commands other than
| 409 | + * MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ: in the latter
| 410 | + * case it can be MEMBARRIER_CMD_FLAG_CPU, indicating that @cpu_id
| 411 | + * contains the CPU on which to interrupt (= restart)
| 412 | + * the RSEQ critical section.
| 413 | + * @cpu_id: if @flags == MEMBARRIER_CMD_FLAG_CPU, indicates the cpu on which
| 414 | + * RSEQ CS should be interrupted (@cmd must be
| 415 | + * MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ).
260 | 416 | *
261 | 417 | * If this system call is not implemented, -ENOSYS is returned. If the
262 | 418 | * command specified does not exist, not available on the running
263 | 419 | * kernel, or if the command argument is invalid, this system call
264 | 420 | * returns -EINVAL. For a given command, with flags argument set to 0,
265 | | - * this system call is guaranteed to always return the same value until
266 | | - * reboot.
| 421 | + * if this system call returns -ENOSYS or -EINVAL, it is guaranteed to
| 422 | + * always return the same value until reboot. In addition, it can return
| 423 | + * -ENOMEM if there is not enough memory available to perform the system
| 424 | + * call.
267 | 425 | *
268 | 426 | * All memory accesses performed in program order from each targeted thread
269 | 427 | * is guaranteed to be ordered with respect to sys_membarrier(). If we use
.. | ..
280 | 438 | * smp_mb() X O O
281 | 439 | * sys_membarrier() O O O
282 | 440 | */
283 | | -SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
| 441 | +SYSCALL_DEFINE3(membarrier, int, cmd, unsigned int, flags, int, cpu_id)
284 | 442 | {
285 | | - if (unlikely(flags))
286 | | - return -EINVAL;
| 443 | + switch (cmd) {
| 444 | + case MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ:
| 445 | + if (unlikely(flags && flags != MEMBARRIER_CMD_FLAG_CPU))
| 446 | + return -EINVAL;
| 447 | + break;
| 448 | + default:
| 449 | + if (unlikely(flags))
| 450 | + return -EINVAL;
| 451 | + }
| 452 | +
| 453 | + if (!(flags & MEMBARRIER_CMD_FLAG_CPU))
| 454 | + cpu_id = -1;
| 455 | +
287 | 456 | switch (cmd) {
288 | 457 | case MEMBARRIER_CMD_QUERY:
289 | 458 | {
.. | ..
298 | 467 | if (tick_nohz_full_enabled())
299 | 468 | return -EINVAL;
300 | 469 | if (num_online_cpus() > 1)
301 | | - synchronize_sched();
| 470 | + synchronize_rcu();
302 | 471 | return 0;
303 | 472 | case MEMBARRIER_CMD_GLOBAL_EXPEDITED:
304 | 473 | return membarrier_global_expedited();
305 | 474 | case MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED:
306 | 475 | return membarrier_register_global_expedited();
307 | 476 | case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
308 | | - return membarrier_private_expedited(0);
| 477 | + return membarrier_private_expedited(0, cpu_id);
309 | 478 | case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
310 | 479 | return membarrier_register_private_expedited(0);
311 | 480 | case MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE:
312 | | - return membarrier_private_expedited(MEMBARRIER_FLAG_SYNC_CORE);
| 481 | + return membarrier_private_expedited(MEMBARRIER_FLAG_SYNC_CORE, cpu_id);
313 | 482 | case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE:
314 | 483 | return membarrier_register_private_expedited(MEMBARRIER_FLAG_SYNC_CORE);
| 484 | + case MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ:
| 485 | + return membarrier_private_expedited(MEMBARRIER_FLAG_RSEQ, cpu_id);
| 486 | + case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ:
| 487 | + return membarrier_register_private_expedited(MEMBARRIER_FLAG_RSEQ);
315 | 488 | default:
316 | 489 | return -EINVAL;
317 | 490 | }
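
The hunks above wire the new RSEQ commands into sys_membarrier(). For reference, here is a minimal userspace sketch (not part of the patch) of how a process might exercise MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ and MEMBARRIER_CMD_FLAG_CPU. It assumes uapi headers new enough to define these constants; the membarrier() wrapper is a local helper, since glibc does not provide one.

#define _GNU_SOURCE
#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

/* Raw syscall wrapper: cmd, flags, cpu_id as in SYSCALL_DEFINE3 above. */
static int membarrier(int cmd, unsigned int flags, int cpu_id)
{
	return syscall(__NR_membarrier, cmd, flags, cpu_id);
}

int main(void)
{
	/* Register once per process, or the expedited command returns -EPERM. */
	if (membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0))
		perror("register rseq fence");

	/* Restart rseq critical sections on every CPU running this mm. */
	if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, 0, 0))
		perror("fence all cpus");

	/* Or target a single CPU (here CPU 0) via MEMBARRIER_CMD_FLAG_CPU. */
	if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
		       MEMBARRIER_CMD_FLAG_CPU, 0))
		perror("fence cpu 0");
	return 0;
}

Per the kernel code above, registration must precede the expedited command, and when MEMBARRIER_CMD_FLAG_CPU is not set the cpu_id argument is ignored (the kernel forces it to -1).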
---|