9370bb92b2d16684ee45cf24e879c93c509162da  2024-12-19

--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -2,10 +2,13 @@
 /*
  * padata.c - generic interface to process data streams in parallel
  *
- * See Documentation/padata.txt for an api documentation.
+ * See Documentation/core-api/padata.rst for more information.
  *
  * Copyright (C) 2008, 2009 secunet Security Networks AG
  * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com>
+ *
+ * Copyright (c) 2020 Oracle and/or its affiliates.
+ * Author: Daniel Jordan <daniel.m.jordan@oracle.com>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -21,6 +24,7 @@
  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */

+#include <linux/completion.h>
 #include <linux/export.h>
 #include <linux/cpumask.h>
 #include <linux/err.h>
@@ -31,11 +35,30 @@
 #include <linux/slab.h>
 #include <linux/sysfs.h>
 #include <linux/rcupdate.h>
-#include <linux/module.h>

-#define MAX_OBJ_NUM 1000
+#define PADATA_WORK_ONSTACK     1       /* Work's memory is on stack */
+
+struct padata_work {
+        struct work_struct      pw_work;
+        struct list_head        pw_list;  /* padata_free_works linkage */
+        void                    *pw_data;
+};
+
+static DEFINE_SPINLOCK(padata_works_lock);
+static struct padata_work *padata_works;
+static LIST_HEAD(padata_free_works);
+
+struct padata_mt_job_state {
+        spinlock_t              lock;
+        struct completion       completion;
+        struct padata_mt_job    *job;
+        int                     nworks;
+        int                     nworks_fini;
+        unsigned long           chunk_size;
+};

 static void padata_free_pd(struct parallel_data *pd);
+static void __init padata_mt_helper(struct work_struct *work);

 static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
 {
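
Reviewer note: the per-CPU parallel queues are gone; parallel jobs now draw from a fixed pool of padata_work items, sized to num_possible_cpus() in padata_init() (end of this diff) and threaded onto padata_free_works under padata_works_lock. When the pool runs dry, padata_do_parallel() below falls back to running the job in the submitting task. A minimal, single-threaded sketch of the same fixed-pool pattern (illustrative names, not kernel API; the real helpers are called with padata_works_lock held):

#include <stddef.h>

struct work_item { struct work_item *next; };

static struct work_item pool[64];      /* fixed capacity, like padata_works */
static struct work_item *free_list;    /* like padata_free_works */

static void pool_init(void)            /* like padata_init() */
{
        for (size_t i = 0; i < 64; i++) {
                pool[i].next = free_list;
                free_list = &pool[i];
        }
}

/* Returns NULL when exhausted; the caller then runs the job inline. */
static struct work_item *pool_get(void)
{
        struct work_item *w = free_list;

        if (w)
                free_list = w->next;
        return w;
}

static void pool_put(struct work_item *w)
{
        w->next = free_list;
        free_list = w;
}
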
@@ -48,101 +71,168 @@
         return target_cpu;
 }

-static int padata_cpu_hash(struct parallel_data *pd)
+static int padata_cpu_hash(struct parallel_data *pd, unsigned int seq_nr)
 {
-        unsigned int seq_nr;
-        int cpu_index;
-
         /*
          * Hash the sequence numbers to the cpus by taking
          * seq_nr mod. number of cpus in use.
          */
-
-        seq_nr = atomic_inc_return(&pd->seq_nr);
-        cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);
+        int cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);

         return padata_index_to_cpu(pd, cpu_index);
 }

+static struct padata_work *padata_work_alloc(void)
+{
+        struct padata_work *pw;
+
+        lockdep_assert_held(&padata_works_lock);
+
+        if (list_empty(&padata_free_works))
+                return NULL;    /* No more work items allowed to be queued. */
+
+        pw = list_first_entry(&padata_free_works, struct padata_work, pw_list);
+        list_del(&pw->pw_list);
+        return pw;
+}
+
+static void padata_work_init(struct padata_work *pw, work_func_t work_fn,
+                             void *data, int flags)
+{
+        if (flags & PADATA_WORK_ONSTACK)
+                INIT_WORK_ONSTACK(&pw->pw_work, work_fn);
+        else
+                INIT_WORK(&pw->pw_work, work_fn);
+        pw->pw_data = data;
+}
+
+static int __init padata_work_alloc_mt(int nworks, void *data,
+                                       struct list_head *head)
+{
+        int i;
+
+        spin_lock(&padata_works_lock);
+        /* Start at 1 because the current task participates in the job. */
+        for (i = 1; i < nworks; ++i) {
+                struct padata_work *pw = padata_work_alloc();
+
+                if (!pw)
+                        break;
+                padata_work_init(pw, padata_mt_helper, data, 0);
+                list_add(&pw->pw_list, head);
+        }
+        spin_unlock(&padata_works_lock);
+
+        return i;
+}
+
+static void padata_work_free(struct padata_work *pw)
+{
+        lockdep_assert_held(&padata_works_lock);
+        list_add(&pw->pw_list, &padata_free_works);
+}
+
+static void __init padata_works_free(struct list_head *works)
+{
+        struct padata_work *cur, *next;
+
+        if (list_empty(works))
+                return;
+
+        spin_lock(&padata_works_lock);
+        list_for_each_entry_safe(cur, next, works, pw_list) {
+                list_del(&cur->pw_list);
+                padata_work_free(cur);
+        }
+        spin_unlock(&padata_works_lock);
+}
+
 static void padata_parallel_worker(struct work_struct *parallel_work)
 {
-        struct padata_parallel_queue *pqueue;
-        LIST_HEAD(local_list);
+        struct padata_work *pw = container_of(parallel_work, struct padata_work,
+                                              pw_work);
+        struct padata_priv *padata = pw->pw_data;

         local_bh_disable();
-        pqueue = container_of(parallel_work,
-                              struct padata_parallel_queue, work);
-
-        spin_lock(&pqueue->parallel.lock);
-        list_replace_init(&pqueue->parallel.list, &local_list);
-        spin_unlock(&pqueue->parallel.lock);
-
-        while (!list_empty(&local_list)) {
-                struct padata_priv *padata;
-
-                padata = list_entry(local_list.next,
-                                    struct padata_priv, list);
-
-                list_del_init(&padata->list);
-
-                padata->parallel(padata);
-        }
-
+        padata->parallel(padata);
+        spin_lock(&padata_works_lock);
+        padata_work_free(pw);
+        spin_unlock(&padata_works_lock);
         local_bh_enable();
 }

 /**
  * padata_do_parallel - padata parallelization function
  *
- * @pinst: padata instance
+ * @ps: padatashell
  * @padata: object to be parallelized
- * @cb_cpu: cpu the serialization callback function will run on,
- *          must be in the serial cpumask of padata(i.e. cpumask.cbcpu).
+ * @cb_cpu: pointer to the CPU that the serialization callback function should
+ *          run on.  If it's not in the serial cpumask of @pinst
+ *          (i.e. cpumask.cbcpu), this function selects a fallback CPU and if
+ *          none found, returns -EINVAL.
  *
  * The parallelization callback function will run with BHs off.
  * Note: Every object which is parallelized by padata_do_parallel
  * must be seen by padata_do_serial.
+ *
+ * Return: 0 on success or else negative error code.
  */
-int padata_do_parallel(struct padata_instance *pinst,
-                       struct padata_priv *padata, int cb_cpu)
+int padata_do_parallel(struct padata_shell *ps,
+                       struct padata_priv *padata, int *cb_cpu)
 {
-        int target_cpu, err;
-        struct padata_parallel_queue *queue;
+        struct padata_instance *pinst = ps->pinst;
+        int i, cpu, cpu_index, err;
         struct parallel_data *pd;
+        struct padata_work *pw;

         rcu_read_lock_bh();

-        pd = rcu_dereference_bh(pinst->pd);
+        pd = rcu_dereference_bh(ps->pd);

         err = -EINVAL;
         if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID)
                 goto out;

-        if (!cpumask_test_cpu(cb_cpu, pd->cpumask.cbcpu))
-                goto out;
+        if (!cpumask_test_cpu(*cb_cpu, pd->cpumask.cbcpu)) {
+                if (!cpumask_weight(pd->cpumask.cbcpu))
+                        goto out;
+
+                /* Select an alternate fallback CPU and notify the caller. */
+                cpu_index = *cb_cpu % cpumask_weight(pd->cpumask.cbcpu);
+
+                cpu = cpumask_first(pd->cpumask.cbcpu);
+                for (i = 0; i < cpu_index; i++)
+                        cpu = cpumask_next(cpu, pd->cpumask.cbcpu);
+
+                *cb_cpu = cpu;
+        }

         err = -EBUSY;
         if ((pinst->flags & PADATA_RESET))
                 goto out;

-        if (atomic_read(&pd->refcnt) >= MAX_OBJ_NUM)
-                goto out;
-
-        err = 0;
         atomic_inc(&pd->refcnt);
         padata->pd = pd;
-        padata->cb_cpu = cb_cpu;
+        padata->cb_cpu = *cb_cpu;

-        target_cpu = padata_cpu_hash(pd);
-        padata->cpu = target_cpu;
-        queue = per_cpu_ptr(pd->pqueue, target_cpu);
+        spin_lock(&padata_works_lock);
+        padata->seq_nr = ++pd->seq_nr;
+        pw = padata_work_alloc();
+        spin_unlock(&padata_works_lock);

-        spin_lock(&queue->parallel.lock);
-        list_add_tail(&padata->list, &queue->parallel.list);
-        spin_unlock(&queue->parallel.lock);
+        if (!pw) {
+                /* Maximum works limit exceeded, run in the current task. */
+                padata->parallel(padata);
+        }

-        queue_work_on(target_cpu, pinst->wq, &queue->work);
+        rcu_read_unlock_bh();

+        if (pw) {
+                padata_work_init(pw, padata_parallel_worker, padata, 0);
+                queue_work(pinst->parallel_wq, &pw->pw_work);
+        }
+
+        return 0;
 out:
         rcu_read_unlock_bh();

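
Reviewer note: the padata_do_parallel() signature change is the visible API break in this hunk — callers pass a padata_shell instead of the instance, and cb_cpu becomes a pointer so the function can report the fallback CPU it picked. A hedged caller sketch (struct my_request and my_submit() are hypothetical; only padata_do_parallel() and the embedded padata_priv are real API here):

/* Hypothetical submitter built on the new signature. */
struct my_request {
        struct padata_priv padata;      /* ->parallel/->serial set elsewhere */
        /* ... payload ... */
};

static int my_submit(struct padata_shell *ps, struct my_request *req)
{
        int cb_cpu = 0;         /* preferred callback CPU; may be rewritten */
        int err;

        err = padata_do_parallel(ps, &req->padata, &cb_cpu);
        if (err)
                return err;     /* -EINVAL (bad cpumask) or -EBUSY (reset) */

        /* cb_cpu now names the CPU where req->padata.serial() will run. */
        return 0;
}
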
@@ -151,63 +241,58 @@
 EXPORT_SYMBOL(padata_do_parallel);

 /*
- * padata_get_next - Get the next object that needs serialization.
+ * padata_find_next - Find the next object that needs serialization.
  *
- * Return values are:
- *
- * A pointer to the control struct of the next object that needs
- * serialization, if present in one of the percpu reorder queues.
- *
- * -EINPROGRESS, if the next object that needs serialization will
- *  be parallel processed by another cpu and is not yet present in
- *  the cpu's reorder queue.
- *
- * -ENODATA, if this cpu has to do the parallel processing for
- *  the next object.
+ * Return:
+ * * A pointer to the control struct of the next object that needs
+ *   serialization, if present in one of the percpu reorder queues.
+ * * NULL, if the next object that needs serialization will
+ *   be parallel processed by another cpu and is not yet present in
+ *   the cpu's reorder queue.
  */
-static struct padata_priv *padata_get_next(struct parallel_data *pd)
+static struct padata_priv *padata_find_next(struct parallel_data *pd,
+                                            bool remove_object)
 {
-        struct padata_parallel_queue *next_queue;
         struct padata_priv *padata;
         struct padata_list *reorder;
         int cpu = pd->cpu;

-        next_queue = per_cpu_ptr(pd->pqueue, cpu);
-        reorder = &next_queue->reorder;
+        reorder = per_cpu_ptr(pd->reorder_list, cpu);

         spin_lock(&reorder->lock);
-        if (!list_empty(&reorder->list)) {
-                padata = list_entry(reorder->list.next,
-                                    struct padata_priv, list);
-
-                list_del_init(&padata->list);
-                atomic_dec(&pd->reorder_objects);
-
-                pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1,
-                                            false);
-
+        if (list_empty(&reorder->list)) {
                 spin_unlock(&reorder->lock);
-                goto out;
+                return NULL;
         }
+
+        padata = list_entry(reorder->list.next, struct padata_priv, list);
+
+        /*
+         * Checks the rare case where two or more parallel jobs have hashed to
+         * the same CPU and one of the later ones finishes first.
+         */
+        if (padata->seq_nr != pd->processed) {
+                spin_unlock(&reorder->lock);
+                return NULL;
+        }
+
+        if (remove_object) {
+                list_del_init(&padata->list);
+                ++pd->processed;
+                pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false);
+        }
+
         spin_unlock(&reorder->lock);
-
-        if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) {
-                padata = ERR_PTR(-ENODATA);
-                goto out;
-        }
-
-        padata = ERR_PTR(-EINPROGRESS);
-out:
         return padata;
 }

 static void padata_reorder(struct parallel_data *pd)
 {
+        struct padata_instance *pinst = pd->ps->pinst;
         int cb_cpu;
         struct padata_priv *padata;
         struct padata_serial_queue *squeue;
-        struct padata_instance *pinst = pd->pinst;
-        struct padata_parallel_queue *next_queue;
+        struct padata_list *reorder;

         /*
          * We need to ensure that only one cpu can work on dequeueing of
@@ -223,25 +308,15 @@
                 return;

         while (1) {
-                padata = padata_get_next(pd);
+                padata = padata_find_next(pd, true);

                 /*
                  * If the next object that needs serialization is parallel
                  * processed by another cpu and is still on it's way to the
                  * cpu's reorder queue, nothing to do for now.
                  */
-                if (PTR_ERR(padata) == -EINPROGRESS)
+                if (!padata)
                         break;
-
-                /*
-                 * This cpu has to do the parallel processing of the next
-                 * object. It's waiting in the cpu's parallelization queue,
-                 * so exit immediately.
-                 */
-                if (PTR_ERR(padata) == -ENODATA) {
-                        spin_unlock_bh(&pd->lock);
-                        return;
-                }

                 cb_cpu = padata->cb_cpu;
                 squeue = per_cpu_ptr(pd->squeue, cb_cpu);
@@ -250,7 +325,7 @@
                 list_add_tail(&padata->list, &squeue->serial.list);
                 spin_unlock(&squeue->serial.lock);

-                queue_work_on(cb_cpu, pinst->wq, &squeue->work);
+                queue_work_on(cb_cpu, pinst->serial_wq, &squeue->work);
         }

         spin_unlock_bh(&pd->lock);
@@ -261,13 +336,13 @@
          *
          * Ensure reorder queue is read after pd->lock is dropped so we see
          * new objects from another task in padata_do_serial. Pairs with
-         * smp_mb__after_atomic in padata_do_serial.
+         * smp_mb in padata_do_serial.
          */
         smp_mb();

-        next_queue = per_cpu_ptr(pd->pqueue, pd->cpu);
-        if (!list_empty(&next_queue->reorder.list))
-                queue_work(pinst->wq, &pd->reorder_work);
+        reorder = per_cpu_ptr(pd->reorder_list, pd->cpu);
+        if (!list_empty(&reorder->list) && padata_find_next(pd, false))
+                queue_work(pinst->serial_wq, &pd->reorder_work);
 }

 static void invoke_padata_reorder(struct work_struct *work)
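
Reviewer note: padata_reorder() keeps the old "single dequeuer" scheme — whoever wins the pd->lock trylock drains the reorder queues, and the smp_mb()/recheck at the end closes the window where a producer publishes an object after the drain but before the unlock is visible. A portable C11 sketch of that trylock-drain-recheck pattern, with a pthread mutex and an atomic counter standing in for pd->lock and the reorder lists (illustrative only, not the kernel primitives):

#include <pthread.h>
#include <stdatomic.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_int pending;              /* stands in for the reorder lists */

static void process_one(void) { /* serialize one object */ }

static void reorder(void)
{
        /* Only one thread drains at a time; losers rely on the recheck. */
        if (pthread_mutex_trylock(&lock) != 0)
                return;

        while (atomic_load(&pending) > 0) {
                atomic_fetch_sub(&pending, 1);
                process_one();
        }

        pthread_mutex_unlock(&lock);

        /* Pairs with the producer's barrier: an object published after the
         * drain saw empty, but before the unlock became visible, must be
         * caught here — like the queue_work(&pd->reorder_work) recheck. */
        atomic_thread_fence(memory_order_seq_cst);
        if (atomic_load(&pending) > 0)
                reorder();
}

static void produce(void)               /* like padata_do_serial() */
{
        atomic_fetch_add(&pending, 1);
        atomic_thread_fence(memory_order_seq_cst);
        reorder();
}
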
@@ -325,40 +400,139 @@
 void padata_do_serial(struct padata_priv *padata)
 {
         struct parallel_data *pd = padata->pd;
-        struct padata_parallel_queue *pqueue = per_cpu_ptr(pd->pqueue,
-                                                           padata->cpu);
+        int hashed_cpu = padata_cpu_hash(pd, padata->seq_nr);
+        struct padata_list *reorder = per_cpu_ptr(pd->reorder_list, hashed_cpu);
+        struct padata_priv *cur;
+        struct list_head *pos;

-        spin_lock(&pqueue->reorder.lock);
-        list_add_tail(&padata->list, &pqueue->reorder.list);
-        atomic_inc(&pd->reorder_objects);
-        spin_unlock(&pqueue->reorder.lock);
+        spin_lock(&reorder->lock);
+        /* Sort in ascending order of sequence number. */
+        list_for_each_prev(pos, &reorder->list) {
+                cur = list_entry(pos, struct padata_priv, list);
+                if (cur->seq_nr < padata->seq_nr)
+                        break;
+        }
+        list_add(&padata->list, pos);
+        spin_unlock(&reorder->lock);

         /*
          * Ensure the addition to the reorder list is ordered correctly
          * with the trylock of pd->lock in padata_reorder. Pairs with smp_mb
          * in padata_reorder.
          */
-        smp_mb__after_atomic();
+        smp_mb();

         padata_reorder(pd);
 }
 EXPORT_SYMBOL(padata_do_serial);

-static int padata_setup_cpumasks(struct parallel_data *pd,
-                                 const struct cpumask *pcpumask,
-                                 const struct cpumask *cbcpumask)
+static int padata_setup_cpumasks(struct padata_instance *pinst)
 {
-        if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
+        struct workqueue_attrs *attrs;
+        int err;
+
+        attrs = alloc_workqueue_attrs();
+        if (!attrs)
                 return -ENOMEM;

-        cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask);
-        if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
-                free_cpumask_var(pd->cpumask.pcpu);
-                return -ENOMEM;
+        /* Restrict parallel_wq workers to pd->cpumask.pcpu. */
+        cpumask_copy(attrs->cpumask, pinst->cpumask.pcpu);
+        err = apply_workqueue_attrs(pinst->parallel_wq, attrs);
+        free_workqueue_attrs(attrs);
+
+        return err;
+}
+
+static void __init padata_mt_helper(struct work_struct *w)
+{
+        struct padata_work *pw = container_of(w, struct padata_work, pw_work);
+        struct padata_mt_job_state *ps = pw->pw_data;
+        struct padata_mt_job *job = ps->job;
+        bool done;
+
+        spin_lock(&ps->lock);
+
+        while (job->size > 0) {
+                unsigned long start, size, end;
+
+                start = job->start;
+                /* So end is chunk size aligned if enough work remains. */
+                size = roundup(start + 1, ps->chunk_size) - start;
+                size = min(size, job->size);
+                end = start + size;
+
+                job->start = end;
+                job->size -= size;
+
+                spin_unlock(&ps->lock);
+                job->thread_fn(start, end, job->fn_arg);
+                spin_lock(&ps->lock);
         }

-        cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_online_mask);
-        return 0;
+        ++ps->nworks_fini;
+        done = (ps->nworks_fini == ps->nworks);
+        spin_unlock(&ps->lock);
+
+        if (done)
+                complete(&ps->completion);
+}
+
+/**
+ * padata_do_multithreaded - run a multithreaded job
+ * @job: Description of the job.
+ *
+ * See the definition of struct padata_mt_job for more details.
+ */
+void __init padata_do_multithreaded(struct padata_mt_job *job)
+{
+        /* In case threads finish at different times. */
+        static const unsigned long load_balance_factor = 4;
+        struct padata_work my_work, *pw;
+        struct padata_mt_job_state ps;
+        LIST_HEAD(works);
+        int nworks;
+
+        if (job->size == 0)
+                return;
+
+        /* Ensure at least one thread when size < min_chunk. */
+        nworks = max(job->size / job->min_chunk, 1ul);
+        nworks = min(nworks, job->max_threads);
+
+        if (nworks == 1) {
+                /* Single thread, no coordination needed, cut to the chase. */
+                job->thread_fn(job->start, job->start + job->size, job->fn_arg);
+                return;
+        }
+
+        spin_lock_init(&ps.lock);
+        init_completion(&ps.completion);
+        ps.job = job;
+        ps.nworks = padata_work_alloc_mt(nworks, &ps, &works);
+        ps.nworks_fini = 0;
+
+        /*
+         * Chunk size is the amount of work a helper does per call to the
+         * thread function.  Load balance large jobs between threads by
+         * increasing the number of chunks, guarantee at least the minimum
+         * chunk size from the caller, and honor the caller's alignment.
+         */
+        ps.chunk_size = job->size / (ps.nworks * load_balance_factor);
+        ps.chunk_size = max(ps.chunk_size, job->min_chunk);
+        ps.chunk_size = roundup(ps.chunk_size, job->align);
+
+        list_for_each_entry(pw, &works, pw_list)
+                queue_work(system_unbound_wq, &pw->pw_work);
+
+        /* Use the current thread, which saves starting a workqueue worker. */
+        padata_work_init(&my_work, padata_mt_helper, &ps, PADATA_WORK_ONSTACK);
+        padata_mt_helper(&my_work.pw_work);
+
+        /* Wait for all the helpers to finish. */
+        wait_for_completion(&ps.completion);
+
+        destroy_work_on_stack(&my_work.pw_work);
+        padata_works_free(&works);
 }

 static void __padata_list_init(struct padata_list *pd_list)
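
Reviewer note: the chunking math in padata_do_multithreaded() is easiest to check with numbers. Below is a stand-alone rework of it with example inputs, runnable in userspace (all values are illustrative):

#include <stdio.h>

#define ROUNDUP(x, y)   ((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
        /* Example job: 4096 units, min_chunk 64, align 8, max_threads 4. */
        unsigned long size = 4096, min_chunk = 64, align = 8;
        unsigned long max_threads = 4, load_balance_factor = 4;
        unsigned long nworks, chunk;

        nworks = size / min_chunk;              /* 64 */
        if (nworks < 1)
                nworks = 1;
        if (nworks > max_threads)
                nworks = max_threads;           /* 4 */

        chunk = size / (nworks * load_balance_factor);  /* 4096/16 = 256 */
        if (chunk < min_chunk)
                chunk = min_chunk;
        chunk = ROUNDUP(chunk, align);          /* stays 256 */

        /* 4 helpers pull 16 chunks of 256 units, absorbing uneven finishes. */
        printf("nworks=%lu chunk=%lu chunks=%lu\n", nworks, chunk, size / chunk);
        return 0;
}
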
@@ -381,68 +555,62 @@
         }
 }

-/* Initialize all percpu queues used by parallel workers */
-static void padata_init_pqueues(struct parallel_data *pd)
+/* Initialize per-CPU reorder lists */
+static void padata_init_reorder_list(struct parallel_data *pd)
 {
-        int cpu_index, cpu;
-        struct padata_parallel_queue *pqueue;
+        int cpu;
+        struct padata_list *list;

-        cpu_index = 0;
-        for_each_possible_cpu(cpu) {
-                pqueue = per_cpu_ptr(pd->pqueue, cpu);
-
-                if (!cpumask_test_cpu(cpu, pd->cpumask.pcpu)) {
-                        pqueue->cpu_index = -1;
-                        continue;
-                }
-
-                pqueue->cpu_index = cpu_index;
-                cpu_index++;
-
-                __padata_list_init(&pqueue->reorder);
-                __padata_list_init(&pqueue->parallel);
-                INIT_WORK(&pqueue->work, padata_parallel_worker);
-                atomic_set(&pqueue->num_obj, 0);
+        for_each_cpu(cpu, pd->cpumask.pcpu) {
+                list = per_cpu_ptr(pd->reorder_list, cpu);
+                __padata_list_init(list);
         }
 }

 /* Allocate and initialize the internal cpumask dependend resources. */
-static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
-                                             const struct cpumask *pcpumask,
-                                             const struct cpumask *cbcpumask)
+static struct parallel_data *padata_alloc_pd(struct padata_shell *ps)
 {
+        struct padata_instance *pinst = ps->pinst;
         struct parallel_data *pd;

         pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
         if (!pd)
                 goto err;

-        pd->pqueue = alloc_percpu(struct padata_parallel_queue);
-        if (!pd->pqueue)
+        pd->reorder_list = alloc_percpu(struct padata_list);
+        if (!pd->reorder_list)
                 goto err_free_pd;

         pd->squeue = alloc_percpu(struct padata_serial_queue);
         if (!pd->squeue)
-                goto err_free_pqueue;
-        if (padata_setup_cpumasks(pd, pcpumask, cbcpumask) < 0)
-                goto err_free_squeue;
+                goto err_free_reorder_list;

-        padata_init_pqueues(pd);
+        pd->ps = ps;
+
+        if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
+                goto err_free_squeue;
+        if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL))
+                goto err_free_pcpu;
+
+        cpumask_and(pd->cpumask.pcpu, pinst->cpumask.pcpu, cpu_online_mask);
+        cpumask_and(pd->cpumask.cbcpu, pinst->cpumask.cbcpu, cpu_online_mask);
+
+        padata_init_reorder_list(pd);
         padata_init_squeues(pd);
-        atomic_set(&pd->seq_nr, -1);
-        atomic_set(&pd->reorder_objects, 0);
+        pd->seq_nr = -1;
         atomic_set(&pd->refcnt, 1);
-        pd->pinst = pinst;
         spin_lock_init(&pd->lock);
         pd->cpu = cpumask_first(pd->cpumask.pcpu);
         INIT_WORK(&pd->reorder_work, invoke_padata_reorder);

         return pd;

+err_free_pcpu:
+        free_cpumask_var(pd->cpumask.pcpu);
 err_free_squeue:
         free_percpu(pd->squeue);
-err_free_pqueue:
-        free_percpu(pd->pqueue);
+err_free_reorder_list:
+        free_percpu(pd->reorder_list);
 err_free_pd:
         kfree(pd);
 err:
@@ -453,7 +621,7 @@
 {
         free_cpumask_var(pd->cpumask.pcpu);
         free_cpumask_var(pd->cpumask.cbcpu);
-        free_percpu(pd->pqueue);
+        free_percpu(pd->reorder_list);
         free_percpu(pd->squeue);
         kfree(pd);
 }
@@ -474,65 +642,43 @@
 }

 /* Replace the internal control structure with a new one. */
-static void padata_replace(struct padata_instance *pinst,
-                           struct parallel_data *pd_new)
+static int padata_replace_one(struct padata_shell *ps)
 {
-        struct parallel_data *pd_old = pinst->pd;
-        int notification_mask = 0;
+        struct parallel_data *pd_new;
+
+        pd_new = padata_alloc_pd(ps);
+        if (!pd_new)
+                return -ENOMEM;
+
+        ps->opd = rcu_dereference_protected(ps->pd, 1);
+        rcu_assign_pointer(ps->pd, pd_new);
+
+        return 0;
+}
+
+static int padata_replace(struct padata_instance *pinst)
+{
+        struct padata_shell *ps;
+        int err = 0;

         pinst->flags |= PADATA_RESET;

-        rcu_assign_pointer(pinst->pd, pd_new);
+        list_for_each_entry(ps, &pinst->pslist, list) {
+                err = padata_replace_one(ps);
+                if (err)
+                        break;
+        }

         synchronize_rcu();

-        if (!cpumask_equal(pd_old->cpumask.pcpu, pd_new->cpumask.pcpu))
-                notification_mask |= PADATA_CPU_PARALLEL;
-        if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu))
-                notification_mask |= PADATA_CPU_SERIAL;
-
-        if (atomic_dec_and_test(&pd_old->refcnt))
-                padata_free_pd(pd_old);
-
-        if (notification_mask)
-                blocking_notifier_call_chain(&pinst->cpumask_change_notifier,
-                                             notification_mask,
-                                             &pd_new->cpumask);
+        list_for_each_entry_continue_reverse(ps, &pinst->pslist, list)
+                if (atomic_dec_and_test(&ps->opd->refcnt))
+                        padata_free_pd(ps->opd);

         pinst->flags &= ~PADATA_RESET;
-}

-/**
- * padata_register_cpumask_notifier - Registers a notifier that will be called
- *                                    if either pcpu or cbcpu or both cpumasks change.
- *
- * @pinst: A poineter to padata instance
- * @nblock: A pointer to notifier block.
- */
-int padata_register_cpumask_notifier(struct padata_instance *pinst,
-                                     struct notifier_block *nblock)
-{
-        return blocking_notifier_chain_register(&pinst->cpumask_change_notifier,
-                                                nblock);
+        return err;
 }
-EXPORT_SYMBOL(padata_register_cpumask_notifier);
-
-/**
- * padata_unregister_cpumask_notifier - Unregisters cpumask notifier
- *                                      registered earlier using padata_register_cpumask_notifier
- *
- * @pinst: A pointer to data instance.
- * @nlock: A pointer to notifier block.
- */
-int padata_unregister_cpumask_notifier(struct padata_instance *pinst,
-                                       struct notifier_block *nblock)
-{
-        return blocking_notifier_chain_unregister(
-                &pinst->cpumask_change_notifier,
-                nblock);
-}
-EXPORT_SYMBOL(padata_unregister_cpumask_notifier);
-

 /* If cpumask contains no active cpu, we mark the instance as invalid. */
 static bool padata_validate_cpumask(struct padata_instance *pinst,
@@ -552,7 +698,7 @@
                                  cpumask_var_t cbcpumask)
 {
         int valid;
-        struct parallel_data *pd;
+        int err;

         valid = padata_validate_cpumask(pinst, pcpumask);
         if (!valid) {
@@ -565,29 +711,26 @@
                 __padata_stop(pinst);

 out_replace:
-        pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
-        if (!pd)
-                return -ENOMEM;
-
         cpumask_copy(pinst->cpumask.pcpu, pcpumask);
         cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);

-        padata_replace(pinst, pd);
+        err = padata_setup_cpumasks(pinst) ?: padata_replace(pinst);

         if (valid)
                 __padata_start(pinst);

-        return 0;
+        return err;
 }

 /**
- * padata_set_cpumask: Sets specified by @cpumask_type cpumask to the value
- *                     equivalent to @cpumask.
- *
+ * padata_set_cpumask - Sets specified by @cpumask_type cpumask to the value
+ *                      equivalent to @cpumask.
  * @pinst: padata instance
  * @cpumask_type: PADATA_CPU_SERIAL or PADATA_CPU_PARALLEL corresponding
  *                to parallel and serial cpumasks respectively.
  * @cpumask: the cpumask to use
+ *
+ * Return: 0 on success or negative error code
  */
 int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
                        cpumask_var_t cpumask)
@@ -621,121 +764,37 @@
 }
 EXPORT_SYMBOL(padata_set_cpumask);

-/**
- * padata_start - start the parallel processing
- *
- * @pinst: padata instance to start
- */
-int padata_start(struct padata_instance *pinst)
-{
-        int err = 0;
-
-        mutex_lock(&pinst->lock);
-
-        if (pinst->flags & PADATA_INVALID)
-                err = -EINVAL;
-
-        __padata_start(pinst);
-
-        mutex_unlock(&pinst->lock);
-
-        return err;
-}
-EXPORT_SYMBOL(padata_start);
-
-/**
- * padata_stop - stop the parallel processing
- *
- * @pinst: padata instance to stop
- */
-void padata_stop(struct padata_instance *pinst)
-{
-        mutex_lock(&pinst->lock);
-        __padata_stop(pinst);
-        mutex_unlock(&pinst->lock);
-}
-EXPORT_SYMBOL(padata_stop);
-
 #ifdef CONFIG_HOTPLUG_CPU

 static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
 {
-        struct parallel_data *pd;
+        int err = 0;

         if (cpumask_test_cpu(cpu, cpu_online_mask)) {
-                pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
-                                     pinst->cpumask.cbcpu);
-                if (!pd)
-                        return -ENOMEM;
-
-                padata_replace(pinst, pd);
+                err = padata_replace(pinst);

                 if (padata_validate_cpumask(pinst, pinst->cpumask.pcpu) &&
                     padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
                         __padata_start(pinst);
         }

-        return 0;
+        return err;
 }

 static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
 {
-        struct parallel_data *pd = NULL;
+        int err = 0;

-        if (cpumask_test_cpu(cpu, cpu_online_mask)) {
-
+        if (!cpumask_test_cpu(cpu, cpu_online_mask)) {
                 if (!padata_validate_cpumask(pinst, pinst->cpumask.pcpu) ||
                     !padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
                         __padata_stop(pinst);

-                pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
-                                     pinst->cpumask.cbcpu);
-                if (!pd)
-                        return -ENOMEM;
-
-                padata_replace(pinst, pd);
-
-                cpumask_clear_cpu(cpu, pd->cpumask.cbcpu);
-                cpumask_clear_cpu(cpu, pd->cpumask.pcpu);
+                err = padata_replace(pinst);
         }
-
-        return 0;
-}
-
- /**
- * padata_remove_cpu - remove a cpu from the one or both(serial and parallel)
- *                     padata cpumasks.
- *
- * @pinst: padata instance
- * @cpu: cpu to remove
- * @mask: bitmask specifying from which cpumask @cpu should be removed
- *        The @mask may be any combination of the following flags:
- *          PADATA_CPU_SERIAL   - serial cpumask
- *          PADATA_CPU_PARALLEL - parallel cpumask
- */
-int padata_remove_cpu(struct padata_instance *pinst, int cpu, int mask)
-{
-        int err;
-
-        if (!(mask & (PADATA_CPU_SERIAL | PADATA_CPU_PARALLEL)))
-                return -EINVAL;
-
-        mutex_lock(&pinst->lock);
-
-        get_online_cpus();
-        if (mask & PADATA_CPU_SERIAL)
-                cpumask_clear_cpu(cpu, pinst->cpumask.cbcpu);
-        if (mask & PADATA_CPU_PARALLEL)
-                cpumask_clear_cpu(cpu, pinst->cpumask.pcpu);
-
-        err = __padata_remove_cpu(pinst, cpu);
-        put_online_cpus();
-
-        mutex_unlock(&pinst->lock);

         return err;
 }
-EXPORT_SYMBOL(padata_remove_cpu);

 static inline int pinst_has_cpu(struct padata_instance *pinst, int cpu)
@@ -748,7 +807,7 @@
         struct padata_instance *pinst;
         int ret;

-        pinst = hlist_entry_safe(node, struct padata_instance, node);
+        pinst = hlist_entry_safe(node, struct padata_instance, cpu_online_node);
         if (!pinst_has_cpu(pinst, cpu))
                 return 0;

@@ -758,12 +817,12 @@
         return ret;
 }

-static int padata_cpu_prep_down(unsigned int cpu, struct hlist_node *node)
+static int padata_cpu_dead(unsigned int cpu, struct hlist_node *node)
 {
         struct padata_instance *pinst;
         int ret;

-        pinst = hlist_entry_safe(node, struct padata_instance, node);
+        pinst = hlist_entry_safe(node, struct padata_instance, cpu_dead_node);
         if (!pinst_has_cpu(pinst, cpu))
                 return 0;

@@ -779,13 +838,17 @@
 static void __padata_free(struct padata_instance *pinst)
 {
 #ifdef CONFIG_HOTPLUG_CPU
-        cpuhp_state_remove_instance_nocalls(hp_online, &pinst->node);
+        cpuhp_state_remove_instance_nocalls(CPUHP_PADATA_DEAD,
+                                            &pinst->cpu_dead_node);
+        cpuhp_state_remove_instance_nocalls(hp_online, &pinst->cpu_online_node);
 #endif

-        padata_stop(pinst);
-        padata_free_pd(pinst->pd);
+        WARN_ON(!list_empty(&pinst->pslist));
+
         free_cpumask_var(pinst->cpumask.pcpu);
         free_cpumask_var(pinst->cpumask.cbcpu);
+        destroy_workqueue(pinst->serial_wq);
+        destroy_workqueue(pinst->parallel_wq);
         kfree(pinst);
 }

@@ -872,6 +935,7 @@
         &parallel_cpumask_attr.attr,
         NULL,
 };
+ATTRIBUTE_GROUPS(padata_default);

 static ssize_t padata_sysfs_show(struct kobject *kobj,
                                  struct attribute *attr, char *buf)
@@ -910,92 +974,86 @@

 static struct kobj_type padata_attr_type = {
         .sysfs_ops = &padata_sysfs_ops,
-        .default_attrs = padata_default_attrs,
+        .default_groups = padata_default_groups,
         .release = padata_sysfs_release,
 };

 /**
- * padata_alloc - allocate and initialize a padata instance and specify
- *                cpumasks for serial and parallel workers.
+ * padata_alloc - allocate and initialize a padata instance
+ * @name: used to identify the instance
  *
- * @wq: workqueue to use for the allocated padata instance
- * @pcpumask: cpumask that will be used for padata parallelization
- * @cbcpumask: cpumask that will be used for padata serialization
- *
- * Must be called from a cpus_read_lock() protected region
+ * Return: new instance on success, NULL on error
  */
-static struct padata_instance *padata_alloc(struct workqueue_struct *wq,
-                                            const struct cpumask *pcpumask,
-                                            const struct cpumask *cbcpumask)
+struct padata_instance *padata_alloc(const char *name)
 {
         struct padata_instance *pinst;
-        struct parallel_data *pd = NULL;

         pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL);
         if (!pinst)
                 goto err;

-        if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
+        pinst->parallel_wq = alloc_workqueue("%s_parallel", WQ_UNBOUND, 0,
+                                             name);
+        if (!pinst->parallel_wq)
                 goto err_free_inst;
+
+        get_online_cpus();
+
+        pinst->serial_wq = alloc_workqueue("%s_serial", WQ_MEM_RECLAIM |
+                                           WQ_CPU_INTENSIVE, 1, name);
+        if (!pinst->serial_wq)
+                goto err_put_cpus;
+
+        if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
+                goto err_free_serial_wq;
         if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) {
                 free_cpumask_var(pinst->cpumask.pcpu);
-                goto err_free_inst;
+                goto err_free_serial_wq;
         }
-        if (!padata_validate_cpumask(pinst, pcpumask) ||
-            !padata_validate_cpumask(pinst, cbcpumask))
+
+        INIT_LIST_HEAD(&pinst->pslist);
+
+        cpumask_copy(pinst->cpumask.pcpu, cpu_possible_mask);
+        cpumask_copy(pinst->cpumask.cbcpu, cpu_possible_mask);
+
+        if (padata_setup_cpumasks(pinst))
                 goto err_free_masks;

-        pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
-        if (!pd)
-                goto err_free_masks;
+        __padata_start(pinst);

-        rcu_assign_pointer(pinst->pd, pd);
-
-        pinst->wq = wq;
-
-        cpumask_copy(pinst->cpumask.pcpu, pcpumask);
-        cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
-
-        pinst->flags = 0;
-
-        BLOCKING_INIT_NOTIFIER_HEAD(&pinst->cpumask_change_notifier);
         kobject_init(&pinst->kobj, &padata_attr_type);
         mutex_init(&pinst->lock);

 #ifdef CONFIG_HOTPLUG_CPU
-        cpuhp_state_add_instance_nocalls_cpuslocked(hp_online, &pinst->node);
+        cpuhp_state_add_instance_nocalls_cpuslocked(hp_online,
+                                                    &pinst->cpu_online_node);
+        cpuhp_state_add_instance_nocalls_cpuslocked(CPUHP_PADATA_DEAD,
+                                                    &pinst->cpu_dead_node);
 #endif
+
+        put_online_cpus();
+
         return pinst;

 err_free_masks:
         free_cpumask_var(pinst->cpumask.pcpu);
         free_cpumask_var(pinst->cpumask.cbcpu);
+err_free_serial_wq:
+        destroy_workqueue(pinst->serial_wq);
+err_put_cpus:
+        put_online_cpus();
+        destroy_workqueue(pinst->parallel_wq);
 err_free_inst:
         kfree(pinst);
 err:
         return NULL;
 }
-
-/**
- * padata_alloc_possible - Allocate and initialize padata instance.
- *                         Use the cpu_possible_mask for serial and
- *                         parallel workers.
- *
- * @wq: workqueue to use for the allocated padata instance
- *
- * Must be called from a cpus_read_lock() protected region
- */
-struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq)
-{
-        lockdep_assert_cpus_held();
-        return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
-}
-EXPORT_SYMBOL(padata_alloc_possible);
+EXPORT_SYMBOL(padata_alloc);

 /**
  * padata_free - free a padata instance
  *
- * @padata_inst: padata instance to free
+ * @pinst: padata instance to free
  */
 void padata_free(struct padata_instance *pinst)
 {
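
Reviewer note: with the instance owning its workqueues and the cpumasks defaulting to cpu_possible_mask, allocation collapses to a name, and per-user state moves into shells (added at the end of this diff). A hedged lifecycle sketch (the my_module names are hypothetical):

/* Hypothetical user of the reworked API. */
static struct padata_instance *pinst;
static struct padata_shell *ps;

static int __init my_module_init(void)
{
        pinst = padata_alloc("my_module");
        if (!pinst)
                return -ENOMEM;

        ps = padata_alloc_shell(pinst); /* one shell per independent user */
        if (!ps) {
                padata_free(pinst);
                return -ENOMEM;
        }
        return 0;
}

static void __exit my_module_exit(void)
{
        padata_free_shell(ps);          /* must precede padata_free() */
        padata_free(pinst);
}
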
@@ -1003,25 +1061,99 @@
 }
 EXPORT_SYMBOL(padata_free);

-#ifdef CONFIG_HOTPLUG_CPU
-
-static __init int padata_driver_init(void)
+/**
+ * padata_alloc_shell - Allocate and initialize padata shell.
+ *
+ * @pinst: Parent padata_instance object.
+ *
+ * Return: new shell on success, NULL on error
+ */
+struct padata_shell *padata_alloc_shell(struct padata_instance *pinst)
 {
+        struct parallel_data *pd;
+        struct padata_shell *ps;
+
+        ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+        if (!ps)
+                goto out;
+
+        ps->pinst = pinst;
+
+        get_online_cpus();
+        pd = padata_alloc_pd(ps);
+        put_online_cpus();
+
+        if (!pd)
+                goto out_free_ps;
+
+        mutex_lock(&pinst->lock);
+        RCU_INIT_POINTER(ps->pd, pd);
+        list_add(&ps->list, &pinst->pslist);
+        mutex_unlock(&pinst->lock);
+
+        return ps;
+
+out_free_ps:
+        kfree(ps);
+out:
+        return NULL;
+}
+EXPORT_SYMBOL(padata_alloc_shell);
+
+/**
+ * padata_free_shell - free a padata shell
+ *
+ * @ps: padata shell to free
+ */
+void padata_free_shell(struct padata_shell *ps)
+{
+        if (!ps)
+                return;
+
+        mutex_lock(&ps->pinst->lock);
+        list_del(&ps->list);
+        padata_free_pd(rcu_dereference_protected(ps->pd, 1));
+        mutex_unlock(&ps->pinst->lock);
+
+        kfree(ps);
+}
+EXPORT_SYMBOL(padata_free_shell);
+
+void __init padata_init(void)
+{
+        unsigned int i, possible_cpus;
+#ifdef CONFIG_HOTPLUG_CPU
         int ret;

         ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "padata:online",
-                                      padata_cpu_online,
-                                      padata_cpu_prep_down);
+                                      padata_cpu_online, NULL);
         if (ret < 0)
-                return ret;
+                goto err;
         hp_online = ret;
-        return 0;
-}
-module_init(padata_driver_init);

-static __exit void padata_driver_exit(void)
-{
-        cpuhp_remove_multi_state(hp_online);
-}
-module_exit(padata_driver_exit);
+        ret = cpuhp_setup_state_multi(CPUHP_PADATA_DEAD, "padata:dead",
+                                      NULL, padata_cpu_dead);
+        if (ret < 0)
+                goto remove_online_state;
 #endif
+
+        possible_cpus = num_possible_cpus();
+        padata_works = kmalloc_array(possible_cpus, sizeof(struct padata_work),
+                                     GFP_KERNEL);
+        if (!padata_works)
+                goto remove_dead_state;
+
+        for (i = 0; i < possible_cpus; ++i)
+                list_add(&padata_works[i].pw_list, &padata_free_works);
+
+        return;
+
+remove_dead_state:
+#ifdef CONFIG_HOTPLUG_CPU
+        cpuhp_remove_multi_state(CPUHP_PADATA_DEAD);
+remove_online_state:
+        cpuhp_remove_multi_state(hp_online);
+err:
+#endif
+        pr_warn("padata: initialization failed\n");
+}
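
Reviewer note: padata_do_multithreaded() is __init-only in this version, aimed at boot-time jobs. A hedged sketch of a caller (init_range() and the numbers are made up; the padata_mt_job fields match the ones this diff reads: thread_fn, fn_arg, start, size, align, min_chunk, max_threads):

static void __init init_range(unsigned long start, unsigned long end,
                              void *arg)
{
        /* initialize items in [start, end) */
}

static void __init run_boot_job(void)
{
        struct padata_mt_job job = {
                .thread_fn      = init_range,
                .fn_arg         = NULL,
                .start          = 0,
                .size           = 1UL << 20,    /* 1M units of work */
                .align          = 1,            /* no alignment constraint */
                .min_chunk      = 1UL << 12,    /* don't split below 4K units */
                .max_threads    = 8,
        };

        padata_do_multithreaded(&job);
}
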