2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/kernel/padata.c
....@@ -2,10 +2,13 @@
22 /*
33 * padata.c - generic interface to process data streams in parallel
44 *
5
- * See Documentation/padata.txt for an api documentation.
5
+ * See Documentation/core-api/padata.rst for more information.
66 *
77 * Copyright (C) 2008, 2009 secunet Security Networks AG
88 * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com>
9
+ *
10
+ * Copyright (c) 2020 Oracle and/or its affiliates.
11
+ * Author: Daniel Jordan <daniel.m.jordan@oracle.com>
912 *
1013 * This program is free software; you can redistribute it and/or modify it
1114 * under the terms and conditions of the GNU General Public License,
....@@ -21,6 +24,7 @@
2124 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
2225 */
2326
27
+#include <linux/completion.h>
2428 #include <linux/export.h>
2529 #include <linux/cpumask.h>
2630 #include <linux/err.h>
....@@ -31,11 +35,30 @@
3135 #include <linux/slab.h>
3236 #include <linux/sysfs.h>
3337 #include <linux/rcupdate.h>
34
-#include <linux/module.h>
3538
36
-#define MAX_OBJ_NUM 1000
39
+#define PADATA_WORK_ONSTACK 1 /* Work's memory is on stack */
40
+
41
+struct padata_work {
42
+ struct work_struct pw_work;
43
+ struct list_head pw_list; /* padata_free_works linkage */
44
+ void *pw_data;
45
+};
46
+
47
+static DEFINE_SPINLOCK(padata_works_lock);
48
+static struct padata_work *padata_works;
49
+static LIST_HEAD(padata_free_works);
50
+
51
+struct padata_mt_job_state {
52
+ spinlock_t lock;
53
+ struct completion completion;
54
+ struct padata_mt_job *job;
55
+ int nworks;
56
+ int nworks_fini;
57
+ unsigned long chunk_size;
58
+};
3759
3860 static void padata_free_pd(struct parallel_data *pd);
61
+static void __init padata_mt_helper(struct work_struct *work);
3962
4063 static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
4164 {
....@@ -48,101 +71,166 @@
4871 return target_cpu;
4972 }
5073
51
-static int padata_cpu_hash(struct parallel_data *pd)
74
+static int padata_cpu_hash(struct parallel_data *pd, unsigned int seq_nr)
5275 {
53
- unsigned int seq_nr;
54
- int cpu_index;
55
-
5676 /*
5777 * Hash the sequence numbers to the cpus by taking
5878 * seq_nr mod. number of cpus in use.
5979 */
60
-
61
- seq_nr = atomic_inc_return(&pd->seq_nr);
62
- cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);
80
+ int cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);
6381
6482 return padata_index_to_cpu(pd, cpu_index);
6583 }
6684
85
+static struct padata_work *padata_work_alloc(void)
86
+{
87
+ struct padata_work *pw;
88
+
89
+ lockdep_assert_held(&padata_works_lock);
90
+
91
+ if (list_empty(&padata_free_works))
92
+ return NULL; /* No more work items allowed to be queued. */
93
+
94
+ pw = list_first_entry(&padata_free_works, struct padata_work, pw_list);
95
+ list_del(&pw->pw_list);
96
+ return pw;
97
+}
98
+
99
+static void padata_work_init(struct padata_work *pw, work_func_t work_fn,
100
+ void *data, int flags)
101
+{
102
+ if (flags & PADATA_WORK_ONSTACK)
103
+ INIT_WORK_ONSTACK(&pw->pw_work, work_fn);
104
+ else
105
+ INIT_WORK(&pw->pw_work, work_fn);
106
+ pw->pw_data = data;
107
+}
108
+
109
+static int __init padata_work_alloc_mt(int nworks, void *data,
110
+ struct list_head *head)
111
+{
112
+ int i;
113
+
114
+ spin_lock(&padata_works_lock);
115
+ /* Start at 1 because the current task participates in the job. */
116
+ for (i = 1; i < nworks; ++i) {
117
+ struct padata_work *pw = padata_work_alloc();
118
+
119
+ if (!pw)
120
+ break;
121
+ padata_work_init(pw, padata_mt_helper, data, 0);
122
+ list_add(&pw->pw_list, head);
123
+ }
124
+ spin_unlock(&padata_works_lock);
125
+
126
+ return i;
127
+}
128
+
129
+static void padata_work_free(struct padata_work *pw)
130
+{
131
+ lockdep_assert_held(&padata_works_lock);
132
+ list_add(&pw->pw_list, &padata_free_works);
133
+}
134
+
135
+static void __init padata_works_free(struct list_head *works)
136
+{
137
+ struct padata_work *cur, *next;
138
+
139
+ if (list_empty(works))
140
+ return;
141
+
142
+ spin_lock(&padata_works_lock);
143
+ list_for_each_entry_safe(cur, next, works, pw_list) {
144
+ list_del(&cur->pw_list);
145
+ padata_work_free(cur);
146
+ }
147
+ spin_unlock(&padata_works_lock);
148
+}
149
+
67150 static void padata_parallel_worker(struct work_struct *parallel_work)
68151 {
69
- struct padata_parallel_queue *pqueue;
70
- LIST_HEAD(local_list);
152
+ struct padata_work *pw = container_of(parallel_work, struct padata_work,
153
+ pw_work);
154
+ struct padata_priv *padata = pw->pw_data;
71155
72156 local_bh_disable();
73
- pqueue = container_of(parallel_work,
74
- struct padata_parallel_queue, work);
75
-
76
- spin_lock(&pqueue->parallel.lock);
77
- list_replace_init(&pqueue->parallel.list, &local_list);
78
- spin_unlock(&pqueue->parallel.lock);
79
-
80
- while (!list_empty(&local_list)) {
81
- struct padata_priv *padata;
82
-
83
- padata = list_entry(local_list.next,
84
- struct padata_priv, list);
85
-
86
- list_del_init(&padata->list);
87
-
88
- padata->parallel(padata);
89
- }
90
-
157
+ padata->parallel(padata);
158
+ spin_lock(&padata_works_lock);
159
+ padata_work_free(pw);
160
+ spin_unlock(&padata_works_lock);
91161 local_bh_enable();
92162 }
93163
94164 /**
95165 * padata_do_parallel - padata parallelization function
96166 *
97
- * @pinst: padata instance
167
+ * @ps: padatashell
98168 * @padata: object to be parallelized
99
- * @cb_cpu: cpu the serialization callback function will run on,
100
- * must be in the serial cpumask of padata(i.e. cpumask.cbcpu).
169
+ * @cb_cpu: pointer to the CPU that the serialization callback function should
170
+ * run on. If it's not in the serial cpumask of @pinst
171
+ * (i.e. cpumask.cbcpu), this function selects a fallback CPU and if
172
+ * none found, returns -EINVAL.
101173 *
102174 * The parallelization callback function will run with BHs off.
103175 * Note: Every object which is parallelized by padata_do_parallel
104176 * must be seen by padata_do_serial.
177
+ *
178
+ * Return: 0 on success or else negative error code.
105179 */
106
-int padata_do_parallel(struct padata_instance *pinst,
107
- struct padata_priv *padata, int cb_cpu)
180
+int padata_do_parallel(struct padata_shell *ps,
181
+ struct padata_priv *padata, int *cb_cpu)
108182 {
109
- int target_cpu, err;
110
- struct padata_parallel_queue *queue;
183
+ struct padata_instance *pinst = ps->pinst;
184
+ int i, cpu, cpu_index, err;
111185 struct parallel_data *pd;
186
+ struct padata_work *pw;
112187
113188 rcu_read_lock_bh();
114189
115
- pd = rcu_dereference_bh(pinst->pd);
190
+ pd = rcu_dereference_bh(ps->pd);
116191
117192 err = -EINVAL;
118193 if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID)
119194 goto out;
120195
121
- if (!cpumask_test_cpu(cb_cpu, pd->cpumask.cbcpu))
122
- goto out;
196
+ if (!cpumask_test_cpu(*cb_cpu, pd->cpumask.cbcpu)) {
197
+ if (!cpumask_weight(pd->cpumask.cbcpu))
198
+ goto out;
199
+
200
+ /* Select an alternate fallback CPU and notify the caller. */
201
+ cpu_index = *cb_cpu % cpumask_weight(pd->cpumask.cbcpu);
202
+
203
+ cpu = cpumask_first(pd->cpumask.cbcpu);
204
+ for (i = 0; i < cpu_index; i++)
205
+ cpu = cpumask_next(cpu, pd->cpumask.cbcpu);
206
+
207
+ *cb_cpu = cpu;
208
+ }
123209
124210 err = -EBUSY;
125211 if ((pinst->flags & PADATA_RESET))
126212 goto out;
127213
128
- if (atomic_read(&pd->refcnt) >= MAX_OBJ_NUM)
129
- goto out;
130
-
131
- err = 0;
132214 atomic_inc(&pd->refcnt);
133215 padata->pd = pd;
134
- padata->cb_cpu = cb_cpu;
216
+ padata->cb_cpu = *cb_cpu;
135217
136
- target_cpu = padata_cpu_hash(pd);
137
- padata->cpu = target_cpu;
138
- queue = per_cpu_ptr(pd->pqueue, target_cpu);
218
+ spin_lock(&padata_works_lock);
219
+ padata->seq_nr = ++pd->seq_nr;
220
+ pw = padata_work_alloc();
221
+ spin_unlock(&padata_works_lock);
139222
140
- spin_lock(&queue->parallel.lock);
141
- list_add_tail(&padata->list, &queue->parallel.list);
142
- spin_unlock(&queue->parallel.lock);
223
+ rcu_read_unlock_bh();
143224
144
- queue_work_on(target_cpu, pinst->wq, &queue->work);
225
+ if (pw) {
226
+ padata_work_init(pw, padata_parallel_worker, padata, 0);
227
+ queue_work(pinst->parallel_wq, &pw->pw_work);
228
+ } else {
229
+ /* Maximum works limit exceeded, run in the current task. */
230
+ padata->parallel(padata);
231
+ }
145232
233
+ return 0;
146234 out:
147235 rcu_read_unlock_bh();
148236
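The calling convention above changes in two ways: submitters now pass a padata_shell rather than the instance, and @cb_cpu is a pointer so a fallback callback CPU can be reported back to the caller. A minimal caller sketch follows; the request structure, callbacks, and the already-allocated shell are hypothetical and only illustrate the intended flow (compare pcrypt for a real user):

    struct my_request {
            struct padata_priv padata;      /* must be embedded */
            /* ... caller's own data ... */
    };

    static void my_parallel(struct padata_priv *padata)
    {
            /* Runs with BHs off on one of the parallel CPUs. */
            /* ... heavy per-object work ... */
            padata_do_serial(padata);       /* hand back for ordering */
    }

    static void my_serial(struct padata_priv *padata)
    {
            /* Runs on the callback CPU, in submission order. */
    }

    static int my_submit(struct padata_shell *ps, struct my_request *req)
    {
            int cb_cpu = 0;                 /* preferred callback CPU */

            req->padata.parallel = my_parallel;
            req->padata.serial = my_serial;

            /* cb_cpu may be rewritten if CPU 0 is not in cpumask.cbcpu. */
            return padata_do_parallel(ps, &req->padata, &cb_cpu);
    }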
....@@ -151,63 +239,58 @@
151239 EXPORT_SYMBOL(padata_do_parallel);
152240
153241 /*
154
- * padata_get_next - Get the next object that needs serialization.
242
+ * padata_find_next - Find the next object that needs serialization.
155243 *
156
- * Return values are:
157
- *
158
- * A pointer to the control struct of the next object that needs
159
- * serialization, if present in one of the percpu reorder queues.
160
- *
161
- * -EINPROGRESS, if the next object that needs serialization will
162
- * be parallel processed by another cpu and is not yet present in
163
- * the cpu's reorder queue.
164
- *
165
- * -ENODATA, if this cpu has to do the parallel processing for
166
- * the next object.
244
+ * Return:
245
+ * * A pointer to the control struct of the next object that needs
246
+ * serialization, if present in one of the percpu reorder queues.
247
+ * * NULL, if the next object that needs serialization will
248
+ * be parallel processed by another cpu and is not yet present in
249
+ * the cpu's reorder queue.
167250 */
168
-static struct padata_priv *padata_get_next(struct parallel_data *pd)
251
+static struct padata_priv *padata_find_next(struct parallel_data *pd,
252
+ bool remove_object)
169253 {
170
- struct padata_parallel_queue *next_queue;
171254 struct padata_priv *padata;
172255 struct padata_list *reorder;
173256 int cpu = pd->cpu;
174257
175
- next_queue = per_cpu_ptr(pd->pqueue, cpu);
176
- reorder = &next_queue->reorder;
258
+ reorder = per_cpu_ptr(pd->reorder_list, cpu);
177259
178260 spin_lock(&reorder->lock);
179
- if (!list_empty(&reorder->list)) {
180
- padata = list_entry(reorder->list.next,
181
- struct padata_priv, list);
182
-
183
- list_del_init(&padata->list);
184
- atomic_dec(&pd->reorder_objects);
185
-
186
- pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1,
187
- false);
188
-
261
+ if (list_empty(&reorder->list)) {
189262 spin_unlock(&reorder->lock);
190
- goto out;
263
+ return NULL;
191264 }
265
+
266
+ padata = list_entry(reorder->list.next, struct padata_priv, list);
267
+
268
+ /*
269
+ * Checks the rare case where two or more parallel jobs have hashed to
270
+ * the same CPU and one of the later ones finishes first.
271
+ */
272
+ if (padata->seq_nr != pd->processed) {
273
+ spin_unlock(&reorder->lock);
274
+ return NULL;
275
+ }
276
+
277
+ if (remove_object) {
278
+ list_del_init(&padata->list);
279
+ ++pd->processed;
280
+ pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false);
281
+ }
282
+
192283 spin_unlock(&reorder->lock);
193
-
194
- if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) {
195
- padata = ERR_PTR(-ENODATA);
196
- goto out;
197
- }
198
-
199
- padata = ERR_PTR(-EINPROGRESS);
200
-out:
201284 return padata;
202285 }
203286
204287 static void padata_reorder(struct parallel_data *pd)
205288 {
289
+ struct padata_instance *pinst = pd->ps->pinst;
206290 int cb_cpu;
207291 struct padata_priv *padata;
208292 struct padata_serial_queue *squeue;
209
- struct padata_instance *pinst = pd->pinst;
210
- struct padata_parallel_queue *next_queue;
293
+ struct padata_list *reorder;
211294
212295 /*
213296 * We need to ensure that only one cpu can work on dequeueing of
....@@ -223,25 +306,15 @@
223306 return;
224307
225308 while (1) {
226
- padata = padata_get_next(pd);
309
+ padata = padata_find_next(pd, true);
227310
228311 /*
229312 * If the next object that needs serialization is parallel
230313 * processed by another cpu and is still on it's way to the
231314 * cpu's reorder queue, nothing to do for now.
232315 */
233
- if (PTR_ERR(padata) == -EINPROGRESS)
316
+ if (!padata)
234317 break;
235
-
236
- /*
237
- * This cpu has to do the parallel processing of the next
238
- * object. It's waiting in the cpu's parallelization queue,
239
- * so exit immediately.
240
- */
241
- if (PTR_ERR(padata) == -ENODATA) {
242
- spin_unlock_bh(&pd->lock);
243
- return;
244
- }
245318
246319 cb_cpu = padata->cb_cpu;
247320 squeue = per_cpu_ptr(pd->squeue, cb_cpu);
....@@ -250,7 +323,7 @@
250323 list_add_tail(&padata->list, &squeue->serial.list);
251324 spin_unlock(&squeue->serial.lock);
252325
253
- queue_work_on(cb_cpu, pinst->wq, &squeue->work);
326
+ queue_work_on(cb_cpu, pinst->serial_wq, &squeue->work);
254327 }
255328
256329 spin_unlock_bh(&pd->lock);
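As a concrete illustration of the reorder logic (numbers purely hypothetical): with two parallel CPUs, objects receive seq_nr 0, 1, 2, 3 and hash to CPU0, CPU1, CPU0, CPU1 respectively. pd->processed starts at 0 and pd->cpu at the first parallel CPU, so padata_find_next() only hands out the object whose seq_nr matches pd->processed from that CPU's reorder list. If the job with seq_nr 1 finishes first, it simply waits on CPU1's list until seq_nr 0 has been serialized and pd->processed/pd->cpu have advanced, which keeps serialization in strict submission order even when later jobs complete earlier.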
....@@ -261,13 +334,13 @@
261334 *
262335 * Ensure reorder queue is read after pd->lock is dropped so we see
263336 * new objects from another task in padata_do_serial. Pairs with
264
- * smp_mb__after_atomic in padata_do_serial.
337
+ * smp_mb in padata_do_serial.
265338 */
266339 smp_mb();
267340
268
- next_queue = per_cpu_ptr(pd->pqueue, pd->cpu);
269
- if (!list_empty(&next_queue->reorder.list))
270
- queue_work(pinst->wq, &pd->reorder_work);
341
+ reorder = per_cpu_ptr(pd->reorder_list, pd->cpu);
342
+ if (!list_empty(&reorder->list) && padata_find_next(pd, false))
343
+ queue_work(pinst->serial_wq, &pd->reorder_work);
271344 }
272345
273346 static void invoke_padata_reorder(struct work_struct *work)
....@@ -325,40 +398,136 @@
325398 void padata_do_serial(struct padata_priv *padata)
326399 {
327400 struct parallel_data *pd = padata->pd;
328
- struct padata_parallel_queue *pqueue = per_cpu_ptr(pd->pqueue,
329
- padata->cpu);
401
+ int hashed_cpu = padata_cpu_hash(pd, padata->seq_nr);
402
+ struct padata_list *reorder = per_cpu_ptr(pd->reorder_list, hashed_cpu);
403
+ struct padata_priv *cur;
330404
331
- spin_lock(&pqueue->reorder.lock);
332
- list_add_tail(&padata->list, &pqueue->reorder.list);
333
- atomic_inc(&pd->reorder_objects);
334
- spin_unlock(&pqueue->reorder.lock);
405
+ spin_lock(&reorder->lock);
406
+ /* Sort in ascending order of sequence number. */
407
+ list_for_each_entry_reverse(cur, &reorder->list, list)
408
+ if (cur->seq_nr < padata->seq_nr)
409
+ break;
410
+ list_add(&padata->list, &cur->list);
411
+ spin_unlock(&reorder->lock);
335412
336413 /*
337414 * Ensure the addition to the reorder list is ordered correctly
338415 * with the trylock of pd->lock in padata_reorder. Pairs with smp_mb
339416 * in padata_reorder.
340417 */
341
- smp_mb__after_atomic();
418
+ smp_mb();
342419
343420 padata_reorder(pd);
344421 }
345422 EXPORT_SYMBOL(padata_do_serial);
346423
347
-static int padata_setup_cpumasks(struct parallel_data *pd,
348
- const struct cpumask *pcpumask,
349
- const struct cpumask *cbcpumask)
424
+static int padata_setup_cpumasks(struct padata_instance *pinst)
350425 {
351
- if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
426
+ struct workqueue_attrs *attrs;
427
+ int err;
428
+
429
+ attrs = alloc_workqueue_attrs();
430
+ if (!attrs)
352431 return -ENOMEM;
353432
354
- cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask);
355
- if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
356
- free_cpumask_var(pd->cpumask.pcpu);
357
- return -ENOMEM;
433
+ /* Restrict parallel_wq workers to pd->cpumask.pcpu. */
434
+ cpumask_copy(attrs->cpumask, pinst->cpumask.pcpu);
435
+ err = apply_workqueue_attrs(pinst->parallel_wq, attrs);
436
+ free_workqueue_attrs(attrs);
437
+
438
+ return err;
439
+}
440
+
441
+static void __init padata_mt_helper(struct work_struct *w)
442
+{
443
+ struct padata_work *pw = container_of(w, struct padata_work, pw_work);
444
+ struct padata_mt_job_state *ps = pw->pw_data;
445
+ struct padata_mt_job *job = ps->job;
446
+ bool done;
447
+
448
+ spin_lock(&ps->lock);
449
+
450
+ while (job->size > 0) {
451
+ unsigned long start, size, end;
452
+
453
+ start = job->start;
454
+ /* So end is chunk size aligned if enough work remains. */
455
+ size = roundup(start + 1, ps->chunk_size) - start;
456
+ size = min(size, job->size);
457
+ end = start + size;
458
+
459
+ job->start = end;
460
+ job->size -= size;
461
+
462
+ spin_unlock(&ps->lock);
463
+ job->thread_fn(start, end, job->fn_arg);
464
+ spin_lock(&ps->lock);
358465 }
359466
360
- cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_online_mask);
361
- return 0;
467
+ ++ps->nworks_fini;
468
+ done = (ps->nworks_fini == ps->nworks);
469
+ spin_unlock(&ps->lock);
470
+
471
+ if (done)
472
+ complete(&ps->completion);
473
+}
474
+
475
+/**
476
+ * padata_do_multithreaded - run a multithreaded job
477
+ * @job: Description of the job.
478
+ *
479
+ * See the definition of struct padata_mt_job for more details.
480
+ */
481
+void __init padata_do_multithreaded(struct padata_mt_job *job)
482
+{
483
+ /* In case threads finish at different times. */
484
+ static const unsigned long load_balance_factor = 4;
485
+ struct padata_work my_work, *pw;
486
+ struct padata_mt_job_state ps;
487
+ LIST_HEAD(works);
488
+ int nworks;
489
+
490
+ if (job->size == 0)
491
+ return;
492
+
493
+ /* Ensure at least one thread when size < min_chunk. */
494
+ nworks = max(job->size / job->min_chunk, 1ul);
495
+ nworks = min(nworks, job->max_threads);
496
+
497
+ if (nworks == 1) {
498
+ /* Single thread, no coordination needed, cut to the chase. */
499
+ job->thread_fn(job->start, job->start + job->size, job->fn_arg);
500
+ return;
501
+ }
502
+
503
+ spin_lock_init(&ps.lock);
504
+ init_completion(&ps.completion);
505
+ ps.job = job;
506
+ ps.nworks = padata_work_alloc_mt(nworks, &ps, &works);
507
+ ps.nworks_fini = 0;
508
+
509
+ /*
510
+ * Chunk size is the amount of work a helper does per call to the
511
+ * thread function. Load balance large jobs between threads by
512
+ * increasing the number of chunks, guarantee at least the minimum
513
+ * chunk size from the caller, and honor the caller's alignment.
514
+ */
515
+ ps.chunk_size = job->size / (ps.nworks * load_balance_factor);
516
+ ps.chunk_size = max(ps.chunk_size, job->min_chunk);
517
+ ps.chunk_size = roundup(ps.chunk_size, job->align);
518
+
519
+ list_for_each_entry(pw, &works, pw_list)
520
+ queue_work(system_unbound_wq, &pw->pw_work);
521
+
522
+ /* Use the current thread, which saves starting a workqueue worker. */
523
+ padata_work_init(&my_work, padata_mt_helper, &ps, PADATA_WORK_ONSTACK);
524
+ padata_mt_helper(&my_work.pw_work);
525
+
526
+ /* Wait for all the helpers to finish. */
527
+ wait_for_completion(&ps.completion);
528
+
529
+ destroy_work_on_stack(&my_work.pw_work);
530
+ padata_works_free(&works);
362531 }
363532
364533 static void __padata_list_init(struct padata_list *pd_list)
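padata_do_multithreaded() is aimed at boot-time jobs (note the __init annotations) such as deferred page initialization. A minimal sketch of a hypothetical __init caller follows, just to show how the padata_mt_job fields map onto the chunking logic above:

    static void __init my_init_range(unsigned long start, unsigned long end,
                                     void *arg)
    {
            /* Initialize items in [start, end); runs concurrently in helpers. */
    }

    static void __init my_parallel_init(unsigned long nr_items)
    {
            struct padata_mt_job job = {
                    .thread_fn   = my_init_range,
                    .fn_arg      = NULL,
                    .start       = 0,
                    .size        = nr_items,
                    .align       = 1,               /* no special alignment */
                    .min_chunk   = 1024,            /* smallest worthwhile unit */
                    .max_threads = num_online_cpus(),
            };

            padata_do_multithreaded(&job);  /* returns once all chunks are done */
    }

With, say, nr_items = 1 << 20 and eight participating threads, the chunk size works out to 1048576 / (8 * 4) = 32768 items, so each helper repeatedly grabs pieces four times smaller than an even split, which evens out stragglers that finish their share early.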
....@@ -381,68 +550,62 @@
381550 }
382551 }
383552
384
-/* Initialize all percpu queues used by parallel workers */
385
-static void padata_init_pqueues(struct parallel_data *pd)
553
+/* Initialize per-CPU reorder lists */
554
+static void padata_init_reorder_list(struct parallel_data *pd)
386555 {
387
- int cpu_index, cpu;
388
- struct padata_parallel_queue *pqueue;
556
+ int cpu;
557
+ struct padata_list *list;
389558
390
- cpu_index = 0;
391
- for_each_possible_cpu(cpu) {
392
- pqueue = per_cpu_ptr(pd->pqueue, cpu);
393
-
394
- if (!cpumask_test_cpu(cpu, pd->cpumask.pcpu)) {
395
- pqueue->cpu_index = -1;
396
- continue;
397
- }
398
-
399
- pqueue->cpu_index = cpu_index;
400
- cpu_index++;
401
-
402
- __padata_list_init(&pqueue->reorder);
403
- __padata_list_init(&pqueue->parallel);
404
- INIT_WORK(&pqueue->work, padata_parallel_worker);
405
- atomic_set(&pqueue->num_obj, 0);
559
+ for_each_cpu(cpu, pd->cpumask.pcpu) {
560
+ list = per_cpu_ptr(pd->reorder_list, cpu);
561
+ __padata_list_init(list);
406562 }
407563 }
408564
409565 /* Allocate and initialize the internal cpumask dependend resources. */
410
-static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
411
- const struct cpumask *pcpumask,
412
- const struct cpumask *cbcpumask)
566
+static struct parallel_data *padata_alloc_pd(struct padata_shell *ps)
413567 {
568
+ struct padata_instance *pinst = ps->pinst;
414569 struct parallel_data *pd;
415570
416571 pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
417572 if (!pd)
418573 goto err;
419574
420
- pd->pqueue = alloc_percpu(struct padata_parallel_queue);
421
- if (!pd->pqueue)
575
+ pd->reorder_list = alloc_percpu(struct padata_list);
576
+ if (!pd->reorder_list)
422577 goto err_free_pd;
423578
424579 pd->squeue = alloc_percpu(struct padata_serial_queue);
425580 if (!pd->squeue)
426
- goto err_free_pqueue;
427
- if (padata_setup_cpumasks(pd, pcpumask, cbcpumask) < 0)
428
- goto err_free_squeue;
581
+ goto err_free_reorder_list;
429582
430
- padata_init_pqueues(pd);
583
+ pd->ps = ps;
584
+
585
+ if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
586
+ goto err_free_squeue;
587
+ if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL))
588
+ goto err_free_pcpu;
589
+
590
+ cpumask_and(pd->cpumask.pcpu, pinst->cpumask.pcpu, cpu_online_mask);
591
+ cpumask_and(pd->cpumask.cbcpu, pinst->cpumask.cbcpu, cpu_online_mask);
592
+
593
+ padata_init_reorder_list(pd);
431594 padata_init_squeues(pd);
432
- atomic_set(&pd->seq_nr, -1);
433
- atomic_set(&pd->reorder_objects, 0);
595
+ pd->seq_nr = -1;
434596 atomic_set(&pd->refcnt, 1);
435
- pd->pinst = pinst;
436597 spin_lock_init(&pd->lock);
437598 pd->cpu = cpumask_first(pd->cpumask.pcpu);
438599 INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
439600
440601 return pd;
441602
603
+err_free_pcpu:
604
+ free_cpumask_var(pd->cpumask.pcpu);
442605 err_free_squeue:
443606 free_percpu(pd->squeue);
444
-err_free_pqueue:
445
- free_percpu(pd->pqueue);
607
+err_free_reorder_list:
608
+ free_percpu(pd->reorder_list);
446609 err_free_pd:
447610 kfree(pd);
448611 err:
....@@ -453,7 +616,7 @@
453616 {
454617 free_cpumask_var(pd->cpumask.pcpu);
455618 free_cpumask_var(pd->cpumask.cbcpu);
456
- free_percpu(pd->pqueue);
619
+ free_percpu(pd->reorder_list);
457620 free_percpu(pd->squeue);
458621 kfree(pd);
459622 }
....@@ -474,65 +637,43 @@
474637 }
475638
476639 /* Replace the internal control structure with a new one. */
477
-static void padata_replace(struct padata_instance *pinst,
478
- struct parallel_data *pd_new)
640
+static int padata_replace_one(struct padata_shell *ps)
479641 {
480
- struct parallel_data *pd_old = pinst->pd;
481
- int notification_mask = 0;
642
+ struct parallel_data *pd_new;
643
+
644
+ pd_new = padata_alloc_pd(ps);
645
+ if (!pd_new)
646
+ return -ENOMEM;
647
+
648
+ ps->opd = rcu_dereference_protected(ps->pd, 1);
649
+ rcu_assign_pointer(ps->pd, pd_new);
650
+
651
+ return 0;
652
+}
653
+
654
+static int padata_replace(struct padata_instance *pinst)
655
+{
656
+ struct padata_shell *ps;
657
+ int err = 0;
482658
483659 pinst->flags |= PADATA_RESET;
484660
485
- rcu_assign_pointer(pinst->pd, pd_new);
661
+ list_for_each_entry(ps, &pinst->pslist, list) {
662
+ err = padata_replace_one(ps);
663
+ if (err)
664
+ break;
665
+ }
486666
487667 synchronize_rcu();
488668
489
- if (!cpumask_equal(pd_old->cpumask.pcpu, pd_new->cpumask.pcpu))
490
- notification_mask |= PADATA_CPU_PARALLEL;
491
- if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu))
492
- notification_mask |= PADATA_CPU_SERIAL;
493
-
494
- if (atomic_dec_and_test(&pd_old->refcnt))
495
- padata_free_pd(pd_old);
496
-
497
- if (notification_mask)
498
- blocking_notifier_call_chain(&pinst->cpumask_change_notifier,
499
- notification_mask,
500
- &pd_new->cpumask);
669
+ list_for_each_entry_continue_reverse(ps, &pinst->pslist, list)
670
+ if (atomic_dec_and_test(&ps->opd->refcnt))
671
+ padata_free_pd(ps->opd);
501672
502673 pinst->flags &= ~PADATA_RESET;
503
-}
504674
505
-/**
506
- * padata_register_cpumask_notifier - Registers a notifier that will be called
507
- * if either pcpu or cbcpu or both cpumasks change.
508
- *
509
- * @pinst: A poineter to padata instance
510
- * @nblock: A pointer to notifier block.
511
- */
512
-int padata_register_cpumask_notifier(struct padata_instance *pinst,
513
- struct notifier_block *nblock)
514
-{
515
- return blocking_notifier_chain_register(&pinst->cpumask_change_notifier,
516
- nblock);
675
+ return err;
517676 }
518
-EXPORT_SYMBOL(padata_register_cpumask_notifier);
519
-
520
-/**
521
- * padata_unregister_cpumask_notifier - Unregisters cpumask notifier
522
- * registered earlier using padata_register_cpumask_notifier
523
- *
524
- * @pinst: A pointer to data instance.
525
- * @nlock: A pointer to notifier block.
526
- */
527
-int padata_unregister_cpumask_notifier(struct padata_instance *pinst,
528
- struct notifier_block *nblock)
529
-{
530
- return blocking_notifier_chain_unregister(
531
- &pinst->cpumask_change_notifier,
532
- nblock);
533
-}
534
-EXPORT_SYMBOL(padata_unregister_cpumask_notifier);
535
-
536677
537678 /* If cpumask contains no active cpu, we mark the instance as invalid. */
538679 static bool padata_validate_cpumask(struct padata_instance *pinst,
....@@ -552,7 +693,7 @@
552693 cpumask_var_t cbcpumask)
553694 {
554695 int valid;
555
- struct parallel_data *pd;
696
+ int err;
556697
557698 valid = padata_validate_cpumask(pinst, pcpumask);
558699 if (!valid) {
....@@ -565,29 +706,26 @@
565706 __padata_stop(pinst);
566707
567708 out_replace:
568
- pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
569
- if (!pd)
570
- return -ENOMEM;
571
-
572709 cpumask_copy(pinst->cpumask.pcpu, pcpumask);
573710 cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
574711
575
- padata_replace(pinst, pd);
712
+ err = padata_setup_cpumasks(pinst) ?: padata_replace(pinst);
576713
577714 if (valid)
578715 __padata_start(pinst);
579716
580
- return 0;
717
+ return err;
581718 }
582719
583720 /**
584
- * padata_set_cpumask: Sets specified by @cpumask_type cpumask to the value
585
- * equivalent to @cpumask.
586
- *
721
+ * padata_set_cpumask - Sets specified by @cpumask_type cpumask to the value
722
+ * equivalent to @cpumask.
587723 * @pinst: padata instance
588724 * @cpumask_type: PADATA_CPU_SERIAL or PADATA_CPU_PARALLEL corresponding
589725 * to parallel and serial cpumasks respectively.
590726 * @cpumask: the cpumask to use
727
+ *
728
+ * Return: 0 on success or negative error code
591729 */
592730 int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
593731 cpumask_var_t cpumask)
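Both cpumasks default to cpu_possible_mask after allocation, so a user that wants to confine the workers typically narrows them afterwards. A short hedged sketch, assuming the caller already holds a padata instance:

    cpumask_var_t mask;
    int err;

    if (!alloc_cpumask_var(&mask, GFP_KERNEL))
            return -ENOMEM;
    cpumask_copy(mask, cpumask_of_node(0));         /* e.g. restrict to node 0 */
    err = padata_set_cpumask(pinst, PADATA_CPU_PARALLEL, mask);
    free_cpumask_var(mask);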
....@@ -621,121 +759,37 @@
621759 }
622760 EXPORT_SYMBOL(padata_set_cpumask);
623761
624
-/**
625
- * padata_start - start the parallel processing
626
- *
627
- * @pinst: padata instance to start
628
- */
629
-int padata_start(struct padata_instance *pinst)
630
-{
631
- int err = 0;
632
-
633
- mutex_lock(&pinst->lock);
634
-
635
- if (pinst->flags & PADATA_INVALID)
636
- err = -EINVAL;
637
-
638
- __padata_start(pinst);
639
-
640
- mutex_unlock(&pinst->lock);
641
-
642
- return err;
643
-}
644
-EXPORT_SYMBOL(padata_start);
645
-
646
-/**
647
- * padata_stop - stop the parallel processing
648
- *
649
- * @pinst: padata instance to stop
650
- */
651
-void padata_stop(struct padata_instance *pinst)
652
-{
653
- mutex_lock(&pinst->lock);
654
- __padata_stop(pinst);
655
- mutex_unlock(&pinst->lock);
656
-}
657
-EXPORT_SYMBOL(padata_stop);
658
-
659762 #ifdef CONFIG_HOTPLUG_CPU
660763
661764 static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
662765 {
663
- struct parallel_data *pd;
766
+ int err = 0;
664767
665768 if (cpumask_test_cpu(cpu, cpu_online_mask)) {
666
- pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
667
- pinst->cpumask.cbcpu);
668
- if (!pd)
669
- return -ENOMEM;
670
-
671
- padata_replace(pinst, pd);
769
+ err = padata_replace(pinst);
672770
673771 if (padata_validate_cpumask(pinst, pinst->cpumask.pcpu) &&
674772 padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
675773 __padata_start(pinst);
676774 }
677775
678
- return 0;
776
+ return err;
679777 }
680778
681779 static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
682780 {
683
- struct parallel_data *pd = NULL;
781
+ int err = 0;
684782
685
- if (cpumask_test_cpu(cpu, cpu_online_mask)) {
686
-
783
+ if (!cpumask_test_cpu(cpu, cpu_online_mask)) {
687784 if (!padata_validate_cpumask(pinst, pinst->cpumask.pcpu) ||
688785 !padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
689786 __padata_stop(pinst);
690787
691
- pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
692
- pinst->cpumask.cbcpu);
693
- if (!pd)
694
- return -ENOMEM;
695
-
696
- padata_replace(pinst, pd);
697
-
698
- cpumask_clear_cpu(cpu, pd->cpumask.cbcpu);
699
- cpumask_clear_cpu(cpu, pd->cpumask.pcpu);
788
+ err = padata_replace(pinst);
700789 }
701
-
702
- return 0;
703
-}
704
-
705
- /**
706
- * padata_remove_cpu - remove a cpu from the one or both(serial and parallel)
707
- * padata cpumasks.
708
- *
709
- * @pinst: padata instance
710
- * @cpu: cpu to remove
711
- * @mask: bitmask specifying from which cpumask @cpu should be removed
712
- * The @mask may be any combination of the following flags:
713
- * PADATA_CPU_SERIAL - serial cpumask
714
- * PADATA_CPU_PARALLEL - parallel cpumask
715
- */
716
-int padata_remove_cpu(struct padata_instance *pinst, int cpu, int mask)
717
-{
718
- int err;
719
-
720
- if (!(mask & (PADATA_CPU_SERIAL | PADATA_CPU_PARALLEL)))
721
- return -EINVAL;
722
-
723
- mutex_lock(&pinst->lock);
724
-
725
- get_online_cpus();
726
- if (mask & PADATA_CPU_SERIAL)
727
- cpumask_clear_cpu(cpu, pinst->cpumask.cbcpu);
728
- if (mask & PADATA_CPU_PARALLEL)
729
- cpumask_clear_cpu(cpu, pinst->cpumask.pcpu);
730
-
731
- err = __padata_remove_cpu(pinst, cpu);
732
- put_online_cpus();
733
-
734
- mutex_unlock(&pinst->lock);
735790
736791 return err;
737792 }
738
-EXPORT_SYMBOL(padata_remove_cpu);
739793
740794 static inline int pinst_has_cpu(struct padata_instance *pinst, int cpu)
741795 {
....@@ -748,7 +802,7 @@
748802 struct padata_instance *pinst;
749803 int ret;
750804
751
- pinst = hlist_entry_safe(node, struct padata_instance, node);
805
+ pinst = hlist_entry_safe(node, struct padata_instance, cpu_online_node);
752806 if (!pinst_has_cpu(pinst, cpu))
753807 return 0;
754808
....@@ -758,12 +812,12 @@
758812 return ret;
759813 }
760814
761
-static int padata_cpu_prep_down(unsigned int cpu, struct hlist_node *node)
815
+static int padata_cpu_dead(unsigned int cpu, struct hlist_node *node)
762816 {
763817 struct padata_instance *pinst;
764818 int ret;
765819
766
- pinst = hlist_entry_safe(node, struct padata_instance, node);
820
+ pinst = hlist_entry_safe(node, struct padata_instance, cpu_dead_node);
767821 if (!pinst_has_cpu(pinst, cpu))
768822 return 0;
769823
....@@ -779,13 +833,17 @@
779833 static void __padata_free(struct padata_instance *pinst)
780834 {
781835 #ifdef CONFIG_HOTPLUG_CPU
782
- cpuhp_state_remove_instance_nocalls(hp_online, &pinst->node);
836
+ cpuhp_state_remove_instance_nocalls(CPUHP_PADATA_DEAD,
837
+ &pinst->cpu_dead_node);
838
+ cpuhp_state_remove_instance_nocalls(hp_online, &pinst->cpu_online_node);
783839 #endif
784840
785
- padata_stop(pinst);
786
- padata_free_pd(pinst->pd);
841
+ WARN_ON(!list_empty(&pinst->pslist));
842
+
787843 free_cpumask_var(pinst->cpumask.pcpu);
788844 free_cpumask_var(pinst->cpumask.cbcpu);
845
+ destroy_workqueue(pinst->serial_wq);
846
+ destroy_workqueue(pinst->parallel_wq);
789847 kfree(pinst);
790848 }
791849
....@@ -872,6 +930,7 @@
872930 &parallel_cpumask_attr.attr,
873931 NULL,
874932 };
933
+ATTRIBUTE_GROUPS(padata_default);
875934
876935 static ssize_t padata_sysfs_show(struct kobject *kobj,
877936 struct attribute *attr, char *buf)
....@@ -910,92 +969,86 @@
910969
911970 static struct kobj_type padata_attr_type = {
912971 .sysfs_ops = &padata_sysfs_ops,
913
- .default_attrs = padata_default_attrs,
972
+ .default_groups = padata_default_groups,
914973 .release = padata_sysfs_release,
915974 };
916975
917976 /**
918
- * padata_alloc - allocate and initialize a padata instance and specify
919
- * cpumasks for serial and parallel workers.
977
+ * padata_alloc - allocate and initialize a padata instance
978
+ * @name: used to identify the instance
920979 *
921
- * @wq: workqueue to use for the allocated padata instance
922
- * @pcpumask: cpumask that will be used for padata parallelization
923
- * @cbcpumask: cpumask that will be used for padata serialization
924
- *
925
- * Must be called from a cpus_read_lock() protected region
980
+ * Return: new instance on success, NULL on error
926981 */
927
-static struct padata_instance *padata_alloc(struct workqueue_struct *wq,
928
- const struct cpumask *pcpumask,
929
- const struct cpumask *cbcpumask)
982
+struct padata_instance *padata_alloc(const char *name)
930983 {
931984 struct padata_instance *pinst;
932
- struct parallel_data *pd = NULL;
933985
934986 pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL);
935987 if (!pinst)
936988 goto err;
937989
938
- if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
990
+ pinst->parallel_wq = alloc_workqueue("%s_parallel", WQ_UNBOUND, 0,
991
+ name);
992
+ if (!pinst->parallel_wq)
939993 goto err_free_inst;
994
+
995
+ get_online_cpus();
996
+
997
+ pinst->serial_wq = alloc_workqueue("%s_serial", WQ_MEM_RECLAIM |
998
+ WQ_CPU_INTENSIVE, 1, name);
999
+ if (!pinst->serial_wq)
1000
+ goto err_put_cpus;
1001
+
1002
+ if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
1003
+ goto err_free_serial_wq;
9401004 if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) {
9411005 free_cpumask_var(pinst->cpumask.pcpu);
942
- goto err_free_inst;
1006
+ goto err_free_serial_wq;
9431007 }
944
- if (!padata_validate_cpumask(pinst, pcpumask) ||
945
- !padata_validate_cpumask(pinst, cbcpumask))
1008
+
1009
+ INIT_LIST_HEAD(&pinst->pslist);
1010
+
1011
+ cpumask_copy(pinst->cpumask.pcpu, cpu_possible_mask);
1012
+ cpumask_copy(pinst->cpumask.cbcpu, cpu_possible_mask);
1013
+
1014
+ if (padata_setup_cpumasks(pinst))
9461015 goto err_free_masks;
9471016
948
- pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
949
- if (!pd)
950
- goto err_free_masks;
1017
+ __padata_start(pinst);
9511018
952
- rcu_assign_pointer(pinst->pd, pd);
953
-
954
- pinst->wq = wq;
955
-
956
- cpumask_copy(pinst->cpumask.pcpu, pcpumask);
957
- cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
958
-
959
- pinst->flags = 0;
960
-
961
- BLOCKING_INIT_NOTIFIER_HEAD(&pinst->cpumask_change_notifier);
9621019 kobject_init(&pinst->kobj, &padata_attr_type);
9631020 mutex_init(&pinst->lock);
9641021
9651022 #ifdef CONFIG_HOTPLUG_CPU
966
- cpuhp_state_add_instance_nocalls_cpuslocked(hp_online, &pinst->node);
1023
+ cpuhp_state_add_instance_nocalls_cpuslocked(hp_online,
1024
+ &pinst->cpu_online_node);
1025
+ cpuhp_state_add_instance_nocalls_cpuslocked(CPUHP_PADATA_DEAD,
1026
+ &pinst->cpu_dead_node);
9671027 #endif
1028
+
1029
+ put_online_cpus();
1030
+
9681031 return pinst;
9691032
9701033 err_free_masks:
9711034 free_cpumask_var(pinst->cpumask.pcpu);
9721035 free_cpumask_var(pinst->cpumask.cbcpu);
1036
+err_free_serial_wq:
1037
+ destroy_workqueue(pinst->serial_wq);
1038
+err_put_cpus:
1039
+ put_online_cpus();
1040
+ destroy_workqueue(pinst->parallel_wq);
9731041 err_free_inst:
9741042 kfree(pinst);
9751043 err:
9761044 return NULL;
9771045 }
978
-
979
-/**
980
- * padata_alloc_possible - Allocate and initialize padata instance.
981
- * Use the cpu_possible_mask for serial and
982
- * parallel workers.
983
- *
984
- * @wq: workqueue to use for the allocated padata instance
985
- *
986
- * Must be called from a cpus_read_lock() protected region
987
- */
988
-struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq)
989
-{
990
- lockdep_assert_cpus_held();
991
- return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
992
-}
993
-EXPORT_SYMBOL(padata_alloc_possible);
1046
+EXPORT_SYMBOL(padata_alloc);
9941047
9951048 /**
9961049 * padata_free - free a padata instance
9971050 *
998
- * @padata_inst: padata instance to free
1051
+ * @pinst: padata instance to free
9991052 */
10001053 void padata_free(struct padata_instance *pinst)
10011054 {
....@@ -1003,25 +1056,99 @@
10031056 }
10041057 EXPORT_SYMBOL(padata_free);
10051058
1006
-#ifdef CONFIG_HOTPLUG_CPU
1007
-
1008
-static __init int padata_driver_init(void)
1059
+/**
1060
+ * padata_alloc_shell - Allocate and initialize padata shell.
1061
+ *
1062
+ * @pinst: Parent padata_instance object.
1063
+ *
1064
+ * Return: new shell on success, NULL on error
1065
+ */
1066
+struct padata_shell *padata_alloc_shell(struct padata_instance *pinst)
10091067 {
1068
+ struct parallel_data *pd;
1069
+ struct padata_shell *ps;
1070
+
1071
+ ps = kzalloc(sizeof(*ps), GFP_KERNEL);
1072
+ if (!ps)
1073
+ goto out;
1074
+
1075
+ ps->pinst = pinst;
1076
+
1077
+ get_online_cpus();
1078
+ pd = padata_alloc_pd(ps);
1079
+ put_online_cpus();
1080
+
1081
+ if (!pd)
1082
+ goto out_free_ps;
1083
+
1084
+ mutex_lock(&pinst->lock);
1085
+ RCU_INIT_POINTER(ps->pd, pd);
1086
+ list_add(&ps->list, &pinst->pslist);
1087
+ mutex_unlock(&pinst->lock);
1088
+
1089
+ return ps;
1090
+
1091
+out_free_ps:
1092
+ kfree(ps);
1093
+out:
1094
+ return NULL;
1095
+}
1096
+EXPORT_SYMBOL(padata_alloc_shell);
1097
+
1098
+/**
1099
+ * padata_free_shell - free a padata shell
1100
+ *
1101
+ * @ps: padata shell to free
1102
+ */
1103
+void padata_free_shell(struct padata_shell *ps)
1104
+{
1105
+ if (!ps)
1106
+ return;
1107
+
1108
+ mutex_lock(&ps->pinst->lock);
1109
+ list_del(&ps->list);
1110
+ padata_free_pd(rcu_dereference_protected(ps->pd, 1));
1111
+ mutex_unlock(&ps->pinst->lock);
1112
+
1113
+ kfree(ps);
1114
+}
1115
+EXPORT_SYMBOL(padata_free_shell);
1116
+
1117
+void __init padata_init(void)
1118
+{
1119
+ unsigned int i, possible_cpus;
1120
+#ifdef CONFIG_HOTPLUG_CPU
10101121 int ret;
10111122
10121123 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "padata:online",
1013
- padata_cpu_online,
1014
- padata_cpu_prep_down);
1124
+ padata_cpu_online, NULL);
10151125 if (ret < 0)
1016
- return ret;
1126
+ goto err;
10171127 hp_online = ret;
1018
- return 0;
1019
-}
1020
-module_init(padata_driver_init);
10211128
1022
-static __exit void padata_driver_exit(void)
1023
-{
1024
- cpuhp_remove_multi_state(hp_online);
1025
-}
1026
-module_exit(padata_driver_exit);
1129
+ ret = cpuhp_setup_state_multi(CPUHP_PADATA_DEAD, "padata:dead",
1130
+ NULL, padata_cpu_dead);
1131
+ if (ret < 0)
1132
+ goto remove_online_state;
10271133 #endif
1134
+
1135
+ possible_cpus = num_possible_cpus();
1136
+ padata_works = kmalloc_array(possible_cpus, sizeof(struct padata_work),
1137
+ GFP_KERNEL);
1138
+ if (!padata_works)
1139
+ goto remove_dead_state;
1140
+
1141
+ for (i = 0; i < possible_cpus; ++i)
1142
+ list_add(&padata_works[i].pw_list, &padata_free_works);
1143
+
1144
+ return;
1145
+
1146
+remove_dead_state:
1147
+#ifdef CONFIG_HOTPLUG_CPU
1148
+ cpuhp_remove_multi_state(CPUHP_PADATA_DEAD);
1149
+remove_online_state:
1150
+ cpuhp_remove_multi_state(hp_online);
1151
+err:
1152
+#endif
1153
+ pr_warn("padata: initialization failed\n");
1154
+}
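Putting the new instance/shell split together, a minimal lifecycle sketch for a hypothetical user (loosely modeled on pcrypt, error handling abbreviated):

    static struct padata_instance *my_pinst;
    static struct padata_shell *my_ps;

    static int __init my_engine_init(void)
    {
            my_pinst = padata_alloc("my_engine");
            if (!my_pinst)
                    return -ENOMEM;

            my_ps = padata_alloc_shell(my_pinst);
            if (!my_ps) {
                    padata_free(my_pinst);
                    return -ENOMEM;
            }

            /* Submit objects with padata_do_parallel(my_ps, ...) from now on. */
            return 0;
    }

    static void __exit my_engine_exit(void)
    {
            padata_free_shell(my_ps);
            padata_free(my_pinst);
    }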