2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/block/blk-mq-tag.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Tag allocation using scalable bitmaps. Uses active queue tracking to support
  * fairer distribution of tags between multiple submitters when a shared tag map
@@ -9,17 +10,10 @@
 #include <linux/module.h>
 
 #include <linux/blk-mq.h>
+#include <linux/delay.h>
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
-
-bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
-{
-        if (!tags)
-                return true;
-
-        return sbitmap_any_bit_clear(&tags->bitmap_tags.sb);
-}
 
 /*
  * If a previously inactive queue goes active, bump the active user count.
@@ -29,9 +23,18 @@
  */
 bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 {
-        if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
-            !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
-                atomic_inc(&hctx->tags->active_queues);
+        if (blk_mq_is_sbitmap_shared(hctx->flags)) {
+                struct request_queue *q = hctx->queue;
+                struct blk_mq_tag_set *set = q->tag_set;
+
+                if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
+                    !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
+                        atomic_inc(&set->active_queues_shared_sbitmap);
+        } else {
+                if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
+                    !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+                        atomic_inc(&hctx->tags->active_queues);
+        }
 
         return true;
 }
@@ -41,9 +44,9 @@
  */
 void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
 {
-        sbitmap_queue_wake_all(&tags->bitmap_tags);
+        sbitmap_queue_wake_all(tags->bitmap_tags);
         if (include_reserve)
-                sbitmap_queue_wake_all(&tags->breserved_tags);
+                sbitmap_queue_wake_all(tags->breserved_tags);
 }
 
 /*
@@ -53,52 +56,30 @@
 void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 {
         struct blk_mq_tags *tags = hctx->tags;
+        struct request_queue *q = hctx->queue;
+        struct blk_mq_tag_set *set = q->tag_set;
 
-        if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
-                return;
-
-        atomic_dec(&tags->active_queues);
+        if (blk_mq_is_sbitmap_shared(hctx->flags)) {
+                if (!test_and_clear_bit(QUEUE_FLAG_HCTX_ACTIVE,
+                                        &q->queue_flags))
+                        return;
+                atomic_dec(&set->active_queues_shared_sbitmap);
+        } else {
+                if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+                        return;
+                atomic_dec(&tags->active_queues);
+        }
 
         blk_mq_tag_wakeup_all(tags, false);
-}
-
-/*
- * For shared tag users, we track the number of currently active users
- * and attempt to provide a fair share of the tag depth for each of them.
- */
-static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
-                                  struct sbitmap_queue *bt)
-{
-        unsigned int depth, users;
-
-        if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED))
-                return true;
-        if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
-                return true;
-
-        /*
-         * Don't try dividing an ant
-         */
-        if (bt->sb.depth == 1)
-                return true;
-
-        users = atomic_read(&hctx->tags->active_queues);
-        if (!users)
-                return true;
-
-        /*
-         * Allow at least some tags
-         */
-        depth = max((bt->sb.depth + users - 1) / users, 4U);
-        return atomic_read(&hctx->nr_active) < depth;
 }
 
 static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
                             struct sbitmap_queue *bt)
 {
-        if (!(data->flags & BLK_MQ_REQ_INTERNAL) &&
-            !hctx_may_queue(data->hctx, bt))
-                return -1;
+        if (!data->q->elevator && !(data->flags & BLK_MQ_REQ_RESERVED) &&
+            !hctx_may_queue(data->hctx, bt))
+                return BLK_MQ_NO_TAG;
+
         if (data->shallow_depth)
                 return __sbitmap_queue_get_shallow(bt, data->shallow_depth);
         else
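
The hctx_may_queue() definition removed above still has a caller in __blk_mq_get_tag(), so it presumably now lives in a shared header at this point in the series. Its fair-share rule caps each active queue at roughly an equal slice of the shared depth, with a floor of four tags. A rough worked sketch of that arithmetic only; fair_share_depth() is a hypothetical name, not something this patch adds:

    /* Illustrative only: the per-queue cap that hctx_may_queue() computed. */
    static unsigned int fair_share_depth(unsigned int depth, unsigned int users)
    {
            /* ceil(depth / users), but never fewer than 4 tags per queue */
            return max((depth + users - 1) / users, 4U);
    }

For example, 256 tags shared by 3 active queues allows max(86, 4) = 86 tags per queue, while 256 tags shared by 128 active queues hits the floor of 4.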
@@ -110,32 +91,30 @@
         struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
         struct sbitmap_queue *bt;
         struct sbq_wait_state *ws;
-        DEFINE_WAIT(wait);
+        DEFINE_SBQ_WAIT(wait);
         unsigned int tag_offset;
-        bool drop_ctx;
         int tag;
 
         if (data->flags & BLK_MQ_REQ_RESERVED) {
                 if (unlikely(!tags->nr_reserved_tags)) {
                         WARN_ON_ONCE(1);
-                        return BLK_MQ_TAG_FAIL;
+                        return BLK_MQ_NO_TAG;
                 }
-                bt = &tags->breserved_tags;
+                bt = tags->breserved_tags;
                 tag_offset = 0;
         } else {
-                bt = &tags->bitmap_tags;
+                bt = tags->bitmap_tags;
                 tag_offset = tags->nr_reserved_tags;
         }
 
         tag = __blk_mq_get_tag(data, bt);
-        if (tag != -1)
+        if (tag != BLK_MQ_NO_TAG)
                 goto found_tag;
 
         if (data->flags & BLK_MQ_REQ_NOWAIT)
-                return BLK_MQ_TAG_FAIL;
+                return BLK_MQ_NO_TAG;
 
         ws = bt_wait_ptr(bt, data->hctx);
-        drop_ctx = data->ctx == NULL;
         do {
                 struct sbitmap_queue *bt_prev;
@@ -151,31 +130,28 @@
                  * as running the queue may also have found completions.
                  */
                 tag = __blk_mq_get_tag(data, bt);
-                if (tag != -1)
+                if (tag != BLK_MQ_NO_TAG)
                         break;
 
-                prepare_to_wait_exclusive(&ws->wait, &wait,
-                                          TASK_UNINTERRUPTIBLE);
+                sbitmap_prepare_to_wait(bt, ws, &wait, TASK_UNINTERRUPTIBLE);
 
                 tag = __blk_mq_get_tag(data, bt);
-                if (tag != -1)
+                if (tag != BLK_MQ_NO_TAG)
                         break;
-
-                if (data->ctx)
-                        blk_mq_put_ctx(data->ctx);
 
                 bt_prev = bt;
                 io_schedule();
 
+                sbitmap_finish_wait(bt, ws, &wait);
+
                 data->ctx = blk_mq_get_ctx(data->q);
-                data->hctx = blk_mq_map_queue(data->q, data->ctx->cpu);
+                data->hctx = blk_mq_map_queue(data->q, data->cmd_flags,
+                                              data->ctx);
                 tags = blk_mq_tags_from_data(data);
                 if (data->flags & BLK_MQ_REQ_RESERVED)
-                        bt = &tags->breserved_tags;
+                        bt = tags->breserved_tags;
                 else
-                        bt = &tags->bitmap_tags;
-
-                finish_wait(&ws->wait, &wait);
+                        bt = tags->bitmap_tags;
 
                 /*
                  * If destination hw queue is changed, fake wake up on
@@ -188,26 +164,31 @@
                 ws = bt_wait_ptr(bt, data->hctx);
         } while (1);
 
-        if (drop_ctx && data->ctx)
-                blk_mq_put_ctx(data->ctx);
-
-        finish_wait(&ws->wait, &wait);
+        sbitmap_finish_wait(bt, ws, &wait);
 
 found_tag:
+        /*
+         * Give up this allocation if the hctx is inactive. The caller will
+         * retry on an active hctx.
+         */
+        if (unlikely(test_bit(BLK_MQ_S_INACTIVE, &data->hctx->state))) {
+                blk_mq_put_tag(tags, data->ctx, tag + tag_offset);
+                return BLK_MQ_NO_TAG;
+        }
         return tag + tag_offset;
 }
 
-void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
-                    struct blk_mq_ctx *ctx, unsigned int tag)
+void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
+                    unsigned int tag)
 {
         if (!blk_mq_tag_is_reserved(tags, tag)) {
                 const int real_tag = tag - tags->nr_reserved_tags;
 
                 BUG_ON(real_tag >= tags->nr_tags);
-                sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu);
+                sbitmap_queue_clear(tags->bitmap_tags, real_tag, ctx->cpu);
         } else {
                 BUG_ON(tag >= tags->nr_reserved_tags);
-                sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu);
+                sbitmap_queue_clear(tags->breserved_tags, tag, ctx->cpu);
         }
 }
 
@@ -218,6 +199,20 @@
         bool reserved;
 };
 
+static struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags,
+                                               unsigned int bitnr)
+{
+        struct request *rq;
+        unsigned long flags;
+
+        spin_lock_irqsave(&tags->lock, flags);
+        rq = tags->rqs[bitnr];
+        if (!rq || rq->tag != bitnr || !refcount_inc_not_zero(&rq->ref))
+                rq = NULL;
+        spin_unlock_irqrestore(&tags->lock, flags);
+        return rq;
+}
+
 static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 {
         struct bt_iter_data *iter_data = data;
@@ -225,20 +220,38 @@
         struct blk_mq_tags *tags = hctx->tags;
         bool reserved = iter_data->reserved;
         struct request *rq;
+        bool ret = true;
 
         if (!reserved)
                 bitnr += tags->nr_reserved_tags;
-        rq = tags->rqs[bitnr];
-
         /*
          * We can hit rq == NULL here, because the tagging functions
-         * test and set the bit before assining ->rqs[].
+         * test and set the bit before assigning ->rqs[].
          */
-        if (rq && rq->q == hctx->queue)
-                iter_data->fn(hctx, rq, iter_data->data, reserved);
-        return true;
+        rq = blk_mq_find_and_get_req(tags, bitnr);
+        if (!rq)
+                return true;
+
+        if (rq->q == hctx->queue && rq->mq_hctx == hctx)
+                ret = iter_data->fn(hctx, rq, iter_data->data, reserved);
+        blk_mq_put_rq_ref(rq);
+        return ret;
 }
 
+/**
+ * bt_for_each - iterate over the requests associated with a hardware queue
+ * @hctx:      Hardware queue to examine.
+ * @bt:        sbitmap to examine. This is either the breserved_tags member
+ *             or the bitmap_tags member of struct blk_mq_tags.
+ * @fn:        Pointer to the function that will be called for each request
+ *             associated with @hctx that has been assigned a driver tag.
+ *             @fn will be called as follows: @fn(@hctx, rq, @data, @reserved)
+ *             where rq is a pointer to a request. Return true to continue
+ *             iterating tags, false to stop.
+ * @data:      Will be passed as third argument to @fn.
+ * @reserved:  Indicates whether @bt is the breserved_tags member or the
+ *             bitmap_tags member of struct blk_mq_tags.
+ */
 static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt,
                         busy_iter_fn *fn, void *data, bool reserved)
 {
@@ -256,52 +269,112 @@
         struct blk_mq_tags *tags;
         busy_tag_iter_fn *fn;
         void *data;
-        bool reserved;
+        unsigned int flags;
 };
+
+#define BT_TAG_ITER_RESERVED           (1 << 0)
+#define BT_TAG_ITER_STARTED            (1 << 1)
+#define BT_TAG_ITER_STATIC_RQS         (1 << 2)
 
 static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 {
         struct bt_tags_iter_data *iter_data = data;
         struct blk_mq_tags *tags = iter_data->tags;
-        bool reserved = iter_data->reserved;
+        bool reserved = iter_data->flags & BT_TAG_ITER_RESERVED;
         struct request *rq;
+        bool ret = true;
+        bool iter_static_rqs = !!(iter_data->flags & BT_TAG_ITER_STATIC_RQS);
 
         if (!reserved)
                 bitnr += tags->nr_reserved_tags;
 
         /*
          * We can hit rq == NULL here, because the tagging functions
-         * test and set the bit before assining ->rqs[].
+         * test and set the bit before assigning ->rqs[].
          */
-        rq = tags->rqs[bitnr];
-        if (rq && blk_mq_request_started(rq))
-                iter_data->fn(rq, iter_data->data, reserved);
+        if (iter_static_rqs)
+                rq = tags->static_rqs[bitnr];
+        else
+                rq = blk_mq_find_and_get_req(tags, bitnr);
+        if (!rq)
+                return true;
 
-        return true;
+        if (!(iter_data->flags & BT_TAG_ITER_STARTED) ||
+            blk_mq_request_started(rq))
+                ret = iter_data->fn(rq, iter_data->data, reserved);
+        if (!iter_static_rqs)
+                blk_mq_put_rq_ref(rq);
+        return ret;
 }
 
+/**
+ * bt_tags_for_each - iterate over the requests in a tag map
+ * @tags:      Tag map to iterate over.
+ * @bt:        sbitmap to examine. This is either the breserved_tags member
+ *             or the bitmap_tags member of struct blk_mq_tags.
+ * @fn:        Pointer to the function that will be called for each started
+ *             request. @fn will be called as follows: @fn(rq, @data,
+ *             @reserved) where rq is a pointer to a request. Return true
+ *             to continue iterating tags, false to stop.
+ * @data:      Will be passed as second argument to @fn.
+ * @flags:     BT_TAG_ITER_*
+ */
 static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt,
-                             busy_tag_iter_fn *fn, void *data, bool reserved)
+                             busy_tag_iter_fn *fn, void *data, unsigned int flags)
 {
         struct bt_tags_iter_data iter_data = {
                 .tags = tags,
                 .fn = fn,
                 .data = data,
-                .reserved = reserved,
+                .flags = flags,
         };
 
         if (tags->rqs)
                 sbitmap_for_each_set(&bt->sb, bt_tags_iter, &iter_data);
 }
 
-static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags,
-                                     busy_tag_iter_fn *fn, void *priv)
+static void __blk_mq_all_tag_iter(struct blk_mq_tags *tags,
+                                  busy_tag_iter_fn *fn, void *priv, unsigned int flags)
 {
+        WARN_ON_ONCE(flags & BT_TAG_ITER_RESERVED);
+
         if (tags->nr_reserved_tags)
-                bt_tags_for_each(tags, &tags->breserved_tags, fn, priv, true);
-        bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, false);
+                bt_tags_for_each(tags, tags->breserved_tags, fn, priv,
+                                 flags | BT_TAG_ITER_RESERVED);
+        bt_tags_for_each(tags, tags->bitmap_tags, fn, priv, flags);
 }
 
+/**
+ * blk_mq_all_tag_iter - iterate over all requests in a tag map
+ * @tags:      Tag map to iterate over.
+ * @fn:        Pointer to the function that will be called for each
+ *             request. @fn will be called as follows: @fn(rq, @priv,
+ *             reserved) where rq is a pointer to a request. 'reserved'
+ *             indicates whether or not @rq is a reserved request. Return
+ *             true to continue iterating tags, false to stop.
+ * @priv:      Will be passed as second argument to @fn.
+ *
+ * Caller has to pass the tag map from which requests are allocated.
+ */
+void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
+                         void *priv)
+{
+        __blk_mq_all_tag_iter(tags, fn, priv, BT_TAG_ITER_STATIC_RQS);
+}
+
+/**
+ * blk_mq_tagset_busy_iter - iterate over all started requests in a tag set
+ * @tagset:    Tag set to iterate over.
+ * @fn:        Pointer to the function that will be called for each started
+ *             request. @fn will be called as follows: @fn(rq, @priv,
+ *             reserved) where rq is a pointer to a request. 'reserved'
+ *             indicates whether or not @rq is a reserved request. Return
+ *             true to continue iterating tags, false to stop.
+ * @priv:      Will be passed as second argument to @fn.
+ *
+ * We grab one request reference before calling @fn and release it after
+ * @fn returns.
+ */
 void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
                              busy_tag_iter_fn *fn, void *priv)
 {
@@ -309,11 +382,57 @@
 
         for (i = 0; i < tagset->nr_hw_queues; i++) {
                 if (tagset->tags && tagset->tags[i])
-                        blk_mq_all_tag_busy_iter(tagset->tags[i], fn, priv);
+                        __blk_mq_all_tag_iter(tagset->tags[i], fn, priv,
+                                              BT_TAG_ITER_STARTED);
         }
 }
 EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
 
+static bool blk_mq_tagset_count_completed_rqs(struct request *rq,
+                                              void *data, bool reserved)
+{
+        unsigned *count = data;
+
+        if (blk_mq_request_completed(rq))
+                (*count)++;
+        return true;
+}
+
+/**
+ * blk_mq_tagset_wait_completed_request - wait until all completed req's
+ * complete function is run
+ * @tagset:    Tag set to drain completed request
+ *
+ * Note: This function has to be run after all IO queues are shutdown
+ */
+void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset)
+{
+        while (true) {
+                unsigned count = 0;
+
+                blk_mq_tagset_busy_iter(tagset,
+                                blk_mq_tagset_count_completed_rqs, &count);
+                if (!count)
+                        break;
+                msleep(5);
+        }
+}
+EXPORT_SYMBOL(blk_mq_tagset_wait_completed_request);
+
+/**
+ * blk_mq_queue_tag_busy_iter - iterate over all requests with a driver tag
+ * @q:         Request queue to examine.
+ * @fn:        Pointer to the function that will be called for each request
+ *             on @q. @fn will be called as follows: @fn(hctx, rq, @priv,
+ *             reserved) where rq is a pointer to a request and hctx points
+ *             to the hardware queue associated with the request. 'reserved'
+ *             indicates whether or not @rq is a reserved request.
+ * @priv:      Will be passed as third argument to @fn.
+ *
+ * Note: if @q->tag_set is shared with other request queues then @fn will be
+ * called for all requests on all queues that share that tag set and not only
+ * for requests associated with @q.
+ */
 void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
                                 void *priv)
 {
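
blk_mq_tagset_wait_completed_request() above is itself a compact usage example of the iterator API: it hands a counting callback to blk_mq_tagset_busy_iter() and polls until the count drops to zero. A similar driver-side sketch, counting requests that have started but not yet completed; example_rq_inflight() and example_nr_inflight() are hypothetical names, and only the busy_tag_iter_fn signature plus the two blk_mq_request_*() tests already used in this file are assumed:

    /* Hypothetical driver helper, not part of this patch. */
    static bool example_rq_inflight(struct request *rq, void *data, bool reserved)
    {
            unsigned int *count = data;

            if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq))
                    (*count)++;
            return true;    /* keep iterating */
    }

    static unsigned int example_nr_inflight(struct blk_mq_tag_set *set)
    {
            unsigned int count = 0;

            blk_mq_tagset_busy_iter(set, example_rq_inflight, &count);
            return count;
    }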
@@ -321,9 +440,9 @@
         int i;
 
         /*
-         * __blk_mq_update_nr_hw_queues will update the nr_hw_queues and
-         * queue_hw_ctx after freeze the queue, so we use q_usage_counter
-         * to avoid race with it.
+         * __blk_mq_update_nr_hw_queues() updates nr_hw_queues and queue_hw_ctx
+         * while the queue is frozen. So we can use q_usage_counter to avoid
+         * racing with it.
          */
         if (!percpu_ref_tryget(&q->q_usage_counter))
                 return;
@@ -332,15 +451,15 @@
                 struct blk_mq_tags *tags = hctx->tags;
 
                 /*
-                 * If not software queues are currently mapped to this
+                 * If no software queues are currently mapped to this
                  * hardware queue, there's nothing to check
                  */
                 if (!blk_mq_hw_queue_mapped(hctx))
                         continue;
 
                 if (tags->nr_reserved_tags)
-                        bt_for_each(hctx, &tags->breserved_tags, fn, priv, true);
-                bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false);
+                        bt_for_each(hctx, tags->breserved_tags, fn, priv, true);
+                bt_for_each(hctx, tags->bitmap_tags, fn, priv, false);
         }
         blk_queue_exit(q);
 }
@@ -352,30 +471,64 @@
                                        node);
 }
 
-static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
-                                                   int node, int alloc_policy)
+static int blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
+                                   int node, int alloc_policy)
 {
         unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
         bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;
 
-        if (bt_alloc(&tags->bitmap_tags, depth, round_robin, node))
-                goto free_tags;
-        if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, round_robin,
-                     node))
+        if (bt_alloc(&tags->__bitmap_tags, depth, round_robin, node))
+                return -ENOMEM;
+        if (bt_alloc(&tags->__breserved_tags, tags->nr_reserved_tags,
+                     round_robin, node))
                 goto free_bitmap_tags;
 
-        return tags;
+        tags->bitmap_tags = &tags->__bitmap_tags;
+        tags->breserved_tags = &tags->__breserved_tags;
+
+        return 0;
 free_bitmap_tags:
-        sbitmap_queue_free(&tags->bitmap_tags);
-free_tags:
-        kfree(tags);
-        return NULL;
+        sbitmap_queue_free(&tags->__bitmap_tags);
+        return -ENOMEM;
+}
+
+int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set, unsigned int flags)
+{
+        unsigned int depth = set->queue_depth - set->reserved_tags;
+        int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags);
+        bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;
+        int i, node = set->numa_node;
+
+        if (bt_alloc(&set->__bitmap_tags, depth, round_robin, node))
+                return -ENOMEM;
+        if (bt_alloc(&set->__breserved_tags, set->reserved_tags,
+                     round_robin, node))
+                goto free_bitmap_tags;
+
+        for (i = 0; i < set->nr_hw_queues; i++) {
+                struct blk_mq_tags *tags = set->tags[i];
+
+                tags->bitmap_tags = &set->__bitmap_tags;
+                tags->breserved_tags = &set->__breserved_tags;
+        }
+
+        return 0;
+free_bitmap_tags:
+        sbitmap_queue_free(&set->__bitmap_tags);
+        return -ENOMEM;
+}
+
+void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *set)
+{
+        sbitmap_queue_free(&set->__bitmap_tags);
+        sbitmap_queue_free(&set->__breserved_tags);
 }
 
 struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
                                      unsigned int reserved_tags,
-                                     int node, int alloc_policy)
+                                     int node, unsigned int flags)
 {
+        int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(flags);
         struct blk_mq_tags *tags;
 
         if (total_tags > BLK_MQ_TAG_MAX) {
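
With BLK_MQ_F_TAG_HCTX_SHARED, blk_mq_init_tags() (continued in the next hunk) skips the per-tags sbitmap allocation entirely, and blk_mq_init_shared_sbitmap() instead points every hardware queue's bitmap_tags/breserved_tags at the single pair embedded in the tag set. A rough sketch of how the two helpers pair up, assuming set->tags[] is already populated; example_enable_shared_tags() is hypothetical, and the patch's real call sites are elsewhere in blk-mq:

    /* Illustrative pairing only, not the patch's actual call site. */
    static int example_enable_shared_tags(struct blk_mq_tag_set *set)
    {
            int ret;

            if (!(set->flags & BLK_MQ_F_TAG_HCTX_SHARED))
                    return 0;       /* per-hctx sbitmaps were set up already */

            ret = blk_mq_init_shared_sbitmap(set, set->flags);
            if (ret)
                    return ret;

            /* ... every hctx now allocates from set->__bitmap_tags ... */
            return 0;
    }

On teardown the shared pair is released once with blk_mq_exit_shared_sbitmap(set).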
@@ -389,14 +542,24 @@
 
         tags->nr_tags = total_tags;
         tags->nr_reserved_tags = reserved_tags;
+        spin_lock_init(&tags->lock);
 
-        return blk_mq_init_bitmap_tags(tags, node, alloc_policy);
+        if (flags & BLK_MQ_F_TAG_HCTX_SHARED)
+                return tags;
+
+        if (blk_mq_init_bitmap_tags(tags, node, alloc_policy) < 0) {
+                kfree(tags);
+                return NULL;
+        }
+        return tags;
 }
 
-void blk_mq_free_tags(struct blk_mq_tags *tags)
+void blk_mq_free_tags(struct blk_mq_tags *tags, unsigned int flags)
 {
-        sbitmap_queue_free(&tags->bitmap_tags);
-        sbitmap_queue_free(&tags->breserved_tags);
+        if (!(flags & BLK_MQ_F_TAG_HCTX_SHARED)) {
+                sbitmap_queue_free(tags->bitmap_tags);
+                sbitmap_queue_free(tags->breserved_tags);
+        }
         kfree(tags);
 }
 
@@ -415,6 +578,8 @@
          */
         if (tdepth > tags->nr_tags) {
                 struct blk_mq_tag_set *set = hctx->queue->tag_set;
+                /* Only sched tags can grow, so clear HCTX_SHARED flag */
+                unsigned int flags = set->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
                 struct blk_mq_tags *new;
                 bool ret;
 
@@ -429,28 +594,33 @@
                         return -EINVAL;
 
                 new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth,
-                                          tags->nr_reserved_tags);
+                                          tags->nr_reserved_tags, flags);
                 if (!new)
                         return -ENOMEM;
                 ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth);
                 if (ret) {
-                        blk_mq_free_rq_map(new);
+                        blk_mq_free_rq_map(new, flags);
                         return -ENOMEM;
                 }
 
                 blk_mq_free_rqs(set, *tagsptr, hctx->queue_num);
-                blk_mq_free_rq_map(*tagsptr);
+                blk_mq_free_rq_map(*tagsptr, flags);
                 *tagsptr = new;
         } else {
                 /*
                  * Don't need (or can't) update reserved tags here, they
                  * remain static and should never need resizing.
                  */
-                sbitmap_queue_resize(&tags->bitmap_tags,
+                sbitmap_queue_resize(tags->bitmap_tags,
                                      tdepth - tags->nr_reserved_tags);
         }
 
         return 0;
+}
+
+void blk_mq_tag_resize_shared_sbitmap(struct blk_mq_tag_set *set, unsigned int size)
+{
+        sbitmap_queue_resize(&set->__bitmap_tags, size - set->reserved_tags);
 }
 
 /**
@@ -467,16 +637,7 @@
  */
 u32 blk_mq_unique_tag(struct request *rq)
 {
-        struct request_queue *q = rq->q;
-        struct blk_mq_hw_ctx *hctx;
-        int hwq = 0;
-
-        if (q->mq_ops) {
-                hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
-                hwq = hctx->queue_num;
-        }
-
-        return (hwq << BLK_MQ_UNIQUE_TAG_BITS) |
+        return (rq->mq_hctx->queue_num << BLK_MQ_UNIQUE_TAG_BITS) |
                 (rq->tag & BLK_MQ_UNIQUE_TAG_MASK);
 }
 EXPORT_SYMBOL(blk_mq_unique_tag);
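
The rewritten blk_mq_unique_tag() packs the hardware queue index above BLK_MQ_UNIQUE_TAG_BITS and keeps the per-queue tag in the low BLK_MQ_UNIQUE_TAG_MASK bits. A small consumer-side sketch of decoding that value; blk_mq_unique_tag_to_hwq() and blk_mq_unique_tag_to_tag() are the existing inline helpers in include/linux/blk-mq.h, while example_log_unique_tag() is hypothetical:

    /* Hypothetical LLD-style decoding of the packed value. */
    static void example_log_unique_tag(struct request *rq)
    {
            u32 unique = blk_mq_unique_tag(rq);
            u16 hwq = blk_mq_unique_tag_to_hwq(unique);   /* unique >> BLK_MQ_UNIQUE_TAG_BITS */
            u16 tag = blk_mq_unique_tag_to_tag(unique);   /* unique & BLK_MQ_UNIQUE_TAG_MASK */

            pr_debug("hwq %u tag %u\n", hwq, tag);
    }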