.. | ..
| 1 | +// SPDX-License-Identifier: GPL-2.0
1 | 2 | /*
2 | 3 |  * Tag allocation using scalable bitmaps. Uses active queue tracking to support
3 | 4 |  * fairer distribution of tags between multiple submitters when a shared tag map
.. | ..
9 | 10 | #include <linux/module.h>
10 | 11 |
11 | 12 | #include <linux/blk-mq.h>
| 13 | +#include <linux/delay.h>
12 | 14 | #include "blk.h"
13 | 15 | #include "blk-mq.h"
14 | 16 | #include "blk-mq-tag.h"
15 | | -
16 | | -bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
17 | | -{
18 | | -	if (!tags)
19 | | -		return true;
20 | | -
21 | | -	return sbitmap_any_bit_clear(&tags->bitmap_tags.sb);
22 | | -}
23 | 17 |
24 | 18 | /*
25 | 19 |  * If a previously inactive queue goes active, bump the active user count.
.. | ..
29 | 23 |  */
30 | 24 | bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
31 | 25 | {
32 | | -	if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
33 | | -	    !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
34 | | -		atomic_inc(&hctx->tags->active_queues);
| 26 | +	if (blk_mq_is_sbitmap_shared(hctx->flags)) {
| 27 | +		struct request_queue *q = hctx->queue;
| 28 | +		struct blk_mq_tag_set *set = q->tag_set;
| 29 | +
| 30 | +		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
| 31 | +		    !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
| 32 | +			atomic_inc(&set->active_queues_shared_sbitmap);
| 33 | +	} else {
| 34 | +		if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
| 35 | +		    !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
| 36 | +			atomic_inc(&hctx->tags->active_queues);
| 37 | +	}
35 | 38 |
36 | 39 | 	return true;
37 | 40 | }
.. | ..
41 | 44 |  */
42 | 45 | void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
43 | 46 | {
44 | | -	sbitmap_queue_wake_all(&tags->bitmap_tags);
| 47 | +	sbitmap_queue_wake_all(tags->bitmap_tags);
45 | 48 | 	if (include_reserve)
46 | | -		sbitmap_queue_wake_all(&tags->breserved_tags);
| 49 | +		sbitmap_queue_wake_all(tags->breserved_tags);
47 | 50 | }
48 | 51 |
49 | 52 | /*
.. | ..
53 | 56 | void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
54 | 57 | {
55 | 58 | 	struct blk_mq_tags *tags = hctx->tags;
| 59 | +	struct request_queue *q = hctx->queue;
| 60 | +	struct blk_mq_tag_set *set = q->tag_set;
56 | 61 |
57 | | -	if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
58 | | -		return;
59 | | -
60 | | -	atomic_dec(&tags->active_queues);
| 62 | +	if (blk_mq_is_sbitmap_shared(hctx->flags)) {
| 63 | +		if (!test_and_clear_bit(QUEUE_FLAG_HCTX_ACTIVE,
| 64 | +					&q->queue_flags))
| 65 | +			return;
| 66 | +		atomic_dec(&set->active_queues_shared_sbitmap);
| 67 | +	} else {
| 68 | +		if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
| 69 | +			return;
| 70 | +		atomic_dec(&tags->active_queues);
| 71 | +	}
61 | 72 |
62 | 73 | 	blk_mq_tag_wakeup_all(tags, false);
63 | | -}
64 | | -
65 | | -/*
66 | | - * For shared tag users, we track the number of currently active users
67 | | - * and attempt to provide a fair share of the tag depth for each of them.
68 | | - */
69 | | -static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
70 | | -				  struct sbitmap_queue *bt)
71 | | -{
72 | | -	unsigned int depth, users;
73 | | -
74 | | -	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED))
75 | | -		return true;
76 | | -	if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
77 | | -		return true;
78 | | -
79 | | -	/*
80 | | -	 * Don't try dividing an ant
81 | | -	 */
82 | | -	if (bt->sb.depth == 1)
83 | | -		return true;
84 | | -
85 | | -	users = atomic_read(&hctx->tags->active_queues);
86 | | -	if (!users)
87 | | -		return true;
88 | | -
89 | | -	/*
90 | | -	 * Allow at least some tags
91 | | -	 */
92 | | -	depth = max((bt->sb.depth + users - 1) / users, 4U);
93 | | -	return atomic_read(&hctx->nr_active) < depth;
94 | 74 | }
95 | 75 |
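A note on the fair-share logic removed above: __blk_mq_get_tag() below still calls hctx_may_queue(), so the helper has moved out of this file rather than gone away. Its cap is depth = max((bt->sb.depth + users - 1) / users, 4U), i.e. each active queue gets roughly an equal slice of the bitmap but never fewer than four tags. Two illustrative data points (numbers chosen only for the example):

    depth = 256, users = 3    ->  max((256 + 2) / 3, 4)     = 86 tags per queue
    depth = 256, users = 128  ->  max((256 + 127) / 128, 4) =  4 tags per queue

A hardware queue may keep allocating only while its hctx->nr_active count stays below that bound.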
96 | 76 | static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
97 | 77 | 			    struct sbitmap_queue *bt)
98 | 78 | {
99 | | -	if (!(data->flags & BLK_MQ_REQ_INTERNAL) &&
100 | | -	    !hctx_may_queue(data->hctx, bt))
101 | | -		return -1;
| 79 | +	if (!data->q->elevator && !(data->flags & BLK_MQ_REQ_RESERVED) &&
| 80 | +	    !hctx_may_queue(data->hctx, bt))
| 81 | +		return BLK_MQ_NO_TAG;
| 82 | +
102 | 83 | 	if (data->shallow_depth)
103 | 84 | 		return __sbitmap_queue_get_shallow(bt, data->shallow_depth);
104 | 85 | 	else
.. | ..
110 | 91 | 	struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
111 | 92 | 	struct sbitmap_queue *bt;
112 | 93 | 	struct sbq_wait_state *ws;
113 | | -	DEFINE_WAIT(wait);
| 94 | +	DEFINE_SBQ_WAIT(wait);
114 | 95 | 	unsigned int tag_offset;
115 | | -	bool drop_ctx;
116 | 96 | 	int tag;
117 | 97 |
118 | 98 | 	if (data->flags & BLK_MQ_REQ_RESERVED) {
119 | 99 | 		if (unlikely(!tags->nr_reserved_tags)) {
120 | 100 | 			WARN_ON_ONCE(1);
121 | | -			return BLK_MQ_TAG_FAIL;
| 101 | +			return BLK_MQ_NO_TAG;
122 | 102 | 		}
123 | | -		bt = &tags->breserved_tags;
| 103 | +		bt = tags->breserved_tags;
124 | 104 | 		tag_offset = 0;
125 | 105 | 	} else {
126 | | -		bt = &tags->bitmap_tags;
| 106 | +		bt = tags->bitmap_tags;
127 | 107 | 		tag_offset = tags->nr_reserved_tags;
128 | 108 | 	}
129 | 109 |
130 | 110 | 	tag = __blk_mq_get_tag(data, bt);
131 | | -	if (tag != -1)
| 111 | +	if (tag != BLK_MQ_NO_TAG)
132 | 112 | 		goto found_tag;
133 | 113 |
134 | 114 | 	if (data->flags & BLK_MQ_REQ_NOWAIT)
135 | | -		return BLK_MQ_TAG_FAIL;
| 115 | +		return BLK_MQ_NO_TAG;
136 | 116 |
137 | 117 | 	ws = bt_wait_ptr(bt, data->hctx);
138 | | -	drop_ctx = data->ctx == NULL;
139 | 118 | 	do {
140 | 119 | 		struct sbitmap_queue *bt_prev;
141 | 120 |
.. | ..
151 | 130 | 		 * as running the queue may also have found completions.
152 | 131 | 		 */
153 | 132 | 		tag = __blk_mq_get_tag(data, bt);
154 | | -		if (tag != -1)
| 133 | +		if (tag != BLK_MQ_NO_TAG)
155 | 134 | 			break;
156 | 135 |
157 | | -		prepare_to_wait_exclusive(&ws->wait, &wait,
158 | | -					  TASK_UNINTERRUPTIBLE);
| 136 | +		sbitmap_prepare_to_wait(bt, ws, &wait, TASK_UNINTERRUPTIBLE);
159 | 137 |
160 | 138 | 		tag = __blk_mq_get_tag(data, bt);
161 | | -		if (tag != -1)
| 139 | +		if (tag != BLK_MQ_NO_TAG)
162 | 140 | 			break;
163 | | -
164 | | -		if (data->ctx)
165 | | -			blk_mq_put_ctx(data->ctx);
166 | 141 |
167 | 142 | 		bt_prev = bt;
168 | 143 | 		io_schedule();
169 | 144 |
| 145 | +		sbitmap_finish_wait(bt, ws, &wait);
| 146 | +
170 | 147 | 		data->ctx = blk_mq_get_ctx(data->q);
171 | | -		data->hctx = blk_mq_map_queue(data->q, data->ctx->cpu);
| 148 | +		data->hctx = blk_mq_map_queue(data->q, data->cmd_flags,
| 149 | +					      data->ctx);
172 | 150 | 		tags = blk_mq_tags_from_data(data);
173 | 151 | 		if (data->flags & BLK_MQ_REQ_RESERVED)
174 | | -			bt = &tags->breserved_tags;
| 152 | +			bt = tags->breserved_tags;
175 | 153 | 		else
176 | | -			bt = &tags->bitmap_tags;
177 | | -
178 | | -		finish_wait(&ws->wait, &wait);
| 154 | +			bt = tags->bitmap_tags;
179 | 155 |
180 | 156 | 		/*
181 | 157 | 		 * If destination hw queue is changed, fake wake up on
.. | ..
188 | 164 | 		ws = bt_wait_ptr(bt, data->hctx);
189 | 165 | 	} while (1);
190 | 166 |
191 | | -	if (drop_ctx && data->ctx)
192 | | -		blk_mq_put_ctx(data->ctx);
193 | | -
194 | | -	finish_wait(&ws->wait, &wait);
| 167 | +	sbitmap_finish_wait(bt, ws, &wait);
195 | 168 |
196 | 169 | found_tag:
| 170 | +	/*
| 171 | +	 * Give up this allocation if the hctx is inactive. The caller will
| 172 | +	 * retry on an active hctx.
| 173 | +	 */
| 174 | +	if (unlikely(test_bit(BLK_MQ_S_INACTIVE, &data->hctx->state))) {
| 175 | +		blk_mq_put_tag(tags, data->ctx, tag + tag_offset);
| 176 | +		return BLK_MQ_NO_TAG;
| 177 | +	}
197 | 178 | 	return tag + tag_offset;
198 | 179 | }
199 | 180 |
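For reference, a minimal sketch of how a caller is expected to treat the new BLK_MQ_NO_TAG sentinel. The function below is hypothetical and not part of this patch; it only reuses identifiers visible in the diff:

    static struct request *example_alloc_rq(struct blk_mq_alloc_data *data)
    {
    	unsigned int tag = blk_mq_get_tag(data);

    	/* No free tag, or the hctx went inactive under us: let the caller retry. */
    	if (tag == BLK_MQ_NO_TAG)
    		return NULL;
    	return blk_mq_tags_from_data(data)->static_rqs[tag];
    }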
200 | | -void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
201 | | -		    struct blk_mq_ctx *ctx, unsigned int tag)
| 181 | +void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
| 182 | +		    unsigned int tag)
202 | 183 | {
203 | 184 | 	if (!blk_mq_tag_is_reserved(tags, tag)) {
204 | 185 | 		const int real_tag = tag - tags->nr_reserved_tags;
205 | 186 |
206 | 187 | 		BUG_ON(real_tag >= tags->nr_tags);
207 | | -		sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu);
| 188 | +		sbitmap_queue_clear(tags->bitmap_tags, real_tag, ctx->cpu);
208 | 189 | 	} else {
209 | 190 | 		BUG_ON(tag >= tags->nr_reserved_tags);
210 | | -		sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu);
| 191 | +		sbitmap_queue_clear(tags->breserved_tags, tag, ctx->cpu);
211 | 192 | 	}
212 | 193 | }
213 | 194 |
.. | ..
218 | 199 | 	bool reserved;
219 | 200 | };
220 | 201 |
| 202 | +static struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags,
| 203 | +		unsigned int bitnr)
| 204 | +{
| 205 | +	struct request *rq;
| 206 | +	unsigned long flags;
| 207 | +
| 208 | +	spin_lock_irqsave(&tags->lock, flags);
| 209 | +	rq = tags->rqs[bitnr];
| 210 | +	if (!rq || rq->tag != bitnr || !refcount_inc_not_zero(&rq->ref))
| 211 | +		rq = NULL;
| 212 | +	spin_unlock_irqrestore(&tags->lock, flags);
| 213 | +	return rq;
| 214 | +}
| 215 | +
221 | 216 | static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
222 | 217 | {
223 | 218 | 	struct bt_iter_data *iter_data = data;
.. | ..
225 | 220 | 	struct blk_mq_tags *tags = hctx->tags;
226 | 221 | 	bool reserved = iter_data->reserved;
227 | 222 | 	struct request *rq;
| 223 | +	bool ret = true;
228 | 224 |
229 | 225 | 	if (!reserved)
230 | 226 | 		bitnr += tags->nr_reserved_tags;
231 | | -	rq = tags->rqs[bitnr];
232 | | -
233 | 227 | 	/*
234 | 228 | 	 * We can hit rq == NULL here, because the tagging functions
235 | | -	 * test and set the bit before assining ->rqs[].
| 229 | +	 * test and set the bit before assigning ->rqs[].
236 | 230 | 	 */
237 | | -	if (rq && rq->q == hctx->queue)
238 | | -		iter_data->fn(hctx, rq, iter_data->data, reserved);
239 | | -	return true;
| 231 | +	rq = blk_mq_find_and_get_req(tags, bitnr);
| 232 | +	if (!rq)
| 233 | +		return true;
| 234 | +
| 235 | +	if (rq->q == hctx->queue && rq->mq_hctx == hctx)
| 236 | +		ret = iter_data->fn(hctx, rq, iter_data->data, reserved);
| 237 | +	blk_mq_put_rq_ref(rq);
| 238 | +	return ret;
240 | 239 | }
241 | 240 |
| 241 | +/**
| 242 | + * bt_for_each - iterate over the requests associated with a hardware queue
| 243 | + * @hctx:	Hardware queue to examine.
| 244 | + * @bt:		sbitmap to examine. This is either the breserved_tags member
| 245 | + *		or the bitmap_tags member of struct blk_mq_tags.
| 246 | + * @fn:		Pointer to the function that will be called for each request
| 247 | + *		associated with @hctx that has been assigned a driver tag.
| 248 | + *		@fn will be called as follows: @fn(@hctx, rq, @data, @reserved)
| 249 | + *		where rq is a pointer to a request. Return true to continue
| 250 | + *		iterating tags, false to stop.
| 251 | + * @data:	Will be passed as third argument to @fn.
| 252 | + * @reserved:	Indicates whether @bt is the breserved_tags member or the
| 253 | + *		bitmap_tags member of struct blk_mq_tags.
| 254 | + */
242 | 255 | static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt,
243 | 256 | 			busy_iter_fn *fn, void *data, bool reserved)
244 | 257 | {
.. | ..
256 | 269 | 	struct blk_mq_tags *tags;
257 | 270 | 	busy_tag_iter_fn *fn;
258 | 271 | 	void *data;
259 | | -	bool reserved;
| 272 | +	unsigned int flags;
260 | 273 | };
| 274 | +
| 275 | +#define BT_TAG_ITER_RESERVED		(1 << 0)
| 276 | +#define BT_TAG_ITER_STARTED		(1 << 1)
| 277 | +#define BT_TAG_ITER_STATIC_RQS		(1 << 2)
261 | 278 |
262 | 279 | static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
263 | 280 | {
264 | 281 | 	struct bt_tags_iter_data *iter_data = data;
265 | 282 | 	struct blk_mq_tags *tags = iter_data->tags;
266 | | -	bool reserved = iter_data->reserved;
| 283 | +	bool reserved = iter_data->flags & BT_TAG_ITER_RESERVED;
267 | 284 | 	struct request *rq;
| 285 | +	bool ret = true;
| 286 | +	bool iter_static_rqs = !!(iter_data->flags & BT_TAG_ITER_STATIC_RQS);
268 | 287 |
269 | 288 | 	if (!reserved)
270 | 289 | 		bitnr += tags->nr_reserved_tags;
271 | 290 |
272 | 291 | 	/*
273 | 292 | 	 * We can hit rq == NULL here, because the tagging functions
274 | | -	 * test and set the bit before assining ->rqs[].
| 293 | +	 * test and set the bit before assigning ->rqs[].
275 | 294 | 	 */
276 | | -	rq = tags->rqs[bitnr];
277 | | -	if (rq && blk_mq_request_started(rq))
278 | | -		iter_data->fn(rq, iter_data->data, reserved);
| 295 | +	if (iter_static_rqs)
| 296 | +		rq = tags->static_rqs[bitnr];
| 297 | +	else
| 298 | +		rq = blk_mq_find_and_get_req(tags, bitnr);
| 299 | +	if (!rq)
| 300 | +		return true;
279 | 301 |
280 | | -	return true;
| 302 | +	if (!(iter_data->flags & BT_TAG_ITER_STARTED) ||
| 303 | +	    blk_mq_request_started(rq))
| 304 | +		ret = iter_data->fn(rq, iter_data->data, reserved);
| 305 | +	if (!iter_static_rqs)
| 306 | +		blk_mq_put_rq_ref(rq);
| 307 | +	return ret;
281 | 308 | }
282 | 309 |
| 310 | +/**
| 311 | + * bt_tags_for_each - iterate over the requests in a tag map
| 312 | + * @tags:	Tag map to iterate over.
| 313 | + * @bt:		sbitmap to examine. This is either the breserved_tags member
| 314 | + *		or the bitmap_tags member of struct blk_mq_tags.
| 315 | + * @fn:		Pointer to the function that will be called for each started
| 316 | + *		request. @fn will be called as follows: @fn(rq, @data,
| 317 | + *		@reserved) where rq is a pointer to a request. Return true
| 318 | + *		to continue iterating tags, false to stop.
| 319 | + * @data:	Will be passed as second argument to @fn.
| 320 | + * @flags:	BT_TAG_ITER_*
| 321 | + */
283 | 322 | static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt,
284 | | -			     busy_tag_iter_fn *fn, void *data, bool reserved)
| 323 | +			     busy_tag_iter_fn *fn, void *data, unsigned int flags)
285 | 324 | {
286 | 325 | 	struct bt_tags_iter_data iter_data = {
287 | 326 | 		.tags = tags,
288 | 327 | 		.fn = fn,
289 | 328 | 		.data = data,
290 | | -		.reserved = reserved,
| 329 | +		.flags = flags,
291 | 330 | 	};
292 | 331 |
293 | 332 | 	if (tags->rqs)
294 | 333 | 		sbitmap_for_each_set(&bt->sb, bt_tags_iter, &iter_data);
295 | 334 | }
296 | 335 |
297 | | -static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags,
298 | | -		busy_tag_iter_fn *fn, void *priv)
| 336 | +static void __blk_mq_all_tag_iter(struct blk_mq_tags *tags,
| 337 | +		busy_tag_iter_fn *fn, void *priv, unsigned int flags)
299 | 338 | {
| 339 | +	WARN_ON_ONCE(flags & BT_TAG_ITER_RESERVED);
| 340 | +
300 | 341 | 	if (tags->nr_reserved_tags)
301 | | -		bt_tags_for_each(tags, &tags->breserved_tags, fn, priv, true);
302 | | -	bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, false);
| 342 | +		bt_tags_for_each(tags, tags->breserved_tags, fn, priv,
| 343 | +				 flags | BT_TAG_ITER_RESERVED);
| 344 | +	bt_tags_for_each(tags, tags->bitmap_tags, fn, priv, flags);
303 | 345 | }
304 | 346 |
| 347 | +/**
| 348 | + * blk_mq_all_tag_iter - iterate over all requests in a tag map
| 349 | + * @tags:	Tag map to iterate over.
| 350 | + * @fn:		Pointer to the function that will be called for each
| 351 | + *		request. @fn will be called as follows: @fn(rq, @priv,
| 352 | + *		reserved) where rq is a pointer to a request. 'reserved'
| 353 | + *		indicates whether or not @rq is a reserved request. Return
| 354 | + *		true to continue iterating tags, false to stop.
| 355 | + * @priv:	Will be passed as second argument to @fn.
| 356 | + *
| 357 | + * Caller has to pass the tag map from which requests are allocated.
| 358 | + */
| 359 | +void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
| 360 | +		void *priv)
| 361 | +{
| 362 | +	__blk_mq_all_tag_iter(tags, fn, priv, BT_TAG_ITER_STATIC_RQS);
| 363 | +}
| 364 | +
| 365 | +/**
| 366 | + * blk_mq_tagset_busy_iter - iterate over all started requests in a tag set
| 367 | + * @tagset:	Tag set to iterate over.
| 368 | + * @fn:		Pointer to the function that will be called for each started
| 369 | + *		request. @fn will be called as follows: @fn(rq, @priv,
| 370 | + *		reserved) where rq is a pointer to a request. 'reserved'
| 371 | + *		indicates whether or not @rq is a reserved request. Return
| 372 | + *		true to continue iterating tags, false to stop.
| 373 | + * @priv:	Will be passed as second argument to @fn.
| 374 | + *
| 375 | + * We grab one request reference before calling @fn and release it after
| 376 | + * @fn returns.
| 377 | + */
305 | 378 | void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
306 | 379 | 		busy_tag_iter_fn *fn, void *priv)
307 | 380 | {
.. | ..
309 | 382 |
310 | 383 | 	for (i = 0; i < tagset->nr_hw_queues; i++) {
311 | 384 | 		if (tagset->tags && tagset->tags[i])
312 | | -			blk_mq_all_tag_busy_iter(tagset->tags[i], fn, priv);
| 385 | +			__blk_mq_all_tag_iter(tagset->tags[i], fn, priv,
| 386 | +					      BT_TAG_ITER_STARTED);
313 | 387 | 	}
314 | 388 | }
315 | 389 | EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
316 | 390 |
| 391 | +static bool blk_mq_tagset_count_completed_rqs(struct request *rq,
| 392 | +		void *data, bool reserved)
| 393 | +{
| 394 | +	unsigned *count = data;
| 395 | +
| 396 | +	if (blk_mq_request_completed(rq))
| 397 | +		(*count)++;
| 398 | +	return true;
| 399 | +}
| 400 | +
| 401 | +/**
| 402 | + * blk_mq_tagset_wait_completed_request - wait until all completed requests'
| 403 | + * complete function is run
| 404 | + * @tagset:	Tag set to drain completed requests from
| 405 | + *
| 406 | + * Note: This function has to be run after all IO queues are shut down
| 407 | + */
| 408 | +void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset)
| 409 | +{
| 410 | +	while (true) {
| 411 | +		unsigned count = 0;
| 412 | +
| 413 | +		blk_mq_tagset_busy_iter(tagset,
| 414 | +				blk_mq_tagset_count_completed_rqs, &count);
| 415 | +		if (!count)
| 416 | +			break;
| 417 | +		msleep(5);
| 418 | +	}
| 419 | +}
| 420 | +EXPORT_SYMBOL(blk_mq_tagset_wait_completed_request);
| 421 | +
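A hedged usage sketch of the new drain helper. The surrounding driver code is hypothetical; it only illustrates the intended pattern of forcing outstanding requests to completion and then waiting for their ->complete handlers to finish:

    static bool example_cancel_rq(struct request *rq, void *data, bool reserved)
    {
    	blk_mq_complete_request(rq);	/* kick the completion machinery */
    	return true;			/* keep iterating over busy tags */
    }

    static void example_teardown(struct blk_mq_tag_set *set, struct request_queue *q)
    {
    	blk_mq_quiesce_queue(q);			/* stop new dispatches */
    	blk_mq_tagset_busy_iter(set, example_cancel_rq, NULL);
    	blk_mq_tagset_wait_completed_request(set);	/* poll until ->complete has run */
    }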
| 422 | +/**
| 423 | + * blk_mq_queue_tag_busy_iter - iterate over all requests with a driver tag
| 424 | + * @q:		Request queue to examine.
| 425 | + * @fn:		Pointer to the function that will be called for each request
| 426 | + *		on @q. @fn will be called as follows: @fn(hctx, rq, @priv,
| 427 | + *		reserved) where rq is a pointer to a request and hctx points
| 428 | + *		to the hardware queue associated with the request. 'reserved'
| 429 | + *		indicates whether or not @rq is a reserved request.
| 430 | + * @priv:	Will be passed as third argument to @fn.
| 431 | + *
| 432 | + * Note: if @q->tag_set is shared with other request queues then @fn will be
| 433 | + * called for all requests on all queues that share that tag set and not only
| 434 | + * for requests associated with @q.
| 435 | + */
317 | 436 | void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
318 | 437 | 		void *priv)
319 | 438 | {
.. | ..
321 | 440 | 	int i;
322 | 441 |
323 | 442 | 	/*
324 | | -	 * __blk_mq_update_nr_hw_queues will update the nr_hw_queues and
325 | | -	 * queue_hw_ctx after freeze the queue, so we use q_usage_counter
326 | | -	 * to avoid race with it.
| 443 | +	 * __blk_mq_update_nr_hw_queues() updates nr_hw_queues and queue_hw_ctx
| 444 | +	 * while the queue is frozen. So we can use q_usage_counter to avoid
| 445 | +	 * racing with it.
327 | 446 | 	 */
328 | 447 | 	if (!percpu_ref_tryget(&q->q_usage_counter))
329 | 448 | 		return;
.. | ..
332 | 451 | 		struct blk_mq_tags *tags = hctx->tags;
333 | 452 |
334 | 453 | 		/*
335 | | -		 * If not software queues are currently mapped to this
| 454 | +		 * If no software queues are currently mapped to this
336 | 455 | 		 * hardware queue, there's nothing to check
337 | 456 | 		 */
338 | 457 | 		if (!blk_mq_hw_queue_mapped(hctx))
339 | 458 | 			continue;
340 | 459 |
341 | 460 | 		if (tags->nr_reserved_tags)
342 | | -			bt_for_each(hctx, &tags->breserved_tags, fn, priv, true);
343 | | -		bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false);
| 461 | +			bt_for_each(hctx, tags->breserved_tags, fn, priv, true);
| 462 | +		bt_for_each(hctx, tags->bitmap_tags, fn, priv, false);
344 | 463 | 	}
345 | 464 | 	blk_queue_exit(q);
346 | 465 | }
.. | ..
352 | 471 | 				 node);
353 | 472 | }
354 | 473 |
355 | | -static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
356 | | -						   int node, int alloc_policy)
| 474 | +static int blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
| 475 | +				   int node, int alloc_policy)
357 | 476 | {
358 | 477 | 	unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
359 | 478 | 	bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;
360 | 479 |
361 | | -	if (bt_alloc(&tags->bitmap_tags, depth, round_robin, node))
362 | | -		goto free_tags;
363 | | -	if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, round_robin,
364 | | -		     node))
| 480 | +	if (bt_alloc(&tags->__bitmap_tags, depth, round_robin, node))
| 481 | +		return -ENOMEM;
| 482 | +	if (bt_alloc(&tags->__breserved_tags, tags->nr_reserved_tags,
| 483 | +		     round_robin, node))
365 | 484 | 		goto free_bitmap_tags;
366 | 485 |
367 | | -	return tags;
| 486 | +	tags->bitmap_tags = &tags->__bitmap_tags;
| 487 | +	tags->breserved_tags = &tags->__breserved_tags;
| 488 | +
| 489 | +	return 0;
368 | 490 | free_bitmap_tags:
369 | | -	sbitmap_queue_free(&tags->bitmap_tags);
370 | | -free_tags:
371 | | -	kfree(tags);
372 | | -	return NULL;
| 491 | +	sbitmap_queue_free(&tags->__bitmap_tags);
| 492 | +	return -ENOMEM;
| 493 | +}
| 494 | +
| 495 | +int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set, unsigned int flags)
| 496 | +{
| 497 | +	unsigned int depth = set->queue_depth - set->reserved_tags;
| 498 | +	int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags);
| 499 | +	bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;
| 500 | +	int i, node = set->numa_node;
| 501 | +
| 502 | +	if (bt_alloc(&set->__bitmap_tags, depth, round_robin, node))
| 503 | +		return -ENOMEM;
| 504 | +	if (bt_alloc(&set->__breserved_tags, set->reserved_tags,
| 505 | +		     round_robin, node))
| 506 | +		goto free_bitmap_tags;
| 507 | +
| 508 | +	for (i = 0; i < set->nr_hw_queues; i++) {
| 509 | +		struct blk_mq_tags *tags = set->tags[i];
| 510 | +
| 511 | +		tags->bitmap_tags = &set->__bitmap_tags;
| 512 | +		tags->breserved_tags = &set->__breserved_tags;
| 513 | +	}
| 514 | +
| 515 | +	return 0;
| 516 | +free_bitmap_tags:
| 517 | +	sbitmap_queue_free(&set->__bitmap_tags);
| 518 | +	return -ENOMEM;
| 519 | +}
| 520 | +
| 521 | +void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *set)
| 522 | +{
| 523 | +	sbitmap_queue_free(&set->__bitmap_tags);
| 524 | +	sbitmap_queue_free(&set->__breserved_tags);
373 | 525 | }
374 | 526 |
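The counterpart on the driver side lives outside this file; a minimal sketch, assuming only the flag referenced in this patch. The helper below is hypothetical and omits the other tag_set fields a real driver must fill in (such as ->ops and ->numa_node):

    static int example_setup_shared_tag_set(struct blk_mq_tag_set *set,
    					    unsigned int nr_hw_queues,
    					    unsigned int queue_depth)
    {
    	set->nr_hw_queues = nr_hw_queues;
    	set->queue_depth = queue_depth;
    	/* One sbitmap pair shared by every hardware queue in the set. */
    	set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_TAG_HCTX_SHARED;
    	/* blk_mq_alloc_tag_set() is then expected to set up the shared sbitmap. */
    	return blk_mq_alloc_tag_set(set);
    }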
---|
375 | 527 | struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, |
---|
376 | 528 | unsigned int reserved_tags, |
---|
377 | | - int node, int alloc_policy) |
---|
| 529 | + int node, unsigned int flags) |
---|
378 | 530 | { |
---|
| 531 | + int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(flags); |
---|
379 | 532 | struct blk_mq_tags *tags; |
---|
380 | 533 | |
---|
381 | 534 | if (total_tags > BLK_MQ_TAG_MAX) { |
---|
.. | .. |
---|
389 | 542 | |
---|
390 | 543 | tags->nr_tags = total_tags; |
---|
391 | 544 | tags->nr_reserved_tags = reserved_tags; |
---|
| 545 | + spin_lock_init(&tags->lock); |
---|
392 | 546 | |
---|
393 | | - return blk_mq_init_bitmap_tags(tags, node, alloc_policy); |
---|
| 547 | + if (flags & BLK_MQ_F_TAG_HCTX_SHARED) |
---|
| 548 | + return tags; |
---|
| 549 | + |
---|
| 550 | + if (blk_mq_init_bitmap_tags(tags, node, alloc_policy) < 0) { |
---|
| 551 | + kfree(tags); |
---|
| 552 | + return NULL; |
---|
| 553 | + } |
---|
| 554 | + return tags; |
---|
394 | 555 | } |
---|
395 | 556 | |
---|
396 | | -void blk_mq_free_tags(struct blk_mq_tags *tags) |
---|
| 557 | +void blk_mq_free_tags(struct blk_mq_tags *tags, unsigned int flags) |
---|
397 | 558 | { |
---|
398 | | - sbitmap_queue_free(&tags->bitmap_tags); |
---|
399 | | - sbitmap_queue_free(&tags->breserved_tags); |
---|
| 559 | + if (!(flags & BLK_MQ_F_TAG_HCTX_SHARED)) { |
---|
| 560 | + sbitmap_queue_free(tags->bitmap_tags); |
---|
| 561 | + sbitmap_queue_free(tags->breserved_tags); |
---|
| 562 | + } |
---|
400 | 563 | kfree(tags); |
---|
401 | 564 | } |
---|
402 | 565 | |
---|
.. | .. |
---|
415 | 578 | */ |
---|
416 | 579 | if (tdepth > tags->nr_tags) { |
---|
417 | 580 | struct blk_mq_tag_set *set = hctx->queue->tag_set; |
---|
| 581 | + /* Only sched tags can grow, so clear HCTX_SHARED flag */ |
---|
| 582 | + unsigned int flags = set->flags & ~BLK_MQ_F_TAG_HCTX_SHARED; |
---|
418 | 583 | struct blk_mq_tags *new; |
---|
419 | 584 | bool ret; |
---|
420 | 585 | |
---|
.. | .. |
---|
429 | 594 | return -EINVAL; |
---|
430 | 595 | |
---|
431 | 596 | new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth, |
---|
432 | | - tags->nr_reserved_tags); |
---|
| 597 | + tags->nr_reserved_tags, flags); |
---|
433 | 598 | if (!new) |
---|
434 | 599 | return -ENOMEM; |
---|
435 | 600 | ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth); |
---|
436 | 601 | if (ret) { |
---|
437 | | - blk_mq_free_rq_map(new); |
---|
| 602 | + blk_mq_free_rq_map(new, flags); |
---|
438 | 603 | return -ENOMEM; |
---|
439 | 604 | } |
---|
440 | 605 | |
---|
441 | 606 | blk_mq_free_rqs(set, *tagsptr, hctx->queue_num); |
---|
442 | | - blk_mq_free_rq_map(*tagsptr); |
---|
| 607 | + blk_mq_free_rq_map(*tagsptr, flags); |
---|
443 | 608 | *tagsptr = new; |
---|
444 | 609 | } else { |
---|
445 | 610 | /* |
---|
446 | 611 | * Don't need (or can't) update reserved tags here, they |
---|
447 | 612 | * remain static and should never need resizing. |
---|
448 | 613 | */ |
---|
449 | | - sbitmap_queue_resize(&tags->bitmap_tags, |
---|
| 614 | + sbitmap_queue_resize(tags->bitmap_tags, |
---|
450 | 615 | tdepth - tags->nr_reserved_tags); |
---|
451 | 616 | } |
---|
452 | 617 | |
---|
453 | 618 | return 0; |
---|
| 619 | +} |
---|
| 620 | + |
---|
| 621 | +void blk_mq_tag_resize_shared_sbitmap(struct blk_mq_tag_set *set, unsigned int size) |
---|
| 622 | +{ |
---|
| 623 | + sbitmap_queue_resize(&set->__bitmap_tags, size - set->reserved_tags); |
---|
454 | 624 | } |
---|
455 | 625 | |
---|
456 | 626 | /** |
---|
.. | .. |
---|
467 | 637 | */ |
---|
468 | 638 | u32 blk_mq_unique_tag(struct request *rq) |
---|
469 | 639 | { |
---|
470 | | - struct request_queue *q = rq->q; |
---|
471 | | - struct blk_mq_hw_ctx *hctx; |
---|
472 | | - int hwq = 0; |
---|
473 | | - |
---|
474 | | - if (q->mq_ops) { |
---|
475 | | - hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu); |
---|
476 | | - hwq = hctx->queue_num; |
---|
477 | | - } |
---|
478 | | - |
---|
479 | | - return (hwq << BLK_MQ_UNIQUE_TAG_BITS) | |
---|
| 640 | + return (rq->mq_hctx->queue_num << BLK_MQ_UNIQUE_TAG_BITS) | |
---|
480 | 641 | (rq->tag & BLK_MQ_UNIQUE_TAG_MASK); |
---|
481 | 642 | } |
---|
482 | 643 | EXPORT_SYMBOL(blk_mq_unique_tag); |
---|