.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0 |
---|
1 | 2 | /* |
---|
2 | 3 | * Copyright (C) 1991, 1992 Linus Torvalds |
---|
3 | 4 | * Copyright (C) 1994, Karl Keyte: Added support for disk statistics |
---|
.. | .. |
---|
19 | 20 | #include <linux/blk-mq.h> |
---|
20 | 21 | #include <linux/highmem.h> |
---|
21 | 22 | #include <linux/mm.h> |
---|
| 23 | +#include <linux/pagemap.h> |
---|
22 | 24 | #include <linux/kernel_stat.h> |
---|
23 | 25 | #include <linux/string.h> |
---|
24 | 26 | #include <linux/init.h> |
---|
.. | .. |
---|
33 | 35 | #include <linux/ratelimit.h> |
---|
34 | 36 | #include <linux/pm_runtime.h> |
---|
35 | 37 | #include <linux/blk-cgroup.h> |
---|
| 38 | +#include <linux/t10-pi.h> |
---|
36 | 39 | #include <linux/debugfs.h> |
---|
37 | 40 | #include <linux/bpf.h> |
---|
38 | 41 | #include <linux/psi.h> |
---|
| 42 | +#include <linux/sched/sysctl.h> |
---|
39 | 43 | #include <linux/blk-crypto.h> |
---|
40 | 44 | |
---|
41 | 45 | #define CREATE_TRACE_POINTS |
---|
.. | .. |
---|
44 | 48 | #include "blk.h" |
---|
45 | 49 | #include "blk-mq.h" |
---|
46 | 50 | #include "blk-mq-sched.h" |
---|
| 51 | +#include "blk-pm.h" |
---|
47 | 52 | #include "blk-rq-qos.h" |
---|
48 | 53 | |
---|
49 | | -#ifdef CONFIG_DEBUG_FS |
---|
50 | 54 | struct dentry *blk_debugfs_root; |
---|
51 | | -#endif |
---|
52 | 55 | |
---|
53 | 56 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); |
---|
54 | 57 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); |
---|
55 | 58 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); |
---|
56 | 59 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_split); |
---|
57 | 60 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug); |
---|
| 61 | +EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_queue); |
---|
| 62 | +EXPORT_TRACEPOINT_SYMBOL_GPL(block_getrq); |
---|
| 63 | +EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_insert); |
---|
| 64 | +EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_issue); |
---|
| 65 | +EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_merge); |
---|
| 66 | +EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_requeue); |
---|
| 67 | +EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_complete); |
---|
58 | 68 | |
---|
59 | 69 | DEFINE_IDA(blk_queue_ida); |
---|
60 | | - |
---|
61 | | -/* |
---|
62 | | - * For the allocated request tables |
---|
63 | | - */ |
---|
64 | | -struct kmem_cache *request_cachep; |
---|
65 | 70 | |
---|
66 | 71 | /* |
---|
67 | 72 | * For queue allocation |
---|
.. | .. |
---|
80 | 85 | */ |
---|
81 | 86 | void blk_queue_flag_set(unsigned int flag, struct request_queue *q) |
---|
82 | 87 | { |
---|
83 | | - unsigned long flags; |
---|
84 | | - |
---|
85 | | - spin_lock_irqsave(q->queue_lock, flags); |
---|
86 | | - queue_flag_set(flag, q); |
---|
87 | | - spin_unlock_irqrestore(q->queue_lock, flags); |
---|
| 88 | + set_bit(flag, &q->queue_flags); |
---|
88 | 89 | } |
---|
89 | 90 | EXPORT_SYMBOL(blk_queue_flag_set); |
---|
90 | 91 | |
---|
.. | .. |
---|
95 | 96 | */ |
---|
96 | 97 | void blk_queue_flag_clear(unsigned int flag, struct request_queue *q) |
---|
97 | 98 | { |
---|
98 | | - unsigned long flags; |
---|
99 | | - |
---|
100 | | - spin_lock_irqsave(q->queue_lock, flags); |
---|
101 | | - queue_flag_clear(flag, q); |
---|
102 | | - spin_unlock_irqrestore(q->queue_lock, flags); |
---|
| 99 | + clear_bit(flag, &q->queue_flags); |
---|
103 | 100 | } |
---|
104 | 101 | EXPORT_SYMBOL(blk_queue_flag_clear); |
---|
105 | 102 | |
---|
.. | .. |
---|
113 | 110 | */ |
---|
114 | 111 | bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q) |
---|
115 | 112 | { |
---|
116 | | - unsigned long flags; |
---|
117 | | - bool res; |
---|
118 | | - |
---|
119 | | - spin_lock_irqsave(q->queue_lock, flags); |
---|
120 | | - res = queue_flag_test_and_set(flag, q); |
---|
121 | | - spin_unlock_irqrestore(q->queue_lock, flags); |
---|
122 | | - |
---|
123 | | - return res; |
---|
| 113 | + return test_and_set_bit(flag, &q->queue_flags); |
---|
124 | 114 | } |
---|
125 | 115 | EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_set); |
---|
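
The three surviving helpers above now map straight onto the kernel's atomic bitops: set_bit(), clear_bit() and test_and_set_bit() are atomic read-modify-write operations on the single q->queue_flags word, so the queue_lock plus irqsave/irqrestore dance is no longer needed, and the test_and_clear variant is deleted outright in the hunk that follows. A minimal, runnable userspace sketch of the same pattern using C11 atomics; the flag numbers and the struct here are illustrative, not the kernel's:

```c
/* Userspace model of lock-free one-word flag helpers (illustrative only). */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define DEMO_FLAG_NOMERGES 1UL	/* hypothetical flag numbers */
#define DEMO_FLAG_DYING    2UL

struct demo_queue { atomic_ulong flags; };

static void demo_flag_set(unsigned long flag, struct demo_queue *q)
{
	atomic_fetch_or(&q->flags, 1UL << flag);	/* like set_bit() */
}

static void demo_flag_clear(unsigned long flag, struct demo_queue *q)
{
	atomic_fetch_and(&q->flags, ~(1UL << flag));	/* like clear_bit() */
}

static bool demo_flag_test_and_set(unsigned long flag, struct demo_queue *q)
{
	/* fetch_or returns the old word, so the previous bit value falls out */
	return atomic_fetch_or(&q->flags, 1UL << flag) & (1UL << flag);
}

int main(void)
{
	struct demo_queue q = { .flags = 0 };

	printf("first set:  %d\n", demo_flag_test_and_set(DEMO_FLAG_DYING, &q)); /* 0 */
	printf("second set: %d\n", demo_flag_test_and_set(DEMO_FLAG_DYING, &q)); /* 1 */
	demo_flag_set(DEMO_FLAG_NOMERGES, &q);
	demo_flag_clear(DEMO_FLAG_DYING, &q);
	return 0;
}
```
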
126 | | - |
---|
127 | | -/** |
---|
128 | | - * blk_queue_flag_test_and_clear - atomically test and clear a queue flag |
---|
129 | | - * @flag: flag to be cleared |
---|
130 | | - * @q: request queue |
---|
131 | | - * |
---|
132 | | - * Returns the previous value of @flag - 0 if the flag was not set and 1 if |
---|
133 | | - * the flag was set. |
---|
134 | | - */ |
---|
135 | | -bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q) |
---|
136 | | -{ |
---|
137 | | - unsigned long flags; |
---|
138 | | - bool res; |
---|
139 | | - |
---|
140 | | - spin_lock_irqsave(q->queue_lock, flags); |
---|
141 | | - res = queue_flag_test_and_clear(flag, q); |
---|
142 | | - spin_unlock_irqrestore(q->queue_lock, flags); |
---|
143 | | - |
---|
144 | | - return res; |
---|
145 | | -} |
---|
146 | | -EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_clear); |
---|
147 | | - |
---|
148 | | -static void blk_clear_congested(struct request_list *rl, int sync) |
---|
149 | | -{ |
---|
150 | | -#ifdef CONFIG_CGROUP_WRITEBACK |
---|
151 | | - clear_wb_congested(rl->blkg->wb_congested, sync); |
---|
152 | | -#else |
---|
153 | | - /* |
---|
154 | | - * If !CGROUP_WRITEBACK, all blkg's map to bdi->wb and we shouldn't |
---|
155 | | - * flip its congestion state for events on other blkcgs. |
---|
156 | | - */ |
---|
157 | | - if (rl == &rl->q->root_rl) |
---|
158 | | - clear_wb_congested(rl->q->backing_dev_info->wb.congested, sync); |
---|
159 | | -#endif |
---|
160 | | -} |
---|
161 | | - |
---|
162 | | -static void blk_set_congested(struct request_list *rl, int sync) |
---|
163 | | -{ |
---|
164 | | -#ifdef CONFIG_CGROUP_WRITEBACK |
---|
165 | | - set_wb_congested(rl->blkg->wb_congested, sync); |
---|
166 | | -#else |
---|
167 | | - /* see blk_clear_congested() */ |
---|
168 | | - if (rl == &rl->q->root_rl) |
---|
169 | | - set_wb_congested(rl->q->backing_dev_info->wb.congested, sync); |
---|
170 | | -#endif |
---|
171 | | -} |
---|
172 | | - |
---|
173 | | -void blk_queue_congestion_threshold(struct request_queue *q) |
---|
174 | | -{ |
---|
175 | | - int nr; |
---|
176 | | - |
---|
177 | | - nr = q->nr_requests - (q->nr_requests / 8) + 1; |
---|
178 | | - if (nr > q->nr_requests) |
---|
179 | | - nr = q->nr_requests; |
---|
180 | | - q->nr_congestion_on = nr; |
---|
181 | | - |
---|
182 | | - nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; |
---|
183 | | - if (nr < 1) |
---|
184 | | - nr = 1; |
---|
185 | | - q->nr_congestion_off = nr; |
---|
186 | | -} |
---|
187 | 116 | |
---|
188 | 117 | void blk_rq_init(struct request_queue *q, struct request *rq) |
---|
189 | 118 | { |
---|
190 | 119 | memset(rq, 0, sizeof(*rq)); |
---|
191 | 120 | |
---|
192 | 121 | INIT_LIST_HEAD(&rq->queuelist); |
---|
193 | | - INIT_LIST_HEAD(&rq->timeout_list); |
---|
194 | | - rq->cpu = -1; |
---|
195 | 122 | rq->q = q; |
---|
196 | 123 | rq->__sector = (sector_t) -1; |
---|
197 | 124 | INIT_HLIST_NODE(&rq->hash); |
---|
198 | 125 | RB_CLEAR_NODE(&rq->rb_node); |
---|
199 | | - rq->tag = -1; |
---|
200 | | - rq->internal_tag = -1; |
---|
| 126 | + rq->tag = BLK_MQ_NO_TAG; |
---|
| 127 | + rq->internal_tag = BLK_MQ_NO_TAG; |
---|
201 | 128 | rq->start_time_ns = ktime_get_ns(); |
---|
202 | 129 | rq->part = NULL; |
---|
203 | | - refcount_set(&rq->ref, 1); |
---|
| 130 | + blk_crypto_rq_set_defaults(rq); |
---|
204 | 131 | } |
---|
205 | 132 | EXPORT_SYMBOL(blk_rq_init); |
---|
| 133 | + |
---|
| 134 | +#define REQ_OP_NAME(name) [REQ_OP_##name] = #name |
---|
| 135 | +static const char *const blk_op_name[] = { |
---|
| 136 | + REQ_OP_NAME(READ), |
---|
| 137 | + REQ_OP_NAME(WRITE), |
---|
| 138 | + REQ_OP_NAME(FLUSH), |
---|
| 139 | + REQ_OP_NAME(DISCARD), |
---|
| 140 | + REQ_OP_NAME(SECURE_ERASE), |
---|
| 141 | + REQ_OP_NAME(ZONE_RESET), |
---|
| 142 | + REQ_OP_NAME(ZONE_RESET_ALL), |
---|
| 143 | + REQ_OP_NAME(ZONE_OPEN), |
---|
| 144 | + REQ_OP_NAME(ZONE_CLOSE), |
---|
| 145 | + REQ_OP_NAME(ZONE_FINISH), |
---|
| 146 | + REQ_OP_NAME(ZONE_APPEND), |
---|
| 147 | + REQ_OP_NAME(WRITE_SAME), |
---|
| 148 | + REQ_OP_NAME(WRITE_ZEROES), |
---|
| 149 | + REQ_OP_NAME(SCSI_IN), |
---|
| 150 | + REQ_OP_NAME(SCSI_OUT), |
---|
| 151 | + REQ_OP_NAME(DRV_IN), |
---|
| 152 | + REQ_OP_NAME(DRV_OUT), |
---|
| 153 | +}; |
---|
| 154 | +#undef REQ_OP_NAME |
---|
| 155 | + |
---|
| 156 | +/** |
---|
| 157 | + * blk_op_str - Return the string XXX matching the REQ_OP_XXX value. |
---|
| 158 | + * @op: REQ_OP_XXX. |
---|
| 159 | + * |
---|
| 160 | + * Description: Centralized block layer helper to convert REQ_OP_XXX into |
---|
| 161 | + * string form. Useful when debugging and tracing bios or requests. For an |
---|
| 162 | + * invalid REQ_OP_XXX it returns the string "UNKNOWN". |
---|
| 163 | + */ |
---|
| 164 | +inline const char *blk_op_str(unsigned int op) |
---|
| 165 | +{ |
---|
| 166 | + const char *op_str = "UNKNOWN"; |
---|
| 167 | + |
---|
| 168 | + if (op < ARRAY_SIZE(blk_op_name) && blk_op_name[op]) |
---|
| 169 | + op_str = blk_op_name[op]; |
---|
| 170 | + |
---|
| 171 | + return op_str; |
---|
| 172 | +} |
---|
| 173 | +EXPORT_SYMBOL_GPL(blk_op_str); |
---|
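
blk_op_str() is backed by the sparse blk_op_name[] table built with the REQ_OP_NAME() stringizing macro; unknown or unnamed slots fall through to "UNKNOWN" via the bounds-and-NULL check. The same designated-initializer pattern is easy to reuse; a runnable userspace sketch with a made-up enum (the values are not the kernel's REQ_OP_* numbers):

```c
/* Userspace sketch of the REQ_OP_NAME() pattern: token pasting + stringizing
 * into a sparse name table, plus a bounds-checked lookup. */
#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

enum demo_op { DEMO_OP_READ, DEMO_OP_WRITE, DEMO_OP_FLUSH, DEMO_OP_DISCARD = 5 };

#define DEMO_OP_NAME(name) [DEMO_OP_##name] = #name
static const char *const demo_op_name[] = {
	DEMO_OP_NAME(READ),
	DEMO_OP_NAME(WRITE),
	DEMO_OP_NAME(FLUSH),
	DEMO_OP_NAME(DISCARD),		/* leaves slots 3..4 as NULL */
};
#undef DEMO_OP_NAME

static const char *demo_op_str(unsigned int op)
{
	if (op < ARRAY_SIZE(demo_op_name) && demo_op_name[op])
		return demo_op_name[op];
	return "UNKNOWN";
}

int main(void)
{
	/* prints: WRITE DISCARD UNKNOWN UNKNOWN */
	printf("%s %s %s %s\n", demo_op_str(DEMO_OP_WRITE), demo_op_str(DEMO_OP_DISCARD),
	       demo_op_str(3), demo_op_str(42));
	return 0;
}
```
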
206 | 174 | |
---|
207 | 175 | static const struct { |
---|
208 | 176 | int errno; |
---|
.. | .. |
---|
223 | 191 | |
---|
224 | 192 | /* device mapper special case, should not leak out: */ |
---|
225 | 193 | [BLK_STS_DM_REQUEUE] = { -EREMCHG, "dm internal retry" }, |
---|
| 194 | + |
---|
| 195 | + /* zone device specific errors */ |
---|
| 196 | + [BLK_STS_ZONE_OPEN_RESOURCE] = { -ETOOMANYREFS, "open zones exceeded" }, |
---|
| 197 | + [BLK_STS_ZONE_ACTIVE_RESOURCE] = { -EOVERFLOW, "active zones exceeded" }, |
---|
226 | 198 | |
---|
227 | 199 | /* everything else not covered above: */ |
---|
228 | 200 | [BLK_STS_IOERR] = { -EIO, "I/O" }, |
---|
.. | .. |
---|
251 | 223 | } |
---|
252 | 224 | EXPORT_SYMBOL_GPL(blk_status_to_errno); |
---|
253 | 225 | |
---|
254 | | -static void print_req_error(struct request *req, blk_status_t status) |
---|
| 226 | +static void print_req_error(struct request *req, blk_status_t status, |
---|
| 227 | + const char *caller) |
---|
255 | 228 | { |
---|
256 | 229 | int idx = (__force int)status; |
---|
257 | 230 | |
---|
258 | 231 | if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors))) |
---|
259 | 232 | return; |
---|
260 | 233 | |
---|
261 | | - printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n", |
---|
262 | | - __func__, blk_errors[idx].name, req->rq_disk ? |
---|
263 | | - req->rq_disk->disk_name : "?", |
---|
264 | | - (unsigned long long)blk_rq_pos(req)); |
---|
| 234 | + printk_ratelimited(KERN_ERR |
---|
| 235 | + "%s: %s error, dev %s, sector %llu op 0x%x:(%s) flags 0x%x " |
---|
| 236 | + "phys_seg %u prio class %u\n", |
---|
| 237 | + caller, blk_errors[idx].name, |
---|
| 238 | + req->rq_disk ? req->rq_disk->disk_name : "?", |
---|
| 239 | + blk_rq_pos(req), req_op(req), blk_op_str(req_op(req)), |
---|
| 240 | + req->cmd_flags & ~REQ_OP_MASK, |
---|
| 241 | + req->nr_phys_segments, |
---|
| 242 | + IOPRIO_PRIO_CLASS(req->ioprio)); |
---|
265 | 243 | } |
---|
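
The reworked print_req_error() above now reports, besides the sector, the operation (req_op(), i.e. the REQ_OP_MASK part of cmd_flags) both numerically and through blk_op_str(), the remaining flag bits, the physical segment count and the I/O priority class. A small runnable sketch of the op/flags split; the 8-bit op field and the flag names are assumptions for illustration, not the kernel's exact bit layout:

```c
/* Userspace model of splitting a combined cmd_flags word into op + flags. */
#include <stdio.h>

#define DEMO_OP_BITS	8
#define DEMO_OP_MASK	((1u << DEMO_OP_BITS) - 1)
#define DEMO_FLAG_SYNC	(1u << DEMO_OP_BITS)		/* hypothetical flags */
#define DEMO_FLAG_FUA	(1u << (DEMO_OP_BITS + 1))

static const char *demo_op_str(unsigned int op)
{
	static const char *const names[] = { "READ", "WRITE", "FLUSH" };

	return op < sizeof(names) / sizeof(names[0]) ? names[op] : "UNKNOWN";
}

static void demo_print_error(const char *dev, unsigned long long sector,
			     unsigned int cmd_flags)
{
	unsigned int op = cmd_flags & DEMO_OP_MASK;

	fprintf(stderr, "I/O error, dev %s, sector %llu op 0x%x:(%s) flags 0x%x\n",
		dev, sector, op, demo_op_str(op), cmd_flags & ~DEMO_OP_MASK);
}

int main(void)
{
	/* a WRITE (op 1 here) carrying SYNC|FUA */
	demo_print_error("sda", 123456ULL, 1u | DEMO_FLAG_SYNC | DEMO_FLAG_FUA);
	return 0;
}
```
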
266 | 244 | |
---|
267 | 245 | static void req_bio_endio(struct request *rq, struct bio *bio, |
---|
.. | .. |
---|
274 | 252 | bio_set_flag(bio, BIO_QUIET); |
---|
275 | 253 | |
---|
276 | 254 | bio_advance(bio, nbytes); |
---|
| 255 | + |
---|
| 256 | + if (req_op(rq) == REQ_OP_ZONE_APPEND && error == BLK_STS_OK) { |
---|
| 257 | + /* |
---|
| 258 | + * Partial zone append completions cannot be supported as the |
---|
| 259 | + * BIO fragments may end up not being written sequentially. |
---|
| 260 | + */ |
---|
| 261 | + if (bio->bi_iter.bi_size) |
---|
| 262 | + bio->bi_status = BLK_STS_IOERR; |
---|
| 263 | + else |
---|
| 264 | + bio->bi_iter.bi_sector = rq->__sector; |
---|
| 265 | + } |
---|
277 | 266 | |
---|
278 | 267 | /* don't actually finish bio if it's part of flush sequence */ |
---|
279 | 268 | if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ)) |
---|
.. | .. |
---|
294 | 283 | } |
---|
295 | 284 | EXPORT_SYMBOL(blk_dump_rq_flags); |
---|
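
The REQ_OP_ZONE_APPEND branch added to req_bio_endio() above encodes two rules: a partial zone-append completion is turned into an error, because re-issuing the remaining fragments could place them non-sequentially, while a full completion rewrites the bio's sector to wherever the device actually wrote the data (rq->__sector). A toy, runnable model of that bookkeeping; the struct and field names are stand-ins, not the kernel's bio/request types:

```c
/* Toy model of the zone-append completion rule in req_bio_endio(). */
#include <stdbool.h>
#include <stdio.h>

struct toy_bio {
	unsigned int bytes_left;	/* like bio->bi_iter.bi_size */
	unsigned long long sector;	/* like bio->bi_iter.bi_sector */
	bool error;
};

static void toy_zone_append_endio(struct toy_bio *bio, unsigned int completed,
				  unsigned long long written_sector)
{
	bio->bytes_left -= completed;		/* like bio_advance() */
	if (bio->bytes_left)
		bio->error = true;		/* partial append: unsupported */
	else
		bio->sector = written_sector;	/* report where the data landed */
}

int main(void)
{
	struct toy_bio b = { .bytes_left = 4096, .sector = 0, .error = false };

	toy_zone_append_endio(&b, 4096, 2048);
	printf("error=%d sector=%llu\n", b.error, b.sector);	/* error=0 sector=2048 */
	return 0;
}
```
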
296 | 285 | |
---|
297 | | -static void blk_delay_work(struct work_struct *work) |
---|
298 | | -{ |
---|
299 | | - struct request_queue *q; |
---|
300 | | - |
---|
301 | | - q = container_of(work, struct request_queue, delay_work.work); |
---|
302 | | - spin_lock_irq(q->queue_lock); |
---|
303 | | - __blk_run_queue(q); |
---|
304 | | - spin_unlock_irq(q->queue_lock); |
---|
305 | | -} |
---|
306 | | - |
---|
307 | | -/** |
---|
308 | | - * blk_delay_queue - restart queueing after defined interval |
---|
309 | | - * @q: The &struct request_queue in question |
---|
310 | | - * @msecs: Delay in msecs |
---|
311 | | - * |
---|
312 | | - * Description: |
---|
313 | | - * Sometimes queueing needs to be postponed for a little while, to allow |
---|
314 | | - * resources to come back. This function will make sure that queueing is |
---|
315 | | - * restarted around the specified time. |
---|
316 | | - */ |
---|
317 | | -void blk_delay_queue(struct request_queue *q, unsigned long msecs) |
---|
318 | | -{ |
---|
319 | | - lockdep_assert_held(q->queue_lock); |
---|
320 | | - WARN_ON_ONCE(q->mq_ops); |
---|
321 | | - |
---|
322 | | - if (likely(!blk_queue_dead(q))) |
---|
323 | | - queue_delayed_work(kblockd_workqueue, &q->delay_work, |
---|
324 | | - msecs_to_jiffies(msecs)); |
---|
325 | | -} |
---|
326 | | -EXPORT_SYMBOL(blk_delay_queue); |
---|
327 | | - |
---|
328 | | -/** |
---|
329 | | - * blk_start_queue_async - asynchronously restart a previously stopped queue |
---|
330 | | - * @q: The &struct request_queue in question |
---|
331 | | - * |
---|
332 | | - * Description: |
---|
333 | | - * blk_start_queue_async() will clear the stop flag on the queue, and |
---|
334 | | - * ensure that the request_fn for the queue is run from an async |
---|
335 | | - * context. |
---|
336 | | - **/ |
---|
337 | | -void blk_start_queue_async(struct request_queue *q) |
---|
338 | | -{ |
---|
339 | | - lockdep_assert_held(q->queue_lock); |
---|
340 | | - WARN_ON_ONCE(q->mq_ops); |
---|
341 | | - |
---|
342 | | - queue_flag_clear(QUEUE_FLAG_STOPPED, q); |
---|
343 | | - blk_run_queue_async(q); |
---|
344 | | -} |
---|
345 | | -EXPORT_SYMBOL(blk_start_queue_async); |
---|
346 | | - |
---|
347 | | -/** |
---|
348 | | - * blk_start_queue - restart a previously stopped queue |
---|
349 | | - * @q: The &struct request_queue in question |
---|
350 | | - * |
---|
351 | | - * Description: |
---|
352 | | - * blk_start_queue() will clear the stop flag on the queue, and call |
---|
353 | | - * the request_fn for the queue if it was in a stopped state when |
---|
354 | | - * entered. Also see blk_stop_queue(). |
---|
355 | | - **/ |
---|
356 | | -void blk_start_queue(struct request_queue *q) |
---|
357 | | -{ |
---|
358 | | - lockdep_assert_held(q->queue_lock); |
---|
359 | | - WARN_ON_ONCE(q->mq_ops); |
---|
360 | | - |
---|
361 | | - queue_flag_clear(QUEUE_FLAG_STOPPED, q); |
---|
362 | | - __blk_run_queue(q); |
---|
363 | | -} |
---|
364 | | -EXPORT_SYMBOL(blk_start_queue); |
---|
365 | | - |
---|
366 | | -/** |
---|
367 | | - * blk_stop_queue - stop a queue |
---|
368 | | - * @q: The &struct request_queue in question |
---|
369 | | - * |
---|
370 | | - * Description: |
---|
371 | | - * The Linux block layer assumes that a block driver will consume all |
---|
372 | | - * entries on the request queue when the request_fn strategy is called. |
---|
373 | | - * Often this will not happen, because of hardware limitations (queue |
---|
374 | | - * depth settings). If a device driver gets a 'queue full' response, |
---|
375 | | - * or if it simply chooses not to queue more I/O at one point, it can |
---|
376 | | - * call this function to prevent the request_fn from being called until |
---|
377 | | - * the driver has signalled it's ready to go again. This happens by calling |
---|
378 | | - * blk_start_queue() to restart queue operations. |
---|
379 | | - **/ |
---|
380 | | -void blk_stop_queue(struct request_queue *q) |
---|
381 | | -{ |
---|
382 | | - lockdep_assert_held(q->queue_lock); |
---|
383 | | - WARN_ON_ONCE(q->mq_ops); |
---|
384 | | - |
---|
385 | | - cancel_delayed_work(&q->delay_work); |
---|
386 | | - queue_flag_set(QUEUE_FLAG_STOPPED, q); |
---|
387 | | -} |
---|
388 | | -EXPORT_SYMBOL(blk_stop_queue); |
---|
389 | | - |
---|
390 | 286 | /** |
---|
391 | 287 | * blk_sync_queue - cancel any pending callbacks on a queue |
---|
392 | 288 | * @q: the queue |
---|
.. | .. |
---|
397 | 293 | * A block device may call blk_sync_queue to ensure that any |
---|
398 | 294 | * such activity is cancelled, thus allowing it to release resources |
---|
399 | 295 | * that the callbacks might use. The caller must already have made sure |
---|
400 | | - * that its ->make_request_fn will not re-add plugging prior to calling |
---|
| 296 | + * that its ->submit_bio will not re-add plugging prior to calling |
---|
401 | 297 | * this function. |
---|
402 | 298 | * |
---|
403 | 299 | * This function does not cancel any asynchronous activity arising |
---|
.. | .. |
---|
409 | 305 | { |
---|
410 | 306 | del_timer_sync(&q->timeout); |
---|
411 | 307 | cancel_work_sync(&q->timeout_work); |
---|
412 | | - |
---|
413 | | - if (q->mq_ops) { |
---|
414 | | - struct blk_mq_hw_ctx *hctx; |
---|
415 | | - int i; |
---|
416 | | - |
---|
417 | | - queue_for_each_hw_ctx(q, hctx, i) |
---|
418 | | - cancel_delayed_work_sync(&hctx->run_work); |
---|
419 | | - } else { |
---|
420 | | - cancel_delayed_work_sync(&q->delay_work); |
---|
421 | | - } |
---|
422 | 308 | } |
---|
423 | 309 | EXPORT_SYMBOL(blk_sync_queue); |
---|
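
After this change blk_sync_queue() only quiesces the queue's timeout machinery (del_timer_sync() on q->timeout plus cancel_work_sync() on q->timeout_work); the blk-mq run_work loop and the legacy delay_work cancellation are no longer done here. Its place in a driver's teardown is unchanged: call it once no new I/O can reach the queue and before freeing anything the timeout handler dereferences. A kernel-style sketch only, with a hypothetical driver structure and helpers:

```c
/* Sketch (hypothetical driver): where blk_sync_queue() sits in teardown. */
static void mydrv_shutdown(struct mydrv *drv)
{
	/* 1. Make sure no new bios/requests can be queued (driver specific). */
	mydrv_stop_io(drv);

	/* 2. Cancel the queue's pending timeout timer and timeout work. */
	blk_sync_queue(drv->queue);

	/* 3. Only now free state that the timeout callback might touch. */
	kfree(drv->timeout_ctx);
}
```
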
424 | 310 | |
---|
.. | .. |
---|
444 | 330 | EXPORT_SYMBOL_GPL(blk_clear_pm_only); |
---|
445 | 331 | |
---|
446 | 332 | /** |
---|
447 | | - * __blk_run_queue_uncond - run a queue whether or not it has been stopped |
---|
448 | | - * @q: The queue to run |
---|
| 333 | + * blk_put_queue - decrement the request_queue refcount |
---|
| 334 | + * @q: the request_queue structure to decrement the refcount for |
---|
449 | 335 | * |
---|
450 | | - * Description: |
---|
451 | | - * Invoke request handling on a queue if there are any pending requests. |
---|
452 | | - * May be used to restart request handling after a request has completed. |
---|
453 | | - * This variant runs the queue whether or not the queue has been |
---|
454 | | - * stopped. Must be called with the queue lock held and interrupts |
---|
455 | | - * disabled. See also @blk_run_queue. |
---|
| 336 | + * Decrements the refcount of the request_queue kobject. When this reaches 0 |
---|
| 337 | + * we'll have blk_release_queue() called. |
---|
| 338 | + * |
---|
| 339 | + * Context: Any context, but the last reference must not be dropped from |
---|
| 340 | + * atomic context. |
---|
456 | 341 | */ |
---|
457 | | -inline void __blk_run_queue_uncond(struct request_queue *q) |
---|
458 | | -{ |
---|
459 | | - lockdep_assert_held(q->queue_lock); |
---|
460 | | - WARN_ON_ONCE(q->mq_ops); |
---|
461 | | - |
---|
462 | | - if (unlikely(blk_queue_dead(q))) |
---|
463 | | - return; |
---|
464 | | - |
---|
465 | | - /* |
---|
466 | | - * Some request_fn implementations, e.g. scsi_request_fn(), unlock |
---|
467 | | - * the queue lock internally. As a result multiple threads may be |
---|
468 | | - * running such a request function concurrently. Keep track of the |
---|
469 | | - * number of active request_fn invocations such that blk_drain_queue() |
---|
470 | | - * can wait until all these request_fn calls have finished. |
---|
471 | | - */ |
---|
472 | | - q->request_fn_active++; |
---|
473 | | - q->request_fn(q); |
---|
474 | | - q->request_fn_active--; |
---|
475 | | -} |
---|
476 | | -EXPORT_SYMBOL_GPL(__blk_run_queue_uncond); |
---|
477 | | - |
---|
478 | | -/** |
---|
479 | | - * __blk_run_queue - run a single device queue |
---|
480 | | - * @q: The queue to run |
---|
481 | | - * |
---|
482 | | - * Description: |
---|
483 | | - * See @blk_run_queue. |
---|
484 | | - */ |
---|
485 | | -void __blk_run_queue(struct request_queue *q) |
---|
486 | | -{ |
---|
487 | | - lockdep_assert_held(q->queue_lock); |
---|
488 | | - WARN_ON_ONCE(q->mq_ops); |
---|
489 | | - |
---|
490 | | - if (unlikely(blk_queue_stopped(q))) |
---|
491 | | - return; |
---|
492 | | - |
---|
493 | | - __blk_run_queue_uncond(q); |
---|
494 | | -} |
---|
495 | | -EXPORT_SYMBOL(__blk_run_queue); |
---|
496 | | - |
---|
497 | | -/** |
---|
498 | | - * blk_run_queue_async - run a single device queue in workqueue context |
---|
499 | | - * @q: The queue to run |
---|
500 | | - * |
---|
501 | | - * Description: |
---|
502 | | - * Tells kblockd to perform the equivalent of @blk_run_queue on behalf |
---|
503 | | - * of us. |
---|
504 | | - * |
---|
505 | | - * Note: |
---|
506 | | - * Since it is not allowed to run q->delay_work after blk_cleanup_queue() |
---|
507 | | - * has canceled q->delay_work, callers must hold the queue lock to avoid |
---|
508 | | - * race conditions between blk_cleanup_queue() and blk_run_queue_async(). |
---|
509 | | - */ |
---|
510 | | -void blk_run_queue_async(struct request_queue *q) |
---|
511 | | -{ |
---|
512 | | - lockdep_assert_held(q->queue_lock); |
---|
513 | | - WARN_ON_ONCE(q->mq_ops); |
---|
514 | | - |
---|
515 | | - if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q))) |
---|
516 | | - mod_delayed_work(kblockd_workqueue, &q->delay_work, 0); |
---|
517 | | -} |
---|
518 | | -EXPORT_SYMBOL(blk_run_queue_async); |
---|
519 | | - |
---|
520 | | -/** |
---|
521 | | - * blk_run_queue - run a single device queue |
---|
522 | | - * @q: The queue to run |
---|
523 | | - * |
---|
524 | | - * Description: |
---|
525 | | - * Invoke request handling on this queue, if it has pending work to do. |
---|
526 | | - * May be used to restart queueing when a request has completed. |
---|
527 | | - */ |
---|
528 | | -void blk_run_queue(struct request_queue *q) |
---|
529 | | -{ |
---|
530 | | - unsigned long flags; |
---|
531 | | - |
---|
532 | | - WARN_ON_ONCE(q->mq_ops); |
---|
533 | | - |
---|
534 | | - spin_lock_irqsave(q->queue_lock, flags); |
---|
535 | | - __blk_run_queue(q); |
---|
536 | | - spin_unlock_irqrestore(q->queue_lock, flags); |
---|
537 | | -} |
---|
538 | | -EXPORT_SYMBOL(blk_run_queue); |
---|
539 | | - |
---|
540 | 342 | void blk_put_queue(struct request_queue *q) |
---|
541 | 343 | { |
---|
542 | 344 | kobject_put(&q->kobj); |
---|
543 | 345 | } |
---|
544 | 346 | EXPORT_SYMBOL(blk_put_queue); |
---|
545 | | - |
---|
546 | | -/** |
---|
547 | | - * __blk_drain_queue - drain requests from request_queue |
---|
548 | | - * @q: queue to drain |
---|
549 | | - * @drain_all: whether to drain all requests or only the ones w/ ELVPRIV |
---|
550 | | - * |
---|
551 | | - * Drain requests from @q. If @drain_all is set, all requests are drained. |
---|
552 | | - * If not, only ELVPRIV requests are drained. The caller is responsible |
---|
553 | | - * for ensuring that no new requests which need to be drained are queued. |
---|
554 | | - */ |
---|
555 | | -static void __blk_drain_queue(struct request_queue *q, bool drain_all) |
---|
556 | | - __releases(q->queue_lock) |
---|
557 | | - __acquires(q->queue_lock) |
---|
558 | | -{ |
---|
559 | | - int i; |
---|
560 | | - |
---|
561 | | - lockdep_assert_held(q->queue_lock); |
---|
562 | | - WARN_ON_ONCE(q->mq_ops); |
---|
563 | | - |
---|
564 | | - while (true) { |
---|
565 | | - bool drain = false; |
---|
566 | | - |
---|
567 | | - /* |
---|
568 | | - * The caller might be trying to drain @q before its |
---|
569 | | - * elevator is initialized. |
---|
570 | | - */ |
---|
571 | | - if (q->elevator) |
---|
572 | | - elv_drain_elevator(q); |
---|
573 | | - |
---|
574 | | - blkcg_drain_queue(q); |
---|
575 | | - |
---|
576 | | - /* |
---|
577 | | - * This function might be called on a queue which failed |
---|
578 | | - * driver init after queue creation or is not yet fully |
---|
579 | | - * active yet. Some drivers (e.g. fd and loop) get unhappy |
---|
580 | | - * in such cases. Kick queue iff dispatch queue has |
---|
581 | | - * something on it and @q has request_fn set. |
---|
582 | | - */ |
---|
583 | | - if (!list_empty(&q->queue_head) && q->request_fn) |
---|
584 | | - __blk_run_queue(q); |
---|
585 | | - |
---|
586 | | - drain |= q->nr_rqs_elvpriv; |
---|
587 | | - drain |= q->request_fn_active; |
---|
588 | | - |
---|
589 | | - /* |
---|
590 | | - * Unfortunately, requests are queued at and tracked from |
---|
591 | | - * multiple places and there's no single counter which can |
---|
592 | | - * be drained. Check all the queues and counters. |
---|
593 | | - */ |
---|
594 | | - if (drain_all) { |
---|
595 | | - struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL); |
---|
596 | | - drain |= !list_empty(&q->queue_head); |
---|
597 | | - for (i = 0; i < 2; i++) { |
---|
598 | | - drain |= q->nr_rqs[i]; |
---|
599 | | - drain |= q->in_flight[i]; |
---|
600 | | - if (fq) |
---|
601 | | - drain |= !list_empty(&fq->flush_queue[i]); |
---|
602 | | - } |
---|
603 | | - } |
---|
604 | | - |
---|
605 | | - if (!drain) |
---|
606 | | - break; |
---|
607 | | - |
---|
608 | | - spin_unlock_irq(q->queue_lock); |
---|
609 | | - |
---|
610 | | - msleep(10); |
---|
611 | | - |
---|
612 | | - spin_lock_irq(q->queue_lock); |
---|
613 | | - } |
---|
614 | | - |
---|
615 | | - /* |
---|
616 | | - * With queue marked dead, any woken up waiter will fail the |
---|
617 | | - * allocation path, so the wakeup chaining is lost and we're |
---|
618 | | - * left with hung waiters. We need to wake up those waiters. |
---|
619 | | - */ |
---|
620 | | - if (q->request_fn) { |
---|
621 | | - struct request_list *rl; |
---|
622 | | - |
---|
623 | | - blk_queue_for_each_rl(rl, q) |
---|
624 | | - for (i = 0; i < ARRAY_SIZE(rl->wait); i++) |
---|
625 | | - wake_up_all(&rl->wait[i]); |
---|
626 | | - } |
---|
627 | | -} |
---|
628 | | - |
---|
629 | | -void blk_drain_queue(struct request_queue *q) |
---|
630 | | -{ |
---|
631 | | - spin_lock_irq(q->queue_lock); |
---|
632 | | - __blk_drain_queue(q, true); |
---|
633 | | - spin_unlock_irq(q->queue_lock); |
---|
634 | | -} |
---|
635 | | - |
---|
636 | | -/** |
---|
637 | | - * blk_queue_bypass_start - enter queue bypass mode |
---|
638 | | - * @q: queue of interest |
---|
639 | | - * |
---|
640 | | - * In bypass mode, only the dispatch FIFO queue of @q is used. This |
---|
641 | | - * function makes @q enter bypass mode and drains all requests which were |
---|
642 | | - * throttled or issued before. On return, it's guaranteed that no request |
---|
643 | | - * is being throttled or has ELVPRIV set and blk_queue_bypass() %true |
---|
644 | | - * inside queue or RCU read lock. |
---|
645 | | - */ |
---|
646 | | -void blk_queue_bypass_start(struct request_queue *q) |
---|
647 | | -{ |
---|
648 | | - WARN_ON_ONCE(q->mq_ops); |
---|
649 | | - |
---|
650 | | - spin_lock_irq(q->queue_lock); |
---|
651 | | - q->bypass_depth++; |
---|
652 | | - queue_flag_set(QUEUE_FLAG_BYPASS, q); |
---|
653 | | - spin_unlock_irq(q->queue_lock); |
---|
654 | | - |
---|
655 | | - /* |
---|
656 | | - * Queues start drained. Skip actual draining till init is |
---|
657 | | - * complete. This avoids lenghty delays during queue init which |
---|
658 | | - * can happen many times during boot. |
---|
659 | | - */ |
---|
660 | | - if (blk_queue_init_done(q)) { |
---|
661 | | - spin_lock_irq(q->queue_lock); |
---|
662 | | - __blk_drain_queue(q, false); |
---|
663 | | - spin_unlock_irq(q->queue_lock); |
---|
664 | | - |
---|
665 | | - /* ensure blk_queue_bypass() is %true inside RCU read lock */ |
---|
666 | | - synchronize_rcu(); |
---|
667 | | - } |
---|
668 | | -} |
---|
669 | | -EXPORT_SYMBOL_GPL(blk_queue_bypass_start); |
---|
670 | | - |
---|
671 | | -/** |
---|
672 | | - * blk_queue_bypass_end - leave queue bypass mode |
---|
673 | | - * @q: queue of interest |
---|
674 | | - * |
---|
675 | | - * Leave bypass mode and restore the normal queueing behavior. |
---|
676 | | - * |
---|
677 | | - * Note: although blk_queue_bypass_start() is only called for blk-sq queues, |
---|
678 | | - * this function is called for both blk-sq and blk-mq queues. |
---|
679 | | - */ |
---|
680 | | -void blk_queue_bypass_end(struct request_queue *q) |
---|
681 | | -{ |
---|
682 | | - spin_lock_irq(q->queue_lock); |
---|
683 | | - if (!--q->bypass_depth) |
---|
684 | | - queue_flag_clear(QUEUE_FLAG_BYPASS, q); |
---|
685 | | - WARN_ON_ONCE(q->bypass_depth < 0); |
---|
686 | | - spin_unlock_irq(q->queue_lock); |
---|
687 | | -} |
---|
688 | | -EXPORT_SYMBOL_GPL(blk_queue_bypass_end); |
---|
689 | 347 | |
---|
690 | 348 | void blk_set_queue_dying(struct request_queue *q) |
---|
691 | 349 | { |
---|
.. | .. |
---|
698 | 356 | */ |
---|
699 | 357 | blk_freeze_queue_start(q); |
---|
700 | 358 | |
---|
701 | | - if (q->mq_ops) |
---|
| 359 | + if (queue_is_mq(q)) |
---|
702 | 360 | blk_mq_wake_waiters(q); |
---|
703 | | - else { |
---|
704 | | - struct request_list *rl; |
---|
705 | | - |
---|
706 | | - spin_lock_irq(q->queue_lock); |
---|
707 | | - blk_queue_for_each_rl(rl, q) { |
---|
708 | | - if (rl->rq_pool) { |
---|
709 | | - wake_up_all(&rl->wait[BLK_RW_SYNC]); |
---|
710 | | - wake_up_all(&rl->wait[BLK_RW_ASYNC]); |
---|
711 | | - } |
---|
712 | | - } |
---|
713 | | - spin_unlock_irq(q->queue_lock); |
---|
714 | | - } |
---|
715 | 361 | |
---|
716 | 362 | /* Make blk_queue_enter() reexamine the DYING flag. */ |
---|
717 | 363 | wake_up_all(&q->mq_freeze_wq); |
---|
718 | 364 | } |
---|
719 | 365 | EXPORT_SYMBOL_GPL(blk_set_queue_dying); |
---|
720 | | - |
---|
721 | | -/* Unconfigure the I/O scheduler and dissociate from the cgroup controller. */ |
---|
722 | | -void blk_exit_queue(struct request_queue *q) |
---|
723 | | -{ |
---|
724 | | - /* |
---|
725 | | - * Since the I/O scheduler exit code may access cgroup information, |
---|
726 | | - * perform I/O scheduler exit before disassociating from the block |
---|
727 | | - * cgroup controller. |
---|
728 | | - */ |
---|
729 | | - if (q->elevator) { |
---|
730 | | - ioc_clear_queue(q); |
---|
731 | | - elevator_exit(q, q->elevator); |
---|
732 | | - q->elevator = NULL; |
---|
733 | | - } |
---|
734 | | - |
---|
735 | | - /* |
---|
736 | | - * Remove all references to @q from the block cgroup controller before |
---|
737 | | - * restoring @q->queue_lock to avoid that restoring this pointer causes |
---|
738 | | - * e.g. blkcg_print_blkgs() to crash. |
---|
739 | | - */ |
---|
740 | | - blkcg_exit_queue(q); |
---|
741 | | - |
---|
742 | | - /* |
---|
743 | | - * Since the cgroup code may dereference the @q->backing_dev_info |
---|
744 | | - * pointer, only decrease its reference count after having removed the |
---|
745 | | - * association with the block cgroup controller. |
---|
746 | | - */ |
---|
747 | | - bdi_put(q->backing_dev_info); |
---|
748 | | -} |
---|
749 | 366 | |
---|
750 | 367 | /** |
---|
751 | 368 | * blk_cleanup_queue - shutdown a request queue |
---|
.. | .. |
---|
753 | 370 | * |
---|
754 | 371 | * Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and |
---|
755 | 372 | * put it. All future requests will be failed immediately with -ENODEV. |
---|
| 373 | + * |
---|
| 374 | + * Context: can sleep |
---|
756 | 375 | */ |
---|
757 | 376 | void blk_cleanup_queue(struct request_queue *q) |
---|
758 | 377 | { |
---|
759 | | - spinlock_t *lock = q->queue_lock; |
---|
| 378 | + /* cannot be called from atomic context */ |
---|
| 379 | + might_sleep(); |
---|
| 380 | + |
---|
| 381 | + WARN_ON_ONCE(blk_queue_registered(q)); |
---|
760 | 382 | |
---|
761 | 383 | /* mark @q DYING, no new request or merges will be allowed afterwards */ |
---|
762 | | - mutex_lock(&q->sysfs_lock); |
---|
763 | 384 | blk_set_queue_dying(q); |
---|
764 | | - spin_lock_irq(lock); |
---|
765 | 385 | |
---|
766 | | - /* |
---|
767 | | - * A dying queue is permanently in bypass mode till released. Note |
---|
768 | | - * that, unlike blk_queue_bypass_start(), we aren't performing |
---|
769 | | - * synchronize_rcu() after entering bypass mode to avoid the delay |
---|
770 | | - * as some drivers create and destroy a lot of queues while |
---|
771 | | - * probing. This is still safe because blk_release_queue() will be |
---|
772 | | - * called only after the queue refcnt drops to zero and nothing, |
---|
773 | | - * RCU or not, would be traversing the queue by then. |
---|
774 | | - */ |
---|
775 | | - q->bypass_depth++; |
---|
776 | | - queue_flag_set(QUEUE_FLAG_BYPASS, q); |
---|
777 | | - |
---|
778 | | - queue_flag_set(QUEUE_FLAG_NOMERGES, q); |
---|
779 | | - queue_flag_set(QUEUE_FLAG_NOXMERGES, q); |
---|
780 | | - queue_flag_set(QUEUE_FLAG_DYING, q); |
---|
781 | | - spin_unlock_irq(lock); |
---|
782 | | - mutex_unlock(&q->sysfs_lock); |
---|
| 386 | + blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q); |
---|
| 387 | + blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q); |
---|
783 | 388 | |
---|
784 | 389 | /* |
---|
785 | 390 | * Drain all requests queued before DYING marking. Set DEAD flag to |
---|
786 | | - * prevent that q->request_fn() gets invoked after draining finished. |
---|
| 391 | + * prevent that blk_mq_run_hw_queues() accesses the hardware queues |
---|
| 392 | + * after draining finished. |
---|
787 | 393 | */ |
---|
788 | 394 | blk_freeze_queue(q); |
---|
789 | 395 | |
---|
790 | 396 | rq_qos_exit(q); |
---|
791 | 397 | |
---|
792 | | - spin_lock_irq(lock); |
---|
793 | | - queue_flag_set(QUEUE_FLAG_DEAD, q); |
---|
794 | | - spin_unlock_irq(lock); |
---|
795 | | - |
---|
796 | | - /* |
---|
797 | | - * make sure all in-progress dispatch are completed because |
---|
798 | | - * blk_freeze_queue() can only complete all requests, and |
---|
799 | | - * dispatch may still be in-progress since we dispatch requests |
---|
800 | | - * from more than one contexts. |
---|
801 | | - * |
---|
802 | | - * We rely on driver to deal with the race in case that queue |
---|
803 | | - * initialization isn't done. |
---|
804 | | - */ |
---|
805 | | - if (q->mq_ops && blk_queue_init_done(q)) |
---|
806 | | - blk_mq_quiesce_queue(q); |
---|
| 398 | + blk_queue_flag_set(QUEUE_FLAG_DEAD, q); |
---|
807 | 399 | |
---|
808 | 400 | /* for synchronous bio-based driver finish in-flight integrity i/o */ |
---|
809 | 401 | blk_flush_integrity(); |
---|
.. | .. |
---|
812 | 404 | del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer); |
---|
813 | 405 | blk_sync_queue(q); |
---|
814 | 406 | |
---|
815 | | - /* |
---|
816 | | - * I/O scheduler exit is only safe after the sysfs scheduler attribute |
---|
817 | | - * has been removed. |
---|
818 | | - */ |
---|
819 | | - WARN_ON_ONCE(q->kobj.state_in_sysfs); |
---|
820 | | - |
---|
821 | | - blk_exit_queue(q); |
---|
822 | | - |
---|
823 | | - if (q->mq_ops) |
---|
| 407 | + if (queue_is_mq(q)) |
---|
824 | 408 | blk_mq_exit_queue(q); |
---|
825 | 409 | |
---|
826 | | - percpu_ref_exit(&q->q_usage_counter); |
---|
| 410 | + /* |
---|
| 411 | + * In theory, the sched_tags request pool belongs to the request queue. |
---|
| 412 | + * However, the current implementation requires the tag_set in order to |
---|
| 413 | + * free requests, so free the pool now. |
---|
| 414 | + * |
---|
| 415 | + * The queue has been frozen, so there can't be any in-queue requests |
---|
| 416 | + * and it is safe to free them now. |
---|
| 417 | + */ |
---|
| 418 | + mutex_lock(&q->sysfs_lock); |
---|
| 419 | + if (q->elevator) |
---|
| 420 | + blk_mq_sched_free_requests(q); |
---|
| 421 | + mutex_unlock(&q->sysfs_lock); |
---|
827 | 422 | |
---|
828 | | - spin_lock_irq(lock); |
---|
829 | | - if (q->queue_lock != &q->__queue_lock) |
---|
830 | | - q->queue_lock = &q->__queue_lock; |
---|
831 | | - spin_unlock_irq(lock); |
---|
| 423 | + percpu_ref_exit(&q->q_usage_counter); |
---|
832 | 424 | |
---|
833 | 425 | /* @q is and will stay empty, shutdown and put */ |
---|
834 | 426 | blk_put_queue(q); |
---|
835 | 427 | } |
---|
836 | 428 | EXPORT_SYMBOL(blk_cleanup_queue); |
---|
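
The teardown path is now purely blk-mq: mark the queue dying, set NOMERGES/NOXMERGES, freeze it, flag it DEAD, free the scheduler's per-queue requests under sysfs_lock, drop the percpu usage counter and finally put the last reference. Two preconditions are made explicit: might_sleep() (the kernel-doc says "Context: can sleep") and WARN_ON_ONCE(blk_queue_registered(q)), i.e. the queue must already be unregistered from sysfs. A hedged kernel-style sketch of a typical call site; the driver wrappers are hypothetical:

```c
/* Sketch (hypothetical driver): blk_cleanup_queue() runs in process context,
 * after the disk (and with it the queue's sysfs entry) has been removed. */
static void mydrv_remove(struct mydrv *drv)
{
	del_gendisk(drv->disk);			/* also unregisters the queue from sysfs */
	blk_cleanup_queue(drv->queue);		/* may sleep: freezes and drains */
	blk_mq_free_tag_set(&drv->tag_set);	/* tags outlive the queue */
}
```
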
837 | 429 | |
---|
838 | | -/* Allocate memory local to the request queue */ |
---|
839 | | -static void *alloc_request_simple(gfp_t gfp_mask, void *data) |
---|
840 | | -{ |
---|
841 | | - struct request_queue *q = data; |
---|
842 | | - |
---|
843 | | - return kmem_cache_alloc_node(request_cachep, gfp_mask, q->node); |
---|
844 | | -} |
---|
845 | | - |
---|
846 | | -static void free_request_simple(void *element, void *data) |
---|
847 | | -{ |
---|
848 | | - kmem_cache_free(request_cachep, element); |
---|
849 | | -} |
---|
850 | | - |
---|
851 | | -static void *alloc_request_size(gfp_t gfp_mask, void *data) |
---|
852 | | -{ |
---|
853 | | - struct request_queue *q = data; |
---|
854 | | - struct request *rq; |
---|
855 | | - |
---|
856 | | - rq = kmalloc_node(sizeof(struct request) + q->cmd_size, gfp_mask, |
---|
857 | | - q->node); |
---|
858 | | - if (rq && q->init_rq_fn && q->init_rq_fn(q, rq, gfp_mask) < 0) { |
---|
859 | | - kfree(rq); |
---|
860 | | - rq = NULL; |
---|
861 | | - } |
---|
862 | | - return rq; |
---|
863 | | -} |
---|
864 | | - |
---|
865 | | -static void free_request_size(void *element, void *data) |
---|
866 | | -{ |
---|
867 | | - struct request_queue *q = data; |
---|
868 | | - |
---|
869 | | - if (q->exit_rq_fn) |
---|
870 | | - q->exit_rq_fn(q, element); |
---|
871 | | - kfree(element); |
---|
872 | | -} |
---|
873 | | - |
---|
874 | | -int blk_init_rl(struct request_list *rl, struct request_queue *q, |
---|
875 | | - gfp_t gfp_mask) |
---|
876 | | -{ |
---|
877 | | - if (unlikely(rl->rq_pool) || q->mq_ops) |
---|
878 | | - return 0; |
---|
879 | | - |
---|
880 | | - rl->q = q; |
---|
881 | | - rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0; |
---|
882 | | - rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0; |
---|
883 | | - init_waitqueue_head(&rl->wait[BLK_RW_SYNC]); |
---|
884 | | - init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]); |
---|
885 | | - |
---|
886 | | - if (q->cmd_size) { |
---|
887 | | - rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, |
---|
888 | | - alloc_request_size, free_request_size, |
---|
889 | | - q, gfp_mask, q->node); |
---|
890 | | - } else { |
---|
891 | | - rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, |
---|
892 | | - alloc_request_simple, free_request_simple, |
---|
893 | | - q, gfp_mask, q->node); |
---|
894 | | - } |
---|
895 | | - if (!rl->rq_pool) |
---|
896 | | - return -ENOMEM; |
---|
897 | | - |
---|
898 | | - if (rl != &q->root_rl) |
---|
899 | | - WARN_ON_ONCE(!blk_get_queue(q)); |
---|
900 | | - |
---|
901 | | - return 0; |
---|
902 | | -} |
---|
903 | | - |
---|
904 | | -void blk_exit_rl(struct request_queue *q, struct request_list *rl) |
---|
905 | | -{ |
---|
906 | | - if (rl->rq_pool) { |
---|
907 | | - mempool_destroy(rl->rq_pool); |
---|
908 | | - if (rl != &q->root_rl) |
---|
909 | | - blk_put_queue(q); |
---|
910 | | - } |
---|
911 | | -} |
---|
912 | | - |
---|
913 | | -struct request_queue *blk_alloc_queue(gfp_t gfp_mask) |
---|
914 | | -{ |
---|
915 | | - return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE, NULL); |
---|
916 | | -} |
---|
917 | | -EXPORT_SYMBOL(blk_alloc_queue); |
---|
918 | | - |
---|
919 | 430 | /** |
---|
920 | 431 | * blk_queue_enter() - try to increase q->q_usage_counter |
---|
921 | 432 | * @q: request queue pointer |
---|
922 | | - * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PREEMPT |
---|
| 433 | + * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PM |
---|
923 | 434 | */ |
---|
924 | 435 | int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) |
---|
925 | 436 | { |
---|
926 | | - const bool pm = flags & BLK_MQ_REQ_PREEMPT; |
---|
| 437 | + const bool pm = flags & BLK_MQ_REQ_PM; |
---|
927 | 438 | |
---|
928 | 439 | while (true) { |
---|
929 | 440 | bool success = false; |
---|
.. | .. |
---|
959 | 470 | smp_rmb(); |
---|
960 | 471 | |
---|
961 | 472 | wait_event(q->mq_freeze_wq, |
---|
962 | | - (atomic_read(&q->mq_freeze_depth) == 0 && |
---|
963 | | - (pm || !blk_queue_pm_only(q))) || |
---|
| 473 | + (!q->mq_freeze_depth && |
---|
| 474 | + (pm || (blk_pm_request_resume(q), |
---|
| 475 | + !blk_queue_pm_only(q)))) || |
---|
964 | 476 | blk_queue_dying(q)); |
---|
965 | 477 | if (blk_queue_dying(q)) |
---|
966 | 478 | return -ENODEV; |
---|
967 | 479 | } |
---|
| 480 | +} |
---|
| 481 | + |
---|
| 482 | +static inline int bio_queue_enter(struct bio *bio) |
---|
| 483 | +{ |
---|
| 484 | + struct request_queue *q = bio->bi_disk->queue; |
---|
| 485 | + bool nowait = bio->bi_opf & REQ_NOWAIT; |
---|
| 486 | + int ret; |
---|
| 487 | + |
---|
| 488 | + ret = blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0); |
---|
| 489 | + if (unlikely(ret)) { |
---|
| 490 | + if (nowait && !blk_queue_dying(q)) |
---|
| 491 | + bio_wouldblock_error(bio); |
---|
| 492 | + else |
---|
| 493 | + bio_io_error(bio); |
---|
| 494 | + } |
---|
| 495 | + |
---|
| 496 | + return ret; |
---|
968 | 497 | } |
---|
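
bio_queue_enter() centralizes the failure policy for bios that cannot take a queue usage reference: a REQ_NOWAIT bio on a queue that is merely frozen or PM-blocked is ended with bio_wouldblock_error(), anything else (including a dying queue) gets bio_io_error(); blk_queue_enter() itself sleeps on mq_freeze_wq unless BLK_MQ_REQ_NOWAIT was passed. A runnable toy model of just that decision logic; the structs and status values are stand-ins, not the kernel's types:

```c
/* Toy model of the bio_queue_enter() failure mapping. */
#include <stdbool.h>
#include <stdio.h>

enum toy_status { TOY_OK, TOY_AGAIN, TOY_IOERR };

struct toy_queue { bool dying; bool frozen; };
struct toy_bio   { bool nowait; enum toy_status status; };

static int toy_queue_enter(struct toy_queue *q, bool nowait)
{
	if (q->dying)
		return -1;	/* queue is going away */
	if (q->frozen && nowait)
		return -2;	/* would have to wait, caller said don't */
	/* (the real code would sleep here until the queue is unfrozen) */
	return 0;		/* usage reference taken */
}

static int toy_bio_queue_enter(struct toy_queue *q, struct toy_bio *bio)
{
	int ret = toy_queue_enter(q, bio->nowait);

	if (ret)	/* nowait on a live queue -> retryable, else hard error */
		bio->status = (bio->nowait && !q->dying) ? TOY_AGAIN : TOY_IOERR;
	return ret;
}

int main(void)
{
	struct toy_queue q = { .dying = false, .frozen = true };
	struct toy_bio bio = { .nowait = true, .status = TOY_OK };

	toy_bio_queue_enter(&q, &bio);
	printf("status=%d\n", bio.status);	/* 1 == TOY_AGAIN */
	return 0;
}
```
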
969 | 498 | |
---|
970 | 499 | void blk_queue_exit(struct request_queue *q) |
---|
.. | .. |
---|
987 | 516 | kblockd_schedule_work(&q->timeout_work); |
---|
988 | 517 | } |
---|
989 | 518 | |
---|
990 | | -static void blk_timeout_work_dummy(struct work_struct *work) |
---|
| 519 | +static void blk_timeout_work(struct work_struct *work) |
---|
991 | 520 | { |
---|
992 | 521 | } |
---|
993 | 522 | |
---|
994 | | -/** |
---|
995 | | - * blk_alloc_queue_node - allocate a request queue |
---|
996 | | - * @gfp_mask: memory allocation flags |
---|
997 | | - * @node_id: NUMA node to allocate memory from |
---|
998 | | - * @lock: For legacy queues, pointer to a spinlock that will be used to e.g. |
---|
999 | | - * serialize calls to the legacy .request_fn() callback. Ignored for |
---|
1000 | | - * blk-mq request queues. |
---|
1001 | | - * |
---|
1002 | | - * Note: pass the queue lock as the third argument to this function instead of |
---|
1003 | | - * setting the queue lock pointer explicitly to avoid triggering a sporadic |
---|
1004 | | - * crash in the blkcg code. This function namely calls blkcg_init_queue() and |
---|
1005 | | - * the queue lock pointer must be set before blkcg_init_queue() is called. |
---|
1006 | | - */ |
---|
1007 | | -struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, |
---|
1008 | | - spinlock_t *lock) |
---|
| 523 | +struct request_queue *blk_alloc_queue(int node_id) |
---|
1009 | 524 | { |
---|
1010 | 525 | struct request_queue *q; |
---|
1011 | 526 | int ret; |
---|
1012 | 527 | |
---|
1013 | 528 | q = kmem_cache_alloc_node(blk_requestq_cachep, |
---|
1014 | | - gfp_mask | __GFP_ZERO, node_id); |
---|
| 529 | + GFP_KERNEL | __GFP_ZERO, node_id); |
---|
1015 | 530 | if (!q) |
---|
1016 | 531 | return NULL; |
---|
1017 | 532 | |
---|
1018 | | - INIT_LIST_HEAD(&q->queue_head); |
---|
1019 | 533 | q->last_merge = NULL; |
---|
1020 | | - q->end_sector = 0; |
---|
1021 | | - q->boundary_rq = NULL; |
---|
1022 | 534 | |
---|
1023 | | - q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask); |
---|
| 535 | + q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL); |
---|
1024 | 536 | if (q->id < 0) |
---|
1025 | 537 | goto fail_q; |
---|
1026 | 538 | |
---|
.. | .. |
---|
1028 | 540 | if (ret) |
---|
1029 | 541 | goto fail_id; |
---|
1030 | 542 | |
---|
1031 | | - q->backing_dev_info = bdi_alloc_node(gfp_mask, node_id); |
---|
| 543 | + q->backing_dev_info = bdi_alloc(node_id); |
---|
1032 | 544 | if (!q->backing_dev_info) |
---|
1033 | 545 | goto fail_split; |
---|
1034 | 546 | |
---|
.. | .. |
---|
1036 | 548 | if (!q->stats) |
---|
1037 | 549 | goto fail_stats; |
---|
1038 | 550 | |
---|
1039 | | - q->backing_dev_info->ra_pages = |
---|
1040 | | - (VM_MAX_READAHEAD * 1024) / PAGE_SIZE; |
---|
1041 | | - q->backing_dev_info->io_pages = |
---|
1042 | | - (VM_MAX_READAHEAD * 1024) / PAGE_SIZE; |
---|
1043 | | - q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK; |
---|
1044 | | - q->backing_dev_info->name = "block"; |
---|
1045 | 551 | q->node = node_id; |
---|
| 552 | + |
---|
| 553 | + atomic_set(&q->nr_active_requests_shared_sbitmap, 0); |
---|
1046 | 554 | |
---|
1047 | 555 | timer_setup(&q->backing_dev_info->laptop_mode_wb_timer, |
---|
1048 | 556 | laptop_mode_timer_fn, 0); |
---|
1049 | 557 | timer_setup(&q->timeout, blk_rq_timed_out_timer, 0); |
---|
1050 | | - INIT_WORK(&q->timeout_work, blk_timeout_work_dummy); |
---|
1051 | | - INIT_LIST_HEAD(&q->timeout_list); |
---|
| 558 | + INIT_WORK(&q->timeout_work, blk_timeout_work); |
---|
1052 | 559 | INIT_LIST_HEAD(&q->icq_list); |
---|
1053 | 560 | #ifdef CONFIG_BLK_CGROUP |
---|
1054 | 561 | INIT_LIST_HEAD(&q->blkg_list); |
---|
1055 | 562 | #endif |
---|
1056 | | - INIT_DELAYED_WORK(&q->delay_work, blk_delay_work); |
---|
1057 | 563 | |
---|
1058 | 564 | kobject_init(&q->kobj, &blk_queue_ktype); |
---|
1059 | 565 | |
---|
1060 | | -#ifdef CONFIG_BLK_DEV_IO_TRACE |
---|
1061 | | - mutex_init(&q->blk_trace_mutex); |
---|
1062 | | -#endif |
---|
| 566 | + mutex_init(&q->debugfs_mutex); |
---|
1063 | 567 | mutex_init(&q->sysfs_lock); |
---|
1064 | | - spin_lock_init(&q->__queue_lock); |
---|
1065 | | - |
---|
1066 | | - if (!q->mq_ops) |
---|
1067 | | - q->queue_lock = lock ? : &q->__queue_lock; |
---|
1068 | | - |
---|
1069 | | - /* |
---|
1070 | | - * A queue starts its life with bypass turned on to avoid |
---|
1071 | | - * unnecessary bypass on/off overhead and nasty surprises during |
---|
1072 | | - * init. The initial bypass will be finished when the queue is |
---|
1073 | | - * registered by blk_register_queue(). |
---|
1074 | | - */ |
---|
1075 | | - q->bypass_depth = 1; |
---|
1076 | | - queue_flag_set_unlocked(QUEUE_FLAG_BYPASS, q); |
---|
| 568 | + mutex_init(&q->sysfs_dir_lock); |
---|
| 569 | + spin_lock_init(&q->queue_lock); |
---|
1077 | 570 | |
---|
1078 | 571 | init_waitqueue_head(&q->mq_freeze_wq); |
---|
| 572 | + mutex_init(&q->mq_freeze_lock); |
---|
1079 | 573 | |
---|
1080 | 574 | /* |
---|
1081 | 575 | * Init percpu_ref in atomic mode so that it's faster to shutdown. |
---|
.. | .. |
---|
1088 | 582 | |
---|
1089 | 583 | if (blkcg_init_queue(q)) |
---|
1090 | 584 | goto fail_ref; |
---|
| 585 | + |
---|
| 586 | + blk_queue_dma_alignment(q, 511); |
---|
| 587 | + blk_set_default_limits(&q->limits); |
---|
| 588 | + q->nr_requests = BLKDEV_MAX_RQ; |
---|
1091 | 589 | |
---|
1092 | 590 | return q; |
---|
1093 | 591 | |
---|
.. | .. |
---|
1105 | 603 | kmem_cache_free(blk_requestq_cachep, q); |
---|
1106 | 604 | return NULL; |
---|
1107 | 605 | } |
---|
1108 | | -EXPORT_SYMBOL(blk_alloc_queue_node); |
---|
| 606 | +EXPORT_SYMBOL(blk_alloc_queue); |
---|
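
The allocator's signature shrinks across this series: blk_alloc_queue_node(gfp_mask, node_id, lock) becomes blk_alloc_queue(node_id), the allocation is always GFP_KERNEL, and the spinlock now lives inside the queue (spin_lock_init(&q->queue_lock) above), so there is no lock pointer to pass. For a bio-based driver the conversion at the call site is mechanical; a hedged before/after sketch (the surrounding error handling is illustrative):

```c
/* Before (legacy API, as removed above): */
q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL);

/* After: NUMA node only; GFP_KERNEL and the embedded queue_lock are implied. */
q = blk_alloc_queue(NUMA_NO_NODE);
if (!q)
	return -ENOMEM;
```
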
1109 | 607 | |
---|
1110 | 608 | /** |
---|
1111 | | - * blk_init_queue - prepare a request queue for use with a block device |
---|
1112 | | - * @rfn: The function to be called to process requests that have been |
---|
1113 | | - * placed on the queue. |
---|
1114 | | - * @lock: Request queue spin lock |
---|
| 609 | + * blk_get_queue - increment the request_queue refcount |
---|
| 610 | + * @q: the request_queue structure to increment the refcount for |
---|
1115 | 611 | * |
---|
1116 | | - * Description: |
---|
1117 | | - * If a block device wishes to use the standard request handling procedures, |
---|
1118 | | - * which sorts requests and coalesces adjacent requests, then it must |
---|
1119 | | - * call blk_init_queue(). The function @rfn will be called when there |
---|
1120 | | - * are requests on the queue that need to be processed. If the device |
---|
1121 | | - * supports plugging, then @rfn may not be called immediately when requests |
---|
1122 | | - * are available on the queue, but may be called at some time later instead. |
---|
1123 | | - * Plugged queues are generally unplugged when a buffer belonging to one |
---|
1124 | | - * of the requests on the queue is needed, or due to memory pressure. |
---|
| 612 | + * Increment the refcount of the request_queue kobject. |
---|
1125 | 613 | * |
---|
1126 | | - * @rfn is not required, or even expected, to remove all requests off the |
---|
1127 | | - * queue, but only as many as it can handle at a time. If it does leave |
---|
1128 | | - * requests on the queue, it is responsible for arranging that the requests |
---|
1129 | | - * get dealt with eventually. |
---|
1130 | | - * |
---|
1131 | | - * The queue spin lock must be held while manipulating the requests on the |
---|
1132 | | - * request queue; this lock will be taken also from interrupt context, so irq |
---|
1133 | | - * disabling is needed for it. |
---|
1134 | | - * |
---|
1135 | | - * Function returns a pointer to the initialized request queue, or %NULL if |
---|
1136 | | - * it didn't succeed. |
---|
1137 | | - * |
---|
1138 | | - * Note: |
---|
1139 | | - * blk_init_queue() must be paired with a blk_cleanup_queue() call |
---|
1140 | | - * when the block device is deactivated (such as at module unload). |
---|
1141 | | - **/ |
---|
1142 | | - |
---|
1143 | | -struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) |
---|
1144 | | -{ |
---|
1145 | | - return blk_init_queue_node(rfn, lock, NUMA_NO_NODE); |
---|
1146 | | -} |
---|
1147 | | -EXPORT_SYMBOL(blk_init_queue); |
---|
1148 | | - |
---|
1149 | | -struct request_queue * |
---|
1150 | | -blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) |
---|
1151 | | -{ |
---|
1152 | | - struct request_queue *q; |
---|
1153 | | - |
---|
1154 | | - q = blk_alloc_queue_node(GFP_KERNEL, node_id, lock); |
---|
1155 | | - if (!q) |
---|
1156 | | - return NULL; |
---|
1157 | | - |
---|
1158 | | - q->request_fn = rfn; |
---|
1159 | | - if (blk_init_allocated_queue(q) < 0) { |
---|
1160 | | - blk_cleanup_queue(q); |
---|
1161 | | - return NULL; |
---|
1162 | | - } |
---|
1163 | | - |
---|
1164 | | - return q; |
---|
1165 | | -} |
---|
1166 | | -EXPORT_SYMBOL(blk_init_queue_node); |
---|
1167 | | - |
---|
1168 | | -static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio); |
---|
1169 | | - |
---|
1170 | | - |
---|
1171 | | -int blk_init_allocated_queue(struct request_queue *q) |
---|
1172 | | -{ |
---|
1173 | | - WARN_ON_ONCE(q->mq_ops); |
---|
1174 | | - |
---|
1175 | | - q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size, GFP_KERNEL); |
---|
1176 | | - if (!q->fq) |
---|
1177 | | - return -ENOMEM; |
---|
1178 | | - |
---|
1179 | | - if (q->init_rq_fn && q->init_rq_fn(q, q->fq->flush_rq, GFP_KERNEL)) |
---|
1180 | | - goto out_free_flush_queue; |
---|
1181 | | - |
---|
1182 | | - if (blk_init_rl(&q->root_rl, q, GFP_KERNEL)) |
---|
1183 | | - goto out_exit_flush_rq; |
---|
1184 | | - |
---|
1185 | | - INIT_WORK(&q->timeout_work, blk_timeout_work); |
---|
1186 | | - q->queue_flags |= QUEUE_FLAG_DEFAULT; |
---|
1187 | | - |
---|
1188 | | - /* |
---|
1189 | | - * This also sets hw/phys segments, boundary and size |
---|
1190 | | - */ |
---|
1191 | | - blk_queue_make_request(q, blk_queue_bio); |
---|
1192 | | - |
---|
1193 | | - q->sg_reserved_size = INT_MAX; |
---|
1194 | | - |
---|
1195 | | - if (elevator_init(q)) |
---|
1196 | | - goto out_exit_flush_rq; |
---|
1197 | | - return 0; |
---|
1198 | | - |
---|
1199 | | -out_exit_flush_rq: |
---|
1200 | | - if (q->exit_rq_fn) |
---|
1201 | | - q->exit_rq_fn(q, q->fq->flush_rq); |
---|
1202 | | -out_free_flush_queue: |
---|
1203 | | - blk_free_flush_queue(q->fq); |
---|
1204 | | - q->fq = NULL; |
---|
1205 | | - return -ENOMEM; |
---|
1206 | | -} |
---|
1207 | | -EXPORT_SYMBOL(blk_init_allocated_queue); |
---|
1208 | | - |
---|
| 614 | + * Context: Any context. |
---|
| 615 | + */ |
---|
1209 | 616 | bool blk_get_queue(struct request_queue *q) |
---|
1210 | 617 | { |
---|
1211 | 618 | if (likely(!blk_queue_dying(q))) { |
---|
.. | .. |
---|
1216 | 623 | return false; |
---|
1217 | 624 | } |
---|
1218 | 625 | EXPORT_SYMBOL(blk_get_queue); |
---|
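
The new kernel-doc above pins down the refcounting contract: blk_get_queue() takes a reference on the queue's kobject and returns false once the queue is dying, blk_put_queue() drops it, and the final put must not happen from atomic context because releasing the queue can sleep. A short hedged usage sketch; the code between the get and the put is hypothetical:

```c
/* Sketch: pairing blk_get_queue()/blk_put_queue() around use of q. */
if (!blk_get_queue(q))
	return -ENODEV;		/* queue already dying, don't touch it */

do_something_with(q);		/* q cannot be released while the ref is held */

blk_put_queue(q);		/* may be the final put: not from atomic context */
```
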
1219 | | - |
---|
1220 | | -static inline void blk_free_request(struct request_list *rl, struct request *rq) |
---|
1221 | | -{ |
---|
1222 | | - if (rq->rq_flags & RQF_ELVPRIV) { |
---|
1223 | | - elv_put_request(rl->q, rq); |
---|
1224 | | - if (rq->elv.icq) |
---|
1225 | | - put_io_context(rq->elv.icq->ioc); |
---|
1226 | | - } |
---|
1227 | | - |
---|
1228 | | - mempool_free(rq, rl->rq_pool); |
---|
1229 | | -} |
---|
1230 | | - |
---|
1231 | | -/* |
---|
1232 | | - * ioc_batching returns true if the ioc is a valid batching request and |
---|
1233 | | - * should be given priority access to a request. |
---|
1234 | | - */ |
---|
1235 | | -static inline int ioc_batching(struct request_queue *q, struct io_context *ioc) |
---|
1236 | | -{ |
---|
1237 | | - if (!ioc) |
---|
1238 | | - return 0; |
---|
1239 | | - |
---|
1240 | | - /* |
---|
1241 | | - * Make sure the process is able to allocate at least 1 request |
---|
1242 | | - * even if the batch times out, otherwise we could theoretically |
---|
1243 | | - * lose wakeups. |
---|
1244 | | - */ |
---|
1245 | | - return ioc->nr_batch_requests == q->nr_batching || |
---|
1246 | | - (ioc->nr_batch_requests > 0 |
---|
1247 | | - && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); |
---|
1248 | | -} |
---|
1249 | | - |
---|
1250 | | -/* |
---|
1251 | | - * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This |
---|
1252 | | - * will cause the process to be a "batcher" on all queues in the system. This |
---|
1253 | | - * is the behaviour we want though - once it gets a wakeup it should be given |
---|
1254 | | - * a nice run. |
---|
1255 | | - */ |
---|
1256 | | -static void ioc_set_batching(struct request_queue *q, struct io_context *ioc) |
---|
1257 | | -{ |
---|
1258 | | - if (!ioc || ioc_batching(q, ioc)) |
---|
1259 | | - return; |
---|
1260 | | - |
---|
1261 | | - ioc->nr_batch_requests = q->nr_batching; |
---|
1262 | | - ioc->last_waited = jiffies; |
---|
1263 | | -} |
---|
1264 | | - |
---|
1265 | | -static void __freed_request(struct request_list *rl, int sync) |
---|
1266 | | -{ |
---|
1267 | | - struct request_queue *q = rl->q; |
---|
1268 | | - |
---|
1269 | | - if (rl->count[sync] < queue_congestion_off_threshold(q)) |
---|
1270 | | - blk_clear_congested(rl, sync); |
---|
1271 | | - |
---|
1272 | | - if (rl->count[sync] + 1 <= q->nr_requests) { |
---|
1273 | | - if (waitqueue_active(&rl->wait[sync])) |
---|
1274 | | - wake_up(&rl->wait[sync]); |
---|
1275 | | - |
---|
1276 | | - blk_clear_rl_full(rl, sync); |
---|
1277 | | - } |
---|
1278 | | -} |
---|
1279 | | - |
---|
1280 | | -/* |
---|
1281 | | - * A request has just been released. Account for it, update the full and |
---|
1282 | | - * congestion status, wake up any waiters. Called under q->queue_lock. |
---|
1283 | | - */ |
---|
1284 | | -static void freed_request(struct request_list *rl, bool sync, |
---|
1285 | | - req_flags_t rq_flags) |
---|
1286 | | -{ |
---|
1287 | | - struct request_queue *q = rl->q; |
---|
1288 | | - |
---|
1289 | | - q->nr_rqs[sync]--; |
---|
1290 | | - rl->count[sync]--; |
---|
1291 | | - if (rq_flags & RQF_ELVPRIV) |
---|
1292 | | - q->nr_rqs_elvpriv--; |
---|
1293 | | - |
---|
1294 | | - __freed_request(rl, sync); |
---|
1295 | | - |
---|
1296 | | - if (unlikely(rl->starved[sync ^ 1])) |
---|
1297 | | - __freed_request(rl, sync ^ 1); |
---|
1298 | | -} |
---|
1299 | | - |
---|
1300 | | -int blk_update_nr_requests(struct request_queue *q, unsigned int nr) |
---|
1301 | | -{ |
---|
1302 | | - struct request_list *rl; |
---|
1303 | | - int on_thresh, off_thresh; |
---|
1304 | | - |
---|
1305 | | - WARN_ON_ONCE(q->mq_ops); |
---|
1306 | | - |
---|
1307 | | - spin_lock_irq(q->queue_lock); |
---|
1308 | | - q->nr_requests = nr; |
---|
1309 | | - blk_queue_congestion_threshold(q); |
---|
1310 | | - on_thresh = queue_congestion_on_threshold(q); |
---|
1311 | | - off_thresh = queue_congestion_off_threshold(q); |
---|
1312 | | - |
---|
1313 | | - blk_queue_for_each_rl(rl, q) { |
---|
1314 | | - if (rl->count[BLK_RW_SYNC] >= on_thresh) |
---|
1315 | | - blk_set_congested(rl, BLK_RW_SYNC); |
---|
1316 | | - else if (rl->count[BLK_RW_SYNC] < off_thresh) |
---|
1317 | | - blk_clear_congested(rl, BLK_RW_SYNC); |
---|
1318 | | - |
---|
1319 | | - if (rl->count[BLK_RW_ASYNC] >= on_thresh) |
---|
1320 | | - blk_set_congested(rl, BLK_RW_ASYNC); |
---|
1321 | | - else if (rl->count[BLK_RW_ASYNC] < off_thresh) |
---|
1322 | | - blk_clear_congested(rl, BLK_RW_ASYNC); |
---|
1323 | | - |
---|
1324 | | - if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { |
---|
1325 | | - blk_set_rl_full(rl, BLK_RW_SYNC); |
---|
1326 | | - } else { |
---|
1327 | | - blk_clear_rl_full(rl, BLK_RW_SYNC); |
---|
1328 | | - wake_up(&rl->wait[BLK_RW_SYNC]); |
---|
1329 | | - } |
---|
1330 | | - |
---|
1331 | | - if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) { |
---|
1332 | | - blk_set_rl_full(rl, BLK_RW_ASYNC); |
---|
1333 | | - } else { |
---|
1334 | | - blk_clear_rl_full(rl, BLK_RW_ASYNC); |
---|
1335 | | - wake_up(&rl->wait[BLK_RW_ASYNC]); |
---|
1336 | | - } |
---|
1337 | | - } |
---|
1338 | | - |
---|
1339 | | - spin_unlock_irq(q->queue_lock); |
---|
1340 | | - return 0; |
---|
1341 | | -} |
---|
1342 | | - |
---|
1343 | | -/** |
---|
1344 | | - * __get_request - get a free request |
---|
1345 | | - * @rl: request list to allocate from |
---|
1346 | | - * @op: operation and flags |
---|
1347 | | - * @bio: bio to allocate request for (can be %NULL) |
---|
1348 | | - * @flags: BLK_MQ_REQ_* flags
---|
1349 | | - * @gfp_mask: allocator flags |
---|
1350 | | - * |
---|
1351 | | - * Get a free request from @q. This function may fail under memory |
---|
1352 | | - * pressure or if @q is dead. |
---|
1353 | | - * |
---|
1354 | | - * Must be called with @q->queue_lock held and, |
---|
1355 | | - * Returns ERR_PTR on failure, with @q->queue_lock held. |
---|
1356 | | - * Returns request pointer on success, with @q->queue_lock *not held*. |
---|
1357 | | - */ |
---|
1358 | | -static struct request *__get_request(struct request_list *rl, unsigned int op, |
---|
1359 | | - struct bio *bio, blk_mq_req_flags_t flags, gfp_t gfp_mask) |
---|
1360 | | -{ |
---|
1361 | | - struct request_queue *q = rl->q; |
---|
1362 | | - struct request *rq; |
---|
1363 | | - struct elevator_type *et = q->elevator->type; |
---|
1364 | | - struct io_context *ioc = rq_ioc(bio); |
---|
1365 | | - struct io_cq *icq = NULL; |
---|
1366 | | - const bool is_sync = op_is_sync(op); |
---|
1367 | | - int may_queue; |
---|
1368 | | - req_flags_t rq_flags = RQF_ALLOCED; |
---|
1369 | | - |
---|
1370 | | - lockdep_assert_held(q->queue_lock); |
---|
1371 | | - |
---|
1372 | | - if (unlikely(blk_queue_dying(q))) |
---|
1373 | | - return ERR_PTR(-ENODEV); |
---|
1374 | | - |
---|
1375 | | - may_queue = elv_may_queue(q, op); |
---|
1376 | | - if (may_queue == ELV_MQUEUE_NO) |
---|
1377 | | - goto rq_starved; |
---|
1378 | | - |
---|
1379 | | - if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) { |
---|
1380 | | - if (rl->count[is_sync]+1 >= q->nr_requests) { |
---|
1381 | | - /* |
---|
1382 | | - * The queue will fill after this allocation, so set |
---|
1383 | | - * it as full, and mark this process as "batching". |
---|
1384 | | - * This process will be allowed to complete a batch of |
---|
1385 | | - * requests, others will be blocked. |
---|
1386 | | - */ |
---|
1387 | | - if (!blk_rl_full(rl, is_sync)) { |
---|
1388 | | - ioc_set_batching(q, ioc); |
---|
1389 | | - blk_set_rl_full(rl, is_sync); |
---|
1390 | | - } else { |
---|
1391 | | - if (may_queue != ELV_MQUEUE_MUST |
---|
1392 | | - && !ioc_batching(q, ioc)) { |
---|
1393 | | - /* |
---|
1394 | | - * The queue is full and the allocating |
---|
1395 | | - * process is not a "batcher", and not |
---|
1396 | | - * exempted by the IO scheduler |
---|
1397 | | - */ |
---|
1398 | | - return ERR_PTR(-ENOMEM); |
---|
1399 | | - } |
---|
1400 | | - } |
---|
1401 | | - } |
---|
1402 | | - blk_set_congested(rl, is_sync); |
---|
1403 | | - } |
---|
1404 | | - |
---|
1405 | | - /* |
---|
1406 | | - * Only allow batching queuers to allocate up to 50% over the defined |
---|
1407 | | - * limit of requests, otherwise we could have thousands of requests |
---|
1408 | | - * allocated with any setting of ->nr_requests |
---|
1409 | | - */ |
---|
1410 | | - if (rl->count[is_sync] >= (3 * q->nr_requests / 2)) |
---|
1411 | | - return ERR_PTR(-ENOMEM); |
---|
1412 | | - |
---|
1413 | | - q->nr_rqs[is_sync]++; |
---|
1414 | | - rl->count[is_sync]++; |
---|
1415 | | - rl->starved[is_sync] = 0; |
---|
1416 | | - |
---|
1417 | | - /* |
---|
1418 | | - * Decide whether the new request will be managed by elevator. If |
---|
1419 | | - * so, mark @rq_flags and increment elvpriv. Non-zero elvpriv will |
---|
1420 | | - * prevent the current elevator from being destroyed until the new |
---|
1421 | | - * request is freed. This guarantees icq's won't be destroyed and |
---|
1422 | | - * makes creating new ones safe. |
---|
1423 | | - * |
---|
1424 | | - * Flush requests do not use the elevator so skip initialization. |
---|
1425 | | - * This allows a request to share the flush and elevator data. |
---|
1426 | | - * |
---|
1427 | | - * Also, lookup icq while holding queue_lock. If it doesn't exist, |
---|
1428 | | - * it will be created after releasing queue_lock. |
---|
1429 | | - */ |
---|
1430 | | - if (!op_is_flush(op) && !blk_queue_bypass(q)) { |
---|
1431 | | - rq_flags |= RQF_ELVPRIV; |
---|
1432 | | - q->nr_rqs_elvpriv++; |
---|
1433 | | - if (et->icq_cache && ioc) |
---|
1434 | | - icq = ioc_lookup_icq(ioc, q); |
---|
1435 | | - } |
---|
1436 | | - |
---|
1437 | | - if (blk_queue_io_stat(q)) |
---|
1438 | | - rq_flags |= RQF_IO_STAT; |
---|
1439 | | - spin_unlock_irq(q->queue_lock); |
---|
1440 | | - |
---|
1441 | | - /* allocate and init request */ |
---|
1442 | | - rq = mempool_alloc(rl->rq_pool, gfp_mask); |
---|
1443 | | - if (!rq) |
---|
1444 | | - goto fail_alloc; |
---|
1445 | | - |
---|
1446 | | - blk_rq_init(q, rq); |
---|
1447 | | - blk_rq_set_rl(rq, rl); |
---|
1448 | | - rq->cmd_flags = op; |
---|
1449 | | - rq->rq_flags = rq_flags; |
---|
1450 | | - if (flags & BLK_MQ_REQ_PREEMPT) |
---|
1451 | | - rq->rq_flags |= RQF_PREEMPT; |
---|
1452 | | - |
---|
1453 | | - /* init elvpriv */ |
---|
1454 | | - if (rq_flags & RQF_ELVPRIV) { |
---|
1455 | | - if (unlikely(et->icq_cache && !icq)) { |
---|
1456 | | - if (ioc) |
---|
1457 | | - icq = ioc_create_icq(ioc, q, gfp_mask); |
---|
1458 | | - if (!icq) |
---|
1459 | | - goto fail_elvpriv; |
---|
1460 | | - } |
---|
1461 | | - |
---|
1462 | | - rq->elv.icq = icq; |
---|
1463 | | - if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) |
---|
1464 | | - goto fail_elvpriv; |
---|
1465 | | - |
---|
1466 | | - /* @rq->elv.icq holds io_context until @rq is freed */ |
---|
1467 | | - if (icq) |
---|
1468 | | - get_io_context(icq->ioc); |
---|
1469 | | - } |
---|
1470 | | -out: |
---|
1471 | | - /* |
---|
1472 | | - * ioc may be NULL here, and ioc_batching will be false. That's |
---|
1473 | | - * OK, if the queue is under the request limit then requests need |
---|
1474 | | - * not count toward the nr_batch_requests limit. There will always |
---|
1475 | | - * be some limit enforced by BLK_BATCH_TIME. |
---|
1476 | | - */ |
---|
1477 | | - if (ioc_batching(q, ioc)) |
---|
1478 | | - ioc->nr_batch_requests--; |
---|
1479 | | - |
---|
1480 | | - trace_block_getrq(q, bio, op); |
---|
1481 | | - return rq; |
---|
1482 | | - |
---|
1483 | | -fail_elvpriv: |
---|
1484 | | - /* |
---|
1485 | | - * elvpriv init failed. ioc, icq and elvpriv aren't mempool backed |
---|
1486 | | - * and may fail indefinitely under memory pressure and thus |
---|
1487 | | - * shouldn't stall IO. Treat this request as !elvpriv. This will |
---|
1488 | | - * disturb iosched and blkcg but weird is better than dead.
---|
1489 | | - */ |
---|
1490 | | - printk_ratelimited(KERN_WARNING "%s: dev %s: request aux data allocation failed, iosched may be disturbed\n", |
---|
1491 | | - __func__, dev_name(q->backing_dev_info->dev)); |
---|
1492 | | - |
---|
1493 | | - rq->rq_flags &= ~RQF_ELVPRIV; |
---|
1494 | | - rq->elv.icq = NULL; |
---|
1495 | | - |
---|
1496 | | - spin_lock_irq(q->queue_lock); |
---|
1497 | | - q->nr_rqs_elvpriv--; |
---|
1498 | | - spin_unlock_irq(q->queue_lock); |
---|
1499 | | - goto out; |
---|
1500 | | - |
---|
1501 | | -fail_alloc: |
---|
1502 | | - /* |
---|
1503 | | - * Allocation failed presumably due to memory. Undo anything we |
---|
1504 | | - * might have messed up. |
---|
1505 | | - * |
---|
1506 | | - * Allocating task should really be put onto the front of the wait |
---|
1507 | | - * queue, but this is pretty rare. |
---|
1508 | | - */ |
---|
1509 | | - spin_lock_irq(q->queue_lock); |
---|
1510 | | - freed_request(rl, is_sync, rq_flags); |
---|
1511 | | - |
---|
1512 | | - /* |
---|
1513 | | - * in the very unlikely event that allocation failed and no |
---|
1514 | | - * requests for this direction was pending, mark us starved so that |
---|
1515 | | - * freeing of a request in the other direction will notice |
---|
1516 | | - * us. another possible fix would be to split the rq mempool into |
---|
1517 | | - * READ and WRITE |
---|
1518 | | - */ |
---|
1519 | | -rq_starved: |
---|
1520 | | - if (unlikely(rl->count[is_sync] == 0)) |
---|
1521 | | - rl->starved[is_sync] = 1; |
---|
1522 | | - return ERR_PTR(-ENOMEM); |
---|
1523 | | -} |
---|
1524 | | - |
---|
1525 | | -/** |
---|
1526 | | - * get_request - get a free request |
---|
1527 | | - * @q: request_queue to allocate request from |
---|
1528 | | - * @op: operation and flags |
---|
1529 | | - * @bio: bio to allocate request for (can be %NULL) |
---|
1530 | | - * @flags: BLK_MQ_REQ_* flags. |
---|
1531 | | - * @gfp: allocator flags |
---|
1532 | | - * |
---|
1533 | | - * Get a free request from @q. If %BLK_MQ_REQ_NOWAIT is set in @flags, |
---|
1534 | | - * this function keeps retrying under memory pressure and fails iff @q is dead. |
---|
1535 | | - * |
---|
1536 | | - * Must be called with @q->queue_lock held and, |
---|
1537 | | - * Returns ERR_PTR on failure, with @q->queue_lock held. |
---|
1538 | | - * Returns request pointer on success, with @q->queue_lock *not held*. |
---|
1539 | | - */ |
---|
1540 | | -static struct request *get_request(struct request_queue *q, unsigned int op, |
---|
1541 | | - struct bio *bio, blk_mq_req_flags_t flags, gfp_t gfp) |
---|
1542 | | -{ |
---|
1543 | | - const bool is_sync = op_is_sync(op); |
---|
1544 | | - DEFINE_WAIT(wait); |
---|
1545 | | - struct request_list *rl; |
---|
1546 | | - struct request *rq; |
---|
1547 | | - |
---|
1548 | | - lockdep_assert_held(q->queue_lock); |
---|
1549 | | - WARN_ON_ONCE(q->mq_ops); |
---|
1550 | | - |
---|
1551 | | - rl = blk_get_rl(q, bio); /* transferred to @rq on success */ |
---|
1552 | | -retry: |
---|
1553 | | - rq = __get_request(rl, op, bio, flags, gfp); |
---|
1554 | | - if (!IS_ERR(rq)) |
---|
1555 | | - return rq; |
---|
1556 | | - |
---|
1557 | | - if (op & REQ_NOWAIT) { |
---|
1558 | | - blk_put_rl(rl); |
---|
1559 | | - return ERR_PTR(-EAGAIN); |
---|
1560 | | - } |
---|
1561 | | - |
---|
1562 | | - if ((flags & BLK_MQ_REQ_NOWAIT) || unlikely(blk_queue_dying(q))) { |
---|
1563 | | - blk_put_rl(rl); |
---|
1564 | | - return rq; |
---|
1565 | | - } |
---|
1566 | | - |
---|
1567 | | - /* wait on @rl and retry */ |
---|
1568 | | - prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, |
---|
1569 | | - TASK_UNINTERRUPTIBLE); |
---|
1570 | | - |
---|
1571 | | - trace_block_sleeprq(q, bio, op); |
---|
1572 | | - |
---|
1573 | | - spin_unlock_irq(q->queue_lock); |
---|
1574 | | - io_schedule(); |
---|
1575 | | - |
---|
1576 | | - /* |
---|
1577 | | - * After sleeping, we become a "batching" process and will be able |
---|
1578 | | - * to allocate at least one request, and up to a big batch of them |
---|
1579 | | - * for a small period time. See ioc_batching, ioc_set_batching |
---|
1580 | | - */ |
---|
1581 | | - ioc_set_batching(q, current->io_context); |
---|
1582 | | - |
---|
1583 | | - spin_lock_irq(q->queue_lock); |
---|
1584 | | - finish_wait(&rl->wait[is_sync], &wait); |
---|
1585 | | - |
---|
1586 | | - goto retry; |
---|
1587 | | -} |
---|
1588 | | - |
---|
1589 | | -/* flags: BLK_MQ_REQ_PREEMPT and/or BLK_MQ_REQ_NOWAIT. */ |
---|
1590 | | -static struct request *blk_old_get_request(struct request_queue *q, |
---|
1591 | | - unsigned int op, blk_mq_req_flags_t flags) |
---|
1592 | | -{ |
---|
1593 | | - struct request *rq; |
---|
1594 | | - gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC : GFP_NOIO; |
---|
1595 | | - int ret = 0; |
---|
1596 | | - |
---|
1597 | | - WARN_ON_ONCE(q->mq_ops); |
---|
1598 | | - |
---|
1599 | | - /* create ioc upfront */ |
---|
1600 | | - create_io_context(gfp_mask, q->node); |
---|
1601 | | - |
---|
1602 | | - ret = blk_queue_enter(q, flags); |
---|
1603 | | - if (ret) |
---|
1604 | | - return ERR_PTR(ret); |
---|
1605 | | - spin_lock_irq(q->queue_lock); |
---|
1606 | | - rq = get_request(q, op, NULL, flags, gfp_mask); |
---|
1607 | | - if (IS_ERR(rq)) { |
---|
1608 | | - spin_unlock_irq(q->queue_lock); |
---|
1609 | | - blk_queue_exit(q); |
---|
1610 | | - return rq; |
---|
1611 | | - } |
---|
1612 | | - |
---|
1613 | | - /* q->queue_lock is unlocked at this point */ |
---|
1614 | | - rq->__data_len = 0; |
---|
1615 | | - rq->__sector = (sector_t) -1; |
---|
1616 | | - rq->bio = rq->biotail = NULL; |
---|
1617 | | - return rq; |
---|
1618 | | -} |
---|
1619 | 626 | |
---|
1620 | 627 | /** |
---|
1621 | 628 | * blk_get_request - allocate a request |
---|
.. | .. |
---|
1629 | 636 | struct request *req; |
---|
1630 | 637 | |
---|
1631 | 638 | WARN_ON_ONCE(op & REQ_NOWAIT); |
---|
1632 | | - WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PREEMPT)); |
---|
| 639 | + WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PM)); |
---|
1633 | 640 | |
---|
1634 | | - if (q->mq_ops) { |
---|
1635 | | - req = blk_mq_alloc_request(q, op, flags); |
---|
1636 | | - if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn) |
---|
1637 | | - q->mq_ops->initialize_rq_fn(req); |
---|
1638 | | - } else { |
---|
1639 | | - req = blk_old_get_request(q, op, flags); |
---|
1640 | | - if (!IS_ERR(req) && q->initialize_rq_fn) |
---|
1641 | | - q->initialize_rq_fn(req); |
---|
1642 | | - } |
---|
| 641 | + req = blk_mq_alloc_request(q, op, flags); |
---|
| 642 | + if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn) |
---|
| 643 | + q->mq_ops->initialize_rq_fn(req); |
---|
1643 | 644 | |
---|
1644 | 645 | return req; |
---|
1645 | 646 | } |
---|
1646 | 647 | EXPORT_SYMBOL(blk_get_request); |
---|
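With the legacy request list gone, blk_get_request() is now a thin wrapper around blk_mq_alloc_request(). As a hedged, caller-side sketch (not part of this patch; the example_* name and the choice of REQ_OP_DRV_IN are illustrative, and the usual driver includes such as <linux/blkdev.h> are assumed), a passthrough user might do:

static int example_issue_pt(struct request_queue *q)
{
        struct request *rq;

        rq = blk_get_request(q, REQ_OP_DRV_IN, 0);
        if (IS_ERR(rq))
                return PTR_ERR(rq);

        /* ... fill in driver/passthrough specific data here ... */
        blk_execute_rq(q, NULL, rq, 0);         /* synchronous round trip */
        blk_put_request(rq);                    /* ends up in blk_mq_free_request() */
        return 0;
}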
1647 | 648 | |
---|
1648 | | -/** |
---|
1649 | | - * blk_requeue_request - put a request back on queue |
---|
1650 | | - * @q: request queue where request should be inserted |
---|
1651 | | - * @rq: request to be inserted |
---|
1652 | | - * |
---|
1653 | | - * Description: |
---|
1654 | | - * Drivers often keep queueing requests until the hardware cannot accept |
---|
1655 | | - * more, when that condition happens we need to put the request back |
---|
1656 | | - * on the queue. Must be called with queue lock held. |
---|
1657 | | - */ |
---|
1658 | | -void blk_requeue_request(struct request_queue *q, struct request *rq) |
---|
1659 | | -{ |
---|
1660 | | - lockdep_assert_held(q->queue_lock); |
---|
1661 | | - WARN_ON_ONCE(q->mq_ops); |
---|
1662 | | - |
---|
1663 | | - blk_delete_timer(rq); |
---|
1664 | | - blk_clear_rq_complete(rq); |
---|
1665 | | - trace_block_rq_requeue(q, rq); |
---|
1666 | | - rq_qos_requeue(q, rq); |
---|
1667 | | - |
---|
1668 | | - if (rq->rq_flags & RQF_QUEUED) |
---|
1669 | | - blk_queue_end_tag(q, rq); |
---|
1670 | | - |
---|
1671 | | - BUG_ON(blk_queued_rq(rq)); |
---|
1672 | | - |
---|
1673 | | - elv_requeue_request(q, rq); |
---|
1674 | | -} |
---|
1675 | | -EXPORT_SYMBOL(blk_requeue_request); |
---|
1676 | | - |
---|
1677 | | -static void add_acct_request(struct request_queue *q, struct request *rq, |
---|
1678 | | - int where) |
---|
1679 | | -{ |
---|
1680 | | - blk_account_io_start(rq, true); |
---|
1681 | | - __elv_add_request(q, rq, where); |
---|
1682 | | -} |
---|
1683 | | - |
---|
1684 | | -static void part_round_stats_single(struct request_queue *q, int cpu, |
---|
1685 | | - struct hd_struct *part, unsigned long now, |
---|
1686 | | - unsigned int inflight) |
---|
1687 | | -{ |
---|
1688 | | - if (inflight) { |
---|
1689 | | - __part_stat_add(cpu, part, time_in_queue, |
---|
1690 | | - inflight * (now - part->stamp)); |
---|
1691 | | - __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); |
---|
1692 | | - } |
---|
1693 | | - part->stamp = now; |
---|
1694 | | -} |
---|
1695 | | - |
---|
1696 | | -/** |
---|
1697 | | - * part_round_stats() - Round off the performance stats on a struct disk_stats. |
---|
1698 | | - * @q: target block queue |
---|
1699 | | - * @cpu: cpu number for stats access |
---|
1700 | | - * @part: target partition |
---|
1701 | | - * |
---|
1702 | | - * The average IO queue length and utilisation statistics are maintained |
---|
1703 | | - * by observing the current state of the queue length and the amount of |
---|
1704 | | - * time it has been in this state for. |
---|
1705 | | - * |
---|
1706 | | - * Normally, that accounting is done on IO completion, but that can result |
---|
1707 | | - * in more than a second's worth of IO being accounted for within any one |
---|
1708 | | - * second, leading to >100% utilisation. To deal with that, we call this |
---|
1709 | | - * function to do a round-off before returning the results when reading |
---|
1710 | | - * /proc/diskstats. This accounts immediately for all queue usage up to |
---|
1711 | | - * the current jiffies and restarts the counters again. |
---|
1712 | | - */ |
---|
1713 | | -void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part) |
---|
1714 | | -{ |
---|
1715 | | - struct hd_struct *part2 = NULL; |
---|
1716 | | - unsigned long now = jiffies; |
---|
1717 | | - unsigned int inflight[2]; |
---|
1718 | | - int stats = 0; |
---|
1719 | | - |
---|
1720 | | - if (part->stamp != now) |
---|
1721 | | - stats |= 1; |
---|
1722 | | - |
---|
1723 | | - if (part->partno) { |
---|
1724 | | - part2 = &part_to_disk(part)->part0; |
---|
1725 | | - if (part2->stamp != now) |
---|
1726 | | - stats |= 2; |
---|
1727 | | - } |
---|
1728 | | - |
---|
1729 | | - if (!stats) |
---|
1730 | | - return; |
---|
1731 | | - |
---|
1732 | | - part_in_flight(q, part, inflight); |
---|
1733 | | - |
---|
1734 | | - if (stats & 2) |
---|
1735 | | - part_round_stats_single(q, cpu, part2, now, inflight[1]); |
---|
1736 | | - if (stats & 1) |
---|
1737 | | - part_round_stats_single(q, cpu, part, now, inflight[0]); |
---|
1738 | | -} |
---|
1739 | | -EXPORT_SYMBOL_GPL(part_round_stats); |
---|
1740 | | - |
---|
1741 | | -#ifdef CONFIG_PM |
---|
1742 | | -static void blk_pm_put_request(struct request *rq) |
---|
1743 | | -{ |
---|
1744 | | - if (rq->q->dev && !(rq->rq_flags & RQF_PM) && !--rq->q->nr_pending) |
---|
1745 | | - pm_runtime_mark_last_busy(rq->q->dev); |
---|
1746 | | -} |
---|
1747 | | -#else |
---|
1748 | | -static inline void blk_pm_put_request(struct request *rq) {} |
---|
1749 | | -#endif |
---|
1750 | | - |
---|
1751 | | -void __blk_put_request(struct request_queue *q, struct request *req) |
---|
1752 | | -{ |
---|
1753 | | - req_flags_t rq_flags = req->rq_flags; |
---|
1754 | | - |
---|
1755 | | - if (unlikely(!q)) |
---|
1756 | | - return; |
---|
1757 | | - |
---|
1758 | | - if (q->mq_ops) { |
---|
1759 | | - blk_mq_free_request(req); |
---|
1760 | | - return; |
---|
1761 | | - } |
---|
1762 | | - |
---|
1763 | | - lockdep_assert_held(q->queue_lock); |
---|
1764 | | - |
---|
1765 | | - blk_req_zone_write_unlock(req); |
---|
1766 | | - blk_pm_put_request(req); |
---|
1767 | | - |
---|
1768 | | - elv_completed_request(q, req); |
---|
1769 | | - |
---|
1770 | | - /* this is a bio leak */ |
---|
1771 | | - WARN_ON(req->bio != NULL); |
---|
1772 | | - |
---|
1773 | | - rq_qos_done(q, req); |
---|
1774 | | - |
---|
1775 | | - /* |
---|
1776 | | - * Request may not have originated from ll_rw_blk. if not, |
---|
1777 | | - * it didn't come out of our reserved rq pools |
---|
1778 | | - */ |
---|
1779 | | - if (rq_flags & RQF_ALLOCED) { |
---|
1780 | | - struct request_list *rl = blk_rq_rl(req); |
---|
1781 | | - bool sync = op_is_sync(req->cmd_flags); |
---|
1782 | | - |
---|
1783 | | - BUG_ON(!list_empty(&req->queuelist)); |
---|
1784 | | - BUG_ON(ELV_ON_HASH(req)); |
---|
1785 | | - |
---|
1786 | | - blk_free_request(rl, req); |
---|
1787 | | - freed_request(rl, sync, rq_flags); |
---|
1788 | | - blk_put_rl(rl); |
---|
1789 | | - blk_queue_exit(q); |
---|
1790 | | - } |
---|
1791 | | -} |
---|
1792 | | -EXPORT_SYMBOL_GPL(__blk_put_request); |
---|
1793 | | - |
---|
1794 | 649 | void blk_put_request(struct request *req) |
---|
1795 | 650 | { |
---|
1796 | | - struct request_queue *q = req->q; |
---|
1797 | | - |
---|
1798 | | - if (q->mq_ops) |
---|
1799 | | - blk_mq_free_request(req); |
---|
1800 | | - else { |
---|
1801 | | - unsigned long flags; |
---|
1802 | | - |
---|
1803 | | - spin_lock_irqsave(q->queue_lock, flags); |
---|
1804 | | - __blk_put_request(q, req); |
---|
1805 | | - spin_unlock_irqrestore(q->queue_lock, flags); |
---|
1806 | | - } |
---|
| 651 | + blk_mq_free_request(req); |
---|
1807 | 652 | } |
---|
1808 | 653 | EXPORT_SYMBOL(blk_put_request); |
---|
1809 | | - |
---|
1810 | | -bool bio_attempt_back_merge(struct request_queue *q, struct request *req, |
---|
1811 | | - struct bio *bio) |
---|
1812 | | -{ |
---|
1813 | | - const int ff = bio->bi_opf & REQ_FAILFAST_MASK; |
---|
1814 | | - |
---|
1815 | | - if (!ll_back_merge_fn(q, req, bio)) |
---|
1816 | | - return false; |
---|
1817 | | - |
---|
1818 | | - trace_block_bio_backmerge(q, req, bio); |
---|
1819 | | - |
---|
1820 | | - if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) |
---|
1821 | | - blk_rq_set_mixed_merge(req); |
---|
1822 | | - |
---|
1823 | | - req->biotail->bi_next = bio; |
---|
1824 | | - req->biotail = bio; |
---|
1825 | | - req->__data_len += bio->bi_iter.bi_size; |
---|
1826 | | - req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); |
---|
1827 | | - |
---|
1828 | | - blk_account_io_start(req, false); |
---|
1829 | | - return true; |
---|
1830 | | -} |
---|
1831 | | - |
---|
1832 | | -bool bio_attempt_front_merge(struct request_queue *q, struct request *req, |
---|
1833 | | - struct bio *bio) |
---|
1834 | | -{ |
---|
1835 | | - const int ff = bio->bi_opf & REQ_FAILFAST_MASK; |
---|
1836 | | - |
---|
1837 | | - if (!ll_front_merge_fn(q, req, bio)) |
---|
1838 | | - return false; |
---|
1839 | | - |
---|
1840 | | - trace_block_bio_frontmerge(q, req, bio); |
---|
1841 | | - |
---|
1842 | | - if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) |
---|
1843 | | - blk_rq_set_mixed_merge(req); |
---|
1844 | | - |
---|
1845 | | - bio->bi_next = req->bio; |
---|
1846 | | - req->bio = bio; |
---|
1847 | | - |
---|
1848 | | - req->__sector = bio->bi_iter.bi_sector; |
---|
1849 | | - req->__data_len += bio->bi_iter.bi_size; |
---|
1850 | | - req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); |
---|
1851 | | - |
---|
1852 | | - blk_account_io_start(req, false); |
---|
1853 | | - return true; |
---|
1854 | | -} |
---|
1855 | | - |
---|
1856 | | -bool bio_attempt_discard_merge(struct request_queue *q, struct request *req, |
---|
1857 | | - struct bio *bio) |
---|
1858 | | -{ |
---|
1859 | | - unsigned short segments = blk_rq_nr_discard_segments(req); |
---|
1860 | | - |
---|
1861 | | - if (segments >= queue_max_discard_segments(q)) |
---|
1862 | | - goto no_merge; |
---|
1863 | | - if (blk_rq_sectors(req) + bio_sectors(bio) > |
---|
1864 | | - blk_rq_get_max_sectors(req, blk_rq_pos(req))) |
---|
1865 | | - goto no_merge; |
---|
1866 | | - |
---|
1867 | | - req->biotail->bi_next = bio; |
---|
1868 | | - req->biotail = bio; |
---|
1869 | | - req->__data_len += bio->bi_iter.bi_size; |
---|
1870 | | - req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); |
---|
1871 | | - req->nr_phys_segments = segments + 1; |
---|
1872 | | - |
---|
1873 | | - blk_account_io_start(req, false); |
---|
1874 | | - return true; |
---|
1875 | | -no_merge: |
---|
1876 | | - req_set_nomerge(q, req); |
---|
1877 | | - return false; |
---|
1878 | | -} |
---|
1879 | | - |
---|
1880 | | -/** |
---|
1881 | | - * blk_attempt_plug_merge - try to merge with %current's plugged list |
---|
1882 | | - * @q: request_queue new bio is being queued at |
---|
1883 | | - * @bio: new bio being queued |
---|
1884 | | - * @request_count: out parameter for number of traversed plugged requests |
---|
1885 | | - * @same_queue_rq: pointer to &struct request that gets filled in when |
---|
1886 | | - * another request associated with @q is found on the plug list |
---|
1887 | | - * (optional, may be %NULL) |
---|
1888 | | - * |
---|
1889 | | - * Determine whether @bio being queued on @q can be merged with a request |
---|
1890 | | - * on %current's plugged list. Returns %true if merge was successful, |
---|
1891 | | - * otherwise %false. |
---|
1892 | | - * |
---|
1893 | | - * Plugging coalesces IOs from the same issuer for the same purpose without |
---|
1894 | | - * going through @q->queue_lock. As such it's more of an issuing mechanism |
---|
1895 | | - * than scheduling, and the request, while it may have elvpriv data, is not
---|
1896 | | - * added on the elevator at this point. In addition, we don't have |
---|
1897 | | - * reliable access to the elevator outside queue lock. Only check basic |
---|
1898 | | - * merging parameters without querying the elevator. |
---|
1899 | | - * |
---|
1900 | | - * Caller must ensure !blk_queue_nomerges(q) beforehand. |
---|
1901 | | - */ |
---|
1902 | | -bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, |
---|
1903 | | - unsigned int *request_count, |
---|
1904 | | - struct request **same_queue_rq) |
---|
1905 | | -{ |
---|
1906 | | - struct blk_plug *plug; |
---|
1907 | | - struct request *rq; |
---|
1908 | | - struct list_head *plug_list; |
---|
1909 | | - |
---|
1910 | | - plug = current->plug; |
---|
1911 | | - if (!plug) |
---|
1912 | | - return false; |
---|
1913 | | - *request_count = 0; |
---|
1914 | | - |
---|
1915 | | - if (q->mq_ops) |
---|
1916 | | - plug_list = &plug->mq_list; |
---|
1917 | | - else |
---|
1918 | | - plug_list = &plug->list; |
---|
1919 | | - |
---|
1920 | | - list_for_each_entry_reverse(rq, plug_list, queuelist) { |
---|
1921 | | - bool merged = false; |
---|
1922 | | - |
---|
1923 | | - if (rq->q == q) { |
---|
1924 | | - (*request_count)++; |
---|
1925 | | - /* |
---|
1926 | | - * Only blk-mq multiple hardware queues case checks the |
---|
1927 | | - * rq in the same queue, there should be only one such |
---|
1928 | | - * rq in a queue |
---|
1929 | | - **/ |
---|
1930 | | - if (same_queue_rq) |
---|
1931 | | - *same_queue_rq = rq; |
---|
1932 | | - } |
---|
1933 | | - |
---|
1934 | | - if (rq->q != q || !blk_rq_merge_ok(rq, bio)) |
---|
1935 | | - continue; |
---|
1936 | | - |
---|
1937 | | - switch (blk_try_merge(rq, bio)) { |
---|
1938 | | - case ELEVATOR_BACK_MERGE: |
---|
1939 | | - merged = bio_attempt_back_merge(q, rq, bio); |
---|
1940 | | - break; |
---|
1941 | | - case ELEVATOR_FRONT_MERGE: |
---|
1942 | | - merged = bio_attempt_front_merge(q, rq, bio); |
---|
1943 | | - break; |
---|
1944 | | - case ELEVATOR_DISCARD_MERGE: |
---|
1945 | | - merged = bio_attempt_discard_merge(q, rq, bio); |
---|
1946 | | - break; |
---|
1947 | | - default: |
---|
1948 | | - break; |
---|
1949 | | - } |
---|
1950 | | - |
---|
1951 | | - if (merged) |
---|
1952 | | - return true; |
---|
1953 | | - } |
---|
1954 | | - |
---|
1955 | | - return false; |
---|
1956 | | -} |
---|
1957 | | - |
---|
1958 | | -unsigned int blk_plug_queued_count(struct request_queue *q) |
---|
1959 | | -{ |
---|
1960 | | - struct blk_plug *plug; |
---|
1961 | | - struct request *rq; |
---|
1962 | | - struct list_head *plug_list; |
---|
1963 | | - unsigned int ret = 0; |
---|
1964 | | - |
---|
1965 | | - plug = current->plug; |
---|
1966 | | - if (!plug) |
---|
1967 | | - goto out; |
---|
1968 | | - |
---|
1969 | | - if (q->mq_ops) |
---|
1970 | | - plug_list = &plug->mq_list; |
---|
1971 | | - else |
---|
1972 | | - plug_list = &plug->list; |
---|
1973 | | - |
---|
1974 | | - list_for_each_entry(rq, plug_list, queuelist) { |
---|
1975 | | - if (rq->q == q) |
---|
1976 | | - ret++; |
---|
1977 | | - } |
---|
1978 | | -out: |
---|
1979 | | - return ret; |
---|
1980 | | -} |
---|
1981 | | - |
---|
1982 | | -void blk_init_request_from_bio(struct request *req, struct bio *bio) |
---|
1983 | | -{ |
---|
1984 | | - struct io_context *ioc = rq_ioc(bio); |
---|
1985 | | - |
---|
1986 | | - if (bio->bi_opf & REQ_RAHEAD) |
---|
1987 | | - req->cmd_flags |= REQ_FAILFAST_MASK; |
---|
1988 | | - |
---|
1989 | | - req->__sector = bio->bi_iter.bi_sector; |
---|
1990 | | - if (ioprio_valid(bio_prio(bio))) |
---|
1991 | | - req->ioprio = bio_prio(bio); |
---|
1992 | | - else if (ioc) |
---|
1993 | | - req->ioprio = ioc->ioprio; |
---|
1994 | | - else |
---|
1995 | | - req->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); |
---|
1996 | | - req->write_hint = bio->bi_write_hint; |
---|
1997 | | - blk_rq_bio_prep(req->q, req, bio); |
---|
1998 | | -} |
---|
1999 | | -EXPORT_SYMBOL_GPL(blk_init_request_from_bio); |
---|
2000 | | - |
---|
2001 | | -static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) |
---|
2002 | | -{ |
---|
2003 | | - struct blk_plug *plug; |
---|
2004 | | - int where = ELEVATOR_INSERT_SORT; |
---|
2005 | | - struct request *req, *free; |
---|
2006 | | - unsigned int request_count = 0; |
---|
2007 | | - |
---|
2008 | | - /* |
---|
2009 | | - * low level driver can indicate that it wants pages above a |
---|
2010 | | - * certain limit bounced to low memory (ie for highmem, or even |
---|
2011 | | - * ISA dma in theory) |
---|
2012 | | - */ |
---|
2013 | | - blk_queue_bounce(q, &bio); |
---|
2014 | | - |
---|
2015 | | - blk_queue_split(q, &bio); |
---|
2016 | | - |
---|
2017 | | - if (!bio_integrity_prep(bio)) |
---|
2018 | | - return BLK_QC_T_NONE; |
---|
2019 | | - |
---|
2020 | | - if (op_is_flush(bio->bi_opf)) { |
---|
2021 | | - spin_lock_irq(q->queue_lock); |
---|
2022 | | - where = ELEVATOR_INSERT_FLUSH; |
---|
2023 | | - goto get_rq; |
---|
2024 | | - } |
---|
2025 | | - |
---|
2026 | | - /* |
---|
2027 | | - * Check if we can merge with the plugged list before grabbing |
---|
2028 | | - * any locks. |
---|
2029 | | - */ |
---|
2030 | | - if (!blk_queue_nomerges(q)) { |
---|
2031 | | - if (blk_attempt_plug_merge(q, bio, &request_count, NULL)) |
---|
2032 | | - return BLK_QC_T_NONE; |
---|
2033 | | - } else |
---|
2034 | | - request_count = blk_plug_queued_count(q); |
---|
2035 | | - |
---|
2036 | | - spin_lock_irq(q->queue_lock); |
---|
2037 | | - |
---|
2038 | | - switch (elv_merge(q, &req, bio)) { |
---|
2039 | | - case ELEVATOR_BACK_MERGE: |
---|
2040 | | - if (!bio_attempt_back_merge(q, req, bio)) |
---|
2041 | | - break; |
---|
2042 | | - elv_bio_merged(q, req, bio); |
---|
2043 | | - free = attempt_back_merge(q, req); |
---|
2044 | | - if (free) |
---|
2045 | | - __blk_put_request(q, free); |
---|
2046 | | - else |
---|
2047 | | - elv_merged_request(q, req, ELEVATOR_BACK_MERGE); |
---|
2048 | | - goto out_unlock; |
---|
2049 | | - case ELEVATOR_FRONT_MERGE: |
---|
2050 | | - if (!bio_attempt_front_merge(q, req, bio)) |
---|
2051 | | - break; |
---|
2052 | | - elv_bio_merged(q, req, bio); |
---|
2053 | | - free = attempt_front_merge(q, req); |
---|
2054 | | - if (free) |
---|
2055 | | - __blk_put_request(q, free); |
---|
2056 | | - else |
---|
2057 | | - elv_merged_request(q, req, ELEVATOR_FRONT_MERGE); |
---|
2058 | | - goto out_unlock; |
---|
2059 | | - default: |
---|
2060 | | - break; |
---|
2061 | | - } |
---|
2062 | | - |
---|
2063 | | -get_rq: |
---|
2064 | | - rq_qos_throttle(q, bio, q->queue_lock); |
---|
2065 | | - |
---|
2066 | | - /* |
---|
2067 | | - * Grab a free request. This might sleep but cannot fail.
---|
2068 | | - * Returns with the queue unlocked. |
---|
2069 | | - */ |
---|
2070 | | - blk_queue_enter_live(q); |
---|
2071 | | - req = get_request(q, bio->bi_opf, bio, 0, GFP_NOIO); |
---|
2072 | | - if (IS_ERR(req)) { |
---|
2073 | | - blk_queue_exit(q); |
---|
2074 | | - rq_qos_cleanup(q, bio); |
---|
2075 | | - if (PTR_ERR(req) == -ENOMEM) |
---|
2076 | | - bio->bi_status = BLK_STS_RESOURCE; |
---|
2077 | | - else |
---|
2078 | | - bio->bi_status = BLK_STS_IOERR; |
---|
2079 | | - bio_endio(bio); |
---|
2080 | | - goto out_unlock; |
---|
2081 | | - } |
---|
2082 | | - |
---|
2083 | | - rq_qos_track(q, req, bio); |
---|
2084 | | - |
---|
2085 | | - /* |
---|
2086 | | - * After dropping the lock and possibly sleeping here, our request |
---|
2087 | | - * may now be mergeable after it had proven unmergeable (above). |
---|
2088 | | - * We don't worry about that case for efficiency. It won't happen |
---|
2089 | | - * often, and the elevators are able to handle it. |
---|
2090 | | - */ |
---|
2091 | | - blk_init_request_from_bio(req, bio); |
---|
2092 | | - |
---|
2093 | | - if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags)) |
---|
2094 | | - req->cpu = raw_smp_processor_id(); |
---|
2095 | | - |
---|
2096 | | - plug = current->plug; |
---|
2097 | | - if (plug) { |
---|
2098 | | - /* |
---|
2099 | | - * If this is the first request added after a plug, fire |
---|
2100 | | - * off a plug trace.
---|
2101 | | - * |
---|
2102 | | - * @request_count may become stale because of schedule |
---|
2103 | | - * out, so check plug list again. |
---|
2104 | | - */ |
---|
2105 | | - if (!request_count || list_empty(&plug->list)) |
---|
2106 | | - trace_block_plug(q); |
---|
2107 | | - else { |
---|
2108 | | - struct request *last = list_entry_rq(plug->list.prev); |
---|
2109 | | - if (request_count >= BLK_MAX_REQUEST_COUNT || |
---|
2110 | | - blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE) { |
---|
2111 | | - blk_flush_plug_list(plug, false); |
---|
2112 | | - trace_block_plug(q); |
---|
2113 | | - } |
---|
2114 | | - } |
---|
2115 | | - list_add_tail(&req->queuelist, &plug->list); |
---|
2116 | | - blk_account_io_start(req, true); |
---|
2117 | | - } else { |
---|
2118 | | - spin_lock_irq(q->queue_lock); |
---|
2119 | | - add_acct_request(q, req, where); |
---|
2120 | | - __blk_run_queue(q); |
---|
2121 | | -out_unlock: |
---|
2122 | | - spin_unlock_irq(q->queue_lock); |
---|
2123 | | - } |
---|
2124 | | - |
---|
2125 | | - return BLK_QC_T_NONE; |
---|
2126 | | -} |
---|
2127 | 654 | |
---|
2128 | 655 | static void handle_bad_sector(struct bio *bio, sector_t maxsector) |
---|
2129 | 656 | { |
---|
2130 | 657 | char b[BDEVNAME_SIZE]; |
---|
2131 | 658 | |
---|
2132 | | - printk(KERN_INFO "attempt to access beyond end of device\n"); |
---|
2133 | | - printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n", |
---|
2134 | | - bio_devname(bio, b), bio->bi_opf, |
---|
2135 | | - (unsigned long long)bio_end_sector(bio), |
---|
2136 | | - (long long)maxsector); |
---|
| 659 | + pr_info_ratelimited("attempt to access beyond end of device\n" |
---|
| 660 | + "%s: rw=%d, want=%llu, limit=%llu\n", |
---|
| 661 | + bio_devname(bio, b), bio->bi_opf, |
---|
| 662 | + bio_end_sector(bio), maxsector); |
---|
2137 | 663 | } |
---|
2138 | 664 | |
---|
2139 | 665 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
---|
.. | .. |
---|
2182 | 708 | return false; |
---|
2183 | 709 | |
---|
2184 | 710 | WARN_ONCE(1, |
---|
2185 | | - "generic_make_request: Trying to write " |
---|
2186 | | - "to read-only block-device %s (partno %d)\n", |
---|
| 711 | + "Trying to write to read-only block-device %s (partno %d)\n", |
---|
2187 | 712 | bio_devname(bio, b), part->partno); |
---|
2188 | 713 | /* Older lvm-tools actually trigger this */ |
---|
2189 | 714 | return false; |
---|
.. | .. |
---|
2235 | 760 | if (unlikely(bio_check_ro(bio, p))) |
---|
2236 | 761 | goto out; |
---|
2237 | 762 | |
---|
2238 | | - /* |
---|
2239 | | - * Zone reset does not include bi_size so bio_sectors() is always 0. |
---|
2240 | | - * Include a test for the reset op code and perform the remap if needed. |
---|
2241 | | - */ |
---|
2242 | | - if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) { |
---|
| 763 | + if (bio_sectors(bio)) { |
---|
2243 | 764 | if (bio_check_eod(bio, part_nr_sects_read(p))) |
---|
2244 | 765 | goto out; |
---|
2245 | 766 | bio->bi_iter.bi_sector += p->start_sect; |
---|
.. | .. |
---|
2253 | 774 | return ret; |
---|
2254 | 775 | } |
---|
2255 | 776 | |
---|
2256 | | -static noinline_for_stack bool |
---|
2257 | | -generic_make_request_checks(struct bio *bio) |
---|
| 777 | +/* |
---|
| 778 | + * Check write append to a zoned block device. |
---|
| 779 | + */ |
---|
| 780 | +static inline blk_status_t blk_check_zone_append(struct request_queue *q, |
---|
| 781 | + struct bio *bio) |
---|
2258 | 782 | { |
---|
2259 | | - struct request_queue *q; |
---|
| 783 | + sector_t pos = bio->bi_iter.bi_sector; |
---|
2260 | 784 | int nr_sectors = bio_sectors(bio); |
---|
| 785 | + |
---|
| 786 | + /* Only applicable to zoned block devices */ |
---|
| 787 | + if (!blk_queue_is_zoned(q)) |
---|
| 788 | + return BLK_STS_NOTSUPP; |
---|
| 789 | + |
---|
| 790 | + /* The bio sector must point to the start of a sequential zone */ |
---|
| 791 | + if (pos & (blk_queue_zone_sectors(q) - 1) || |
---|
| 792 | + !blk_queue_zone_is_seq(q, pos)) |
---|
| 793 | + return BLK_STS_IOERR; |
---|
| 794 | + |
---|
| 795 | + /* |
---|
| 796 | + * Not allowed to cross zone boundaries. Otherwise, the BIO will be |
---|
| 797 | + * split and could result in non-contiguous sectors being written in |
---|
| 798 | + * different zones. |
---|
| 799 | + */ |
---|
| 800 | + if (nr_sectors > q->limits.chunk_sectors) |
---|
| 801 | + return BLK_STS_IOERR; |
---|
| 802 | + |
---|
| 803 | + /* Make sure the BIO is small enough and will not get split */ |
---|
| 804 | + if (nr_sectors > q->limits.max_zone_append_sectors) |
---|
| 805 | + return BLK_STS_IOERR; |
---|
| 806 | + |
---|
| 807 | + bio->bi_opf |= REQ_NOMERGE; |
---|
| 808 | + |
---|
| 809 | + return BLK_STS_OK; |
---|
| 810 | +} |
---|
| 811 | + |
---|
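For context, a zone-append write that passes these checks could be built roughly as follows. This is an illustrative sketch only, not taken from the patch: the example_* helpers are invented, and real users (zonefs, btrfs zoned mode) carry more state and error handling.

static void example_append_end_io(struct bio *bio)
{
        /* on success, bi_sector now holds the sector the data landed at */
        pr_info("zone append completed at sector %llu\n",
                (unsigned long long)bio->bi_iter.bi_sector);
        bio_put(bio);
}

static void example_zone_append(struct block_device *bdev, sector_t zone_start,
                                struct page *page, unsigned int len)
{
        struct bio *bio = bio_alloc(GFP_KERNEL, 1);

        bio_set_dev(bio, bdev);
        bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC;
        bio->bi_iter.bi_sector = zone_start;    /* must be the start of a sequential zone */
        bio_add_page(bio, page, len, 0);        /* must fit within max_zone_append_sectors */
        bio->bi_end_io = example_append_end_io;
        submit_bio(bio);
}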
| 812 | +static noinline_for_stack bool submit_bio_checks(struct bio *bio) |
---|
| 813 | +{ |
---|
| 814 | + struct request_queue *q = bio->bi_disk->queue; |
---|
2261 | 815 | blk_status_t status = BLK_STS_IOERR; |
---|
2262 | | - char b[BDEVNAME_SIZE]; |
---|
| 816 | + struct blk_plug *plug; |
---|
2263 | 817 | |
---|
2264 | 818 | might_sleep(); |
---|
2265 | 819 | |
---|
2266 | | - q = bio->bi_disk->queue; |
---|
2267 | | - if (unlikely(!q)) { |
---|
2268 | | - printk(KERN_ERR |
---|
2269 | | - "generic_make_request: Trying to access " |
---|
2270 | | - "nonexistent block-device %s (%Lu)\n", |
---|
2271 | | - bio_devname(bio, b), (long long)bio->bi_iter.bi_sector); |
---|
2272 | | - goto end_io; |
---|
2273 | | - } |
---|
| 820 | + plug = blk_mq_plug(q, bio); |
---|
| 821 | + if (plug && plug->nowait) |
---|
| 822 | + bio->bi_opf |= REQ_NOWAIT; |
---|
2274 | 823 | |
---|
2275 | 824 | /* |
---|
2276 | 825 | * For a REQ_NOWAIT based request, return -EOPNOTSUPP |
---|
2277 | | - * if queue is not a request based queue. |
---|
| 826 | + * if queue does not support NOWAIT. |
---|
2278 | 827 | */ |
---|
2279 | | - if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q)) |
---|
| 828 | + if ((bio->bi_opf & REQ_NOWAIT) && !blk_queue_nowait(q)) |
---|
2280 | 829 | goto not_supported; |
---|
2281 | 830 | |
---|
2282 | 831 | if (should_fail_bio(bio)) |
---|
.. | .. |
---|
2293 | 842 | } |
---|
2294 | 843 | |
---|
2295 | 844 | /* |
---|
2296 | | - * Filter flush bio's early so that make_request based |
---|
2297 | | - * drivers without flush support don't have to worry |
---|
2298 | | - * about them. |
---|
| 845 | + * Filter flush bio's early so that bio based drivers without flush |
---|
| 846 | + * support don't have to worry about them. |
---|
2299 | 847 | */ |
---|
2300 | 848 | if (op_is_flush(bio->bi_opf) && |
---|
2301 | 849 | !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { |
---|
2302 | 850 | bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA); |
---|
2303 | | - if (!nr_sectors) { |
---|
| 851 | + if (!bio_sectors(bio)) { |
---|
2304 | 852 | status = BLK_STS_OK; |
---|
2305 | 853 | goto end_io; |
---|
2306 | 854 | } |
---|
2307 | 855 | } |
---|
| 856 | + |
---|
| 857 | + if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) |
---|
| 858 | + bio->bi_opf &= ~REQ_HIPRI; |
---|
2308 | 859 | |
---|
2309 | 860 | switch (bio_op(bio)) { |
---|
2310 | 861 | case REQ_OP_DISCARD: |
---|
.. | .. |
---|
2319 | 870 | if (!q->limits.max_write_same_sectors) |
---|
2320 | 871 | goto not_supported; |
---|
2321 | 872 | break; |
---|
2322 | | - case REQ_OP_ZONE_REPORT: |
---|
| 873 | + case REQ_OP_ZONE_APPEND: |
---|
| 874 | + status = blk_check_zone_append(q, bio); |
---|
| 875 | + if (status != BLK_STS_OK) |
---|
| 876 | + goto end_io; |
---|
| 877 | + break; |
---|
2323 | 878 | case REQ_OP_ZONE_RESET: |
---|
| 879 | + case REQ_OP_ZONE_OPEN: |
---|
| 880 | + case REQ_OP_ZONE_CLOSE: |
---|
| 881 | + case REQ_OP_ZONE_FINISH: |
---|
2324 | 882 | if (!blk_queue_is_zoned(q)) |
---|
| 883 | + goto not_supported; |
---|
| 884 | + break; |
---|
| 885 | + case REQ_OP_ZONE_RESET_ALL: |
---|
| 886 | + if (!blk_queue_is_zoned(q) || !blk_queue_zone_resetall(q)) |
---|
2325 | 887 | goto not_supported; |
---|
2326 | 888 | break; |
---|
2327 | 889 | case REQ_OP_WRITE_ZEROES: |
---|
.. | .. |
---|
2333 | 895 | } |
---|
2334 | 896 | |
---|
2335 | 897 | /* |
---|
2336 | | - * Various block parts want %current->io_context and lazy ioc |
---|
2337 | | - * allocation ends up trading a lot of pain for a small amount of |
---|
2338 | | - * memory. Just allocate it upfront. This may fail and block |
---|
2339 | | - * layer knows how to live with it. |
---|
| 898 | + * Various block parts want %current->io_context, so allocate it up |
---|
| 899 | + * front rather than dealing with lots of pain to allocate it only |
---|
| 900 | + * where needed. This may fail and the block layer knows how to live |
---|
| 901 | + * with it. |
---|
2340 | 902 | */ |
---|
2341 | | - create_io_context(GFP_ATOMIC, q->node); |
---|
| 903 | + if (unlikely(!current->io_context)) |
---|
| 904 | + create_task_io_context(current, GFP_ATOMIC, q->node); |
---|
2342 | 905 | |
---|
2343 | | - if (!blkcg_bio_issue_check(q, bio)) |
---|
| 906 | + if (blk_throtl_bio(bio)) |
---|
2344 | 907 | return false; |
---|
| 908 | + |
---|
| 909 | + blk_cgroup_bio_start(bio); |
---|
| 910 | + blkcg_bio_issue_init(bio); |
---|
2345 | 911 | |
---|
2346 | 912 | if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) { |
---|
2347 | 913 | trace_block_bio_queue(q, bio); |
---|
.. | .. |
---|
2360 | 926 | return false; |
---|
2361 | 927 | } |
---|
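The REQ_NOWAIT handling above is driven entirely from the submitter side. A hedged sketch (invented example_* names, not from this patch): queues that do not advertise QUEUE_FLAG_NOWAIT fail the bio with BLK_STS_NOTSUPP here, while queues that would have to block fail it later with BLK_STS_AGAIN via bio_wouldblock_error().

static void example_nowait_end_io(struct bio *bio)
{
        if (bio->bi_status == BLK_STS_AGAIN)
                pr_debug("submission would block, retry from a context that may sleep\n");
        bio_put(bio);
}

static void example_submit_nowait(struct bio *bio)
{
        bio->bi_opf |= REQ_NOWAIT;              /* never sleep waiting for resources */
        bio->bi_end_io = example_nowait_end_io;
        submit_bio(bio);
}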
2362 | 928 | |
---|
2363 | | -/** |
---|
2364 | | - * generic_make_request - hand a buffer to its device driver for I/O |
---|
2365 | | - * @bio: The bio describing the location in memory and on the device. |
---|
2366 | | - * |
---|
2367 | | - * generic_make_request() is used to make I/O requests of block |
---|
2368 | | - * devices. It is passed a &struct bio, which describes the I/O that needs |
---|
2369 | | - * to be done. |
---|
2370 | | - * |
---|
2371 | | - * generic_make_request() does not return any status. The |
---|
2372 | | - * success/failure status of the request, along with notification of |
---|
2373 | | - * completion, is delivered asynchronously through the bio->bi_end_io |
---|
2374 | | - * function described (one day) elsewhere.
---|
2375 | | - * |
---|
2376 | | - * The caller of generic_make_request must make sure that bi_io_vec |
---|
2377 | | - * are set to describe the memory buffer, and that bi_dev and bi_sector are |
---|
2378 | | - * set to describe the device address, and the |
---|
2379 | | - * bi_end_io and optionally bi_private are set to describe how |
---|
2380 | | - * completion notification should be signaled. |
---|
2381 | | - * |
---|
2382 | | - * generic_make_request and the drivers it calls may use bi_next if this |
---|
2383 | | - * bio happens to be merged with someone else, and may resubmit the bio to |
---|
2384 | | - * a lower device by calling into generic_make_request recursively, which |
---|
2385 | | - * means the bio should NOT be touched after the call to ->make_request_fn. |
---|
2386 | | - */ |
---|
2387 | | -blk_qc_t generic_make_request(struct bio *bio) |
---|
| 929 | +static blk_qc_t __submit_bio(struct bio *bio) |
---|
2388 | 930 | { |
---|
2389 | | - /* |
---|
2390 | | - * bio_list_on_stack[0] contains bios submitted by the current |
---|
2391 | | - * make_request_fn. |
---|
2392 | | - * bio_list_on_stack[1] contains bios that were submitted before |
---|
2393 | | - * the current make_request_fn, but that haven't been processed |
---|
2394 | | - * yet. |
---|
2395 | | - */ |
---|
2396 | | - struct bio_list bio_list_on_stack[2]; |
---|
2397 | | - blk_mq_req_flags_t flags = 0; |
---|
2398 | | - struct request_queue *q = bio->bi_disk->queue; |
---|
| 931 | + struct gendisk *disk = bio->bi_disk; |
---|
2399 | 932 | blk_qc_t ret = BLK_QC_T_NONE; |
---|
2400 | 933 | |
---|
2401 | | - if (bio->bi_opf & REQ_NOWAIT) |
---|
2402 | | - flags = BLK_MQ_REQ_NOWAIT; |
---|
2403 | | - if (bio_flagged(bio, BIO_QUEUE_ENTERED)) |
---|
2404 | | - blk_queue_enter_live(q); |
---|
2405 | | - else if (blk_queue_enter(q, flags) < 0) { |
---|
2406 | | - if (!blk_queue_dying(q) && (bio->bi_opf & REQ_NOWAIT)) |
---|
2407 | | - bio_wouldblock_error(bio); |
---|
2408 | | - else |
---|
2409 | | - bio_io_error(bio); |
---|
2410 | | - return ret; |
---|
| 934 | + if (blk_crypto_bio_prep(&bio)) { |
---|
| 935 | + if (!disk->fops->submit_bio) |
---|
| 936 | + return blk_mq_submit_bio(bio); |
---|
| 937 | + ret = disk->fops->submit_bio(bio); |
---|
2411 | 938 | } |
---|
| 939 | + blk_queue_exit(disk->queue); |
---|
| 940 | + return ret; |
---|
| 941 | +} |
---|
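The dispatch above hinges on whether the gendisk provides a ->submit_bio handler. A bio-based driver wires it up through its block_device_operations roughly as in the sketch below (made-up names, not part of the patch); request-based blk-mq drivers leave .submit_bio NULL and are routed into blk_mq_submit_bio() instead.

static blk_qc_t example_submit_bio(struct bio *bio)
{
        /* handle or remap the bio here; complete it when done */
        bio_endio(bio);
        return BLK_QC_T_NONE;
}

static const struct block_device_operations example_fops = {
        .owner      = THIS_MODULE,
        .submit_bio = example_submit_bio,
};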
2412 | 942 | |
---|
2413 | | - if (!generic_make_request_checks(bio)) |
---|
2414 | | - goto out; |
---|
| 943 | +/* |
---|
| 944 | + * The loop in this function may be a bit non-obvious, and so deserves some |
---|
| 945 | + * explanation: |
---|
| 946 | + * |
---|
| 947 | + * - Before entering the loop, bio->bi_next is NULL (as all callers ensure |
---|
| 948 | + * that), so we have a list with a single bio. |
---|
| 949 | + * - We pretend that we have just taken it off a longer list, so we assign |
---|
| 950 | + * bio_list to a pointer to the bio_list_on_stack, thus initialising the |
---|
| 951 | + * bio_list of new bios to be added. ->submit_bio() may indeed add some more |
---|
| 952 | + * bios through a recursive call to submit_bio_noacct. If it did, we find a |
---|
| 953 | + * non-NULL value in bio_list and re-enter the loop from the top. |
---|
| 954 | + * - In this case we really did just take the bio off the top of the list (no
---|
| 955 | + * pretending) and so remove it from bio_list, and call into ->submit_bio() |
---|
| 956 | + * again. |
---|
| 957 | + * |
---|
| 958 | + * bio_list_on_stack[0] contains bios submitted by the current ->submit_bio. |
---|
| 959 | + * bio_list_on_stack[1] contains bios that were submitted before the current |
---|
| 960 | + * ->submit_bio, but that haven't been processed yet.
---|
| 961 | + */ |
---|
| 962 | +static blk_qc_t __submit_bio_noacct(struct bio *bio) |
---|
| 963 | +{ |
---|
| 964 | + struct bio_list bio_list_on_stack[2]; |
---|
| 965 | + blk_qc_t ret = BLK_QC_T_NONE; |
---|
| 966 | + |
---|
| 967 | + BUG_ON(bio->bi_next); |
---|
| 968 | + |
---|
| 969 | + bio_list_init(&bio_list_on_stack[0]); |
---|
| 970 | + current->bio_list = bio_list_on_stack; |
---|
| 971 | + |
---|
| 972 | + do { |
---|
| 973 | + struct request_queue *q = bio->bi_disk->queue; |
---|
| 974 | + struct bio_list lower, same; |
---|
| 975 | + |
---|
| 976 | + if (unlikely(bio_queue_enter(bio) != 0)) |
---|
| 977 | + continue; |
---|
| 978 | + |
---|
| 979 | + /* |
---|
| 980 | + * Create a fresh bio_list for all subordinate requests. |
---|
| 981 | + */ |
---|
| 982 | + bio_list_on_stack[1] = bio_list_on_stack[0]; |
---|
| 983 | + bio_list_init(&bio_list_on_stack[0]); |
---|
| 984 | + |
---|
| 985 | + ret = __submit_bio(bio); |
---|
| 986 | + |
---|
| 987 | + /* |
---|
| 988 | + * Sort new bios into those for a lower level and those for the |
---|
| 989 | + * same level. |
---|
| 990 | + */ |
---|
| 991 | + bio_list_init(&lower); |
---|
| 992 | + bio_list_init(&same); |
---|
| 993 | + while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL) |
---|
| 994 | + if (q == bio->bi_disk->queue) |
---|
| 995 | + bio_list_add(&same, bio); |
---|
| 996 | + else |
---|
| 997 | + bio_list_add(&lower, bio); |
---|
| 998 | + |
---|
| 999 | + /* |
---|
| 1000 | + * Now assemble so we handle the lowest level first. |
---|
| 1001 | + */ |
---|
| 1002 | + bio_list_merge(&bio_list_on_stack[0], &lower); |
---|
| 1003 | + bio_list_merge(&bio_list_on_stack[0], &same); |
---|
| 1004 | + bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]); |
---|
| 1005 | + } while ((bio = bio_list_pop(&bio_list_on_stack[0]))); |
---|
| 1006 | + |
---|
| 1007 | + current->bio_list = NULL; |
---|
| 1008 | + return ret; |
---|
| 1009 | +} |
---|
| 1010 | + |
---|
| 1011 | +static blk_qc_t __submit_bio_noacct_mq(struct bio *bio) |
---|
| 1012 | +{ |
---|
| 1013 | + struct bio_list bio_list[2] = { }; |
---|
| 1014 | + blk_qc_t ret = BLK_QC_T_NONE; |
---|
| 1015 | + |
---|
| 1016 | + current->bio_list = bio_list; |
---|
| 1017 | + |
---|
| 1018 | + do { |
---|
| 1019 | + struct gendisk *disk = bio->bi_disk; |
---|
| 1020 | + |
---|
| 1021 | + if (unlikely(bio_queue_enter(bio) != 0)) |
---|
| 1022 | + continue; |
---|
| 1023 | + |
---|
| 1024 | + if (!blk_crypto_bio_prep(&bio)) { |
---|
| 1025 | + blk_queue_exit(disk->queue); |
---|
| 1026 | + ret = BLK_QC_T_NONE; |
---|
| 1027 | + continue; |
---|
| 1028 | + } |
---|
| 1029 | + |
---|
| 1030 | + ret = blk_mq_submit_bio(bio); |
---|
| 1031 | + } while ((bio = bio_list_pop(&bio_list[0]))); |
---|
| 1032 | + |
---|
| 1033 | + current->bio_list = NULL; |
---|
| 1034 | + return ret; |
---|
| 1035 | +} |
---|
| 1036 | + |
---|
| 1037 | +/** |
---|
| 1038 | + * submit_bio_noacct - re-submit a bio to the block device layer for I/O |
---|
| 1039 | + * @bio: The bio describing the location in memory and on the device. |
---|
| 1040 | + * |
---|
| 1041 | + * This is a version of submit_bio() that shall only be used for I/O that is |
---|
| 1042 | + * resubmitted to lower level drivers by stacking block drivers. All file |
---|
| 1043 | + * systems and other upper level users of the block layer should use |
---|
| 1044 | + * submit_bio() instead. |
---|
| 1045 | + */ |
---|
| 1046 | +blk_qc_t submit_bio_noacct(struct bio *bio) |
---|
| 1047 | +{ |
---|
| 1048 | + if (!submit_bio_checks(bio)) |
---|
| 1049 | + return BLK_QC_T_NONE; |
---|
2415 | 1050 | |
---|
2416 | 1051 | /* |
---|
2417 | | - * We only want one ->make_request_fn to be active at a time, else |
---|
2418 | | - * stack usage with stacked devices could be a problem. So use |
---|
2419 | | - * current->bio_list to keep a list of requests submited by a |
---|
2420 | | - * current->bio_list to keep a list of requests submitted by a
---|
2421 | | - * flag to say if generic_make_request is currently active in this |
---|
2422 | | - * task or not. If it is NULL, then no make_request is active. If |
---|
2423 | | - * it is non-NULL, then a make_request is active, and new requests |
---|
2424 | | - * should be added at the tail |
---|
| 1052 | + * We only want one ->submit_bio to be active at a time, else stack |
---|
| 1053 | + * usage with stacked devices could be a problem. Use current->bio_list |
---|
| 1054 | + * to collect a list of requests submitted by a ->submit_bio method while
---|
| 1055 | + * it is active, and then process them after it has returned.
---|
2425 | 1056 | */ |
---|
2426 | 1057 | if (current->bio_list) { |
---|
2427 | 1058 | bio_list_add(¤t->bio_list[0], bio); |
---|
2428 | | - goto out; |
---|
2429 | | - } |
---|
2430 | | - |
---|
2431 | | - /* following loop may be a bit non-obvious, and so deserves some |
---|
2432 | | - * explanation. |
---|
2433 | | - * Before entering the loop, bio->bi_next is NULL (as all callers |
---|
2434 | | - * ensure that) so we have a list with a single bio. |
---|
2435 | | - * We pretend that we have just taken it off a longer list, so |
---|
2436 | | - * we assign bio_list to a pointer to the bio_list_on_stack, |
---|
2437 | | - * thus initialising the bio_list of new bios to be |
---|
2438 | | - * added. ->make_request() may indeed add some more bios |
---|
2439 | | - * through a recursive call to generic_make_request. If it |
---|
2440 | | - * did, we find a non-NULL value in bio_list and re-enter the loop |
---|
2441 | | - * from the top. In this case we really did just take the bio |
---|
2442 | | - * of the top of the list (no pretending) and so remove it from |
---|
2443 | | - * bio_list, and call into ->make_request() again. |
---|
2444 | | - */ |
---|
2445 | | - BUG_ON(bio->bi_next); |
---|
2446 | | - bio_list_init(&bio_list_on_stack[0]); |
---|
2447 | | - current->bio_list = bio_list_on_stack; |
---|
2448 | | - do { |
---|
2449 | | - bool enter_succeeded = true; |
---|
2450 | | - |
---|
2451 | | - if (unlikely(q != bio->bi_disk->queue)) { |
---|
2452 | | - if (q) |
---|
2453 | | - blk_queue_exit(q); |
---|
2454 | | - q = bio->bi_disk->queue; |
---|
2455 | | - flags = 0; |
---|
2456 | | - if (bio->bi_opf & REQ_NOWAIT) |
---|
2457 | | - flags = BLK_MQ_REQ_NOWAIT; |
---|
2458 | | - if (blk_queue_enter(q, flags) < 0) |
---|
2459 | | - enter_succeeded = false; |
---|
2460 | | - } |
---|
2461 | | - |
---|
2462 | | - if (enter_succeeded) { |
---|
2463 | | - struct bio_list lower, same; |
---|
2464 | | - |
---|
2465 | | - /* Create a fresh bio_list for all subordinate requests */ |
---|
2466 | | - bio_list_on_stack[1] = bio_list_on_stack[0]; |
---|
2467 | | - bio_list_init(&bio_list_on_stack[0]); |
---|
2468 | | - |
---|
2469 | | - if (!blk_crypto_submit_bio(&bio)) |
---|
2470 | | - ret = q->make_request_fn(q, bio); |
---|
2471 | | - |
---|
2472 | | - /* sort new bios into those for a lower level |
---|
2473 | | - * and those for the same level |
---|
2474 | | - */ |
---|
2475 | | - bio_list_init(&lower); |
---|
2476 | | - bio_list_init(&same); |
---|
2477 | | - while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL) |
---|
2478 | | - if (q == bio->bi_disk->queue) |
---|
2479 | | - bio_list_add(&same, bio); |
---|
2480 | | - else |
---|
2481 | | - bio_list_add(&lower, bio); |
---|
2482 | | - /* now assemble so we handle the lowest level first */ |
---|
2483 | | - bio_list_merge(&bio_list_on_stack[0], &lower); |
---|
2484 | | - bio_list_merge(&bio_list_on_stack[0], &same); |
---|
2485 | | - bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]); |
---|
2486 | | - } else { |
---|
2487 | | - if (unlikely(!blk_queue_dying(q) && |
---|
2488 | | - (bio->bi_opf & REQ_NOWAIT))) |
---|
2489 | | - bio_wouldblock_error(bio); |
---|
2490 | | - else |
---|
2491 | | - bio_io_error(bio); |
---|
2492 | | - q = NULL; |
---|
2493 | | - } |
---|
2494 | | - bio = bio_list_pop(&bio_list_on_stack[0]); |
---|
2495 | | - } while (bio); |
---|
2496 | | - current->bio_list = NULL; /* deactivate */ |
---|
2497 | | - |
---|
2498 | | -out: |
---|
2499 | | - if (q) |
---|
2500 | | - blk_queue_exit(q); |
---|
2501 | | - return ret; |
---|
2502 | | -} |
---|
2503 | | -EXPORT_SYMBOL(generic_make_request); |
---|
2504 | | - |
---|
2505 | | -/** |
---|
2506 | | - * direct_make_request - hand a buffer directly to its device driver for I/O |
---|
2507 | | - * @bio: The bio describing the location in memory and on the device. |
---|
2508 | | - * |
---|
2509 | | - * This function behaves like generic_make_request(), but does not protect |
---|
2510 | | - * against recursion. Must only be used if the called driver is known |
---|
2511 | | - * to not call generic_make_request (or direct_make_request) again from |
---|
2512 | | - * its make_request function. (Calling direct_make_request again from |
---|
2513 | | - * a workqueue is perfectly fine as that doesn't recurse). |
---|
2514 | | - */ |
---|
2515 | | -blk_qc_t direct_make_request(struct bio *bio) |
---|
2516 | | -{ |
---|
2517 | | - struct request_queue *q = bio->bi_disk->queue; |
---|
2518 | | - bool nowait = bio->bi_opf & REQ_NOWAIT; |
---|
2519 | | - blk_qc_t ret = BLK_QC_T_NONE; |
---|
2520 | | - |
---|
2521 | | - if (!generic_make_request_checks(bio)) |
---|
2522 | | - return BLK_QC_T_NONE; |
---|
2523 | | - |
---|
2524 | | - if (unlikely(blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0))) { |
---|
2525 | | - if (nowait && !blk_queue_dying(q)) |
---|
2526 | | - bio->bi_status = BLK_STS_AGAIN; |
---|
2527 | | - else |
---|
2528 | | - bio->bi_status = BLK_STS_IOERR; |
---|
2529 | | - bio_endio(bio); |
---|
2530 | 1059 | return BLK_QC_T_NONE; |
---|
2531 | 1060 | } |
---|
2532 | 1061 | |
---|
2533 | | - if (!blk_crypto_submit_bio(&bio)) |
---|
2534 | | - ret = q->make_request_fn(q, bio); |
---|
2535 | | - blk_queue_exit(q); |
---|
2536 | | - return ret; |
---|
| 1062 | + if (!bio->bi_disk->fops->submit_bio) |
---|
| 1063 | + return __submit_bio_noacct_mq(bio); |
---|
| 1064 | + return __submit_bio_noacct(bio); |
---|
2537 | 1065 | } |
---|
2538 | | -EXPORT_SYMBOL_GPL(direct_make_request); |
---|
| 1066 | +EXPORT_SYMBOL(submit_bio_noacct); |
---|
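As a concrete but hypothetical illustration of the "resubmitted by stacking drivers" case in the docstring above, a dm/md-style driver remaps the bio and hands it back through submit_bio_noacct(), which queues it on current->bio_list instead of recursing (struct example_dev and the handler name are invented for this sketch):

struct example_dev {
        struct block_device *lower_bdev;
        sector_t data_offset;
};

static blk_qc_t example_stacked_submit_bio(struct bio *bio)
{
        struct example_dev *ed = bio->bi_disk->private_data;

        bio_set_dev(bio, ed->lower_bdev);               /* retarget to the lower device */
        bio->bi_iter.bi_sector += ed->data_offset;      /* apply the linear mapping */
        return submit_bio_noacct(bio);
}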
2539 | 1067 | |
---|
2540 | 1068 | /** |
---|
2541 | 1069 | * submit_bio - submit a bio to the block device layer for I/O |
---|
2542 | 1070 | * @bio: The &struct bio which describes the I/O |
---|
2543 | 1071 | * |
---|
2544 | | - * submit_bio() is very similar in purpose to generic_make_request(), and |
---|
2545 | | - * uses that function to do most of the work. Both are fairly rough |
---|
2546 | | - * interfaces; @bio must be presetup and ready for I/O. |
---|
| 1072 | + * submit_bio() is used to submit I/O requests to block devices. It is passed a |
---|
| 1073 | + * fully set up &struct bio that describes the I/O that needs to be done. The |
---|
| 1074 | + * bio will be sent to the device described by the bi_disk and bi_partno fields. 
---|
2547 | 1075 | * |
---|
| 1076 | + * The success/failure status of the request, along with notification of |
---|
| 1077 | + * completion, is delivered asynchronously through the ->bi_end_io() callback |
---|
| 1078 | + * in @bio. The bio must NOT be touched by the caller until ->bi_end_io() has 
---|
| 1079 | + * been called. |
---|
2548 | 1080 | */ |
---|
2549 | 1081 | blk_qc_t submit_bio(struct bio *bio) |
---|
2550 | 1082 | { |
---|
2551 | | - bool workingset_read = false; |
---|
2552 | | - unsigned long pflags; |
---|
2553 | | - blk_qc_t ret; |
---|
| 1083 | + if (blkcg_punt_bio_submit(bio)) |
---|
| 1084 | + return BLK_QC_T_NONE; |
---|
2554 | 1085 | |
---|
2555 | 1086 | /* |
---|
2556 | 1087 | * If it's a regular read/write or a barrier with data attached, |
---|
.. | .. |
---|
2567 | 1098 | if (op_is_write(bio_op(bio))) { |
---|
2568 | 1099 | count_vm_events(PGPGOUT, count); |
---|
2569 | 1100 | } else { |
---|
2570 | | - if (bio_flagged(bio, BIO_WORKINGSET)) |
---|
2571 | | - workingset_read = true; |
---|
2572 | 1101 | task_io_account_read(bio->bi_iter.bi_size); |
---|
2573 | 1102 | count_vm_events(PGPGIN, count); |
---|
2574 | 1103 | } |
---|
.. | .. |
---|
2584 | 1113 | } |
---|
2585 | 1114 | |
---|
2586 | 1115 | /* |
---|
2587 | | - * If we're reading data that is part of the userspace |
---|
2588 | | - * workingset, count submission time as memory stall. When the |
---|
2589 | | - * device is congested, or the submitting cgroup IO-throttled, |
---|
2590 | | - * submission can be a significant part of overall IO time. |
---|
| 1116 | + * If we're reading data that is part of the userspace workingset, count |
---|
| 1117 | + * submission time as memory stall. When the device is congested, or |
---|
| 1118 | + * the submitting cgroup IO-throttled, submission can be a significant |
---|
| 1119 | + * part of overall IO time. |
---|
2591 | 1120 | */ |
---|
2592 | | - if (workingset_read) |
---|
| 1121 | + if (unlikely(bio_op(bio) == REQ_OP_READ && |
---|
| 1122 | + bio_flagged(bio, BIO_WORKINGSET))) { |
---|
| 1123 | + unsigned long pflags; |
---|
| 1124 | + blk_qc_t ret; |
---|
| 1125 | + |
---|
2593 | 1126 | psi_memstall_enter(&pflags); |
---|
2594 | | - |
---|
2595 | | - ret = generic_make_request(bio); |
---|
2596 | | - |
---|
2597 | | - if (workingset_read) |
---|
| 1127 | + ret = submit_bio_noacct(bio); |
---|
2598 | 1128 | psi_memstall_leave(&pflags); |
---|
2599 | 1129 | |
---|
2600 | | - return ret; |
---|
| 1130 | + return ret; |
---|
| 1131 | + } |
---|
| 1132 | + |
---|
| 1133 | + return submit_bio_noacct(bio); |
---|
2601 | 1134 | } |
---|
2602 | 1135 | EXPORT_SYMBOL(submit_bio); |
---|
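
To make the calling convention described in the comment above concrete, here is a minimal, hedged sketch of a caller building a one-page read bio and handing it to submit_bio(). The helper names (my_end_io, my_submit_one_page) are hypothetical, and the bio_alloc()/bio_set_dev() signatures are assumed to match this tree's API.

#include <linux/bio.h>
#include <linux/blkdev.h>

/* Hypothetical completion handler: the bio may only be touched from here on. */
static void my_end_io(struct bio *bio)
{
	if (bio->bi_status)
		pr_err("my_end_io: read failed\n");
	bio_put(bio);
}

/* Hypothetical submitter: one PAGE_SIZE read starting at @sector. */
static void my_submit_one_page(struct block_device *bdev, struct page *page,
			       sector_t sector)
{
	struct bio *bio = bio_alloc(GFP_KERNEL, 1);

	bio_set_dev(bio, bdev);
	bio->bi_iter.bi_sector = sector;
	bio->bi_opf = REQ_OP_READ;
	bio->bi_end_io = my_end_io;
	bio_add_page(bio, page, PAGE_SIZE, 0);
	submit_bio(bio);	/* completion arrives asynchronously via ->bi_end_io() */
}
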
2603 | 1136 | |
---|
2604 | | -bool blk_poll(struct request_queue *q, blk_qc_t cookie) |
---|
2605 | | -{ |
---|
2606 | | - if (!q->poll_fn || !blk_qc_t_valid(cookie)) |
---|
2607 | | - return false; |
---|
2608 | | - |
---|
2609 | | - if (current->plug) |
---|
2610 | | - blk_flush_plug_list(current->plug, false); |
---|
2611 | | - return q->poll_fn(q, cookie); |
---|
2612 | | -} |
---|
2613 | | -EXPORT_SYMBOL_GPL(blk_poll); |
---|
2614 | | - |
---|
2615 | 1137 | /** |
---|
2616 | 1138 | * blk_cloned_rq_check_limits - Helper function to check a cloned request |
---|
2617 | | - * for new the queue limits |
---|
| 1139 | + * for the new queue limits |
---|
2618 | 1140 | * @q: the queue |
---|
2619 | 1141 | * @rq: the request being checked |
---|
2620 | 1142 | * |
---|
.. | .. |
---|
2629 | 1151 | * limits when retrying requests on other queues. Those requests need |
---|
2630 | 1152 | * to be checked against the new queue limits again during dispatch. |
---|
2631 | 1153 | */ |
---|
2632 | | -static int blk_cloned_rq_check_limits(struct request_queue *q, |
---|
| 1154 | +static blk_status_t blk_cloned_rq_check_limits(struct request_queue *q, |
---|
2633 | 1155 | struct request *rq) |
---|
2634 | 1156 | { |
---|
2635 | | - if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, req_op(rq))) { |
---|
2636 | | - printk(KERN_ERR "%s: over max size limit.\n", __func__); |
---|
2637 | | - return -EIO; |
---|
| 1157 | + unsigned int max_sectors = blk_queue_get_max_sectors(q, req_op(rq)); |
---|
| 1158 | + |
---|
| 1159 | + if (blk_rq_sectors(rq) > max_sectors) { |
---|
| 1160 | + /* |
---|
| 1161 | + * A SCSI device does not have a good way to report whether 
---|
| 1162 | + * Write Same/Zero is actually supported. If a device rejects 
---|
| 1163 | + * a non-read/write command (discard, write same, etc.) the 
---|
| 1164 | + * low-level device driver will set the relevant queue limit to |
---|
| 1165 | + * 0 to prevent blk-lib from issuing more of the offending |
---|
| 1166 | + * operations. Commands queued prior to the queue limit being |
---|
| 1167 | + * reset need to be completed with BLK_STS_NOTSUPP to avoid I/O |
---|
| 1168 | + * errors being propagated to upper layers. |
---|
| 1169 | + */ |
---|
| 1170 | + if (max_sectors == 0) |
---|
| 1171 | + return BLK_STS_NOTSUPP; |
---|
| 1172 | + |
---|
| 1173 | + printk(KERN_ERR "%s: over max size limit. (%u > %u)\n", |
---|
| 1174 | + __func__, blk_rq_sectors(rq), max_sectors); |
---|
| 1175 | + return BLK_STS_IOERR; |
---|
2638 | 1176 | } |
---|
2639 | 1177 | |
---|
2640 | 1178 | /* |
---|
.. | .. |
---|
2643 | 1181 | * Recalculate it to check the request correctly on this queue's |
---|
2644 | 1182 | * limitation. |
---|
2645 | 1183 | */ |
---|
2646 | | - blk_recalc_rq_segments(rq); |
---|
| 1184 | + rq->nr_phys_segments = blk_recalc_rq_segments(rq); |
---|
2647 | 1185 | if (rq->nr_phys_segments > queue_max_segments(q)) { |
---|
2648 | | - printk(KERN_ERR "%s: over max segments limit.\n", __func__); |
---|
2649 | | - return -EIO; |
---|
| 1186 | + printk(KERN_ERR "%s: over max segments limit. (%hu > %hu)\n", |
---|
| 1187 | + __func__, rq->nr_phys_segments, queue_max_segments(q)); |
---|
| 1188 | + return BLK_STS_IOERR; |
---|
2650 | 1189 | } |
---|
2651 | 1190 | |
---|
2652 | | - return 0; |
---|
| 1191 | + return BLK_STS_OK; |
---|
2653 | 1192 | } |
---|
2654 | 1193 | |
---|
2655 | 1194 | /** |
---|
.. | .. |
---|
2659 | 1198 | */ |
---|
2660 | 1199 | blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq) |
---|
2661 | 1200 | { |
---|
2662 | | - unsigned long flags; |
---|
2663 | | - int where = ELEVATOR_INSERT_BACK; |
---|
| 1201 | + blk_status_t ret; |
---|
2664 | 1202 | |
---|
2665 | | - if (blk_cloned_rq_check_limits(q, rq)) |
---|
2666 | | - return BLK_STS_IOERR; |
---|
| 1203 | + ret = blk_cloned_rq_check_limits(q, rq); |
---|
| 1204 | + if (ret != BLK_STS_OK) |
---|
| 1205 | + return ret; |
---|
2667 | 1206 | |
---|
2668 | 1207 | if (rq->rq_disk && |
---|
2669 | 1208 | should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq))) |
---|
2670 | 1209 | return BLK_STS_IOERR; |
---|
2671 | 1210 | |
---|
2672 | | - if (q->mq_ops) { |
---|
2673 | | - if (blk_queue_io_stat(q)) |
---|
2674 | | - blk_account_io_start(rq, true); |
---|
2675 | | - /* |
---|
2676 | | - * Since we have a scheduler attached on the top device, |
---|
2677 | | - * bypass a potential scheduler on the bottom device for |
---|
2678 | | - * insert. |
---|
2679 | | - */ |
---|
2680 | | - return blk_mq_request_issue_directly(rq); |
---|
2681 | | - } |
---|
2682 | | - |
---|
2683 | | - spin_lock_irqsave(q->queue_lock, flags); |
---|
2684 | | - if (unlikely(blk_queue_dying(q))) { |
---|
2685 | | - spin_unlock_irqrestore(q->queue_lock, flags); |
---|
| 1211 | + if (blk_crypto_insert_cloned_request(rq)) |
---|
2686 | 1212 | return BLK_STS_IOERR; |
---|
2687 | | - } |
---|
| 1213 | + |
---|
| 1214 | + if (blk_queue_io_stat(q)) |
---|
| 1215 | + blk_account_io_start(rq); |
---|
2688 | 1216 | |
---|
2689 | 1217 | /* |
---|
2690 | | - * Submitting request must be dequeued before calling this function |
---|
2691 | | - * because it will be linked to another request_queue |
---|
| 1218 | + * Since we have a scheduler attached on the top device, |
---|
| 1219 | + * bypass a potential scheduler on the bottom device for |
---|
| 1220 | + * insert. |
---|
2692 | 1221 | */ |
---|
2693 | | - BUG_ON(blk_queued_rq(rq)); |
---|
2694 | | - |
---|
2695 | | - if (op_is_flush(rq->cmd_flags)) |
---|
2696 | | - where = ELEVATOR_INSERT_FLUSH; |
---|
2697 | | - |
---|
2698 | | - add_acct_request(q, rq, where); |
---|
2699 | | - if (where == ELEVATOR_INSERT_FLUSH) |
---|
2700 | | - __blk_run_queue(q); |
---|
2701 | | - spin_unlock_irqrestore(q->queue_lock, flags); |
---|
2702 | | - |
---|
2703 | | - return BLK_STS_OK; |
---|
| 1222 | + return blk_mq_request_issue_directly(rq, true); |
---|
2704 | 1223 | } |
---|
2705 | 1224 | EXPORT_SYMBOL_GPL(blk_insert_cloned_request); |
---|
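
As a hedged illustration of the stacking-driver path the comment above describes (the function and variable names here are hypothetical): the top-level driver prepares a clone elsewhere and then hands it to the lower queue, where it bypasses any attached scheduler.

#include <linux/blkdev.h>

/* Hypothetical dispatch step in a request-stacking driver. */
static blk_status_t my_dispatch_clone(struct request_queue *lower_q,
				      struct request *clone)
{
	/*
	 * blk_insert_cloned_request() re-checks the clone against
	 * @lower_q's limits and issues it directly to blk-mq.
	 */
	return blk_insert_cloned_request(lower_q, clone);
}
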
2706 | 1225 | |
---|
.. | .. |
---|
2745 | 1264 | } |
---|
2746 | 1265 | EXPORT_SYMBOL_GPL(blk_rq_err_bytes); |
---|
2747 | 1266 | |
---|
2748 | | -void blk_account_io_completion(struct request *req, unsigned int bytes) |
---|
| 1267 | +static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end) |
---|
2749 | 1268 | { |
---|
2750 | | - if (blk_do_io_stat(req)) { |
---|
| 1269 | + unsigned long stamp; |
---|
| 1270 | +again: |
---|
| 1271 | + stamp = READ_ONCE(part->stamp); |
---|
| 1272 | + if (unlikely(stamp != now)) { |
---|
| 1273 | + if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) |
---|
| 1274 | + __part_stat_add(part, io_ticks, end ? now - stamp : 1); |
---|
| 1275 | + } |
---|
| 1276 | + if (part->partno) { |
---|
| 1277 | + part = &part_to_disk(part)->part0; |
---|
| 1278 | + goto again; |
---|
| 1279 | + } |
---|
| 1280 | +} |
---|
| 1281 | + |
---|
| 1282 | +static void blk_account_io_completion(struct request *req, unsigned int bytes) |
---|
| 1283 | +{ |
---|
| 1284 | + if (req->part && blk_do_io_stat(req)) { |
---|
2751 | 1285 | const int sgrp = op_stat_group(req_op(req)); |
---|
2752 | 1286 | struct hd_struct *part; |
---|
2753 | | - int cpu; |
---|
2754 | 1287 | |
---|
2755 | | - cpu = part_stat_lock(); |
---|
| 1288 | + part_stat_lock(); |
---|
2756 | 1289 | part = req->part; |
---|
2757 | | - part_stat_add(cpu, part, sectors[sgrp], bytes >> 9); |
---|
| 1290 | + part_stat_add(part, sectors[sgrp], bytes >> 9); |
---|
2758 | 1291 | part_stat_unlock(); |
---|
2759 | 1292 | } |
---|
2760 | 1293 | } |
---|
.. | .. |
---|
2766 | 1299 | * normal IO on queueing nor completion. Accounting the |
---|
2767 | 1300 | * containing request is enough. |
---|
2768 | 1301 | */ |
---|
2769 | | - if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) { |
---|
| 1302 | + if (req->part && blk_do_io_stat(req) && |
---|
| 1303 | + !(req->rq_flags & RQF_FLUSH_SEQ)) { |
---|
2770 | 1304 | const int sgrp = op_stat_group(req_op(req)); |
---|
2771 | 1305 | struct hd_struct *part; |
---|
2772 | | - int cpu; |
---|
2773 | 1306 | |
---|
2774 | | - cpu = part_stat_lock(); |
---|
| 1307 | + part_stat_lock(); |
---|
2775 | 1308 | part = req->part; |
---|
2776 | 1309 | |
---|
2777 | | - part_stat_inc(cpu, part, ios[sgrp]); |
---|
2778 | | - part_stat_add(cpu, part, nsecs[sgrp], now - req->start_time_ns); |
---|
2779 | | - part_round_stats(req->q, cpu, part); |
---|
2780 | | - part_dec_in_flight(req->q, part, rq_data_dir(req)); |
---|
| 1310 | + update_io_ticks(part, jiffies, true); |
---|
| 1311 | + part_stat_inc(part, ios[sgrp]); |
---|
| 1312 | + part_stat_add(part, nsecs[sgrp], now - req->start_time_ns); |
---|
| 1313 | + part_stat_unlock(); |
---|
2781 | 1314 | |
---|
2782 | 1315 | hd_struct_put(part); |
---|
2783 | | - part_stat_unlock(); |
---|
2784 | 1316 | } |
---|
2785 | 1317 | } |
---|
2786 | 1318 | |
---|
2787 | | -#ifdef CONFIG_PM |
---|
2788 | | -/* |
---|
2789 | | - * Don't process normal requests when queue is suspended |
---|
2790 | | - * or in the process of suspending/resuming |
---|
2791 | | - */ |
---|
2792 | | -static bool blk_pm_allow_request(struct request *rq) |
---|
| 1319 | +void blk_account_io_start(struct request *rq) |
---|
2793 | 1320 | { |
---|
2794 | | - switch (rq->q->rpm_status) { |
---|
2795 | | - case RPM_RESUMING: |
---|
2796 | | - case RPM_SUSPENDING: |
---|
2797 | | - return rq->rq_flags & RQF_PM; |
---|
2798 | | - case RPM_SUSPENDED: |
---|
2799 | | - return false; |
---|
2800 | | - default: |
---|
2801 | | - return true; |
---|
2802 | | - } |
---|
2803 | | -} |
---|
2804 | | -#else |
---|
2805 | | -static bool blk_pm_allow_request(struct request *rq) |
---|
2806 | | -{ |
---|
2807 | | - return true; |
---|
2808 | | -} |
---|
2809 | | -#endif |
---|
2810 | | - |
---|
2811 | | -void blk_account_io_start(struct request *rq, bool new_io) |
---|
2812 | | -{ |
---|
2813 | | - struct hd_struct *part; |
---|
2814 | | - int rw = rq_data_dir(rq); |
---|
2815 | | - int cpu; |
---|
2816 | | - |
---|
2817 | 1321 | if (!blk_do_io_stat(rq)) |
---|
2818 | 1322 | return; |
---|
2819 | 1323 | |
---|
2820 | | - cpu = part_stat_lock(); |
---|
| 1324 | + rq->part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); |
---|
2821 | 1325 | |
---|
2822 | | - if (!new_io) { |
---|
2823 | | - part = rq->part; |
---|
2824 | | - part_stat_inc(cpu, part, merges[rw]); |
---|
2825 | | - } else { |
---|
2826 | | - part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); |
---|
2827 | | - if (!hd_struct_try_get(part)) { |
---|
2828 | | - /* |
---|
2829 | | - * The partition is already being removed, |
---|
2830 | | - * the request will be accounted on the disk only |
---|
2831 | | - * |
---|
2832 | | - * We take a reference on disk->part0 although that |
---|
2833 | | - * partition will never be deleted, so we can treat |
---|
2834 | | - * it as any other partition. |
---|
2835 | | - */ |
---|
2836 | | - part = &rq->rq_disk->part0; |
---|
2837 | | - hd_struct_get(part); |
---|
2838 | | - } |
---|
2839 | | - part_round_stats(rq->q, cpu, part); |
---|
2840 | | - part_inc_in_flight(rq->q, part, rw); |
---|
2841 | | - rq->part = part; |
---|
2842 | | - } |
---|
2843 | | - |
---|
| 1326 | + part_stat_lock(); |
---|
| 1327 | + update_io_ticks(rq->part, jiffies, false); |
---|
2844 | 1328 | part_stat_unlock(); |
---|
2845 | 1329 | } |
---|
2846 | 1330 | |
---|
2847 | | -static struct request *elv_next_request(struct request_queue *q) |
---|
| 1331 | +static unsigned long __part_start_io_acct(struct hd_struct *part, |
---|
| 1332 | + unsigned int sectors, unsigned int op) |
---|
2848 | 1333 | { |
---|
2849 | | - struct request *rq; |
---|
2850 | | - struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL); |
---|
| 1334 | + const int sgrp = op_stat_group(op); |
---|
| 1335 | + unsigned long now = READ_ONCE(jiffies); |
---|
2851 | 1336 | |
---|
2852 | | - WARN_ON_ONCE(q->mq_ops); |
---|
| 1337 | + part_stat_lock(); |
---|
| 1338 | + update_io_ticks(part, now, false); |
---|
| 1339 | + part_stat_inc(part, ios[sgrp]); |
---|
| 1340 | + part_stat_add(part, sectors[sgrp], sectors); |
---|
| 1341 | + part_stat_local_inc(part, in_flight[op_is_write(op)]); |
---|
| 1342 | + part_stat_unlock(); |
---|
2853 | 1343 | |
---|
2854 | | - while (1) { |
---|
2855 | | - list_for_each_entry(rq, &q->queue_head, queuelist) { |
---|
2856 | | - if (blk_pm_allow_request(rq)) |
---|
2857 | | - return rq; |
---|
2858 | | - |
---|
2859 | | - if (rq->rq_flags & RQF_SOFTBARRIER) |
---|
2860 | | - break; |
---|
2861 | | - } |
---|
2862 | | - |
---|
2863 | | - /* |
---|
2864 | | - * Flush request is running and flush request isn't queueable |
---|
2865 | | - * in the drive, we can hold the queue till flush request is |
---|
2866 | | - * finished. Even we don't do this, driver can't dispatch next |
---|
2867 | | - * requests and will requeue them. And this can improve |
---|
2868 | | - * throughput too. For example, we have request flush1, write1, |
---|
2869 | | - * flush 2. flush1 is dispatched, then queue is hold, write1 |
---|
2870 | | - * isn't inserted to queue. After flush1 is finished, flush2 |
---|
2871 | | - * will be dispatched. Since disk cache is already clean, |
---|
2872 | | - * flush2 will be finished very soon, so looks like flush2 is |
---|
2873 | | - * folded to flush1. |
---|
2874 | | - * Since the queue is hold, a flag is set to indicate the queue |
---|
2875 | | - * should be restarted later. Please see flush_end_io() for |
---|
2876 | | - * details. |
---|
2877 | | - */ |
---|
2878 | | - if (fq->flush_pending_idx != fq->flush_running_idx && |
---|
2879 | | - !queue_flush_queueable(q)) { |
---|
2880 | | - fq->flush_queue_delayed = 1; |
---|
2881 | | - return NULL; |
---|
2882 | | - } |
---|
2883 | | - if (unlikely(blk_queue_bypass(q)) || |
---|
2884 | | - !q->elevator->type->ops.sq.elevator_dispatch_fn(q, 0)) |
---|
2885 | | - return NULL; |
---|
2886 | | - } |
---|
| 1344 | + return now; |
---|
2887 | 1345 | } |
---|
2888 | 1346 | |
---|
2889 | | -/** |
---|
2890 | | - * blk_peek_request - peek at the top of a request queue |
---|
2891 | | - * @q: request queue to peek at |
---|
2892 | | - * |
---|
2893 | | - * Description: |
---|
2894 | | - * Return the request at the top of @q. The returned request |
---|
2895 | | - * should be started using blk_start_request() before LLD starts |
---|
2896 | | - * processing it. |
---|
2897 | | - * |
---|
2898 | | - * Return: |
---|
2899 | | - * Pointer to the request at the top of @q if available. Null |
---|
2900 | | - * otherwise. |
---|
2901 | | - */ |
---|
2902 | | -struct request *blk_peek_request(struct request_queue *q) |
---|
| 1347 | +unsigned long part_start_io_acct(struct gendisk *disk, struct hd_struct **part, |
---|
| 1348 | + struct bio *bio) |
---|
2903 | 1349 | { |
---|
2904 | | - struct request *rq; |
---|
2905 | | - int ret; |
---|
| 1350 | + *part = disk_map_sector_rcu(disk, bio->bi_iter.bi_sector); |
---|
2906 | 1351 | |
---|
2907 | | - lockdep_assert_held(q->queue_lock); |
---|
2908 | | - WARN_ON_ONCE(q->mq_ops); |
---|
2909 | | - |
---|
2910 | | - while ((rq = elv_next_request(q)) != NULL) { |
---|
2911 | | - if (!(rq->rq_flags & RQF_STARTED)) { |
---|
2912 | | - /* |
---|
2913 | | - * This is the first time the device driver |
---|
2914 | | - * sees this request (possibly after |
---|
2915 | | - * requeueing). Notify IO scheduler. |
---|
2916 | | - */ |
---|
2917 | | - if (rq->rq_flags & RQF_SORTED) |
---|
2918 | | - elv_activate_rq(q, rq); |
---|
2919 | | - |
---|
2920 | | - /* |
---|
2921 | | - * just mark as started even if we don't start |
---|
2922 | | - * it, a request that has been delayed should |
---|
2923 | | - * not be passed by new incoming requests |
---|
2924 | | - */ |
---|
2925 | | - rq->rq_flags |= RQF_STARTED; |
---|
2926 | | - trace_block_rq_issue(q, rq); |
---|
2927 | | - } |
---|
2928 | | - |
---|
2929 | | - if (!q->boundary_rq || q->boundary_rq == rq) { |
---|
2930 | | - q->end_sector = rq_end_sector(rq); |
---|
2931 | | - q->boundary_rq = NULL; |
---|
2932 | | - } |
---|
2933 | | - |
---|
2934 | | - if (rq->rq_flags & RQF_DONTPREP) |
---|
2935 | | - break; |
---|
2936 | | - |
---|
2937 | | - if (q->dma_drain_size && blk_rq_bytes(rq)) { |
---|
2938 | | - /* |
---|
2939 | | - * make sure space for the drain appears we |
---|
2940 | | - * know we can do this because max_hw_segments |
---|
2941 | | - * has been adjusted to be one fewer than the |
---|
2942 | | - * device can handle |
---|
2943 | | - */ |
---|
2944 | | - rq->nr_phys_segments++; |
---|
2945 | | - } |
---|
2946 | | - |
---|
2947 | | - if (!q->prep_rq_fn) |
---|
2948 | | - break; |
---|
2949 | | - |
---|
2950 | | - ret = q->prep_rq_fn(q, rq); |
---|
2951 | | - if (ret == BLKPREP_OK) { |
---|
2952 | | - break; |
---|
2953 | | - } else if (ret == BLKPREP_DEFER) { |
---|
2954 | | - /* |
---|
2955 | | - * the request may have been (partially) prepped. |
---|
2956 | | - * we need to keep this request in the front to |
---|
2957 | | - * avoid resource deadlock. RQF_STARTED will |
---|
2958 | | - * prevent other fs requests from passing this one. |
---|
2959 | | - */ |
---|
2960 | | - if (q->dma_drain_size && blk_rq_bytes(rq) && |
---|
2961 | | - !(rq->rq_flags & RQF_DONTPREP)) { |
---|
2962 | | - /* |
---|
2963 | | - * remove the space for the drain we added |
---|
2964 | | - * so that we don't add it again |
---|
2965 | | - */ |
---|
2966 | | - --rq->nr_phys_segments; |
---|
2967 | | - } |
---|
2968 | | - |
---|
2969 | | - rq = NULL; |
---|
2970 | | - break; |
---|
2971 | | - } else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) { |
---|
2972 | | - rq->rq_flags |= RQF_QUIET; |
---|
2973 | | - /* |
---|
2974 | | - * Mark this request as started so we don't trigger |
---|
2975 | | - * any debug logic in the end I/O path. |
---|
2976 | | - */ |
---|
2977 | | - blk_start_request(rq); |
---|
2978 | | - __blk_end_request_all(rq, ret == BLKPREP_INVALID ? |
---|
2979 | | - BLK_STS_TARGET : BLK_STS_IOERR); |
---|
2980 | | - } else { |
---|
2981 | | - printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); |
---|
2982 | | - break; |
---|
2983 | | - } |
---|
2984 | | - } |
---|
2985 | | - |
---|
2986 | | - return rq; |
---|
| 1352 | + return __part_start_io_acct(*part, bio_sectors(bio), bio_op(bio)); |
---|
2987 | 1353 | } |
---|
2988 | | -EXPORT_SYMBOL(blk_peek_request); |
---|
| 1354 | +EXPORT_SYMBOL_GPL(part_start_io_acct); |
---|
2989 | 1355 | |
---|
2990 | | -static void blk_dequeue_request(struct request *rq) |
---|
| 1356 | +unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, |
---|
| 1357 | + unsigned int op) |
---|
2991 | 1358 | { |
---|
2992 | | - struct request_queue *q = rq->q; |
---|
| 1359 | + return __part_start_io_acct(&disk->part0, sectors, op); |
---|
| 1360 | +} |
---|
| 1361 | +EXPORT_SYMBOL(disk_start_io_acct); |
---|
2993 | 1362 | |
---|
2994 | | - BUG_ON(list_empty(&rq->queuelist)); |
---|
2995 | | - BUG_ON(ELV_ON_HASH(rq)); |
---|
| 1363 | +static void __part_end_io_acct(struct hd_struct *part, unsigned int op, |
---|
| 1364 | + unsigned long start_time) |
---|
| 1365 | +{ |
---|
| 1366 | + const int sgrp = op_stat_group(op); |
---|
| 1367 | + unsigned long now = READ_ONCE(jiffies); |
---|
| 1368 | + unsigned long duration = now - start_time; |
---|
2996 | 1369 | |
---|
2997 | | - list_del_init(&rq->queuelist); |
---|
2998 | | - |
---|
2999 | | - /* |
---|
3000 | | - * the time frame between a request being removed from the lists |
---|
3001 | | - * and to it is freed is accounted as io that is in progress at |
---|
3002 | | - * the driver side. |
---|
3003 | | - */ |
---|
3004 | | - if (blk_account_rq(rq)) |
---|
3005 | | - q->in_flight[rq_is_sync(rq)]++; |
---|
| 1370 | + part_stat_lock(); |
---|
| 1371 | + update_io_ticks(part, now, true); |
---|
| 1372 | + part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration)); |
---|
| 1373 | + part_stat_local_dec(part, in_flight[op_is_write(op)]); |
---|
| 1374 | + part_stat_unlock(); |
---|
3006 | 1375 | } |
---|
3007 | 1376 | |
---|
3008 | | -/** |
---|
3009 | | - * blk_start_request - start request processing on the driver |
---|
3010 | | - * @req: request to dequeue |
---|
3011 | | - * |
---|
3012 | | - * Description: |
---|
3013 | | - * Dequeue @req and start timeout timer on it. This hands off the |
---|
3014 | | - * request to the driver. |
---|
3015 | | - */ |
---|
3016 | | -void blk_start_request(struct request *req) |
---|
| 1377 | +void part_end_io_acct(struct hd_struct *part, struct bio *bio, |
---|
| 1378 | + unsigned long start_time) |
---|
3017 | 1379 | { |
---|
3018 | | - lockdep_assert_held(req->q->queue_lock); |
---|
3019 | | - WARN_ON_ONCE(req->q->mq_ops); |
---|
3020 | | - |
---|
3021 | | - blk_dequeue_request(req); |
---|
3022 | | - |
---|
3023 | | - if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) { |
---|
3024 | | - req->io_start_time_ns = ktime_get_ns(); |
---|
3025 | | -#ifdef CONFIG_BLK_DEV_THROTTLING_LOW |
---|
3026 | | - req->throtl_size = blk_rq_sectors(req); |
---|
3027 | | -#endif |
---|
3028 | | - req->rq_flags |= RQF_STATS; |
---|
3029 | | - rq_qos_issue(req->q, req); |
---|
3030 | | - } |
---|
3031 | | - |
---|
3032 | | - BUG_ON(blk_rq_is_complete(req)); |
---|
3033 | | - blk_add_timer(req); |
---|
| 1380 | + __part_end_io_acct(part, bio_op(bio), start_time); |
---|
| 1381 | + hd_struct_put(part); |
---|
3034 | 1382 | } |
---|
3035 | | -EXPORT_SYMBOL(blk_start_request); |
---|
| 1383 | +EXPORT_SYMBOL_GPL(part_end_io_acct); |
---|
3036 | 1384 | |
---|
3037 | | -/** |
---|
3038 | | - * blk_fetch_request - fetch a request from a request queue |
---|
3039 | | - * @q: request queue to fetch a request from |
---|
3040 | | - * |
---|
3041 | | - * Description: |
---|
3042 | | - * Return the request at the top of @q. The request is started on |
---|
3043 | | - * return and LLD can start processing it immediately. |
---|
3044 | | - * |
---|
3045 | | - * Return: |
---|
3046 | | - * Pointer to the request at the top of @q if available. Null |
---|
3047 | | - * otherwise. |
---|
3048 | | - */ |
---|
3049 | | -struct request *blk_fetch_request(struct request_queue *q) |
---|
| 1385 | +void disk_end_io_acct(struct gendisk *disk, unsigned int op, |
---|
| 1386 | + unsigned long start_time) |
---|
3050 | 1387 | { |
---|
3051 | | - struct request *rq; |
---|
3052 | | - |
---|
3053 | | - lockdep_assert_held(q->queue_lock); |
---|
3054 | | - WARN_ON_ONCE(q->mq_ops); |
---|
3055 | | - |
---|
3056 | | - rq = blk_peek_request(q); |
---|
3057 | | - if (rq) |
---|
3058 | | - blk_start_request(rq); |
---|
3059 | | - return rq; |
---|
| 1388 | + __part_end_io_acct(&disk->part0, op, start_time); |
---|
3060 | 1389 | } |
---|
3061 | | -EXPORT_SYMBOL(blk_fetch_request); |
---|
| 1390 | +EXPORT_SYMBOL(disk_end_io_acct); |
---|
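
The two accounting pairs exported above, disk_start_io_acct()/disk_end_io_acct() and part_start_io_acct()/part_end_io_acct(), are intended for bio-based drivers that never allocate a struct request. A hedged sketch of the disk-level pair, with my_submit_bio() and its synchronous handling purely hypothetical:

#include <linux/blkdev.h>

static blk_qc_t my_submit_bio(struct bio *bio)
{
	struct gendisk *disk = bio->bi_disk;
	unsigned long start_time;

	start_time = disk_start_io_acct(disk, bio_sectors(bio), bio_op(bio));

	/* ... service the bio, synchronously for simplicity ... */

	disk_end_io_acct(disk, bio_op(bio), start_time);
	bio_endio(bio);
	return BLK_QC_T_NONE;
}
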
3062 | 1391 | |
---|
3063 | 1392 | /* |
---|
3064 | 1393 | * Steal bios from a request and add them to a bio list. |
---|
.. | .. |
---|
3094 | 1423 | * |
---|
3095 | 1424 | * This special helper function is only for request stacking drivers |
---|
3096 | 1425 | * (e.g. request-based dm) so that they can handle partial completion. |
---|
3097 | | - * Actual device drivers should use blk_end_request instead. |
---|
| 1426 | + * Actual device drivers should use blk_mq_end_request instead. |
---|
3098 | 1427 | * |
---|
3099 | 1428 | * Passing the result of blk_rq_bytes() as @nr_bytes guarantees |
---|
3100 | 1429 | * %false return from this function. |
---|
.. | .. |
---|
3117 | 1446 | if (!req->bio) |
---|
3118 | 1447 | return false; |
---|
3119 | 1448 | |
---|
| 1449 | +#ifdef CONFIG_BLK_DEV_INTEGRITY |
---|
| 1450 | + if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ && |
---|
| 1451 | + error == BLK_STS_OK) |
---|
| 1452 | + req->q->integrity.profile->complete_fn(req, nr_bytes); |
---|
| 1453 | +#endif |
---|
| 1454 | + |
---|
3120 | 1455 | if (unlikely(error && !blk_rq_is_passthrough(req) && |
---|
3121 | 1456 | !(req->rq_flags & RQF_QUIET))) |
---|
3122 | | - print_req_error(req, error); |
---|
| 1457 | + print_req_error(req, error, __func__); |
---|
3123 | 1458 | |
---|
3124 | 1459 | blk_account_io_completion(req, nr_bytes); |
---|
3125 | 1460 | |
---|
.. | .. |
---|
3178 | 1513 | } |
---|
3179 | 1514 | |
---|
3180 | 1515 | /* recalculate the number of segments */ |
---|
3181 | | - blk_recalc_rq_segments(req); |
---|
| 1516 | + req->nr_phys_segments = blk_recalc_rq_segments(req); |
---|
3182 | 1517 | } |
---|
3183 | 1518 | |
---|
3184 | 1519 | return true; |
---|
3185 | 1520 | } |
---|
3186 | 1521 | EXPORT_SYMBOL_GPL(blk_update_request); |
---|
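
A hedged sketch of the partial-completion pattern blk_update_request() supports for request-stacking drivers; my_complete_some() is hypothetical, and ordinary drivers would call blk_mq_end_request() directly instead:

#include <linux/blkdev.h>
#include <linux/blk-mq.h>

/* Complete @done bytes of @rq; finish the request once nothing is left. */
static void my_complete_some(struct request *rq, blk_status_t error,
			     unsigned int done)
{
	if (blk_update_request(rq, error, done))
		return;			/* bios still pending, @rq stays live */

	blk_mq_end_request(rq, error);	/* all bytes accounted for */
}
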
3187 | | - |
---|
3188 | | -static bool blk_update_bidi_request(struct request *rq, blk_status_t error, |
---|
3189 | | - unsigned int nr_bytes, |
---|
3190 | | - unsigned int bidi_bytes) |
---|
3191 | | -{ |
---|
3192 | | - if (blk_update_request(rq, error, nr_bytes)) |
---|
3193 | | - return true; |
---|
3194 | | - |
---|
3195 | | - /* Bidi request must be completed as a whole */ |
---|
3196 | | - if (unlikely(blk_bidi_rq(rq)) && |
---|
3197 | | - blk_update_request(rq->next_rq, error, bidi_bytes)) |
---|
3198 | | - return true; |
---|
3199 | | - |
---|
3200 | | - if (blk_queue_add_random(rq->q)) |
---|
3201 | | - add_disk_randomness(rq->rq_disk); |
---|
3202 | | - |
---|
3203 | | - return false; |
---|
3204 | | -} |
---|
3205 | | - |
---|
3206 | | -/** |
---|
3207 | | - * blk_unprep_request - unprepare a request |
---|
3208 | | - * @req: the request |
---|
3209 | | - * |
---|
3210 | | - * This function makes a request ready for complete resubmission (or |
---|
3211 | | - * completion). It happens only after all error handling is complete, |
---|
3212 | | - * so represents the appropriate moment to deallocate any resources |
---|
3213 | | - * that were allocated to the request in the prep_rq_fn. The queue |
---|
3214 | | - * lock is held when calling this. |
---|
3215 | | - */ |
---|
3216 | | -void blk_unprep_request(struct request *req) |
---|
3217 | | -{ |
---|
3218 | | - struct request_queue *q = req->q; |
---|
3219 | | - |
---|
3220 | | - req->rq_flags &= ~RQF_DONTPREP; |
---|
3221 | | - if (q->unprep_rq_fn) |
---|
3222 | | - q->unprep_rq_fn(q, req); |
---|
3223 | | -} |
---|
3224 | | -EXPORT_SYMBOL_GPL(blk_unprep_request); |
---|
3225 | | - |
---|
3226 | | -void blk_finish_request(struct request *req, blk_status_t error) |
---|
3227 | | -{ |
---|
3228 | | - struct request_queue *q = req->q; |
---|
3229 | | - u64 now = ktime_get_ns(); |
---|
3230 | | - |
---|
3231 | | - lockdep_assert_held(req->q->queue_lock); |
---|
3232 | | - WARN_ON_ONCE(q->mq_ops); |
---|
3233 | | - |
---|
3234 | | - if (req->rq_flags & RQF_STATS) |
---|
3235 | | - blk_stat_add(req, now); |
---|
3236 | | - |
---|
3237 | | - if (req->rq_flags & RQF_QUEUED) |
---|
3238 | | - blk_queue_end_tag(q, req); |
---|
3239 | | - |
---|
3240 | | - BUG_ON(blk_queued_rq(req)); |
---|
3241 | | - |
---|
3242 | | - if (unlikely(laptop_mode) && !blk_rq_is_passthrough(req)) |
---|
3243 | | - laptop_io_completion(req->q->backing_dev_info); |
---|
3244 | | - |
---|
3245 | | - blk_delete_timer(req); |
---|
3246 | | - |
---|
3247 | | - if (req->rq_flags & RQF_DONTPREP) |
---|
3248 | | - blk_unprep_request(req); |
---|
3249 | | - |
---|
3250 | | - blk_account_io_done(req, now); |
---|
3251 | | - |
---|
3252 | | - if (req->end_io) { |
---|
3253 | | - rq_qos_done(q, req); |
---|
3254 | | - req->end_io(req, error); |
---|
3255 | | - } else { |
---|
3256 | | - if (blk_bidi_rq(req)) |
---|
3257 | | - __blk_put_request(req->next_rq->q, req->next_rq); |
---|
3258 | | - |
---|
3259 | | - __blk_put_request(q, req); |
---|
3260 | | - } |
---|
3261 | | -} |
---|
3262 | | -EXPORT_SYMBOL(blk_finish_request); |
---|
3263 | | - |
---|
3264 | | -/** |
---|
3265 | | - * blk_end_bidi_request - Complete a bidi request |
---|
3266 | | - * @rq: the request to complete |
---|
3267 | | - * @error: block status code |
---|
3268 | | - * @nr_bytes: number of bytes to complete @rq |
---|
3269 | | - * @bidi_bytes: number of bytes to complete @rq->next_rq |
---|
3270 | | - * |
---|
3271 | | - * Description: |
---|
3272 | | - * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. |
---|
3273 | | - * Drivers that supports bidi can safely call this member for any |
---|
3274 | | - * type of request, bidi or uni. In the later case @bidi_bytes is |
---|
3275 | | - * just ignored. |
---|
3276 | | - * |
---|
3277 | | - * Return: |
---|
3278 | | - * %false - we are done with this request |
---|
3279 | | - * %true - still buffers pending for this request |
---|
3280 | | - **/ |
---|
3281 | | -static bool blk_end_bidi_request(struct request *rq, blk_status_t error, |
---|
3282 | | - unsigned int nr_bytes, unsigned int bidi_bytes) |
---|
3283 | | -{ |
---|
3284 | | - struct request_queue *q = rq->q; |
---|
3285 | | - unsigned long flags; |
---|
3286 | | - |
---|
3287 | | - WARN_ON_ONCE(q->mq_ops); |
---|
3288 | | - |
---|
3289 | | - if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) |
---|
3290 | | - return true; |
---|
3291 | | - |
---|
3292 | | - spin_lock_irqsave(q->queue_lock, flags); |
---|
3293 | | - blk_finish_request(rq, error); |
---|
3294 | | - spin_unlock_irqrestore(q->queue_lock, flags); |
---|
3295 | | - |
---|
3296 | | - return false; |
---|
3297 | | -} |
---|
3298 | | - |
---|
3299 | | -/** |
---|
3300 | | - * __blk_end_bidi_request - Complete a bidi request with queue lock held |
---|
3301 | | - * @rq: the request to complete |
---|
3302 | | - * @error: block status code |
---|
3303 | | - * @nr_bytes: number of bytes to complete @rq |
---|
3304 | | - * @bidi_bytes: number of bytes to complete @rq->next_rq |
---|
3305 | | - * |
---|
3306 | | - * Description: |
---|
3307 | | - * Identical to blk_end_bidi_request() except that queue lock is |
---|
3308 | | - * assumed to be locked on entry and remains so on return. |
---|
3309 | | - * |
---|
3310 | | - * Return: |
---|
3311 | | - * %false - we are done with this request |
---|
3312 | | - * %true - still buffers pending for this request |
---|
3313 | | - **/ |
---|
3314 | | -static bool __blk_end_bidi_request(struct request *rq, blk_status_t error, |
---|
3315 | | - unsigned int nr_bytes, unsigned int bidi_bytes) |
---|
3316 | | -{ |
---|
3317 | | - lockdep_assert_held(rq->q->queue_lock); |
---|
3318 | | - WARN_ON_ONCE(rq->q->mq_ops); |
---|
3319 | | - |
---|
3320 | | - if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) |
---|
3321 | | - return true; |
---|
3322 | | - |
---|
3323 | | - blk_finish_request(rq, error); |
---|
3324 | | - |
---|
3325 | | - return false; |
---|
3326 | | -} |
---|
3327 | | - |
---|
3328 | | -/** |
---|
3329 | | - * blk_end_request - Helper function for drivers to complete the request. |
---|
3330 | | - * @rq: the request being processed |
---|
3331 | | - * @error: block status code |
---|
3332 | | - * @nr_bytes: number of bytes to complete |
---|
3333 | | - * |
---|
3334 | | - * Description: |
---|
3335 | | - * Ends I/O on a number of bytes attached to @rq. |
---|
3336 | | - * If @rq has leftover, sets it up for the next range of segments. |
---|
3337 | | - * |
---|
3338 | | - * Return: |
---|
3339 | | - * %false - we are done with this request |
---|
3340 | | - * %true - still buffers pending for this request |
---|
3341 | | - **/ |
---|
3342 | | -bool blk_end_request(struct request *rq, blk_status_t error, |
---|
3343 | | - unsigned int nr_bytes) |
---|
3344 | | -{ |
---|
3345 | | - WARN_ON_ONCE(rq->q->mq_ops); |
---|
3346 | | - return blk_end_bidi_request(rq, error, nr_bytes, 0); |
---|
3347 | | -} |
---|
3348 | | -EXPORT_SYMBOL(blk_end_request); |
---|
3349 | | - |
---|
3350 | | -/** |
---|
3351 | | - * blk_end_request_all - Helper function for drives to finish the request. |
---|
3352 | | - * @rq: the request to finish |
---|
3353 | | - * @error: block status code |
---|
3354 | | - * |
---|
3355 | | - * Description: |
---|
3356 | | - * Completely finish @rq. |
---|
3357 | | - */ |
---|
3358 | | -void blk_end_request_all(struct request *rq, blk_status_t error) |
---|
3359 | | -{ |
---|
3360 | | - bool pending; |
---|
3361 | | - unsigned int bidi_bytes = 0; |
---|
3362 | | - |
---|
3363 | | - if (unlikely(blk_bidi_rq(rq))) |
---|
3364 | | - bidi_bytes = blk_rq_bytes(rq->next_rq); |
---|
3365 | | - |
---|
3366 | | - pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); |
---|
3367 | | - BUG_ON(pending); |
---|
3368 | | -} |
---|
3369 | | -EXPORT_SYMBOL(blk_end_request_all); |
---|
3370 | | - |
---|
3371 | | -/** |
---|
3372 | | - * __blk_end_request - Helper function for drivers to complete the request. |
---|
3373 | | - * @rq: the request being processed |
---|
3374 | | - * @error: block status code |
---|
3375 | | - * @nr_bytes: number of bytes to complete |
---|
3376 | | - * |
---|
3377 | | - * Description: |
---|
3378 | | - * Must be called with queue lock held unlike blk_end_request(). |
---|
3379 | | - * |
---|
3380 | | - * Return: |
---|
3381 | | - * %false - we are done with this request |
---|
3382 | | - * %true - still buffers pending for this request |
---|
3383 | | - **/ |
---|
3384 | | -bool __blk_end_request(struct request *rq, blk_status_t error, |
---|
3385 | | - unsigned int nr_bytes) |
---|
3386 | | -{ |
---|
3387 | | - lockdep_assert_held(rq->q->queue_lock); |
---|
3388 | | - WARN_ON_ONCE(rq->q->mq_ops); |
---|
3389 | | - |
---|
3390 | | - return __blk_end_bidi_request(rq, error, nr_bytes, 0); |
---|
3391 | | -} |
---|
3392 | | -EXPORT_SYMBOL(__blk_end_request); |
---|
3393 | | - |
---|
3394 | | -/** |
---|
3395 | | - * __blk_end_request_all - Helper function for drives to finish the request. |
---|
3396 | | - * @rq: the request to finish |
---|
3397 | | - * @error: block status code |
---|
3398 | | - * |
---|
3399 | | - * Description: |
---|
3400 | | - * Completely finish @rq. Must be called with queue lock held. |
---|
3401 | | - */ |
---|
3402 | | -void __blk_end_request_all(struct request *rq, blk_status_t error) |
---|
3403 | | -{ |
---|
3404 | | - bool pending; |
---|
3405 | | - unsigned int bidi_bytes = 0; |
---|
3406 | | - |
---|
3407 | | - lockdep_assert_held(rq->q->queue_lock); |
---|
3408 | | - WARN_ON_ONCE(rq->q->mq_ops); |
---|
3409 | | - |
---|
3410 | | - if (unlikely(blk_bidi_rq(rq))) |
---|
3411 | | - bidi_bytes = blk_rq_bytes(rq->next_rq); |
---|
3412 | | - |
---|
3413 | | - pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); |
---|
3414 | | - BUG_ON(pending); |
---|
3415 | | -} |
---|
3416 | | -EXPORT_SYMBOL(__blk_end_request_all); |
---|
3417 | | - |
---|
3418 | | -/** |
---|
3419 | | - * __blk_end_request_cur - Helper function to finish the current request chunk. |
---|
3420 | | - * @rq: the request to finish the current chunk for |
---|
3421 | | - * @error: block status code |
---|
3422 | | - * |
---|
3423 | | - * Description: |
---|
3424 | | - * Complete the current consecutively mapped chunk from @rq. Must |
---|
3425 | | - * be called with queue lock held. |
---|
3426 | | - * |
---|
3427 | | - * Return: |
---|
3428 | | - * %false - we are done with this request |
---|
3429 | | - * %true - still buffers pending for this request |
---|
3430 | | - */ |
---|
3431 | | -bool __blk_end_request_cur(struct request *rq, blk_status_t error) |
---|
3432 | | -{ |
---|
3433 | | - return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); |
---|
3434 | | -} |
---|
3435 | | -EXPORT_SYMBOL(__blk_end_request_cur); |
---|
3436 | | - |
---|
3437 | | -void blk_rq_bio_prep(struct request_queue *q, struct request *rq, |
---|
3438 | | - struct bio *bio) |
---|
3439 | | -{ |
---|
3440 | | - if (bio_has_data(bio)) |
---|
3441 | | - rq->nr_phys_segments = bio_phys_segments(q, bio); |
---|
3442 | | - else if (bio_op(bio) == REQ_OP_DISCARD) |
---|
3443 | | - rq->nr_phys_segments = 1; |
---|
3444 | | - |
---|
3445 | | - rq->__data_len = bio->bi_iter.bi_size; |
---|
3446 | | - rq->bio = rq->biotail = bio; |
---|
3447 | | - |
---|
3448 | | - if (bio->bi_disk) |
---|
3449 | | - rq->rq_disk = bio->bi_disk; |
---|
3450 | | -} |
---|
3451 | 1522 | |
---|
3452 | 1523 | #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE |
---|
3453 | 1524 | /** |
---|
.. | .. |
---|
3489 | 1560 | */ |
---|
3490 | 1561 | int blk_lld_busy(struct request_queue *q) |
---|
3491 | 1562 | { |
---|
3492 | | - if (q->lld_busy_fn) |
---|
3493 | | - return q->lld_busy_fn(q); |
---|
| 1563 | + if (queue_is_mq(q) && q->mq_ops->busy) |
---|
| 1564 | + return q->mq_ops->busy(q); |
---|
3494 | 1565 | |
---|
3495 | 1566 | return 0; |
---|
3496 | 1567 | } |
---|
.. | .. |
---|
3515 | 1586 | } |
---|
3516 | 1587 | EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); |
---|
3517 | 1588 | |
---|
3518 | | -/* |
---|
3519 | | - * Copy attributes of the original request to the clone request. |
---|
3520 | | - * The actual data parts (e.g. ->cmd, ->sense) are not copied. |
---|
3521 | | - */ |
---|
3522 | | -static void __blk_rq_prep_clone(struct request *dst, struct request *src) |
---|
3523 | | -{ |
---|
3524 | | - dst->cpu = src->cpu; |
---|
3525 | | - dst->__sector = blk_rq_pos(src); |
---|
3526 | | - dst->__data_len = blk_rq_bytes(src); |
---|
3527 | | - if (src->rq_flags & RQF_SPECIAL_PAYLOAD) { |
---|
3528 | | - dst->rq_flags |= RQF_SPECIAL_PAYLOAD; |
---|
3529 | | - dst->special_vec = src->special_vec; |
---|
3530 | | - } |
---|
3531 | | - dst->nr_phys_segments = src->nr_phys_segments; |
---|
3532 | | - dst->ioprio = src->ioprio; |
---|
3533 | | - dst->extra_len = src->extra_len; |
---|
3534 | | -} |
---|
3535 | | - |
---|
3536 | 1589 | /** |
---|
3537 | 1590 | * blk_rq_prep_clone - Helper function to setup clone request |
---|
3538 | 1591 | * @rq: the request to be setup |
---|
.. | .. |
---|
3545 | 1598 | * |
---|
3546 | 1599 | * Description: |
---|
3547 | 1600 | * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq. |
---|
3548 | | - * The actual data parts of @rq_src (e.g. ->cmd, ->sense) |
---|
3549 | | - * are not copied, and copying such parts is the caller's responsibility. |
---|
3550 | 1601 | * Also, pages which the original bios are pointing to are not copied |
---|
3551 | 1602 | * and the cloned bios just point same pages. |
---|
3552 | 1603 | * So cloned bios must be completed before original bios, which means |
---|
.. | .. |
---|
3573 | 1624 | if (rq->bio) { |
---|
3574 | 1625 | rq->biotail->bi_next = bio; |
---|
3575 | 1626 | rq->biotail = bio; |
---|
3576 | | - } else |
---|
| 1627 | + } else { |
---|
3577 | 1628 | rq->bio = rq->biotail = bio; |
---|
| 1629 | + } |
---|
| 1630 | + bio = NULL; |
---|
3578 | 1631 | } |
---|
3579 | 1632 | |
---|
3580 | | - __blk_rq_prep_clone(rq, rq_src); |
---|
| 1633 | + /* Copy attributes of the original request to the clone request. */ |
---|
| 1634 | + rq->__sector = blk_rq_pos(rq_src); |
---|
| 1635 | + rq->__data_len = blk_rq_bytes(rq_src); |
---|
| 1636 | + if (rq_src->rq_flags & RQF_SPECIAL_PAYLOAD) { |
---|
| 1637 | + rq->rq_flags |= RQF_SPECIAL_PAYLOAD; |
---|
| 1638 | + rq->special_vec = rq_src->special_vec; |
---|
| 1639 | + } |
---|
| 1640 | + rq->nr_phys_segments = rq_src->nr_phys_segments; |
---|
| 1641 | + rq->ioprio = rq_src->ioprio; |
---|
| 1642 | + |
---|
| 1643 | + if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0) |
---|
| 1644 | + goto free_and_out; |
---|
3581 | 1645 | |
---|
3582 | 1646 | return 0; |
---|
3583 | 1647 | |
---|
.. | .. |
---|
3596 | 1660 | } |
---|
3597 | 1661 | EXPORT_SYMBOL(kblockd_schedule_work); |
---|
3598 | 1662 | |
---|
3599 | | -int kblockd_schedule_work_on(int cpu, struct work_struct *work) |
---|
3600 | | -{ |
---|
3601 | | - return queue_work_on(cpu, kblockd_workqueue, work); |
---|
3602 | | -} |
---|
3603 | | -EXPORT_SYMBOL(kblockd_schedule_work_on); |
---|
3604 | | - |
---|
3605 | 1663 | int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, |
---|
3606 | 1664 | unsigned long delay) |
---|
3607 | 1665 | { |
---|
.. | .. |
---|
3614 | 1672 | * @plug: The &struct blk_plug that needs to be initialized |
---|
3615 | 1673 | * |
---|
3616 | 1674 | * Description: |
---|
| 1675 | + * blk_start_plug() indicates to the block layer an intent by the caller |
---|
| 1676 | + * to submit multiple I/O requests in a batch. The block layer may use |
---|
| 1677 | + * this hint to defer submitting I/Os from the caller until blk_finish_plug() |
---|
| 1678 | + * is called. However, the block layer may choose to submit requests |
---|
| 1679 | + * before a call to blk_finish_plug() if the number of queued I/Os |
---|
| 1680 | + * exceeds %BLK_MAX_REQUEST_COUNT, or if the size of the I/O is larger than |
---|
| 1681 | + * %BLK_PLUG_FLUSH_SIZE. The queued I/Os may also be submitted early if |
---|
| 1682 | + * the task schedules (see below). |
---|
| 1683 | + * |
---|
3617 | 1684 | * Tracking blk_plug inside the task_struct will help with auto-flushing the |
---|
3618 | 1685 | * pending I/O should the task end up blocking between blk_start_plug() and |
---|
3619 | 1686 | * blk_finish_plug(). This is important from a performance perspective, but |
---|
.. | .. |
---|
3633 | 1700 | if (tsk->plug) |
---|
3634 | 1701 | return; |
---|
3635 | 1702 | |
---|
3636 | | - INIT_LIST_HEAD(&plug->list); |
---|
3637 | 1703 | INIT_LIST_HEAD(&plug->mq_list); |
---|
3638 | 1704 | INIT_LIST_HEAD(&plug->cb_list); |
---|
| 1705 | + plug->rq_count = 0; |
---|
| 1706 | + plug->multiple_queues = false; |
---|
| 1707 | + plug->nowait = false; |
---|
| 1708 | + |
---|
3639 | 1709 | /* |
---|
3640 | 1710 | * Store ordering should not be needed here, since a potential |
---|
3641 | 1711 | * preempt will imply a full memory barrier |
---|
.. | .. |
---|
3643 | 1713 | tsk->plug = plug; |
---|
3644 | 1714 | } |
---|
3645 | 1715 | EXPORT_SYMBOL(blk_start_plug); |
---|
3646 | | - |
---|
3647 | | -static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b) |
---|
3648 | | -{ |
---|
3649 | | - struct request *rqa = container_of(a, struct request, queuelist); |
---|
3650 | | - struct request *rqb = container_of(b, struct request, queuelist); |
---|
3651 | | - |
---|
3652 | | - return !(rqa->q < rqb->q || |
---|
3653 | | - (rqa->q == rqb->q && blk_rq_pos(rqa) < blk_rq_pos(rqb))); |
---|
3654 | | -} |
---|
3655 | | - |
---|
3656 | | -/* |
---|
3657 | | - * If 'from_schedule' is true, then postpone the dispatch of requests |
---|
3658 | | - * until a safe kblockd context. We due this to avoid accidental big |
---|
3659 | | - * additional stack usage in driver dispatch, in places where the originally |
---|
3660 | | - * plugger did not intend it. |
---|
3661 | | - */ |
---|
3662 | | -static void queue_unplugged(struct request_queue *q, unsigned int depth, |
---|
3663 | | - bool from_schedule) |
---|
3664 | | - __releases(q->queue_lock) |
---|
3665 | | -{ |
---|
3666 | | - lockdep_assert_held(q->queue_lock); |
---|
3667 | | - |
---|
3668 | | - trace_block_unplug(q, depth, !from_schedule); |
---|
3669 | | - |
---|
3670 | | - if (from_schedule) |
---|
3671 | | - blk_run_queue_async(q); |
---|
3672 | | - else |
---|
3673 | | - __blk_run_queue(q); |
---|
3674 | | - spin_unlock_irq(q->queue_lock); |
---|
3675 | | -} |
---|
3676 | 1716 | |
---|
3677 | 1717 | static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule) |
---|
3678 | 1718 | { |
---|
.. | .. |
---|
3718 | 1758 | |
---|
3719 | 1759 | void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) |
---|
3720 | 1760 | { |
---|
3721 | | - struct request_queue *q; |
---|
3722 | | - struct request *rq; |
---|
3723 | | - LIST_HEAD(list); |
---|
3724 | | - unsigned int depth; |
---|
3725 | | - |
---|
3726 | 1761 | flush_plug_callbacks(plug, from_schedule); |
---|
3727 | 1762 | |
---|
3728 | 1763 | if (!list_empty(&plug->mq_list)) |
---|
3729 | 1764 | blk_mq_flush_plug_list(plug, from_schedule); |
---|
3730 | | - |
---|
3731 | | - if (list_empty(&plug->list)) |
---|
3732 | | - return; |
---|
3733 | | - |
---|
3734 | | - list_splice_init(&plug->list, &list); |
---|
3735 | | - |
---|
3736 | | - list_sort(NULL, &list, plug_rq_cmp); |
---|
3737 | | - |
---|
3738 | | - q = NULL; |
---|
3739 | | - depth = 0; |
---|
3740 | | - |
---|
3741 | | - while (!list_empty(&list)) { |
---|
3742 | | - rq = list_entry_rq(list.next); |
---|
3743 | | - list_del_init(&rq->queuelist); |
---|
3744 | | - BUG_ON(!rq->q); |
---|
3745 | | - if (rq->q != q) { |
---|
3746 | | - /* |
---|
3747 | | - * This drops the queue lock |
---|
3748 | | - */ |
---|
3749 | | - if (q) |
---|
3750 | | - queue_unplugged(q, depth, from_schedule); |
---|
3751 | | - q = rq->q; |
---|
3752 | | - depth = 0; |
---|
3753 | | - spin_lock_irq(q->queue_lock); |
---|
3754 | | - } |
---|
3755 | | - |
---|
3756 | | - /* |
---|
3757 | | - * Short-circuit if @q is dead |
---|
3758 | | - */ |
---|
3759 | | - if (unlikely(blk_queue_dying(q))) { |
---|
3760 | | - __blk_end_request_all(rq, BLK_STS_IOERR); |
---|
3761 | | - continue; |
---|
3762 | | - } |
---|
3763 | | - |
---|
3764 | | - /* |
---|
3765 | | - * rq is already accounted, so use raw insert |
---|
3766 | | - */ |
---|
3767 | | - if (op_is_flush(rq->cmd_flags)) |
---|
3768 | | - __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH); |
---|
3769 | | - else |
---|
3770 | | - __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE); |
---|
3771 | | - |
---|
3772 | | - depth++; |
---|
3773 | | - } |
---|
3774 | | - |
---|
3775 | | - /* |
---|
3776 | | - * This drops the queue lock |
---|
3777 | | - */ |
---|
3778 | | - if (q) |
---|
3779 | | - queue_unplugged(q, depth, from_schedule); |
---|
3780 | 1765 | } |
---|
3781 | 1766 | |
---|
| 1767 | +/** |
---|
| 1768 | + * blk_finish_plug - mark the end of a batch of submitted I/O |
---|
| 1769 | + * @plug: The &struct blk_plug passed to blk_start_plug() |
---|
| 1770 | + * |
---|
| 1771 | + * Description: |
---|
| 1772 | + * Indicate that a batch of I/O submissions is complete. This function |
---|
| 1773 | + * must be paired with an initial call to blk_start_plug(). The intent |
---|
| 1774 | + * is to allow the block layer to optimize I/O submission. See the |
---|
| 1775 | + * documentation for blk_start_plug() for more information. |
---|
| 1776 | + */ |
---|
3782 | 1777 | void blk_finish_plug(struct blk_plug *plug) |
---|
3783 | 1778 | { |
---|
3784 | 1779 | if (plug != current->plug) |
---|
.. | .. |
---|
3789 | 1784 | } |
---|
3790 | 1785 | EXPORT_SYMBOL(blk_finish_plug); |
---|
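
Putting blk_start_plug() and blk_finish_plug() together, a hedged sketch of the batching pattern the documentation above describes (the bios array, nr, and my_submit_batch() are hypothetical):

#include <linux/blkdev.h>

/* Hypothetical helper: submit a batch of prepared bios under one plug. */
static void my_submit_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);
	for (i = 0; i < nr; i++)
		submit_bio(bios[i]);	/* may be deferred until the unplug */
	blk_finish_plug(&plug);		/* anything still plugged is flushed */
}
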
3791 | 1786 | |
---|
3792 | | -#ifdef CONFIG_PM |
---|
3793 | | -/** |
---|
3794 | | - * blk_pm_runtime_init - Block layer runtime PM initialization routine |
---|
3795 | | - * @q: the queue of the device |
---|
3796 | | - * @dev: the device the queue belongs to |
---|
3797 | | - * |
---|
3798 | | - * Description: |
---|
3799 | | - * Initialize runtime-PM-related fields for @q and start auto suspend for |
---|
3800 | | - * @dev. Drivers that want to take advantage of request-based runtime PM |
---|
3801 | | - * should call this function after @dev has been initialized, and its |
---|
3802 | | - * request queue @q has been allocated, and runtime PM for it can not happen |
---|
3803 | | - * yet(either due to disabled/forbidden or its usage_count > 0). In most |
---|
3804 | | - * cases, driver should call this function before any I/O has taken place. |
---|
3805 | | - * |
---|
3806 | | - * This function takes care of setting up using auto suspend for the device, |
---|
3807 | | - * the autosuspend delay is set to -1 to make runtime suspend impossible |
---|
3808 | | - * until an updated value is either set by user or by driver. Drivers do |
---|
3809 | | - * not need to touch other autosuspend settings. |
---|
3810 | | - * |
---|
3811 | | - * The block layer runtime PM is request based, so only works for drivers |
---|
3812 | | - * that use request as their IO unit instead of those directly use bio's. |
---|
3813 | | - */ |
---|
3814 | | -void blk_pm_runtime_init(struct request_queue *q, struct device *dev) |
---|
| 1787 | +void blk_io_schedule(void) |
---|
3815 | 1788 | { |
---|
3816 | | - /* Don't enable runtime PM for blk-mq until it is ready */ |
---|
3817 | | - if (q->mq_ops) { |
---|
3818 | | - pm_runtime_disable(dev); |
---|
3819 | | - return; |
---|
3820 | | - } |
---|
| 1789 | + /* Prevent hang_check timer from firing at us during very long I/O */ |
---|
| 1790 | + unsigned long timeout = sysctl_hung_task_timeout_secs * HZ / 2; |
---|
3821 | 1791 | |
---|
3822 | | - q->dev = dev; |
---|
3823 | | - q->rpm_status = RPM_ACTIVE; |
---|
3824 | | - pm_runtime_set_autosuspend_delay(q->dev, -1); |
---|
3825 | | - pm_runtime_use_autosuspend(q->dev); |
---|
| 1792 | + if (timeout) |
---|
| 1793 | + io_schedule_timeout(timeout); |
---|
| 1794 | + else |
---|
| 1795 | + io_schedule(); |
---|
3826 | 1796 | } |
---|
3827 | | -EXPORT_SYMBOL(blk_pm_runtime_init); |
---|
3828 | | - |
---|
3829 | | -/** |
---|
3830 | | - * blk_pre_runtime_suspend - Pre runtime suspend check |
---|
3831 | | - * @q: the queue of the device |
---|
3832 | | - * |
---|
3833 | | - * Description: |
---|
3834 | | - * This function will check if runtime suspend is allowed for the device |
---|
3835 | | - * by examining if there are any requests pending in the queue. If there |
---|
3836 | | - * are requests pending, the device can not be runtime suspended; otherwise, |
---|
3837 | | - * the queue's status will be updated to SUSPENDING and the driver can |
---|
3838 | | - * proceed to suspend the device. |
---|
3839 | | - * |
---|
3840 | | - * For the not allowed case, we mark last busy for the device so that |
---|
3841 | | - * runtime PM core will try to autosuspend it some time later. |
---|
3842 | | - * |
---|
3843 | | - * This function should be called near the start of the device's |
---|
3844 | | - * runtime_suspend callback. |
---|
3845 | | - * |
---|
3846 | | - * Return: |
---|
3847 | | - * 0 - OK to runtime suspend the device |
---|
3848 | | - * -EBUSY - Device should not be runtime suspended |
---|
3849 | | - */ |
---|
3850 | | -int blk_pre_runtime_suspend(struct request_queue *q) |
---|
3851 | | -{ |
---|
3852 | | - int ret = 0; |
---|
3853 | | - |
---|
3854 | | - if (!q->dev) |
---|
3855 | | - return ret; |
---|
3856 | | - |
---|
3857 | | - spin_lock_irq(q->queue_lock); |
---|
3858 | | - if (q->nr_pending) { |
---|
3859 | | - ret = -EBUSY; |
---|
3860 | | - pm_runtime_mark_last_busy(q->dev); |
---|
3861 | | - } else { |
---|
3862 | | - q->rpm_status = RPM_SUSPENDING; |
---|
3863 | | - } |
---|
3864 | | - spin_unlock_irq(q->queue_lock); |
---|
3865 | | - return ret; |
---|
3866 | | -} |
---|
3867 | | -EXPORT_SYMBOL(blk_pre_runtime_suspend); |
---|
3868 | | - |
---|
3869 | | -/** |
---|
3870 | | - * blk_post_runtime_suspend - Post runtime suspend processing |
---|
3871 | | - * @q: the queue of the device |
---|
3872 | | - * @err: return value of the device's runtime_suspend function |
---|
3873 | | - * |
---|
3874 | | - * Description: |
---|
3875 | | - * Update the queue's runtime status according to the return value of the |
---|
3876 | | - * device's runtime suspend function and mark last busy for the device so |
---|
3877 | | - * that PM core will try to auto suspend the device at a later time. |
---|
3878 | | - * |
---|
3879 | | - * This function should be called near the end of the device's |
---|
3880 | | - * runtime_suspend callback. |
---|
3881 | | - */ |
---|
3882 | | -void blk_post_runtime_suspend(struct request_queue *q, int err) |
---|
3883 | | -{ |
---|
3884 | | - if (!q->dev) |
---|
3885 | | - return; |
---|
3886 | | - |
---|
3887 | | - spin_lock_irq(q->queue_lock); |
---|
3888 | | - if (!err) { |
---|
3889 | | - q->rpm_status = RPM_SUSPENDED; |
---|
3890 | | - } else { |
---|
3891 | | - q->rpm_status = RPM_ACTIVE; |
---|
3892 | | - pm_runtime_mark_last_busy(q->dev); |
---|
3893 | | - } |
---|
3894 | | - spin_unlock_irq(q->queue_lock); |
---|
3895 | | -} |
---|
3896 | | -EXPORT_SYMBOL(blk_post_runtime_suspend); |
---|
3897 | | - |
---|
3898 | | -/** |
---|
3899 | | - * blk_pre_runtime_resume - Pre runtime resume processing |
---|
3900 | | - * @q: the queue of the device |
---|
3901 | | - * |
---|
3902 | | - * Description: |
---|
3903 | | - * Update the queue's runtime status to RESUMING in preparation for the |
---|
3904 | | - * runtime resume of the device. |
---|
3905 | | - * |
---|
3906 | | - * This function should be called near the start of the device's |
---|
3907 | | - * runtime_resume callback. |
---|
3908 | | - */ |
---|
3909 | | -void blk_pre_runtime_resume(struct request_queue *q) |
---|
3910 | | -{ |
---|
3911 | | - if (!q->dev) |
---|
3912 | | - return; |
---|
3913 | | - |
---|
3914 | | - spin_lock_irq(q->queue_lock); |
---|
3915 | | - q->rpm_status = RPM_RESUMING; |
---|
3916 | | - spin_unlock_irq(q->queue_lock); |
---|
3917 | | -} |
---|
3918 | | -EXPORT_SYMBOL(blk_pre_runtime_resume); |
---|
3919 | | - |
---|
3920 | | -/** |
---|
3921 | | - * blk_post_runtime_resume - Post runtime resume processing |
---|
3922 | | - * @q: the queue of the device |
---|
3923 | | - * @err: return value of the device's runtime_resume function |
---|
3924 | | - * |
---|
3925 | | - * Description: |
---|
3926 | | - * Update the queue's runtime status according to the return value of the |
---|
3927 | | - * device's runtime_resume function. If it is successfully resumed, process |
---|
3928 | | - * the requests that are queued into the device's queue when it is resuming |
---|
3929 | | - * and then mark last busy and initiate autosuspend for it. |
---|
3930 | | - * |
---|
3931 | | - * This function should be called near the end of the device's |
---|
3932 | | - * runtime_resume callback. |
---|
3933 | | - */ |
---|
3934 | | -void blk_post_runtime_resume(struct request_queue *q, int err) |
---|
3935 | | -{ |
---|
3936 | | - if (!q->dev) |
---|
3937 | | - return; |
---|
3938 | | - |
---|
3939 | | - spin_lock_irq(q->queue_lock); |
---|
3940 | | - if (!err) { |
---|
3941 | | - q->rpm_status = RPM_ACTIVE; |
---|
3942 | | - __blk_run_queue(q); |
---|
3943 | | - pm_runtime_mark_last_busy(q->dev); |
---|
3944 | | - pm_request_autosuspend(q->dev); |
---|
3945 | | - } else { |
---|
3946 | | - q->rpm_status = RPM_SUSPENDED; |
---|
3947 | | - } |
---|
3948 | | - spin_unlock_irq(q->queue_lock); |
---|
3949 | | -} |
---|
3950 | | -EXPORT_SYMBOL(blk_post_runtime_resume); |
---|
3951 | | - |
---|
3952 | | -/** |
---|
3953 | | - * blk_set_runtime_active - Force runtime status of the queue to be active |
---|
3954 | | - * @q: the queue of the device |
---|
3955 | | - * |
---|
3956 | | - * If the device is left runtime suspended during system suspend the resume |
---|
3957 | | - * hook typically resumes the device and corrects runtime status |
---|
3958 | | - * accordingly. However, that does not affect the queue runtime PM status |
---|
3959 | | - * which is still "suspended". This prevents processing requests from the |
---|
3960 | | - * queue. |
---|
3961 | | - * |
---|
3962 | | - * This function can be used in driver's resume hook to correct queue |
---|
3963 | | - * runtime PM status and re-enable peeking requests from the queue. It |
---|
3964 | | - * should be called before first request is added to the queue. |
---|
3965 | | - */ |
---|
3966 | | -void blk_set_runtime_active(struct request_queue *q) |
---|
3967 | | -{ |
---|
3968 | | - spin_lock_irq(q->queue_lock); |
---|
3969 | | - q->rpm_status = RPM_ACTIVE; |
---|
3970 | | - pm_runtime_mark_last_busy(q->dev); |
---|
3971 | | - pm_request_autosuspend(q->dev); |
---|
3972 | | - spin_unlock_irq(q->queue_lock); |
---|
3973 | | -} |
---|
3974 | | -EXPORT_SYMBOL(blk_set_runtime_active); |
---|
3975 | | -#endif |
---|
| 1797 | +EXPORT_SYMBOL_GPL(blk_io_schedule); |
---|
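
blk_io_schedule() above caps each sleep at half the hung-task timeout so that long waits for I/O do not trip the watchdog. A hedged sketch of a wait loop built on it (the done flag and its setter are hypothetical; the completion side is assumed to set the flag and wake the task):

#include <linux/sched.h>
#include <linux/blkdev.h>

static void my_wait_for_io(bool *done)
{
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (READ_ONCE(*done))
			break;
		blk_io_schedule();
	}
	__set_current_state(TASK_RUNNING);
}
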
3976 | 1798 | |
---|
3977 | 1799 | int __init blk_dev_init(void) |
---|
3978 | 1800 | { |
---|
3979 | 1801 | BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS)); |
---|
3980 | 1802 | BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 * |
---|
3981 | | - FIELD_SIZEOF(struct request, cmd_flags)); |
---|
| 1803 | + sizeof_field(struct request, cmd_flags)); |
---|
3982 | 1804 | BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 * |
---|
3983 | | - FIELD_SIZEOF(struct bio, bi_opf)); |
---|
| 1805 | + sizeof_field(struct bio, bi_opf)); |
---|
3984 | 1806 | |
---|
3985 | 1807 | /* used for unplugging and affects IO latency/throughput - HIGHPRI */ |
---|
3986 | 1808 | kblockd_workqueue = alloc_workqueue("kblockd", |
---|
.. | .. |
---|
3988 | 1810 | if (!kblockd_workqueue) |
---|
3989 | 1811 | panic("Failed to create kblockd\n"); |
---|
3990 | 1812 | |
---|
3991 | | - request_cachep = kmem_cache_create("blkdev_requests", |
---|
3992 | | - sizeof(struct request), 0, SLAB_PANIC, NULL); |
---|
3993 | | - |
---|
3994 | 1813 | blk_requestq_cachep = kmem_cache_create("request_queue", |
---|
3995 | 1814 | sizeof(struct request_queue), 0, SLAB_PANIC, NULL); |
---|
3996 | 1815 | |
---|
3997 | | -#ifdef CONFIG_DEBUG_FS |
---|
3998 | 1816 | blk_debugfs_root = debugfs_create_dir("block", NULL); |
---|
3999 | | -#endif |
---|
4000 | | - |
---|
4001 | | - if (bio_crypt_ctx_init() < 0) |
---|
4002 | | - panic("Failed to allocate mem for bio crypt ctxs\n"); |
---|
4003 | | - |
---|
4004 | | - if (blk_crypto_fallback_init() < 0) |
---|
4005 | | - panic("Failed to init blk-crypto-fallback\n"); |
---|
4006 | 1817 | |
---|
4007 | 1818 | return 0; |
---|
4008 | 1819 | } |
---|