| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0 |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * Copyright (C) 1991, 1992 Linus Torvalds |
|---|
| 3 | 4 | * Copyright (C) 1994, Karl Keyte: Added support for disk statistics |
|---|
| .. | .. |
|---|
| 19 | 20 | #include <linux/blk-mq.h> |
|---|
| 20 | 21 | #include <linux/highmem.h> |
|---|
| 21 | 22 | #include <linux/mm.h> |
|---|
| 23 | +#include <linux/pagemap.h> |
|---|
| 22 | 24 | #include <linux/kernel_stat.h> |
|---|
| 23 | 25 | #include <linux/string.h> |
|---|
| 24 | 26 | #include <linux/init.h> |
|---|
| .. | .. |
|---|
| 33 | 35 | #include <linux/ratelimit.h> |
|---|
| 34 | 36 | #include <linux/pm_runtime.h> |
|---|
| 35 | 37 | #include <linux/blk-cgroup.h> |
|---|
| 38 | +#include <linux/t10-pi.h> |
|---|
| 36 | 39 | #include <linux/debugfs.h> |
|---|
| 37 | 40 | #include <linux/bpf.h> |
|---|
| 38 | 41 | #include <linux/psi.h> |
|---|
| 42 | +#include <linux/sched/sysctl.h> |
|---|
| 39 | 43 | #include <linux/blk-crypto.h> |
|---|
| 40 | 44 | |
|---|
| 41 | 45 | #define CREATE_TRACE_POINTS |
|---|
| .. | .. |
|---|
| 44 | 48 | #include "blk.h" |
|---|
| 45 | 49 | #include "blk-mq.h" |
|---|
| 46 | 50 | #include "blk-mq-sched.h" |
|---|
| 51 | +#include "blk-pm.h" |
|---|
| 47 | 52 | #include "blk-rq-qos.h" |
|---|
| 48 | 53 | |
|---|
| 49 | | -#ifdef CONFIG_DEBUG_FS |
|---|
| 50 | 54 | struct dentry *blk_debugfs_root; |
|---|
| 51 | | -#endif |
|---|
| 52 | 55 | |
|---|
| 53 | 56 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); |
|---|
| 54 | 57 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); |
|---|
| 55 | 58 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); |
|---|
| 56 | 59 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_split); |
|---|
| 57 | 60 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug); |
|---|
| 61 | +EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_queue); |
|---|
| 62 | +EXPORT_TRACEPOINT_SYMBOL_GPL(block_getrq); |
|---|
| 63 | +EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_insert); |
|---|
| 64 | +EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_issue); |
|---|
| 65 | +EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_merge); |
|---|
| 66 | +EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_requeue); |
|---|
| 67 | +EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_complete); |
|---|
| 58 | 68 | |
|---|
| 59 | 69 | DEFINE_IDA(blk_queue_ida); |
|---|
| 60 | | - |
|---|
| 61 | | -/* |
|---|
| 62 | | - * For the allocated request tables |
|---|
| 63 | | - */ |
|---|
| 64 | | -struct kmem_cache *request_cachep; |
|---|
| 65 | 70 | |
|---|
| 66 | 71 | /* |
|---|
| 67 | 72 | * For queue allocation |
|---|
| .. | .. |
|---|
| 80 | 85 | */ |
|---|
| 81 | 86 | void blk_queue_flag_set(unsigned int flag, struct request_queue *q) |
|---|
| 82 | 87 | { |
|---|
| 83 | | - unsigned long flags; |
|---|
| 84 | | - |
|---|
| 85 | | - spin_lock_irqsave(q->queue_lock, flags); |
|---|
| 86 | | - queue_flag_set(flag, q); |
|---|
| 87 | | - spin_unlock_irqrestore(q->queue_lock, flags); |
|---|
| 88 | + set_bit(flag, &q->queue_flags); |
|---|
| 88 | 89 | } |
|---|
| 89 | 90 | EXPORT_SYMBOL(blk_queue_flag_set); |
|---|
| 90 | 91 | |
|---|
| .. | .. |
|---|
| 95 | 96 | */ |
|---|
| 96 | 97 | void blk_queue_flag_clear(unsigned int flag, struct request_queue *q) |
|---|
| 97 | 98 | { |
|---|
| 98 | | - unsigned long flags; |
|---|
| 99 | | - |
|---|
| 100 | | - spin_lock_irqsave(q->queue_lock, flags); |
|---|
| 101 | | - queue_flag_clear(flag, q); |
|---|
| 102 | | - spin_unlock_irqrestore(q->queue_lock, flags); |
|---|
| 99 | + clear_bit(flag, &q->queue_flags); |
|---|
| 103 | 100 | } |
|---|
| 104 | 101 | EXPORT_SYMBOL(blk_queue_flag_clear); |
|---|
| 105 | 102 | |
|---|
| .. | .. |
|---|
| 113 | 110 | */ |
|---|
| 114 | 111 | bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q) |
|---|
| 115 | 112 | { |
|---|
| 116 | | - unsigned long flags; |
|---|
| 117 | | - bool res; |
|---|
| 118 | | - |
|---|
| 119 | | - spin_lock_irqsave(q->queue_lock, flags); |
|---|
| 120 | | - res = queue_flag_test_and_set(flag, q); |
|---|
| 121 | | - spin_unlock_irqrestore(q->queue_lock, flags); |
|---|
| 122 | | - |
|---|
| 123 | | - return res; |
|---|
| 113 | + return test_and_set_bit(flag, &q->queue_flags); |
|---|
| 124 | 114 | } |
|---|
| 125 | 115 | EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_set); |
|---|
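
The hunk above drops the queue_lock-protected implementations of blk_queue_flag_set()/clear()/test_and_set() in favour of plain atomic bitops on q->queue_flags. A minimal, hypothetical driver-side sketch of how these helpers are typically consumed (the function name and flags chosen are illustrative, not part of this patch):

```c
#include <linux/blkdev.h>

/* Hypothetical example: mark a queue non-rotational and toggle NOMERGES.
 * With this change the helpers below are lock-free atomic bitops, so they
 * may be called from any context without taking a queue lock. */
static void example_tune_queue(struct request_queue *q)
{
	blk_queue_flag_set(QUEUE_FLAG_NONROT, q);

	if (!blk_queue_flag_test_and_set(QUEUE_FLAG_NOMERGES, q))
		pr_debug("NOMERGES was previously clear\n");

	blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
}
```
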
| 126 | | - |
|---|
| 127 | | -/** |
|---|
| 128 | | - * blk_queue_flag_test_and_clear - atomically test and clear a queue flag |
|---|
| 129 | | - * @flag: flag to be cleared |
|---|
| 130 | | - * @q: request queue |
|---|
| 131 | | - * |
|---|
| 132 | | - * Returns the previous value of @flag - 0 if the flag was not set and 1 if |
|---|
| 133 | | - * the flag was set. |
|---|
| 134 | | - */ |
|---|
| 135 | | -bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q) |
|---|
| 136 | | -{ |
|---|
| 137 | | - unsigned long flags; |
|---|
| 138 | | - bool res; |
|---|
| 139 | | - |
|---|
| 140 | | - spin_lock_irqsave(q->queue_lock, flags); |
|---|
| 141 | | - res = queue_flag_test_and_clear(flag, q); |
|---|
| 142 | | - spin_unlock_irqrestore(q->queue_lock, flags); |
|---|
| 143 | | - |
|---|
| 144 | | - return res; |
|---|
| 145 | | -} |
|---|
| 146 | | -EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_clear); |
|---|
| 147 | | - |
|---|
| 148 | | -static void blk_clear_congested(struct request_list *rl, int sync) |
|---|
| 149 | | -{ |
|---|
| 150 | | -#ifdef CONFIG_CGROUP_WRITEBACK |
|---|
| 151 | | - clear_wb_congested(rl->blkg->wb_congested, sync); |
|---|
| 152 | | -#else |
|---|
| 153 | | - /* |
|---|
| 154 | | - * If !CGROUP_WRITEBACK, all blkg's map to bdi->wb and we shouldn't |
|---|
| 155 | | - * flip its congestion state for events on other blkcgs. |
|---|
| 156 | | - */ |
|---|
| 157 | | - if (rl == &rl->q->root_rl) |
|---|
| 158 | | - clear_wb_congested(rl->q->backing_dev_info->wb.congested, sync); |
|---|
| 159 | | -#endif |
|---|
| 160 | | -} |
|---|
| 161 | | - |
|---|
| 162 | | -static void blk_set_congested(struct request_list *rl, int sync) |
|---|
| 163 | | -{ |
|---|
| 164 | | -#ifdef CONFIG_CGROUP_WRITEBACK |
|---|
| 165 | | - set_wb_congested(rl->blkg->wb_congested, sync); |
|---|
| 166 | | -#else |
|---|
| 167 | | - /* see blk_clear_congested() */ |
|---|
| 168 | | - if (rl == &rl->q->root_rl) |
|---|
| 169 | | - set_wb_congested(rl->q->backing_dev_info->wb.congested, sync); |
|---|
| 170 | | -#endif |
|---|
| 171 | | -} |
|---|
| 172 | | - |
|---|
| 173 | | -void blk_queue_congestion_threshold(struct request_queue *q) |
|---|
| 174 | | -{ |
|---|
| 175 | | - int nr; |
|---|
| 176 | | - |
|---|
| 177 | | - nr = q->nr_requests - (q->nr_requests / 8) + 1; |
|---|
| 178 | | - if (nr > q->nr_requests) |
|---|
| 179 | | - nr = q->nr_requests; |
|---|
| 180 | | - q->nr_congestion_on = nr; |
|---|
| 181 | | - |
|---|
| 182 | | - nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; |
|---|
| 183 | | - if (nr < 1) |
|---|
| 184 | | - nr = 1; |
|---|
| 185 | | - q->nr_congestion_off = nr; |
|---|
| 186 | | -} |
|---|
| 187 | 116 | |
|---|
| 188 | 117 | void blk_rq_init(struct request_queue *q, struct request *rq) |
|---|
| 189 | 118 | { |
|---|
| 190 | 119 | memset(rq, 0, sizeof(*rq)); |
|---|
| 191 | 120 | |
|---|
| 192 | 121 | INIT_LIST_HEAD(&rq->queuelist); |
|---|
| 193 | | - INIT_LIST_HEAD(&rq->timeout_list); |
|---|
| 194 | | -#ifdef CONFIG_PREEMPT_RT_FULL |
|---|
| 195 | | - INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work); |
|---|
| 196 | | -#endif |
|---|
| 197 | | - rq->cpu = -1; |
|---|
| 198 | 122 | rq->q = q; |
|---|
| 199 | 123 | rq->__sector = (sector_t) -1; |
|---|
| 200 | 124 | INIT_HLIST_NODE(&rq->hash); |
|---|
| 201 | 125 | RB_CLEAR_NODE(&rq->rb_node); |
|---|
| 202 | | - rq->tag = -1; |
|---|
| 203 | | - rq->internal_tag = -1; |
|---|
| 126 | + rq->tag = BLK_MQ_NO_TAG; |
|---|
| 127 | + rq->internal_tag = BLK_MQ_NO_TAG; |
|---|
| 204 | 128 | rq->start_time_ns = ktime_get_ns(); |
|---|
| 205 | 129 | rq->part = NULL; |
|---|
| 206 | | - refcount_set(&rq->ref, 1); |
|---|
| 130 | + blk_crypto_rq_set_defaults(rq); |
|---|
| 207 | 131 | } |
|---|
| 208 | 132 | EXPORT_SYMBOL(blk_rq_init); |
|---|
| 133 | + |
|---|
| 134 | +#define REQ_OP_NAME(name) [REQ_OP_##name] = #name |
|---|
| 135 | +static const char *const blk_op_name[] = { |
|---|
| 136 | + REQ_OP_NAME(READ), |
|---|
| 137 | + REQ_OP_NAME(WRITE), |
|---|
| 138 | + REQ_OP_NAME(FLUSH), |
|---|
| 139 | + REQ_OP_NAME(DISCARD), |
|---|
| 140 | + REQ_OP_NAME(SECURE_ERASE), |
|---|
| 141 | + REQ_OP_NAME(ZONE_RESET), |
|---|
| 142 | + REQ_OP_NAME(ZONE_RESET_ALL), |
|---|
| 143 | + REQ_OP_NAME(ZONE_OPEN), |
|---|
| 144 | + REQ_OP_NAME(ZONE_CLOSE), |
|---|
| 145 | + REQ_OP_NAME(ZONE_FINISH), |
|---|
| 146 | + REQ_OP_NAME(ZONE_APPEND), |
|---|
| 147 | + REQ_OP_NAME(WRITE_SAME), |
|---|
| 148 | + REQ_OP_NAME(WRITE_ZEROES), |
|---|
| 149 | + REQ_OP_NAME(SCSI_IN), |
|---|
| 150 | + REQ_OP_NAME(SCSI_OUT), |
|---|
| 151 | + REQ_OP_NAME(DRV_IN), |
|---|
| 152 | + REQ_OP_NAME(DRV_OUT), |
|---|
| 153 | +}; |
|---|
| 154 | +#undef REQ_OP_NAME |
|---|
| 155 | + |
|---|
| 156 | +/** |
|---|
| 157 | + * blk_op_str - Return string XXX in the REQ_OP_XXX. |
|---|
| 158 | + * @op: REQ_OP_XXX. |
|---|
| 159 | + * |
|---|
| 160 | + * Description: Centralize block layer function to convert REQ_OP_XXX into |
|---|
| 161 | + * string format. Useful in the debugging and tracing bio or request. For |
|---|
| 162 | + * invalid REQ_OP_XXX it returns string "UNKNOWN". |
|---|
| 163 | + */ |
|---|
| 164 | +inline const char *blk_op_str(unsigned int op) |
|---|
| 165 | +{ |
|---|
| 166 | + const char *op_str = "UNKNOWN"; |
|---|
| 167 | + |
|---|
| 168 | + if (op < ARRAY_SIZE(blk_op_name) && blk_op_name[op]) |
|---|
| 169 | + op_str = blk_op_name[op]; |
|---|
| 170 | + |
|---|
| 171 | + return op_str; |
|---|
| 172 | +} |
|---|
| 173 | +EXPORT_SYMBOL_GPL(blk_op_str); |
|---|
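
blk_op_str(), added above, is a thin lookup into the blk_op_name[] table that falls back to "UNKNOWN" for opcodes without an entry. A hedged usage sketch (the tracing helper itself is hypothetical):

```c
#include <linux/blkdev.h>

/* Hypothetical debug helper: print a bio's opcode both numerically and as
 * the REQ_OP_* name resolved by blk_op_str(). */
static void example_trace_bio(struct bio *bio)
{
	pr_debug("op 0x%x (%s), %u bytes\n",
		 bio_op(bio), blk_op_str(bio_op(bio)),
		 bio->bi_iter.bi_size);
}
```
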
| 209 | 174 | |
|---|
| 210 | 175 | static const struct { |
|---|
| 211 | 176 | int errno; |
|---|
| .. | .. |
|---|
| 226 | 191 | |
|---|
| 227 | 192 | /* device mapper special case, should not leak out: */ |
|---|
| 228 | 193 | [BLK_STS_DM_REQUEUE] = { -EREMCHG, "dm internal retry" }, |
|---|
| 194 | + |
|---|
| 195 | + /* zone device specific errors */ |
|---|
| 196 | + [BLK_STS_ZONE_OPEN_RESOURCE] = { -ETOOMANYREFS, "open zones exceeded" }, |
|---|
| 197 | + [BLK_STS_ZONE_ACTIVE_RESOURCE] = { -EOVERFLOW, "active zones exceeded" }, |
|---|
| 229 | 198 | |
|---|
| 230 | 199 | /* everything else not covered above: */ |
|---|
| 231 | 200 | [BLK_STS_IOERR] = { -EIO, "I/O" }, |
|---|
| .. | .. |
|---|
| 254 | 223 | } |
|---|
| 255 | 224 | EXPORT_SYMBOL_GPL(blk_status_to_errno); |
|---|
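
The blk_errors[] table gains two zoned-device entries, so BLK_STS_ZONE_OPEN_RESOURCE and BLK_STS_ZONE_ACTIVE_RESOURCE now map to -ETOOMANYREFS and -EOVERFLOW through blk_status_to_errno(). A small sketch of the conversion pair (the completion helper is hypothetical):

```c
#include <linux/blkdev.h>
#include <linux/blk_types.h>

/* Hypothetical conversion example: a driver translating an internal errno
 * into a blk_status_t for completion, and back again for logging. */
static blk_status_t example_complete(int hw_errno)
{
	blk_status_t status = errno_to_blk_status(hw_errno);

	if (status != BLK_STS_OK)
		pr_warn("request failed: %d\n", blk_status_to_errno(status));

	return status;
}
```
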
| 256 | 225 | |
|---|
| 257 | | -static void print_req_error(struct request *req, blk_status_t status) |
|---|
| 226 | +static void print_req_error(struct request *req, blk_status_t status, |
|---|
| 227 | + const char *caller) |
|---|
| 258 | 228 | { |
|---|
| 259 | 229 | int idx = (__force int)status; |
|---|
| 260 | 230 | |
|---|
| 261 | 231 | if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors))) |
|---|
| 262 | 232 | return; |
|---|
| 263 | 233 | |
|---|
| 264 | | - printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n", |
|---|
| 265 | | - __func__, blk_errors[idx].name, req->rq_disk ? |
|---|
| 266 | | - req->rq_disk->disk_name : "?", |
|---|
| 267 | | - (unsigned long long)blk_rq_pos(req)); |
|---|
| 234 | + printk_ratelimited(KERN_ERR |
|---|
| 235 | + "%s: %s error, dev %s, sector %llu op 0x%x:(%s) flags 0x%x " |
|---|
| 236 | + "phys_seg %u prio class %u\n", |
|---|
| 237 | + caller, blk_errors[idx].name, |
|---|
| 238 | + req->rq_disk ? req->rq_disk->disk_name : "?", |
|---|
| 239 | + blk_rq_pos(req), req_op(req), blk_op_str(req_op(req)), |
|---|
| 240 | + req->cmd_flags & ~REQ_OP_MASK, |
|---|
| 241 | + req->nr_phys_segments, |
|---|
| 242 | + IOPRIO_PRIO_CLASS(req->ioprio)); |
|---|
| 268 | 243 | } |
|---|
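
print_req_error() now takes a caller string and additionally prints the opcode (via blk_op_str()), the command flags, the physical segment count and the I/O priority class. Its call sites elsewhere in blk-core.c (outside this hunk) pass __func__; a hedged reconstruction of that usage, not a literal quote of the patch:

```c
/* Hypothetical caller inside blk-core.c, where print_req_error() is static. */
static void example_report_failure(struct request *req, blk_status_t error)
{
	if (unlikely(error && !blk_rq_is_passthrough(req) &&
		     !(req->rq_flags & RQF_QUIET)))
		print_req_error(req, error, __func__);
	/*
	 * Resulting ratelimited message, roughly:
	 *   example_report_failure: I/O error, dev sda, sector 4096
	 *   op 0x1:(WRITE) flags 0x8800 phys_seg 1 prio class 0
	 */
}
```
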
| 269 | 244 | |
|---|
| 270 | 245 | static void req_bio_endio(struct request *rq, struct bio *bio, |
|---|
| .. | .. |
|---|
| 277 | 252 | bio_set_flag(bio, BIO_QUIET); |
|---|
| 278 | 253 | |
|---|
| 279 | 254 | bio_advance(bio, nbytes); |
|---|
| 255 | + |
|---|
| 256 | + if (req_op(rq) == REQ_OP_ZONE_APPEND && error == BLK_STS_OK) { |
|---|
| 257 | + /* |
|---|
| 258 | + * Partial zone append completions cannot be supported as the |
|---|
| 259 | + * BIO fragments may end up not being written sequentially. |
|---|
| 260 | + */ |
|---|
| 261 | + if (bio->bi_iter.bi_size) |
|---|
| 262 | + bio->bi_status = BLK_STS_IOERR; |
|---|
| 263 | + else |
|---|
| 264 | + bio->bi_iter.bi_sector = rq->__sector; |
|---|
| 265 | + } |
|---|
| 280 | 266 | |
|---|
| 281 | 267 | /* don't actually finish bio if it's part of flush sequence */ |
|---|
| 282 | 268 | if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ)) |
|---|
| .. | .. |
|---|
| 297 | 283 | } |
|---|
| 298 | 284 | EXPORT_SYMBOL(blk_dump_rq_flags); |
|---|
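
The REQ_OP_ZONE_APPEND branch added to req_bio_endio() above refuses partial completions and, on success, copies the request's start sector into the bio so the submitter learns where the append actually landed. A hypothetical bi_end_io callback consuming that sector:

```c
#include <linux/blkdev.h>

/* Hypothetical completion for a zone-append bio: on success the block layer
 * has rewritten bi_iter.bi_sector to the sector the data was written at. */
static void example_zone_append_end_io(struct bio *bio)
{
	if (bio->bi_status)
		pr_err("zone append failed: %d\n",
		       blk_status_to_errno(bio->bi_status));
	else
		pr_debug("zone append landed at sector %llu\n",
			 (unsigned long long)bio->bi_iter.bi_sector);

	bio_put(bio);
}
```
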
| 299 | 285 | |
|---|
| 300 | | -static void blk_delay_work(struct work_struct *work) |
|---|
| 301 | | -{ |
|---|
| 302 | | - struct request_queue *q; |
|---|
| 303 | | - |
|---|
| 304 | | - q = container_of(work, struct request_queue, delay_work.work); |
|---|
| 305 | | - spin_lock_irq(q->queue_lock); |
|---|
| 306 | | - __blk_run_queue(q); |
|---|
| 307 | | - spin_unlock_irq(q->queue_lock); |
|---|
| 308 | | -} |
|---|
| 309 | | - |
|---|
| 310 | | -/** |
|---|
| 311 | | - * blk_delay_queue - restart queueing after defined interval |
|---|
| 312 | | - * @q: The &struct request_queue in question |
|---|
| 313 | | - * @msecs: Delay in msecs |
|---|
| 314 | | - * |
|---|
| 315 | | - * Description: |
|---|
| 316 | | - * Sometimes queueing needs to be postponed for a little while, to allow |
|---|
| 317 | | - * resources to come back. This function will make sure that queueing is |
|---|
| 318 | | - * restarted around the specified time. |
|---|
| 319 | | - */ |
|---|
| 320 | | -void blk_delay_queue(struct request_queue *q, unsigned long msecs) |
|---|
| 321 | | -{ |
|---|
| 322 | | - lockdep_assert_held(q->queue_lock); |
|---|
| 323 | | - WARN_ON_ONCE(q->mq_ops); |
|---|
| 324 | | - |
|---|
| 325 | | - if (likely(!blk_queue_dead(q))) |
|---|
| 326 | | - queue_delayed_work(kblockd_workqueue, &q->delay_work, |
|---|
| 327 | | - msecs_to_jiffies(msecs)); |
|---|
| 328 | | -} |
|---|
| 329 | | -EXPORT_SYMBOL(blk_delay_queue); |
|---|
| 330 | | - |
|---|
| 331 | | -/** |
|---|
| 332 | | - * blk_start_queue_async - asynchronously restart a previously stopped queue |
|---|
| 333 | | - * @q: The &struct request_queue in question |
|---|
| 334 | | - * |
|---|
| 335 | | - * Description: |
|---|
| 336 | | - * blk_start_queue_async() will clear the stop flag on the queue, and |
|---|
| 337 | | - * ensure that the request_fn for the queue is run from an async |
|---|
| 338 | | - * context. |
|---|
| 339 | | - **/ |
|---|
| 340 | | -void blk_start_queue_async(struct request_queue *q) |
|---|
| 341 | | -{ |
|---|
| 342 | | - lockdep_assert_held(q->queue_lock); |
|---|
| 343 | | - WARN_ON_ONCE(q->mq_ops); |
|---|
| 344 | | - |
|---|
| 345 | | - queue_flag_clear(QUEUE_FLAG_STOPPED, q); |
|---|
| 346 | | - blk_run_queue_async(q); |
|---|
| 347 | | -} |
|---|
| 348 | | -EXPORT_SYMBOL(blk_start_queue_async); |
|---|
| 349 | | - |
|---|
| 350 | | -/** |
|---|
| 351 | | - * blk_start_queue - restart a previously stopped queue |
|---|
| 352 | | - * @q: The &struct request_queue in question |
|---|
| 353 | | - * |
|---|
| 354 | | - * Description: |
|---|
| 355 | | - * blk_start_queue() will clear the stop flag on the queue, and call |
|---|
| 356 | | - * the request_fn for the queue if it was in a stopped state when |
|---|
| 357 | | - * entered. Also see blk_stop_queue(). |
|---|
| 358 | | - **/ |
|---|
| 359 | | -void blk_start_queue(struct request_queue *q) |
|---|
| 360 | | -{ |
|---|
| 361 | | - lockdep_assert_held(q->queue_lock); |
|---|
| 362 | | - WARN_ON_ONCE(q->mq_ops); |
|---|
| 363 | | - |
|---|
| 364 | | - queue_flag_clear(QUEUE_FLAG_STOPPED, q); |
|---|
| 365 | | - __blk_run_queue(q); |
|---|
| 366 | | -} |
|---|
| 367 | | -EXPORT_SYMBOL(blk_start_queue); |
|---|
| 368 | | - |
|---|
| 369 | | -/** |
|---|
| 370 | | - * blk_stop_queue - stop a queue |
|---|
| 371 | | - * @q: The &struct request_queue in question |
|---|
| 372 | | - * |
|---|
| 373 | | - * Description: |
|---|
| 374 | | - * The Linux block layer assumes that a block driver will consume all |
|---|
| 375 | | - * entries on the request queue when the request_fn strategy is called. |
|---|
| 376 | | - * Often this will not happen, because of hardware limitations (queue |
|---|
| 377 | | - * depth settings). If a device driver gets a 'queue full' response, |
|---|
| 378 | | - * or if it simply chooses not to queue more I/O at one point, it can |
|---|
| 379 | | - * call this function to prevent the request_fn from being called until |
|---|
| 380 | | - * the driver has signalled it's ready to go again. This happens by calling |
|---|
| 381 | | - * blk_start_queue() to restart queue operations. |
|---|
| 382 | | - **/ |
|---|
| 383 | | -void blk_stop_queue(struct request_queue *q) |
|---|
| 384 | | -{ |
|---|
| 385 | | - lockdep_assert_held(q->queue_lock); |
|---|
| 386 | | - WARN_ON_ONCE(q->mq_ops); |
|---|
| 387 | | - |
|---|
| 388 | | - cancel_delayed_work(&q->delay_work); |
|---|
| 389 | | - queue_flag_set(QUEUE_FLAG_STOPPED, q); |
|---|
| 390 | | -} |
|---|
| 391 | | -EXPORT_SYMBOL(blk_stop_queue); |
|---|
| 392 | | - |
|---|
| 393 | 286 | /** |
|---|
| 394 | 287 | * blk_sync_queue - cancel any pending callbacks on a queue |
|---|
| 395 | 288 | * @q: the queue |
|---|
| .. | .. |
|---|
| 400 | 293 | * A block device may call blk_sync_queue to ensure that any |
|---|
| 401 | 294 | * such activity is cancelled, thus allowing it to release resources |
|---|
| 402 | 295 | * that the callbacks might use. The caller must already have made sure |
|---|
| 403 | | - * that its ->make_request_fn will not re-add plugging prior to calling |
|---|
| 296 | + * that its ->submit_bio will not re-add plugging prior to calling |
|---|
| 404 | 297 | * this function. |
|---|
| 405 | 298 | * |
|---|
| 406 | 299 | * This function does not cancel any asynchronous activity arising |
|---|
| .. | .. |
|---|
| 412 | 305 | { |
|---|
| 413 | 306 | del_timer_sync(&q->timeout); |
|---|
| 414 | 307 | cancel_work_sync(&q->timeout_work); |
|---|
| 415 | | - |
|---|
| 416 | | - if (q->mq_ops) { |
|---|
| 417 | | - struct blk_mq_hw_ctx *hctx; |
|---|
| 418 | | - int i; |
|---|
| 419 | | - |
|---|
| 420 | | - queue_for_each_hw_ctx(q, hctx, i) |
|---|
| 421 | | - cancel_delayed_work_sync(&hctx->run_work); |
|---|
| 422 | | - } else { |
|---|
| 423 | | - cancel_delayed_work_sync(&q->delay_work); |
|---|
| 424 | | - } |
|---|
| 425 | 308 | } |
|---|
| 426 | 309 | EXPORT_SYMBOL(blk_sync_queue); |
|---|
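
With request_fn gone, blk_sync_queue() only needs to stop the queue timeout machinery; the per-hctx and delay_work branches are removed. A hedged teardown sketch showing where a driver would still call it (the device structure and timer are illustrative, not from this patch):

```c
#include <linux/blkdev.h>
#include <linux/timer.h>

struct example_dev {
	struct request_queue *queue;
	struct timer_list poll_timer;	/* driver-private, illustrative only */
};

/* Hypothetical quiesce path: stop driver-side timers first, then cancel any
 * pending queue timeout work before releasing resources they might touch. */
static void example_quiesce(struct example_dev *dev)
{
	del_timer_sync(&dev->poll_timer);
	blk_sync_queue(dev->queue);
}
```
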
| 427 | 310 | |
|---|
| .. | .. |
|---|
| 447 | 330 | EXPORT_SYMBOL_GPL(blk_clear_pm_only); |
|---|
| 448 | 331 | |
|---|
| 449 | 332 | /** |
|---|
| 450 | | - * __blk_run_queue_uncond - run a queue whether or not it has been stopped |
|---|
| 451 | | - * @q: The queue to run |
|---|
| 333 | + * blk_put_queue - decrement the request_queue refcount |
|---|
| 334 | + * @q: the request_queue structure to decrement the refcount for |
|---|
| 452 | 335 | * |
|---|
| 453 | | - * Description: |
|---|
| 454 | | - * Invoke request handling on a queue if there are any pending requests. |
|---|
| 455 | | - * May be used to restart request handling after a request has completed. |
|---|
| 456 | | - * This variant runs the queue whether or not the queue has been |
|---|
| 457 | | - * stopped. Must be called with the queue lock held and interrupts |
|---|
| 458 | | - * disabled. See also @blk_run_queue. |
|---|
| 336 | + * Decrements the refcount of the request_queue kobject. When this reaches 0 |
|---|
| 337 | + * we'll have blk_release_queue() called. |
|---|
| 338 | + * |
|---|
| 339 | + * Context: Any context, but the last reference must not be dropped from |
|---|
| 340 | + * atomic context. |
|---|
| 459 | 341 | */ |
|---|
| 460 | | -inline void __blk_run_queue_uncond(struct request_queue *q) |
|---|
| 461 | | -{ |
|---|
| 462 | | - lockdep_assert_held(q->queue_lock); |
|---|
| 463 | | - WARN_ON_ONCE(q->mq_ops); |
|---|
| 464 | | - |
|---|
| 465 | | - if (unlikely(blk_queue_dead(q))) |
|---|
| 466 | | - return; |
|---|
| 467 | | - |
|---|
| 468 | | - /* |
|---|
| 469 | | - * Some request_fn implementations, e.g. scsi_request_fn(), unlock |
|---|
| 470 | | - * the queue lock internally. As a result multiple threads may be |
|---|
| 471 | | - * running such a request function concurrently. Keep track of the |
|---|
| 472 | | - * number of active request_fn invocations such that blk_drain_queue() |
|---|
| 473 | | - * can wait until all these request_fn calls have finished. |
|---|
| 474 | | - */ |
|---|
| 475 | | - q->request_fn_active++; |
|---|
| 476 | | - q->request_fn(q); |
|---|
| 477 | | - q->request_fn_active--; |
|---|
| 478 | | -} |
|---|
| 479 | | -EXPORT_SYMBOL_GPL(__blk_run_queue_uncond); |
|---|
| 480 | | - |
|---|
| 481 | | -/** |
|---|
| 482 | | - * __blk_run_queue - run a single device queue |
|---|
| 483 | | - * @q: The queue to run |
|---|
| 484 | | - * |
|---|
| 485 | | - * Description: |
|---|
| 486 | | - * See @blk_run_queue. |
|---|
| 487 | | - */ |
|---|
| 488 | | -void __blk_run_queue(struct request_queue *q) |
|---|
| 489 | | -{ |
|---|
| 490 | | - lockdep_assert_held(q->queue_lock); |
|---|
| 491 | | - WARN_ON_ONCE(q->mq_ops); |
|---|
| 492 | | - |
|---|
| 493 | | - if (unlikely(blk_queue_stopped(q))) |
|---|
| 494 | | - return; |
|---|
| 495 | | - |
|---|
| 496 | | - __blk_run_queue_uncond(q); |
|---|
| 497 | | -} |
|---|
| 498 | | -EXPORT_SYMBOL(__blk_run_queue); |
|---|
| 499 | | - |
|---|
| 500 | | -/** |
|---|
| 501 | | - * blk_run_queue_async - run a single device queue in workqueue context |
|---|
| 502 | | - * @q: The queue to run |
|---|
| 503 | | - * |
|---|
| 504 | | - * Description: |
|---|
| 505 | | - * Tells kblockd to perform the equivalent of @blk_run_queue on behalf |
|---|
| 506 | | - * of us. |
|---|
| 507 | | - * |
|---|
| 508 | | - * Note: |
|---|
| 509 | | - * Since it is not allowed to run q->delay_work after blk_cleanup_queue() |
|---|
| 510 | | - * has canceled q->delay_work, callers must hold the queue lock to avoid |
|---|
| 511 | | - * race conditions between blk_cleanup_queue() and blk_run_queue_async(). |
|---|
| 512 | | - */ |
|---|
| 513 | | -void blk_run_queue_async(struct request_queue *q) |
|---|
| 514 | | -{ |
|---|
| 515 | | - lockdep_assert_held(q->queue_lock); |
|---|
| 516 | | - WARN_ON_ONCE(q->mq_ops); |
|---|
| 517 | | - |
|---|
| 518 | | - if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q))) |
|---|
| 519 | | - mod_delayed_work(kblockd_workqueue, &q->delay_work, 0); |
|---|
| 520 | | -} |
|---|
| 521 | | -EXPORT_SYMBOL(blk_run_queue_async); |
|---|
| 522 | | - |
|---|
| 523 | | -/** |
|---|
| 524 | | - * blk_run_queue - run a single device queue |
|---|
| 525 | | - * @q: The queue to run |
|---|
| 526 | | - * |
|---|
| 527 | | - * Description: |
|---|
| 528 | | - * Invoke request handling on this queue, if it has pending work to do. |
|---|
| 529 | | - * May be used to restart queueing when a request has completed. |
|---|
| 530 | | - */ |
|---|
| 531 | | -void blk_run_queue(struct request_queue *q) |
|---|
| 532 | | -{ |
|---|
| 533 | | - unsigned long flags; |
|---|
| 534 | | - |
|---|
| 535 | | - WARN_ON_ONCE(q->mq_ops); |
|---|
| 536 | | - |
|---|
| 537 | | - spin_lock_irqsave(q->queue_lock, flags); |
|---|
| 538 | | - __blk_run_queue(q); |
|---|
| 539 | | - spin_unlock_irqrestore(q->queue_lock, flags); |
|---|
| 540 | | -} |
|---|
| 541 | | -EXPORT_SYMBOL(blk_run_queue); |
|---|
| 542 | | - |
|---|
| 543 | 342 | void blk_put_queue(struct request_queue *q) |
|---|
| 544 | 343 | { |
|---|
| 545 | 344 | kobject_put(&q->kobj); |
|---|
| 546 | 345 | } |
|---|
| 547 | 346 | EXPORT_SYMBOL(blk_put_queue); |
|---|
| 548 | | - |
|---|
| 549 | | -/** |
|---|
| 550 | | - * __blk_drain_queue - drain requests from request_queue |
|---|
| 551 | | - * @q: queue to drain |
|---|
| 552 | | - * @drain_all: whether to drain all requests or only the ones w/ ELVPRIV |
|---|
| 553 | | - * |
|---|
| 554 | | - * Drain requests from @q. If @drain_all is set, all requests are drained. |
|---|
| 555 | | - * If not, only ELVPRIV requests are drained. The caller is responsible |
|---|
| 556 | | - * for ensuring that no new requests which need to be drained are queued. |
|---|
| 557 | | - */ |
|---|
| 558 | | -static void __blk_drain_queue(struct request_queue *q, bool drain_all) |
|---|
| 559 | | - __releases(q->queue_lock) |
|---|
| 560 | | - __acquires(q->queue_lock) |
|---|
| 561 | | -{ |
|---|
| 562 | | - int i; |
|---|
| 563 | | - |
|---|
| 564 | | - lockdep_assert_held(q->queue_lock); |
|---|
| 565 | | - WARN_ON_ONCE(q->mq_ops); |
|---|
| 566 | | - |
|---|
| 567 | | - while (true) { |
|---|
| 568 | | - bool drain = false; |
|---|
| 569 | | - |
|---|
| 570 | | - /* |
|---|
| 571 | | - * The caller might be trying to drain @q before its |
|---|
| 572 | | - * elevator is initialized. |
|---|
| 573 | | - */ |
|---|
| 574 | | - if (q->elevator) |
|---|
| 575 | | - elv_drain_elevator(q); |
|---|
| 576 | | - |
|---|
| 577 | | - blkcg_drain_queue(q); |
|---|
| 578 | | - |
|---|
| 579 | | - /* |
|---|
| 580 | | - * This function might be called on a queue which failed |
|---|
| 581 | | - * driver init after queue creation or is not yet fully |
|---|
| 582 | | - * active yet. Some drivers (e.g. fd and loop) get unhappy |
|---|
| 583 | | - * in such cases. Kick queue iff dispatch queue has |
|---|
| 584 | | - * something on it and @q has request_fn set. |
|---|
| 585 | | - */ |
|---|
| 586 | | - if (!list_empty(&q->queue_head) && q->request_fn) |
|---|
| 587 | | - __blk_run_queue(q); |
|---|
| 588 | | - |
|---|
| 589 | | - drain |= q->nr_rqs_elvpriv; |
|---|
| 590 | | - drain |= q->request_fn_active; |
|---|
| 591 | | - |
|---|
| 592 | | - /* |
|---|
| 593 | | - * Unfortunately, requests are queued at and tracked from |
|---|
| 594 | | - * multiple places and there's no single counter which can |
|---|
| 595 | | - * be drained. Check all the queues and counters. |
|---|
| 596 | | - */ |
|---|
| 597 | | - if (drain_all) { |
|---|
| 598 | | - struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL); |
|---|
| 599 | | - drain |= !list_empty(&q->queue_head); |
|---|
| 600 | | - for (i = 0; i < 2; i++) { |
|---|
| 601 | | - drain |= q->nr_rqs[i]; |
|---|
| 602 | | - drain |= q->in_flight[i]; |
|---|
| 603 | | - if (fq) |
|---|
| 604 | | - drain |= !list_empty(&fq->flush_queue[i]); |
|---|
| 605 | | - } |
|---|
| 606 | | - } |
|---|
| 607 | | - |
|---|
| 608 | | - if (!drain) |
|---|
| 609 | | - break; |
|---|
| 610 | | - |
|---|
| 611 | | - spin_unlock_irq(q->queue_lock); |
|---|
| 612 | | - |
|---|
| 613 | | - msleep(10); |
|---|
| 614 | | - |
|---|
| 615 | | - spin_lock_irq(q->queue_lock); |
|---|
| 616 | | - } |
|---|
| 617 | | - |
|---|
| 618 | | - /* |
|---|
| 619 | | - * With queue marked dead, any woken up waiter will fail the |
|---|
| 620 | | - * allocation path, so the wakeup chaining is lost and we're |
|---|
| 621 | | - * left with hung waiters. We need to wake up those waiters. |
|---|
| 622 | | - */ |
|---|
| 623 | | - if (q->request_fn) { |
|---|
| 624 | | - struct request_list *rl; |
|---|
| 625 | | - |
|---|
| 626 | | - blk_queue_for_each_rl(rl, q) |
|---|
| 627 | | - for (i = 0; i < ARRAY_SIZE(rl->wait); i++) |
|---|
| 628 | | - wake_up_all(&rl->wait[i]); |
|---|
| 629 | | - } |
|---|
| 630 | | -} |
|---|
| 631 | | - |
|---|
| 632 | | -void blk_drain_queue(struct request_queue *q) |
|---|
| 633 | | -{ |
|---|
| 634 | | - spin_lock_irq(q->queue_lock); |
|---|
| 635 | | - __blk_drain_queue(q, true); |
|---|
| 636 | | - spin_unlock_irq(q->queue_lock); |
|---|
| 637 | | -} |
|---|
| 638 | | - |
|---|
| 639 | | -/** |
|---|
| 640 | | - * blk_queue_bypass_start - enter queue bypass mode |
|---|
| 641 | | - * @q: queue of interest |
|---|
| 642 | | - * |
|---|
| 643 | | - * In bypass mode, only the dispatch FIFO queue of @q is used. This |
|---|
| 644 | | - * function makes @q enter bypass mode and drains all requests which were |
|---|
| 645 | | - * throttled or issued before. On return, it's guaranteed that no request |
|---|
| 646 | | - * is being throttled or has ELVPRIV set and blk_queue_bypass() %true |
|---|
| 647 | | - * inside queue or RCU read lock. |
|---|
| 648 | | - */ |
|---|
| 649 | | -void blk_queue_bypass_start(struct request_queue *q) |
|---|
| 650 | | -{ |
|---|
| 651 | | - WARN_ON_ONCE(q->mq_ops); |
|---|
| 652 | | - |
|---|
| 653 | | - spin_lock_irq(q->queue_lock); |
|---|
| 654 | | - q->bypass_depth++; |
|---|
| 655 | | - queue_flag_set(QUEUE_FLAG_BYPASS, q); |
|---|
| 656 | | - spin_unlock_irq(q->queue_lock); |
|---|
| 657 | | - |
|---|
| 658 | | - /* |
|---|
| 659 | | - * Queues start drained. Skip actual draining till init is |
|---|
| 660 | | - * complete. This avoids lenghty delays during queue init which |
|---|
| 661 | | - * can happen many times during boot. |
|---|
| 662 | | - */ |
|---|
| 663 | | - if (blk_queue_init_done(q)) { |
|---|
| 664 | | - spin_lock_irq(q->queue_lock); |
|---|
| 665 | | - __blk_drain_queue(q, false); |
|---|
| 666 | | - spin_unlock_irq(q->queue_lock); |
|---|
| 667 | | - |
|---|
| 668 | | - /* ensure blk_queue_bypass() is %true inside RCU read lock */ |
|---|
| 669 | | - synchronize_rcu(); |
|---|
| 670 | | - } |
|---|
| 671 | | -} |
|---|
| 672 | | -EXPORT_SYMBOL_GPL(blk_queue_bypass_start); |
|---|
| 673 | | - |
|---|
| 674 | | -/** |
|---|
| 675 | | - * blk_queue_bypass_end - leave queue bypass mode |
|---|
| 676 | | - * @q: queue of interest |
|---|
| 677 | | - * |
|---|
| 678 | | - * Leave bypass mode and restore the normal queueing behavior. |
|---|
| 679 | | - * |
|---|
| 680 | | - * Note: although blk_queue_bypass_start() is only called for blk-sq queues, |
|---|
| 681 | | - * this function is called for both blk-sq and blk-mq queues. |
|---|
| 682 | | - */ |
|---|
| 683 | | -void blk_queue_bypass_end(struct request_queue *q) |
|---|
| 684 | | -{ |
|---|
| 685 | | - spin_lock_irq(q->queue_lock); |
|---|
| 686 | | - if (!--q->bypass_depth) |
|---|
| 687 | | - queue_flag_clear(QUEUE_FLAG_BYPASS, q); |
|---|
| 688 | | - WARN_ON_ONCE(q->bypass_depth < 0); |
|---|
| 689 | | - spin_unlock_irq(q->queue_lock); |
|---|
| 690 | | -} |
|---|
| 691 | | -EXPORT_SYMBOL_GPL(blk_queue_bypass_end); |
|---|
| 692 | 347 | |
|---|
| 693 | 348 | void blk_set_queue_dying(struct request_queue *q) |
|---|
| 694 | 349 | { |
|---|
| .. | .. |
|---|
| 701 | 356 | */ |
|---|
| 702 | 357 | blk_freeze_queue_start(q); |
|---|
| 703 | 358 | |
|---|
| 704 | | - if (q->mq_ops) |
|---|
| 359 | + if (queue_is_mq(q)) |
|---|
| 705 | 360 | blk_mq_wake_waiters(q); |
|---|
| 706 | | - else { |
|---|
| 707 | | - struct request_list *rl; |
|---|
| 708 | | - |
|---|
| 709 | | - spin_lock_irq(q->queue_lock); |
|---|
| 710 | | - blk_queue_for_each_rl(rl, q) { |
|---|
| 711 | | - if (rl->rq_pool) { |
|---|
| 712 | | - wake_up_all(&rl->wait[BLK_RW_SYNC]); |
|---|
| 713 | | - wake_up_all(&rl->wait[BLK_RW_ASYNC]); |
|---|
| 714 | | - } |
|---|
| 715 | | - } |
|---|
| 716 | | - spin_unlock_irq(q->queue_lock); |
|---|
| 717 | | - } |
|---|
| 718 | 361 | |
|---|
| 719 | 362 | /* Make blk_queue_enter() reexamine the DYING flag. */ |
|---|
| 720 | 363 | wake_up_all(&q->mq_freeze_wq); |
|---|
| 721 | 364 | } |
|---|
| 722 | 365 | EXPORT_SYMBOL_GPL(blk_set_queue_dying); |
|---|
| 723 | | - |
|---|
| 724 | | -/* Unconfigure the I/O scheduler and dissociate from the cgroup controller. */ |
|---|
| 725 | | -void blk_exit_queue(struct request_queue *q) |
|---|
| 726 | | -{ |
|---|
| 727 | | - /* |
|---|
| 728 | | - * Since the I/O scheduler exit code may access cgroup information, |
|---|
| 729 | | - * perform I/O scheduler exit before disassociating from the block |
|---|
| 730 | | - * cgroup controller. |
|---|
| 731 | | - */ |
|---|
| 732 | | - if (q->elevator) { |
|---|
| 733 | | - ioc_clear_queue(q); |
|---|
| 734 | | - elevator_exit(q, q->elevator); |
|---|
| 735 | | - q->elevator = NULL; |
|---|
| 736 | | - } |
|---|
| 737 | | - |
|---|
| 738 | | - /* |
|---|
| 739 | | - * Remove all references to @q from the block cgroup controller before |
|---|
| 740 | | - * restoring @q->queue_lock to avoid that restoring this pointer causes |
|---|
| 741 | | - * e.g. blkcg_print_blkgs() to crash. |
|---|
| 742 | | - */ |
|---|
| 743 | | - blkcg_exit_queue(q); |
|---|
| 744 | | - |
|---|
| 745 | | - /* |
|---|
| 746 | | - * Since the cgroup code may dereference the @q->backing_dev_info |
|---|
| 747 | | - * pointer, only decrease its reference count after having removed the |
|---|
| 748 | | - * association with the block cgroup controller. |
|---|
| 749 | | - */ |
|---|
| 750 | | - bdi_put(q->backing_dev_info); |
|---|
| 751 | | -} |
|---|
| 752 | 366 | |
|---|
| 753 | 367 | /** |
|---|
| 754 | 368 | * blk_cleanup_queue - shutdown a request queue |
|---|
| .. | .. |
|---|
| 756 | 370 | * |
|---|
| 757 | 371 | * Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and |
|---|
| 758 | 372 | * put it. All future requests will be failed immediately with -ENODEV. |
|---|
| 373 | + * |
|---|
| 374 | + * Context: can sleep |
|---|
| 759 | 375 | */ |
|---|
| 760 | 376 | void blk_cleanup_queue(struct request_queue *q) |
|---|
| 761 | 377 | { |
|---|
| 762 | | - spinlock_t *lock = q->queue_lock; |
|---|
| 378 | + /* cannot be called from atomic context */ |
|---|
| 379 | + might_sleep(); |
|---|
| 380 | + |
|---|
| 381 | + WARN_ON_ONCE(blk_queue_registered(q)); |
|---|
| 763 | 382 | |
|---|
| 764 | 383 | /* mark @q DYING, no new request or merges will be allowed afterwards */ |
|---|
| 765 | | - mutex_lock(&q->sysfs_lock); |
|---|
| 766 | 384 | blk_set_queue_dying(q); |
|---|
| 767 | | - spin_lock_irq(lock); |
|---|
| 768 | 385 | |
|---|
| 769 | | - /* |
|---|
| 770 | | - * A dying queue is permanently in bypass mode till released. Note |
|---|
| 771 | | - * that, unlike blk_queue_bypass_start(), we aren't performing |
|---|
| 772 | | - * synchronize_rcu() after entering bypass mode to avoid the delay |
|---|
| 773 | | - * as some drivers create and destroy a lot of queues while |
|---|
| 774 | | - * probing. This is still safe because blk_release_queue() will be |
|---|
| 775 | | - * called only after the queue refcnt drops to zero and nothing, |
|---|
| 776 | | - * RCU or not, would be traversing the queue by then. |
|---|
| 777 | | - */ |
|---|
| 778 | | - q->bypass_depth++; |
|---|
| 779 | | - queue_flag_set(QUEUE_FLAG_BYPASS, q); |
|---|
| 780 | | - |
|---|
| 781 | | - queue_flag_set(QUEUE_FLAG_NOMERGES, q); |
|---|
| 782 | | - queue_flag_set(QUEUE_FLAG_NOXMERGES, q); |
|---|
| 783 | | - queue_flag_set(QUEUE_FLAG_DYING, q); |
|---|
| 784 | | - spin_unlock_irq(lock); |
|---|
| 785 | | - mutex_unlock(&q->sysfs_lock); |
|---|
| 386 | + blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q); |
|---|
| 387 | + blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q); |
|---|
| 786 | 388 | |
|---|
| 787 | 389 | /* |
|---|
| 788 | 390 | * Drain all requests queued before DYING marking. Set DEAD flag to |
|---|
| 789 | | - * prevent that q->request_fn() gets invoked after draining finished. |
|---|
| 391 | + * prevent that blk_mq_run_hw_queues() accesses the hardware queues |
|---|
| 392 | + * after draining finished. |
|---|
| 790 | 393 | */ |
|---|
| 791 | 394 | blk_freeze_queue(q); |
|---|
| 792 | 395 | |
|---|
| 793 | 396 | rq_qos_exit(q); |
|---|
| 794 | 397 | |
|---|
| 795 | | - spin_lock_irq(lock); |
|---|
| 796 | | - queue_flag_set(QUEUE_FLAG_DEAD, q); |
|---|
| 797 | | - spin_unlock_irq(lock); |
|---|
| 798 | | - |
|---|
| 799 | | - /* |
|---|
| 800 | | - * make sure all in-progress dispatch are completed because |
|---|
| 801 | | - * blk_freeze_queue() can only complete all requests, and |
|---|
| 802 | | - * dispatch may still be in-progress since we dispatch requests |
|---|
| 803 | | - * from more than one contexts. |
|---|
| 804 | | - * |
|---|
| 805 | | - * We rely on driver to deal with the race in case that queue |
|---|
| 806 | | - * initialization isn't done. |
|---|
| 807 | | - */ |
|---|
| 808 | | - if (q->mq_ops && blk_queue_init_done(q)) |
|---|
| 809 | | - blk_mq_quiesce_queue(q); |
|---|
| 398 | + blk_queue_flag_set(QUEUE_FLAG_DEAD, q); |
|---|
| 810 | 399 | |
|---|
| 811 | 400 | /* for synchronous bio-based driver finish in-flight integrity i/o */ |
|---|
| 812 | 401 | blk_flush_integrity(); |
|---|
| .. | .. |
|---|
| 815 | 404 | del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer); |
|---|
| 816 | 405 | blk_sync_queue(q); |
|---|
| 817 | 406 | |
|---|
| 818 | | - /* |
|---|
| 819 | | - * I/O scheduler exit is only safe after the sysfs scheduler attribute |
|---|
| 820 | | - * has been removed. |
|---|
| 821 | | - */ |
|---|
| 822 | | - WARN_ON_ONCE(q->kobj.state_in_sysfs); |
|---|
| 823 | | - |
|---|
| 824 | | - blk_exit_queue(q); |
|---|
| 825 | | - |
|---|
| 826 | | - if (q->mq_ops) |
|---|
| 407 | + if (queue_is_mq(q)) |
|---|
| 827 | 408 | blk_mq_exit_queue(q); |
|---|
| 828 | 409 | |
|---|
| 829 | | - percpu_ref_exit(&q->q_usage_counter); |
|---|
| 830 | | - |
|---|
| 831 | | - spin_lock_irq(lock); |
|---|
| 832 | | - if (q->queue_lock != &q->__queue_lock) |
|---|
| 833 | | - q->queue_lock = &q->__queue_lock; |
|---|
| 834 | | - spin_unlock_irq(lock); |
|---|
| 410 | + /* |
|---|
| 411 | + * In theory, request pool of sched_tags belongs to request queue. |
|---|
| 412 | + * However, the current implementation requires tag_set for freeing |
|---|
| 413 | + * requests, so free the pool now. |
|---|
| 414 | + * |
|---|
| 415 | + * Queue has become frozen, there can't be any in-queue requests, so |
|---|
| 416 | + * it is safe to free requests now. |
|---|
| 417 | + */ |
|---|
| 418 | + mutex_lock(&q->sysfs_lock); |
|---|
| 419 | + if (q->elevator) |
|---|
| 420 | + blk_mq_sched_free_requests(q); |
|---|
| 421 | + mutex_unlock(&q->sysfs_lock); |
|---|
| 835 | 422 | |
|---|
| 836 | 423 | /* @q is and will stay empty, shutdown and put */ |
|---|
| 837 | 424 | blk_put_queue(q); |
|---|
| 838 | 425 | } |
|---|
| 839 | 426 | EXPORT_SYMBOL(blk_cleanup_queue); |
|---|
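
blk_cleanup_queue() now documents that it may sleep and warns if the queue is still registered, so a driver's remove path has to unregister the disk first. A hedged ordering sketch (the device structure and helper are illustrative):

```c
#include <linux/blkdev.h>
#include <linux/genhd.h>

struct example_disk_dev {
	struct gendisk *disk;
};

/* Hypothetical remove path: del_gendisk() unregisters the queue from sysfs,
 * which must happen before blk_cleanup_queue() per the new WARN_ON_ONCE(). */
static void example_remove(struct example_disk_dev *dev)
{
	del_gendisk(dev->disk);
	blk_cleanup_queue(dev->disk->queue);
	put_disk(dev->disk);
}
```
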
| 840 | 427 | |
|---|
| 841 | | -/* Allocate memory local to the request queue */ |
|---|
| 842 | | -static void *alloc_request_simple(gfp_t gfp_mask, void *data) |
|---|
| 843 | | -{ |
|---|
| 844 | | - struct request_queue *q = data; |
|---|
| 845 | | - |
|---|
| 846 | | - return kmem_cache_alloc_node(request_cachep, gfp_mask, q->node); |
|---|
| 847 | | -} |
|---|
| 848 | | - |
|---|
| 849 | | -static void free_request_simple(void *element, void *data) |
|---|
| 850 | | -{ |
|---|
| 851 | | - kmem_cache_free(request_cachep, element); |
|---|
| 852 | | -} |
|---|
| 853 | | - |
|---|
| 854 | | -static void *alloc_request_size(gfp_t gfp_mask, void *data) |
|---|
| 855 | | -{ |
|---|
| 856 | | - struct request_queue *q = data; |
|---|
| 857 | | - struct request *rq; |
|---|
| 858 | | - |
|---|
| 859 | | - rq = kmalloc_node(sizeof(struct request) + q->cmd_size, gfp_mask, |
|---|
| 860 | | - q->node); |
|---|
| 861 | | - if (rq && q->init_rq_fn && q->init_rq_fn(q, rq, gfp_mask) < 0) { |
|---|
| 862 | | - kfree(rq); |
|---|
| 863 | | - rq = NULL; |
|---|
| 864 | | - } |
|---|
| 865 | | - return rq; |
|---|
| 866 | | -} |
|---|
| 867 | | - |
|---|
| 868 | | -static void free_request_size(void *element, void *data) |
|---|
| 869 | | -{ |
|---|
| 870 | | - struct request_queue *q = data; |
|---|
| 871 | | - |
|---|
| 872 | | - if (q->exit_rq_fn) |
|---|
| 873 | | - q->exit_rq_fn(q, element); |
|---|
| 874 | | - kfree(element); |
|---|
| 875 | | -} |
|---|
| 876 | | - |
|---|
| 877 | | -int blk_init_rl(struct request_list *rl, struct request_queue *q, |
|---|
| 878 | | - gfp_t gfp_mask) |
|---|
| 879 | | -{ |
|---|
| 880 | | - if (unlikely(rl->rq_pool) || q->mq_ops) |
|---|
| 881 | | - return 0; |
|---|
| 882 | | - |
|---|
| 883 | | - rl->q = q; |
|---|
| 884 | | - rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0; |
|---|
| 885 | | - rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0; |
|---|
| 886 | | - init_waitqueue_head(&rl->wait[BLK_RW_SYNC]); |
|---|
| 887 | | - init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]); |
|---|
| 888 | | - |
|---|
| 889 | | - if (q->cmd_size) { |
|---|
| 890 | | - rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, |
|---|
| 891 | | - alloc_request_size, free_request_size, |
|---|
| 892 | | - q, gfp_mask, q->node); |
|---|
| 893 | | - } else { |
|---|
| 894 | | - rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, |
|---|
| 895 | | - alloc_request_simple, free_request_simple, |
|---|
| 896 | | - q, gfp_mask, q->node); |
|---|
| 897 | | - } |
|---|
| 898 | | - if (!rl->rq_pool) |
|---|
| 899 | | - return -ENOMEM; |
|---|
| 900 | | - |
|---|
| 901 | | - if (rl != &q->root_rl) |
|---|
| 902 | | - WARN_ON_ONCE(!blk_get_queue(q)); |
|---|
| 903 | | - |
|---|
| 904 | | - return 0; |
|---|
| 905 | | -} |
|---|
| 906 | | - |
|---|
| 907 | | -void blk_exit_rl(struct request_queue *q, struct request_list *rl) |
|---|
| 908 | | -{ |
|---|
| 909 | | - if (rl->rq_pool) { |
|---|
| 910 | | - mempool_destroy(rl->rq_pool); |
|---|
| 911 | | - if (rl != &q->root_rl) |
|---|
| 912 | | - blk_put_queue(q); |
|---|
| 913 | | - } |
|---|
| 914 | | -} |
|---|
| 915 | | - |
|---|
| 916 | | -struct request_queue *blk_alloc_queue(gfp_t gfp_mask) |
|---|
| 917 | | -{ |
|---|
| 918 | | - return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE, NULL); |
|---|
| 919 | | -} |
|---|
| 920 | | -EXPORT_SYMBOL(blk_alloc_queue); |
|---|
| 921 | | - |
|---|
| 922 | 428 | /** |
|---|
| 923 | 429 | * blk_queue_enter() - try to increase q->q_usage_counter |
|---|
| 924 | 430 | * @q: request queue pointer |
|---|
| 925 | | - * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PREEMPT |
|---|
| 431 | + * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PM |
|---|
| 926 | 432 | */ |
|---|
| 927 | 433 | int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) |
|---|
| 928 | 434 | { |
|---|
| 929 | | - const bool pm = flags & BLK_MQ_REQ_PREEMPT; |
|---|
| 435 | + const bool pm = flags & BLK_MQ_REQ_PM; |
|---|
| 930 | 436 | |
|---|
| 931 | 437 | while (true) { |
|---|
| 932 | 438 | bool success = false; |
|---|
| .. | .. |
|---|
| 962 | 468 | smp_rmb(); |
|---|
| 963 | 469 | |
|---|
| 964 | 470 | wait_event(q->mq_freeze_wq, |
|---|
| 965 | | - (atomic_read(&q->mq_freeze_depth) == 0 && |
|---|
| 966 | | - (pm || !blk_queue_pm_only(q))) || |
|---|
| 471 | + (!q->mq_freeze_depth && |
|---|
| 472 | + (pm || (blk_pm_request_resume(q), |
|---|
| 473 | + !blk_queue_pm_only(q)))) || |
|---|
| 967 | 474 | blk_queue_dying(q)); |
|---|
| 968 | 475 | if (blk_queue_dying(q)) |
|---|
| 969 | 476 | return -ENODEV; |
|---|
| 970 | 477 | } |
|---|
| 478 | +} |
|---|
| 479 | + |
|---|
| 480 | +static inline int bio_queue_enter(struct bio *bio) |
|---|
| 481 | +{ |
|---|
| 482 | + struct request_queue *q = bio->bi_disk->queue; |
|---|
| 483 | + bool nowait = bio->bi_opf & REQ_NOWAIT; |
|---|
| 484 | + int ret; |
|---|
| 485 | + |
|---|
| 486 | + ret = blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0); |
|---|
| 487 | + if (unlikely(ret)) { |
|---|
| 488 | + if (nowait && !blk_queue_dying(q)) |
|---|
| 489 | + bio_wouldblock_error(bio); |
|---|
| 490 | + else |
|---|
| 491 | + bio_io_error(bio); |
|---|
| 492 | + } |
|---|
| 493 | + |
|---|
| 494 | + return ret; |
|---|
| 971 | 495 | } |
|---|
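
blk_queue_enter() now honours BLK_MQ_REQ_PM (replacing BLK_MQ_REQ_PREEMPT), and the new bio_queue_enter() helper fails REQ_NOWAIT bios with bio_wouldblock_error() instead of sleeping. A hedged sketch of the enter/exit pairing from a caller's point of view (the wrapper function is hypothetical):

```c
#include <linux/blkdev.h>
#include <linux/blk-mq.h>

/* Hypothetical example: take a queue usage reference without sleeping.
 * BLK_MQ_REQ_NOWAIT makes blk_queue_enter() return -EBUSY while the queue
 * is frozen or pm-only, and -ENODEV once it is dying. */
static int example_with_queue_ref(struct request_queue *q)
{
	int ret = blk_queue_enter(q, BLK_MQ_REQ_NOWAIT);

	if (ret)
		return ret;

	/* ... q->q_usage_counter is held here, safe to issue work ... */

	blk_queue_exit(q);
	return 0;
}
```
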
| 972 | 496 | |
|---|
| 973 | 497 | void blk_queue_exit(struct request_queue *q) |
|---|
| .. | .. |
|---|
| 975 | 499 | percpu_ref_put(&q->q_usage_counter); |
|---|
| 976 | 500 | } |
|---|
| 977 | 501 | |
|---|
| 978 | | -static void blk_queue_usage_counter_release_wrk(struct work_struct *work) |
|---|
| 979 | | -{ |
|---|
| 980 | | - struct request_queue *q = |
|---|
| 981 | | - container_of(work, struct request_queue, mq_pcpu_wake); |
|---|
| 982 | | - |
|---|
| 983 | | - wake_up_all(&q->mq_freeze_wq); |
|---|
| 984 | | -} |
|---|
| 985 | | - |
|---|
| 986 | 502 | static void blk_queue_usage_counter_release(struct percpu_ref *ref) |
|---|
| 987 | 503 | { |
|---|
| 988 | 504 | struct request_queue *q = |
|---|
| 989 | 505 | container_of(ref, struct request_queue, q_usage_counter); |
|---|
| 990 | 506 | |
|---|
| 991 | | - if (wq_has_sleeper(&q->mq_freeze_wq)) |
|---|
| 992 | | - schedule_work(&q->mq_pcpu_wake); |
|---|
| 507 | + wake_up_all(&q->mq_freeze_wq); |
|---|
| 993 | 508 | } |
|---|
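
With the work-item indirection removed, blk_queue_usage_counter_release() wakes mq_freeze_wq directly from the percpu_ref release callback. A hedged, self-contained sketch of that pattern outside the block layer (all names are illustrative):

```c
#include <linux/percpu-refcount.h>
#include <linux/wait.h>
#include <linux/gfp.h>

struct example_counted {
	struct percpu_ref usage;
	wait_queue_head_t freeze_wq;
};

/* Release callback: runs once the last usage reference is dropped after a
 * kill/freeze, and wakes whoever is waiting for the object to drain. */
static void example_usage_release(struct percpu_ref *ref)
{
	struct example_counted *obj =
		container_of(ref, struct example_counted, usage);

	wake_up_all(&obj->freeze_wq);
}

static int example_counted_init(struct example_counted *obj)
{
	init_waitqueue_head(&obj->freeze_wq);
	/* Start in atomic mode, mirroring blk_alloc_queue() further down. */
	return percpu_ref_init(&obj->usage, example_usage_release,
			       PERCPU_REF_INIT_ATOMIC, GFP_KERNEL);
}
```
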
| 994 | 509 | |
|---|
| 995 | 510 | static void blk_rq_timed_out_timer(struct timer_list *t) |
|---|
| .. | .. |
|---|
| 999 | 514 | kblockd_schedule_work(&q->timeout_work); |
|---|
| 1000 | 515 | } |
|---|
| 1001 | 516 | |
|---|
| 1002 | | -static void blk_timeout_work_dummy(struct work_struct *work) |
|---|
| 517 | +static void blk_timeout_work(struct work_struct *work) |
|---|
| 1003 | 518 | { |
|---|
| 1004 | 519 | } |
|---|
| 1005 | 520 | |
|---|
| 1006 | | -/** |
|---|
| 1007 | | - * blk_alloc_queue_node - allocate a request queue |
|---|
| 1008 | | - * @gfp_mask: memory allocation flags |
|---|
| 1009 | | - * @node_id: NUMA node to allocate memory from |
|---|
| 1010 | | - * @lock: For legacy queues, pointer to a spinlock that will be used to e.g. |
|---|
| 1011 | | - * serialize calls to the legacy .request_fn() callback. Ignored for |
|---|
| 1012 | | - * blk-mq request queues. |
|---|
| 1013 | | - * |
|---|
| 1014 | | - * Note: pass the queue lock as the third argument to this function instead of |
|---|
| 1015 | | - * setting the queue lock pointer explicitly to avoid triggering a sporadic |
|---|
| 1016 | | - * crash in the blkcg code. This function namely calls blkcg_init_queue() and |
|---|
| 1017 | | - * the queue lock pointer must be set before blkcg_init_queue() is called. |
|---|
| 1018 | | - */ |
|---|
| 1019 | | -struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, |
|---|
| 1020 | | - spinlock_t *lock) |
|---|
| 521 | +struct request_queue *blk_alloc_queue(int node_id) |
|---|
| 1021 | 522 | { |
|---|
| 1022 | 523 | struct request_queue *q; |
|---|
| 1023 | 524 | int ret; |
|---|
| 1024 | 525 | |
|---|
| 1025 | 526 | q = kmem_cache_alloc_node(blk_requestq_cachep, |
|---|
| 1026 | | - gfp_mask | __GFP_ZERO, node_id); |
|---|
| 527 | + GFP_KERNEL | __GFP_ZERO, node_id); |
|---|
| 1027 | 528 | if (!q) |
|---|
| 1028 | 529 | return NULL; |
|---|
| 1029 | 530 | |
|---|
| 1030 | | - INIT_LIST_HEAD(&q->queue_head); |
|---|
| 1031 | 531 | q->last_merge = NULL; |
|---|
| 1032 | | - q->end_sector = 0; |
|---|
| 1033 | | - q->boundary_rq = NULL; |
|---|
| 1034 | 532 | |
|---|
| 1035 | | - q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask); |
|---|
| 533 | + q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL); |
|---|
| 1036 | 534 | if (q->id < 0) |
|---|
| 1037 | 535 | goto fail_q; |
|---|
| 1038 | 536 | |
|---|
| .. | .. |
|---|
| 1040 | 538 | if (ret) |
|---|
| 1041 | 539 | goto fail_id; |
|---|
| 1042 | 540 | |
|---|
| 1043 | | - q->backing_dev_info = bdi_alloc_node(gfp_mask, node_id); |
|---|
| 541 | + q->backing_dev_info = bdi_alloc(node_id); |
|---|
| 1044 | 542 | if (!q->backing_dev_info) |
|---|
| 1045 | 543 | goto fail_split; |
|---|
| 1046 | 544 | |
|---|
| .. | .. |
|---|
| 1048 | 546 | if (!q->stats) |
|---|
| 1049 | 547 | goto fail_stats; |
|---|
| 1050 | 548 | |
|---|
| 1051 | | - q->backing_dev_info->ra_pages = |
|---|
| 1052 | | - (VM_MAX_READAHEAD * 1024) / PAGE_SIZE; |
|---|
| 1053 | | - q->backing_dev_info->io_pages = |
|---|
| 1054 | | - (VM_MAX_READAHEAD * 1024) / PAGE_SIZE; |
|---|
| 1055 | | - q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK; |
|---|
| 1056 | | - q->backing_dev_info->name = "block"; |
|---|
| 1057 | 549 | q->node = node_id; |
|---|
| 550 | + |
|---|
| 551 | + atomic_set(&q->nr_active_requests_shared_sbitmap, 0); |
|---|
| 1058 | 552 | |
|---|
| 1059 | 553 | timer_setup(&q->backing_dev_info->laptop_mode_wb_timer, |
|---|
| 1060 | 554 | laptop_mode_timer_fn, 0); |
|---|
| 1061 | 555 | timer_setup(&q->timeout, blk_rq_timed_out_timer, 0); |
|---|
| 1062 | | - INIT_WORK(&q->timeout_work, blk_timeout_work_dummy); |
|---|
| 1063 | | - INIT_LIST_HEAD(&q->timeout_list); |
|---|
| 556 | + INIT_WORK(&q->timeout_work, blk_timeout_work); |
|---|
| 1064 | 557 | INIT_LIST_HEAD(&q->icq_list); |
|---|
| 1065 | 558 | #ifdef CONFIG_BLK_CGROUP |
|---|
| 1066 | 559 | INIT_LIST_HEAD(&q->blkg_list); |
|---|
| 1067 | 560 | #endif |
|---|
| 1068 | | - INIT_DELAYED_WORK(&q->delay_work, blk_delay_work); |
|---|
| 1069 | 561 | |
|---|
| 1070 | 562 | kobject_init(&q->kobj, &blk_queue_ktype); |
|---|
| 1071 | 563 | |
|---|
| 1072 | | -#ifdef CONFIG_BLK_DEV_IO_TRACE |
|---|
| 1073 | | - mutex_init(&q->blk_trace_mutex); |
|---|
| 1074 | | -#endif |
|---|
| 564 | + mutex_init(&q->debugfs_mutex); |
|---|
| 1075 | 565 | mutex_init(&q->sysfs_lock); |
|---|
| 1076 | | - spin_lock_init(&q->__queue_lock); |
|---|
| 1077 | | - |
|---|
| 1078 | | - if (!q->mq_ops) |
|---|
| 1079 | | - q->queue_lock = lock ? : &q->__queue_lock; |
|---|
| 1080 | | - |
|---|
| 1081 | | - /* |
|---|
| 1082 | | - * A queue starts its life with bypass turned on to avoid |
|---|
| 1083 | | - * unnecessary bypass on/off overhead and nasty surprises during |
|---|
| 1084 | | - * init. The initial bypass will be finished when the queue is |
|---|
| 1085 | | - * registered by blk_register_queue(). |
|---|
| 1086 | | - */ |
|---|
| 1087 | | - q->bypass_depth = 1; |
|---|
| 1088 | | - queue_flag_set_unlocked(QUEUE_FLAG_BYPASS, q); |
|---|
| 566 | + mutex_init(&q->sysfs_dir_lock); |
|---|
| 567 | + spin_lock_init(&q->queue_lock); |
|---|
| 1089 | 568 | |
|---|
| 1090 | 569 | init_waitqueue_head(&q->mq_freeze_wq); |
|---|
| 1091 | | - INIT_WORK(&q->mq_pcpu_wake, blk_queue_usage_counter_release_wrk); |
|---|
| 570 | + mutex_init(&q->mq_freeze_lock); |
|---|
| 1092 | 571 | |
|---|
| 1093 | 572 | /* |
|---|
| 1094 | 573 | * Init percpu_ref in atomic mode so that it's faster to shutdown. |
|---|
| .. | .. |
|---|
| 1101 | 580 | |
|---|
| 1102 | 581 | if (blkcg_init_queue(q)) |
|---|
| 1103 | 582 | goto fail_ref; |
|---|
| 583 | + |
|---|
| 584 | + blk_queue_dma_alignment(q, 511); |
|---|
| 585 | + blk_set_default_limits(&q->limits); |
|---|
| 586 | + q->nr_requests = BLKDEV_MAX_RQ; |
|---|
| 1104 | 587 | |
|---|
| 1105 | 588 | return q; |
|---|
| 1106 | 589 | |
|---|
| .. | .. |
|---|
| 1118 | 601 | kmem_cache_free(blk_requestq_cachep, q); |
|---|
| 1119 | 602 | return NULL; |
|---|
| 1120 | 603 | } |
|---|
| 1121 | | -EXPORT_SYMBOL(blk_alloc_queue_node); |
|---|
| 604 | +EXPORT_SYMBOL(blk_alloc_queue); |
|---|
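
blk_alloc_queue() now takes only a NUMA node and applies default limits, 511-byte DMA alignment and BLKDEV_MAX_RQ itself; bio-based drivers supply their submission entry point through block_device_operations->submit_bio rather than a make_request_fn. A hedged setup/teardown sketch for such a driver, assuming the ->submit_bio signature of this kernel generation (all example_* names are illustrative):

```c
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/numa.h>

/* Hypothetical bio-based submission path: complete everything immediately. */
static blk_qc_t example_submit_bio(struct bio *bio)
{
	bio_endio(bio);
	return BLK_QC_T_NONE;
}

/* Wired up to the gendisk's ->fops when the disk is added (not shown). */
static const struct block_device_operations example_fops = {
	.owner		= THIS_MODULE,
	.submit_bio	= example_submit_bio,
};

static struct request_queue *example_create_queue(void)
{
	/* Returns a queue already carrying blk_set_default_limits() settings. */
	return blk_alloc_queue(NUMA_NO_NODE);
}

static void example_destroy_queue(struct request_queue *q)
{
	blk_cleanup_queue(q);	/* may sleep */
}
```
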
| 1122 | 605 | |
|---|
| 1123 | 606 | /** |
|---|
| 1124 | | - * blk_init_queue - prepare a request queue for use with a block device |
|---|
| 1125 | | - * @rfn: The function to be called to process requests that have been |
|---|
| 1126 | | - * placed on the queue. |
|---|
| 1127 | | - * @lock: Request queue spin lock |
|---|
| 607 | + * blk_get_queue - increment the request_queue refcount |
|---|
| 608 | + * @q: the request_queue structure to increment the refcount for |
|---|
| 1128 | 609 | * |
|---|
| 1129 | | - * Description: |
|---|
| 1130 | | - * If a block device wishes to use the standard request handling procedures, |
|---|
| 1131 | | - * which sorts requests and coalesces adjacent requests, then it must |
|---|
| 1132 | | - * call blk_init_queue(). The function @rfn will be called when there |
|---|
| 1133 | | - * are requests on the queue that need to be processed. If the device |
|---|
| 1134 | | - * supports plugging, then @rfn may not be called immediately when requests |
|---|
| 1135 | | - * are available on the queue, but may be called at some time later instead. |
|---|
| 1136 | | - * Plugged queues are generally unplugged when a buffer belonging to one |
|---|
| 1137 | | - * of the requests on the queue is needed, or due to memory pressure. |
|---|
| 610 | + * Increment the refcount of the request_queue kobject. |
|---|
| 1138 | 611 | * |
|---|
| 1139 | | - * @rfn is not required, or even expected, to remove all requests off the |
|---|
| 1140 | | - * queue, but only as many as it can handle at a time. If it does leave |
|---|
| 1141 | | - * requests on the queue, it is responsible for arranging that the requests |
|---|
| 1142 | | - * get dealt with eventually. |
|---|
| 1143 | | - * |
|---|
| 1144 | | - * The queue spin lock must be held while manipulating the requests on the |
|---|
| 1145 | | - * request queue; this lock will be taken also from interrupt context, so irq |
|---|
| 1146 | | - * disabling is needed for it. |
|---|
| 1147 | | - * |
|---|
| 1148 | | - * Function returns a pointer to the initialized request queue, or %NULL if |
|---|
| 1149 | | - * it didn't succeed. |
|---|
| 1150 | | - * |
|---|
| 1151 | | - * Note: |
|---|
| 1152 | | - * blk_init_queue() must be paired with a blk_cleanup_queue() call |
|---|
| 1153 | | - * when the block device is deactivated (such as at module unload). |
|---|
| 1154 | | - **/ |
|---|
| 1155 | | - |
|---|
| 1156 | | -struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) |
|---|
| 1157 | | -{ |
|---|
| 1158 | | - return blk_init_queue_node(rfn, lock, NUMA_NO_NODE); |
|---|
| 1159 | | -} |
|---|
| 1160 | | -EXPORT_SYMBOL(blk_init_queue); |
|---|
| 1161 | | - |
|---|
| 1162 | | -struct request_queue * |
|---|
| 1163 | | -blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) |
|---|
| 1164 | | -{ |
|---|
| 1165 | | - struct request_queue *q; |
|---|
| 1166 | | - |
|---|
| 1167 | | - q = blk_alloc_queue_node(GFP_KERNEL, node_id, lock); |
|---|
| 1168 | | - if (!q) |
|---|
| 1169 | | - return NULL; |
|---|
| 1170 | | - |
|---|
| 1171 | | - q->request_fn = rfn; |
|---|
| 1172 | | - if (blk_init_allocated_queue(q) < 0) { |
|---|
| 1173 | | - blk_cleanup_queue(q); |
|---|
| 1174 | | - return NULL; |
|---|
| 1175 | | - } |
|---|
| 1176 | | - |
|---|
| 1177 | | - return q; |
|---|
| 1178 | | -} |
|---|
| 1179 | | -EXPORT_SYMBOL(blk_init_queue_node); |
|---|
| 1180 | | - |
|---|
| 1181 | | -static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio); |
|---|
| 1182 | | - |
|---|
| 1183 | | - |
|---|
| 1184 | | -int blk_init_allocated_queue(struct request_queue *q) |
|---|
| 1185 | | -{ |
|---|
| 1186 | | - WARN_ON_ONCE(q->mq_ops); |
|---|
| 1187 | | - |
|---|
| 1188 | | - q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size, GFP_KERNEL); |
|---|
| 1189 | | - if (!q->fq) |
|---|
| 1190 | | - return -ENOMEM; |
|---|
| 1191 | | - |
|---|
| 1192 | | - if (q->init_rq_fn && q->init_rq_fn(q, q->fq->flush_rq, GFP_KERNEL)) |
|---|
| 1193 | | - goto out_free_flush_queue; |
|---|
| 1194 | | - |
|---|
| 1195 | | - if (blk_init_rl(&q->root_rl, q, GFP_KERNEL)) |
|---|
| 1196 | | - goto out_exit_flush_rq; |
|---|
| 1197 | | - |
|---|
| 1198 | | - INIT_WORK(&q->timeout_work, blk_timeout_work); |
|---|
| 1199 | | - q->queue_flags |= QUEUE_FLAG_DEFAULT; |
|---|
| 1200 | | - |
|---|
| 1201 | | - /* |
|---|
| 1202 | | - * This also sets hw/phys segments, boundary and size |
|---|
| 1203 | | - */ |
|---|
| 1204 | | - blk_queue_make_request(q, blk_queue_bio); |
|---|
| 1205 | | - |
|---|
| 1206 | | - q->sg_reserved_size = INT_MAX; |
|---|
| 1207 | | - |
|---|
| 1208 | | - if (elevator_init(q)) |
|---|
| 1209 | | - goto out_exit_flush_rq; |
|---|
| 1210 | | - return 0; |
|---|
| 1211 | | - |
|---|
| 1212 | | -out_exit_flush_rq: |
|---|
| 1213 | | - if (q->exit_rq_fn) |
|---|
| 1214 | | - q->exit_rq_fn(q, q->fq->flush_rq); |
|---|
| 1215 | | -out_free_flush_queue: |
|---|
| 1216 | | - blk_free_flush_queue(q->fq); |
|---|
| 1217 | | - q->fq = NULL; |
|---|
| 1218 | | - return -ENOMEM; |
|---|
| 1219 | | -} |
|---|
| 1220 | | -EXPORT_SYMBOL(blk_init_allocated_queue); |
|---|
| 1221 | | - |
|---|
| 612 | + * Context: Any context. |
|---|
| 613 | + */ |
|---|
| 1222 | 614 | bool blk_get_queue(struct request_queue *q) |
|---|
| 1223 | 615 | { |
|---|
| 1224 | 616 | if (likely(!blk_queue_dying(q))) { |
|---|
| .. | .. |
|---|
| 1229 | 621 | return false; |
|---|
| 1230 | 622 | } |
|---|
| 1231 | 623 | EXPORT_SYMBOL(blk_get_queue); |
|---|
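
The new kernel-doc above spells out the refcount contract: blk_get_queue() fails once the queue is dying, and the final blk_put_queue() must not happen from atomic context. A small hedged sketch (the inspection helper is hypothetical):

```c
#include <linux/blkdev.h>

/* Hypothetical example: pin a queue while inspecting it from process
 * context, then drop the reference. */
static void example_inspect_queue(struct request_queue *q)
{
	if (!blk_get_queue(q))		/* queue already dying */
		return;

	pr_info("queue on node %d\n", q->node);

	blk_put_queue(q);		/* last ref must not drop in atomic context */
}
```
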
| 1232 | | - |
|---|
| 1233 | | -static inline void blk_free_request(struct request_list *rl, struct request *rq) |
|---|
| 1234 | | -{ |
|---|
| 1235 | | - if (rq->rq_flags & RQF_ELVPRIV) { |
|---|
| 1236 | | - elv_put_request(rl->q, rq); |
|---|
| 1237 | | - if (rq->elv.icq) |
|---|
| 1238 | | - put_io_context(rq->elv.icq->ioc); |
|---|
| 1239 | | - } |
|---|
| 1240 | | - |
|---|
| 1241 | | - mempool_free(rq, rl->rq_pool); |
|---|
| 1242 | | -} |
|---|
| 1243 | | - |
|---|
| 1244 | | -/* |
|---|
| 1245 | | - * ioc_batching returns true if the ioc is a valid batching request and |
|---|
| 1246 | | - * should be given priority access to a request. |
|---|
| 1247 | | - */ |
|---|
| 1248 | | -static inline int ioc_batching(struct request_queue *q, struct io_context *ioc) |
|---|
| 1249 | | -{ |
|---|
| 1250 | | - if (!ioc) |
|---|
| 1251 | | - return 0; |
|---|
| 1252 | | - |
|---|
| 1253 | | - /* |
|---|
| 1254 | | - * Make sure the process is able to allocate at least 1 request |
|---|
| 1255 | | - * even if the batch times out, otherwise we could theoretically |
|---|
| 1256 | | - * lose wakeups. |
|---|
| 1257 | | - */ |
|---|
| 1258 | | - return ioc->nr_batch_requests == q->nr_batching || |
|---|
| 1259 | | - (ioc->nr_batch_requests > 0 |
|---|
| 1260 | | - && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); |
|---|
| 1261 | | -} |
|---|
| 1262 | | - |
|---|
| 1263 | | -/* |
|---|
| 1264 | | - * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This |
|---|
| 1265 | | - * will cause the process to be a "batcher" on all queues in the system. This |
|---|
| 1266 | | - * is the behaviour we want though - once it gets a wakeup it should be given |
|---|
| 1267 | | - * a nice run. |
|---|
| 1268 | | - */ |
|---|
| 1269 | | -static void ioc_set_batching(struct request_queue *q, struct io_context *ioc) |
|---|
| 1270 | | -{ |
|---|
| 1271 | | - if (!ioc || ioc_batching(q, ioc)) |
|---|
| 1272 | | - return; |
|---|
| 1273 | | - |
|---|
| 1274 | | - ioc->nr_batch_requests = q->nr_batching; |
|---|
| 1275 | | - ioc->last_waited = jiffies; |
|---|
| 1276 | | -} |
|---|
| 1277 | | - |
|---|
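The batching scheme being removed here is easy to lose in the diff: a task that just slept waiting for a request is granted a short window in which it may allocate a small batch of requests with priority. Below is a minimal userspace sketch of that idea; the struct, function names and constants (BATCH_SIZE, BATCH_WINDOW) are made up and merely stand in for q->nr_batching and BLK_BATCH_TIME.

```c
#include <stdbool.h>
#include <time.h>

/* Illustrative stand-ins for the kernel's io_context fields. */
struct io_ctx {
	int nr_batch_requests;	/* allocations left in the current batch */
	time_t last_waited;	/* when this task last slept for a request */
};

#define BATCH_SIZE	16	/* plays the role of q->nr_batching */
#define BATCH_WINDOW	2	/* seconds; plays the role of BLK_BATCH_TIME */

/* A task that just woke from waiting becomes a "batcher" for a while. */
static void set_batching(struct io_ctx *ioc)
{
	ioc->nr_batch_requests = BATCH_SIZE;
	ioc->last_waited = time(NULL);
}

/*
 * A batcher keeps priority while it has budget and the window is fresh,
 * but is always allowed at least one allocation so wakeups are not lost.
 */
static bool is_batching(const struct io_ctx *ioc)
{
	if (ioc->nr_batch_requests == BATCH_SIZE)
		return true;
	return ioc->nr_batch_requests > 0 &&
	       time(NULL) < ioc->last_waited + BATCH_WINDOW;
}

int main(void)
{
	struct io_ctx ioc = { 0 };

	set_batching(&ioc);
	return is_batching(&ioc) ? 0 : 1;
}
```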
| 1278 | | -static void __freed_request(struct request_list *rl, int sync) |
|---|
| 1279 | | -{ |
|---|
| 1280 | | - struct request_queue *q = rl->q; |
|---|
| 1281 | | - |
|---|
| 1282 | | - if (rl->count[sync] < queue_congestion_off_threshold(q)) |
|---|
| 1283 | | - blk_clear_congested(rl, sync); |
|---|
| 1284 | | - |
|---|
| 1285 | | - if (rl->count[sync] + 1 <= q->nr_requests) { |
|---|
| 1286 | | - if (waitqueue_active(&rl->wait[sync])) |
|---|
| 1287 | | - wake_up(&rl->wait[sync]); |
|---|
| 1288 | | - |
|---|
| 1289 | | - blk_clear_rl_full(rl, sync); |
|---|
| 1290 | | - } |
|---|
| 1291 | | -} |
|---|
| 1292 | | - |
|---|
| 1293 | | -/* |
|---|
| 1294 | | - * A request has just been released. Account for it, update the full and |
|---|
| 1295 | | - * congestion status, wake up any waiters. Called under q->queue_lock. |
|---|
| 1296 | | - */ |
|---|
| 1297 | | -static void freed_request(struct request_list *rl, bool sync, |
|---|
| 1298 | | - req_flags_t rq_flags) |
|---|
| 1299 | | -{ |
|---|
| 1300 | | - struct request_queue *q = rl->q; |
|---|
| 1301 | | - |
|---|
| 1302 | | - q->nr_rqs[sync]--; |
|---|
| 1303 | | - rl->count[sync]--; |
|---|
| 1304 | | - if (rq_flags & RQF_ELVPRIV) |
|---|
| 1305 | | - q->nr_rqs_elvpriv--; |
|---|
| 1306 | | - |
|---|
| 1307 | | - __freed_request(rl, sync); |
|---|
| 1308 | | - |
|---|
| 1309 | | - if (unlikely(rl->starved[sync ^ 1])) |
|---|
| 1310 | | - __freed_request(rl, sync ^ 1); |
|---|
| 1311 | | -} |
|---|
| 1312 | | - |
|---|
| 1313 | | -int blk_update_nr_requests(struct request_queue *q, unsigned int nr) |
|---|
| 1314 | | -{ |
|---|
| 1315 | | - struct request_list *rl; |
|---|
| 1316 | | - int on_thresh, off_thresh; |
|---|
| 1317 | | - |
|---|
| 1318 | | - WARN_ON_ONCE(q->mq_ops); |
|---|
| 1319 | | - |
|---|
| 1320 | | - spin_lock_irq(q->queue_lock); |
|---|
| 1321 | | - q->nr_requests = nr; |
|---|
| 1322 | | - blk_queue_congestion_threshold(q); |
|---|
| 1323 | | - on_thresh = queue_congestion_on_threshold(q); |
|---|
| 1324 | | - off_thresh = queue_congestion_off_threshold(q); |
|---|
| 1325 | | - |
|---|
| 1326 | | - blk_queue_for_each_rl(rl, q) { |
|---|
| 1327 | | - if (rl->count[BLK_RW_SYNC] >= on_thresh) |
|---|
| 1328 | | - blk_set_congested(rl, BLK_RW_SYNC); |
|---|
| 1329 | | - else if (rl->count[BLK_RW_SYNC] < off_thresh) |
|---|
| 1330 | | - blk_clear_congested(rl, BLK_RW_SYNC); |
|---|
| 1331 | | - |
|---|
| 1332 | | - if (rl->count[BLK_RW_ASYNC] >= on_thresh) |
|---|
| 1333 | | - blk_set_congested(rl, BLK_RW_ASYNC); |
|---|
| 1334 | | - else if (rl->count[BLK_RW_ASYNC] < off_thresh) |
|---|
| 1335 | | - blk_clear_congested(rl, BLK_RW_ASYNC); |
|---|
| 1336 | | - |
|---|
| 1337 | | - if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { |
|---|
| 1338 | | - blk_set_rl_full(rl, BLK_RW_SYNC); |
|---|
| 1339 | | - } else { |
|---|
| 1340 | | - blk_clear_rl_full(rl, BLK_RW_SYNC); |
|---|
| 1341 | | - wake_up(&rl->wait[BLK_RW_SYNC]); |
|---|
| 1342 | | - } |
|---|
| 1343 | | - |
|---|
| 1344 | | - if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) { |
|---|
| 1345 | | - blk_set_rl_full(rl, BLK_RW_ASYNC); |
|---|
| 1346 | | - } else { |
|---|
| 1347 | | - blk_clear_rl_full(rl, BLK_RW_ASYNC); |
|---|
| 1348 | | - wake_up(&rl->wait[BLK_RW_ASYNC]); |
|---|
| 1349 | | - } |
|---|
| 1350 | | - } |
|---|
| 1351 | | - |
|---|
| 1352 | | - spin_unlock_irq(q->queue_lock); |
|---|
| 1353 | | - return 0; |
|---|
| 1354 | | -} |
|---|
| 1355 | | - |
|---|
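blk_update_nr_requests(), removed above, re-evaluates per-list congestion against two different watermarks, so the congested state has hysteresis: it is set once the count reaches the on-threshold and cleared only after it drops below the lower off-threshold. A simplified sketch of that pattern follows; the numbers are illustrative, not the kernel's derived thresholds.

```c
#include <stdbool.h>
#include <stdio.h>

#define ON_THRESH	113	/* e.g. roughly 7/8 of nr_requests */
#define OFF_THRESH	103	/* a little lower, to avoid flapping */

struct rl_state {
	unsigned int count;	/* allocated requests in this direction */
	bool congested;
};

/* Mirror the on/off threshold logic of the removed function. */
static void update_congestion(struct rl_state *rl)
{
	if (rl->count >= ON_THRESH)
		rl->congested = true;
	else if (rl->count < OFF_THRESH)
		rl->congested = false;
	/* between the thresholds the previous state is kept */
}

int main(void)
{
	struct rl_state rl = { .count = 120 };

	update_congestion(&rl);		/* crosses the on-threshold */
	rl.count = 108;
	update_congestion(&rl);		/* still congested: above off-threshold */
	rl.count = 90;
	update_congestion(&rl);		/* drops below off-threshold: cleared */
	printf("congested=%d\n", rl.congested);
	return 0;
}
```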
| 1356 | | -/** |
|---|
| 1357 | | - * __get_request - get a free request |
|---|
| 1358 | | - * @rl: request list to allocate from |
|---|
| 1359 | | - * @op: operation and flags |
|---|
| 1360 | | - * @bio: bio to allocate request for (can be %NULL) |
|---|
| 1361 | | - * @flags: BLK_MQ_REQ_* flags |
|---|
| 1362 | | - * @gfp_mask: allocator flags |
|---|
| 1363 | | - * |
|---|
| 1364 | | - * Get a free request from @q. This function may fail under memory |
|---|
| 1365 | | - * pressure or if @q is dead. |
|---|
| 1366 | | - * |
|---|
| 1367 | | - * Must be called with @q->queue_lock held. |
|---|
| 1368 | | - * Returns ERR_PTR on failure, with @q->queue_lock held. |
|---|
| 1369 | | - * Returns request pointer on success, with @q->queue_lock *not held*. |
|---|
| 1370 | | - */ |
|---|
| 1371 | | -static struct request *__get_request(struct request_list *rl, unsigned int op, |
|---|
| 1372 | | - struct bio *bio, blk_mq_req_flags_t flags, gfp_t gfp_mask) |
|---|
| 1373 | | -{ |
|---|
| 1374 | | - struct request_queue *q = rl->q; |
|---|
| 1375 | | - struct request *rq; |
|---|
| 1376 | | - struct elevator_type *et = q->elevator->type; |
|---|
| 1377 | | - struct io_context *ioc = rq_ioc(bio); |
|---|
| 1378 | | - struct io_cq *icq = NULL; |
|---|
| 1379 | | - const bool is_sync = op_is_sync(op); |
|---|
| 1380 | | - int may_queue; |
|---|
| 1381 | | - req_flags_t rq_flags = RQF_ALLOCED; |
|---|
| 1382 | | - |
|---|
| 1383 | | - lockdep_assert_held(q->queue_lock); |
|---|
| 1384 | | - |
|---|
| 1385 | | - if (unlikely(blk_queue_dying(q))) |
|---|
| 1386 | | - return ERR_PTR(-ENODEV); |
|---|
| 1387 | | - |
|---|
| 1388 | | - may_queue = elv_may_queue(q, op); |
|---|
| 1389 | | - if (may_queue == ELV_MQUEUE_NO) |
|---|
| 1390 | | - goto rq_starved; |
|---|
| 1391 | | - |
|---|
| 1392 | | - if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) { |
|---|
| 1393 | | - if (rl->count[is_sync]+1 >= q->nr_requests) { |
|---|
| 1394 | | - /* |
|---|
| 1395 | | - * The queue will fill after this allocation, so set |
|---|
| 1396 | | - * it as full, and mark this process as "batching". |
|---|
| 1397 | | - * This process will be allowed to complete a batch of |
|---|
| 1398 | | - * requests, others will be blocked. |
|---|
| 1399 | | - */ |
|---|
| 1400 | | - if (!blk_rl_full(rl, is_sync)) { |
|---|
| 1401 | | - ioc_set_batching(q, ioc); |
|---|
| 1402 | | - blk_set_rl_full(rl, is_sync); |
|---|
| 1403 | | - } else { |
|---|
| 1404 | | - if (may_queue != ELV_MQUEUE_MUST |
|---|
| 1405 | | - && !ioc_batching(q, ioc)) { |
|---|
| 1406 | | - /* |
|---|
| 1407 | | - * The queue is full and the allocating |
|---|
| 1408 | | - * process is not a "batcher", and not |
|---|
| 1409 | | - * exempted by the IO scheduler |
|---|
| 1410 | | - */ |
|---|
| 1411 | | - return ERR_PTR(-ENOMEM); |
|---|
| 1412 | | - } |
|---|
| 1413 | | - } |
|---|
| 1414 | | - } |
|---|
| 1415 | | - blk_set_congested(rl, is_sync); |
|---|
| 1416 | | - } |
|---|
| 1417 | | - |
|---|
| 1418 | | - /* |
|---|
| 1419 | | - * Only allow batching queuers to allocate up to 50% over the defined |
|---|
| 1420 | | - * limit of requests, otherwise we could have thousands of requests |
|---|
| 1421 | | - * allocated with any setting of ->nr_requests |
|---|
| 1422 | | - */ |
|---|
| 1423 | | - if (rl->count[is_sync] >= (3 * q->nr_requests / 2)) |
|---|
| 1424 | | - return ERR_PTR(-ENOMEM); |
|---|
| 1425 | | - |
|---|
| 1426 | | - q->nr_rqs[is_sync]++; |
|---|
| 1427 | | - rl->count[is_sync]++; |
|---|
| 1428 | | - rl->starved[is_sync] = 0; |
|---|
| 1429 | | - |
|---|
| 1430 | | - /* |
|---|
| 1431 | | - * Decide whether the new request will be managed by elevator. If |
|---|
| 1432 | | - * so, mark @rq_flags and increment elvpriv. Non-zero elvpriv will |
|---|
| 1433 | | - * prevent the current elevator from being destroyed until the new |
|---|
| 1434 | | - * request is freed. This guarantees icq's won't be destroyed and |
|---|
| 1435 | | - * makes creating new ones safe. |
|---|
| 1436 | | - * |
|---|
| 1437 | | - * Flush requests do not use the elevator so skip initialization. |
|---|
| 1438 | | - * This allows a request to share the flush and elevator data. |
|---|
| 1439 | | - * |
|---|
| 1440 | | - * Also, lookup icq while holding queue_lock. If it doesn't exist, |
|---|
| 1441 | | - * it will be created after releasing queue_lock. |
|---|
| 1442 | | - */ |
|---|
| 1443 | | - if (!op_is_flush(op) && !blk_queue_bypass(q)) { |
|---|
| 1444 | | - rq_flags |= RQF_ELVPRIV; |
|---|
| 1445 | | - q->nr_rqs_elvpriv++; |
|---|
| 1446 | | - if (et->icq_cache && ioc) |
|---|
| 1447 | | - icq = ioc_lookup_icq(ioc, q); |
|---|
| 1448 | | - } |
|---|
| 1449 | | - |
|---|
| 1450 | | - if (blk_queue_io_stat(q)) |
|---|
| 1451 | | - rq_flags |= RQF_IO_STAT; |
|---|
| 1452 | | - spin_unlock_irq(q->queue_lock); |
|---|
| 1453 | | - |
|---|
| 1454 | | - /* allocate and init request */ |
|---|
| 1455 | | - rq = mempool_alloc(rl->rq_pool, gfp_mask); |
|---|
| 1456 | | - if (!rq) |
|---|
| 1457 | | - goto fail_alloc; |
|---|
| 1458 | | - |
|---|
| 1459 | | - blk_rq_init(q, rq); |
|---|
| 1460 | | - blk_rq_set_rl(rq, rl); |
|---|
| 1461 | | - rq->cmd_flags = op; |
|---|
| 1462 | | - rq->rq_flags = rq_flags; |
|---|
| 1463 | | - if (flags & BLK_MQ_REQ_PREEMPT) |
|---|
| 1464 | | - rq->rq_flags |= RQF_PREEMPT; |
|---|
| 1465 | | - |
|---|
| 1466 | | - /* init elvpriv */ |
|---|
| 1467 | | - if (rq_flags & RQF_ELVPRIV) { |
|---|
| 1468 | | - if (unlikely(et->icq_cache && !icq)) { |
|---|
| 1469 | | - if (ioc) |
|---|
| 1470 | | - icq = ioc_create_icq(ioc, q, gfp_mask); |
|---|
| 1471 | | - if (!icq) |
|---|
| 1472 | | - goto fail_elvpriv; |
|---|
| 1473 | | - } |
|---|
| 1474 | | - |
|---|
| 1475 | | - rq->elv.icq = icq; |
|---|
| 1476 | | - if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) |
|---|
| 1477 | | - goto fail_elvpriv; |
|---|
| 1478 | | - |
|---|
| 1479 | | - /* @rq->elv.icq holds io_context until @rq is freed */ |
|---|
| 1480 | | - if (icq) |
|---|
| 1481 | | - get_io_context(icq->ioc); |
|---|
| 1482 | | - } |
|---|
| 1483 | | -out: |
|---|
| 1484 | | - /* |
|---|
| 1485 | | - * ioc may be NULL here, and ioc_batching will be false. That's |
|---|
| 1486 | | - * OK, if the queue is under the request limit then requests need |
|---|
| 1487 | | - * not count toward the nr_batch_requests limit. There will always |
|---|
| 1488 | | - * be some limit enforced by BLK_BATCH_TIME. |
|---|
| 1489 | | - */ |
|---|
| 1490 | | - if (ioc_batching(q, ioc)) |
|---|
| 1491 | | - ioc->nr_batch_requests--; |
|---|
| 1492 | | - |
|---|
| 1493 | | - trace_block_getrq(q, bio, op); |
|---|
| 1494 | | - return rq; |
|---|
| 1495 | | - |
|---|
| 1496 | | -fail_elvpriv: |
|---|
| 1497 | | - /* |
|---|
| 1498 | | - * elvpriv init failed. ioc, icq and elvpriv aren't mempool backed |
|---|
| 1499 | | - * and may fail indefinitely under memory pressure and thus |
|---|
| 1500 | | - * shouldn't stall IO. Treat this request as !elvpriv. This will |
|---|
| 1501 | | - * disturb iosched and blkcg but weird is better than dead. |
|---|
| 1502 | | - */ |
|---|
| 1503 | | - printk_ratelimited(KERN_WARNING "%s: dev %s: request aux data allocation failed, iosched may be disturbed\n", |
|---|
| 1504 | | - __func__, dev_name(q->backing_dev_info->dev)); |
|---|
| 1505 | | - |
|---|
| 1506 | | - rq->rq_flags &= ~RQF_ELVPRIV; |
|---|
| 1507 | | - rq->elv.icq = NULL; |
|---|
| 1508 | | - |
|---|
| 1509 | | - spin_lock_irq(q->queue_lock); |
|---|
| 1510 | | - q->nr_rqs_elvpriv--; |
|---|
| 1511 | | - spin_unlock_irq(q->queue_lock); |
|---|
| 1512 | | - goto out; |
|---|
| 1513 | | - |
|---|
| 1514 | | -fail_alloc: |
|---|
| 1515 | | - /* |
|---|
| 1516 | | - * Allocation failed presumably due to memory. Undo anything we |
|---|
| 1517 | | - * might have messed up. |
|---|
| 1518 | | - * |
|---|
| 1519 | | - * Allocating task should really be put onto the front of the wait |
|---|
| 1520 | | - * queue, but this is pretty rare. |
|---|
| 1521 | | - */ |
|---|
| 1522 | | - spin_lock_irq(q->queue_lock); |
|---|
| 1523 | | - freed_request(rl, is_sync, rq_flags); |
|---|
| 1524 | | - |
|---|
| 1525 | | - /* |
|---|
| 1526 | | - * In the very unlikely event that allocation failed and no |
|---|
| 1527 | | - * requests for this direction were pending, mark us starved so that |
|---|
| 1528 | | - * freeing of a request in the other direction will notice |
|---|
| 1529 | | - * us. Another possible fix would be to split the rq mempool into |
|---|
| 1530 | | - * READ and WRITE |
|---|
| 1531 | | - */ |
|---|
| 1532 | | -rq_starved: |
|---|
| 1533 | | - if (unlikely(rl->count[is_sync] == 0)) |
|---|
| 1534 | | - rl->starved[is_sync] = 1; |
|---|
| 1535 | | - return ERR_PTR(-ENOMEM); |
|---|
| 1536 | | -} |
|---|
| 1537 | | - |
|---|
| 1538 | | -/** |
|---|
| 1539 | | - * get_request - get a free request |
|---|
| 1540 | | - * @q: request_queue to allocate request from |
|---|
| 1541 | | - * @op: operation and flags |
|---|
| 1542 | | - * @bio: bio to allocate request for (can be %NULL) |
|---|
| 1543 | | - * @flags: BLK_MQ_REQ_* flags. |
|---|
| 1544 | | - * @gfp: allocator flags |
|---|
| 1545 | | - * |
|---|
| 1546 | | - * Get a free request from @q. Unless %BLK_MQ_REQ_NOWAIT is set in @flags, |
|---|
| 1547 | | - * this function keeps retrying under memory pressure and fails iff @q is dead. |
|---|
| 1548 | | - * |
|---|
| 1549 | | - * Must be called with @q->queue_lock held. |
|---|
| 1550 | | - * Returns ERR_PTR on failure, with @q->queue_lock held. |
|---|
| 1551 | | - * Returns request pointer on success, with @q->queue_lock *not held*. |
|---|
| 1552 | | - */ |
|---|
| 1553 | | -static struct request *get_request(struct request_queue *q, unsigned int op, |
|---|
| 1554 | | - struct bio *bio, blk_mq_req_flags_t flags, gfp_t gfp) |
|---|
| 1555 | | -{ |
|---|
| 1556 | | - const bool is_sync = op_is_sync(op); |
|---|
| 1557 | | - DEFINE_WAIT(wait); |
|---|
| 1558 | | - struct request_list *rl; |
|---|
| 1559 | | - struct request *rq; |
|---|
| 1560 | | - |
|---|
| 1561 | | - lockdep_assert_held(q->queue_lock); |
|---|
| 1562 | | - WARN_ON_ONCE(q->mq_ops); |
|---|
| 1563 | | - |
|---|
| 1564 | | - rl = blk_get_rl(q, bio); /* transferred to @rq on success */ |
|---|
| 1565 | | -retry: |
|---|
| 1566 | | - rq = __get_request(rl, op, bio, flags, gfp); |
|---|
| 1567 | | - if (!IS_ERR(rq)) |
|---|
| 1568 | | - return rq; |
|---|
| 1569 | | - |
|---|
| 1570 | | - if (op & REQ_NOWAIT) { |
|---|
| 1571 | | - blk_put_rl(rl); |
|---|
| 1572 | | - return ERR_PTR(-EAGAIN); |
|---|
| 1573 | | - } |
|---|
| 1574 | | - |
|---|
| 1575 | | - if ((flags & BLK_MQ_REQ_NOWAIT) || unlikely(blk_queue_dying(q))) { |
|---|
| 1576 | | - blk_put_rl(rl); |
|---|
| 1577 | | - return rq; |
|---|
| 1578 | | - } |
|---|
| 1579 | | - |
|---|
| 1580 | | - /* wait on @rl and retry */ |
|---|
| 1581 | | - prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, |
|---|
| 1582 | | - TASK_UNINTERRUPTIBLE); |
|---|
| 1583 | | - |
|---|
| 1584 | | - trace_block_sleeprq(q, bio, op); |
|---|
| 1585 | | - |
|---|
| 1586 | | - spin_unlock_irq(q->queue_lock); |
|---|
| 1587 | | - io_schedule(); |
|---|
| 1588 | | - |
|---|
| 1589 | | - /* |
|---|
| 1590 | | - * After sleeping, we become a "batching" process and will be able |
|---|
| 1591 | | - * to allocate at least one request, and up to a big batch of them |
|---|
| 1592 | | - * for a small period time. See ioc_batching, ioc_set_batching |
|---|
| 1593 | | - */ |
|---|
| 1594 | | - ioc_set_batching(q, current->io_context); |
|---|
| 1595 | | - |
|---|
| 1596 | | - spin_lock_irq(q->queue_lock); |
|---|
| 1597 | | - finish_wait(&rl->wait[is_sync], &wait); |
|---|
| 1598 | | - |
|---|
| 1599 | | - goto retry; |
|---|
| 1600 | | -} |
|---|
| 1601 | | - |
|---|
| 1602 | | -/* flags: BLK_MQ_REQ_PREEMPT and/or BLK_MQ_REQ_NOWAIT. */ |
|---|
| 1603 | | -static struct request *blk_old_get_request(struct request_queue *q, |
|---|
| 1604 | | - unsigned int op, blk_mq_req_flags_t flags) |
|---|
| 1605 | | -{ |
|---|
| 1606 | | - struct request *rq; |
|---|
| 1607 | | - gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC : GFP_NOIO; |
|---|
| 1608 | | - int ret = 0; |
|---|
| 1609 | | - |
|---|
| 1610 | | - WARN_ON_ONCE(q->mq_ops); |
|---|
| 1611 | | - |
|---|
| 1612 | | - /* create ioc upfront */ |
|---|
| 1613 | | - create_io_context(gfp_mask, q->node); |
|---|
| 1614 | | - |
|---|
| 1615 | | - ret = blk_queue_enter(q, flags); |
|---|
| 1616 | | - if (ret) |
|---|
| 1617 | | - return ERR_PTR(ret); |
|---|
| 1618 | | - spin_lock_irq(q->queue_lock); |
|---|
| 1619 | | - rq = get_request(q, op, NULL, flags, gfp_mask); |
|---|
| 1620 | | - if (IS_ERR(rq)) { |
|---|
| 1621 | | - spin_unlock_irq(q->queue_lock); |
|---|
| 1622 | | - blk_queue_exit(q); |
|---|
| 1623 | | - return rq; |
|---|
| 1624 | | - } |
|---|
| 1625 | | - |
|---|
| 1626 | | - /* q->queue_lock is unlocked at this point */ |
|---|
| 1627 | | - rq->__data_len = 0; |
|---|
| 1628 | | - rq->__sector = (sector_t) -1; |
|---|
| 1629 | | - rq->bio = rq->biotail = NULL; |
|---|
| 1630 | | - return rq; |
|---|
| 1631 | | -} |
|---|
| 1632 | 624 | |
|---|
| 1633 | 625 | /** |
|---|
| 1634 | 626 | * blk_get_request - allocate a request |
|---|
| .. | .. |
|---|
| 1642 | 634 | struct request *req; |
|---|
| 1643 | 635 | |
|---|
| 1644 | 636 | WARN_ON_ONCE(op & REQ_NOWAIT); |
|---|
| 1645 | | - WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PREEMPT)); |
|---|
| 637 | + WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PM)); |
|---|
| 1646 | 638 | |
|---|
| 1647 | | - if (q->mq_ops) { |
|---|
| 1648 | | - req = blk_mq_alloc_request(q, op, flags); |
|---|
| 1649 | | - if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn) |
|---|
| 1650 | | - q->mq_ops->initialize_rq_fn(req); |
|---|
| 1651 | | - } else { |
|---|
| 1652 | | - req = blk_old_get_request(q, op, flags); |
|---|
| 1653 | | - if (!IS_ERR(req) && q->initialize_rq_fn) |
|---|
| 1654 | | - q->initialize_rq_fn(req); |
|---|
| 1655 | | - } |
|---|
| 639 | + req = blk_mq_alloc_request(q, op, flags); |
|---|
| 640 | + if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn) |
|---|
| 641 | + q->mq_ops->initialize_rq_fn(req); |
|---|
| 1656 | 642 | |
|---|
| 1657 | 643 | return req; |
|---|
| 1658 | 644 | } |
|---|
| 1659 | 645 | EXPORT_SYMBOL(blk_get_request); |
|---|
| 1660 | 646 | |
|---|
| 1661 | | -/** |
|---|
| 1662 | | - * blk_requeue_request - put a request back on queue |
|---|
| 1663 | | - * @q: request queue where request should be inserted |
|---|
| 1664 | | - * @rq: request to be inserted |
|---|
| 1665 | | - * |
|---|
| 1666 | | - * Description: |
|---|
| 1667 | | - * Drivers often keep queueing requests until the hardware cannot accept |
|---|
| 1668 | | - * more, when that condition happens we need to put the request back |
|---|
| 1669 | | - * on the queue. Must be called with queue lock held. |
|---|
| 1670 | | - */ |
|---|
| 1671 | | -void blk_requeue_request(struct request_queue *q, struct request *rq) |
|---|
| 1672 | | -{ |
|---|
| 1673 | | - lockdep_assert_held(q->queue_lock); |
|---|
| 1674 | | - WARN_ON_ONCE(q->mq_ops); |
|---|
| 1675 | | - |
|---|
| 1676 | | - blk_delete_timer(rq); |
|---|
| 1677 | | - blk_clear_rq_complete(rq); |
|---|
| 1678 | | - trace_block_rq_requeue(q, rq); |
|---|
| 1679 | | - rq_qos_requeue(q, rq); |
|---|
| 1680 | | - |
|---|
| 1681 | | - if (rq->rq_flags & RQF_QUEUED) |
|---|
| 1682 | | - blk_queue_end_tag(q, rq); |
|---|
| 1683 | | - |
|---|
| 1684 | | - BUG_ON(blk_queued_rq(rq)); |
|---|
| 1685 | | - |
|---|
| 1686 | | - elv_requeue_request(q, rq); |
|---|
| 1687 | | -} |
|---|
| 1688 | | -EXPORT_SYMBOL(blk_requeue_request); |
|---|
| 1689 | | - |
|---|
| 1690 | | -static void add_acct_request(struct request_queue *q, struct request *rq, |
|---|
| 1691 | | - int where) |
|---|
| 1692 | | -{ |
|---|
| 1693 | | - blk_account_io_start(rq, true); |
|---|
| 1694 | | - __elv_add_request(q, rq, where); |
|---|
| 1695 | | -} |
|---|
| 1696 | | - |
|---|
| 1697 | | -static void part_round_stats_single(struct request_queue *q, int cpu, |
|---|
| 1698 | | - struct hd_struct *part, unsigned long now, |
|---|
| 1699 | | - unsigned int inflight) |
|---|
| 1700 | | -{ |
|---|
| 1701 | | - if (inflight) { |
|---|
| 1702 | | - __part_stat_add(cpu, part, time_in_queue, |
|---|
| 1703 | | - inflight * (now - part->stamp)); |
|---|
| 1704 | | - __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); |
|---|
| 1705 | | - } |
|---|
| 1706 | | - part->stamp = now; |
|---|
| 1707 | | -} |
|---|
| 1708 | | - |
|---|
| 1709 | | -/** |
|---|
| 1710 | | - * part_round_stats() - Round off the performance stats on a struct disk_stats. |
|---|
| 1711 | | - * @q: target block queue |
|---|
| 1712 | | - * @cpu: cpu number for stats access |
|---|
| 1713 | | - * @part: target partition |
|---|
| 1714 | | - * |
|---|
| 1715 | | - * The average IO queue length and utilisation statistics are maintained |
|---|
| 1716 | | - * by observing the current state of the queue length and the amount of |
|---|
| 1717 | | - * time it has been in this state for. |
|---|
| 1718 | | - * |
|---|
| 1719 | | - * Normally, that accounting is done on IO completion, but that can result |
|---|
| 1720 | | - * in more than a second's worth of IO being accounted for within any one |
|---|
| 1721 | | - * second, leading to >100% utilisation. To deal with that, we call this |
|---|
| 1722 | | - * function to do a round-off before returning the results when reading |
|---|
| 1723 | | - * /proc/diskstats. This accounts immediately for all queue usage up to |
|---|
| 1724 | | - * the current jiffies and restarts the counters again. |
|---|
| 1725 | | - */ |
|---|
| 1726 | | -void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part) |
|---|
| 1727 | | -{ |
|---|
| 1728 | | - struct hd_struct *part2 = NULL; |
|---|
| 1729 | | - unsigned long now = jiffies; |
|---|
| 1730 | | - unsigned int inflight[2]; |
|---|
| 1731 | | - int stats = 0; |
|---|
| 1732 | | - |
|---|
| 1733 | | - if (part->stamp != now) |
|---|
| 1734 | | - stats |= 1; |
|---|
| 1735 | | - |
|---|
| 1736 | | - if (part->partno) { |
|---|
| 1737 | | - part2 = &part_to_disk(part)->part0; |
|---|
| 1738 | | - if (part2->stamp != now) |
|---|
| 1739 | | - stats |= 2; |
|---|
| 1740 | | - } |
|---|
| 1741 | | - |
|---|
| 1742 | | - if (!stats) |
|---|
| 1743 | | - return; |
|---|
| 1744 | | - |
|---|
| 1745 | | - part_in_flight(q, part, inflight); |
|---|
| 1746 | | - |
|---|
| 1747 | | - if (stats & 2) |
|---|
| 1748 | | - part_round_stats_single(q, cpu, part2, now, inflight[1]); |
|---|
| 1749 | | - if (stats & 1) |
|---|
| 1750 | | - part_round_stats_single(q, cpu, part, now, inflight[0]); |
|---|
| 1751 | | -} |
|---|
| 1752 | | -EXPORT_SYMBOL_GPL(part_round_stats); |
|---|
| 1753 | | - |
|---|
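The accounting that part_round_stats() (removed above) performs boils down to folding the elapsed busy time into the counters whenever the stats are read, instead of waiting for request completion. A standalone sketch of that arithmetic is below; the structure and tick values are invented for illustration and only mirror what part_round_stats_single() did.

```c
#include <stdio.h>

/* Simplified per-partition counters, mirroring part_round_stats_single(). */
struct part_stats {
	unsigned long io_ticks;		/* time the device was busy */
	unsigned long time_in_queue;	/* busy time weighted by queue depth */
	unsigned long stamp;		/* last time the counters were folded */
};

static void round_stats(struct part_stats *p, unsigned long now,
			unsigned int inflight)
{
	if (inflight) {
		p->time_in_queue += inflight * (now - p->stamp);
		p->io_ticks += now - p->stamp;
	}
	p->stamp = now;
}

int main(void)
{
	struct part_stats p = { .stamp = 100 };

	/*
	 * Two requests have been in flight since tick 100; reading the stats
	 * at tick 250 accounts the 150 busy ticks immediately, so a later
	 * completion cannot push the reported utilisation past 100%.
	 */
	round_stats(&p, 250, 2);
	printf("io_ticks=%lu time_in_queue=%lu\n", p.io_ticks, p.time_in_queue);
	return 0;
}
```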
| 1754 | | -#ifdef CONFIG_PM |
|---|
| 1755 | | -static void blk_pm_put_request(struct request *rq) |
|---|
| 1756 | | -{ |
|---|
| 1757 | | - if (rq->q->dev && !(rq->rq_flags & RQF_PM) && !--rq->q->nr_pending) |
|---|
| 1758 | | - pm_runtime_mark_last_busy(rq->q->dev); |
|---|
| 1759 | | -} |
|---|
| 1760 | | -#else |
|---|
| 1761 | | -static inline void blk_pm_put_request(struct request *rq) {} |
|---|
| 1762 | | -#endif |
|---|
| 1763 | | - |
|---|
| 1764 | | -void __blk_put_request(struct request_queue *q, struct request *req) |
|---|
| 1765 | | -{ |
|---|
| 1766 | | - req_flags_t rq_flags = req->rq_flags; |
|---|
| 1767 | | - |
|---|
| 1768 | | - if (unlikely(!q)) |
|---|
| 1769 | | - return; |
|---|
| 1770 | | - |
|---|
| 1771 | | - if (q->mq_ops) { |
|---|
| 1772 | | - blk_mq_free_request(req); |
|---|
| 1773 | | - return; |
|---|
| 1774 | | - } |
|---|
| 1775 | | - |
|---|
| 1776 | | - lockdep_assert_held(q->queue_lock); |
|---|
| 1777 | | - |
|---|
| 1778 | | - blk_req_zone_write_unlock(req); |
|---|
| 1779 | | - blk_pm_put_request(req); |
|---|
| 1780 | | - |
|---|
| 1781 | | - elv_completed_request(q, req); |
|---|
| 1782 | | - |
|---|
| 1783 | | - /* this is a bio leak */ |
|---|
| 1784 | | - WARN_ON(req->bio != NULL); |
|---|
| 1785 | | - |
|---|
| 1786 | | - rq_qos_done(q, req); |
|---|
| 1787 | | - |
|---|
| 1788 | | - /* |
|---|
| 1789 | | - * Request may not have originated from ll_rw_blk. if not, |
|---|
| 1790 | | - * it didn't come out of our reserved rq pools |
|---|
| 1791 | | - */ |
|---|
| 1792 | | - if (rq_flags & RQF_ALLOCED) { |
|---|
| 1793 | | - struct request_list *rl = blk_rq_rl(req); |
|---|
| 1794 | | - bool sync = op_is_sync(req->cmd_flags); |
|---|
| 1795 | | - |
|---|
| 1796 | | - BUG_ON(!list_empty(&req->queuelist)); |
|---|
| 1797 | | - BUG_ON(ELV_ON_HASH(req)); |
|---|
| 1798 | | - |
|---|
| 1799 | | - blk_free_request(rl, req); |
|---|
| 1800 | | - freed_request(rl, sync, rq_flags); |
|---|
| 1801 | | - blk_put_rl(rl); |
|---|
| 1802 | | - blk_queue_exit(q); |
|---|
| 1803 | | - } |
|---|
| 1804 | | -} |
|---|
| 1805 | | -EXPORT_SYMBOL_GPL(__blk_put_request); |
|---|
| 1806 | | - |
|---|
| 1807 | 647 | void blk_put_request(struct request *req) |
|---|
| 1808 | 648 | { |
|---|
| 1809 | | - struct request_queue *q = req->q; |
|---|
| 1810 | | - |
|---|
| 1811 | | - if (q->mq_ops) |
|---|
| 1812 | | - blk_mq_free_request(req); |
|---|
| 1813 | | - else { |
|---|
| 1814 | | - unsigned long flags; |
|---|
| 1815 | | - |
|---|
| 1816 | | - spin_lock_irqsave(q->queue_lock, flags); |
|---|
| 1817 | | - __blk_put_request(q, req); |
|---|
| 1818 | | - spin_unlock_irqrestore(q->queue_lock, flags); |
|---|
| 1819 | | - } |
|---|
| 649 | + blk_mq_free_request(req); |
|---|
| 1820 | 650 | } |
|---|
| 1821 | 651 | EXPORT_SYMBOL(blk_put_request); |
|---|
| 1822 | | - |
|---|
| 1823 | | -bool bio_attempt_back_merge(struct request_queue *q, struct request *req, |
|---|
| 1824 | | - struct bio *bio) |
|---|
| 1825 | | -{ |
|---|
| 1826 | | - const int ff = bio->bi_opf & REQ_FAILFAST_MASK; |
|---|
| 1827 | | - |
|---|
| 1828 | | - if (!ll_back_merge_fn(q, req, bio)) |
|---|
| 1829 | | - return false; |
|---|
| 1830 | | - |
|---|
| 1831 | | - trace_block_bio_backmerge(q, req, bio); |
|---|
| 1832 | | - |
|---|
| 1833 | | - if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) |
|---|
| 1834 | | - blk_rq_set_mixed_merge(req); |
|---|
| 1835 | | - |
|---|
| 1836 | | - req->biotail->bi_next = bio; |
|---|
| 1837 | | - req->biotail = bio; |
|---|
| 1838 | | - req->__data_len += bio->bi_iter.bi_size; |
|---|
| 1839 | | - req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); |
|---|
| 1840 | | - |
|---|
| 1841 | | - blk_account_io_start(req, false); |
|---|
| 1842 | | - return true; |
|---|
| 1843 | | -} |
|---|
| 1844 | | - |
|---|
| 1845 | | -bool bio_attempt_front_merge(struct request_queue *q, struct request *req, |
|---|
| 1846 | | - struct bio *bio) |
|---|
| 1847 | | -{ |
|---|
| 1848 | | - const int ff = bio->bi_opf & REQ_FAILFAST_MASK; |
|---|
| 1849 | | - |
|---|
| 1850 | | - if (!ll_front_merge_fn(q, req, bio)) |
|---|
| 1851 | | - return false; |
|---|
| 1852 | | - |
|---|
| 1853 | | - trace_block_bio_frontmerge(q, req, bio); |
|---|
| 1854 | | - |
|---|
| 1855 | | - if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) |
|---|
| 1856 | | - blk_rq_set_mixed_merge(req); |
|---|
| 1857 | | - |
|---|
| 1858 | | - bio->bi_next = req->bio; |
|---|
| 1859 | | - req->bio = bio; |
|---|
| 1860 | | - |
|---|
| 1861 | | - req->__sector = bio->bi_iter.bi_sector; |
|---|
| 1862 | | - req->__data_len += bio->bi_iter.bi_size; |
|---|
| 1863 | | - req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); |
|---|
| 1864 | | - |
|---|
| 1865 | | - blk_account_io_start(req, false); |
|---|
| 1866 | | - return true; |
|---|
| 1867 | | -} |
|---|
| 1868 | | - |
|---|
| 1869 | | -bool bio_attempt_discard_merge(struct request_queue *q, struct request *req, |
|---|
| 1870 | | - struct bio *bio) |
|---|
| 1871 | | -{ |
|---|
| 1872 | | - unsigned short segments = blk_rq_nr_discard_segments(req); |
|---|
| 1873 | | - |
|---|
| 1874 | | - if (segments >= queue_max_discard_segments(q)) |
|---|
| 1875 | | - goto no_merge; |
|---|
| 1876 | | - if (blk_rq_sectors(req) + bio_sectors(bio) > |
|---|
| 1877 | | - blk_rq_get_max_sectors(req, blk_rq_pos(req))) |
|---|
| 1878 | | - goto no_merge; |
|---|
| 1879 | | - |
|---|
| 1880 | | - req->biotail->bi_next = bio; |
|---|
| 1881 | | - req->biotail = bio; |
|---|
| 1882 | | - req->__data_len += bio->bi_iter.bi_size; |
|---|
| 1883 | | - req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); |
|---|
| 1884 | | - req->nr_phys_segments = segments + 1; |
|---|
| 1885 | | - |
|---|
| 1886 | | - blk_account_io_start(req, false); |
|---|
| 1887 | | - return true; |
|---|
| 1888 | | -no_merge: |
|---|
| 1889 | | - req_set_nomerge(q, req); |
|---|
| 1890 | | - return false; |
|---|
| 1891 | | -} |
|---|
| 1892 | | - |
|---|
| 1893 | | -/** |
|---|
| 1894 | | - * blk_attempt_plug_merge - try to merge with %current's plugged list |
|---|
| 1895 | | - * @q: request_queue new bio is being queued at |
|---|
| 1896 | | - * @bio: new bio being queued |
|---|
| 1897 | | - * @request_count: out parameter for number of traversed plugged requests |
|---|
| 1898 | | - * @same_queue_rq: pointer to &struct request that gets filled in when |
|---|
| 1899 | | - * another request associated with @q is found on the plug list |
|---|
| 1900 | | - * (optional, may be %NULL) |
|---|
| 1901 | | - * |
|---|
| 1902 | | - * Determine whether @bio being queued on @q can be merged with a request |
|---|
| 1903 | | - * on %current's plugged list. Returns %true if merge was successful, |
|---|
| 1904 | | - * otherwise %false. |
|---|
| 1905 | | - * |
|---|
| 1906 | | - * Plugging coalesces IOs from the same issuer for the same purpose without |
|---|
| 1907 | | - * going through @q->queue_lock. As such it's more of an issuing mechanism |
|---|
| 1908 | | - * than scheduling, and the request, while it may have elvpriv data, is not |
|---|
| 1909 | | - * added on the elevator at this point. In addition, we don't have |
|---|
| 1910 | | - * reliable access to the elevator outside queue lock. Only check basic |
|---|
| 1911 | | - * merging parameters without querying the elevator. |
|---|
| 1912 | | - * |
|---|
| 1913 | | - * Caller must ensure !blk_queue_nomerges(q) beforehand. |
|---|
| 1914 | | - */ |
|---|
| 1915 | | -bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, |
|---|
| 1916 | | - unsigned int *request_count, |
|---|
| 1917 | | - struct request **same_queue_rq) |
|---|
| 1918 | | -{ |
|---|
| 1919 | | - struct blk_plug *plug; |
|---|
| 1920 | | - struct request *rq; |
|---|
| 1921 | | - struct list_head *plug_list; |
|---|
| 1922 | | - |
|---|
| 1923 | | - plug = current->plug; |
|---|
| 1924 | | - if (!plug) |
|---|
| 1925 | | - return false; |
|---|
| 1926 | | - *request_count = 0; |
|---|
| 1927 | | - |
|---|
| 1928 | | - if (q->mq_ops) |
|---|
| 1929 | | - plug_list = &plug->mq_list; |
|---|
| 1930 | | - else |
|---|
| 1931 | | - plug_list = &plug->list; |
|---|
| 1932 | | - |
|---|
| 1933 | | - list_for_each_entry_reverse(rq, plug_list, queuelist) { |
|---|
| 1934 | | - bool merged = false; |
|---|
| 1935 | | - |
|---|
| 1936 | | - if (rq->q == q) { |
|---|
| 1937 | | - (*request_count)++; |
|---|
| 1938 | | - /* |
|---|
| 1939 | | - * Only blk-mq multiple hardware queues case checks the |
|---|
| 1940 | | - * rq in the same queue, there should be only one such |
|---|
| 1941 | | - * rq in a queue |
|---|
| 1942 | | - **/ |
|---|
| 1943 | | - if (same_queue_rq) |
|---|
| 1944 | | - *same_queue_rq = rq; |
|---|
| 1945 | | - } |
|---|
| 1946 | | - |
|---|
| 1947 | | - if (rq->q != q || !blk_rq_merge_ok(rq, bio)) |
|---|
| 1948 | | - continue; |
|---|
| 1949 | | - |
|---|
| 1950 | | - switch (blk_try_merge(rq, bio)) { |
|---|
| 1951 | | - case ELEVATOR_BACK_MERGE: |
|---|
| 1952 | | - merged = bio_attempt_back_merge(q, rq, bio); |
|---|
| 1953 | | - break; |
|---|
| 1954 | | - case ELEVATOR_FRONT_MERGE: |
|---|
| 1955 | | - merged = bio_attempt_front_merge(q, rq, bio); |
|---|
| 1956 | | - break; |
|---|
| 1957 | | - case ELEVATOR_DISCARD_MERGE: |
|---|
| 1958 | | - merged = bio_attempt_discard_merge(q, rq, bio); |
|---|
| 1959 | | - break; |
|---|
| 1960 | | - default: |
|---|
| 1961 | | - break; |
|---|
| 1962 | | - } |
|---|
| 1963 | | - |
|---|
| 1964 | | - if (merged) |
|---|
| 1965 | | - return true; |
|---|
| 1966 | | - } |
|---|
| 1967 | | - |
|---|
| 1968 | | - return false; |
|---|
| 1969 | | -} |
|---|
| 1970 | | - |
|---|
| 1971 | | -unsigned int blk_plug_queued_count(struct request_queue *q) |
|---|
| 1972 | | -{ |
|---|
| 1973 | | - struct blk_plug *plug; |
|---|
| 1974 | | - struct request *rq; |
|---|
| 1975 | | - struct list_head *plug_list; |
|---|
| 1976 | | - unsigned int ret = 0; |
|---|
| 1977 | | - |
|---|
| 1978 | | - plug = current->plug; |
|---|
| 1979 | | - if (!plug) |
|---|
| 1980 | | - goto out; |
|---|
| 1981 | | - |
|---|
| 1982 | | - if (q->mq_ops) |
|---|
| 1983 | | - plug_list = &plug->mq_list; |
|---|
| 1984 | | - else |
|---|
| 1985 | | - plug_list = &plug->list; |
|---|
| 1986 | | - |
|---|
| 1987 | | - list_for_each_entry(rq, plug_list, queuelist) { |
|---|
| 1988 | | - if (rq->q == q) |
|---|
| 1989 | | - ret++; |
|---|
| 1990 | | - } |
|---|
| 1991 | | -out: |
|---|
| 1992 | | - return ret; |
|---|
| 1993 | | -} |
|---|
| 1994 | | - |
|---|
| 1995 | | -void blk_init_request_from_bio(struct request *req, struct bio *bio) |
|---|
| 1996 | | -{ |
|---|
| 1997 | | - struct io_context *ioc = rq_ioc(bio); |
|---|
| 1998 | | - |
|---|
| 1999 | | - if (bio->bi_opf & REQ_RAHEAD) |
|---|
| 2000 | | - req->cmd_flags |= REQ_FAILFAST_MASK; |
|---|
| 2001 | | - |
|---|
| 2002 | | - req->__sector = bio->bi_iter.bi_sector; |
|---|
| 2003 | | - if (ioprio_valid(bio_prio(bio))) |
|---|
| 2004 | | - req->ioprio = bio_prio(bio); |
|---|
| 2005 | | - else if (ioc) |
|---|
| 2006 | | - req->ioprio = ioc->ioprio; |
|---|
| 2007 | | - else |
|---|
| 2008 | | - req->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); |
|---|
| 2009 | | - req->write_hint = bio->bi_write_hint; |
|---|
| 2010 | | - blk_rq_bio_prep(req->q, req, bio); |
|---|
| 2011 | | -} |
|---|
| 2012 | | -EXPORT_SYMBOL_GPL(blk_init_request_from_bio); |
|---|
| 2013 | | - |
|---|
| 2014 | | -static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) |
|---|
| 2015 | | -{ |
|---|
| 2016 | | - struct blk_plug *plug; |
|---|
| 2017 | | - int where = ELEVATOR_INSERT_SORT; |
|---|
| 2018 | | - struct request *req, *free; |
|---|
| 2019 | | - unsigned int request_count = 0; |
|---|
| 2020 | | - |
|---|
| 2021 | | - /* |
|---|
| 2022 | | - * low level driver can indicate that it wants pages above a |
|---|
| 2023 | | - * certain limit bounced to low memory (ie for highmem, or even |
|---|
| 2024 | | - * ISA dma in theory) |
|---|
| 2025 | | - */ |
|---|
| 2026 | | - blk_queue_bounce(q, &bio); |
|---|
| 2027 | | - |
|---|
| 2028 | | - blk_queue_split(q, &bio); |
|---|
| 2029 | | - |
|---|
| 2030 | | - if (!bio_integrity_prep(bio)) |
|---|
| 2031 | | - return BLK_QC_T_NONE; |
|---|
| 2032 | | - |
|---|
| 2033 | | - if (op_is_flush(bio->bi_opf)) { |
|---|
| 2034 | | - spin_lock_irq(q->queue_lock); |
|---|
| 2035 | | - where = ELEVATOR_INSERT_FLUSH; |
|---|
| 2036 | | - goto get_rq; |
|---|
| 2037 | | - } |
|---|
| 2038 | | - |
|---|
| 2039 | | - /* |
|---|
| 2040 | | - * Check if we can merge with the plugged list before grabbing |
|---|
| 2041 | | - * any locks. |
|---|
| 2042 | | - */ |
|---|
| 2043 | | - if (!blk_queue_nomerges(q)) { |
|---|
| 2044 | | - if (blk_attempt_plug_merge(q, bio, &request_count, NULL)) |
|---|
| 2045 | | - return BLK_QC_T_NONE; |
|---|
| 2046 | | - } else |
|---|
| 2047 | | - request_count = blk_plug_queued_count(q); |
|---|
| 2048 | | - |
|---|
| 2049 | | - spin_lock_irq(q->queue_lock); |
|---|
| 2050 | | - |
|---|
| 2051 | | - switch (elv_merge(q, &req, bio)) { |
|---|
| 2052 | | - case ELEVATOR_BACK_MERGE: |
|---|
| 2053 | | - if (!bio_attempt_back_merge(q, req, bio)) |
|---|
| 2054 | | - break; |
|---|
| 2055 | | - elv_bio_merged(q, req, bio); |
|---|
| 2056 | | - free = attempt_back_merge(q, req); |
|---|
| 2057 | | - if (free) |
|---|
| 2058 | | - __blk_put_request(q, free); |
|---|
| 2059 | | - else |
|---|
| 2060 | | - elv_merged_request(q, req, ELEVATOR_BACK_MERGE); |
|---|
| 2061 | | - goto out_unlock; |
|---|
| 2062 | | - case ELEVATOR_FRONT_MERGE: |
|---|
| 2063 | | - if (!bio_attempt_front_merge(q, req, bio)) |
|---|
| 2064 | | - break; |
|---|
| 2065 | | - elv_bio_merged(q, req, bio); |
|---|
| 2066 | | - free = attempt_front_merge(q, req); |
|---|
| 2067 | | - if (free) |
|---|
| 2068 | | - __blk_put_request(q, free); |
|---|
| 2069 | | - else |
|---|
| 2070 | | - elv_merged_request(q, req, ELEVATOR_FRONT_MERGE); |
|---|
| 2071 | | - goto out_unlock; |
|---|
| 2072 | | - default: |
|---|
| 2073 | | - break; |
|---|
| 2074 | | - } |
|---|
| 2075 | | - |
|---|
| 2076 | | -get_rq: |
|---|
| 2077 | | - rq_qos_throttle(q, bio, q->queue_lock); |
|---|
| 2078 | | - |
|---|
| 2079 | | - /* |
|---|
| 2080 | | - * Grab a free request. This might sleep but can not fail. |
|---|
| 2081 | | - * Returns with the queue unlocked. |
|---|
| 2082 | | - */ |
|---|
| 2083 | | - blk_queue_enter_live(q); |
|---|
| 2084 | | - req = get_request(q, bio->bi_opf, bio, 0, GFP_NOIO); |
|---|
| 2085 | | - if (IS_ERR(req)) { |
|---|
| 2086 | | - blk_queue_exit(q); |
|---|
| 2087 | | - rq_qos_cleanup(q, bio); |
|---|
| 2088 | | - if (PTR_ERR(req) == -ENOMEM) |
|---|
| 2089 | | - bio->bi_status = BLK_STS_RESOURCE; |
|---|
| 2090 | | - else |
|---|
| 2091 | | - bio->bi_status = BLK_STS_IOERR; |
|---|
| 2092 | | - bio_endio(bio); |
|---|
| 2093 | | - goto out_unlock; |
|---|
| 2094 | | - } |
|---|
| 2095 | | - |
|---|
| 2096 | | - rq_qos_track(q, req, bio); |
|---|
| 2097 | | - |
|---|
| 2098 | | - /* |
|---|
| 2099 | | - * After dropping the lock and possibly sleeping here, our request |
|---|
| 2100 | | - * may now be mergeable after it had proven unmergeable (above). |
|---|
| 2101 | | - * We don't worry about that case for efficiency. It won't happen |
|---|
| 2102 | | - * often, and the elevators are able to handle it. |
|---|
| 2103 | | - */ |
|---|
| 2104 | | - blk_init_request_from_bio(req, bio); |
|---|
| 2105 | | - |
|---|
| 2106 | | - if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags)) |
|---|
| 2107 | | - req->cpu = raw_smp_processor_id(); |
|---|
| 2108 | | - |
|---|
| 2109 | | - plug = current->plug; |
|---|
| 2110 | | - if (plug) { |
|---|
| 2111 | | - /* |
|---|
| 2112 | | - * If this is the first request added after a plug, fire |
|---|
| 2113 | | - * off a plug trace. |
|---|
| 2114 | | - * |
|---|
| 2115 | | - * @request_count may become stale because of schedule |
|---|
| 2116 | | - * out, so check plug list again. |
|---|
| 2117 | | - */ |
|---|
| 2118 | | - if (!request_count || list_empty(&plug->list)) |
|---|
| 2119 | | - trace_block_plug(q); |
|---|
| 2120 | | - else { |
|---|
| 2121 | | - struct request *last = list_entry_rq(plug->list.prev); |
|---|
| 2122 | | - if (request_count >= BLK_MAX_REQUEST_COUNT || |
|---|
| 2123 | | - blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE) { |
|---|
| 2124 | | - blk_flush_plug_list(plug, false); |
|---|
| 2125 | | - trace_block_plug(q); |
|---|
| 2126 | | - } |
|---|
| 2127 | | - } |
|---|
| 2128 | | - list_add_tail(&req->queuelist, &plug->list); |
|---|
| 2129 | | - blk_account_io_start(req, true); |
|---|
| 2130 | | - } else { |
|---|
| 2131 | | - spin_lock_irq(q->queue_lock); |
|---|
| 2132 | | - add_acct_request(q, req, where); |
|---|
| 2133 | | - __blk_run_queue(q); |
|---|
| 2134 | | -out_unlock: |
|---|
| 2135 | | - spin_unlock_irq(q->queue_lock); |
|---|
| 2136 | | - } |
|---|
| 2137 | | - |
|---|
| 2138 | | - return BLK_QC_T_NONE; |
|---|
| 2139 | | -} |
|---|
| 2140 | 652 | |
|---|
| 2141 | 653 | static void handle_bad_sector(struct bio *bio, sector_t maxsector) |
|---|
| 2142 | 654 | { |
|---|
| 2143 | 655 | char b[BDEVNAME_SIZE]; |
|---|
| 2144 | 656 | |
|---|
| 2145 | | - printk(KERN_INFO "attempt to access beyond end of device\n"); |
|---|
| 2146 | | - printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n", |
|---|
| 2147 | | - bio_devname(bio, b), bio->bi_opf, |
|---|
| 2148 | | - (unsigned long long)bio_end_sector(bio), |
|---|
| 2149 | | - (long long)maxsector); |
|---|
| 657 | + pr_info_ratelimited("attempt to access beyond end of device\n" |
|---|
| 658 | + "%s: rw=%d, want=%llu, limit=%llu\n", |
|---|
| 659 | + bio_devname(bio, b), bio->bi_opf, |
|---|
| 660 | + bio_end_sector(bio), maxsector); |
|---|
| 2150 | 661 | } |
|---|
| 2151 | 662 | |
|---|
| 2152 | 663 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
|---|
| .. | .. |
|---|
| 2193 | 704 | |
|---|
| 2194 | 705 | if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) |
|---|
| 2195 | 706 | return false; |
|---|
| 2196 | | - |
|---|
| 2197 | | - WARN_ONCE(1, |
|---|
| 2198 | | - "generic_make_request: Trying to write " |
|---|
| 2199 | | - "to read-only block-device %s (partno %d)\n", |
|---|
| 707 | + pr_warn("Trying to write to read-only block-device %s (partno %d)\n", |
|---|
| 2200 | 708 | bio_devname(bio, b), part->partno); |
|---|
| 2201 | 709 | /* Older lvm-tools actually trigger this */ |
|---|
| 2202 | 710 | return false; |
|---|
| .. | .. |
|---|
| 2248 | 756 | if (unlikely(bio_check_ro(bio, p))) |
|---|
| 2249 | 757 | goto out; |
|---|
| 2250 | 758 | |
|---|
| 2251 | | - /* |
|---|
| 2252 | | - * Zone reset does not include bi_size so bio_sectors() is always 0. |
|---|
| 2253 | | - * Include a test for the reset op code and perform the remap if needed. |
|---|
| 2254 | | - */ |
|---|
| 2255 | | - if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) { |
|---|
| 759 | + if (bio_sectors(bio)) { |
|---|
| 2256 | 760 | if (bio_check_eod(bio, part_nr_sects_read(p))) |
|---|
| 2257 | 761 | goto out; |
|---|
| 2258 | 762 | bio->bi_iter.bi_sector += p->start_sect; |
|---|
| .. | .. |
|---|
| 2266 | 770 | return ret; |
|---|
| 2267 | 771 | } |
|---|
| 2268 | 772 | |
|---|
| 2269 | | -static noinline_for_stack bool |
|---|
| 2270 | | -generic_make_request_checks(struct bio *bio) |
|---|
| 773 | +/* |
|---|
| 774 | + * Check write append to a zoned block device. |
|---|
| 775 | + */ |
|---|
| 776 | +static inline blk_status_t blk_check_zone_append(struct request_queue *q, |
|---|
| 777 | + struct bio *bio) |
|---|
| 2271 | 778 | { |
|---|
| 2272 | | - struct request_queue *q; |
|---|
| 779 | + sector_t pos = bio->bi_iter.bi_sector; |
|---|
| 2273 | 780 | int nr_sectors = bio_sectors(bio); |
|---|
| 781 | + |
|---|
| 782 | + /* Only applicable to zoned block devices */ |
|---|
| 783 | + if (!blk_queue_is_zoned(q)) |
|---|
| 784 | + return BLK_STS_NOTSUPP; |
|---|
| 785 | + |
|---|
| 786 | + /* The bio sector must point to the start of a sequential zone */ |
|---|
| 787 | + if (pos & (blk_queue_zone_sectors(q) - 1) || |
|---|
| 788 | + !blk_queue_zone_is_seq(q, pos)) |
|---|
| 789 | + return BLK_STS_IOERR; |
|---|
| 790 | + |
|---|
| 791 | + /* |
|---|
| 792 | + * Not allowed to cross zone boundaries. Otherwise, the BIO will be |
|---|
| 793 | + * split and could result in non-contiguous sectors being written in |
|---|
| 794 | + * different zones. |
|---|
| 795 | + */ |
|---|
| 796 | + if (nr_sectors > q->limits.chunk_sectors) |
|---|
| 797 | + return BLK_STS_IOERR; |
|---|
| 798 | + |
|---|
| 799 | + /* Make sure the BIO is small enough and will not get split */ |
|---|
| 800 | + if (nr_sectors > q->limits.max_zone_append_sectors) |
|---|
| 801 | + return BLK_STS_IOERR; |
|---|
| 802 | + |
|---|
| 803 | + bio->bi_opf |= REQ_NOMERGE; |
|---|
| 804 | + |
|---|
| 805 | + return BLK_STS_OK; |
|---|
| 806 | +} |
|---|
| 807 | + |
|---|
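The zone append checks added above rely on the zone size being a power of two, so "starts at a zone boundary" reduces to a mask test. Here is a hedged userspace sketch of the same validation; the function name and the limits are made up and only stand in for blk_queue_zone_sectors(), chunk_sectors and max_zone_append_sectors.

```c
#include <stdbool.h>
#include <stdint.h>

#define ZONE_SECTORS		((uint64_t)1 << 16)	/* power of two, required */
#define MAX_APPEND_SECTORS	1024			/* device split limit */

static bool zone_append_ok(uint64_t pos, uint32_t nr_sectors)
{
	if (pos & (ZONE_SECTORS - 1))		/* must start a zone */
		return false;
	if (nr_sectors > ZONE_SECTORS)		/* must not cross into the next zone */
		return false;
	if (nr_sectors > MAX_APPEND_SECTORS)	/* must not need splitting */
		return false;
	return true;
}

int main(void)
{
	/* An append at the second zone's start, well under both limits. */
	return zone_append_ok(ZONE_SECTORS, 8) ? 0 : 1;
}
```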
| 808 | +static noinline_for_stack bool submit_bio_checks(struct bio *bio) |
|---|
| 809 | +{ |
|---|
| 810 | + struct request_queue *q = bio->bi_disk->queue; |
|---|
| 2274 | 811 | blk_status_t status = BLK_STS_IOERR; |
|---|
| 2275 | | - char b[BDEVNAME_SIZE]; |
|---|
| 812 | + struct blk_plug *plug; |
|---|
| 2276 | 813 | |
|---|
| 2277 | 814 | might_sleep(); |
|---|
| 2278 | 815 | |
|---|
| 2279 | | - q = bio->bi_disk->queue; |
|---|
| 2280 | | - if (unlikely(!q)) { |
|---|
| 2281 | | - printk(KERN_ERR |
|---|
| 2282 | | - "generic_make_request: Trying to access " |
|---|
| 2283 | | - "nonexistent block-device %s (%Lu)\n", |
|---|
| 2284 | | - bio_devname(bio, b), (long long)bio->bi_iter.bi_sector); |
|---|
| 2285 | | - goto end_io; |
|---|
| 2286 | | - } |
|---|
| 816 | + plug = blk_mq_plug(q, bio); |
|---|
| 817 | + if (plug && plug->nowait) |
|---|
| 818 | + bio->bi_opf |= REQ_NOWAIT; |
|---|
| 2287 | 819 | |
|---|
| 2288 | 820 | /* |
|---|
| 2289 | 821 | * For a REQ_NOWAIT based request, return -EOPNOTSUPP |
|---|
| 2290 | | - * if queue is not a request based queue. |
|---|
| 822 | + * if queue does not support NOWAIT. |
|---|
| 2291 | 823 | */ |
|---|
| 2292 | | - if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q)) |
|---|
| 824 | + if ((bio->bi_opf & REQ_NOWAIT) && !blk_queue_nowait(q)) |
|---|
| 2293 | 825 | goto not_supported; |
|---|
| 2294 | 826 | |
|---|
| 2295 | 827 | if (should_fail_bio(bio)) |
|---|
| .. | .. |
|---|
| 2306 | 838 | } |
|---|
| 2307 | 839 | |
|---|
| 2308 | 840 | /* |
|---|
| 2309 | | - * Filter flush bio's early so that make_request based |
|---|
| 2310 | | - * drivers without flush support don't have to worry |
|---|
| 2311 | | - * about them. |
|---|
| 841 | + * Filter flush bio's early so that bio based drivers without flush |
|---|
| 842 | + * support don't have to worry about them. |
|---|
| 2312 | 843 | */ |
|---|
| 2313 | 844 | if (op_is_flush(bio->bi_opf) && |
|---|
| 2314 | 845 | !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { |
|---|
| 2315 | 846 | bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA); |
|---|
| 2316 | | - if (!nr_sectors) { |
|---|
| 847 | + if (!bio_sectors(bio)) { |
|---|
| 2317 | 848 | status = BLK_STS_OK; |
|---|
| 2318 | 849 | goto end_io; |
|---|
| 2319 | 850 | } |
|---|
| 2320 | 851 | } |
|---|
| 852 | + |
|---|
| 853 | + if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) |
|---|
| 854 | + bio->bi_opf &= ~REQ_HIPRI; |
|---|
| 2321 | 855 | |
|---|
| 2322 | 856 | switch (bio_op(bio)) { |
|---|
| 2323 | 857 | case REQ_OP_DISCARD: |
|---|
| .. | .. |
|---|
| 2332 | 866 | if (!q->limits.max_write_same_sectors) |
|---|
| 2333 | 867 | goto not_supported; |
|---|
| 2334 | 868 | break; |
|---|
| 2335 | | - case REQ_OP_ZONE_REPORT: |
|---|
| 869 | + case REQ_OP_ZONE_APPEND: |
|---|
| 870 | + status = blk_check_zone_append(q, bio); |
|---|
| 871 | + if (status != BLK_STS_OK) |
|---|
| 872 | + goto end_io; |
|---|
| 873 | + break; |
|---|
| 2336 | 874 | case REQ_OP_ZONE_RESET: |
|---|
| 875 | + case REQ_OP_ZONE_OPEN: |
|---|
| 876 | + case REQ_OP_ZONE_CLOSE: |
|---|
| 877 | + case REQ_OP_ZONE_FINISH: |
|---|
| 2337 | 878 | if (!blk_queue_is_zoned(q)) |
|---|
| 879 | + goto not_supported; |
|---|
| 880 | + break; |
|---|
| 881 | + case REQ_OP_ZONE_RESET_ALL: |
|---|
| 882 | + if (!blk_queue_is_zoned(q) || !blk_queue_zone_resetall(q)) |
|---|
| 2338 | 883 | goto not_supported; |
|---|
| 2339 | 884 | break; |
|---|
| 2340 | 885 | case REQ_OP_WRITE_ZEROES: |
|---|
| .. | .. |
|---|
| 2346 | 891 | } |
|---|
| 2347 | 892 | |
|---|
| 2348 | 893 | /* |
|---|
| 2349 | | - * Various block parts want %current->io_context and lazy ioc |
|---|
| 2350 | | - * allocation ends up trading a lot of pain for a small amount of |
|---|
| 2351 | | - * memory. Just allocate it upfront. This may fail and block |
|---|
| 2352 | | - * layer knows how to live with it. |
|---|
| 894 | + * Various block parts want %current->io_context, so allocate it up |
|---|
| 895 | + * front rather than dealing with lots of pain to allocate it only |
|---|
| 896 | + * where needed. This may fail and the block layer knows how to live |
|---|
| 897 | + * with it. |
|---|
| 2353 | 898 | */ |
|---|
| 2354 | | - create_io_context(GFP_ATOMIC, q->node); |
|---|
| 899 | + if (unlikely(!current->io_context)) |
|---|
| 900 | + create_task_io_context(current, GFP_ATOMIC, q->node); |
|---|
| 2355 | 901 | |
|---|
| 2356 | | - if (!blkcg_bio_issue_check(q, bio)) |
|---|
| 902 | + if (blk_throtl_bio(bio)) |
|---|
| 2357 | 903 | return false; |
|---|
| 904 | + |
|---|
| 905 | + blk_cgroup_bio_start(bio); |
|---|
| 906 | + blkcg_bio_issue_init(bio); |
|---|
| 2358 | 907 | |
|---|
| 2359 | 908 | if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) { |
|---|
| 2360 | 909 | trace_block_bio_queue(q, bio); |
|---|
| .. | .. |
|---|
| 2373 | 922 | return false; |
|---|
| 2374 | 923 | } |
|---|
| 2375 | 924 | |
|---|
| 2376 | | -/** |
|---|
| 2377 | | - * generic_make_request - hand a buffer to its device driver for I/O |
|---|
| 2378 | | - * @bio: The bio describing the location in memory and on the device. |
|---|
| 2379 | | - * |
|---|
| 2380 | | - * generic_make_request() is used to make I/O requests of block |
|---|
| 2381 | | - * devices. It is passed a &struct bio, which describes the I/O that needs |
|---|
| 2382 | | - * to be done. |
|---|
| 2383 | | - * |
|---|
| 2384 | | - * generic_make_request() does not return any status. The |
|---|
| 2385 | | - * success/failure status of the request, along with notification of |
|---|
| 2386 | | - * completion, is delivered asynchronously through the bio->bi_end_io |
|---|
| 2387 | | - * function described (one day) else where. |
|---|
| 2388 | | - * |
|---|
| 2389 | | - * The caller of generic_make_request must make sure that bi_io_vec |
|---|
| 2390 | | - * are set to describe the memory buffer, and that bi_dev and bi_sector are |
|---|
| 2391 | | - * set to describe the device address, and the |
|---|
| 2392 | | - * bi_end_io and optionally bi_private are set to describe how |
|---|
| 2393 | | - * completion notification should be signaled. |
|---|
| 2394 | | - * |
|---|
| 2395 | | - * generic_make_request and the drivers it calls may use bi_next if this |
|---|
| 2396 | | - * bio happens to be merged with someone else, and may resubmit the bio to |
|---|
| 2397 | | - * a lower device by calling into generic_make_request recursively, which |
|---|
| 2398 | | - * means the bio should NOT be touched after the call to ->make_request_fn. |
|---|
| 2399 | | - */ |
|---|
| 2400 | | -blk_qc_t generic_make_request(struct bio *bio) |
|---|
| 925 | +static blk_qc_t __submit_bio(struct bio *bio) |
|---|
| 2401 | 926 | { |
|---|
| 2402 | | - /* |
|---|
| 2403 | | - * bio_list_on_stack[0] contains bios submitted by the current |
|---|
| 2404 | | - * make_request_fn. |
|---|
| 2405 | | - * bio_list_on_stack[1] contains bios that were submitted before |
|---|
| 2406 | | - * the current make_request_fn, but that haven't been processed |
|---|
| 2407 | | - * yet. |
|---|
| 2408 | | - */ |
|---|
| 2409 | | - struct bio_list bio_list_on_stack[2]; |
|---|
| 2410 | | - blk_mq_req_flags_t flags = 0; |
|---|
| 2411 | | - struct request_queue *q = bio->bi_disk->queue; |
|---|
| 927 | + struct gendisk *disk = bio->bi_disk; |
|---|
| 2412 | 928 | blk_qc_t ret = BLK_QC_T_NONE; |
|---|
| 2413 | 929 | |
|---|
| 2414 | | - if (bio->bi_opf & REQ_NOWAIT) |
|---|
| 2415 | | - flags = BLK_MQ_REQ_NOWAIT; |
|---|
| 2416 | | - if (bio_flagged(bio, BIO_QUEUE_ENTERED)) |
|---|
| 2417 | | - blk_queue_enter_live(q); |
|---|
| 2418 | | - else if (blk_queue_enter(q, flags) < 0) { |
|---|
| 2419 | | - if (!blk_queue_dying(q) && (bio->bi_opf & REQ_NOWAIT)) |
|---|
| 2420 | | - bio_wouldblock_error(bio); |
|---|
| 2421 | | - else |
|---|
| 2422 | | - bio_io_error(bio); |
|---|
| 2423 | | - return ret; |
|---|
| 930 | + if (blk_crypto_bio_prep(&bio)) { |
|---|
| 931 | + if (!disk->fops->submit_bio) |
|---|
| 932 | + return blk_mq_submit_bio(bio); |
|---|
| 933 | + ret = disk->fops->submit_bio(bio); |
|---|
| 2424 | 934 | } |
|---|
| 935 | + blk_queue_exit(disk->queue); |
|---|
| 936 | + return ret; |
|---|
| 937 | +} |
|---|
| 2425 | 938 | |
|---|
| 2426 | | - if (!generic_make_request_checks(bio)) |
|---|
| 2427 | | - goto out; |
|---|
| 939 | +/* |
|---|
| 940 | + * The loop in this function may be a bit non-obvious, and so deserves some |
|---|
| 941 | + * explanation: |
|---|
| 942 | + * |
|---|
| 943 | + * - Before entering the loop, bio->bi_next is NULL (as all callers ensure |
|---|
| 944 | + * that), so we have a list with a single bio. |
|---|
| 945 | + * - We pretend that we have just taken it off a longer list, so we assign |
|---|
| 946 | + * bio_list to a pointer to the bio_list_on_stack, thus initialising the |
|---|
| 947 | + * bio_list of new bios to be added. ->submit_bio() may indeed add some more |
|---|
| 948 | + * bios through a recursive call to submit_bio_noacct. If it did, we find a |
|---|
| 949 | + * non-NULL value in bio_list and re-enter the loop from the top. |
|---|
| 950 | + * - In this case we really did just take the bio off the top of the list (no |
|---|
| 951 | + * pretending) and so remove it from bio_list, and call into ->submit_bio() |
|---|
| 952 | + * again. |
|---|
| 953 | + * |
|---|
| 954 | + * bio_list_on_stack[0] contains bios submitted by the current ->submit_bio. |
|---|
| 955 | + * bio_list_on_stack[1] contains bios that were submitted before the current |
|---|
| 956 | + * ->submit_bio, but that haven't been processed yet. |
|---|
| 957 | + */ |
|---|
| 958 | +static blk_qc_t __submit_bio_noacct(struct bio *bio) |
|---|
| 959 | +{ |
|---|
| 960 | + struct bio_list bio_list_on_stack[2]; |
|---|
| 961 | + blk_qc_t ret = BLK_QC_T_NONE; |
|---|
| 962 | + |
|---|
| 963 | + BUG_ON(bio->bi_next); |
|---|
| 964 | + |
|---|
| 965 | + bio_list_init(&bio_list_on_stack[0]); |
|---|
| 966 | + current->bio_list = bio_list_on_stack; |
|---|
| 967 | + |
|---|
| 968 | + do { |
|---|
| 969 | + struct request_queue *q = bio->bi_disk->queue; |
|---|
| 970 | + struct bio_list lower, same; |
|---|
| 971 | + |
|---|
| 972 | + if (unlikely(bio_queue_enter(bio) != 0)) |
|---|
| 973 | + continue; |
|---|
| 974 | + |
|---|
| 975 | + /* |
|---|
| 976 | + * Create a fresh bio_list for all subordinate requests. |
|---|
| 977 | + */ |
|---|
| 978 | + bio_list_on_stack[1] = bio_list_on_stack[0]; |
|---|
| 979 | + bio_list_init(&bio_list_on_stack[0]); |
|---|
| 980 | + |
|---|
| 981 | + ret = __submit_bio(bio); |
|---|
| 982 | + |
|---|
| 983 | + /* |
|---|
| 984 | + * Sort new bios into those for a lower level and those for the |
|---|
| 985 | + * same level. |
|---|
| 986 | + */ |
|---|
| 987 | + bio_list_init(&lower); |
|---|
| 988 | + bio_list_init(&same); |
|---|
| 989 | + while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL) |
|---|
| 990 | + if (q == bio->bi_disk->queue) |
|---|
| 991 | + bio_list_add(&same, bio); |
|---|
| 992 | + else |
|---|
| 993 | + bio_list_add(&lower, bio); |
|---|
| 994 | + |
|---|
| 995 | + /* |
|---|
| 996 | + * Now assemble so we handle the lowest level first. |
|---|
| 997 | + */ |
|---|
| 998 | + bio_list_merge(&bio_list_on_stack[0], &lower); |
|---|
| 999 | + bio_list_merge(&bio_list_on_stack[0], &same); |
|---|
| 1000 | + bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]); |
|---|
| 1001 | + } while ((bio = bio_list_pop(&bio_list_on_stack[0]))); |
|---|
| 1002 | + |
|---|
| 1003 | + current->bio_list = NULL; |
|---|
| 1004 | + return ret; |
|---|
| 1005 | +} |
|---|
| 1006 | + |
|---|
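The comment above describes the central trick in __submit_bio_noacct(): a stacking driver never recurses into the submission path; any bios it generates are parked on a per-task list and drained iteratively by the outermost caller, so stack usage stays flat. The following is a much-simplified userspace model of that control flow; the fake_bio type and helpers are invented, and the lower/same ordering and queue references are deliberately ignored.

```c
#include <stdio.h>
#include <stdlib.h>

struct fake_bio {
	int layer;			/* stacking layers still to remap through */
	struct fake_bio *next;
};

static struct fake_bio *pending;	/* plays the role of current->bio_list */

/* A driver "resubmits" by queueing, never by calling back into submit. */
static void resubmit(struct fake_bio *bio)
{
	bio->next = pending;
	pending = bio;
}

static void driver_submit(struct fake_bio *bio)
{
	printf("handling bio at layer %d\n", bio->layer);
	if (bio->layer > 0) {
		struct fake_bio *clone = malloc(sizeof(*clone));

		clone->layer = bio->layer - 1;
		resubmit(clone);
	}
	free(bio);
}

int main(void)
{
	struct fake_bio *bio = malloc(sizeof(*bio));

	bio->layer = 3;
	bio->next = NULL;
	pending = bio;

	/* The outermost caller drains the list; no recursion, flat stack. */
	while ((bio = pending) != NULL) {
		pending = bio->next;
		driver_submit(bio);
	}
	return 0;
}
```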
| 1007 | +static blk_qc_t __submit_bio_noacct_mq(struct bio *bio) |
|---|
| 1008 | +{ |
|---|
| 1009 | + struct bio_list bio_list[2] = { }; |
|---|
| 1010 | + blk_qc_t ret = BLK_QC_T_NONE; |
|---|
| 1011 | + |
|---|
| 1012 | + current->bio_list = bio_list; |
|---|
| 1013 | + |
|---|
| 1014 | + do { |
|---|
| 1015 | + struct gendisk *disk = bio->bi_disk; |
|---|
| 1016 | + |
|---|
| 1017 | + if (unlikely(bio_queue_enter(bio) != 0)) |
|---|
| 1018 | + continue; |
|---|
| 1019 | + |
|---|
| 1020 | + if (!blk_crypto_bio_prep(&bio)) { |
|---|
| 1021 | + blk_queue_exit(disk->queue); |
|---|
| 1022 | + ret = BLK_QC_T_NONE; |
|---|
| 1023 | + continue; |
|---|
| 1024 | + } |
|---|
| 1025 | + |
|---|
| 1026 | + ret = blk_mq_submit_bio(bio); |
|---|
| 1027 | + } while ((bio = bio_list_pop(&bio_list[0]))); |
|---|
| 1028 | + |
|---|
| 1029 | + current->bio_list = NULL; |
|---|
| 1030 | + return ret; |
|---|
| 1031 | +} |
|---|
| 1032 | + |
|---|
| 1033 | +/** |
|---|
| 1034 | + * submit_bio_noacct - re-submit a bio to the block device layer for I/O |
|---|
| 1035 | + * @bio: The bio describing the location in memory and on the device. |
|---|
| 1036 | + * |
|---|
| 1037 | + * This is a version of submit_bio() that shall only be used for I/O that is |
|---|
| 1038 | + * resubmitted to lower level drivers by stacking block drivers. All file |
|---|
| 1039 | + * systems and other upper level users of the block layer should use |
|---|
| 1040 | + * submit_bio() instead. |
|---|
| 1041 | + */ |
|---|
| 1042 | +blk_qc_t submit_bio_noacct(struct bio *bio) |
|---|
| 1043 | +{ |
|---|
| 1044 | + if (!submit_bio_checks(bio)) |
|---|
| 1045 | + return BLK_QC_T_NONE; |
|---|
| 2428 | 1046 | |
|---|
| 2429 | 1047 | /* |
|---|
| 2430 | | - * We only want one ->make_request_fn to be active at a time, else |
|---|
| 2431 | | - * stack usage with stacked devices could be a problem. So use |
|---|
| 2432 | | - * current->bio_list to keep a list of requests submited by a |
|---|
| 2433 | | - * make_request_fn function. current->bio_list is also used as a |
|---|
| 2434 | | - * flag to say if generic_make_request is currently active in this |
|---|
| 2435 | | - * task or not. If it is NULL, then no make_request is active. If |
|---|
| 2436 | | - * it is non-NULL, then a make_request is active, and new requests |
|---|
| 2437 | | - * should be added at the tail |
|---|
| 1048 | + * We only want one ->submit_bio to be active at a time, else stack |
|---|
| 1049 | + * usage with stacked devices could be a problem. Use current->bio_list |
|---|
| 1050 | + * to collect a list of requests submitted by a ->submit_bio method while |
|---|
| 1051 | + * it is active, and then process them after it returns. |
|---|
| 2438 | 1052 | */ |
|---|
| 2439 | 1053 | if (current->bio_list) { |
|---|
| 2440 | 1054 | bio_list_add(¤t->bio_list[0], bio); |
|---|
| 2441 | | - goto out; |
|---|
| 2442 | | - } |
|---|
| 2443 | | - |
|---|
| 2444 | | - /* following loop may be a bit non-obvious, and so deserves some |
|---|
| 2445 | | - * explanation. |
|---|
| 2446 | | - * Before entering the loop, bio->bi_next is NULL (as all callers |
|---|
| 2447 | | - * ensure that) so we have a list with a single bio. |
|---|
| 2448 | | - * We pretend that we have just taken it off a longer list, so |
|---|
| 2449 | | - * we assign bio_list to a pointer to the bio_list_on_stack, |
|---|
| 2450 | | - * thus initialising the bio_list of new bios to be |
|---|
| 2451 | | - * added. ->make_request() may indeed add some more bios |
|---|
| 2452 | | - * through a recursive call to generic_make_request. If it |
|---|
| 2453 | | - * did, we find a non-NULL value in bio_list and re-enter the loop |
|---|
| 2454 | | - * from the top. In this case we really did just take the bio |
|---|
| 2455 | | - * of the top of the list (no pretending) and so remove it from |
|---|
| 2456 | | - * bio_list, and call into ->make_request() again. |
|---|
| 2457 | | - */ |
|---|
| 2458 | | - BUG_ON(bio->bi_next); |
|---|
| 2459 | | - bio_list_init(&bio_list_on_stack[0]); |
|---|
| 2460 | | - current->bio_list = bio_list_on_stack; |
|---|
| 2461 | | - do { |
|---|
| 2462 | | - bool enter_succeeded = true; |
|---|
| 2463 | | - |
|---|
| 2464 | | - if (unlikely(q != bio->bi_disk->queue)) { |
|---|
| 2465 | | - if (q) |
|---|
| 2466 | | - blk_queue_exit(q); |
|---|
| 2467 | | - q = bio->bi_disk->queue; |
|---|
| 2468 | | - flags = 0; |
|---|
| 2469 | | - if (bio->bi_opf & REQ_NOWAIT) |
|---|
| 2470 | | - flags = BLK_MQ_REQ_NOWAIT; |
|---|
| 2471 | | - if (blk_queue_enter(q, flags) < 0) |
|---|
| 2472 | | - enter_succeeded = false; |
|---|
| 2473 | | - } |
|---|
| 2474 | | - |
|---|
| 2475 | | - if (enter_succeeded) { |
|---|
| 2476 | | - struct bio_list lower, same; |
|---|
| 2477 | | - |
|---|
| 2478 | | - /* Create a fresh bio_list for all subordinate requests */ |
|---|
| 2479 | | - bio_list_on_stack[1] = bio_list_on_stack[0]; |
|---|
| 2480 | | - bio_list_init(&bio_list_on_stack[0]); |
|---|
| 2481 | | - |
|---|
| 2482 | | - if (!blk_crypto_submit_bio(&bio)) |
|---|
| 2483 | | - ret = q->make_request_fn(q, bio); |
|---|
| 2484 | | - |
|---|
| 2485 | | - /* sort new bios into those for a lower level |
|---|
| 2486 | | - * and those for the same level |
|---|
| 2487 | | - */ |
|---|
| 2488 | | - bio_list_init(&lower); |
|---|
| 2489 | | - bio_list_init(&same); |
|---|
| 2490 | | - while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL) |
|---|
| 2491 | | - if (q == bio->bi_disk->queue) |
|---|
| 2492 | | - bio_list_add(&same, bio); |
|---|
| 2493 | | - else |
|---|
| 2494 | | - bio_list_add(&lower, bio); |
|---|
| 2495 | | - /* now assemble so we handle the lowest level first */ |
|---|
| 2496 | | - bio_list_merge(&bio_list_on_stack[0], &lower); |
|---|
| 2497 | | - bio_list_merge(&bio_list_on_stack[0], &same); |
|---|
| 2498 | | - bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]); |
|---|
| 2499 | | - } else { |
|---|
| 2500 | | - if (unlikely(!blk_queue_dying(q) && |
|---|
| 2501 | | - (bio->bi_opf & REQ_NOWAIT))) |
|---|
| 2502 | | - bio_wouldblock_error(bio); |
|---|
| 2503 | | - else |
|---|
| 2504 | | - bio_io_error(bio); |
|---|
| 2505 | | - q = NULL; |
|---|
| 2506 | | - } |
|---|
| 2507 | | - bio = bio_list_pop(&bio_list_on_stack[0]); |
|---|
| 2508 | | - } while (bio); |
|---|
| 2509 | | - current->bio_list = NULL; /* deactivate */ |
|---|
| 2510 | | - |
|---|
| 2511 | | -out: |
|---|
| 2512 | | - if (q) |
|---|
| 2513 | | - blk_queue_exit(q); |
|---|
| 2514 | | - return ret; |
|---|
| 2515 | | -} |
|---|
| 2516 | | -EXPORT_SYMBOL(generic_make_request); |
|---|
| 2517 | | - |
|---|
| 2518 | | -/** |
|---|
| 2519 | | - * direct_make_request - hand a buffer directly to its device driver for I/O |
|---|
| 2520 | | - * @bio: The bio describing the location in memory and on the device. |
|---|
| 2521 | | - * |
|---|
| 2522 | | - * This function behaves like generic_make_request(), but does not protect |
|---|
| 2523 | | - * against recursion. Must only be used if the called driver is known |
|---|
| 2524 | | - * to not call generic_make_request (or direct_make_request) again from |
|---|
| 2525 | | - * its make_request function. (Calling direct_make_request again from |
|---|
| 2526 | | - * a workqueue is perfectly fine as that doesn't recurse). |
|---|
| 2527 | | - */ |
|---|
| 2528 | | -blk_qc_t direct_make_request(struct bio *bio) |
|---|
| 2529 | | -{ |
|---|
| 2530 | | - struct request_queue *q = bio->bi_disk->queue; |
|---|
| 2531 | | - bool nowait = bio->bi_opf & REQ_NOWAIT; |
|---|
| 2532 | | - blk_qc_t ret = BLK_QC_T_NONE; |
|---|
| 2533 | | - |
|---|
| 2534 | | - if (!generic_make_request_checks(bio)) |
|---|
| 2535 | | - return BLK_QC_T_NONE; |
|---|
| 2536 | | - |
|---|
| 2537 | | - if (unlikely(blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0))) { |
|---|
| 2538 | | - if (nowait && !blk_queue_dying(q)) |
|---|
| 2539 | | - bio->bi_status = BLK_STS_AGAIN; |
|---|
| 2540 | | - else |
|---|
| 2541 | | - bio->bi_status = BLK_STS_IOERR; |
|---|
| 2542 | | - bio_endio(bio); |
|---|
| 2543 | 1055 | return BLK_QC_T_NONE; |
|---|
| 2544 | 1056 | } |
|---|
| 2545 | 1057 | |
|---|
| 2546 | | - if (!blk_crypto_submit_bio(&bio)) |
|---|
| 2547 | | - ret = q->make_request_fn(q, bio); |
|---|
| 2548 | | - blk_queue_exit(q); |
|---|
| 2549 | | - return ret; |
|---|
| 1058 | + if (!bio->bi_disk->fops->submit_bio) |
|---|
| 1059 | + return __submit_bio_noacct_mq(bio); |
|---|
| 1060 | + return __submit_bio_noacct(bio); |
|---|
| 2550 | 1061 | } |
|---|
| 2551 | | -EXPORT_SYMBOL_GPL(direct_make_request); |
|---|
| 1062 | +EXPORT_SYMBOL(submit_bio_noacct); |
|---|
| 2552 | 1063 | |
|---|
| 2553 | 1064 | /** |
|---|
| 2554 | 1065 | * submit_bio - submit a bio to the block device layer for I/O |
|---|
| 2555 | 1066 | * @bio: The &struct bio which describes the I/O |
|---|
| 2556 | 1067 | * |
|---|
| 2557 | | - * submit_bio() is very similar in purpose to generic_make_request(), and |
|---|
| 2558 | | - * uses that function to do most of the work. Both are fairly rough |
|---|
| 2559 | | - * interfaces; @bio must be presetup and ready for I/O. |
|---|
| 1068 | + * submit_bio() is used to submit I/O requests to block devices. It is passed a |
|---|
| 1069 | + * fully set up &struct bio that describes the I/O that needs to be done. The |
|---|
| 1070 | + * bio will be sent to the device described by the bi_disk and bi_partno fields. |
|---|
| 2560 | 1071 | * |
|---|
| 1072 | + * The success/failure status of the request, along with notification of |
|---|
| 1073 | + * completion, is delivered asynchronously through the ->bi_end_io() callback |
|---|
| 1074 | + * in @bio. The bio must NOT be touched by the caller until ->bi_end_io() has |
|---|
| 1075 | + * been called. |
|---|
| 2561 | 1076 | */ |
|---|
| 2562 | 1077 | blk_qc_t submit_bio(struct bio *bio) |
|---|
| 2563 | 1078 | { |
|---|
| 2564 | | - bool workingset_read = false; |
|---|
| 2565 | | - unsigned long pflags; |
|---|
| 2566 | | - blk_qc_t ret; |
|---|
| 1079 | + if (blkcg_punt_bio_submit(bio)) |
|---|
| 1080 | + return BLK_QC_T_NONE; |
|---|
| 2567 | 1081 | |
|---|
| 2568 | 1082 | /* |
|---|
| 2569 | 1083 | * If it's a regular read/write or a barrier with data attached, |
|---|
| .. | .. |
|---|
| 2580 | 1094 | if (op_is_write(bio_op(bio))) { |
|---|
| 2581 | 1095 | count_vm_events(PGPGOUT, count); |
|---|
| 2582 | 1096 | } else { |
|---|
| 2583 | | - if (bio_flagged(bio, BIO_WORKINGSET)) |
|---|
| 2584 | | - workingset_read = true; |
|---|
| 2585 | 1097 | task_io_account_read(bio->bi_iter.bi_size); |
|---|
| 2586 | 1098 | count_vm_events(PGPGIN, count); |
|---|
| 2587 | 1099 | } |
|---|
| .. | .. |
|---|
| 2597 | 1109 | } |
|---|
| 2598 | 1110 | |
|---|
| 2599 | 1111 | /* |
|---|
| 2600 | | - * If we're reading data that is part of the userspace |
|---|
| 2601 | | - * workingset, count submission time as memory stall. When the |
|---|
| 2602 | | - * device is congested, or the submitting cgroup IO-throttled, |
|---|
| 2603 | | - * submission can be a significant part of overall IO time. |
|---|
| 1112 | + * If we're reading data that is part of the userspace workingset, count |
|---|
| 1113 | + * submission time as memory stall. When the device is congested, or |
|---|
| 1114 | + * the submitting cgroup IO-throttled, submission can be a significant |
|---|
| 1115 | + * part of overall IO time. |
|---|
| 2604 | 1116 | */ |
|---|
| 2605 | | - if (workingset_read) |
|---|
| 1117 | + if (unlikely(bio_op(bio) == REQ_OP_READ && |
|---|
| 1118 | + bio_flagged(bio, BIO_WORKINGSET))) { |
|---|
| 1119 | + unsigned long pflags; |
|---|
| 1120 | + blk_qc_t ret; |
|---|
| 1121 | + |
|---|
| 2606 | 1122 | psi_memstall_enter(&pflags); |
|---|
| 2607 | | - |
|---|
| 2608 | | - ret = generic_make_request(bio); |
|---|
| 2609 | | - |
|---|
| 2610 | | - if (workingset_read) |
|---|
| 1123 | + ret = submit_bio_noacct(bio); |
|---|
| 2611 | 1124 | psi_memstall_leave(&pflags); |
|---|
| 2612 | 1125 | |
|---|
| 2613 | | - return ret; |
|---|
| 1126 | + return ret; |
|---|
| 1127 | + } |
|---|
| 1128 | + |
|---|
| 1129 | + return submit_bio_noacct(bio); |
|---|
| 2614 | 1130 | } |
|---|
| 2615 | 1131 | EXPORT_SYMBOL(submit_bio); |
|---|
| 2616 | 1132 | |
|---|
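A minimal usage sketch of the contract spelled out in the updated kernel-doc above: the bio is fully set up, completion arrives asynchronously via `->bi_end_io()`, and the submitter does not touch the bio again until that callback runs. `my_read_page()` and `my_read_endio()` are hypothetical names; the block-layer calls themselves are real.

```c
static void my_read_endio(struct bio *bio)
{
	/* Only here may the submitter look at or free the bio again. */
	if (bio->bi_status)
		pr_err("read failed: %d\n",
		       blk_status_to_errno(bio->bi_status));
	bio_put(bio);
}

static void my_read_page(struct block_device *bdev, struct page *page,
			 sector_t sector)
{
	struct bio *bio = bio_alloc(GFP_KERNEL, 1);

	bio_set_dev(bio, bdev);
	bio->bi_opf = REQ_OP_READ;
	bio->bi_iter.bi_sector = sector;
	bio->bi_end_io = my_read_endio;
	bio_add_page(bio, page, PAGE_SIZE, 0);

	submit_bio(bio);	/* result is delivered through ->bi_end_io() */
}
```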
| 2617 | | -bool blk_poll(struct request_queue *q, blk_qc_t cookie) |
|---|
| 2618 | | -{ |
|---|
| 2619 | | - if (!q->poll_fn || !blk_qc_t_valid(cookie)) |
|---|
| 2620 | | - return false; |
|---|
| 2621 | | - |
|---|
| 2622 | | - if (current->plug) |
|---|
| 2623 | | - blk_flush_plug_list(current->plug, false); |
|---|
| 2624 | | - return q->poll_fn(q, cookie); |
|---|
| 2625 | | -} |
|---|
| 2626 | | -EXPORT_SYMBOL_GPL(blk_poll); |
|---|
| 2627 | | - |
|---|
| 2628 | 1133 | /** |
|---|
| 2629 | 1134 | * blk_cloned_rq_check_limits - Helper function to check a cloned request |
|---|
| 2630 | | - * for new the queue limits |
|---|
| 1135 | + * for the new queue limits |
|---|
| 2631 | 1136 | * @q: the queue |
|---|
| 2632 | 1137 | * @rq: the request being checked |
|---|
| 2633 | 1138 | * |
|---|
| .. | .. |
|---|
| 2642 | 1147 | * limits when retrying requests on other queues. Those requests need |
|---|
| 2643 | 1148 | * to be checked against the new queue limits again during dispatch. |
|---|
| 2644 | 1149 | */ |
|---|
| 2645 | | -static int blk_cloned_rq_check_limits(struct request_queue *q, |
|---|
| 1150 | +static blk_status_t blk_cloned_rq_check_limits(struct request_queue *q, |
|---|
| 2646 | 1151 | struct request *rq) |
|---|
| 2647 | 1152 | { |
|---|
| 2648 | | - if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, req_op(rq))) { |
|---|
| 2649 | | - printk(KERN_ERR "%s: over max size limit.\n", __func__); |
|---|
| 2650 | | - return -EIO; |
|---|
| 1153 | + unsigned int max_sectors = blk_queue_get_max_sectors(q, req_op(rq)); |
|---|
| 1154 | + |
|---|
| 1155 | + if (blk_rq_sectors(rq) > max_sectors) { |
|---|
| 1156 | + /* |
|---|
| 1157 | + * The SCSI device does not have a good way to report whether |
|---|
| 1158 | + * Write Same/Zero is actually supported. If a device rejects |
|---|
| 1159 | + * a non-read/write command (discard, write same, etc.) the |
|---|
| 1160 | + * low-level device driver will set the relevant queue limit to |
|---|
| 1161 | + * 0 to prevent blk-lib from issuing more of the offending |
|---|
| 1162 | + * operations. Commands queued prior to the queue limit being |
|---|
| 1163 | + * reset need to be completed with BLK_STS_NOTSUPP to avoid I/O |
|---|
| 1164 | + * errors being propagated to upper layers. |
|---|
| 1165 | + */ |
|---|
| 1166 | + if (max_sectors == 0) |
|---|
| 1167 | + return BLK_STS_NOTSUPP; |
|---|
| 1168 | + |
|---|
| 1169 | + printk(KERN_ERR "%s: over max size limit. (%u > %u)\n", |
|---|
| 1170 | + __func__, blk_rq_sectors(rq), max_sectors); |
|---|
| 1171 | + return BLK_STS_IOERR; |
|---|
| 2651 | 1172 | } |
|---|
| 2652 | 1173 | |
|---|
| 2653 | 1174 | /* |
|---|
| .. | .. |
|---|
| 2656 | 1177 | * Recalculate it to check the request correctly on this queue's |
|---|
| 2657 | 1178 | * limitation. |
|---|
| 2658 | 1179 | */ |
|---|
| 2659 | | - blk_recalc_rq_segments(rq); |
|---|
| 1180 | + rq->nr_phys_segments = blk_recalc_rq_segments(rq); |
|---|
| 2660 | 1181 | if (rq->nr_phys_segments > queue_max_segments(q)) { |
|---|
| 2661 | | - printk(KERN_ERR "%s: over max segments limit.\n", __func__); |
|---|
| 2662 | | - return -EIO; |
|---|
| 1182 | + printk(KERN_ERR "%s: over max segments limit. (%hu > %hu)\n", |
|---|
| 1183 | + __func__, rq->nr_phys_segments, queue_max_segments(q)); |
|---|
| 1184 | + return BLK_STS_IOERR; |
|---|
| 2663 | 1185 | } |
|---|
| 2664 | 1186 | |
|---|
| 2665 | | - return 0; |
|---|
| 1187 | + return BLK_STS_OK; |
|---|
| 2666 | 1188 | } |
|---|
| 2667 | 1189 | |
|---|
| 2668 | 1190 | /** |
|---|
| .. | .. |
|---|
| 2672 | 1194 | */ |
|---|
| 2673 | 1195 | blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq) |
|---|
| 2674 | 1196 | { |
|---|
| 2675 | | - unsigned long flags; |
|---|
| 2676 | | - int where = ELEVATOR_INSERT_BACK; |
|---|
| 1197 | + blk_status_t ret; |
|---|
| 2677 | 1198 | |
|---|
| 2678 | | - if (blk_cloned_rq_check_limits(q, rq)) |
|---|
| 2679 | | - return BLK_STS_IOERR; |
|---|
| 1199 | + ret = blk_cloned_rq_check_limits(q, rq); |
|---|
| 1200 | + if (ret != BLK_STS_OK) |
|---|
| 1201 | + return ret; |
|---|
| 2680 | 1202 | |
|---|
| 2681 | 1203 | if (rq->rq_disk && |
|---|
| 2682 | 1204 | should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq))) |
|---|
| 2683 | 1205 | return BLK_STS_IOERR; |
|---|
| 2684 | 1206 | |
|---|
| 2685 | | - if (q->mq_ops) { |
|---|
| 2686 | | - if (blk_queue_io_stat(q)) |
|---|
| 2687 | | - blk_account_io_start(rq, true); |
|---|
| 2688 | | - /* |
|---|
| 2689 | | - * Since we have a scheduler attached on the top device, |
|---|
| 2690 | | - * bypass a potential scheduler on the bottom device for |
|---|
| 2691 | | - * insert. |
|---|
| 2692 | | - */ |
|---|
| 2693 | | - return blk_mq_request_issue_directly(rq); |
|---|
| 2694 | | - } |
|---|
| 2695 | | - |
|---|
| 2696 | | - spin_lock_irqsave(q->queue_lock, flags); |
|---|
| 2697 | | - if (unlikely(blk_queue_dying(q))) { |
|---|
| 2698 | | - spin_unlock_irqrestore(q->queue_lock, flags); |
|---|
| 1207 | + if (blk_crypto_insert_cloned_request(rq)) |
|---|
| 2699 | 1208 | return BLK_STS_IOERR; |
|---|
| 2700 | | - } |
|---|
| 1209 | + |
|---|
| 1210 | + if (blk_queue_io_stat(q)) |
|---|
| 1211 | + blk_account_io_start(rq); |
|---|
| 2701 | 1212 | |
|---|
| 2702 | 1213 | /* |
|---|
| 2703 | | - * Submitting request must be dequeued before calling this function |
|---|
| 2704 | | - * because it will be linked to another request_queue |
|---|
| 1214 | + * Since we have a scheduler attached on the top device, |
|---|
| 1215 | + * bypass a potential scheduler on the bottom device for |
|---|
| 1216 | + * insert. |
|---|
| 2705 | 1217 | */ |
|---|
| 2706 | | - BUG_ON(blk_queued_rq(rq)); |
|---|
| 2707 | | - |
|---|
| 2708 | | - if (op_is_flush(rq->cmd_flags)) |
|---|
| 2709 | | - where = ELEVATOR_INSERT_FLUSH; |
|---|
| 2710 | | - |
|---|
| 2711 | | - add_acct_request(q, rq, where); |
|---|
| 2712 | | - if (where == ELEVATOR_INSERT_FLUSH) |
|---|
| 2713 | | - __blk_run_queue(q); |
|---|
| 2714 | | - spin_unlock_irqrestore(q->queue_lock, flags); |
|---|
| 2715 | | - |
|---|
| 2716 | | - return BLK_STS_OK; |
|---|
| 1218 | + return blk_mq_request_issue_directly(rq, true); |
|---|
| 2717 | 1219 | } |
|---|
| 2718 | 1220 | EXPORT_SYMBOL_GPL(blk_insert_cloned_request); |
|---|
| 2719 | 1221 | |
|---|
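As a hedged sketch of the caller side (a request-based stacking driver in the style of dm-mpath; `my_issue_clone()` is a hypothetical name and the clone is assumed to have been prepared with `blk_rq_prep_clone()` elsewhere), the return value now lets the caller distinguish resource shortage from hard errors such as the limit checks above:

```c
static blk_status_t my_issue_clone(struct request_queue *lower_q,
				   struct request *clone)
{
	blk_status_t ret = blk_insert_cloned_request(lower_q, clone);

	switch (ret) {
	case BLK_STS_OK:
		break;
	case BLK_STS_RESOURCE:
	case BLK_STS_DEV_RESOURCE:
		/* Lower queue is busy: the caller is expected to requeue. */
		break;
	default:
		/* Hard failure, e.g. the clone exceeds the lower limits. */
		break;
	}
	return ret;
}
```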
| .. | .. |
|---|
| 2758 | 1260 | } |
|---|
| 2759 | 1261 | EXPORT_SYMBOL_GPL(blk_rq_err_bytes); |
|---|
| 2760 | 1262 | |
|---|
| 2761 | | -void blk_account_io_completion(struct request *req, unsigned int bytes) |
|---|
| 1263 | +static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end) |
|---|
| 2762 | 1264 | { |
|---|
| 2763 | | - if (blk_do_io_stat(req)) { |
|---|
| 1265 | + unsigned long stamp; |
|---|
| 1266 | +again: |
|---|
| 1267 | + stamp = READ_ONCE(part->stamp); |
|---|
| 1268 | + if (unlikely(stamp != now)) { |
|---|
| 1269 | + if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) |
|---|
| 1270 | + __part_stat_add(part, io_ticks, end ? now - stamp : 1); |
|---|
| 1271 | + } |
|---|
| 1272 | + if (part->partno) { |
|---|
| 1273 | + part = &part_to_disk(part)->part0; |
|---|
| 1274 | + goto again; |
|---|
| 1275 | + } |
|---|
| 1276 | +} |
|---|
| 1277 | + |
|---|
| 1278 | +static void blk_account_io_completion(struct request *req, unsigned int bytes) |
|---|
| 1279 | +{ |
|---|
| 1280 | + if (req->part && blk_do_io_stat(req)) { |
|---|
| 2764 | 1281 | const int sgrp = op_stat_group(req_op(req)); |
|---|
| 2765 | 1282 | struct hd_struct *part; |
|---|
| 2766 | | - int cpu; |
|---|
| 2767 | 1283 | |
|---|
| 2768 | | - cpu = part_stat_lock(); |
|---|
| 1284 | + part_stat_lock(); |
|---|
| 2769 | 1285 | part = req->part; |
|---|
| 2770 | | - part_stat_add(cpu, part, sectors[sgrp], bytes >> 9); |
|---|
| 1286 | + part_stat_add(part, sectors[sgrp], bytes >> 9); |
|---|
| 2771 | 1287 | part_stat_unlock(); |
|---|
| 2772 | 1288 | } |
|---|
| 2773 | 1289 | } |
|---|
| .. | .. |
|---|
| 2779 | 1295 | * normal IO on queueing nor completion. Accounting the |
|---|
| 2780 | 1296 | * containing request is enough. |
|---|
| 2781 | 1297 | */ |
|---|
| 2782 | | - if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) { |
|---|
| 1298 | + if (req->part && blk_do_io_stat(req) && |
|---|
| 1299 | + !(req->rq_flags & RQF_FLUSH_SEQ)) { |
|---|
| 2783 | 1300 | const int sgrp = op_stat_group(req_op(req)); |
|---|
| 2784 | 1301 | struct hd_struct *part; |
|---|
| 2785 | | - int cpu; |
|---|
| 2786 | 1302 | |
|---|
| 2787 | | - cpu = part_stat_lock(); |
|---|
| 1303 | + part_stat_lock(); |
|---|
| 2788 | 1304 | part = req->part; |
|---|
| 2789 | 1305 | |
|---|
| 2790 | | - part_stat_inc(cpu, part, ios[sgrp]); |
|---|
| 2791 | | - part_stat_add(cpu, part, nsecs[sgrp], now - req->start_time_ns); |
|---|
| 2792 | | - part_round_stats(req->q, cpu, part); |
|---|
| 2793 | | - part_dec_in_flight(req->q, part, rq_data_dir(req)); |
|---|
| 1306 | + update_io_ticks(part, jiffies, true); |
|---|
| 1307 | + part_stat_inc(part, ios[sgrp]); |
|---|
| 1308 | + part_stat_add(part, nsecs[sgrp], now - req->start_time_ns); |
|---|
| 1309 | + part_stat_unlock(); |
|---|
| 2794 | 1310 | |
|---|
| 2795 | 1311 | hd_struct_put(part); |
|---|
| 2796 | | - part_stat_unlock(); |
|---|
| 2797 | 1312 | } |
|---|
| 2798 | 1313 | } |
|---|
| 2799 | 1314 | |
|---|
| 2800 | | -#ifdef CONFIG_PM |
|---|
| 2801 | | -/* |
|---|
| 2802 | | - * Don't process normal requests when queue is suspended |
|---|
| 2803 | | - * or in the process of suspending/resuming |
|---|
| 2804 | | - */ |
|---|
| 2805 | | -static bool blk_pm_allow_request(struct request *rq) |
|---|
| 1315 | +void blk_account_io_start(struct request *rq) |
|---|
| 2806 | 1316 | { |
|---|
| 2807 | | - switch (rq->q->rpm_status) { |
|---|
| 2808 | | - case RPM_RESUMING: |
|---|
| 2809 | | - case RPM_SUSPENDING: |
|---|
| 2810 | | - return rq->rq_flags & RQF_PM; |
|---|
| 2811 | | - case RPM_SUSPENDED: |
|---|
| 2812 | | - return false; |
|---|
| 2813 | | - default: |
|---|
| 2814 | | - return true; |
|---|
| 2815 | | - } |
|---|
| 2816 | | -} |
|---|
| 2817 | | -#else |
|---|
| 2818 | | -static bool blk_pm_allow_request(struct request *rq) |
|---|
| 2819 | | -{ |
|---|
| 2820 | | - return true; |
|---|
| 2821 | | -} |
|---|
| 2822 | | -#endif |
|---|
| 2823 | | - |
|---|
| 2824 | | -void blk_account_io_start(struct request *rq, bool new_io) |
|---|
| 2825 | | -{ |
|---|
| 2826 | | - struct hd_struct *part; |
|---|
| 2827 | | - int rw = rq_data_dir(rq); |
|---|
| 2828 | | - int cpu; |
|---|
| 2829 | | - |
|---|
| 2830 | 1317 | if (!blk_do_io_stat(rq)) |
|---|
| 2831 | 1318 | return; |
|---|
| 2832 | 1319 | |
|---|
| 2833 | | - cpu = part_stat_lock(); |
|---|
| 1320 | + rq->part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); |
|---|
| 2834 | 1321 | |
|---|
| 2835 | | - if (!new_io) { |
|---|
| 2836 | | - part = rq->part; |
|---|
| 2837 | | - part_stat_inc(cpu, part, merges[rw]); |
|---|
| 2838 | | - } else { |
|---|
| 2839 | | - part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); |
|---|
| 2840 | | - if (!hd_struct_try_get(part)) { |
|---|
| 2841 | | - /* |
|---|
| 2842 | | - * The partition is already being removed, |
|---|
| 2843 | | - * the request will be accounted on the disk only |
|---|
| 2844 | | - * |
|---|
| 2845 | | - * We take a reference on disk->part0 although that |
|---|
| 2846 | | - * partition will never be deleted, so we can treat |
|---|
| 2847 | | - * it as any other partition. |
|---|
| 2848 | | - */ |
|---|
| 2849 | | - part = &rq->rq_disk->part0; |
|---|
| 2850 | | - hd_struct_get(part); |
|---|
| 2851 | | - } |
|---|
| 2852 | | - part_round_stats(rq->q, cpu, part); |
|---|
| 2853 | | - part_inc_in_flight(rq->q, part, rw); |
|---|
| 2854 | | - rq->part = part; |
|---|
| 2855 | | - } |
|---|
| 2856 | | - |
|---|
| 1322 | + part_stat_lock(); |
|---|
| 1323 | + update_io_ticks(rq->part, jiffies, false); |
|---|
| 2857 | 1324 | part_stat_unlock(); |
|---|
| 2858 | 1325 | } |
|---|
| 2859 | 1326 | |
|---|
| 2860 | | -static struct request *elv_next_request(struct request_queue *q) |
|---|
| 1327 | +static unsigned long __part_start_io_acct(struct hd_struct *part, |
|---|
| 1328 | + unsigned int sectors, unsigned int op) |
|---|
| 2861 | 1329 | { |
|---|
| 2862 | | - struct request *rq; |
|---|
| 2863 | | - struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL); |
|---|
| 1330 | + const int sgrp = op_stat_group(op); |
|---|
| 1331 | + unsigned long now = READ_ONCE(jiffies); |
|---|
| 2864 | 1332 | |
|---|
| 2865 | | - WARN_ON_ONCE(q->mq_ops); |
|---|
| 1333 | + part_stat_lock(); |
|---|
| 1334 | + update_io_ticks(part, now, false); |
|---|
| 1335 | + part_stat_inc(part, ios[sgrp]); |
|---|
| 1336 | + part_stat_add(part, sectors[sgrp], sectors); |
|---|
| 1337 | + part_stat_local_inc(part, in_flight[op_is_write(op)]); |
|---|
| 1338 | + part_stat_unlock(); |
|---|
| 2866 | 1339 | |
|---|
| 2867 | | - while (1) { |
|---|
| 2868 | | - list_for_each_entry(rq, &q->queue_head, queuelist) { |
|---|
| 2869 | | - if (blk_pm_allow_request(rq)) |
|---|
| 2870 | | - return rq; |
|---|
| 2871 | | - |
|---|
| 2872 | | - if (rq->rq_flags & RQF_SOFTBARRIER) |
|---|
| 2873 | | - break; |
|---|
| 2874 | | - } |
|---|
| 2875 | | - |
|---|
| 2876 | | - /* |
|---|
| 2877 | | - * Flush request is running and flush request isn't queueable |
|---|
| 2878 | | - * in the drive, we can hold the queue till flush request is |
|---|
| 2879 | | - * finished. Even we don't do this, driver can't dispatch next |
|---|
| 2880 | | - * requests and will requeue them. And this can improve |
|---|
| 2881 | | - * throughput too. For example, we have request flush1, write1, |
|---|
| 2882 | | - * flush 2. flush1 is dispatched, then queue is hold, write1 |
|---|
| 2883 | | - * isn't inserted to queue. After flush1 is finished, flush2 |
|---|
| 2884 | | - * will be dispatched. Since disk cache is already clean, |
|---|
| 2885 | | - * flush2 will be finished very soon, so looks like flush2 is |
|---|
| 2886 | | - * folded to flush1. |
|---|
| 2887 | | - * Since the queue is hold, a flag is set to indicate the queue |
|---|
| 2888 | | - * should be restarted later. Please see flush_end_io() for |
|---|
| 2889 | | - * details. |
|---|
| 2890 | | - */ |
|---|
| 2891 | | - if (fq->flush_pending_idx != fq->flush_running_idx && |
|---|
| 2892 | | - !queue_flush_queueable(q)) { |
|---|
| 2893 | | - fq->flush_queue_delayed = 1; |
|---|
| 2894 | | - return NULL; |
|---|
| 2895 | | - } |
|---|
| 2896 | | - if (unlikely(blk_queue_bypass(q)) || |
|---|
| 2897 | | - !q->elevator->type->ops.sq.elevator_dispatch_fn(q, 0)) |
|---|
| 2898 | | - return NULL; |
|---|
| 2899 | | - } |
|---|
| 1340 | + return now; |
|---|
| 2900 | 1341 | } |
|---|
| 2901 | 1342 | |
|---|
| 2902 | | -/** |
|---|
| 2903 | | - * blk_peek_request - peek at the top of a request queue |
|---|
| 2904 | | - * @q: request queue to peek at |
|---|
| 2905 | | - * |
|---|
| 2906 | | - * Description: |
|---|
| 2907 | | - * Return the request at the top of @q. The returned request |
|---|
| 2908 | | - * should be started using blk_start_request() before LLD starts |
|---|
| 2909 | | - * processing it. |
|---|
| 2910 | | - * |
|---|
| 2911 | | - * Return: |
|---|
| 2912 | | - * Pointer to the request at the top of @q if available. Null |
|---|
| 2913 | | - * otherwise. |
|---|
| 2914 | | - */ |
|---|
| 2915 | | -struct request *blk_peek_request(struct request_queue *q) |
|---|
| 1343 | +unsigned long part_start_io_acct(struct gendisk *disk, struct hd_struct **part, |
|---|
| 1344 | + struct bio *bio) |
|---|
| 2916 | 1345 | { |
|---|
| 2917 | | - struct request *rq; |
|---|
| 2918 | | - int ret; |
|---|
| 1346 | + *part = disk_map_sector_rcu(disk, bio->bi_iter.bi_sector); |
|---|
| 2919 | 1347 | |
|---|
| 2920 | | - lockdep_assert_held(q->queue_lock); |
|---|
| 2921 | | - WARN_ON_ONCE(q->mq_ops); |
|---|
| 2922 | | - |
|---|
| 2923 | | - while ((rq = elv_next_request(q)) != NULL) { |
|---|
| 2924 | | - if (!(rq->rq_flags & RQF_STARTED)) { |
|---|
| 2925 | | - /* |
|---|
| 2926 | | - * This is the first time the device driver |
|---|
| 2927 | | - * sees this request (possibly after |
|---|
| 2928 | | - * requeueing). Notify IO scheduler. |
|---|
| 2929 | | - */ |
|---|
| 2930 | | - if (rq->rq_flags & RQF_SORTED) |
|---|
| 2931 | | - elv_activate_rq(q, rq); |
|---|
| 2932 | | - |
|---|
| 2933 | | - /* |
|---|
| 2934 | | - * just mark as started even if we don't start |
|---|
| 2935 | | - * it, a request that has been delayed should |
|---|
| 2936 | | - * not be passed by new incoming requests |
|---|
| 2937 | | - */ |
|---|
| 2938 | | - rq->rq_flags |= RQF_STARTED; |
|---|
| 2939 | | - trace_block_rq_issue(q, rq); |
|---|
| 2940 | | - } |
|---|
| 2941 | | - |
|---|
| 2942 | | - if (!q->boundary_rq || q->boundary_rq == rq) { |
|---|
| 2943 | | - q->end_sector = rq_end_sector(rq); |
|---|
| 2944 | | - q->boundary_rq = NULL; |
|---|
| 2945 | | - } |
|---|
| 2946 | | - |
|---|
| 2947 | | - if (rq->rq_flags & RQF_DONTPREP) |
|---|
| 2948 | | - break; |
|---|
| 2949 | | - |
|---|
| 2950 | | - if (q->dma_drain_size && blk_rq_bytes(rq)) { |
|---|
| 2951 | | - /* |
|---|
| 2952 | | - * make sure space for the drain appears we |
|---|
| 2953 | | - * know we can do this because max_hw_segments |
|---|
| 2954 | | - * has been adjusted to be one fewer than the |
|---|
| 2955 | | - * device can handle |
|---|
| 2956 | | - */ |
|---|
| 2957 | | - rq->nr_phys_segments++; |
|---|
| 2958 | | - } |
|---|
| 2959 | | - |
|---|
| 2960 | | - if (!q->prep_rq_fn) |
|---|
| 2961 | | - break; |
|---|
| 2962 | | - |
|---|
| 2963 | | - ret = q->prep_rq_fn(q, rq); |
|---|
| 2964 | | - if (ret == BLKPREP_OK) { |
|---|
| 2965 | | - break; |
|---|
| 2966 | | - } else if (ret == BLKPREP_DEFER) { |
|---|
| 2967 | | - /* |
|---|
| 2968 | | - * the request may have been (partially) prepped. |
|---|
| 2969 | | - * we need to keep this request in the front to |
|---|
| 2970 | | - * avoid resource deadlock. RQF_STARTED will |
|---|
| 2971 | | - * prevent other fs requests from passing this one. |
|---|
| 2972 | | - */ |
|---|
| 2973 | | - if (q->dma_drain_size && blk_rq_bytes(rq) && |
|---|
| 2974 | | - !(rq->rq_flags & RQF_DONTPREP)) { |
|---|
| 2975 | | - /* |
|---|
| 2976 | | - * remove the space for the drain we added |
|---|
| 2977 | | - * so that we don't add it again |
|---|
| 2978 | | - */ |
|---|
| 2979 | | - --rq->nr_phys_segments; |
|---|
| 2980 | | - } |
|---|
| 2981 | | - |
|---|
| 2982 | | - rq = NULL; |
|---|
| 2983 | | - break; |
|---|
| 2984 | | - } else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) { |
|---|
| 2985 | | - rq->rq_flags |= RQF_QUIET; |
|---|
| 2986 | | - /* |
|---|
| 2987 | | - * Mark this request as started so we don't trigger |
|---|
| 2988 | | - * any debug logic in the end I/O path. |
|---|
| 2989 | | - */ |
|---|
| 2990 | | - blk_start_request(rq); |
|---|
| 2991 | | - __blk_end_request_all(rq, ret == BLKPREP_INVALID ? |
|---|
| 2992 | | - BLK_STS_TARGET : BLK_STS_IOERR); |
|---|
| 2993 | | - } else { |
|---|
| 2994 | | - printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); |
|---|
| 2995 | | - break; |
|---|
| 2996 | | - } |
|---|
| 2997 | | - } |
|---|
| 2998 | | - |
|---|
| 2999 | | - return rq; |
|---|
| 1348 | + return __part_start_io_acct(*part, bio_sectors(bio), bio_op(bio)); |
|---|
| 3000 | 1349 | } |
|---|
| 3001 | | -EXPORT_SYMBOL(blk_peek_request); |
|---|
| 1350 | +EXPORT_SYMBOL_GPL(part_start_io_acct); |
|---|
| 3002 | 1351 | |
|---|
| 3003 | | -static void blk_dequeue_request(struct request *rq) |
|---|
| 1352 | +unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, |
|---|
| 1353 | + unsigned int op) |
|---|
| 3004 | 1354 | { |
|---|
| 3005 | | - struct request_queue *q = rq->q; |
|---|
| 1355 | + return __part_start_io_acct(&disk->part0, sectors, op); |
|---|
| 1356 | +} |
|---|
| 1357 | +EXPORT_SYMBOL(disk_start_io_acct); |
|---|
| 3006 | 1358 | |
|---|
| 3007 | | - BUG_ON(list_empty(&rq->queuelist)); |
|---|
| 3008 | | - BUG_ON(ELV_ON_HASH(rq)); |
|---|
| 1359 | +static void __part_end_io_acct(struct hd_struct *part, unsigned int op, |
|---|
| 1360 | + unsigned long start_time) |
|---|
| 1361 | +{ |
|---|
| 1362 | + const int sgrp = op_stat_group(op); |
|---|
| 1363 | + unsigned long now = READ_ONCE(jiffies); |
|---|
| 1364 | + unsigned long duration = now - start_time; |
|---|
| 3009 | 1365 | |
|---|
| 3010 | | - list_del_init(&rq->queuelist); |
|---|
| 3011 | | - |
|---|
| 3012 | | - /* |
|---|
| 3013 | | - * the time frame between a request being removed from the lists |
|---|
| 3014 | | - * and to it is freed is accounted as io that is in progress at |
|---|
| 3015 | | - * the driver side. |
|---|
| 3016 | | - */ |
|---|
| 3017 | | - if (blk_account_rq(rq)) |
|---|
| 3018 | | - q->in_flight[rq_is_sync(rq)]++; |
|---|
| 1366 | + part_stat_lock(); |
|---|
| 1367 | + update_io_ticks(part, now, true); |
|---|
| 1368 | + part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration)); |
|---|
| 1369 | + part_stat_local_dec(part, in_flight[op_is_write(op)]); |
|---|
| 1370 | + part_stat_unlock(); |
|---|
| 3019 | 1371 | } |
|---|
| 3020 | 1372 | |
|---|
| 3021 | | -/** |
|---|
| 3022 | | - * blk_start_request - start request processing on the driver |
|---|
| 3023 | | - * @req: request to dequeue |
|---|
| 3024 | | - * |
|---|
| 3025 | | - * Description: |
|---|
| 3026 | | - * Dequeue @req and start timeout timer on it. This hands off the |
|---|
| 3027 | | - * request to the driver. |
|---|
| 3028 | | - */ |
|---|
| 3029 | | -void blk_start_request(struct request *req) |
|---|
| 1373 | +void part_end_io_acct(struct hd_struct *part, struct bio *bio, |
|---|
| 1374 | + unsigned long start_time) |
|---|
| 3030 | 1375 | { |
|---|
| 3031 | | - lockdep_assert_held(req->q->queue_lock); |
|---|
| 3032 | | - WARN_ON_ONCE(req->q->mq_ops); |
|---|
| 3033 | | - |
|---|
| 3034 | | - blk_dequeue_request(req); |
|---|
| 3035 | | - |
|---|
| 3036 | | - if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) { |
|---|
| 3037 | | - req->io_start_time_ns = ktime_get_ns(); |
|---|
| 3038 | | -#ifdef CONFIG_BLK_DEV_THROTTLING_LOW |
|---|
| 3039 | | - req->throtl_size = blk_rq_sectors(req); |
|---|
| 3040 | | -#endif |
|---|
| 3041 | | - req->rq_flags |= RQF_STATS; |
|---|
| 3042 | | - rq_qos_issue(req->q, req); |
|---|
| 3043 | | - } |
|---|
| 3044 | | - |
|---|
| 3045 | | - BUG_ON(blk_rq_is_complete(req)); |
|---|
| 3046 | | - blk_add_timer(req); |
|---|
| 1376 | + __part_end_io_acct(part, bio_op(bio), start_time); |
|---|
| 1377 | + hd_struct_put(part); |
|---|
| 3047 | 1378 | } |
|---|
| 3048 | | -EXPORT_SYMBOL(blk_start_request); |
|---|
| 1379 | +EXPORT_SYMBOL_GPL(part_end_io_acct); |
|---|
| 3049 | 1380 | |
|---|
| 3050 | | -/** |
|---|
| 3051 | | - * blk_fetch_request - fetch a request from a request queue |
|---|
| 3052 | | - * @q: request queue to fetch a request from |
|---|
| 3053 | | - * |
|---|
| 3054 | | - * Description: |
|---|
| 3055 | | - * Return the request at the top of @q. The request is started on |
|---|
| 3056 | | - * return and LLD can start processing it immediately. |
|---|
| 3057 | | - * |
|---|
| 3058 | | - * Return: |
|---|
| 3059 | | - * Pointer to the request at the top of @q if available. Null |
|---|
| 3060 | | - * otherwise. |
|---|
| 3061 | | - */ |
|---|
| 3062 | | -struct request *blk_fetch_request(struct request_queue *q) |
|---|
| 1381 | +void disk_end_io_acct(struct gendisk *disk, unsigned int op, |
|---|
| 1382 | + unsigned long start_time) |
|---|
| 3063 | 1383 | { |
|---|
| 3064 | | - struct request *rq; |
|---|
| 3065 | | - |
|---|
| 3066 | | - lockdep_assert_held(q->queue_lock); |
|---|
| 3067 | | - WARN_ON_ONCE(q->mq_ops); |
|---|
| 3068 | | - |
|---|
| 3069 | | - rq = blk_peek_request(q); |
|---|
| 3070 | | - if (rq) |
|---|
| 3071 | | - blk_start_request(rq); |
|---|
| 3072 | | - return rq; |
|---|
| 1384 | + __part_end_io_acct(&disk->part0, op, start_time); |
|---|
| 3073 | 1385 | } |
|---|
| 3074 | | -EXPORT_SYMBOL(blk_fetch_request); |
|---|
| 1386 | +EXPORT_SYMBOL(disk_end_io_acct); |
|---|
| 3075 | 1387 | |
|---|
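The accounting helpers added above are intended for bio-based drivers that complete I/O without going through the request layer. A minimal sketch, assuming a hypothetical `->submit_bio` handler that services the bio synchronously; only the accounting calls and `bio_endio()` are real API:

```c
static blk_qc_t my_bio_submit(struct bio *bio)
{
	unsigned long start_time;

	start_time = disk_start_io_acct(bio->bi_disk, bio_sectors(bio),
					bio_op(bio));

	/* ... service the bio, synchronously here for simplicity ... */

	disk_end_io_acct(bio->bi_disk, bio_op(bio), start_time);
	bio_endio(bio);
	return BLK_QC_T_NONE;
}
```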
| 3076 | 1388 | /* |
|---|
| 3077 | 1389 | * Steal bios from a request and add them to a bio list. |
|---|
| .. | .. |
|---|
| 3107 | 1419 | * |
|---|
| 3108 | 1420 | * This special helper function is only for request stacking drivers |
|---|
| 3109 | 1421 | * (e.g. request-based dm) so that they can handle partial completion. |
|---|
| 3110 | | - * Actual device drivers should use blk_end_request instead. |
|---|
| 1422 | + * Actual device drivers should use blk_mq_end_request instead. |
|---|
| 3111 | 1423 | * |
|---|
| 3112 | 1424 | * Passing the result of blk_rq_bytes() as @nr_bytes guarantees |
|---|
| 3113 | 1425 | * %false return from this function. |
|---|
| .. | .. |
|---|
| 3130 | 1442 | if (!req->bio) |
|---|
| 3131 | 1443 | return false; |
|---|
| 3132 | 1444 | |
|---|
| 1445 | +#ifdef CONFIG_BLK_DEV_INTEGRITY |
|---|
| 1446 | + if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ && |
|---|
| 1447 | + error == BLK_STS_OK) |
|---|
| 1448 | + req->q->integrity.profile->complete_fn(req, nr_bytes); |
|---|
| 1449 | +#endif |
|---|
| 1450 | + |
|---|
| 1451 | + /* |
|---|
| 1452 | + * Upper layers may call blk_crypto_evict_key() anytime after the last |
|---|
| 1453 | + * bio_endio(). Therefore, the keyslot must be released before that. |
|---|
| 1454 | + */ |
|---|
| 1455 | + if (blk_crypto_rq_has_keyslot(req) && nr_bytes >= blk_rq_bytes(req)) |
|---|
| 1456 | + __blk_crypto_rq_put_keyslot(req); |
|---|
| 1457 | + |
|---|
| 3133 | 1458 | if (unlikely(error && !blk_rq_is_passthrough(req) && |
|---|
| 3134 | 1459 | !(req->rq_flags & RQF_QUIET))) |
|---|
| 3135 | | - print_req_error(req, error); |
|---|
| 1460 | + print_req_error(req, error, __func__); |
|---|
| 3136 | 1461 | |
|---|
| 3137 | 1462 | blk_account_io_completion(req, nr_bytes); |
|---|
| 3138 | 1463 | |
|---|
| .. | .. |
|---|
| 3191 | 1516 | } |
|---|
| 3192 | 1517 | |
|---|
| 3193 | 1518 | /* recalculate the number of segments */ |
|---|
| 3194 | | - blk_recalc_rq_segments(req); |
|---|
| 1519 | + req->nr_phys_segments = blk_recalc_rq_segments(req); |
|---|
| 3195 | 1520 | } |
|---|
| 3196 | 1521 | |
|---|
| 3197 | 1522 | return true; |
|---|
| 3198 | 1523 | } |
|---|
| 3199 | 1524 | EXPORT_SYMBOL_GPL(blk_update_request); |
|---|
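A hedged sketch of the partial-completion pattern the kernel-doc above describes for request-stacking drivers; `my_stacked_complete()` and the `done_bytes` bookkeeping are assumptions, and passing `blk_rq_bytes(rq)` instead would complete the whole request in one call:

```c
static void my_stacked_complete(struct request *rq, blk_status_t error,
				unsigned int done_bytes)
{
	/* Returns true while bios are still pending on the request. */
	if (blk_update_request(rq, error, done_bytes))
		return;

	/* All bytes accounted for: finish the (blk-mq) request. */
	__blk_mq_end_request(rq, error);
}
```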
| 3200 | | - |
|---|
| 3201 | | -static bool blk_update_bidi_request(struct request *rq, blk_status_t error, |
|---|
| 3202 | | - unsigned int nr_bytes, |
|---|
| 3203 | | - unsigned int bidi_bytes) |
|---|
| 3204 | | -{ |
|---|
| 3205 | | - if (blk_update_request(rq, error, nr_bytes)) |
|---|
| 3206 | | - return true; |
|---|
| 3207 | | - |
|---|
| 3208 | | - /* Bidi request must be completed as a whole */ |
|---|
| 3209 | | - if (unlikely(blk_bidi_rq(rq)) && |
|---|
| 3210 | | - blk_update_request(rq->next_rq, error, bidi_bytes)) |
|---|
| 3211 | | - return true; |
|---|
| 3212 | | - |
|---|
| 3213 | | - if (blk_queue_add_random(rq->q)) |
|---|
| 3214 | | - add_disk_randomness(rq->rq_disk); |
|---|
| 3215 | | - |
|---|
| 3216 | | - return false; |
|---|
| 3217 | | -} |
|---|
| 3218 | | - |
|---|
| 3219 | | -/** |
|---|
| 3220 | | - * blk_unprep_request - unprepare a request |
|---|
| 3221 | | - * @req: the request |
|---|
| 3222 | | - * |
|---|
| 3223 | | - * This function makes a request ready for complete resubmission (or |
|---|
| 3224 | | - * completion). It happens only after all error handling is complete, |
|---|
| 3225 | | - * so represents the appropriate moment to deallocate any resources |
|---|
| 3226 | | - * that were allocated to the request in the prep_rq_fn. The queue |
|---|
| 3227 | | - * lock is held when calling this. |
|---|
| 3228 | | - */ |
|---|
| 3229 | | -void blk_unprep_request(struct request *req) |
|---|
| 3230 | | -{ |
|---|
| 3231 | | - struct request_queue *q = req->q; |
|---|
| 3232 | | - |
|---|
| 3233 | | - req->rq_flags &= ~RQF_DONTPREP; |
|---|
| 3234 | | - if (q->unprep_rq_fn) |
|---|
| 3235 | | - q->unprep_rq_fn(q, req); |
|---|
| 3236 | | -} |
|---|
| 3237 | | -EXPORT_SYMBOL_GPL(blk_unprep_request); |
|---|
| 3238 | | - |
|---|
| 3239 | | -void blk_finish_request(struct request *req, blk_status_t error) |
|---|
| 3240 | | -{ |
|---|
| 3241 | | - struct request_queue *q = req->q; |
|---|
| 3242 | | - u64 now = ktime_get_ns(); |
|---|
| 3243 | | - |
|---|
| 3244 | | - lockdep_assert_held(req->q->queue_lock); |
|---|
| 3245 | | - WARN_ON_ONCE(q->mq_ops); |
|---|
| 3246 | | - |
|---|
| 3247 | | - if (req->rq_flags & RQF_STATS) |
|---|
| 3248 | | - blk_stat_add(req, now); |
|---|
| 3249 | | - |
|---|
| 3250 | | - if (req->rq_flags & RQF_QUEUED) |
|---|
| 3251 | | - blk_queue_end_tag(q, req); |
|---|
| 3252 | | - |
|---|
| 3253 | | - BUG_ON(blk_queued_rq(req)); |
|---|
| 3254 | | - |
|---|
| 3255 | | - if (unlikely(laptop_mode) && !blk_rq_is_passthrough(req)) |
|---|
| 3256 | | - laptop_io_completion(req->q->backing_dev_info); |
|---|
| 3257 | | - |
|---|
| 3258 | | - blk_delete_timer(req); |
|---|
| 3259 | | - |
|---|
| 3260 | | - if (req->rq_flags & RQF_DONTPREP) |
|---|
| 3261 | | - blk_unprep_request(req); |
|---|
| 3262 | | - |
|---|
| 3263 | | - blk_account_io_done(req, now); |
|---|
| 3264 | | - |
|---|
| 3265 | | - if (req->end_io) { |
|---|
| 3266 | | - rq_qos_done(q, req); |
|---|
| 3267 | | - req->end_io(req, error); |
|---|
| 3268 | | - } else { |
|---|
| 3269 | | - if (blk_bidi_rq(req)) |
|---|
| 3270 | | - __blk_put_request(req->next_rq->q, req->next_rq); |
|---|
| 3271 | | - |
|---|
| 3272 | | - __blk_put_request(q, req); |
|---|
| 3273 | | - } |
|---|
| 3274 | | -} |
|---|
| 3275 | | -EXPORT_SYMBOL(blk_finish_request); |
|---|
| 3276 | | - |
|---|
| 3277 | | -/** |
|---|
| 3278 | | - * blk_end_bidi_request - Complete a bidi request |
|---|
| 3279 | | - * @rq: the request to complete |
|---|
| 3280 | | - * @error: block status code |
|---|
| 3281 | | - * @nr_bytes: number of bytes to complete @rq |
|---|
| 3282 | | - * @bidi_bytes: number of bytes to complete @rq->next_rq |
|---|
| 3283 | | - * |
|---|
| 3284 | | - * Description: |
|---|
| 3285 | | - * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. |
|---|
| 3286 | | - * Drivers that supports bidi can safely call this member for any |
|---|
| 3287 | | - * type of request, bidi or uni. In the later case @bidi_bytes is |
|---|
| 3288 | | - * just ignored. |
|---|
| 3289 | | - * |
|---|
| 3290 | | - * Return: |
|---|
| 3291 | | - * %false - we are done with this request |
|---|
| 3292 | | - * %true - still buffers pending for this request |
|---|
| 3293 | | - **/ |
|---|
| 3294 | | -static bool blk_end_bidi_request(struct request *rq, blk_status_t error, |
|---|
| 3295 | | - unsigned int nr_bytes, unsigned int bidi_bytes) |
|---|
| 3296 | | -{ |
|---|
| 3297 | | - struct request_queue *q = rq->q; |
|---|
| 3298 | | - unsigned long flags; |
|---|
| 3299 | | - |
|---|
| 3300 | | - WARN_ON_ONCE(q->mq_ops); |
|---|
| 3301 | | - |
|---|
| 3302 | | - if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) |
|---|
| 3303 | | - return true; |
|---|
| 3304 | | - |
|---|
| 3305 | | - spin_lock_irqsave(q->queue_lock, flags); |
|---|
| 3306 | | - blk_finish_request(rq, error); |
|---|
| 3307 | | - spin_unlock_irqrestore(q->queue_lock, flags); |
|---|
| 3308 | | - |
|---|
| 3309 | | - return false; |
|---|
| 3310 | | -} |
|---|
| 3311 | | - |
|---|
| 3312 | | -/** |
|---|
| 3313 | | - * __blk_end_bidi_request - Complete a bidi request with queue lock held |
|---|
| 3314 | | - * @rq: the request to complete |
|---|
| 3315 | | - * @error: block status code |
|---|
| 3316 | | - * @nr_bytes: number of bytes to complete @rq |
|---|
| 3317 | | - * @bidi_bytes: number of bytes to complete @rq->next_rq |
|---|
| 3318 | | - * |
|---|
| 3319 | | - * Description: |
|---|
| 3320 | | - * Identical to blk_end_bidi_request() except that queue lock is |
|---|
| 3321 | | - * assumed to be locked on entry and remains so on return. |
|---|
| 3322 | | - * |
|---|
| 3323 | | - * Return: |
|---|
| 3324 | | - * %false - we are done with this request |
|---|
| 3325 | | - * %true - still buffers pending for this request |
|---|
| 3326 | | - **/ |
|---|
| 3327 | | -static bool __blk_end_bidi_request(struct request *rq, blk_status_t error, |
|---|
| 3328 | | - unsigned int nr_bytes, unsigned int bidi_bytes) |
|---|
| 3329 | | -{ |
|---|
| 3330 | | - lockdep_assert_held(rq->q->queue_lock); |
|---|
| 3331 | | - WARN_ON_ONCE(rq->q->mq_ops); |
|---|
| 3332 | | - |
|---|
| 3333 | | - if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) |
|---|
| 3334 | | - return true; |
|---|
| 3335 | | - |
|---|
| 3336 | | - blk_finish_request(rq, error); |
|---|
| 3337 | | - |
|---|
| 3338 | | - return false; |
|---|
| 3339 | | -} |
|---|
| 3340 | | - |
|---|
| 3341 | | -/** |
|---|
| 3342 | | - * blk_end_request - Helper function for drivers to complete the request. |
|---|
| 3343 | | - * @rq: the request being processed |
|---|
| 3344 | | - * @error: block status code |
|---|
| 3345 | | - * @nr_bytes: number of bytes to complete |
|---|
| 3346 | | - * |
|---|
| 3347 | | - * Description: |
|---|
| 3348 | | - * Ends I/O on a number of bytes attached to @rq. |
|---|
| 3349 | | - * If @rq has leftover, sets it up for the next range of segments. |
|---|
| 3350 | | - * |
|---|
| 3351 | | - * Return: |
|---|
| 3352 | | - * %false - we are done with this request |
|---|
| 3353 | | - * %true - still buffers pending for this request |
|---|
| 3354 | | - **/ |
|---|
| 3355 | | -bool blk_end_request(struct request *rq, blk_status_t error, |
|---|
| 3356 | | - unsigned int nr_bytes) |
|---|
| 3357 | | -{ |
|---|
| 3358 | | - WARN_ON_ONCE(rq->q->mq_ops); |
|---|
| 3359 | | - return blk_end_bidi_request(rq, error, nr_bytes, 0); |
|---|
| 3360 | | -} |
|---|
| 3361 | | -EXPORT_SYMBOL(blk_end_request); |
|---|
| 3362 | | - |
|---|
| 3363 | | -/** |
|---|
| 3364 | | - * blk_end_request_all - Helper function for drives to finish the request. |
|---|
| 3365 | | - * @rq: the request to finish |
|---|
| 3366 | | - * @error: block status code |
|---|
| 3367 | | - * |
|---|
| 3368 | | - * Description: |
|---|
| 3369 | | - * Completely finish @rq. |
|---|
| 3370 | | - */ |
|---|
| 3371 | | -void blk_end_request_all(struct request *rq, blk_status_t error) |
|---|
| 3372 | | -{ |
|---|
| 3373 | | - bool pending; |
|---|
| 3374 | | - unsigned int bidi_bytes = 0; |
|---|
| 3375 | | - |
|---|
| 3376 | | - if (unlikely(blk_bidi_rq(rq))) |
|---|
| 3377 | | - bidi_bytes = blk_rq_bytes(rq->next_rq); |
|---|
| 3378 | | - |
|---|
| 3379 | | - pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); |
|---|
| 3380 | | - BUG_ON(pending); |
|---|
| 3381 | | -} |
|---|
| 3382 | | -EXPORT_SYMBOL(blk_end_request_all); |
|---|
| 3383 | | - |
|---|
| 3384 | | -/** |
|---|
| 3385 | | - * __blk_end_request - Helper function for drivers to complete the request. |
|---|
| 3386 | | - * @rq: the request being processed |
|---|
| 3387 | | - * @error: block status code |
|---|
| 3388 | | - * @nr_bytes: number of bytes to complete |
|---|
| 3389 | | - * |
|---|
| 3390 | | - * Description: |
|---|
| 3391 | | - * Must be called with queue lock held unlike blk_end_request(). |
|---|
| 3392 | | - * |
|---|
| 3393 | | - * Return: |
|---|
| 3394 | | - * %false - we are done with this request |
|---|
| 3395 | | - * %true - still buffers pending for this request |
|---|
| 3396 | | - **/ |
|---|
| 3397 | | -bool __blk_end_request(struct request *rq, blk_status_t error, |
|---|
| 3398 | | - unsigned int nr_bytes) |
|---|
| 3399 | | -{ |
|---|
| 3400 | | - lockdep_assert_held(rq->q->queue_lock); |
|---|
| 3401 | | - WARN_ON_ONCE(rq->q->mq_ops); |
|---|
| 3402 | | - |
|---|
| 3403 | | - return __blk_end_bidi_request(rq, error, nr_bytes, 0); |
|---|
| 3404 | | -} |
|---|
| 3405 | | -EXPORT_SYMBOL(__blk_end_request); |
|---|
| 3406 | | - |
|---|
| 3407 | | -/** |
|---|
| 3408 | | - * __blk_end_request_all - Helper function for drives to finish the request. |
|---|
| 3409 | | - * @rq: the request to finish |
|---|
| 3410 | | - * @error: block status code |
|---|
| 3411 | | - * |
|---|
| 3412 | | - * Description: |
|---|
| 3413 | | - * Completely finish @rq. Must be called with queue lock held. |
|---|
| 3414 | | - */ |
|---|
| 3415 | | -void __blk_end_request_all(struct request *rq, blk_status_t error) |
|---|
| 3416 | | -{ |
|---|
| 3417 | | - bool pending; |
|---|
| 3418 | | - unsigned int bidi_bytes = 0; |
|---|
| 3419 | | - |
|---|
| 3420 | | - lockdep_assert_held(rq->q->queue_lock); |
|---|
| 3421 | | - WARN_ON_ONCE(rq->q->mq_ops); |
|---|
| 3422 | | - |
|---|
| 3423 | | - if (unlikely(blk_bidi_rq(rq))) |
|---|
| 3424 | | - bidi_bytes = blk_rq_bytes(rq->next_rq); |
|---|
| 3425 | | - |
|---|
| 3426 | | - pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); |
|---|
| 3427 | | - BUG_ON(pending); |
|---|
| 3428 | | -} |
|---|
| 3429 | | -EXPORT_SYMBOL(__blk_end_request_all); |
|---|
| 3430 | | - |
|---|
| 3431 | | -/** |
|---|
| 3432 | | - * __blk_end_request_cur - Helper function to finish the current request chunk. |
|---|
| 3433 | | - * @rq: the request to finish the current chunk for |
|---|
| 3434 | | - * @error: block status code |
|---|
| 3435 | | - * |
|---|
| 3436 | | - * Description: |
|---|
| 3437 | | - * Complete the current consecutively mapped chunk from @rq. Must |
|---|
| 3438 | | - * be called with queue lock held. |
|---|
| 3439 | | - * |
|---|
| 3440 | | - * Return: |
|---|
| 3441 | | - * %false - we are done with this request |
|---|
| 3442 | | - * %true - still buffers pending for this request |
|---|
| 3443 | | - */ |
|---|
| 3444 | | -bool __blk_end_request_cur(struct request *rq, blk_status_t error) |
|---|
| 3445 | | -{ |
|---|
| 3446 | | - return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); |
|---|
| 3447 | | -} |
|---|
| 3448 | | -EXPORT_SYMBOL(__blk_end_request_cur); |
|---|
| 3449 | | - |
|---|
| 3450 | | -void blk_rq_bio_prep(struct request_queue *q, struct request *rq, |
|---|
| 3451 | | - struct bio *bio) |
|---|
| 3452 | | -{ |
|---|
| 3453 | | - if (bio_has_data(bio)) |
|---|
| 3454 | | - rq->nr_phys_segments = bio_phys_segments(q, bio); |
|---|
| 3455 | | - else if (bio_op(bio) == REQ_OP_DISCARD) |
|---|
| 3456 | | - rq->nr_phys_segments = 1; |
|---|
| 3457 | | - |
|---|
| 3458 | | - rq->__data_len = bio->bi_iter.bi_size; |
|---|
| 3459 | | - rq->bio = rq->biotail = bio; |
|---|
| 3460 | | - |
|---|
| 3461 | | - if (bio->bi_disk) |
|---|
| 3462 | | - rq->rq_disk = bio->bi_disk; |
|---|
| 3463 | | -} |
|---|
| 3464 | 1525 | |
|---|
| 3465 | 1526 | #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE |
|---|
| 3466 | 1527 | /** |
|---|
| .. | .. |
|---|
| 3502 | 1563 | */ |
|---|
| 3503 | 1564 | int blk_lld_busy(struct request_queue *q) |
|---|
| 3504 | 1565 | { |
|---|
| 3505 | | - if (q->lld_busy_fn) |
|---|
| 3506 | | - return q->lld_busy_fn(q); |
|---|
| 1566 | + if (queue_is_mq(q) && q->mq_ops->busy) |
|---|
| 1567 | + return q->mq_ops->busy(q); |
|---|
| 3507 | 1568 | |
|---|
| 3508 | 1569 | return 0; |
|---|
| 3509 | 1570 | } |
|---|
| .. | .. |
|---|
| 3528 | 1589 | } |
|---|
| 3529 | 1590 | EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); |
|---|
| 3530 | 1591 | |
|---|
| 3531 | | -/* |
|---|
| 3532 | | - * Copy attributes of the original request to the clone request. |
|---|
| 3533 | | - * The actual data parts (e.g. ->cmd, ->sense) are not copied. |
|---|
| 3534 | | - */ |
|---|
| 3535 | | -static void __blk_rq_prep_clone(struct request *dst, struct request *src) |
|---|
| 3536 | | -{ |
|---|
| 3537 | | - dst->cpu = src->cpu; |
|---|
| 3538 | | - dst->__sector = blk_rq_pos(src); |
|---|
| 3539 | | - dst->__data_len = blk_rq_bytes(src); |
|---|
| 3540 | | - if (src->rq_flags & RQF_SPECIAL_PAYLOAD) { |
|---|
| 3541 | | - dst->rq_flags |= RQF_SPECIAL_PAYLOAD; |
|---|
| 3542 | | - dst->special_vec = src->special_vec; |
|---|
| 3543 | | - } |
|---|
| 3544 | | - dst->nr_phys_segments = src->nr_phys_segments; |
|---|
| 3545 | | - dst->ioprio = src->ioprio; |
|---|
| 3546 | | - dst->extra_len = src->extra_len; |
|---|
| 3547 | | -} |
|---|
| 3548 | | - |
|---|
| 3549 | 1592 | /** |
|---|
| 3550 | 1593 | * blk_rq_prep_clone - Helper function to setup clone request |
|---|
| 3551 | 1594 | * @rq: the request to be setup |
|---|
| .. | .. |
|---|
| 3558 | 1601 | * |
|---|
| 3559 | 1602 | * Description: |
|---|
| 3560 | 1603 | * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq. |
|---|
| 3561 | | - * The actual data parts of @rq_src (e.g. ->cmd, ->sense) |
|---|
| 3562 | | - * are not copied, and copying such parts is the caller's responsibility. |
|---|
| 3563 | 1604 | * Also, pages which the original bios are pointing to are not copied |
|---|
| 3564 | 1605 | * and the cloned bios just point same pages. |
|---|
| 3565 | 1606 | * So cloned bios must be completed before original bios, which means |
|---|
| .. | .. |
|---|
| 3586 | 1627 | if (rq->bio) { |
|---|
| 3587 | 1628 | rq->biotail->bi_next = bio; |
|---|
| 3588 | 1629 | rq->biotail = bio; |
|---|
| 3589 | | - } else |
|---|
| 1630 | + } else { |
|---|
| 3590 | 1631 | rq->bio = rq->biotail = bio; |
|---|
| 1632 | + } |
|---|
| 1633 | + bio = NULL; |
|---|
| 3591 | 1634 | } |
|---|
| 3592 | 1635 | |
|---|
| 3593 | | - __blk_rq_prep_clone(rq, rq_src); |
|---|
| 1636 | + /* Copy attributes of the original request to the clone request. */ |
|---|
| 1637 | + rq->__sector = blk_rq_pos(rq_src); |
|---|
| 1638 | + rq->__data_len = blk_rq_bytes(rq_src); |
|---|
| 1639 | + if (rq_src->rq_flags & RQF_SPECIAL_PAYLOAD) { |
|---|
| 1640 | + rq->rq_flags |= RQF_SPECIAL_PAYLOAD; |
|---|
| 1641 | + rq->special_vec = rq_src->special_vec; |
|---|
| 1642 | + } |
|---|
| 1643 | + rq->nr_phys_segments = rq_src->nr_phys_segments; |
|---|
| 1644 | + rq->ioprio = rq_src->ioprio; |
|---|
| 1645 | + |
|---|
| 1646 | + if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0) |
|---|
| 1647 | + goto free_and_out; |
|---|
| 3594 | 1648 | |
|---|
| 3595 | 1649 | return 0; |
|---|
| 3596 | 1650 | |
|---|
| .. | .. |
|---|
| 3609 | 1663 | } |
|---|
| 3610 | 1664 | EXPORT_SYMBOL(kblockd_schedule_work); |
|---|
| 3611 | 1665 | |
|---|
| 3612 | | -int kblockd_schedule_work_on(int cpu, struct work_struct *work) |
|---|
| 3613 | | -{ |
|---|
| 3614 | | - return queue_work_on(cpu, kblockd_workqueue, work); |
|---|
| 3615 | | -} |
|---|
| 3616 | | -EXPORT_SYMBOL(kblockd_schedule_work_on); |
|---|
| 3617 | | - |
|---|
| 3618 | 1666 | int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, |
|---|
| 3619 | 1667 | unsigned long delay) |
|---|
| 3620 | 1668 | { |
|---|
| .. | .. |
|---|
| 3627 | 1675 | * @plug: The &struct blk_plug that needs to be initialized |
|---|
| 3628 | 1676 | * |
|---|
| 3629 | 1677 | * Description: |
|---|
| 1678 | + * blk_start_plug() indicates to the block layer an intent by the caller |
|---|
| 1679 | + * to submit multiple I/O requests in a batch. The block layer may use |
|---|
| 1680 | + * this hint to defer submitting I/Os from the caller until blk_finish_plug() |
|---|
| 1681 | + * is called. However, the block layer may choose to submit requests |
|---|
| 1682 | + * before a call to blk_finish_plug() if the number of queued I/Os |
|---|
| 1683 | + * exceeds %BLK_MAX_REQUEST_COUNT, or if the size of the I/O is larger than |
|---|
| 1684 | + * %BLK_PLUG_FLUSH_SIZE. The queued I/Os may also be submitted early if |
|---|
| 1685 | + * the task schedules (see below). |
|---|
| 1686 | + * |
|---|
| 3630 | 1687 | * Tracking blk_plug inside the task_struct will help with auto-flushing the |
|---|
| 3631 | 1688 | * pending I/O should the task end up blocking between blk_start_plug() and |
|---|
| 3632 | 1689 | * blk_finish_plug(). This is important from a performance perspective, but |
|---|
| .. | .. |
|---|
| 3646 | 1703 | if (tsk->plug) |
|---|
| 3647 | 1704 | return; |
|---|
| 3648 | 1705 | |
|---|
| 3649 | | - INIT_LIST_HEAD(&plug->list); |
|---|
| 3650 | 1706 | INIT_LIST_HEAD(&plug->mq_list); |
|---|
| 3651 | 1707 | INIT_LIST_HEAD(&plug->cb_list); |
|---|
| 1708 | + plug->rq_count = 0; |
|---|
| 1709 | + plug->multiple_queues = false; |
|---|
| 1710 | + plug->nowait = false; |
|---|
| 1711 | + |
|---|
| 3652 | 1712 | /* |
|---|
| 3653 | 1713 | * Store ordering should not be needed here, since a potential |
|---|
| 3654 | 1714 | * preempt will imply a full memory barrier |
|---|
| .. | .. |
|---|
| 3656 | 1716 | tsk->plug = plug; |
|---|
| 3657 | 1717 | } |
|---|
| 3658 | 1718 | EXPORT_SYMBOL(blk_start_plug); |
|---|
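A short usage sketch of the batching semantics described in the kernel-doc above; `my_submit_batch()` is a hypothetical caller and the bios are assumed to be fully initialised:

```c
static void my_submit_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);
	for (i = 0; i < nr; i++)
		submit_bio(bios[i]);	/* may be held back and merged */
	blk_finish_plug(&plug);		/* flushes anything still plugged */
}
```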
| 3659 | | - |
|---|
| 3660 | | -static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b) |
|---|
| 3661 | | -{ |
|---|
| 3662 | | - struct request *rqa = container_of(a, struct request, queuelist); |
|---|
| 3663 | | - struct request *rqb = container_of(b, struct request, queuelist); |
|---|
| 3664 | | - |
|---|
| 3665 | | - return !(rqa->q < rqb->q || |
|---|
| 3666 | | - (rqa->q == rqb->q && blk_rq_pos(rqa) < blk_rq_pos(rqb))); |
|---|
| 3667 | | -} |
|---|
| 3668 | | - |
|---|
| 3669 | | -/* |
|---|
| 3670 | | - * If 'from_schedule' is true, then postpone the dispatch of requests |
|---|
| 3671 | | - * until a safe kblockd context. We do this to avoid accidental big |
|---|
| 3672 | | - * additional stack usage in driver dispatch, in places where the original |
|---|
| 3673 | | - * plugger did not intend it. |
|---|
| 3674 | | - */ |
|---|
| 3675 | | -static void queue_unplugged(struct request_queue *q, unsigned int depth, |
|---|
| 3676 | | - bool from_schedule) |
|---|
| 3677 | | - __releases(q->queue_lock) |
|---|
| 3678 | | -{ |
|---|
| 3679 | | - lockdep_assert_held(q->queue_lock); |
|---|
| 3680 | | - |
|---|
| 3681 | | - trace_block_unplug(q, depth, !from_schedule); |
|---|
| 3682 | | - |
|---|
| 3683 | | - if (from_schedule) |
|---|
| 3684 | | - blk_run_queue_async(q); |
|---|
| 3685 | | - else |
|---|
| 3686 | | - __blk_run_queue(q); |
|---|
| 3687 | | - spin_unlock_irq(q->queue_lock); |
|---|
| 3688 | | -} |
|---|
| 3689 | 1719 | |
|---|
| 3690 | 1720 | static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule) |
|---|
| 3691 | 1721 | { |
|---|
| .. | .. |
|---|
| 3731 | 1761 | |
|---|
| 3732 | 1762 | void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) |
|---|
| 3733 | 1763 | { |
|---|
| 3734 | | - struct request_queue *q; |
|---|
| 3735 | | - struct request *rq; |
|---|
| 3736 | | - LIST_HEAD(list); |
|---|
| 3737 | | - unsigned int depth; |
|---|
| 3738 | | - |
|---|
| 3739 | 1764 | flush_plug_callbacks(plug, from_schedule); |
|---|
| 3740 | 1765 | |
|---|
| 3741 | 1766 | if (!list_empty(&plug->mq_list)) |
|---|
| 3742 | 1767 | blk_mq_flush_plug_list(plug, from_schedule); |
|---|
| 3743 | | - |
|---|
| 3744 | | - if (list_empty(&plug->list)) |
|---|
| 3745 | | - return; |
|---|
| 3746 | | - |
|---|
| 3747 | | - list_splice_init(&plug->list, &list); |
|---|
| 3748 | | - |
|---|
| 3749 | | - list_sort(NULL, &list, plug_rq_cmp); |
|---|
| 3750 | | - |
|---|
| 3751 | | - q = NULL; |
|---|
| 3752 | | - depth = 0; |
|---|
| 3753 | | - |
|---|
| 3754 | | - while (!list_empty(&list)) { |
|---|
| 3755 | | - rq = list_entry_rq(list.next); |
|---|
| 3756 | | - list_del_init(&rq->queuelist); |
|---|
| 3757 | | - BUG_ON(!rq->q); |
|---|
| 3758 | | - if (rq->q != q) { |
|---|
| 3759 | | - /* |
|---|
| 3760 | | - * This drops the queue lock |
|---|
| 3761 | | - */ |
|---|
| 3762 | | - if (q) |
|---|
| 3763 | | - queue_unplugged(q, depth, from_schedule); |
|---|
| 3764 | | - q = rq->q; |
|---|
| 3765 | | - depth = 0; |
|---|
| 3766 | | - spin_lock_irq(q->queue_lock); |
|---|
| 3767 | | - } |
|---|
| 3768 | | - |
|---|
| 3769 | | - /* |
|---|
| 3770 | | - * Short-circuit if @q is dead |
|---|
| 3771 | | - */ |
|---|
| 3772 | | - if (unlikely(blk_queue_dying(q))) { |
|---|
| 3773 | | - __blk_end_request_all(rq, BLK_STS_IOERR); |
|---|
| 3774 | | - continue; |
|---|
| 3775 | | - } |
|---|
| 3776 | | - |
|---|
| 3777 | | - /* |
|---|
| 3778 | | - * rq is already accounted, so use raw insert |
|---|
| 3779 | | - */ |
|---|
| 3780 | | - if (op_is_flush(rq->cmd_flags)) |
|---|
| 3781 | | - __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH); |
|---|
| 3782 | | - else |
|---|
| 3783 | | - __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE); |
|---|
| 3784 | | - |
|---|
| 3785 | | - depth++; |
|---|
| 3786 | | - } |
|---|
| 3787 | | - |
|---|
| 3788 | | - /* |
|---|
| 3789 | | - * This drops the queue lock |
|---|
| 3790 | | - */ |
|---|
| 3791 | | - if (q) |
|---|
| 3792 | | - queue_unplugged(q, depth, from_schedule); |
|---|
| 3793 | 1768 | } |
|---|
| 3794 | 1769 | |
|---|
| 1770 | +/** |
|---|
| 1771 | + * blk_finish_plug - mark the end of a batch of submitted I/O |
|---|
| 1772 | + * @plug: The &struct blk_plug passed to blk_start_plug() |
|---|
| 1773 | + * |
|---|
| 1774 | + * Description: |
|---|
| 1775 | + * Indicate that a batch of I/O submissions is complete. This function |
|---|
| 1776 | + * must be paired with an initial call to blk_start_plug(). The intent |
|---|
| 1777 | + * is to allow the block layer to optimize I/O submission. See the |
|---|
| 1778 | + * documentation for blk_start_plug() for more information. |
|---|
| 1779 | + */ |
|---|
| 3795 | 1780 | void blk_finish_plug(struct blk_plug *plug) |
|---|
| 3796 | 1781 | { |
|---|
| 3797 | 1782 | if (plug != current->plug) |
|---|
| .. | .. |
|---|
| 3802 | 1787 | } |
|---|
| 3803 | 1788 | EXPORT_SYMBOL(blk_finish_plug); |
|---|
| 3804 | 1789 | |
|---|
| 3805 | | -#ifdef CONFIG_PM |
|---|
| 3806 | | -/** |
|---|
| 3807 | | - * blk_pm_runtime_init - Block layer runtime PM initialization routine |
|---|
| 3808 | | - * @q: the queue of the device |
|---|
| 3809 | | - * @dev: the device the queue belongs to |
|---|
| 3810 | | - * |
|---|
| 3811 | | - * Description: |
|---|
| 3812 | | - * Initialize runtime-PM-related fields for @q and start auto suspend for |
|---|
| 3813 | | - * @dev. Drivers that want to take advantage of request-based runtime PM |
|---|
| 3814 | | - * should call this function after @dev has been initialized, and its |
|---|
| 3815 | | - * request queue @q has been allocated, and runtime PM for it cannot happen |
|---|
| 3816 | | - * yet (because it is disabled/forbidden or its usage_count > 0). In most |
|---|
| 3817 | | - * cases, the driver should call this function before any I/O has taken place. |
|---|
| 3818 | | - * |
|---|
| 3819 | | - * This function takes care of setting up autosuspend for the device; the |
|---|
| 3820 | | - * autosuspend delay is set to -1 to make runtime suspend impossible until an |
|---|
| 3821 | | - * updated value is set either by the user or by the driver. Drivers do |
|---|
| 3822 | | - * not need to touch other autosuspend settings. |
|---|
| 3823 | | - * |
|---|
| 3824 | | - * Block layer runtime PM is request based, so it only works for drivers |
|---|
| 3825 | | - * that use requests as their I/O unit rather than those that use bios directly. |
|---|
| 3826 | | - */ |
|---|
| 3827 | | -void blk_pm_runtime_init(struct request_queue *q, struct device *dev) |
|---|
| 1790 | +void blk_io_schedule(void) |
|---|
| 3828 | 1791 | { |
|---|
| 3829 | | - /* Don't enable runtime PM for blk-mq until it is ready */ |
|---|
| 3830 | | - if (q->mq_ops) { |
|---|
| 3831 | | - pm_runtime_disable(dev); |
|---|
| 3832 | | - return; |
|---|
| 3833 | | - } |
|---|
| 1792 | + /* Prevent hang_check timer from firing at us during very long I/O */ |
|---|
| 1793 | + unsigned long timeout = sysctl_hung_task_timeout_secs * HZ / 2; |
|---|
| 3834 | 1794 | |
|---|
| 3835 | | - q->dev = dev; |
|---|
| 3836 | | - q->rpm_status = RPM_ACTIVE; |
|---|
| 3837 | | - pm_runtime_set_autosuspend_delay(q->dev, -1); |
|---|
| 3838 | | - pm_runtime_use_autosuspend(q->dev); |
|---|
| 1795 | + if (timeout) |
|---|
| 1796 | + io_schedule_timeout(timeout); |
|---|
| 1797 | + else |
|---|
| 1798 | + io_schedule(); |
|---|
| 3839 | 1799 | } |
|---|
| 3840 | | -EXPORT_SYMBOL(blk_pm_runtime_init); |
|---|
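Although blk_pm_runtime_init() is removed from this file here, its kernel-doc above still states when a driver should call it. The following is a hedged probe-time sketch under those rules; my_setup_runtime_pm(), the 5000 ms delay, and the assumption that the declaration is still visible to the driver (historically via <linux/blkdev.h>) are illustrative only.

```c
#include <linux/blkdev.h>	/* request_queue; blk_pm_runtime_init() declaration assumed visible */
#include <linux/pm_runtime.h>

/* Hypothetical probe-path snippet: @q and @dev come from the driver. */
static void my_setup_runtime_pm(struct request_queue *q, struct device *dev)
{
	blk_pm_runtime_init(q, dev);	/* autosuspend delay starts at -1 (suspend disabled) */

	/* Pick a real delay, or leave it for user space to set via sysfs. */
	pm_runtime_set_autosuspend_delay(dev, 5000);
}
```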
| 3841 | | - |
|---|
| 3842 | | -/** |
|---|
| 3843 | | - * blk_pre_runtime_suspend - Pre runtime suspend check |
|---|
| 3844 | | - * @q: the queue of the device |
|---|
| 3845 | | - * |
|---|
| 3846 | | - * Description: |
|---|
| 3847 | | - * This function will check if runtime suspend is allowed for the device |
|---|
| 3848 | | - * by examining whether any requests are pending in the queue. If there |
|---|
| 3849 | | - * are requests pending, the device cannot be runtime suspended; otherwise, |
|---|
| 3850 | | - * the queue's status will be updated to SUSPENDING and the driver can |
|---|
| 3851 | | - * proceed to suspend the device. |
|---|
| 3852 | | - * |
|---|
| 3853 | | - * If runtime suspend is not allowed, we mark last busy for the device so that |
|---|
| 3854 | | - * the runtime PM core will try to autosuspend it some time later. |
|---|
| 3855 | | - * |
|---|
| 3856 | | - * This function should be called near the start of the device's |
|---|
| 3857 | | - * runtime_suspend callback. |
|---|
| 3858 | | - * |
|---|
| 3859 | | - * Return: |
|---|
| 3860 | | - * 0 - OK to runtime suspend the device |
|---|
| 3861 | | - * -EBUSY - Device should not be runtime suspended |
|---|
| 3862 | | - */ |
|---|
| 3863 | | -int blk_pre_runtime_suspend(struct request_queue *q) |
|---|
| 3864 | | -{ |
|---|
| 3865 | | - int ret = 0; |
|---|
| 3866 | | - |
|---|
| 3867 | | - if (!q->dev) |
|---|
| 3868 | | - return ret; |
|---|
| 3869 | | - |
|---|
| 3870 | | - spin_lock_irq(q->queue_lock); |
|---|
| 3871 | | - if (q->nr_pending) { |
|---|
| 3872 | | - ret = -EBUSY; |
|---|
| 3873 | | - pm_runtime_mark_last_busy(q->dev); |
|---|
| 3874 | | - } else { |
|---|
| 3875 | | - q->rpm_status = RPM_SUSPENDING; |
|---|
| 3876 | | - } |
|---|
| 3877 | | - spin_unlock_irq(q->queue_lock); |
|---|
| 3878 | | - return ret; |
|---|
| 3879 | | -} |
|---|
| 3880 | | -EXPORT_SYMBOL(blk_pre_runtime_suspend); |
|---|
| 3881 | | - |
|---|
| 3882 | | -/** |
|---|
| 3883 | | - * blk_post_runtime_suspend - Post runtime suspend processing |
|---|
| 3884 | | - * @q: the queue of the device |
|---|
| 3885 | | - * @err: return value of the device's runtime_suspend function |
|---|
| 3886 | | - * |
|---|
| 3887 | | - * Description: |
|---|
| 3888 | | - * Update the queue's runtime status according to the return value of the |
|---|
| 3889 | | - * device's runtime suspend function and mark last busy for the device so |
|---|
| 3890 | | - * that PM core will try to auto suspend the device at a later time. |
|---|
| 3891 | | - * |
|---|
| 3892 | | - * This function should be called near the end of the device's |
|---|
| 3893 | | - * runtime_suspend callback. |
|---|
| 3894 | | - */ |
|---|
| 3895 | | -void blk_post_runtime_suspend(struct request_queue *q, int err) |
|---|
| 3896 | | -{ |
|---|
| 3897 | | - if (!q->dev) |
|---|
| 3898 | | - return; |
|---|
| 3899 | | - |
|---|
| 3900 | | - spin_lock_irq(q->queue_lock); |
|---|
| 3901 | | - if (!err) { |
|---|
| 3902 | | - q->rpm_status = RPM_SUSPENDED; |
|---|
| 3903 | | - } else { |
|---|
| 3904 | | - q->rpm_status = RPM_ACTIVE; |
|---|
| 3905 | | - pm_runtime_mark_last_busy(q->dev); |
|---|
| 3906 | | - } |
|---|
| 3907 | | - spin_unlock_irq(q->queue_lock); |
|---|
| 3908 | | -} |
|---|
| 3909 | | -EXPORT_SYMBOL(blk_post_runtime_suspend); |
|---|
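The kernel-doc above for blk_pre_runtime_suspend() and blk_post_runtime_suspend() describes how the two calls bracket a driver's runtime_suspend callback. A hedged sketch of that pairing follows; my_runtime_suspend(), my_hw_suspend(), and the drvdata holding the queue are hypothetical.

```c
#include <linux/blkdev.h>
#include <linux/pm_runtime.h>

/* Hypothetical driver runtime_suspend callback bracketed by the block-layer helpers. */
static int my_runtime_suspend(struct device *dev)
{
	struct request_queue *q = dev_get_drvdata(dev);	/* illustrative lookup */
	int err;

	err = blk_pre_runtime_suspend(q);	/* -EBUSY if requests are still pending */
	if (err)
		return err;

	err = my_hw_suspend(dev);		/* device-specific suspend work */
	blk_post_runtime_suspend(q, err);	/* RPM_SUSPENDED on success, else back to RPM_ACTIVE */
	return err;
}
```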
| 3910 | | - |
|---|
| 3911 | | -/** |
|---|
| 3912 | | - * blk_pre_runtime_resume - Pre runtime resume processing |
|---|
| 3913 | | - * @q: the queue of the device |
|---|
| 3914 | | - * |
|---|
| 3915 | | - * Description: |
|---|
| 3916 | | - * Update the queue's runtime status to RESUMING in preparation for the |
|---|
| 3917 | | - * runtime resume of the device. |
|---|
| 3918 | | - * |
|---|
| 3919 | | - * This function should be called near the start of the device's |
|---|
| 3920 | | - * runtime_resume callback. |
|---|
| 3921 | | - */ |
|---|
| 3922 | | -void blk_pre_runtime_resume(struct request_queue *q) |
|---|
| 3923 | | -{ |
|---|
| 3924 | | - if (!q->dev) |
|---|
| 3925 | | - return; |
|---|
| 3926 | | - |
|---|
| 3927 | | - spin_lock_irq(q->queue_lock); |
|---|
| 3928 | | - q->rpm_status = RPM_RESUMING; |
|---|
| 3929 | | - spin_unlock_irq(q->queue_lock); |
|---|
| 3930 | | -} |
|---|
| 3931 | | -EXPORT_SYMBOL(blk_pre_runtime_resume); |
|---|
| 3932 | | - |
|---|
| 3933 | | -/** |
|---|
| 3934 | | - * blk_post_runtime_resume - Post runtime resume processing |
|---|
| 3935 | | - * @q: the queue of the device |
|---|
| 3936 | | - * @err: return value of the device's runtime_resume function |
|---|
| 3937 | | - * |
|---|
| 3938 | | - * Description: |
|---|
| 3939 | | - * Update the queue's runtime status according to the return value of the |
|---|
| 3940 | | - * device's runtime_resume function. If it is successfully resumed, process |
|---|
| 3941 | | - * the requests that were queued while the device was resuming, |
|---|
| 3942 | | - * and then mark the device last busy and initiate autosuspend for it. |
|---|
| 3943 | | - * |
|---|
| 3944 | | - * This function should be called near the end of the device's |
|---|
| 3945 | | - * runtime_resume callback. |
|---|
| 3946 | | - */ |
|---|
| 3947 | | -void blk_post_runtime_resume(struct request_queue *q, int err) |
|---|
| 3948 | | -{ |
|---|
| 3949 | | - if (!q->dev) |
|---|
| 3950 | | - return; |
|---|
| 3951 | | - |
|---|
| 3952 | | - spin_lock_irq(q->queue_lock); |
|---|
| 3953 | | - if (!err) { |
|---|
| 3954 | | - q->rpm_status = RPM_ACTIVE; |
|---|
| 3955 | | - __blk_run_queue(q); |
|---|
| 3956 | | - pm_runtime_mark_last_busy(q->dev); |
|---|
| 3957 | | - pm_request_autosuspend(q->dev); |
|---|
| 3958 | | - } else { |
|---|
| 3959 | | - q->rpm_status = RPM_SUSPENDED; |
|---|
| 3960 | | - } |
|---|
| 3961 | | - spin_unlock_irq(q->queue_lock); |
|---|
| 3962 | | -} |
|---|
| 3963 | | -EXPORT_SYMBOL(blk_post_runtime_resume); |
|---|
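Symmetrically, the resume-side kernel-doc above pairs blk_pre_runtime_resume() with blk_post_runtime_resume() around the device's own resume work. A hedged sketch; my_runtime_resume() and my_hw_resume() are hypothetical, matching the suspend sketch earlier.

```c
#include <linux/blkdev.h>
#include <linux/pm_runtime.h>

/* Hypothetical driver runtime_resume callback matching the suspend sketch above. */
static int my_runtime_resume(struct device *dev)
{
	struct request_queue *q = dev_get_drvdata(dev);	/* illustrative lookup */
	int err;

	blk_pre_runtime_resume(q);		/* queue status becomes RPM_RESUMING */
	err = my_hw_resume(dev);		/* device-specific resume work */
	blk_post_runtime_resume(q, err);	/* on success: run the queue, mark last busy, request autosuspend */
	return err;
}
```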
| 3964 | | - |
|---|
| 3965 | | -/** |
|---|
| 3966 | | - * blk_set_runtime_active - Force runtime status of the queue to be active |
|---|
| 3967 | | - * @q: the queue of the device |
|---|
| 3968 | | - * |
|---|
| 3969 | | - * If the device is left runtime suspended during system suspend, the resume |
|---|
| 3970 | | - * hook typically resumes the device and corrects runtime status |
|---|
| 3971 | | - * accordingly. However, that does not affect the queue runtime PM status |
|---|
| 3972 | | - * which is still "suspended". This prevents processing requests from the |
|---|
| 3973 | | - * queue. |
|---|
| 3974 | | - * |
|---|
| 3975 | | - * This function can be used in a driver's resume hook to correct the queue's |
|---|
| 3976 | | - * runtime PM status and re-enable peeking at requests from the queue. It |
|---|
| 3977 | | - * should be called before the first request is added to the queue. |
|---|
| 3978 | | - */ |
|---|
| 3979 | | -void blk_set_runtime_active(struct request_queue *q) |
|---|
| 3980 | | -{ |
|---|
| 3981 | | - spin_lock_irq(q->queue_lock); |
|---|
| 3982 | | - q->rpm_status = RPM_ACTIVE; |
|---|
| 3983 | | - pm_runtime_mark_last_busy(q->dev); |
|---|
| 3984 | | - pm_request_autosuspend(q->dev); |
|---|
| 3985 | | - spin_unlock_irq(q->queue_lock); |
|---|
| 3986 | | -} |
|---|
| 3987 | | -EXPORT_SYMBOL(blk_set_runtime_active); |
|---|
| 3988 | | -#endif |
|---|
| 1800 | +EXPORT_SYMBOL_GPL(blk_io_schedule); |
|---|
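blk_io_schedule() above bounds each sleep to half the hung-task timeout so that a long I/O wait does not trip the hang checker. A minimal sketch of an open-coded wait that uses it; the atomic 'done' flag and wait_for_io_done() are hypothetical.

```c
#include <linux/atomic.h>
#include <linux/blkdev.h>	/* blk_io_schedule() */
#include <linux/sched.h>

/* Hypothetical wait: sleep until *done becomes non-zero. */
static void wait_for_io_done(atomic_t *done)
{
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (atomic_read(done))
			break;
		blk_io_schedule();	/* each nap stays under the hang-check window */
	}
	__set_current_state(TASK_RUNNING);
}
```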
| 3989 | 1801 | |
|---|
| 3990 | 1802 | int __init blk_dev_init(void) |
|---|
| 3991 | 1803 | { |
|---|
| 3992 | 1804 | BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS)); |
|---|
| 3993 | 1805 | BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 * |
|---|
| 3994 | | - FIELD_SIZEOF(struct request, cmd_flags)); |
|---|
| 1806 | + sizeof_field(struct request, cmd_flags)); |
|---|
| 3995 | 1807 | BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 * |
|---|
| 3996 | | - FIELD_SIZEOF(struct bio, bi_opf)); |
|---|
| 1808 | + sizeof_field(struct bio, bi_opf)); |
|---|
| 3997 | 1809 | |
|---|
| 3998 | 1810 | /* used for unplugging and affects IO latency/throughput - HIGHPRI */ |
|---|
| 3999 | 1811 | kblockd_workqueue = alloc_workqueue("kblockd", |
|---|
| .. | .. |
|---|
| 4001 | 1813 | if (!kblockd_workqueue) |
|---|
| 4002 | 1814 | panic("Failed to create kblockd\n"); |
|---|
| 4003 | 1815 | |
|---|
| 4004 | | - request_cachep = kmem_cache_create("blkdev_requests", |
|---|
| 4005 | | - sizeof(struct request), 0, SLAB_PANIC, NULL); |
|---|
| 4006 | | - |
|---|
| 4007 | 1816 | blk_requestq_cachep = kmem_cache_create("request_queue", |
|---|
| 4008 | 1817 | sizeof(struct request_queue), 0, SLAB_PANIC, NULL); |
|---|
| 4009 | 1818 | |
|---|
| 4010 | | -#ifdef CONFIG_DEBUG_FS |
|---|
| 4011 | 1819 | blk_debugfs_root = debugfs_create_dir("block", NULL); |
|---|
| 4012 | | -#endif |
|---|
| 4013 | | - |
|---|
| 4014 | | - if (bio_crypt_ctx_init() < 0) |
|---|
| 4015 | | - panic("Failed to allocate mem for bio crypt ctxs\n"); |
|---|
| 4016 | | - |
|---|
| 4017 | | - if (blk_crypto_fallback_init() < 0) |
|---|
| 4018 | | - panic("Failed to init blk-crypto-fallback\n"); |
|---|
| 4019 | 1820 | |
|---|
| 4020 | 1821 | return 0; |
|---|
| 4021 | 1822 | } |
|---|