```diff
+// SPDX-License-Identifier: GPL-2.0
 /*
  * NVMe over Fabrics loopback device.
  * Copyright (c) 2015-2016 HGST, a Western Digital Company.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/scatterlist.h>
..
 struct nvme_loop_iod {
         struct nvme_request     nvme_req;
         struct nvme_command     cmd;
-        struct nvme_completion  rsp;
+        struct nvme_completion  cqe;
         struct nvmet_req        req;
         struct nvme_loop_queue  *queue;
         struct work_struct      work;
..
         struct nvme_loop_iod    async_event_iod;
         struct nvme_ctrl        ctrl;
 
-        struct nvmet_ctrl       *target_ctrl;
         struct nvmet_port       *port;
 };
 
..
 {
         struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
 
-        nvme_cleanup_cmd(req);
-        sg_free_table_chained(&iod->sg_table, true);
+        sg_free_table_chained(&iod->sg_table, NVME_INLINE_SG_CNT);
         nvme_complete_rq(req);
 }
 
```
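Two things change in the completion path above: `nvme_cleanup_cmd()` moves out of completion (it is now called on the submission-side error path instead, see the `queue_rq` hunk below), and `sg_free_table_chained()` takes the number of inline scatterlist entries rather than a `bool`. That count must match what was passed to `sg_alloc_table_chained()`. A minimal sketch of the pairing, modeled on the iod above; `loop_iod_sketch`, `map_data_sketch`, and `unmap_data_sketch` are illustrative names, not driver functions:

```c
#include <linux/blk-mq.h>
#include <linux/scatterlist.h>

/* Per-request context with a small inline scatterlist; the flexible
 * array must be the last member so the table can chain past it when a
 * request has more than NVME_INLINE_SG_CNT segments. */
struct loop_iod_sketch {
	struct sg_table sg_table;
	struct scatterlist first_sgl[]; /* NVME_INLINE_SG_CNT entries */
};

static blk_status_t map_data_sketch(struct loop_iod_sketch *iod,
				    struct request *req)
{
	iod->sg_table.sgl = iod->first_sgl;
	/* The inline count passed here ... */
	if (sg_alloc_table_chained(&iod->sg_table,
			blk_rq_nr_phys_segments(req),
			iod->sg_table.sgl, NVME_INLINE_SG_CNT))
		return BLK_STS_RESOURCE;
	return BLK_STS_OK;
}

static void unmap_data_sketch(struct loop_iod_sketch *iod)
{
	/* ... must match the inline count passed here. */
	sg_free_table_chained(&iod->sg_table, NVME_INLINE_SG_CNT);
}
```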
```diff
 {
         struct nvme_loop_queue *queue =
                 container_of(req->sq, struct nvme_loop_queue, nvme_sq);
-        struct nvme_completion *cqe = req->rsp;
+        struct nvme_completion *cqe = req->cqe;
 
         /*
          * AEN requests are special as they don't time out and can
..
          * aborts. We don't even bother to allocate a struct request
          * for them but rather special case them here.
          */
-        if (unlikely(nvme_loop_queue_idx(queue) == 0 &&
-                        cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) {
+        if (unlikely(nvme_is_aen_req(nvme_loop_queue_idx(queue),
+                                     cqe->command_id))) {
                 nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
                                 &cqe->result);
         } else {
                 struct request *rq;
 
-                rq = blk_mq_tag_to_rq(nvme_loop_tagset(queue), cqe->command_id);
+                rq = nvme_find_rq(nvme_loop_tagset(queue), cqe->command_id);
                 if (!rq) {
                         dev_err(queue->ctrl->ctrl.device,
-                                "tag 0x%x on queue %d not found\n",
+                                "got bad command_id %#x on queue %d\n",
                                 cqe->command_id, nvme_loop_queue_idx(queue));
                         return;
                 }
 
-                nvme_end_request(rq, cqe->status, cqe->result);
+                if (!nvme_try_complete_req(rq, cqe->status, cqe->result))
+                        nvme_loop_complete_rq(rq);
         }
 }
 
```
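The open-coded admin-queue check is folded into `nvme_is_aen_req()`, a helper shared by all transports. Its body in the host core is, to the best of my reading, exactly the condition that was removed:

```c
/* AEN requests use reserved tags above the blk-mq queue depth on the
 * admin queue (qid 0); no struct request is ever allocated for them. */
static inline bool nvme_is_aen_req(u16 qid, __u16 command_id)
{
	return !qid && command_id >= NVME_AQ_BLK_MQ_DEPTH;
}
```

Similarly, `nvme_find_rq()` wraps `blk_mq_tag_to_rq()` and additionally validates the generation bits that later kernels fold into `command_id`, which is why the error message now talks about a bad command id rather than a missing tag. And `nvme_try_complete_req()` (the successor of `nvme_end_request()`) returns false when the core did not take over completion, for example when no failover path applies, in which case the driver must finish the request itself, hence the direct `nvme_loop_complete_rq()` call.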
```diff
         struct nvme_loop_iod *iod =
                 container_of(work, struct nvme_loop_iod, work);
 
-        nvmet_req_execute(&iod->req);
-}
-
-static enum blk_eh_timer_return
-nvme_loop_timeout(struct request *rq, bool reserved)
-{
-        struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(rq);
-
-        /* queue error recovery */
-        nvme_reset_ctrl(&iod->queue->ctrl->ctrl);
-
-        /* fail with DNR on admin cmd timeout */
-        nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
-
-        return BLK_EH_DONE;
+        iod->req.execute(&iod->req);
 }
 
 static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
..
         iod->sg_table.sgl = iod->first_sgl;
         if (sg_alloc_table_chained(&iod->sg_table,
                         blk_rq_nr_phys_segments(req),
-                        iod->sg_table.sgl))
+                        iod->sg_table.sgl, NVME_INLINE_SG_CNT)) {
+                nvme_cleanup_cmd(req);
                 return BLK_STS_RESOURCE;
+        }
 
         iod->req.sg = iod->sg_table.sgl;
         iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
..
                 struct nvme_loop_iod *iod, unsigned int queue_idx)
 {
         iod->req.cmd = &iod->cmd;
-        iod->req.rsp = &iod->rsp;
+        iod->req.cqe = &iod->cqe;
         iod->queue = &ctrl->queues[queue_idx];
         INIT_WORK(&iod->work, nvme_loop_execute_work);
         return 0;
..
         .complete       = nvme_loop_complete_rq,
         .init_request   = nvme_loop_init_request,
         .init_hctx      = nvme_loop_init_hctx,
-        .timeout        = nvme_loop_timeout,
 };
 
 static const struct blk_mq_ops nvme_loop_admin_mq_ops = {
..
         .complete       = nvme_loop_complete_rq,
         .init_request   = nvme_loop_init_request,
         .init_hctx      = nvme_loop_init_admin_hctx,
-        .timeout        = nvme_loop_timeout,
 };
 
 static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl)
..
                 return;
         nvmet_sq_destroy(&ctrl->queues[0].nvme_sq);
         blk_cleanup_queue(ctrl->ctrl.admin_q);
+        blk_cleanup_queue(ctrl->ctrl.fabrics_q);
         blk_mq_free_tag_set(&ctrl->admin_tag_set);
 }
 
..
         int i, ret;
 
         for (i = 1; i < ctrl->ctrl.queue_count; i++) {
-                ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
+                ret = nvmf_connect_io_queue(&ctrl->ctrl, i, false);
                 if (ret)
                         return ret;
                 set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags);
..
         ctrl->admin_tag_set.ops = &nvme_loop_admin_mq_ops;
         ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
         ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */
-        ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
+        ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node;
         ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
-                SG_CHUNK_SIZE * sizeof(struct scatterlist);
+                NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
         ctrl->admin_tag_set.driver_data = ctrl;
         ctrl->admin_tag_set.nr_hw_queues = 1;
         ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
..
                 goto out_free_sq;
         ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set;
 
+        ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set);
+        if (IS_ERR(ctrl->ctrl.fabrics_q)) {
+                error = PTR_ERR(ctrl->ctrl.fabrics_q);
+                goto out_free_tagset;
+        }
+
         ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
         if (IS_ERR(ctrl->ctrl.admin_q)) {
                 error = PTR_ERR(ctrl->ctrl.admin_q);
-                goto out_free_tagset;
+                goto out_cleanup_fabrics_q;
         }
 
         error = nvmf_connect_admin_queue(&ctrl->ctrl);
```
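`fabrics_q` is a second request queue created on the same admin tag set. It carries only fabrics commands (connect, property get/set) and is never quiesced, so the core can still reach the target while `admin_q` is frozen during resets and shutdown. A condensed sketch of the resulting bring-up and unwind order, reusing the labels from the hunk above (illustrative helper, error handling shortened):

```c
/* admin_bringup_sketch is an illustrative name; the real logic lives
 * inside nvme_loop_configure_admin_queue(). */
static int admin_bringup_sketch(struct nvme_loop_ctrl *ctrl)
{
	int error;

	error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
	if (error)
		return error;

	/* fabrics commands get their own, never-quiesced queue */
	ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set);
	if (IS_ERR(ctrl->ctrl.fabrics_q)) {
		error = PTR_ERR(ctrl->ctrl.fabrics_q);
		goto out_free_tagset;
	}

	ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
	if (IS_ERR(ctrl->ctrl.admin_q)) {
		error = PTR_ERR(ctrl->ctrl.admin_q);
		goto out_cleanup_fabrics_q;
	}
	return 0;

out_cleanup_fabrics_q:
	blk_cleanup_queue(ctrl->ctrl.fabrics_q);
out_free_tagset:
	blk_mq_free_tag_set(&ctrl->admin_tag_set);
	return error;
}
```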
```diff
 
         set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags);
 
-        error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap);
-        if (error) {
-                dev_err(ctrl->ctrl.device,
-                        "prop_get NVME_REG_CAP failed\n");
-                goto out_cleanup_queue;
-        }
-
-        ctrl->ctrl.sqsize =
-                min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
-
-        error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
+        error = nvme_enable_ctrl(&ctrl->ctrl);
         if (error)
                 goto out_cleanup_queue;
 
         ctrl->ctrl.max_hw_sectors =
                 (NVME_LOOP_MAX_SEGMENTS - 1) << (PAGE_SHIFT - 9);
+
+        blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 
         error = nvme_init_identify(&ctrl->ctrl);
         if (error)
..
 out_cleanup_queue:
         clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags);
         blk_cleanup_queue(ctrl->ctrl.admin_q);
+out_cleanup_fabrics_q:
+        blk_cleanup_queue(ctrl->ctrl.fabrics_q);
 out_free_tagset:
         blk_mq_free_tag_set(&ctrl->admin_tag_set);
 out_free_sq:
..
                 nvme_stop_queues(&ctrl->ctrl);
                 blk_mq_tagset_busy_iter(&ctrl->tag_set,
                                         nvme_cancel_request, &ctrl->ctrl);
+                blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
                 nvme_loop_destroy_io_queues(ctrl);
         }
 
+        blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
         if (ctrl->ctrl.state == NVME_CTRL_LIVE)
                 nvme_shutdown_ctrl(&ctrl->ctrl);
 
-        blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
         blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
                                 nvme_cancel_request, &ctrl->ctrl);
-        blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+        blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
         nvme_loop_destroy_admin_queue(ctrl);
 }
 
```
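Note the reordering in the shutdown hunk: the admin queue is now quiesced *before* `nvme_shutdown_ctrl()` (the shutdown register writes travel over the new `fabrics_q`, so they still make progress), and both tag sets gain a `blk_mq_tagset_wait_completed_request()` call so that no `->complete` handler can still be running when the queues are destroyed. The general pattern, as a sketch (`teardown_sketch` is an illustrative helper, not a driver function):

```c
/* Quiesce -> cancel -> wait: safe teardown ordering for one queue and
 * its tag set. */
static void teardown_sketch(struct request_queue *q,
			    struct blk_mq_tag_set *set,
			    struct nvme_ctrl *ctrl)
{
	blk_mq_quiesce_queue(q);     /* no new requests enter ->queue_rq */
	blk_mq_tagset_busy_iter(set, nvme_cancel_request, ctrl);
	blk_mq_tagset_wait_completed_request(set); /* drain completions */
}
```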
```diff
 {
         struct nvme_loop_ctrl *ctrl =
                 container_of(work, struct nvme_loop_ctrl, ctrl.reset_work);
-        bool changed;
         int ret;
 
         nvme_stop_ctrl(&ctrl->ctrl);
         nvme_loop_shutdown_ctrl(ctrl);
 
         if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
-                /* state change failure should never happen */
-                WARN_ON_ONCE(1);
+                if (ctrl->ctrl.state != NVME_CTRL_DELETING &&
+                    ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO)
+                        /* state change failure for non-deleted ctrl? */
+                        WARN_ON_ONCE(1);
                 return;
         }
 
..
         blk_mq_update_nr_hw_queues(&ctrl->tag_set,
                         ctrl->ctrl.queue_count - 1);
 
-        changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
-        WARN_ON_ONCE(!changed);
+        if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE))
+                WARN_ON_ONCE(1);
 
         nvme_start_ctrl(&ctrl->ctrl);
 
..
 out_disable:
         dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
         nvme_uninit_ctrl(&ctrl->ctrl);
-        nvme_put_ctrl(&ctrl->ctrl);
 }
 
 static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
..
         ctrl->tag_set.ops = &nvme_loop_mq_ops;
         ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
         ctrl->tag_set.reserved_tags = 1; /* fabric connect */
-        ctrl->tag_set.numa_node = NUMA_NO_NODE;
+        ctrl->tag_set.numa_node = ctrl->ctrl.numa_node;
         ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
         ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
-                SG_CHUNK_SIZE * sizeof(struct scatterlist);
+                NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
         ctrl->tag_set.driver_data = ctrl;
         ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
         ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
..
                 struct nvmf_ctrl_options *opts)
 {
         struct nvme_loop_ctrl *ctrl;
-        bool changed;
         int ret;
 
         ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
..
 
         ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_loop_ctrl_ops,
                                 0 /* no quirks, we're perfect! */);
-        if (ret)
-                goto out_put_ctrl;
+        if (ret) {
+                kfree(ctrl);
+                goto out;
+        }
+
+        if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
+                WARN_ON_ONCE(1);
 
         ret = -ENOMEM;
 
..
         dev_info(ctrl->ctrl.device,
                  "new ctrl: \"%s\"\n", ctrl->ctrl.opts->subsysnqn);
 
-        nvme_get_ctrl(&ctrl->ctrl);
-
-        changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
-        WARN_ON_ONCE(!changed);
+        if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE))
+                WARN_ON_ONCE(1);
 
         mutex_lock(&nvme_loop_ctrl_mutex);
         list_add_tail(&ctrl->list, &nvme_loop_ctrl_list);
..
         kfree(ctrl->queues);
 out_uninit_ctrl:
         nvme_uninit_ctrl(&ctrl->ctrl);
-out_put_ctrl:
         nvme_put_ctrl(&ctrl->ctrl);
+out:
         if (ret > 0)
                 ret = -EIO;
         return ERR_PTR(ret);
```
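Two threads run through the create/teardown hunks above. First, reference counting: once `nvme_init_ctrl()` succeeds it owns the initial controller reference, so an early failure simply frees the allocation, while later failures pair `nvme_uninit_ctrl()` with the final `nvme_put_ctrl()`; the old `out_put_ctrl` label, which dropped the reference without uninit, is gone. Second, the controller state machine: the controller is now walked through NEW, CONNECTING, and LIVE explicitly, and `nvme_change_ctrl_state()` refuses illegal transitions. In sketch form (`create_state_walk_sketch` is an illustrative helper):

```c
/* Illustrative only: the state walk nvme_loop_create_ctrl() enforces.
 * nvme_change_ctrl_state() returns false on a disallowed transition. */
static void create_state_walk_sketch(struct nvme_ctrl *ctrl)
{
	/* NEW -> CONNECTING, immediately after nvme_init_ctrl() */
	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING))
		WARN_ON_ONCE(1);

	/* ... admin and I/O queues are brought up here ... */

	/* CONNECTING -> LIVE, once the controller is usable */
	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
		WARN_ON_ONCE(1);
}
```

The `ret > 0` squash at the very end exists because the fabrics connect path can return positive NVMe status values, which must become a negative errno before being wrapped in `ERR_PTR()`.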