| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0 |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * NVMe over Fabrics RDMA target. |
|---|
| 3 | 4 | * Copyright (c) 2015-2016 HGST, a Western Digital Company. |
|---|
| 4 | | - * |
|---|
| 5 | | - * This program is free software; you can redistribute it and/or modify it |
|---|
| 6 | | - * under the terms and conditions of the GNU General Public License, |
|---|
| 7 | | - * version 2, as published by the Free Software Foundation. |
|---|
| 8 | | - * |
|---|
| 9 | | - * This program is distributed in the hope it will be useful, but WITHOUT |
|---|
| 10 | | - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|---|
| 11 | | - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|---|
| 12 | | - * more details. |
|---|
| 13 | 5 | */ |
|---|
| 14 | 6 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
|---|
| 15 | 7 | #include <linux/atomic.h> |
|---|
| .. | .. |
|---|
| 28 | 20 | #include <rdma/ib_verbs.h> |
|---|
| 29 | 21 | #include <rdma/rdma_cm.h> |
|---|
| 30 | 22 | #include <rdma/rw.h> |
|---|
| 23 | +#include <rdma/ib_cm.h> |
|---|
| 31 | 24 | |
|---|
| 32 | 25 | #include <linux/nvme-rdma.h> |
|---|
| 33 | 26 | #include "nvmet.h" |
|---|
| .. | .. |
|---|
| 39 | 32 | #define NVMET_RDMA_MAX_INLINE_SGE 4 |
|---|
| 40 | 33 | #define NVMET_RDMA_MAX_INLINE_DATA_SIZE max_t(int, SZ_16K, PAGE_SIZE) |
|---|
| 41 | 34 | |
|---|
| 35 | +/* Assume mpsmin == device_page_size == 4KB */ |
|---|
| 36 | +#define NVMET_RDMA_MAX_MDTS 8 |
|---|
| 37 | +#define NVMET_RDMA_MAX_METADATA_MDTS 5 |
|---|
| 38 | + |
|---|
| 39 | +struct nvmet_rdma_srq; |
|---|
| 40 | + |
|---|
| 42 | 41 | struct nvmet_rdma_cmd { |
|---|
| 43 | 42 | struct ib_sge sge[NVMET_RDMA_MAX_INLINE_SGE + 1]; |
|---|
| 44 | 43 | struct ib_cqe cqe; |
|---|
| .. | .. |
|---|
| 46 | 45 | struct scatterlist inline_sg[NVMET_RDMA_MAX_INLINE_SGE]; |
|---|
| 47 | 46 | struct nvme_command *nvme_cmd; |
|---|
| 48 | 47 | struct nvmet_rdma_queue *queue; |
|---|
| 48 | + struct nvmet_rdma_srq *nsrq; |
|---|
| 49 | 49 | }; |
|---|
| 50 | 50 | |
|---|
| 51 | 51 | enum { |
|---|
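
The new `NVMET_RDMA_MAX_MDTS` / `NVMET_RDMA_MAX_METADATA_MDTS` values above are powers of two in units of the (assumed 4KB) minimum page size, so they cap a single transfer at 1MB, or 128KB when T10-PI metadata is in use; `nvmet_rdma_get_mdts()` near the end of the patch reports whichever applies. A quick userspace sketch (not kernel code) of that arithmetic under the 4KB assumption stated in the comment:

```c
#include <stdio.h>

int main(void)
{
	unsigned int page_size = 4096;	/* assumed mpsmin page size, per the comment above */

	/* MDTS is a power-of-two multiple of the minimum page size */
	printf("MDTS 8 -> %u bytes\n", page_size << 8);	/* 1 MiB */
	printf("MDTS 5 -> %u bytes\n", page_size << 5);	/* 128 KiB, PI-enabled */
	return 0;
}
```
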
| .. | .. |
|---|
| 62 | 62 | struct nvmet_rdma_queue *queue; |
|---|
| 63 | 63 | |
|---|
| 64 | 64 | struct ib_cqe read_cqe; |
|---|
| 65 | + struct ib_cqe write_cqe; |
|---|
| 65 | 66 | struct rdma_rw_ctx rw; |
|---|
| 66 | 67 | |
|---|
| 67 | 68 | struct nvmet_req req; |
|---|
| .. | .. |
|---|
| 88 | 89 | struct ib_cq *cq; |
|---|
| 89 | 90 | atomic_t sq_wr_avail; |
|---|
| 90 | 91 | struct nvmet_rdma_device *dev; |
|---|
| 92 | + struct nvmet_rdma_srq *nsrq; |
|---|
| 91 | 93 | spinlock_t state_lock; |
|---|
| 92 | 94 | enum nvmet_rdma_queue_state state; |
|---|
| 93 | 95 | struct nvmet_cq nvme_cq; |
|---|
| .. | .. |
|---|
| 105 | 107 | |
|---|
| 106 | 108 | int idx; |
|---|
| 107 | 109 | int host_qid; |
|---|
| 110 | + int comp_vector; |
|---|
| 108 | 111 | int recv_queue_size; |
|---|
| 109 | 112 | int send_queue_size; |
|---|
| 110 | 113 | |
|---|
| 111 | 114 | struct list_head queue_list; |
|---|
| 112 | 115 | }; |
|---|
| 113 | 116 | |
|---|
| 117 | +struct nvmet_rdma_port { |
|---|
| 118 | + struct nvmet_port *nport; |
|---|
| 119 | + struct sockaddr_storage addr; |
|---|
| 120 | + struct rdma_cm_id *cm_id; |
|---|
| 121 | + struct delayed_work repair_work; |
|---|
| 122 | +}; |
|---|
| 123 | + |
|---|
| 124 | +struct nvmet_rdma_srq { |
|---|
| 125 | + struct ib_srq *srq; |
|---|
| 126 | + struct nvmet_rdma_cmd *cmds; |
|---|
| 127 | + struct nvmet_rdma_device *ndev; |
|---|
| 128 | +}; |
|---|
| 129 | + |
|---|
| 114 | 130 | struct nvmet_rdma_device { |
|---|
| 115 | 131 | struct ib_device *device; |
|---|
| 116 | 132 | struct ib_pd *pd; |
|---|
| 117 | | - struct ib_srq *srq; |
|---|
| 118 | | - struct nvmet_rdma_cmd *srq_cmds; |
|---|
| 133 | + struct nvmet_rdma_srq **srqs; |
|---|
| 134 | + int srq_count; |
|---|
| 119 | 135 | size_t srq_size; |
|---|
| 120 | 136 | struct kref ref; |
|---|
| 121 | 137 | struct list_head entry; |
|---|
| .. | .. |
|---|
| 126 | 142 | static bool nvmet_rdma_use_srq; |
|---|
| 127 | 143 | module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444); |
|---|
| 128 | 144 | MODULE_PARM_DESC(use_srq, "Use shared receive queue."); |
|---|
| 145 | + |
|---|
| 146 | +static int srq_size_set(const char *val, const struct kernel_param *kp); |
|---|
| 147 | +static const struct kernel_param_ops srq_size_ops = { |
|---|
| 148 | + .set = srq_size_set, |
|---|
| 149 | + .get = param_get_int, |
|---|
| 150 | +}; |
|---|
| 151 | + |
|---|
| 152 | +static int nvmet_rdma_srq_size = 1024; |
|---|
| 153 | +module_param_cb(srq_size, &srq_size_ops, &nvmet_rdma_srq_size, 0644); |
|---|
| 154 | +MODULE_PARM_DESC(srq_size, "set Shared Receive Queue (SRQ) size, should >= 256 (default: 1024)"); |
|---|
| 129 | 155 | |
|---|
| 130 | 156 | static DEFINE_IDA(nvmet_rdma_queue_ida); |
|---|
| 131 | 157 | static LIST_HEAD(nvmet_rdma_queue_list); |
|---|
| .. | .. |
|---|
| 138 | 164 | static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc); |
|---|
| 139 | 165 | static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc); |
|---|
| 140 | 166 | static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc); |
|---|
| 167 | +static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc); |
|---|
| 141 | 168 | static void nvmet_rdma_qp_event(struct ib_event *event, void *priv); |
|---|
| 142 | 169 | static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue); |
|---|
| 143 | 170 | static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev, |
|---|
| .. | .. |
|---|
| 147 | 174 | |
|---|
| 148 | 175 | static const struct nvmet_fabrics_ops nvmet_rdma_ops; |
|---|
| 149 | 176 | |
|---|
| 177 | +static int srq_size_set(const char *val, const struct kernel_param *kp) |
|---|
| 178 | +{ |
|---|
| 179 | + int n = 0, ret; |
|---|
| 180 | + |
|---|
| 181 | + ret = kstrtoint(val, 10, &n); |
|---|
| 182 | + if (ret != 0 || n < 256) |
|---|
| 183 | + return -EINVAL; |
|---|
| 184 | + |
|---|
| 185 | + return param_set_int(val, kp); |
|---|
| 186 | +} |
|---|
| 187 | + |
|---|
| 150 | 188 | static int num_pages(int len) |
|---|
| 151 | 189 | { |
|---|
| 152 | 190 | return 1 + (((len - 1) & PAGE_MASK) >> PAGE_SHIFT); |
|---|
| 153 | | -} |
|---|
| 154 | | - |
|---|
| 155 | | -/* XXX: really should move to a generic header sooner or later.. */ |
|---|
| 156 | | -static inline u32 get_unaligned_le24(const u8 *p) |
|---|
| 157 | | -{ |
|---|
| 158 | | - return (u32)p[0] | (u32)p[1] << 8 | (u32)p[2] << 16; |
|---|
| 159 | 191 | } |
|---|
| 160 | 192 | |
|---|
| 161 | 193 | static inline bool nvmet_rdma_need_data_in(struct nvmet_rdma_rsp *rsp) |
|---|
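
The local `get_unaligned_le24()` helper (with its "really should move to a generic header" note) is removed in this hunk, presumably because an equivalent helper is now available from a shared header; `nvmet_rdma_map_sgl_keyed()` further down still decodes the keyed SGL's 24-bit little-endian length with a call of the same name. A minimal standalone sketch of that decoding, reusing the removed function body:

```c
#include <stdint.h>
#include <stdio.h>

/* 24-bit little-endian load, as used for the keyed SGL length field */
static uint32_t get_unaligned_le24(const uint8_t *p)
{
	return (uint32_t)p[0] | (uint32_t)p[1] << 8 | (uint32_t)p[2] << 16;
}

int main(void)
{
	uint8_t sgl_length[3] = { 0x00, 0x10, 0x00 };	/* 0x001000 = 4096 bytes */

	printf("keyed SGL length: %u\n", get_unaligned_le24(sgl_length));
	return 0;
}
```
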
| .. | .. |
|---|
| 169 | 201 | { |
|---|
| 170 | 202 | return !nvme_is_write(rsp->req.cmd) && |
|---|
| 171 | 203 | rsp->req.transfer_len && |
|---|
| 172 | | - !rsp->req.rsp->status && |
|---|
| 204 | + !rsp->req.cqe->status && |
|---|
| 173 | 205 | !(rsp->flags & NVMET_RDMA_REQ_INLINE_DATA); |
|---|
| 174 | 206 | } |
|---|
| 175 | 207 | |
|---|
| .. | .. |
|---|
| 373 | 405 | struct nvmet_rdma_rsp *r) |
|---|
| 374 | 406 | { |
|---|
| 375 | 407 | /* NVMe CQE / RDMA SEND */ |
|---|
| 376 | | - r->req.rsp = kmalloc(sizeof(*r->req.rsp), GFP_KERNEL); |
|---|
| 377 | | - if (!r->req.rsp) |
|---|
| 408 | + r->req.cqe = kmalloc(sizeof(*r->req.cqe), GFP_KERNEL); |
|---|
| 409 | + if (!r->req.cqe) |
|---|
| 378 | 410 | goto out; |
|---|
| 379 | 411 | |
|---|
| 380 | | - r->send_sge.addr = ib_dma_map_single(ndev->device, r->req.rsp, |
|---|
| 381 | | - sizeof(*r->req.rsp), DMA_TO_DEVICE); |
|---|
| 412 | + r->send_sge.addr = ib_dma_map_single(ndev->device, r->req.cqe, |
|---|
| 413 | + sizeof(*r->req.cqe), DMA_TO_DEVICE); |
|---|
| 382 | 414 | if (ib_dma_mapping_error(ndev->device, r->send_sge.addr)) |
|---|
| 383 | 415 | goto out_free_rsp; |
|---|
| 384 | 416 | |
|---|
| 385 | | - r->send_sge.length = sizeof(*r->req.rsp); |
|---|
| 417 | + if (!ib_uses_virt_dma(ndev->device)) |
|---|
| 418 | + r->req.p2p_client = &ndev->device->dev; |
|---|
| 419 | + r->send_sge.length = sizeof(*r->req.cqe); |
|---|
| 386 | 420 | r->send_sge.lkey = ndev->pd->local_dma_lkey; |
|---|
| 387 | 421 | |
|---|
| 388 | 422 | r->send_cqe.done = nvmet_rdma_send_done; |
|---|
| .. | .. |
|---|
| 394 | 428 | |
|---|
| 395 | 429 | /* Data In / RDMA READ */ |
|---|
| 396 | 430 | r->read_cqe.done = nvmet_rdma_read_data_done; |
|---|
| 431 | + /* Data Out / RDMA WRITE */ |
|---|
| 432 | + r->write_cqe.done = nvmet_rdma_write_data_done; |
|---|
| 433 | + |
|---|
| 397 | 434 | return 0; |
|---|
| 398 | 435 | |
|---|
| 399 | 436 | out_free_rsp: |
|---|
| 400 | | - kfree(r->req.rsp); |
|---|
| 437 | + kfree(r->req.cqe); |
|---|
| 401 | 438 | out: |
|---|
| 402 | 439 | return -ENOMEM; |
|---|
| 403 | 440 | } |
|---|
| .. | .. |
|---|
| 406 | 443 | struct nvmet_rdma_rsp *r) |
|---|
| 407 | 444 | { |
|---|
| 408 | 445 | ib_dma_unmap_single(ndev->device, r->send_sge.addr, |
|---|
| 409 | | - sizeof(*r->req.rsp), DMA_TO_DEVICE); |
|---|
| 410 | | - kfree(r->req.rsp); |
|---|
| 446 | + sizeof(*r->req.cqe), DMA_TO_DEVICE); |
|---|
| 447 | + kfree(r->req.cqe); |
|---|
| 411 | 448 | } |
|---|
| 412 | 449 | |
|---|
| 413 | 450 | static int |
|---|
| .. | .. |
|---|
| 469 | 506 | cmd->sge[0].addr, cmd->sge[0].length, |
|---|
| 470 | 507 | DMA_FROM_DEVICE); |
|---|
| 471 | 508 | |
|---|
| 472 | | - if (ndev->srq) |
|---|
| 473 | | - ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL); |
|---|
| 509 | + if (cmd->nsrq) |
|---|
| 510 | + ret = ib_post_srq_recv(cmd->nsrq->srq, &cmd->wr, NULL); |
|---|
| 474 | 511 | else |
|---|
| 475 | 512 | ret = ib_post_recv(cmd->queue->qp, &cmd->wr, NULL); |
|---|
| 476 | 513 | |
|---|
| .. | .. |
|---|
| 503 | 540 | spin_unlock(&queue->rsp_wr_wait_lock); |
|---|
| 504 | 541 | } |
|---|
| 505 | 542 | |
|---|
| 543 | +static u16 nvmet_rdma_check_pi_status(struct ib_mr *sig_mr) |
|---|
| 544 | +{ |
|---|
| 545 | + struct ib_mr_status mr_status; |
|---|
| 546 | + int ret; |
|---|
| 547 | + u16 status = 0; |
|---|
| 548 | + |
|---|
| 549 | + ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status); |
|---|
| 550 | + if (ret) { |
|---|
| 551 | + pr_err("ib_check_mr_status failed, ret %d\n", ret); |
|---|
| 552 | + return NVME_SC_INVALID_PI; |
|---|
| 553 | + } |
|---|
| 554 | + |
|---|
| 555 | + if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) { |
|---|
| 556 | + switch (mr_status.sig_err.err_type) { |
|---|
| 557 | + case IB_SIG_BAD_GUARD: |
|---|
| 558 | + status = NVME_SC_GUARD_CHECK; |
|---|
| 559 | + break; |
|---|
| 560 | + case IB_SIG_BAD_REFTAG: |
|---|
| 561 | + status = NVME_SC_REFTAG_CHECK; |
|---|
| 562 | + break; |
|---|
| 563 | + case IB_SIG_BAD_APPTAG: |
|---|
| 564 | + status = NVME_SC_APPTAG_CHECK; |
|---|
| 565 | + break; |
|---|
| 566 | + } |
|---|
| 567 | + pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n", |
|---|
| 568 | + mr_status.sig_err.err_type, |
|---|
| 569 | + mr_status.sig_err.expected, |
|---|
| 570 | + mr_status.sig_err.actual); |
|---|
| 571 | + } |
|---|
| 572 | + |
|---|
| 573 | + return status; |
|---|
| 574 | +} |
|---|
| 575 | + |
|---|
| 576 | +static void nvmet_rdma_set_sig_domain(struct blk_integrity *bi, |
|---|
| 577 | + struct nvme_command *cmd, struct ib_sig_domain *domain, |
|---|
| 578 | + u16 control, u8 pi_type) |
|---|
| 579 | +{ |
|---|
| 580 | + domain->sig_type = IB_SIG_TYPE_T10_DIF; |
|---|
| 581 | + domain->sig.dif.bg_type = IB_T10DIF_CRC; |
|---|
| 582 | + domain->sig.dif.pi_interval = 1 << bi->interval_exp; |
|---|
| 583 | + domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag); |
|---|
| 584 | + if (control & NVME_RW_PRINFO_PRCHK_REF) |
|---|
| 585 | + domain->sig.dif.ref_remap = true; |
|---|
| 586 | + |
|---|
| 587 | + domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag); |
|---|
| 588 | + domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask); |
|---|
| 589 | + domain->sig.dif.app_escape = true; |
|---|
| 590 | + if (pi_type == NVME_NS_DPS_PI_TYPE3) |
|---|
| 591 | + domain->sig.dif.ref_escape = true; |
|---|
| 592 | +} |
|---|
| 593 | + |
|---|
| 594 | +static void nvmet_rdma_set_sig_attrs(struct nvmet_req *req, |
|---|
| 595 | + struct ib_sig_attrs *sig_attrs) |
|---|
| 596 | +{ |
|---|
| 597 | + struct nvme_command *cmd = req->cmd; |
|---|
| 598 | + u16 control = le16_to_cpu(cmd->rw.control); |
|---|
| 599 | + u8 pi_type = req->ns->pi_type; |
|---|
| 600 | + struct blk_integrity *bi; |
|---|
| 601 | + |
|---|
| 602 | + bi = bdev_get_integrity(req->ns->bdev); |
|---|
| 603 | + |
|---|
| 604 | + memset(sig_attrs, 0, sizeof(*sig_attrs)); |
|---|
| 605 | + |
|---|
| 606 | + if (control & NVME_RW_PRINFO_PRACT) { |
|---|
| 607 | + /* for WRITE_INSERT/READ_STRIP no wire domain */ |
|---|
| 608 | + sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE; |
|---|
| 609 | + nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control, |
|---|
| 610 | + pi_type); |
|---|
| 611 | + /* Clear the PRACT bit since HCA will generate/verify the PI */ |
|---|
| 612 | + control &= ~NVME_RW_PRINFO_PRACT; |
|---|
| 613 | + cmd->rw.control = cpu_to_le16(control); |
|---|
| 614 | + /* PI is added by the HW */ |
|---|
| 615 | + req->transfer_len += req->metadata_len; |
|---|
| 616 | + } else { |
|---|
| 617 | + /* for WRITE_PASS/READ_PASS both wire/memory domains exist */ |
|---|
| 618 | + nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control, |
|---|
| 619 | + pi_type); |
|---|
| 620 | + nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control, |
|---|
| 621 | + pi_type); |
|---|
| 622 | + } |
|---|
| 623 | + |
|---|
| 624 | + if (control & NVME_RW_PRINFO_PRCHK_REF) |
|---|
| 625 | + sig_attrs->check_mask |= IB_SIG_CHECK_REFTAG; |
|---|
| 626 | + if (control & NVME_RW_PRINFO_PRCHK_GUARD) |
|---|
| 627 | + sig_attrs->check_mask |= IB_SIG_CHECK_GUARD; |
|---|
| 628 | + if (control & NVME_RW_PRINFO_PRCHK_APP) |
|---|
| 629 | + sig_attrs->check_mask |= IB_SIG_CHECK_APPTAG; |
|---|
| 630 | +} |
|---|
| 631 | + |
|---|
| 632 | +static int nvmet_rdma_rw_ctx_init(struct nvmet_rdma_rsp *rsp, u64 addr, u32 key, |
|---|
| 633 | + struct ib_sig_attrs *sig_attrs) |
|---|
| 634 | +{ |
|---|
| 635 | + struct rdma_cm_id *cm_id = rsp->queue->cm_id; |
|---|
| 636 | + struct nvmet_req *req = &rsp->req; |
|---|
| 637 | + int ret; |
|---|
| 638 | + |
|---|
| 639 | + if (req->metadata_len) |
|---|
| 640 | + ret = rdma_rw_ctx_signature_init(&rsp->rw, cm_id->qp, |
|---|
| 641 | + cm_id->port_num, req->sg, req->sg_cnt, |
|---|
| 642 | + req->metadata_sg, req->metadata_sg_cnt, sig_attrs, |
|---|
| 643 | + addr, key, nvmet_data_dir(req)); |
|---|
| 644 | + else |
|---|
| 645 | + ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num, |
|---|
| 646 | + req->sg, req->sg_cnt, 0, addr, key, |
|---|
| 647 | + nvmet_data_dir(req)); |
|---|
| 648 | + |
|---|
| 649 | + return ret; |
|---|
| 650 | +} |
|---|
| 651 | + |
|---|
| 652 | +static void nvmet_rdma_rw_ctx_destroy(struct nvmet_rdma_rsp *rsp) |
|---|
| 653 | +{ |
|---|
| 654 | + struct rdma_cm_id *cm_id = rsp->queue->cm_id; |
|---|
| 655 | + struct nvmet_req *req = &rsp->req; |
|---|
| 656 | + |
|---|
| 657 | + if (req->metadata_len) |
|---|
| 658 | + rdma_rw_ctx_destroy_signature(&rsp->rw, cm_id->qp, |
|---|
| 659 | + cm_id->port_num, req->sg, req->sg_cnt, |
|---|
| 660 | + req->metadata_sg, req->metadata_sg_cnt, |
|---|
| 661 | + nvmet_data_dir(req)); |
|---|
| 662 | + else |
|---|
| 663 | + rdma_rw_ctx_destroy(&rsp->rw, cm_id->qp, cm_id->port_num, |
|---|
| 664 | + req->sg, req->sg_cnt, nvmet_data_dir(req)); |
|---|
| 665 | +} |
|---|
| 506 | 666 | |
|---|
| 507 | 667 | static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp) |
|---|
| 508 | 668 | { |
|---|
| .. | .. |
|---|
| 510 | 670 | |
|---|
| 511 | 671 | atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail); |
|---|
| 512 | 672 | |
|---|
| 513 | | - if (rsp->n_rdma) { |
|---|
| 514 | | - rdma_rw_ctx_destroy(&rsp->rw, queue->qp, |
|---|
| 515 | | - queue->cm_id->port_num, rsp->req.sg, |
|---|
| 516 | | - rsp->req.sg_cnt, nvmet_data_dir(&rsp->req)); |
|---|
| 517 | | - } |
|---|
| 673 | + if (rsp->n_rdma) |
|---|
| 674 | + nvmet_rdma_rw_ctx_destroy(rsp); |
|---|
| 518 | 675 | |
|---|
| 519 | 676 | if (rsp->req.sg != rsp->cmd->inline_sg) |
|---|
| 520 | | - sgl_free(rsp->req.sg); |
|---|
| 677 | + nvmet_req_free_sgls(&rsp->req); |
|---|
| 521 | 678 | |
|---|
| 522 | 679 | if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list))) |
|---|
| 523 | 680 | nvmet_rdma_process_wr_wait_list(queue); |
|---|
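
The signature/PI helpers added in this hunk follow the T10-DIF model spelled out in their comments: with PRINFO_PRACT set (WRITE_INSERT/READ_STRIP) there is no wire domain, the HCA generates or verifies the protection information, and `transfer_len` is grown by `metadata_len` because the backend still moves data plus PI. A rough userspace sketch (not kernel code) of that byte accounting, assuming 512-byte intervals with an 8-byte DIF tuple (the real interval comes from `bi->interval_exp`):

```c
#include <stdio.h>

int main(void)
{
	unsigned int nr_blocks = 8;
	unsigned int interval = 512;	/* assumed 1 << bi->interval_exp */
	unsigned int pi_size = 8;	/* T10-DIF tuple per interval */

	unsigned int data_len = nr_blocks * interval;
	unsigned int metadata_len = nr_blocks * pi_size;

	/* PRACT set: only data crosses the wire, the HCA inserts/strips PI,
	 * so the backend transfer covers data + PI. */
	printf("wire bytes: %u, backend transfer_len: %u\n",
	       data_len, data_len + metadata_len);
	return 0;
}
```
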
| .. | .. |
|---|
| 543 | 700 | { |
|---|
| 544 | 701 | struct nvmet_rdma_rsp *rsp = |
|---|
| 545 | 702 | container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe); |
|---|
| 546 | | - struct nvmet_rdma_queue *queue = cq->cq_context; |
|---|
| 703 | + struct nvmet_rdma_queue *queue = wc->qp->qp_context; |
|---|
| 547 | 704 | |
|---|
| 548 | 705 | nvmet_rdma_release_rsp(rsp); |
|---|
| 549 | 706 | |
|---|
| .. | .. |
|---|
| 569 | 726 | rsp->send_wr.opcode = IB_WR_SEND; |
|---|
| 570 | 727 | } |
|---|
| 571 | 728 | |
|---|
| 572 | | - if (nvmet_rdma_need_data_out(rsp)) |
|---|
| 573 | | - first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp, |
|---|
| 574 | | - cm_id->port_num, NULL, &rsp->send_wr); |
|---|
| 575 | | - else |
|---|
| 729 | + if (nvmet_rdma_need_data_out(rsp)) { |
|---|
| 730 | + if (rsp->req.metadata_len) |
|---|
| 731 | + first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp, |
|---|
| 732 | + cm_id->port_num, &rsp->write_cqe, NULL); |
|---|
| 733 | + else |
|---|
| 734 | + first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp, |
|---|
| 735 | + cm_id->port_num, NULL, &rsp->send_wr); |
|---|
| 736 | + } else { |
|---|
| 576 | 737 | first_wr = &rsp->send_wr; |
|---|
| 738 | + } |
|---|
| 577 | 739 | |
|---|
| 578 | 740 | nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd); |
|---|
| 579 | 741 | |
|---|
| .. | .. |
|---|
| 591 | 753 | { |
|---|
| 592 | 754 | struct nvmet_rdma_rsp *rsp = |
|---|
| 593 | 755 | container_of(wc->wr_cqe, struct nvmet_rdma_rsp, read_cqe); |
|---|
| 594 | | - struct nvmet_rdma_queue *queue = cq->cq_context; |
|---|
| 756 | + struct nvmet_rdma_queue *queue = wc->qp->qp_context; |
|---|
| 757 | + u16 status = 0; |
|---|
| 595 | 758 | |
|---|
| 596 | 759 | WARN_ON(rsp->n_rdma <= 0); |
|---|
| 597 | 760 | atomic_add(rsp->n_rdma, &queue->sq_wr_avail); |
|---|
| 598 | | - rdma_rw_ctx_destroy(&rsp->rw, queue->qp, |
|---|
| 599 | | - queue->cm_id->port_num, rsp->req.sg, |
|---|
| 600 | | - rsp->req.sg_cnt, nvmet_data_dir(&rsp->req)); |
|---|
| 601 | 761 | rsp->n_rdma = 0; |
|---|
| 602 | 762 | |
|---|
| 603 | 763 | if (unlikely(wc->status != IB_WC_SUCCESS)) { |
|---|
| 764 | + nvmet_rdma_rw_ctx_destroy(rsp); |
|---|
| 604 | 765 | nvmet_req_uninit(&rsp->req); |
|---|
| 605 | 766 | nvmet_rdma_release_rsp(rsp); |
|---|
| 606 | 767 | if (wc->status != IB_WC_WR_FLUSH_ERR) { |
|---|
| .. | .. |
|---|
| 611 | 772 | return; |
|---|
| 612 | 773 | } |
|---|
| 613 | 774 | |
|---|
| 614 | | - nvmet_req_execute(&rsp->req); |
|---|
| 775 | + if (rsp->req.metadata_len) |
|---|
| 776 | + status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr); |
|---|
| 777 | + nvmet_rdma_rw_ctx_destroy(rsp); |
|---|
| 778 | + |
|---|
| 779 | + if (unlikely(status)) |
|---|
| 780 | + nvmet_req_complete(&rsp->req, status); |
|---|
| 781 | + else |
|---|
| 782 | + rsp->req.execute(&rsp->req); |
|---|
| 783 | +} |
|---|
| 784 | + |
|---|
| 785 | +static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc) |
|---|
| 786 | +{ |
|---|
| 787 | + struct nvmet_rdma_rsp *rsp = |
|---|
| 788 | + container_of(wc->wr_cqe, struct nvmet_rdma_rsp, write_cqe); |
|---|
| 789 | + struct nvmet_rdma_queue *queue = wc->qp->qp_context; |
|---|
| 790 | + struct rdma_cm_id *cm_id = rsp->queue->cm_id; |
|---|
| 791 | + u16 status; |
|---|
| 792 | + |
|---|
| 793 | + if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) |
|---|
| 794 | + return; |
|---|
| 795 | + |
|---|
| 796 | + WARN_ON(rsp->n_rdma <= 0); |
|---|
| 797 | + atomic_add(rsp->n_rdma, &queue->sq_wr_avail); |
|---|
| 798 | + rsp->n_rdma = 0; |
|---|
| 799 | + |
|---|
| 800 | + if (unlikely(wc->status != IB_WC_SUCCESS)) { |
|---|
| 801 | + nvmet_rdma_rw_ctx_destroy(rsp); |
|---|
| 802 | + nvmet_req_uninit(&rsp->req); |
|---|
| 803 | + nvmet_rdma_release_rsp(rsp); |
|---|
| 804 | + if (wc->status != IB_WC_WR_FLUSH_ERR) { |
|---|
| 805 | + pr_info("RDMA WRITE for CQE failed with status %s (%d).\n", |
|---|
| 806 | + ib_wc_status_msg(wc->status), wc->status); |
|---|
| 807 | + nvmet_rdma_error_comp(queue); |
|---|
| 808 | + } |
|---|
| 809 | + return; |
|---|
| 810 | + } |
|---|
| 811 | + |
|---|
| 812 | + /* |
|---|
| 813 | + * Upon RDMA completion check the signature status |
|---|
| 814 | + * - if succeeded send good NVMe response |
|---|
| 815 | + * - if failed send bad NVMe response with appropriate error |
|---|
| 816 | + */ |
|---|
| 817 | + status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr); |
|---|
| 818 | + if (unlikely(status)) |
|---|
| 819 | + rsp->req.cqe->status = cpu_to_le16(status << 1); |
|---|
| 820 | + nvmet_rdma_rw_ctx_destroy(rsp); |
|---|
| 821 | + |
|---|
| 822 | + if (unlikely(ib_post_send(cm_id->qp, &rsp->send_wr, NULL))) { |
|---|
| 823 | + pr_err("sending cmd response failed\n"); |
|---|
| 824 | + nvmet_rdma_release_rsp(rsp); |
|---|
| 825 | + } |
|---|
| 615 | 826 | } |
|---|
| 616 | 827 | |
|---|
| 617 | 828 | static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len, |
|---|
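
The new `nvmet_rdma_write_data_done()` completes the RDMA WRITE separately so the signature MR can be checked before the response goes out; on a PI error it stuffs the status into `rsp->req.cqe->status` shifted left by one, because bit 0 of the 16-bit CQE status word is the phase tag. A small sketch (values are placeholders, not taken from the kernel headers) of that packing:

```c
#include <stdio.h>

int main(void)
{
	unsigned int sc = 0x0282;		/* placeholder "guard check" style status code */
	unsigned int cqe_status = sc << 1;	/* phase tag (bit 0) stays clear */

	printf("status word 0x%04x, phase bit %u, code back out 0x%04x\n",
	       cqe_status, cqe_status & 1, cqe_status >> 1);
	return 0;
}
```
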
| .. | .. |
|---|
| 644 | 855 | u64 off = le64_to_cpu(sgl->addr); |
|---|
| 645 | 856 | u32 len = le32_to_cpu(sgl->length); |
|---|
| 646 | 857 | |
|---|
| 647 | | - if (!nvme_is_write(rsp->req.cmd)) |
|---|
| 858 | + if (!nvme_is_write(rsp->req.cmd)) { |
|---|
| 859 | + rsp->req.error_loc = |
|---|
| 860 | + offsetof(struct nvme_common_command, opcode); |
|---|
| 648 | 861 | return NVME_SC_INVALID_FIELD | NVME_SC_DNR; |
|---|
| 862 | + } |
|---|
| 649 | 863 | |
|---|
| 650 | 864 | if (off + len > rsp->queue->dev->inline_data_size) { |
|---|
| 651 | 865 | pr_err("invalid inline data offset!\n"); |
|---|
| .. | .. |
|---|
| 665 | 879 | static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp, |
|---|
| 666 | 880 | struct nvme_keyed_sgl_desc *sgl, bool invalidate) |
|---|
| 667 | 881 | { |
|---|
| 668 | | - struct rdma_cm_id *cm_id = rsp->queue->cm_id; |
|---|
| 669 | 882 | u64 addr = le64_to_cpu(sgl->addr); |
|---|
| 670 | | - u32 len = get_unaligned_le24(sgl->length); |
|---|
| 671 | 883 | u32 key = get_unaligned_le32(sgl->key); |
|---|
| 884 | + struct ib_sig_attrs sig_attrs; |
|---|
| 672 | 885 | int ret; |
|---|
| 673 | 886 | |
|---|
| 887 | + rsp->req.transfer_len = get_unaligned_le24(sgl->length); |
|---|
| 888 | + |
|---|
| 674 | 889 | /* no data command? */ |
|---|
| 675 | | - if (!len) |
|---|
| 890 | + if (!rsp->req.transfer_len) |
|---|
| 676 | 891 | return 0; |
|---|
| 677 | 892 | |
|---|
| 678 | | - rsp->req.sg = sgl_alloc(len, GFP_KERNEL, &rsp->req.sg_cnt); |
|---|
| 679 | | - if (!rsp->req.sg) |
|---|
| 680 | | - return NVME_SC_INTERNAL; |
|---|
| 893 | + if (rsp->req.metadata_len) |
|---|
| 894 | + nvmet_rdma_set_sig_attrs(&rsp->req, &sig_attrs); |
|---|
| 681 | 895 | |
|---|
| 682 | | - ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num, |
|---|
| 683 | | - rsp->req.sg, rsp->req.sg_cnt, 0, addr, key, |
|---|
| 684 | | - nvmet_data_dir(&rsp->req)); |
|---|
| 685 | | - if (ret < 0) |
|---|
| 686 | | - return NVME_SC_INTERNAL; |
|---|
| 687 | | - rsp->req.transfer_len += len; |
|---|
| 896 | + ret = nvmet_req_alloc_sgls(&rsp->req); |
|---|
| 897 | + if (unlikely(ret < 0)) |
|---|
| 898 | + goto error_out; |
|---|
| 899 | + |
|---|
| 900 | + ret = nvmet_rdma_rw_ctx_init(rsp, addr, key, &sig_attrs); |
|---|
| 901 | + if (unlikely(ret < 0)) |
|---|
| 902 | + goto error_out; |
|---|
| 688 | 903 | rsp->n_rdma += ret; |
|---|
| 689 | 904 | |
|---|
| 690 | 905 | if (invalidate) { |
|---|
| .. | .. |
|---|
| 693 | 908 | } |
|---|
| 694 | 909 | |
|---|
| 695 | 910 | return 0; |
|---|
| 911 | + |
|---|
| 912 | +error_out: |
|---|
| 913 | + rsp->req.transfer_len = 0; |
|---|
| 914 | + return NVME_SC_INTERNAL; |
|---|
| 696 | 915 | } |
|---|
| 697 | 916 | |
|---|
| 698 | 917 | static u16 nvmet_rdma_map_sgl(struct nvmet_rdma_rsp *rsp) |
|---|
| .. | .. |
|---|
| 706 | 925 | return nvmet_rdma_map_sgl_inline(rsp); |
|---|
| 707 | 926 | default: |
|---|
| 708 | 927 | pr_err("invalid SGL subtype: %#x\n", sgl->type); |
|---|
| 928 | + rsp->req.error_loc = |
|---|
| 929 | + offsetof(struct nvme_common_command, dptr); |
|---|
| 709 | 930 | return NVME_SC_INVALID_FIELD | NVME_SC_DNR; |
|---|
| 710 | 931 | } |
|---|
| 711 | 932 | case NVME_KEY_SGL_FMT_DATA_DESC: |
|---|
| .. | .. |
|---|
| 716 | 937 | return nvmet_rdma_map_sgl_keyed(rsp, sgl, false); |
|---|
| 717 | 938 | default: |
|---|
| 718 | 939 | pr_err("invalid SGL subtype: %#x\n", sgl->type); |
|---|
| 940 | + rsp->req.error_loc = |
|---|
| 941 | + offsetof(struct nvme_common_command, dptr); |
|---|
| 719 | 942 | return NVME_SC_INVALID_FIELD | NVME_SC_DNR; |
|---|
| 720 | 943 | } |
|---|
| 721 | 944 | default: |
|---|
| 722 | 945 | pr_err("invalid SGL type: %#x\n", sgl->type); |
|---|
| 946 | + rsp->req.error_loc = offsetof(struct nvme_common_command, dptr); |
|---|
| 723 | 947 | return NVME_SC_SGL_INVALID_TYPE | NVME_SC_DNR; |
|---|
| 724 | 948 | } |
|---|
| 725 | 949 | } |
|---|
| .. | .. |
|---|
| 742 | 966 | queue->cm_id->port_num, &rsp->read_cqe, NULL)) |
|---|
| 743 | 967 | nvmet_req_complete(&rsp->req, NVME_SC_DATA_XFER_ERROR); |
|---|
| 744 | 968 | } else { |
|---|
| 745 | | - nvmet_req_execute(&rsp->req); |
|---|
| 969 | + rsp->req.execute(&rsp->req); |
|---|
| 746 | 970 | } |
|---|
| 747 | 971 | |
|---|
| 748 | 972 | return true; |
|---|
| .. | .. |
|---|
| 784 | 1008 | { |
|---|
| 785 | 1009 | struct nvmet_rdma_cmd *cmd = |
|---|
| 786 | 1010 | container_of(wc->wr_cqe, struct nvmet_rdma_cmd, cqe); |
|---|
| 787 | | - struct nvmet_rdma_queue *queue = cq->cq_context; |
|---|
| 1011 | + struct nvmet_rdma_queue *queue = wc->qp->qp_context; |
|---|
| 788 | 1012 | struct nvmet_rdma_rsp *rsp; |
|---|
| 789 | 1013 | |
|---|
| 790 | 1014 | if (unlikely(wc->status != IB_WC_SUCCESS)) { |
|---|
| .. | .. |
|---|
| 836 | 1060 | nvmet_rdma_handle_command(queue, rsp); |
|---|
| 837 | 1061 | } |
|---|
| 838 | 1062 | |
|---|
| 839 | | -static void nvmet_rdma_destroy_srq(struct nvmet_rdma_device *ndev) |
|---|
| 1063 | +static void nvmet_rdma_destroy_srq(struct nvmet_rdma_srq *nsrq) |
|---|
| 840 | 1064 | { |
|---|
| 841 | | - if (!ndev->srq) |
|---|
| 842 | | - return; |
|---|
| 1065 | + nvmet_rdma_free_cmds(nsrq->ndev, nsrq->cmds, nsrq->ndev->srq_size, |
|---|
| 1066 | + false); |
|---|
| 1067 | + ib_destroy_srq(nsrq->srq); |
|---|
| 843 | 1068 | |
|---|
| 844 | | - nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false); |
|---|
| 845 | | - ib_destroy_srq(ndev->srq); |
|---|
| 1069 | + kfree(nsrq); |
|---|
| 846 | 1070 | } |
|---|
| 847 | 1071 | |
|---|
| 848 | | -static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev) |
|---|
| 1072 | +static void nvmet_rdma_destroy_srqs(struct nvmet_rdma_device *ndev) |
|---|
| 1073 | +{ |
|---|
| 1074 | + int i; |
|---|
| 1075 | + |
|---|
| 1076 | + if (!ndev->srqs) |
|---|
| 1077 | + return; |
|---|
| 1078 | + |
|---|
| 1079 | + for (i = 0; i < ndev->srq_count; i++) |
|---|
| 1080 | + nvmet_rdma_destroy_srq(ndev->srqs[i]); |
|---|
| 1081 | + |
|---|
| 1082 | + kfree(ndev->srqs); |
|---|
| 1083 | +} |
|---|
| 1084 | + |
|---|
| 1085 | +static struct nvmet_rdma_srq * |
|---|
| 1086 | +nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev) |
|---|
| 849 | 1087 | { |
|---|
| 850 | 1088 | struct ib_srq_init_attr srq_attr = { NULL, }; |
|---|
| 1089 | + size_t srq_size = ndev->srq_size; |
|---|
| 1090 | + struct nvmet_rdma_srq *nsrq; |
|---|
| 851 | 1091 | struct ib_srq *srq; |
|---|
| 852 | | - size_t srq_size; |
|---|
| 853 | 1092 | int ret, i; |
|---|
| 854 | 1093 | |
|---|
| 855 | | - srq_size = 4095; /* XXX: tune */ |
|---|
| 1094 | + nsrq = kzalloc(sizeof(*nsrq), GFP_KERNEL); |
|---|
| 1095 | + if (!nsrq) |
|---|
| 1096 | + return ERR_PTR(-ENOMEM); |
|---|
| 856 | 1097 | |
|---|
| 857 | 1098 | srq_attr.attr.max_wr = srq_size; |
|---|
| 858 | 1099 | srq_attr.attr.max_sge = 1 + ndev->inline_page_count; |
|---|
| .. | .. |
|---|
| 860 | 1101 | srq_attr.srq_type = IB_SRQT_BASIC; |
|---|
| 861 | 1102 | srq = ib_create_srq(ndev->pd, &srq_attr); |
|---|
| 862 | 1103 | if (IS_ERR(srq)) { |
|---|
| 1104 | + ret = PTR_ERR(srq); |
|---|
| 1105 | + goto out_free; |
|---|
| 1106 | + } |
|---|
| 1107 | + |
|---|
| 1108 | + nsrq->cmds = nvmet_rdma_alloc_cmds(ndev, srq_size, false); |
|---|
| 1109 | + if (IS_ERR(nsrq->cmds)) { |
|---|
| 1110 | + ret = PTR_ERR(nsrq->cmds); |
|---|
| 1111 | + goto out_destroy_srq; |
|---|
| 1112 | + } |
|---|
| 1113 | + |
|---|
| 1114 | + nsrq->srq = srq; |
|---|
| 1115 | + nsrq->ndev = ndev; |
|---|
| 1116 | + |
|---|
| 1117 | + for (i = 0; i < srq_size; i++) { |
|---|
| 1118 | + nsrq->cmds[i].nsrq = nsrq; |
|---|
| 1119 | + ret = nvmet_rdma_post_recv(ndev, &nsrq->cmds[i]); |
|---|
| 1120 | + if (ret) |
|---|
| 1121 | + goto out_free_cmds; |
|---|
| 1122 | + } |
|---|
| 1123 | + |
|---|
| 1124 | + return nsrq; |
|---|
| 1125 | + |
|---|
| 1126 | +out_free_cmds: |
|---|
| 1127 | + nvmet_rdma_free_cmds(ndev, nsrq->cmds, srq_size, false); |
|---|
| 1128 | +out_destroy_srq: |
|---|
| 1129 | + ib_destroy_srq(srq); |
|---|
| 1130 | +out_free: |
|---|
| 1131 | + kfree(nsrq); |
|---|
| 1132 | + return ERR_PTR(ret); |
|---|
| 1133 | +} |
|---|
| 1134 | + |
|---|
| 1135 | +static int nvmet_rdma_init_srqs(struct nvmet_rdma_device *ndev) |
|---|
| 1136 | +{ |
|---|
| 1137 | + int i, ret; |
|---|
| 1138 | + |
|---|
| 1139 | + if (!ndev->device->attrs.max_srq_wr || !ndev->device->attrs.max_srq) { |
|---|
| 863 | 1140 | /* |
|---|
| 864 | 1141 | * If SRQs aren't supported we just go ahead and use normal |
|---|
| 865 | 1142 | * non-shared receive queues. |
|---|
| .. | .. |
|---|
| 868 | 1145 | return 0; |
|---|
| 869 | 1146 | } |
|---|
| 870 | 1147 | |
|---|
| 871 | | - ndev->srq_cmds = nvmet_rdma_alloc_cmds(ndev, srq_size, false); |
|---|
| 872 | | - if (IS_ERR(ndev->srq_cmds)) { |
|---|
| 873 | | - ret = PTR_ERR(ndev->srq_cmds); |
|---|
| 874 | | - goto out_destroy_srq; |
|---|
| 875 | | - } |
|---|
| 1148 | + ndev->srq_size = min(ndev->device->attrs.max_srq_wr, |
|---|
| 1149 | + nvmet_rdma_srq_size); |
|---|
| 1150 | + ndev->srq_count = min(ndev->device->num_comp_vectors, |
|---|
| 1151 | + ndev->device->attrs.max_srq); |
|---|
| 876 | 1152 | |
|---|
| 877 | | - ndev->srq = srq; |
|---|
| 878 | | - ndev->srq_size = srq_size; |
|---|
| 1153 | + ndev->srqs = kcalloc(ndev->srq_count, sizeof(*ndev->srqs), GFP_KERNEL); |
|---|
| 1154 | + if (!ndev->srqs) |
|---|
| 1155 | + return -ENOMEM; |
|---|
| 879 | 1156 | |
|---|
| 880 | | - for (i = 0; i < srq_size; i++) { |
|---|
| 881 | | - ret = nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]); |
|---|
| 882 | | - if (ret) |
|---|
| 883 | | - goto out_free_cmds; |
|---|
| 1157 | + for (i = 0; i < ndev->srq_count; i++) { |
|---|
| 1158 | + ndev->srqs[i] = nvmet_rdma_init_srq(ndev); |
|---|
| 1159 | + if (IS_ERR(ndev->srqs[i])) { |
|---|
| 1160 | + ret = PTR_ERR(ndev->srqs[i]); |
|---|
| 1161 | + goto err_srq; |
|---|
| 1162 | + } |
|---|
| 884 | 1163 | } |
|---|
| 885 | 1164 | |
|---|
| 886 | 1165 | return 0; |
|---|
| 887 | 1166 | |
|---|
| 888 | | -out_free_cmds: |
|---|
| 889 | | - nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false); |
|---|
| 890 | | -out_destroy_srq: |
|---|
| 891 | | - ib_destroy_srq(srq); |
|---|
| 1167 | +err_srq: |
|---|
| 1168 | + while (--i >= 0) |
|---|
| 1169 | + nvmet_rdma_destroy_srq(ndev->srqs[i]); |
|---|
| 1170 | + kfree(ndev->srqs); |
|---|
| 892 | 1171 | return ret; |
|---|
| 893 | 1172 | } |
|---|
| 894 | 1173 | |
|---|
| .. | .. |
|---|
| 901 | 1180 | list_del(&ndev->entry); |
|---|
| 902 | 1181 | mutex_unlock(&device_list_mutex); |
|---|
| 903 | 1182 | |
|---|
| 904 | | - nvmet_rdma_destroy_srq(ndev); |
|---|
| 1183 | + nvmet_rdma_destroy_srqs(ndev); |
|---|
| 905 | 1184 | ib_dealloc_pd(ndev->pd); |
|---|
| 906 | 1185 | |
|---|
| 907 | 1186 | kfree(ndev); |
|---|
| .. | .. |
|---|
| 910 | 1189 | static struct nvmet_rdma_device * |
|---|
| 911 | 1190 | nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) |
|---|
| 912 | 1191 | { |
|---|
| 913 | | - struct nvmet_port *port = cm_id->context; |
|---|
| 1192 | + struct nvmet_rdma_port *port = cm_id->context; |
|---|
| 1193 | + struct nvmet_port *nport = port->nport; |
|---|
| 914 | 1194 | struct nvmet_rdma_device *ndev; |
|---|
| 915 | 1195 | int inline_page_count; |
|---|
| 916 | 1196 | int inline_sge_count; |
|---|
| .. | .. |
|---|
| 927 | 1207 | if (!ndev) |
|---|
| 928 | 1208 | goto out_err; |
|---|
| 929 | 1209 | |
|---|
| 930 | | - inline_page_count = num_pages(port->inline_data_size); |
|---|
| 1210 | + inline_page_count = num_pages(nport->inline_data_size); |
|---|
| 931 | 1211 | inline_sge_count = max(cm_id->device->attrs.max_sge_rd, |
|---|
| 932 | 1212 | cm_id->device->attrs.max_recv_sge) - 1; |
|---|
| 933 | 1213 | if (inline_page_count > inline_sge_count) { |
|---|
| 934 | 1214 | pr_warn("inline_data_size %d cannot be supported by device %s. Reducing to %lu.\n", |
|---|
| 935 | | - port->inline_data_size, cm_id->device->name, |
|---|
| 1215 | + nport->inline_data_size, cm_id->device->name, |
|---|
| 936 | 1216 | inline_sge_count * PAGE_SIZE); |
|---|
| 937 | | - port->inline_data_size = inline_sge_count * PAGE_SIZE; |
|---|
| 1217 | + nport->inline_data_size = inline_sge_count * PAGE_SIZE; |
|---|
| 938 | 1218 | inline_page_count = inline_sge_count; |
|---|
| 939 | 1219 | } |
|---|
| 940 | | - ndev->inline_data_size = port->inline_data_size; |
|---|
| 1220 | + ndev->inline_data_size = nport->inline_data_size; |
|---|
| 941 | 1221 | ndev->inline_page_count = inline_page_count; |
|---|
| 1222 | + |
|---|
| 1223 | + if (nport->pi_enable && !(cm_id->device->attrs.device_cap_flags & |
|---|
| 1224 | + IB_DEVICE_INTEGRITY_HANDOVER)) { |
|---|
| 1225 | + pr_warn("T10-PI is not supported by device %s. Disabling it\n", |
|---|
| 1226 | + cm_id->device->name); |
|---|
| 1227 | + nport->pi_enable = false; |
|---|
| 1228 | + } |
|---|
| 1229 | + |
|---|
| 942 | 1230 | ndev->device = cm_id->device; |
|---|
| 943 | 1231 | kref_init(&ndev->ref); |
|---|
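
`nvmet_rdma_find_get_device()` still clamps the port's `inline_data_size` so the pages it spans fit the device's receive SGE limit, with one SGE reserved for the command capsule itself; this hunk only reroutes those values through `port->nport` and adds the T10-PI capability check. A quick sketch of the page/SGE arithmetic, assuming a 4KB page size (the rounding matches `num_pages()` for positive lengths):

```c
#include <stdio.h>

/* same rounding as num_pages() in the patch, assuming 4KB pages */
static int num_pages(int len)
{
	return (len + 4095) / 4096;
}

int main(void)
{
	int inline_data_size = 16384;	/* NVMET_RDMA_MAX_INLINE_DATA_SIZE with 4KB pages */
	int inline_page_count = num_pages(inline_data_size);

	/* one extra SGE carries the NVMe command capsule */
	printf("inline pages: %d, recv SGEs: %d\n",
	       inline_page_count, inline_page_count + 1);
	return 0;
}
```
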
| 944 | 1232 | |
|---|
| .. | .. |
|---|
| 947 | 1235 | goto out_free_dev; |
|---|
| 948 | 1236 | |
|---|
| 949 | 1237 | if (nvmet_rdma_use_srq) { |
|---|
| 950 | | - ret = nvmet_rdma_init_srq(ndev); |
|---|
| 1238 | + ret = nvmet_rdma_init_srqs(ndev); |
|---|
| 951 | 1239 | if (ret) |
|---|
| 952 | 1240 | goto out_free_pd; |
|---|
| 953 | 1241 | } |
|---|
| .. | .. |
|---|
| 971 | 1259 | { |
|---|
| 972 | 1260 | struct ib_qp_init_attr qp_attr; |
|---|
| 973 | 1261 | struct nvmet_rdma_device *ndev = queue->dev; |
|---|
| 974 | | - int comp_vector, nr_cqe, ret, i; |
|---|
| 975 | | - |
|---|
| 976 | | - /* |
|---|
| 977 | | - * Spread the io queues across completion vectors, |
|---|
| 978 | | - * but still keep all admin queues on vector 0. |
|---|
| 979 | | - */ |
|---|
| 980 | | - comp_vector = !queue->host_qid ? 0 : |
|---|
| 981 | | - queue->idx % ndev->device->num_comp_vectors; |
|---|
| 1262 | + int nr_cqe, ret, i, factor; |
|---|
| 982 | 1263 | |
|---|
| 983 | 1264 | /* |
|---|
| 984 | 1265 | * Reserve CQ slots for RECV + RDMA_READ/RDMA_WRITE + RDMA_SEND. |
|---|
| 985 | 1266 | */ |
|---|
| 986 | 1267 | nr_cqe = queue->recv_queue_size + 2 * queue->send_queue_size; |
|---|
| 987 | 1268 | |
|---|
| 988 | | - queue->cq = ib_alloc_cq(ndev->device, queue, |
|---|
| 989 | | - nr_cqe + 1, comp_vector, |
|---|
| 990 | | - IB_POLL_WORKQUEUE); |
|---|
| 1269 | + queue->cq = ib_cq_pool_get(ndev->device, nr_cqe + 1, |
|---|
| 1270 | + queue->comp_vector, IB_POLL_WORKQUEUE); |
|---|
| 991 | 1271 | if (IS_ERR(queue->cq)) { |
|---|
| 992 | 1272 | ret = PTR_ERR(queue->cq); |
|---|
| 993 | 1273 | pr_err("failed to create CQ cqe= %d ret= %d\n", |
|---|
| .. | .. |
|---|
| 1004 | 1284 | qp_attr.qp_type = IB_QPT_RC; |
|---|
| 1005 | 1285 | /* +1 for drain */ |
|---|
| 1006 | 1286 | qp_attr.cap.max_send_wr = queue->send_queue_size + 1; |
|---|
| 1007 | | - qp_attr.cap.max_rdma_ctxs = queue->send_queue_size; |
|---|
| 1287 | + factor = rdma_rw_mr_factor(ndev->device, queue->cm_id->port_num, |
|---|
| 1288 | + 1 << NVMET_RDMA_MAX_MDTS); |
|---|
| 1289 | + qp_attr.cap.max_rdma_ctxs = queue->send_queue_size * factor; |
|---|
| 1008 | 1290 | qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd, |
|---|
| 1009 | 1291 | ndev->device->attrs.max_send_sge); |
|---|
| 1010 | 1292 | |
|---|
| 1011 | | - if (ndev->srq) { |
|---|
| 1012 | | - qp_attr.srq = ndev->srq; |
|---|
| 1293 | + if (queue->nsrq) { |
|---|
| 1294 | + qp_attr.srq = queue->nsrq->srq; |
|---|
| 1013 | 1295 | } else { |
|---|
| 1014 | 1296 | /* +1 for drain */ |
|---|
| 1015 | 1297 | qp_attr.cap.max_recv_wr = 1 + queue->recv_queue_size; |
|---|
| 1016 | 1298 | qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count; |
|---|
| 1017 | 1299 | } |
|---|
| 1300 | + |
|---|
| 1301 | + if (queue->port->pi_enable && queue->host_qid) |
|---|
| 1302 | + qp_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN; |
|---|
| 1018 | 1303 | |
|---|
| 1019 | 1304 | ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr); |
|---|
| 1020 | 1305 | if (ret) { |
|---|
| .. | .. |
|---|
| 1029 | 1314 | __func__, queue->cq->cqe, qp_attr.cap.max_send_sge, |
|---|
| 1030 | 1315 | qp_attr.cap.max_send_wr, queue->cm_id); |
|---|
| 1031 | 1316 | |
|---|
| 1032 | | - if (!ndev->srq) { |
|---|
| 1317 | + if (!queue->nsrq) { |
|---|
| 1033 | 1318 | for (i = 0; i < queue->recv_queue_size; i++) { |
|---|
| 1034 | 1319 | queue->cmds[i].queue = queue; |
|---|
| 1035 | 1320 | ret = nvmet_rdma_post_recv(ndev, &queue->cmds[i]); |
|---|
| .. | .. |
|---|
| 1044 | 1329 | err_destroy_qp: |
|---|
| 1045 | 1330 | rdma_destroy_qp(queue->cm_id); |
|---|
| 1046 | 1331 | err_destroy_cq: |
|---|
| 1047 | | - ib_free_cq(queue->cq); |
|---|
| 1332 | + ib_cq_pool_put(queue->cq, nr_cqe + 1); |
|---|
| 1048 | 1333 | goto out; |
|---|
| 1049 | 1334 | } |
|---|
| 1050 | 1335 | |
|---|
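
`nvmet_rdma_create_queue_ib()` now takes its CQ from the shared CQ pool and sizes `max_rdma_ctxs` for worst-case MR fragmentation: the factor comes from `rdma_rw_mr_factor()` for a `1 << NVMET_RDMA_MAX_MDTS` = 256-page (1MB) transfer, and the same `nr_cqe` passed to `ib_cq_pool_get()` must be handed back to `ib_cq_pool_put()`. A rough sketch of the arithmetic with assumed queue depths and an assumed factor value:

```c
#include <stdio.h>

int main(void)
{
	int recv_queue_size = 128, send_queue_size = 128;
	int factor = 2;		/* stand-in for rdma_rw_mr_factor(dev, port, 256) */

	/* RECV + RDMA_READ/RDMA_WRITE + RDMA_SEND completions, +1 for drain */
	int nr_cqe = recv_queue_size + 2 * send_queue_size;

	printf("CQEs reserved: %d, max_rdma_ctxs: %d\n",
	       nr_cqe + 1, send_queue_size * factor);
	return 0;
}
```
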
| .. | .. |
|---|
| 1054 | 1339 | if (queue->cm_id) |
|---|
| 1055 | 1340 | rdma_destroy_id(queue->cm_id); |
|---|
| 1056 | 1341 | ib_destroy_qp(queue->qp); |
|---|
| 1057 | | - ib_free_cq(queue->cq); |
|---|
| 1342 | + ib_cq_pool_put(queue->cq, queue->recv_queue_size + 2 * |
|---|
| 1343 | + queue->send_queue_size + 1); |
|---|
| 1058 | 1344 | } |
|---|
| 1059 | 1345 | |
|---|
| 1060 | 1346 | static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue) |
|---|
| .. | .. |
|---|
| 1064 | 1350 | nvmet_sq_destroy(&queue->nvme_sq); |
|---|
| 1065 | 1351 | |
|---|
| 1066 | 1352 | nvmet_rdma_destroy_queue_ib(queue); |
|---|
| 1067 | | - if (!queue->dev->srq) { |
|---|
| 1353 | + if (!queue->nsrq) { |
|---|
| 1068 | 1354 | nvmet_rdma_free_cmds(queue->dev, queue->cmds, |
|---|
| 1069 | 1355 | queue->recv_queue_size, |
|---|
| 1070 | 1356 | !queue->host_qid); |
|---|
| .. | .. |
|---|
| 1126 | 1412 | rej.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0); |
|---|
| 1127 | 1413 | rej.sts = cpu_to_le16(status); |
|---|
| 1128 | 1414 | |
|---|
| 1129 | | - return rdma_reject(cm_id, (void *)&rej, sizeof(rej)); |
|---|
| 1415 | + return rdma_reject(cm_id, (void *)&rej, sizeof(rej), |
|---|
| 1416 | + IB_CM_REJ_CONSUMER_DEFINED); |
|---|
| 1130 | 1417 | } |
|---|
| 1131 | 1418 | |
|---|
| 1132 | 1419 | static struct nvmet_rdma_queue * |
|---|
| .. | .. |
|---|
| 1134 | 1421 | struct rdma_cm_id *cm_id, |
|---|
| 1135 | 1422 | struct rdma_cm_event *event) |
|---|
| 1136 | 1423 | { |
|---|
| 1424 | + struct nvmet_rdma_port *port = cm_id->context; |
|---|
| 1137 | 1425 | struct nvmet_rdma_queue *queue; |
|---|
| 1138 | 1426 | int ret; |
|---|
| 1139 | 1427 | |
|---|
| .. | .. |
|---|
| 1160 | 1448 | INIT_WORK(&queue->release_work, nvmet_rdma_release_queue_work); |
|---|
| 1161 | 1449 | queue->dev = ndev; |
|---|
| 1162 | 1450 | queue->cm_id = cm_id; |
|---|
| 1451 | + queue->port = port->nport; |
|---|
| 1163 | 1452 | |
|---|
| 1164 | 1453 | spin_lock_init(&queue->state_lock); |
|---|
| 1165 | 1454 | queue->state = NVMET_RDMA_Q_CONNECTING; |
|---|
| .. | .. |
|---|
| 1176 | 1465 | goto out_destroy_sq; |
|---|
| 1177 | 1466 | } |
|---|
| 1178 | 1467 | |
|---|
| 1468 | + /* |
|---|
| 1469 | + * Spread the io queues across completion vectors, |
|---|
| 1470 | + * but still keep all admin queues on vector 0. |
|---|
| 1471 | + */ |
|---|
| 1472 | + queue->comp_vector = !queue->host_qid ? 0 : |
|---|
| 1473 | + queue->idx % ndev->device->num_comp_vectors; |
|---|
| 1474 | + |
|---|
| 1475 | + |
|---|
| 1179 | 1476 | ret = nvmet_rdma_alloc_rsps(queue); |
|---|
| 1180 | 1477 | if (ret) { |
|---|
| 1181 | 1478 | ret = NVME_RDMA_CM_NO_RSC; |
|---|
| 1182 | 1479 | goto out_ida_remove; |
|---|
| 1183 | 1480 | } |
|---|
| 1184 | 1481 | |
|---|
| 1185 | | - if (!ndev->srq) { |
|---|
| 1482 | + if (ndev->srqs) { |
|---|
| 1483 | + queue->nsrq = ndev->srqs[queue->comp_vector % ndev->srq_count]; |
|---|
| 1484 | + } else { |
|---|
| 1186 | 1485 | queue->cmds = nvmet_rdma_alloc_cmds(ndev, |
|---|
| 1187 | 1486 | queue->recv_queue_size, |
|---|
| 1188 | 1487 | !queue->host_qid); |
|---|
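
Completion-vector spreading moves from `nvmet_rdma_create_queue_ib()` into queue allocation so the same `comp_vector` can also select which shared SRQ the queue uses: the admin queue stays on vector 0, I/O queues round-robin over the device's vectors, and the SRQ index is simply `comp_vector % srq_count`. A small sketch of how a few queues might map, with assumed device limits (and the simplification that queue index 0 is the admin queue, which is not how `idx` and `host_qid` are actually assigned):

```c
#include <stdio.h>

int main(void)
{
	int num_comp_vectors = 4;	/* assumed device capability */
	int srq_count = 4;		/* min(num_comp_vectors, attrs.max_srq) */

	for (int idx = 0; idx < 6; idx++) {
		int host_qid = idx;	/* simplification for illustration only */
		int comp_vector = !host_qid ? 0 : idx % num_comp_vectors;

		printf("queue idx %d -> comp_vector %d, srq[%d]\n",
		       idx, comp_vector, comp_vector % srq_count);
	}
	return 0;
}
```
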
| .. | .. |
|---|
| 1203 | 1502 | return queue; |
|---|
| 1204 | 1503 | |
|---|
| 1205 | 1504 | out_free_cmds: |
|---|
| 1206 | | - if (!ndev->srq) { |
|---|
| 1505 | + if (!queue->nsrq) { |
|---|
| 1207 | 1506 | nvmet_rdma_free_cmds(queue->dev, queue->cmds, |
|---|
| 1208 | 1507 | queue->recv_queue_size, |
|---|
| 1209 | 1508 | !queue->host_qid); |
|---|
| .. | .. |
|---|
| 1228 | 1527 | switch (event->event) { |
|---|
| 1229 | 1528 | case IB_EVENT_COMM_EST: |
|---|
| 1230 | 1529 | rdma_notify(queue->cm_id, event->event); |
|---|
| 1530 | + break; |
|---|
| 1531 | + case IB_EVENT_QP_LAST_WQE_REACHED: |
|---|
| 1532 | + pr_debug("received last WQE reached event for queue=0x%p\n", |
|---|
| 1533 | + queue); |
|---|
| 1231 | 1534 | break; |
|---|
| 1232 | 1535 | default: |
|---|
| 1233 | 1536 | pr_err("received IB QP event: %s (%d)\n", |
|---|
| .. | .. |
|---|
| 1278 | 1581 | ret = -ENOMEM; |
|---|
| 1279 | 1582 | goto put_device; |
|---|
| 1280 | 1583 | } |
|---|
| 1281 | | - queue->port = cm_id->context; |
|---|
| 1282 | 1584 | |
|---|
| 1283 | 1585 | if (queue->host_qid == 0) { |
|---|
| 1284 | 1586 | /* Let inflight controller teardown complete */ |
|---|
| .. | .. |
|---|
| 1346 | 1648 | spin_lock_irqsave(&queue->state_lock, flags); |
|---|
| 1347 | 1649 | switch (queue->state) { |
|---|
| 1348 | 1650 | case NVMET_RDMA_Q_CONNECTING: |
|---|
| 1651 | + while (!list_empty(&queue->rsp_wait_list)) { |
|---|
| 1652 | + struct nvmet_rdma_rsp *rsp; |
|---|
| 1653 | + |
|---|
| 1654 | + rsp = list_first_entry(&queue->rsp_wait_list, |
|---|
| 1655 | + struct nvmet_rdma_rsp, |
|---|
| 1656 | + wait_list); |
|---|
| 1657 | + list_del(&rsp->wait_list); |
|---|
| 1658 | + nvmet_rdma_put_rsp(rsp); |
|---|
| 1659 | + } |
|---|
| 1660 | + fallthrough; |
|---|
| 1349 | 1661 | case NVMET_RDMA_Q_LIVE: |
|---|
| 1350 | 1662 | queue->state = NVMET_RDMA_Q_DISCONNECTING; |
|---|
| 1351 | 1663 | disconnect = true; |
|---|
| .. | .. |
|---|
| 1408 | 1720 | static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, |
|---|
| 1409 | 1721 | struct nvmet_rdma_queue *queue) |
|---|
| 1410 | 1722 | { |
|---|
| 1411 | | - struct nvmet_port *port; |
|---|
| 1723 | + struct nvmet_rdma_port *port; |
|---|
| 1412 | 1724 | |
|---|
| 1413 | 1725 | if (queue) { |
|---|
| 1414 | 1726 | /* |
|---|
| .. | .. |
|---|
| 1427 | 1739 | * cm_id destroy. use atomic xchg to make sure |
|---|
| 1428 | 1740 | * we don't compete with remove_port. |
|---|
| 1429 | 1741 | */ |
|---|
| 1430 | | - if (xchg(&port->priv, NULL) != cm_id) |
|---|
| 1742 | + if (xchg(&port->cm_id, NULL) != cm_id) |
|---|
| 1431 | 1743 | return 0; |
|---|
| 1432 | 1744 | |
|---|
| 1433 | 1745 | /* |
|---|
| .. | .. |
|---|
| 1458 | 1770 | nvmet_rdma_queue_established(queue); |
|---|
| 1459 | 1771 | break; |
|---|
| 1460 | 1772 | case RDMA_CM_EVENT_ADDR_CHANGE: |
|---|
| 1773 | + if (!queue) { |
|---|
| 1774 | + struct nvmet_rdma_port *port = cm_id->context; |
|---|
| 1775 | + |
|---|
| 1776 | + schedule_delayed_work(&port->repair_work, 0); |
|---|
| 1777 | + break; |
|---|
| 1778 | + } |
|---|
| 1779 | + fallthrough; |
|---|
| 1461 | 1780 | case RDMA_CM_EVENT_DISCONNECTED: |
|---|
| 1462 | 1781 | case RDMA_CM_EVENT_TIMEWAIT_EXIT: |
|---|
| 1463 | 1782 | nvmet_rdma_queue_disconnect(queue); |
|---|
| .. | .. |
|---|
| 1468 | 1787 | case RDMA_CM_EVENT_REJECTED: |
|---|
| 1469 | 1788 | pr_debug("Connection rejected: %s\n", |
|---|
| 1470 | 1789 | rdma_reject_msg(cm_id, event->status)); |
|---|
| 1471 | | - /* FALLTHROUGH */ |
|---|
| 1790 | + fallthrough; |
|---|
| 1472 | 1791 | case RDMA_CM_EVENT_UNREACHABLE: |
|---|
| 1473 | 1792 | case RDMA_CM_EVENT_CONNECT_ERROR: |
|---|
| 1474 | 1793 | nvmet_rdma_queue_connect_fail(cm_id, queue); |
|---|
| .. | .. |
|---|
| 1500 | 1819 | mutex_unlock(&nvmet_rdma_queue_mutex); |
|---|
| 1501 | 1820 | } |
|---|
| 1502 | 1821 | |
|---|
| 1503 | | -static int nvmet_rdma_add_port(struct nvmet_port *port) |
|---|
| 1822 | +static void nvmet_rdma_destroy_port_queues(struct nvmet_rdma_port *port) |
|---|
| 1504 | 1823 | { |
|---|
| 1824 | + struct nvmet_rdma_queue *queue, *tmp; |
|---|
| 1825 | + struct nvmet_port *nport = port->nport; |
|---|
| 1826 | + |
|---|
| 1827 | + mutex_lock(&nvmet_rdma_queue_mutex); |
|---|
| 1828 | + list_for_each_entry_safe(queue, tmp, &nvmet_rdma_queue_list, |
|---|
| 1829 | + queue_list) { |
|---|
| 1830 | + if (queue->port != nport) |
|---|
| 1831 | + continue; |
|---|
| 1832 | + |
|---|
| 1833 | + list_del_init(&queue->queue_list); |
|---|
| 1834 | + __nvmet_rdma_queue_disconnect(queue); |
|---|
| 1835 | + } |
|---|
| 1836 | + mutex_unlock(&nvmet_rdma_queue_mutex); |
|---|
| 1837 | +} |
|---|
| 1838 | + |
|---|
| 1839 | +static void nvmet_rdma_disable_port(struct nvmet_rdma_port *port) |
|---|
| 1840 | +{ |
|---|
| 1841 | + struct rdma_cm_id *cm_id = xchg(&port->cm_id, NULL); |
|---|
| 1842 | + |
|---|
| 1843 | + if (cm_id) |
|---|
| 1844 | + rdma_destroy_id(cm_id); |
|---|
| 1845 | + |
|---|
| 1846 | + /* |
|---|
| 1847 | + * Destroy the remaining queues, which are not belong to any |
|---|
| 1848 | + * controller yet. Do it here after the RDMA-CM was destroyed |
|---|
| 1849 | + * guarantees that no new queue will be created. |
|---|
| 1850 | + */ |
|---|
| 1851 | + nvmet_rdma_destroy_port_queues(port); |
|---|
| 1852 | +} |
|---|
| 1853 | + |
|---|
| 1854 | +static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port) |
|---|
| 1855 | +{ |
|---|
| 1856 | + struct sockaddr *addr = (struct sockaddr *)&port->addr; |
|---|
| 1505 | 1857 | struct rdma_cm_id *cm_id; |
|---|
| 1506 | | - struct sockaddr_storage addr = { }; |
|---|
| 1507 | | - __kernel_sa_family_t af; |
|---|
| 1508 | 1858 | int ret; |
|---|
| 1509 | | - |
|---|
| 1510 | | - switch (port->disc_addr.adrfam) { |
|---|
| 1511 | | - case NVMF_ADDR_FAMILY_IP4: |
|---|
| 1512 | | - af = AF_INET; |
|---|
| 1513 | | - break; |
|---|
| 1514 | | - case NVMF_ADDR_FAMILY_IP6: |
|---|
| 1515 | | - af = AF_INET6; |
|---|
| 1516 | | - break; |
|---|
| 1517 | | - default: |
|---|
| 1518 | | - pr_err("address family %d not supported\n", |
|---|
| 1519 | | - port->disc_addr.adrfam); |
|---|
| 1520 | | - return -EINVAL; |
|---|
| 1521 | | - } |
|---|
| 1522 | | - |
|---|
| 1523 | | - if (port->inline_data_size < 0) { |
|---|
| 1524 | | - port->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE; |
|---|
| 1525 | | - } else if (port->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) { |
|---|
| 1526 | | - pr_warn("inline_data_size %u is too large, reducing to %u\n", |
|---|
| 1527 | | - port->inline_data_size, |
|---|
| 1528 | | - NVMET_RDMA_MAX_INLINE_DATA_SIZE); |
|---|
| 1529 | | - port->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE; |
|---|
| 1530 | | - } |
|---|
| 1531 | | - |
|---|
| 1532 | | - ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr, |
|---|
| 1533 | | - port->disc_addr.trsvcid, &addr); |
|---|
| 1534 | | - if (ret) { |
|---|
| 1535 | | - pr_err("malformed ip/port passed: %s:%s\n", |
|---|
| 1536 | | - port->disc_addr.traddr, port->disc_addr.trsvcid); |
|---|
| 1537 | | - return ret; |
|---|
| 1538 | | - } |
|---|
| 1539 | 1859 | |
|---|
| 1540 | 1860 | cm_id = rdma_create_id(&init_net, nvmet_rdma_cm_handler, port, |
|---|
| 1541 | 1861 | RDMA_PS_TCP, IB_QPT_RC); |
|---|
| .. | .. |
|---|
| 1554 | 1874 | goto out_destroy_id; |
|---|
| 1555 | 1875 | } |
|---|
| 1556 | 1876 | |
|---|
| 1557 | | - ret = rdma_bind_addr(cm_id, (struct sockaddr *)&addr); |
|---|
| 1877 | + ret = rdma_bind_addr(cm_id, addr); |
|---|
| 1558 | 1878 | if (ret) { |
|---|
| 1559 | | - pr_err("binding CM ID to %pISpcs failed (%d)\n", |
|---|
| 1560 | | - (struct sockaddr *)&addr, ret); |
|---|
| 1879 | + pr_err("binding CM ID to %pISpcs failed (%d)\n", addr, ret); |
|---|
| 1561 | 1880 | goto out_destroy_id; |
|---|
| 1562 | 1881 | } |
|---|
| 1563 | 1882 | |
|---|
| 1564 | 1883 | ret = rdma_listen(cm_id, 128); |
|---|
| 1565 | 1884 | if (ret) { |
|---|
| 1566 | | - pr_err("listening to %pISpcs failed (%d)\n", |
|---|
| 1567 | | - (struct sockaddr *)&addr, ret); |
|---|
| 1885 | + pr_err("listening to %pISpcs failed (%d)\n", addr, ret); |
|---|
| 1568 | 1886 | goto out_destroy_id; |
|---|
| 1569 | 1887 | } |
|---|
| 1570 | 1888 | |
|---|
| 1571 | | - pr_info("enabling port %d (%pISpcs)\n", |
|---|
| 1572 | | - le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr); |
|---|
| 1573 | | - port->priv = cm_id; |
|---|
| 1889 | + port->cm_id = cm_id; |
|---|
| 1574 | 1890 | return 0; |
|---|
| 1575 | 1891 | |
|---|
| 1576 | 1892 | out_destroy_id: |
|---|
| .. | .. |
|---|
| 1578 | 1894 | return ret; |
|---|
| 1579 | 1895 | } |
|---|
| 1580 | 1896 | |
|---|
| 1581 | | -static void nvmet_rdma_remove_port(struct nvmet_port *port) |
|---|
| 1897 | +static void nvmet_rdma_repair_port_work(struct work_struct *w) |
|---|
| 1582 | 1898 | { |
|---|
| 1583 | | - struct rdma_cm_id *cm_id = xchg(&port->priv, NULL); |
|---|
| 1899 | + struct nvmet_rdma_port *port = container_of(to_delayed_work(w), |
|---|
| 1900 | + struct nvmet_rdma_port, repair_work); |
|---|
| 1901 | + int ret; |
|---|
| 1584 | 1902 | |
|---|
| 1585 | | - if (cm_id) |
|---|
| 1586 | | - rdma_destroy_id(cm_id); |
|---|
| 1903 | + nvmet_rdma_disable_port(port); |
|---|
| 1904 | + ret = nvmet_rdma_enable_port(port); |
|---|
| 1905 | + if (ret) |
|---|
| 1906 | + schedule_delayed_work(&port->repair_work, 5 * HZ); |
|---|
| 1907 | +} |
|---|
| 1908 | + |
|---|
| 1909 | +static int nvmet_rdma_add_port(struct nvmet_port *nport) |
|---|
| 1910 | +{ |
|---|
| 1911 | + struct nvmet_rdma_port *port; |
|---|
| 1912 | + __kernel_sa_family_t af; |
|---|
| 1913 | + int ret; |
|---|
| 1914 | + |
|---|
| 1915 | + port = kzalloc(sizeof(*port), GFP_KERNEL); |
|---|
| 1916 | + if (!port) |
|---|
| 1917 | + return -ENOMEM; |
|---|
| 1918 | + |
|---|
| 1919 | + nport->priv = port; |
|---|
| 1920 | + port->nport = nport; |
|---|
| 1921 | + INIT_DELAYED_WORK(&port->repair_work, nvmet_rdma_repair_port_work); |
|---|
| 1922 | + |
|---|
| 1923 | + switch (nport->disc_addr.adrfam) { |
|---|
| 1924 | + case NVMF_ADDR_FAMILY_IP4: |
|---|
| 1925 | + af = AF_INET; |
|---|
| 1926 | + break; |
|---|
| 1927 | + case NVMF_ADDR_FAMILY_IP6: |
|---|
| 1928 | + af = AF_INET6; |
|---|
| 1929 | + break; |
|---|
| 1930 | + default: |
|---|
| 1931 | + pr_err("address family %d not supported\n", |
|---|
| 1932 | + nport->disc_addr.adrfam); |
|---|
| 1933 | + ret = -EINVAL; |
|---|
| 1934 | + goto out_free_port; |
|---|
| 1935 | + } |
|---|
| 1936 | + |
|---|
| 1937 | + if (nport->inline_data_size < 0) { |
|---|
| 1938 | + nport->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE; |
|---|
| 1939 | + } else if (nport->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) { |
|---|
| 1940 | + pr_warn("inline_data_size %u is too large, reducing to %u\n", |
|---|
| 1941 | + nport->inline_data_size, |
|---|
| 1942 | + NVMET_RDMA_MAX_INLINE_DATA_SIZE); |
|---|
| 1943 | + nport->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE; |
|---|
| 1944 | + } |
|---|
| 1945 | + |
|---|
| 1946 | + ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr, |
|---|
| 1947 | + nport->disc_addr.trsvcid, &port->addr); |
|---|
| 1948 | + if (ret) { |
|---|
| 1949 | + pr_err("malformed ip/port passed: %s:%s\n", |
|---|
| 1950 | + nport->disc_addr.traddr, nport->disc_addr.trsvcid); |
|---|
| 1951 | + goto out_free_port; |
|---|
| 1952 | + } |
|---|
| 1953 | + |
|---|
| 1954 | + ret = nvmet_rdma_enable_port(port); |
|---|
| 1955 | + if (ret) |
|---|
| 1956 | + goto out_free_port; |
|---|
| 1957 | + |
|---|
| 1958 | + pr_info("enabling port %d (%pISpcs)\n", |
|---|
| 1959 | + le16_to_cpu(nport->disc_addr.portid), |
|---|
| 1960 | + (struct sockaddr *)&port->addr); |
|---|
| 1961 | + |
|---|
| 1962 | + return 0; |
|---|
| 1963 | + |
|---|
| 1964 | +out_free_port: |
|---|
| 1965 | + kfree(port); |
|---|
| 1966 | + return ret; |
|---|
| 1967 | +} |
|---|
| 1968 | + |
|---|
| 1969 | +static void nvmet_rdma_remove_port(struct nvmet_port *nport) |
|---|
| 1970 | +{ |
|---|
| 1971 | + struct nvmet_rdma_port *port = nport->priv; |
|---|
| 1972 | + |
|---|
| 1973 | + cancel_delayed_work_sync(&port->repair_work); |
|---|
| 1974 | + nvmet_rdma_disable_port(port); |
|---|
| 1975 | + kfree(port); |
|---|
| 1587 | 1976 | } |
|---|
| 1588 | 1977 | |
|---|
| 1589 | 1978 | static void nvmet_rdma_disc_port_addr(struct nvmet_req *req, |
|---|
| 1590 | | - struct nvmet_port *port, char *traddr) |
|---|
| 1979 | + struct nvmet_port *nport, char *traddr) |
|---|
| 1591 | 1980 | { |
|---|
| 1592 | | - struct rdma_cm_id *cm_id = port->priv; |
|---|
| 1981 | + struct nvmet_rdma_port *port = nport->priv; |
|---|
| 1982 | + struct rdma_cm_id *cm_id = port->cm_id; |
|---|
| 1593 | 1983 | |
|---|
| 1594 | 1984 | if (inet_addr_is_any((struct sockaddr *)&cm_id->route.addr.src_addr)) { |
|---|
| 1595 | 1985 | struct nvmet_rdma_rsp *rsp = |
|---|
| .. | .. |
|---|
| 1599 | 1989 | |
|---|
| 1600 | 1990 | sprintf(traddr, "%pISc", addr); |
|---|
| 1601 | 1991 | } else { |
|---|
| 1602 | | - memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE); |
|---|
| 1992 | + memcpy(traddr, nport->disc_addr.traddr, NVMF_TRADDR_SIZE); |
|---|
| 1603 | 1993 | } |
|---|
| 1994 | +} |
|---|
| 1995 | + |
|---|
| 1996 | +static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl) |
|---|
| 1997 | +{ |
|---|
| 1998 | + if (ctrl->pi_support) |
|---|
| 1999 | + return NVMET_RDMA_MAX_METADATA_MDTS; |
|---|
| 2000 | + return NVMET_RDMA_MAX_MDTS; |
|---|
| 1604 | 2001 | } |
|---|
| 1605 | 2002 | |
|---|
| 1606 | 2003 | static const struct nvmet_fabrics_ops nvmet_rdma_ops = { |
|---|
| 1607 | 2004 | .owner = THIS_MODULE, |
|---|
| 1608 | 2005 | .type = NVMF_TRTYPE_RDMA, |
|---|
| 1609 | 2006 | .msdbd = 1, |
|---|
| 1610 | | - .has_keyed_sgls = 1, |
|---|
| 2007 | + .flags = NVMF_KEYED_SGLS | NVMF_METADATA_SUPPORTED, |
|---|
| 1611 | 2008 | .add_port = nvmet_rdma_add_port, |
|---|
| 1612 | 2009 | .remove_port = nvmet_rdma_remove_port, |
|---|
| 1613 | 2010 | .queue_response = nvmet_rdma_queue_response, |
|---|
| 1614 | 2011 | .delete_ctrl = nvmet_rdma_delete_ctrl, |
|---|
| 1615 | 2012 | .disc_traddr = nvmet_rdma_disc_port_addr, |
|---|
| 2013 | + .get_mdts = nvmet_rdma_get_mdts, |
|---|
| 1616 | 2014 | }; |
|---|
| 1617 | 2015 | |
|---|
| 1618 | 2016 | static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data) |
|---|