| .. | .. |
| 1 | 1 | /* |
| 2 | | - * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. |
| | 2 | + * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. |
| 3 | 3 | * |
| 4 | 4 | * This software is available to you under a choice of one of two |
| 5 | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
| .. | .. |
| 39 | 39 | #include "rds_single_path.h" |
| 40 | 40 | #include "rds.h" |
| 41 | 41 | #include "ib.h" |
| | 42 | +#include "ib_mr.h" |
| 42 | 43 | |
| 43 | 44 | /* |
| 44 | 45 | * Convert IB-specific error message to RDS error message and call core |
| .. | .. |
| 67 | 68 | break; |
| 68 | 69 | } |
| 69 | 70 | complete(rm, notify_status); |
| | 71 | +} |
| | 72 | + |
| | 73 | +static void rds_ib_send_unmap_data(struct rds_ib_connection *ic, |
| | 74 | + struct rm_data_op *op, |
| | 75 | + int wc_status) |
| | 76 | +{ |
| | 77 | + if (op->op_nents) |
| | 78 | + ib_dma_unmap_sg(ic->i_cm_id->device, |
| | 79 | + op->op_sg, op->op_nents, |
| | 80 | + DMA_TO_DEVICE); |
| 70 | 81 | } |
| 71 | 82 | |
| 72 | 83 | static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic, |
| .. | .. |
| 127 | 138 | rds_ib_stats_inc(s_ib_atomic_cswp); |
| 128 | 139 | else |
| 129 | 140 | rds_ib_stats_inc(s_ib_atomic_fadd); |
| 130 | | -} |
| 131 | | - |
| 132 | | -static void rds_ib_send_unmap_data(struct rds_ib_connection *ic, |
| 133 | | - struct rm_data_op *op, |
| 134 | | - int wc_status) |
| 135 | | -{ |
| 136 | | - struct rds_message *rm = container_of(op, struct rds_message, data); |
| 137 | | - |
| 138 | | - if (op->op_nents) |
| 139 | | - ib_dma_unmap_sg(ic->i_cm_id->device, |
| 140 | | - op->op_sg, op->op_nents, |
| 141 | | - DMA_TO_DEVICE); |
| 142 | | - |
| 143 | | - if (rm->rdma.op_active && rm->data.op_notify) |
| 144 | | - rds_ib_send_unmap_rdma(ic, &rm->rdma, wc_status); |
| 145 | 141 | } |
| 146 | 142 | |
| 147 | 143 | /* |
| .. | .. |
| 206 | 202 | send->s_wr.ex.imm_data = 0; |
| 207 | 203 | |
| 208 | 204 | sge = &send->s_sge[0]; |
| 209 | | - sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header)); |
| | 205 | + sge->addr = ic->i_send_hdrs_dma[i]; |
| | 206 | + |
| 210 | 207 | sge->length = sizeof(struct rds_header); |
| 211 | 208 | sge->lkey = ic->i_pd->local_dma_lkey; |
| 212 | 209 | |
| .. | .. |
| 305 | 302 | |
| 306 | 303 | /* We expect errors as the qp is drained during shutdown */ |
| 307 | 304 | if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) { |
| 308 | | - rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n", |
| 309 | | - &conn->c_laddr, &conn->c_faddr, wc->status, |
| 310 | | - ib_wc_status_msg(wc->status)); |
| | 305 | + rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), vendor err 0x%x, disconnecting and reconnecting\n", |
| | 306 | + &conn->c_laddr, &conn->c_faddr, |
| | 307 | + conn->c_tos, wc->status, |
| | 308 | + ib_wc_status_msg(wc->status), wc->vendor_err); |
| 311 | 309 | } |
| 312 | 310 | } |
| 313 | 311 | |
| .. | .. |
| 522 | 520 | if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) |
| 523 | 521 | i = 1; |
| 524 | 522 | else |
| 525 | | - i = ceil(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE); |
| | 523 | + i = DIV_ROUND_UP(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE); |
| 526 | 524 | |
| 527 | 525 | work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos); |
| 528 | 526 | if (work_alloc == 0) { |
| .. | .. |
| 635 | 633 | send->s_queued = jiffies; |
| 636 | 634 | send->s_op = NULL; |
| 637 | 635 | |
| 638 | | - send->s_sge[0].addr = ic->i_send_hdrs_dma |
| 639 | | - + (pos * sizeof(struct rds_header)); |
| 640 | | - send->s_sge[0].length = sizeof(struct rds_header); |
| | 636 | + send->s_sge[0].addr = ic->i_send_hdrs_dma[pos]; |
| 641 | 637 | |
| 642 | | - memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header)); |
| | 638 | + send->s_sge[0].length = sizeof(struct rds_header); |
| | 639 | + send->s_sge[0].lkey = ic->i_pd->local_dma_lkey; |
| | 640 | + |
| | 641 | + ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, |
| | 642 | + ic->i_send_hdrs_dma[pos], |
| | 643 | + sizeof(struct rds_header), |
| | 644 | + DMA_TO_DEVICE); |
| | 645 | + memcpy(ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, |
| | 646 | + sizeof(struct rds_header)); |
| | 647 | + |
| 643 | 648 | |
| 644 | 649 | /* Set up the data, if present */ |
| 645 | 650 | if (i < work_alloc |
| 646 | 651 | && scat != &rm->data.op_sg[rm->data.op_count]) { |
| 647 | 652 | len = min(RDS_FRAG_SIZE, |
| 648 | | - ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff); |
| | 653 | + sg_dma_len(scat) - rm->data.op_dmaoff); |
| 649 | 654 | send->s_wr.num_sge = 2; |
| 650 | 655 | |
| 651 | | - send->s_sge[1].addr = ib_sg_dma_address(dev, scat); |
| | 656 | + send->s_sge[1].addr = sg_dma_address(scat); |
| 652 | 657 | send->s_sge[1].addr += rm->data.op_dmaoff; |
| 653 | 658 | send->s_sge[1].length = len; |
| | 659 | + send->s_sge[1].lkey = ic->i_pd->local_dma_lkey; |
| 654 | 660 | |
| 655 | 661 | bytes_sent += len; |
| 656 | 662 | rm->data.op_dmaoff += len; |
| 657 | | - if (rm->data.op_dmaoff == sg_dma_len(dev, scat)) { |
| | 663 | + if (rm->data.op_dmaoff == sg_dma_len(scat)) { |
| 658 | 664 | scat++; |
| 659 | 665 | rm->data.op_dmasg++; |
| 660 | 666 | rm->data.op_dmaoff = 0; |
| .. | .. |
| 678 | 684 | &send->s_wr, send->s_wr.num_sge, send->s_wr.next); |
| 679 | 685 | |
| 680 | 686 | if (ic->i_flowctl && adv_credits) { |
| 681 | | - struct rds_header *hdr = &ic->i_send_hdrs[pos]; |
| | 687 | + struct rds_header *hdr = ic->i_send_hdrs[pos]; |
| 682 | 688 | |
| 683 | 689 | /* add credit and redo the header checksum */ |
| 684 | 690 | hdr->h_credit = adv_credits; |
| .. | .. |
| 686 | 692 | adv_credits = 0; |
| 687 | 693 | rds_ib_stats_inc(s_ib_tx_credit_updates); |
| 688 | 694 | } |
| | 695 | + ib_dma_sync_single_for_device(ic->rds_ibdev->dev, |
| | 696 | + ic->i_send_hdrs_dma[pos], |
| | 697 | + sizeof(struct rds_header), |
| | 698 | + DMA_TO_DEVICE); |
| 689 | 699 | |
| 690 | 700 | if (prev) |
| 691 | 701 | prev->s_wr.next = &send->s_wr; |
| .. | .. |
| 808 | 818 | } |
| 809 | 819 | |
| 810 | 820 | /* Convert our struct scatterlist to struct ib_sge */ |
| 811 | | - send->s_sge[0].addr = ib_sg_dma_address(ic->i_cm_id->device, op->op_sg); |
| 812 | | - send->s_sge[0].length = ib_sg_dma_len(ic->i_cm_id->device, op->op_sg); |
| | 821 | + send->s_sge[0].addr = sg_dma_address(op->op_sg); |
| | 822 | + send->s_sge[0].length = sg_dma_len(op->op_sg); |
| 813 | 823 | send->s_sge[0].lkey = ic->i_pd->local_dma_lkey; |
| 814 | 824 | |
| 815 | 825 | rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr, |
| .. | .. |
| 859 | 869 | int ret; |
| 860 | 870 | int num_sge; |
| 861 | 871 | int nr_sig = 0; |
| | 872 | + u64 odp_addr = op->op_odp_addr; |
| | 873 | + u32 odp_lkey = 0; |
| 862 | 874 | |
| 863 | 875 | /* map the op the first time we see it */ |
| 864 | | - if (!op->op_mapped) { |
| 865 | | - op->op_count = ib_dma_map_sg(ic->i_cm_id->device, |
| 866 | | - op->op_sg, op->op_nents, (op->op_write) ? |
| 867 | | - DMA_TO_DEVICE : DMA_FROM_DEVICE); |
| 868 | | - rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->op_count); |
| 869 | | - if (op->op_count == 0) { |
| 870 | | - rds_ib_stats_inc(s_ib_tx_sg_mapping_failure); |
| 871 | | - ret = -ENOMEM; /* XXX ? */ |
| 872 | | - goto out; |
| | 876 | + if (!op->op_odp_mr) { |
| | 877 | + if (!op->op_mapped) { |
| | 878 | + op->op_count = |
| | 879 | + ib_dma_map_sg(ic->i_cm_id->device, op->op_sg, |
| | 880 | + op->op_nents, |
| | 881 | + (op->op_write) ? DMA_TO_DEVICE : |
| | 882 | + DMA_FROM_DEVICE); |
| | 883 | + rdsdebug("ic %p mapping op %p: %d\n", ic, op, |
| | 884 | + op->op_count); |
| | 885 | + if (op->op_count == 0) { |
| | 886 | + rds_ib_stats_inc(s_ib_tx_sg_mapping_failure); |
| | 887 | + ret = -ENOMEM; /* XXX ? */ |
| | 888 | + goto out; |
| | 889 | + } |
| | 890 | + op->op_mapped = 1; |
| 873 | 891 | } |
| 874 | | - |
| 875 | | - op->op_mapped = 1; |
| | 892 | + } else { |
| | 893 | + op->op_count = op->op_nents; |
| | 894 | + odp_lkey = rds_ib_get_lkey(op->op_odp_mr->r_trans_private); |
| 876 | 895 | } |
| 877 | 896 | |
| 878 | 897 | /* |
| 879 | 898 | * Instead of knowing how to return a partial rdma read/write we insist that there |
| 880 | 899 | * be enough work requests to send the entire message. |
| 881 | 900 | */ |
| 882 | | - i = ceil(op->op_count, max_sge); |
| | 901 | + i = DIV_ROUND_UP(op->op_count, max_sge); |
| 883 | 902 | |
| 884 | 903 | work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos); |
| 885 | 904 | if (work_alloc != i) { |
| .. | .. |
| 901 | 920 | send->s_queued = jiffies; |
| 902 | 921 | send->s_op = NULL; |
| 903 | 922 | |
| 904 | | - nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify); |
| | 923 | + if (!op->op_notify) |
| | 924 | + nr_sig += rds_ib_set_wr_signal_state(ic, send, |
| | 925 | + op->op_notify); |
| 905 | 926 | |
| 906 | 927 | send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ; |
| 907 | 928 | send->s_rdma_wr.remote_addr = remote_addr; |
| .. | .. |
| 921 | 942 | |
| 922 | 943 | for (j = 0; j < send->s_rdma_wr.wr.num_sge && |
| 923 | 944 | scat != &op->op_sg[op->op_count]; j++) { |
| 924 | | - len = ib_sg_dma_len(ic->i_cm_id->device, scat); |
| 925 | | - send->s_sge[j].addr = |
| 926 | | - ib_sg_dma_address(ic->i_cm_id->device, scat); |
| | 945 | + len = sg_dma_len(scat); |
| | 946 | + if (!op->op_odp_mr) { |
| | 947 | + send->s_sge[j].addr = sg_dma_address(scat); |
| | 948 | + send->s_sge[j].lkey = ic->i_pd->local_dma_lkey; |
| | 949 | + } else { |
| | 950 | + send->s_sge[j].addr = odp_addr; |
| | 951 | + send->s_sge[j].lkey = odp_lkey; |
| | 952 | + } |
| 927 | 953 | send->s_sge[j].length = len; |
| 928 | | - send->s_sge[j].lkey = ic->i_pd->local_dma_lkey; |
| 929 | 954 | |
| 930 | 955 | sent += len; |
| 931 | 956 | rdsdebug("ic %p sent %d remote_addr %llu\n", ic, sent, remote_addr); |
| 932 | 957 | |
| 933 | 958 | remote_addr += len; |
| | 959 | + odp_addr += len; |
| 934 | 960 | scat++; |
| 935 | 961 | } |
| 936 | 962 | |
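
A note on the header-copy hunk: the added ib_dma_sync_single_for_cpu()/ib_dma_sync_single_for_device() calls bracket the CPU write to the DMA-mapped send header, which is the standard ownership handoff for a streaming DMA mapping. Below is a minimal sketch of that pattern only, not the RDS code itself; the function name and the dev/hdr/hdr_dma parameters are placeholders standing in for ic->rds_ibdev->dev, ic->i_send_hdrs[pos] and ic->i_send_hdrs_dma[pos] used in the patch.

```c
#include <linux/dma-mapping.h>
#include <linux/string.h>
#include <rdma/ib_verbs.h>

/*
 * Sketch of the sync-around-update pattern: hand the streaming mapping to
 * the CPU, update the buffer, then hand it back to the device before the
 * work request that references it is posted.
 */
static void update_dma_mapped_hdr(struct ib_device *dev, void *hdr,
				  u64 hdr_dma, const void *src, size_t len)
{
	/* CPU takes ownership of the DMA buffer before touching it */
	ib_dma_sync_single_for_cpu(dev, hdr_dma, len, DMA_TO_DEVICE);

	memcpy(hdr, src, len);		/* CPU fills in the header */

	/* give ownership back to the HCA before ib_post_send() */
	ib_dma_sync_single_for_device(dev, hdr_dma, len, DMA_TO_DEVICE);
}
```

Separately, the two-argument ceil() helper used in the old lines is replaced by the kernel's DIV_ROUND_UP() macro, which computes the same rounded-up division (for example, a 4097-byte message with RDS_FRAG_SIZE of 4096 still yields two fragments).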