@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
@@ -39,6 +39,7 @@
 #include "rds_single_path.h"
 #include "rds.h"
 #include "ib.h"
+#include "ib_mr.h"
 
 /*
  * Convert IB-specific error message to RDS error message and call core
@@ -67,6 +68,16 @@
                 break;
         }
         complete(rm, notify_status);
+}
+
+static void rds_ib_send_unmap_data(struct rds_ib_connection *ic,
+                                   struct rm_data_op *op,
+                                   int wc_status)
+{
+        if (op->op_nents)
+                ib_dma_unmap_sg(ic->i_cm_id->device,
+                                op->op_sg, op->op_nents,
+                                DMA_TO_DEVICE);
 }
 
 static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic,
@@ -127,21 +138,6 @@
                 rds_ib_stats_inc(s_ib_atomic_cswp);
         else
                 rds_ib_stats_inc(s_ib_atomic_fadd);
-}
-
-static void rds_ib_send_unmap_data(struct rds_ib_connection *ic,
-                                   struct rm_data_op *op,
-                                   int wc_status)
-{
-        struct rds_message *rm = container_of(op, struct rds_message, data);
-
-        if (op->op_nents)
-                ib_dma_unmap_sg(ic->i_cm_id->device,
-                                op->op_sg, op->op_nents,
-                                DMA_TO_DEVICE);
-
-        if (rm->rdma.op_active && rm->data.op_notify)
-                rds_ib_send_unmap_rdma(ic, &rm->rdma, wc_status);
 }
 
 /*
@@ -206,7 +202,8 @@
                 send->s_wr.ex.imm_data = 0;
 
                 sge = &send->s_sge[0];
-                sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header));
+                sge->addr = ic->i_send_hdrs_dma[i];
+
                 sge->length = sizeof(struct rds_header);
                 sge->lkey = ic->i_pd->local_dma_lkey;
 
@@ -305,9 +302,10 @@
 
         /* We expect errors as the qp is drained during shutdown */
         if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) {
-                rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n",
-                                  &conn->c_laddr, &conn->c_faddr, wc->status,
-                                  ib_wc_status_msg(wc->status));
+                rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), vendor err 0x%x, disconnecting and reconnecting\n",
+                                  &conn->c_laddr, &conn->c_faddr,
+                                  conn->c_tos, wc->status,
+                                  ib_wc_status_msg(wc->status), wc->vendor_err);
         }
 }
 
@@ -522,7 +520,7 @@
         if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0)
                 i = 1;
         else
-                i = ceil(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE);
+                i = DIV_ROUND_UP(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE);
 
         work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos);
         if (work_alloc == 0) {
@@ -635,26 +633,34 @@
                 send->s_queued = jiffies;
                 send->s_op = NULL;
 
-                send->s_sge[0].addr = ic->i_send_hdrs_dma
-                        + (pos * sizeof(struct rds_header));
-                send->s_sge[0].length = sizeof(struct rds_header);
+                send->s_sge[0].addr = ic->i_send_hdrs_dma[pos];
 
-                memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));
+                send->s_sge[0].length = sizeof(struct rds_header);
+                send->s_sge[0].lkey = ic->i_pd->local_dma_lkey;
+
+                ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev,
+                                           ic->i_send_hdrs_dma[pos],
+                                           sizeof(struct rds_header),
+                                           DMA_TO_DEVICE);
+                memcpy(ic->i_send_hdrs[pos], &rm->m_inc.i_hdr,
+                       sizeof(struct rds_header));
+
 
                 /* Set up the data, if present */
                 if (i < work_alloc
                     && scat != &rm->data.op_sg[rm->data.op_count]) {
                         len = min(RDS_FRAG_SIZE,
-                                  ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff);
+                                  sg_dma_len(scat) - rm->data.op_dmaoff);
                         send->s_wr.num_sge = 2;
 
-                        send->s_sge[1].addr = ib_sg_dma_address(dev, scat);
+                        send->s_sge[1].addr = sg_dma_address(scat);
                         send->s_sge[1].addr += rm->data.op_dmaoff;
                         send->s_sge[1].length = len;
+                        send->s_sge[1].lkey = ic->i_pd->local_dma_lkey;
 
                         bytes_sent += len;
                         rm->data.op_dmaoff += len;
-                        if (rm->data.op_dmaoff == ib_sg_dma_len(dev, scat)) {
+                        if (rm->data.op_dmaoff == sg_dma_len(scat)) {
                                 scat++;
                                 rm->data.op_dmasg++;
                                 rm->data.op_dmaoff = 0;
@@ -678,7 +684,7 @@
                          &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
 
                 if (ic->i_flowctl && adv_credits) {
-                        struct rds_header *hdr = &ic->i_send_hdrs[pos];
+                        struct rds_header *hdr = ic->i_send_hdrs[pos];
 
                         /* add credit and redo the header checksum */
                         hdr->h_credit = adv_credits;
@@ -686,6 +692,10 @@
                         adv_credits = 0;
                         rds_ib_stats_inc(s_ib_tx_credit_updates);
                 }
+                ib_dma_sync_single_for_device(ic->rds_ibdev->dev,
+                                              ic->i_send_hdrs_dma[pos],
+                                              sizeof(struct rds_header),
+                                              DMA_TO_DEVICE);
 
                 if (prev)
                         prev->s_wr.next = &send->s_wr;
@@ -808,8 +818,8 @@
         }
 
         /* Convert our struct scatterlist to struct ib_sge */
-        send->s_sge[0].addr = ib_sg_dma_address(ic->i_cm_id->device, op->op_sg);
-        send->s_sge[0].length = ib_sg_dma_len(ic->i_cm_id->device, op->op_sg);
+        send->s_sge[0].addr = sg_dma_address(op->op_sg);
+        send->s_sge[0].length = sg_dma_len(op->op_sg);
         send->s_sge[0].lkey = ic->i_pd->local_dma_lkey;
 
         rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr,
@@ -859,27 +869,36 @@
         int ret;
         int num_sge;
         int nr_sig = 0;
+        u64 odp_addr = op->op_odp_addr;
+        u32 odp_lkey = 0;
 
         /* map the op the first time we see it */
-        if (!op->op_mapped) {
-                op->op_count = ib_dma_map_sg(ic->i_cm_id->device,
-                                             op->op_sg, op->op_nents, (op->op_write) ?
-                                             DMA_TO_DEVICE : DMA_FROM_DEVICE);
-                rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->op_count);
-                if (op->op_count == 0) {
-                        rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
-                        ret = -ENOMEM; /* XXX ? */
-                        goto out;
+        if (!op->op_odp_mr) {
+                if (!op->op_mapped) {
+                        op->op_count =
+                                ib_dma_map_sg(ic->i_cm_id->device, op->op_sg,
                                              op->op_nents,
                                              (op->op_write) ? DMA_TO_DEVICE :
                                                               DMA_FROM_DEVICE);
+                        rdsdebug("ic %p mapping op %p: %d\n", ic, op,
                                 op->op_count);
+                        if (op->op_count == 0) {
+                                rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
+                                ret = -ENOMEM; /* XXX ? */
+                                goto out;
+                        }
+                        op->op_mapped = 1;
                 }
-
-                op->op_mapped = 1;
+        } else {
+                op->op_count = op->op_nents;
+                odp_lkey = rds_ib_get_lkey(op->op_odp_mr->r_trans_private);
         }
 
         /*
         * Instead of knowing how to return a partial rdma read/write we insist that there
         * be enough work requests to send the entire message.
         */
-        i = ceil(op->op_count, max_sge);
+        i = DIV_ROUND_UP(op->op_count, max_sge);
 
         work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos);
         if (work_alloc != i) {
@@ -901,7 +920,9 @@
                 send->s_queued = jiffies;
                 send->s_op = NULL;
 
-                nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify);
+                if (!op->op_notify)
+                        nr_sig += rds_ib_set_wr_signal_state(ic, send,
                                                              op->op_notify);
 
                 send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
                 send->s_rdma_wr.remote_addr = remote_addr;
@@ -921,16 +942,21 @@
 
                 for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
                      scat != &op->op_sg[op->op_count]; j++) {
-                        len = ib_sg_dma_len(ic->i_cm_id->device, scat);
-                        send->s_sge[j].addr =
-                                ib_sg_dma_address(ic->i_cm_id->device, scat);
+                        len = sg_dma_len(scat);
+                        if (!op->op_odp_mr) {
+                                send->s_sge[j].addr = sg_dma_address(scat);
+                                send->s_sge[j].lkey = ic->i_pd->local_dma_lkey;
+                        } else {
+                                send->s_sge[j].addr = odp_addr;
+                                send->s_sge[j].lkey = odp_lkey;
+                        }
                         send->s_sge[j].length = len;
-                        send->s_sge[j].lkey = ic->i_pd->local_dma_lkey;
 
                         sent += len;
                         rdsdebug("ic %p sent %d remote_addr %llu\n", ic, sent, remote_addr);
 
                         remote_addr += len;
+                        odp_addr += len;
                         scat++;
                 }
 