| .. | .. | 
|---|
| 7 | 7 |  /* Lightweight memory registration using Fast Registration Work | 
|---|
| 8 | 8 |   * Requests (FRWR). | 
|---|
| 9 | 9 |   * | 
|---|
| 10 |  | - * FRWR features ordered asynchronous registration and deregistration  | 
|---|
| 11 |  | - * of arbitrarily sized memory regions. This is the fastest and safest  | 
|---|
 | 10 | + * FRWR features ordered asynchronous registration and invalidation  | 
|---|
 | 11 | + * of arbitrarily-sized memory regions. This is the fastest and safest  | 
|---|
| 12 | 12 |   * but most complex memory registration mode. | 
|---|
| 13 | 13 |   */ | 
|---|
| 14 | 14 |   | 
|---|
| 15 | 15 |  /* Normal operation | 
|---|
| 16 | 16 |   * | 
|---|
| 17 |  | - * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG  | 
|---|
| 18 |  | - * Work Request (frwr_op_map). When the RDMA operation is finished, this  | 
|---|
 | 17 | + * A Memory Region is prepared for RDMA Read or Write using a FAST_REG  | 
|---|
 | 18 | + * Work Request (frwr_map). When the RDMA operation is finished, this  | 
|---|
| 19 | 19 |   * Memory Region is invalidated using a LOCAL_INV Work Request | 
|---|
| 20 |  | - * (frwr_op_unmap_sync).  | 
|---|
 | 20 | + * (frwr_unmap_async and frwr_unmap_sync).  | 
|---|
| 21 | 21 |   * | 
|---|
| 22 |  | - * Typically these Work Requests are not signaled, and neither are RDMA  | 
|---|
| 23 |  | - * SEND Work Requests (with the exception of signaling occasionally to  | 
|---|
| 24 |  | - * prevent provider work queue overflows). This greatly reduces HCA  | 
|---|
 | 22 | + * Typically FAST_REG Work Requests are not signaled, and neither are  | 
|---|
 | 23 | + * RDMA Send Work Requests (with the exception of signaling occasionally  | 
|---|
 | 24 | + * to prevent provider work queue overflows). This greatly reduces HCA  | 
|---|
| 25 | 25 |   * interrupt workload. | 
|---|
| 26 |  | - *  | 
|---|
| 27 |  | - * As an optimization, frwr_op_unmap marks MRs INVALID before the  | 
|---|
| 28 |  | - * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on  | 
|---|
| 29 |  | - * rb_mrs immediately so that no work (like managing a linked list  | 
|---|
| 30 |  | - * under a spinlock) is needed in the completion upcall.  | 
|---|
| 31 |  | - *  | 
|---|
| 32 |  | - * But this means that frwr_op_map() can occasionally encounter an MR  | 
|---|
| 33 |  | - * that is INVALID but the LOCAL_INV WR has not completed. Work Queue  | 
|---|
| 34 |  | - * ordering prevents a subsequent FAST_REG WR from executing against  | 
|---|
| 35 |  | - * that MR while it is still being invalidated.  | 
|---|
| 36 | 26 |   */ | 
|---|
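To make the lifecycle described above concrete, here is a minimal sketch of how the entry points in this file are meant to fit together across one RPC. The caller shape is an assumption (the marshaling and reply paths live outside this patch), and locking and most error handling are omitted; only the frwr_* calls themselves are defined in this file.

```c
/* Minimal, non-buildable sketch of the lifecycle described above.
 * Only the frwr_* calls are defined in this file; the caller shape,
 * locking, and error handling are assumptions.
 */
static void frwr_lifecycle_sketch(struct rpcrdma_xprt *r_xprt,
				  struct rpcrdma_req *req,
				  struct rpcrdma_mr_seg *seg, int nsegs,
				  __be32 xid, struct rpcrdma_mr *mr)
{
	/* 1. Registration: frwr_map() fills @mr and builds a FAST_REG
	 *    Work Request for one chunk, but does not post it.
	 */
	seg = frwr_map(r_xprt, seg, nsegs, true, xid, mr);
	if (IS_ERR(seg))
		return;

	/* 2. Posting: frwr_send() chains the FAST_REG WR(s) ahead of
	 *    the Send WR so a single ib_post_send() registers memory
	 *    and sends the Call; these WRs are normally left unsignaled.
	 */
	if (frwr_send(r_xprt, req))
		return;

	/* 3. Invalidation: when the Reply arrives, either the server
	 *    already invalidated the MR remotely (frwr_reminv) or the
	 *    client fences it with a LOCAL_INV WR -- frwr_unmap_async
	 *    here, frwr_unmap_sync on the synchronous teardown path.
	 */
	frwr_unmap_async(r_xprt, req);
}
```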
| 37 | 27 |   | 
|---|
| 38 | 28 |  /* Transport recovery | 
|---|
| 39 | 29 |   * | 
|---|
| 40 |  | - * ->op_map and the transport connect worker cannot run at the same  | 
|---|
| 41 |  | - * time, but ->op_unmap can fire while the transport connect worker  | 
|---|
| 42 |  | - * is running. Thus MR recovery is handled in ->op_map, to guarantee  | 
|---|
| 43 |  | - * that recovered MRs are owned by a sending RPC, and not one where  | 
|---|
| 44 |  | - * ->op_unmap could fire at the same time transport reconnect is  | 
|---|
| 45 |  | - * being done.  | 
|---|
 | 30 | + * frwr_map and frwr_unmap_* cannot run at the same time the transport  | 
|---|
 | 31 | + * connect worker is running. The connect worker holds the transport  | 
|---|
 | 32 | + * send lock, just as ->send_request does. This prevents frwr_map and  | 
|---|
 | 33 | + * the connect worker from running concurrently. When a connection is  | 
|---|
 | 34 | + * closed, the Receive completion queue is drained before allowing  | 
|---|
 | 35 | + * the connect worker to get control. This prevents frwr_unmap and the  | 
|---|
 | 36 | + * connect worker from running concurrently.  | 
|---|
| 46 | 37 |   * | 
|---|
| 47 |  | - * When the underlying transport disconnects, MRs are left in one of  | 
|---|
| 48 |  | - * four states:  | 
|---|
| 49 |  | - *  | 
|---|
| 50 |  | - * INVALID:	The MR was not in use before the QP entered ERROR state.  | 
|---|
| 51 |  | - *  | 
|---|
| 52 |  | - * VALID:	The MR was registered before the QP entered ERROR state.  | 
|---|
| 53 |  | - *  | 
|---|
| 54 |  | - * FLUSHED_FR:	The MR was being registered when the QP entered ERROR  | 
|---|
| 55 |  | - *		state, and the pending WR was flushed.  | 
|---|
| 56 |  | - *  | 
|---|
| 57 |  | - * FLUSHED_LI:	The MR was being invalidated when the QP entered ERROR  | 
|---|
| 58 |  | - *		state, and the pending WR was flushed.  | 
|---|
| 59 |  | - *  | 
|---|
| 60 |  | - * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered  | 
|---|
| 61 |  | - * with ib_dereg_mr and then are re-initialized. Because MR recovery  | 
|---|
| 62 |  | - * allocates fresh resources, it is deferred to a workqueue, and the  | 
|---|
| 63 |  | - * recovered MRs are placed back on the rb_mrs list when recovery is  | 
|---|
| 64 |  | - * complete. frwr_op_map allocates another MR for the current RPC while  | 
|---|
| 65 |  | - * the broken MR is reset.  | 
|---|
| 66 |  | - *  | 
|---|
| 67 |  | - * To ensure that frwr_op_map doesn't encounter an MR that is marked  | 
|---|
| 68 |  | - * INVALID but that is about to be flushed due to a previous transport  | 
|---|
| 69 |  | - * disconnect, the transport connect worker attempts to drain all  | 
|---|
| 70 |  | - * pending send queue WRs before the transport is reconnected.  | 
|---|
 | 38 | + * When the underlying transport disconnects, MRs that are in flight  | 
|---|
 | 39 | + * are flushed and are likely unusable. Thus all MRs are destroyed.  | 
|---|
 | 40 | + * New MRs are created on demand.  | 
|---|
| 71 | 41 |   */ | 
|---|
| 72 | 42 |   | 
|---|
| 73 |  | -#include <linux/sunrpc/rpc_rdma.h>  | 
|---|
| 74 | 43 |  #include <linux/sunrpc/svc_rdma.h> | 
|---|
| 75 | 44 |   | 
|---|
| 76 | 45 |  #include "xprt_rdma.h" | 
|---|
| .. | .. | 
|---|
| 80 | 49 |  # define RPCDBG_FACILITY	RPCDBG_TRANS | 
|---|
| 81 | 50 |  #endif | 
|---|
| 82 | 51 |   | 
|---|
| 83 |  | -bool  | 
|---|
| 84 |  | -frwr_is_supported(struct rpcrdma_ia *ia)  | 
|---|
| 85 |  | -{  | 
|---|
| 86 |  | -	struct ib_device_attr *attrs = &ia->ri_device->attrs;  | 
|---|
| 87 |  | -  | 
|---|
| 88 |  | -	if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))  | 
|---|
| 89 |  | -		goto out_not_supported;  | 
|---|
| 90 |  | -	if (attrs->max_fast_reg_page_list_len == 0)  | 
|---|
| 91 |  | -		goto out_not_supported;  | 
|---|
| 92 |  | -	return true;  | 
|---|
| 93 |  | -  | 
|---|
| 94 |  | -out_not_supported:  | 
|---|
| 95 |  | -	pr_info("rpcrdma: 'frwr' mode is not supported by device %s\n",  | 
|---|
| 96 |  | -		ia->ri_device->name);  | 
|---|
| 97 |  | -	return false;  | 
|---|
| 98 |  | -}  | 
|---|
| 99 |  | -  | 
|---|
| 100 |  | -static int  | 
|---|
| 101 |  | -frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)  | 
|---|
| 102 |  | -{  | 
|---|
| 103 |  | -	unsigned int depth = ia->ri_max_frwr_depth;  | 
|---|
| 104 |  | -	struct rpcrdma_frwr *frwr = &mr->frwr;  | 
|---|
| 105 |  | -	int rc;  | 
|---|
| 106 |  | -  | 
|---|
| 107 |  | -	frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);  | 
|---|
| 108 |  | -	if (IS_ERR(frwr->fr_mr))  | 
|---|
| 109 |  | -		goto out_mr_err;  | 
|---|
| 110 |  | -  | 
|---|
| 111 |  | -	mr->mr_sg = kcalloc(depth, sizeof(*mr->mr_sg), GFP_KERNEL);  | 
|---|
| 112 |  | -	if (!mr->mr_sg)  | 
|---|
| 113 |  | -		goto out_list_err;  | 
|---|
| 114 |  | -  | 
|---|
| 115 |  | -	INIT_LIST_HEAD(&mr->mr_list);  | 
|---|
| 116 |  | -	sg_init_table(mr->mr_sg, depth);  | 
|---|
| 117 |  | -	init_completion(&frwr->fr_linv_done);  | 
|---|
| 118 |  | -	return 0;  | 
|---|
| 119 |  | -  | 
|---|
| 120 |  | -out_mr_err:  | 
|---|
| 121 |  | -	rc = PTR_ERR(frwr->fr_mr);  | 
|---|
| 122 |  | -	dprintk("RPC:       %s: ib_alloc_mr status %i\n",  | 
|---|
| 123 |  | -		__func__, rc);  | 
|---|
| 124 |  | -	return rc;  | 
|---|
| 125 |  | -  | 
|---|
| 126 |  | -out_list_err:  | 
|---|
| 127 |  | -	rc = -ENOMEM;  | 
|---|
| 128 |  | -	dprintk("RPC:       %s: sg allocation failure\n",  | 
|---|
| 129 |  | -		__func__);  | 
|---|
| 130 |  | -	ib_dereg_mr(frwr->fr_mr);  | 
|---|
| 131 |  | -	return rc;  | 
|---|
| 132 |  | -}  | 
|---|
| 133 |  | -  | 
|---|
| 134 |  | -static void  | 
|---|
| 135 |  | -frwr_op_release_mr(struct rpcrdma_mr *mr)  | 
|---|
 | 52 | +/**  | 
|---|
 | 53 | + * frwr_release_mr - Destroy one MR  | 
|---|
 | 54 | + * @mr: MR allocated by frwr_mr_init  | 
|---|
 | 55 | + *  | 
|---|
 | 56 | + */  | 
|---|
 | 57 | +void frwr_release_mr(struct rpcrdma_mr *mr)  | 
|---|
| 136 | 58 |  { | 
|---|
| 137 | 59 |  	int rc; | 
|---|
| 138 | 60 |   | 
|---|
| 139 | 61 |  	rc = ib_dereg_mr(mr->frwr.fr_mr); | 
|---|
| 140 | 62 |  	if (rc) | 
|---|
| 141 |  | -		pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",  | 
|---|
| 142 |  | -		       mr, rc);  | 
|---|
 | 63 | +		trace_xprtrdma_frwr_dereg(mr, rc);  | 
|---|
| 143 | 64 |  	kfree(mr->mr_sg); | 
|---|
| 144 | 65 |  	kfree(mr); | 
|---|
| 145 | 66 |  } | 
|---|
| 146 | 67 |   | 
|---|
| 147 |  | -static int  | 
|---|
| 148 |  | -__frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)  | 
|---|
 | 68 | +static void frwr_mr_recycle(struct rpcrdma_mr *mr)  | 
|---|
| 149 | 69 |  { | 
|---|
| 150 |  | -	struct rpcrdma_frwr *frwr = &mr->frwr;  | 
|---|
| 151 |  | -	int rc;  | 
|---|
| 152 |  | -  | 
|---|
| 153 |  | -	rc = ib_dereg_mr(frwr->fr_mr);  | 
|---|
| 154 |  | -	if (rc) {  | 
|---|
| 155 |  | -		pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",  | 
|---|
| 156 |  | -			rc, mr);  | 
|---|
| 157 |  | -		return rc;  | 
|---|
| 158 |  | -	}  | 
|---|
| 159 |  | -  | 
|---|
| 160 |  | -	frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,  | 
|---|
| 161 |  | -				  ia->ri_max_frwr_depth);  | 
|---|
| 162 |  | -	if (IS_ERR(frwr->fr_mr)) {  | 
|---|
| 163 |  | -		pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",  | 
|---|
| 164 |  | -			PTR_ERR(frwr->fr_mr), mr);  | 
|---|
| 165 |  | -		return PTR_ERR(frwr->fr_mr);  | 
|---|
| 166 |  | -	}  | 
|---|
| 167 |  | -  | 
|---|
| 168 |  | -	dprintk("RPC:       %s: recovered FRWR %p\n", __func__, frwr);  | 
|---|
| 169 |  | -	frwr->fr_state = FRWR_IS_INVALID;  | 
|---|
| 170 |  | -	return 0;  | 
|---|
| 171 |  | -}  | 
|---|
| 172 |  | -  | 
|---|
| 173 |  | -/* Reset of a single FRWR. Generate a fresh rkey by replacing the MR.  | 
|---|
| 174 |  | - */  | 
|---|
| 175 |  | -static void  | 
|---|
| 176 |  | -frwr_op_recover_mr(struct rpcrdma_mr *mr)  | 
|---|
| 177 |  | -{  | 
|---|
| 178 |  | -	enum rpcrdma_frwr_state state = mr->frwr.fr_state;  | 
|---|
| 179 | 70 |  	struct rpcrdma_xprt *r_xprt = mr->mr_xprt; | 
|---|
| 180 |  | -	struct rpcrdma_ia *ia = &r_xprt->rx_ia;  | 
|---|
| 181 |  | -	int rc;  | 
|---|
| 182 | 71 |   | 
|---|
| 183 |  | -	rc = __frwr_mr_reset(ia, mr);  | 
|---|
| 184 |  | -	if (state != FRWR_FLUSHED_LI) {  | 
|---|
| 185 |  | -		trace_xprtrdma_dma_unmap(mr);  | 
|---|
| 186 |  | -		ib_dma_unmap_sg(ia->ri_device,  | 
|---|
 | 72 | +	trace_xprtrdma_mr_recycle(mr);  | 
|---|
 | 73 | +  | 
|---|
 | 74 | +	if (mr->mr_dir != DMA_NONE) {  | 
|---|
 | 75 | +		trace_xprtrdma_mr_unmap(mr);  | 
|---|
 | 76 | +		ib_dma_unmap_sg(r_xprt->rx_ep->re_id->device,  | 
|---|
| 187 | 77 |  				mr->mr_sg, mr->mr_nents, mr->mr_dir); | 
|---|
 | 78 | +		mr->mr_dir = DMA_NONE;  | 
|---|
| 188 | 79 |  	} | 
|---|
| 189 |  | -	if (rc)  | 
|---|
| 190 |  | -		goto out_release;  | 
|---|
| 191 | 80 |   | 
|---|
| 192 |  | -	rpcrdma_mr_put(mr);  | 
|---|
| 193 |  | -	r_xprt->rx_stats.mrs_recovered++;  | 
|---|
| 194 |  | -	return;  | 
|---|
| 195 |  | -  | 
|---|
| 196 |  | -out_release:  | 
|---|
| 197 |  | -	pr_err("rpcrdma: FRWR reset failed %d, %p released\n", rc, mr);  | 
|---|
| 198 |  | -	r_xprt->rx_stats.mrs_orphaned++;  | 
|---|
| 199 |  | -  | 
|---|
| 200 |  | -	spin_lock(&r_xprt->rx_buf.rb_mrlock);  | 
|---|
 | 81 | +	spin_lock(&r_xprt->rx_buf.rb_lock);  | 
|---|
| 201 | 82 |  	list_del(&mr->mr_all); | 
|---|
| 202 |  | -	spin_unlock(&r_xprt->rx_buf.rb_mrlock);  | 
|---|
 | 83 | +	r_xprt->rx_stats.mrs_recycled++;  | 
|---|
 | 84 | +	spin_unlock(&r_xprt->rx_buf.rb_lock);  | 
|---|
| 203 | 85 |   | 
|---|
| 204 |  | -	frwr_op_release_mr(mr);  | 
|---|
 | 86 | +	frwr_release_mr(mr);  | 
|---|
| 205 | 87 |  } | 
|---|
| 206 | 88 |   | 
|---|
| 207 |  | -/* On success, sets:  | 
|---|
| 208 |  | - *	ep->rep_attr.cap.max_send_wr  | 
|---|
| 209 |  | - *	ep->rep_attr.cap.max_recv_wr  | 
|---|
| 210 |  | - *	cdata->max_requests  | 
|---|
| 211 |  | - *	ia->ri_max_segs  | 
|---|
 | 89 | +/* frwr_reset - Place MRs back on the free list  | 
|---|
 | 90 | + * @req: request to reset  | 
|---|
| 212 | 91 |   * | 
|---|
| 213 |  | - * And these FRWR-related fields:  | 
|---|
| 214 |  | - *	ia->ri_max_frwr_depth  | 
|---|
| 215 |  | - *	ia->ri_mrtype  | 
|---|
 | 92 | + * Used after a failed marshal. For FRWR, this means the MRs  | 
|---|
 | 93 | + * don't have to be fully released and recreated.  | 
|---|
 | 94 | + *  | 
|---|
 | 95 | + * NB: This is safe only as long as none of @req's MRs are  | 
|---|
 | 96 | + * involved with an ongoing asynchronous FAST_REG or LOCAL_INV  | 
|---|
 | 97 | + * Work Request.  | 
|---|
| 216 | 98 |   */ | 
|---|
| 217 |  | -static int  | 
|---|
| 218 |  | -frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,  | 
|---|
| 219 |  | -	     struct rpcrdma_create_data_internal *cdata)  | 
|---|
 | 99 | +void frwr_reset(struct rpcrdma_req *req)  | 
|---|
| 220 | 100 |  { | 
|---|
| 221 |  | -	struct ib_device_attr *attrs = &ia->ri_device->attrs;  | 
|---|
 | 101 | +	struct rpcrdma_mr *mr;  | 
|---|
 | 102 | +  | 
|---|
 | 103 | +	while ((mr = rpcrdma_mr_pop(&req->rl_registered)))  | 
|---|
 | 104 | +		rpcrdma_mr_put(mr);  | 
|---|
 | 105 | +}  | 
|---|
 | 106 | +  | 
|---|
 | 107 | +/**  | 
|---|
 | 108 | + * frwr_mr_init - Initialize one MR  | 
|---|
 | 109 | + * @r_xprt: controlling transport instance  | 
|---|
 | 110 | + * @mr: generic MR to prepare for FRWR  | 
|---|
 | 111 | + *  | 
|---|
 | 112 | + * Returns zero if successful. Otherwise a negative errno  | 
|---|
 | 113 | + * is returned.  | 
|---|
 | 114 | + */  | 
|---|
 | 115 | +int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)  | 
|---|
 | 116 | +{  | 
|---|
 | 117 | +	struct rpcrdma_ep *ep = r_xprt->rx_ep;  | 
|---|
 | 118 | +	unsigned int depth = ep->re_max_fr_depth;  | 
|---|
 | 119 | +	struct scatterlist *sg;  | 
|---|
 | 120 | +	struct ib_mr *frmr;  | 
|---|
 | 121 | +	int rc;  | 
|---|
 | 122 | +  | 
|---|
 | 123 | +	frmr = ib_alloc_mr(ep->re_pd, ep->re_mrtype, depth);  | 
|---|
 | 124 | +	if (IS_ERR(frmr))  | 
|---|
 | 125 | +		goto out_mr_err;  | 
|---|
 | 126 | +  | 
|---|
 | 127 | +	sg = kmalloc_array(depth, sizeof(*sg), GFP_NOFS);  | 
|---|
 | 128 | +	if (!sg)  | 
|---|
 | 129 | +		goto out_list_err;  | 
|---|
 | 130 | +  | 
|---|
 | 131 | +	mr->mr_xprt = r_xprt;  | 
|---|
 | 132 | +	mr->frwr.fr_mr = frmr;  | 
|---|
 | 133 | +	mr->mr_dir = DMA_NONE;  | 
|---|
 | 134 | +	INIT_LIST_HEAD(&mr->mr_list);  | 
|---|
 | 135 | +	init_completion(&mr->frwr.fr_linv_done);  | 
|---|
 | 136 | +  | 
|---|
 | 137 | +	sg_init_table(sg, depth);  | 
|---|
 | 138 | +	mr->mr_sg = sg;  | 
|---|
 | 139 | +	return 0;  | 
|---|
 | 140 | +  | 
|---|
 | 141 | +out_mr_err:  | 
|---|
 | 142 | +	rc = PTR_ERR(frmr);  | 
|---|
 | 143 | +	trace_xprtrdma_frwr_alloc(mr, rc);  | 
|---|
 | 144 | +	return rc;  | 
|---|
 | 145 | +  | 
|---|
 | 146 | +out_list_err:  | 
|---|
 | 147 | +	ib_dereg_mr(frmr);  | 
|---|
 | 148 | +	return -ENOMEM;  | 
|---|
 | 149 | +}  | 
|---|
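As a small illustration of the ownership rules implied by frwr_mr_init() and frwr_release_mr(), the hedged sketch below creates and destroys one MR. The allocating caller is an assumption (the transport's MR replenishing code is not part of this patch); only the two frwr_* calls come from this file.

```c
/* Hedged sketch: create one MR, then destroy it.  Only frwr_mr_init()
 * and frwr_release_mr() are from this file; the rest is assumed.
 */
static struct rpcrdma_mr *sketch_mr_create(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_mr *mr;

	mr = kzalloc(sizeof(*mr), GFP_NOFS);
	if (!mr)
		return NULL;

	/* allocates mr->frwr.fr_mr and mr->mr_sg */
	if (frwr_mr_init(r_xprt, mr)) {
		kfree(mr);
		return NULL;
	}
	return mr;
}

/* Teardown: frwr_release_mr() deregisters fr_mr and frees both
 * mr->mr_sg and the rpcrdma_mr itself, so the caller must not
 * touch @mr afterward.
 */
static void sketch_mr_destroy(struct rpcrdma_mr *mr)
{
	frwr_release_mr(mr);
}
```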
 | 150 | +  | 
|---|
 | 151 | +/**  | 
|---|
 | 152 | + * frwr_query_device - Prepare a transport for use with FRWR  | 
|---|
 | 153 | + * @ep: endpoint to fill in  | 
|---|
 | 154 | + * @device: RDMA device to query  | 
|---|
 | 155 | + *  | 
|---|
 | 156 | + * On success, sets:  | 
|---|
 | 157 | + *	ep->re_attr  | 
|---|
 | 158 | + *	ep->re_max_requests  | 
|---|
 | 159 | + *	ep->re_max_rdma_segs  | 
|---|
 | 160 | + *	ep->re_max_fr_depth  | 
|---|
 | 161 | + *	ep->re_mrtype  | 
|---|
 | 162 | + *  | 
|---|
 | 163 | + * Return values:  | 
|---|
 | 164 | + *   On success, returns zero.  | 
|---|
 | 165 | + *   %-EINVAL - the device does not support FRWR memory registration  | 
|---|
 | 166 | + *   %-ENOMEM - the device is not sufficiently capable for NFS/RDMA  | 
|---|
 | 167 | + */  | 
|---|
 | 168 | +int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device)  | 
|---|
 | 169 | +{  | 
|---|
 | 170 | +	const struct ib_device_attr *attrs = &device->attrs;  | 
|---|
| 222 | 171 |  	int max_qp_wr, depth, delta; | 
|---|
 | 172 | +	unsigned int max_sge;  | 
|---|
| 223 | 173 |   | 
|---|
| 224 |  | -	ia->ri_mrtype = IB_MR_TYPE_MEM_REG;  | 
|---|
 | 174 | +	if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) ||  | 
|---|
 | 175 | +	    attrs->max_fast_reg_page_list_len == 0) {  | 
|---|
 | 176 | +		pr_err("rpcrdma: 'frwr' mode is not supported by device %s\n",  | 
|---|
 | 177 | +		       device->name);  | 
|---|
 | 178 | +		return -EINVAL;  | 
|---|
 | 179 | +	}  | 
|---|
 | 180 | +  | 
|---|
 | 181 | +	max_sge = min_t(unsigned int, attrs->max_send_sge,  | 
|---|
 | 182 | +			RPCRDMA_MAX_SEND_SGES);  | 
|---|
 | 183 | +	if (max_sge < RPCRDMA_MIN_SEND_SGES) {  | 
|---|
 | 184 | +		pr_err("rpcrdma: HCA provides only %u send SGEs\n", max_sge);  | 
|---|
 | 185 | +		return -ENOMEM;  | 
|---|
 | 186 | +	}  | 
|---|
 | 187 | +	ep->re_attr.cap.max_send_sge = max_sge;  | 
|---|
 | 188 | +	ep->re_attr.cap.max_recv_sge = 1;  | 
|---|
 | 189 | +  | 
|---|
 | 190 | +	ep->re_mrtype = IB_MR_TYPE_MEM_REG;  | 
|---|
| 225 | 191 |  	if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) | 
|---|
| 226 |  | -		ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;  | 
|---|
 | 192 | +		ep->re_mrtype = IB_MR_TYPE_SG_GAPS;  | 
|---|
| 227 | 193 |   | 
|---|
| 228 |  | -	ia->ri_max_frwr_depth =  | 
|---|
| 229 |  | -			min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,  | 
|---|
| 230 |  | -			      attrs->max_fast_reg_page_list_len);  | 
|---|
| 231 |  | -	dprintk("RPC:       %s: device's max FR page list len = %u\n",  | 
|---|
| 232 |  | -		__func__, ia->ri_max_frwr_depth);  | 
|---|
 | 194 | +	/* Quirk: Some devices advertise a large max_fast_reg_page_list_len  | 
|---|
 | 195 | +	 * capability, but perform optimally when the MRs are not larger  | 
|---|
 | 196 | +	 * than a page.  | 
|---|
 | 197 | +	 */  | 
|---|
 | 198 | +	if (attrs->max_sge_rd > RPCRDMA_MAX_HDR_SEGS)  | 
|---|
 | 199 | +		ep->re_max_fr_depth = attrs->max_sge_rd;  | 
|---|
 | 200 | +	else  | 
|---|
 | 201 | +		ep->re_max_fr_depth = attrs->max_fast_reg_page_list_len;  | 
|---|
 | 202 | +	if (ep->re_max_fr_depth > RPCRDMA_MAX_DATA_SEGS)  | 
|---|
 | 203 | +		ep->re_max_fr_depth = RPCRDMA_MAX_DATA_SEGS;  | 
|---|
| 233 | 204 |   | 
|---|
| 234 | 205 |  	/* Add room for frwr register and invalidate WRs. | 
|---|
| 235 | 206 |  	 * 1. FRWR reg WR for head | 
|---|
| .. | .. | 
|---|
| 245 | 216 |  	/* Calculate N if the device max FRWR depth is smaller than | 
|---|
| 246 | 217 |  	 * RPCRDMA_MAX_DATA_SEGS. | 
|---|
| 247 | 218 |  	 */ | 
|---|
| 248 |  | -	if (ia->ri_max_frwr_depth < RPCRDMA_MAX_DATA_SEGS) {  | 
|---|
| 249 |  | -		delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frwr_depth;  | 
|---|
 | 219 | +	if (ep->re_max_fr_depth < RPCRDMA_MAX_DATA_SEGS) {  | 
|---|
 | 220 | +		delta = RPCRDMA_MAX_DATA_SEGS - ep->re_max_fr_depth;  | 
|---|
| 250 | 221 |  		do { | 
|---|
| 251 | 222 |  			depth += 2; /* FRWR reg + invalidate */ | 
|---|
| 252 |  | -			delta -= ia->ri_max_frwr_depth;  | 
|---|
 | 223 | +			delta -= ep->re_max_fr_depth;  | 
|---|
| 253 | 224 |  		} while (delta > 0); | 
|---|
| 254 | 225 |  	} | 
|---|
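To make the loop above concrete with purely hypothetical numbers (neither constant is asserted here): if RPCRDMA_MAX_DATA_SEGS were 64 and re_max_fr_depth were 30, delta would start at 34; the first pass adds 2 WRs and leaves delta at 4, the second pass adds 2 more and drives delta to -26, so the loop exits having reserved two extra FAST_REG/LOCAL_INV pairs on top of the base depth computed above -- enough Send Queue room to split a maximum-sized payload across additional MRs.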
| 255 | 226 |   | 
|---|
| 256 |  | -	max_qp_wr = ia->ri_device->attrs.max_qp_wr;  | 
|---|
 | 227 | +	max_qp_wr = attrs->max_qp_wr;  | 
|---|
| 257 | 228 |  	max_qp_wr -= RPCRDMA_BACKWARD_WRS; | 
|---|
| 258 | 229 |  	max_qp_wr -= 1; | 
|---|
| 259 | 230 |  	if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE) | 
|---|
| 260 | 231 |  		return -ENOMEM; | 
|---|
| 261 |  | -	if (cdata->max_requests > max_qp_wr)  | 
|---|
| 262 |  | -		cdata->max_requests = max_qp_wr;  | 
|---|
| 263 |  | -	ep->rep_attr.cap.max_send_wr = cdata->max_requests * depth;  | 
|---|
| 264 |  | -	if (ep->rep_attr.cap.max_send_wr > max_qp_wr) {  | 
|---|
| 265 |  | -		cdata->max_requests = max_qp_wr / depth;  | 
|---|
| 266 |  | -		if (!cdata->max_requests)  | 
|---|
| 267 |  | -			return -EINVAL;  | 
|---|
| 268 |  | -		ep->rep_attr.cap.max_send_wr = cdata->max_requests *  | 
|---|
| 269 |  | -					       depth;  | 
|---|
 | 232 | +	if (ep->re_max_requests > max_qp_wr)  | 
|---|
 | 233 | +		ep->re_max_requests = max_qp_wr;  | 
|---|
 | 234 | +	ep->re_attr.cap.max_send_wr = ep->re_max_requests * depth;  | 
|---|
 | 235 | +	if (ep->re_attr.cap.max_send_wr > max_qp_wr) {  | 
|---|
 | 236 | +		ep->re_max_requests = max_qp_wr / depth;  | 
|---|
 | 237 | +		if (!ep->re_max_requests)  | 
|---|
 | 238 | +			return -ENOMEM;  | 
|---|
 | 239 | +		ep->re_attr.cap.max_send_wr = ep->re_max_requests * depth;  | 
|---|
| 270 | 240 |  	} | 
|---|
| 271 |  | -	ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;  | 
|---|
| 272 |  | -	ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */  | 
|---|
| 273 |  | -	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;  | 
|---|
| 274 |  | -	ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;  | 
|---|
| 275 |  | -	ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */  | 
|---|
 | 241 | +	ep->re_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;  | 
|---|
 | 242 | +	ep->re_attr.cap.max_send_wr += 1; /* for ib_drain_sq */  | 
|---|
 | 243 | +	ep->re_attr.cap.max_recv_wr = ep->re_max_requests;  | 
|---|
 | 244 | +	ep->re_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;  | 
|---|
 | 245 | +	ep->re_attr.cap.max_recv_wr += RPCRDMA_MAX_RECV_BATCH;  | 
|---|
 | 246 | +	ep->re_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */  | 
|---|
| 276 | 247 |   | 
|---|
| 277 |  | -	ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /  | 
|---|
| 278 |  | -				ia->ri_max_frwr_depth);  | 
|---|
 | 248 | +	ep->re_max_rdma_segs =  | 
|---|
 | 249 | +		DIV_ROUND_UP(RPCRDMA_MAX_DATA_SEGS, ep->re_max_fr_depth);  | 
|---|
 | 250 | +	/* Reply chunks require segments for head and tail buffers */  | 
|---|
 | 251 | +	ep->re_max_rdma_segs += 2;  | 
|---|
 | 252 | +	if (ep->re_max_rdma_segs > RPCRDMA_MAX_HDR_SEGS)  | 
|---|
 | 253 | +		ep->re_max_rdma_segs = RPCRDMA_MAX_HDR_SEGS;  | 
|---|
 | 254 | +  | 
|---|
 | 255 | +	/* Ensure the underlying device is capable of conveying the  | 
|---|
 | 256 | +	 * largest r/wsize NFS will ask for. This guarantees that  | 
|---|
 | 257 | +	 * failing over from one RDMA device to another will not  | 
|---|
 | 258 | +	 * break NFS I/O.  | 
|---|
 | 259 | +	 */  | 
|---|
 | 260 | +	if ((ep->re_max_rdma_segs * ep->re_max_fr_depth) < RPCRDMA_MAX_SEGS)  | 
|---|
 | 261 | +		return -ENOMEM;  | 
|---|
 | 262 | +  | 
|---|
| 279 | 263 |  	return 0; | 
|---|
| 280 | 264 |  } | 
|---|
| 281 | 265 |   | 
|---|
| 282 |  | -/* FRWR mode conveys a list of pages per chunk segment. The  | 
|---|
| 283 |  | - * maximum length of that list is the FRWR page list depth.  | 
|---|
| 284 |  | - */  | 
|---|
| 285 |  | -static size_t  | 
|---|
| 286 |  | -frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)  | 
|---|
| 287 |  | -{  | 
|---|
| 288 |  | -	struct rpcrdma_ia *ia = &r_xprt->rx_ia;  | 
|---|
| 289 |  | -  | 
|---|
| 290 |  | -	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,  | 
|---|
| 291 |  | -		     RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth);  | 
|---|
| 292 |  | -}  | 
|---|
| 293 |  | -  | 
|---|
| 294 |  | -static void  | 
|---|
| 295 |  | -__frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)  | 
|---|
| 296 |  | -{  | 
|---|
| 297 |  | -	if (wc->status != IB_WC_WR_FLUSH_ERR)  | 
|---|
| 298 |  | -		pr_err("rpcrdma: %s: %s (%u/0x%x)\n",  | 
|---|
| 299 |  | -		       wr, ib_wc_status_msg(wc->status),  | 
|---|
| 300 |  | -		       wc->status, wc->vendor_err);  | 
|---|
| 301 |  | -}  | 
|---|
| 302 |  | -  | 
|---|
| 303 | 266 |  /** | 
|---|
| 304 |  | - * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC  | 
|---|
| 305 |  | - * @cq:	completion queue (ignored)  | 
|---|
| 306 |  | - * @wc:	completed WR  | 
|---|
 | 267 | + * frwr_map - Register a memory region  | 
|---|
 | 268 | + * @r_xprt: controlling transport  | 
|---|
 | 269 | + * @seg: memory region co-ordinates  | 
|---|
 | 270 | + * @nsegs: number of segments remaining  | 
|---|
 | 271 | + * @writing: true when RDMA Write will be used  | 
|---|
 | 272 | + * @xid: XID of RPC using the registered memory  | 
|---|
 | 273 | + * @mr: MR to fill in  | 
|---|
| 307 | 274 |   * | 
|---|
| 308 |  | - */  | 
|---|
| 309 |  | -static void  | 
|---|
| 310 |  | -frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)  | 
|---|
| 311 |  | -{  | 
|---|
| 312 |  | -	struct ib_cqe *cqe = wc->wr_cqe;  | 
|---|
| 313 |  | -	struct rpcrdma_frwr *frwr =  | 
|---|
| 314 |  | -			container_of(cqe, struct rpcrdma_frwr, fr_cqe);  | 
|---|
| 315 |  | -  | 
|---|
| 316 |  | -	/* WARNING: Only wr_cqe and status are reliable at this point */  | 
|---|
| 317 |  | -	if (wc->status != IB_WC_SUCCESS) {  | 
|---|
| 318 |  | -		frwr->fr_state = FRWR_FLUSHED_FR;  | 
|---|
| 319 |  | -		__frwr_sendcompletion_flush(wc, "fastreg");  | 
|---|
| 320 |  | -	}  | 
|---|
| 321 |  | -	trace_xprtrdma_wc_fastreg(wc, frwr);  | 
|---|
| 322 |  | -}  | 
|---|
| 323 |  | -  | 
|---|
| 324 |  | -/**  | 
|---|
| 325 |  | - * frwr_wc_localinv - Invoked by RDMA provider for a flushed LocalInv WC  | 
|---|
| 326 |  | - * @cq:	completion queue (ignored)  | 
|---|
| 327 |  | - * @wc:	completed WR  | 
|---|
| 328 |  | - *  | 
|---|
| 329 |  | - */  | 
|---|
| 330 |  | -static void  | 
|---|
| 331 |  | -frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)  | 
|---|
| 332 |  | -{  | 
|---|
| 333 |  | -	struct ib_cqe *cqe = wc->wr_cqe;  | 
|---|
| 334 |  | -	struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,  | 
|---|
| 335 |  | -						 fr_cqe);  | 
|---|
| 336 |  | -  | 
|---|
| 337 |  | -	/* WARNING: Only wr_cqe and status are reliable at this point */  | 
|---|
| 338 |  | -	if (wc->status != IB_WC_SUCCESS) {  | 
|---|
| 339 |  | -		frwr->fr_state = FRWR_FLUSHED_LI;  | 
|---|
| 340 |  | -		__frwr_sendcompletion_flush(wc, "localinv");  | 
|---|
| 341 |  | -	}  | 
|---|
| 342 |  | -	trace_xprtrdma_wc_li(wc, frwr);  | 
|---|
| 343 |  | -}  | 
|---|
| 344 |  | -  | 
|---|
| 345 |  | -/**  | 
|---|
| 346 |  | - * frwr_wc_localinv_wake - Invoked by RDMA provider for a signaled LocalInv WC  | 
|---|
| 347 |  | - * @cq:	completion queue (ignored)  | 
|---|
| 348 |  | - * @wc:	completed WR  | 
|---|
| 349 |  | - *  | 
|---|
| 350 |  | - * Awaken anyone waiting for an MR to finish being fenced.  | 
|---|
| 351 |  | - */  | 
|---|
| 352 |  | -static void  | 
|---|
| 353 |  | -frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)  | 
|---|
| 354 |  | -{  | 
|---|
| 355 |  | -	struct ib_cqe *cqe = wc->wr_cqe;  | 
|---|
| 356 |  | -	struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,  | 
|---|
| 357 |  | -						 fr_cqe);  | 
|---|
| 358 |  | -  | 
|---|
| 359 |  | -	/* WARNING: Only wr_cqe and status are reliable at this point */  | 
|---|
| 360 |  | -	if (wc->status != IB_WC_SUCCESS) {  | 
|---|
| 361 |  | -		frwr->fr_state = FRWR_FLUSHED_LI;  | 
|---|
| 362 |  | -		__frwr_sendcompletion_flush(wc, "localinv");  | 
|---|
| 363 |  | -	}  | 
|---|
| 364 |  | -	complete(&frwr->fr_linv_done);  | 
|---|
| 365 |  | -	trace_xprtrdma_wc_li_wake(wc, frwr);  | 
|---|
| 366 |  | -}  | 
|---|
| 367 |  | -  | 
|---|
| 368 |  | -/* Post a REG_MR Work Request to register a memory region  | 
|---|
 | 275 | + * Prepare a REG_MR Work Request to register a memory region  | 
|---|
| 369 | 276 |   * for remote access via RDMA READ or RDMA WRITE. | 
|---|
 | 277 | + *  | 
|---|
 | 278 | + * Returns the next segment or a negative errno pointer.  | 
|---|
 | 279 | + * On success, @mr is filled in.  | 
|---|
| 370 | 280 |   */ | 
|---|
| 371 |  | -static struct rpcrdma_mr_seg *  | 
|---|
| 372 |  | -frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,  | 
|---|
| 373 |  | -	    int nsegs, bool writing, struct rpcrdma_mr **out)  | 
|---|
 | 281 | +struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,  | 
|---|
 | 282 | +				struct rpcrdma_mr_seg *seg,  | 
|---|
 | 283 | +				int nsegs, bool writing, __be32 xid,  | 
|---|
 | 284 | +				struct rpcrdma_mr *mr)  | 
|---|
| 374 | 285 |  { | 
|---|
| 375 |  | -	struct rpcrdma_ia *ia = &r_xprt->rx_ia;  | 
|---|
| 376 |  | -	bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;  | 
|---|
| 377 |  | -	struct rpcrdma_frwr *frwr;  | 
|---|
| 378 |  | -	struct rpcrdma_mr *mr;  | 
|---|
| 379 |  | -	struct ib_mr *ibmr;  | 
|---|
 | 286 | +	struct rpcrdma_ep *ep = r_xprt->rx_ep;  | 
|---|
| 380 | 287 |  	struct ib_reg_wr *reg_wr; | 
|---|
| 381 |  | -	int i, n;  | 
|---|
 | 288 | +	int i, n, dma_nents;  | 
|---|
 | 289 | +	struct ib_mr *ibmr;  | 
|---|
| 382 | 290 |  	u8 key; | 
|---|
| 383 | 291 |   | 
|---|
| 384 |  | -	mr = NULL;  | 
|---|
| 385 |  | -	do {  | 
|---|
| 386 |  | -		if (mr)  | 
|---|
| 387 |  | -			rpcrdma_mr_defer_recovery(mr);  | 
|---|
| 388 |  | -		mr = rpcrdma_mr_get(r_xprt);  | 
|---|
| 389 |  | -		if (!mr)  | 
|---|
| 390 |  | -			return ERR_PTR(-EAGAIN);  | 
|---|
| 391 |  | -	} while (mr->frwr.fr_state != FRWR_IS_INVALID);  | 
|---|
| 392 |  | -	frwr = &mr->frwr;  | 
|---|
| 393 |  | -	frwr->fr_state = FRWR_IS_VALID;  | 
|---|
| 394 |  | -  | 
|---|
| 395 |  | -	if (nsegs > ia->ri_max_frwr_depth)  | 
|---|
| 396 |  | -		nsegs = ia->ri_max_frwr_depth;  | 
|---|
 | 292 | +	if (nsegs > ep->re_max_fr_depth)  | 
|---|
 | 293 | +		nsegs = ep->re_max_fr_depth;  | 
|---|
| 397 | 294 |  	for (i = 0; i < nsegs;) { | 
|---|
| 398 | 295 |  		if (seg->mr_page) | 
|---|
| 399 | 296 |  			sg_set_page(&mr->mr_sg[i], | 
|---|
| .. | .. | 
|---|
| 406 | 303 |   | 
|---|
| 407 | 304 |  		++seg; | 
|---|
| 408 | 305 |  		++i; | 
|---|
| 409 |  | -		if (holes_ok)  | 
|---|
 | 306 | +		if (ep->re_mrtype == IB_MR_TYPE_SG_GAPS)  | 
|---|
| 410 | 307 |  			continue; | 
|---|
| 411 | 308 |  		if ((i < nsegs && offset_in_page(seg->mr_offset)) || | 
|---|
| 412 | 309 |  		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | 
|---|
| 413 | 310 |  			break; | 
|---|
| 414 | 311 |  	} | 
|---|
| 415 | 312 |  	mr->mr_dir = rpcrdma_data_dir(writing); | 
|---|
 | 313 | +	mr->mr_nents = i;  | 
|---|
| 416 | 314 |   | 
|---|
| 417 |  | -	mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);  | 
|---|
| 418 |  | -	if (!mr->mr_nents)  | 
|---|
 | 315 | +	dma_nents = ib_dma_map_sg(ep->re_id->device, mr->mr_sg, mr->mr_nents,  | 
|---|
 | 316 | +				  mr->mr_dir);  | 
|---|
 | 317 | +	if (!dma_nents)  | 
|---|
| 419 | 318 |  		goto out_dmamap_err; | 
|---|
| 420 |  | -	trace_xprtrdma_dma_map(mr);  | 
|---|
| 421 | 319 |   | 
|---|
| 422 |  | -	ibmr = frwr->fr_mr;  | 
|---|
| 423 |  | -	n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);  | 
|---|
| 424 |  | -	if (unlikely(n != mr->mr_nents))  | 
|---|
 | 320 | +	ibmr = mr->frwr.fr_mr;  | 
|---|
 | 321 | +	n = ib_map_mr_sg(ibmr, mr->mr_sg, dma_nents, NULL, PAGE_SIZE);  | 
|---|
 | 322 | +	if (n != dma_nents)  | 
|---|
| 425 | 323 |  		goto out_mapmr_err; | 
|---|
| 426 | 324 |   | 
|---|
 | 325 | +	ibmr->iova &= 0x00000000ffffffff;  | 
|---|
 | 326 | +	ibmr->iova |= ((u64)be32_to_cpu(xid)) << 32;  | 
|---|
| 427 | 327 |  	key = (u8)(ibmr->rkey & 0x000000FF); | 
|---|
| 428 | 328 |  	ib_update_fast_reg_key(ibmr, ++key); | 
|---|
| 429 | 329 |   | 
|---|
| 430 |  | -	reg_wr = &frwr->fr_regwr;  | 
|---|
 | 330 | +	reg_wr = &mr->frwr.fr_regwr;  | 
|---|
| 431 | 331 |  	reg_wr->mr = ibmr; | 
|---|
| 432 | 332 |  	reg_wr->key = ibmr->rkey; | 
|---|
| 433 | 333 |  	reg_wr->access = writing ? | 
|---|
| .. | .. | 
|---|
| 437 | 337 |  	mr->mr_handle = ibmr->rkey; | 
|---|
| 438 | 338 |  	mr->mr_length = ibmr->length; | 
|---|
| 439 | 339 |  	mr->mr_offset = ibmr->iova; | 
|---|
 | 340 | +	trace_xprtrdma_mr_map(mr);  | 
|---|
| 440 | 341 |   | 
|---|
| 441 |  | -	*out = mr;  | 
|---|
| 442 | 342 |  	return seg; | 
|---|
| 443 | 343 |   | 
|---|
| 444 | 344 |  out_dmamap_err: | 
|---|
| 445 |  | -	pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",  | 
|---|
| 446 |  | -	       mr->mr_sg, i);  | 
|---|
| 447 |  | -	frwr->fr_state = FRWR_IS_INVALID;  | 
|---|
| 448 |  | -	rpcrdma_mr_put(mr);  | 
|---|
 | 345 | +	mr->mr_dir = DMA_NONE;  | 
|---|
 | 346 | +	trace_xprtrdma_frwr_sgerr(mr, i);  | 
|---|
| 449 | 347 |  	return ERR_PTR(-EIO); | 
|---|
| 450 | 348 |   | 
|---|
| 451 | 349 |  out_mapmr_err: | 
|---|
| 452 |  | -	pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",  | 
|---|
| 453 |  | -	       frwr->fr_mr, n, mr->mr_nents);  | 
|---|
| 454 |  | -	rpcrdma_mr_defer_recovery(mr);  | 
|---|
 | 350 | +	trace_xprtrdma_frwr_maperr(mr, n);  | 
|---|
| 455 | 351 |  	return ERR_PTR(-EIO); | 
|---|
| 456 | 352 |  } | 
|---|
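A note on the iova manipulation at new lines 325-326: frwr_map() keeps the low 32 bits of the iova produced by ib_map_mr_sg() and replaces the high 32 bits with the RPC's XID (the new @xid argument). Since mr->mr_offset is taken from ibmr->iova just below, the offset advertised for the chunk carries the owning RPC's XID in its upper half; presumably this is an observability aid, letting an MR seen in a trace or packet capture be correlated with the RPC that owns it.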
| 457 | 353 |   | 
|---|
| 458 |  | -/* Post Send WR containing the RPC Call message.  | 
|---|
 | 354 | +/**  | 
|---|
 | 355 | + * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC  | 
|---|
 | 356 | + * @cq: completion queue  | 
|---|
 | 357 | + * @wc: WCE for a completed FastReg WR  | 
|---|
| 459 | 358 |   * | 
|---|
| 460 |  | - * For FRMR, chain any FastReg WRs to the Send WR. Only a  | 
|---|
 | 359 | + */  | 
|---|
 | 360 | +static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)  | 
|---|
 | 361 | +{  | 
|---|
 | 362 | +	struct ib_cqe *cqe = wc->wr_cqe;  | 
|---|
 | 363 | +	struct rpcrdma_frwr *frwr =  | 
|---|
 | 364 | +		container_of(cqe, struct rpcrdma_frwr, fr_cqe);  | 
|---|
 | 365 | +  | 
|---|
 | 366 | +	/* WARNING: Only wr_cqe and status are reliable at this point */  | 
|---|
 | 367 | +	trace_xprtrdma_wc_fastreg(wc, frwr);  | 
|---|
 | 368 | +	/* The MR will get recycled when the associated req is retransmitted */  | 
|---|
 | 369 | +  | 
|---|
 | 370 | +	rpcrdma_flush_disconnect(cq->cq_context, wc);  | 
|---|
 | 371 | +}  | 
|---|
 | 372 | +  | 
|---|
 | 373 | +/**  | 
|---|
 | 374 | + * frwr_send - post Send WRs containing the RPC Call message  | 
|---|
 | 375 | + * @r_xprt: controlling transport instance  | 
|---|
 | 376 | + * @req: prepared RPC Call  | 
|---|
 | 377 | + *  | 
|---|
 | 378 | + * For FRWR, chain any FastReg WRs to the Send WR. Only a  | 
|---|
| 461 | 379 |   * single ib_post_send call is needed to register memory | 
|---|
| 462 | 380 |   * and then post the Send WR. | 
|---|
 | 381 | + *  | 
|---|
 | 382 | + * Returns the return code from ib_post_send.  | 
|---|
 | 383 | + *  | 
|---|
 | 384 | + * Caller must hold the transport send lock to ensure that the  | 
|---|
 | 385 | + * pointers to the transport's rdma_cm_id and QP are stable.  | 
|---|
| 463 | 386 |   */ | 
|---|
| 464 |  | -static int  | 
|---|
| 465 |  | -frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)  | 
|---|
 | 387 | +int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)  | 
|---|
| 466 | 388 |  { | 
|---|
| 467 | 389 |  	struct ib_send_wr *post_wr; | 
|---|
| 468 | 390 |  	struct rpcrdma_mr *mr; | 
|---|
| 469 | 391 |   | 
|---|
| 470 |  | -	post_wr = &req->rl_sendctx->sc_wr;  | 
|---|
 | 392 | +	post_wr = &req->rl_wr;  | 
|---|
| 471 | 393 |  	list_for_each_entry(mr, &req->rl_registered, mr_list) { | 
|---|
| 472 | 394 |  		struct rpcrdma_frwr *frwr; | 
|---|
| 473 | 395 |   | 
|---|
| .. | .. | 
|---|
| 483 | 405 |  		post_wr = &frwr->fr_regwr.wr; | 
|---|
| 484 | 406 |  	} | 
|---|
| 485 | 407 |   | 
|---|
| 486 |  | -	/* If ib_post_send fails, the next ->send_request for  | 
|---|
| 487 |  | -	 * @req will queue these MWs for recovery.  | 
|---|
| 488 |  | -	 */  | 
|---|
| 489 |  | -	return ib_post_send(ia->ri_id->qp, post_wr, NULL);  | 
|---|
 | 408 | +	return ib_post_send(r_xprt->rx_ep->re_id->qp, post_wr, NULL);  | 
|---|
| 490 | 409 |  } | 
|---|
| 491 | 410 |   | 
|---|
| 492 |  | -/* Handle a remotely invalidated mr on the @mrs list  | 
|---|
 | 411 | +/**  | 
|---|
 | 412 | + * frwr_reminv - handle a remotely invalidated mr on the @mrs list  | 
|---|
 | 413 | + * @rep: Received reply  | 
|---|
 | 414 | + * @mrs: list of MRs to check  | 
|---|
 | 415 | + *  | 
|---|
| 493 | 416 |   */ | 
|---|
| 494 |  | -static void  | 
|---|
| 495 |  | -frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)  | 
|---|
 | 417 | +void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)  | 
|---|
| 496 | 418 |  { | 
|---|
| 497 | 419 |  	struct rpcrdma_mr *mr; | 
|---|
| 498 | 420 |   | 
|---|
| 499 | 421 |  	list_for_each_entry(mr, mrs, mr_list) | 
|---|
| 500 | 422 |  		if (mr->mr_handle == rep->rr_inv_rkey) { | 
|---|
| 501 | 423 |  			list_del_init(&mr->mr_list); | 
|---|
| 502 |  | -			trace_xprtrdma_remoteinv(mr);  | 
|---|
| 503 |  | -			mr->frwr.fr_state = FRWR_IS_INVALID;  | 
|---|
| 504 |  | -			rpcrdma_mr_unmap_and_put(mr);  | 
|---|
 | 424 | +			trace_xprtrdma_mr_reminv(mr);  | 
|---|
 | 425 | +			rpcrdma_mr_put(mr);  | 
|---|
| 505 | 426 |  			break;	/* only one invalidated MR per RPC */ | 
|---|
| 506 | 427 |  		} | 
|---|
| 507 | 428 |  } | 
|---|
| 508 | 429 |   | 
|---|
| 509 |  | -/* Invalidate all memory regions that were registered for "req".  | 
|---|
 | 430 | +static void __frwr_release_mr(struct ib_wc *wc, struct rpcrdma_mr *mr)  | 
|---|
 | 431 | +{  | 
|---|
 | 432 | +	if (wc->status != IB_WC_SUCCESS)  | 
|---|
 | 433 | +		frwr_mr_recycle(mr);  | 
|---|
 | 434 | +	else  | 
|---|
 | 435 | +		rpcrdma_mr_put(mr);  | 
|---|
 | 436 | +}  | 
|---|
 | 437 | +  | 
|---|
 | 438 | +/**  | 
|---|
 | 439 | + * frwr_wc_localinv - Invoked by RDMA provider for a LOCAL_INV WC  | 
|---|
 | 440 | + * @cq: completion queue  | 
|---|
 | 441 | + * @wc: WCE for a completed LocalInv WR  | 
|---|
| 510 | 442 |   * | 
|---|
| 511 |  | - * Sleeps until it is safe for the host CPU to access the  | 
|---|
| 512 |  | - * previously mapped memory regions.  | 
|---|
| 513 |  | - *  | 
|---|
| 514 |  | - * Caller ensures that @mrs is not empty before the call. This  | 
|---|
| 515 |  | - * function empties the list.  | 
|---|
| 516 | 443 |   */ | 
|---|
| 517 |  | -static void  | 
|---|
| 518 |  | -frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)  | 
|---|
 | 444 | +static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)  | 
|---|
 | 445 | +{  | 
|---|
 | 446 | +	struct ib_cqe *cqe = wc->wr_cqe;  | 
|---|
 | 447 | +	struct rpcrdma_frwr *frwr =  | 
|---|
 | 448 | +		container_of(cqe, struct rpcrdma_frwr, fr_cqe);  | 
|---|
 | 449 | +	struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);  | 
|---|
 | 450 | +  | 
|---|
 | 451 | +	/* WARNING: Only wr_cqe and status are reliable at this point */  | 
|---|
 | 452 | +	trace_xprtrdma_wc_li(wc, frwr);  | 
|---|
 | 453 | +	__frwr_release_mr(wc, mr);  | 
|---|
 | 454 | +  | 
|---|
 | 455 | +	rpcrdma_flush_disconnect(cq->cq_context, wc);  | 
|---|
 | 456 | +}  | 
|---|
 | 457 | +  | 
|---|
 | 458 | +/**  | 
|---|
 | 459 | + * frwr_wc_localinv_wake - Invoked by RDMA provider for a LOCAL_INV WC  | 
|---|
 | 460 | + * @cq: completion queue  | 
|---|
 | 461 | + * @wc: WCE for a completed LocalInv WR  | 
|---|
 | 462 | + *  | 
|---|
 | 463 | + * Awaken anyone waiting for an MR to finish being fenced.  | 
|---|
 | 464 | + */  | 
|---|
 | 465 | +static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)  | 
|---|
 | 466 | +{  | 
|---|
 | 467 | +	struct ib_cqe *cqe = wc->wr_cqe;  | 
|---|
 | 468 | +	struct rpcrdma_frwr *frwr =  | 
|---|
 | 469 | +		container_of(cqe, struct rpcrdma_frwr, fr_cqe);  | 
|---|
 | 470 | +	struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);  | 
|---|
 | 471 | +  | 
|---|
 | 472 | +	/* WARNING: Only wr_cqe and status are reliable at this point */  | 
|---|
 | 473 | +	trace_xprtrdma_wc_li_wake(wc, frwr);  | 
|---|
 | 474 | +	__frwr_release_mr(wc, mr);  | 
|---|
 | 475 | +	complete(&frwr->fr_linv_done);  | 
|---|
 | 476 | +  | 
|---|
 | 477 | +	rpcrdma_flush_disconnect(cq->cq_context, wc);  | 
|---|
 | 478 | +}  | 
|---|
 | 479 | +  | 
|---|
 | 480 | +/**  | 
|---|
 | 481 | + * frwr_unmap_sync - invalidate memory regions that were registered for @req  | 
|---|
 | 482 | + * @r_xprt: controlling transport instance  | 
|---|
 | 483 | + * @req: rpcrdma_req with a non-empty list of MRs to process  | 
|---|
 | 484 | + *  | 
|---|
 | 485 | + * Sleeps until it is safe for the host CPU to access the previously mapped  | 
|---|
 | 486 | + * memory regions. This guarantees that registered MRs are properly fenced  | 
|---|
 | 487 | + * from the server before the RPC consumer accesses the data in them. It  | 
|---|
 | 488 | + * also ensures proper Send flow control: waking the next RPC waits until  | 
|---|
 | 489 | + * this RPC has relinquished all its Send Queue entries.  | 
|---|
 | 490 | + */  | 
|---|
 | 491 | +void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)  | 
|---|
| 519 | 492 |  { | 
|---|
| 520 | 493 |  	struct ib_send_wr *first, **prev, *last; | 
|---|
| 521 | 494 |  	const struct ib_send_wr *bad_wr; | 
|---|
| 522 |  | -	struct rpcrdma_ia *ia = &r_xprt->rx_ia;  | 
|---|
| 523 | 495 |  	struct rpcrdma_frwr *frwr; | 
|---|
| 524 | 496 |  	struct rpcrdma_mr *mr; | 
|---|
| 525 |  | -	int count, rc;  | 
|---|
 | 497 | +	int rc;  | 
|---|
| 526 | 498 |   | 
|---|
| 527 | 499 |  	/* ORDER: Invalidate all of the MRs first | 
|---|
| 528 | 500 |  	 * | 
|---|
| .. | .. | 
|---|
| 530 | 502 |  	 * a single ib_post_send() call. | 
|---|
| 531 | 503 |  	 */ | 
|---|
| 532 | 504 |  	frwr = NULL; | 
|---|
| 533 |  | -	count = 0;  | 
|---|
| 534 | 505 |  	prev = &first; | 
|---|
| 535 |  | -	list_for_each_entry(mr, mrs, mr_list) {  | 
|---|
| 536 |  | -		mr->frwr.fr_state = FRWR_IS_INVALID;  | 
|---|
 | 506 | +	while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {  | 
|---|
 | 507 | +  | 
|---|
 | 508 | +		trace_xprtrdma_mr_localinv(mr);  | 
|---|
 | 509 | +		r_xprt->rx_stats.local_inv_needed++;  | 
|---|
| 537 | 510 |   | 
|---|
| 538 | 511 |  		frwr = &mr->frwr; | 
|---|
| 539 |  | -		trace_xprtrdma_localinv(mr);  | 
|---|
| 540 |  | -  | 
|---|
| 541 | 512 |  		frwr->fr_cqe.done = frwr_wc_localinv; | 
|---|
| 542 | 513 |  		last = &frwr->fr_invwr; | 
|---|
| 543 |  | -		memset(last, 0, sizeof(*last));  | 
|---|
 | 514 | +		last->next = NULL;  | 
|---|
| 544 | 515 |  		last->wr_cqe = &frwr->fr_cqe; | 
|---|
 | 516 | +		last->sg_list = NULL;  | 
|---|
 | 517 | +		last->num_sge = 0;  | 
|---|
| 545 | 518 |  		last->opcode = IB_WR_LOCAL_INV; | 
|---|
 | 519 | +		last->send_flags = IB_SEND_SIGNALED;  | 
|---|
| 546 | 520 |  		last->ex.invalidate_rkey = mr->mr_handle; | 
|---|
| 547 |  | -		count++;  | 
|---|
| 548 | 521 |   | 
|---|
| 549 | 522 |  		*prev = last; | 
|---|
| 550 | 523 |  		prev = &last->next; | 
|---|
| 551 | 524 |  	} | 
|---|
| 552 |  | -	if (!frwr)  | 
|---|
| 553 |  | -		goto unmap;  | 
|---|
| 554 | 525 |   | 
|---|
| 555 | 526 |  	/* Strong send queue ordering guarantees that when the | 
|---|
| 556 | 527 |  	 * last WR in the chain completes, all WRs in the chain | 
|---|
| 557 | 528 |  	 * are complete. | 
|---|
| 558 | 529 |  	 */ | 
|---|
| 559 |  | -	last->send_flags = IB_SEND_SIGNALED;  | 
|---|
| 560 | 530 |  	frwr->fr_cqe.done = frwr_wc_localinv_wake; | 
|---|
| 561 | 531 |  	reinit_completion(&frwr->fr_linv_done); | 
|---|
| 562 | 532 |   | 
|---|
| 563 | 533 |  	/* Transport disconnect drains the receive CQ before it | 
|---|
| 564 | 534 |  	 * replaces the QP. The RPC reply handler won't call us | 
|---|
| 565 |  | -	 * unless ri_id->qp is a valid pointer.  | 
|---|
 | 535 | +	 * unless re_id->qp is a valid pointer.  | 
|---|
| 566 | 536 |  	 */ | 
|---|
| 567 |  | -	r_xprt->rx_stats.local_inv_needed++;  | 
|---|
| 568 | 537 |  	bad_wr = NULL; | 
|---|
| 569 |  | -	rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);  | 
|---|
 | 538 | +	rc = ib_post_send(r_xprt->rx_ep->re_id->qp, first, &bad_wr);  | 
|---|
 | 539 | +  | 
|---|
 | 540 | +	/* The final LOCAL_INV WR in the chain is supposed to  | 
|---|
 | 541 | +	 * do the wake. If it was never posted, the wake will  | 
|---|
 | 542 | +	 * not happen, so don't wait in that case.  | 
|---|
 | 543 | +	 */  | 
|---|
| 570 | 544 |  	if (bad_wr != first) | 
|---|
| 571 | 545 |  		wait_for_completion(&frwr->fr_linv_done); | 
|---|
| 572 |  | -	if (rc)  | 
|---|
| 573 |  | -		goto reset_mrs;  | 
|---|
 | 546 | +	if (!rc)  | 
|---|
 | 547 | +		return;  | 
|---|
| 574 | 548 |   | 
|---|
| 575 |  | -	/* ORDER: Now DMA unmap all of the MRs, and return  | 
|---|
| 576 |  | -	 * them to the free MR list.  | 
|---|
 | 549 | +	/* Recycle MRs in the LOCAL_INV chain that did not get posted.  | 
|---|
| 577 | 550 |  	 */ | 
|---|
| 578 |  | -unmap:  | 
|---|
| 579 |  | -	while (!list_empty(mrs)) {  | 
|---|
| 580 |  | -		mr = rpcrdma_mr_pop(mrs);  | 
|---|
| 581 |  | -		rpcrdma_mr_unmap_and_put(mr);  | 
|---|
| 582 |  | -	}  | 
|---|
| 583 |  | -	return;  | 
|---|
| 584 |  | -  | 
|---|
| 585 |  | -reset_mrs:  | 
|---|
| 586 |  | -	pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc);  | 
|---|
| 587 |  | -  | 
|---|
| 588 |  | -	/* Find and reset the MRs in the LOCAL_INV WRs that did not  | 
|---|
| 589 |  | -	 * get posted.  | 
|---|
| 590 |  | -	 */  | 
|---|
 | 551 | +	trace_xprtrdma_post_linv(req, rc);  | 
|---|
| 591 | 552 |  	while (bad_wr) { | 
|---|
| 592 | 553 |  		frwr = container_of(bad_wr, struct rpcrdma_frwr, | 
|---|
| 593 | 554 |  				    fr_invwr); | 
|---|
| 594 | 555 |  		mr = container_of(frwr, struct rpcrdma_mr, frwr); | 
|---|
| 595 |  | -  | 
|---|
| 596 |  | -		__frwr_mr_reset(ia, mr);  | 
|---|
| 597 |  | -  | 
|---|
| 598 | 556 |  		bad_wr = bad_wr->next; | 
|---|
 | 557 | +  | 
|---|
 | 558 | +		frwr_mr_recycle(mr);  | 
|---|
| 599 | 559 |  	} | 
|---|
| 600 |  | -	goto unmap;  | 
|---|
| 601 | 560 |  } | 
|---|
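frwr_unmap_sync() above parks the caller on fr_linv_done until the final signaled LOCAL_INV completes, whereas frwr_unmap_async() below never sleeps: its final completion handler (frwr_wc_localinv_done) calls rpcrdma_complete_rqst() directly, so the RPC is finished from completion context. Both paths recycle any MRs whose LOCAL_INV WR could not be posted.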
| 602 | 561 |   | 
|---|
| 603 |  | -const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {  | 
|---|
| 604 |  | -	.ro_map				= frwr_op_map,  | 
|---|
| 605 |  | -	.ro_send			= frwr_op_send,  | 
|---|
| 606 |  | -	.ro_reminv			= frwr_op_reminv,  | 
|---|
| 607 |  | -	.ro_unmap_sync			= frwr_op_unmap_sync,  | 
|---|
| 608 |  | -	.ro_recover_mr			= frwr_op_recover_mr,  | 
|---|
| 609 |  | -	.ro_open			= frwr_op_open,  | 
|---|
| 610 |  | -	.ro_maxpages			= frwr_op_maxpages,  | 
|---|
| 611 |  | -	.ro_init_mr			= frwr_op_init_mr,  | 
|---|
| 612 |  | -	.ro_release_mr			= frwr_op_release_mr,  | 
|---|
| 613 |  | -	.ro_displayname			= "frwr",  | 
|---|
| 614 |  | -	.ro_send_w_inv_ok		= RPCRDMA_CMP_F_SND_W_INV_OK,  | 
|---|
| 615 |  | -};  | 
|---|
 | 562 | +/**  | 
|---|
 | 563 | + * frwr_wc_localinv_done - Invoked by RDMA provider for a signaled LOCAL_INV WC  | 
|---|
 | 564 | + * @cq:	completion queue  | 
|---|
 | 565 | + * @wc:	WCE for a completed LocalInv WR  | 
|---|
 | 566 | + *  | 
|---|
 | 567 | + */  | 
|---|
 | 568 | +static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)  | 
|---|
 | 569 | +{  | 
|---|
 | 570 | +	struct ib_cqe *cqe = wc->wr_cqe;  | 
|---|
 | 571 | +	struct rpcrdma_frwr *frwr =  | 
|---|
 | 572 | +		container_of(cqe, struct rpcrdma_frwr, fr_cqe);  | 
|---|
 | 573 | +	struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);  | 
|---|
 | 574 | +	struct rpcrdma_rep *rep = mr->mr_req->rl_reply;  | 
|---|
 | 575 | +  | 
|---|
 | 576 | +	/* WARNING: Only wr_cqe and status are reliable at this point */  | 
|---|
 | 577 | +	trace_xprtrdma_wc_li_done(wc, frwr);  | 
|---|
 | 578 | +	__frwr_release_mr(wc, mr);  | 
|---|
 | 579 | +  | 
|---|
 | 580 | +	/* Ensure @rep is generated before __frwr_release_mr */  | 
|---|
 | 581 | +	smp_rmb();  | 
|---|
 | 582 | +	rpcrdma_complete_rqst(rep);  | 
|---|
 | 583 | +  | 
|---|
 | 584 | +	rpcrdma_flush_disconnect(cq->cq_context, wc);  | 
|---|
 | 585 | +}  | 
|---|
 | 586 | +  | 
|---|
 | 587 | +/**  | 
|---|
 | 588 | + * frwr_unmap_async - invalidate memory regions that were registered for @req  | 
|---|
 | 589 | + * @r_xprt: controlling transport instance  | 
|---|
 | 590 | + * @req: rpcrdma_req with a non-empty list of MRs to process  | 
|---|
 | 591 | + *  | 
|---|
 | 592 | + * This guarantees that registered MRs are properly fenced from the  | 
|---|
 | 593 | + * server before the RPC consumer accesses the data in them. It also  | 
|---|
 | 594 | + * ensures proper Send flow control: waking the next RPC waits until  | 
|---|
 | 595 | + * this RPC has relinquished all its Send Queue entries.  | 
|---|
 | 596 | + */  | 
|---|
 | 597 | +void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)  | 
|---|
 | 598 | +{  | 
|---|
 | 599 | +	struct ib_send_wr *first, *last, **prev;  | 
|---|
 | 600 | +	const struct ib_send_wr *bad_wr;  | 
|---|
 | 601 | +	struct rpcrdma_frwr *frwr;  | 
|---|
 | 602 | +	struct rpcrdma_mr *mr;  | 
|---|
 | 603 | +	int rc;  | 
|---|
 | 604 | +  | 
|---|
 | 605 | +	/* Chain the LOCAL_INV Work Requests and post them with  | 
|---|
 | 606 | +	 * a single ib_post_send() call.  | 
|---|
 | 607 | +	 */  | 
|---|
 | 608 | +	frwr = NULL;  | 
|---|
 | 609 | +	prev = &first;  | 
|---|
 | 610 | +	while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {  | 
|---|
 | 611 | +  | 
|---|
 | 612 | +		trace_xprtrdma_mr_localinv(mr);  | 
|---|
 | 613 | +		r_xprt->rx_stats.local_inv_needed++;  | 
|---|
 | 614 | +  | 
|---|
 | 615 | +		frwr = &mr->frwr;  | 
|---|
 | 616 | +		frwr->fr_cqe.done = frwr_wc_localinv;  | 
|---|
 | 617 | +		last = &frwr->fr_invwr;  | 
|---|
 | 618 | +		last->next = NULL;  | 
|---|
 | 619 | +		last->wr_cqe = &frwr->fr_cqe;  | 
|---|
 | 620 | +		last->sg_list = NULL;  | 
|---|
 | 621 | +		last->num_sge = 0;  | 
|---|
 | 622 | +		last->opcode = IB_WR_LOCAL_INV;  | 
|---|
 | 623 | +		last->send_flags = IB_SEND_SIGNALED;  | 
|---|
 | 624 | +		last->ex.invalidate_rkey = mr->mr_handle;  | 
|---|
 | 625 | +  | 
|---|
 | 626 | +		*prev = last;  | 
|---|
 | 627 | +		prev = &last->next;  | 
|---|
 | 628 | +	}  | 
|---|
 | 629 | +  | 
|---|
 | 630 | +	/* Strong send queue ordering guarantees that when the  | 
|---|
 | 631 | +	 * last WR in the chain completes, all WRs in the chain  | 
|---|
 | 632 | +	 * are complete. The last completion will wake up the  | 
|---|
 | 633 | +	 * RPC waiter.  | 
|---|
 | 634 | +	 */  | 
|---|
 | 635 | +	frwr->fr_cqe.done = frwr_wc_localinv_done;  | 
|---|
 | 636 | +  | 
|---|
 | 637 | +	/* Transport disconnect drains the receive CQ before it  | 
|---|
 | 638 | +	 * replaces the QP. The RPC reply handler won't call us  | 
|---|
 | 639 | +	 * unless re_id->qp is a valid pointer.  | 
|---|
 | 640 | +	 */  | 
|---|
 | 641 | +	bad_wr = NULL;  | 
|---|
 | 642 | +	rc = ib_post_send(r_xprt->rx_ep->re_id->qp, first, &bad_wr);  | 
|---|
 | 643 | +	if (!rc)  | 
|---|
 | 644 | +		return;  | 
|---|
 | 645 | +  | 
|---|
 | 646 | +	/* Recycle MRs in the LOCAL_INV chain that did not get posted.  | 
|---|
 | 647 | +	 */  | 
|---|
 | 648 | +	trace_xprtrdma_post_linv(req, rc);  | 
|---|
 | 649 | +	while (bad_wr) {  | 
|---|
 | 650 | +		frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr);  | 
|---|
 | 651 | +		mr = container_of(frwr, struct rpcrdma_mr, frwr);  | 
|---|
 | 652 | +		bad_wr = bad_wr->next;  | 
|---|
 | 653 | +  | 
|---|
 | 654 | +		frwr_mr_recycle(mr);  | 
|---|
 | 655 | +	}  | 
|---|
 | 656 | +  | 
|---|
 | 657 | +	/* The final LOCAL_INV WR in the chain is supposed to  | 
|---|
 | 658 | +	 * do the wake. If it was never posted, the wake will  | 
|---|
 | 659 | +	 * not happen, so wake here in that case.  | 
|---|
 | 660 | +	 */  | 
|---|
 | 661 | +	rpcrdma_complete_rqst(req->rl_reply);  | 
|---|
 | 662 | +}  | 
|---|