| .. | .. |
|---|
| 53 | 53 | #include <linux/slab.h> |
|---|
| 54 | 54 | #include <linux/sunrpc/addr.h> |
|---|
| 55 | 55 | #include <linux/sunrpc/svc_rdma.h> |
|---|
| 56 | +#include <linux/log2.h> |
|---|
| 56 | 57 | |
|---|
| 57 | 58 | #include <asm-generic/barrier.h> |
|---|
| 58 | 59 | #include <asm/bitops.h> |
|---|
| .. | .. |
|---|
| 73 | 74 | /* |
|---|
| 74 | 75 | * internal functions |
|---|
| 75 | 76 | */ |
|---|
| 76 | | -static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); |
|---|
| 77 | +static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt); |
|---|
| 78 | +static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt); |
|---|
| 79 | +static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, |
|---|
| 80 | + struct rpcrdma_sendctx *sc); |
|---|
| 81 | +static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt); |
|---|
| 82 | +static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt); |
|---|
| 83 | +static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep); |
|---|
| 84 | +static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt); |
|---|
| 77 | 85 | static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); |
|---|
| 78 | | -static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf); |
|---|
| 79 | | -static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp); |
|---|
| 80 | | -static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); |
|---|
| 86 | +static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt); |
|---|
| 87 | +static void rpcrdma_ep_get(struct rpcrdma_ep *ep); |
|---|
| 88 | +static int rpcrdma_ep_put(struct rpcrdma_ep *ep); |
|---|
| 89 | +static struct rpcrdma_regbuf * |
|---|
| 90 | +rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, |
|---|
| 91 | + gfp_t flags); |
|---|
| 92 | +static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb); |
|---|
| 93 | +static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb); |
|---|
| 81 | 94 | |
|---|
| 82 | | -struct workqueue_struct *rpcrdma_receive_wq __read_mostly; |
|---|
| 83 | | - |
|---|
| 84 | | -int |
|---|
| 85 | | -rpcrdma_alloc_wq(void) |
|---|
| 95 | +/* Wait for outstanding transport work to finish. ib_drain_qp |
|---|
| 96 | + * handles the drains in the wrong order for us, so open code |
|---|
| 97 | + * them here. |
|---|
| 98 | + */ |
|---|
| 99 | +static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) |
|---|
| 86 | 100 | { |
|---|
| 87 | | - struct workqueue_struct *recv_wq; |
|---|
| 101 | + struct rpcrdma_ep *ep = r_xprt->rx_ep; |
|---|
| 102 | + struct rdma_cm_id *id = ep->re_id; |
|---|
| 88 | 103 | |
|---|
| 89 | | - recv_wq = alloc_workqueue("xprtrdma_receive", |
|---|
| 90 | | - WQ_MEM_RECLAIM | WQ_HIGHPRI, |
|---|
| 91 | | - 0); |
|---|
| 92 | | - if (!recv_wq) |
|---|
| 93 | | - return -ENOMEM; |
|---|
| 104 | + /* Flush Receives, then wait for deferred Reply work |
|---|
| 105 | + * to complete. |
|---|
| 106 | + */ |
|---|
| 107 | + ib_drain_rq(id->qp); |
|---|
| 94 | 108 | |
|---|
| 95 | | - rpcrdma_receive_wq = recv_wq; |
|---|
| 96 | | - return 0; |
|---|
| 109 | + /* Deferred Reply processing might have scheduled |
|---|
| 110 | + * local invalidations. |
|---|
| 111 | + */ |
|---|
| 112 | + ib_drain_sq(id->qp); |
|---|
| 113 | + |
|---|
| 114 | + rpcrdma_ep_put(ep); |
|---|
| 97 | 115 | } |
|---|
| 98 | 116 | |
|---|
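Editor's note: the drain ordering above is the point of this hunk. `ib_drain_qp()` drains the Send queue before the Receive queue, but this transport needs the reverse: Receives must flush first so deferred Reply processing can post its Local Invalidate WRs on the Send queue before that queue is drained. A minimal sketch of the two-stage idiom, assuming only the in-kernel `ib_drain_rq()`/`ib_drain_sq()` helpers:

```c
#include <rdma/ib_verbs.h>

/* Illustrative two-stage drain. Draining the RQ first lets deferred
 * Receive/Reply work run and post Send WRs (e.g. Local Invalidates);
 * draining the SQ afterwards guarantees those WRs have completed or
 * been flushed as well.
 */
static void two_stage_drain(struct ib_qp *qp)
{
	ib_drain_rq(qp);	/* 1. flush Receives, let deferred work run */
	ib_drain_sq(qp);	/* 2. flush Sends, incl. late Local Invalidates */
}
```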
| 99 | | -void |
|---|
| 100 | | -rpcrdma_destroy_wq(void) |
|---|
| 101 | | -{ |
|---|
| 102 | | - struct workqueue_struct *wq; |
|---|
| 103 | | - |
|---|
| 104 | | - if (rpcrdma_receive_wq) { |
|---|
| 105 | | - wq = rpcrdma_receive_wq; |
|---|
| 106 | | - rpcrdma_receive_wq = NULL; |
|---|
| 107 | | - destroy_workqueue(wq); |
|---|
| 108 | | - } |
|---|
| 109 | | -} |
|---|
| 110 | | - |
|---|
| 111 | | -static void |
|---|
| 112 | | -rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) |
|---|
| 117 | +/** |
|---|
| 118 | + * rpcrdma_qp_event_handler - Handle one QP event (error notification) |
|---|
| 119 | + * @event: details of the event |
|---|
| 120 | + * @context: ep that owns QP where event occurred |
|---|
| 121 | + * |
|---|
| 122 | + * Called from the RDMA provider (device driver) possibly in an interrupt |
|---|
| 123 | + * context. The QP is always destroyed before the ID, so the ID will be |
|---|
| 124 | + * reliably available when this handler is invoked. |
|---|
| 125 | + */ |
|---|
| 126 | +static void rpcrdma_qp_event_handler(struct ib_event *event, void *context) |
|---|
| 113 | 127 | { |
|---|
| 114 | 128 | struct rpcrdma_ep *ep = context; |
|---|
| 115 | | - struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt, |
|---|
| 116 | | - rx_ep); |
|---|
| 117 | 129 | |
|---|
| 118 | | - trace_xprtrdma_qp_error(r_xprt, event); |
|---|
| 119 | | - pr_err("rpcrdma: %s on device %s ep %p\n", |
|---|
| 120 | | - ib_event_msg(event->event), event->device->name, context); |
|---|
| 130 | + trace_xprtrdma_qp_event(ep, event); |
|---|
| 131 | +} |
|---|
| 121 | 132 | |
|---|
| 122 | | - if (ep->rep_connected == 1) { |
|---|
| 123 | | - ep->rep_connected = -EIO; |
|---|
| 124 | | - rpcrdma_conn_func(ep); |
|---|
| 125 | | - wake_up_all(&ep->rep_connect_wait); |
|---|
| 126 | | - } |
|---|
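Editor's note: the new handler above is hooked up through the QP's init attributes, and as the kernel-doc says it can be invoked from interrupt context, so it must not sleep. A sketch of that wiring; `my_ep` and the handler name are illustrative, the `event_handler`/`qp_context` fields are the real `struct ib_qp_init_attr` members:

```c
#include <linux/printk.h>
#include <rdma/ib_verbs.h>

struct my_ep { int placeholder; };	/* stands in for the ULP's endpoint */

/* May run in interrupt context: no sleeping, minimal work. */
static void my_qp_event_handler(struct ib_event *event, void *context)
{
	struct my_ep *ep = context;

	pr_debug("QP event %s on device %s (ep %p)\n",
		 ib_event_msg(event->event), event->device->name, ep);
}

static void my_init_qp_attrs(struct ib_qp_init_attr *attr, struct my_ep *ep)
{
	attr->event_handler = my_qp_event_handler;
	attr->qp_context = ep;		/* delivered back as @context */
}
```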
| 133 | +/* Ensure xprt_force_disconnect() is invoked exactly once when a |
|---|
| 134 | + * connection is closed or lost. (The important thing is it needs |
|---|
| 135 | + * to be invoked "at least" once). |
|---|
| 136 | + */ |
|---|
| 137 | +static void rpcrdma_force_disconnect(struct rpcrdma_ep *ep) |
|---|
| 138 | +{ |
|---|
| 139 | + if (atomic_add_unless(&ep->re_force_disconnect, 1, 1)) |
|---|
| 140 | + xprt_force_disconnect(ep->re_xprt); |
|---|
| 141 | +} |
|---|
| 142 | + |
|---|
| 143 | +/** |
|---|
| 144 | + * rpcrdma_flush_disconnect - Disconnect on flushed completion |
|---|
| 145 | + * @r_xprt: transport to disconnect |
|---|
| 146 | + * @wc: work completion entry |
|---|
| 147 | + * |
|---|
| 148 | + * Must be called in process context. |
|---|
| 149 | + */ |
|---|
| 150 | +void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc) |
|---|
| 151 | +{ |
|---|
| 152 | + if (wc->status != IB_WC_SUCCESS) |
|---|
| 153 | + rpcrdma_force_disconnect(r_xprt->rx_ep); |
|---|
| 127 | 154 | } |
|---|
| 128 | 155 | |
|---|
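Editor's note: `atomic_add_unless(&v, 1, 1)` increments only when the counter is not already 1 and returns non-zero when it did, which makes it a compact one-shot test-and-set: the first caller wins, every later caller sees failure. A minimal sketch of the idiom (the `fire_once` name is illustrative):

```c
#include <linux/atomic.h>
#include <linux/types.h>

static atomic_t fired = ATOMIC_INIT(0);

/* Returns true for exactly the first caller; false for everyone after.
 * rpcrdma re-arms this for free because each endpoint is freshly
 * kzalloc'd, so re_force_disconnect starts at zero per connection.
 */
static bool fire_once(void)
{
	return atomic_add_unless(&fired, 1, 1) != 0;
}
```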
| 129 | 156 | /** |
|---|
| 130 | 157 | * rpcrdma_wc_send - Invoked by RDMA provider for each polled Send WC |
|---|
| 131 | | - * @cq: completion queue (ignored) |
|---|
| 132 | | - * @wc: completed WR |
|---|
| 158 | + * @cq: completion queue |
|---|
| 159 | + * @wc: WCE for a completed Send WR |
|---|
| 133 | 160 | * |
|---|
| 134 | 161 | */ |
|---|
| 135 | | -static void |
|---|
| 136 | | -rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) |
|---|
| 162 | +static void rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) |
|---|
| 137 | 163 | { |
|---|
| 138 | 164 | struct ib_cqe *cqe = wc->wr_cqe; |
|---|
| 139 | 165 | struct rpcrdma_sendctx *sc = |
|---|
| 140 | 166 | container_of(cqe, struct rpcrdma_sendctx, sc_cqe); |
|---|
| 167 | + struct rpcrdma_xprt *r_xprt = cq->cq_context; |
|---|
| 141 | 168 | |
|---|
| 142 | 169 | /* WARNING: Only wr_cqe and status are reliable at this point */ |
|---|
| 143 | 170 | trace_xprtrdma_wc_send(sc, wc); |
|---|
| 144 | | - if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) |
|---|
| 145 | | - pr_err("rpcrdma: Send: %s (%u/0x%x)\n", |
|---|
| 146 | | - ib_wc_status_msg(wc->status), |
|---|
| 147 | | - wc->status, wc->vendor_err); |
|---|
| 148 | | - |
|---|
| 149 | | - rpcrdma_sendctx_put_locked(sc); |
|---|
| 171 | + rpcrdma_sendctx_put_locked(r_xprt, sc); |
|---|
| 172 | + rpcrdma_flush_disconnect(r_xprt, wc); |
|---|
| 150 | 173 | } |
|---|
| 151 | 174 | |
|---|
| 152 | 175 | /** |
|---|
| 153 | 176 | * rpcrdma_wc_receive - Invoked by RDMA provider for each polled Receive WC |
|---|
| 154 | | - * @cq: completion queue (ignored) |
|---|
| 155 | | - * @wc: completed WR |
|---|
| 177 | + * @cq: completion queue |
|---|
| 178 | + * @wc: WCE for a completed Receive WR |
|---|
| 156 | 179 | * |
|---|
| 157 | 180 | */ |
|---|
| 158 | | -static void |
|---|
| 159 | | -rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) |
|---|
| 181 | +static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) |
|---|
| 160 | 182 | { |
|---|
| 161 | 183 | struct ib_cqe *cqe = wc->wr_cqe; |
|---|
| 162 | 184 | struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep, |
|---|
| 163 | 185 | rr_cqe); |
|---|
| 186 | + struct rpcrdma_xprt *r_xprt = cq->cq_context; |
|---|
| 164 | 187 | |
|---|
| 165 | | - /* WARNING: Only wr_id and status are reliable at this point */ |
|---|
| 188 | + /* WARNING: Only wr_cqe and status are reliable at this point */ |
|---|
| 166 | 189 | trace_xprtrdma_wc_receive(wc); |
|---|
| 190 | + --r_xprt->rx_ep->re_receive_count; |
|---|
| 167 | 191 | if (wc->status != IB_WC_SUCCESS) |
|---|
| 168 | | - goto out_fail; |
|---|
| 192 | + goto out_flushed; |
|---|
| 169 | 193 | |
|---|
| 170 | 194 | /* status == SUCCESS means all fields in wc are trustworthy */ |
|---|
| 171 | 195 | rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len); |
|---|
| .. | .. |
|---|
| 176 | 200 | rdmab_addr(rep->rr_rdmabuf), |
|---|
| 177 | 201 | wc->byte_len, DMA_FROM_DEVICE); |
|---|
| 178 | 202 | |
|---|
| 179 | | -out_schedule: |
|---|
| 180 | 203 | rpcrdma_reply_handler(rep); |
|---|
| 181 | 204 | return; |
|---|
| 182 | 205 | |
|---|
| 183 | | -out_fail: |
|---|
| 184 | | - if (wc->status != IB_WC_WR_FLUSH_ERR) |
|---|
| 185 | | - pr_err("rpcrdma: Recv: %s (%u/0x%x)\n", |
|---|
| 186 | | - ib_wc_status_msg(wc->status), |
|---|
| 187 | | - wc->status, wc->vendor_err); |
|---|
| 188 | | - rpcrdma_set_xdrlen(&rep->rr_hdrbuf, 0); |
|---|
| 189 | | - goto out_schedule; |
|---|
| 206 | +out_flushed: |
|---|
| 207 | + rpcrdma_flush_disconnect(r_xprt, wc); |
|---|
| 208 | + rpcrdma_rep_destroy(rep); |
|---|
| 190 | 209 | } |
|---|
| 191 | 210 | |
|---|
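Editor's note: both completion handlers in this hunk rely on the `ib_cqe` pattern: the WR's `wr_cqe` points at a `struct ib_cqe` embedded in the object that issued the WR, and `container_of()` recovers that object in the handler. They also now reach the transport via `cq->cq_context`, set when the CQ is allocated. A sketch of the pattern with `my_rep` as an illustrative containing type:

```c
#include <rdma/ib_verbs.h>

struct my_rep {
	struct ib_cqe	rr_cqe;		/* embedded completion entry */
	/* ... receive buffers, state ... */
};

static void my_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
{
	struct my_rep *rep = container_of(wc->wr_cqe, struct my_rep, rr_cqe);

	/* On a flushed completion, only wr_cqe and status are reliable. */
	if (wc->status != IB_WC_SUCCESS) {
		/* release @rep */
		return;
	}
	/* wc->byte_len and friends are trustworthy now */
	(void)cq;
	(void)rep;
}

/* Posting side: the Receive WR carries a pointer to the embedded cqe. */
static int my_post_recv(struct ib_qp *qp, struct my_rep *rep,
			struct ib_sge *sge)
{
	const struct ib_recv_wr *bad_wr;
	struct ib_recv_wr wr = {
		.wr_cqe	 = &rep->rr_cqe,
		.sg_list = sge,
		.num_sge = 1,
	};

	rep->rr_cqe.done = my_wc_receive;
	return ib_post_recv(qp, &wr, &bad_wr);
}
```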
| 192 | | -static void |
|---|
| 193 | | -rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, |
|---|
| 194 | | - struct rdma_conn_param *param) |
|---|
| 211 | +static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep, |
|---|
| 212 | + struct rdma_conn_param *param) |
|---|
| 195 | 213 | { |
|---|
| 196 | | - struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; |
|---|
| 197 | 214 | const struct rpcrdma_connect_private *pmsg = param->private_data; |
|---|
| 198 | 215 | unsigned int rsize, wsize; |
|---|
| 199 | 216 | |
|---|
| 200 | 217 | /* Default settings for RPC-over-RDMA Version One */ |
|---|
| 201 | | - r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize; |
|---|
| 218 | + ep->re_implicit_roundup = xprt_rdma_pad_optimize; |
|---|
| 202 | 219 | rsize = RPCRDMA_V1_DEF_INLINE_SIZE; |
|---|
| 203 | 220 | wsize = RPCRDMA_V1_DEF_INLINE_SIZE; |
|---|
| 204 | 221 | |
|---|
| 205 | 222 | if (pmsg && |
|---|
| 206 | 223 | pmsg->cp_magic == rpcrdma_cmp_magic && |
|---|
| 207 | 224 | pmsg->cp_version == RPCRDMA_CMP_VERSION) { |
|---|
| 208 | | - r_xprt->rx_ia.ri_implicit_roundup = true; |
|---|
| 225 | + ep->re_implicit_roundup = true; |
|---|
| 209 | 226 | rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); |
|---|
| 210 | 227 | wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); |
|---|
| 211 | 228 | } |
|---|
| 212 | 229 | |
|---|
| 213 | | - if (rsize < cdata->inline_rsize) |
|---|
| 214 | | - cdata->inline_rsize = rsize; |
|---|
| 215 | | - if (wsize < cdata->inline_wsize) |
|---|
| 216 | | - cdata->inline_wsize = wsize; |
|---|
| 217 | | - dprintk("RPC: %s: max send %u, max recv %u\n", |
|---|
| 218 | | - __func__, cdata->inline_wsize, cdata->inline_rsize); |
|---|
| 219 | | - rpcrdma_set_max_header_sizes(r_xprt); |
|---|
| 230 | + if (rsize < ep->re_inline_recv) |
|---|
| 231 | + ep->re_inline_recv = rsize; |
|---|
| 232 | + if (wsize < ep->re_inline_send) |
|---|
| 233 | + ep->re_inline_send = wsize; |
|---|
| 234 | + |
|---|
| 235 | + rpcrdma_set_max_header_sizes(ep); |
|---|
| 220 | 236 | } |
|---|
| 221 | 237 | |
|---|
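Editor's note: the private-data exchange above is defensive by design: defaults are the RPC-over-RDMA v1 inline sizes, the peer's advertised sizes are honored only when the magic and version check out, and the result is clamped downward so neither side sends more inline data than the other prepared to receive. A sketch of that negotiation shape; the struct layout and `my_decode_size()` are hypothetical stand-ins (the real `rpcrdma_decode_buffer_size()` uses a compact encoding not reproduced here):

```c
#include <linux/kernel.h>
#include <linux/types.h>

#define MY_V1_DEF_INLINE	1024U	/* illustrative default */

struct my_cm_priv {
	__be32	cp_magic;
	u8	cp_version;
	u8	cp_send_size;
	u8	cp_recv_size;
};

/* Hypothetical decode, for illustration only. */
static unsigned int my_decode_size(u8 val)
{
	return 1024U << val;
}

static void my_negotiate_inline(const struct my_cm_priv *pmsg, __be32 magic,
				u8 version, unsigned int *inline_recv,
				unsigned int *inline_send)
{
	unsigned int rsize = MY_V1_DEF_INLINE;
	unsigned int wsize = MY_V1_DEF_INLINE;

	if (pmsg && pmsg->cp_magic == magic && pmsg->cp_version == version) {
		rsize = my_decode_size(pmsg->cp_send_size);
		wsize = my_decode_size(pmsg->cp_recv_size);
	}

	/* Clamp downward: never promise more than the peer can take. */
	*inline_recv = min(*inline_recv, rsize);
	*inline_send = min(*inline_send, wsize);
}
```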
| 238 | +/** |
|---|
| 239 | + * rpcrdma_cm_event_handler - Handle RDMA CM events |
|---|
| 240 | + * @id: rdma_cm_id on which an event has occurred |
|---|
| 241 | + * @event: details of the event |
|---|
| 242 | + * |
|---|
| 243 | + * Called with @id's mutex held. Returns 1 if caller should |
|---|
| 244 | + * destroy @id, otherwise 0. |
|---|
| 245 | + */ |
|---|
| 222 | 246 | static int |
|---|
| 223 | | -rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) |
|---|
| 247 | +rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) |
|---|
| 224 | 248 | { |
|---|
| 225 | | - struct rpcrdma_xprt *xprt = id->context; |
|---|
| 226 | | - struct rpcrdma_ia *ia = &xprt->rx_ia; |
|---|
| 227 | | - struct rpcrdma_ep *ep = &xprt->rx_ep; |
|---|
| 228 | | - int connstate = 0; |
|---|
| 249 | + struct sockaddr *sap = (struct sockaddr *)&id->route.addr.dst_addr; |
|---|
| 250 | + struct rpcrdma_ep *ep = id->context; |
|---|
| 229 | 251 | |
|---|
| 230 | | - trace_xprtrdma_conn_upcall(xprt, event); |
|---|
| 252 | + might_sleep(); |
|---|
| 253 | + |
|---|
| 231 | 254 | switch (event->event) { |
|---|
| 232 | 255 | case RDMA_CM_EVENT_ADDR_RESOLVED: |
|---|
| 233 | 256 | case RDMA_CM_EVENT_ROUTE_RESOLVED: |
|---|
| 234 | | - ia->ri_async_rc = 0; |
|---|
| 235 | | - complete(&ia->ri_done); |
|---|
| 236 | | - break; |
|---|
| 257 | + ep->re_async_rc = 0; |
|---|
| 258 | + complete(&ep->re_done); |
|---|
| 259 | + return 0; |
|---|
| 237 | 260 | case RDMA_CM_EVENT_ADDR_ERROR: |
|---|
| 238 | | - ia->ri_async_rc = -EPROTO; |
|---|
| 239 | | - complete(&ia->ri_done); |
|---|
| 240 | | - break; |
|---|
| 261 | + ep->re_async_rc = -EPROTO; |
|---|
| 262 | + complete(&ep->re_done); |
|---|
| 263 | + return 0; |
|---|
| 241 | 264 | case RDMA_CM_EVENT_ROUTE_ERROR: |
|---|
| 242 | | - ia->ri_async_rc = -ENETUNREACH; |
|---|
| 243 | | - complete(&ia->ri_done); |
|---|
| 244 | | - break; |
|---|
| 265 | + ep->re_async_rc = -ENETUNREACH; |
|---|
| 266 | + complete(&ep->re_done); |
|---|
| 267 | + return 0; |
|---|
| 245 | 268 | case RDMA_CM_EVENT_DEVICE_REMOVAL: |
|---|
| 246 | | -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
|---|
| 247 | | - pr_info("rpcrdma: removing device %s for %s:%s\n", |
|---|
| 248 | | - ia->ri_device->name, |
|---|
| 249 | | - rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt)); |
|---|
| 250 | | -#endif |
|---|
| 251 | | - init_completion(&ia->ri_remove_done); |
|---|
| 252 | | - set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags); |
|---|
| 253 | | - ep->rep_connected = -ENODEV; |
|---|
| 254 | | - xprt_force_disconnect(&xprt->rx_xprt); |
|---|
| 255 | | - wait_for_completion(&ia->ri_remove_done); |
|---|
| 256 | | - |
|---|
| 257 | | - ia->ri_id = NULL; |
|---|
| 258 | | - ia->ri_device = NULL; |
|---|
| 259 | | - /* Return 1 to ensure the core destroys the id. */ |
|---|
| 260 | | - return 1; |
|---|
| 269 | + pr_info("rpcrdma: removing device %s for %pISpc\n", |
|---|
| 270 | + ep->re_id->device->name, sap); |
|---|
| 271 | + fallthrough; |
|---|
| 272 | + case RDMA_CM_EVENT_ADDR_CHANGE: |
|---|
| 273 | + ep->re_connect_status = -ENODEV; |
|---|
| 274 | + goto disconnected; |
|---|
| 261 | 275 | case RDMA_CM_EVENT_ESTABLISHED: |
|---|
| 262 | | - ++xprt->rx_xprt.connect_cookie; |
|---|
| 263 | | - connstate = 1; |
|---|
| 264 | | - rpcrdma_update_connect_private(xprt, &event->param.conn); |
|---|
| 265 | | - goto connected; |
|---|
| 276 | + rpcrdma_ep_get(ep); |
|---|
| 277 | + ep->re_connect_status = 1; |
|---|
| 278 | + rpcrdma_update_cm_private(ep, &event->param.conn); |
|---|
| 279 | + trace_xprtrdma_inline_thresh(ep); |
|---|
| 280 | + wake_up_all(&ep->re_connect_wait); |
|---|
| 281 | + break; |
|---|
| 266 | 282 | case RDMA_CM_EVENT_CONNECT_ERROR: |
|---|
| 267 | | - connstate = -ENOTCONN; |
|---|
| 268 | | - goto connected; |
|---|
| 283 | + ep->re_connect_status = -ENOTCONN; |
|---|
| 284 | + goto wake_connect_worker; |
|---|
| 269 | 285 | case RDMA_CM_EVENT_UNREACHABLE: |
|---|
| 270 | | - connstate = -ENETUNREACH; |
|---|
| 271 | | - goto connected; |
|---|
| 286 | + ep->re_connect_status = -ENETUNREACH; |
|---|
| 287 | + goto wake_connect_worker; |
|---|
| 272 | 288 | case RDMA_CM_EVENT_REJECTED: |
|---|
| 273 | | - dprintk("rpcrdma: connection to %s:%s rejected: %s\n", |
|---|
| 274 | | - rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt), |
|---|
| 275 | | - rdma_reject_msg(id, event->status)); |
|---|
| 276 | | - connstate = -ECONNREFUSED; |
|---|
| 289 | + dprintk("rpcrdma: connection to %pISpc rejected: %s\n", |
|---|
| 290 | + sap, rdma_reject_msg(id, event->status)); |
|---|
| 291 | + ep->re_connect_status = -ECONNREFUSED; |
|---|
| 277 | 292 | if (event->status == IB_CM_REJ_STALE_CONN) |
|---|
| 278 | | - connstate = -EAGAIN; |
|---|
| 279 | | - goto connected; |
|---|
| 293 | + ep->re_connect_status = -ENOTCONN; |
|---|
| 294 | +wake_connect_worker: |
|---|
| 295 | + wake_up_all(&ep->re_connect_wait); |
|---|
| 296 | + return 0; |
|---|
| 280 | 297 | case RDMA_CM_EVENT_DISCONNECTED: |
|---|
| 281 | | - ++xprt->rx_xprt.connect_cookie; |
|---|
| 282 | | - connstate = -ECONNABORTED; |
|---|
| 283 | | -connected: |
|---|
| 284 | | - ep->rep_connected = connstate; |
|---|
| 285 | | - rpcrdma_conn_func(ep); |
|---|
| 286 | | - wake_up_all(&ep->rep_connect_wait); |
|---|
| 287 | | - /*FALLTHROUGH*/ |
|---|
| 298 | + ep->re_connect_status = -ECONNABORTED; |
|---|
| 299 | +disconnected: |
|---|
| 300 | + rpcrdma_force_disconnect(ep); |
|---|
| 301 | + return rpcrdma_ep_put(ep); |
|---|
| 288 | 302 | default: |
|---|
| 289 | | - dprintk("RPC: %s: %s:%s on %s/%s (ep 0x%p): %s\n", |
|---|
| 290 | | - __func__, |
|---|
| 291 | | - rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt), |
|---|
| 292 | | - ia->ri_device->name, ia->ri_ops->ro_displayname, |
|---|
| 293 | | - ep, rdma_event_msg(event->event)); |
|---|
| 294 | 303 | break; |
|---|
| 295 | 304 | } |
|---|
| 296 | 305 | |
|---|
| 306 | + dprintk("RPC: %s: %pISpc on %s/frwr: %s\n", __func__, sap, |
|---|
| 307 | + ep->re_id->device->name, rdma_event_msg(event->event)); |
|---|
| 297 | 308 | return 0; |
|---|
| 298 | 309 | } |
|---|
| 299 | 310 | |
|---|
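Editor's note: the rewritten handler leans on an rdma_cm contract worth calling out: the callback runs with the ID's mutex held, and a non-zero return asks the core to destroy the `rdma_cm_id` on the caller's behalf. That is how the DISCONNECTED path above ties ID destruction to the endpoint's final kref_put(). A skeleton of the contract; everything except the rdma_cm types is illustrative:

```c
#include <linux/kref.h>
#include <linux/slab.h>
#include <rdma/rdma_cm.h>

struct my_ep {
	struct kref	re_kref;
};

static void my_ep_destroy(struct kref *kref)
{
	kfree(container_of(kref, struct my_ep, re_kref));
}

static int my_ep_put(struct my_ep *ep)
{
	return kref_put(&ep->re_kref, my_ep_destroy);	/* 1 == released */
}

/* Runs with @id's mutex held. Returning 1 asks the CM core to destroy
 * @id for us, which is safe only once nothing else references it.
 */
static int my_cm_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct my_ep *ep = id->context;	/* set at rdma_create_id() time */

	switch (event->event) {
	case RDMA_CM_EVENT_ESTABLISHED:
		kref_get(&ep->re_kref);	/* connection holds a reference */
		return 0;
	case RDMA_CM_EVENT_DISCONNECTED:
		return my_ep_put(ep);	/* last ref? core destroys @id */
	default:
		return 0;
	}
}
```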
| 300 | | -static struct rdma_cm_id * |
|---|
| 301 | | -rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia) |
|---|
| 311 | +static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt, |
|---|
| 312 | + struct rpcrdma_ep *ep) |
|---|
| 302 | 313 | { |
|---|
| 303 | 314 | unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1; |
|---|
| 315 | + struct rpc_xprt *xprt = &r_xprt->rx_xprt; |
|---|
| 304 | 316 | struct rdma_cm_id *id; |
|---|
| 305 | 317 | int rc; |
|---|
| 306 | 318 | |
|---|
| 307 | | - trace_xprtrdma_conn_start(xprt); |
|---|
| 319 | + init_completion(&ep->re_done); |
|---|
| 308 | 320 | |
|---|
| 309 | | - init_completion(&ia->ri_done); |
|---|
| 310 | | - |
|---|
| 311 | | - id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_conn_upcall, |
|---|
| 312 | | - xprt, RDMA_PS_TCP, IB_QPT_RC); |
|---|
| 313 | | - if (IS_ERR(id)) { |
|---|
| 314 | | - rc = PTR_ERR(id); |
|---|
| 315 | | - dprintk("RPC: %s: rdma_create_id() failed %i\n", |
|---|
| 316 | | - __func__, rc); |
|---|
| 321 | + id = rdma_create_id(xprt->xprt_net, rpcrdma_cm_event_handler, ep, |
|---|
| 322 | + RDMA_PS_TCP, IB_QPT_RC); |
|---|
| 323 | + if (IS_ERR(id)) |
|---|
| 317 | 324 | return id; |
|---|
| 318 | | - } |
|---|
| 319 | 325 | |
|---|
| 320 | | - ia->ri_async_rc = -ETIMEDOUT; |
|---|
| 321 | | - rc = rdma_resolve_addr(id, NULL, |
|---|
| 322 | | - (struct sockaddr *)&xprt->rx_xprt.addr, |
|---|
| 326 | + ep->re_async_rc = -ETIMEDOUT; |
|---|
| 327 | + rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)&xprt->addr, |
|---|
| 323 | 328 | RDMA_RESOLVE_TIMEOUT); |
|---|
| 324 | | - if (rc) { |
|---|
| 325 | | - dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", |
|---|
| 326 | | - __func__, rc); |
|---|
| 329 | + if (rc) |
|---|
| 327 | 330 | goto out; |
|---|
| 328 | | - } |
|---|
| 329 | | - rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); |
|---|
| 330 | | - if (rc < 0) { |
|---|
| 331 | | - trace_xprtrdma_conn_tout(xprt); |
|---|
| 331 | + rc = wait_for_completion_interruptible_timeout(&ep->re_done, wtimeout); |
|---|
| 332 | + if (rc < 0) |
|---|
| 332 | 333 | goto out; |
|---|
| 333 | | - } |
|---|
| 334 | 334 | |
|---|
| 335 | | - rc = ia->ri_async_rc; |
|---|
| 335 | + rc = ep->re_async_rc; |
|---|
| 336 | 336 | if (rc) |
|---|
| 337 | 337 | goto out; |
|---|
| 338 | 338 | |
|---|
| 339 | | - ia->ri_async_rc = -ETIMEDOUT; |
|---|
| 339 | + ep->re_async_rc = -ETIMEDOUT; |
|---|
| 340 | 340 | rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); |
|---|
| 341 | | - if (rc) { |
|---|
| 342 | | - dprintk("RPC: %s: rdma_resolve_route() failed %i\n", |
|---|
| 343 | | - __func__, rc); |
|---|
| 341 | + if (rc) |
|---|
| 344 | 342 | goto out; |
|---|
| 345 | | - } |
|---|
| 346 | | - rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); |
|---|
| 347 | | - if (rc < 0) { |
|---|
| 348 | | - trace_xprtrdma_conn_tout(xprt); |
|---|
| 343 | + rc = wait_for_completion_interruptible_timeout(&ep->re_done, wtimeout); |
|---|
| 344 | + if (rc < 0) |
|---|
| 349 | 345 | goto out; |
|---|
| 350 | | - } |
|---|
| 351 | | - rc = ia->ri_async_rc; |
|---|
| 346 | + rc = ep->re_async_rc; |
|---|
| 352 | 347 | if (rc) |
|---|
| 353 | 348 | goto out; |
|---|
| 354 | 349 | |
|---|
| .. | .. |
|---|
| 359 | 354 | return ERR_PTR(rc); |
|---|
| 360 | 355 | } |
|---|
| 361 | 356 | |
|---|
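Editor's note: `rdma_resolve_addr()` and `rdma_resolve_route()` are asynchronous; each kicks off work whose outcome arrives via the CM handler (ADDR_RESOLVED/ADDR_ERROR and friends), so the creator parks on a completion with a timeout slightly longer than the resolve timeout. A condensed sketch of one such rendezvous, mirroring the `re_done`/`re_async_rc` fields used above:

```c
#include <linux/completion.h>
#include <linux/jiffies.h>
#include <rdma/rdma_cm.h>

#define RESOLVE_TIMEOUT_MS	5000	/* illustrative */

/* Issue one async resolve step and wait for the CM callback, which
 * records the result in *async_rc and calls complete(done).
 */
static int resolve_step(struct rdma_cm_id *id, struct sockaddr *addr,
			struct completion *done, int *async_rc)
{
	long rc;

	*async_rc = -ETIMEDOUT;	/* in case the event never arrives */
	rc = rdma_resolve_addr(id, NULL, addr, RESOLVE_TIMEOUT_MS);
	if (rc)
		return rc;
	rc = wait_for_completion_interruptible_timeout(done,
			msecs_to_jiffies(RESOLVE_TIMEOUT_MS) + 1);
	if (rc < 0)
		return rc;	/* interrupted */
	return *async_rc;	/* set by the CM event handler */
}
```

On timeout, the wait returns 0 and the pre-loaded -ETIMEDOUT falls through, which is the same shape the patch uses.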
| 362 | | -/* |
|---|
| 363 | | - * Exported functions. |
|---|
| 364 | | - */ |
|---|
| 365 | | - |
|---|
| 366 | | -/** |
|---|
| 367 | | - * rpcrdma_ia_open - Open and initialize an Interface Adapter. |
|---|
| 368 | | - * @xprt: transport with IA to (re)initialize |
|---|
| 369 | | - * |
|---|
| 370 | | - * Returns 0 on success, negative errno if an appropriate |
|---|
| 371 | | - * Interface Adapter could not be found and opened. |
|---|
| 372 | | - */ |
|---|
| 373 | | -int |
|---|
| 374 | | -rpcrdma_ia_open(struct rpcrdma_xprt *xprt) |
|---|
| 357 | +static void rpcrdma_ep_destroy(struct kref *kref) |
|---|
| 375 | 358 | { |
|---|
| 376 | | - struct rpcrdma_ia *ia = &xprt->rx_ia; |
|---|
| 359 | + struct rpcrdma_ep *ep = container_of(kref, struct rpcrdma_ep, re_kref); |
|---|
| 360 | + |
|---|
| 361 | + if (ep->re_id->qp) { |
|---|
| 362 | + rdma_destroy_qp(ep->re_id); |
|---|
| 363 | + ep->re_id->qp = NULL; |
|---|
| 364 | + } |
|---|
| 365 | + |
|---|
| 366 | + if (ep->re_attr.recv_cq) |
|---|
| 367 | + ib_free_cq(ep->re_attr.recv_cq); |
|---|
| 368 | + ep->re_attr.recv_cq = NULL; |
|---|
| 369 | + if (ep->re_attr.send_cq) |
|---|
| 370 | + ib_free_cq(ep->re_attr.send_cq); |
|---|
| 371 | + ep->re_attr.send_cq = NULL; |
|---|
| 372 | + |
|---|
| 373 | + if (ep->re_pd) |
|---|
| 374 | + ib_dealloc_pd(ep->re_pd); |
|---|
| 375 | + ep->re_pd = NULL; |
|---|
| 376 | + |
|---|
| 377 | + kfree(ep); |
|---|
| 378 | + module_put(THIS_MODULE); |
|---|
| 379 | +} |
|---|
| 380 | + |
|---|
| 381 | +static noinline void rpcrdma_ep_get(struct rpcrdma_ep *ep) |
|---|
| 382 | +{ |
|---|
| 383 | + kref_get(&ep->re_kref); |
|---|
| 384 | +} |
|---|
| 385 | + |
|---|
| 386 | +/* Returns: |
|---|
| 387 | + * %0 if @ep still has a positive kref count, or |
|---|
| 388 | + * %1 if @ep was destroyed successfully. |
|---|
| 389 | + */ |
|---|
| 390 | +static noinline int rpcrdma_ep_put(struct rpcrdma_ep *ep) |
|---|
| 391 | +{ |
|---|
| 392 | + return kref_put(&ep->re_kref, rpcrdma_ep_destroy); |
|---|
| 393 | +} |
|---|
| 394 | + |
|---|
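Editor's note: the endpoint's lifetime is now a straight kref: one reference for the transport, one while connected, one while Receives are outstanding. The release callback runs exactly once, when the last put drops the count to zero, and `kref_put()` returns 1 on release so callers can learn whether they freed it. A compact sketch of the pattern:

```c
#include <linux/kref.h>
#include <linux/slab.h>

struct my_obj {
	struct kref kref;
	/* ... hardware resources ... */
};

static void my_obj_release(struct kref *kref)
{
	struct my_obj *obj = container_of(kref, struct my_obj, kref);

	/* tear down resources exactly once, then free */
	kfree(obj);
}

static struct my_obj *my_obj_alloc(void)
{
	struct my_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);

	if (obj)
		kref_init(&obj->kref);	/* count starts at 1 */
	return obj;
}

/* Returns 1 if this put released the object. */
static int my_obj_put(struct my_obj *obj)
{
	return kref_put(&obj->kref, my_obj_release);
}
```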
| 395 | +static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) |
|---|
| 396 | +{ |
|---|
| 397 | + struct rpcrdma_connect_private *pmsg; |
|---|
| 398 | + struct ib_device *device; |
|---|
| 399 | + struct rdma_cm_id *id; |
|---|
| 400 | + struct rpcrdma_ep *ep; |
|---|
| 377 | 401 | int rc; |
|---|
| 378 | 402 | |
|---|
| 379 | | - ia->ri_id = rpcrdma_create_id(xprt, ia); |
|---|
| 380 | | - if (IS_ERR(ia->ri_id)) { |
|---|
| 381 | | - rc = PTR_ERR(ia->ri_id); |
|---|
| 382 | | - goto out_err; |
|---|
| 403 | + ep = kzalloc(sizeof(*ep), GFP_NOFS); |
|---|
| 404 | + if (!ep) |
|---|
| 405 | + return -ENOTCONN; |
|---|
| 406 | + ep->re_xprt = &r_xprt->rx_xprt; |
|---|
| 407 | + kref_init(&ep->re_kref); |
|---|
| 408 | + |
|---|
| 409 | + id = rpcrdma_create_id(r_xprt, ep); |
|---|
| 410 | + if (IS_ERR(id)) { |
|---|
| 411 | + kfree(ep); |
|---|
| 412 | + return PTR_ERR(id); |
|---|
| 383 | 413 | } |
|---|
| 384 | | - ia->ri_device = ia->ri_id->device; |
|---|
| 414 | + __module_get(THIS_MODULE); |
|---|
| 415 | + device = id->device; |
|---|
| 416 | + ep->re_id = id; |
|---|
| 385 | 417 | |
|---|
| 386 | | - ia->ri_pd = ib_alloc_pd(ia->ri_device, 0); |
|---|
| 387 | | - if (IS_ERR(ia->ri_pd)) { |
|---|
| 388 | | - rc = PTR_ERR(ia->ri_pd); |
|---|
| 389 | | - pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc); |
|---|
| 390 | | - goto out_err; |
|---|
| 391 | | - } |
|---|
| 392 | | - |
|---|
| 393 | | - switch (xprt_rdma_memreg_strategy) { |
|---|
| 394 | | - case RPCRDMA_FRWR: |
|---|
| 395 | | - if (frwr_is_supported(ia)) { |
|---|
| 396 | | - ia->ri_ops = &rpcrdma_frwr_memreg_ops; |
|---|
| 397 | | - break; |
|---|
| 398 | | - } |
|---|
| 399 | | - /*FALLTHROUGH*/ |
|---|
| 400 | | - case RPCRDMA_MTHCAFMR: |
|---|
| 401 | | - if (fmr_is_supported(ia)) { |
|---|
| 402 | | - ia->ri_ops = &rpcrdma_fmr_memreg_ops; |
|---|
| 403 | | - break; |
|---|
| 404 | | - } |
|---|
| 405 | | - /*FALLTHROUGH*/ |
|---|
| 406 | | - default: |
|---|
| 407 | | - pr_err("rpcrdma: Device %s does not support memreg mode %d\n", |
|---|
| 408 | | - ia->ri_device->name, xprt_rdma_memreg_strategy); |
|---|
| 409 | | - rc = -EINVAL; |
|---|
| 410 | | - goto out_err; |
|---|
| 411 | | - } |
|---|
| 412 | | - |
|---|
| 413 | | - return 0; |
|---|
| 414 | | - |
|---|
| 415 | | -out_err: |
|---|
| 416 | | - rpcrdma_ia_close(ia); |
|---|
| 417 | | - return rc; |
|---|
| 418 | | -} |
|---|
| 419 | | - |
|---|
| 420 | | -/** |
|---|
| 421 | | - * rpcrdma_ia_remove - Handle device driver unload |
|---|
| 422 | | - * @ia: interface adapter being removed |
|---|
| 423 | | - * |
|---|
| 424 | | - * Divest transport H/W resources associated with this adapter, |
|---|
| 425 | | - * but allow it to be restored later. |
|---|
| 426 | | - */ |
|---|
| 427 | | -void |
|---|
| 428 | | -rpcrdma_ia_remove(struct rpcrdma_ia *ia) |
|---|
| 429 | | -{ |
|---|
| 430 | | - struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, |
|---|
| 431 | | - rx_ia); |
|---|
| 432 | | - struct rpcrdma_ep *ep = &r_xprt->rx_ep; |
|---|
| 433 | | - struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
|---|
| 434 | | - struct rpcrdma_req *req; |
|---|
| 435 | | - struct rpcrdma_rep *rep; |
|---|
| 436 | | - |
|---|
| 437 | | - cancel_delayed_work_sync(&buf->rb_refresh_worker); |
|---|
| 438 | | - |
|---|
| 439 | | - /* This is similar to rpcrdma_ep_destroy, but: |
|---|
| 440 | | - * - Don't cancel the connect worker. |
|---|
| 441 | | - * - Don't call rpcrdma_ep_disconnect, which waits |
|---|
| 442 | | - * for another conn upcall, which will deadlock. |
|---|
| 443 | | - * - rdma_disconnect is unneeded, the underlying |
|---|
| 444 | | - * connection is already gone. |
|---|
| 445 | | - */ |
|---|
| 446 | | - if (ia->ri_id->qp) { |
|---|
| 447 | | - ib_drain_qp(ia->ri_id->qp); |
|---|
| 448 | | - rdma_destroy_qp(ia->ri_id); |
|---|
| 449 | | - ia->ri_id->qp = NULL; |
|---|
| 450 | | - } |
|---|
| 451 | | - ib_free_cq(ep->rep_attr.recv_cq); |
|---|
| 452 | | - ep->rep_attr.recv_cq = NULL; |
|---|
| 453 | | - ib_free_cq(ep->rep_attr.send_cq); |
|---|
| 454 | | - ep->rep_attr.send_cq = NULL; |
|---|
| 455 | | - |
|---|
| 456 | | - /* The ULP is responsible for ensuring all DMA |
|---|
| 457 | | - * mappings and MRs are gone. |
|---|
| 458 | | - */ |
|---|
| 459 | | - list_for_each_entry(rep, &buf->rb_recv_bufs, rr_list) |
|---|
| 460 | | - rpcrdma_dma_unmap_regbuf(rep->rr_rdmabuf); |
|---|
| 461 | | - list_for_each_entry(req, &buf->rb_allreqs, rl_all) { |
|---|
| 462 | | - rpcrdma_dma_unmap_regbuf(req->rl_rdmabuf); |
|---|
| 463 | | - rpcrdma_dma_unmap_regbuf(req->rl_sendbuf); |
|---|
| 464 | | - rpcrdma_dma_unmap_regbuf(req->rl_recvbuf); |
|---|
| 465 | | - } |
|---|
| 466 | | - rpcrdma_mrs_destroy(buf); |
|---|
| 467 | | - ib_dealloc_pd(ia->ri_pd); |
|---|
| 468 | | - ia->ri_pd = NULL; |
|---|
| 469 | | - |
|---|
| 470 | | - /* Allow waiters to continue */ |
|---|
| 471 | | - complete(&ia->ri_remove_done); |
|---|
| 472 | | - |
|---|
| 473 | | - trace_xprtrdma_remove(r_xprt); |
|---|
| 474 | | -} |
|---|
| 475 | | - |
|---|
| 476 | | -/** |
|---|
| 477 | | - * rpcrdma_ia_close - Clean up/close an IA. |
|---|
| 478 | | - * @ia: interface adapter to close |
|---|
| 479 | | - * |
|---|
| 480 | | - */ |
|---|
| 481 | | -void |
|---|
| 482 | | -rpcrdma_ia_close(struct rpcrdma_ia *ia) |
|---|
| 483 | | -{ |
|---|
| 484 | | - if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { |
|---|
| 485 | | - if (ia->ri_id->qp) |
|---|
| 486 | | - rdma_destroy_qp(ia->ri_id); |
|---|
| 487 | | - rdma_destroy_id(ia->ri_id); |
|---|
| 488 | | - } |
|---|
| 489 | | - ia->ri_id = NULL; |
|---|
| 490 | | - ia->ri_device = NULL; |
|---|
| 491 | | - |
|---|
| 492 | | - /* If the pd is still busy, xprtrdma missed freeing a resource */ |
|---|
| 493 | | - if (ia->ri_pd && !IS_ERR(ia->ri_pd)) |
|---|
| 494 | | - ib_dealloc_pd(ia->ri_pd); |
|---|
| 495 | | - ia->ri_pd = NULL; |
|---|
| 496 | | -} |
|---|
| 497 | | - |
|---|
| 498 | | -/* |
|---|
| 499 | | - * Create unconnected endpoint. |
|---|
| 500 | | - */ |
|---|
| 501 | | -int |
|---|
| 502 | | -rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, |
|---|
| 503 | | - struct rpcrdma_create_data_internal *cdata) |
|---|
| 504 | | -{ |
|---|
| 505 | | - struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private; |
|---|
| 506 | | - struct ib_cq *sendcq, *recvcq; |
|---|
| 507 | | - unsigned int max_sge; |
|---|
| 508 | | - int rc; |
|---|
| 509 | | - |
|---|
| 510 | | - max_sge = min_t(unsigned int, ia->ri_device->attrs.max_send_sge, |
|---|
| 511 | | - RPCRDMA_MAX_SEND_SGES); |
|---|
| 512 | | - if (max_sge < RPCRDMA_MIN_SEND_SGES) { |
|---|
| 513 | | - pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge); |
|---|
| 514 | | - return -ENOMEM; |
|---|
| 515 | | - } |
|---|
| 516 | | - ia->ri_max_send_sges = max_sge; |
|---|
| 517 | | - |
|---|
| 518 | | - rc = ia->ri_ops->ro_open(ia, ep, cdata); |
|---|
| 418 | + ep->re_max_requests = r_xprt->rx_xprt.max_reqs; |
|---|
| 419 | + ep->re_inline_send = xprt_rdma_max_inline_write; |
|---|
| 420 | + ep->re_inline_recv = xprt_rdma_max_inline_read; |
|---|
| 421 | + rc = frwr_query_device(ep, device); |
|---|
| 519 | 422 | if (rc) |
|---|
| 520 | | - return rc; |
|---|
| 423 | + goto out_destroy; |
|---|
| 521 | 424 | |
|---|
| 522 | | - ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; |
|---|
| 523 | | - ep->rep_attr.qp_context = ep; |
|---|
| 524 | | - ep->rep_attr.srq = NULL; |
|---|
| 525 | | - ep->rep_attr.cap.max_send_sge = max_sge; |
|---|
| 526 | | - ep->rep_attr.cap.max_recv_sge = 1; |
|---|
| 527 | | - ep->rep_attr.cap.max_inline_data = 0; |
|---|
| 528 | | - ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; |
|---|
| 529 | | - ep->rep_attr.qp_type = IB_QPT_RC; |
|---|
| 530 | | - ep->rep_attr.port_num = ~0; |
|---|
| 425 | + r_xprt->rx_buf.rb_max_requests = cpu_to_be32(ep->re_max_requests); |
|---|
| 426 | + |
|---|
| 427 | + ep->re_attr.event_handler = rpcrdma_qp_event_handler; |
|---|
| 428 | + ep->re_attr.qp_context = ep; |
|---|
| 429 | + ep->re_attr.srq = NULL; |
|---|
| 430 | + ep->re_attr.cap.max_inline_data = 0; |
|---|
| 431 | + ep->re_attr.sq_sig_type = IB_SIGNAL_REQ_WR; |
|---|
| 432 | + ep->re_attr.qp_type = IB_QPT_RC; |
|---|
| 433 | + ep->re_attr.port_num = ~0; |
|---|
| 531 | 434 | |
|---|
| 532 | 435 | dprintk("RPC: %s: requested max: dtos: send %d recv %d; " |
|---|
| 533 | 436 | "iovs: send %d recv %d\n", |
|---|
| 534 | 437 | __func__, |
|---|
| 535 | | - ep->rep_attr.cap.max_send_wr, |
|---|
| 536 | | - ep->rep_attr.cap.max_recv_wr, |
|---|
| 537 | | - ep->rep_attr.cap.max_send_sge, |
|---|
| 538 | | - ep->rep_attr.cap.max_recv_sge); |
|---|
| 438 | + ep->re_attr.cap.max_send_wr, |
|---|
| 439 | + ep->re_attr.cap.max_recv_wr, |
|---|
| 440 | + ep->re_attr.cap.max_send_sge, |
|---|
| 441 | + ep->re_attr.cap.max_recv_sge); |
|---|
| 539 | 442 | |
|---|
| 540 | | - /* set trigger for requesting send completion */ |
|---|
| 541 | | - ep->rep_send_batch = min_t(unsigned int, RPCRDMA_MAX_SEND_BATCH, |
|---|
| 542 | | - cdata->max_requests >> 2); |
|---|
| 543 | | - ep->rep_send_count = ep->rep_send_batch; |
|---|
| 544 | | - init_waitqueue_head(&ep->rep_connect_wait); |
|---|
| 545 | | - INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); |
|---|
| 443 | + ep->re_send_batch = ep->re_max_requests >> 3; |
|---|
| 444 | + ep->re_send_count = ep->re_send_batch; |
|---|
| 445 | + init_waitqueue_head(&ep->re_connect_wait); |
|---|
| 546 | 446 | |
|---|
| 547 | | - sendcq = ib_alloc_cq(ia->ri_device, NULL, |
|---|
| 548 | | - ep->rep_attr.cap.max_send_wr + 1, |
|---|
| 549 | | - ia->ri_device->num_comp_vectors > 1 ? 1 : 0, |
|---|
| 550 | | - IB_POLL_WORKQUEUE); |
|---|
| 551 | | - if (IS_ERR(sendcq)) { |
|---|
| 552 | | - rc = PTR_ERR(sendcq); |
|---|
| 553 | | - dprintk("RPC: %s: failed to create send CQ: %i\n", |
|---|
| 554 | | - __func__, rc); |
|---|
| 555 | | - goto out1; |
|---|
| 447 | + ep->re_attr.send_cq = ib_alloc_cq_any(device, r_xprt, |
|---|
| 448 | + ep->re_attr.cap.max_send_wr, |
|---|
| 449 | + IB_POLL_WORKQUEUE); |
|---|
| 450 | + if (IS_ERR(ep->re_attr.send_cq)) { |
|---|
| 451 | + rc = PTR_ERR(ep->re_attr.send_cq); |
|---|
| 452 | + ep->re_attr.send_cq = NULL; |
|---|
| 453 | + goto out_destroy; |
|---|
| 556 | 454 | } |
|---|
| 557 | 455 | |
|---|
| 558 | | - recvcq = ib_alloc_cq(ia->ri_device, NULL, |
|---|
| 559 | | - ep->rep_attr.cap.max_recv_wr + 1, |
|---|
| 560 | | - 0, IB_POLL_WORKQUEUE); |
|---|
| 561 | | - if (IS_ERR(recvcq)) { |
|---|
| 562 | | - rc = PTR_ERR(recvcq); |
|---|
| 563 | | - dprintk("RPC: %s: failed to create recv CQ: %i\n", |
|---|
| 564 | | - __func__, rc); |
|---|
| 565 | | - goto out2; |
|---|
| 456 | + ep->re_attr.recv_cq = ib_alloc_cq_any(device, r_xprt, |
|---|
| 457 | + ep->re_attr.cap.max_recv_wr, |
|---|
| 458 | + IB_POLL_WORKQUEUE); |
|---|
| 459 | + if (IS_ERR(ep->re_attr.recv_cq)) { |
|---|
| 460 | + rc = PTR_ERR(ep->re_attr.recv_cq); |
|---|
| 461 | + ep->re_attr.recv_cq = NULL; |
|---|
| 462 | + goto out_destroy; |
|---|
| 566 | 463 | } |
|---|
| 567 | | - |
|---|
| 568 | | - ep->rep_attr.send_cq = sendcq; |
|---|
| 569 | | - ep->rep_attr.recv_cq = recvcq; |
|---|
| 464 | + ep->re_receive_count = 0; |
|---|
| 570 | 465 | |
|---|
| 571 | 466 | /* Initialize cma parameters */ |
|---|
| 572 | | - memset(&ep->rep_remote_cma, 0, sizeof(ep->rep_remote_cma)); |
|---|
| 467 | + memset(&ep->re_remote_cma, 0, sizeof(ep->re_remote_cma)); |
|---|
| 573 | 468 | |
|---|
| 574 | 469 | /* Prepare RDMA-CM private message */ |
|---|
| 470 | + pmsg = &ep->re_cm_private; |
|---|
| 575 | 471 | pmsg->cp_magic = rpcrdma_cmp_magic; |
|---|
| 576 | 472 | pmsg->cp_version = RPCRDMA_CMP_VERSION; |
|---|
| 577 | | - pmsg->cp_flags |= ia->ri_ops->ro_send_w_inv_ok; |
|---|
| 578 | | - pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize); |
|---|
| 579 | | - pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize); |
|---|
| 580 | | - ep->rep_remote_cma.private_data = pmsg; |
|---|
| 581 | | - ep->rep_remote_cma.private_data_len = sizeof(*pmsg); |
|---|
| 473 | + pmsg->cp_flags |= RPCRDMA_CMP_F_SND_W_INV_OK; |
|---|
| 474 | + pmsg->cp_send_size = rpcrdma_encode_buffer_size(ep->re_inline_send); |
|---|
| 475 | + pmsg->cp_recv_size = rpcrdma_encode_buffer_size(ep->re_inline_recv); |
|---|
| 476 | + ep->re_remote_cma.private_data = pmsg; |
|---|
| 477 | + ep->re_remote_cma.private_data_len = sizeof(*pmsg); |
|---|
| 582 | 478 | |
|---|
| 583 | 479 | /* Client offers RDMA Read but does not initiate */ |
|---|
| 584 | | - ep->rep_remote_cma.initiator_depth = 0; |
|---|
| 585 | | - ep->rep_remote_cma.responder_resources = |
|---|
| 586 | | - min_t(int, U8_MAX, ia->ri_device->attrs.max_qp_rd_atom); |
|---|
| 480 | + ep->re_remote_cma.initiator_depth = 0; |
|---|
| 481 | + ep->re_remote_cma.responder_resources = |
|---|
| 482 | + min_t(int, U8_MAX, device->attrs.max_qp_rd_atom); |
|---|
| 587 | 483 | |
|---|
| 588 | 484 | /* Limit transport retries so client can detect server |
|---|
| 589 | 485 | * GID changes quickly. RPC layer handles re-establishing |
|---|
| 590 | 486 | * transport connection and retransmission. |
|---|
| 591 | 487 | */ |
|---|
| 592 | | - ep->rep_remote_cma.retry_count = 6; |
|---|
| 488 | + ep->re_remote_cma.retry_count = 6; |
|---|
| 593 | 489 | |
|---|
| 594 | 490 | /* RPC-over-RDMA handles its own flow control. In addition, |
|---|
| 595 | 491 | * make all RNR NAKs visible so we know that RPC-over-RDMA |
|---|
| 596 | 492 | * flow control is working correctly (no NAKs should be seen). |
|---|
| 597 | 493 | */ |
|---|
| 598 | | - ep->rep_remote_cma.flow_control = 0; |
|---|
| 599 | | - ep->rep_remote_cma.rnr_retry_count = 0; |
|---|
| 494 | + ep->re_remote_cma.flow_control = 0; |
|---|
| 495 | + ep->re_remote_cma.rnr_retry_count = 0; |
|---|
| 600 | 496 | |
|---|
| 601 | | - return 0; |
|---|
| 602 | | - |
|---|
| 603 | | -out2: |
|---|
| 604 | | - ib_free_cq(sendcq); |
|---|
| 605 | | -out1: |
|---|
| 606 | | - return rc; |
|---|
| 607 | | -} |
|---|
| 608 | | - |
|---|
| 609 | | -/* |
|---|
| 610 | | - * rpcrdma_ep_destroy |
|---|
| 611 | | - * |
|---|
| 612 | | - * Disconnect and destroy endpoint. After this, the only |
|---|
| 613 | | - * valid operations on the ep are to free it (if dynamically |
|---|
| 614 | | - * allocated) or re-create it. |
|---|
| 615 | | - */ |
|---|
| 616 | | -void |
|---|
| 617 | | -rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) |
|---|
| 618 | | -{ |
|---|
| 619 | | - cancel_delayed_work_sync(&ep->rep_connect_worker); |
|---|
| 620 | | - |
|---|
| 621 | | - if (ia->ri_id && ia->ri_id->qp) { |
|---|
| 622 | | - rpcrdma_ep_disconnect(ep, ia); |
|---|
| 623 | | - rdma_destroy_qp(ia->ri_id); |
|---|
| 624 | | - ia->ri_id->qp = NULL; |
|---|
| 625 | | - } |
|---|
| 626 | | - |
|---|
| 627 | | - if (ep->rep_attr.recv_cq) |
|---|
| 628 | | - ib_free_cq(ep->rep_attr.recv_cq); |
|---|
| 629 | | - if (ep->rep_attr.send_cq) |
|---|
| 630 | | - ib_free_cq(ep->rep_attr.send_cq); |
|---|
| 631 | | -} |
|---|
| 632 | | - |
|---|
| 633 | | -/* Re-establish a connection after a device removal event. |
|---|
| 634 | | - * Unlike a normal reconnection, a fresh PD and a new set |
|---|
| 635 | | - * of MRs and buffers is needed. |
|---|
| 636 | | - */ |
|---|
| 637 | | -static int |
|---|
| 638 | | -rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, |
|---|
| 639 | | - struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) |
|---|
| 640 | | -{ |
|---|
| 641 | | - int rc, err; |
|---|
| 642 | | - |
|---|
| 643 | | - trace_xprtrdma_reinsert(r_xprt); |
|---|
| 644 | | - |
|---|
| 645 | | - rc = -EHOSTUNREACH; |
|---|
| 646 | | - if (rpcrdma_ia_open(r_xprt)) |
|---|
| 647 | | - goto out1; |
|---|
| 648 | | - |
|---|
| 649 | | - rc = -ENOMEM; |
|---|
| 650 | | - err = rpcrdma_ep_create(ep, ia, &r_xprt->rx_data); |
|---|
| 651 | | - if (err) { |
|---|
| 652 | | - pr_err("rpcrdma: rpcrdma_ep_create returned %d\n", err); |
|---|
| 653 | | - goto out2; |
|---|
| 654 | | - } |
|---|
| 655 | | - |
|---|
| 656 | | - rc = -ENETUNREACH; |
|---|
| 657 | | - err = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); |
|---|
| 658 | | - if (err) { |
|---|
| 659 | | - pr_err("rpcrdma: rdma_create_qp returned %d\n", err); |
|---|
| 660 | | - goto out3; |
|---|
| 661 | | - } |
|---|
| 662 | | - |
|---|
| 663 | | - rpcrdma_mrs_create(r_xprt); |
|---|
| 664 | | - return 0; |
|---|
| 665 | | - |
|---|
| 666 | | -out3: |
|---|
| 667 | | - rpcrdma_ep_destroy(ep, ia); |
|---|
| 668 | | -out2: |
|---|
| 669 | | - rpcrdma_ia_close(ia); |
|---|
| 670 | | -out1: |
|---|
| 671 | | - return rc; |
|---|
| 672 | | -} |
|---|
| 673 | | - |
|---|
| 674 | | -static int |
|---|
| 675 | | -rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep, |
|---|
| 676 | | - struct rpcrdma_ia *ia) |
|---|
| 677 | | -{ |
|---|
| 678 | | - struct rdma_cm_id *id, *old; |
|---|
| 679 | | - int err, rc; |
|---|
| 680 | | - |
|---|
| 681 | | - trace_xprtrdma_reconnect(r_xprt); |
|---|
| 682 | | - |
|---|
| 683 | | - rpcrdma_ep_disconnect(ep, ia); |
|---|
| 684 | | - |
|---|
| 685 | | - rc = -EHOSTUNREACH; |
|---|
| 686 | | - id = rpcrdma_create_id(r_xprt, ia); |
|---|
| 687 | | - if (IS_ERR(id)) |
|---|
| 688 | | - goto out; |
|---|
| 689 | | - |
|---|
| 690 | | - /* As long as the new ID points to the same device as the |
|---|
| 691 | | - * old ID, we can reuse the transport's existing PD and all |
|---|
| 692 | | - * previously allocated MRs. Also, the same device means |
|---|
| 693 | | - * the transport's previous DMA mappings are still valid. |
|---|
| 694 | | - * |
|---|
| 695 | | - * This is a sanity check only. There should be no way these |
|---|
| 696 | | - * point to two different devices here. |
|---|
| 697 | | - */ |
|---|
| 698 | | - old = id; |
|---|
| 699 | | - rc = -ENETUNREACH; |
|---|
| 700 | | - if (ia->ri_device != id->device) { |
|---|
| 701 | | - pr_err("rpcrdma: can't reconnect on different device!\n"); |
|---|
| 497 | + ep->re_pd = ib_alloc_pd(device, 0); |
|---|
| 498 | + if (IS_ERR(ep->re_pd)) { |
|---|
| 499 | + rc = PTR_ERR(ep->re_pd); |
|---|
| 500 | + ep->re_pd = NULL; |
|---|
| 702 | 501 | goto out_destroy; |
|---|
| 703 | 502 | } |
|---|
| 704 | 503 | |
|---|
| 705 | | - err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr); |
|---|
| 706 | | - if (err) { |
|---|
| 707 | | - dprintk("RPC: %s: rdma_create_qp returned %d\n", |
|---|
| 708 | | - __func__, err); |
|---|
| 504 | + rc = rdma_create_qp(id, ep->re_pd, &ep->re_attr); |
|---|
| 505 | + if (rc) |
|---|
| 709 | 506 | goto out_destroy; |
|---|
| 710 | | - } |
|---|
| 711 | 507 | |
|---|
| 712 | | - /* Atomically replace the transport's ID and QP. */ |
|---|
| 713 | | - rc = 0; |
|---|
| 714 | | - old = ia->ri_id; |
|---|
| 715 | | - ia->ri_id = id; |
|---|
| 716 | | - rdma_destroy_qp(old); |
|---|
| 508 | + r_xprt->rx_ep = ep; |
|---|
| 509 | + return 0; |
|---|
| 717 | 510 | |
|---|
| 718 | 511 | out_destroy: |
|---|
| 719 | | - rdma_destroy_id(old); |
|---|
| 720 | | -out: |
|---|
| 512 | + rpcrdma_ep_put(ep); |
|---|
| 513 | + rdma_destroy_id(id); |
|---|
| 721 | 514 | return rc; |
|---|
| 722 | 515 | } |
|---|
| 723 | 516 | |
|---|
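Editor's note: one detail of the new rpcrdma_ep_create() worth imitating: each `ib_alloc_cq_any()` result is stored where the destroy path will look for it, and is reset to NULL on failure, so a single error exit and a single destructor can free whatever subset was actually allocated. A sketch of the idiom; the function name and sizes are illustrative:

```c
#include <rdma/ib_verbs.h>

/* Allocate send and recv CQs so teardown can run safely no matter how
 * far we got. ib_alloc_cq_any() picks a completion vector for us.
 */
static int alloc_cqs(struct ib_device *dev, void *ctx,
		     struct ib_qp_init_attr *attr, int send_wrs, int recv_wrs)
{
	attr->send_cq = ib_alloc_cq_any(dev, ctx, send_wrs,
					IB_POLL_WORKQUEUE);
	if (IS_ERR(attr->send_cq)) {
		int rc = PTR_ERR(attr->send_cq);

		attr->send_cq = NULL;	/* destroy path checks for NULL */
		return rc;
	}

	attr->recv_cq = ib_alloc_cq_any(dev, ctx, recv_wrs,
					IB_POLL_WORKQUEUE);
	if (IS_ERR(attr->recv_cq)) {
		int rc = PTR_ERR(attr->recv_cq);

		attr->recv_cq = NULL;
		return rc;
	}
	return 0;
}
```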
| 724 | | -/* |
|---|
| 725 | | - * Connect unconnected endpoint. |
|---|
| 517 | +/** |
|---|
| 518 | + * rpcrdma_xprt_connect - Connect an unconnected transport |
|---|
| 519 | + * @r_xprt: controlling transport instance |
|---|
| 520 | + * |
|---|
| 521 | + * Returns 0 on success or a negative errno. |
|---|
| 726 | 522 | */ |
|---|
| 727 | | -int |
|---|
| 728 | | -rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) |
|---|
| 523 | +int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt) |
|---|
| 729 | 524 | { |
|---|
| 730 | | - struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, |
|---|
| 731 | | - rx_ia); |
|---|
| 525 | + struct rpc_xprt *xprt = &r_xprt->rx_xprt; |
|---|
| 526 | + struct rpcrdma_ep *ep; |
|---|
| 732 | 527 | int rc; |
|---|
| 733 | 528 | |
|---|
| 734 | | -retry: |
|---|
| 735 | | - switch (ep->rep_connected) { |
|---|
| 736 | | - case 0: |
|---|
| 737 | | - dprintk("RPC: %s: connecting...\n", __func__); |
|---|
| 738 | | - rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); |
|---|
| 739 | | - if (rc) { |
|---|
| 740 | | - dprintk("RPC: %s: rdma_create_qp failed %i\n", |
|---|
| 741 | | - __func__, rc); |
|---|
| 742 | | - rc = -ENETUNREACH; |
|---|
| 743 | | - goto out_noupdate; |
|---|
| 744 | | - } |
|---|
| 745 | | - break; |
|---|
| 746 | | - case -ENODEV: |
|---|
| 747 | | - rc = rpcrdma_ep_recreate_xprt(r_xprt, ep, ia); |
|---|
| 748 | | - if (rc) |
|---|
| 749 | | - goto out_noupdate; |
|---|
| 750 | | - break; |
|---|
| 751 | | - default: |
|---|
| 752 | | - rc = rpcrdma_ep_reconnect(r_xprt, ep, ia); |
|---|
| 753 | | - if (rc) |
|---|
| 754 | | - goto out; |
|---|
| 755 | | - } |
|---|
| 756 | | - |
|---|
| 757 | | - ep->rep_connected = 0; |
|---|
| 758 | | - rpcrdma_post_recvs(r_xprt, true); |
|---|
| 759 | | - |
|---|
| 760 | | - rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); |
|---|
| 761 | | - if (rc) { |
|---|
| 762 | | - dprintk("RPC: %s: rdma_connect() failed with %i\n", |
|---|
| 763 | | - __func__, rc); |
|---|
| 764 | | - goto out; |
|---|
| 765 | | - } |
|---|
| 766 | | - |
|---|
| 767 | | - wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); |
|---|
| 768 | | - if (ep->rep_connected <= 0) { |
|---|
| 769 | | - if (ep->rep_connected == -EAGAIN) |
|---|
| 770 | | - goto retry; |
|---|
| 771 | | - rc = ep->rep_connected; |
|---|
| 772 | | - goto out; |
|---|
| 773 | | - } |
|---|
| 774 | | - |
|---|
| 775 | | - dprintk("RPC: %s: connected\n", __func__); |
|---|
| 776 | | - |
|---|
| 777 | | -out: |
|---|
| 529 | + rc = rpcrdma_ep_create(r_xprt); |
|---|
| 778 | 530 | if (rc) |
|---|
| 779 | | - ep->rep_connected = rc; |
|---|
| 531 | + return rc; |
|---|
| 532 | + ep = r_xprt->rx_ep; |
|---|
| 780 | 533 | |
|---|
| 781 | | -out_noupdate: |
|---|
| 534 | + xprt_clear_connected(xprt); |
|---|
| 535 | + rpcrdma_reset_cwnd(r_xprt); |
|---|
| 536 | + |
|---|
| 537 | + /* Bump the ep's reference count while there are |
|---|
| 538 | + * outstanding Receives. |
|---|
| 539 | + */ |
|---|
| 540 | + rpcrdma_ep_get(ep); |
|---|
| 541 | + rpcrdma_post_recvs(r_xprt, 1, true); |
|---|
| 542 | + |
|---|
| 543 | + rc = rdma_connect(ep->re_id, &ep->re_remote_cma); |
|---|
| 544 | + if (rc) |
|---|
| 545 | + goto out; |
|---|
| 546 | + |
|---|
| 547 | + if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO) |
|---|
| 548 | + xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; |
|---|
| 549 | + wait_event_interruptible(ep->re_connect_wait, |
|---|
| 550 | + ep->re_connect_status != 0); |
|---|
| 551 | + if (ep->re_connect_status <= 0) { |
|---|
| 552 | + rc = ep->re_connect_status; |
|---|
| 553 | + goto out; |
|---|
| 554 | + } |
|---|
| 555 | + |
|---|
| 556 | + rc = rpcrdma_sendctxs_create(r_xprt); |
|---|
| 557 | + if (rc) { |
|---|
| 558 | + rc = -ENOTCONN; |
|---|
| 559 | + goto out; |
|---|
| 560 | + } |
|---|
| 561 | + |
|---|
| 562 | + rc = rpcrdma_reqs_setup(r_xprt); |
|---|
| 563 | + if (rc) { |
|---|
| 564 | + rc = -ENOTCONN; |
|---|
| 565 | + goto out; |
|---|
| 566 | + } |
|---|
| 567 | + rpcrdma_mrs_create(r_xprt); |
|---|
| 568 | + |
|---|
| 569 | +out: |
|---|
| 570 | + trace_xprtrdma_connect(r_xprt, rc); |
|---|
| 782 | 571 | return rc; |
|---|
| 783 | 572 | } |
|---|
| 784 | 573 | |
|---|
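Editor's note: rpcrdma_xprt_connect() posts Receives and takes an extra endpoint reference before calling `rdma_connect()`, because the peer may Send the moment it accepts, so Receive resources must already be on the queue. A skeleton of that ordering under stated assumptions; all helpers here are placeholders:

```c
#include <linux/wait.h>
#include <rdma/rdma_cm.h>

static void my_post_initial_receives(void)
{
	/* loop posting Receive WRs, elided */
}

/* Condensed connect-time ordering. The CM event handler records the
 * outcome in *status (positive on success, negative errno otherwise)
 * and wakes the wait queue.
 */
static int my_connect(struct rdma_cm_id *id, struct rdma_conn_param *param,
		      wait_queue_head_t *wait, int *status)
{
	int rc;

	my_post_initial_receives();	/* before connect: peer may Send at once */

	rc = rdma_connect(id, param);
	if (rc)
		return rc;

	wait_event_interruptible(*wait, *status != 0);
	return *status > 0 ? 0 : *status;
}
```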
| 785 | | -/* |
|---|
| 786 | | - * rpcrdma_ep_disconnect |
|---|
| 574 | +/** |
|---|
| 575 | + * rpcrdma_xprt_disconnect - Disconnect underlying transport |
|---|
| 576 | + * @r_xprt: controlling transport instance |
|---|
| 787 | 577 | * |
|---|
| 788 | | - * This is separate from destroy to facilitate the ability |
|---|
| 789 | | - * to reconnect without recreating the endpoint. |
|---|
| 578 | + * Caller serializes. Either the transport send lock is held, |
|---|
| 579 | + * or we're being called to destroy the transport. |
|---|
| 790 | 580 | * |
|---|
| 791 | | - * This call is not reentrant, and must not be made in parallel |
|---|
| 792 | | - * on the same endpoint. |
|---|
| 581 | + * On return, @r_xprt is completely divested of all hardware |
|---|
| 582 | + * resources and prepared for the next ->connect operation. |
|---|
| 793 | 583 | */ |
|---|
| 794 | | -void |
|---|
| 795 | | -rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) |
|---|
| 584 | +void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt) |
|---|
| 796 | 585 | { |
|---|
| 586 | + struct rpcrdma_ep *ep = r_xprt->rx_ep; |
|---|
| 587 | + struct rdma_cm_id *id; |
|---|
| 797 | 588 | int rc; |
|---|
| 798 | 589 | |
|---|
| 799 | | - rc = rdma_disconnect(ia->ri_id); |
|---|
| 800 | | - if (!rc) |
|---|
| 801 | | - /* returns without wait if not connected */ |
|---|
| 802 | | - wait_event_interruptible(ep->rep_connect_wait, |
|---|
| 803 | | - ep->rep_connected != 1); |
|---|
| 804 | | - else |
|---|
| 805 | | - ep->rep_connected = rc; |
|---|
| 806 | | - trace_xprtrdma_disconnect(container_of(ep, struct rpcrdma_xprt, |
|---|
| 807 | | - rx_ep), rc); |
|---|
| 590 | + if (!ep) |
|---|
| 591 | + return; |
|---|
| 808 | 592 | |
|---|
| 809 | | - ib_drain_qp(ia->ri_id->qp); |
|---|
| 593 | + id = ep->re_id; |
|---|
| 594 | + rc = rdma_disconnect(id); |
|---|
| 595 | + trace_xprtrdma_disconnect(r_xprt, rc); |
|---|
| 596 | + |
|---|
| 597 | + rpcrdma_xprt_drain(r_xprt); |
|---|
| 598 | + rpcrdma_reps_unmap(r_xprt); |
|---|
| 599 | + rpcrdma_reqs_reset(r_xprt); |
|---|
| 600 | + rpcrdma_mrs_destroy(r_xprt); |
|---|
| 601 | + rpcrdma_sendctxs_destroy(r_xprt); |
|---|
| 602 | + |
|---|
| 603 | + if (rpcrdma_ep_put(ep)) |
|---|
| 604 | + rdma_destroy_id(id); |
|---|
| 605 | + |
|---|
| 606 | + r_xprt->rx_ep = NULL; |
|---|
| 810 | 607 | } |
|---|
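Editor's note: the disconnect path above now has a fixed teardown order: start the CM disconnect, drain both work queues, release software state, and only then drop the endpoint reference, destroying the ID if that was the last one. Note that the ID pointer is cached before the final put, since the endpoint may be freed by it. In outline, with placeholder types and steps 2-3 elided to comments:

```c
#include <linux/kref.h>
#include <linux/slab.h>
#include <rdma/rdma_cm.h>

struct my_ep {
	struct kref		re_kref;
	struct rdma_cm_id	*re_id;
};

struct my_xprt {
	struct my_ep		*rx_ep;
};

static void my_ep_destroy(struct kref *kref)
{
	kfree(container_of(kref, struct my_ep, re_kref));
}

static void my_xprt_disconnect(struct my_xprt *xprt)
{
	struct my_ep *ep = xprt->rx_ep;
	struct rdma_cm_id *id;

	if (!ep)
		return;			/* never connected or already down */

	id = ep->re_id;			/* cache: @ep may be freed below */
	rdma_disconnect(id);		/* 1. start the CM disconnect */
	/* 2. drain RQ then SQ so no more completions can run */
	/* 3. release software state: reps, reqs, MRs, sendctxs */
	if (kref_put(&ep->re_kref, my_ep_destroy))
		rdma_destroy_id(id);	/* 4. last ref: the ID goes too */
	xprt->rx_ep = NULL;
}
```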
| 811 | 608 | |
|---|
| 812 | 609 | /* Fixed-size circular FIFO queue. This implementation is wait-free and |
|---|
| .. | .. |
|---|
| 823 | 620 | */ |
|---|
| 824 | 621 | |
|---|
| 825 | 622 | /* rpcrdma_sendctxs_destroy() assumes caller has already quiesced |
|---|
| 826 | | - * queue activity, and ib_drain_qp has flushed all remaining Send |
|---|
| 827 | | - * requests. |
|---|
| 623 | + * queue activity, and rpcrdma_xprt_drain has flushed all remaining |
|---|
| 624 | + * Send requests. |
|---|
| 828 | 625 | */ |
|---|
| 829 | | -static void rpcrdma_sendctxs_destroy(struct rpcrdma_buffer *buf) |
|---|
| 626 | +static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt) |
|---|
| 830 | 627 | { |
|---|
| 628 | + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
|---|
| 831 | 629 | unsigned long i; |
|---|
| 832 | 630 | |
|---|
| 631 | + if (!buf->rb_sc_ctxs) |
|---|
| 632 | + return; |
|---|
| 833 | 633 | for (i = 0; i <= buf->rb_sc_last; i++) |
|---|
| 834 | 634 | kfree(buf->rb_sc_ctxs[i]); |
|---|
| 835 | 635 | kfree(buf->rb_sc_ctxs); |
|---|
| 636 | + buf->rb_sc_ctxs = NULL; |
|---|
| 836 | 637 | } |
|---|
| 837 | 638 | |
|---|
| 838 | | -static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ia *ia) |
|---|
| 639 | +static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ep *ep) |
|---|
| 839 | 640 | { |
|---|
| 840 | 641 | struct rpcrdma_sendctx *sc; |
|---|
| 841 | 642 | |
|---|
| 842 | | - sc = kzalloc(sizeof(*sc) + |
|---|
| 843 | | - ia->ri_max_send_sges * sizeof(struct ib_sge), |
|---|
| 643 | + sc = kzalloc(struct_size(sc, sc_sges, ep->re_attr.cap.max_send_sge), |
|---|
| 844 | 644 | GFP_KERNEL); |
|---|
| 845 | 645 | if (!sc) |
|---|
| 846 | 646 | return NULL; |
|---|
| 847 | 647 | |
|---|
| 848 | | - sc->sc_wr.wr_cqe = &sc->sc_cqe; |
|---|
| 849 | | - sc->sc_wr.sg_list = sc->sc_sges; |
|---|
| 850 | | - sc->sc_wr.opcode = IB_WR_SEND; |
|---|
| 851 | 648 | sc->sc_cqe.done = rpcrdma_wc_send; |
|---|
| 852 | 649 | return sc; |
|---|
| 853 | 650 | } |
|---|
| .. | .. |
|---|
| 863 | 660 | * the ->send_request call to fail temporarily before too many |
|---|
| 864 | 661 | * Sends are posted. |
|---|
| 865 | 662 | */ |
|---|
| 866 | | - i = buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; |
|---|
| 867 | | - dprintk("RPC: %s: allocating %lu send_ctxs\n", __func__, i); |
|---|
| 663 | + i = r_xprt->rx_ep->re_max_requests + RPCRDMA_MAX_BC_REQUESTS; |
|---|
| 868 | 664 | buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL); |
|---|
| 869 | 665 | if (!buf->rb_sc_ctxs) |
|---|
| 870 | 666 | return -ENOMEM; |
|---|
| 871 | 667 | |
|---|
| 872 | 668 | buf->rb_sc_last = i - 1; |
|---|
| 873 | 669 | for (i = 0; i <= buf->rb_sc_last; i++) { |
|---|
| 874 | | - sc = rpcrdma_sendctx_create(&r_xprt->rx_ia); |
|---|
| 670 | + sc = rpcrdma_sendctx_create(r_xprt->rx_ep); |
|---|
| 875 | 671 | if (!sc) |
|---|
| 876 | 672 | return -ENOMEM; |
|---|
| 877 | 673 | |
|---|
| 878 | | - sc->sc_xprt = r_xprt; |
|---|
| 879 | 674 | buf->rb_sc_ctxs[i] = sc; |
|---|
| 880 | 675 | } |
|---|
| 881 | | - buf->rb_flags = 0; |
|---|
| 882 | 676 | |
|---|
| 677 | + buf->rb_sc_head = 0; |
|---|
| 678 | + buf->rb_sc_tail = 0; |
|---|
| 883 | 679 | return 0; |
|---|
| 884 | 680 | } |
|---|
| 885 | 681 | |
|---|
| .. | .. |
|---|
| 895 | 691 | |
|---|
| 896 | 692 | /** |
|---|
| 897 | 693 | * rpcrdma_sendctx_get_locked - Acquire a send context |
|---|
| 898 | | - * @buf: transport buffers from which to acquire an unused context |
|---|
| 694 | + * @r_xprt: controlling transport instance |
|---|
| 899 | 695 | * |
|---|
| 900 | 696 | * Returns pointer to a free send completion context; or NULL if |
|---|
| 901 | 697 | * the queue is empty. |
|---|
| 902 | 698 | * |
|---|
| 903 | 699 | * Usage: Called to acquire an SGE array before preparing a Send WR. |
|---|
| 904 | 700 | * |
|---|
| 905 | | - * The caller serializes calls to this function (per rpcrdma_buffer), |
|---|
| 906 | | - * and provides an effective memory barrier that flushes the new value |
|---|
| 701 | + * The caller serializes calls to this function (per transport), and |
|---|
| 702 | + * provides an effective memory barrier that flushes the new value |
|---|
| 907 | 703 | * of rb_sc_head. |
|---|
| 908 | 704 | */ |
|---|
| 909 | | -struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf) |
|---|
| 705 | +struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt) |
|---|
| 910 | 706 | { |
|---|
| 911 | | - struct rpcrdma_xprt *r_xprt; |
|---|
| 707 | + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
|---|
| 912 | 708 | struct rpcrdma_sendctx *sc; |
|---|
| 913 | 709 | unsigned long next_head; |
|---|
| 914 | 710 | |
|---|
| .. | .. |
|---|
| 932 | 728 | * completions recently. This is a sign the Send Queue is |
|---|
| 933 | 729 | * backing up. Cause the caller to pause and try again. |
|---|
| 934 | 730 | */ |
|---|
| 935 | | - set_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags); |
|---|
| 936 | | - r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf); |
|---|
| 731 | + xprt_wait_for_buffer_space(&r_xprt->rx_xprt); |
|---|
| 937 | 732 | r_xprt->rx_stats.empty_sendctx_q++; |
|---|
| 938 | 733 | return NULL; |
|---|
| 939 | 734 | } |
|---|
| 940 | 735 | |
|---|
| 941 | 736 | /** |
|---|
| 942 | 737 | * rpcrdma_sendctx_put_locked - Release a send context |
|---|
| 738 | + * @r_xprt: controlling transport instance |
|---|
| 943 | 739 | * @sc: send context to release |
|---|
| 944 | 740 | * |
|---|
| 945 | 741 | * Usage: Called from Send completion to return a sendctxt |
|---|
| 946 | 742 | * to the queue. |
|---|
| 947 | 743 | * |
|---|
| 948 | | - * The caller serializes calls to this function (per rpcrdma_buffer). |
|---|
| 744 | + * The caller serializes calls to this function (per transport). |
|---|
| 949 | 745 | */ |
|---|
| 950 | | -static void |
|---|
| 951 | | -rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc) |
|---|
| 746 | +static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, |
|---|
| 747 | + struct rpcrdma_sendctx *sc) |
|---|
| 952 | 748 | { |
|---|
| 953 | | - struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf; |
|---|
| 749 | + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
|---|
| 954 | 750 | unsigned long next_tail; |
|---|
| 955 | 751 | |
|---|
| 956 | | - /* Unmap SGEs of previously completed by unsignaled |
|---|
| 752 | + /* Unmap SGEs of previously completed but unsignaled |
|---|
| 957 | 753 | * Sends by walking up the queue until @sc is found. |
|---|
| 958 | 754 | */ |
|---|
| 959 | 755 | next_tail = buf->rb_sc_tail; |
|---|
| .. | .. |
|---|
| 961 | 757 | next_tail = rpcrdma_sendctx_next(buf, next_tail); |
|---|
| 962 | 758 | |
|---|
| 963 | 759 | /* ORDER: item must be accessed _before_ tail is updated */ |
|---|
| 964 | | - rpcrdma_unmap_sendctx(buf->rb_sc_ctxs[next_tail]); |
|---|
| 760 | + rpcrdma_sendctx_unmap(buf->rb_sc_ctxs[next_tail]); |
|---|
| 965 | 761 | |
|---|
| 966 | 762 | } while (buf->rb_sc_ctxs[next_tail] != sc); |
|---|
| 967 | 763 | |
|---|
| 968 | 764 | /* Paired with READ_ONCE */ |
|---|
| 969 | 765 | smp_store_release(&buf->rb_sc_tail, next_tail); |
|---|
| 970 | 766 | |
|---|
| 971 | | - if (test_and_clear_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags)) { |
|---|
| 972 | | - smp_mb__after_atomic(); |
|---|
| 973 | | - xprt_write_space(&sc->sc_xprt->rx_xprt); |
|---|
| 974 | | - } |
|---|
| 975 | | -} |
|---|
| 976 | | - |
|---|
| 977 | | -static void |
|---|
| 978 | | -rpcrdma_mr_recovery_worker(struct work_struct *work) |
|---|
| 979 | | -{ |
|---|
| 980 | | - struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer, |
|---|
| 981 | | - rb_recovery_worker.work); |
|---|
| 982 | | - struct rpcrdma_mr *mr; |
|---|
| 983 | | - |
|---|
| 984 | | - spin_lock(&buf->rb_recovery_lock); |
|---|
| 985 | | - while (!list_empty(&buf->rb_stale_mrs)) { |
|---|
| 986 | | - mr = rpcrdma_mr_pop(&buf->rb_stale_mrs); |
|---|
| 987 | | - spin_unlock(&buf->rb_recovery_lock); |
|---|
| 988 | | - |
|---|
| 989 | | - trace_xprtrdma_recover_mr(mr); |
|---|
| 990 | | - mr->mr_xprt->rx_ia.ri_ops->ro_recover_mr(mr); |
|---|
| 991 | | - |
|---|
| 992 | | - spin_lock(&buf->rb_recovery_lock); |
|---|
| 993 | | - } |
|---|
| 994 | | - spin_unlock(&buf->rb_recovery_lock); |
|---|
| 995 | | -} |
|---|
| 996 | | - |
|---|
| 997 | | -void |
|---|
| 998 | | -rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr) |
|---|
| 999 | | -{ |
|---|
| 1000 | | - struct rpcrdma_xprt *r_xprt = mr->mr_xprt; |
|---|
| 1001 | | - struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
|---|
| 1002 | | - |
|---|
| 1003 | | - spin_lock(&buf->rb_recovery_lock); |
|---|
| 1004 | | - rpcrdma_mr_push(mr, &buf->rb_stale_mrs); |
|---|
| 1005 | | - spin_unlock(&buf->rb_recovery_lock); |
|---|
| 1006 | | - |
|---|
| 1007 | | - schedule_delayed_work(&buf->rb_recovery_worker, 0); |
|---|
| 767 | + xprt_write_space(&r_xprt->rx_xprt); |
|---|
| 1008 | 768 | } |
|---|
| 1009 | 769 | |
|---|
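
The get/put pair above forms a one-producer, one-consumer ring over the pre-allocated rb_sc_ctxs[] array: only the sending path advances rb_sc_head, only the Send completion path advances rb_sc_tail, and the smp_store_release() on the tail pairs with the READ_ONCE() in the get path, so neither side needs a lock. A minimal userspace sketch of the same idiom, with C11 atomics standing in for the kernel primitives and all names hypothetical (the power-of-two masking is this sketch's own simplification):

```c
#include <stdatomic.h>
#include <stddef.h>

#define NCTX 64                 /* ring of pre-allocated contexts */

struct ring {
	void *ctxs[NCTX];
	size_t head;            /* advanced by the producer only  */
	_Atomic size_t tail;    /* published by the consumer      */
};

/* Producer: fails when head would catch up with tail. */
static void *ctx_get(struct ring *r)
{
	size_t next = (r->head + 1) & (NCTX - 1);
	void *ctx;

	/* Acquire pairs with the release in ctx_put(). */
	if (next == atomic_load_explicit(&r->tail, memory_order_acquire))
		return NULL;    /* ring backed up: caller must wait */

	ctx = r->ctxs[r->head];
	r->head = next;
	return ctx;
}

/* Consumer: retire slots up to and including @idx. The release
 * makes all writes to the retired contexts visible before the
 * producer can hand them out again. */
static void ctx_put(struct ring *r, size_t idx)
{
	atomic_store_explicit(&r->tail, (idx + 1) & (NCTX - 1),
			      memory_order_release);
}
```
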
| 1010 | 770 | static void |
|---|
| 1011 | 771 | rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) |
|---|
| 1012 | 772 | { |
|---|
| 1013 | 773 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
|---|
| 1014 | | - struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
|---|
| 774 | + struct rpcrdma_ep *ep = r_xprt->rx_ep; |
|---|
| 1015 | 775 | unsigned int count; |
|---|
| 1016 | | - LIST_HEAD(free); |
|---|
| 1017 | | - LIST_HEAD(all); |
|---|
| 1018 | 776 | |
|---|
| 1019 | | - for (count = 0; count < 3; count++) { |
|---|
| 777 | + for (count = 0; count < ep->re_max_rdma_segs; count++) { |
|---|
| 1020 | 778 | struct rpcrdma_mr *mr; |
|---|
| 1021 | 779 | int rc; |
|---|
| 1022 | 780 | |
|---|
| 1023 | | - mr = kzalloc(sizeof(*mr), GFP_KERNEL); |
|---|
| 781 | + mr = kzalloc(sizeof(*mr), GFP_NOFS); |
|---|
| 1024 | 782 | if (!mr) |
|---|
| 1025 | 783 | break; |
|---|
| 1026 | 784 | |
|---|
| 1027 | | - rc = ia->ri_ops->ro_init_mr(ia, mr); |
|---|
| 785 | + rc = frwr_mr_init(r_xprt, mr); |
|---|
| 1028 | 786 | if (rc) { |
|---|
| 1029 | 787 | kfree(mr); |
|---|
| 1030 | 788 | break; |
|---|
| 1031 | 789 | } |
|---|
| 1032 | 790 | |
|---|
| 1033 | | - mr->mr_xprt = r_xprt; |
|---|
| 1034 | | - |
|---|
| 1035 | | - list_add(&mr->mr_list, &free); |
|---|
| 1036 | | - list_add(&mr->mr_all, &all); |
|---|
| 791 | + spin_lock(&buf->rb_lock); |
|---|
| 792 | + rpcrdma_mr_push(mr, &buf->rb_mrs); |
|---|
| 793 | + list_add(&mr->mr_all, &buf->rb_all_mrs); |
|---|
| 794 | + spin_unlock(&buf->rb_lock); |
|---|
| 1037 | 795 | } |
|---|
| 1038 | 796 | |
|---|
| 1039 | | - spin_lock(&buf->rb_mrlock); |
|---|
| 1040 | | - list_splice(&free, &buf->rb_mrs); |
|---|
| 1041 | | - list_splice(&all, &buf->rb_all); |
|---|
| 1042 | 797 | r_xprt->rx_stats.mrs_allocated += count; |
|---|
| 1043 | | - spin_unlock(&buf->rb_mrlock); |
|---|
| 1044 | 798 | trace_xprtrdma_createmrs(r_xprt, count); |
|---|
| 1045 | | - |
|---|
| 1046 | | - xprt_write_space(&r_xprt->rx_xprt); |
|---|
| 1047 | 799 | } |
|---|
| 1048 | 800 | |
|---|
| 1049 | 801 | static void |
|---|
| 1050 | 802 | rpcrdma_mr_refresh_worker(struct work_struct *work) |
|---|
| 1051 | 803 | { |
|---|
| 1052 | 804 | struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer, |
|---|
| 1053 | | - rb_refresh_worker.work); |
|---|
| 805 | + rb_refresh_worker); |
|---|
| 1054 | 806 | struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, |
|---|
| 1055 | 807 | rx_buf); |
|---|
| 1056 | 808 | |
|---|
| 1057 | 809 | rpcrdma_mrs_create(r_xprt); |
|---|
| 810 | + xprt_write_space(&r_xprt->rx_xprt); |
|---|
| 1058 | 811 | } |
|---|
| 1059 | 812 | |
|---|
| 1060 | | -struct rpcrdma_req * |
|---|
| 1061 | | -rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) |
|---|
| 813 | +/** |
|---|
| 814 | + * rpcrdma_mrs_refresh - Wake the MR refresh worker |
|---|
| 815 | + * @r_xprt: controlling transport instance |
|---|
| 816 | + * |
|---|
| 817 | + */ |
|---|
| 818 | +void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt) |
|---|
| 819 | +{ |
|---|
| 820 | + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
|---|
| 821 | + struct rpcrdma_ep *ep = r_xprt->rx_ep; |
|---|
| 822 | + |
|---|
| 823 | + /* If there is no underlying connection, there is no |
|---|
| 824 | + * point in waking the refresh worker. |
|---|
| 825 | + */ |
|---|
| 826 | + if (ep->re_connect_status == 1) { |
|---|
| 827 | + /* The work is scheduled on a WQ_MEM_RECLAIM |
|---|
| 828 | + * workqueue in order to prevent MR allocation |
|---|
| 829 | + * from recursing into NFS during direct reclaim. |
|---|
| 830 | + */ |
|---|
| 831 | + queue_work(xprtiod_workqueue, &buf->rb_refresh_worker); |
|---|
| 832 | + } |
|---|
| 833 | +} |
|---|
| 834 | + |
|---|
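
The comments above explain the division of labor: GFP_NOFS allocation plus deferral to a WQ_MEM_RECLAIM workqueue keeps MR refill from recursing into NFS during direct reclaim, while the rescuer thread that WQ_MEM_RECLAIM guarantees lets the refill make progress under memory pressure. A stripped-down sketch of that pattern; every name here is hypothetical:

```c
/* Sketch: refilling a resource pool from a WQ_MEM_RECLAIM workqueue. */
static struct workqueue_struct *demo_wq;
static struct work_struct demo_refill_work;

static void demo_refill(struct work_struct *work)
{
	/* GFP_NOFS allocations happen here, in a rescuer-backed
	 * worker, never in the I/O path that noticed the shortage. */
}

static int demo_init(void)
{
	demo_wq = alloc_workqueue("demo", WQ_MEM_RECLAIM, 0);
	if (!demo_wq)
		return -ENOMEM;
	INIT_WORK(&demo_refill_work, demo_refill);
	return 0;
}

static void demo_pool_low(bool connected)
{
	/* As in rpcrdma_mrs_refresh(): pointless without a
	 * connection to consume the refill. */
	if (connected)
		queue_work(demo_wq, &demo_refill_work);
}
```
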
| 835 | +/** |
|---|
| 836 | + * rpcrdma_req_create - Allocate an rpcrdma_req object |
|---|
| 837 | + * @r_xprt: controlling r_xprt |
|---|
| 838 | + * @size: initial size, in bytes, of send and receive buffers |
|---|
| 839 | + * @flags: GFP flags passed to memory allocators |
|---|
| 840 | + * |
|---|
| 841 | + * Returns an allocated and fully initialized rpcrdma_req or NULL. |
|---|
| 842 | + */ |
|---|
| 843 | +struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, |
|---|
| 844 | + gfp_t flags) |
|---|
| 1062 | 845 | { |
|---|
| 1063 | 846 | struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; |
|---|
| 1064 | | - struct rpcrdma_regbuf *rb; |
|---|
| 1065 | 847 | struct rpcrdma_req *req; |
|---|
| 1066 | 848 | |
|---|
| 1067 | | - req = kzalloc(sizeof(*req), GFP_KERNEL); |
|---|
| 849 | + req = kzalloc(sizeof(*req), flags); |
|---|
| 1068 | 850 | if (req == NULL) |
|---|
| 1069 | | - return ERR_PTR(-ENOMEM); |
|---|
| 851 | + goto out1; |
|---|
| 1070 | 852 | |
|---|
| 1071 | | - rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, |
|---|
| 1072 | | - DMA_TO_DEVICE, GFP_KERNEL); |
|---|
| 1073 | | - if (IS_ERR(rb)) { |
|---|
| 1074 | | - kfree(req); |
|---|
| 1075 | | - return ERR_PTR(-ENOMEM); |
|---|
| 1076 | | - } |
|---|
| 1077 | | - req->rl_rdmabuf = rb; |
|---|
| 1078 | | - xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb)); |
|---|
| 1079 | | - req->rl_buffer = buffer; |
|---|
| 853 | + req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE, flags); |
|---|
| 854 | + if (!req->rl_sendbuf) |
|---|
| 855 | + goto out2; |
|---|
| 856 | + |
|---|
| 857 | + req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE, flags); |
|---|
| 858 | + if (!req->rl_recvbuf) |
|---|
| 859 | + goto out3; |
|---|
| 860 | + |
|---|
| 861 | + INIT_LIST_HEAD(&req->rl_free_mrs); |
|---|
| 1080 | 862 | INIT_LIST_HEAD(&req->rl_registered); |
|---|
| 1081 | | - |
|---|
| 1082 | | - spin_lock(&buffer->rb_reqslock); |
|---|
| 863 | + spin_lock(&buffer->rb_lock); |
|---|
| 1083 | 864 | list_add(&req->rl_all, &buffer->rb_allreqs); |
|---|
| 1084 | | - spin_unlock(&buffer->rb_reqslock); |
|---|
| 865 | + spin_unlock(&buffer->rb_lock); |
|---|
| 1085 | 866 | return req; |
|---|
| 867 | + |
|---|
| 868 | +out3: |
|---|
| 869 | + rpcrdma_regbuf_free(req->rl_sendbuf); |
|---|
| 870 | +out2: |
|---|
| 871 | + kfree(req); |
|---|
| 872 | +out1: |
|---|
| 873 | + return NULL; |
|---|
| 1086 | 874 | } |
|---|
| 1087 | 875 | |
|---|
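
Two things changed shape here: failure is now reported with a bare NULL (the old version returned ERR_PTR(-ENOMEM)), and the error path is the conventional goto ladder in which each label undoes exactly what succeeded before the failing step. The idiom in isolation, with hypothetical types and helpers:

```c
struct widget *widget_create(gfp_t flags)
{
	struct widget *w;

	w = kzalloc(sizeof(*w), flags);
	if (!w)
		goto out1;              /* nothing to undo yet    */
	w->a = alloc_a(flags);
	if (!w->a)
		goto out2;              /* undo the kzalloc       */
	w->b = alloc_b(flags);
	if (!w->b)
		goto out3;              /* undo alloc_a + kzalloc */
	return w;

out3:
	free_a(w->a);
out2:
	kfree(w);
out1:
	return NULL;
}
```
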
| 1088 | | -static int |
|---|
| 1089 | | -rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp) |
|---|
| 876 | +/** |
|---|
| 877 | + * rpcrdma_req_setup - Per-connection instance setup of an rpcrdma_req object |
|---|
| 878 | + * @r_xprt: controlling transport instance |
|---|
| 879 | + * @req: rpcrdma_req object to set up |
|---|
| 880 | + * |
|---|
| 881 | + * Returns zero on success, and a negative errno on failure. |
|---|
| 882 | + */ |
|---|
| 883 | +int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) |
|---|
| 1090 | 884 | { |
|---|
| 1091 | | - struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; |
|---|
| 885 | + struct rpcrdma_regbuf *rb; |
|---|
| 886 | + size_t maxhdrsize; |
|---|
| 887 | + |
|---|
| 888 | + /* Compute maximum header buffer size in bytes */ |
|---|
| 889 | + maxhdrsize = rpcrdma_fixed_maxsz + 3 + |
|---|
| 890 | + r_xprt->rx_ep->re_max_rdma_segs * rpcrdma_readchunk_maxsz; |
|---|
| 891 | + maxhdrsize *= sizeof(__be32); |
|---|
| 892 | + rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize), |
|---|
| 893 | + DMA_TO_DEVICE, GFP_KERNEL); |
|---|
| 894 | + if (!rb) |
|---|
| 895 | + goto out; |
|---|
| 896 | + |
|---|
| 897 | + if (!__rpcrdma_regbuf_dma_map(r_xprt, rb)) |
|---|
| 898 | + goto out_free; |
|---|
| 899 | + |
|---|
| 900 | + req->rl_rdmabuf = rb; |
|---|
| 901 | + xdr_buf_init(&req->rl_hdrbuf, rdmab_data(rb), rdmab_length(rb)); |
|---|
| 902 | + return 0; |
|---|
| 903 | + |
|---|
| 904 | +out_free: |
|---|
| 905 | + rpcrdma_regbuf_free(rb); |
|---|
| 906 | +out: |
|---|
| 907 | + return -ENOMEM; |
|---|
| 908 | +} |
|---|
| 909 | + |
|---|
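
The header sizing arithmetic is easier to see with numbers plugged in. Treat the constants below as assumptions rather than quotes from this tree: the customary values are rpcrdma_fixed_maxsz == 4 and rpcrdma_readchunk_maxsz == 6 32-bit XDR words (the + 3 presumably covers the three chunk-list discriminators), and suppose the device reports re_max_rdma_segs == 8:

```c
/* Worked example with the assumed constants above. */
static size_t demo_maxhdrsize(void)
{
	size_t words = 4 + 3 + 8 * 6;           /* = 55 XDR words */
	size_t bytes = words * sizeof(__be32);  /* = 220 bytes    */

	return __roundup_pow_of_two(bytes);     /* = 256          */
}
```

Rounding up to a power of two keeps the regbuf allocation slab-friendly.
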
| 910 | +/* ASSUMPTION: the rb_allreqs list is stable for the duration, |
|---|
| 911 | + * and thus can be walked without holding rb_lock; e.g., the |
|---|
| 912 | + * caller is holding the transport send lock to exclude |
|---|
| 913 | + * device removal or disconnection. |
|---|
| 914 | + */ |
|---|
| 915 | +static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt) |
|---|
| 916 | +{ |
|---|
| 1092 | 917 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
|---|
| 1093 | | - struct rpcrdma_rep *rep; |
|---|
| 918 | + struct rpcrdma_req *req; |
|---|
| 1094 | 919 | int rc; |
|---|
| 1095 | 920 | |
|---|
| 1096 | | - rc = -ENOMEM; |
|---|
| 921 | + list_for_each_entry(req, &buf->rb_allreqs, rl_all) { |
|---|
| 922 | + rc = rpcrdma_req_setup(r_xprt, req); |
|---|
| 923 | + if (rc) |
|---|
| 924 | + return rc; |
|---|
| 925 | + } |
|---|
| 926 | + return 0; |
|---|
| 927 | +} |
|---|
| 928 | + |
|---|
| 929 | +static void rpcrdma_req_reset(struct rpcrdma_req *req) |
|---|
| 930 | +{ |
|---|
| 931 | + /* Credits are valid for only one connection */ |
|---|
| 932 | + req->rl_slot.rq_cong = 0; |
|---|
| 933 | + |
|---|
| 934 | + rpcrdma_regbuf_free(req->rl_rdmabuf); |
|---|
| 935 | + req->rl_rdmabuf = NULL; |
|---|
| 936 | + |
|---|
| 937 | + rpcrdma_regbuf_dma_unmap(req->rl_sendbuf); |
|---|
| 938 | + rpcrdma_regbuf_dma_unmap(req->rl_recvbuf); |
|---|
| 939 | + |
|---|
| 940 | + frwr_reset(req); |
|---|
| 941 | +} |
|---|
| 942 | + |
|---|
| 943 | +/* ASSUMPTION: the rb_allreqs list is stable for the duration, |
|---|
| 944 | + * and thus can be walked without holding rb_lock; e.g., the |
|---|
| 945 | + * caller is holding the transport send lock to exclude |
|---|
| 946 | + * device removal or disconnection. |
|---|
| 947 | + */ |
|---|
| 948 | +static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt) |
|---|
| 949 | +{ |
|---|
| 950 | + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
|---|
| 951 | + struct rpcrdma_req *req; |
|---|
| 952 | + |
|---|
| 953 | + list_for_each_entry(req, &buf->rb_allreqs, rl_all) |
|---|
| 954 | + rpcrdma_req_reset(req); |
|---|
| 955 | +} |
|---|
| 956 | + |
|---|
| 957 | +/* No locking needed here. This function is called only by the |
|---|
| 958 | + * Receive completion handler. |
|---|
| 959 | + */ |
|---|
| 960 | +static noinline |
|---|
| 961 | +struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, |
|---|
| 962 | + bool temp) |
|---|
| 963 | +{ |
|---|
| 964 | + struct rpcrdma_rep *rep; |
|---|
| 965 | + |
|---|
| 1097 | 966 | rep = kzalloc(sizeof(*rep), GFP_KERNEL); |
|---|
| 1098 | 967 | if (rep == NULL) |
|---|
| 1099 | 968 | goto out; |
|---|
| 1100 | 969 | |
|---|
| 1101 | | - rep->rr_rdmabuf = rpcrdma_alloc_regbuf(cdata->inline_rsize, |
|---|
| 970 | + rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep->re_inline_recv, |
|---|
| 1102 | 971 | DMA_FROM_DEVICE, GFP_KERNEL); |
|---|
| 1103 | | - if (IS_ERR(rep->rr_rdmabuf)) { |
|---|
| 1104 | | - rc = PTR_ERR(rep->rr_rdmabuf); |
|---|
| 972 | + if (!rep->rr_rdmabuf) |
|---|
| 1105 | 973 | goto out_free; |
|---|
| 1106 | | - } |
|---|
| 1107 | | - xdr_buf_init(&rep->rr_hdrbuf, rep->rr_rdmabuf->rg_base, |
|---|
| 1108 | | - rdmab_length(rep->rr_rdmabuf)); |
|---|
| 1109 | 974 | |
|---|
| 975 | + xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rep->rr_rdmabuf), |
|---|
| 976 | + rdmab_length(rep->rr_rdmabuf)); |
|---|
| 1110 | 977 | rep->rr_cqe.done = rpcrdma_wc_receive; |
|---|
| 1111 | 978 | rep->rr_rxprt = r_xprt; |
|---|
| 1112 | | - INIT_WORK(&rep->rr_work, rpcrdma_deferred_completion); |
|---|
| 1113 | 979 | rep->rr_recv_wr.next = NULL; |
|---|
| 1114 | 980 | rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; |
|---|
| 1115 | 981 | rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; |
|---|
| 1116 | 982 | rep->rr_recv_wr.num_sge = 1; |
|---|
| 1117 | 983 | rep->rr_temp = temp; |
|---|
| 1118 | | - |
|---|
| 1119 | | - spin_lock(&buf->rb_lock); |
|---|
| 1120 | | - list_add(&rep->rr_list, &buf->rb_recv_bufs); |
|---|
| 1121 | | - spin_unlock(&buf->rb_lock); |
|---|
| 1122 | | - return 0; |
|---|
| 984 | + list_add(&rep->rr_all, &r_xprt->rx_buf.rb_all_reps); |
|---|
| 985 | + return rep; |
|---|
| 1123 | 986 | |
|---|
| 1124 | 987 | out_free: |
|---|
| 1125 | 988 | kfree(rep); |
|---|
| 1126 | 989 | out: |
|---|
| 1127 | | - dprintk("RPC: %s: reply buffer %d alloc failed\n", |
|---|
| 1128 | | - __func__, rc); |
|---|
| 1129 | | - return rc; |
|---|
| 990 | + return NULL; |
|---|
| 1130 | 991 | } |
|---|
| 1131 | 992 | |
|---|
| 1132 | | -int |
|---|
| 1133 | | -rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) |
|---|
| 993 | +/* No locking needed here. This function is invoked only by the |
|---|
| 994 | + * Receive completion handler, or during transport shutdown. |
|---|
| 995 | + */ |
|---|
| 996 | +static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep) |
|---|
| 997 | +{ |
|---|
| 998 | + list_del(&rep->rr_all); |
|---|
| 999 | + rpcrdma_regbuf_free(rep->rr_rdmabuf); |
|---|
| 1000 | + kfree(rep); |
|---|
| 1001 | +} |
|---|
| 1002 | + |
|---|
| 1003 | +static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf) |
|---|
| 1004 | +{ |
|---|
| 1005 | + struct llist_node *node; |
|---|
| 1006 | + |
|---|
| 1007 | + /* Calls to llist_del_first are required to be serialized */ |
|---|
| 1008 | + node = llist_del_first(&buf->rb_free_reps); |
|---|
| 1009 | + if (!node) |
|---|
| 1010 | + return NULL; |
|---|
| 1011 | + return llist_entry(node, struct rpcrdma_rep, rr_node); |
|---|
| 1012 | +} |
|---|
| 1013 | + |
|---|
| 1014 | +static void rpcrdma_rep_put(struct rpcrdma_buffer *buf, |
|---|
| 1015 | + struct rpcrdma_rep *rep) |
|---|
| 1016 | +{ |
|---|
| 1017 | + llist_add(&rep->rr_node, &buf->rb_free_reps); |
|---|
| 1018 | +} |
|---|
| 1019 | + |
|---|
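
The rep free list is now an llist rather than a list_head behind rb_lock: llist_add() is lock-free and safe from any context, while the llist API requires llist_del_first() callers to serialize among themselves — satisfied here because pops happen only from the Receive completion path while connected, and from teardown after the drain. A sketch of the idiom with a hypothetical item type:

```c
#include <linux/llist.h>

struct demo_item {
	struct llist_node node;
};

/* Pop side: callers must not race with each other, though
 * concurrent demo_pool_put() calls are fine. */
static struct demo_item *demo_pool_get(struct llist_head *pool)
{
	struct llist_node *n = llist_del_first(pool);

	return n ? llist_entry(n, struct demo_item, node) : NULL;
}

/* Push side: any context, no lock. */
static void demo_pool_put(struct llist_head *pool, struct demo_item *it)
{
	llist_add(&it->node, pool);
}
```
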
| 1020 | +static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt) |
|---|
| 1021 | +{ |
|---|
| 1022 | + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
|---|
| 1023 | + struct rpcrdma_rep *rep; |
|---|
| 1024 | + |
|---|
| 1025 | + list_for_each_entry(rep, &buf->rb_all_reps, rr_all) { |
|---|
| 1026 | + rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf); |
|---|
| 1027 | + rep->rr_temp = true; |
|---|
| 1028 | + } |
|---|
| 1029 | +} |
|---|
| 1030 | + |
|---|
| 1031 | +static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf) |
|---|
| 1032 | +{ |
|---|
| 1033 | + struct rpcrdma_rep *rep; |
|---|
| 1034 | + |
|---|
| 1035 | + while ((rep = rpcrdma_rep_get_locked(buf)) != NULL) |
|---|
| 1036 | + rpcrdma_rep_destroy(rep); |
|---|
| 1037 | +} |
|---|
| 1038 | + |
|---|
| 1039 | +/** |
|---|
| 1040 | + * rpcrdma_buffer_create - Create initial set of req/rep objects |
|---|
| 1041 | + * @r_xprt: transport instance to (re)initialize |
|---|
| 1042 | + * |
|---|
| 1043 | + * Returns zero on success, otherwise a negative errno. |
|---|
| 1044 | + */ |
|---|
| 1045 | +int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) |
|---|
| 1134 | 1046 | { |
|---|
| 1135 | 1047 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
|---|
| 1136 | 1048 | int i, rc; |
|---|
| 1137 | 1049 | |
|---|
| 1138 | | - buf->rb_max_requests = r_xprt->rx_data.max_requests; |
|---|
| 1139 | 1050 | buf->rb_bc_srv_max_requests = 0; |
|---|
| 1140 | | - spin_lock_init(&buf->rb_mrlock); |
|---|
| 1141 | 1051 | spin_lock_init(&buf->rb_lock); |
|---|
| 1142 | | - spin_lock_init(&buf->rb_recovery_lock); |
|---|
| 1143 | 1052 | INIT_LIST_HEAD(&buf->rb_mrs); |
|---|
| 1144 | | - INIT_LIST_HEAD(&buf->rb_all); |
|---|
| 1145 | | - INIT_LIST_HEAD(&buf->rb_stale_mrs); |
|---|
| 1146 | | - INIT_DELAYED_WORK(&buf->rb_refresh_worker, |
|---|
| 1147 | | - rpcrdma_mr_refresh_worker); |
|---|
| 1148 | | - INIT_DELAYED_WORK(&buf->rb_recovery_worker, |
|---|
| 1149 | | - rpcrdma_mr_recovery_worker); |
|---|
| 1150 | | - |
|---|
| 1151 | | - rpcrdma_mrs_create(r_xprt); |
|---|
| 1053 | + INIT_LIST_HEAD(&buf->rb_all_mrs); |
|---|
| 1054 | + INIT_WORK(&buf->rb_refresh_worker, rpcrdma_mr_refresh_worker); |
|---|
| 1152 | 1055 | |
|---|
| 1153 | 1056 | INIT_LIST_HEAD(&buf->rb_send_bufs); |
|---|
| 1154 | 1057 | INIT_LIST_HEAD(&buf->rb_allreqs); |
|---|
| 1155 | | - spin_lock_init(&buf->rb_reqslock); |
|---|
| 1156 | | - for (i = 0; i < buf->rb_max_requests; i++) { |
|---|
| 1058 | + INIT_LIST_HEAD(&buf->rb_all_reps); |
|---|
| 1059 | + |
|---|
| 1060 | + rc = -ENOMEM; |
|---|
| 1061 | + for (i = 0; i < r_xprt->rx_xprt.max_reqs; i++) { |
|---|
| 1157 | 1062 | struct rpcrdma_req *req; |
|---|
| 1158 | 1063 | |
|---|
| 1159 | | - req = rpcrdma_create_req(r_xprt); |
|---|
| 1160 | | - if (IS_ERR(req)) { |
|---|
| 1161 | | - dprintk("RPC: %s: request buffer %d alloc" |
|---|
| 1162 | | - " failed\n", __func__, i); |
|---|
| 1163 | | - rc = PTR_ERR(req); |
|---|
| 1064 | + req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE * 2, |
|---|
| 1065 | + GFP_KERNEL); |
|---|
| 1066 | + if (!req) |
|---|
| 1164 | 1067 | goto out; |
|---|
| 1165 | | - } |
|---|
| 1166 | 1068 | list_add(&req->rl_list, &buf->rb_send_bufs); |
|---|
| 1167 | 1069 | } |
|---|
| 1168 | 1070 | |
|---|
| 1169 | | - buf->rb_credits = 1; |
|---|
| 1170 | | - buf->rb_posted_receives = 0; |
|---|
| 1171 | | - INIT_LIST_HEAD(&buf->rb_recv_bufs); |
|---|
| 1172 | | - |
|---|
| 1173 | | - rc = rpcrdma_sendctxs_create(r_xprt); |
|---|
| 1174 | | - if (rc) |
|---|
| 1175 | | - goto out; |
|---|
| 1071 | + init_llist_head(&buf->rb_free_reps); |
|---|
| 1176 | 1072 | |
|---|
| 1177 | 1073 | return 0; |
|---|
| 1178 | 1074 | out: |
|---|
| .. | .. |
|---|
| 1180 | 1076 | return rc; |
|---|
| 1181 | 1077 | } |
|---|
| 1182 | 1078 | |
|---|
| 1183 | | -static void |
|---|
| 1184 | | -rpcrdma_destroy_rep(struct rpcrdma_rep *rep) |
|---|
| 1079 | +/** |
|---|
| 1080 | + * rpcrdma_req_destroy - Destroy an rpcrdma_req object |
|---|
| 1081 | + * @req: unused object to be destroyed |
|---|
| 1082 | + * |
|---|
| 1083 | + * Relies on caller holding the transport send lock to protect |
|---|
| 1084 | + * removing req->rl_all from buf->rb_all_reqs safely. |
|---|
| 1085 | + */ |
|---|
| 1086 | +void rpcrdma_req_destroy(struct rpcrdma_req *req) |
|---|
| 1185 | 1087 | { |
|---|
| 1186 | | - rpcrdma_free_regbuf(rep->rr_rdmabuf); |
|---|
| 1187 | | - kfree(rep); |
|---|
| 1188 | | -} |
|---|
| 1088 | + struct rpcrdma_mr *mr; |
|---|
| 1189 | 1089 | |
|---|
| 1190 | | -void |
|---|
| 1191 | | -rpcrdma_destroy_req(struct rpcrdma_req *req) |
|---|
| 1192 | | -{ |
|---|
| 1193 | | - rpcrdma_free_regbuf(req->rl_recvbuf); |
|---|
| 1194 | | - rpcrdma_free_regbuf(req->rl_sendbuf); |
|---|
| 1195 | | - rpcrdma_free_regbuf(req->rl_rdmabuf); |
|---|
| 1090 | + list_del(&req->rl_all); |
|---|
| 1091 | + |
|---|
| 1092 | + while ((mr = rpcrdma_mr_pop(&req->rl_free_mrs))) { |
|---|
| 1093 | + struct rpcrdma_buffer *buf = &mr->mr_xprt->rx_buf; |
|---|
| 1094 | + |
|---|
| 1095 | + spin_lock(&buf->rb_lock); |
|---|
| 1096 | + list_del(&mr->mr_all); |
|---|
| 1097 | + spin_unlock(&buf->rb_lock); |
|---|
| 1098 | + |
|---|
| 1099 | + frwr_release_mr(mr); |
|---|
| 1100 | + } |
|---|
| 1101 | + |
|---|
| 1102 | + rpcrdma_regbuf_free(req->rl_recvbuf); |
|---|
| 1103 | + rpcrdma_regbuf_free(req->rl_sendbuf); |
|---|
| 1104 | + rpcrdma_regbuf_free(req->rl_rdmabuf); |
|---|
| 1196 | 1105 | kfree(req); |
|---|
| 1197 | 1106 | } |
|---|
| 1198 | 1107 | |
|---|
| 1199 | | -static void |
|---|
| 1200 | | -rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf) |
|---|
| 1108 | +/** |
|---|
| 1109 | + * rpcrdma_mrs_destroy - Release all of a transport's MRs |
|---|
| 1110 | + * @r_xprt: controlling transport instance |
|---|
| 1111 | + * |
|---|
| 1112 | + * Relies on caller holding the transport send lock to protect |
|---|
| 1113 | + * removing mr->mr_list from req->rl_free_mrs safely. |
|---|
| 1114 | + */ |
|---|
| 1115 | +static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt) |
|---|
| 1201 | 1116 | { |
|---|
| 1202 | | - struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, |
|---|
| 1203 | | - rx_buf); |
|---|
| 1204 | | - struct rpcrdma_ia *ia = rdmab_to_ia(buf); |
|---|
| 1117 | + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
|---|
| 1205 | 1118 | struct rpcrdma_mr *mr; |
|---|
| 1206 | | - unsigned int count; |
|---|
| 1207 | 1119 | |
|---|
| 1208 | | - count = 0; |
|---|
| 1209 | | - spin_lock(&buf->rb_mrlock); |
|---|
| 1210 | | - while (!list_empty(&buf->rb_all)) { |
|---|
| 1211 | | - mr = list_entry(buf->rb_all.next, struct rpcrdma_mr, mr_all); |
|---|
| 1120 | + cancel_work_sync(&buf->rb_refresh_worker); |
|---|
| 1121 | + |
|---|
| 1122 | + spin_lock(&buf->rb_lock); |
|---|
| 1123 | + while ((mr = list_first_entry_or_null(&buf->rb_all_mrs, |
|---|
| 1124 | + struct rpcrdma_mr, |
|---|
| 1125 | + mr_all)) != NULL) { |
|---|
| 1126 | + list_del(&mr->mr_list); |
|---|
| 1212 | 1127 | list_del(&mr->mr_all); |
|---|
| 1128 | + spin_unlock(&buf->rb_lock); |
|---|
| 1213 | 1129 | |
|---|
| 1214 | | - spin_unlock(&buf->rb_mrlock); |
|---|
| 1130 | + frwr_release_mr(mr); |
|---|
| 1215 | 1131 | |
|---|
| 1216 | | - /* Ensure MW is not on any rl_registered list */ |
|---|
| 1217 | | - if (!list_empty(&mr->mr_list)) |
|---|
| 1218 | | - list_del(&mr->mr_list); |
|---|
| 1219 | | - |
|---|
| 1220 | | - ia->ri_ops->ro_release_mr(mr); |
|---|
| 1221 | | - count++; |
|---|
| 1222 | | - spin_lock(&buf->rb_mrlock); |
|---|
| 1132 | + spin_lock(&buf->rb_lock); |
|---|
| 1223 | 1133 | } |
|---|
| 1224 | | - spin_unlock(&buf->rb_mrlock); |
|---|
| 1225 | | - r_xprt->rx_stats.mrs_allocated = 0; |
|---|
| 1226 | | - |
|---|
| 1227 | | - dprintk("RPC: %s: released %u MRs\n", __func__, count); |
|---|
| 1134 | + spin_unlock(&buf->rb_lock); |
|---|
| 1228 | 1135 | } |
|---|
| 1229 | 1136 | |
|---|
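
The teardown loop is the standard "drop the lock around a blocking call" shape: each MR is unlinked from both lists while rb_lock is held, the lock is dropped for frwr_release_mr() (which may block in the verbs layer), and then retaken before re-reading the list head. Reduced to a sketch with a hypothetical pool:

```c
static void demo_pool_drain(struct demo_pool *p)
{
	struct demo_item *it;

	spin_lock(&p->lock);
	while ((it = list_first_entry_or_null(&p->items,
					      struct demo_item,
					      link)) != NULL) {
		list_del(&it->link);
		spin_unlock(&p->lock);  /* release may block */

		demo_item_release(it);  /* hypothetical teardown */

		spin_lock(&p->lock);    /* re-read list after retake */
	}
	spin_unlock(&p->lock);
}
```
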
| 1137 | +/** |
|---|
| 1138 | + * rpcrdma_buffer_destroy - Release all hw resources |
|---|
| 1139 | + * @buf: root control block for resources |
|---|
| 1140 | + * |
|---|
| 1141 | + * ORDERING: relies on a prior rpcrdma_xprt_drain: |
|---|
| 1142 | + * - No more Send or Receive completions can occur |
|---|
| 1143 | + * - All MRs, reps, and reqs are returned to their free lists |
|---|
| 1144 | + */ |
|---|
| 1230 | 1145 | void |
|---|
| 1231 | 1146 | rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) |
|---|
| 1232 | 1147 | { |
|---|
| 1233 | | - cancel_delayed_work_sync(&buf->rb_recovery_worker); |
|---|
| 1234 | | - cancel_delayed_work_sync(&buf->rb_refresh_worker); |
|---|
| 1148 | + rpcrdma_reps_destroy(buf); |
|---|
| 1235 | 1149 | |
|---|
| 1236 | | - rpcrdma_sendctxs_destroy(buf); |
|---|
| 1237 | | - |
|---|
| 1238 | | - while (!list_empty(&buf->rb_recv_bufs)) { |
|---|
| 1239 | | - struct rpcrdma_rep *rep; |
|---|
| 1240 | | - |
|---|
| 1241 | | - rep = list_first_entry(&buf->rb_recv_bufs, |
|---|
| 1242 | | - struct rpcrdma_rep, rr_list); |
|---|
| 1243 | | - list_del(&rep->rr_list); |
|---|
| 1244 | | - rpcrdma_destroy_rep(rep); |
|---|
| 1245 | | - } |
|---|
| 1246 | | - |
|---|
| 1247 | | - spin_lock(&buf->rb_reqslock); |
|---|
| 1248 | | - while (!list_empty(&buf->rb_allreqs)) { |
|---|
| 1150 | + while (!list_empty(&buf->rb_send_bufs)) { |
|---|
| 1249 | 1151 | struct rpcrdma_req *req; |
|---|
| 1250 | 1152 | |
|---|
| 1251 | | - req = list_first_entry(&buf->rb_allreqs, |
|---|
| 1252 | | - struct rpcrdma_req, rl_all); |
|---|
| 1253 | | - list_del(&req->rl_all); |
|---|
| 1254 | | - |
|---|
| 1255 | | - spin_unlock(&buf->rb_reqslock); |
|---|
| 1256 | | - rpcrdma_destroy_req(req); |
|---|
| 1257 | | - spin_lock(&buf->rb_reqslock); |
|---|
| 1153 | + req = list_first_entry(&buf->rb_send_bufs, |
|---|
| 1154 | + struct rpcrdma_req, rl_list); |
|---|
| 1155 | + list_del(&req->rl_list); |
|---|
| 1156 | + rpcrdma_req_destroy(req); |
|---|
| 1258 | 1157 | } |
|---|
| 1259 | | - spin_unlock(&buf->rb_reqslock); |
|---|
| 1260 | | - |
|---|
| 1261 | | - rpcrdma_mrs_destroy(buf); |
|---|
| 1262 | 1158 | } |
|---|
| 1263 | 1159 | |
|---|
| 1264 | 1160 | /** |
|---|
| .. | .. |
|---|
| 1272 | 1168 | rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt) |
|---|
| 1273 | 1169 | { |
|---|
| 1274 | 1170 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
|---|
| 1275 | | - struct rpcrdma_mr *mr = NULL; |
|---|
| 1171 | + struct rpcrdma_mr *mr; |
|---|
| 1276 | 1172 | |
|---|
| 1277 | | - spin_lock(&buf->rb_mrlock); |
|---|
| 1278 | | - if (!list_empty(&buf->rb_mrs)) |
|---|
| 1279 | | - mr = rpcrdma_mr_pop(&buf->rb_mrs); |
|---|
| 1280 | | - spin_unlock(&buf->rb_mrlock); |
|---|
| 1281 | | - |
|---|
| 1282 | | - if (!mr) |
|---|
| 1283 | | - goto out_nomrs; |
|---|
| 1173 | + spin_lock(&buf->rb_lock); |
|---|
| 1174 | + mr = rpcrdma_mr_pop(&buf->rb_mrs); |
|---|
| 1175 | + spin_unlock(&buf->rb_lock); |
|---|
| 1284 | 1176 | return mr; |
|---|
| 1285 | | - |
|---|
| 1286 | | -out_nomrs: |
|---|
| 1287 | | - trace_xprtrdma_nomrs(r_xprt); |
|---|
| 1288 | | - if (r_xprt->rx_ep.rep_connected != -ENODEV) |
|---|
| 1289 | | - schedule_delayed_work(&buf->rb_refresh_worker, 0); |
|---|
| 1290 | | - |
|---|
| 1291 | | - /* Allow the reply handler and refresh worker to run */ |
|---|
| 1292 | | - cond_resched(); |
|---|
| 1293 | | - |
|---|
| 1294 | | - return NULL; |
|---|
| 1295 | | -} |
|---|
| 1296 | | - |
|---|
| 1297 | | -static void |
|---|
| 1298 | | -__rpcrdma_mr_put(struct rpcrdma_buffer *buf, struct rpcrdma_mr *mr) |
|---|
| 1299 | | -{ |
|---|
| 1300 | | - spin_lock(&buf->rb_mrlock); |
|---|
| 1301 | | - rpcrdma_mr_push(mr, &buf->rb_mrs); |
|---|
| 1302 | | - spin_unlock(&buf->rb_mrlock); |
|---|
| 1303 | 1177 | } |
|---|
| 1304 | 1178 | |
|---|
| 1305 | 1179 | /** |
|---|
| 1306 | | - * rpcrdma_mr_put - Release an rpcrdma_mr object |
|---|
| 1307 | | - * @mr: object to release |
|---|
| 1180 | + * rpcrdma_mr_put - DMA unmap an MR and release it |
|---|
| 1181 | + * @mr: MR to release |
|---|
| 1308 | 1182 | * |
|---|
| 1309 | 1183 | */ |
|---|
| 1310 | | -void |
|---|
| 1311 | | -rpcrdma_mr_put(struct rpcrdma_mr *mr) |
|---|
| 1312 | | -{ |
|---|
| 1313 | | - __rpcrdma_mr_put(&mr->mr_xprt->rx_buf, mr); |
|---|
| 1314 | | -} |
|---|
| 1315 | | - |
|---|
| 1316 | | -/** |
|---|
| 1317 | | - * rpcrdma_mr_unmap_and_put - DMA unmap an MR and release it |
|---|
| 1318 | | - * @mr: object to release |
|---|
| 1319 | | - * |
|---|
| 1320 | | - */ |
|---|
| 1321 | | -void |
|---|
| 1322 | | -rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr) |
|---|
| 1184 | +void rpcrdma_mr_put(struct rpcrdma_mr *mr) |
|---|
| 1323 | 1185 | { |
|---|
| 1324 | 1186 | struct rpcrdma_xprt *r_xprt = mr->mr_xprt; |
|---|
| 1325 | 1187 | |
|---|
| 1326 | | - trace_xprtrdma_dma_unmap(mr); |
|---|
| 1327 | | - ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, |
|---|
| 1328 | | - mr->mr_sg, mr->mr_nents, mr->mr_dir); |
|---|
| 1329 | | - __rpcrdma_mr_put(&r_xprt->rx_buf, mr); |
|---|
| 1188 | + if (mr->mr_dir != DMA_NONE) { |
|---|
| 1189 | + trace_xprtrdma_mr_unmap(mr); |
|---|
| 1190 | + ib_dma_unmap_sg(r_xprt->rx_ep->re_id->device, |
|---|
| 1191 | + mr->mr_sg, mr->mr_nents, mr->mr_dir); |
|---|
| 1192 | + mr->mr_dir = DMA_NONE; |
|---|
| 1193 | + } |
|---|
| 1194 | + |
|---|
| 1195 | + rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs); |
|---|
| 1196 | +} |
|---|
| 1197 | + |
|---|
| 1198 | +/** |
|---|
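
Folding the "is this mapped?" state into mr_dir makes the release idempotent: once the field reads DMA_NONE, another trip through rpcrdma_mr_put(), or a put of a never-mapped MR, skips ib_dma_unmap_sg() rather than unmapping twice. The guard on its own, with a hypothetical struct:

```c
struct demo_map {
	enum dma_data_direction dir;    /* DMA_NONE == not mapped */
	/* device handle, scatterlist, nents, ... */
};

static void demo_unmap(struct demo_map *m)
{
	if (m->dir == DMA_NONE)
		return;                 /* already, or never, mapped */

	/* dma_unmap_sg(dev, m->sg, m->nents, m->dir); */
	m->dir = DMA_NONE;              /* arm the guard for next time */
}
```
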
| 1199 | + * rpcrdma_reply_put - Put reply buffers back into pool |
|---|
| 1200 | + * @buffers: buffer pool |
|---|
| 1201 | + * @req: object to return |
|---|
| 1202 | + * |
|---|
| 1203 | + */ |
|---|
| 1204 | +void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) |
|---|
| 1205 | +{ |
|---|
| 1206 | + if (req->rl_reply) { |
|---|
| 1207 | + rpcrdma_rep_put(buffers, req->rl_reply); |
|---|
| 1208 | + req->rl_reply = NULL; |
|---|
| 1209 | + } |
|---|
| 1330 | 1210 | } |
|---|
| 1331 | 1211 | |
|---|
| 1332 | 1212 | /** |
|---|
| .. | .. |
|---|
| 1351 | 1231 | |
|---|
| 1352 | 1232 | /** |
|---|
| 1353 | 1233 | * rpcrdma_buffer_put - Put request/reply buffers back into pool |
|---|
| 1234 | + * @buffers: buffer pool |
|---|
| 1354 | 1235 | * @req: object to return |
|---|
| 1355 | 1236 | * |
|---|
| 1356 | 1237 | */ |
|---|
| 1357 | | -void |
|---|
| 1358 | | -rpcrdma_buffer_put(struct rpcrdma_req *req) |
|---|
| 1238 | +void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) |
|---|
| 1359 | 1239 | { |
|---|
| 1360 | | - struct rpcrdma_buffer *buffers = req->rl_buffer; |
|---|
| 1361 | | - struct rpcrdma_rep *rep = req->rl_reply; |
|---|
| 1362 | | - |
|---|
| 1363 | | - req->rl_reply = NULL; |
|---|
| 1240 | + rpcrdma_reply_put(buffers, req); |
|---|
| 1364 | 1241 | |
|---|
| 1365 | 1242 | spin_lock(&buffers->rb_lock); |
|---|
| 1366 | 1243 | list_add(&req->rl_list, &buffers->rb_send_bufs); |
|---|
| 1367 | | - if (rep) { |
|---|
| 1368 | | - if (!rep->rr_temp) { |
|---|
| 1369 | | - list_add(&rep->rr_list, &buffers->rb_recv_bufs); |
|---|
| 1370 | | - rep = NULL; |
|---|
| 1371 | | - } |
|---|
| 1372 | | - } |
|---|
| 1373 | 1244 | spin_unlock(&buffers->rb_lock); |
|---|
| 1374 | | - if (rep) |
|---|
| 1375 | | - rpcrdma_destroy_rep(rep); |
|---|
| 1376 | | -} |
|---|
| 1377 | | - |
|---|
| 1378 | | -/* |
|---|
| 1379 | | - * Put reply buffers back into pool when not attached to |
|---|
| 1380 | | - * request. This happens in error conditions. |
|---|
| 1381 | | - */ |
|---|
| 1382 | | -void |
|---|
| 1383 | | -rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) |
|---|
| 1384 | | -{ |
|---|
| 1385 | | - struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf; |
|---|
| 1386 | | - |
|---|
| 1387 | | - if (!rep->rr_temp) { |
|---|
| 1388 | | - spin_lock(&buffers->rb_lock); |
|---|
| 1389 | | - list_add(&rep->rr_list, &buffers->rb_recv_bufs); |
|---|
| 1390 | | - spin_unlock(&buffers->rb_lock); |
|---|
| 1391 | | - } else { |
|---|
| 1392 | | - rpcrdma_destroy_rep(rep); |
|---|
| 1393 | | - } |
|---|
| 1394 | 1245 | } |
|---|
| 1395 | 1246 | |
|---|
| 1396 | 1247 | /** |
|---|
| 1397 | | - * rpcrdma_alloc_regbuf - allocate and DMA-map memory for SEND/RECV buffers |
|---|
| 1398 | | - * @size: size of buffer to be allocated, in bytes |
|---|
| 1399 | | - * @direction: direction of data movement |
|---|
| 1400 | | - * @flags: GFP flags |
|---|
| 1248 | + * rpcrdma_recv_buffer_put - Release rpcrdma_rep back to free list |
|---|
| 1249 | + * @rep: rep to release |
|---|
| 1401 | 1250 | * |
|---|
| 1402 | | - * Returns an ERR_PTR, or a pointer to a regbuf, a buffer that |
|---|
| 1403 | | - * can be persistently DMA-mapped for I/O. |
|---|
| 1251 | + * Used after error conditions. |
|---|
| 1252 | + */ |
|---|
| 1253 | +void rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) |
|---|
| 1254 | +{ |
|---|
| 1255 | + rpcrdma_rep_put(&rep->rr_rxprt->rx_buf, rep); |
|---|
| 1256 | +} |
|---|
| 1257 | + |
|---|
| 1258 | +/* Returns a pointer to a rpcrdma_regbuf object, or NULL. |
|---|
| 1404 | 1259 | * |
|---|
| 1405 | 1260 | * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for |
|---|
| 1406 | 1261 | * receiving the payload of RDMA RECV operations. During Long Calls |
|---|
| 1407 | | - * or Replies they may be registered externally via ro_map. |
|---|
| 1262 | + * or Replies they may be registered externally via frwr_map. |
|---|
| 1408 | 1263 | */ |
|---|
| 1409 | | -struct rpcrdma_regbuf * |
|---|
| 1410 | | -rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction, |
|---|
| 1264 | +static struct rpcrdma_regbuf * |
|---|
| 1265 | +rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, |
|---|
| 1411 | 1266 | gfp_t flags) |
|---|
| 1412 | 1267 | { |
|---|
| 1413 | 1268 | struct rpcrdma_regbuf *rb; |
|---|
| 1414 | 1269 | |
|---|
| 1415 | | - rb = kmalloc(sizeof(*rb) + size, flags); |
|---|
| 1416 | | - if (rb == NULL) |
|---|
| 1417 | | - return ERR_PTR(-ENOMEM); |
|---|
| 1270 | + rb = kmalloc(sizeof(*rb), flags); |
|---|
| 1271 | + if (!rb) |
|---|
| 1272 | + return NULL; |
|---|
| 1273 | + rb->rg_data = kmalloc(size, flags); |
|---|
| 1274 | + if (!rb->rg_data) { |
|---|
| 1275 | + kfree(rb); |
|---|
| 1276 | + return NULL; |
|---|
| 1277 | + } |
|---|
| 1418 | 1278 | |
|---|
| 1419 | 1279 | rb->rg_device = NULL; |
|---|
| 1420 | 1280 | rb->rg_direction = direction; |
|---|
| 1421 | 1281 | rb->rg_iov.length = size; |
|---|
| 1422 | | - |
|---|
| 1423 | 1282 | return rb; |
|---|
| 1424 | 1283 | } |
|---|
| 1425 | 1284 | |
|---|
| 1426 | 1285 | /** |
|---|
| 1427 | | - * __rpcrdma_map_regbuf - DMA-map a regbuf |
|---|
| 1428 | | - * @ia: controlling rpcrdma_ia |
|---|
| 1429 | | - * @rb: regbuf to be mapped |
|---|
| 1286 | + * rpcrdma_regbuf_realloc - re-allocate a SEND/RECV buffer |
|---|
| 1287 | + * @rb: regbuf to reallocate |
|---|
| 1288 | + * @size: size of buffer to be allocated, in bytes |
|---|
| 1289 | + * @flags: GFP flags |
|---|
| 1290 | + * |
|---|
| 1291 | + * Returns true if reallocation was successful. If false is |
|---|
| 1292 | + * returned, @rb is left untouched. |
|---|
| 1430 | 1293 | */ |
|---|
| 1431 | | -bool |
|---|
| 1432 | | -__rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) |
|---|
| 1294 | +bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags) |
|---|
| 1433 | 1295 | { |
|---|
| 1434 | | - struct ib_device *device = ia->ri_device; |
|---|
| 1296 | + void *buf; |
|---|
| 1297 | + |
|---|
| 1298 | + buf = kmalloc(size, flags); |
|---|
| 1299 | + if (!buf) |
|---|
| 1300 | + return false; |
|---|
| 1301 | + |
|---|
| 1302 | + rpcrdma_regbuf_dma_unmap(rb); |
|---|
| 1303 | + kfree(rb->rg_data); |
|---|
| 1304 | + |
|---|
| 1305 | + rb->rg_data = buf; |
|---|
| 1306 | + rb->rg_iov.length = size; |
|---|
| 1307 | + return true; |
|---|
| 1308 | +} |
|---|
| 1309 | + |
|---|
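
rpcrdma_regbuf_realloc() follows an all-or-nothing resize discipline: acquire the replacement buffer first, and only then unmap and free the old one, so on allocation failure the caller still holds a fully usable regbuf, as the kdoc promises. The same shape in isolation, with a hypothetical type:

```c
struct demo_buf {
	void *data;
	size_t len;
};

static bool demo_resize(struct demo_buf *b, size_t size, gfp_t flags)
{
	void *fresh = kmalloc(size, flags);

	if (!fresh)
		return false;           /* *b left untouched */

	kfree(b->data);
	b->data = fresh;
	b->len = size;
	return true;
}
```
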
| 1310 | +/** |
|---|
| 1311 | + * __rpcrdma_regbuf_dma_map - DMA-map a regbuf |
|---|
| 1312 | + * @r_xprt: controlling transport instance |
|---|
| 1313 | + * @rb: regbuf to be mapped |
|---|
| 1314 | + * |
|---|
| 1315 | + * Returns true if the buffer is now DMA mapped to @r_xprt's device |
|---|
| 1316 | + */ |
|---|
| 1317 | +bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt, |
|---|
| 1318 | + struct rpcrdma_regbuf *rb) |
|---|
| 1319 | +{ |
|---|
| 1320 | + struct ib_device *device = r_xprt->rx_ep->re_id->device; |
|---|
| 1435 | 1321 | |
|---|
| 1436 | 1322 | if (rb->rg_direction == DMA_NONE) |
|---|
| 1437 | 1323 | return false; |
|---|
| 1438 | 1324 | |
|---|
| 1439 | | - rb->rg_iov.addr = ib_dma_map_single(device, |
|---|
| 1440 | | - (void *)rb->rg_base, |
|---|
| 1441 | | - rdmab_length(rb), |
|---|
| 1442 | | - rb->rg_direction); |
|---|
| 1443 | | - if (ib_dma_mapping_error(device, rdmab_addr(rb))) |
|---|
| 1325 | + rb->rg_iov.addr = ib_dma_map_single(device, rdmab_data(rb), |
|---|
| 1326 | + rdmab_length(rb), rb->rg_direction); |
|---|
| 1327 | + if (ib_dma_mapping_error(device, rdmab_addr(rb))) { |
|---|
| 1328 | + trace_xprtrdma_dma_maperr(rdmab_addr(rb)); |
|---|
| 1444 | 1329 | return false; |
|---|
| 1330 | + } |
|---|
| 1445 | 1331 | |
|---|
| 1446 | 1332 | rb->rg_device = device; |
|---|
| 1447 | | - rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey; |
|---|
| 1333 | + rb->rg_iov.lkey = r_xprt->rx_ep->re_pd->local_dma_lkey; |
|---|
| 1448 | 1334 | return true; |
|---|
| 1449 | 1335 | } |
|---|
| 1450 | 1336 | |
|---|
| 1451 | | -static void |
|---|
| 1452 | | -rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb) |
|---|
| 1337 | +static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb) |
|---|
| 1453 | 1338 | { |
|---|
| 1454 | 1339 | if (!rb) |
|---|
| 1455 | 1340 | return; |
|---|
| .. | .. |
|---|
| 1457 | 1342 | if (!rpcrdma_regbuf_is_mapped(rb)) |
|---|
| 1458 | 1343 | return; |
|---|
| 1459 | 1344 | |
|---|
| 1460 | | - ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb), |
|---|
| 1461 | | - rdmab_length(rb), rb->rg_direction); |
|---|
| 1345 | + ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb), rdmab_length(rb), |
|---|
| 1346 | + rb->rg_direction); |
|---|
| 1462 | 1347 | rb->rg_device = NULL; |
|---|
| 1463 | 1348 | } |
|---|
| 1464 | 1349 | |
|---|
| 1465 | | -/** |
|---|
| 1466 | | - * rpcrdma_free_regbuf - deregister and free registered buffer |
|---|
| 1467 | | - * @rb: regbuf to be deregistered and freed |
|---|
| 1468 | | - */ |
|---|
| 1469 | | -void |
|---|
| 1470 | | -rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb) |
|---|
| 1350 | +static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb) |
|---|
| 1471 | 1351 | { |
|---|
| 1472 | | - rpcrdma_dma_unmap_regbuf(rb); |
|---|
| 1352 | + rpcrdma_regbuf_dma_unmap(rb); |
|---|
| 1353 | + if (rb) |
|---|
| 1354 | + kfree(rb->rg_data); |
|---|
| 1473 | 1355 | kfree(rb); |
|---|
| 1474 | 1356 | } |
|---|
| 1475 | 1357 | |
|---|
| 1476 | | -/* |
|---|
| 1477 | | - * Prepost any receive buffer, then post send. |
|---|
| 1358 | +/** |
|---|
| 1359 | + * rpcrdma_post_sends - Post WRs to a transport's Send Queue |
|---|
| 1360 | + * @r_xprt: controlling transport instance |
|---|
| 1361 | + * @req: rpcrdma_req containing the Send WR to post |
|---|
| 1478 | 1362 | * |
|---|
| 1479 | | - * Receive buffer is donated to hardware, reclaimed upon recv completion. |
|---|
| 1363 | + * Returns 0 if the post was successful, otherwise -ENOTCONN |
|---|
| 1364 | + * is returned. |
|---|
| 1480 | 1365 | */ |
|---|
| 1481 | | -int |
|---|
| 1482 | | -rpcrdma_ep_post(struct rpcrdma_ia *ia, |
|---|
| 1483 | | - struct rpcrdma_ep *ep, |
|---|
| 1484 | | - struct rpcrdma_req *req) |
|---|
| 1366 | +int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) |
|---|
| 1485 | 1367 | { |
|---|
| 1486 | | - struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr; |
|---|
| 1368 | + struct ib_send_wr *send_wr = &req->rl_wr; |
|---|
| 1369 | + struct rpcrdma_ep *ep = r_xprt->rx_ep; |
|---|
| 1487 | 1370 | int rc; |
|---|
| 1488 | 1371 | |
|---|
| 1489 | | - if (!ep->rep_send_count || |
|---|
| 1490 | | - test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) { |
|---|
| 1372 | + if (!ep->re_send_count || kref_read(&req->rl_kref) > 1) { |
|---|
| 1491 | 1373 | send_wr->send_flags |= IB_SEND_SIGNALED; |
|---|
| 1492 | | - ep->rep_send_count = ep->rep_send_batch; |
|---|
| 1374 | + ep->re_send_count = ep->re_send_batch; |
|---|
| 1493 | 1375 | } else { |
|---|
| 1494 | 1376 | send_wr->send_flags &= ~IB_SEND_SIGNALED; |
|---|
| 1495 | | - --ep->rep_send_count; |
|---|
| 1377 | + --ep->re_send_count; |
|---|
| 1496 | 1378 | } |
|---|
| 1497 | 1379 | |
|---|
| 1498 | | - rc = ia->ri_ops->ro_send(ia, req); |
|---|
| 1499 | | - trace_xprtrdma_post_send(req, rc); |
|---|
| 1380 | + trace_xprtrdma_post_send(req); |
|---|
| 1381 | + rc = frwr_send(r_xprt, req); |
|---|
| 1500 | 1382 | if (rc) |
|---|
| 1501 | 1383 | return -ENOTCONN; |
|---|
| 1502 | 1384 | return 0; |
|---|
| 1503 | 1385 | } |
|---|
| 1504 | 1386 | |
|---|
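
Send completions are batched: only every re_send_batch-th Send carries IB_SEND_SIGNALED (or any Send whose request is still referenced elsewhere, per the rl_kref test), which divides Send completion traffic by roughly the batch size while bounding how many unsignaled WRs can be outstanding. The counting logic by itself, with hypothetical names:

```c
/* Sketch: signal one Send WR out of every 'batch'. */
static void demo_mark_signaled(struct ib_send_wr *wr,
			       unsigned int *countdown,
			       unsigned int batch,
			       bool must_signal)
{
	if (!*countdown || must_signal) {
		wr->send_flags |= IB_SEND_SIGNALED;
		*countdown = batch;     /* restart the batch window */
	} else {
		wr->send_flags &= ~IB_SEND_SIGNALED;
		--*countdown;
	}
}
```
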
| 1505 | 1387 | /** |
|---|
| 1506 | | - * rpcrdma_post_recvs - Maybe post some Receive buffers |
|---|
| 1507 | | - * @r_xprt: controlling transport |
|---|
| 1508 | | - * @temp: when true, allocate temp rpcrdma_rep objects |
|---|
| 1388 | + * rpcrdma_post_recvs - Refill the Receive Queue |
|---|
| 1389 | + * @r_xprt: controlling transport instance |
|---|
| 1390 | + * @needed: current credit grant |
|---|
| 1391 | + * @temp: mark Receive buffers to be deleted after one use |
|---|
| 1509 | 1392 | * |
|---|
| 1510 | 1393 | */ |
|---|
| 1511 | | -void |
|---|
| 1512 | | -rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) |
|---|
| 1394 | +void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp) |
|---|
| 1513 | 1395 | { |
|---|
| 1514 | 1396 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
|---|
| 1397 | + struct rpcrdma_ep *ep = r_xprt->rx_ep; |
|---|
| 1515 | 1398 | struct ib_recv_wr *wr, *bad_wr; |
|---|
| 1516 | | - int needed, count, rc; |
|---|
| 1399 | + struct rpcrdma_rep *rep; |
|---|
| 1400 | + int count, rc; |
|---|
| 1517 | 1401 | |
|---|
| 1518 | | - needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1); |
|---|
| 1519 | | - if (buf->rb_posted_receives > needed) |
|---|
| 1520 | | - return; |
|---|
| 1521 | | - needed -= buf->rb_posted_receives; |
|---|
| 1522 | | - |
|---|
| 1402 | + rc = 0; |
|---|
| 1523 | 1403 | count = 0; |
|---|
| 1404 | + |
|---|
| 1405 | + if (likely(ep->re_receive_count > needed)) |
|---|
| 1406 | + goto out; |
|---|
| 1407 | + needed -= ep->re_receive_count; |
|---|
| 1408 | + if (!temp) |
|---|
| 1409 | + needed += RPCRDMA_MAX_RECV_BATCH; |
|---|
| 1410 | + |
|---|
| 1411 | + /* fast path: all needed reps can be found on the free list */ |
|---|
| 1524 | 1412 | wr = NULL; |
|---|
| 1525 | 1413 | while (needed) { |
|---|
| 1526 | | - struct rpcrdma_regbuf *rb; |
|---|
| 1527 | | - struct rpcrdma_rep *rep; |
|---|
| 1528 | | - |
|---|
| 1529 | | - spin_lock(&buf->rb_lock); |
|---|
| 1530 | | - rep = list_first_entry_or_null(&buf->rb_recv_bufs, |
|---|
| 1531 | | - struct rpcrdma_rep, rr_list); |
|---|
| 1532 | | - if (likely(rep)) |
|---|
| 1533 | | - list_del(&rep->rr_list); |
|---|
| 1534 | | - spin_unlock(&buf->rb_lock); |
|---|
| 1535 | | - if (!rep) { |
|---|
| 1536 | | - if (rpcrdma_create_rep(r_xprt, temp)) |
|---|
| 1537 | | - break; |
|---|
| 1414 | + rep = rpcrdma_rep_get_locked(buf); |
|---|
| 1415 | + if (rep && rep->rr_temp) { |
|---|
| 1416 | + rpcrdma_rep_destroy(rep); |
|---|
| 1538 | 1417 | continue; |
|---|
| 1539 | 1418 | } |
|---|
| 1540 | | - |
|---|
| 1541 | | - rb = rep->rr_rdmabuf; |
|---|
| 1542 | | - if (!rpcrdma_regbuf_is_mapped(rb)) { |
|---|
| 1543 | | - if (!__rpcrdma_dma_map_regbuf(&r_xprt->rx_ia, rb)) { |
|---|
| 1544 | | - rpcrdma_recv_buffer_put(rep); |
|---|
| 1545 | | - break; |
|---|
| 1546 | | - } |
|---|
| 1419 | + if (!rep) |
|---|
| 1420 | + rep = rpcrdma_rep_create(r_xprt, temp); |
|---|
| 1421 | + if (!rep) |
|---|
| 1422 | + break; |
|---|
| 1423 | + if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) { |
|---|
| 1424 | + rpcrdma_rep_put(buf, rep); |
|---|
| 1425 | + break; |
|---|
| 1547 | 1426 | } |
|---|
| 1548 | 1427 | |
|---|
| 1549 | | - trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe); |
|---|
| 1428 | + trace_xprtrdma_post_recv(rep); |
|---|
| 1550 | 1429 | rep->rr_recv_wr.next = wr; |
|---|
| 1551 | 1430 | wr = &rep->rr_recv_wr; |
|---|
| 1552 | | - ++count; |
|---|
| 1553 | 1431 | --needed; |
|---|
| 1432 | + ++count; |
|---|
| 1554 | 1433 | } |
|---|
| 1555 | | - if (!count) |
|---|
| 1556 | | - return; |
|---|
| 1434 | + if (!wr) |
|---|
| 1435 | + goto out; |
|---|
| 1557 | 1436 | |
|---|
| 1558 | | - rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr, |
|---|
| 1437 | + rc = ib_post_recv(ep->re_id->qp, wr, |
|---|
| 1559 | 1438 | (const struct ib_recv_wr **)&bad_wr); |
|---|
| 1439 | +out: |
|---|
| 1440 | + trace_xprtrdma_post_recvs(r_xprt, count, rc); |
|---|
| 1560 | 1441 | if (rc) { |
|---|
| 1561 | 1442 | for (wr = bad_wr; wr;) { |
|---|
| 1562 | 1443 | struct rpcrdma_rep *rep; |
|---|
| .. | .. |
|---|
| 1567 | 1448 | --count; |
|---|
| 1568 | 1449 | } |
|---|
| 1569 | 1450 | } |
|---|
| 1570 | | - buf->rb_posted_receives += count; |
|---|
| 1571 | | - trace_xprtrdma_post_recvs(r_xprt, count, rc); |
|---|
| 1451 | + ep->re_receive_count += count; |
|---|
| 1452 | + return; |
|---|
| 1572 | 1453 | } |
|---|
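
Receive replenishment is likewise batched: each rep's Receive WR is pushed onto a local chain through rr_recv_wr.next, the whole chain is handed to the HCA in one ib_post_recv() call, and on failure bad_wr points at the first WR that was not posted so the error loop above can return those reps to the pool. A reduced sketch, with the rep type hypothetical:

```c
static void demo_refill_rq(struct ib_qp *qp, int needed)
{
	struct ib_recv_wr *chain = NULL;
	const struct ib_recv_wr *bad_wr;

	while (needed--) {
		struct demo_rep *rep = demo_rep_get();  /* hypothetical */

		if (!rep)
			break;
		rep->recv_wr.next = chain;      /* LIFO chain of WRs */
		chain = &rep->recv_wr;
	}
	if (!chain)
		return;

	if (ib_post_recv(qp, chain, &bad_wr)) {
		/* Nothing from bad_wr onward was posted; walk that
		 * sublist and return each rep to the free pool. */
	}
}
```
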