.. | .. |
---|
53 | 53 | #include <linux/slab.h> |
---|
54 | 54 | #include <linux/sunrpc/addr.h> |
---|
55 | 55 | #include <linux/sunrpc/svc_rdma.h> |
---|
| 56 | +#include <linux/log2.h> |
---|
56 | 57 | |
---|
57 | 58 | #include <asm-generic/barrier.h> |
---|
58 | 59 | #include <asm/bitops.h> |
---|
.. | .. |
---|
73 | 74 | /* |
---|
74 | 75 | * internal functions |
---|
75 | 76 | */ |
---|
76 | | -static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); |
---|
| 77 | +static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt); |
---|
| 78 | +static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt); |
---|
| 79 | +static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, |
---|
| 80 | + struct rpcrdma_sendctx *sc); |
---|
| 81 | +static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt); |
---|
| 82 | +static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt); |
---|
| 83 | +static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep); |
---|
| 84 | +static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt); |
---|
77 | 85 | static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); |
---|
78 | | -static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf); |
---|
79 | | -static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp); |
---|
80 | | -static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); |
---|
| 86 | +static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt); |
---|
| 87 | +static void rpcrdma_ep_get(struct rpcrdma_ep *ep); |
---|
| 88 | +static int rpcrdma_ep_put(struct rpcrdma_ep *ep); |
---|
| 89 | +static struct rpcrdma_regbuf * |
---|
| 90 | +rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, |
---|
| 91 | + gfp_t flags); |
---|
| 92 | +static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb); |
---|
| 93 | +static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb); |
---|
81 | 94 | |
---|
82 | | -struct workqueue_struct *rpcrdma_receive_wq __read_mostly; |
---|
83 | | - |
---|
84 | | -int |
---|
85 | | -rpcrdma_alloc_wq(void) |
---|
| 95 | +/* Wait for outstanding transport work to finish. ib_drain_qp |
---|
| 96 | + * handles the drains in the wrong order for us, so open code |
---|
| 97 | + * them here. |
---|
| 98 | + */ |
---|
| 99 | +static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) |
---|
86 | 100 | { |
---|
87 | | - struct workqueue_struct *recv_wq; |
---|
| 101 | + struct rpcrdma_ep *ep = r_xprt->rx_ep; |
---|
| 102 | + struct rdma_cm_id *id = ep->re_id; |
---|
88 | 103 | |
---|
89 | | - recv_wq = alloc_workqueue("xprtrdma_receive", |
---|
90 | | - WQ_MEM_RECLAIM | WQ_HIGHPRI, |
---|
91 | | - 0); |
---|
92 | | - if (!recv_wq) |
---|
93 | | - return -ENOMEM; |
---|
| 104 | + /* Flush Receives, then wait for deferred Reply work |
---|
| 105 | + * to complete. |
---|
| 106 | + */ |
---|
| 107 | + ib_drain_rq(id->qp); |
---|
94 | 108 | |
---|
95 | | - rpcrdma_receive_wq = recv_wq; |
---|
96 | | - return 0; |
---|
| 109 | + /* Deferred Reply processing might have scheduled |
---|
| 110 | + * local invalidations. |
---|
| 111 | + */ |
---|
| 112 | + ib_drain_sq(id->qp); |
---|
| 113 | + |
---|
| 114 | + rpcrdma_ep_put(ep); |
---|
97 | 115 | } |
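The drain order matters here: Receive-driven Reply processing can queue Local Invalidate work that still needs the Send Queue, so Receives are flushed first. For comparison, the generic helper the comment refers to does roughly the opposite, which is why it is open-coded above (a rough sketch of the core helper's ordering, not xprtrdma code):

```c
/* Rough sketch of the generic helper's ordering: Send Queue first,
 * then Receive Queue.  The open-coded drain above reverses this so
 * that deferred Reply handling can still post Send-side WRs.
 */
void ib_drain_qp(struct ib_qp *qp)
{
	ib_drain_sq(qp);
	if (!qp->srq)
		ib_drain_rq(qp);
}
```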
---|
98 | 116 | |
---|
99 | | -void |
---|
100 | | -rpcrdma_destroy_wq(void) |
---|
101 | | -{ |
---|
102 | | - struct workqueue_struct *wq; |
---|
103 | | - |
---|
104 | | - if (rpcrdma_receive_wq) { |
---|
105 | | - wq = rpcrdma_receive_wq; |
---|
106 | | - rpcrdma_receive_wq = NULL; |
---|
107 | | - destroy_workqueue(wq); |
---|
108 | | - } |
---|
109 | | -} |
---|
110 | | - |
---|
111 | | -static void |
---|
112 | | -rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) |
---|
| 117 | +/** |
---|
| 118 | + * rpcrdma_qp_event_handler - Handle one QP event (error notification) |
---|
| 119 | + * @event: details of the event |
---|
| 120 | + * @context: ep that owns QP where event occurred |
---|
| 121 | + * |
---|
| 122 | + * Called from the RDMA provider (device driver) possibly in an interrupt |
---|
| 123 | + * context. The QP is always destroyed before the ID, so the ID will be |
---|
| 124 | + * reliably available when this handler is invoked. |
---|
| 125 | + */ |
---|
| 126 | +static void rpcrdma_qp_event_handler(struct ib_event *event, void *context) |
---|
113 | 127 | { |
---|
114 | 128 | struct rpcrdma_ep *ep = context; |
---|
115 | | - struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt, |
---|
116 | | - rx_ep); |
---|
117 | 129 | |
---|
118 | | - trace_xprtrdma_qp_error(r_xprt, event); |
---|
119 | | - pr_err("rpcrdma: %s on device %s ep %p\n", |
---|
120 | | - ib_event_msg(event->event), event->device->name, context); |
---|
| 130 | + trace_xprtrdma_qp_event(ep, event); |
---|
| 131 | +} |
---|
121 | 132 | |
---|
122 | | - if (ep->rep_connected == 1) { |
---|
123 | | - ep->rep_connected = -EIO; |
---|
124 | | - rpcrdma_conn_func(ep); |
---|
125 | | - wake_up_all(&ep->rep_connect_wait); |
---|
126 | | - } |
---|
| 133 | +/* Ensure xprt_force_disconnect() is invoked exactly once when a |
---|
| 134 | + * connection is closed or lost. (The important thing is it needs |
---|
| 135 | + * to be invoked "at least" once). |
---|
| 136 | + */ |
---|
| 137 | +static void rpcrdma_force_disconnect(struct rpcrdma_ep *ep) |
---|
| 138 | +{ |
---|
| 139 | + if (atomic_add_unless(&ep->re_force_disconnect, 1, 1)) |
---|
| 140 | + xprt_force_disconnect(ep->re_xprt); |
---|
| 141 | +} |
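The once-only guarantee comes from the atomic_add_unless() idiom: the add is refused once re_force_disconnect reaches 1, so only the first caller after the counter is reset sees a non-zero return. A minimal sketch of the idiom with hypothetical names:

```c
/* atomic_add_unless(v, a, u) adds 'a' and returns non-zero only if
 * the old value was not 'u'.  With a == u == 1, exactly one caller
 * "wins" until the counter is reset to 0 again.
 */
static atomic_t example_once = ATOMIC_INIT(0);

static void example_invoke_once(void)
{
	if (atomic_add_unless(&example_once, 1, 1))
		pr_info("invoked once until example_once is reset\n");
}
```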
---|
| 142 | + |
---|
| 143 | +/** |
---|
| 144 | + * rpcrdma_flush_disconnect - Disconnect on flushed completion |
---|
| 145 | + * @r_xprt: transport to disconnect |
---|
| 146 | + * @wc: work completion entry |
---|
| 147 | + * |
---|
| 148 | + * Must be called in process context. |
---|
| 149 | + */ |
---|
| 150 | +void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc) |
---|
| 151 | +{ |
---|
| 152 | + if (wc->status != IB_WC_SUCCESS) |
---|
| 153 | + rpcrdma_force_disconnect(r_xprt->rx_ep); |
---|
127 | 154 | } |
---|
128 | 155 | |
---|
129 | 156 | /** |
---|
130 | 157 | * rpcrdma_wc_send - Invoked by RDMA provider for each polled Send WC |
---|
131 | | - * @cq: completion queue (ignored) |
---|
132 | | - * @wc: completed WR |
---|
| 158 | + * @cq: completion queue |
---|
| 159 | + * @wc: WCE for a completed Send WR |
---|
133 | 160 | * |
---|
134 | 161 | */ |
---|
135 | | -static void |
---|
136 | | -rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) |
---|
| 162 | +static void rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) |
---|
137 | 163 | { |
---|
138 | 164 | struct ib_cqe *cqe = wc->wr_cqe; |
---|
139 | 165 | struct rpcrdma_sendctx *sc = |
---|
140 | 166 | container_of(cqe, struct rpcrdma_sendctx, sc_cqe); |
---|
| 167 | + struct rpcrdma_xprt *r_xprt = cq->cq_context; |
---|
141 | 168 | |
---|
142 | 169 | /* WARNING: Only wr_cqe and status are reliable at this point */ |
---|
143 | 170 | trace_xprtrdma_wc_send(sc, wc); |
---|
144 | | - if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) |
---|
145 | | - pr_err("rpcrdma: Send: %s (%u/0x%x)\n", |
---|
146 | | - ib_wc_status_msg(wc->status), |
---|
147 | | - wc->status, wc->vendor_err); |
---|
148 | | - |
---|
149 | | - rpcrdma_sendctx_put_locked(sc); |
---|
| 171 | + rpcrdma_sendctx_put_locked(r_xprt, sc); |
---|
| 172 | + rpcrdma_flush_disconnect(r_xprt, wc); |
---|
150 | 173 | } |
---|
151 | 174 | |
---|
152 | 175 | /** |
---|
153 | 176 | * rpcrdma_wc_receive - Invoked by RDMA provider for each polled Receive WC |
---|
154 | | - * @cq: completion queue (ignored) |
---|
155 | | - * @wc: completed WR |
---|
| 177 | + * @cq: completion queue |
---|
| 178 | + * @wc: WCE for a completed Receive WR |
---|
156 | 179 | * |
---|
157 | 180 | */ |
---|
158 | | -static void |
---|
159 | | -rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) |
---|
| 181 | +static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) |
---|
160 | 182 | { |
---|
161 | 183 | struct ib_cqe *cqe = wc->wr_cqe; |
---|
162 | 184 | struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep, |
---|
163 | 185 | rr_cqe); |
---|
| 186 | + struct rpcrdma_xprt *r_xprt = cq->cq_context; |
---|
164 | 187 | |
---|
165 | | - /* WARNING: Only wr_id and status are reliable at this point */ |
---|
| 188 | + /* WARNING: Only wr_cqe and status are reliable at this point */ |
---|
166 | 189 | trace_xprtrdma_wc_receive(wc); |
---|
| 190 | + --r_xprt->rx_ep->re_receive_count; |
---|
167 | 191 | if (wc->status != IB_WC_SUCCESS) |
---|
168 | | - goto out_fail; |
---|
| 192 | + goto out_flushed; |
---|
169 | 193 | |
---|
170 | 194 | /* status == SUCCESS means all fields in wc are trustworthy */ |
---|
171 | 195 | rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len); |
---|
.. | .. |
---|
176 | 200 | rdmab_addr(rep->rr_rdmabuf), |
---|
177 | 201 | wc->byte_len, DMA_FROM_DEVICE); |
---|
178 | 202 | |
---|
179 | | -out_schedule: |
---|
180 | 203 | rpcrdma_reply_handler(rep); |
---|
181 | 204 | return; |
---|
182 | 205 | |
---|
183 | | -out_fail: |
---|
184 | | - if (wc->status != IB_WC_WR_FLUSH_ERR) |
---|
185 | | - pr_err("rpcrdma: Recv: %s (%u/0x%x)\n", |
---|
186 | | - ib_wc_status_msg(wc->status), |
---|
187 | | - wc->status, wc->vendor_err); |
---|
188 | | - rpcrdma_set_xdrlen(&rep->rr_hdrbuf, 0); |
---|
189 | | - goto out_schedule; |
---|
| 206 | +out_flushed: |
---|
| 207 | + rpcrdma_flush_disconnect(r_xprt, wc); |
---|
| 208 | + rpcrdma_rep_destroy(rep); |
---|
190 | 209 | } |
---|
191 | 210 | |
---|
192 | | -static void |
---|
193 | | -rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, |
---|
194 | | - struct rdma_conn_param *param) |
---|
| 211 | +static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep, |
---|
| 212 | + struct rdma_conn_param *param) |
---|
195 | 213 | { |
---|
196 | | - struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; |
---|
197 | 214 | const struct rpcrdma_connect_private *pmsg = param->private_data; |
---|
198 | 215 | unsigned int rsize, wsize; |
---|
199 | 216 | |
---|
200 | 217 | /* Default settings for RPC-over-RDMA Version One */ |
---|
201 | | - r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize; |
---|
| 218 | + ep->re_implicit_roundup = xprt_rdma_pad_optimize; |
---|
202 | 219 | rsize = RPCRDMA_V1_DEF_INLINE_SIZE; |
---|
203 | 220 | wsize = RPCRDMA_V1_DEF_INLINE_SIZE; |
---|
204 | 221 | |
---|
205 | 222 | if (pmsg && |
---|
206 | 223 | pmsg->cp_magic == rpcrdma_cmp_magic && |
---|
207 | 224 | pmsg->cp_version == RPCRDMA_CMP_VERSION) { |
---|
208 | | - r_xprt->rx_ia.ri_implicit_roundup = true; |
---|
| 225 | + ep->re_implicit_roundup = true; |
---|
209 | 226 | rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); |
---|
210 | 227 | wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); |
---|
211 | 228 | } |
---|
212 | 229 | |
---|
213 | | - if (rsize < cdata->inline_rsize) |
---|
214 | | - cdata->inline_rsize = rsize; |
---|
215 | | - if (wsize < cdata->inline_wsize) |
---|
216 | | - cdata->inline_wsize = wsize; |
---|
217 | | - dprintk("RPC: %s: max send %u, max recv %u\n", |
---|
218 | | - __func__, cdata->inline_wsize, cdata->inline_rsize); |
---|
219 | | - rpcrdma_set_max_header_sizes(r_xprt); |
---|
| 230 | + if (rsize < ep->re_inline_recv) |
---|
| 231 | + ep->re_inline_recv = rsize; |
---|
| 232 | + if (wsize < ep->re_inline_send) |
---|
| 233 | + ep->re_inline_send = wsize; |
---|
| 234 | + |
---|
| 235 | + rpcrdma_set_max_header_sizes(ep); |
---|
220 | 236 | } |
---|
221 | 237 | |
---|
| 238 | +/** |
---|
| 239 | + * rpcrdma_cm_event_handler - Handle RDMA CM events |
---|
| 240 | + * @id: rdma_cm_id on which an event has occurred |
---|
| 241 | + * @event: details of the event |
---|
| 242 | + * |
---|
| 243 | + * Called with @id's mutex held. Returns 1 if caller should |
---|
| 244 | + * destroy @id, otherwise 0. |
---|
| 245 | + */ |
---|
222 | 246 | static int |
---|
223 | | -rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) |
---|
| 247 | +rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) |
---|
224 | 248 | { |
---|
225 | | - struct rpcrdma_xprt *xprt = id->context; |
---|
226 | | - struct rpcrdma_ia *ia = &xprt->rx_ia; |
---|
227 | | - struct rpcrdma_ep *ep = &xprt->rx_ep; |
---|
228 | | - int connstate = 0; |
---|
| 249 | + struct sockaddr *sap = (struct sockaddr *)&id->route.addr.dst_addr; |
---|
| 250 | + struct rpcrdma_ep *ep = id->context; |
---|
229 | 251 | |
---|
230 | | - trace_xprtrdma_conn_upcall(xprt, event); |
---|
| 252 | + might_sleep(); |
---|
| 253 | + |
---|
231 | 254 | switch (event->event) { |
---|
232 | 255 | case RDMA_CM_EVENT_ADDR_RESOLVED: |
---|
233 | 256 | case RDMA_CM_EVENT_ROUTE_RESOLVED: |
---|
234 | | - ia->ri_async_rc = 0; |
---|
235 | | - complete(&ia->ri_done); |
---|
236 | | - break; |
---|
| 257 | + ep->re_async_rc = 0; |
---|
| 258 | + complete(&ep->re_done); |
---|
| 259 | + return 0; |
---|
237 | 260 | case RDMA_CM_EVENT_ADDR_ERROR: |
---|
238 | | - ia->ri_async_rc = -EPROTO; |
---|
239 | | - complete(&ia->ri_done); |
---|
240 | | - break; |
---|
| 261 | + ep->re_async_rc = -EPROTO; |
---|
| 262 | + complete(&ep->re_done); |
---|
| 263 | + return 0; |
---|
241 | 264 | case RDMA_CM_EVENT_ROUTE_ERROR: |
---|
242 | | - ia->ri_async_rc = -ENETUNREACH; |
---|
243 | | - complete(&ia->ri_done); |
---|
244 | | - break; |
---|
| 265 | + ep->re_async_rc = -ENETUNREACH; |
---|
| 266 | + complete(&ep->re_done); |
---|
| 267 | + return 0; |
---|
245 | 268 | case RDMA_CM_EVENT_DEVICE_REMOVAL: |
---|
246 | | -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
---|
247 | | - pr_info("rpcrdma: removing device %s for %s:%s\n", |
---|
248 | | - ia->ri_device->name, |
---|
249 | | - rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt)); |
---|
250 | | -#endif |
---|
251 | | - init_completion(&ia->ri_remove_done); |
---|
252 | | - set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags); |
---|
253 | | - ep->rep_connected = -ENODEV; |
---|
254 | | - xprt_force_disconnect(&xprt->rx_xprt); |
---|
255 | | - wait_for_completion(&ia->ri_remove_done); |
---|
256 | | - |
---|
257 | | - ia->ri_id = NULL; |
---|
258 | | - ia->ri_device = NULL; |
---|
259 | | - /* Return 1 to ensure the core destroys the id. */ |
---|
260 | | - return 1; |
---|
| 269 | + pr_info("rpcrdma: removing device %s for %pISpc\n", |
---|
| 270 | + ep->re_id->device->name, sap); |
---|
| 271 | + fallthrough; |
---|
| 272 | + case RDMA_CM_EVENT_ADDR_CHANGE: |
---|
| 273 | + ep->re_connect_status = -ENODEV; |
---|
| 274 | + goto disconnected; |
---|
261 | 275 | case RDMA_CM_EVENT_ESTABLISHED: |
---|
262 | | - ++xprt->rx_xprt.connect_cookie; |
---|
263 | | - connstate = 1; |
---|
264 | | - rpcrdma_update_connect_private(xprt, &event->param.conn); |
---|
265 | | - goto connected; |
---|
| 276 | + rpcrdma_ep_get(ep); |
---|
| 277 | + ep->re_connect_status = 1; |
---|
| 278 | + rpcrdma_update_cm_private(ep, &event->param.conn); |
---|
| 279 | + trace_xprtrdma_inline_thresh(ep); |
---|
| 280 | + wake_up_all(&ep->re_connect_wait); |
---|
| 281 | + break; |
---|
266 | 282 | case RDMA_CM_EVENT_CONNECT_ERROR: |
---|
267 | | - connstate = -ENOTCONN; |
---|
268 | | - goto connected; |
---|
| 283 | + ep->re_connect_status = -ENOTCONN; |
---|
| 284 | + goto wake_connect_worker; |
---|
269 | 285 | case RDMA_CM_EVENT_UNREACHABLE: |
---|
270 | | - connstate = -ENETUNREACH; |
---|
271 | | - goto connected; |
---|
| 286 | + ep->re_connect_status = -ENETUNREACH; |
---|
| 287 | + goto wake_connect_worker; |
---|
272 | 288 | case RDMA_CM_EVENT_REJECTED: |
---|
273 | | - dprintk("rpcrdma: connection to %s:%s rejected: %s\n", |
---|
274 | | - rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt), |
---|
275 | | - rdma_reject_msg(id, event->status)); |
---|
276 | | - connstate = -ECONNREFUSED; |
---|
| 289 | + dprintk("rpcrdma: connection to %pISpc rejected: %s\n", |
---|
| 290 | + sap, rdma_reject_msg(id, event->status)); |
---|
| 291 | + ep->re_connect_status = -ECONNREFUSED; |
---|
277 | 292 | if (event->status == IB_CM_REJ_STALE_CONN) |
---|
278 | | - connstate = -EAGAIN; |
---|
279 | | - goto connected; |
---|
| 293 | + ep->re_connect_status = -ENOTCONN; |
---|
| 294 | +wake_connect_worker: |
---|
| 295 | + wake_up_all(&ep->re_connect_wait); |
---|
| 296 | + return 0; |
---|
280 | 297 | case RDMA_CM_EVENT_DISCONNECTED: |
---|
281 | | - ++xprt->rx_xprt.connect_cookie; |
---|
282 | | - connstate = -ECONNABORTED; |
---|
283 | | -connected: |
---|
284 | | - ep->rep_connected = connstate; |
---|
285 | | - rpcrdma_conn_func(ep); |
---|
286 | | - wake_up_all(&ep->rep_connect_wait); |
---|
287 | | - /*FALLTHROUGH*/ |
---|
| 298 | + ep->re_connect_status = -ECONNABORTED; |
---|
| 299 | +disconnected: |
---|
| 300 | + rpcrdma_force_disconnect(ep); |
---|
| 301 | + return rpcrdma_ep_put(ep); |
---|
288 | 302 | default: |
---|
289 | | - dprintk("RPC: %s: %s:%s on %s/%s (ep 0x%p): %s\n", |
---|
290 | | - __func__, |
---|
291 | | - rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt), |
---|
292 | | - ia->ri_device->name, ia->ri_ops->ro_displayname, |
---|
293 | | - ep, rdma_event_msg(event->event)); |
---|
294 | 303 | break; |
---|
295 | 304 | } |
---|
296 | 305 | |
---|
| 306 | + dprintk("RPC: %s: %pISpc on %s/frwr: %s\n", __func__, sap, |
---|
| 307 | + ep->re_id->device->name, rdma_event_msg(event->event)); |
---|
297 | 308 | return 0; |
---|
298 | 309 | } |
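A detail worth noting for the DISCONNECTED and DEVICE_REMOVAL arms above: a non-zero return from an rdma_cm event handler asks the RDMA CM core to destroy the rdma_cm_id on the handler's behalf, so the handler must not touch the id afterwards. A minimal sketch of that contract (hypothetical handler):

```c
/* Sketch of the rdma_cm handler return-value contract. */
static int example_cm_handler(struct rdma_cm_id *id,
			      struct rdma_cm_event *event)
{
	switch (event->event) {
	case RDMA_CM_EVENT_DISCONNECTED:
		return 1;	/* CM core destroys @id for us */
	default:
		return 0;	/* we keep ownership of @id */
	}
}
```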
---|
299 | 310 | |
---|
300 | | -static struct rdma_cm_id * |
---|
301 | | -rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia) |
---|
| 311 | +static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt, |
---|
| 312 | + struct rpcrdma_ep *ep) |
---|
302 | 313 | { |
---|
303 | 314 | unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1; |
---|
| 315 | + struct rpc_xprt *xprt = &r_xprt->rx_xprt; |
---|
304 | 316 | struct rdma_cm_id *id; |
---|
305 | 317 | int rc; |
---|
306 | 318 | |
---|
307 | | - trace_xprtrdma_conn_start(xprt); |
---|
| 319 | + init_completion(&ep->re_done); |
---|
308 | 320 | |
---|
309 | | - init_completion(&ia->ri_done); |
---|
310 | | - |
---|
311 | | - id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_conn_upcall, |
---|
312 | | - xprt, RDMA_PS_TCP, IB_QPT_RC); |
---|
313 | | - if (IS_ERR(id)) { |
---|
314 | | - rc = PTR_ERR(id); |
---|
315 | | - dprintk("RPC: %s: rdma_create_id() failed %i\n", |
---|
316 | | - __func__, rc); |
---|
| 321 | + id = rdma_create_id(xprt->xprt_net, rpcrdma_cm_event_handler, ep, |
---|
| 322 | + RDMA_PS_TCP, IB_QPT_RC); |
---|
| 323 | + if (IS_ERR(id)) |
---|
317 | 324 | return id; |
---|
318 | | - } |
---|
319 | 325 | |
---|
320 | | - ia->ri_async_rc = -ETIMEDOUT; |
---|
321 | | - rc = rdma_resolve_addr(id, NULL, |
---|
322 | | - (struct sockaddr *)&xprt->rx_xprt.addr, |
---|
| 326 | + ep->re_async_rc = -ETIMEDOUT; |
---|
| 327 | + rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)&xprt->addr, |
---|
323 | 328 | RDMA_RESOLVE_TIMEOUT); |
---|
324 | | - if (rc) { |
---|
325 | | - dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", |
---|
326 | | - __func__, rc); |
---|
| 329 | + if (rc) |
---|
327 | 330 | goto out; |
---|
328 | | - } |
---|
329 | | - rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); |
---|
330 | | - if (rc < 0) { |
---|
331 | | - trace_xprtrdma_conn_tout(xprt); |
---|
| 331 | + rc = wait_for_completion_interruptible_timeout(&ep->re_done, wtimeout); |
---|
| 332 | + if (rc < 0) |
---|
332 | 333 | goto out; |
---|
333 | | - } |
---|
334 | 334 | |
---|
335 | | - rc = ia->ri_async_rc; |
---|
| 335 | + rc = ep->re_async_rc; |
---|
336 | 336 | if (rc) |
---|
337 | 337 | goto out; |
---|
338 | 338 | |
---|
339 | | - ia->ri_async_rc = -ETIMEDOUT; |
---|
| 339 | + ep->re_async_rc = -ETIMEDOUT; |
---|
340 | 340 | rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); |
---|
341 | | - if (rc) { |
---|
342 | | - dprintk("RPC: %s: rdma_resolve_route() failed %i\n", |
---|
343 | | - __func__, rc); |
---|
| 341 | + if (rc) |
---|
344 | 342 | goto out; |
---|
345 | | - } |
---|
346 | | - rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); |
---|
347 | | - if (rc < 0) { |
---|
348 | | - trace_xprtrdma_conn_tout(xprt); |
---|
| 343 | + rc = wait_for_completion_interruptible_timeout(&ep->re_done, wtimeout); |
---|
| 344 | + if (rc < 0) |
---|
349 | 345 | goto out; |
---|
350 | | - } |
---|
351 | | - rc = ia->ri_async_rc; |
---|
| 346 | + rc = ep->re_async_rc; |
---|
352 | 347 | if (rc) |
---|
353 | 348 | goto out; |
---|
354 | 349 | |
---|
.. | .. |
---|
359 | 354 | return ERR_PTR(rc); |
---|
360 | 355 | } |
---|
361 | 356 | |
---|
362 | | -/* |
---|
363 | | - * Exported functions. |
---|
364 | | - */ |
---|
365 | | - |
---|
366 | | -/** |
---|
367 | | - * rpcrdma_ia_open - Open and initialize an Interface Adapter. |
---|
368 | | - * @xprt: transport with IA to (re)initialize |
---|
369 | | - * |
---|
370 | | - * Returns 0 on success, negative errno if an appropriate |
---|
371 | | - * Interface Adapter could not be found and opened. |
---|
372 | | - */ |
---|
373 | | -int |
---|
374 | | -rpcrdma_ia_open(struct rpcrdma_xprt *xprt) |
---|
| 357 | +static void rpcrdma_ep_destroy(struct kref *kref) |
---|
375 | 358 | { |
---|
376 | | - struct rpcrdma_ia *ia = &xprt->rx_ia; |
---|
| 359 | + struct rpcrdma_ep *ep = container_of(kref, struct rpcrdma_ep, re_kref); |
---|
| 360 | + |
---|
| 361 | + if (ep->re_id->qp) { |
---|
| 362 | + rdma_destroy_qp(ep->re_id); |
---|
| 363 | + ep->re_id->qp = NULL; |
---|
| 364 | + } |
---|
| 365 | + |
---|
| 366 | + if (ep->re_attr.recv_cq) |
---|
| 367 | + ib_free_cq(ep->re_attr.recv_cq); |
---|
| 368 | + ep->re_attr.recv_cq = NULL; |
---|
| 369 | + if (ep->re_attr.send_cq) |
---|
| 370 | + ib_free_cq(ep->re_attr.send_cq); |
---|
| 371 | + ep->re_attr.send_cq = NULL; |
---|
| 372 | + |
---|
| 373 | + if (ep->re_pd) |
---|
| 374 | + ib_dealloc_pd(ep->re_pd); |
---|
| 375 | + ep->re_pd = NULL; |
---|
| 376 | + |
---|
| 377 | + kfree(ep); |
---|
| 378 | + module_put(THIS_MODULE); |
---|
| 379 | +} |
---|
| 380 | + |
---|
| 381 | +static noinline void rpcrdma_ep_get(struct rpcrdma_ep *ep) |
---|
| 382 | +{ |
---|
| 383 | + kref_get(&ep->re_kref); |
---|
| 384 | +} |
---|
| 385 | + |
---|
| 386 | +/* Returns: |
---|
| 387 | + * %0 if @ep still has a positive kref count, or |
---|
| 388 | + * %1 if @ep was destroyed successfully. |
---|
| 389 | + */ |
---|
| 390 | +static noinline int rpcrdma_ep_put(struct rpcrdma_ep *ep) |
---|
| 391 | +{ |
---|
| 392 | + return kref_put(&ep->re_kref, rpcrdma_ep_destroy); |
---|
| 393 | +} |
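The endpoint's lifetime is now governed by a kref: kref_init() in rpcrdma_ep_create() holds the transport's reference, rpcrdma_ep_get() adds one while a connection is established and while Receives are outstanding, and rpcrdma_ep_put() returns 1 exactly when the last reference is dropped and rpcrdma_ep_destroy() has run. A generic sketch of the pattern (example names, not the xprtrdma structures):

```c
struct example_obj {
	struct kref ref;
	/* ... payload ... */
};

static void example_release(struct kref *kref)
{
	kfree(container_of(kref, struct example_obj, ref));
}

static void example_put(struct example_obj *obj)
{
	/* The release callback runs, and 1 is returned, only on the
	 * final put. */
	if (kref_put(&obj->ref, example_release))
		pr_debug("example_obj freed\n");
}
```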
---|
| 394 | + |
---|
| 395 | +static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) |
---|
| 396 | +{ |
---|
| 397 | + struct rpcrdma_connect_private *pmsg; |
---|
| 398 | + struct ib_device *device; |
---|
| 399 | + struct rdma_cm_id *id; |
---|
| 400 | + struct rpcrdma_ep *ep; |
---|
377 | 401 | int rc; |
---|
378 | 402 | |
---|
379 | | - ia->ri_id = rpcrdma_create_id(xprt, ia); |
---|
380 | | - if (IS_ERR(ia->ri_id)) { |
---|
381 | | - rc = PTR_ERR(ia->ri_id); |
---|
382 | | - goto out_err; |
---|
| 403 | + ep = kzalloc(sizeof(*ep), GFP_NOFS); |
---|
| 404 | + if (!ep) |
---|
| 405 | + return -ENOTCONN; |
---|
| 406 | + ep->re_xprt = &r_xprt->rx_xprt; |
---|
| 407 | + kref_init(&ep->re_kref); |
---|
| 408 | + |
---|
| 409 | + id = rpcrdma_create_id(r_xprt, ep); |
---|
| 410 | + if (IS_ERR(id)) { |
---|
| 411 | + kfree(ep); |
---|
| 412 | + return PTR_ERR(id); |
---|
383 | 413 | } |
---|
384 | | - ia->ri_device = ia->ri_id->device; |
---|
| 414 | + __module_get(THIS_MODULE); |
---|
| 415 | + device = id->device; |
---|
| 416 | + ep->re_id = id; |
---|
385 | 417 | |
---|
386 | | - ia->ri_pd = ib_alloc_pd(ia->ri_device, 0); |
---|
387 | | - if (IS_ERR(ia->ri_pd)) { |
---|
388 | | - rc = PTR_ERR(ia->ri_pd); |
---|
389 | | - pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc); |
---|
390 | | - goto out_err; |
---|
391 | | - } |
---|
392 | | - |
---|
393 | | - switch (xprt_rdma_memreg_strategy) { |
---|
394 | | - case RPCRDMA_FRWR: |
---|
395 | | - if (frwr_is_supported(ia)) { |
---|
396 | | - ia->ri_ops = &rpcrdma_frwr_memreg_ops; |
---|
397 | | - break; |
---|
398 | | - } |
---|
399 | | - /*FALLTHROUGH*/ |
---|
400 | | - case RPCRDMA_MTHCAFMR: |
---|
401 | | - if (fmr_is_supported(ia)) { |
---|
402 | | - ia->ri_ops = &rpcrdma_fmr_memreg_ops; |
---|
403 | | - break; |
---|
404 | | - } |
---|
405 | | - /*FALLTHROUGH*/ |
---|
406 | | - default: |
---|
407 | | - pr_err("rpcrdma: Device %s does not support memreg mode %d\n", |
---|
408 | | - ia->ri_device->name, xprt_rdma_memreg_strategy); |
---|
409 | | - rc = -EINVAL; |
---|
410 | | - goto out_err; |
---|
411 | | - } |
---|
412 | | - |
---|
413 | | - return 0; |
---|
414 | | - |
---|
415 | | -out_err: |
---|
416 | | - rpcrdma_ia_close(ia); |
---|
417 | | - return rc; |
---|
418 | | -} |
---|
419 | | - |
---|
420 | | -/** |
---|
421 | | - * rpcrdma_ia_remove - Handle device driver unload |
---|
422 | | - * @ia: interface adapter being removed |
---|
423 | | - * |
---|
424 | | - * Divest transport H/W resources associated with this adapter, |
---|
425 | | - * but allow it to be restored later. |
---|
426 | | - */ |
---|
427 | | -void |
---|
428 | | -rpcrdma_ia_remove(struct rpcrdma_ia *ia) |
---|
429 | | -{ |
---|
430 | | - struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, |
---|
431 | | - rx_ia); |
---|
432 | | - struct rpcrdma_ep *ep = &r_xprt->rx_ep; |
---|
433 | | - struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
---|
434 | | - struct rpcrdma_req *req; |
---|
435 | | - struct rpcrdma_rep *rep; |
---|
436 | | - |
---|
437 | | - cancel_delayed_work_sync(&buf->rb_refresh_worker); |
---|
438 | | - |
---|
439 | | - /* This is similar to rpcrdma_ep_destroy, but: |
---|
440 | | - * - Don't cancel the connect worker. |
---|
441 | | - * - Don't call rpcrdma_ep_disconnect, which waits |
---|
442 | | - * for another conn upcall, which will deadlock. |
---|
443 | | - * - rdma_disconnect is unneeded, the underlying |
---|
444 | | - * connection is already gone. |
---|
445 | | - */ |
---|
446 | | - if (ia->ri_id->qp) { |
---|
447 | | - ib_drain_qp(ia->ri_id->qp); |
---|
448 | | - rdma_destroy_qp(ia->ri_id); |
---|
449 | | - ia->ri_id->qp = NULL; |
---|
450 | | - } |
---|
451 | | - ib_free_cq(ep->rep_attr.recv_cq); |
---|
452 | | - ep->rep_attr.recv_cq = NULL; |
---|
453 | | - ib_free_cq(ep->rep_attr.send_cq); |
---|
454 | | - ep->rep_attr.send_cq = NULL; |
---|
455 | | - |
---|
456 | | - /* The ULP is responsible for ensuring all DMA |
---|
457 | | - * mappings and MRs are gone. |
---|
458 | | - */ |
---|
459 | | - list_for_each_entry(rep, &buf->rb_recv_bufs, rr_list) |
---|
460 | | - rpcrdma_dma_unmap_regbuf(rep->rr_rdmabuf); |
---|
461 | | - list_for_each_entry(req, &buf->rb_allreqs, rl_all) { |
---|
462 | | - rpcrdma_dma_unmap_regbuf(req->rl_rdmabuf); |
---|
463 | | - rpcrdma_dma_unmap_regbuf(req->rl_sendbuf); |
---|
464 | | - rpcrdma_dma_unmap_regbuf(req->rl_recvbuf); |
---|
465 | | - } |
---|
466 | | - rpcrdma_mrs_destroy(buf); |
---|
467 | | - ib_dealloc_pd(ia->ri_pd); |
---|
468 | | - ia->ri_pd = NULL; |
---|
469 | | - |
---|
470 | | - /* Allow waiters to continue */ |
---|
471 | | - complete(&ia->ri_remove_done); |
---|
472 | | - |
---|
473 | | - trace_xprtrdma_remove(r_xprt); |
---|
474 | | -} |
---|
475 | | - |
---|
476 | | -/** |
---|
477 | | - * rpcrdma_ia_close - Clean up/close an IA. |
---|
478 | | - * @ia: interface adapter to close |
---|
479 | | - * |
---|
480 | | - */ |
---|
481 | | -void |
---|
482 | | -rpcrdma_ia_close(struct rpcrdma_ia *ia) |
---|
483 | | -{ |
---|
484 | | - if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { |
---|
485 | | - if (ia->ri_id->qp) |
---|
486 | | - rdma_destroy_qp(ia->ri_id); |
---|
487 | | - rdma_destroy_id(ia->ri_id); |
---|
488 | | - } |
---|
489 | | - ia->ri_id = NULL; |
---|
490 | | - ia->ri_device = NULL; |
---|
491 | | - |
---|
492 | | - /* If the pd is still busy, xprtrdma missed freeing a resource */ |
---|
493 | | - if (ia->ri_pd && !IS_ERR(ia->ri_pd)) |
---|
494 | | - ib_dealloc_pd(ia->ri_pd); |
---|
495 | | - ia->ri_pd = NULL; |
---|
496 | | -} |
---|
497 | | - |
---|
498 | | -/* |
---|
499 | | - * Create unconnected endpoint. |
---|
500 | | - */ |
---|
501 | | -int |
---|
502 | | -rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, |
---|
503 | | - struct rpcrdma_create_data_internal *cdata) |
---|
504 | | -{ |
---|
505 | | - struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private; |
---|
506 | | - struct ib_cq *sendcq, *recvcq; |
---|
507 | | - unsigned int max_sge; |
---|
508 | | - int rc; |
---|
509 | | - |
---|
510 | | - max_sge = min_t(unsigned int, ia->ri_device->attrs.max_send_sge, |
---|
511 | | - RPCRDMA_MAX_SEND_SGES); |
---|
512 | | - if (max_sge < RPCRDMA_MIN_SEND_SGES) { |
---|
513 | | - pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge); |
---|
514 | | - return -ENOMEM; |
---|
515 | | - } |
---|
516 | | - ia->ri_max_send_sges = max_sge; |
---|
517 | | - |
---|
518 | | - rc = ia->ri_ops->ro_open(ia, ep, cdata); |
---|
| 418 | + ep->re_max_requests = r_xprt->rx_xprt.max_reqs; |
---|
| 419 | + ep->re_inline_send = xprt_rdma_max_inline_write; |
---|
| 420 | + ep->re_inline_recv = xprt_rdma_max_inline_read; |
---|
| 421 | + rc = frwr_query_device(ep, device); |
---|
519 | 422 | if (rc) |
---|
520 | | - return rc; |
---|
| 423 | + goto out_destroy; |
---|
521 | 424 | |
---|
522 | | - ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; |
---|
523 | | - ep->rep_attr.qp_context = ep; |
---|
524 | | - ep->rep_attr.srq = NULL; |
---|
525 | | - ep->rep_attr.cap.max_send_sge = max_sge; |
---|
526 | | - ep->rep_attr.cap.max_recv_sge = 1; |
---|
527 | | - ep->rep_attr.cap.max_inline_data = 0; |
---|
528 | | - ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; |
---|
529 | | - ep->rep_attr.qp_type = IB_QPT_RC; |
---|
530 | | - ep->rep_attr.port_num = ~0; |
---|
| 425 | + r_xprt->rx_buf.rb_max_requests = cpu_to_be32(ep->re_max_requests); |
---|
| 426 | + |
---|
| 427 | + ep->re_attr.event_handler = rpcrdma_qp_event_handler; |
---|
| 428 | + ep->re_attr.qp_context = ep; |
---|
| 429 | + ep->re_attr.srq = NULL; |
---|
| 430 | + ep->re_attr.cap.max_inline_data = 0; |
---|
| 431 | + ep->re_attr.sq_sig_type = IB_SIGNAL_REQ_WR; |
---|
| 432 | + ep->re_attr.qp_type = IB_QPT_RC; |
---|
| 433 | + ep->re_attr.port_num = ~0; |
---|
531 | 434 | |
---|
532 | 435 | dprintk("RPC: %s: requested max: dtos: send %d recv %d; " |
---|
533 | 436 | "iovs: send %d recv %d\n", |
---|
534 | 437 | __func__, |
---|
535 | | - ep->rep_attr.cap.max_send_wr, |
---|
536 | | - ep->rep_attr.cap.max_recv_wr, |
---|
537 | | - ep->rep_attr.cap.max_send_sge, |
---|
538 | | - ep->rep_attr.cap.max_recv_sge); |
---|
| 438 | + ep->re_attr.cap.max_send_wr, |
---|
| 439 | + ep->re_attr.cap.max_recv_wr, |
---|
| 440 | + ep->re_attr.cap.max_send_sge, |
---|
| 441 | + ep->re_attr.cap.max_recv_sge); |
---|
539 | 442 | |
---|
540 | | - /* set trigger for requesting send completion */ |
---|
541 | | - ep->rep_send_batch = min_t(unsigned int, RPCRDMA_MAX_SEND_BATCH, |
---|
542 | | - cdata->max_requests >> 2); |
---|
543 | | - ep->rep_send_count = ep->rep_send_batch; |
---|
544 | | - init_waitqueue_head(&ep->rep_connect_wait); |
---|
545 | | - INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); |
---|
| 443 | + ep->re_send_batch = ep->re_max_requests >> 3; |
---|
| 444 | + ep->re_send_count = ep->re_send_batch; |
---|
| 445 | + init_waitqueue_head(&ep->re_connect_wait); |
---|
546 | 446 | |
---|
547 | | - sendcq = ib_alloc_cq(ia->ri_device, NULL, |
---|
548 | | - ep->rep_attr.cap.max_send_wr + 1, |
---|
549 | | - ia->ri_device->num_comp_vectors > 1 ? 1 : 0, |
---|
550 | | - IB_POLL_WORKQUEUE); |
---|
551 | | - if (IS_ERR(sendcq)) { |
---|
552 | | - rc = PTR_ERR(sendcq); |
---|
553 | | - dprintk("RPC: %s: failed to create send CQ: %i\n", |
---|
554 | | - __func__, rc); |
---|
555 | | - goto out1; |
---|
| 447 | + ep->re_attr.send_cq = ib_alloc_cq_any(device, r_xprt, |
---|
| 448 | + ep->re_attr.cap.max_send_wr, |
---|
| 449 | + IB_POLL_WORKQUEUE); |
---|
| 450 | + if (IS_ERR(ep->re_attr.send_cq)) { |
---|
| 451 | + rc = PTR_ERR(ep->re_attr.send_cq); |
---|
| 452 | + ep->re_attr.send_cq = NULL; |
---|
| 453 | + goto out_destroy; |
---|
556 | 454 | } |
---|
557 | 455 | |
---|
558 | | - recvcq = ib_alloc_cq(ia->ri_device, NULL, |
---|
559 | | - ep->rep_attr.cap.max_recv_wr + 1, |
---|
560 | | - 0, IB_POLL_WORKQUEUE); |
---|
561 | | - if (IS_ERR(recvcq)) { |
---|
562 | | - rc = PTR_ERR(recvcq); |
---|
563 | | - dprintk("RPC: %s: failed to create recv CQ: %i\n", |
---|
564 | | - __func__, rc); |
---|
565 | | - goto out2; |
---|
| 456 | + ep->re_attr.recv_cq = ib_alloc_cq_any(device, r_xprt, |
---|
| 457 | + ep->re_attr.cap.max_recv_wr, |
---|
| 458 | + IB_POLL_WORKQUEUE); |
---|
| 459 | + if (IS_ERR(ep->re_attr.recv_cq)) { |
---|
| 460 | + rc = PTR_ERR(ep->re_attr.recv_cq); |
---|
| 461 | + ep->re_attr.recv_cq = NULL; |
---|
| 462 | + goto out_destroy; |
---|
566 | 463 | } |
---|
567 | | - |
---|
568 | | - ep->rep_attr.send_cq = sendcq; |
---|
569 | | - ep->rep_attr.recv_cq = recvcq; |
---|
| 464 | + ep->re_receive_count = 0; |
---|
570 | 465 | |
---|
571 | 466 | /* Initialize cma parameters */ |
---|
572 | | - memset(&ep->rep_remote_cma, 0, sizeof(ep->rep_remote_cma)); |
---|
| 467 | + memset(&ep->re_remote_cma, 0, sizeof(ep->re_remote_cma)); |
---|
573 | 468 | |
---|
574 | 469 | /* Prepare RDMA-CM private message */ |
---|
| 470 | + pmsg = &ep->re_cm_private; |
---|
575 | 471 | pmsg->cp_magic = rpcrdma_cmp_magic; |
---|
576 | 472 | pmsg->cp_version = RPCRDMA_CMP_VERSION; |
---|
577 | | - pmsg->cp_flags |= ia->ri_ops->ro_send_w_inv_ok; |
---|
578 | | - pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize); |
---|
579 | | - pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize); |
---|
580 | | - ep->rep_remote_cma.private_data = pmsg; |
---|
581 | | - ep->rep_remote_cma.private_data_len = sizeof(*pmsg); |
---|
| 473 | + pmsg->cp_flags |= RPCRDMA_CMP_F_SND_W_INV_OK; |
---|
| 474 | + pmsg->cp_send_size = rpcrdma_encode_buffer_size(ep->re_inline_send); |
---|
| 475 | + pmsg->cp_recv_size = rpcrdma_encode_buffer_size(ep->re_inline_recv); |
---|
| 476 | + ep->re_remote_cma.private_data = pmsg; |
---|
| 477 | + ep->re_remote_cma.private_data_len = sizeof(*pmsg); |
---|
582 | 478 | |
---|
583 | 479 | /* Client offers RDMA Read but does not initiate */ |
---|
584 | | - ep->rep_remote_cma.initiator_depth = 0; |
---|
585 | | - ep->rep_remote_cma.responder_resources = |
---|
586 | | - min_t(int, U8_MAX, ia->ri_device->attrs.max_qp_rd_atom); |
---|
| 480 | + ep->re_remote_cma.initiator_depth = 0; |
---|
| 481 | + ep->re_remote_cma.responder_resources = |
---|
| 482 | + min_t(int, U8_MAX, device->attrs.max_qp_rd_atom); |
---|
587 | 483 | |
---|
588 | 484 | /* Limit transport retries so client can detect server |
---|
589 | 485 | * GID changes quickly. RPC layer handles re-establishing |
---|
590 | 486 | * transport connection and retransmission. |
---|
591 | 487 | */ |
---|
592 | | - ep->rep_remote_cma.retry_count = 6; |
---|
| 488 | + ep->re_remote_cma.retry_count = 6; |
---|
593 | 489 | |
---|
594 | 490 | /* RPC-over-RDMA handles its own flow control. In addition, |
---|
595 | 491 | * make all RNR NAKs visible so we know that RPC-over-RDMA |
---|
596 | 492 | * flow control is working correctly (no NAKs should be seen). |
---|
597 | 493 | */ |
---|
598 | | - ep->rep_remote_cma.flow_control = 0; |
---|
599 | | - ep->rep_remote_cma.rnr_retry_count = 0; |
---|
| 494 | + ep->re_remote_cma.flow_control = 0; |
---|
| 495 | + ep->re_remote_cma.rnr_retry_count = 0; |
---|
600 | 496 | |
---|
601 | | - return 0; |
---|
602 | | - |
---|
603 | | -out2: |
---|
604 | | - ib_free_cq(sendcq); |
---|
605 | | -out1: |
---|
606 | | - return rc; |
---|
607 | | -} |
---|
608 | | - |
---|
609 | | -/* |
---|
610 | | - * rpcrdma_ep_destroy |
---|
611 | | - * |
---|
612 | | - * Disconnect and destroy endpoint. After this, the only |
---|
613 | | - * valid operations on the ep are to free it (if dynamically |
---|
614 | | - * allocated) or re-create it. |
---|
615 | | - */ |
---|
616 | | -void |
---|
617 | | -rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) |
---|
618 | | -{ |
---|
619 | | - cancel_delayed_work_sync(&ep->rep_connect_worker); |
---|
620 | | - |
---|
621 | | - if (ia->ri_id && ia->ri_id->qp) { |
---|
622 | | - rpcrdma_ep_disconnect(ep, ia); |
---|
623 | | - rdma_destroy_qp(ia->ri_id); |
---|
624 | | - ia->ri_id->qp = NULL; |
---|
625 | | - } |
---|
626 | | - |
---|
627 | | - if (ep->rep_attr.recv_cq) |
---|
628 | | - ib_free_cq(ep->rep_attr.recv_cq); |
---|
629 | | - if (ep->rep_attr.send_cq) |
---|
630 | | - ib_free_cq(ep->rep_attr.send_cq); |
---|
631 | | -} |
---|
632 | | - |
---|
633 | | -/* Re-establish a connection after a device removal event. |
---|
634 | | - * Unlike a normal reconnection, a fresh PD and a new set |
---|
635 | | - * of MRs and buffers is needed. |
---|
636 | | - */ |
---|
637 | | -static int |
---|
638 | | -rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, |
---|
639 | | - struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) |
---|
640 | | -{ |
---|
641 | | - int rc, err; |
---|
642 | | - |
---|
643 | | - trace_xprtrdma_reinsert(r_xprt); |
---|
644 | | - |
---|
645 | | - rc = -EHOSTUNREACH; |
---|
646 | | - if (rpcrdma_ia_open(r_xprt)) |
---|
647 | | - goto out1; |
---|
648 | | - |
---|
649 | | - rc = -ENOMEM; |
---|
650 | | - err = rpcrdma_ep_create(ep, ia, &r_xprt->rx_data); |
---|
651 | | - if (err) { |
---|
652 | | - pr_err("rpcrdma: rpcrdma_ep_create returned %d\n", err); |
---|
653 | | - goto out2; |
---|
654 | | - } |
---|
655 | | - |
---|
656 | | - rc = -ENETUNREACH; |
---|
657 | | - err = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); |
---|
658 | | - if (err) { |
---|
659 | | - pr_err("rpcrdma: rdma_create_qp returned %d\n", err); |
---|
660 | | - goto out3; |
---|
661 | | - } |
---|
662 | | - |
---|
663 | | - rpcrdma_mrs_create(r_xprt); |
---|
664 | | - return 0; |
---|
665 | | - |
---|
666 | | -out3: |
---|
667 | | - rpcrdma_ep_destroy(ep, ia); |
---|
668 | | -out2: |
---|
669 | | - rpcrdma_ia_close(ia); |
---|
670 | | -out1: |
---|
671 | | - return rc; |
---|
672 | | -} |
---|
673 | | - |
---|
674 | | -static int |
---|
675 | | -rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep, |
---|
676 | | - struct rpcrdma_ia *ia) |
---|
677 | | -{ |
---|
678 | | - struct rdma_cm_id *id, *old; |
---|
679 | | - int err, rc; |
---|
680 | | - |
---|
681 | | - trace_xprtrdma_reconnect(r_xprt); |
---|
682 | | - |
---|
683 | | - rpcrdma_ep_disconnect(ep, ia); |
---|
684 | | - |
---|
685 | | - rc = -EHOSTUNREACH; |
---|
686 | | - id = rpcrdma_create_id(r_xprt, ia); |
---|
687 | | - if (IS_ERR(id)) |
---|
688 | | - goto out; |
---|
689 | | - |
---|
690 | | - /* As long as the new ID points to the same device as the |
---|
691 | | - * old ID, we can reuse the transport's existing PD and all |
---|
692 | | - * previously allocated MRs. Also, the same device means |
---|
693 | | - * the transport's previous DMA mappings are still valid. |
---|
694 | | - * |
---|
695 | | - * This is a sanity check only. There should be no way these |
---|
696 | | - * point to two different devices here. |
---|
697 | | - */ |
---|
698 | | - old = id; |
---|
699 | | - rc = -ENETUNREACH; |
---|
700 | | - if (ia->ri_device != id->device) { |
---|
701 | | - pr_err("rpcrdma: can't reconnect on different device!\n"); |
---|
| 497 | + ep->re_pd = ib_alloc_pd(device, 0); |
---|
| 498 | + if (IS_ERR(ep->re_pd)) { |
---|
| 499 | + rc = PTR_ERR(ep->re_pd); |
---|
| 500 | + ep->re_pd = NULL; |
---|
702 | 501 | goto out_destroy; |
---|
703 | 502 | } |
---|
704 | 503 | |
---|
705 | | - err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr); |
---|
706 | | - if (err) { |
---|
707 | | - dprintk("RPC: %s: rdma_create_qp returned %d\n", |
---|
708 | | - __func__, err); |
---|
| 504 | + rc = rdma_create_qp(id, ep->re_pd, &ep->re_attr); |
---|
| 505 | + if (rc) |
---|
709 | 506 | goto out_destroy; |
---|
710 | | - } |
---|
711 | 507 | |
---|
712 | | - /* Atomically replace the transport's ID and QP. */ |
---|
713 | | - rc = 0; |
---|
714 | | - old = ia->ri_id; |
---|
715 | | - ia->ri_id = id; |
---|
716 | | - rdma_destroy_qp(old); |
---|
| 508 | + r_xprt->rx_ep = ep; |
---|
| 509 | + return 0; |
---|
717 | 510 | |
---|
718 | 511 | out_destroy: |
---|
719 | | - rdma_destroy_id(old); |
---|
720 | | -out: |
---|
| 512 | + rpcrdma_ep_put(ep); |
---|
| 513 | + rdma_destroy_id(id); |
---|
721 | 514 | return rc; |
---|
722 | 515 | } |
---|
723 | 516 | |
---|
724 | | -/* |
---|
725 | | - * Connect unconnected endpoint. |
---|
| 517 | +/** |
---|
| 518 | + * rpcrdma_xprt_connect - Connect an unconnected transport |
---|
| 519 | + * @r_xprt: controlling transport instance |
---|
| 520 | + * |
---|
| 521 | + * Returns 0 on success or a negative errno. |
---|
726 | 522 | */ |
---|
727 | | -int |
---|
728 | | -rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) |
---|
| 523 | +int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt) |
---|
729 | 524 | { |
---|
730 | | - struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, |
---|
731 | | - rx_ia); |
---|
| 525 | + struct rpc_xprt *xprt = &r_xprt->rx_xprt; |
---|
| 526 | + struct rpcrdma_ep *ep; |
---|
732 | 527 | int rc; |
---|
733 | 528 | |
---|
734 | | -retry: |
---|
735 | | - switch (ep->rep_connected) { |
---|
736 | | - case 0: |
---|
737 | | - dprintk("RPC: %s: connecting...\n", __func__); |
---|
738 | | - rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); |
---|
739 | | - if (rc) { |
---|
740 | | - dprintk("RPC: %s: rdma_create_qp failed %i\n", |
---|
741 | | - __func__, rc); |
---|
742 | | - rc = -ENETUNREACH; |
---|
743 | | - goto out_noupdate; |
---|
744 | | - } |
---|
745 | | - break; |
---|
746 | | - case -ENODEV: |
---|
747 | | - rc = rpcrdma_ep_recreate_xprt(r_xprt, ep, ia); |
---|
748 | | - if (rc) |
---|
749 | | - goto out_noupdate; |
---|
750 | | - break; |
---|
751 | | - default: |
---|
752 | | - rc = rpcrdma_ep_reconnect(r_xprt, ep, ia); |
---|
753 | | - if (rc) |
---|
754 | | - goto out; |
---|
755 | | - } |
---|
756 | | - |
---|
757 | | - ep->rep_connected = 0; |
---|
758 | | - rpcrdma_post_recvs(r_xprt, true); |
---|
759 | | - |
---|
760 | | - rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); |
---|
761 | | - if (rc) { |
---|
762 | | - dprintk("RPC: %s: rdma_connect() failed with %i\n", |
---|
763 | | - __func__, rc); |
---|
764 | | - goto out; |
---|
765 | | - } |
---|
766 | | - |
---|
767 | | - wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); |
---|
768 | | - if (ep->rep_connected <= 0) { |
---|
769 | | - if (ep->rep_connected == -EAGAIN) |
---|
770 | | - goto retry; |
---|
771 | | - rc = ep->rep_connected; |
---|
772 | | - goto out; |
---|
773 | | - } |
---|
774 | | - |
---|
775 | | - dprintk("RPC: %s: connected\n", __func__); |
---|
776 | | - |
---|
777 | | -out: |
---|
| 529 | + rc = rpcrdma_ep_create(r_xprt); |
---|
778 | 530 | if (rc) |
---|
779 | | - ep->rep_connected = rc; |
---|
| 531 | + return rc; |
---|
| 532 | + ep = r_xprt->rx_ep; |
---|
780 | 533 | |
---|
781 | | -out_noupdate: |
---|
| 534 | + xprt_clear_connected(xprt); |
---|
| 535 | + rpcrdma_reset_cwnd(r_xprt); |
---|
| 536 | + |
---|
| 537 | + /* Bump the ep's reference count while there are |
---|
| 538 | + * outstanding Receives. |
---|
| 539 | + */ |
---|
| 540 | + rpcrdma_ep_get(ep); |
---|
| 541 | + rpcrdma_post_recvs(r_xprt, 1, true); |
---|
| 542 | + |
---|
| 543 | + rc = rdma_connect(ep->re_id, &ep->re_remote_cma); |
---|
| 544 | + if (rc) |
---|
| 545 | + goto out; |
---|
| 546 | + |
---|
| 547 | + if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO) |
---|
| 548 | + xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; |
---|
| 549 | + wait_event_interruptible(ep->re_connect_wait, |
---|
| 550 | + ep->re_connect_status != 0); |
---|
| 551 | + if (ep->re_connect_status <= 0) { |
---|
| 552 | + rc = ep->re_connect_status; |
---|
| 553 | + goto out; |
---|
| 554 | + } |
---|
| 555 | + |
---|
| 556 | + rc = rpcrdma_sendctxs_create(r_xprt); |
---|
| 557 | + if (rc) { |
---|
| 558 | + rc = -ENOTCONN; |
---|
| 559 | + goto out; |
---|
| 560 | + } |
---|
| 561 | + |
---|
| 562 | + rc = rpcrdma_reqs_setup(r_xprt); |
---|
| 563 | + if (rc) { |
---|
| 564 | + rc = -ENOTCONN; |
---|
| 565 | + goto out; |
---|
| 566 | + } |
---|
| 567 | + rpcrdma_mrs_create(r_xprt); |
---|
| 568 | + |
---|
| 569 | +out: |
---|
| 570 | + trace_xprtrdma_connect(r_xprt, rc); |
---|
782 | 571 | return rc; |
---|
783 | 572 | } |
---|
784 | 573 | |
---|
785 | | -/* |
---|
786 | | - * rpcrdma_ep_disconnect |
---|
| 574 | +/** |
---|
| 575 | + * rpcrdma_xprt_disconnect - Disconnect underlying transport |
---|
| 576 | + * @r_xprt: controlling transport instance |
---|
787 | 577 | * |
---|
788 | | - * This is separate from destroy to facilitate the ability |
---|
789 | | - * to reconnect without recreating the endpoint. |
---|
| 578 | + * Caller serializes. Either the transport send lock is held, |
---|
| 579 | + * or we're being called to destroy the transport. |
---|
790 | 580 | * |
---|
791 | | - * This call is not reentrant, and must not be made in parallel |
---|
792 | | - * on the same endpoint. |
---|
| 581 | + * On return, @r_xprt is completely divested of all hardware |
---|
| 582 | + * resources and prepared for the next ->connect operation. |
---|
793 | 583 | */ |
---|
794 | | -void |
---|
795 | | -rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) |
---|
| 584 | +void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt) |
---|
796 | 585 | { |
---|
| 586 | + struct rpcrdma_ep *ep = r_xprt->rx_ep; |
---|
| 587 | + struct rdma_cm_id *id; |
---|
797 | 588 | int rc; |
---|
798 | 589 | |
---|
799 | | - rc = rdma_disconnect(ia->ri_id); |
---|
800 | | - if (!rc) |
---|
801 | | - /* returns without wait if not connected */ |
---|
802 | | - wait_event_interruptible(ep->rep_connect_wait, |
---|
803 | | - ep->rep_connected != 1); |
---|
804 | | - else |
---|
805 | | - ep->rep_connected = rc; |
---|
806 | | - trace_xprtrdma_disconnect(container_of(ep, struct rpcrdma_xprt, |
---|
807 | | - rx_ep), rc); |
---|
| 590 | + if (!ep) |
---|
| 591 | + return; |
---|
808 | 592 | |
---|
809 | | - ib_drain_qp(ia->ri_id->qp); |
---|
| 593 | + id = ep->re_id; |
---|
| 594 | + rc = rdma_disconnect(id); |
---|
| 595 | + trace_xprtrdma_disconnect(r_xprt, rc); |
---|
| 596 | + |
---|
| 597 | + rpcrdma_xprt_drain(r_xprt); |
---|
| 598 | + rpcrdma_reps_unmap(r_xprt); |
---|
| 599 | + rpcrdma_reqs_reset(r_xprt); |
---|
| 600 | + rpcrdma_mrs_destroy(r_xprt); |
---|
| 601 | + rpcrdma_sendctxs_destroy(r_xprt); |
---|
| 602 | + |
---|
| 603 | + if (rpcrdma_ep_put(ep)) |
---|
| 604 | + rdma_destroy_id(id); |
---|
| 605 | + |
---|
| 606 | + r_xprt->rx_ep = NULL; |
---|
810 | 607 | } |
---|
811 | 608 | |
---|
812 | 609 | /* Fixed-size circular FIFO queue. This implementation is wait-free and |
---|
.. | .. |
---|
823 | 620 | */ |
---|
824 | 621 | |
---|
825 | 622 | /* rpcrdma_sendctxs_destroy() assumes caller has already quiesced |
---|
826 | | - * queue activity, and ib_drain_qp has flushed all remaining Send |
---|
827 | | - * requests. |
---|
| 623 | + * queue activity, and rpcrdma_xprt_drain has flushed all remaining |
---|
| 624 | + * Send requests. |
---|
828 | 625 | */ |
---|
829 | | -static void rpcrdma_sendctxs_destroy(struct rpcrdma_buffer *buf) |
---|
| 626 | +static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt) |
---|
830 | 627 | { |
---|
| 628 | + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
---|
831 | 629 | unsigned long i; |
---|
832 | 630 | |
---|
| 631 | + if (!buf->rb_sc_ctxs) |
---|
| 632 | + return; |
---|
833 | 633 | for (i = 0; i <= buf->rb_sc_last; i++) |
---|
834 | 634 | kfree(buf->rb_sc_ctxs[i]); |
---|
835 | 635 | kfree(buf->rb_sc_ctxs); |
---|
| 636 | + buf->rb_sc_ctxs = NULL; |
---|
836 | 637 | } |
---|
837 | 638 | |
---|
838 | | -static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ia *ia) |
---|
| 639 | +static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ep *ep) |
---|
839 | 640 | { |
---|
840 | 641 | struct rpcrdma_sendctx *sc; |
---|
841 | 642 | |
---|
842 | | - sc = kzalloc(sizeof(*sc) + |
---|
843 | | - ia->ri_max_send_sges * sizeof(struct ib_sge), |
---|
| 643 | + sc = kzalloc(struct_size(sc, sc_sges, ep->re_attr.cap.max_send_sge), |
---|
844 | 644 | GFP_KERNEL); |
---|
845 | 645 | if (!sc) |
---|
846 | 646 | return NULL; |
---|
847 | 647 | |
---|
848 | | - sc->sc_wr.wr_cqe = &sc->sc_cqe; |
---|
849 | | - sc->sc_wr.sg_list = sc->sc_sges; |
---|
850 | | - sc->sc_wr.opcode = IB_WR_SEND; |
---|
851 | 648 | sc->sc_cqe.done = rpcrdma_wc_send; |
---|
852 | 649 | return sc; |
---|
853 | 650 | } |
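The struct_size() conversion above is the idiomatic way to size an allocation that ends in a flexible array member: it computes sizeof(*sc) plus the trailing sc_sges[] elements and saturates instead of wrapping on overflow. A small illustrative sketch (example structure, not rpcrdma_sendctx itself):

```c
struct example_ctx {
	struct ib_cqe	cqe;
	unsigned int	nsges;
	struct ib_sge	sges[];		/* flexible array member */
};

static struct example_ctx *example_ctx_alloc(unsigned int nsges)
{
	struct example_ctx *ctx;

	/* struct_size(ctx, sges, nsges) == sizeof(*ctx) +
	 * nsges * sizeof(ctx->sges[0]), with overflow checking. */
	ctx = kzalloc(struct_size(ctx, sges, nsges), GFP_KERNEL);
	if (ctx)
		ctx->nsges = nsges;
	return ctx;
}
```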
---|
.. | .. |
---|
863 | 660 | * the ->send_request call to fail temporarily before too many |
---|
864 | 661 | * Sends are posted. |
---|
865 | 662 | */ |
---|
866 | | - i = buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; |
---|
867 | | - dprintk("RPC: %s: allocating %lu send_ctxs\n", __func__, i); |
---|
| 663 | + i = r_xprt->rx_ep->re_max_requests + RPCRDMA_MAX_BC_REQUESTS; |
---|
868 | 664 | buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL); |
---|
869 | 665 | if (!buf->rb_sc_ctxs) |
---|
870 | 666 | return -ENOMEM; |
---|
871 | 667 | |
---|
872 | 668 | buf->rb_sc_last = i - 1; |
---|
873 | 669 | for (i = 0; i <= buf->rb_sc_last; i++) { |
---|
874 | | - sc = rpcrdma_sendctx_create(&r_xprt->rx_ia); |
---|
| 670 | + sc = rpcrdma_sendctx_create(r_xprt->rx_ep); |
---|
875 | 671 | if (!sc) |
---|
876 | 672 | return -ENOMEM; |
---|
877 | 673 | |
---|
878 | | - sc->sc_xprt = r_xprt; |
---|
879 | 674 | buf->rb_sc_ctxs[i] = sc; |
---|
880 | 675 | } |
---|
881 | | - buf->rb_flags = 0; |
---|
882 | 676 | |
---|
| 677 | + buf->rb_sc_head = 0; |
---|
| 678 | + buf->rb_sc_tail = 0; |
---|
883 | 679 | return 0; |
---|
884 | 680 | } |
---|
885 | 681 | |
---|
.. | .. |
---|
895 | 691 | |
---|
896 | 692 | /** |
---|
897 | 693 | * rpcrdma_sendctx_get_locked - Acquire a send context |
---|
898 | | - * @buf: transport buffers from which to acquire an unused context |
---|
| 694 | + * @r_xprt: controlling transport instance |
---|
899 | 695 | * |
---|
900 | 696 | * Returns pointer to a free send completion context; or NULL if |
---|
901 | 697 | * the queue is empty. |
---|
902 | 698 | * |
---|
903 | 699 | * Usage: Called to acquire an SGE array before preparing a Send WR. |
---|
904 | 700 | * |
---|
905 | | - * The caller serializes calls to this function (per rpcrdma_buffer), |
---|
906 | | - * and provides an effective memory barrier that flushes the new value |
---|
| 701 | + * The caller serializes calls to this function (per transport), and |
---|
| 702 | + * provides an effective memory barrier that flushes the new value |
---|
907 | 703 | * of rb_sc_head. |
---|
908 | 704 | */ |
---|
909 | | -struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf) |
---|
| 705 | +struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt) |
---|
910 | 706 | { |
---|
911 | | - struct rpcrdma_xprt *r_xprt; |
---|
| 707 | + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
---|
912 | 708 | struct rpcrdma_sendctx *sc; |
---|
913 | 709 | unsigned long next_head; |
---|
914 | 710 | |
---|
.. | .. |
---|
932 | 728 | * completions recently. This is a sign the Send Queue is |
---|
933 | 729 | * backing up. Cause the caller to pause and try again. |
---|
934 | 730 | */ |
---|
935 | | - set_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags); |
---|
936 | | - r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf); |
---|
| 731 | + xprt_wait_for_buffer_space(&r_xprt->rx_xprt); |
---|
937 | 732 | r_xprt->rx_stats.empty_sendctx_q++; |
---|
938 | 733 | return NULL; |
---|
939 | 734 | } |
---|
940 | 735 | |
---|
941 | 736 | /** |
---|
942 | 737 | * rpcrdma_sendctx_put_locked - Release a send context |
---|
| 738 | + * @r_xprt: controlling transport instance |
---|
943 | 739 | * @sc: send context to release |
---|
944 | 740 | * |
---|
945 | 741 | * Usage: Called from Send completion to return a sendctxt |
---|
946 | 742 | * to the queue. |
---|
947 | 743 | * |
---|
948 | | - * The caller serializes calls to this function (per rpcrdma_buffer). |
---|
| 744 | + * The caller serializes calls to this function (per transport). |
---|
949 | 745 | */ |
---|
950 | | -static void |
---|
951 | | -rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc) |
---|
| 746 | +static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, |
---|
| 747 | + struct rpcrdma_sendctx *sc) |
---|
952 | 748 | { |
---|
953 | | - struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf; |
---|
| 749 | + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
---|
954 | 750 | unsigned long next_tail; |
---|
955 | 751 | |
---|
956 | | - /* Unmap SGEs of previously completed by unsignaled |
---|
| 752 | + /* Unmap SGEs of previously completed but unsignaled |
---|
957 | 753 | * Sends by walking up the queue until @sc is found. |
---|
958 | 754 | */ |
---|
959 | 755 | next_tail = buf->rb_sc_tail; |
---|
.. | .. |
---|
961 | 757 | next_tail = rpcrdma_sendctx_next(buf, next_tail); |
---|
962 | 758 | |
---|
963 | 759 | /* ORDER: item must be accessed _before_ tail is updated */ |
---|
964 | | - rpcrdma_unmap_sendctx(buf->rb_sc_ctxs[next_tail]); |
---|
| 760 | + rpcrdma_sendctx_unmap(buf->rb_sc_ctxs[next_tail]); |
---|
965 | 761 | |
---|
966 | 762 | } while (buf->rb_sc_ctxs[next_tail] != sc); |
---|
967 | 763 | |
---|
968 | 764 | /* Paired with READ_ONCE */ |
---|
969 | 765 | smp_store_release(&buf->rb_sc_tail, next_tail); |
---|
970 | 766 | |
---|
971 | | - if (test_and_clear_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags)) { |
---|
972 | | - smp_mb__after_atomic(); |
---|
973 | | - xprt_write_space(&sc->sc_xprt->rx_xprt); |
---|
974 | | - } |
---|
975 | | -} |
---|
976 | | - |
---|
977 | | -static void |
---|
978 | | -rpcrdma_mr_recovery_worker(struct work_struct *work) |
---|
979 | | -{ |
---|
980 | | - struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer, |
---|
981 | | - rb_recovery_worker.work); |
---|
982 | | - struct rpcrdma_mr *mr; |
---|
983 | | - |
---|
984 | | - spin_lock(&buf->rb_recovery_lock); |
---|
985 | | - while (!list_empty(&buf->rb_stale_mrs)) { |
---|
986 | | - mr = rpcrdma_mr_pop(&buf->rb_stale_mrs); |
---|
987 | | - spin_unlock(&buf->rb_recovery_lock); |
---|
988 | | - |
---|
989 | | - trace_xprtrdma_recover_mr(mr); |
---|
990 | | - mr->mr_xprt->rx_ia.ri_ops->ro_recover_mr(mr); |
---|
991 | | - |
---|
992 | | - spin_lock(&buf->rb_recovery_lock); |
---|
993 | | - } |
---|
994 | | - spin_unlock(&buf->rb_recovery_lock); |
---|
995 | | -} |
---|
996 | | - |
---|
997 | | -void |
---|
998 | | -rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr) |
---|
999 | | -{ |
---|
1000 | | - struct rpcrdma_xprt *r_xprt = mr->mr_xprt; |
---|
1001 | | - struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
---|
1002 | | - |
---|
1003 | | - spin_lock(&buf->rb_recovery_lock); |
---|
1004 | | - rpcrdma_mr_push(mr, &buf->rb_stale_mrs); |
---|
1005 | | - spin_unlock(&buf->rb_recovery_lock); |
---|
1006 | | - |
---|
1007 | | - schedule_delayed_work(&buf->rb_recovery_worker, 0); |
---|
| 767 | + xprt_write_space(&r_xprt->rx_xprt); |
---|
1008 | 768 | } |
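The sendctx queue is a single-producer/single-consumer ring: the send path advances rb_sc_head, the Send completion path advances rb_sc_tail, and each side serializes its own updates, so smp_store_release() paired with READ_ONCE() is enough to publish slot contents without a spinlock. A stripped-down sketch of the same style of ring, with hypothetical names and a power-of-two size:

```c
#define EXAMPLE_RING_SIZE 64		/* must be a power of two */

struct example_ring {
	unsigned long	head;		/* written only by the producer */
	unsigned long	tail;		/* written only by the consumer */
	void		*slots[EXAMPLE_RING_SIZE];
};

/* Producer side: returns false if the ring is currently full. */
static bool example_ring_put(struct example_ring *r, void *item)
{
	unsigned long next = (r->head + 1) & (EXAMPLE_RING_SIZE - 1);

	if (next == READ_ONCE(r->tail))
		return false;
	r->slots[r->head] = item;
	/* ORDER: the slot must be visible before the new head value */
	smp_store_release(&r->head, next);
	return true;
}
```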
---|
1009 | 769 | |
---|
1010 | 770 | static void |
---|
1011 | 771 | rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) |
---|
1012 | 772 | { |
---|
1013 | 773 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
---|
1014 | | - struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
---|
| 774 | + struct rpcrdma_ep *ep = r_xprt->rx_ep; |
---|
1015 | 775 | unsigned int count; |
---|
1016 | | - LIST_HEAD(free); |
---|
1017 | | - LIST_HEAD(all); |
---|
1018 | 776 | |
---|
1019 | | - for (count = 0; count < 3; count++) { |
---|
| 777 | + for (count = 0; count < ep->re_max_rdma_segs; count++) { |
---|
1020 | 778 | struct rpcrdma_mr *mr; |
---|
1021 | 779 | int rc; |
---|
1022 | 780 | |
---|
1023 | | - mr = kzalloc(sizeof(*mr), GFP_KERNEL); |
---|
| 781 | + mr = kzalloc(sizeof(*mr), GFP_NOFS); |
---|
1024 | 782 | if (!mr) |
---|
1025 | 783 | break; |
---|
1026 | 784 | |
---|
1027 | | - rc = ia->ri_ops->ro_init_mr(ia, mr); |
---|
| 785 | + rc = frwr_mr_init(r_xprt, mr); |
---|
1028 | 786 | if (rc) { |
---|
1029 | 787 | kfree(mr); |
---|
1030 | 788 | break; |
---|
1031 | 789 | } |
---|
1032 | 790 | |
---|
1033 | | - mr->mr_xprt = r_xprt; |
---|
1034 | | - |
---|
1035 | | - list_add(&mr->mr_list, &free); |
---|
1036 | | - list_add(&mr->mr_all, &all); |
---|
| 791 | + spin_lock(&buf->rb_lock); |
---|
| 792 | + rpcrdma_mr_push(mr, &buf->rb_mrs); |
---|
| 793 | + list_add(&mr->mr_all, &buf->rb_all_mrs); |
---|
| 794 | + spin_unlock(&buf->rb_lock); |
---|
1037 | 795 | } |
---|
1038 | 796 | |
---|
1039 | | - spin_lock(&buf->rb_mrlock); |
---|
1040 | | - list_splice(&free, &buf->rb_mrs); |
---|
1041 | | - list_splice(&all, &buf->rb_all); |
---|
1042 | 797 | r_xprt->rx_stats.mrs_allocated += count; |
---|
1043 | | - spin_unlock(&buf->rb_mrlock); |
---|
1044 | 798 | trace_xprtrdma_createmrs(r_xprt, count); |
---|
1045 | | - |
---|
1046 | | - xprt_write_space(&r_xprt->rx_xprt); |
---|
1047 | 799 | } |
---|
1048 | 800 | |
---|
1049 | 801 | static void |
---|
1050 | 802 | rpcrdma_mr_refresh_worker(struct work_struct *work) |
---|
1051 | 803 | { |
---|
1052 | 804 | struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer, |
---|
1053 | | - rb_refresh_worker.work); |
---|
| 805 | + rb_refresh_worker); |
---|
1054 | 806 | struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, |
---|
1055 | 807 | rx_buf); |
---|
1056 | 808 | |
---|
1057 | 809 | rpcrdma_mrs_create(r_xprt); |
---|
| 810 | + xprt_write_space(&r_xprt->rx_xprt); |
---|
1058 | 811 | } |
---|
1059 | 812 | |
---|
1060 | | -struct rpcrdma_req * |
---|
1061 | | -rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) |
---|
| 813 | +/** |
---|
| 814 | + * rpcrdma_mrs_refresh - Wake the MR refresh worker |
---|
| 815 | + * @r_xprt: controlling transport instance |
---|
| 816 | + * |
---|
| 817 | + */ |
---|
| 818 | +void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt) |
---|
| 819 | +{ |
---|
| 820 | + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
---|
| 821 | + struct rpcrdma_ep *ep = r_xprt->rx_ep; |
---|
| 822 | + |
---|
| 823 | + /* If there is no underlying connection, it's no use |
---|
| 824 | + * to wake the refresh worker. |
---|
| 825 | + */ |
---|
| 826 | + if (ep->re_connect_status == 1) { |
---|
| 827 | + /* The work is scheduled on a WQ_MEM_RECLAIM |
---|
| 828 | + * workqueue in order to prevent MR allocation |
---|
| 829 | + * from recursing into NFS during direct reclaim. |
---|
| 830 | + */ |
---|
| 831 | + queue_work(xprtiod_workqueue, &buf->rb_refresh_worker); |
---|
| 832 | + } |
---|
| 833 | +} |
---|
| 834 | + |
---|
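The comment above is the key constraint: because this transport can sit underneath NFS writeback, the MR refresh work must run on a workqueue with a rescuer thread so it can still make forward progress during memory reclaim. A self-contained sketch of that pattern follows; the demo_* names are illustrative, and xprtiod_workqueue itself is allocated elsewhere in the RPC client, not here.

#include <linux/workqueue.h>

static struct workqueue_struct *demo_wq;
static struct work_struct demo_refresh_work;

static void demo_refresh(struct work_struct *work)
{
        /* replenish resources; may allocate memory */
}

static int demo_init(void)
{
        /* WQ_MEM_RECLAIM guarantees a rescuer thread, so queued work
         * can still run when new kworkers cannot be created under
         * memory pressure.
         */
        demo_wq = alloc_workqueue("demo_reclaim_wq", WQ_MEM_RECLAIM, 0);
        if (!demo_wq)
                return -ENOMEM;
        INIT_WORK(&demo_refresh_work, demo_refresh);
        return 0;
}

static void demo_kick(void)
{
        queue_work(demo_wq, &demo_refresh_work);
}

static void demo_exit(void)
{
        cancel_work_sync(&demo_refresh_work);
        destroy_workqueue(demo_wq);
}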
| 835 | +/** |
---|
| 836 | + * rpcrdma_req_create - Allocate an rpcrdma_req object |
---|
| 837 | + * @r_xprt: controlling r_xprt |
---|
| 838 | + * @size: initial size, in bytes, of send and receive buffers |
---|
| 839 | + * @flags: GFP flags passed to memory allocators |
---|
| 840 | + * |
---|
| 841 | + * Returns an allocated and fully initialized rpcrdma_req or NULL. |
---|
| 842 | + */ |
---|
| 843 | +struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, |
---|
| 844 | + gfp_t flags) |
---|
1062 | 845 | { |
---|
1063 | 846 | struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; |
---|
1064 | | - struct rpcrdma_regbuf *rb; |
---|
1065 | 847 | struct rpcrdma_req *req; |
---|
1066 | 848 | |
---|
1067 | | - req = kzalloc(sizeof(*req), GFP_KERNEL); |
---|
| 849 | + req = kzalloc(sizeof(*req), flags); |
---|
1068 | 850 | if (req == NULL) |
---|
1069 | | - return ERR_PTR(-ENOMEM); |
---|
| 851 | + goto out1; |
---|
1070 | 852 | |
---|
1071 | | - rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, |
---|
1072 | | - DMA_TO_DEVICE, GFP_KERNEL); |
---|
1073 | | - if (IS_ERR(rb)) { |
---|
1074 | | - kfree(req); |
---|
1075 | | - return ERR_PTR(-ENOMEM); |
---|
1076 | | - } |
---|
1077 | | - req->rl_rdmabuf = rb; |
---|
1078 | | - xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb)); |
---|
1079 | | - req->rl_buffer = buffer; |
---|
| 853 | + req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE, flags); |
---|
| 854 | + if (!req->rl_sendbuf) |
---|
| 855 | + goto out2; |
---|
| 856 | + |
---|
| 857 | + req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE, flags); |
---|
| 858 | + if (!req->rl_recvbuf) |
---|
| 859 | + goto out3; |
---|
| 860 | + |
---|
| 861 | + INIT_LIST_HEAD(&req->rl_free_mrs); |
---|
1080 | 862 | INIT_LIST_HEAD(&req->rl_registered); |
---|
1081 | | - |
---|
1082 | | - spin_lock(&buffer->rb_reqslock); |
---|
| 863 | + spin_lock(&buffer->rb_lock); |
---|
1083 | 864 | list_add(&req->rl_all, &buffer->rb_allreqs); |
---|
1084 | | - spin_unlock(&buffer->rb_reqslock); |
---|
| 865 | + spin_unlock(&buffer->rb_lock); |
---|
1085 | 866 | return req; |
---|
| 867 | + |
---|
| 868 | +out3: |
---|
| 869 | + kfree(req->rl_sendbuf); |
---|
| 870 | +out2: |
---|
| 871 | + kfree(req); |
---|
| 872 | +out1: |
---|
| 873 | + return NULL; |
---|
1086 | 874 | } |
---|
1087 | 875 | |
---|
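As a usage sketch only (demo_req_provision is a hypothetical caller; in this file, creation happens in rpcrdma_buffer_create() and per-connection setup later in rpcrdma_reqs_setup(), and rpcrdma_req_destroy() expects the locking described in its own comment): allocation and per-connection setup are separate steps, and a setup failure is unwound with rpcrdma_req_destroy().

static int demo_req_provision(struct rpcrdma_xprt *r_xprt)
{
        struct rpcrdma_req *req;
        int rc;

        req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE * 2,
                                 GFP_KERNEL);
        if (!req)
                return -ENOMEM;

        rc = rpcrdma_req_setup(r_xprt, req);    /* maps the header regbuf */
        if (rc) {
                rpcrdma_req_destroy(req);
                return rc;
        }
        return 0;
}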
1088 | | -static int |
---|
1089 | | -rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp) |
---|
| 876 | +/** |
---|
| 877 | + * rpcrdma_req_setup - Per-connection instance setup of an rpcrdma_req object |
---|
| 878 | + * @r_xprt: controlling transport instance |
---|
| 879 | + * @req: rpcrdma_req object to set up |
---|
| 880 | + * |
---|
| 881 | + * Returns zero on success, and a negative errno on failure. |
---|
| 882 | + */ |
---|
| 883 | +int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) |
---|
1090 | 884 | { |
---|
1091 | | - struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; |
---|
| 885 | + struct rpcrdma_regbuf *rb; |
---|
| 886 | + size_t maxhdrsize; |
---|
| 887 | + |
---|
| 888 | + /* Compute maximum header buffer size in bytes */ |
---|
| 889 | + maxhdrsize = rpcrdma_fixed_maxsz + 3 + |
---|
| 890 | + r_xprt->rx_ep->re_max_rdma_segs * rpcrdma_readchunk_maxsz; |
---|
| 891 | + maxhdrsize *= sizeof(__be32); |
---|
| 892 | + rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize), |
---|
| 893 | + DMA_TO_DEVICE, GFP_KERNEL); |
---|
| 894 | + if (!rb) |
---|
| 895 | + goto out; |
---|
| 896 | + |
---|
| 897 | + if (!__rpcrdma_regbuf_dma_map(r_xprt, rb)) |
---|
| 898 | + goto out_free; |
---|
| 899 | + |
---|
| 900 | + req->rl_rdmabuf = rb; |
---|
| 901 | + xdr_buf_init(&req->rl_hdrbuf, rdmab_data(rb), rdmab_length(rb)); |
---|
| 902 | + return 0; |
---|
| 903 | + |
---|
| 904 | +out_free: |
---|
| 905 | + rpcrdma_regbuf_free(rb); |
---|
| 906 | +out: |
---|
| 907 | + return -ENOMEM; |
---|
| 908 | +} |
---|
| 909 | + |
---|
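A worked example of the size computation above, using illustrative constants that are not taken from this hunk: if rpcrdma_fixed_maxsz were 4, rpcrdma_readchunk_maxsz were 6, and re_max_rdma_segs were 8, then maxhdrsize would be (4 + 3 + 8 * 6) * sizeof(__be32) = 55 * 4 = 220 bytes, and __roundup_pow_of_two() would size the header regbuf at 256 bytes.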
| 910 | +/* ASSUMPTION: the rb_allreqs list is stable for the duration, |
---|
| 911 | + * and thus can be walked without holding rb_lock. Eg. the |
---|
| 912 | + * caller is holding the transport send lock to exclude |
---|
| 913 | + * device removal or disconnection. |
---|
| 914 | + */ |
---|
| 915 | +static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt) |
---|
| 916 | +{ |
---|
1092 | 917 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
---|
1093 | | - struct rpcrdma_rep *rep; |
---|
| 918 | + struct rpcrdma_req *req; |
---|
1094 | 919 | int rc; |
---|
1095 | 920 | |
---|
1096 | | - rc = -ENOMEM; |
---|
| 921 | + list_for_each_entry(req, &buf->rb_allreqs, rl_all) { |
---|
| 922 | + rc = rpcrdma_req_setup(r_xprt, req); |
---|
| 923 | + if (rc) |
---|
| 924 | + return rc; |
---|
| 925 | + } |
---|
| 926 | + return 0; |
---|
| 927 | +} |
---|
| 928 | + |
---|
| 929 | +static void rpcrdma_req_reset(struct rpcrdma_req *req) |
---|
| 930 | +{ |
---|
| 931 | + /* Credits are valid for only one connection */ |
---|
| 932 | + req->rl_slot.rq_cong = 0; |
---|
| 933 | + |
---|
| 934 | + rpcrdma_regbuf_free(req->rl_rdmabuf); |
---|
| 935 | + req->rl_rdmabuf = NULL; |
---|
| 936 | + |
---|
| 937 | + rpcrdma_regbuf_dma_unmap(req->rl_sendbuf); |
---|
| 938 | + rpcrdma_regbuf_dma_unmap(req->rl_recvbuf); |
---|
| 939 | + |
---|
| 940 | + frwr_reset(req); |
---|
| 941 | +} |
---|
| 942 | + |
---|
| 943 | +/* ASSUMPTION: the rb_allreqs list is stable for the duration, |
---|
| 944 | + * and thus can be walked without holding rb_lock. Eg. the |
---|
| 945 | + * caller is holding the transport send lock to exclude |
---|
| 946 | + * device removal or disconnection. |
---|
| 947 | + */ |
---|
| 948 | +static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt) |
---|
| 949 | +{ |
---|
| 950 | + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
---|
| 951 | + struct rpcrdma_req *req; |
---|
| 952 | + |
---|
| 953 | + list_for_each_entry(req, &buf->rb_allreqs, rl_all) |
---|
| 954 | + rpcrdma_req_reset(req); |
---|
| 955 | +} |
---|
| 956 | + |
---|
| 957 | +/* No locking needed here. This function is called only by the |
---|
| 958 | + * Receive completion handler. |
---|
| 959 | + */ |
---|
| 960 | +static noinline |
---|
| 961 | +struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, |
---|
| 962 | + bool temp) |
---|
| 963 | +{ |
---|
| 964 | + struct rpcrdma_rep *rep; |
---|
| 965 | + |
---|
1097 | 966 | rep = kzalloc(sizeof(*rep), GFP_KERNEL); |
---|
1098 | 967 | if (rep == NULL) |
---|
1099 | 968 | goto out; |
---|
1100 | 969 | |
---|
1101 | | - rep->rr_rdmabuf = rpcrdma_alloc_regbuf(cdata->inline_rsize, |
---|
| 970 | + rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep->re_inline_recv, |
---|
1102 | 971 | DMA_FROM_DEVICE, GFP_KERNEL); |
---|
1103 | | - if (IS_ERR(rep->rr_rdmabuf)) { |
---|
1104 | | - rc = PTR_ERR(rep->rr_rdmabuf); |
---|
| 972 | + if (!rep->rr_rdmabuf) |
---|
1105 | 973 | goto out_free; |
---|
1106 | | - } |
---|
1107 | | - xdr_buf_init(&rep->rr_hdrbuf, rep->rr_rdmabuf->rg_base, |
---|
1108 | | - rdmab_length(rep->rr_rdmabuf)); |
---|
1109 | 974 | |
---|
| 975 | + if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) |
---|
| 976 | + goto out_free_regbuf; |
---|
| 977 | + |
---|
| 978 | + xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rep->rr_rdmabuf), |
---|
| 979 | + rdmab_length(rep->rr_rdmabuf)); |
---|
1110 | 980 | rep->rr_cqe.done = rpcrdma_wc_receive; |
---|
1111 | 981 | rep->rr_rxprt = r_xprt; |
---|
1112 | | - INIT_WORK(&rep->rr_work, rpcrdma_deferred_completion); |
---|
1113 | 982 | rep->rr_recv_wr.next = NULL; |
---|
1114 | 983 | rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; |
---|
1115 | 984 | rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; |
---|
1116 | 985 | rep->rr_recv_wr.num_sge = 1; |
---|
1117 | 986 | rep->rr_temp = temp; |
---|
| 987 | + list_add(&rep->rr_all, &r_xprt->rx_buf.rb_all_reps); |
---|
| 988 | + return rep; |
---|
1118 | 989 | |
---|
1119 | | - spin_lock(&buf->rb_lock); |
---|
1120 | | - list_add(&rep->rr_list, &buf->rb_recv_bufs); |
---|
1121 | | - spin_unlock(&buf->rb_lock); |
---|
1122 | | - return 0; |
---|
1123 | | - |
---|
| 990 | +out_free_regbuf: |
---|
| 991 | + rpcrdma_regbuf_free(rep->rr_rdmabuf); |
---|
1124 | 992 | out_free: |
---|
1125 | 993 | kfree(rep); |
---|
1126 | 994 | out: |
---|
1127 | | - dprintk("RPC: %s: reply buffer %d alloc failed\n", |
---|
1128 | | - __func__, rc); |
---|
1129 | | - return rc; |
---|
| 995 | + return NULL; |
---|
1130 | 996 | } |
---|
1131 | 997 | |
---|
1132 | | -int |
---|
1133 | | -rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) |
---|
| 998 | +/* No locking needed here. This function is invoked only by the |
---|
| 999 | + * Receive completion handler, or during transport shutdown. |
---|
| 1000 | + */ |
---|
| 1001 | +static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep) |
---|
| 1002 | +{ |
---|
| 1003 | + list_del(&rep->rr_all); |
---|
| 1004 | + rpcrdma_regbuf_free(rep->rr_rdmabuf); |
---|
| 1005 | + kfree(rep); |
---|
| 1006 | +} |
---|
| 1007 | + |
---|
| 1008 | +static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf) |
---|
| 1009 | +{ |
---|
| 1010 | + struct llist_node *node; |
---|
| 1011 | + |
---|
| 1012 | + /* Calls to llist_del_first are required to be serialized */ |
---|
| 1013 | + node = llist_del_first(&buf->rb_free_reps); |
---|
| 1014 | + if (!node) |
---|
| 1015 | + return NULL; |
---|
| 1016 | + return llist_entry(node, struct rpcrdma_rep, rr_node); |
---|
| 1017 | +} |
---|
| 1018 | + |
---|
| 1019 | +static void rpcrdma_rep_put(struct rpcrdma_buffer *buf, |
---|
| 1020 | + struct rpcrdma_rep *rep) |
---|
| 1021 | +{ |
---|
| 1022 | + llist_add(&rep->rr_node, &buf->rb_free_reps); |
---|
| 1023 | +} |
---|
| 1024 | + |
---|
| 1025 | +static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt) |
---|
| 1026 | +{ |
---|
| 1027 | + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
---|
| 1028 | + struct rpcrdma_rep *rep; |
---|
| 1029 | + |
---|
| 1030 | + list_for_each_entry(rep, &buf->rb_all_reps, rr_all) { |
---|
| 1031 | + rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf); |
---|
| 1032 | + rep->rr_temp = true; |
---|
| 1033 | + } |
---|
| 1034 | +} |
---|
| 1035 | + |
---|
| 1036 | +static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf) |
---|
| 1037 | +{ |
---|
| 1038 | + struct rpcrdma_rep *rep; |
---|
| 1039 | + |
---|
| 1040 | + while ((rep = rpcrdma_rep_get_locked(buf)) != NULL) |
---|
| 1041 | + rpcrdma_rep_destroy(rep); |
---|
| 1042 | +} |
---|
| 1043 | + |
---|
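The rep free list above relies on the llist contract: llist_add() is safe from any context, but callers of llist_del_first() must serialize among themselves. In this file the poppers are the Receive completion path and transport shutdown, which runs only after the QP has been drained. A minimal sketch of the same pattern with hypothetical demo_* names:

#include <linux/llist.h>

struct demo_rep {
        struct llist_node node;
        char payload[128];
};

static LLIST_HEAD(demo_free_reps);

/* Producer side: safe from any context, including a completion handler. */
static void demo_rep_put(struct demo_rep *rep)
{
        llist_add(&rep->node, &demo_free_reps);
}

/* Consumer side: callers of llist_del_first() must not race with one
 * another; a single serialized completion path satisfies that rule.
 */
static struct demo_rep *demo_rep_get(void)
{
        struct llist_node *node = llist_del_first(&demo_free_reps);

        return node ? llist_entry(node, struct demo_rep, node) : NULL;
}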
| 1044 | +/** |
---|
| 1045 | + * rpcrdma_buffer_create - Create initial set of req/rep objects |
---|
| 1046 | + * @r_xprt: transport instance to (re)initialize |
---|
| 1047 | + * |
---|
| 1048 | + * Returns zero on success, otherwise a negative errno. |
---|
| 1049 | + */ |
---|
| 1050 | +int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) |
---|
1134 | 1051 | { |
---|
1135 | 1052 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
---|
1136 | 1053 | int i, rc; |
---|
1137 | 1054 | |
---|
1138 | | - buf->rb_max_requests = r_xprt->rx_data.max_requests; |
---|
1139 | 1055 | buf->rb_bc_srv_max_requests = 0; |
---|
1140 | | - spin_lock_init(&buf->rb_mrlock); |
---|
1141 | 1056 | spin_lock_init(&buf->rb_lock); |
---|
1142 | | - spin_lock_init(&buf->rb_recovery_lock); |
---|
1143 | 1057 | INIT_LIST_HEAD(&buf->rb_mrs); |
---|
1144 | | - INIT_LIST_HEAD(&buf->rb_all); |
---|
1145 | | - INIT_LIST_HEAD(&buf->rb_stale_mrs); |
---|
1146 | | - INIT_DELAYED_WORK(&buf->rb_refresh_worker, |
---|
1147 | | - rpcrdma_mr_refresh_worker); |
---|
1148 | | - INIT_DELAYED_WORK(&buf->rb_recovery_worker, |
---|
1149 | | - rpcrdma_mr_recovery_worker); |
---|
1150 | | - |
---|
1151 | | - rpcrdma_mrs_create(r_xprt); |
---|
| 1058 | + INIT_LIST_HEAD(&buf->rb_all_mrs); |
---|
| 1059 | + INIT_WORK(&buf->rb_refresh_worker, rpcrdma_mr_refresh_worker); |
---|
1152 | 1060 | |
---|
1153 | 1061 | INIT_LIST_HEAD(&buf->rb_send_bufs); |
---|
1154 | 1062 | INIT_LIST_HEAD(&buf->rb_allreqs); |
---|
1155 | | - spin_lock_init(&buf->rb_reqslock); |
---|
1156 | | - for (i = 0; i < buf->rb_max_requests; i++) { |
---|
| 1063 | + INIT_LIST_HEAD(&buf->rb_all_reps); |
---|
| 1064 | + |
---|
| 1065 | + rc = -ENOMEM; |
---|
| 1066 | + for (i = 0; i < r_xprt->rx_xprt.max_reqs; i++) { |
---|
1157 | 1067 | struct rpcrdma_req *req; |
---|
1158 | 1068 | |
---|
1159 | | - req = rpcrdma_create_req(r_xprt); |
---|
1160 | | - if (IS_ERR(req)) { |
---|
1161 | | - dprintk("RPC: %s: request buffer %d alloc" |
---|
1162 | | - " failed\n", __func__, i); |
---|
1163 | | - rc = PTR_ERR(req); |
---|
| 1069 | + req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE * 2, |
---|
| 1070 | + GFP_KERNEL); |
---|
| 1071 | + if (!req) |
---|
1164 | 1072 | goto out; |
---|
1165 | | - } |
---|
1166 | 1073 | list_add(&req->rl_list, &buf->rb_send_bufs); |
---|
1167 | 1074 | } |
---|
1168 | 1075 | |
---|
1169 | | - buf->rb_credits = 1; |
---|
1170 | | - buf->rb_posted_receives = 0; |
---|
1171 | | - INIT_LIST_HEAD(&buf->rb_recv_bufs); |
---|
1172 | | - |
---|
1173 | | - rc = rpcrdma_sendctxs_create(r_xprt); |
---|
1174 | | - if (rc) |
---|
1175 | | - goto out; |
---|
| 1076 | + init_llist_head(&buf->rb_free_reps); |
---|
1176 | 1077 | |
---|
1177 | 1078 | return 0; |
---|
1178 | 1079 | out: |
---|
.. | .. |
---|
1180 | 1081 | return rc; |
---|
1181 | 1082 | } |
---|
1182 | 1083 | |
---|
1183 | | -static void |
---|
1184 | | -rpcrdma_destroy_rep(struct rpcrdma_rep *rep) |
---|
| 1084 | +/** |
---|
| 1085 | + * rpcrdma_req_destroy - Destroy an rpcrdma_req object |
---|
| 1086 | + * @req: unused object to be destroyed |
---|
| 1087 | + * |
---|
| 1088 | + * Relies on caller holding the transport send lock to protect |
---|
| 1089 | + * removing req->rl_all from buf->rb_allreqs safely. |
---|
| 1090 | + */ |
---|
| 1091 | +void rpcrdma_req_destroy(struct rpcrdma_req *req) |
---|
1185 | 1092 | { |
---|
1186 | | - rpcrdma_free_regbuf(rep->rr_rdmabuf); |
---|
1187 | | - kfree(rep); |
---|
1188 | | -} |
---|
| 1093 | + struct rpcrdma_mr *mr; |
---|
1189 | 1094 | |
---|
1190 | | -void |
---|
1191 | | -rpcrdma_destroy_req(struct rpcrdma_req *req) |
---|
1192 | | -{ |
---|
1193 | | - rpcrdma_free_regbuf(req->rl_recvbuf); |
---|
1194 | | - rpcrdma_free_regbuf(req->rl_sendbuf); |
---|
1195 | | - rpcrdma_free_regbuf(req->rl_rdmabuf); |
---|
| 1095 | + list_del(&req->rl_all); |
---|
| 1096 | + |
---|
| 1097 | + while ((mr = rpcrdma_mr_pop(&req->rl_free_mrs))) { |
---|
| 1098 | + struct rpcrdma_buffer *buf = &mr->mr_xprt->rx_buf; |
---|
| 1099 | + |
---|
| 1100 | + spin_lock(&buf->rb_lock); |
---|
| 1101 | + list_del(&mr->mr_all); |
---|
| 1102 | + spin_unlock(&buf->rb_lock); |
---|
| 1103 | + |
---|
| 1104 | + frwr_release_mr(mr); |
---|
| 1105 | + } |
---|
| 1106 | + |
---|
| 1107 | + rpcrdma_regbuf_free(req->rl_recvbuf); |
---|
| 1108 | + rpcrdma_regbuf_free(req->rl_sendbuf); |
---|
| 1109 | + rpcrdma_regbuf_free(req->rl_rdmabuf); |
---|
1196 | 1110 | kfree(req); |
---|
1197 | 1111 | } |
---|
1198 | 1112 | |
---|
1199 | | -static void |
---|
1200 | | -rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf) |
---|
| 1113 | +/** |
---|
| 1114 | + * rpcrdma_mrs_destroy - Release all of a transport's MRs |
---|
| 1115 | + * @r_xprt: controlling transport instance |
---|
| 1116 | + * |
---|
| 1117 | + * Relies on caller holding the transport send lock to protect |
---|
| 1118 | + * removing mr->mr_list from req->rl_free_mrs safely. |
---|
| 1119 | + */ |
---|
| 1120 | +static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt) |
---|
1201 | 1121 | { |
---|
1202 | | - struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, |
---|
1203 | | - rx_buf); |
---|
1204 | | - struct rpcrdma_ia *ia = rdmab_to_ia(buf); |
---|
| 1122 | + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
---|
1205 | 1123 | struct rpcrdma_mr *mr; |
---|
1206 | | - unsigned int count; |
---|
1207 | 1124 | |
---|
1208 | | - count = 0; |
---|
1209 | | - spin_lock(&buf->rb_mrlock); |
---|
1210 | | - while (!list_empty(&buf->rb_all)) { |
---|
1211 | | - mr = list_entry(buf->rb_all.next, struct rpcrdma_mr, mr_all); |
---|
| 1125 | + cancel_work_sync(&buf->rb_refresh_worker); |
---|
| 1126 | + |
---|
| 1127 | + spin_lock(&buf->rb_lock); |
---|
| 1128 | + while ((mr = list_first_entry_or_null(&buf->rb_all_mrs, |
---|
| 1129 | + struct rpcrdma_mr, |
---|
| 1130 | + mr_all)) != NULL) { |
---|
| 1131 | + list_del(&mr->mr_list); |
---|
1212 | 1132 | list_del(&mr->mr_all); |
---|
| 1133 | + spin_unlock(&buf->rb_lock); |
---|
1213 | 1134 | |
---|
1214 | | - spin_unlock(&buf->rb_mrlock); |
---|
| 1135 | + frwr_release_mr(mr); |
---|
1215 | 1136 | |
---|
1216 | | - /* Ensure MW is not on any rl_registered list */ |
---|
1217 | | - if (!list_empty(&mr->mr_list)) |
---|
1218 | | - list_del(&mr->mr_list); |
---|
1219 | | - |
---|
1220 | | - ia->ri_ops->ro_release_mr(mr); |
---|
1221 | | - count++; |
---|
1222 | | - spin_lock(&buf->rb_mrlock); |
---|
| 1137 | + spin_lock(&buf->rb_lock); |
---|
1223 | 1138 | } |
---|
1224 | | - spin_unlock(&buf->rb_mrlock); |
---|
1225 | | - r_xprt->rx_stats.mrs_allocated = 0; |
---|
1226 | | - |
---|
1227 | | - dprintk("RPC: %s: released %u MRs\n", __func__, count); |
---|
| 1139 | + spin_unlock(&buf->rb_lock); |
---|
1228 | 1140 | } |
---|
1229 | 1141 | |
---|
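rpcrdma_mrs_destroy() above shows a common teardown shape: detach one entry while holding the spinlock, then drop the lock around frwr_release_mr(), which calls down into the verbs layer and may block. A generic sketch of the same pattern (demo_* names are illustrative, not from this file):

#include <linux/list.h>
#include <linux/spinlock.h>

struct demo_item {
        struct list_head        link;
};

struct demo_pool {
        spinlock_t              lock;
        struct list_head        items;
};

static void demo_release(struct demo_item *item);      /* may block */

static void demo_pool_destroy(struct demo_pool *pool)
{
        struct demo_item *item;

        spin_lock(&pool->lock);
        while ((item = list_first_entry_or_null(&pool->items,
                                                struct demo_item,
                                                link)) != NULL) {
                list_del(&item->link);
                spin_unlock(&pool->lock);

                demo_release(item);     /* blocking work, lock dropped */

                spin_lock(&pool->lock);
        }
        spin_unlock(&pool->lock);
}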
| 1142 | +/** |
---|
| 1143 | + * rpcrdma_buffer_destroy - Release all hw resources |
---|
| 1144 | + * @buf: root control block for resources |
---|
| 1145 | + * |
---|
| 1146 | + * ORDERING: relies on a prior rpcrdma_xprt_drain : |
---|
| 1147 | + * - No more Send or Receive completions can occur |
---|
| 1148 | + * - All MRs, reps, and reqs are returned to their free lists |
---|
| 1149 | + */ |
---|
1230 | 1150 | void |
---|
1231 | 1151 | rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) |
---|
1232 | 1152 | { |
---|
1233 | | - cancel_delayed_work_sync(&buf->rb_recovery_worker); |
---|
1234 | | - cancel_delayed_work_sync(&buf->rb_refresh_worker); |
---|
| 1153 | + rpcrdma_reps_destroy(buf); |
---|
1235 | 1154 | |
---|
1236 | | - rpcrdma_sendctxs_destroy(buf); |
---|
1237 | | - |
---|
1238 | | - while (!list_empty(&buf->rb_recv_bufs)) { |
---|
1239 | | - struct rpcrdma_rep *rep; |
---|
1240 | | - |
---|
1241 | | - rep = list_first_entry(&buf->rb_recv_bufs, |
---|
1242 | | - struct rpcrdma_rep, rr_list); |
---|
1243 | | - list_del(&rep->rr_list); |
---|
1244 | | - rpcrdma_destroy_rep(rep); |
---|
1245 | | - } |
---|
1246 | | - |
---|
1247 | | - spin_lock(&buf->rb_reqslock); |
---|
1248 | | - while (!list_empty(&buf->rb_allreqs)) { |
---|
| 1155 | + while (!list_empty(&buf->rb_send_bufs)) { |
---|
1249 | 1156 | struct rpcrdma_req *req; |
---|
1250 | 1157 | |
---|
1251 | | - req = list_first_entry(&buf->rb_allreqs, |
---|
1252 | | - struct rpcrdma_req, rl_all); |
---|
1253 | | - list_del(&req->rl_all); |
---|
1254 | | - |
---|
1255 | | - spin_unlock(&buf->rb_reqslock); |
---|
1256 | | - rpcrdma_destroy_req(req); |
---|
1257 | | - spin_lock(&buf->rb_reqslock); |
---|
| 1158 | + req = list_first_entry(&buf->rb_send_bufs, |
---|
| 1159 | + struct rpcrdma_req, rl_list); |
---|
| 1160 | + list_del(&req->rl_list); |
---|
| 1161 | + rpcrdma_req_destroy(req); |
---|
1258 | 1162 | } |
---|
1259 | | - spin_unlock(&buf->rb_reqslock); |
---|
1260 | | - |
---|
1261 | | - rpcrdma_mrs_destroy(buf); |
---|
1262 | 1163 | } |
---|
1263 | 1164 | |
---|
1264 | 1165 | /** |
---|
.. | .. |
---|
1272 | 1173 | rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt) |
---|
1273 | 1174 | { |
---|
1274 | 1175 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
---|
1275 | | - struct rpcrdma_mr *mr = NULL; |
---|
| 1176 | + struct rpcrdma_mr *mr; |
---|
1276 | 1177 | |
---|
1277 | | - spin_lock(&buf->rb_mrlock); |
---|
1278 | | - if (!list_empty(&buf->rb_mrs)) |
---|
1279 | | - mr = rpcrdma_mr_pop(&buf->rb_mrs); |
---|
1280 | | - spin_unlock(&buf->rb_mrlock); |
---|
1281 | | - |
---|
1282 | | - if (!mr) |
---|
1283 | | - goto out_nomrs; |
---|
| 1178 | + spin_lock(&buf->rb_lock); |
---|
| 1179 | + mr = rpcrdma_mr_pop(&buf->rb_mrs); |
---|
| 1180 | + spin_unlock(&buf->rb_lock); |
---|
1284 | 1181 | return mr; |
---|
1285 | | - |
---|
1286 | | -out_nomrs: |
---|
1287 | | - trace_xprtrdma_nomrs(r_xprt); |
---|
1288 | | - if (r_xprt->rx_ep.rep_connected != -ENODEV) |
---|
1289 | | - schedule_delayed_work(&buf->rb_refresh_worker, 0); |
---|
1290 | | - |
---|
1291 | | - /* Allow the reply handler and refresh worker to run */ |
---|
1292 | | - cond_resched(); |
---|
1293 | | - |
---|
1294 | | - return NULL; |
---|
1295 | | -} |
---|
1296 | | - |
---|
1297 | | -static void |
---|
1298 | | -__rpcrdma_mr_put(struct rpcrdma_buffer *buf, struct rpcrdma_mr *mr) |
---|
1299 | | -{ |
---|
1300 | | - spin_lock(&buf->rb_mrlock); |
---|
1301 | | - rpcrdma_mr_push(mr, &buf->rb_mrs); |
---|
1302 | | - spin_unlock(&buf->rb_mrlock); |
---|
1303 | 1182 | } |
---|
1304 | 1183 | |
---|
1305 | 1184 | /** |
---|
1306 | | - * rpcrdma_mr_put - Release an rpcrdma_mr object |
---|
1307 | | - * @mr: object to release |
---|
| 1185 | + * rpcrdma_mr_put - DMA unmap an MR and release it |
---|
| 1186 | + * @mr: MR to release |
---|
1308 | 1187 | * |
---|
1309 | 1188 | */ |
---|
1310 | | -void |
---|
1311 | | -rpcrdma_mr_put(struct rpcrdma_mr *mr) |
---|
1312 | | -{ |
---|
1313 | | - __rpcrdma_mr_put(&mr->mr_xprt->rx_buf, mr); |
---|
1314 | | -} |
---|
1315 | | - |
---|
1316 | | -/** |
---|
1317 | | - * rpcrdma_mr_unmap_and_put - DMA unmap an MR and release it |
---|
1318 | | - * @mr: object to release |
---|
1319 | | - * |
---|
1320 | | - */ |
---|
1321 | | -void |
---|
1322 | | -rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr) |
---|
| 1189 | +void rpcrdma_mr_put(struct rpcrdma_mr *mr) |
---|
1323 | 1190 | { |
---|
1324 | 1191 | struct rpcrdma_xprt *r_xprt = mr->mr_xprt; |
---|
1325 | 1192 | |
---|
1326 | | - trace_xprtrdma_dma_unmap(mr); |
---|
1327 | | - ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, |
---|
1328 | | - mr->mr_sg, mr->mr_nents, mr->mr_dir); |
---|
1329 | | - __rpcrdma_mr_put(&r_xprt->rx_buf, mr); |
---|
| 1193 | + if (mr->mr_dir != DMA_NONE) { |
---|
| 1194 | + trace_xprtrdma_mr_unmap(mr); |
---|
| 1195 | + ib_dma_unmap_sg(r_xprt->rx_ep->re_id->device, |
---|
| 1196 | + mr->mr_sg, mr->mr_nents, mr->mr_dir); |
---|
| 1197 | + mr->mr_dir = DMA_NONE; |
---|
| 1198 | + } |
---|
| 1199 | + |
---|
| 1200 | + rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs); |
---|
| 1201 | +} |
---|
| 1202 | + |
---|
| 1203 | +/** |
---|
| 1204 | + * rpcrdma_reply_put - Put reply buffers back into pool |
---|
| 1205 | + * @buffers: buffer pool |
---|
| 1206 | + * @req: object to return |
---|
| 1207 | + * |
---|
| 1208 | + */ |
---|
| 1209 | +void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) |
---|
| 1210 | +{ |
---|
| 1211 | + if (req->rl_reply) { |
---|
| 1212 | + rpcrdma_rep_put(buffers, req->rl_reply); |
---|
| 1213 | + req->rl_reply = NULL; |
---|
| 1214 | + } |
---|
1330 | 1215 | } |
---|
1331 | 1216 | |
---|
1332 | 1217 | /** |
---|
.. | .. |
---|
1351 | 1236 | |
---|
1352 | 1237 | /** |
---|
1353 | 1238 | * rpcrdma_buffer_put - Put request/reply buffers back into pool |
---|
| 1239 | + * @buffers: buffer pool |
---|
1354 | 1240 | * @req: object to return |
---|
1355 | 1241 | * |
---|
1356 | 1242 | */ |
---|
1357 | | -void |
---|
1358 | | -rpcrdma_buffer_put(struct rpcrdma_req *req) |
---|
| 1243 | +void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) |
---|
1359 | 1244 | { |
---|
1360 | | - struct rpcrdma_buffer *buffers = req->rl_buffer; |
---|
1361 | | - struct rpcrdma_rep *rep = req->rl_reply; |
---|
1362 | | - |
---|
1363 | | - req->rl_reply = NULL; |
---|
| 1245 | + rpcrdma_reply_put(buffers, req); |
---|
1364 | 1246 | |
---|
1365 | 1247 | spin_lock(&buffers->rb_lock); |
---|
1366 | 1248 | list_add(&req->rl_list, &buffers->rb_send_bufs); |
---|
1367 | | - if (rep) { |
---|
1368 | | - if (!rep->rr_temp) { |
---|
1369 | | - list_add(&rep->rr_list, &buffers->rb_recv_bufs); |
---|
1370 | | - rep = NULL; |
---|
1371 | | - } |
---|
1372 | | - } |
---|
1373 | 1249 | spin_unlock(&buffers->rb_lock); |
---|
1374 | | - if (rep) |
---|
1375 | | - rpcrdma_destroy_rep(rep); |
---|
1376 | | -} |
---|
1377 | | - |
---|
1378 | | -/* |
---|
1379 | | - * Put reply buffers back into pool when not attached to |
---|
1380 | | - * request. This happens in error conditions. |
---|
1381 | | - */ |
---|
1382 | | -void |
---|
1383 | | -rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) |
---|
1384 | | -{ |
---|
1385 | | - struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf; |
---|
1386 | | - |
---|
1387 | | - if (!rep->rr_temp) { |
---|
1388 | | - spin_lock(&buffers->rb_lock); |
---|
1389 | | - list_add(&rep->rr_list, &buffers->rb_recv_bufs); |
---|
1390 | | - spin_unlock(&buffers->rb_lock); |
---|
1391 | | - } else { |
---|
1392 | | - rpcrdma_destroy_rep(rep); |
---|
1393 | | - } |
---|
1394 | 1250 | } |
---|
1395 | 1251 | |
---|
1396 | 1252 | /** |
---|
1397 | | - * rpcrdma_alloc_regbuf - allocate and DMA-map memory for SEND/RECV buffers |
---|
1398 | | - * @size: size of buffer to be allocated, in bytes |
---|
1399 | | - * @direction: direction of data movement |
---|
1400 | | - * @flags: GFP flags |
---|
| 1253 | + * rpcrdma_recv_buffer_put - Release rpcrdma_rep back to free list |
---|
| 1254 | + * @rep: rep to release |
---|
1401 | 1255 | * |
---|
1402 | | - * Returns an ERR_PTR, or a pointer to a regbuf, a buffer that |
---|
1403 | | - * can be persistently DMA-mapped for I/O. |
---|
| 1256 | + * Used after error conditions. |
---|
| 1257 | + */ |
---|
| 1258 | +void rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) |
---|
| 1259 | +{ |
---|
| 1260 | + rpcrdma_rep_put(&rep->rr_rxprt->rx_buf, rep); |
---|
| 1261 | +} |
---|
| 1262 | + |
---|
| 1263 | +/* Returns a pointer to a rpcrdma_regbuf object, or NULL. |
---|
1404 | 1264 | * |
---|
1405 | 1265 | * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for |
---|
1406 | 1266 | * receiving the payload of RDMA RECV operations. During Long Calls |
---|
1407 | | - * or Replies they may be registered externally via ro_map. |
---|
| 1267 | + * or Replies they may be registered externally via frwr_map. |
---|
1408 | 1268 | */ |
---|
1409 | | -struct rpcrdma_regbuf * |
---|
1410 | | -rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction, |
---|
| 1269 | +static struct rpcrdma_regbuf * |
---|
| 1270 | +rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, |
---|
1411 | 1271 | gfp_t flags) |
---|
1412 | 1272 | { |
---|
1413 | 1273 | struct rpcrdma_regbuf *rb; |
---|
1414 | 1274 | |
---|
1415 | | - rb = kmalloc(sizeof(*rb) + size, flags); |
---|
1416 | | - if (rb == NULL) |
---|
1417 | | - return ERR_PTR(-ENOMEM); |
---|
| 1275 | + rb = kmalloc(sizeof(*rb), flags); |
---|
| 1276 | + if (!rb) |
---|
| 1277 | + return NULL; |
---|
| 1278 | + rb->rg_data = kmalloc(size, flags); |
---|
| 1279 | + if (!rb->rg_data) { |
---|
| 1280 | + kfree(rb); |
---|
| 1281 | + return NULL; |
---|
| 1282 | + } |
---|
1418 | 1283 | |
---|
1419 | 1284 | rb->rg_device = NULL; |
---|
1420 | 1285 | rb->rg_direction = direction; |
---|
1421 | 1286 | rb->rg_iov.length = size; |
---|
1422 | | - |
---|
1423 | 1287 | return rb; |
---|
1424 | 1288 | } |
---|
1425 | 1289 | |
---|
1426 | 1290 | /** |
---|
1427 | | - * __rpcrdma_map_regbuf - DMA-map a regbuf |
---|
1428 | | - * @ia: controlling rpcrdma_ia |
---|
1429 | | - * @rb: regbuf to be mapped |
---|
| 1291 | + * rpcrdma_regbuf_realloc - re-allocate a SEND/RECV buffer |
---|
| 1292 | + * @rb: regbuf to reallocate |
---|
| 1293 | + * @size: size of buffer to be allocated, in bytes |
---|
| 1294 | + * @flags: GFP flags |
---|
| 1295 | + * |
---|
| 1296 | + * Returns true if reallocation was successful. If false is |
---|
| 1297 | + * returned, @rb is left untouched. |
---|
1430 | 1298 | */ |
---|
1431 | | -bool |
---|
1432 | | -__rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) |
---|
| 1299 | +bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags) |
---|
1433 | 1300 | { |
---|
1434 | | - struct ib_device *device = ia->ri_device; |
---|
| 1301 | + void *buf; |
---|
| 1302 | + |
---|
| 1303 | + buf = kmalloc(size, flags); |
---|
| 1304 | + if (!buf) |
---|
| 1305 | + return false; |
---|
| 1306 | + |
---|
| 1307 | + rpcrdma_regbuf_dma_unmap(rb); |
---|
| 1308 | + kfree(rb->rg_data); |
---|
| 1309 | + |
---|
| 1310 | + rb->rg_data = buf; |
---|
| 1311 | + rb->rg_iov.length = size; |
---|
| 1312 | + return true; |
---|
| 1313 | +} |
---|
| 1314 | + |
---|
| 1315 | +/** |
---|
| 1316 | + * __rpcrdma_regbuf_dma_map - DMA-map a regbuf |
---|
| 1317 | + * @r_xprt: controlling transport instance |
---|
| 1318 | + * @rb: regbuf to be mapped |
---|
| 1319 | + * |
---|
| 1320 | + * Returns true if the buffer is now DMA mapped to @r_xprt's device |
---|
| 1321 | + */ |
---|
| 1322 | +bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt, |
---|
| 1323 | + struct rpcrdma_regbuf *rb) |
---|
| 1324 | +{ |
---|
| 1325 | + struct ib_device *device = r_xprt->rx_ep->re_id->device; |
---|
1435 | 1326 | |
---|
1436 | 1327 | if (rb->rg_direction == DMA_NONE) |
---|
1437 | 1328 | return false; |
---|
1438 | 1329 | |
---|
1439 | | - rb->rg_iov.addr = ib_dma_map_single(device, |
---|
1440 | | - (void *)rb->rg_base, |
---|
1441 | | - rdmab_length(rb), |
---|
1442 | | - rb->rg_direction); |
---|
1443 | | - if (ib_dma_mapping_error(device, rdmab_addr(rb))) |
---|
| 1330 | + rb->rg_iov.addr = ib_dma_map_single(device, rdmab_data(rb), |
---|
| 1331 | + rdmab_length(rb), rb->rg_direction); |
---|
| 1332 | + if (ib_dma_mapping_error(device, rdmab_addr(rb))) { |
---|
| 1333 | + trace_xprtrdma_dma_maperr(rdmab_addr(rb)); |
---|
1444 | 1334 | return false; |
---|
| 1335 | + } |
---|
1445 | 1336 | |
---|
1446 | 1337 | rb->rg_device = device; |
---|
1447 | | - rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey; |
---|
| 1338 | + rb->rg_iov.lkey = r_xprt->rx_ep->re_pd->local_dma_lkey; |
---|
1448 | 1339 | return true; |
---|
1449 | 1340 | } |
---|
1450 | 1341 | |
---|
1451 | | -static void |
---|
1452 | | -rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb) |
---|
| 1342 | +static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb) |
---|
1453 | 1343 | { |
---|
1454 | 1344 | if (!rb) |
---|
1455 | 1345 | return; |
---|
.. | .. |
---|
1457 | 1347 | if (!rpcrdma_regbuf_is_mapped(rb)) |
---|
1458 | 1348 | return; |
---|
1459 | 1349 | |
---|
1460 | | - ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb), |
---|
1461 | | - rdmab_length(rb), rb->rg_direction); |
---|
| 1350 | + ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb), rdmab_length(rb), |
---|
| 1351 | + rb->rg_direction); |
---|
1462 | 1352 | rb->rg_device = NULL; |
---|
1463 | 1353 | } |
---|
1464 | 1354 | |
---|
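These two helpers follow the usual ib_dma_* lifecycle: map once, check for a mapping error before trusting the address, and unmap later with the same length and direction. A condensed sketch of that contract, assuming an already-valid struct ib_device; the demo_* wrappers (and the use of 0 as an error sentinel) are purely illustrative:

#include <rdma/ib_verbs.h>

static u64 demo_map(struct ib_device *device, void *buf, size_t len,
                    enum dma_data_direction dir)
{
        u64 addr = ib_dma_map_single(device, buf, len, dir);

        /* The mapping error must be checked before the address is used. */
        if (ib_dma_mapping_error(device, addr))
                return 0;       /* error sentinel for this sketch only */
        return addr;
}

static void demo_unmap(struct ib_device *device, u64 addr, size_t len,
                       enum dma_data_direction dir)
{
        /* Unmap with the same length and direction used to map. */
        ib_dma_unmap_single(device, addr, len, dir);
}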
1465 | | -/** |
---|
1466 | | - * rpcrdma_free_regbuf - deregister and free registered buffer |
---|
1467 | | - * @rb: regbuf to be deregistered and freed |
---|
1468 | | - */ |
---|
1469 | | -void |
---|
1470 | | -rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb) |
---|
| 1355 | +static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb) |
---|
1471 | 1356 | { |
---|
1472 | | - rpcrdma_dma_unmap_regbuf(rb); |
---|
| 1357 | + rpcrdma_regbuf_dma_unmap(rb); |
---|
| 1358 | + if (rb) |
---|
| 1359 | + kfree(rb->rg_data); |
---|
1473 | 1360 | kfree(rb); |
---|
1474 | 1361 | } |
---|
1475 | 1362 | |
---|
1476 | | -/* |
---|
1477 | | - * Prepost any receive buffer, then post send. |
---|
| 1363 | +/** |
---|
| 1364 | + * rpcrdma_post_sends - Post WRs to a transport's Send Queue |
---|
| 1365 | + * @r_xprt: controlling transport instance |
---|
| 1366 | + * @req: rpcrdma_req containing the Send WR to post |
---|
1478 | 1367 | * |
---|
1479 | | - * Receive buffer is donated to hardware, reclaimed upon recv completion. |
---|
| 1368 | + * Returns 0 if the post was successful, otherwise -ENOTCONN |
---|
| 1369 | + * is returned. |
---|
1480 | 1370 | */ |
---|
1481 | | -int |
---|
1482 | | -rpcrdma_ep_post(struct rpcrdma_ia *ia, |
---|
1483 | | - struct rpcrdma_ep *ep, |
---|
1484 | | - struct rpcrdma_req *req) |
---|
| 1371 | +int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) |
---|
1485 | 1372 | { |
---|
1486 | | - struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr; |
---|
| 1373 | + struct ib_send_wr *send_wr = &req->rl_wr; |
---|
| 1374 | + struct rpcrdma_ep *ep = r_xprt->rx_ep; |
---|
1487 | 1375 | int rc; |
---|
1488 | 1376 | |
---|
1489 | | - if (!ep->rep_send_count || |
---|
1490 | | - test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) { |
---|
| 1377 | + if (!ep->re_send_count || kref_read(&req->rl_kref) > 1) { |
---|
1491 | 1378 | send_wr->send_flags |= IB_SEND_SIGNALED; |
---|
1492 | | - ep->rep_send_count = ep->rep_send_batch; |
---|
| 1379 | + ep->re_send_count = ep->re_send_batch; |
---|
1493 | 1380 | } else { |
---|
1494 | 1381 | send_wr->send_flags &= ~IB_SEND_SIGNALED; |
---|
1495 | | - --ep->rep_send_count; |
---|
| 1382 | + --ep->re_send_count; |
---|
1496 | 1383 | } |
---|
1497 | 1384 | |
---|
1498 | | - rc = ia->ri_ops->ro_send(ia, req); |
---|
1499 | | - trace_xprtrdma_post_send(req, rc); |
---|
| 1385 | + trace_xprtrdma_post_send(req); |
---|
| 1386 | + rc = frwr_send(r_xprt, req); |
---|
1500 | 1387 | if (rc) |
---|
1501 | 1388 | return -ENOTCONN; |
---|
1502 | 1389 | return 0; |
---|
1503 | 1390 | } |
---|
1504 | 1391 | |
---|
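Purely as an illustration of the signaling logic above (the actual value of re_send_batch is established during endpoint creation and is not shown in this hunk): if re_send_batch were 16, only one Send WR in sixteen would carry IB_SEND_SIGNALED, so Send completions arrive at a fraction of the post rate; the kref_read() test additionally requests a signaled Send whenever other references to the req remain outstanding, so its Send resources are released by an explicit completion rather than silently.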
1505 | 1392 | /** |
---|
1506 | | - * rpcrdma_post_recvs - Maybe post some Receive buffers |
---|
1507 | | - * @r_xprt: controlling transport |
---|
1508 | | - * @temp: when true, allocate temp rpcrdma_rep objects |
---|
| 1393 | + * rpcrdma_post_recvs - Refill the Receive Queue |
---|
| 1394 | + * @r_xprt: controlling transport instance |
---|
| 1395 | + * @needed: current credit grant |
---|
| 1396 | + * @temp: mark Receive buffers to be deleted after one use |
---|
1509 | 1397 | * |
---|
1510 | 1398 | */ |
---|
1511 | | -void |
---|
1512 | | -rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) |
---|
| 1399 | +void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp) |
---|
1513 | 1400 | { |
---|
1514 | 1401 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
---|
| 1402 | + struct rpcrdma_ep *ep = r_xprt->rx_ep; |
---|
1515 | 1403 | struct ib_recv_wr *wr, *bad_wr; |
---|
1516 | | - int needed, count, rc; |
---|
| 1404 | + struct rpcrdma_rep *rep; |
---|
| 1405 | + int count, rc; |
---|
1517 | 1406 | |
---|
1518 | | - needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1); |
---|
1519 | | - if (buf->rb_posted_receives > needed) |
---|
1520 | | - return; |
---|
1521 | | - needed -= buf->rb_posted_receives; |
---|
1522 | | - |
---|
| 1407 | + rc = 0; |
---|
1523 | 1408 | count = 0; |
---|
| 1409 | + |
---|
| 1410 | + if (likely(ep->re_receive_count > needed)) |
---|
| 1411 | + goto out; |
---|
| 1412 | + needed -= ep->re_receive_count; |
---|
| 1413 | + if (!temp) |
---|
| 1414 | + needed += RPCRDMA_MAX_RECV_BATCH; |
---|
| 1415 | + |
---|
| 1416 | + /* fast path: all needed reps can be found on the free list */ |
---|
1524 | 1417 | wr = NULL; |
---|
1525 | 1418 | while (needed) { |
---|
1526 | | - struct rpcrdma_regbuf *rb; |
---|
1527 | | - struct rpcrdma_rep *rep; |
---|
1528 | | - |
---|
1529 | | - spin_lock(&buf->rb_lock); |
---|
1530 | | - rep = list_first_entry_or_null(&buf->rb_recv_bufs, |
---|
1531 | | - struct rpcrdma_rep, rr_list); |
---|
1532 | | - if (likely(rep)) |
---|
1533 | | - list_del(&rep->rr_list); |
---|
1534 | | - spin_unlock(&buf->rb_lock); |
---|
1535 | | - if (!rep) { |
---|
1536 | | - if (rpcrdma_create_rep(r_xprt, temp)) |
---|
1537 | | - break; |
---|
| 1419 | + rep = rpcrdma_rep_get_locked(buf); |
---|
| 1420 | + if (rep && rep->rr_temp) { |
---|
| 1421 | + rpcrdma_rep_destroy(rep); |
---|
1538 | 1422 | continue; |
---|
1539 | 1423 | } |
---|
| 1424 | + if (!rep) |
---|
| 1425 | + rep = rpcrdma_rep_create(r_xprt, temp); |
---|
| 1426 | + if (!rep) |
---|
| 1427 | + break; |
---|
1540 | 1428 | |
---|
1541 | | - rb = rep->rr_rdmabuf; |
---|
1542 | | - if (!rpcrdma_regbuf_is_mapped(rb)) { |
---|
1543 | | - if (!__rpcrdma_dma_map_regbuf(&r_xprt->rx_ia, rb)) { |
---|
1544 | | - rpcrdma_recv_buffer_put(rep); |
---|
1545 | | - break; |
---|
1546 | | - } |
---|
1547 | | - } |
---|
1548 | | - |
---|
1549 | | - trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe); |
---|
| 1429 | + trace_xprtrdma_post_recv(rep); |
---|
1550 | 1430 | rep->rr_recv_wr.next = wr; |
---|
1551 | 1431 | wr = &rep->rr_recv_wr; |
---|
1552 | | - ++count; |
---|
1553 | 1432 | --needed; |
---|
| 1433 | + ++count; |
---|
1554 | 1434 | } |
---|
1555 | | - if (!count) |
---|
1556 | | - return; |
---|
| 1435 | + if (!wr) |
---|
| 1436 | + goto out; |
---|
1557 | 1437 | |
---|
1558 | | - rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr, |
---|
| 1438 | + rc = ib_post_recv(ep->re_id->qp, wr, |
---|
1559 | 1439 | (const struct ib_recv_wr **)&bad_wr); |
---|
| 1440 | +out: |
---|
| 1441 | + trace_xprtrdma_post_recvs(r_xprt, count, rc); |
---|
1560 | 1442 | if (rc) { |
---|
1561 | 1443 | for (wr = bad_wr; wr;) { |
---|
1562 | 1444 | struct rpcrdma_rep *rep; |
---|
.. | .. |
---|
1567 | 1449 | --count; |
---|
1568 | 1450 | } |
---|
1569 | 1451 | } |
---|
1570 | | - buf->rb_posted_receives += count; |
---|
1571 | | - trace_xprtrdma_post_recvs(r_xprt, count, rc); |
---|
| 1452 | + ep->re_receive_count += count; |
---|
| 1453 | + return; |
---|
1572 | 1454 | } |
---|
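rpcrdma_post_recvs() builds a single chain of Receive WRs and hands it to the provider with one ib_post_recv() call; on failure, bad_wr points at the first WR that was not consumed, so only the earlier WRs count as posted. A minimal sketch of posting such a chain, assuming the caller has already filled in each WR's sg_list and wr_cqe; the demo_* name and the contiguous-array layout are illustrative:

#include <rdma/ib_verbs.h>

static int demo_post_recv_chain(struct ib_qp *qp, struct ib_recv_wr *wrs,
                                int count, int *posted)
{
        const struct ib_recv_wr *bad_wr;
        int i, rc;

        if (count <= 0)
                return -EINVAL;

        /* Link the WRs into one chain; the last entry terminates it. */
        for (i = 0; i < count - 1; i++)
                wrs[i].next = &wrs[i + 1];
        wrs[count - 1].next = NULL;

        rc = ib_post_recv(qp, wrs, &bad_wr);

        /* On error, WRs from bad_wr onward were not accepted. */
        *posted = count;
        if (rc)
                for (*posted = 0; &wrs[*posted] != bad_wr; (*posted)++)
                        ;
        return rc;
}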