@@ -106,7 +106,6 @@
 #include <rdma/rdma_cm.h>
 
 #include <linux/sunrpc/debug.h>
-#include <linux/sunrpc/rpc_rdma.h>
 #include <linux/sunrpc/svc_rdma.h>
 
 #include "xprt_rdma.h"
@@ -121,6 +120,13 @@
 {
 	return list_first_entry_or_null(list, struct svc_rdma_send_ctxt,
 					sc_list);
+}
+
+static void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma,
+				   struct rpc_rdma_cid *cid)
+{
+	cid->ci_queue_id = rdma->sc_sq_cq->res.id;
+	cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
 }
 
 static struct svc_rdma_send_ctxt *
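
The new rpc_rdma_cid pairs the Send CQ's resource ID with a per-transport
completion counter, so each Send completion can be correlated with the
queue it ran on. A minimal sketch of how such an ID might be rendered for
tracing (snprintf_cid() is a hypothetical helper, not part of this patch):

	/* Hypothetical helper: render a completion ID the way the
	 * svcrdma trace points correlate completions with their CQ.
	 */
	static void snprintf_cid(char *buf, size_t len,
				 const struct rpc_rdma_cid *cid)
	{
		snprintf(buf, len, "cq.id=%u cid=%d",
			 cid->ci_queue_id, cid->ci_completion_id);
	}
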
@@ -145,12 +151,16 @@
 	if (ib_dma_mapping_error(rdma->sc_pd->device, addr))
 		goto fail2;
 
+	svc_rdma_send_cid_init(rdma, &ctxt->sc_cid);
+
 	ctxt->sc_send_wr.next = NULL;
 	ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe;
 	ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
 	ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
 	ctxt->sc_cqe.done = svc_rdma_wc_send;
 	ctxt->sc_xprt_buf = buffer;
+	xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
+		     rdma->sc_max_req_size);
 	ctxt->sc_sges[0].addr = addr;
 
 	for (i = 0; i < rdma->sc_max_send_sges; i++)
@@ -204,6 +214,10 @@
 	spin_unlock(&rdma->sc_send_lock);
 
 out:
+	rpcrdma_set_xdrlen(&ctxt->sc_hdrbuf, 0);
+	xdr_init_encode(&ctxt->sc_stream, &ctxt->sc_hdrbuf,
+			ctxt->sc_xprt_buf, NULL);
+
 	ctxt->sc_send_wr.num_sge = 0;
 	ctxt->sc_cur_sge_no = 0;
 	ctxt->sc_page_count = 0;
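
With sc_hdrbuf and sc_stream (re)initialized each time a send ctxt is
handed out, the encoding helpers below can build the transport header
through the kernel's xdr_stream API rather than bare __be32 stores. A
minimal sketch of that pattern, assuming a ctxt freshly returned by
svc_rdma_send_ctxt_get() (example_encode_fixed_hdr() is illustrative
only, not part of this patch):

	static int example_encode_fixed_hdr(struct svc_rdma_send_ctxt *ctxt,
					    __be32 xid, __be32 credits)
	{
		__be32 *p;

		/* Reserve the four fixed RPC-over-RDMA header words */
		p = xdr_reserve_space(&ctxt->sc_stream,
				      rpcrdma_fixed_maxsz * sizeof(*p));
		if (!p)
			return -EMSGSIZE;
		*p++ = xid;
		*p++ = rpcrdma_version;
		*p++ = credits;
		*p = rdma_msg;
		/* ctxt->sc_hdrbuf.len now reflects the words consumed */
		return 0;
	}
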
@@ -233,11 +247,15 @@
 	/* The first SGE contains the transport header, which
 	 * remains mapped until @ctxt is destroyed.
 	 */
-	for (i = 1; i < ctxt->sc_send_wr.num_sge; i++)
+	for (i = 1; i < ctxt->sc_send_wr.num_sge; i++) {
 		ib_dma_unmap_page(device,
 				  ctxt->sc_sges[i].addr,
 				  ctxt->sc_sges[i].length,
 				  DMA_TO_DEVICE);
+		trace_svcrdma_dma_unmap_page(rdma,
+					     ctxt->sc_sges[i].addr,
+					     ctxt->sc_sges[i].length);
+	}
 
 	for (i = 0; i < ctxt->sc_page_count; ++i)
 		put_page(ctxt->sc_pages[i]);
@@ -259,41 +277,42 @@
 {
 	struct svcxprt_rdma *rdma = cq->cq_context;
 	struct ib_cqe *cqe = wc->wr_cqe;
-	struct svc_rdma_send_ctxt *ctxt;
+	struct svc_rdma_send_ctxt *ctxt =
+		container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe);
 
-	trace_svcrdma_wc_send(wc);
+	trace_svcrdma_wc_send(wc, &ctxt->sc_cid);
 
 	atomic_inc(&rdma->sc_sq_avail);
 	wake_up(&rdma->sc_send_wait);
 
-	ctxt = container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe);
 	svc_rdma_send_ctxt_put(rdma, ctxt);
 
 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
 		set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
 		svc_xprt_enqueue(&rdma->sc_xprt);
-		if (wc->status != IB_WC_WR_FLUSH_ERR)
-			pr_err("svcrdma: Send: %s (%u/0x%x)\n",
-			       ib_wc_status_msg(wc->status),
-			       wc->status, wc->vendor_err);
 	}
-
-	svc_xprt_put(&rdma->sc_xprt);
 }
 
 /**
  * svc_rdma_send - Post a single Send WR
  * @rdma: transport on which to post the WR
- * @wr: prepared Send WR to post
+ * @ctxt: send ctxt with a Send WR ready to post
  *
  * Returns zero if the Send WR was posted successfully. Otherwise, a
  * negative errno is returned.
  */
-int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr)
+int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
 {
+	struct ib_send_wr *wr = &ctxt->sc_send_wr;
 	int ret;
 
 	might_sleep();
+
+	/* Sync the transport header buffer */
+	ib_dma_sync_single_for_device(rdma->sc_pd->device,
+				      wr->sg_list[0].addr,
+				      wr->sg_list[0].length,
+				      DMA_TO_DEVICE);
 
 	/* If the SQ is full, wait until an SQ entry is available */
 	while (1) {
@@ -309,8 +328,7 @@
 			continue;
 		}
 
-		svc_xprt_get(&rdma->sc_xprt);
-		trace_svcrdma_post_send(wr);
+		trace_svcrdma_post_send(ctxt);
 		ret = ib_post_send(rdma->sc_qp, wr, NULL);
 		if (ret)
 			break;
@@ -319,197 +337,173 @@
 
 	trace_svcrdma_sq_post_err(rdma, ret);
 	set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
-	svc_xprt_put(&rdma->sc_xprt);
 	wake_up(&rdma->sc_send_wait);
 	return ret;
 }
 
-static u32 xdr_padsize(u32 len)
-{
-	return (len & 3) ? (4 - (len & 3)) : 0;
-}
-
-/* Returns length of transport header, in bytes.
- */
-static unsigned int svc_rdma_reply_hdr_len(__be32 *rdma_resp)
-{
-	unsigned int nsegs;
-	__be32 *p;
-
-	p = rdma_resp;
-
-	/* RPC-over-RDMA V1 replies never have a Read list. */
-	p += rpcrdma_fixed_maxsz + 1;
-
-	/* Skip Write list. */
-	while (*p++ != xdr_zero) {
-		nsegs = be32_to_cpup(p++);
-		p += nsegs * rpcrdma_segment_maxsz;
-	}
-
-	/* Skip Reply chunk. */
-	if (*p++ != xdr_zero) {
-		nsegs = be32_to_cpup(p++);
-		p += nsegs * rpcrdma_segment_maxsz;
-	}
-
-	return (unsigned long)p - (unsigned long)rdma_resp;
-}
-
-/* One Write chunk is copied from Call transport header to Reply
- * transport header. Each segment's length field is updated to
- * reflect number of bytes consumed in the segment.
+/**
+ * svc_rdma_encode_read_list - Encode RPC Reply's Read chunk list
+ * @sctxt: Send context for the RPC Reply
  *
- * Returns number of segments in this chunk.
+ * Return values:
+ *   On success, returns length in bytes of the Reply XDR buffer
+ *   that was consumed by the Reply Read list
+ *   %-EMSGSIZE on XDR buffer overflow
  */
-static unsigned int xdr_encode_write_chunk(__be32 *dst, __be32 *src,
+static ssize_t svc_rdma_encode_read_list(struct svc_rdma_send_ctxt *sctxt)
+{
+	/* RPC-over-RDMA version 1 replies never have a Read list. */
+	return xdr_stream_encode_item_absent(&sctxt->sc_stream);
+}
+
+/**
+ * svc_rdma_encode_write_segment - Encode one Write segment
+ * @src: matching Write chunk in the RPC Call header
+ * @sctxt: Send context for the RPC Reply
+ * @remaining: remaining bytes of the payload left in the Write chunk
+ *
+ * Return values:
+ *   On success, returns length in bytes of the Reply XDR buffer
+ *   that was consumed by the Write segment
+ *   %-EMSGSIZE on XDR buffer overflow
+ */
+static ssize_t svc_rdma_encode_write_segment(__be32 *src,
+					     struct svc_rdma_send_ctxt *sctxt,
+					     unsigned int *remaining)
+{
+	__be32 *p;
+	const size_t len = rpcrdma_segment_maxsz * sizeof(*p);
+	u32 handle, length;
+	u64 offset;
+
+	p = xdr_reserve_space(&sctxt->sc_stream, len);
+	if (!p)
+		return -EMSGSIZE;
+
+	xdr_decode_rdma_segment(src, &handle, &length, &offset);
+
+	if (*remaining < length) {
+		/* segment only partly filled */
+		length = *remaining;
+		*remaining = 0;
+	} else {
+		/* entire segment was consumed */
+		*remaining -= length;
+	}
+	xdr_encode_rdma_segment(p, handle, length, offset);
+
+	trace_svcrdma_encode_wseg(handle, length, offset);
+	return len;
+}
+
+/**
+ * svc_rdma_encode_write_chunk - Encode one Write chunk
+ * @src: matching Write chunk in the RPC Call header
+ * @sctxt: Send context for the RPC Reply
+ * @remaining: size in bytes of the payload in the Write chunk
+ *
+ * Copy a Write chunk from the Call transport header to the
+ * Reply transport header. Update each segment's length field
+ * to reflect the number of bytes written in that segment.
+ *
+ * Return values:
+ *   On success, returns length in bytes of the Reply XDR buffer
+ *   that was consumed by the Write chunk
+ *   %-EMSGSIZE on XDR buffer overflow
+ */
+static ssize_t svc_rdma_encode_write_chunk(__be32 *src,
+					   struct svc_rdma_send_ctxt *sctxt,
 					   unsigned int remaining)
 {
 	unsigned int i, nsegs;
-	u32 seg_len;
+	ssize_t len, ret;
 
-	/* Write list discriminator */
-	*dst++ = *src++;
+	len = 0;
+	trace_svcrdma_encode_write_chunk(remaining);
 
-	/* number of segments in this chunk */
-	nsegs = be32_to_cpup(src);
-	*dst++ = *src++;
+	src++;
+	ret = xdr_stream_encode_item_present(&sctxt->sc_stream);
+	if (ret < 0)
+		return -EMSGSIZE;
+	len += ret;
+
+	nsegs = be32_to_cpup(src++);
+	ret = xdr_stream_encode_u32(&sctxt->sc_stream, nsegs);
+	if (ret < 0)
+		return -EMSGSIZE;
+	len += ret;
 
 	for (i = nsegs; i; i--) {
-		/* segment's RDMA handle */
-		*dst++ = *src++;
-
-		/* bytes returned in this segment */
-		seg_len = be32_to_cpu(*src);
-		if (remaining >= seg_len) {
-			/* entire segment was consumed */
-			*dst = *src;
-			remaining -= seg_len;
-		} else {
-			/* segment only partly filled */
-			*dst = cpu_to_be32(remaining);
-			remaining = 0;
-		}
-		dst++; src++;
-
-		/* segment's RDMA offset */
-		*dst++ = *src++;
-		*dst++ = *src++;
+		ret = svc_rdma_encode_write_segment(src, sctxt, &remaining);
+		if (ret < 0)
+			return -EMSGSIZE;
+		src += rpcrdma_segment_maxsz;
+		len += ret;
 	}
 
-	return nsegs;
+	return len;
 }
 
-/* The client provided a Write list in the Call message. Fill in
- * the segments in the first Write chunk in the Reply's transport
+/**
+ * svc_rdma_encode_write_list - Encode RPC Reply's Write chunk list
+ * @rctxt: Reply context with information about the RPC Call
+ * @sctxt: Send context for the RPC Reply
+ * @length: size in bytes of the payload in the first Write chunk
+ *
+ * The client provides a Write chunk list in the Call message. Fill
+ * in the segments in the first Write chunk in the Reply's transport
  * header with the number of bytes consumed in each segment.
  * Remaining chunks are returned unused.
  *
  * Assumptions:
  *  - Client has provided only one Write chunk
+ *
+ * Return values:
+ *   On success, returns length in bytes of the Reply XDR buffer
+ *   that was consumed by the Reply's Write list
+ *   %-EMSGSIZE on XDR buffer overflow
  */
-static void svc_rdma_xdr_encode_write_list(__be32 *rdma_resp, __be32 *wr_ch,
-					   unsigned int consumed)
+static ssize_t
+svc_rdma_encode_write_list(const struct svc_rdma_recv_ctxt *rctxt,
+			   struct svc_rdma_send_ctxt *sctxt,
+			   unsigned int length)
 {
-	unsigned int nsegs;
-	__be32 *p, *q;
+	ssize_t len, ret;
 
-	/* RPC-over-RDMA V1 replies never have a Read list. */
-	p = rdma_resp + rpcrdma_fixed_maxsz + 1;
+	ret = svc_rdma_encode_write_chunk(rctxt->rc_write_list, sctxt, length);
+	if (ret < 0)
+		return ret;
+	len = ret;
 
-	q = wr_ch;
-	while (*q != xdr_zero) {
-		nsegs = xdr_encode_write_chunk(p, q, consumed);
-		q += 2 + nsegs * rpcrdma_segment_maxsz;
-		p += 2 + nsegs * rpcrdma_segment_maxsz;
-		consumed = 0;
-	}
+	/* Terminate the Write list */
+	ret = xdr_stream_encode_item_absent(&sctxt->sc_stream);
+	if (ret < 0)
+		return ret;
 
-	/* Terminate Write list */
-	*p++ = xdr_zero;
-
-	/* Reply chunk discriminator; may be replaced later */
-	*p = xdr_zero;
+	return len + ret;
 }
 
-/* The client provided a Reply chunk in the Call message. Fill in
- * the segments in the Reply chunk in the Reply message with the
- * number of bytes consumed in each segment.
+/**
+ * svc_rdma_encode_reply_chunk - Encode RPC Reply's Reply chunk
+ * @rctxt: Reply context with information about the RPC Call
+ * @sctxt: Send context for the RPC Reply
+ * @length: size in bytes of the payload in the Reply chunk
 *
 * Assumptions:
- *  - Reply can always fit in the provided Reply chunk
- */
-static void svc_rdma_xdr_encode_reply_chunk(__be32 *rdma_resp, __be32 *rp_ch,
-					    unsigned int consumed)
-{
-	__be32 *p;
-
-	/* Find the Reply chunk in the Reply's xprt header.
-	 * RPC-over-RDMA V1 replies never have a Read list.
-	 */
-	p = rdma_resp + rpcrdma_fixed_maxsz + 1;
-
-	/* Skip past Write list */
-	while (*p++ != xdr_zero)
-		p += 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
-
-	xdr_encode_write_chunk(p, rp_ch, consumed);
-}
-
-/* Parse the RPC Call's transport header.
- */
-static void svc_rdma_get_write_arrays(__be32 *rdma_argp,
-				      __be32 **write, __be32 **reply)
-{
-	__be32 *p;
-
-	p = rdma_argp + rpcrdma_fixed_maxsz;
-
-	/* Read list */
-	while (*p++ != xdr_zero)
-		p += 5;
-
-	/* Write list */
-	if (*p != xdr_zero) {
-		*write = p;
-		while (*p++ != xdr_zero)
-			p += 1 + be32_to_cpu(*p) * 4;
-	} else {
-		*write = NULL;
-		p++;
-	}
-
-	/* Reply chunk */
-	if (*p != xdr_zero)
-		*reply = p;
-	else
-		*reply = NULL;
-}
-
-/* RPC-over-RDMA Version One private extension: Remote Invalidation.
- * Responder's choice: requester signals it can handle Send With
- * Invalidate, and responder chooses one rkey to invalidate.
+ *  - Reply can always fit in the client-provided Reply chunk
 *
- * Find a candidate rkey to invalidate when sending a reply. Picks the
- * first R_key it finds in the chunk lists.
- *
- * Returns zero if RPC's chunk lists are empty.
+ * Return values:
+ *   On success, returns length in bytes of the Reply XDR buffer
+ *   that was consumed by the Reply's Reply chunk
+ *   %-EMSGSIZE on XDR buffer overflow
  */
-static u32 svc_rdma_get_inv_rkey(__be32 *rdma_argp,
-				 __be32 *wr_lst, __be32 *rp_ch)
+static ssize_t
+svc_rdma_encode_reply_chunk(const struct svc_rdma_recv_ctxt *rctxt,
+			    struct svc_rdma_send_ctxt *sctxt,
+			    unsigned int length)
 {
-	__be32 *p;
-
-	p = rdma_argp + rpcrdma_fixed_maxsz;
-	if (*p != xdr_zero)
-		p += 2;
-	else if (wr_lst && be32_to_cpup(wr_lst + 1))
-		p = wr_lst + 2;
-	else if (rp_ch && be32_to_cpup(rp_ch + 1))
-		p = rp_ch + 2;
-	else
-		return 0;
-	return be32_to_cpup(p);
+	return svc_rdma_encode_write_chunk(rctxt->rc_reply_chunk, sctxt,
+					   length);
 }
 
 static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
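
A worked example of the segment-length rewriting above: if the client's
Write chunk carries three 4096-byte segments and the server actually wrote
a 5000-byte payload, svc_rdma_encode_write_chunk() re-encodes the segment
lengths as 4096 (fully consumed), 904 (partly filled), and 0 (returned
unused), while each segment's handle and offset are copied through
unchanged.
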
@@ -522,6 +516,7 @@
 	dma_addr_t dma_addr;
 
 	dma_addr = ib_dma_map_page(dev, page, offset, len, DMA_TO_DEVICE);
+	trace_svcrdma_dma_map_page(rdma, dma_addr, len);
 	if (ib_dma_mapping_error(dev, dma_addr))
 		goto out_maperr;
 
@@ -531,7 +526,6 @@
 	return 0;
 
 out_maperr:
-	trace_svcrdma_dma_map_page(rdma, page);
 	return -EIO;
 }
 
@@ -548,38 +542,36 @@
 }
 
 /**
- * svc_rdma_sync_reply_hdr - DMA sync the transport header buffer
+ * svc_rdma_pull_up_needed - Determine whether to use pull-up
  * @rdma: controlling transport
- * @ctxt: send_ctxt for the Send WR
- * @len: length of transport header
+ * @sctxt: send_ctxt for the Send WR
+ * @rctxt: Write and Reply chunks provided by client
+ * @xdr: xdr_buf containing RPC message to transmit
  *
- */
-void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma,
-			     struct svc_rdma_send_ctxt *ctxt,
-			     unsigned int len)
-{
-	ctxt->sc_sges[0].length = len;
-	ctxt->sc_send_wr.num_sge++;
-	ib_dma_sync_single_for_device(rdma->sc_pd->device,
-				      ctxt->sc_sges[0].addr, len,
-				      DMA_TO_DEVICE);
-}
-
-/* If the xdr_buf has more elements than the device can
- * transmit in a single RDMA Send, then the reply will
- * have to be copied into a bounce buffer.
+ * Returns:
+ *   %true if pull-up must be used
+ *   %false otherwise
  */
 static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma,
-				    struct xdr_buf *xdr,
-				    __be32 *wr_lst)
+				    struct svc_rdma_send_ctxt *sctxt,
+				    const struct svc_rdma_recv_ctxt *rctxt,
+				    struct xdr_buf *xdr)
 {
 	int elements;
 
+	/* For small messages, copying bytes is cheaper than DMA mapping.
+	 */
+	if (sctxt->sc_hdrbuf.len + xdr->len < RPCRDMA_PULLUP_THRESH)
+		return true;
+
+	/* Check whether the xdr_buf has more elements than can
+	 * fit in a single RDMA Send.
+	 */
 	/* xdr->head */
 	elements = 1;
 
 	/* xdr->pages */
-	if (!wr_lst) {
+	if (!rctxt || !rctxt->rc_write_list) {
 		unsigned int remaining;
 		unsigned long pageoff;
 
@@ -601,29 +593,36 @@
 	return elements >= rdma->sc_max_send_sges;
 }
 
-/* The device is not capable of sending the reply directly.
- * Assemble the elements of @xdr into the transport header
- * buffer.
+/**
+ * svc_rdma_pull_up_reply_msg - Copy Reply into a single buffer
+ * @rdma: controlling transport
+ * @sctxt: send_ctxt for the Send WR; xprt hdr is already prepared
+ * @rctxt: Write and Reply chunks provided by client
+ * @xdr: prepared xdr_buf containing RPC message
+ *
+ * The device is not capable of sending the reply directly.
+ * Assemble the elements of @xdr into the transport header buffer.
+ *
+ * Returns zero on success, or a negative errno on failure.
  */
 static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma,
-				      struct svc_rdma_send_ctxt *ctxt,
-				      struct xdr_buf *xdr, __be32 *wr_lst)
+				      struct svc_rdma_send_ctxt *sctxt,
+				      const struct svc_rdma_recv_ctxt *rctxt,
+				      const struct xdr_buf *xdr)
 {
 	unsigned char *dst, *tailbase;
 	unsigned int taillen;
 
-	dst = ctxt->sc_xprt_buf;
-	dst += ctxt->sc_sges[0].length;
-
+	dst = sctxt->sc_xprt_buf + sctxt->sc_hdrbuf.len;
 	memcpy(dst, xdr->head[0].iov_base, xdr->head[0].iov_len);
 	dst += xdr->head[0].iov_len;
 
 	tailbase = xdr->tail[0].iov_base;
 	taillen = xdr->tail[0].iov_len;
-	if (wr_lst) {
+	if (rctxt && rctxt->rc_write_list) {
 		u32 xdrpad;
 
-		xdrpad = xdr_padsize(xdr->page_len);
+		xdrpad = xdr_pad_size(xdr->page_len);
 		if (taillen && xdrpad) {
 			tailbase += xdrpad;
 			taillen -= xdrpad;
@@ -650,29 +649,26 @@
 	if (taillen)
 		memcpy(dst, tailbase, taillen);
 
-	ctxt->sc_sges[0].length += xdr->len;
-	ib_dma_sync_single_for_device(rdma->sc_pd->device,
-				      ctxt->sc_sges[0].addr,
-				      ctxt->sc_sges[0].length,
-				      DMA_TO_DEVICE);
-
+	sctxt->sc_sges[0].length += xdr->len;
+	trace_svcrdma_send_pullup(sctxt->sc_sges[0].length);
 	return 0;
 }
 
-/* svc_rdma_map_reply_msg - Map the buffer holding RPC message
+/* svc_rdma_map_reply_msg - DMA map the buffer holding RPC message
  * @rdma: controlling transport
- * @ctxt: send_ctxt for the Send WR
+ * @sctxt: send_ctxt for the Send WR
+ * @rctxt: Write and Reply chunks provided by client
  * @xdr: prepared xdr_buf containing RPC message
- * @wr_lst: pointer to Call header's Write list, or NULL
 *
 * Load the xdr_buf into the ctxt's sge array, and DMA map each
- * element as it is added.
+ * element as it is added. The Send WR's num_sge field is set.
 *
 * Returns zero on success, or a negative errno on failure.
 */
 int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
-			   struct svc_rdma_send_ctxt *ctxt,
-			   struct xdr_buf *xdr, __be32 *wr_lst)
+			   struct svc_rdma_send_ctxt *sctxt,
+			   const struct svc_rdma_recv_ctxt *rctxt,
+			   struct xdr_buf *xdr)
 {
 	unsigned int len, remaining;
 	unsigned long page_off;
@@ -681,11 +677,24 @@
 	u32 xdr_pad;
 	int ret;
 
-	if (svc_rdma_pull_up_needed(rdma, xdr, wr_lst))
-		return svc_rdma_pull_up_reply_msg(rdma, ctxt, xdr, wr_lst);
+	/* Set up the (persistently-mapped) transport header SGE. */
+	sctxt->sc_send_wr.num_sge = 1;
+	sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
 
-	++ctxt->sc_cur_sge_no;
-	ret = svc_rdma_dma_map_buf(rdma, ctxt,
+	/* If there is a Reply chunk, nothing follows the transport
+	 * header, and we're done here.
+	 */
+	if (rctxt && rctxt->rc_reply_chunk)
+		return 0;
+
+	/* For pull-up, svc_rdma_send() will sync the transport header.
+	 * No additional DMA mapping is necessary.
+	 */
+	if (svc_rdma_pull_up_needed(rdma, sctxt, rctxt, xdr))
+		return svc_rdma_pull_up_reply_msg(rdma, sctxt, rctxt, xdr);
+
+	++sctxt->sc_cur_sge_no;
+	ret = svc_rdma_dma_map_buf(rdma, sctxt,
 				   xdr->head[0].iov_base,
 				   xdr->head[0].iov_len);
 	if (ret < 0)
@@ -696,10 +705,10 @@
 	 * have added XDR padding in the tail buffer, and that
 	 * should not be included inline.
 	 */
-	if (wr_lst) {
+	if (rctxt && rctxt->rc_write_list) {
 		base = xdr->tail[0].iov_base;
 		len = xdr->tail[0].iov_len;
-		xdr_pad = xdr_padsize(xdr->page_len);
+		xdr_pad = xdr_pad_size(xdr->page_len);
 
 		if (len && xdr_pad) {
 			base += xdr_pad;
@@ -715,8 +724,8 @@
 	while (remaining) {
 		len = min_t(u32, PAGE_SIZE - page_off, remaining);
 
-		++ctxt->sc_cur_sge_no;
-		ret = svc_rdma_dma_map_page(rdma, ctxt, *ppages++,
+		++sctxt->sc_cur_sge_no;
+		ret = svc_rdma_dma_map_page(rdma, sctxt, *ppages++,
 					    page_off, len);
 		if (ret < 0)
 			return ret;
@@ -729,8 +738,8 @@
 	len = xdr->tail[0].iov_len;
 tail:
 	if (len) {
-		++ctxt->sc_cur_sge_no;
-		ret = svc_rdma_dma_map_buf(rdma, ctxt, base, len);
+		++sctxt->sc_cur_sge_no;
+		ret = svc_rdma_dma_map_buf(rdma, sctxt, base, len);
 		if (ret < 0)
 			return ret;
 	}
@@ -768,7 +777,7 @@
 *
 * RDMA Send is the last step of transmitting an RPC reply. Pages
 * involved in the earlier RDMA Writes are here transferred out
- * of the rqstp and into the ctxt's page array. These pages are
+ * of the rqstp and into the sctxt's page array. These pages are
 * DMA unmapped by each Write completion, but the subsequent Send
 * completion finally releases these pages.
 *
@@ -776,69 +785,94 @@
  *  - The Reply's transport header will never be larger than a page.
  */
 static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
-				   struct svc_rdma_send_ctxt *ctxt,
-				   __be32 *rdma_argp,
-				   struct svc_rqst *rqstp,
-				   __be32 *wr_lst, __be32 *rp_ch)
-{
-	int ret;
-
-	if (!rp_ch) {
-		ret = svc_rdma_map_reply_msg(rdma, ctxt,
-					     &rqstp->rq_res, wr_lst);
-		if (ret < 0)
-			return ret;
-	}
-
-	svc_rdma_save_io_pages(rqstp, ctxt);
-
-	ctxt->sc_send_wr.opcode = IB_WR_SEND;
-	if (rdma->sc_snd_w_inv) {
-		ctxt->sc_send_wr.ex.invalidate_rkey =
-			svc_rdma_get_inv_rkey(rdma_argp, wr_lst, rp_ch);
-		if (ctxt->sc_send_wr.ex.invalidate_rkey)
-			ctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
-	}
-	dprintk("svcrdma: posting Send WR with %u sge(s)\n",
-		ctxt->sc_send_wr.num_sge);
-	return svc_rdma_send(rdma, &ctxt->sc_send_wr);
-}
-
-/* Given the client-provided Write and Reply chunks, the server was not
- * able to form a complete reply. Return an RDMA_ERROR message so the
- * client can retire this RPC transaction. As above, the Send completion
- * routine releases payload pages that were part of a previous RDMA Write.
- *
- * Remote Invalidation is skipped for simplicity.
- */
-static int svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
-				   struct svc_rdma_send_ctxt *ctxt,
+				   struct svc_rdma_send_ctxt *sctxt,
+				   const struct svc_rdma_recv_ctxt *rctxt,
 				   struct svc_rqst *rqstp)
 {
-	__be32 *p;
 	int ret;
 
-	p = ctxt->sc_xprt_buf;
-	trace_svcrdma_err_chunk(*p);
-	p += 3;
-	*p++ = rdma_error;
-	*p = err_chunk;
-	svc_rdma_sync_reply_hdr(rdma, ctxt, RPCRDMA_HDRLEN_ERR);
-
-	svc_rdma_save_io_pages(rqstp, ctxt);
-
-	ctxt->sc_send_wr.opcode = IB_WR_SEND;
-	ret = svc_rdma_send(rdma, &ctxt->sc_send_wr);
-	if (ret) {
-		svc_rdma_send_ctxt_put(rdma, ctxt);
+	ret = svc_rdma_map_reply_msg(rdma, sctxt, rctxt, &rqstp->rq_res);
+	if (ret < 0)
 		return ret;
-	}
 
-	return 0;
+	svc_rdma_save_io_pages(rqstp, sctxt);
+
+	if (rctxt->rc_inv_rkey) {
+		sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
+		sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey;
+	} else {
+		sctxt->sc_send_wr.opcode = IB_WR_SEND;
+	}
+	return svc_rdma_send(rdma, sctxt);
 }
 
-void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
+/**
+ * svc_rdma_send_error_msg - Send an RPC/RDMA v1 error response
+ * @rdma: controlling transport context
+ * @sctxt: Send context for the response
+ * @rctxt: Receive context for incoming bad message
+ * @status: negative errno indicating error that occurred
+ *
+ * Given the client-provided Read, Write, and Reply chunks, the
+ * server was not able to parse the Call or form a complete Reply.
+ * Return an RDMA_ERROR message so the client can retire the RPC
+ * transaction.
+ *
+ * The caller does not have to release @sctxt. It is released by
+ * Send completion, or by this function on error.
+ */
+void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
+			     struct svc_rdma_send_ctxt *sctxt,
+			     struct svc_rdma_recv_ctxt *rctxt,
+			     int status)
 {
+	__be32 *rdma_argp = rctxt->rc_recv_buf;
+	__be32 *p;
+
+	rpcrdma_set_xdrlen(&sctxt->sc_hdrbuf, 0);
+	xdr_init_encode(&sctxt->sc_stream, &sctxt->sc_hdrbuf,
+			sctxt->sc_xprt_buf, NULL);
+
+	p = xdr_reserve_space(&sctxt->sc_stream,
+			      rpcrdma_fixed_maxsz * sizeof(*p));
+	if (!p)
+		goto put_ctxt;
+
+	*p++ = *rdma_argp;
+	*p++ = *(rdma_argp + 1);
+	*p++ = rdma->sc_fc_credits;
+	*p = rdma_error;
+
+	switch (status) {
+	case -EPROTONOSUPPORT:
+		p = xdr_reserve_space(&sctxt->sc_stream, 3 * sizeof(*p));
+		if (!p)
+			goto put_ctxt;
+
+		*p++ = err_vers;
+		*p++ = rpcrdma_version;
+		*p = rpcrdma_version;
+		trace_svcrdma_err_vers(*rdma_argp);
+		break;
+	default:
+		p = xdr_reserve_space(&sctxt->sc_stream, sizeof(*p));
+		if (!p)
+			goto put_ctxt;
+
+		*p = err_chunk;
+		trace_svcrdma_err_chunk(*rdma_argp);
+	}
+
+	/* Remote Invalidation is skipped for simplicity. */
+	sctxt->sc_send_wr.num_sge = 1;
+	sctxt->sc_send_wr.opcode = IB_WR_SEND;
+	sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
+	if (svc_rdma_send(rdma, sctxt))
+		goto put_ctxt;
+	return;
+
+put_ctxt:
+	svc_rdma_send_ctxt_put(rdma, sctxt);
 }
 
 /**
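
For reference, the two error bodies encoded above correspond to the
ERR_VERS and ERR_CHUNK arms of RFC 8166's rpcrdma1_error result. A sketch
of the XDR that svc_rdma_send_error_msg() emits for a version mismatch
(field names follow the RFC, not this file):

	rdma_xid       : copied from the incoming Call
	rdma_vers      : copied from the incoming Call
	rdma_credit    : rdma->sc_fc_credits
	rdma_proc      : rdma_error
	rdma_err       : err_vers
	rdma_vers_low  : rpcrdma_version
	rdma_vers_high : rpcrdma_version

In the default (err_chunk) case, only the single rdma_err word follows
the fixed header.
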
@@ -859,54 +893,68 @@
 	struct svcxprt_rdma *rdma =
 		container_of(xprt, struct svcxprt_rdma, sc_xprt);
 	struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt;
-	__be32 *p, *rdma_argp, *rdma_resp, *wr_lst, *rp_ch;
+	__be32 *rdma_argp = rctxt->rc_recv_buf;
+	__be32 *wr_lst = rctxt->rc_write_list;
+	__be32 *rp_ch = rctxt->rc_reply_chunk;
 	struct xdr_buf *xdr = &rqstp->rq_res;
 	struct svc_rdma_send_ctxt *sctxt;
+	__be32 *p;
 	int ret;
 
-	rdma_argp = rctxt->rc_recv_buf;
-	svc_rdma_get_write_arrays(rdma_argp, &wr_lst, &rp_ch);
+	ret = -ENOTCONN;
+	if (svc_xprt_is_dead(xprt))
+		goto err0;
 
-	/* Create the RDMA response header. xprt->xpt_mutex,
-	 * acquired in svc_send(), serializes RPC replies. The
-	 * code path below that inserts the credit grant value
-	 * into each transport header runs only inside this
-	 * critical section.
-	 */
 	ret = -ENOMEM;
 	sctxt = svc_rdma_send_ctxt_get(rdma);
 	if (!sctxt)
 		goto err0;
-	rdma_resp = sctxt->sc_xprt_buf;
 
-	p = rdma_resp;
+	p = xdr_reserve_space(&sctxt->sc_stream,
+			      rpcrdma_fixed_maxsz * sizeof(*p));
+	if (!p)
+		goto err0;
 	*p++ = *rdma_argp;
 	*p++ = *(rdma_argp + 1);
 	*p++ = rdma->sc_fc_credits;
-	*p++ = rp_ch ? rdma_nomsg : rdma_msg;
+	*p = rp_ch ? rdma_nomsg : rdma_msg;
 
-	/* Start with empty chunks */
-	*p++ = xdr_zero;
-	*p++ = xdr_zero;
-	*p = xdr_zero;
-
+	if (svc_rdma_encode_read_list(sctxt) < 0)
+		goto err0;
 	if (wr_lst) {
 		/* XXX: Presume the client sent only one Write chunk */
-		ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr);
+		unsigned long offset;
+		unsigned int length;
+
+		if (rctxt->rc_read_payload_length) {
+			offset = rctxt->rc_read_payload_offset;
+			length = rctxt->rc_read_payload_length;
+		} else {
+			offset = xdr->head[0].iov_len;
+			length = xdr->page_len;
+		}
+		ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr, offset,
+						length);
 		if (ret < 0)
 			goto err2;
-		svc_rdma_xdr_encode_write_list(rdma_resp, wr_lst, ret);
+		if (svc_rdma_encode_write_list(rctxt, sctxt, length) < 0)
+			goto err0;
+	} else {
+		if (xdr_stream_encode_item_absent(&sctxt->sc_stream) < 0)
+			goto err0;
 	}
 	if (rp_ch) {
-		ret = svc_rdma_send_reply_chunk(rdma, rp_ch, wr_lst, xdr);
+		ret = svc_rdma_send_reply_chunk(rdma, rctxt, &rqstp->rq_res);
 		if (ret < 0)
 			goto err2;
-		svc_rdma_xdr_encode_reply_chunk(rdma_resp, rp_ch, ret);
+		if (svc_rdma_encode_reply_chunk(rctxt, sctxt, ret) < 0)
+			goto err0;
+	} else {
+		if (xdr_stream_encode_item_absent(&sctxt->sc_stream) < 0)
+			goto err0;
 	}
 
-	svc_rdma_sync_reply_hdr(rdma, sctxt, svc_rdma_reply_hdr_len(rdma_resp));
-	ret = svc_rdma_send_reply_msg(rdma, sctxt, rdma_argp, rqstp,
-				      wr_lst, rp_ch);
+	ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp);
 	if (ret < 0)
 		goto err1;
 	return 0;
@@ -915,15 +963,44 @@
 	if (ret != -E2BIG && ret != -EINVAL)
 		goto err1;
 
-	ret = svc_rdma_send_error_msg(rdma, sctxt, rqstp);
-	if (ret < 0)
-		goto err1;
+	/* Send completion releases payload pages that were part
+	 * of previously posted RDMA Writes.
+	 */
+	svc_rdma_save_io_pages(rqstp, sctxt);
+	svc_rdma_send_error_msg(rdma, sctxt, rctxt, ret);
 	return 0;
 
 err1:
 	svc_rdma_send_ctxt_put(rdma, sctxt);
 err0:
-	trace_svcrdma_send_failed(rqstp, ret);
+	trace_svcrdma_send_err(rqstp, ret);
 	set_bit(XPT_CLOSE, &xprt->xpt_flags);
 	return -ENOTCONN;
 }
+
+/**
+ * svc_rdma_read_payload - special processing for a READ payload
+ * @rqstp: svc_rqst to operate on
+ * @offset: payload's byte offset in @xdr
+ * @length: size of payload, in bytes
+ *
+ * Returns zero on success.
+ *
+ * For the moment, just record the xdr_buf location of the READ
+ * payload. svc_rdma_sendto will use that location later when
+ * we actually send the payload.
+ */
+int svc_rdma_read_payload(struct svc_rqst *rqstp, unsigned int offset,
+			  unsigned int length)
+{
+	struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt;
+
+	/* XXX: Just one READ payload slot for now, since our
+	 * transport implementation currently supports only one
+	 * Write chunk.
+	 */
+	rctxt->rc_read_payload_offset = offset;
+	rctxt->rc_read_payload_length = length;
+
+	return 0;
+}
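
Presumably (not shown in this excerpt), the new svc_rdma_read_payload()
handler is plumbed into the transport's method table so the upper layer
can flag a READ payload while encoding its reply. A sketch under that
assumption; the .xpo_read_payload member name is likewise an assumption:

	static const struct svc_xprt_ops svc_rdma_ops = {
		/* ...other methods unchanged... */
		.xpo_read_payload	= svc_rdma_read_payload,	/* assumed hook */
	};
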