| .. | .. | 
|---|
| 117 | 117 |  					rc_list); | 
|---|
| 118 | 118 |  } | 
|---|
| 119 | 119 |   | 
|---|
 | 120 | +static void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma,  | 
|---|
 | 121 | +				   struct rpc_rdma_cid *cid)  | 
|---|
 | 122 | +{  | 
|---|
 | 123 | +	cid->ci_queue_id = rdma->sc_rq_cq->res.id;  | 
|---|
 | 124 | +	cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);  | 
|---|
 | 125 | +}  | 
|---|
 | 126 | +  | 
|---|
| 120 | 127 |  static struct svc_rdma_recv_ctxt * | 
|---|
| 121 | 128 |  svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma) | 
|---|
| 122 | 129 |  { | 
|---|
| .. | .. | 
|---|
| 134 | 141 |  				 rdma->sc_max_req_size, DMA_FROM_DEVICE); | 
|---|
| 135 | 142 |  	if (ib_dma_mapping_error(rdma->sc_pd->device, addr)) | 
|---|
| 136 | 143 |  		goto fail2; | 
|---|
 | 144 | +  | 
|---|
 | 145 | +	svc_rdma_recv_cid_init(rdma, &ctxt->rc_cid);  | 
|---|
| 137 | 146 |   | 
|---|
| 138 | 147 |  	ctxt->rc_recv_wr.next = NULL; | 
|---|
| 139 | 148 |  	ctxt->rc_recv_wr.wr_cqe = &ctxt->rc_cqe; | 
|---|
| .. | .. | 
|---|
| 172 | 181 |  void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma) | 
|---|
| 173 | 182 |  { | 
|---|
| 174 | 183 |  	struct svc_rdma_recv_ctxt *ctxt; | 
|---|
 | 184 | +	struct llist_node *node;  | 
|---|
| 175 | 185 |   | 
|---|
| 176 |  | -	while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts))) {  | 
|---|
| 177 |  | -		list_del(&ctxt->rc_list);  | 
|---|
 | 186 | +	while ((node = llist_del_first(&rdma->sc_recv_ctxts))) {  | 
|---|
 | 187 | +		ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);  | 
|---|
| 178 | 188 |  		svc_rdma_recv_ctxt_destroy(rdma, ctxt); | 
|---|
| 179 | 189 |  	} | 
|---|
| 180 | 190 |  } | 
|---|
| .. | .. | 
|---|
| 183 | 193 |  svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma) | 
|---|
| 184 | 194 |  { | 
|---|
| 185 | 195 |  	struct svc_rdma_recv_ctxt *ctxt; | 
|---|
 | 196 | +	struct llist_node *node;  | 
|---|
| 186 | 197 |   | 
|---|
| 187 |  | -	spin_lock(&rdma->sc_recv_lock);  | 
|---|
| 188 |  | -	ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts);  | 
|---|
| 189 |  | -	if (!ctxt)  | 
|---|
 | 198 | +	node = llist_del_first(&rdma->sc_recv_ctxts);  | 
|---|
 | 199 | +	if (!node)  | 
|---|
| 190 | 200 |  		goto out_empty; | 
|---|
| 191 |  | -	list_del(&ctxt->rc_list);  | 
|---|
| 192 |  | -	spin_unlock(&rdma->sc_recv_lock);  | 
|---|
 | 201 | +	ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);  | 
|---|
| 193 | 202 |   | 
|---|
| 194 | 203 |  out: | 
|---|
| 195 | 204 |  	ctxt->rc_page_count = 0; | 
|---|
 | 205 | +	ctxt->rc_read_payload_length = 0;  | 
|---|
| 196 | 206 |  	return ctxt; | 
|---|
| 197 | 207 |   | 
|---|
| 198 | 208 |  out_empty: | 
|---|
| 199 |  | -	spin_unlock(&rdma->sc_recv_lock);  | 
|---|
| 200 |  | -  | 
|---|
| 201 | 209 |  	ctxt = svc_rdma_recv_ctxt_alloc(rdma); | 
|---|
| 202 | 210 |  	if (!ctxt) | 
|---|
| 203 | 211 |  		return NULL; | 
|---|
| .. | .. | 
|---|
| 218 | 226 |  	for (i = 0; i < ctxt->rc_page_count; i++) | 
|---|
| 219 | 227 |  		put_page(ctxt->rc_pages[i]); | 
|---|
| 220 | 228 |   | 
|---|
| 221 |  | -	if (!ctxt->rc_temp) {  | 
|---|
| 222 |  | -		spin_lock(&rdma->sc_recv_lock);  | 
|---|
| 223 |  | -		list_add(&ctxt->rc_list, &rdma->sc_recv_ctxts);  | 
|---|
| 224 |  | -		spin_unlock(&rdma->sc_recv_lock);  | 
|---|
| 225 |  | -	} else  | 
|---|
 | 229 | +	if (!ctxt->rc_temp)  | 
|---|
 | 230 | +		llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);  | 
|---|
 | 231 | +	else  | 
|---|
| 226 | 232 |  		svc_rdma_recv_ctxt_destroy(rdma, ctxt); | 
|---|
| 227 | 233 |  } | 
|---|
| 228 | 234 |   | 
|---|
| .. | .. | 
|---|
| 251 | 257 |  { | 
|---|
| 252 | 258 |  	int ret; | 
|---|
| 253 | 259 |   | 
|---|
| 254 |  | -	svc_xprt_get(&rdma->sc_xprt);  | 
|---|
 | 260 | +	trace_svcrdma_post_recv(ctxt);  | 
|---|
| 255 | 261 |  	ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, NULL); | 
|---|
| 256 |  | -	trace_svcrdma_post_recv(&ctxt->rc_recv_wr, ret);  | 
|---|
| 257 | 262 |  	if (ret) | 
|---|
| 258 | 263 |  		goto err_post; | 
|---|
| 259 | 264 |  	return 0; | 
|---|
| 260 | 265 |   | 
|---|
| 261 | 266 |  err_post: | 
|---|
 | 267 | +	trace_svcrdma_rq_post_err(rdma, ret);  | 
|---|
| 262 | 268 |  	svc_rdma_recv_ctxt_put(rdma, ctxt); | 
|---|
| 263 |  | -	svc_xprt_put(&rdma->sc_xprt);  | 
|---|
| 264 | 269 |  	return ret; | 
|---|
| 265 | 270 |  } | 
|---|
| 266 | 271 |   | 
|---|
| .. | .. | 
|---|
| 294 | 299 |  			return false; | 
|---|
| 295 | 300 |  		ctxt->rc_temp = true; | 
|---|
| 296 | 301 |  		ret = __svc_rdma_post_recv(rdma, ctxt); | 
|---|
| 297 |  | -		if (ret) {  | 
|---|
| 298 |  | -			pr_err("svcrdma: failure posting recv buffers: %d\n",  | 
|---|
| 299 |  | -			       ret);  | 
|---|
 | 302 | +		if (ret)  | 
|---|
| 300 | 303 |  			return false; | 
|---|
| 301 |  | -		}  | 
|---|
| 302 | 304 |  	} | 
|---|
| 303 | 305 |  	return true; | 
|---|
| 304 | 306 |  } | 
|---|
| .. | .. | 
|---|
| 317 | 319 |  	struct ib_cqe *cqe = wc->wr_cqe; | 
|---|
| 318 | 320 |  	struct svc_rdma_recv_ctxt *ctxt; | 
|---|
| 319 | 321 |   | 
|---|
| 320 |  | -	trace_svcrdma_wc_receive(wc);  | 
|---|
| 321 |  | -  | 
|---|
| 322 | 322 |  	/* WARNING: Only wc->wr_cqe and wc->status are reliable */ | 
|---|
| 323 | 323 |  	ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe); | 
|---|
| 324 | 324 |   | 
|---|
 | 325 | +	trace_svcrdma_wc_receive(wc, &ctxt->rc_cid);  | 
|---|
| 325 | 326 |  	if (wc->status != IB_WC_SUCCESS) | 
|---|
| 326 | 327 |  		goto flushed; | 
|---|
| 327 | 328 |   | 
|---|
| .. | .. | 
|---|
| 336 | 337 |   | 
|---|
| 337 | 338 |  	spin_lock(&rdma->sc_rq_dto_lock); | 
|---|
| 338 | 339 |  	list_add_tail(&ctxt->rc_list, &rdma->sc_rq_dto_q); | 
|---|
| 339 |  | -	spin_unlock(&rdma->sc_rq_dto_lock);  | 
|---|
 | 340 | +	/* Note the unlock pairs with the smp_rmb in svc_xprt_ready: */  | 
|---|
| 340 | 341 |  	set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags); | 
|---|
 | 342 | +	spin_unlock(&rdma->sc_rq_dto_lock);  | 
|---|
| 341 | 343 |  	if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags)) | 
|---|
| 342 | 344 |  		svc_xprt_enqueue(&rdma->sc_xprt); | 
|---|
| 343 |  | -	goto out;  | 
|---|
 | 345 | +	return;  | 
|---|
| 344 | 346 |   | 
|---|
| 345 | 347 |  flushed: | 
|---|
| 346 |  | -	if (wc->status != IB_WC_WR_FLUSH_ERR)  | 
|---|
| 347 |  | -		pr_err("svcrdma: Recv: %s (%u/0x%x)\n",  | 
|---|
| 348 |  | -		       ib_wc_status_msg(wc->status),  | 
|---|
| 349 |  | -		       wc->status, wc->vendor_err);  | 
|---|
| 350 | 348 |  post_err: | 
|---|
| 351 | 349 |  	svc_rdma_recv_ctxt_put(rdma, ctxt); | 
|---|
| 352 | 350 |  	set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); | 
|---|
| 353 | 351 |  	svc_xprt_enqueue(&rdma->sc_xprt); | 
|---|
| 354 |  | -out:  | 
|---|
| 355 |  | -	svc_xprt_put(&rdma->sc_xprt);  | 
|---|
| 356 | 352 |  } | 
|---|
| 357 | 353 |   | 
|---|
| 358 | 354 |  /** | 
|---|
| .. | .. | 
|---|
| 389 | 385 |  	arg->len = ctxt->rc_byte_len; | 
|---|
| 390 | 386 |  } | 
|---|
| 391 | 387 |   | 
|---|
| 392 |  | -/* This accommodates the largest possible Write chunk,  | 
|---|
| 393 |  | - * in one segment.  | 
|---|
 | 388 | +/* This accommodates the largest possible Write chunk.  | 
|---|
| 394 | 389 |   */ | 
|---|
| 395 |  | -#define MAX_BYTES_WRITE_SEG	((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT))  | 
|---|
 | 390 | +#define MAX_BYTES_WRITE_CHUNK ((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT))  | 
|---|
| 396 | 391 |   | 
|---|
| 397 | 392 |  /* This accommodates the largest possible Position-Zero | 
|---|
| 398 |  | - * Read chunk or Reply chunk, in one segment.  | 
|---|
 | 393 | + * Read chunk or Reply chunk.  | 
|---|
| 399 | 394 |   */ | 
|---|
| 400 |  | -#define MAX_BYTES_SPECIAL_SEG	((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT))  | 
|---|
 | 395 | +#define MAX_BYTES_SPECIAL_CHUNK ((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT))  | 
|---|
| 401 | 396 |   | 
|---|
| 402 | 397 |  /* Sanity check the Read list. | 
|---|
| 403 | 398 |   * | 
|---|
| .. | .. | 
|---|
| 405 | 400 |   * - This implementation supports only one Read chunk. | 
|---|
| 406 | 401 |   * | 
|---|
| 407 | 402 |   * Sanity checks: | 
|---|
| 408 |  | - * - Read list does not overflow buffer.  | 
|---|
 | 403 | + * - Read list does not overflow Receive buffer.  | 
|---|
| 409 | 404 |   * - Segment size limited by largest NFS data payload. | 
|---|
| 410 | 405 |   * | 
|---|
| 411 | 406 |   * The segment count is limited to how many segments can | 
|---|
| .. | .. | 
|---|
| 413 | 408 |   * buffer. That's about 40 Read segments for a 1KB inline | 
|---|
| 414 | 409 |   * threshold. | 
|---|
| 415 | 410 |   * | 
|---|
| 416 |  | - * Returns pointer to the following Write list.  | 
|---|
 | 411 | + * Return values:  | 
|---|
 | 412 | + *       %true: Read list is valid. @rctxt's xdr_stream is updated  | 
|---|
 | 413 | + *		to point to the first byte past the Read list.  | 
|---|
 | 414 | + *      %false: Read list is corrupt. @rctxt's xdr_stream is left  | 
|---|
 | 415 | + *		in an unknown state.  | 
|---|
| 417 | 416 |   */ | 
|---|
| 418 |  | -static __be32 *xdr_check_read_list(__be32 *p, const __be32 *end)  | 
|---|
 | 417 | +static bool xdr_check_read_list(struct svc_rdma_recv_ctxt *rctxt)  | 
|---|
| 419 | 418 |  { | 
|---|
| 420 |  | -	u32 position;  | 
|---|
 | 419 | +	u32 position, len;  | 
|---|
| 421 | 420 |  	bool first; | 
|---|
 | 421 | +	__be32 *p;  | 
|---|
| 422 | 422 |   | 
|---|
 | 423 | +	p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));  | 
|---|
 | 424 | +	if (!p)  | 
|---|
 | 425 | +		return false;  | 
|---|
 | 426 | +  | 
|---|
 | 427 | +	len = 0;  | 
|---|
| 423 | 428 |  	first = true; | 
|---|
| 424 |  | -	while (*p++ != xdr_zero) {  | 
|---|
| 425 |  | -		if (first) {  | 
|---|
| 426 |  | -			position = be32_to_cpup(p++);  | 
|---|
| 427 |  | -			first = false;  | 
|---|
| 428 |  | -		} else if (be32_to_cpup(p++) != position) {  | 
|---|
| 429 |  | -			return NULL;  | 
|---|
| 430 |  | -		}  | 
|---|
| 431 |  | -		p++;	/* handle */  | 
|---|
| 432 |  | -		if (be32_to_cpup(p++) > MAX_BYTES_SPECIAL_SEG)  | 
|---|
| 433 |  | -			return NULL;  | 
|---|
| 434 |  | -		p += 2;	/* offset */  | 
|---|
 | 429 | +	while (xdr_item_is_present(p)) {  | 
|---|
 | 430 | +		p = xdr_inline_decode(&rctxt->rc_stream,  | 
|---|
 | 431 | +				      rpcrdma_readseg_maxsz * sizeof(*p));  | 
|---|
 | 432 | +		if (!p)  | 
|---|
 | 433 | +			return false;  | 
|---|
| 435 | 434 |   | 
|---|
| 436 |  | -		if (p > end)  | 
|---|
| 437 |  | -			return NULL;  | 
|---|
 | 435 | +		if (first) {  | 
|---|
 | 436 | +			position = be32_to_cpup(p);  | 
|---|
 | 437 | +			first = false;  | 
|---|
 | 438 | +		} else if (be32_to_cpup(p) != position) {  | 
|---|
 | 439 | +			return false;  | 
|---|
 | 440 | +		}  | 
|---|
 | 441 | +		p += 2;  | 
|---|
 | 442 | +		len += be32_to_cpup(p);  | 
|---|
 | 443 | +  | 
|---|
 | 444 | +		p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));  | 
|---|
 | 445 | +		if (!p)  | 
|---|
 | 446 | +			return false;  | 
|---|
| 438 | 447 |  	} | 
|---|
| 439 |  | -	return p;  | 
|---|
 | 448 | +	return len <= MAX_BYTES_SPECIAL_CHUNK;  | 
|---|
| 440 | 449 |  } | 
|---|
| 441 | 450 |   | 
|---|
| 442 | 451 |  /* The segment count is limited to how many segments can | 
|---|
| .. | .. | 
|---|
| 444 | 453 |   * buffer. That's about 60 Write segments for a 1KB inline | 
|---|
| 445 | 454 |   * threshold. | 
|---|
| 446 | 455 |   */ | 
|---|
| 447 |  | -static __be32 *xdr_check_write_chunk(__be32 *p, const __be32 *end,  | 
|---|
| 448 |  | -				     u32 maxlen)  | 
|---|
 | 456 | +static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt, u32 maxlen)  | 
|---|
| 449 | 457 |  { | 
|---|
| 450 |  | -	u32 i, segcount;  | 
|---|
 | 458 | +	u32 i, segcount, total;  | 
|---|
 | 459 | +	__be32 *p;  | 
|---|
| 451 | 460 |   | 
|---|
| 452 |  | -	segcount = be32_to_cpup(p++);  | 
|---|
 | 461 | +	p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));  | 
|---|
 | 462 | +	if (!p)  | 
|---|
 | 463 | +		return false;  | 
|---|
 | 464 | +	segcount = be32_to_cpup(p);  | 
|---|
 | 465 | +  | 
|---|
 | 466 | +	total = 0;  | 
|---|
| 453 | 467 |  	for (i = 0; i < segcount; i++) { | 
|---|
| 454 |  | -		p++;	/* handle */  | 
|---|
| 455 |  | -		if (be32_to_cpup(p++) > maxlen)  | 
|---|
| 456 |  | -			return NULL;  | 
|---|
| 457 |  | -		p += 2;	/* offset */  | 
|---|
 | 468 | +		u32 handle, length;  | 
|---|
 | 469 | +		u64 offset;  | 
|---|
| 458 | 470 |   | 
|---|
| 459 |  | -		if (p > end)  | 
|---|
| 460 |  | -			return NULL;  | 
|---|
 | 471 | +		p = xdr_inline_decode(&rctxt->rc_stream,  | 
|---|
 | 472 | +				      rpcrdma_segment_maxsz * sizeof(*p));  | 
|---|
 | 473 | +		if (!p)  | 
|---|
 | 474 | +			return false;  | 
|---|
 | 475 | +  | 
|---|
 | 476 | +		xdr_decode_rdma_segment(p, &handle, &length, &offset);  | 
|---|
 | 477 | +		trace_svcrdma_decode_wseg(handle, length, offset);  | 
|---|
 | 478 | +  | 
|---|
 | 479 | +		total += length;  | 
|---|
| 461 | 480 |  	} | 
|---|
| 462 |  | -  | 
|---|
| 463 |  | -	return p;  | 
|---|
 | 481 | +	return total <= maxlen;  | 
|---|
| 464 | 482 |  } | 
|---|
| 465 | 483 |   | 
|---|
| 466 | 484 |  /* Sanity check the Write list. | 
|---|
| 467 | 485 |   * | 
|---|
| 468 | 486 |   * Implementation limits: | 
|---|
| 469 |  | - * - This implementation supports only one Write chunk.  | 
|---|
 | 487 | + * - This implementation currently supports only one Write chunk.  | 
|---|
| 470 | 488 |   * | 
|---|
| 471 | 489 |   * Sanity checks: | 
|---|
| 472 |  | - * - Write list does not overflow buffer.  | 
|---|
| 473 |  | - * - Segment size limited by largest NFS data payload.  | 
|---|
 | 490 | + * - Write list does not overflow Receive buffer.  | 
|---|
 | 491 | + * - Chunk size limited by largest NFS data payload.  | 
|---|
| 474 | 492 |   * | 
|---|
| 475 |  | - * Returns pointer to the following Reply chunk.  | 
|---|
 | 493 | + * Return values:  | 
|---|
 | 494 | + *       %true: Write list is valid. @rctxt's xdr_stream is updated  | 
|---|
 | 495 | + *		to point to the first byte past the Write list.  | 
|---|
 | 496 | + *      %false: Write list is corrupt. @rctxt's xdr_stream is left  | 
|---|
 | 497 | + *		in an unknown state.  | 
|---|
| 476 | 498 |   */ | 
|---|
| 477 |  | -static __be32 *xdr_check_write_list(__be32 *p, const __be32 *end)  | 
|---|
 | 499 | +static bool xdr_check_write_list(struct svc_rdma_recv_ctxt *rctxt)  | 
|---|
| 478 | 500 |  { | 
|---|
| 479 |  | -	u32 chcount;  | 
|---|
 | 501 | +	u32 chcount = 0;  | 
|---|
 | 502 | +	__be32 *p;  | 
|---|
| 480 | 503 |   | 
|---|
| 481 |  | -	chcount = 0;  | 
|---|
| 482 |  | -	while (*p++ != xdr_zero) {  | 
|---|
| 483 |  | -		p = xdr_check_write_chunk(p, end, MAX_BYTES_WRITE_SEG);  | 
|---|
 | 504 | +	p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));  | 
|---|
 | 505 | +	if (!p)  | 
|---|
 | 506 | +		return false;  | 
|---|
 | 507 | +	rctxt->rc_write_list = p;  | 
|---|
 | 508 | +	while (xdr_item_is_present(p)) {  | 
|---|
 | 509 | +		if (!xdr_check_write_chunk(rctxt, MAX_BYTES_WRITE_CHUNK))  | 
|---|
 | 510 | +			return false;  | 
|---|
 | 511 | +		++chcount;  | 
|---|
 | 512 | +		p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));  | 
|---|
| 484 | 513 |  		if (!p) | 
|---|
| 485 |  | -			return NULL;  | 
|---|
| 486 |  | -		if (chcount++ > 1)  | 
|---|
| 487 |  | -			return NULL;  | 
|---|
 | 514 | +			return false;  | 
|---|
| 488 | 515 |  	} | 
|---|
| 489 |  | -	return p;  | 
|---|
 | 516 | +	if (!chcount)  | 
|---|
 | 517 | +		rctxt->rc_write_list = NULL;  | 
|---|
 | 518 | +	return chcount < 2;  | 
|---|
| 490 | 519 |  } | 
|---|
| 491 | 520 |   | 
|---|
| 492 | 521 |  /* Sanity check the Reply chunk. | 
|---|
| 493 | 522 |   * | 
|---|
| 494 | 523 |   * Sanity checks: | 
|---|
| 495 |  | - * - Reply chunk does not overflow buffer.  | 
|---|
| 496 |  | - * - Segment size limited by largest NFS data payload.  | 
|---|
 | 524 | + * - Reply chunk does not overflow Receive buffer.  | 
|---|
 | 525 | + * - Chunk size limited by largest NFS data payload.  | 
|---|
| 497 | 526 |   * | 
|---|
| 498 |  | - * Returns pointer to the following RPC header.  | 
|---|
 | 527 | + * Return values:  | 
|---|
 | 528 | + *       %true: Reply chunk is valid. @rctxt's xdr_stream is updated  | 
|---|
 | 529 | + *		to point to the first byte past the Reply chunk.  | 
|---|
 | 530 | + *      %false: Reply chunk is corrupt. @rctxt's xdr_stream is left  | 
|---|
 | 531 | + *		in an unknown state.  | 
|---|
| 499 | 532 |   */ | 
|---|
| 500 |  | -static __be32 *xdr_check_reply_chunk(__be32 *p, const __be32 *end)  | 
|---|
 | 533 | +static bool xdr_check_reply_chunk(struct svc_rdma_recv_ctxt *rctxt)  | 
|---|
| 501 | 534 |  { | 
|---|
| 502 |  | -	if (*p++ != xdr_zero) {  | 
|---|
| 503 |  | -		p = xdr_check_write_chunk(p, end, MAX_BYTES_SPECIAL_SEG);  | 
|---|
| 504 |  | -		if (!p)  | 
|---|
| 505 |  | -			return NULL;  | 
|---|
 | 535 | +	__be32 *p;  | 
|---|
 | 536 | +  | 
|---|
 | 537 | +	p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));  | 
|---|
 | 538 | +	if (!p)  | 
|---|
 | 539 | +		return false;  | 
|---|
 | 540 | +	rctxt->rc_reply_chunk = NULL;  | 
|---|
 | 541 | +	if (xdr_item_is_present(p)) {  | 
|---|
 | 542 | +		if (!xdr_check_write_chunk(rctxt, MAX_BYTES_SPECIAL_CHUNK))  | 
|---|
 | 543 | +			return false;  | 
|---|
 | 544 | +		rctxt->rc_reply_chunk = p;  | 
|---|
| 506 | 545 |  	} | 
|---|
| 507 |  | -	return p;  | 
|---|
 | 546 | +	return true;  | 
|---|
| 508 | 547 |  } | 
|---|
| 509 | 548 |   | 
|---|
| 510 |  | -/* On entry, xdr->head[0].iov_base points to first byte in the  | 
|---|
| 511 |  | - * RPC-over-RDMA header.  | 
|---|
 | 549 | +/* RPC-over-RDMA Version One private extension: Remote Invalidation.  | 
|---|
 | 550 | + * Responder's choice: requester signals it can handle Send With  | 
|---|
 | 551 | + * Invalidate, and responder chooses one R_key to invalidate.  | 
|---|
 | 552 | + *  | 
|---|
 | 553 | + * If there is exactly one distinct R_key in the received transport  | 
|---|
 | 554 | + * header, set rc_inv_rkey to that R_key. Otherwise, set it to zero.  | 
|---|
 | 555 | + *  | 
|---|
 | 556 | + * Perform this operation while the received transport header is  | 
|---|
 | 557 | + * still in the CPU cache.  | 
|---|
 | 558 | + */  | 
|---|
 | 559 | +static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma,  | 
|---|
 | 560 | +				  struct svc_rdma_recv_ctxt *ctxt)  | 
|---|
 | 561 | +{  | 
|---|
 | 562 | +	__be32 inv_rkey, *p;  | 
|---|
 | 563 | +	u32 i, segcount;  | 
|---|
 | 564 | +  | 
|---|
 | 565 | +	ctxt->rc_inv_rkey = 0;  | 
|---|
 | 566 | +  | 
|---|
 | 567 | +	if (!rdma->sc_snd_w_inv)  | 
|---|
 | 568 | +		return;  | 
|---|
 | 569 | +  | 
|---|
 | 570 | +	inv_rkey = xdr_zero;  | 
|---|
 | 571 | +	p = ctxt->rc_recv_buf;  | 
|---|
 | 572 | +	p += rpcrdma_fixed_maxsz;  | 
|---|
 | 573 | +  | 
|---|
 | 574 | +	/* Read list */  | 
|---|
 | 575 | +	while (xdr_item_is_present(p++)) {  | 
|---|
 | 576 | +		p++;	/* position */  | 
|---|
 | 577 | +		if (inv_rkey == xdr_zero)  | 
|---|
 | 578 | +			inv_rkey = *p;  | 
|---|
 | 579 | +		else if (inv_rkey != *p)  | 
|---|
 | 580 | +			return;  | 
|---|
 | 581 | +		p += 4;  | 
|---|
 | 582 | +	}  | 
|---|
 | 583 | +  | 
|---|
 | 584 | +	/* Write list */  | 
|---|
 | 585 | +	while (xdr_item_is_present(p++)) {  | 
|---|
 | 586 | +		segcount = be32_to_cpup(p++);  | 
|---|
 | 587 | +		for (i = 0; i < segcount; i++) {  | 
|---|
 | 588 | +			if (inv_rkey == xdr_zero)  | 
|---|
 | 589 | +				inv_rkey = *p;  | 
|---|
 | 590 | +			else if (inv_rkey != *p)  | 
|---|
 | 591 | +				return;  | 
|---|
 | 592 | +			p += 4;  | 
|---|
 | 593 | +		}  | 
|---|
 | 594 | +	}  | 
|---|
 | 595 | +  | 
|---|
 | 596 | +	/* Reply chunk */  | 
|---|
 | 597 | +	if (xdr_item_is_present(p++)) {  | 
|---|
 | 598 | +		segcount = be32_to_cpup(p++);  | 
|---|
 | 599 | +		for (i = 0; i < segcount; i++) {  | 
|---|
 | 600 | +			if (inv_rkey == xdr_zero)  | 
|---|
 | 601 | +				inv_rkey = *p;  | 
|---|
 | 602 | +			else if (inv_rkey != *p)  | 
|---|
 | 603 | +				return;  | 
|---|
 | 604 | +			p += 4;  | 
|---|
 | 605 | +		}  | 
|---|
 | 606 | +	}  | 
|---|
 | 607 | +  | 
|---|
 | 608 | +	ctxt->rc_inv_rkey = be32_to_cpu(inv_rkey);  | 
|---|
 | 609 | +}  | 
|---|
 | 610 | +  | 
|---|
 | 611 | +/**  | 
|---|
 | 612 | + * svc_rdma_xdr_decode_req - Decode the transport header  | 
|---|
 | 613 | + * @rq_arg: xdr_buf containing ingress RPC/RDMA message  | 
|---|
 | 614 | + * @rctxt: state of decoding  | 
|---|
 | 615 | + *  | 
|---|
 | 616 | + * On entry, xdr->head[0].iov_base points to first byte of the  | 
|---|
 | 617 | + * RPC-over-RDMA transport header.  | 
|---|
| 512 | 618 |   * | 
|---|
| 513 | 619 |   * On successful exit, head[0] points to first byte past the | 
|---|
| 514 | 620 |   * RPC-over-RDMA header. For RDMA_MSG, this is the RPC message. | 
|---|
 | 621 | + *  | 
|---|
| 515 | 622 |   * The length of the RPC-over-RDMA header is returned. | 
|---|
| 516 | 623 |   * | 
|---|
| 517 | 624 |   * Assumptions: | 
|---|
| 518 | 625 |   * - The transport header is entirely contained in the head iovec. | 
|---|
| 519 | 626 |   */ | 
|---|
| 520 |  | -static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)  | 
|---|
 | 627 | +static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg,  | 
|---|
 | 628 | +				   struct svc_rdma_recv_ctxt *rctxt)  | 
|---|
| 521 | 629 |  { | 
|---|
| 522 |  | -	__be32 *p, *end, *rdma_argp;  | 
|---|
 | 630 | +	__be32 *p, *rdma_argp;  | 
|---|
| 523 | 631 |  	unsigned int hdr_len; | 
|---|
| 524 | 632 |   | 
|---|
| 525 |  | -	/* Verify that there's enough bytes for header + something */  | 
|---|
| 526 |  | -	if (rq_arg->len <= RPCRDMA_HDRLEN_ERR)  | 
|---|
| 527 |  | -		goto out_short;  | 
|---|
| 528 |  | -  | 
|---|
| 529 | 633 |  	rdma_argp = rq_arg->head[0].iov_base; | 
|---|
| 530 |  | -	if (*(rdma_argp + 1) != rpcrdma_version)  | 
|---|
| 531 |  | -		goto out_version;  | 
|---|
 | 634 | +	xdr_init_decode(&rctxt->rc_stream, rq_arg, rdma_argp, NULL);  | 
|---|
| 532 | 635 |   | 
|---|
| 533 |  | -	switch (*(rdma_argp + 3)) {  | 
|---|
 | 636 | +	p = xdr_inline_decode(&rctxt->rc_stream,  | 
|---|
 | 637 | +			      rpcrdma_fixed_maxsz * sizeof(*p));  | 
|---|
 | 638 | +	if (unlikely(!p))  | 
|---|
 | 639 | +		goto out_short;  | 
|---|
 | 640 | +	p++;  | 
|---|
 | 641 | +	if (*p != rpcrdma_version)  | 
|---|
 | 642 | +		goto out_version;  | 
|---|
 | 643 | +	p += 2;  | 
|---|
 | 644 | +	switch (*p) {  | 
|---|
| 534 | 645 |  	case rdma_msg: | 
|---|
| 535 | 646 |  		break; | 
|---|
| 536 | 647 |  	case rdma_nomsg: | 
|---|
| 537 | 648 |  		break; | 
|---|
| 538 |  | -  | 
|---|
| 539 | 649 |  	case rdma_done: | 
|---|
| 540 | 650 |  		goto out_drop; | 
|---|
| 541 |  | -  | 
|---|
| 542 | 651 |  	case rdma_error: | 
|---|
| 543 | 652 |  		goto out_drop; | 
|---|
| 544 |  | -  | 
|---|
| 545 | 653 |  	default: | 
|---|
| 546 | 654 |  		goto out_proc; | 
|---|
| 547 | 655 |  	} | 
|---|
| 548 | 656 |   | 
|---|
| 549 |  | -	end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len);  | 
|---|
| 550 |  | -	p = xdr_check_read_list(rdma_argp + 4, end);  | 
|---|
| 551 |  | -	if (!p)  | 
|---|
 | 657 | +	if (!xdr_check_read_list(rctxt))  | 
|---|
| 552 | 658 |  		goto out_inval; | 
|---|
| 553 |  | -	p = xdr_check_write_list(p, end);  | 
|---|
| 554 |  | -	if (!p)  | 
|---|
 | 659 | +	if (!xdr_check_write_list(rctxt))  | 
|---|
| 555 | 660 |  		goto out_inval; | 
|---|
| 556 |  | -	p = xdr_check_reply_chunk(p, end);  | 
|---|
| 557 |  | -	if (!p)  | 
|---|
| 558 |  | -		goto out_inval;  | 
|---|
| 559 |  | -	if (p > end)  | 
|---|
 | 661 | +	if (!xdr_check_reply_chunk(rctxt))  | 
|---|
| 560 | 662 |  		goto out_inval; | 
|---|
| 561 | 663 |   | 
|---|
| 562 |  | -	rq_arg->head[0].iov_base = p;  | 
|---|
| 563 |  | -	hdr_len = (unsigned long)p - (unsigned long)rdma_argp;  | 
|---|
 | 664 | +	rq_arg->head[0].iov_base = rctxt->rc_stream.p;  | 
|---|
 | 665 | +	hdr_len = xdr_stream_pos(&rctxt->rc_stream);  | 
|---|
| 564 | 666 |  	rq_arg->head[0].iov_len -= hdr_len; | 
|---|
| 565 | 667 |  	rq_arg->len -= hdr_len; | 
|---|
| 566 |  | -	trace_svcrdma_decode_rqst(rdma_argp, hdr_len);  | 
|---|
 | 668 | +	trace_svcrdma_decode_rqst(rctxt, rdma_argp, hdr_len);  | 
|---|
| 567 | 669 |  	return hdr_len; | 
|---|
| 568 | 670 |   | 
|---|
| 569 | 671 |  out_short: | 
|---|
| 570 |  | -	trace_svcrdma_decode_short(rq_arg->len);  | 
|---|
 | 672 | +	trace_svcrdma_decode_short_err(rctxt, rq_arg->len);  | 
|---|
| 571 | 673 |  	return -EINVAL; | 
|---|
| 572 | 674 |   | 
|---|
| 573 | 675 |  out_version: | 
|---|
| 574 |  | -	trace_svcrdma_decode_badvers(rdma_argp);  | 
|---|
 | 676 | +	trace_svcrdma_decode_badvers_err(rctxt, rdma_argp);  | 
|---|
| 575 | 677 |  	return -EPROTONOSUPPORT; | 
|---|
| 576 | 678 |   | 
|---|
| 577 | 679 |  out_drop: | 
|---|
| 578 |  | -	trace_svcrdma_decode_drop(rdma_argp);  | 
|---|
 | 680 | +	trace_svcrdma_decode_drop_err(rctxt, rdma_argp);  | 
|---|
| 579 | 681 |  	return 0; | 
|---|
| 580 | 682 |   | 
|---|
| 581 | 683 |  out_proc: | 
|---|
| 582 |  | -	trace_svcrdma_decode_badproc(rdma_argp);  | 
|---|
 | 684 | +	trace_svcrdma_decode_badproc_err(rctxt, rdma_argp);  | 
|---|
| 583 | 685 |  	return -EINVAL; | 
|---|
| 584 | 686 |   | 
|---|
| 585 | 687 |  out_inval: | 
|---|
| 586 |  | -	trace_svcrdma_decode_parse(rdma_argp);  | 
|---|
 | 688 | +	trace_svcrdma_decode_parse_err(rctxt, rdma_argp);  | 
|---|
| 587 | 689 |  	return -EINVAL; | 
|---|
| 588 | 690 |  } | 
|---|
| 589 | 691 |   | 
|---|
| .. | .. | 
|---|
| 616 | 718 |  	rqstp->rq_arg.buflen = head->rc_arg.buflen; | 
|---|
| 617 | 719 |  } | 
|---|
| 618 | 720 |   | 
|---|
| 619 |  | -static void svc_rdma_send_error(struct svcxprt_rdma *xprt,  | 
|---|
| 620 |  | -				__be32 *rdma_argp, int status)  | 
|---|
 | 721 | +static void svc_rdma_send_error(struct svcxprt_rdma *rdma,  | 
|---|
 | 722 | +				struct svc_rdma_recv_ctxt *rctxt,  | 
|---|
 | 723 | +				int status)  | 
|---|
| 621 | 724 |  { | 
|---|
| 622 |  | -	struct svc_rdma_send_ctxt *ctxt;  | 
|---|
| 623 |  | -	unsigned int length;  | 
|---|
| 624 |  | -	__be32 *p;  | 
|---|
| 625 |  | -	int ret;  | 
|---|
 | 725 | +	struct svc_rdma_send_ctxt *sctxt;  | 
|---|
| 626 | 726 |   | 
|---|
| 627 |  | -	ctxt = svc_rdma_send_ctxt_get(xprt);  | 
|---|
| 628 |  | -	if (!ctxt)  | 
|---|
 | 727 | +	sctxt = svc_rdma_send_ctxt_get(rdma);  | 
|---|
 | 728 | +	if (!sctxt)  | 
|---|
| 629 | 729 |  		return; | 
|---|
| 630 |  | -  | 
|---|
| 631 |  | -	p = ctxt->sc_xprt_buf;  | 
|---|
| 632 |  | -	*p++ = *rdma_argp;  | 
|---|
| 633 |  | -	*p++ = *(rdma_argp + 1);  | 
|---|
| 634 |  | -	*p++ = xprt->sc_fc_credits;  | 
|---|
| 635 |  | -	*p++ = rdma_error;  | 
|---|
| 636 |  | -	switch (status) {  | 
|---|
| 637 |  | -	case -EPROTONOSUPPORT:  | 
|---|
| 638 |  | -		*p++ = err_vers;  | 
|---|
| 639 |  | -		*p++ = rpcrdma_version;  | 
|---|
| 640 |  | -		*p++ = rpcrdma_version;  | 
|---|
| 641 |  | -		trace_svcrdma_err_vers(*rdma_argp);  | 
|---|
| 642 |  | -		break;  | 
|---|
| 643 |  | -	default:  | 
|---|
| 644 |  | -		*p++ = err_chunk;  | 
|---|
| 645 |  | -		trace_svcrdma_err_chunk(*rdma_argp);  | 
|---|
| 646 |  | -	}  | 
|---|
| 647 |  | -	length = (unsigned long)p - (unsigned long)ctxt->sc_xprt_buf;  | 
|---|
| 648 |  | -	svc_rdma_sync_reply_hdr(xprt, ctxt, length);  | 
|---|
| 649 |  | -  | 
|---|
| 650 |  | -	ctxt->sc_send_wr.opcode = IB_WR_SEND;  | 
|---|
| 651 |  | -	ret = svc_rdma_send(xprt, &ctxt->sc_send_wr);  | 
|---|
| 652 |  | -	if (ret)  | 
|---|
| 653 |  | -		svc_rdma_send_ctxt_put(xprt, ctxt);  | 
|---|
 | 730 | +	svc_rdma_send_error_msg(rdma, sctxt, rctxt, status);  | 
|---|
| 654 | 731 |  } | 
|---|
| 655 | 732 |   | 
|---|
| 656 | 733 |  /* By convention, backchannel calls arrive via rdma_msg type | 
|---|
| .. | .. | 
|---|
| 757 | 834 |  	rqstp->rq_next_page = rqstp->rq_respages; | 
|---|
| 758 | 835 |   | 
|---|
| 759 | 836 |  	p = (__be32 *)rqstp->rq_arg.head[0].iov_base; | 
|---|
| 760 |  | -	ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg);  | 
|---|
 | 837 | +	ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt);  | 
|---|
| 761 | 838 |  	if (ret < 0) | 
|---|
| 762 | 839 |  		goto out_err; | 
|---|
| 763 | 840 |  	if (ret == 0) | 
|---|
| 764 | 841 |  		goto out_drop; | 
|---|
| 765 | 842 |  	rqstp->rq_xprt_hlen = ret; | 
|---|
| 766 | 843 |   | 
|---|
| 767 |  | -	if (svc_rdma_is_backchannel_reply(xprt, p)) {  | 
|---|
| 768 |  | -		ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, p,  | 
|---|
| 769 |  | -					       &rqstp->rq_arg);  | 
|---|
| 770 |  | -		svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);  | 
|---|
| 771 |  | -		return ret;  | 
|---|
| 772 |  | -	}  | 
|---|
 | 844 | +	if (svc_rdma_is_backchannel_reply(xprt, p))  | 
|---|
 | 845 | +		goto out_backchannel;  | 
|---|
 | 846 | +  | 
|---|
 | 847 | +	svc_rdma_get_inv_rkey(rdma_xprt, ctxt);  | 
|---|
| 773 | 848 |   | 
|---|
| 774 | 849 |  	p += rpcrdma_fixed_maxsz; | 
|---|
| 775 | 850 |  	if (*p != xdr_zero) | 
|---|
| .. | .. | 
|---|
| 788 | 863 |  	return 0; | 
|---|
| 789 | 864 |   | 
|---|
| 790 | 865 |  out_err: | 
|---|
| 791 |  | -	svc_rdma_send_error(rdma_xprt, p, ret);  | 
|---|
 | 866 | +	svc_rdma_send_error(rdma_xprt, ctxt, ret);  | 
|---|
| 792 | 867 |  	svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); | 
|---|
| 793 | 868 |  	return 0; | 
|---|
| 794 | 869 |   | 
|---|
| 795 | 870 |  out_postfail: | 
|---|
| 796 | 871 |  	if (ret == -EINVAL) | 
|---|
| 797 |  | -		svc_rdma_send_error(rdma_xprt, p, ret);  | 
|---|
 | 872 | +		svc_rdma_send_error(rdma_xprt, ctxt, ret);  | 
|---|
| 798 | 873 |  	svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); | 
|---|
| 799 | 874 |  	return ret; | 
|---|
| 800 | 875 |   | 
|---|
 | 876 | +out_backchannel:  | 
|---|
 | 877 | +	svc_rdma_handle_bc_reply(rqstp, ctxt);  | 
|---|
| 801 | 878 |  out_drop: | 
|---|
| 802 | 879 |  	svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); | 
|---|
| 803 | 880 |  	return 0; | 
|---|