.. | .. |
---|
70 | 70 | |
---|
71 | 71 | static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; |
---|
72 | 72 | unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; |
---|
73 | | -static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; |
---|
| 73 | +unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; |
---|
74 | 74 | unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR; |
---|
75 | 75 | int xprt_rdma_pad_optimize; |
---|
76 | 76 | |
---|
.. | .. |
---|
80 | 80 | static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE; |
---|
81 | 81 | static unsigned int min_inline_size = RPCRDMA_MIN_INLINE; |
---|
82 | 82 | static unsigned int max_inline_size = RPCRDMA_MAX_INLINE; |
---|
83 | | -static unsigned int zero; |
---|
84 | 83 | static unsigned int max_padding = PAGE_SIZE; |
---|
85 | 84 | static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS; |
---|
86 | 85 | static unsigned int max_memreg = RPCRDMA_LAST - 1; |
---|
.. | .. |
---|
122 | 121 | .maxlen = sizeof(unsigned int), |
---|
123 | 122 | .mode = 0644, |
---|
124 | 123 | .proc_handler = proc_dointvec_minmax, |
---|
125 | | - .extra1 = &zero, |
---|
| 124 | + .extra1 = SYSCTL_ZERO, |
---|
126 | 125 | .extra2 = &max_padding, |
---|
127 | 126 | }, |
---|
128 | 127 | { |
---|
.. | .. |
---|
225 | 224 | } |
---|
226 | 225 | } |
---|
227 | 226 | |
---|
228 | | -void |
---|
229 | | -rpcrdma_conn_func(struct rpcrdma_ep *ep) |
---|
230 | | -{ |
---|
231 | | - schedule_delayed_work(&ep->rep_connect_worker, 0); |
---|
232 | | -} |
---|
233 | | - |
---|
234 | | -void |
---|
235 | | -rpcrdma_connect_worker(struct work_struct *work) |
---|
236 | | -{ |
---|
237 | | - struct rpcrdma_ep *ep = |
---|
238 | | - container_of(work, struct rpcrdma_ep, rep_connect_worker.work); |
---|
239 | | - struct rpcrdma_xprt *r_xprt = |
---|
240 | | - container_of(ep, struct rpcrdma_xprt, rx_ep); |
---|
241 | | - struct rpc_xprt *xprt = &r_xprt->rx_xprt; |
---|
242 | | - |
---|
243 | | - spin_lock_bh(&xprt->transport_lock); |
---|
244 | | - if (ep->rep_connected > 0) { |
---|
245 | | - if (!xprt_test_and_set_connected(xprt)) { |
---|
246 | | - xprt->stat.connect_count++; |
---|
247 | | - xprt->stat.connect_time += (long)jiffies - |
---|
248 | | - xprt->stat.connect_start; |
---|
249 | | - xprt_wake_pending_tasks(xprt, 0); |
---|
250 | | - } |
---|
251 | | - } else { |
---|
252 | | - if (xprt_test_and_clear_connected(xprt)) |
---|
253 | | - xprt_wake_pending_tasks(xprt, -ENOTCONN); |
---|
254 | | - } |
---|
255 | | - spin_unlock_bh(&xprt->transport_lock); |
---|
256 | | -} |
---|
257 | | - |
---|
| 227 | +/** |
---|
| 228 | + * xprt_rdma_connect_worker - establish connection in the background |
---|
| 229 | + * @work: worker thread context |
---|
| 230 | + * |
---|
| 231 | + * Requester holds the xprt's send lock to prevent activity on this |
---|
| 232 | + * transport while a fresh connection is being established. RPC tasks |
---|
| 233 | + * sleep on the xprt's pending queue waiting for connect to complete. |
---|
| 234 | + */ |
---|
258 | 235 | static void |
---|
259 | 236 | xprt_rdma_connect_worker(struct work_struct *work) |
---|
260 | 237 | { |
---|
261 | 238 | struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt, |
---|
262 | 239 | rx_connect_worker.work); |
---|
263 | 240 | struct rpc_xprt *xprt = &r_xprt->rx_xprt; |
---|
264 | | - int rc = 0; |
---|
| 241 | + int rc; |
---|
265 | 242 | |
---|
266 | | - xprt_clear_connected(xprt); |
---|
267 | | - |
---|
268 | | - rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); |
---|
269 | | - if (rc) |
---|
270 | | - xprt_wake_pending_tasks(xprt, rc); |
---|
271 | | - |
---|
| 243 | + rc = rpcrdma_xprt_connect(r_xprt); |
---|
272 | 244 | xprt_clear_connecting(xprt); |
---|
| 245 | + if (!rc) { |
---|
| 246 | + xprt->connect_cookie++; |
---|
| 247 | + xprt->stat.connect_count++; |
---|
| 248 | + xprt->stat.connect_time += (long)jiffies - |
---|
| 249 | + xprt->stat.connect_start; |
---|
| 250 | + xprt_set_connected(xprt); |
---|
| 251 | + rc = -EAGAIN; |
---|
| 252 | + } else |
---|
| 253 | + rpcrdma_xprt_disconnect(r_xprt); |
---|
| 254 | + xprt_unlock_connect(xprt, r_xprt); |
---|
| 255 | + xprt_wake_pending_tasks(xprt, rc); |
---|
273 | 256 | } |
---|
274 | 257 | |
---|
| 258 | +/** |
---|
| 259 | + * xprt_rdma_inject_disconnect - inject a connection fault |
---|
| 260 | + * @xprt: transport context |
---|
| 261 | + * |
---|
| 262 | + * If @xprt is connected, disconnect it to simulate spurious |
---|
| 263 | + * connection loss. Caller must hold @xprt's send lock to |
---|
| 264 | + * ensure that data structures and hardware resources are |
---|
| 265 | + * stable during the rdma_disconnect() call. |
---|
| 266 | + */ |
---|
275 | 267 | static void |
---|
276 | 268 | xprt_rdma_inject_disconnect(struct rpc_xprt *xprt) |
---|
277 | 269 | { |
---|
278 | | - struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt, |
---|
279 | | - rx_xprt); |
---|
| 270 | + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
---|
280 | 271 | |
---|
281 | | - trace_xprtrdma_inject_dsc(r_xprt); |
---|
282 | | - rdma_disconnect(r_xprt->rx_ia.ri_id); |
---|
| 272 | + trace_xprtrdma_op_inject_dsc(r_xprt); |
---|
| 273 | + rdma_disconnect(r_xprt->rx_ep->re_id); |
---|
283 | 274 | } |
---|
284 | 275 | |
---|
285 | | -/* |
---|
286 | | - * xprt_rdma_destroy |
---|
| 276 | +/** |
---|
| 277 | + * xprt_rdma_destroy - Full tear down of transport |
---|
| 278 | + * @xprt: doomed transport context |
---|
287 | 279 | * |
---|
288 | | - * Destroy the xprt. |
---|
289 | | - * Free all memory associated with the object, including its own. |
---|
290 | | - * NOTE: none of the *destroy methods free memory for their top-level |
---|
291 | | - * objects, even though they may have allocated it (they do free |
---|
292 | | - * private memory). It's up to the caller to handle it. In this |
---|
293 | | - * case (RDMA transport), all structure memory is inlined with the |
---|
294 | | - * struct rpcrdma_xprt. |
---|
| 280 | + * Caller guarantees there will be no more calls to us with |
---|
| 281 | + * this @xprt. |
---|
295 | 282 | */ |
---|
296 | 283 | static void |
---|
297 | 284 | xprt_rdma_destroy(struct rpc_xprt *xprt) |
---|
298 | 285 | { |
---|
299 | 286 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
---|
300 | 287 | |
---|
301 | | - trace_xprtrdma_destroy(r_xprt); |
---|
302 | | - |
---|
303 | 288 | cancel_delayed_work_sync(&r_xprt->rx_connect_worker); |
---|
304 | 289 | |
---|
305 | | - xprt_clear_connected(xprt); |
---|
306 | | - |
---|
307 | | - rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia); |
---|
| 290 | + rpcrdma_xprt_disconnect(r_xprt); |
---|
308 | 291 | rpcrdma_buffer_destroy(&r_xprt->rx_buf); |
---|
309 | | - rpcrdma_ia_close(&r_xprt->rx_ia); |
---|
310 | 292 | |
---|
311 | 293 | xprt_rdma_free_addresses(xprt); |
---|
312 | 294 | xprt_free(xprt); |
---|
.. | .. |
---|
314 | 296 | module_put(THIS_MODULE); |
---|
315 | 297 | } |
---|
316 | 298 | |
---|
| 299 | +/* 60 second timeout, no retries */ |
---|
317 | 300 | static const struct rpc_timeout xprt_rdma_default_timeout = { |
---|
318 | 301 | .to_initval = 60 * HZ, |
---|
319 | 302 | .to_maxval = 60 * HZ, |
---|
.. | .. |
---|
327 | 310 | static struct rpc_xprt * |
---|
328 | 311 | xprt_setup_rdma(struct xprt_create *args) |
---|
329 | 312 | { |
---|
330 | | - struct rpcrdma_create_data_internal cdata; |
---|
331 | 313 | struct rpc_xprt *xprt; |
---|
332 | 314 | struct rpcrdma_xprt *new_xprt; |
---|
333 | | - struct rpcrdma_ep *new_ep; |
---|
334 | 315 | struct sockaddr *sap; |
---|
335 | 316 | int rc; |
---|
336 | 317 | |
---|
337 | | - if (args->addrlen > sizeof(xprt->addr)) { |
---|
338 | | - dprintk("RPC: %s: address too large\n", __func__); |
---|
| 318 | + if (args->addrlen > sizeof(xprt->addr)) |
---|
339 | 319 | return ERR_PTR(-EBADF); |
---|
340 | | - } |
---|
341 | 320 | |
---|
342 | | - xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, 0); |
---|
343 | | - if (xprt == NULL) { |
---|
344 | | - dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", |
---|
345 | | - __func__); |
---|
| 321 | + if (!try_module_get(THIS_MODULE)) |
---|
| 322 | + return ERR_PTR(-EIO); |
---|
| 323 | + |
---|
| 324 | + xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, |
---|
| 325 | + xprt_rdma_slot_table_entries); |
---|
| 326 | + if (!xprt) { |
---|
| 327 | + module_put(THIS_MODULE); |
---|
346 | 328 | return ERR_PTR(-ENOMEM); |
---|
347 | 329 | } |
---|
348 | 330 | |
---|
349 | | - /* 60 second timeout, no retries */ |
---|
350 | 331 | xprt->timeout = &xprt_rdma_default_timeout; |
---|
| 332 | + xprt->connect_timeout = xprt->timeout->to_initval; |
---|
| 333 | + xprt->max_reconnect_timeout = xprt->timeout->to_maxval; |
---|
351 | 334 | xprt->bind_timeout = RPCRDMA_BIND_TO; |
---|
352 | 335 | xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; |
---|
353 | 336 | xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; |
---|
354 | 337 | |
---|
355 | 338 | xprt->resvport = 0; /* privileged port not needed */ |
---|
356 | | - xprt->tsh_size = 0; /* RPC-RDMA handles framing */ |
---|
357 | 339 | xprt->ops = &xprt_rdma_procs; |
---|
358 | 340 | |
---|
359 | 341 | /* |
---|
.. | .. |
---|
371 | 353 | xprt_set_bound(xprt); |
---|
372 | 354 | xprt_rdma_format_addresses(xprt, sap); |
---|
373 | 355 | |
---|
374 | | - cdata.max_requests = xprt_rdma_slot_table_entries; |
---|
375 | | - |
---|
376 | | - cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */ |
---|
377 | | - cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */ |
---|
378 | | - |
---|
379 | | - cdata.inline_wsize = xprt_rdma_max_inline_write; |
---|
380 | | - if (cdata.inline_wsize > cdata.wsize) |
---|
381 | | - cdata.inline_wsize = cdata.wsize; |
---|
382 | | - |
---|
383 | | - cdata.inline_rsize = xprt_rdma_max_inline_read; |
---|
384 | | - if (cdata.inline_rsize > cdata.rsize) |
---|
385 | | - cdata.inline_rsize = cdata.rsize; |
---|
386 | | - |
---|
387 | | - /* |
---|
388 | | - * Create new transport instance, which includes initialized |
---|
389 | | - * o ia |
---|
390 | | - * o endpoint |
---|
391 | | - * o buffers |
---|
392 | | - */ |
---|
393 | | - |
---|
394 | 356 | new_xprt = rpcx_to_rdmax(xprt); |
---|
395 | | - |
---|
396 | | - rc = rpcrdma_ia_open(new_xprt); |
---|
397 | | - if (rc) |
---|
398 | | - goto out1; |
---|
399 | | - |
---|
400 | | - /* |
---|
401 | | - * initialize and create ep |
---|
402 | | - */ |
---|
403 | | - new_xprt->rx_data = cdata; |
---|
404 | | - new_ep = &new_xprt->rx_ep; |
---|
405 | | - |
---|
406 | | - rc = rpcrdma_ep_create(&new_xprt->rx_ep, |
---|
407 | | - &new_xprt->rx_ia, &new_xprt->rx_data); |
---|
408 | | - if (rc) |
---|
409 | | - goto out2; |
---|
410 | | - |
---|
411 | 357 | rc = rpcrdma_buffer_create(new_xprt); |
---|
412 | | - if (rc) |
---|
413 | | - goto out3; |
---|
| 358 | + if (rc) { |
---|
| 359 | + xprt_rdma_free_addresses(xprt); |
---|
| 360 | + xprt_free(xprt); |
---|
| 361 | + module_put(THIS_MODULE); |
---|
| 362 | + return ERR_PTR(rc); |
---|
| 363 | + } |
---|
414 | 364 | |
---|
415 | 365 | INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, |
---|
416 | 366 | xprt_rdma_connect_worker); |
---|
417 | 367 | |
---|
418 | | - xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt); |
---|
419 | | - if (xprt->max_payload == 0) |
---|
420 | | - goto out4; |
---|
421 | | - xprt->max_payload <<= PAGE_SHIFT; |
---|
422 | | - dprintk("RPC: %s: transport data payload maximum: %zu bytes\n", |
---|
423 | | - __func__, xprt->max_payload); |
---|
| 368 | + xprt->max_payload = RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT; |
---|
424 | 369 | |
---|
425 | | - if (!try_module_get(THIS_MODULE)) |
---|
426 | | - goto out4; |
---|
427 | | - |
---|
428 | | - dprintk("RPC: %s: %s:%s\n", __func__, |
---|
429 | | - xprt->address_strings[RPC_DISPLAY_ADDR], |
---|
430 | | - xprt->address_strings[RPC_DISPLAY_PORT]); |
---|
431 | | - trace_xprtrdma_create(new_xprt); |
---|
432 | 370 | return xprt; |
---|
433 | | - |
---|
434 | | -out4: |
---|
435 | | - rpcrdma_buffer_destroy(&new_xprt->rx_buf); |
---|
436 | | - rc = -ENODEV; |
---|
437 | | -out3: |
---|
438 | | - rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); |
---|
439 | | -out2: |
---|
440 | | - rpcrdma_ia_close(&new_xprt->rx_ia); |
---|
441 | | -out1: |
---|
442 | | - trace_xprtrdma_destroy(new_xprt); |
---|
443 | | - xprt_rdma_free_addresses(xprt); |
---|
444 | | - xprt_free(xprt); |
---|
445 | | - return ERR_PTR(rc); |
---|
446 | 371 | } |
---|
447 | 372 | |
---|
448 | 373 | /** |
---|
449 | | - * xprt_rdma_close - Close down RDMA connection |
---|
450 | | - * @xprt: generic transport to be closed |
---|
| 374 | + * xprt_rdma_close - close a transport connection |
---|
| 375 | + * @xprt: transport context |
---|
451 | 376 | * |
---|
452 | | - * Called during transport shutdown reconnect, or device |
---|
453 | | - * removal. Caller holds the transport's write lock. |
---|
| 377 | + * Called during autoclose or device removal. |
---|
| 378 | + * |
---|
| 379 | + * Caller holds @xprt's send lock to prevent activity on this |
---|
| 380 | + * transport while the connection is torn down. |
---|
454 | 381 | */ |
---|
455 | | -static void |
---|
456 | | -xprt_rdma_close(struct rpc_xprt *xprt) |
---|
| 382 | +void xprt_rdma_close(struct rpc_xprt *xprt) |
---|
457 | 383 | { |
---|
458 | 384 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
---|
459 | | - struct rpcrdma_ep *ep = &r_xprt->rx_ep; |
---|
460 | | - struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
---|
461 | 385 | |
---|
462 | | - dprintk("RPC: %s: closing xprt %p\n", __func__, xprt); |
---|
| 386 | + rpcrdma_xprt_disconnect(r_xprt); |
---|
463 | 387 | |
---|
464 | | - if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) { |
---|
465 | | - xprt_clear_connected(xprt); |
---|
466 | | - rpcrdma_ia_remove(ia); |
---|
467 | | - return; |
---|
468 | | - } |
---|
469 | | - if (ep->rep_connected == -ENODEV) |
---|
470 | | - return; |
---|
471 | | - if (ep->rep_connected > 0) |
---|
472 | | - xprt->reestablish_timeout = 0; |
---|
| 388 | + xprt->reestablish_timeout = 0; |
---|
| 389 | + ++xprt->connect_cookie; |
---|
473 | 390 | xprt_disconnect_done(xprt); |
---|
474 | | - rpcrdma_ep_disconnect(ep, ia); |
---|
475 | | - |
---|
476 | | - /* Prepare @xprt for the next connection by reinitializing |
---|
477 | | - * its credit grant to one (see RFC 8166, Section 3.3.3). |
---|
478 | | - */ |
---|
479 | | - r_xprt->rx_buf.rb_credits = 1; |
---|
480 | | - xprt->cwnd = RPC_CWNDSHIFT; |
---|
481 | 391 | } |
---|
482 | 392 | |
---|
483 | 393 | /** |
---|
.. | .. |
---|
492 | 402 | { |
---|
493 | 403 | struct sockaddr *sap = (struct sockaddr *)&xprt->addr; |
---|
494 | 404 | char buf[8]; |
---|
495 | | - |
---|
496 | | - dprintk("RPC: %s: setting port for xprt %p (%s:%s) to %u\n", |
---|
497 | | - __func__, xprt, |
---|
498 | | - xprt->address_strings[RPC_DISPLAY_ADDR], |
---|
499 | | - xprt->address_strings[RPC_DISPLAY_PORT], |
---|
500 | | - port); |
---|
501 | 405 | |
---|
502 | 406 | rpc_set_port(sap, port); |
---|
503 | 407 | |
---|
.. | .. |
---|
529 | 433 | xprt_force_disconnect(xprt); |
---|
530 | 434 | } |
---|
531 | 435 | |
---|
| 436 | +/** |
---|
| 437 | + * xprt_rdma_set_connect_timeout - set timeouts for establishing a connection |
---|
| 438 | + * @xprt: controlling transport instance |
---|
| 439 | + * @connect_timeout: reconnect timeout after client disconnects |
---|
| 440 | + * @reconnect_timeout: reconnect timeout after server disconnects |
---|
| 441 | + * |
---|
| 442 | + */ |
---|
| 443 | +static void xprt_rdma_set_connect_timeout(struct rpc_xprt *xprt, |
---|
| 444 | + unsigned long connect_timeout, |
---|
| 445 | + unsigned long reconnect_timeout) |
---|
| 446 | +{ |
---|
| 447 | + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
---|
| 448 | + |
---|
| 449 | + trace_xprtrdma_op_set_cto(r_xprt, connect_timeout, reconnect_timeout); |
---|
| 450 | + |
---|
| 451 | + spin_lock(&xprt->transport_lock); |
---|
| 452 | + |
---|
| 453 | + if (connect_timeout < xprt->connect_timeout) { |
---|
| 454 | + struct rpc_timeout to; |
---|
| 455 | + unsigned long initval; |
---|
| 456 | + |
---|
| 457 | + to = *xprt->timeout; |
---|
| 458 | + initval = connect_timeout; |
---|
| 459 | + if (initval < RPCRDMA_INIT_REEST_TO << 1) |
---|
| 460 | + initval = RPCRDMA_INIT_REEST_TO << 1; |
---|
| 461 | + to.to_initval = initval; |
---|
| 462 | + to.to_maxval = initval; |
---|
| 463 | + r_xprt->rx_timeout = to; |
---|
| 464 | + xprt->timeout = &r_xprt->rx_timeout; |
---|
| 465 | + xprt->connect_timeout = connect_timeout; |
---|
| 466 | + } |
---|
| 467 | + |
---|
| 468 | + if (reconnect_timeout < xprt->max_reconnect_timeout) |
---|
| 469 | + xprt->max_reconnect_timeout = reconnect_timeout; |
---|
| 470 | + |
---|
| 471 | + spin_unlock(&xprt->transport_lock); |
---|
| 472 | +} |
---|
| 473 | + |
---|
| 474 | +/** |
---|
| 475 | + * xprt_rdma_connect - schedule an attempt to reconnect |
---|
| 476 | + * @xprt: transport state |
---|
| 477 | + * @task: RPC scheduler context (unused) |
---|
| 478 | + * |
---|
| 479 | + */ |
---|
532 | 480 | static void |
---|
533 | 481 | xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) |
---|
534 | 482 | { |
---|
535 | 483 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
---|
| 484 | + struct rpcrdma_ep *ep = r_xprt->rx_ep; |
---|
| 485 | + unsigned long delay; |
---|
536 | 486 | |
---|
537 | | - if (r_xprt->rx_ep.rep_connected != 0) { |
---|
538 | | - /* Reconnect */ |
---|
539 | | - schedule_delayed_work(&r_xprt->rx_connect_worker, |
---|
540 | | - xprt->reestablish_timeout); |
---|
541 | | - xprt->reestablish_timeout <<= 1; |
---|
542 | | - if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO) |
---|
543 | | - xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO; |
---|
544 | | - else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO) |
---|
545 | | - xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; |
---|
546 | | - } else { |
---|
547 | | - schedule_delayed_work(&r_xprt->rx_connect_worker, 0); |
---|
548 | | - if (!RPC_IS_ASYNC(task)) |
---|
549 | | - flush_delayed_work(&r_xprt->rx_connect_worker); |
---|
| 487 | + WARN_ON_ONCE(!xprt_lock_connect(xprt, task, r_xprt)); |
---|
| 488 | + |
---|
| 489 | + delay = 0; |
---|
| 490 | + if (ep && ep->re_connect_status != 0) { |
---|
| 491 | + delay = xprt_reconnect_delay(xprt); |
---|
| 492 | + xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO); |
---|
550 | 493 | } |
---|
| 494 | + trace_xprtrdma_op_connect(r_xprt, delay); |
---|
| 495 | + queue_delayed_work(xprtiod_workqueue, &r_xprt->rx_connect_worker, |
---|
| 496 | + delay); |
---|
551 | 497 | } |
---|
552 | 498 | |
---|
553 | 499 | /** |
---|
.. | .. |
---|
573 | 519 | return; |
---|
574 | 520 | |
---|
575 | 521 | out_sleep: |
---|
576 | | - rpc_sleep_on(&xprt->backlog, task, NULL); |
---|
577 | | - task->tk_status = -EAGAIN; |
---|
| 522 | + task->tk_status = -ENOMEM; |
---|
| 523 | + xprt_add_backlog(xprt, task); |
---|
578 | 524 | } |
---|
579 | 525 | |
---|
580 | 526 | /** |
---|
.. | .. |
---|
586 | 532 | static void |
---|
587 | 533 | xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst) |
---|
588 | 534 | { |
---|
589 | | - memset(rqst, 0, sizeof(*rqst)); |
---|
590 | | - rpcrdma_buffer_put(rpcr_to_rdmar(rqst)); |
---|
591 | | - rpc_wake_up_next(&xprt->backlog); |
---|
| 535 | + struct rpcrdma_xprt *r_xprt = |
---|
| 536 | + container_of(xprt, struct rpcrdma_xprt, rx_xprt); |
---|
| 537 | + |
---|
| 538 | + rpcrdma_reply_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst)); |
---|
| 539 | + if (!xprt_wake_up_backlog(xprt, rqst)) { |
---|
| 540 | + memset(rqst, 0, sizeof(*rqst)); |
---|
| 541 | + rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst)); |
---|
| 542 | + } |
---|
592 | 543 | } |
---|
593 | 544 | |
---|
594 | | -static bool |
---|
595 | | -rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, |
---|
596 | | - size_t size, gfp_t flags) |
---|
| 545 | +static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt, |
---|
| 546 | + struct rpcrdma_regbuf *rb, size_t size, |
---|
| 547 | + gfp_t flags) |
---|
597 | 548 | { |
---|
598 | | - struct rpcrdma_regbuf *rb; |
---|
599 | | - |
---|
600 | | - if (req->rl_sendbuf && rdmab_length(req->rl_sendbuf) >= size) |
---|
601 | | - return true; |
---|
602 | | - |
---|
603 | | - rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags); |
---|
604 | | - if (IS_ERR(rb)) |
---|
605 | | - return false; |
---|
606 | | - |
---|
607 | | - rpcrdma_free_regbuf(req->rl_sendbuf); |
---|
608 | | - r_xprt->rx_stats.hardway_register_count += size; |
---|
609 | | - req->rl_sendbuf = rb; |
---|
610 | | - return true; |
---|
611 | | -} |
---|
612 | | - |
---|
613 | | -/* The rq_rcv_buf is used only if a Reply chunk is necessary. |
---|
614 | | - * The decision to use a Reply chunk is made later in |
---|
615 | | - * rpcrdma_marshal_req. This buffer is registered at that time. |
---|
616 | | - * |
---|
617 | | - * Otherwise, the associated RPC Reply arrives in a separate |
---|
618 | | - * Receive buffer, arbitrarily chosen by the HCA. The buffer |
---|
619 | | - * allocated here for the RPC Reply is not utilized in that |
---|
620 | | - * case. See rpcrdma_inline_fixup. |
---|
621 | | - * |
---|
622 | | - * A regbuf is used here to remember the buffer size. |
---|
623 | | - */ |
---|
624 | | -static bool |
---|
625 | | -rpcrdma_get_recvbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, |
---|
626 | | - size_t size, gfp_t flags) |
---|
627 | | -{ |
---|
628 | | - struct rpcrdma_regbuf *rb; |
---|
629 | | - |
---|
630 | | - if (req->rl_recvbuf && rdmab_length(req->rl_recvbuf) >= size) |
---|
631 | | - return true; |
---|
632 | | - |
---|
633 | | - rb = rpcrdma_alloc_regbuf(size, DMA_NONE, flags); |
---|
634 | | - if (IS_ERR(rb)) |
---|
635 | | - return false; |
---|
636 | | - |
---|
637 | | - rpcrdma_free_regbuf(req->rl_recvbuf); |
---|
638 | | - r_xprt->rx_stats.hardway_register_count += size; |
---|
639 | | - req->rl_recvbuf = rb; |
---|
| 549 | + if (unlikely(rdmab_length(rb) < size)) { |
---|
| 550 | + if (!rpcrdma_regbuf_realloc(rb, size, flags)) |
---|
| 551 | + return false; |
---|
| 552 | + r_xprt->rx_stats.hardway_register_count += size; |
---|
| 553 | + } |
---|
640 | 554 | return true; |
---|
641 | 555 | } |
---|
642 | 556 | |
---|
.. | .. |
---|
648 | 562 | * 0: Success; rq_buffer points to RPC buffer to use |
---|
649 | 563 | * ENOMEM: Out of memory, call again later |
---|
650 | 564 | * EIO: A permanent error occurred, do not retry |
---|
651 | | - * |
---|
652 | | - * The RDMA allocate/free functions need the task structure as a place |
---|
653 | | - * to hide the struct rpcrdma_req, which is necessary for the actual |
---|
654 | | - * send/recv sequence. |
---|
655 | | - * |
---|
656 | | - * xprt_rdma_allocate provides buffers that are already mapped for |
---|
657 | | - * DMA, and a local DMA lkey is provided for each. |
---|
658 | 565 | */ |
---|
659 | 566 | static int |
---|
660 | 567 | xprt_rdma_allocate(struct rpc_task *task) |
---|
.. | .. |
---|
665 | 572 | gfp_t flags; |
---|
666 | 573 | |
---|
667 | 574 | flags = RPCRDMA_DEF_GFP; |
---|
| 575 | + if (RPC_IS_ASYNC(task)) |
---|
| 576 | + flags = GFP_NOWAIT | __GFP_NOWARN; |
---|
668 | 577 | if (RPC_IS_SWAPPER(task)) |
---|
669 | | - flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; |
---|
| 578 | + flags |= __GFP_MEMALLOC; |
---|
670 | 579 | |
---|
671 | | - if (!rpcrdma_get_sendbuf(r_xprt, req, rqst->rq_callsize, flags)) |
---|
| 580 | + if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize, |
---|
| 581 | + flags)) |
---|
672 | 582 | goto out_fail; |
---|
673 | | - if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) |
---|
| 583 | + if (!rpcrdma_check_regbuf(r_xprt, req->rl_recvbuf, rqst->rq_rcvsize, |
---|
| 584 | + flags)) |
---|
674 | 585 | goto out_fail; |
---|
675 | 586 | |
---|
676 | | - rqst->rq_buffer = req->rl_sendbuf->rg_base; |
---|
677 | | - rqst->rq_rbuffer = req->rl_recvbuf->rg_base; |
---|
678 | | - trace_xprtrdma_allocate(task, req); |
---|
| 587 | + rqst->rq_buffer = rdmab_data(req->rl_sendbuf); |
---|
| 588 | + rqst->rq_rbuffer = rdmab_data(req->rl_recvbuf); |
---|
679 | 589 | return 0; |
---|
680 | 590 | |
---|
681 | 591 | out_fail: |
---|
682 | | - trace_xprtrdma_allocate(task, NULL); |
---|
683 | 592 | return -ENOMEM; |
---|
684 | 593 | } |
---|
685 | 594 | |
---|
.. | .. |
---|
696 | 605 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); |
---|
697 | 606 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
---|
698 | 607 | |
---|
699 | | - if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) |
---|
700 | | - rpcrdma_release_rqst(r_xprt, req); |
---|
701 | | - trace_xprtrdma_rpc_done(task, req); |
---|
| 608 | + if (!list_empty(&req->rl_registered)) |
---|
| 609 | + frwr_unmap_sync(r_xprt, req); |
---|
| 610 | + |
---|
| 611 | + /* XXX: If the RPC is completing because of a signal and |
---|
| 612 | + * not because a reply was received, we ought to ensure |
---|
| 613 | + * that the Send completion has fired, so that memory |
---|
| 614 | + * involved with the Send is not still visible to the NIC. |
---|
| 615 | + */ |
---|
702 | 616 | } |
---|
703 | 617 | |
---|
704 | 618 | /** |
---|
705 | 619 | * xprt_rdma_send_request - marshal and send an RPC request |
---|
706 | | - * @task: RPC task with an RPC message in rq_snd_buf |
---|
| 620 | + * @rqst: RPC message in rq_snd_buf |
---|
707 | 621 | * |
---|
708 | 622 | * Caller holds the transport's write lock. |
---|
709 | 623 | * |
---|
.. | .. |
---|
712 | 626 | * %-ENOTCONN if the caller should reconnect and call again |
---|
713 | 627 | * %-EAGAIN if the caller should call again |
---|
714 | 628 | * %-ENOBUFS if the caller should call again after a delay |
---|
715 | | - * %-EIO if a permanent error occurred and the request was not |
---|
716 | | - * sent. Do not try to send this message again. |
---|
| 629 | + * %-EMSGSIZE if encoding ran out of buffer space. The request |
---|
| 630 | + * was not sent. Do not try to send this message again. |
---|
| 631 | + * %-EIO if an I/O error occurred. The request was not sent. |
---|
| 632 | + * Do not try to send this message again. |
---|
717 | 633 | */ |
---|
718 | 634 | static int |
---|
719 | | -xprt_rdma_send_request(struct rpc_task *task) |
---|
| 635 | +xprt_rdma_send_request(struct rpc_rqst *rqst) |
---|
720 | 636 | { |
---|
721 | | - struct rpc_rqst *rqst = task->tk_rqstp; |
---|
722 | 637 | struct rpc_xprt *xprt = rqst->rq_xprt; |
---|
723 | 638 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
---|
724 | 639 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
---|
.. | .. |
---|
730 | 645 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ |
---|
731 | 646 | |
---|
732 | 647 | if (!xprt_connected(xprt)) |
---|
733 | | - goto drop_connection; |
---|
| 648 | + return -ENOTCONN; |
---|
| 649 | + |
---|
| 650 | + if (!xprt_request_get_cong(xprt, rqst)) |
---|
| 651 | + return -EBADSLT; |
---|
734 | 652 | |
---|
735 | 653 | rc = rpcrdma_marshal_req(r_xprt, rqst); |
---|
736 | 654 | if (rc < 0) |
---|
.. | .. |
---|
741 | 659 | goto drop_connection; |
---|
742 | 660 | rqst->rq_xtime = ktime_get(); |
---|
743 | 661 | |
---|
744 | | - __set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); |
---|
745 | | - if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) |
---|
| 662 | + if (rpcrdma_post_sends(r_xprt, req)) |
---|
746 | 663 | goto drop_connection; |
---|
747 | 664 | |
---|
748 | 665 | rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len; |
---|
749 | | - rqst->rq_bytes_sent = 0; |
---|
750 | 666 | |
---|
751 | 667 | /* An RPC with no reply will throw off credit accounting, |
---|
752 | 668 | * so drop the connection to reset the credit grant. |
---|
753 | 669 | */ |
---|
754 | | - if (!rpc_reply_expected(task)) |
---|
| 670 | + if (!rpc_reply_expected(rqst->rq_task)) |
---|
755 | 671 | goto drop_connection; |
---|
756 | 672 | return 0; |
---|
757 | 673 | |
---|
.. | .. |
---|
759 | 675 | if (rc != -ENOTCONN) |
---|
760 | 676 | return rc; |
---|
761 | 677 | drop_connection: |
---|
762 | | - xprt_disconnect_done(xprt); |
---|
763 | | - return -ENOTCONN; /* implies disconnect */ |
---|
| 678 | + xprt_rdma_close(xprt); |
---|
| 679 | + return -ENOTCONN; |
---|
764 | 680 | } |
---|
765 | 681 | |
---|
766 | 682 | void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) |
---|
.. | .. |
---|
776 | 692 | 0, /* need a local port? */ |
---|
777 | 693 | xprt->stat.bind_count, |
---|
778 | 694 | xprt->stat.connect_count, |
---|
779 | | - xprt->stat.connect_time, |
---|
| 695 | + xprt->stat.connect_time / HZ, |
---|
780 | 696 | idle_time, |
---|
781 | 697 | xprt->stat.sends, |
---|
782 | 698 | xprt->stat.recvs, |
---|
.. | .. |
---|
796 | 712 | r_xprt->rx_stats.bad_reply_count, |
---|
797 | 713 | r_xprt->rx_stats.nomsg_call_count); |
---|
798 | 714 | seq_printf(seq, "%lu %lu %lu %lu %lu %lu\n", |
---|
799 | | - r_xprt->rx_stats.mrs_recovered, |
---|
| 715 | + r_xprt->rx_stats.mrs_recycled, |
---|
800 | 716 | r_xprt->rx_stats.mrs_orphaned, |
---|
801 | 717 | r_xprt->rx_stats.mrs_allocated, |
---|
802 | 718 | r_xprt->rx_stats.local_inv_needed, |
---|
.. | .. |
---|
825 | 741 | .alloc_slot = xprt_rdma_alloc_slot, |
---|
826 | 742 | .free_slot = xprt_rdma_free_slot, |
---|
827 | 743 | .release_request = xprt_release_rqst_cong, /* ditto */ |
---|
828 | | - .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */ |
---|
| 744 | + .wait_for_reply_request = xprt_wait_for_reply_request_def, /* ditto */ |
---|
829 | 745 | .timer = xprt_rdma_timer, |
---|
830 | 746 | .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */ |
---|
831 | 747 | .set_port = xprt_rdma_set_port, |
---|
.. | .. |
---|
835 | 751 | .send_request = xprt_rdma_send_request, |
---|
836 | 752 | .close = xprt_rdma_close, |
---|
837 | 753 | .destroy = xprt_rdma_destroy, |
---|
| 754 | + .set_connect_timeout = xprt_rdma_set_connect_timeout, |
---|
838 | 755 | .print_stats = xprt_rdma_print_stats, |
---|
839 | 756 | .enable_swap = xprt_rdma_enable_swap, |
---|
840 | 757 | .disable_swap = xprt_rdma_disable_swap, |
---|
841 | 758 | .inject_disconnect = xprt_rdma_inject_disconnect, |
---|
842 | 759 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) |
---|
843 | 760 | .bc_setup = xprt_rdma_bc_setup, |
---|
844 | | - .bc_up = xprt_rdma_bc_up, |
---|
845 | 761 | .bc_maxpayload = xprt_rdma_bc_maxpayload, |
---|
| 762 | + .bc_num_slots = xprt_rdma_bc_max_slots, |
---|
846 | 763 | .bc_free_rqst = xprt_rdma_bc_free_rqst, |
---|
847 | 764 | .bc_destroy = xprt_rdma_bc_destroy, |
---|
848 | 765 | #endif |
---|
.. | .. |
---|
859 | 776 | |
---|
860 | 777 | void xprt_rdma_cleanup(void) |
---|
861 | 778 | { |
---|
862 | | - int rc; |
---|
863 | | - |
---|
864 | | - dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); |
---|
865 | 779 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
---|
866 | 780 | if (sunrpc_table_header) { |
---|
867 | 781 | unregister_sysctl_table(sunrpc_table_header); |
---|
868 | 782 | sunrpc_table_header = NULL; |
---|
869 | 783 | } |
---|
870 | 784 | #endif |
---|
871 | | - rc = xprt_unregister_transport(&xprt_rdma); |
---|
872 | | - if (rc) |
---|
873 | | - dprintk("RPC: %s: xprt_unregister returned %i\n", |
---|
874 | | - __func__, rc); |
---|
875 | 785 | |
---|
876 | | - rpcrdma_destroy_wq(); |
---|
877 | | - |
---|
878 | | - rc = xprt_unregister_transport(&xprt_rdma_bc); |
---|
879 | | - if (rc) |
---|
880 | | - dprintk("RPC: %s: xprt_unregister(bc) returned %i\n", |
---|
881 | | - __func__, rc); |
---|
| 786 | + xprt_unregister_transport(&xprt_rdma); |
---|
| 787 | + xprt_unregister_transport(&xprt_rdma_bc); |
---|
882 | 788 | } |
---|
883 | 789 | |
---|
884 | 790 | int xprt_rdma_init(void) |
---|
885 | 791 | { |
---|
886 | 792 | int rc; |
---|
887 | 793 | |
---|
888 | | - rc = rpcrdma_alloc_wq(); |
---|
| 794 | + rc = xprt_register_transport(&xprt_rdma); |
---|
889 | 795 | if (rc) |
---|
890 | 796 | return rc; |
---|
891 | | - |
---|
892 | | - rc = xprt_register_transport(&xprt_rdma); |
---|
893 | | - if (rc) { |
---|
894 | | - rpcrdma_destroy_wq(); |
---|
895 | | - return rc; |
---|
896 | | - } |
---|
897 | 797 | |
---|
898 | 798 | rc = xprt_register_transport(&xprt_rdma_bc); |
---|
899 | 799 | if (rc) { |
---|
900 | 800 | xprt_unregister_transport(&xprt_rdma); |
---|
901 | | - rpcrdma_destroy_wq(); |
---|
902 | 801 | return rc; |
---|
903 | 802 | } |
---|
904 | | - |
---|
905 | | - dprintk("RPCRDMA Module Init, register RPC RDMA transport\n"); |
---|
906 | | - |
---|
907 | | - dprintk("Defaults:\n"); |
---|
908 | | - dprintk("\tSlots %d\n" |
---|
909 | | - "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", |
---|
910 | | - xprt_rdma_slot_table_entries, |
---|
911 | | - xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); |
---|
912 | | - dprintk("\tPadding 0\n\tMemreg %d\n", xprt_rdma_memreg_strategy); |
---|
913 | 803 | |
---|
914 | 804 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
---|
915 | 805 | if (!sunrpc_table_header) |
---|