forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-05-10 cde9070d9970eef1f7ec2360586c802a16230ad8
kernel/drivers/infiniband/core/ucma.c
@@ -52,6 +52,9 @@
 #include <rdma/rdma_cm_ib.h>
 #include <rdma/ib_addr.h>
 #include <rdma/ib.h>
+#include <rdma/ib_cm.h>
+#include <rdma/rdma_netlink.h>
+#include "core_priv.h"

 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
@@ -77,15 +80,14 @@
 struct list_head ctx_list;
 struct list_head event_list;
 wait_queue_head_t poll_wait;
- struct workqueue_struct *close_wq;
 };

 struct ucma_context {
- int id;
+ u32 id;
 struct completion comp;
- atomic_t ref;
+ refcount_t ref;
 int events_reported;
- int backlog;
+ atomic_t backlog;

 struct ucma_file *file;
 struct rdma_cm_id *cm_id;
@@ -94,18 +96,12 @@

 struct list_head list;
 struct list_head mc_list;
- /* mark that device is in process of destroying the internal HW
- * resources, protected by the global mut
- */
- int closing;
- /* sync between removal event and id destroy, protected by file mut */
- int destroying;
 struct work_struct close_work;
 };

 struct ucma_multicast {
 struct ucma_context *ctx;
- int id;
+ u32 id;
 int events_reported;

 u64 uid;
@@ -116,28 +112,27 @@

 struct ucma_event {
 struct ucma_context *ctx;
+ struct ucma_context *conn_req_ctx;
 struct ucma_multicast *mc;
 struct list_head list;
- struct rdma_cm_id *cm_id;
 struct rdma_ucm_event_resp resp;
- struct work_struct close_work;
 };

-static DEFINE_MUTEX(mut);
-static DEFINE_IDR(ctx_idr);
-static DEFINE_IDR(multicast_idr);
+static DEFINE_XARRAY_ALLOC(ctx_table);
+static DEFINE_XARRAY_ALLOC(multicast_table);

 static const struct file_operations ucma_fops;
+static int ucma_destroy_private_ctx(struct ucma_context *ctx);

 static inline struct ucma_context *_ucma_find_context(int id,
 struct ucma_file *file)
 {
 struct ucma_context *ctx;

- ctx = idr_find(&ctx_idr, id);
+ ctx = xa_load(&ctx_table, id);
 if (!ctx)
 ctx = ERR_PTR(-ENOENT);
- else if (ctx->file != file || !ctx->cm_id)
+ else if (ctx->file != file)
 ctx = ERR_PTR(-EINVAL);
 return ctx;
 }
@@ -146,21 +141,18 @@
 {
 struct ucma_context *ctx;

- mutex_lock(&mut);
+ xa_lock(&ctx_table);
 ctx = _ucma_find_context(id, file);
- if (!IS_ERR(ctx)) {
- if (ctx->closing)
- ctx = ERR_PTR(-EIO);
- else
- atomic_inc(&ctx->ref);
- }
- mutex_unlock(&mut);
+ if (!IS_ERR(ctx))
+ if (!refcount_inc_not_zero(&ctx->ref))
+ ctx = ERR_PTR(-ENXIO);
+ xa_unlock(&ctx_table);
 return ctx;
 }

 static void ucma_put_ctx(struct ucma_context *ctx)
 {
- if (atomic_dec_and_test(&ctx->ref))
+ if (refcount_dec_and_test(&ctx->ref))
 complete(&ctx->comp);
 }

@@ -181,26 +173,21 @@
 return ctx;
 }

-static void ucma_close_event_id(struct work_struct *work)
-{
- struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work);
-
- rdma_destroy_id(uevent_close->cm_id);
- kfree(uevent_close);
-}
-
 static void ucma_close_id(struct work_struct *work)
 {
 struct ucma_context *ctx = container_of(work, struct ucma_context, close_work);

 /* once all inflight tasks are finished, we close all underlying
 * resources. The context is still alive till its explicit destryoing
- * by its creator.
+ * by its creator. This puts back the xarray's reference.
 */
 ucma_put_ctx(ctx);
 wait_for_completion(&ctx->comp);
 /* No new events will be generated after destroying the id. */
 rdma_destroy_id(ctx->cm_id);
+
+ /* Reading the cm_id without holding a positive ref is not allowed */
+ ctx->cm_id = NULL;
 }

 static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
@@ -212,47 +199,32 @@
 return NULL;

 INIT_WORK(&ctx->close_work, ucma_close_id);
- atomic_set(&ctx->ref, 1);
 init_completion(&ctx->comp);
 INIT_LIST_HEAD(&ctx->mc_list);
+ /* So list_del() will work if we don't do ucma_finish_ctx() */
+ INIT_LIST_HEAD(&ctx->list);
 ctx->file = file;
 mutex_init(&ctx->mutex);

- mutex_lock(&mut);
- ctx->id = idr_alloc(&ctx_idr, ctx, 0, 0, GFP_KERNEL);
- mutex_unlock(&mut);
- if (ctx->id < 0)
- goto error;
-
- list_add_tail(&ctx->list, &file->ctx_list);
+ if (xa_alloc(&ctx_table, &ctx->id, NULL, xa_limit_32b, GFP_KERNEL)) {
+ kfree(ctx);
+ return NULL;
+ }
 return ctx;
-
-error:
- kfree(ctx);
- return NULL;
 }

-static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
+static void ucma_set_ctx_cm_id(struct ucma_context *ctx,
+ struct rdma_cm_id *cm_id)
 {
- struct ucma_multicast *mc;
+ refcount_set(&ctx->ref, 1);
+ ctx->cm_id = cm_id;
+}

- mc = kzalloc(sizeof(*mc), GFP_KERNEL);
- if (!mc)
- return NULL;
-
- mutex_lock(&mut);
- mc->id = idr_alloc(&multicast_idr, NULL, 0, 0, GFP_KERNEL);
- mutex_unlock(&mut);
- if (mc->id < 0)
- goto error;
-
- mc->ctx = ctx;
- list_add_tail(&mc->list, &ctx->mc_list);
- return mc;
-
-error:
- kfree(mc);
- return NULL;
+static void ucma_finish_ctx(struct ucma_context *ctx)
+{
+ lockdep_assert_held(&ctx->file->mut);
+ list_add_tail(&ctx->list, &ctx->file->ctx_list);
+ xa_store(&ctx_table, ctx->id, ctx, GFP_KERNEL);
 }

 static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
@@ -284,10 +256,15 @@
 dst->qkey = src->qkey;
 }

-static void ucma_set_event_context(struct ucma_context *ctx,
- struct rdma_cm_event *event,
- struct ucma_event *uevent)
+static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx,
+ struct rdma_cm_event *event)
 {
+ struct ucma_event *uevent;
+
+ uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
+ if (!uevent)
+ return NULL;
+
 uevent->ctx = ctx;
 switch (event->event) {
 case RDMA_CM_EVENT_MULTICAST_JOIN:
@@ -302,44 +279,55 @@
 uevent->resp.id = ctx->id;
 break;
 }
+ uevent->resp.event = event->event;
+ uevent->resp.status = event->status;
+ if (ctx->cm_id->qp_type == IB_QPT_UD)
+ ucma_copy_ud_event(ctx->cm_id->device, &uevent->resp.param.ud,
+ &event->param.ud);
+ else
+ ucma_copy_conn_event(&uevent->resp.param.conn,
+ &event->param.conn);
+
+ uevent->resp.ece.vendor_id = event->ece.vendor_id;
+ uevent->resp.ece.attr_mod = event->ece.attr_mod;
+ return uevent;
 }

-/* Called with file->mut locked for the relevant context. */
-static void ucma_removal_event_handler(struct rdma_cm_id *cm_id)
+static int ucma_connect_event_handler(struct rdma_cm_id *cm_id,
+ struct rdma_cm_event *event)
 {
- struct ucma_context *ctx = cm_id->context;
- struct ucma_event *con_req_eve;
- int event_found = 0;
+ struct ucma_context *listen_ctx = cm_id->context;
+ struct ucma_context *ctx;
+ struct ucma_event *uevent;

- if (ctx->destroying)
- return;
+ if (!atomic_add_unless(&listen_ctx->backlog, -1, 0))
+ return -ENOMEM;
+ ctx = ucma_alloc_ctx(listen_ctx->file);
+ if (!ctx)
+ goto err_backlog;
+ ucma_set_ctx_cm_id(ctx, cm_id);

- /* only if context is pointing to cm_id that it owns it and can be
- * queued to be closed, otherwise that cm_id is an inflight one that
- * is part of that context event list pending to be detached and
- * reattached to its new context as part of ucma_get_event,
- * handled separately below.
- */
- if (ctx->cm_id == cm_id) {
- mutex_lock(&mut);
- ctx->closing = 1;
- mutex_unlock(&mut);
- queue_work(ctx->file->close_wq, &ctx->close_work);
- return;
- }
+ uevent = ucma_create_uevent(listen_ctx, event);
+ if (!uevent)
+ goto err_alloc;
+ uevent->conn_req_ctx = ctx;
+ uevent->resp.id = ctx->id;

- list_for_each_entry(con_req_eve, &ctx->file->event_list, list) {
- if (con_req_eve->cm_id == cm_id &&
- con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
- list_del(&con_req_eve->list);
- INIT_WORK(&con_req_eve->close_work, ucma_close_event_id);
- queue_work(ctx->file->close_wq, &con_req_eve->close_work);
- event_found = 1;
- break;
- }
- }
- if (!event_found)
- pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n");
+ ctx->cm_id->context = ctx;
+
+ mutex_lock(&ctx->file->mut);
+ ucma_finish_ctx(ctx);
+ list_add_tail(&uevent->list, &ctx->file->event_list);
+ mutex_unlock(&ctx->file->mut);
+ wake_up_interruptible(&ctx->file->poll_wait);
+ return 0;
+
+err_alloc:
+ ucma_destroy_private_ctx(ctx);
+err_backlog:
+ atomic_inc(&listen_ctx->backlog);
+ /* Returning error causes the new ID to be destroyed */
+ return -ENOMEM;
 }

 static int ucma_event_handler(struct rdma_cm_id *cm_id,
@@ -347,69 +335,49 @@
 {
 struct ucma_event *uevent;
 struct ucma_context *ctx = cm_id->context;
- int ret = 0;

- uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
- if (!uevent)
- return event->event == RDMA_CM_EVENT_CONNECT_REQUEST;
+ if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
+ return ucma_connect_event_handler(cm_id, event);

- mutex_lock(&ctx->file->mut);
- uevent->cm_id = cm_id;
- ucma_set_event_context(ctx, event, uevent);
- uevent->resp.event = event->event;
- uevent->resp.status = event->status;
- if (cm_id->qp_type == IB_QPT_UD)
- ucma_copy_ud_event(cm_id->device, &uevent->resp.param.ud,
- &event->param.ud);
- else
- ucma_copy_conn_event(&uevent->resp.param.conn,
- &event->param.conn);
+ /*
+ * We ignore events for new connections until userspace has set their
+ * context. This can only happen if an error occurs on a new connection
+ * before the user accepts it. This is okay, since the accept will just
+ * fail later. However, we do need to release the underlying HW
+ * resources in case of a device removal event.
+ */
+ if (ctx->uid) {
+ uevent = ucma_create_uevent(ctx, event);
+ if (!uevent)
+ return 0;

- if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
- if (!ctx->backlog) {
- ret = -ENOMEM;
- kfree(uevent);
- goto out;
- }
- ctx->backlog--;
- } else if (!ctx->uid || ctx->cm_id != cm_id) {
- /*
- * We ignore events for new connections until userspace has set
- * their context. This can only happen if an error occurs on a
- * new connection before the user accepts it. This is okay,
- * since the accept will just fail later. However, we do need
- * to release the underlying HW resources in case of a device
- * removal event.
- */
- if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
- ucma_removal_event_handler(cm_id);
-
- kfree(uevent);
- goto out;
+ mutex_lock(&ctx->file->mut);
+ list_add_tail(&uevent->list, &ctx->file->event_list);
+ mutex_unlock(&ctx->file->mut);
+ wake_up_interruptible(&ctx->file->poll_wait);
 }

- list_add_tail(&uevent->list, &ctx->file->event_list);
- wake_up_interruptible(&ctx->file->poll_wait);
- if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
- ucma_removal_event_handler(cm_id);
-out:
- mutex_unlock(&ctx->file->mut);
- return ret;
+ if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
+ xa_lock(&ctx_table);
+ if (xa_load(&ctx_table, ctx->id) == ctx)
+ queue_work(system_unbound_wq, &ctx->close_work);
+ xa_unlock(&ctx_table);
+ }
+ return 0;
 }

 static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
 int in_len, int out_len)
 {
- struct ucma_context *ctx;
 struct rdma_ucm_get_event cmd;
 struct ucma_event *uevent;
- int ret = 0;

 /*
 * Old 32 bit user space does not send the 4 byte padding in the
 * reserved field. We don't care, allow it to keep working.
 */
- if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved))
+ if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved) -
+ sizeof(uevent->resp.ece))
 return -ENOSPC;

 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
@@ -429,35 +397,25 @@
 mutex_lock(&file->mut);
 }

- uevent = list_entry(file->event_list.next, struct ucma_event, list);
-
- if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
- ctx = ucma_alloc_ctx(file);
- if (!ctx) {
- ret = -ENOMEM;
- goto done;
- }
- uevent->ctx->backlog++;
- ctx->cm_id = uevent->cm_id;
- ctx->cm_id->context = ctx;
- uevent->resp.id = ctx->id;
- }
+ uevent = list_first_entry(&file->event_list, struct ucma_event, list);

 if (copy_to_user(u64_to_user_ptr(cmd.response),
 &uevent->resp,
 min_t(size_t, out_len, sizeof(uevent->resp)))) {
- ret = -EFAULT;
- goto done;
+ mutex_unlock(&file->mut);
+ return -EFAULT;
 }

 list_del(&uevent->list);
 uevent->ctx->events_reported++;
 if (uevent->mc)
 uevent->mc->events_reported++;
- kfree(uevent);
-done:
+ if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
+ atomic_inc(&uevent->ctx->backlog);
 mutex_unlock(&file->mut);
- return ret;
+
+ kfree(uevent);
+ return 0;
 }

 static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
@@ -498,40 +456,32 @@
 if (ret)
 return ret;

- mutex_lock(&file->mut);
 ctx = ucma_alloc_ctx(file);
- mutex_unlock(&file->mut);
 if (!ctx)
 return -ENOMEM;

 ctx->uid = cmd.uid;
- cm_id = __rdma_create_id(current->nsproxy->net_ns,
- ucma_event_handler, ctx, cmd.ps, qp_type, NULL);
+ cm_id = rdma_create_user_id(ucma_event_handler, ctx, cmd.ps, qp_type);
 if (IS_ERR(cm_id)) {
 ret = PTR_ERR(cm_id);
 goto err1;
 }
+ ucma_set_ctx_cm_id(ctx, cm_id);

 resp.id = ctx->id;
 if (copy_to_user(u64_to_user_ptr(cmd.response),
 &resp, sizeof(resp))) {
- ret = -EFAULT;
- goto err2;
+ ucma_destroy_private_ctx(ctx);
+ return -EFAULT;
 }

- ctx->cm_id = cm_id;
+ mutex_lock(&file->mut);
+ ucma_finish_ctx(ctx);
+ mutex_unlock(&file->mut);
 return 0;

-err2:
- rdma_destroy_id(cm_id);
 err1:
- mutex_lock(&mut);
- idr_remove(&ctx_idr, ctx->id);
- mutex_unlock(&mut);
- mutex_lock(&file->mut);
- list_del(&ctx->list);
- mutex_unlock(&file->mut);
- kfree(ctx);
+ ucma_destroy_private_ctx(ctx);
 return ret;
 }

@@ -539,19 +489,25 @@
 {
 struct ucma_multicast *mc, *tmp;

- mutex_lock(&mut);
+ xa_lock(&multicast_table);
 list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
 list_del(&mc->list);
- idr_remove(&multicast_idr, mc->id);
+ /*
+ * At this point mc->ctx->ref is 0 so the mc cannot leave the
+ * lock on the reader and this is enough serialization
+ */
+ __xa_erase(&multicast_table, mc->id);
 kfree(mc);
 }
- mutex_unlock(&mut);
+ xa_unlock(&multicast_table);
 }

 static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
 {
 struct ucma_event *uevent, *tmp;

+ rdma_lock_handler(mc->ctx->cm_id);
+ mutex_lock(&mc->ctx->file->mut);
 list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
 if (uevent->mc != mc)
 continue;
@@ -559,45 +515,74 @@
 list_del(&uevent->list);
 kfree(uevent);
 }
+ mutex_unlock(&mc->ctx->file->mut);
+ rdma_unlock_handler(mc->ctx->cm_id);
 }

-/*
- * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At
- * this point, no new events will be reported from the hardware. However, we
- * still need to cleanup the UCMA context for this ID. Specifically, there
- * might be events that have not yet been consumed by the user space software.
- * These might include pending connect requests which we have not completed
- * processing. We cannot call rdma_destroy_id while holding the lock of the
- * context (file->mut), as it might cause a deadlock. We therefore extract all
- * relevant events from the context pending events list while holding the
- * mutex. After that we release them as needed.
- */
-static int ucma_free_ctx(struct ucma_context *ctx)
+static int ucma_cleanup_ctx_events(struct ucma_context *ctx)
 {
 int events_reported;
 struct ucma_event *uevent, *tmp;
 LIST_HEAD(list);

-
- ucma_cleanup_multicast(ctx);
-
- /* Cleanup events not yet reported to the user. */
+ /* Cleanup events not yet reported to the user.*/
 mutex_lock(&ctx->file->mut);
 list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
- if (uevent->ctx == ctx)
+ if (uevent->ctx != ctx)
+ continue;
+
+ if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST &&
+ xa_cmpxchg(&ctx_table, uevent->conn_req_ctx->id,
+ uevent->conn_req_ctx, XA_ZERO_ENTRY,
+ GFP_KERNEL) == uevent->conn_req_ctx) {
 list_move_tail(&uevent->list, &list);
+ continue;
+ }
+ list_del(&uevent->list);
+ kfree(uevent);
 }
 list_del(&ctx->list);
 events_reported = ctx->events_reported;
 mutex_unlock(&ctx->file->mut);

+ /*
+ * If this was a listening ID then any connections spawned from it that
+ * have not been delivered to userspace are cleaned up too. Must be done
+ * outside any locks.
+ */
 list_for_each_entry_safe(uevent, tmp, &list, list) {
- list_del(&uevent->list);
- if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
- rdma_destroy_id(uevent->cm_id);
+ ucma_destroy_private_ctx(uevent->conn_req_ctx);
 kfree(uevent);
 }
+ return events_reported;
+}

+/*
+ * When this is called the xarray must have a XA_ZERO_ENTRY in the ctx->id (ie
+ * the ctx is not public to the user). This either because:
+ * - ucma_finish_ctx() hasn't been called
+ * - xa_cmpxchg() succeed to remove the entry (only one thread can succeed)
+ */
+static int ucma_destroy_private_ctx(struct ucma_context *ctx)
+{
+ int events_reported;
+
+ /*
+ * Destroy the underlying cm_id. New work queuing is prevented now by
+ * the removal from the xarray. Once the work is cancled ref will either
+ * be 0 because the work ran to completion and consumed the ref from the
+ * xarray, or it will be positive because we still have the ref from the
+ * xarray. This can also be 0 in cases where cm_id was never set
+ */
+ cancel_work_sync(&ctx->close_work);
+ if (refcount_read(&ctx->ref))
+ ucma_close_id(&ctx->close_work);
+
+ events_reported = ucma_cleanup_ctx_events(ctx);
+ ucma_cleanup_multicast(ctx);
+
+ WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, XA_ZERO_ENTRY, NULL,
+ GFP_KERNEL) != NULL);
 mutex_destroy(&ctx->mutex);
 kfree(ctx);
 return events_reported;
@@ -617,33 +602,19 @@
 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 return -EFAULT;

- mutex_lock(&mut);
+ xa_lock(&ctx_table);
 ctx = _ucma_find_context(cmd.id, file);
- if (!IS_ERR(ctx))
- idr_remove(&ctx_idr, ctx->id);
- mutex_unlock(&mut);
+ if (!IS_ERR(ctx)) {
+ if (__xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY,
+ GFP_KERNEL) != ctx)
+ ctx = ERR_PTR(-ENOENT);
+ }
+ xa_unlock(&ctx_table);

 if (IS_ERR(ctx))
 return PTR_ERR(ctx);

- mutex_lock(&ctx->file->mut);
- ctx->destroying = 1;
- mutex_unlock(&ctx->file->mut);
-
- flush_workqueue(ctx->file->close_wq);
- /* At this point it's guaranteed that there is no inflight
- * closing task */
- mutex_lock(&mut);
- if (!ctx->closing) {
- mutex_unlock(&mut);
- ucma_put_ctx(ctx);
- wait_for_completion(&ctx->comp);
- rdma_destroy_id(ctx->cm_id);
- } else {
- mutex_unlock(&mut);
- }
-
- resp.events_reported = ucma_free_ctx(ctx);
+ resp.events_reported = ucma_destroy_private_ctx(ctx);
 if (copy_to_user(u64_to_user_ptr(cmd.response),
 &resp, sizeof(resp)))
 ret = -EFAULT;
@@ -796,7 +767,7 @@
 case 2:
 ib_copy_path_rec_to_user(&resp->ib_route[1],
 &route->path_rec[1]);
- /* fall through */
+ fallthrough;
 case 1:
 ib_copy_path_rec_to_user(&resp->ib_route[0],
 &route->path_rec[0]);
@@ -822,7 +793,7 @@
 case 2:
 ib_copy_path_rec_to_user(&resp->ib_route[1],
 &route->path_rec[1]);
- /* fall through */
+ fallthrough;
 case 1:
 ib_copy_path_rec_to_user(&resp->ib_route[0],
 &route->path_rec[0]);
@@ -852,7 +823,7 @@
 struct sockaddr *addr;
 int ret = 0;

- if (out_len < sizeof(resp))
+ if (out_len < offsetof(struct rdma_ucm_query_route_resp, ibdev_index))
 return -ENOSPC;

 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
@@ -876,6 +847,7 @@
 goto out;

 resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
+ resp.ibdev_index = ctx->cm_id->device->index;
 resp.port_num = ctx->cm_id->port_num;

 if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num))
@@ -887,8 +859,8 @@

 out:
 mutex_unlock(&ctx->mutex);
- if (copy_to_user(u64_to_user_ptr(cmd.response),
- &resp, sizeof(resp)))
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp,
+ min_t(size_t, out_len, sizeof(resp))))
 ret = -EFAULT;

 ucma_put_ctx(ctx);
@@ -902,6 +874,7 @@
 return;

 resp->node_guid = (__force __u64) cm_id->device->node_guid;
+ resp->ibdev_index = cm_id->device->index;
 resp->port_num = cm_id->port_num;
 resp->pkey = (__force __u16) cpu_to_be16(
 ib_addr_get_pkey(&cm_id->route.addr.dev_addr));
@@ -914,7 +887,7 @@
 struct sockaddr *addr;
 int ret = 0;

- if (out_len < sizeof(resp))
+ if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index))
 return -ENOSPC;

 memset(&resp, 0, sizeof resp);
@@ -929,7 +902,7 @@

 ucma_query_device_addr(ctx->cm_id, &resp);

- if (copy_to_user(response, &resp, sizeof(resp)))
+ if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp))))
 ret = -EFAULT;

 return ret;
@@ -967,8 +940,7 @@
 }
 }

- if (copy_to_user(response, resp,
- sizeof(*resp) + (i * sizeof(struct ib_path_rec_data))))
+ if (copy_to_user(response, resp, struct_size(resp, path_data, i)))
 ret = -EFAULT;

 kfree(resp);
@@ -982,7 +954,7 @@
 struct sockaddr_ib *addr;
 int ret = 0;

- if (out_len < sizeof(resp))
+ if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index))
 return -ENOSPC;

 memset(&resp, 0, sizeof resp);
@@ -1015,7 +987,7 @@
 &ctx->cm_id->route.addr.dst_addr);
 }

- if (copy_to_user(response, &resp, sizeof(resp)))
+ if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp))))
 ret = -EFAULT;

 return ret;
@@ -1071,19 +1043,24 @@
 dst->retry_count = src->retry_count;
 dst->rnr_retry_count = src->rnr_retry_count;
 dst->srq = src->srq;
- dst->qp_num = src->qp_num;
+ dst->qp_num = src->qp_num & 0xFFFFFF;
 dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0;
 }

 static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
 int in_len, int out_len)
 {
- struct rdma_ucm_connect cmd;
 struct rdma_conn_param conn_param;
+ struct rdma_ucm_ece ece = {};
+ struct rdma_ucm_connect cmd;
 struct ucma_context *ctx;
+ size_t in_size;
 int ret;

- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ if (in_len < offsetofend(typeof(cmd), reserved))
+ return -EINVAL;
+ in_size = min_t(size_t, in_len, sizeof(cmd));
+ if (copy_from_user(&cmd, inbuf, in_size))
 return -EFAULT;

 if (!cmd.conn_param.valid)
@@ -1094,8 +1071,13 @@
 return PTR_ERR(ctx);

 ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
+ if (offsetofend(typeof(cmd), ece) <= in_size) {
+ ece.vendor_id = cmd.ece.vendor_id;
+ ece.attr_mod = cmd.ece.attr_mod;
+ }
+
 mutex_lock(&ctx->mutex);
- ret = rdma_connect(ctx->cm_id, &conn_param);
+ ret = rdma_connect_ece(ctx->cm_id, &conn_param, &ece);
 mutex_unlock(&ctx->mutex);
 ucma_put_ctx(ctx);
 return ret;
@@ -1115,10 +1097,12 @@
 if (IS_ERR(ctx))
 return PTR_ERR(ctx);

- ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ?
- cmd.backlog : max_backlog;
+ if (cmd.backlog <= 0 || cmd.backlog > max_backlog)
+ cmd.backlog = max_backlog;
+ atomic_set(&ctx->backlog, cmd.backlog);
+
 mutex_lock(&ctx->mutex);
- ret = rdma_listen(ctx->cm_id, ctx->backlog);
+ ret = rdma_listen(ctx->cm_id, cmd.backlog);
 mutex_unlock(&ctx->mutex);
 ucma_put_ctx(ctx);
 return ret;
@@ -1129,28 +1113,42 @@
 {
 struct rdma_ucm_accept cmd;
 struct rdma_conn_param conn_param;
+ struct rdma_ucm_ece ece = {};
 struct ucma_context *ctx;
+ size_t in_size;
 int ret;

- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ if (in_len < offsetofend(typeof(cmd), reserved))
+ return -EINVAL;
+ in_size = min_t(size_t, in_len, sizeof(cmd));
+ if (copy_from_user(&cmd, inbuf, in_size))
 return -EFAULT;

 ctx = ucma_get_ctx_dev(file, cmd.id);
 if (IS_ERR(ctx))
 return PTR_ERR(ctx);

+ if (offsetofend(typeof(cmd), ece) <= in_size) {
+ ece.vendor_id = cmd.ece.vendor_id;
+ ece.attr_mod = cmd.ece.attr_mod;
+ }
+
 if (cmd.conn_param.valid) {
 ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
- mutex_lock(&file->mut);
 mutex_lock(&ctx->mutex);
- ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
- mutex_unlock(&ctx->mutex);
- if (!ret)
+ rdma_lock_handler(ctx->cm_id);
+ ret = rdma_accept_ece(ctx->cm_id, &conn_param, &ece);
+ if (!ret) {
+ /* The uid must be set atomically with the handler */
 ctx->uid = cmd.uid;
- mutex_unlock(&file->mut);
+ }
+ rdma_unlock_handler(ctx->cm_id);
+ mutex_unlock(&ctx->mutex);
 } else {
 mutex_lock(&ctx->mutex);
- ret = __rdma_accept(ctx->cm_id, NULL, NULL);
+ rdma_lock_handler(ctx->cm_id);
+ ret = rdma_accept_ece(ctx->cm_id, NULL, &ece);
+ rdma_unlock_handler(ctx->cm_id);
 mutex_unlock(&ctx->mutex);
 }
 ucma_put_ctx(ctx);
@@ -1167,12 +1165,24 @@
 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 return -EFAULT;

+ if (!cmd.reason)
+ cmd.reason = IB_CM_REJ_CONSUMER_DEFINED;
+
+ switch (cmd.reason) {
+ case IB_CM_REJ_CONSUMER_DEFINED:
+ case IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED:
+ break;
+ default:
+ return -EINVAL;
+ }
+
 ctx = ucma_get_ctx_dev(file, cmd.id);
 if (IS_ERR(ctx))
 return PTR_ERR(ctx);

 mutex_lock(&ctx->mutex);
- ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len);
+ ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len,
+ cmd.reason);
 mutex_unlock(&ctx->mutex);
 ucma_put_ctx(ctx);
 return ret;
@@ -1267,6 +1277,13 @@
 break;
 }
 ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0);
+ break;
+ case RDMA_OPTION_ID_ACK_TIMEOUT:
+ if (optlen != sizeof(u8)) {
+ ret = -EINVAL;
+ break;
+ }
+ ret = rdma_set_ack_timeout(ctx->cm_id, *((u8 *)optval));
 break;
 default:
 ret = -ENOSYS;
@@ -1444,50 +1461,59 @@
 if (IS_ERR(ctx))
 return PTR_ERR(ctx);

- mutex_lock(&file->mut);
- mc = ucma_alloc_multicast(ctx);
+ mc = kzalloc(sizeof(*mc), GFP_KERNEL);
 if (!mc) {
 ret = -ENOMEM;
- goto err1;
+ goto err_put_ctx;
 }
+
+ mc->ctx = ctx;
 mc->join_state = join_state;
 mc->uid = cmd->uid;
 memcpy(&mc->addr, addr, cmd->addr_size);
+
+ xa_lock(&multicast_table);
+ if (__xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
+ GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto err_free_mc;
+ }
+
+ list_add_tail(&mc->list, &ctx->mc_list);
+ xa_unlock(&multicast_table);
+
 mutex_lock(&ctx->mutex);
 ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
 join_state, mc);
 mutex_unlock(&ctx->mutex);
 if (ret)
- goto err2;
+ goto err_xa_erase;

 resp.id = mc->id;
 if (copy_to_user(u64_to_user_ptr(cmd->response),
 &resp, sizeof(resp))) {
 ret = -EFAULT;
- goto err3;
+ goto err_leave_multicast;
 }

- mutex_lock(&mut);
- idr_replace(&multicast_idr, mc, mc->id);
- mutex_unlock(&mut);
+ xa_store(&multicast_table, mc->id, mc, 0);

- mutex_unlock(&file->mut);
 ucma_put_ctx(ctx);
 return 0;

-err3:
+err_leave_multicast:
 mutex_lock(&ctx->mutex);
 rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
 mutex_unlock(&ctx->mutex);
 ucma_cleanup_mc_events(mc);
-err2:
- mutex_lock(&mut);
- idr_remove(&multicast_idr, mc->id);
- mutex_unlock(&mut);
+err_xa_erase:
+ xa_lock(&multicast_table);
 list_del(&mc->list);
+ __xa_erase(&multicast_table, mc->id);
+err_free_mc:
+ xa_unlock(&multicast_table);
 kfree(mc);
-err1:
- mutex_unlock(&file->mut);
+err_put_ctx:
 ucma_put_ctx(ctx);
 return ret;
 }
@@ -1545,31 +1571,30 @@
 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 return -EFAULT;

- mutex_lock(&mut);
- mc = idr_find(&multicast_idr, cmd.id);
+ xa_lock(&multicast_table);
+ mc = xa_load(&multicast_table, cmd.id);
 if (!mc)
 mc = ERR_PTR(-ENOENT);
- else if (mc->ctx->file != file)
+ else if (READ_ONCE(mc->ctx->file) != file)
 mc = ERR_PTR(-EINVAL);
- else if (!atomic_inc_not_zero(&mc->ctx->ref))
+ else if (!refcount_inc_not_zero(&mc->ctx->ref))
 mc = ERR_PTR(-ENXIO);
- else
- idr_remove(&multicast_idr, mc->id);
- mutex_unlock(&mut);

 if (IS_ERR(mc)) {
+ xa_unlock(&multicast_table);
 ret = PTR_ERR(mc);
 goto out;
 }
+
+ list_del(&mc->list);
+ __xa_erase(&multicast_table, mc->id);
+ xa_unlock(&multicast_table);

 mutex_lock(&mc->ctx->mutex);
 rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
 mutex_unlock(&mc->ctx->mutex);

- mutex_lock(&mc->ctx->file->mut);
 ucma_cleanup_mc_events(mc);
- list_del(&mc->list);
- mutex_unlock(&mc->ctx->file->mut);

 ucma_put_ctx(mc->ctx);
 resp.events_reported = mc->events_reported;
@@ -1582,45 +1607,15 @@
 return ret;
 }

-static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2)
-{
- /* Acquire mutex's based on pointer comparison to prevent deadlock. */
- if (file1 < file2) {
- mutex_lock(&file1->mut);
- mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING);
- } else {
- mutex_lock(&file2->mut);
- mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING);
- }
-}
-
-static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2)
-{
- if (file1 < file2) {
- mutex_unlock(&file2->mut);
- mutex_unlock(&file1->mut);
- } else {
- mutex_unlock(&file1->mut);
- mutex_unlock(&file2->mut);
- }
-}
-
-static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file)
-{
- struct ucma_event *uevent, *tmp;
-
- list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list)
- if (uevent->ctx == ctx)
- list_move_tail(&uevent->list, &file->event_list);
-}
-
 static ssize_t ucma_migrate_id(struct ucma_file *new_file,
 const char __user *inbuf,
 int in_len, int out_len)
 {
 struct rdma_ucm_migrate_id cmd;
 struct rdma_ucm_migrate_resp resp;
+ struct ucma_event *uevent, *tmp;
 struct ucma_context *ctx;
+ LIST_HEAD(event_list);
 struct fd f;
 struct ucma_file *cur_file;
 int ret = 0;
@@ -1636,42 +1631,53 @@
 ret = -EINVAL;
 goto file_put;
 }
+ cur_file = f.file->private_data;

 /* Validate current fd and prevent destruction of id. */
- ctx = ucma_get_ctx(f.file->private_data, cmd.id);
+ ctx = ucma_get_ctx(cur_file, cmd.id);
 if (IS_ERR(ctx)) {
 ret = PTR_ERR(ctx);
 goto file_put;
 }

- cur_file = ctx->file;
- if (cur_file == new_file) {
- mutex_lock(&cur_file->mut);
- resp.events_reported = ctx->events_reported;
- mutex_unlock(&cur_file->mut);
- goto response;
- }
-
+ rdma_lock_handler(ctx->cm_id);
 /*
- * Migrate events between fd's, maintaining order, and avoiding new
- * events being added before existing events.
+ * ctx->file can only be changed under the handler & xa_lock. xa_load()
+ * must be checked again to ensure the ctx hasn't begun destruction
+ * since the ucma_get_ctx().
 */
- ucma_lock_files(cur_file, new_file);
- mutex_lock(&mut);
-
- list_move_tail(&ctx->list, &new_file->ctx_list);
- ucma_move_events(ctx, new_file);
+ xa_lock(&ctx_table);
+ if (_ucma_find_context(cmd.id, cur_file) != ctx) {
+ xa_unlock(&ctx_table);
+ ret = -ENOENT;
+ goto err_unlock;
+ }
 ctx->file = new_file;
+ xa_unlock(&ctx_table);
+
+ mutex_lock(&cur_file->mut);
+ list_del(&ctx->list);
+ /*
+ * At this point lock_handler() prevents addition of new uevents for
+ * this ctx.
+ */
+ list_for_each_entry_safe(uevent, tmp, &cur_file->event_list, list)
+ if (uevent->ctx == ctx)
+ list_move_tail(&uevent->list, &event_list);
 resp.events_reported = ctx->events_reported;
+ mutex_unlock(&cur_file->mut);

- mutex_unlock(&mut);
- ucma_unlock_files(cur_file, new_file);
+ mutex_lock(&new_file->mut);
+ list_add_tail(&ctx->list, &new_file->ctx_list);
+ list_splice_tail(&event_list, &new_file->event_list);
+ mutex_unlock(&new_file->mut);

-response:
 if (copy_to_user(u64_to_user_ptr(cmd.response),
 &resp, sizeof(resp)))
 ret = -EFAULT;

+err_unlock:
+ rdma_unlock_handler(ctx->cm_id);
 ucma_put_ctx(ctx);
 file_put:
 fdput(f);
@@ -1771,13 +1777,6 @@
 if (!file)
 return -ENOMEM;

- file->close_wq = alloc_ordered_workqueue("ucma_close_id",
- WQ_MEM_RECLAIM);
- if (!file->close_wq) {
- kfree(file);
- return -ENOMEM;
- }
-
 INIT_LIST_HEAD(&file->event_list);
 INIT_LIST_HEAD(&file->ctx_list);
 init_waitqueue_head(&file->poll_wait);
@@ -1786,46 +1785,29 @@
 filp->private_data = file;
 file->filp = filp;

- return nonseekable_open(inode, filp);
+ return stream_open(inode, filp);
 }

 static int ucma_close(struct inode *inode, struct file *filp)
 {
 struct ucma_file *file = filp->private_data;
- struct ucma_context *ctx, *tmp;

- mutex_lock(&file->mut);
- list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) {
- ctx->destroying = 1;
- mutex_unlock(&file->mut);
+ /*
+ * All paths that touch ctx_list or ctx_list starting from write() are
+ * prevented by this being a FD release function. The list_add_tail() in
+ * ucma_connect_event_handler() can run concurrently, however it only
+ * adds to the list *after* a listening ID. By only reading the first of
+ * the list, and relying on ucma_destroy_private_ctx() to block
+ * ucma_connect_event_handler(), no additional locking is needed.
+ */
+ while (!list_empty(&file->ctx_list)) {
+ struct ucma_context *ctx = list_first_entry(
+ &file->ctx_list, struct ucma_context, list);

- mutex_lock(&mut);
- idr_remove(&ctx_idr, ctx->id);
- mutex_unlock(&mut);
-
- flush_workqueue(file->close_wq);
- /* At that step once ctx was marked as destroying and workqueue
- * was flushed we are safe from any inflights handlers that
- * might put other closing task.
- */
- mutex_lock(&mut);
- if (!ctx->closing) {
- mutex_unlock(&mut);
- ucma_put_ctx(ctx);
- wait_for_completion(&ctx->comp);
- /* rdma_destroy_id ensures that no event handlers are
- * inflight for that id before releasing it.
- */
- rdma_destroy_id(ctx->cm_id);
- } else {
- mutex_unlock(&mut);
- }
-
- ucma_free_ctx(ctx);
- mutex_lock(&file->mut);
+ WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY,
+ GFP_KERNEL) != ctx);
+ ucma_destroy_private_ctx(ctx);
 }
- mutex_unlock(&file->mut);
- destroy_workqueue(file->close_wq);
 kfree(file);
 return 0;
 }
@@ -1846,6 +1828,19 @@
 .mode = 0666,
 .fops = &ucma_fops,
 };
+
+static int ucma_get_global_nl_info(struct ib_client_nl_info *res)
+{
+ res->abi = RDMA_USER_CM_ABI_VERSION;
+ res->cdev = ucma_misc.this_device;
+ return 0;
+}
+
+static struct ib_client rdma_cma_client = {
+ .name = "rdma_cm",
+ .get_global_nl_info = ucma_get_global_nl_info,
+};
+MODULE_ALIAS_RDMA_CLIENT("rdma_cm");

 static ssize_t show_abi_version(struct device *dev,
 struct device_attribute *attr,
@@ -1875,7 +1870,14 @@
 ret = -ENOMEM;
 goto err2;
 }
+
+ ret = ib_register_client(&rdma_cma_client);
+ if (ret)
+ goto err3;
+
 return 0;
+err3:
+ unregister_net_sysctl_table(ucma_ctl_table_hdr);
 err2:
 device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
 err1:
@@ -1885,11 +1887,10 @@

 static void __exit ucma_cleanup(void)
 {
+ ib_unregister_client(&rdma_cma_client);
 unregister_net_sysctl_table(ucma_ctl_table_hdr);
 device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
 misc_deregister(&ucma_misc);
- idr_destroy(&ctx_idr);
- idr_destroy(&multicast_idr);
 }

 module_init(ucma_init);