2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/infiniband/sw/rdmavt/cq.c
@@ -60,22 +60,39 @@
  * @solicited: true if @entry is solicited
  *
  * This may be called with qp->s_lock held.
+ *
+ * Return: return true on success, else return
+ * false if cq is full.
  */
-void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
+bool rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
 {
-	struct rvt_cq_wc *wc;
+	struct ib_uverbs_wc *uqueue = NULL;
+	struct ib_wc *kqueue = NULL;
+	struct rvt_cq_wc *u_wc = NULL;
+	struct rvt_k_cq_wc *k_wc = NULL;
 	unsigned long flags;
 	u32 head;
 	u32 next;
+	u32 tail;
 
 	spin_lock_irqsave(&cq->lock, flags);
 
+	if (cq->ip) {
+		u_wc = cq->queue;
+		uqueue = &u_wc->uqueue[0];
+		head = RDMA_READ_UAPI_ATOMIC(u_wc->head);
+		tail = RDMA_READ_UAPI_ATOMIC(u_wc->tail);
+	} else {
+		k_wc = cq->kqueue;
+		kqueue = &k_wc->kqueue[0];
+		head = k_wc->head;
+		tail = k_wc->tail;
+	}
+
 	/*
-	 * Note that the head pointer might be writable by user processes.
-	 * Take care to verify it is a sane value.
+	 * Note that the head pointer might be writable by
+	 * user processes. Take care to verify it is a sane value.
 	 */
-	wc = cq->queue;
-	head = wc->head;
 	if (head >= (unsigned)cq->ibcq.cqe) {
 		head = cq->ibcq.cqe;
 		next = 0;
@@ -83,7 +100,12 @@
 		next = head + 1;
 	}
 
-	if (unlikely(next == wc->tail)) {
+	if (unlikely(next == tail || cq->cq_full)) {
+		struct rvt_dev_info *rdi = cq->rdi;
+
+		if (!cq->cq_full)
+			rvt_pr_err_ratelimited(rdi, "CQ is full!\n");
+		cq->cq_full = true;
 		spin_unlock_irqrestore(&cq->lock, flags);
 		if (cq->ibcq.event_handler) {
 			struct ib_event ev;
@@ -93,30 +115,30 @@
 			ev.event = IB_EVENT_CQ_ERR;
 			cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
 		}
-		return;
+		return false;
 	}
 	trace_rvt_cq_enter(cq, entry, head);
-	if (cq->ip) {
-		wc->uqueue[head].wr_id = entry->wr_id;
-		wc->uqueue[head].status = entry->status;
-		wc->uqueue[head].opcode = entry->opcode;
-		wc->uqueue[head].vendor_err = entry->vendor_err;
-		wc->uqueue[head].byte_len = entry->byte_len;
-		wc->uqueue[head].ex.imm_data = entry->ex.imm_data;
-		wc->uqueue[head].qp_num = entry->qp->qp_num;
-		wc->uqueue[head].src_qp = entry->src_qp;
-		wc->uqueue[head].wc_flags = entry->wc_flags;
-		wc->uqueue[head].pkey_index = entry->pkey_index;
-		wc->uqueue[head].slid = ib_lid_cpu16(entry->slid);
-		wc->uqueue[head].sl = entry->sl;
-		wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
-		wc->uqueue[head].port_num = entry->port_num;
+	if (uqueue) {
+		uqueue[head].wr_id = entry->wr_id;
+		uqueue[head].status = entry->status;
+		uqueue[head].opcode = entry->opcode;
+		uqueue[head].vendor_err = entry->vendor_err;
+		uqueue[head].byte_len = entry->byte_len;
+		uqueue[head].ex.imm_data = entry->ex.imm_data;
+		uqueue[head].qp_num = entry->qp->qp_num;
+		uqueue[head].src_qp = entry->src_qp;
+		uqueue[head].wc_flags = entry->wc_flags;
+		uqueue[head].pkey_index = entry->pkey_index;
+		uqueue[head].slid = ib_lid_cpu16(entry->slid);
+		uqueue[head].sl = entry->sl;
+		uqueue[head].dlid_path_bits = entry->dlid_path_bits;
+		uqueue[head].port_num = entry->port_num;
 		/* Make sure entry is written before the head index. */
-		smp_wmb();
+		RDMA_WRITE_UAPI_ATOMIC(u_wc->head, next);
 	} else {
-		wc->kqueue[head] = *entry;
+		kqueue[head] = *entry;
+		k_wc->head = next;
 	}
-	wc->head = next;
 
 	if (cq->notify == IB_CQ_NEXT_COMP ||
 	    (cq->notify == IB_CQ_SOLICITED &&
@@ -132,6 +154,7 @@
 	}
 
 	spin_unlock_irqrestore(&cq->lock, flags);
+	return true;
 }
 EXPORT_SYMBOL(rvt_cq_enter);
 
@@ -166,44 +189,37 @@
 
 /**
  * rvt_create_cq - create a completion queue
- * @ibdev: the device this completion queue is attached to
+ * @ibcq: Allocated CQ
  * @attr: creation attributes
- * @context: unused by the QLogic_IB driver
 * @udata: user data for libibverbs.so
 *
 * Called by ib_create_cq() in the generic verbs code.
 *
- * Return: pointer to the completion queue or negative errno values
- * for failure.
+ * Return: 0 on success
 */
-struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
-			    const struct ib_cq_init_attr *attr,
-			    struct ib_ucontext *context,
-			    struct ib_udata *udata)
+int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+		  struct ib_udata *udata)
 {
+	struct ib_device *ibdev = ibcq->device;
 	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
-	struct rvt_cq *cq;
-	struct rvt_cq_wc *wc;
-	struct ib_cq *ret;
+	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
+	struct rvt_cq_wc *u_wc = NULL;
+	struct rvt_k_cq_wc *k_wc = NULL;
 	u32 sz;
 	unsigned int entries = attr->cqe;
 	int comp_vector = attr->comp_vector;
+	int err;
 
 	if (attr->flags)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	if (entries < 1 || entries > rdi->dparms.props.max_cqe)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	if (comp_vector < 0)
 		comp_vector = 0;
 
 	comp_vector = comp_vector % rdi->ibdev.num_comp_vectors;
-
-	/* Allocate the completion queue structure. */
-	cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, rdi->dparms.node);
-	if (!cq)
-		return ERR_PTR(-ENOMEM);
 
 	/*
 	 * Allocate the completion queue entries and head/tail pointers.
@@ -212,17 +228,18 @@
 	 * We need to use vmalloc() in order to support mmap and large
 	 * numbers of entries.
 	 */
-	sz = sizeof(*wc);
-	if (udata && udata->outlen >= sizeof(__u64))
-		sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
-	else
-		sz += sizeof(struct ib_wc) * (entries + 1);
-	wc = udata ?
-		vmalloc_user(sz) :
-		vzalloc_node(sz, rdi->dparms.node);
-	if (!wc) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_cq;
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		sz = sizeof(struct ib_uverbs_wc) * (entries + 1);
+		sz += sizeof(*u_wc);
+		u_wc = vmalloc_user(sz);
+		if (!u_wc)
+			return -ENOMEM;
+	} else {
+		sz = sizeof(struct ib_wc) * (entries + 1);
+		sz += sizeof(*k_wc);
+		k_wc = vzalloc_node(sz, rdi->dparms.node);
+		if (!k_wc)
+			return -ENOMEM;
 	}
 
 	/*
@@ -230,26 +247,22 @@
 	 * See rvt_mmap() for details.
 	 */
 	if (udata && udata->outlen >= sizeof(__u64)) {
-		int err;
-
-		cq->ip = rvt_create_mmap_info(rdi, sz, context, wc);
-		if (!cq->ip) {
-			ret = ERR_PTR(-ENOMEM);
+		cq->ip = rvt_create_mmap_info(rdi, sz, udata, u_wc);
+		if (IS_ERR(cq->ip)) {
+			err = PTR_ERR(cq->ip);
 			goto bail_wc;
 		}
 
 		err = ib_copy_to_udata(udata, &cq->ip->offset,
 				       sizeof(cq->ip->offset));
-		if (err) {
-			ret = ERR_PTR(err);
+		if (err)
 			goto bail_ip;
-		}
 	}
 
 	spin_lock_irq(&rdi->n_cqs_lock);
 	if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
 		spin_unlock_irq(&rdi->n_cqs_lock);
-		ret = ERR_PTR(-ENOMEM);
+		err = -ENOMEM;
 		goto bail_ip;
 	}
 
@@ -279,32 +292,30 @@
 	cq->notify = RVT_CQ_NONE;
 	spin_lock_init(&cq->lock);
 	INIT_WORK(&cq->comptask, send_complete);
-	cq->queue = wc;
-
-	ret = &cq->ibcq;
+	if (u_wc)
+		cq->queue = u_wc;
+	else
+		cq->kqueue = k_wc;
 
 	trace_rvt_create_cq(cq, attr);
-	goto done;
+	return 0;
 
 bail_ip:
 	kfree(cq->ip);
 bail_wc:
-	vfree(wc);
-bail_cq:
-	kfree(cq);
-done:
-	return ret;
+	vfree(u_wc);
+	vfree(k_wc);
+	return err;
 }
 
 /**
  * rvt_destroy_cq - destroy a completion queue
  * @ibcq: the completion queue to destroy.
+ * @udata: user data or NULL for kernel object
 *
 * Called by ib_destroy_cq() in the generic verbs code.
- *
- * Return: always 0
 */
-int rvt_destroy_cq(struct ib_cq *ibcq)
+int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
 {
 	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
 	struct rvt_dev_info *rdi = cq->rdi;
@@ -316,9 +327,7 @@
 	if (cq->ip)
 		kref_put(&cq->ip->ref, rvt_release_mmap_info);
 	else
-		vfree(cq->queue);
-	kfree(cq);
-
+		vfree(cq->kqueue);
 	return 0;
 }
 
@@ -346,9 +355,16 @@
 	if (cq->notify != IB_CQ_NEXT_COMP)
 		cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
 
-	if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
-	    cq->queue->head != cq->queue->tail)
-		ret = 1;
+	if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
+		if (cq->queue) {
+			if (RDMA_READ_UAPI_ATOMIC(cq->queue->head) !=
+			    RDMA_READ_UAPI_ATOMIC(cq->queue->tail))
+				ret = 1;
+		} else {
+			if (cq->kqueue->head != cq->kqueue->tail)
+				ret = 1;
+		}
+	}
 
 	spin_unlock_irqrestore(&cq->lock, flags);
 
@@ -364,12 +380,14 @@
 int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 {
 	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
-	struct rvt_cq_wc *old_wc;
-	struct rvt_cq_wc *wc;
 	u32 head, tail, n;
 	int ret;
 	u32 sz;
 	struct rvt_dev_info *rdi = cq->rdi;
+	struct rvt_cq_wc *u_wc = NULL;
+	struct rvt_cq_wc *old_u_wc = NULL;
+	struct rvt_k_cq_wc *k_wc = NULL;
+	struct rvt_k_cq_wc *old_k_wc = NULL;
 
 	if (cqe < 1 || cqe > rdi->dparms.props.max_cqe)
 		return -EINVAL;
@@ -377,17 +395,19 @@
 	/*
 	 * Need to use vmalloc() if we want to support large #s of entries.
 	 */
-	sz = sizeof(*wc);
-	if (udata && udata->outlen >= sizeof(__u64))
-		sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
-	else
-		sz += sizeof(struct ib_wc) * (cqe + 1);
-	wc = udata ?
-		vmalloc_user(sz) :
-		vzalloc_node(sz, rdi->dparms.node);
-	if (!wc)
-		return -ENOMEM;
-
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		sz = sizeof(struct ib_uverbs_wc) * (cqe + 1);
+		sz += sizeof(*u_wc);
+		u_wc = vmalloc_user(sz);
+		if (!u_wc)
+			return -ENOMEM;
+	} else {
+		sz = sizeof(struct ib_wc) * (cqe + 1);
+		sz += sizeof(*k_wc);
+		k_wc = vzalloc_node(sz, rdi->dparms.node);
+		if (!k_wc)
+			return -ENOMEM;
+	}
 	/* Check that we can write the offset to mmap. */
 	if (udata && udata->outlen >= sizeof(__u64)) {
 		__u64 offset = 0;
@@ -402,11 +422,18 @@
 	 * Make sure head and tail are sane since they
 	 * might be user writable.
 	 */
-	old_wc = cq->queue;
-	head = old_wc->head;
+	if (u_wc) {
+		old_u_wc = cq->queue;
+		head = RDMA_READ_UAPI_ATOMIC(old_u_wc->head);
+		tail = RDMA_READ_UAPI_ATOMIC(old_u_wc->tail);
+	} else {
+		old_k_wc = cq->kqueue;
+		head = old_k_wc->head;
+		tail = old_k_wc->tail;
+	}
+
 	if (head > (u32)cq->ibcq.cqe)
 		head = (u32)cq->ibcq.cqe;
-	tail = old_wc->tail;
 	if (tail > (u32)cq->ibcq.cqe)
 		tail = (u32)cq->ibcq.cqe;
 	if (head < tail)
@@ -418,27 +445,36 @@
 		goto bail_unlock;
 	}
 	for (n = 0; tail != head; n++) {
-		if (cq->ip)
-			wc->uqueue[n] = old_wc->uqueue[tail];
+		if (u_wc)
+			u_wc->uqueue[n] = old_u_wc->uqueue[tail];
 		else
-			wc->kqueue[n] = old_wc->kqueue[tail];
+			k_wc->kqueue[n] = old_k_wc->kqueue[tail];
 		if (tail == (u32)cq->ibcq.cqe)
 			tail = 0;
 		else
 			tail++;
 	}
 	cq->ibcq.cqe = cqe;
-	wc->head = n;
-	wc->tail = 0;
-	cq->queue = wc;
+	if (u_wc) {
+		RDMA_WRITE_UAPI_ATOMIC(u_wc->head, n);
+		RDMA_WRITE_UAPI_ATOMIC(u_wc->tail, 0);
+		cq->queue = u_wc;
+	} else {
+		k_wc->head = n;
+		k_wc->tail = 0;
+		cq->kqueue = k_wc;
+	}
 	spin_unlock_irq(&cq->lock);
 
-	vfree(old_wc);
+	if (u_wc)
+		vfree(old_u_wc);
+	else
+		vfree(old_k_wc);
 
 	if (cq->ip) {
 		struct rvt_mmap_info *ip = cq->ip;
 
-		rvt_update_mmap_info(rdi, ip, sz, wc);
+		rvt_update_mmap_info(rdi, ip, sz, u_wc);
 
 		/*
 		 * Return the offset to mmap.
@@ -462,7 +498,9 @@
 bail_unlock:
 	spin_unlock_irq(&cq->lock);
 bail_free:
-	vfree(wc);
+	vfree(u_wc);
+	vfree(k_wc);
+
 	return ret;
 }
 
@@ -480,7 +518,7 @@
 int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
 {
 	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
-	struct rvt_cq_wc *wc;
+	struct rvt_k_cq_wc *wc;
 	unsigned long flags;
 	int npolled;
 	u32 tail;
@@ -491,7 +529,7 @@
 
 	spin_lock_irqsave(&cq->lock, flags);
 
-	wc = cq->queue;
+	wc = cq->kqueue;
 	tail = wc->tail;
 	if (tail > (u32)cq->ibcq.cqe)
 		tail = (u32)cq->ibcq.cqe;
@@ -515,7 +553,6 @@
 
 /**
 * rvt_driver_cq_init - Init cq resources on behalf of driver
- * @rdi: rvt dev structure
 *
 * Return: 0 on success
 */
@@ -531,7 +568,6 @@
 
 /**
 * rvt_cq_exit - tear down cq reources
- * @rdi: rvt dev structure
 */
 void rvt_cq_exit(void)
 {
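
Note: because rvt_cq_enter() changes from void to bool in the hunks above, callers
must now check whether the completion entry was actually queued. Below is a minimal
caller sketch of how that return value might be consumed; the function name
example_send_complete() and the recovery comment are illustrative only and are not
part of this patch. It assumes the rdmavt headers that declare struct rvt_qp,
struct rvt_cq, ibcq_to_rvtcq() and rvt_cq_enter() are available.

#include <rdma/rdmavt_qp.h>
#include <rdma/rdmavt_cq.h>

/* Hypothetical caller: demonstrates checking the new bool return. */
static void example_send_complete(struct rvt_qp *qp, struct ib_wc *wc,
				  bool solicited)
{
	struct rvt_cq *cq = ibcq_to_rvtcq(qp->ibqp.send_cq);

	if (!rvt_cq_enter(cq, wc, solicited)) {
		/*
		 * The CQ was full: the entry was dropped and rvt_cq_enter()
		 * has already raised IB_EVENT_CQ_ERR (if an event handler is
		 * registered).  A driver would typically move the QP to the
		 * error state here rather than continue posting completions.
		 */
	}
}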