2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/infiniband/hw/hfi1/qp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license. When using or
  * redistributing this file, you may do so under either license.
@@ -66,7 +66,7 @@
 static void flush_tx_list(struct rvt_qp *qp);
 static int iowait_sleep(
 	struct sdma_engine *sde,
-	struct iowait *wait,
+	struct iowait_work *wait,
 	struct sdma_txreq *stx,
 	unsigned int seq,
 	bool pkts_sent);
@@ -132,23 +132,41 @@
 	.qpt_support = BIT(IB_QPT_RC),
 },
 
+[IB_WR_OPFN] = {
+	.length = sizeof(struct ib_atomic_wr),
+	.qpt_support = BIT(IB_QPT_RC),
+	.flags = RVT_OPERATION_USE_RESERVE,
+},
+
+[IB_WR_TID_RDMA_WRITE] = {
+	.length = sizeof(struct ib_rdma_wr),
+	.qpt_support = BIT(IB_QPT_RC),
+	.flags = RVT_OPERATION_IGN_RNR_CNT,
+},
+
 };
 
-static void flush_tx_list(struct rvt_qp *qp)
+static void flush_list_head(struct list_head *l)
 {
-	struct hfi1_qp_priv *priv = qp->priv;
-
-	while (!list_empty(&priv->s_iowait.tx_head)) {
+	while (!list_empty(l)) {
 		struct sdma_txreq *tx;
 
 		tx = list_first_entry(
-			&priv->s_iowait.tx_head,
+			l,
 			struct sdma_txreq,
 			list);
 		list_del_init(&tx->list);
 		hfi1_put_txreq(
 			container_of(tx, struct verbs_txreq, txreq));
 	}
+}
+
+static void flush_tx_list(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	flush_list_head(&iowait_get_ib_work(&priv->s_iowait)->tx_head);
+	flush_list_head(&iowait_get_tid_work(&priv->s_iowait)->tx_head);
 }
 
 static void flush_iowait(struct rvt_qp *qp)
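
The hunk above splits the old single-list drain into a reusable helper so that both the IB and TID legs' tx lists can be flushed. A minimal userspace sketch of the same pattern follows; the struct and field names are illustrative only, not the driver's.

#include <stdio.h>
#include <stdlib.h>

/* Stand-in for an sdma tx request queued on an iowait work list. */
struct txreq {
	struct txreq *next;
	int id;
};

/* Analogue of flush_list_head(): pop and release every entry on one list. */
static void flush_list_head(struct txreq **head)
{
	while (*head) {
		struct txreq *tx = *head;

		*head = tx->next;
		printf("flushing tx %d\n", tx->id);
		free(tx);
	}
}

/* Per-QP private data now carries one tx list per leg (IB and TID). */
struct qp_priv {
	struct txreq *ib_tx_head;
	struct txreq *tid_tx_head;
};

/* Analogue of flush_tx_list(): both legs must be drained on flush. */
static void flush_tx_list(struct qp_priv *priv)
{
	flush_list_head(&priv->ib_tx_head);
	flush_list_head(&priv->tid_tx_head);
}

int main(void)
{
	struct qp_priv priv = { NULL, NULL };
	struct txreq *tx = calloc(1, sizeof(*tx));

	tx->id = 1;
	priv.tid_tx_head = tx;
	flush_tx_list(&priv);
	return 0;
}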
@@ -168,15 +186,6 @@
 	write_sequnlock_irqrestore(lock, flags);
 }
 
-static inline int opa_mtu_enum_to_int(int mtu)
-{
-	switch (mtu) {
-	case OPA_MTU_8192: return 8192;
-	case OPA_MTU_10240: return 10240;
-	default: return -1;
-	}
-}
-
 /**
  * This function is what we would push to the core layer if we wanted to be a
  * "first class citizen". Instead we hide this here and rely on Verbs ULPs
@@ -184,15 +193,10 @@
  */
 static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
 {
-	int val;
-
 	/* Constraining 10KB packets to 8KB packets */
 	if (mtu == (enum ib_mtu)OPA_MTU_10240)
-		mtu = OPA_MTU_8192;
-	val = opa_mtu_enum_to_int((int)mtu);
-	if (val > 0)
-		return val;
-	return ib_mtu_enum_to_int(mtu);
+		mtu = (enum ib_mtu)OPA_MTU_8192;
+	return opa_mtu_enum_to_int((enum opa_mtu)mtu);
 }
 
 int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
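
After this hunk the file-local opa_mtu_enum_to_int() is gone and verbs_mtu_enum_to_int() relies on a shared helper that understands both IB and OPA MTU encodings, with 10 KB packets clamped to 8 KB. The standalone sketch below restates that conversion; the enum values mirror the usual IB/OPA codes but are defined here purely for illustration.

#include <stdio.h>

/* IB MTU codes 1..5 plus the two OPA extensions, as commonly defined. */
enum mtu_code {
	MTU_256   = 1,
	MTU_512   = 2,
	MTU_1024  = 3,
	MTU_2048  = 4,
	MTU_4096  = 5,
	MTU_8192  = 6,	/* OPA_MTU_8192 */
	MTU_10240 = 7,	/* OPA_MTU_10240 */
};

/* Map an MTU code to bytes; unknown codes report an error value. */
static int mtu_enum_to_int(enum mtu_code mtu)
{
	switch (mtu) {
	case MTU_256:   return 256;
	case MTU_512:   return 512;
	case MTU_1024:  return 1024;
	case MTU_2048:  return 2048;
	case MTU_4096:  return 4096;
	case MTU_8192:  return 8192;
	case MTU_10240: return 10240;
	default:        return -1;
	}
}

/* Mirror of the constraint in the hunk: 10KB packets are capped at 8KB. */
static int verbs_mtu_to_int(enum mtu_code mtu)
{
	if (mtu == MTU_10240)
		mtu = MTU_8192;
	return mtu_enum_to_int(mtu);
}

int main(void)
{
	printf("10240 maps to %d bytes\n", verbs_mtu_to_int(MTU_10240));
	return 0;
}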
@@ -279,41 +283,58 @@
 		priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
 		qp_set_16b(qp);
 	}
+
+	opfn_qp_init(qp, attr, attr_mask);
 }
 
 /**
- * hfi1_check_send_wqe - validate wqe
+ * hfi1_setup_wqe - set up the wqe
  * @qp - The qp
  * @wqe - The built wqe
+ * @call_send - Determine if the send should be posted or scheduled.
  *
- * validate wqe. This is called
- * prior to inserting the wqe into
- * the ring but after the wqe has been
- * setup.
+ * Perform setup of the wqe. This is called
+ * prior to inserting the wqe into the ring but after
+ * the wqe has been setup by RDMAVT. This function
+ * allows the driver the opportunity to perform
+ * validation and additional setup of the wqe.
  *
  * Returns 0 on success, -EINVAL on failure
  *
  */
-int hfi1_check_send_wqe(struct rvt_qp *qp,
-			struct rvt_swqe *wqe)
+int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
 {
 	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
 	struct rvt_ah *ah;
+	struct hfi1_pportdata *ppd;
+	struct hfi1_devdata *dd;
 
 	switch (qp->ibqp.qp_type) {
 	case IB_QPT_RC:
+		hfi1_setup_tid_rdma_wqe(qp, wqe);
+		fallthrough;
 	case IB_QPT_UC:
 		if (wqe->length > 0x80000000U)
 			return -EINVAL;
+		if (wqe->length > qp->pmtu)
+			*call_send = false;
 		break;
 	case IB_QPT_SMI:
-		ah = ibah_to_rvtah(wqe->ud_wr.ah);
-		if (wqe->length > (1 << ah->log_pmtu))
+		/*
+		 * SM packets should exclusively use VL15 and their SL is
+		 * ignored (IBTA v1.3, Section 3.5.8.2). Therefore, when ah
+		 * is created, SL is 0 in most cases and as a result some
+		 * fields (vl and pmtu) in ah may not be set correctly,
+		 * depending on the SL2SC and SC2VL tables at the time.
+		 */
+		ppd = ppd_from_ibp(ibp);
+		dd = dd_from_ppd(ppd);
+		if (wqe->length > dd->vld[15].mtu)
 			return -EINVAL;
 		break;
 	case IB_QPT_GSI:
 	case IB_QPT_UD:
-		ah = ibah_to_rvtah(wqe->ud_wr.ah);
+		ah = rvt_get_swqe_ah(wqe);
 		if (wqe->length > (1 << ah->log_pmtu))
 			return -EINVAL;
 		if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf)
@@ -321,7 +342,14 @@
 	default:
 		break;
 	}
-	return wqe->length <= piothreshold;
+
+	/*
+	 * System latency between send and schedule is large enough that
+	 * forcing call_send to true for piothreshold packets is necessary.
+	 */
+	if (wqe->length <= piothreshold)
+		*call_send = true;
+	return 0;
 }
 
 /**
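
With the two hunks above, hfi1_setup_wqe() also decides whether the post-send path should call the send engine directly or defer to the workqueue: requests larger than the path MTU are deferred, while requests at or below piothreshold are forced inline because the send-to-schedule latency would dominate. Below is a simplified decision helper showing the same policy; the threshold and MTU values are made up for the example.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative knobs; in the driver these come from the QP and a module param. */
#define EXAMPLE_PMTU		4096u
#define EXAMPLE_PIOTHRESHOLD	256u

/*
 * Decide whether a freshly posted request should be sent inline
 * (call_send == true) or handed to the send workqueue.
 */
static bool should_call_send(unsigned int length, bool call_send_hint)
{
	bool call_send = call_send_hint;

	/* Anything larger than one MTU will need scheduling anyway. */
	if (length > EXAMPLE_PMTU)
		call_send = false;
	/* Tiny PIO-eligible packets are cheaper to send right away. */
	if (length <= EXAMPLE_PIOTHRESHOLD)
		call_send = true;
	return call_send;
}

int main(void)
{
	printf("64B -> %s\n", should_call_send(64, false) ? "inline" : "scheduled");
	printf("8KB -> %s\n", should_call_send(8192, true) ? "inline" : "scheduled");
	return 0;
}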
@@ -333,36 +361,37 @@
  * It is only used in the post send, which doesn't hold
  * the s_lock.
  */
-void _hfi1_schedule_send(struct rvt_qp *qp)
+bool _hfi1_schedule_send(struct rvt_qp *qp)
 {
 	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_ibport *ibp =
 		to_iport(qp->ibqp.device, qp->port_num);
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+	struct hfi1_devdata *dd = ppd->dd;
 
-	iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
-			priv->s_sde ?
-			priv->s_sde->cpu :
-			cpumask_first(cpumask_of_node(dd->node)));
+	if (dd->flags & HFI1_SHUTDOWN)
+		return true;
+
+	return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
+			       priv->s_sde ?
			       priv->s_sde->cpu :
+			       cpumask_first(cpumask_of_node(dd->node)));
 }
 
 static void qp_pio_drain(struct rvt_qp *qp)
 {
-	struct hfi1_ibdev *dev;
 	struct hfi1_qp_priv *priv = qp->priv;
 
 	if (!priv->s_sendcontext)
 		return;
-	dev = to_idev(qp->ibqp.device);
 	while (iowait_pio_pending(&priv->s_iowait)) {
-		write_seqlock_irq(&dev->iowait_lock);
+		write_seqlock_irq(&priv->s_sendcontext->waitlock);
 		hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1);
-		write_sequnlock_irq(&dev->iowait_lock);
+		write_sequnlock_irq(&priv->s_sendcontext->waitlock);
 		iowait_pio_drain(&priv->s_iowait);
-		write_seqlock_irq(&dev->iowait_lock);
+		write_seqlock_irq(&priv->s_sendcontext->waitlock);
 		hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0);
-		write_sequnlock_irq(&dev->iowait_lock);
+		write_sequnlock_irq(&priv->s_sendcontext->waitlock);
 	}
 }
 
@@ -372,12 +401,37 @@
  *
  * This schedules qp progress and caller should hold
  * the s_lock.
+ * @return true if the first leg is scheduled;
+ * false if the first leg is not scheduled.
  */
-void hfi1_schedule_send(struct rvt_qp *qp)
+bool hfi1_schedule_send(struct rvt_qp *qp)
 {
 	lockdep_assert_held(&qp->s_lock);
-	if (hfi1_send_ok(qp))
+	if (hfi1_send_ok(qp)) {
 		_hfi1_schedule_send(qp);
+		return true;
+	}
+	if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
+		iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait,
+				IOWAIT_PENDING_IB);
+	return false;
+}
+
+static void hfi1_qp_schedule(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+	bool ret;
+
+	if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_IB)) {
+		ret = hfi1_schedule_send(qp);
+		if (ret)
+			iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
+	}
+	if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_TID)) {
+		ret = hfi1_schedule_tid_send(qp);
+		if (ret)
+			iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
+	}
 }
 
 void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
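
hfi1_schedule_send() now reports whether the first (IB) leg was actually queued, and the new hfi1_qp_schedule() retries whichever legs still have their IOWAIT_PENDING_* bits set, clearing a bit only once that leg is scheduled. The compact sketch below models that pending-flag protocol; the flag names and scheduling stubs are illustrative, not the driver's.

#include <stdbool.h>
#include <stdio.h>

#define PENDING_IB	0x1u
#define PENDING_TID	0x2u

struct qp_sim {
	unsigned int pending;	/* which legs still need to run */
	bool send_ok;		/* can the first leg be scheduled now? */
};

/* First-leg scheduler: on failure, remember that the IB leg is pending. */
static bool schedule_send(struct qp_sim *qp)
{
	if (qp->send_ok)
		return true;
	qp->pending |= PENDING_IB;
	return false;
}

/* Re-drive any leg whose pending bit is set; clear the bit on success. */
static void qp_schedule(struct qp_sim *qp)
{
	if (qp->pending & PENDING_IB) {
		if (schedule_send(qp))
			qp->pending &= ~PENDING_IB;
	}
	if (qp->pending & PENDING_TID) {
		/* the TID leg would be scheduled here */
		qp->pending &= ~PENDING_TID;
	}
}

int main(void)
{
	struct qp_sim qp = { .pending = 0, .send_ok = false };

	schedule_send(&qp);	/* fails, leaves PENDING_IB set */
	qp.send_ok = true;	/* e.g. the wait condition cleared */
	qp_schedule(&qp);	/* retries and clears PENDING_IB */
	printf("pending = %#x\n", qp.pending);
	return 0;
}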
@@ -388,16 +442,41 @@
 	if (qp->s_flags & flag) {
 		qp->s_flags &= ~flag;
 		trace_hfi1_qpwakeup(qp, flag);
-		hfi1_schedule_send(qp);
+		hfi1_qp_schedule(qp);
 	}
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 	/* Notify hfi1_destroy_qp() if it is waiting. */
 	rvt_put_qp(qp);
 }
 
+void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	if (iowait_set_work_flag(wait) == IOWAIT_IB_SE) {
+		qp->s_flags &= ~RVT_S_BUSY;
+		/*
+		 * If we are sending a first-leg packet from the second leg,
+		 * we need to clear the busy flag from priv->s_flags to
+		 * avoid a race condition when the qp wakes up before
+		 * the call to hfi1_verbs_send() returns to the second
+		 * leg. In that case, the second leg will terminate without
+		 * being re-scheduled, resulting in failure to send TID RDMA
+		 * WRITE DATA and TID RDMA ACK packets.
+		 */
+		if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
+			priv->s_flags &= ~(HFI1_S_TID_BUSY_SET |
+					   RVT_S_BUSY);
+			iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
+		}
+	} else {
+		priv->s_flags &= ~RVT_S_BUSY;
+	}
+}
+
 static int iowait_sleep(
 	struct sdma_engine *sde,
-	struct iowait *wait,
+	struct iowait_work *wait,
 	struct sdma_txreq *stx,
 	uint seq,
 	bool pkts_sent)
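
hfi1_qp_unbusy() above clears a different busy bit depending on which leg's work item just ran; when the second (TID) leg has borrowed the busy bit to push a first-leg packet, it must also drop its private copy and mark the TID leg pending so it gets rescheduled. A simplified model of that hand-off follows; the flag names mimic the driver's but the structures are invented for the example.

#include <stdbool.h>
#include <stdio.h>

#define S_BUSY		0x1u	/* a send leg is running */
#define S_TID_BUSY_SET	0x2u	/* second leg set S_BUSY on the first leg's behalf */
#define PENDING_TID	0x4u	/* second leg must be rescheduled */

struct qp_sim {
	unsigned int s_flags;		/* shared QP flags */
	unsigned int priv_flags;	/* driver-private flags */
};

/* Called when a send-engine work item finishes; which leg ran decides what clears. */
static void qp_unbusy(struct qp_sim *qp, bool ib_leg)
{
	if (ib_leg) {
		qp->s_flags &= ~S_BUSY;
		/*
		 * If the second leg borrowed the busy bit to push a
		 * first-leg packet, drop its private copy too and make
		 * sure the TID leg gets rescheduled.
		 */
		if (qp->priv_flags & S_TID_BUSY_SET) {
			qp->priv_flags &= ~(S_TID_BUSY_SET | S_BUSY);
			qp->priv_flags |= PENDING_TID;
		}
	} else {
		qp->priv_flags &= ~S_BUSY;
	}
}

int main(void)
{
	struct qp_sim qp = { .s_flags = S_BUSY,
			     .priv_flags = S_BUSY | S_TID_BUSY_SET };

	qp_unbusy(&qp, true);
	printf("s_flags=%#x priv_flags=%#x\n", qp.s_flags, qp.priv_flags);
	return 0;
}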
@@ -407,7 +486,6 @@
 	struct hfi1_qp_priv *priv;
 	unsigned long flags;
 	int ret = 0;
-	struct hfi1_ibdev *dev;
 
 	qp = tx->qp;
 	priv = qp->priv;
@@ -420,9 +498,8 @@
 		 * buffer and undoing the side effects of the copy.
 		 */
 		/* Make a common routine? */
-		dev = &sde->dd->verbs_dev;
 		list_add_tail(&stx->list, &wait->tx_head);
-		write_seqlock(&dev->iowait_lock);
+		write_seqlock(&sde->waitlock);
 		if (sdma_progress(sde, seq, stx))
 			goto eagain;
 		if (list_empty(&priv->s_iowait.list)) {
@@ -431,14 +508,15 @@
 
 			ibp->rvp.n_dmawait++;
 			qp->s_flags |= RVT_S_WAIT_DMA_DESC;
+			iowait_get_priority(&priv->s_iowait);
 			iowait_queue(pkts_sent, &priv->s_iowait,
 				     &sde->dmawait);
-			priv->s_iowait.lock = &dev->iowait_lock;
+			priv->s_iowait.lock = &sde->waitlock;
 			trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
 			rvt_get_qp(qp);
 		}
-		write_sequnlock(&dev->iowait_lock);
-		qp->s_flags &= ~RVT_S_BUSY;
+		write_sequnlock(&sde->waitlock);
+		hfi1_qp_unbusy(qp, wait);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 		ret = -EBUSY;
 	} else {
@@ -447,7 +525,7 @@
 	}
 	return ret;
 eagain:
-	write_sequnlock(&dev->iowait_lock);
+	write_sequnlock(&sde->waitlock);
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 	list_del_init(&stx->list);
 	return -EAGAIN;
@@ -478,6 +556,17 @@
 		hfi1_schedule_send(qp);
 	}
 	spin_unlock_irqrestore(&qp->s_lock, flags);
+}
+
+static void hfi1_init_priority(struct iowait *w)
+{
+	struct rvt_qp *qp = iowait_to_qp(w);
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	if (qp->s_flags & RVT_S_ACK_PENDING)
+		w->priority++;
+	if (priv->s_flags & RVT_S_ACK_PENDING)
+		w->priority++;
 }
 
 /**
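
The new hfi1_init_priority() callback lets the iowait layer favor QPs that still owe acknowledgements when they queue behind a congested resource. A toy version of priority-aware queueing is sketched below; the list handling is simplified, whereas the real code inserts into a shared dmawait list under a seqlock.

#include <stdio.h>

struct waiter {
	struct waiter *next;
	int priority;
	const char *name;
};

/* Bump priority for waiters that still have ACK work outstanding. */
static void init_priority(struct waiter *w, int ack_pending, int tid_ack_pending)
{
	w->priority = 0;
	if (ack_pending)
		w->priority++;
	if (tid_ack_pending)
		w->priority++;
}

/* Insert so that higher-priority waiters drain first. */
static void enqueue(struct waiter **head, struct waiter *w)
{
	while (*head && (*head)->priority >= w->priority)
		head = &(*head)->next;
	w->next = *head;
	*head = w;
}

int main(void)
{
	struct waiter a = { .name = "bulk qp" }, b = { .name = "acking qp" };
	struct waiter *head = NULL;

	init_priority(&a, 0, 0);
	init_priority(&b, 1, 0);
	enqueue(&head, &a);
	enqueue(&head, &b);
	printf("first to run: %s\n", head->name);
	return 0;
}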
@@ -602,8 +691,8 @@
 		   sde ? sde->this_idx : 0,
 		   send_context,
 		   send_context ? send_context->sw_index : 0,
-		   ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->head,
-		   ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail,
+		   ib_cq_head(qp->ibqp.send_cq),
+		   ib_cq_tail(qp->ibqp.send_cq),
 		   qp->pid,
 		   qp->s_state,
 		   qp->s_ack_state,
@@ -637,9 +726,13 @@
 		    &priv->s_iowait,
 		    1,
 		    _hfi1_do_send,
+		    _hfi1_do_tid_send,
 		    iowait_sleep,
 		    iowait_wakeup,
-		    iowait_sdma_drained);
+		    iowait_sdma_drained,
+		    hfi1_init_priority);
+	/* Init to a value to start the running average correctly */
+	priv->s_running_pkt_size = piothreshold / 2;
 	return priv;
 }
 
@@ -647,6 +740,7 @@
 {
 	struct hfi1_qp_priv *priv = qp->priv;
 
+	hfi1_qp_priv_tid_free(rdi, qp);
 	kfree(priv->s_ahg);
 	kfree(priv);
 }
@@ -680,19 +774,24 @@
 {
 	lockdep_assert_held(&qp->s_lock);
 	flush_iowait(qp);
+	hfi1_tid_rdma_flush_wait(qp);
 }
 
 void stop_send_queue(struct rvt_qp *qp)
 {
 	struct hfi1_qp_priv *priv = qp->priv;
 
-	cancel_work_sync(&priv->s_iowait.iowork);
+	iowait_cancel_work(&priv->s_iowait);
+	if (cancel_work_sync(&priv->tid_rdma.trigger_work))
+		rvt_put_qp(qp);
 }
 
 void quiesce_qp(struct rvt_qp *qp)
 {
 	struct hfi1_qp_priv *priv = qp->priv;
 
+	hfi1_del_tid_reap_timer(qp);
+	hfi1_del_tid_retry_timer(qp);
 	iowait_sdma_drain(&priv->s_iowait);
 	qp_pio_drain(qp);
 	flush_tx_list(qp);
@@ -700,8 +799,13 @@
 
 void notify_qp_reset(struct rvt_qp *qp)
 {
+	hfi1_qp_kern_exp_rcv_clear_all(qp);
 	qp->r_adefered = 0;
 	clear_ahg(qp);
+
+	/* Clear any OPFN state */
+	if (qp->ibqp.qp_type == IB_QPT_RC)
+		opfn_conn_error(qp);
 }
 
 /*
@@ -783,8 +887,11 @@
 	if (lock) {
 		write_seqlock(lock);
 		if (!list_empty(&priv->s_iowait.list) &&
-		    !(qp->s_flags & RVT_S_BUSY)) {
+		    !(qp->s_flags & RVT_S_BUSY) &&
+		    !(priv->s_flags & RVT_S_BUSY)) {
 			qp->s_flags &= ~HFI1_S_ANY_WAIT_IO;
+			iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
+			iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
 			list_del_init(&priv->s_iowait.list);
 			priv->s_iowait.lock = NULL;
 			rvt_put_qp(qp);
@@ -792,7 +899,8 @@
 		write_sequnlock(lock);
 	}
 
-	if (!(qp->s_flags & RVT_S_BUSY)) {
+	if (!(qp->s_flags & RVT_S_BUSY) && !(priv->s_flags & RVT_S_BUSY)) {
+		qp->s_hdrwords = 0;
 		if (qp->s_rdma_mr) {
 			rvt_put_mr(qp->s_rdma_mr);
 			qp->s_rdma_mr = NULL;