From d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Mon, 11 Dec 2023 02:45:28 +0000 Subject: [PATCH] add boot partition size --- kernel/drivers/infiniband/hw/hfi1/qp.c | 234 ++++++++++++++++++++++++++++++++++++++++++--------------- 1 files changed, 171 insertions(+), 63 deletions(-) diff --git a/kernel/drivers/infiniband/hw/hfi1/qp.c b/kernel/drivers/infiniband/hw/hfi1/qp.c index 63c5ba6..356518e 100644 --- a/kernel/drivers/infiniband/hw/hfi1/qp.c +++ b/kernel/drivers/infiniband/hw/hfi1/qp.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -66,7 +66,7 @@ static void flush_tx_list(struct rvt_qp *qp); static int iowait_sleep( struct sdma_engine *sde, - struct iowait *wait, + struct iowait_work *wait, struct sdma_txreq *stx, unsigned int seq, bool pkts_sent); @@ -132,23 +132,41 @@ .qpt_support = BIT(IB_QPT_RC), }, +[IB_WR_OPFN] = { + .length = sizeof(struct ib_atomic_wr), + .qpt_support = BIT(IB_QPT_RC), + .flags = RVT_OPERATION_USE_RESERVE, +}, + +[IB_WR_TID_RDMA_WRITE] = { + .length = sizeof(struct ib_rdma_wr), + .qpt_support = BIT(IB_QPT_RC), + .flags = RVT_OPERATION_IGN_RNR_CNT, +}, + }; -static void flush_tx_list(struct rvt_qp *qp) +static void flush_list_head(struct list_head *l) { - struct hfi1_qp_priv *priv = qp->priv; - - while (!list_empty(&priv->s_iowait.tx_head)) { + while (!list_empty(l)) { struct sdma_txreq *tx; tx = list_first_entry( - &priv->s_iowait.tx_head, + l, struct sdma_txreq, list); list_del_init(&tx->list); hfi1_put_txreq( container_of(tx, struct verbs_txreq, txreq)); } +} + +static void flush_tx_list(struct rvt_qp *qp) +{ + struct hfi1_qp_priv *priv = qp->priv; + + flush_list_head(&iowait_get_ib_work(&priv->s_iowait)->tx_head); + flush_list_head(&iowait_get_tid_work(&priv->s_iowait)->tx_head); } static void flush_iowait(struct rvt_qp *qp) @@ -168,15 +186,6 @@ write_sequnlock_irqrestore(lock, flags); } -static inline int opa_mtu_enum_to_int(int mtu) -{ - switch (mtu) { - case OPA_MTU_8192: return 8192; - case OPA_MTU_10240: return 10240; - default: return -1; - } -} - /** * This function is what we would push to the core layer if we wanted to be a * "first class citizen". Instead we hide this here and rely on Verbs ULPs @@ -184,15 +193,10 @@ */ static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu) { - int val; - /* Constraining 10KB packets to 8KB packets */ if (mtu == (enum ib_mtu)OPA_MTU_10240) - mtu = OPA_MTU_8192; - val = opa_mtu_enum_to_int((int)mtu); - if (val > 0) - return val; - return ib_mtu_enum_to_int(mtu); + mtu = (enum ib_mtu)OPA_MTU_8192; + return opa_mtu_enum_to_int((enum opa_mtu)mtu); } int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, @@ -279,41 +283,58 @@ priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc); qp_set_16b(qp); } + + opfn_qp_init(qp, attr, attr_mask); } /** - * hfi1_check_send_wqe - validate wqe + * hfi1_setup_wqe - set up the wqe * @qp - The qp * @wqe - The built wqe + * @call_send - Determine if the send should be posted or scheduled. * - * validate wqe. This is called - * prior to inserting the wqe into - * the ring but after the wqe has been - * setup. + * Perform setup of the wqe. This is called + * prior to inserting the wqe into the ring but after + * the wqe has been setup by RDMAVT. This function + * allows the driver the opportunity to perform + * validation and additional setup of the wqe. * * Returns 0 on success, -EINVAL on failure * */ -int hfi1_check_send_wqe(struct rvt_qp *qp, - struct rvt_swqe *wqe) +int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct rvt_ah *ah; + struct hfi1_pportdata *ppd; + struct hfi1_devdata *dd; switch (qp->ibqp.qp_type) { case IB_QPT_RC: + hfi1_setup_tid_rdma_wqe(qp, wqe); + fallthrough; case IB_QPT_UC: if (wqe->length > 0x80000000U) return -EINVAL; + if (wqe->length > qp->pmtu) + *call_send = false; break; case IB_QPT_SMI: - ah = ibah_to_rvtah(wqe->ud_wr.ah); - if (wqe->length > (1 << ah->log_pmtu)) + /* + * SM packets should exclusively use VL15 and their SL is + * ignored (IBTA v1.3, Section 3.5.8.2). Therefore, when ah + * is created, SL is 0 in most cases and as a result some + * fields (vl and pmtu) in ah may not be set correctly, + * depending on the SL2SC and SC2VL tables at the time. + */ + ppd = ppd_from_ibp(ibp); + dd = dd_from_ppd(ppd); + if (wqe->length > dd->vld[15].mtu) return -EINVAL; break; case IB_QPT_GSI: case IB_QPT_UD: - ah = ibah_to_rvtah(wqe->ud_wr.ah); + ah = rvt_get_swqe_ah(wqe); if (wqe->length > (1 << ah->log_pmtu)) return -EINVAL; if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf) @@ -321,7 +342,14 @@ default: break; } - return wqe->length <= piothreshold; + + /* + * System latency between send and schedule is large enough that + * forcing call_send to true for piothreshold packets is necessary. + */ + if (wqe->length <= piothreshold) + *call_send = true; + return 0; } /** @@ -333,36 +361,37 @@ * It is only used in the post send, which doesn't hold * the s_lock. */ -void _hfi1_schedule_send(struct rvt_qp *qp) +bool _hfi1_schedule_send(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); + struct hfi1_devdata *dd = ppd->dd; - iowait_schedule(&priv->s_iowait, ppd->hfi1_wq, - priv->s_sde ? - priv->s_sde->cpu : - cpumask_first(cpumask_of_node(dd->node))); + if (dd->flags & HFI1_SHUTDOWN) + return true; + + return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq, + priv->s_sde ? + priv->s_sde->cpu : + cpumask_first(cpumask_of_node(dd->node))); } static void qp_pio_drain(struct rvt_qp *qp) { - struct hfi1_ibdev *dev; struct hfi1_qp_priv *priv = qp->priv; if (!priv->s_sendcontext) return; - dev = to_idev(qp->ibqp.device); while (iowait_pio_pending(&priv->s_iowait)) { - write_seqlock_irq(&dev->iowait_lock); + write_seqlock_irq(&priv->s_sendcontext->waitlock); hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1); - write_sequnlock_irq(&dev->iowait_lock); + write_sequnlock_irq(&priv->s_sendcontext->waitlock); iowait_pio_drain(&priv->s_iowait); - write_seqlock_irq(&dev->iowait_lock); + write_seqlock_irq(&priv->s_sendcontext->waitlock); hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0); - write_sequnlock_irq(&dev->iowait_lock); + write_sequnlock_irq(&priv->s_sendcontext->waitlock); } } @@ -372,12 +401,37 @@ * * This schedules qp progress and caller should hold * the s_lock. + * @return true if the first leg is scheduled; + * false if the first leg is not scheduled. */ -void hfi1_schedule_send(struct rvt_qp *qp) +bool hfi1_schedule_send(struct rvt_qp *qp) { lockdep_assert_held(&qp->s_lock); - if (hfi1_send_ok(qp)) + if (hfi1_send_ok(qp)) { _hfi1_schedule_send(qp); + return true; + } + if (qp->s_flags & HFI1_S_ANY_WAIT_IO) + iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait, + IOWAIT_PENDING_IB); + return false; +} + +static void hfi1_qp_schedule(struct rvt_qp *qp) +{ + struct hfi1_qp_priv *priv = qp->priv; + bool ret; + + if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_IB)) { + ret = hfi1_schedule_send(qp); + if (ret) + iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB); + } + if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_TID)) { + ret = hfi1_schedule_tid_send(qp); + if (ret) + iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID); + } } void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag) @@ -388,16 +442,41 @@ if (qp->s_flags & flag) { qp->s_flags &= ~flag; trace_hfi1_qpwakeup(qp, flag); - hfi1_schedule_send(qp); + hfi1_qp_schedule(qp); } spin_unlock_irqrestore(&qp->s_lock, flags); /* Notify hfi1_destroy_qp() if it is waiting. */ rvt_put_qp(qp); } +void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait) +{ + struct hfi1_qp_priv *priv = qp->priv; + + if (iowait_set_work_flag(wait) == IOWAIT_IB_SE) { + qp->s_flags &= ~RVT_S_BUSY; + /* + * If we are sending a first-leg packet from the second leg, + * we need to clear the busy flag from priv->s_flags to + * avoid a race condition when the qp wakes up before + * the call to hfi1_verbs_send() returns to the second + * leg. In that case, the second leg will terminate without + * being re-scheduled, resulting in failure to send TID RDMA + * WRITE DATA and TID RDMA ACK packets. + */ + if (priv->s_flags & HFI1_S_TID_BUSY_SET) { + priv->s_flags &= ~(HFI1_S_TID_BUSY_SET | + RVT_S_BUSY); + iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID); + } + } else { + priv->s_flags &= ~RVT_S_BUSY; + } +} + static int iowait_sleep( struct sdma_engine *sde, - struct iowait *wait, + struct iowait_work *wait, struct sdma_txreq *stx, uint seq, bool pkts_sent) @@ -407,7 +486,6 @@ struct hfi1_qp_priv *priv; unsigned long flags; int ret = 0; - struct hfi1_ibdev *dev; qp = tx->qp; priv = qp->priv; @@ -420,9 +498,8 @@ * buffer and undoing the side effects of the copy. */ /* Make a common routine? */ - dev = &sde->dd->verbs_dev; list_add_tail(&stx->list, &wait->tx_head); - write_seqlock(&dev->iowait_lock); + write_seqlock(&sde->waitlock); if (sdma_progress(sde, seq, stx)) goto eagain; if (list_empty(&priv->s_iowait.list)) { @@ -431,14 +508,15 @@ ibp->rvp.n_dmawait++; qp->s_flags |= RVT_S_WAIT_DMA_DESC; + iowait_get_priority(&priv->s_iowait); iowait_queue(pkts_sent, &priv->s_iowait, &sde->dmawait); - priv->s_iowait.lock = &dev->iowait_lock; + priv->s_iowait.lock = &sde->waitlock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC); rvt_get_qp(qp); } - write_sequnlock(&dev->iowait_lock); - qp->s_flags &= ~RVT_S_BUSY; + write_sequnlock(&sde->waitlock); + hfi1_qp_unbusy(qp, wait); spin_unlock_irqrestore(&qp->s_lock, flags); ret = -EBUSY; } else { @@ -447,7 +525,7 @@ } return ret; eagain: - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sde->waitlock); spin_unlock_irqrestore(&qp->s_lock, flags); list_del_init(&stx->list); return -EAGAIN; @@ -478,6 +556,17 @@ hfi1_schedule_send(qp); } spin_unlock_irqrestore(&qp->s_lock, flags); +} + +static void hfi1_init_priority(struct iowait *w) +{ + struct rvt_qp *qp = iowait_to_qp(w); + struct hfi1_qp_priv *priv = qp->priv; + + if (qp->s_flags & RVT_S_ACK_PENDING) + w->priority++; + if (priv->s_flags & RVT_S_ACK_PENDING) + w->priority++; } /** @@ -602,8 +691,8 @@ sde ? sde->this_idx : 0, send_context, send_context ? send_context->sw_index : 0, - ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->head, - ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail, + ib_cq_head(qp->ibqp.send_cq), + ib_cq_tail(qp->ibqp.send_cq), qp->pid, qp->s_state, qp->s_ack_state, @@ -637,9 +726,13 @@ &priv->s_iowait, 1, _hfi1_do_send, + _hfi1_do_tid_send, iowait_sleep, iowait_wakeup, - iowait_sdma_drained); + iowait_sdma_drained, + hfi1_init_priority); + /* Init to a value to start the running average correctly */ + priv->s_running_pkt_size = piothreshold / 2; return priv; } @@ -647,6 +740,7 @@ { struct hfi1_qp_priv *priv = qp->priv; + hfi1_qp_priv_tid_free(rdi, qp); kfree(priv->s_ahg); kfree(priv); } @@ -680,19 +774,24 @@ { lockdep_assert_held(&qp->s_lock); flush_iowait(qp); + hfi1_tid_rdma_flush_wait(qp); } void stop_send_queue(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; - cancel_work_sync(&priv->s_iowait.iowork); + iowait_cancel_work(&priv->s_iowait); + if (cancel_work_sync(&priv->tid_rdma.trigger_work)) + rvt_put_qp(qp); } void quiesce_qp(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; + hfi1_del_tid_reap_timer(qp); + hfi1_del_tid_retry_timer(qp); iowait_sdma_drain(&priv->s_iowait); qp_pio_drain(qp); flush_tx_list(qp); @@ -700,8 +799,13 @@ void notify_qp_reset(struct rvt_qp *qp) { + hfi1_qp_kern_exp_rcv_clear_all(qp); qp->r_adefered = 0; clear_ahg(qp); + + /* Clear any OPFN state */ + if (qp->ibqp.qp_type == IB_QPT_RC) + opfn_conn_error(qp); } /* @@ -783,8 +887,11 @@ if (lock) { write_seqlock(lock); if (!list_empty(&priv->s_iowait.list) && - !(qp->s_flags & RVT_S_BUSY)) { + !(qp->s_flags & RVT_S_BUSY) && + !(priv->s_flags & RVT_S_BUSY)) { qp->s_flags &= ~HFI1_S_ANY_WAIT_IO; + iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB); + iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID); list_del_init(&priv->s_iowait.list); priv->s_iowait.lock = NULL; rvt_put_qp(qp); @@ -792,7 +899,8 @@ write_sequnlock(lock); } - if (!(qp->s_flags & RVT_S_BUSY)) { + if (!(qp->s_flags & RVT_S_BUSY) && !(priv->s_flags & RVT_S_BUSY)) { + qp->s_hdrwords = 0; if (qp->s_rdma_mr) { rvt_put_mr(qp->s_rdma_mr); qp->s_rdma_mr = NULL; -- Gitblit v1.6.2