From 071106ecf68c401173c58808b1cf5f68cc50d390 Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Fri, 05 Jan 2024 08:39:27 +0000 Subject: [PATCH] change wifi driver to cypress --- kernel/drivers/ntb/ntb_transport.c | 370 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 files changed, 309 insertions(+), 61 deletions(-) diff --git a/kernel/drivers/ntb/ntb_transport.c b/kernel/drivers/ntb/ntb_transport.c index 9398959..8595700 100644 --- a/kernel/drivers/ntb/ntb_transport.c +++ b/kernel/drivers/ntb/ntb_transport.c @@ -93,6 +93,12 @@ module_param(use_dma, bool, 0644); MODULE_PARM_DESC(use_dma, "Use DMA engine to perform large data copy"); +static bool use_msi; +#ifdef CONFIG_NTB_MSI +module_param(use_msi, bool, 0644); +MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells"); +#endif + static struct dentry *nt_debugfs_dir; /* Only two-ports NTB devices are supported */ @@ -144,7 +150,9 @@ struct list_head tx_free_q; spinlock_t ntb_tx_free_q_lock; void __iomem *tx_mw; - dma_addr_t tx_mw_phys; + phys_addr_t tx_mw_phys; + size_t tx_mw_size; + dma_addr_t tx_mw_dma_addr; unsigned int tx_index; unsigned int tx_max_entry; unsigned int tx_max_frame; @@ -186,6 +194,11 @@ u64 tx_err_no_buf; u64 tx_memcpy; u64 tx_async; + + bool use_msi; + int msi_irq; + struct ntb_msi_desc msi_desc; + struct ntb_msi_desc peer_msi_desc; }; struct ntb_transport_mw { @@ -194,6 +207,8 @@ void __iomem *vbase; size_t xlat_size; size_t buff_size; + size_t alloc_size; + void *alloc_addr; void *virt_addr; dma_addr_t dma_addr; }; @@ -216,6 +231,10 @@ unsigned int qp_count; u64 qp_bitmap; u64 qp_bitmap_free; + + bool use_msi; + unsigned int msi_spad_offset; + u64 msi_db_mask; bool link_is_up; struct delayed_work link_work; @@ -273,7 +292,7 @@ static int ntb_transport_bus_probe(struct device *dev) { const struct ntb_transport_client *client; - int rc = -EINVAL; + int rc; get_device(dev); @@ -393,7 +412,7 @@ rc = device_register(dev); if (rc) { - kfree(client_dev); + put_device(dev); goto err; } @@ -462,70 +481,70 @@ return -ENOMEM; out_offset = 0; - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "\nNTB QP stats:\n\n"); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "rx_bytes - \t%llu\n", qp->rx_bytes); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "rx_pkts - \t%llu\n", qp->rx_pkts); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "rx_memcpy - \t%llu\n", qp->rx_memcpy); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "rx_async - \t%llu\n", qp->rx_async); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "rx_ring_empty - %llu\n", qp->rx_ring_empty); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "rx_err_no_buf - %llu\n", qp->rx_err_no_buf); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "rx_err_oflow - \t%llu\n", qp->rx_err_oflow); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "rx_err_ver - \t%llu\n", qp->rx_err_ver); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "rx_buff - \t0x%p\n", qp->rx_buff); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "rx_index - \t%u\n", qp->rx_index); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "rx_max_entry - \t%u\n", qp->rx_max_entry); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "tx_bytes - \t%llu\n", qp->tx_bytes); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "tx_pkts - \t%llu\n", qp->tx_pkts); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "tx_memcpy - \t%llu\n", qp->tx_memcpy); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "tx_async - \t%llu\n", qp->tx_async); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "tx_ring_full - \t%llu\n", qp->tx_ring_full); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "tx_err_no_buf - %llu\n", qp->tx_err_no_buf); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "tx_mw - \t0x%p\n", qp->tx_mw); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "tx_index (H) - \t%u\n", qp->tx_index); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "RRI (T) - \t%u\n", qp->remote_rx_info->entry); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "tx_max_entry - \t%u\n", qp->tx_max_entry); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "free tx - \t%u\n", ntb_transport_tx_free_entry(qp)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "\n"); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Using TX DMA - \t%s\n", qp->tx_dma_chan ? "Yes" : "No"); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Using RX DMA - \t%s\n", qp->rx_dma_chan ? "Yes" : "No"); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "QP Link - \t%s\n", qp->link_is_up ? "Up" : "Down"); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "\n"); if (out_offset > out_count) @@ -663,6 +682,114 @@ return 0; } +static irqreturn_t ntb_transport_isr(int irq, void *dev) +{ + struct ntb_transport_qp *qp = dev; + + tasklet_schedule(&qp->rxc_db_work); + + return IRQ_HANDLED; +} + +static void ntb_transport_setup_qp_peer_msi(struct ntb_transport_ctx *nt, + unsigned int qp_num) +{ + struct ntb_transport_qp *qp = &nt->qp_vec[qp_num]; + int spad = qp_num * 2 + nt->msi_spad_offset; + + if (!nt->use_msi) + return; + + if (spad >= ntb_spad_count(nt->ndev)) + return; + + qp->peer_msi_desc.addr_offset = + ntb_peer_spad_read(qp->ndev, PIDX, spad); + qp->peer_msi_desc.data = + ntb_peer_spad_read(qp->ndev, PIDX, spad + 1); + + dev_dbg(&qp->ndev->pdev->dev, "QP%d Peer MSI addr=%x data=%x\n", + qp_num, qp->peer_msi_desc.addr_offset, qp->peer_msi_desc.data); + + if (qp->peer_msi_desc.addr_offset) { + qp->use_msi = true; + dev_info(&qp->ndev->pdev->dev, + "Using MSI interrupts for QP%d\n", qp_num); + } +} + +static void ntb_transport_setup_qp_msi(struct ntb_transport_ctx *nt, + unsigned int qp_num) +{ + struct ntb_transport_qp *qp = &nt->qp_vec[qp_num]; + int spad = qp_num * 2 + nt->msi_spad_offset; + int rc; + + if (!nt->use_msi) + return; + + if (spad >= ntb_spad_count(nt->ndev)) { + dev_warn_once(&qp->ndev->pdev->dev, + "Not enough SPADS to use MSI interrupts\n"); + return; + } + + ntb_spad_write(qp->ndev, spad, 0); + ntb_spad_write(qp->ndev, spad + 1, 0); + + if (!qp->msi_irq) { + qp->msi_irq = ntbm_msi_request_irq(qp->ndev, ntb_transport_isr, + KBUILD_MODNAME, qp, + &qp->msi_desc); + if (qp->msi_irq < 0) { + dev_warn(&qp->ndev->pdev->dev, + "Unable to allocate MSI interrupt for qp%d\n", + qp_num); + return; + } + } + + rc = ntb_spad_write(qp->ndev, spad, qp->msi_desc.addr_offset); + if (rc) + goto err_free_interrupt; + + rc = ntb_spad_write(qp->ndev, spad + 1, qp->msi_desc.data); + if (rc) + goto err_free_interrupt; + + dev_dbg(&qp->ndev->pdev->dev, "QP%d MSI %d addr=%x data=%x\n", + qp_num, qp->msi_irq, qp->msi_desc.addr_offset, + qp->msi_desc.data); + + return; + +err_free_interrupt: + devm_free_irq(&nt->ndev->dev, qp->msi_irq, qp); +} + +static void ntb_transport_msi_peer_desc_changed(struct ntb_transport_ctx *nt) +{ + int i; + + dev_dbg(&nt->ndev->pdev->dev, "Peer MSI descriptors changed"); + + for (i = 0; i < nt->qp_count; i++) + ntb_transport_setup_qp_peer_msi(nt, i); +} + +static void ntb_transport_msi_desc_changed(void *data) +{ + struct ntb_transport_ctx *nt = data; + int i; + + dev_dbg(&nt->ndev->pdev->dev, "MSI descriptors changed"); + + for (i = 0; i < nt->qp_count; i++) + ntb_transport_setup_qp_msi(nt, i); + + ntb_peer_db_set(nt->ndev, nt->msi_db_mask); +} + static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw) { struct ntb_transport_mw *mw = &nt->mw_vec[num_mw]; @@ -672,11 +799,57 @@ return; ntb_mw_clear_trans(nt->ndev, PIDX, num_mw); - dma_free_coherent(&pdev->dev, mw->buff_size, - mw->virt_addr, mw->dma_addr); + dma_free_coherent(&pdev->dev, mw->alloc_size, + mw->alloc_addr, mw->dma_addr); mw->xlat_size = 0; mw->buff_size = 0; + mw->alloc_size = 0; + mw->alloc_addr = NULL; mw->virt_addr = NULL; +} + +static int ntb_alloc_mw_buffer(struct ntb_transport_mw *mw, + struct device *dma_dev, size_t align) +{ + dma_addr_t dma_addr; + void *alloc_addr, *virt_addr; + int rc; + + alloc_addr = dma_alloc_coherent(dma_dev, mw->alloc_size, + &dma_addr, GFP_KERNEL); + if (!alloc_addr) { + dev_err(dma_dev, "Unable to alloc MW buff of size %zu\n", + mw->alloc_size); + return -ENOMEM; + } + virt_addr = alloc_addr; + + /* + * we must ensure that the memory address allocated is BAR size + * aligned in order for the XLAT register to take the value. This + * is a requirement of the hardware. It is recommended to setup CMA + * for BAR sizes equal or greater than 4MB. + */ + if (!IS_ALIGNED(dma_addr, align)) { + if (mw->alloc_size > mw->buff_size) { + virt_addr = PTR_ALIGN(alloc_addr, align); + dma_addr = ALIGN(dma_addr, align); + } else { + rc = -ENOMEM; + goto err; + } + } + + mw->alloc_addr = alloc_addr; + mw->virt_addr = virt_addr; + mw->dma_addr = dma_addr; + + return 0; + +err: + dma_free_coherent(dma_dev, mw->alloc_size, alloc_addr, dma_addr); + + return rc; } static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, @@ -710,28 +883,20 @@ /* Alloc memory for receiving data. Must be aligned */ mw->xlat_size = xlat_size; mw->buff_size = buff_size; + mw->alloc_size = buff_size; - mw->virt_addr = dma_alloc_coherent(&pdev->dev, buff_size, - &mw->dma_addr, GFP_KERNEL); - if (!mw->virt_addr) { - mw->xlat_size = 0; - mw->buff_size = 0; - dev_err(&pdev->dev, "Unable to alloc MW buff of size %zu\n", - buff_size); - return -ENOMEM; - } - - /* - * we must ensure that the memory address allocated is BAR size - * aligned in order for the XLAT register to take the value. This - * is a requirement of the hardware. It is recommended to setup CMA - * for BAR sizes equal or greater than 4MB. - */ - if (!IS_ALIGNED(mw->dma_addr, xlat_align)) { - dev_err(&pdev->dev, "DMA memory %pad is not aligned\n", - &mw->dma_addr); - ntb_free_mw(nt, num_mw); - return -ENOMEM; + rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align); + if (rc) { + mw->alloc_size *= 2; + rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align); + if (rc) { + dev_err(&pdev->dev, + "Unable to alloc aligned MW buff\n"); + mw->xlat_size = 0; + mw->buff_size = 0; + mw->alloc_size = 0; + return rc; + } } /* Notify HW the memory location of the receive buffer */ @@ -746,7 +911,7 @@ return 0; } -static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp) +static void ntb_qp_link_context_reset(struct ntb_transport_qp *qp) { qp->link_is_up = false; qp->active = false; @@ -767,6 +932,13 @@ qp->tx_err_no_buf = 0; qp->tx_memcpy = 0; qp->tx_async = 0; +} + +static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp) +{ + ntb_qp_link_context_reset(qp); + if (qp->remote_rx_info) + qp->remote_rx_info->entry = qp->rx_max_entry - 1; } static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp) @@ -822,6 +994,9 @@ if (!nt->link_is_up) cancel_delayed_work_sync(&nt->link_work); + for (i = 0; i < nt->mw_count; i++) + ntb_free_mw(nt, i); + /* The scratchpad registers keep the values if the remote side * goes down, blast them now to give them a sane value the next * time they are accessed @@ -860,6 +1035,20 @@ int rc = 0, i, spad; /* send the local info, in the opposite order of the way we read it */ + + if (nt->use_msi) { + rc = ntb_msi_setup_mws(ndev); + if (rc) { + dev_warn(&pdev->dev, + "Failed to register MSI memory window: %d\n", + rc); + nt->use_msi = false; + } + } + + for (i = 0; i < nt->qp_count; i++) + ntb_transport_setup_qp_msi(nt, i); + for (i = 0; i < nt->mw_count; i++) { size = nt->mw_vec[i].phys_size; @@ -917,6 +1106,7 @@ struct ntb_transport_qp *qp = &nt->qp_vec[i]; ntb_transport_setup_qp_mw(nt, i); + ntb_transport_setup_qp_peer_msi(nt, i); if (qp->client_ready) schedule_delayed_work(&qp->link_work, 0); @@ -993,7 +1183,7 @@ qp->ndev = nt->ndev; qp->client_ready = false; qp->event_handler = NULL; - ntb_qp_link_down_reset(qp); + ntb_qp_link_context_reset(qp); if (mw_num < qp_count % mw_count) num_qps_mw = qp_count / mw_count + 1; @@ -1009,6 +1199,7 @@ tx_size = (unsigned int)mw_size / num_qps_mw; qp_offset = tx_size * (qp_num / mw_count); + qp->tx_mw_size = tx_size; qp->tx_mw = nt->mw_vec[mw_num].vbase + qp_offset; if (!qp->tx_mw) return -EINVAL; @@ -1089,6 +1280,19 @@ return -ENOMEM; nt->ndev = ndev; + + /* + * If we are using MSI, and have at least one extra memory window, + * we will reserve the last MW for the MSI window. + */ + if (use_msi && mw_count > 1) { + rc = ntb_msi_init(ndev, ntb_transport_msi_desc_changed); + if (!rc) { + mw_count -= 1; + nt->use_msi = true; + } + } + spad_count = ntb_spad_count(ndev); /* Limit the MW's based on the availability of scratchpads */ @@ -1101,6 +1305,8 @@ max_mw_count_for_spads = (spad_count - MW0_SZ_HIGH) / 2; nt->mw_count = min(mw_count, max_mw_count_for_spads); + + nt->msi_spad_offset = nt->mw_count * 2 + MW0_SZ_HIGH; nt->mw_vec = kcalloc_node(mw_count, sizeof(*nt->mw_vec), GFP_KERNEL, node); @@ -1132,6 +1338,12 @@ qp_bitmap = ntb_db_valid_mask(ndev); qp_count = ilog2(qp_bitmap); + if (nt->use_msi) { + qp_count -= 1; + nt->msi_db_mask = 1 << qp_count; + ntb_db_clear_mask(ndev, nt->msi_db_mask); + } + if (max_num_clients && max_num_clients < qp_count) qp_count = max_num_clients; else if (nt->mw_count < qp_count) @@ -1278,6 +1490,7 @@ case DMA_TRANS_READ_FAILED: case DMA_TRANS_WRITE_FAILED: entry->errors++; + fallthrough; case DMA_TRANS_ABORTED: { struct ntb_transport_qp *qp = entry->qp; @@ -1533,6 +1746,7 @@ case DMA_TRANS_READ_FAILED: case DMA_TRANS_WRITE_FAILED: entry->errors++; + fallthrough; case DMA_TRANS_ABORTED: { void __iomem *offset = @@ -1553,7 +1767,10 @@ iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags); - ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num)); + if (qp->use_msi) + ntb_msi_peer_trigger(qp->ndev, PIDX, &qp->peer_msi_desc); + else + ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num)); /* The entry length can only be zero if the packet is intended to be a * "link down" or similar. Since no payload is being sent in these @@ -1602,7 +1819,7 @@ dma_cookie_t cookie; device = chan->device; - dest = qp->tx_mw_phys + qp->tx_max_frame * entry->tx_index; + dest = qp->tx_mw_dma_addr + qp->tx_max_frame * entry->tx_index; buff_off = (size_t)buf & ~PAGE_MASK; dest_off = (size_t)dest & ~PAGE_MASK; @@ -1821,6 +2038,19 @@ qp->rx_dma_chan = NULL; } + qp->tx_mw_dma_addr = 0; + if (qp->tx_dma_chan) { + qp->tx_mw_dma_addr = + dma_map_resource(qp->tx_dma_chan->device->dev, + qp->tx_mw_phys, qp->tx_mw_size, + DMA_FROM_DEVICE, 0); + if (dma_mapping_error(qp->tx_dma_chan->device->dev, + qp->tx_mw_dma_addr)) { + qp->tx_mw_dma_addr = 0; + goto err1; + } + } + dev_dbg(&pdev->dev, "Using %s memcpy for TX\n", qp->tx_dma_chan ? "DMA" : "CPU"); @@ -1862,6 +2092,10 @@ qp->rx_alloc_entry = 0; while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q))) kfree(entry); + if (qp->tx_mw_dma_addr) + dma_unmap_resource(qp->tx_dma_chan->device->dev, + qp->tx_mw_dma_addr, qp->tx_mw_size, + DMA_FROM_DEVICE, 0); if (qp->tx_dma_chan) dma_release_channel(qp->tx_dma_chan); if (qp->rx_dma_chan) @@ -1903,6 +2137,11 @@ */ dma_sync_wait(chan, qp->last_cookie); dmaengine_terminate_all(chan); + + dma_unmap_resource(chan->device->dev, + qp->tx_mw_dma_addr, qp->tx_mw_size, + DMA_FROM_DEVICE, 0); + dma_release_channel(chan); } @@ -2046,8 +2285,12 @@ struct ntb_queue_entry *entry; int rc; - if (!qp || !qp->link_is_up || !len) + if (!qp || !len) return -EINVAL; + + /* If the qp link is down already, just ignore. */ + if (!qp->link_is_up) + return 0; entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q); if (!entry) { @@ -2188,7 +2431,7 @@ unsigned int head = qp->tx_index; unsigned int tail = qp->remote_rx_info->entry; - return tail > head ? tail - head : qp->tx_max_entry + tail - head; + return tail >= head ? tail - head : qp->tx_max_entry + tail - head; } EXPORT_SYMBOL_GPL(ntb_transport_tx_free_entry); @@ -2199,6 +2442,11 @@ u64 db_bits; unsigned int qp_num; + if (ntb_db_read(nt->ndev) & nt->msi_db_mask) { + ntb_transport_msi_peer_desc_changed(nt); + ntb_db_clear(nt->ndev, nt->msi_db_mask); + } + db_bits = (nt->qp_bitmap & ~nt->qp_bitmap_free & ntb_db_vector_mask(nt->ndev, vector)); -- Gitblit v1.6.2