From 072de836f53be56a70cecf70b43ae43b7ce17376 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Mon, 11 Dec 2023 10:08:36 +0000
Subject: [PATCH] mk-rootfs.sh
---
kernel/drivers/ntb/ntb_transport.c | 349 ++++++++++++++++++++++++++++++++++++++++++++++++---------
1 files changed, 293 insertions(+), 56 deletions(-)
diff --git a/kernel/drivers/ntb/ntb_transport.c b/kernel/drivers/ntb/ntb_transport.c
index 9398959..4a02561 100644
--- a/kernel/drivers/ntb/ntb_transport.c
+++ b/kernel/drivers/ntb/ntb_transport.c
@@ -93,6 +93,12 @@
module_param(use_dma, bool, 0644);
MODULE_PARM_DESC(use_dma, "Use DMA engine to perform large data copy");
+static bool use_msi;
+#ifdef CONFIG_NTB_MSI
+module_param(use_msi, bool, 0644);
+MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells");
+#endif
+
static struct dentry *nt_debugfs_dir;
/* Only two-ports NTB devices are supported */
@@ -144,7 +150,9 @@
struct list_head tx_free_q;
spinlock_t ntb_tx_free_q_lock;
void __iomem *tx_mw;
- dma_addr_t tx_mw_phys;
+ phys_addr_t tx_mw_phys;
+ size_t tx_mw_size;
+ dma_addr_t tx_mw_dma_addr;
unsigned int tx_index;
unsigned int tx_max_entry;
unsigned int tx_max_frame;
@@ -186,6 +194,11 @@
u64 tx_err_no_buf;
u64 tx_memcpy;
u64 tx_async;
+
+ bool use_msi;
+ int msi_irq;
+ struct ntb_msi_desc msi_desc;
+ struct ntb_msi_desc peer_msi_desc;
};
struct ntb_transport_mw {
@@ -194,6 +207,8 @@
void __iomem *vbase;
size_t xlat_size;
size_t buff_size;
+ size_t alloc_size;
+ void *alloc_addr;
void *virt_addr;
dma_addr_t dma_addr;
};
@@ -216,6 +231,10 @@
unsigned int qp_count;
u64 qp_bitmap;
u64 qp_bitmap_free;
+
+ bool use_msi;
+ unsigned int msi_spad_offset;
+ u64 msi_db_mask;
bool link_is_up;
struct delayed_work link_work;
@@ -273,7 +292,7 @@
static int ntb_transport_bus_probe(struct device *dev)
{
const struct ntb_transport_client *client;
- int rc = -EINVAL;
+ int rc;
get_device(dev);
@@ -462,70 +481,70 @@
return -ENOMEM;
out_offset = 0;
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"\nNTB QP stats:\n\n");
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"rx_bytes - \t%llu\n", qp->rx_bytes);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"rx_pkts - \t%llu\n", qp->rx_pkts);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"rx_memcpy - \t%llu\n", qp->rx_memcpy);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"rx_async - \t%llu\n", qp->rx_async);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"rx_ring_empty - %llu\n", qp->rx_ring_empty);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"rx_err_no_buf - %llu\n", qp->rx_err_no_buf);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"rx_err_oflow - \t%llu\n", qp->rx_err_oflow);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"rx_err_ver - \t%llu\n", qp->rx_err_ver);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"rx_buff - \t0x%p\n", qp->rx_buff);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"rx_index - \t%u\n", qp->rx_index);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"rx_max_entry - \t%u\n", qp->rx_max_entry);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"tx_bytes - \t%llu\n", qp->tx_bytes);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"tx_pkts - \t%llu\n", qp->tx_pkts);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"tx_memcpy - \t%llu\n", qp->tx_memcpy);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"tx_async - \t%llu\n", qp->tx_async);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"tx_ring_full - \t%llu\n", qp->tx_ring_full);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"tx_err_no_buf - %llu\n", qp->tx_err_no_buf);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"tx_mw - \t0x%p\n", qp->tx_mw);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"tx_index (H) - \t%u\n", qp->tx_index);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"RRI (T) - \t%u\n",
qp->remote_rx_info->entry);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"tx_max_entry - \t%u\n", qp->tx_max_entry);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"free tx - \t%u\n",
ntb_transport_tx_free_entry(qp));
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"\n");
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"Using TX DMA - \t%s\n",
qp->tx_dma_chan ? "Yes" : "No");
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"Using RX DMA - \t%s\n",
qp->rx_dma_chan ? "Yes" : "No");
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"QP Link - \t%s\n",
qp->link_is_up ? "Up" : "Down");
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"\n");
if (out_offset > out_count)
@@ -663,6 +682,114 @@
return 0;
}
+static irqreturn_t ntb_transport_isr(int irq, void *dev)
+{
+ struct ntb_transport_qp *qp = dev;
+
+ tasklet_schedule(&qp->rxc_db_work);
+
+ return IRQ_HANDLED;
+}
+
+static void ntb_transport_setup_qp_peer_msi(struct ntb_transport_ctx *nt,
+ unsigned int qp_num)
+{
+ struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
+ int spad = qp_num * 2 + nt->msi_spad_offset;
+
+ if (!nt->use_msi)
+ return;
+
+ if (spad >= ntb_spad_count(nt->ndev))
+ return;
+
+ qp->peer_msi_desc.addr_offset =
+ ntb_peer_spad_read(qp->ndev, PIDX, spad);
+ qp->peer_msi_desc.data =
+ ntb_peer_spad_read(qp->ndev, PIDX, spad + 1);
+
+ dev_dbg(&qp->ndev->pdev->dev, "QP%d Peer MSI addr=%x data=%x\n",
+ qp_num, qp->peer_msi_desc.addr_offset, qp->peer_msi_desc.data);
+
+ if (qp->peer_msi_desc.addr_offset) {
+ qp->use_msi = true;
+ dev_info(&qp->ndev->pdev->dev,
+ "Using MSI interrupts for QP%d\n", qp_num);
+ }
+}
+
+static void ntb_transport_setup_qp_msi(struct ntb_transport_ctx *nt,
+ unsigned int qp_num)
+{
+ struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
+ int spad = qp_num * 2 + nt->msi_spad_offset;
+ int rc;
+
+ if (!nt->use_msi)
+ return;
+
+ if (spad >= ntb_spad_count(nt->ndev)) {
+ dev_warn_once(&qp->ndev->pdev->dev,
+ "Not enough SPADS to use MSI interrupts\n");
+ return;
+ }
+
+ ntb_spad_write(qp->ndev, spad, 0);
+ ntb_spad_write(qp->ndev, spad + 1, 0);
+
+ if (!qp->msi_irq) {
+ qp->msi_irq = ntbm_msi_request_irq(qp->ndev, ntb_transport_isr,
+ KBUILD_MODNAME, qp,
+ &qp->msi_desc);
+ if (qp->msi_irq < 0) {
+ dev_warn(&qp->ndev->pdev->dev,
+ "Unable to allocate MSI interrupt for qp%d\n",
+ qp_num);
+ return;
+ }
+ }
+
+ rc = ntb_spad_write(qp->ndev, spad, qp->msi_desc.addr_offset);
+ if (rc)
+ goto err_free_interrupt;
+
+ rc = ntb_spad_write(qp->ndev, spad + 1, qp->msi_desc.data);
+ if (rc)
+ goto err_free_interrupt;
+
+ dev_dbg(&qp->ndev->pdev->dev, "QP%d MSI %d addr=%x data=%x\n",
+ qp_num, qp->msi_irq, qp->msi_desc.addr_offset,
+ qp->msi_desc.data);
+
+ return;
+
+err_free_interrupt:
+ devm_free_irq(&nt->ndev->dev, qp->msi_irq, qp);
+}
+
+static void ntb_transport_msi_peer_desc_changed(struct ntb_transport_ctx *nt)
+{
+ int i;
+
+ dev_dbg(&nt->ndev->pdev->dev, "Peer MSI descriptors changed");
+
+ for (i = 0; i < nt->qp_count; i++)
+ ntb_transport_setup_qp_peer_msi(nt, i);
+}
+
+static void ntb_transport_msi_desc_changed(void *data)
+{
+ struct ntb_transport_ctx *nt = data;
+ int i;
+
+ dev_dbg(&nt->ndev->pdev->dev, "MSI descriptors changed");
+
+ for (i = 0; i < nt->qp_count; i++)
+ ntb_transport_setup_qp_msi(nt, i);
+
+ ntb_peer_db_set(nt->ndev, nt->msi_db_mask);
+}
+
static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
{
struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
@@ -672,11 +799,57 @@
return;
ntb_mw_clear_trans(nt->ndev, PIDX, num_mw);
- dma_free_coherent(&pdev->dev, mw->buff_size,
- mw->virt_addr, mw->dma_addr);
+ dma_free_coherent(&pdev->dev, mw->alloc_size,
+ mw->alloc_addr, mw->dma_addr);
mw->xlat_size = 0;
mw->buff_size = 0;
+ mw->alloc_size = 0;
+ mw->alloc_addr = NULL;
mw->virt_addr = NULL;
+}
+
+static int ntb_alloc_mw_buffer(struct ntb_transport_mw *mw,
+ struct device *dma_dev, size_t align)
+{
+ dma_addr_t dma_addr;
+ void *alloc_addr, *virt_addr;
+ int rc;
+
+ alloc_addr = dma_alloc_coherent(dma_dev, mw->alloc_size,
+ &dma_addr, GFP_KERNEL);
+ if (!alloc_addr) {
+ dev_err(dma_dev, "Unable to alloc MW buff of size %zu\n",
+ mw->alloc_size);
+ return -ENOMEM;
+ }
+ virt_addr = alloc_addr;
+
+ /*
+ * we must ensure that the memory address allocated is BAR size
+ * aligned in order for the XLAT register to take the value. This
+ * is a requirement of the hardware. It is recommended to setup CMA
+ * for BAR sizes equal or greater than 4MB.
+ */
+ if (!IS_ALIGNED(dma_addr, align)) {
+ if (mw->alloc_size > mw->buff_size) {
+ virt_addr = PTR_ALIGN(alloc_addr, align);
+ dma_addr = ALIGN(dma_addr, align);
+ } else {
+ rc = -ENOMEM;
+ goto err;
+ }
+ }
+
+ mw->alloc_addr = alloc_addr;
+ mw->virt_addr = virt_addr;
+ mw->dma_addr = dma_addr;
+
+ return 0;
+
+err:
+ dma_free_coherent(dma_dev, mw->alloc_size, alloc_addr, dma_addr);
+
+ return rc;
}
static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
@@ -710,28 +883,20 @@
/* Alloc memory for receiving data. Must be aligned */
mw->xlat_size = xlat_size;
mw->buff_size = buff_size;
+ mw->alloc_size = buff_size;
- mw->virt_addr = dma_alloc_coherent(&pdev->dev, buff_size,
- &mw->dma_addr, GFP_KERNEL);
- if (!mw->virt_addr) {
- mw->xlat_size = 0;
- mw->buff_size = 0;
- dev_err(&pdev->dev, "Unable to alloc MW buff of size %zu\n",
- buff_size);
- return -ENOMEM;
- }
-
- /*
- * we must ensure that the memory address allocated is BAR size
- * aligned in order for the XLAT register to take the value. This
- * is a requirement of the hardware. It is recommended to setup CMA
- * for BAR sizes equal or greater than 4MB.
- */
- if (!IS_ALIGNED(mw->dma_addr, xlat_align)) {
- dev_err(&pdev->dev, "DMA memory %pad is not aligned\n",
- &mw->dma_addr);
- ntb_free_mw(nt, num_mw);
- return -ENOMEM;
+ rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align);
+ if (rc) {
+ mw->alloc_size *= 2;
+ rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align);
+ if (rc) {
+ dev_err(&pdev->dev,
+ "Unable to alloc aligned MW buff\n");
+ mw->xlat_size = 0;
+ mw->buff_size = 0;
+ mw->alloc_size = 0;
+ return rc;
+ }
}
/* Notify HW the memory location of the receive buffer */
@@ -822,6 +987,9 @@
if (!nt->link_is_up)
cancel_delayed_work_sync(&nt->link_work);
+ for (i = 0; i < nt->mw_count; i++)
+ ntb_free_mw(nt, i);
+
/* The scratchpad registers keep the values if the remote side
* goes down, blast them now to give them a sane value the next
* time they are accessed
@@ -860,6 +1028,20 @@
int rc = 0, i, spad;
/* send the local info, in the opposite order of the way we read it */
+
+ if (nt->use_msi) {
+ rc = ntb_msi_setup_mws(ndev);
+ if (rc) {
+ dev_warn(&pdev->dev,
+ "Failed to register MSI memory window: %d\n",
+ rc);
+ nt->use_msi = false;
+ }
+ }
+
+ for (i = 0; i < nt->qp_count; i++)
+ ntb_transport_setup_qp_msi(nt, i);
+
for (i = 0; i < nt->mw_count; i++) {
size = nt->mw_vec[i].phys_size;
@@ -917,6 +1099,7 @@
struct ntb_transport_qp *qp = &nt->qp_vec[i];
ntb_transport_setup_qp_mw(nt, i);
+ ntb_transport_setup_qp_peer_msi(nt, i);
if (qp->client_ready)
schedule_delayed_work(&qp->link_work, 0);
@@ -1009,6 +1192,7 @@
tx_size = (unsigned int)mw_size / num_qps_mw;
qp_offset = tx_size * (qp_num / mw_count);
+ qp->tx_mw_size = tx_size;
qp->tx_mw = nt->mw_vec[mw_num].vbase + qp_offset;
if (!qp->tx_mw)
return -EINVAL;
@@ -1089,6 +1273,19 @@
return -ENOMEM;
nt->ndev = ndev;
+
+ /*
+ * If we are using MSI, and have at least one extra memory window,
+ * we will reserve the last MW for the MSI window.
+ */
+ if (use_msi && mw_count > 1) {
+ rc = ntb_msi_init(ndev, ntb_transport_msi_desc_changed);
+ if (!rc) {
+ mw_count -= 1;
+ nt->use_msi = true;
+ }
+ }
+
spad_count = ntb_spad_count(ndev);
/* Limit the MW's based on the availability of scratchpads */
@@ -1101,6 +1298,8 @@
max_mw_count_for_spads = (spad_count - MW0_SZ_HIGH) / 2;
nt->mw_count = min(mw_count, max_mw_count_for_spads);
+
+ nt->msi_spad_offset = nt->mw_count * 2 + MW0_SZ_HIGH;
nt->mw_vec = kcalloc_node(mw_count, sizeof(*nt->mw_vec),
GFP_KERNEL, node);
@@ -1132,6 +1331,12 @@
qp_bitmap = ntb_db_valid_mask(ndev);
qp_count = ilog2(qp_bitmap);
+ if (nt->use_msi) {
+ qp_count -= 1;
+ nt->msi_db_mask = 1 << qp_count;
+ ntb_db_clear_mask(ndev, nt->msi_db_mask);
+ }
+
if (max_num_clients && max_num_clients < qp_count)
qp_count = max_num_clients;
else if (nt->mw_count < qp_count)
@@ -1278,6 +1483,7 @@
case DMA_TRANS_READ_FAILED:
case DMA_TRANS_WRITE_FAILED:
entry->errors++;
+ fallthrough;
case DMA_TRANS_ABORTED:
{
struct ntb_transport_qp *qp = entry->qp;
@@ -1533,6 +1739,7 @@
case DMA_TRANS_READ_FAILED:
case DMA_TRANS_WRITE_FAILED:
entry->errors++;
+ fallthrough;
case DMA_TRANS_ABORTED:
{
void __iomem *offset =
@@ -1553,7 +1760,10 @@
iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags);
- ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num));
+ if (qp->use_msi)
+ ntb_msi_peer_trigger(qp->ndev, PIDX, &qp->peer_msi_desc);
+ else
+ ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num));
/* The entry length can only be zero if the packet is intended to be a
* "link down" or similar. Since no payload is being sent in these
@@ -1602,7 +1812,7 @@
dma_cookie_t cookie;
device = chan->device;
- dest = qp->tx_mw_phys + qp->tx_max_frame * entry->tx_index;
+ dest = qp->tx_mw_dma_addr + qp->tx_max_frame * entry->tx_index;
buff_off = (size_t)buf & ~PAGE_MASK;
dest_off = (size_t)dest & ~PAGE_MASK;
@@ -1821,6 +2031,19 @@
qp->rx_dma_chan = NULL;
}
+ qp->tx_mw_dma_addr = 0;
+ if (qp->tx_dma_chan) {
+ qp->tx_mw_dma_addr =
+ dma_map_resource(qp->tx_dma_chan->device->dev,
+ qp->tx_mw_phys, qp->tx_mw_size,
+ DMA_FROM_DEVICE, 0);
+ if (dma_mapping_error(qp->tx_dma_chan->device->dev,
+ qp->tx_mw_dma_addr)) {
+ qp->tx_mw_dma_addr = 0;
+ goto err1;
+ }
+ }
+
dev_dbg(&pdev->dev, "Using %s memcpy for TX\n",
qp->tx_dma_chan ? "DMA" : "CPU");
@@ -1862,6 +2085,10 @@
qp->rx_alloc_entry = 0;
while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q)))
kfree(entry);
+ if (qp->tx_mw_dma_addr)
+ dma_unmap_resource(qp->tx_dma_chan->device->dev,
+ qp->tx_mw_dma_addr, qp->tx_mw_size,
+ DMA_FROM_DEVICE, 0);
if (qp->tx_dma_chan)
dma_release_channel(qp->tx_dma_chan);
if (qp->rx_dma_chan)
@@ -1903,6 +2130,11 @@
*/
dma_sync_wait(chan, qp->last_cookie);
dmaengine_terminate_all(chan);
+
+ dma_unmap_resource(chan->device->dev,
+ qp->tx_mw_dma_addr, qp->tx_mw_size,
+ DMA_FROM_DEVICE, 0);
+
dma_release_channel(chan);
}
@@ -2199,6 +2431,11 @@
u64 db_bits;
unsigned int qp_num;
+ if (ntb_db_read(nt->ndev) & nt->msi_db_mask) {
+ ntb_transport_msi_peer_desc_changed(nt);
+ ntb_db_clear(nt->ndev, nt->msi_db_mask);
+ }
+
db_bits = (nt->qp_bitmap & ~nt->qp_bitmap_free &
ntb_db_vector_mask(nt->ndev, vector));
--
Gitblit v1.6.2