From ea08eeccae9297f7aabd2ef7f0c2517ac4549acc Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Tue, 20 Feb 2024 01:18:26 +0000
Subject: [PATCH] write in 30M
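
Sync net/xdp/xdp_umem.c with the upstream AF_XDP umem rework:

- Drop xdp_add_sk_umem()/xdp_del_sk_umem(), xdp_umem_query(),
  xdp_umem_assign_dev(), xdp_umem_clear_dev() and
  xdp_umem_validate_queues(); device binding and the fill/completion
  queues are no longer tracked inside the umem.
- Assign each umem a unique id from an IDA.
- Pin pages with pin_user_pages(FOLL_LONGTERM) under mmap_read_lock()
  instead of get_user_pages() under the mmap write lock, and release
  them with unpin_user_pages_dirty_lock().
- vmap() the pinned pages into one contiguous kernel virtual range
  (umem->addrs) instead of keeping a per-page address array.
- Accept XDP_UMEM_UNALIGNED_CHUNK_FLAG in xdp_umem_reg(), lifting the
  power-of-two and whole-chunks-per-page restrictions on chunk_size.
- Add a defer_cleanup argument to xdp_put_umem() so callers can pick
  immediate release or deferred release via a workqueue.

A caller-side sketch of the changed release API (the surrounding
context is illustrative only; actual callers live elsewhere, e.g.
net/xdp/xsk.c):

	/* Deferred: usable where sleeping is not allowed, the
	 * release itself runs later from a workqueue.
	 */
	xdp_put_umem(umem, true);

	/* Immediate: only where the caller may sleep. */
	xdp_put_umem(umem, false);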
---
kernel/net/xdp/xdp_umem.c | 239 +++++++++++++++++------------------------------------------
1 file changed, 68 insertions(+), 171 deletions(-)
diff --git a/kernel/net/xdp/xdp_umem.c b/kernel/net/xdp/xdp_umem.c
index b87e63c..42b19fe 100644
--- a/kernel/net/xdp/xdp_umem.c
+++ b/kernel/net/xdp/xdp_umem.c
@@ -13,131 +13,19 @@
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
+#include <linux/idr.h>
+#include <linux/vmalloc.h>
#include "xdp_umem.h"
#include "xsk_queue.h"
#define XDP_UMEM_MIN_CHUNK_SIZE 2048
-void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
-{
- unsigned long flags;
-
- if (!xs->tx)
- return;
-
- spin_lock_irqsave(&umem->xsk_list_lock, flags);
- list_add_rcu(&xs->list, &umem->xsk_list);
- spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
-}
-
-void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
-{
- unsigned long flags;
-
- if (!xs->tx)
- return;
-
- spin_lock_irqsave(&umem->xsk_list_lock, flags);
- list_del_rcu(&xs->list);
- spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
-}
-
-int xdp_umem_query(struct net_device *dev, u16 queue_id)
-{
- struct netdev_bpf bpf;
-
- ASSERT_RTNL();
-
- memset(&bpf, 0, sizeof(bpf));
- bpf.command = XDP_QUERY_XSK_UMEM;
- bpf.xsk.queue_id = queue_id;
-
- if (!dev->netdev_ops->ndo_bpf)
- return 0;
- return dev->netdev_ops->ndo_bpf(dev, &bpf) ?: !!bpf.xsk.umem;
-}
-
-int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
- u32 queue_id, u16 flags)
-{
- bool force_zc, force_copy;
- struct netdev_bpf bpf;
- int err;
-
- force_zc = flags & XDP_ZEROCOPY;
- force_copy = flags & XDP_COPY;
-
- if (force_zc && force_copy)
- return -EINVAL;
-
- if (force_copy)
- return 0;
-
- if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_async_xmit)
- return force_zc ? -EOPNOTSUPP : 0; /* fail or fallback */
-
- bpf.command = XDP_QUERY_XSK_UMEM;
-
- rtnl_lock();
- err = xdp_umem_query(dev, queue_id);
- if (err) {
- err = err < 0 ? -EOPNOTSUPP : -EBUSY;
- goto err_rtnl_unlock;
- }
-
- bpf.command = XDP_SETUP_XSK_UMEM;
- bpf.xsk.umem = umem;
- bpf.xsk.queue_id = queue_id;
-
- err = dev->netdev_ops->ndo_bpf(dev, &bpf);
- if (err)
- goto err_rtnl_unlock;
- rtnl_unlock();
-
- dev_hold(dev);
- umem->dev = dev;
- umem->queue_id = queue_id;
- umem->zc = true;
- return 0;
-
-err_rtnl_unlock:
- rtnl_unlock();
- return force_zc ? err : 0; /* fail or fallback */
-}
-
-static void xdp_umem_clear_dev(struct xdp_umem *umem)
-{
- struct netdev_bpf bpf;
- int err;
-
- if (umem->dev) {
- bpf.command = XDP_SETUP_XSK_UMEM;
- bpf.xsk.umem = NULL;
- bpf.xsk.queue_id = umem->queue_id;
-
- rtnl_lock();
- err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf);
- rtnl_unlock();
-
- if (err)
- WARN(1, "failed to disable umem!\n");
-
- dev_put(umem->dev);
- umem->dev = NULL;
- }
-}
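+/* Allocator for the unique id assigned to each umem. */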
+static DEFINE_IDA(umem_ida);
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
- unsigned int i;
-
- for (i = 0; i < umem->npgs; i++) {
- struct page *page = umem->pgs[i];
-
- set_page_dirty_lock(page);
- put_page(page);
- }
+ unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);
kfree(umem->pgs);
umem->pgs = NULL;
@@ -151,24 +39,28 @@
}
}
+static void xdp_umem_addr_unmap(struct xdp_umem *umem)
+{
+ vunmap(umem->addrs);
+ umem->addrs = NULL;
+}
+
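+/* Map the pinned pages into one contiguous kernel virtual range. */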
+static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages,
+ u32 nr_pages)
+{
+ umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
+ if (!umem->addrs)
+ return -ENOMEM;
+ return 0;
+}
+
static void xdp_umem_release(struct xdp_umem *umem)
{
- xdp_umem_clear_dev(umem);
+ umem->zc = false;
+ ida_simple_remove(&umem_ida, umem->id);
- if (umem->fq) {
- xskq_destroy(umem->fq);
- umem->fq = NULL;
- }
-
- if (umem->cq) {
- xskq_destroy(umem->cq);
- umem->cq = NULL;
- }
-
+ xdp_umem_addr_unmap(umem);
xdp_umem_unpin_pages(umem);
-
- kfree(umem->pages);
- umem->pages = NULL;
xdp_umem_unaccount_pages(umem);
kfree(umem);
@@ -186,18 +78,22 @@
refcount_inc(&umem->users);
}
-void xdp_put_umem(struct xdp_umem *umem)
+void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup)
{
if (!umem)
return;
if (refcount_dec_and_test(&umem->users)) {
- INIT_WORK(&umem->work, xdp_umem_release_deferred);
- schedule_work(&umem->work);
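+ /* Defer when the caller may not sleep; the release path (vunmap, unpin, ida removal) can. */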
+ if (defer_cleanup) {
+ INIT_WORK(&umem->work, xdp_umem_release_deferred);
+ schedule_work(&umem->work);
+ } else {
+ xdp_umem_release(umem);
+ }
}
}
-static int xdp_umem_pin_pages(struct xdp_umem *umem)
+static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
{
unsigned int gup_flags = FOLL_WRITE;
long npgs;
@@ -208,10 +104,10 @@
if (!umem->pgs)
return -ENOMEM;
- down_write(¤t->mm->mmap_sem);
- npgs = get_user_pages(umem->address, umem->npgs,
- gup_flags, &umem->pgs[0], NULL);
- up_write(¤t->mm->mmap_sem);
+ mmap_read_lock(current->mm);
+ npgs = pin_user_pages(address, umem->npgs,
+ gup_flags | FOLL_LONGTERM, &umem->pgs[0], NULL);
+ mmap_read_unlock(current->mm);
if (npgs != umem->npgs) {
if (npgs >= 0) {
@@ -257,10 +153,12 @@
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
+ bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
- u64 npgs, addr = mr->addr, size = mr->len;
- unsigned int chunks, chunks_per_page;
- int err, i;
+ u64 addr = mr->addr, size = mr->len;
+ u32 chunks_rem, npgs_rem;
+ u64 chunks, npgs;
+ int err;
if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
/* Strictly speaking we could support this, if:
@@ -272,7 +170,10 @@
return -EINVAL;
}
- if (!is_power_of_2(chunk_size))
+ if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+ return -EINVAL;
+
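+ /* Aligned mode still requires a power-of-two chunk size. */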
+ if (!unaligned_chunks && !is_power_of_2(chunk_size))
return -EINVAL;
if (!PAGE_ALIGNED(addr)) {
@@ -285,56 +186,49 @@
if ((addr + size) < addr)
return -EINVAL;
- npgs = div_u64(size, PAGE_SIZE);
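+ /* Round up so a partial trailing page is still counted and pinned. */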
+ npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem);
+ if (npgs_rem)
+ npgs++;
if (npgs > U32_MAX)
return -EINVAL;
- chunks = (unsigned int)div_u64(size, chunk_size);
- if (chunks == 0)
+ chunks = div_u64_rem(size, chunk_size, &chunks_rem);
+ if (!chunks || chunks > U32_MAX)
return -EINVAL;
- chunks_per_page = PAGE_SIZE / chunk_size;
- if (chunks < chunks_per_page || chunks % chunks_per_page)
+ if (!unaligned_chunks && chunks_rem)
return -EINVAL;
-
- headroom = ALIGN(headroom, 64);
if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
return -EINVAL;
- umem->address = (unsigned long)addr;
- umem->props.chunk_mask = ~((u64)chunk_size - 1);
- umem->props.size = size;
+ umem->size = size;
umem->headroom = headroom;
- umem->chunk_size_nohr = chunk_size - headroom;
- umem->npgs = (u32)npgs;
+ umem->chunk_size = chunk_size;
+ umem->chunks = chunks;
+ umem->npgs = npgs;
umem->pgs = NULL;
umem->user = NULL;
- INIT_LIST_HEAD(&umem->xsk_list);
- spin_lock_init(&umem->xsk_list_lock);
+ umem->flags = mr->flags;
+ INIT_LIST_HEAD(&umem->xsk_dma_list);
refcount_set(&umem->users, 1);
err = xdp_umem_account_pages(umem);
if (err)
return err;
- err = xdp_umem_pin_pages(umem);
+ err = xdp_umem_pin_pages(umem, (unsigned long)addr);
if (err)
goto out_account;
- umem->pages = kcalloc(umem->npgs, sizeof(*umem->pages), GFP_KERNEL);
- if (!umem->pages) {
- err = -ENOMEM;
- goto out_pin;
- }
-
- for (i = 0; i < umem->npgs; i++)
- umem->pages[i].addr = page_address(umem->pgs[i]);
+ err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs);
+ if (err)
+ goto out_unpin;
return 0;
-out_pin:
+out_unpin:
xdp_umem_unpin_pages(umem);
out_account:
xdp_umem_unaccount_pages(umem);
@@ -350,16 +244,19 @@
if (!umem)
return ERR_PTR(-ENOMEM);
+ err = ida_simple_get(&umem_ida, 0, 0, GFP_KERNEL);
+ if (err < 0) {
+ kfree(umem);
+ return ERR_PTR(err);
+ }
+ umem->id = err;
+
err = xdp_umem_reg(umem, mr);
if (err) {
+ ida_simple_remove(&umem_ida, umem->id);
kfree(umem);
return ERR_PTR(err);
}
return umem;
-}
-
-bool xdp_umem_validate_queues(struct xdp_umem *umem)
-{
- return umem->fq && umem->cq;
}
--
Gitblit v1.6.2