hc
2024-10-22 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5
kernel/net/xdp/xdp_umem.c
@@ -13,131 +13,19 @@
 #include <linux/mm.h>
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
+#include <linux/idr.h>
+#include <linux/vmalloc.h>
 
 #include "xdp_umem.h"
 #include "xsk_queue.h"
 
 #define XDP_UMEM_MIN_CHUNK_SIZE 2048
 
-void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
-{
-	unsigned long flags;
-
-	if (!xs->tx)
-		return;
-
-	spin_lock_irqsave(&umem->xsk_list_lock, flags);
-	list_add_rcu(&xs->list, &umem->xsk_list);
-	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
-}
-
-void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
-{
-	unsigned long flags;
-
-	if (!xs->tx)
-		return;
-
-	spin_lock_irqsave(&umem->xsk_list_lock, flags);
-	list_del_rcu(&xs->list);
-	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
-}
-
-int xdp_umem_query(struct net_device *dev, u16 queue_id)
-{
-	struct netdev_bpf bpf;
-
-	ASSERT_RTNL();
-
-	memset(&bpf, 0, sizeof(bpf));
-	bpf.command = XDP_QUERY_XSK_UMEM;
-	bpf.xsk.queue_id = queue_id;
-
-	if (!dev->netdev_ops->ndo_bpf)
-		return 0;
-	return dev->netdev_ops->ndo_bpf(dev, &bpf) ?: !!bpf.xsk.umem;
-}
-
-int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
-			u32 queue_id, u16 flags)
-{
-	bool force_zc, force_copy;
-	struct netdev_bpf bpf;
-	int err;
-
-	force_zc = flags & XDP_ZEROCOPY;
-	force_copy = flags & XDP_COPY;
-
-	if (force_zc && force_copy)
-		return -EINVAL;
-
-	if (force_copy)
-		return 0;
-
-	if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_async_xmit)
-		return force_zc ? -EOPNOTSUPP : 0; /* fail or fallback */
-
-	bpf.command = XDP_QUERY_XSK_UMEM;
-
-	rtnl_lock();
-	err = xdp_umem_query(dev, queue_id);
-	if (err) {
-		err = err < 0 ? -EOPNOTSUPP : -EBUSY;
-		goto err_rtnl_unlock;
-	}
-
-	bpf.command = XDP_SETUP_XSK_UMEM;
-	bpf.xsk.umem = umem;
-	bpf.xsk.queue_id = queue_id;
-
-	err = dev->netdev_ops->ndo_bpf(dev, &bpf);
-	if (err)
-		goto err_rtnl_unlock;
-	rtnl_unlock();
-
-	dev_hold(dev);
-	umem->dev = dev;
-	umem->queue_id = queue_id;
-	umem->zc = true;
-	return 0;
-
-err_rtnl_unlock:
-	rtnl_unlock();
-	return force_zc ? err : 0; /* fail or fallback */
-}
-
-static void xdp_umem_clear_dev(struct xdp_umem *umem)
-{
-	struct netdev_bpf bpf;
-	int err;
-
-	if (umem->dev) {
-		bpf.command = XDP_SETUP_XSK_UMEM;
-		bpf.xsk.umem = NULL;
-		bpf.xsk.queue_id = umem->queue_id;
-
-		rtnl_lock();
-		err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf);
-		rtnl_unlock();
-
-		if (err)
-			WARN(1, "failed to disable umem!\n");
-
-		dev_put(umem->dev);
-		umem->dev = NULL;
-	}
-}
+static DEFINE_IDA(umem_ida);
 
 static void xdp_umem_unpin_pages(struct xdp_umem *umem)
 {
-	unsigned int i;
-
-	for (i = 0; i < umem->npgs; i++) {
-		struct page *page = umem->pgs[i];
-
-		set_page_dirty_lock(page);
-		put_page(page);
-	}
+	unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);
 
 	kfree(umem->pgs);
 	umem->pgs = NULL;
@@ -151,24 +39,28 @@
 	}
 }
 
+static void xdp_umem_addr_unmap(struct xdp_umem *umem)
+{
+	vunmap(umem->addrs);
+	umem->addrs = NULL;
+}
+
+static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages,
+			     u32 nr_pages)
+{
+	umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
+	if (!umem->addrs)
+		return -ENOMEM;
+	return 0;
+}
+
 static void xdp_umem_release(struct xdp_umem *umem)
 {
-	xdp_umem_clear_dev(umem);
+	umem->zc = false;
+	ida_simple_remove(&umem_ida, umem->id);
 
-	if (umem->fq) {
-		xskq_destroy(umem->fq);
-		umem->fq = NULL;
-	}
-
-	if (umem->cq) {
-		xskq_destroy(umem->cq);
-		umem->cq = NULL;
-	}
-
+	xdp_umem_addr_unmap(umem);
 	xdp_umem_unpin_pages(umem);
-
-	kfree(umem->pages);
-	umem->pages = NULL;
 
 	xdp_umem_unaccount_pages(umem);
 	kfree(umem);
@@ -186,18 +78,22 @@
 	refcount_inc(&umem->users);
 }
 
-void xdp_put_umem(struct xdp_umem *umem)
+void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup)
 {
 	if (!umem)
 		return;
 
 	if (refcount_dec_and_test(&umem->users)) {
-		INIT_WORK(&umem->work, xdp_umem_release_deferred);
-		schedule_work(&umem->work);
+		if (defer_cleanup) {
+			INIT_WORK(&umem->work, xdp_umem_release_deferred);
+			schedule_work(&umem->work);
+		} else {
+			xdp_umem_release(umem);
+		}
 	}
 }
 
-static int xdp_umem_pin_pages(struct xdp_umem *umem)
+static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
 {
 	unsigned int gup_flags = FOLL_WRITE;
 	long npgs;
@@ -208,10 +104,10 @@
 	if (!umem->pgs)
 		return -ENOMEM;
 
-	down_write(&current->mm->mmap_sem);
-	npgs = get_user_pages(umem->address, umem->npgs,
-			      gup_flags, &umem->pgs[0], NULL);
-	up_write(&current->mm->mmap_sem);
+	mmap_read_lock(current->mm);
+	npgs = pin_user_pages(address, umem->npgs,
+			      gup_flags | FOLL_LONGTERM, &umem->pgs[0], NULL);
+	mmap_read_unlock(current->mm);
 
 	if (npgs != umem->npgs) {
 		if (npgs >= 0) {
@@ -257,10 +153,12 @@
 
 static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 {
+	bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
 	u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
-	u64 npgs, addr = mr->addr, size = mr->len;
-	unsigned int chunks, chunks_per_page;
-	int err, i;
+	u64 addr = mr->addr, size = mr->len;
+	u32 chunks_rem, npgs_rem;
+	u64 chunks, npgs;
+	int err;
 
 	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
 		/* Strictly speaking we could support this, if:
@@ -272,7 +170,10 @@
 		return -EINVAL;
 	}
 
-	if (!is_power_of_2(chunk_size))
+	if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+		return -EINVAL;
+
+	if (!unaligned_chunks && !is_power_of_2(chunk_size))
 		return -EINVAL;
 
 	if (!PAGE_ALIGNED(addr)) {
@@ -285,56 +186,49 @@
 	if ((addr + size) < addr)
 		return -EINVAL;
 
-	npgs = div_u64(size, PAGE_SIZE);
+	npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem);
+	if (npgs_rem)
+		npgs++;
 	if (npgs > U32_MAX)
 		return -EINVAL;
 
-	chunks = (unsigned int)div_u64(size, chunk_size);
-	if (chunks == 0)
+	chunks = div_u64_rem(size, chunk_size, &chunks_rem);
+	if (!chunks || chunks > U32_MAX)
 		return -EINVAL;
 
-	chunks_per_page = PAGE_SIZE / chunk_size;
-	if (chunks < chunks_per_page || chunks % chunks_per_page)
+	if (!unaligned_chunks && chunks_rem)
 		return -EINVAL;
-
-	headroom = ALIGN(headroom, 64);
 
 	if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
 		return -EINVAL;
 
-	umem->address = (unsigned long)addr;
-	umem->props.chunk_mask = ~((u64)chunk_size - 1);
-	umem->props.size = size;
+	umem->size = size;
 	umem->headroom = headroom;
-	umem->chunk_size_nohr = chunk_size - headroom;
-	umem->npgs = (u32)npgs;
+	umem->chunk_size = chunk_size;
+	umem->chunks = chunks;
+	umem->npgs = npgs;
 	umem->pgs = NULL;
 	umem->user = NULL;
-	INIT_LIST_HEAD(&umem->xsk_list);
-	spin_lock_init(&umem->xsk_list_lock);
+	umem->flags = mr->flags;
 
+	INIT_LIST_HEAD(&umem->xsk_dma_list);
 	refcount_set(&umem->users, 1);
 
 	err = xdp_umem_account_pages(umem);
 	if (err)
		return err;
 
-	err = xdp_umem_pin_pages(umem);
+	err = xdp_umem_pin_pages(umem, (unsigned long)addr);
 	if (err)
 		goto out_account;
 
-	umem->pages = kcalloc(umem->npgs, sizeof(*umem->pages), GFP_KERNEL);
-	if (!umem->pages) {
-		err = -ENOMEM;
-		goto out_pin;
-	}
-
-	for (i = 0; i < umem->npgs; i++)
-		umem->pages[i].addr = page_address(umem->pgs[i]);
+	err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs);
+	if (err)
+		goto out_unpin;
 
 	return 0;
 
-out_pin:
+out_unpin:
 	xdp_umem_unpin_pages(umem);
 out_account:
 	xdp_umem_unaccount_pages(umem);
@@ -350,16 +244,19 @@
 	if (!umem)
 		return ERR_PTR(-ENOMEM);
 
+	err = ida_simple_get(&umem_ida, 0, 0, GFP_KERNEL);
+	if (err < 0) {
+		kfree(umem);
+		return ERR_PTR(err);
+	}
+	umem->id = err;
+
 	err = xdp_umem_reg(umem, mr);
 	if (err) {
+		ida_simple_remove(&umem_ida, umem->id);
 		kfree(umem);
 		return ERR_PTR(err);
 	}
 
 	return umem;
-}
-
-bool xdp_umem_validate_queues(struct xdp_umem *umem)
-{
-	return umem->fq && umem->cq;
 }
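
For reference, the constraints enforced by the reworked xdp_umem_reg() above are easiest to see from the user-space side of the API. The sketch below is illustrative only and is not part of this change: it registers a UMEM through the standard XDP_UMEM_REG setsockopt on an AF_XDP socket, and the buffer size, chunk count, and error handling are assumptions chosen to satisfy the checks visible in the diff (page-aligned address, chunk size between XDP_UMEM_MIN_CHUNK_SIZE and PAGE_SIZE, power-of-two chunks unless XDP_UMEM_UNALIGNED_CHUNK_FLAG is set, headroom below chunk_size - XDP_PACKET_HEADROOM).

/* Minimal user-space sketch (assumed example, not from this file):
 * register a UMEM so that the kernel path shown above pins and vmap()s it.
 */
#include <linux/if_xdp.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#ifndef AF_XDP
#define AF_XDP 44
#endif
#ifndef SOL_XDP
#define SOL_XDP 283
#endif

#define NUM_CHUNKS 4096
#define CHUNK_SIZE 2048	/* >= XDP_UMEM_MIN_CHUNK_SIZE, <= PAGE_SIZE, power of two */

int main(void)
{
	size_t len = (size_t)NUM_CHUNKS * CHUNK_SIZE;
	void *buf;
	int fd;

	fd = socket(AF_XDP, SOCK_RAW, 0);
	if (fd < 0) {
		perror("socket(AF_XDP)");
		return 1;
	}

	/* mmap() returns a page-aligned area, satisfying the PAGE_ALIGNED(addr)
	 * check in xdp_umem_reg().
	 */
	buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	struct xdp_umem_reg mr = {
		.addr = (uintptr_t)buf,
		.len = len,		/* multiple of chunk_size unless
					 * XDP_UMEM_UNALIGNED_CHUNK_FLAG is set */
		.chunk_size = CHUNK_SIZE,
		.headroom = 0,		/* must stay below chunk_size - XDP_PACKET_HEADROOM */
		.flags = 0,		/* only XDP_UMEM_UNALIGNED_CHUNK_FLAG is accepted */
	};

	/* This triggers xdp_umem_create() -> xdp_umem_reg(): the backing pages
	 * are pinned with pin_user_pages(FOLL_LONGTERM) and mapped with vmap()
	 * until the UMEM's last reference is dropped.
	 */
	if (setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr))) {
		perror("setsockopt(XDP_UMEM_REG)");
		return 1;
	}

	close(fd);
	munmap(buf, len);
	return 0;
}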