@@ -13,131 +13,19 @@
 #include <linux/mm.h>
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
+#include <linux/idr.h>
+#include <linux/vmalloc.h>
 
 #include "xdp_umem.h"
 #include "xsk_queue.h"
 
 #define XDP_UMEM_MIN_CHUNK_SIZE 2048
 
-void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
-{
-	unsigned long flags;
-
-	if (!xs->tx)
-		return;
-
-	spin_lock_irqsave(&umem->xsk_list_lock, flags);
-	list_add_rcu(&xs->list, &umem->xsk_list);
-	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
-}
-
-void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
-{
-	unsigned long flags;
-
-	if (!xs->tx)
-		return;
-
-	spin_lock_irqsave(&umem->xsk_list_lock, flags);
-	list_del_rcu(&xs->list);
-	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
-}
-
-int xdp_umem_query(struct net_device *dev, u16 queue_id)
-{
-	struct netdev_bpf bpf;
-
-	ASSERT_RTNL();
-
-	memset(&bpf, 0, sizeof(bpf));
-	bpf.command = XDP_QUERY_XSK_UMEM;
-	bpf.xsk.queue_id = queue_id;
-
-	if (!dev->netdev_ops->ndo_bpf)
-		return 0;
-	return dev->netdev_ops->ndo_bpf(dev, &bpf) ?: !!bpf.xsk.umem;
-}
-
-int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
-			u32 queue_id, u16 flags)
-{
-	bool force_zc, force_copy;
-	struct netdev_bpf bpf;
-	int err;
-
-	force_zc = flags & XDP_ZEROCOPY;
-	force_copy = flags & XDP_COPY;
-
-	if (force_zc && force_copy)
-		return -EINVAL;
-
-	if (force_copy)
-		return 0;
-
-	if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_async_xmit)
-		return force_zc ? -EOPNOTSUPP : 0; /* fail or fallback */
-
-	bpf.command = XDP_QUERY_XSK_UMEM;
-
-	rtnl_lock();
-	err = xdp_umem_query(dev, queue_id);
-	if (err) {
-		err = err < 0 ? -EOPNOTSUPP : -EBUSY;
-		goto err_rtnl_unlock;
-	}
-
-	bpf.command = XDP_SETUP_XSK_UMEM;
-	bpf.xsk.umem = umem;
-	bpf.xsk.queue_id = queue_id;
-
-	err = dev->netdev_ops->ndo_bpf(dev, &bpf);
-	if (err)
-		goto err_rtnl_unlock;
-	rtnl_unlock();
-
-	dev_hold(dev);
-	umem->dev = dev;
-	umem->queue_id = queue_id;
-	umem->zc = true;
-	return 0;
-
-err_rtnl_unlock:
-	rtnl_unlock();
-	return force_zc ? err : 0; /* fail or fallback */
-}
-
-static void xdp_umem_clear_dev(struct xdp_umem *umem)
-{
-	struct netdev_bpf bpf;
-	int err;
-
-	if (umem->dev) {
-		bpf.command = XDP_SETUP_XSK_UMEM;
-		bpf.xsk.umem = NULL;
-		bpf.xsk.queue_id = umem->queue_id;
-
-		rtnl_lock();
-		err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf);
-		rtnl_unlock();
-
-		if (err)
-			WARN(1, "failed to disable umem!\n");
-
-		dev_put(umem->dev);
-		umem->dev = NULL;
-	}
-}
+static DEFINE_IDA(umem_ida);
 
 static void xdp_umem_unpin_pages(struct xdp_umem *umem)
 {
-	unsigned int i;
-
-	for (i = 0; i < umem->npgs; i++) {
-		struct page *page = umem->pgs[i];
-
-		set_page_dirty_lock(page);
-		put_page(page);
-	}
+	unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);
 
 	kfree(umem->pgs);
 	umem->pgs = NULL;
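The open-coded set_page_dirty_lock()/put_page() loop becomes a single call to unpin_user_pages_dirty_lock(), the designated release path for pages taken with pin_user_pages() (see the pinning hunk further down). A minimal sketch of the pairing, with uaddr and the array size as illustrative placeholders:

	/* Pages pinned via pin_user_pages() must be released through the
	 * unpin_user_pages*() helpers, never through bare put_page().
	 * The final 'true' marks the pages dirty under the page lock.
	 */
	struct page *pages[32];
	long pinned;

	pinned = pin_user_pages(uaddr, 32, FOLL_WRITE | FOLL_LONGTERM,
				pages, NULL);
	if (pinned > 0)
		unpin_user_pages_dirty_lock(pages, pinned, true);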
@@ -151,24 +39,28 @@
 	}
 }
 
+static void xdp_umem_addr_unmap(struct xdp_umem *umem)
+{
+	vunmap(umem->addrs);
+	umem->addrs = NULL;
+}
+
+static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages,
+			     u32 nr_pages)
+{
+	umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
+	if (!umem->addrs)
+		return -ENOMEM;
+	return 0;
+}
+
 static void xdp_umem_release(struct xdp_umem *umem)
 {
-	xdp_umem_clear_dev(umem);
+	umem->zc = false;
+	ida_simple_remove(&umem_ida, umem->id);
 
-	if (umem->fq) {
-		xskq_destroy(umem->fq);
-		umem->fq = NULL;
-	}
-
-	if (umem->cq) {
-		xskq_destroy(umem->cq);
-		umem->cq = NULL;
-	}
-
+	xdp_umem_addr_unmap(umem);
 	xdp_umem_unpin_pages(umem);
-
-	kfree(umem->pages);
-	umem->pages = NULL;
 
 	xdp_umem_unaccount_pages(umem);
 	kfree(umem);
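xdp_umem_addr_map() replaces what used to be a kcalloc()ed array of per-page addresses (removed from xdp_umem_reg() below): vmap() stitches the pinned pages into one virtually contiguous kernel range, so any offset into the umem resolves with plain pointer arithmetic. A hedged sketch of the kind of lookup this enables; xdp_umem_get_kaddr() is a hypothetical helper, not part of the patch:

	/* Hypothetical helper: umem->addrs is virtually contiguous after
	 * vmap(), so no per-page indirection is needed.
	 */
	static inline void *xdp_umem_get_kaddr(struct xdp_umem *umem, u64 offset)
	{
		return umem->addrs + offset;
	}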
@@ -186,18 +78,22 @@
 	refcount_inc(&umem->users);
 }
 
-void xdp_put_umem(struct xdp_umem *umem)
+void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup)
 {
 	if (!umem)
 		return;
 
 	if (refcount_dec_and_test(&umem->users)) {
-		INIT_WORK(&umem->work, xdp_umem_release_deferred);
-		schedule_work(&umem->work);
+		if (defer_cleanup) {
+			INIT_WORK(&umem->work, xdp_umem_release_deferred);
+			schedule_work(&umem->work);
+		} else {
+			xdp_umem_release(umem);
+		}
 	}
 }
 
-static int xdp_umem_pin_pages(struct xdp_umem *umem)
+static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
 {
 	unsigned int gup_flags = FOLL_WRITE;
 	long npgs;
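xdp_put_umem() now lets the last-reference holder pick between deferred and inline teardown. A sketch of the two call patterns; the surrounding contexts are assumptions, not shown in this diff:

	xdp_put_umem(umem, true);	/* caller may be holding locks: the
					 * release is punted to a workqueue
					 * via xdp_umem_release_deferred() */
	xdp_put_umem(umem, false);	/* caller may sleep: the release
					 * (vunmap, unpin, unaccount) runs
					 * inline */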
@@ -208,10 +104,10 @@
 	if (!umem->pgs)
 		return -ENOMEM;
 
-	down_write(&current->mm->mmap_sem);
-	npgs = get_user_pages(umem->address, umem->npgs,
-			      gup_flags, &umem->pgs[0], NULL);
-	up_write(&current->mm->mmap_sem);
+	mmap_read_lock(current->mm);
+	npgs = pin_user_pages(address, umem->npgs,
+			      gup_flags | FOLL_LONGTERM, &umem->pgs[0], NULL);
+	mmap_read_unlock(current->mm);
 
 	if (npgs != umem->npgs) {
 		if (npgs >= 0) {
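Three things change in the pinning path: the write lock on the address space becomes a read lock (pinning does not alter the VMA layout), the lock is taken through the mmap_read_lock()/mmap_read_unlock() wrappers, and get_user_pages() becomes pin_user_pages() with FOLL_LONGTERM, which tells the core VM that the pin outlives the syscall. For orientation only, the wrappers expand to roughly:

	mmap_read_lock(mm);	/* ~ down_read(&mm->mmap_lock); mmap_sem
				 * was renamed to mmap_lock */
	/* ... */
	mmap_read_unlock(mm);	/* ~ up_read(&mm->mmap_lock) */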
@@ -257,10 +153,12 @@
 
 static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 {
+	bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
 	u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
-	u64 npgs, addr = mr->addr, size = mr->len;
-	unsigned int chunks, chunks_per_page;
-	int err, i;
+	u64 addr = mr->addr, size = mr->len;
+	u32 chunks_rem, npgs_rem;
+	u64 chunks, npgs;
+	int err;
 
 	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
 		/* Strictly speaking we could support this, if:
@@ -272,7 +170,10 @@
 		return -EINVAL;
 	}
 
-	if (!is_power_of_2(chunk_size))
+	if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+		return -EINVAL;
+
+	if (!unaligned_chunks && !is_power_of_2(chunk_size))
 		return -EINVAL;
 
 	if (!PAGE_ALIGNED(addr)) {
@@ -285,56 +186,49 @@
 	if ((addr + size) < addr)
 		return -EINVAL;
 
-	npgs = div_u64(size, PAGE_SIZE);
+	npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem);
+	if (npgs_rem)
+		npgs++;
 	if (npgs > U32_MAX)
 		return -EINVAL;
 
-	chunks = (unsigned int)div_u64(size, chunk_size);
-	if (chunks == 0)
+	chunks = div_u64_rem(size, chunk_size, &chunks_rem);
+	if (!chunks || chunks > U32_MAX)
 		return -EINVAL;
 
-	chunks_per_page = PAGE_SIZE / chunk_size;
-	if (chunks < chunks_per_page || chunks % chunks_per_page)
+	if (!unaligned_chunks && chunks_rem)
 		return -EINVAL;
-
-	headroom = ALIGN(headroom, 64);
 
 	if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
 		return -EINVAL;
 
-	umem->address = (unsigned long)addr;
-	umem->props.chunk_mask = ~((u64)chunk_size - 1);
-	umem->props.size = size;
+	umem->size = size;
 	umem->headroom = headroom;
-	umem->chunk_size_nohr = chunk_size - headroom;
-	umem->npgs = (u32)npgs;
+	umem->chunk_size = chunk_size;
+	umem->chunks = chunks;
+	umem->npgs = npgs;
 	umem->pgs = NULL;
 	umem->user = NULL;
-	INIT_LIST_HEAD(&umem->xsk_list);
-	spin_lock_init(&umem->xsk_list_lock);
+	umem->flags = mr->flags;
 
+	INIT_LIST_HEAD(&umem->xsk_dma_list);
 	refcount_set(&umem->users, 1);
 
 	err = xdp_umem_account_pages(umem);
 	if (err)
		return err;
 
-	err = xdp_umem_pin_pages(umem);
+	err = xdp_umem_pin_pages(umem, (unsigned long)addr);
 	if (err)
 		goto out_account;
 
-	umem->pages = kcalloc(umem->npgs, sizeof(*umem->pages), GFP_KERNEL);
-	if (!umem->pages) {
-		err = -ENOMEM;
-		goto out_pin;
-	}
-
-	for (i = 0; i < umem->npgs; i++)
-		umem->pages[i].addr = page_address(umem->pgs[i]);
+	err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs);
+	if (err)
+		goto out_unpin;
 
 	return 0;
 
-out_pin:
+out_unpin:
 	xdp_umem_unpin_pages(umem);
 out_account:
 	xdp_umem_unaccount_pages(umem);
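The rounding and validation logic is easiest to follow with concrete numbers; the values below are illustrative, not from the patch:

	/* size = 4 MiB + 1 (4194305), PAGE_SIZE = 4096:
	 *	npgs = div_u64_rem(4194305, 4096, &npgs_rem) -> 1024, rem 1,
	 *	so npgs++ rounds the pinned region up to 1025 pages.
	 *
	 * size = 10240, chunk_size = 4096 (aligned mode):
	 *	chunks = 2, chunks_rem = 2048 -> -EINVAL, since the tail
	 *	would be a partial chunk.
	 *
	 * size = 4194304, chunk_size = 3000:
	 *	chunks = 1398, chunks_rem = 304; accepted only with
	 *	XDP_UMEM_UNALIGNED_CHUNK_FLAG set, which also waives the
	 *	is_power_of_2(chunk_size) requirement.
	 */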
@@ -350,16 +244,19 @@
 	if (!umem)
 		return ERR_PTR(-ENOMEM);
 
+	err = ida_simple_get(&umem_ida, 0, 0, GFP_KERNEL);
+	if (err < 0) {
+		kfree(umem);
+		return ERR_PTR(err);
+	}
+	umem->id = err;
+
 	err = xdp_umem_reg(umem, mr);
 	if (err) {
+		ida_simple_remove(&umem_ida, umem->id);
 		kfree(umem);
 		return ERR_PTR(err);
 	}
 
 	return umem;
-}
-
-bool xdp_umem_validate_queues(struct xdp_umem *umem)
-{
-	return umem->fq && umem->cq;
 }
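Each umem is now tagged with a unique ID from the umem_ida allocator declared at the top of the file. With arguments (0, 0), ida_simple_get() hands out the lowest free ID starting from 0 with no upper bound (an end of 0 means "up to INT_MAX"), and every ID taken must eventually be returned via ida_simple_remove(), as both the error path here and xdp_umem_release() do. The pattern in isolation, with example_ida as a stand-in name:

	static DEFINE_IDA(example_ida);

	int id = ida_simple_get(&example_ida, 0, 0, GFP_KERNEL);
	if (id < 0)
		return id;	/* e.g. -ENOMEM */
	/* ... id is unique among live holders ... */
	ida_simple_remove(&example_ida, id);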