| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* net/core/xdp.c |
|---|
| 2 | 3 | * |
|---|
| 3 | 4 | * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc. |
|---|
| 4 | | - * Released under terms in GPL version 2. See COPYING. |
|---|
| 5 | 5 | */ |
|---|
| 6 | 6 | #include <linux/bpf.h> |
|---|
| 7 | 7 | #include <linux/filter.h> |
|---|
| .. | .. |
|---|
| 11 | 11 | #include <linux/slab.h> |
|---|
| 12 | 12 | #include <linux/idr.h> |
|---|
| 13 | 13 | #include <linux/rhashtable.h> |
|---|
| 14 | +#include <linux/bug.h> |
|---|
| 14 | 15 | #include <net/page_pool.h> |
|---|
| 15 | 16 | |
|---|
| 16 | 17 | #include <net/xdp.h> |
|---|
| 18 | +#include <net/xdp_priv.h> /* struct xdp_mem_allocator */ |
|---|
| 19 | +#include <trace/events/xdp.h> |
|---|
| 20 | +#include <net/xdp_sock_drv.h> |
|---|
| 17 | 21 | |
|---|
| 18 | 22 | #define REG_STATE_NEW 0x0 |
|---|
| 19 | 23 | #define REG_STATE_REGISTERED 0x1 |
|---|
| .. | .. |
|---|
| 29 | 33 | static bool mem_id_init; /* false */ |
|---|
| 30 | 34 | static struct rhashtable *mem_id_ht; |
|---|
| 31 | 35 | |
|---|
| 32 | | -struct xdp_mem_allocator { |
|---|
| 33 | | - struct xdp_mem_info mem; |
|---|
| 34 | | - union { |
|---|
| 35 | | - void *allocator; |
|---|
| 36 | | - struct page_pool *page_pool; |
|---|
| 37 | | - struct zero_copy_allocator *zc_alloc; |
|---|
| 38 | | - }; |
|---|
| 39 | | - struct rhash_head node; |
|---|
| 40 | | - struct rcu_head rcu; |
|---|
| 41 | | -}; |
|---|
| 42 | | - |
|---|
| 43 | 36 | static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed) |
|---|
| 44 | 37 | { |
|---|
| 45 | 38 | const u32 *k = data; |
|---|
| 46 | 39 | const u32 key = *k; |
|---|
| 47 | 40 | |
|---|
| 48 | | - BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id) |
|---|
| 41 | + BUILD_BUG_ON(sizeof_field(struct xdp_mem_allocator, mem.id) |
|---|
| 49 | 42 | != sizeof(u32)); |
|---|
| 50 | 43 | |
|---|
| 51 | 44 | /* Use cyclic increasing ID as direct hash key */ |
|---|
| .. | .. |
|---|
| 65 | 58 | .nelem_hint = 64, |
|---|
| 66 | 59 | .head_offset = offsetof(struct xdp_mem_allocator, node), |
|---|
| 67 | 60 | .key_offset = offsetof(struct xdp_mem_allocator, mem.id), |
|---|
| 68 | | - .key_len = FIELD_SIZEOF(struct xdp_mem_allocator, mem.id), |
|---|
| 61 | + .key_len = sizeof_field(struct xdp_mem_allocator, mem.id), |
|---|
| 69 | 62 | .max_size = MEM_ID_MAX, |
|---|
| 70 | 63 | .min_size = 8, |
|---|
| 71 | 64 | .automatic_shrinking = true, |
|---|
| .. | .. |
|---|
| 82 | 75 | /* Allow this ID to be reused */ |
|---|
| 83 | 76 | ida_simple_remove(&mem_id_pool, xa->mem.id); |
|---|
| 84 | 77 | |
|---|
| 85 | | - /* Notice, driver is expected to free the *allocator, |
|---|
| 86 | | - * e.g. page_pool, and MUST also use RCU free. |
|---|
| 87 | | - */ |
|---|
| 88 | | - |
|---|
| 89 | | - /* Poison memory */ |
|---|
| 90 | | - xa->mem.id = 0xFFFF; |
|---|
| 91 | | - xa->mem.type = 0xF0F0; |
|---|
| 92 | | - xa->allocator = (void *)0xDEAD9001; |
|---|
| 93 | | - |
|---|
| 94 | 78 | kfree(xa); |
|---|
| 95 | 79 | } |
|---|
| 96 | 80 | |
|---|
| 97 | | -static void __xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) |
|---|
| 81 | +static void mem_xa_remove(struct xdp_mem_allocator *xa) |
|---|
| 82 | +{ |
|---|
| 83 | + trace_mem_disconnect(xa); |
|---|
| 84 | + |
|---|
| 85 | + if (!rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params)) |
|---|
| 86 | + call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free); |
|---|
| 87 | +} |
|---|
| 88 | + |
|---|
| 89 | +static void mem_allocator_disconnect(void *allocator) |
|---|
| 90 | +{ |
|---|
| 91 | + struct xdp_mem_allocator *xa; |
|---|
| 92 | + struct rhashtable_iter iter; |
|---|
| 93 | + |
|---|
| 94 | + mutex_lock(&mem_id_lock); |
|---|
| 95 | + |
|---|
| 96 | + rhashtable_walk_enter(mem_id_ht, &iter); |
|---|
| 97 | + do { |
|---|
| 98 | + rhashtable_walk_start(&iter); |
|---|
| 99 | + |
|---|
| 100 | + while ((xa = rhashtable_walk_next(&iter)) && !IS_ERR(xa)) { |
|---|
| 101 | + if (xa->allocator == allocator) |
|---|
| 102 | + mem_xa_remove(xa); |
|---|
| 103 | + } |
|---|
| 104 | + |
|---|
| 105 | + rhashtable_walk_stop(&iter); |
|---|
| 106 | + |
|---|
| 107 | + } while (xa == ERR_PTR(-EAGAIN)); |
|---|
| 108 | + rhashtable_walk_exit(&iter); |
|---|
| 109 | + |
|---|
| 110 | + mutex_unlock(&mem_id_lock); |
|---|
| 111 | +} |
|---|
| 112 | + |
|---|
| 113 | +void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) |
|---|
| 98 | 114 | { |
|---|
| 99 | 115 | struct xdp_mem_allocator *xa; |
|---|
| 100 | 116 | int id = xdp_rxq->mem.id; |
|---|
| 101 | 117 | |
|---|
| 118 | + if (xdp_rxq->reg_state != REG_STATE_REGISTERED) { |
|---|
| 119 | + WARN(1, "Missing register, driver bug"); |
|---|
| 120 | + return; |
|---|
| 121 | + } |
|---|
| 122 | + |
|---|
| 102 | 123 | if (id == 0) |
|---|
| 103 | 124 | return; |
|---|
| 104 | 125 | |
|---|
| 105 | | - mutex_lock(&mem_id_lock); |
|---|
| 106 | | - |
|---|
| 107 | | - xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params); |
|---|
| 108 | | - if (xa && !rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params)) |
|---|
| 109 | | - call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free); |
|---|
| 110 | | - |
|---|
| 111 | | - mutex_unlock(&mem_id_lock); |
|---|
| 126 | + if (xdp_rxq->mem.type == MEM_TYPE_PAGE_POOL) { |
|---|
| 127 | + rcu_read_lock(); |
|---|
| 128 | + xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params); |
|---|
| 129 | + page_pool_destroy(xa->page_pool); |
|---|
| 130 | + rcu_read_unlock(); |
|---|
| 131 | + } |
|---|
| 112 | 132 | } |
|---|
| 133 | +EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model); |
|---|
| 113 | 134 | |
|---|
| 114 | 135 | void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq) |
|---|
| 115 | 136 | { |
|---|
| .. | .. |
|---|
| 119 | 140 | |
|---|
| 120 | 141 | WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG"); |
|---|
| 121 | 142 | |
|---|
| 122 | | - __xdp_rxq_info_unreg_mem_model(xdp_rxq); |
|---|
| 143 | + xdp_rxq_info_unreg_mem_model(xdp_rxq); |
|---|
| 123 | 144 | |
|---|
| 124 | 145 | xdp_rxq->reg_state = REG_STATE_UNREGISTERED; |
|---|
| 125 | 146 | xdp_rxq->dev = NULL; |
|---|
| .. | .. |
|---|
| 257 | 278 | xdp_rxq->mem.type = type; |
|---|
| 258 | 279 | |
|---|
| 259 | 280 | if (!allocator) { |
|---|
| 260 | | - if (type == MEM_TYPE_PAGE_POOL || type == MEM_TYPE_ZERO_COPY) |
|---|
| 281 | + if (type == MEM_TYPE_PAGE_POOL) |
|---|
| 261 | 282 | return -EINVAL; /* Setup time check page_pool req */ |
|---|
| 262 | 283 | return 0; |
|---|
| 263 | 284 | } |
|---|
| .. | .. |
|---|
| 290 | 311 | /* Insert allocator into ID lookup table */ |
|---|
| 291 | 312 | ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node); |
|---|
| 292 | 313 | if (IS_ERR(ptr)) { |
|---|
| 314 | + ida_simple_remove(&mem_id_pool, xdp_rxq->mem.id); |
|---|
| 315 | + xdp_rxq->mem.id = 0; |
|---|
| 293 | 316 | errno = PTR_ERR(ptr); |
|---|
| 294 | 317 | goto err; |
|---|
| 295 | 318 | } |
|---|
| 296 | 319 | |
|---|
| 320 | + if (type == MEM_TYPE_PAGE_POOL) |
|---|
| 321 | + page_pool_use_xdp_mem(allocator, mem_allocator_disconnect); |
|---|
| 322 | + |
|---|
| 297 | 323 | mutex_unlock(&mem_id_lock); |
|---|
| 298 | 324 | |
|---|
| 325 | + trace_mem_connect(xdp_alloc, xdp_rxq); |
|---|
| 299 | 326 | return 0; |
|---|
| 300 | 327 | err: |
|---|
| 301 | 328 | mutex_unlock(&mem_id_lock); |
|---|
| .. | .. |
|---|
| 306 | 333 | |
|---|
| 307 | 334 | /* XDP RX runs under NAPI protection, and in different delivery error |
|---|
| 308 | 335 | * scenarios (e.g. queue full), it is possible to return the xdp_frame |
|---|
| 309 | | - * while still leveraging this protection. The @napi_direct boolian |
|---|
| 336 | + * while still leveraging this protection. The @napi_direct boolean |
|---|
| 310 | 337 | * is used for those call sites. Thus, allowing for faster recycling |
|---|
| 311 | 338 | * of xdp_frames/pages in those cases. |
|---|
| 312 | 339 | */ |
|---|
| 313 | 340 | static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, |
|---|
| 314 | | - unsigned long handle) |
|---|
| 341 | + struct xdp_buff *xdp) |
|---|
| 315 | 342 | { |
|---|
| 316 | 343 | struct xdp_mem_allocator *xa; |
|---|
| 317 | 344 | struct page *page; |
|---|
| .. | .. |
|---|
| 322 | 349 | /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */ |
|---|
| 323 | 350 | xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); |
|---|
| 324 | 351 | page = virt_to_head_page(data); |
|---|
| 325 | | - if (xa) { |
|---|
| 326 | | - napi_direct &= !xdp_return_frame_no_direct(); |
|---|
| 327 | | - page_pool_put_page(xa->page_pool, page, napi_direct); |
|---|
| 328 | | - } else { |
|---|
| 329 | | - put_page(page); |
|---|
| 330 | | - } |
|---|
| 352 | + if (napi_direct && xdp_return_frame_no_direct()) |
|---|
| 353 | + napi_direct = false; |
|---|
| 354 | + page_pool_put_full_page(xa->page_pool, page, napi_direct); |
|---|
| 331 | 355 | rcu_read_unlock(); |
|---|
| 332 | 356 | break; |
|---|
| 333 | 357 | case MEM_TYPE_PAGE_SHARED: |
|---|
| .. | .. |
|---|
| 337 | 361 | page = virt_to_page(data); /* Assumes order0 page*/ |
|---|
| 338 | 362 | put_page(page); |
|---|
| 339 | 363 | break; |
|---|
| 340 | | - case MEM_TYPE_ZERO_COPY: |
|---|
| 364 | + case MEM_TYPE_XSK_BUFF_POOL: |
|---|
| 341 | 365 | /* NB! Only valid from an xdp_buff! */ |
|---|
| 342 | | - rcu_read_lock(); |
|---|
| 343 | | - /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */ |
|---|
| 344 | | - xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); |
|---|
| 345 | | - xa->zc_alloc->free(xa->zc_alloc, handle); |
|---|
| 346 | | - rcu_read_unlock(); |
|---|
| 366 | + xsk_buff_free(xdp); |
|---|
| 367 | + break; |
|---|
| 347 | 368 | default: |
|---|
| 348 | 369 | /* Not possible, checked in xdp_rxq_info_reg_mem_model() */ |
|---|
| 370 | + WARN(1, "Incorrect XDP memory type (%d) usage", mem->type); |
|---|
| 349 | 371 | break; |
|---|
| 350 | 372 | } |
|---|
| 351 | 373 | } |
|---|
| 352 | 374 | |
|---|
| 353 | 375 | void xdp_return_frame(struct xdp_frame *xdpf) |
|---|
| 354 | 376 | { |
|---|
| 355 | | - __xdp_return(xdpf->data, &xdpf->mem, false, 0); |
|---|
| 377 | + __xdp_return(xdpf->data, &xdpf->mem, false, NULL); |
|---|
| 356 | 378 | } |
|---|
| 357 | 379 | EXPORT_SYMBOL_GPL(xdp_return_frame); |
|---|
| 358 | 380 | |
|---|
| 359 | 381 | void xdp_return_frame_rx_napi(struct xdp_frame *xdpf) |
|---|
| 360 | 382 | { |
|---|
| 361 | | - __xdp_return(xdpf->data, &xdpf->mem, true, 0); |
|---|
| 383 | + __xdp_return(xdpf->data, &xdpf->mem, true, NULL); |
|---|
| 362 | 384 | } |
|---|
| 363 | 385 | EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); |
|---|
| 364 | 386 | |
|---|
| 365 | 387 | void xdp_return_buff(struct xdp_buff *xdp) |
|---|
| 366 | 388 | { |
|---|
| 367 | | - __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp->handle); |
|---|
| 389 | + __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp); |
|---|
| 368 | 390 | } |
|---|
| 369 | | -EXPORT_SYMBOL_GPL(xdp_return_buff); |
|---|
| 370 | 391 | |
|---|
| 371 | | -int xdp_attachment_query(struct xdp_attachment_info *info, |
|---|
| 372 | | - struct netdev_bpf *bpf) |
|---|
| 392 | +/* Only called for MEM_TYPE_PAGE_POOL; see xdp.h */ |
|---|
| 393 | +void __xdp_release_frame(void *data, struct xdp_mem_info *mem) |
|---|
| 373 | 394 | { |
|---|
| 374 | | - bpf->prog_id = info->prog ? info->prog->aux->id : 0; |
|---|
| 375 | | - bpf->prog_flags = info->prog ? info->flags : 0; |
|---|
| 376 | | - return 0; |
|---|
| 377 | | -} |
|---|
| 378 | | -EXPORT_SYMBOL_GPL(xdp_attachment_query); |
|---|
| 395 | + struct xdp_mem_allocator *xa; |
|---|
| 396 | + struct page *page; |
|---|
| 379 | 397 | |
|---|
| 380 | | -bool xdp_attachment_flags_ok(struct xdp_attachment_info *info, |
|---|
| 381 | | - struct netdev_bpf *bpf) |
|---|
| 382 | | -{ |
|---|
| 383 | | - if (info->prog && (bpf->flags ^ info->flags) & XDP_FLAGS_MODES) { |
|---|
| 384 | | - NL_SET_ERR_MSG(bpf->extack, |
|---|
| 385 | | - "program loaded with different flags"); |
|---|
| 386 | | - return false; |
|---|
| 387 | | - } |
|---|
| 388 | | - return true; |
|---|
| 398 | + rcu_read_lock(); |
|---|
| 399 | + xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); |
|---|
| 400 | + page = virt_to_head_page(data); |
|---|
| 401 | + if (xa) |
|---|
| 402 | + page_pool_release_page(xa->page_pool, page); |
|---|
| 403 | + rcu_read_unlock(); |
|---|
| 389 | 404 | } |
|---|
| 390 | | -EXPORT_SYMBOL_GPL(xdp_attachment_flags_ok); |
|---|
| 405 | +EXPORT_SYMBOL_GPL(__xdp_release_frame); |
|---|
| 391 | 406 | |
|---|
| 392 | 407 | void xdp_attachment_setup(struct xdp_attachment_info *info, |
|---|
| 393 | 408 | struct netdev_bpf *bpf) |
|---|
| .. | .. |
|---|
| 398 | 413 | info->flags = bpf->flags; |
|---|
| 399 | 414 | } |
|---|
| 400 | 415 | EXPORT_SYMBOL_GPL(xdp_attachment_setup); |
|---|
| 416 | + |
|---|
| 417 | +struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp) |
|---|
| 418 | +{ |
|---|
| 419 | + unsigned int metasize, totsize; |
|---|
| 420 | + void *addr, *data_to_copy; |
|---|
| 421 | + struct xdp_frame *xdpf; |
|---|
| 422 | + struct page *page; |
|---|
| 423 | + |
|---|
| 424 | + /* Clone into a MEM_TYPE_PAGE_ORDER0 xdp_frame. */ |
|---|
| 425 | + metasize = xdp_data_meta_unsupported(xdp) ? 0 : |
|---|
| 426 | + xdp->data - xdp->data_meta; |
|---|
| 427 | + totsize = xdp->data_end - xdp->data + metasize; |
|---|
| 428 | + |
|---|
| 429 | + if (sizeof(*xdpf) + totsize > PAGE_SIZE) |
|---|
| 430 | + return NULL; |
|---|
| 431 | + |
|---|
| 432 | + page = dev_alloc_page(); |
|---|
| 433 | + if (!page) |
|---|
| 434 | + return NULL; |
|---|
| 435 | + |
|---|
| 436 | + addr = page_to_virt(page); |
|---|
| 437 | + xdpf = addr; |
|---|
| 438 | + memset(xdpf, 0, sizeof(*xdpf)); |
|---|
| 439 | + |
|---|
| 440 | + addr += sizeof(*xdpf); |
|---|
| 441 | + data_to_copy = metasize ? xdp->data_meta : xdp->data; |
|---|
| 442 | + memcpy(addr, data_to_copy, totsize); |
|---|
| 443 | + |
|---|
| 444 | + xdpf->data = addr + metasize; |
|---|
| 445 | + xdpf->len = totsize - metasize; |
|---|
| 446 | + xdpf->headroom = 0; |
|---|
| 447 | + xdpf->metasize = metasize; |
|---|
| 448 | + xdpf->frame_sz = PAGE_SIZE; |
|---|
| 449 | + xdpf->mem.type = MEM_TYPE_PAGE_ORDER0; |
|---|
| 450 | + |
|---|
| 451 | + xsk_buff_free(xdp); |
|---|
| 452 | + return xdpf; |
|---|
| 453 | +} |
|---|
| 454 | +EXPORT_SYMBOL_GPL(xdp_convert_zc_to_xdp_frame); |
|---|
| 455 | + |
|---|
| 456 | +/* Used by XDP_WARN macro, to avoid inlining WARN() in fast-path */ |
|---|
| 457 | +void xdp_warn(const char *msg, const char *func, const int line) |
|---|
| 458 | +{ |
|---|
| 459 | + WARN(1, "XDP_WARN: %s(line:%d): %s\n", func, line, msg); |
|---|
| 460 | +}; |
|---|
| 461 | +EXPORT_SYMBOL_GPL(xdp_warn); |
|---|