From 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Tue, 22 Oct 2024 10:36:11 +0000
Subject: [PATCH] Switch the 4G dial-up to QMI; quectel-CM must be run in the
 background on the system

---
 kernel/net/core/page_pool.c | 374 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 300 insertions(+), 74 deletions(-)

diff --git a/kernel/net/core/page_pool.c b/kernel/net/core/page_pool.c
index 43a932c..08fbf40 100644
--- a/kernel/net/core/page_pool.c
+++ b/kernel/net/core/page_pool.c
@@ -4,15 +4,22 @@
  * Author: Jesper Dangaard Brouer <netoptimizer@brouer.com>
  * Copyright (C) 2016 Red Hat, Inc.
  */
+
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/device.h>
 
 #include <net/page_pool.h>
 #include <linux/dma-direction.h>
 #include <linux/dma-mapping.h>
 #include <linux/page-flags.h>
 #include <linux/mm.h> /* for __put_page() */
+
+#include <trace/events/page_pool.h>
+
+#define DEFER_TIME (msecs_to_jiffies(1000))
+#define DEFER_WARN_INTERVAL (60 * HZ)
 
 static int page_pool_init(struct page_pool *pool,
 			  const struct page_pool_params *params)
@@ -36,12 +43,37 @@
 	 * DMA_BIDIRECTIONAL is for allowing page used for DMA sending,
 	 * which is the XDP_TX use-case.
 	 */
-	if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
-	    (pool->p.dma_dir != DMA_BIDIRECTIONAL))
-		return -EINVAL;
+	if (pool->p.flags & PP_FLAG_DMA_MAP) {
+		if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
+		    (pool->p.dma_dir != DMA_BIDIRECTIONAL))
+			return -EINVAL;
+	}
+
+	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) {
+		/* In order to request DMA-sync-for-device the page
+		 * needs to be mapped
+		 */
+		if (!(pool->p.flags & PP_FLAG_DMA_MAP))
+			return -EINVAL;
+
+		if (!pool->p.max_len)
+			return -EINVAL;
+
+		/* pool->p.offset has to be set according to the address
+		 * offset used by the DMA engine to start copying rx data
+		 */
+	}
 
 	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
 		return -ENOMEM;
+
+	atomic_set(&pool->pages_state_release_cnt, 0);
+
+	/* Drivers that call page_pool_create() must also call page_pool_destroy() */
+	refcount_set(&pool->user_cnt, 1);
+
+	if (pool->p.flags & PP_FLAG_DMA_MAP)
+		get_device(pool->p.dev);
 
 	return 0;
 }
@@ -49,7 +81,7 @@
 struct page_pool *page_pool_create(const struct page_pool_params *params)
 {
 	struct page_pool *pool;
-	int err = 0;
+	int err;
 
 	pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid);
 	if (!pool)
@@ -61,49 +93,91 @@
 		kfree(pool);
 		return ERR_PTR(err);
 	}
+
 	return pool;
 }
 EXPORT_SYMBOL(page_pool_create);
 
-/* fast path */
-static struct page *__page_pool_get_cached(struct page_pool *pool)
+static void page_pool_return_page(struct page_pool *pool, struct page *page);
+
+noinline
+static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
 {
 	struct ptr_ring *r = &pool->ring;
 	struct page *page;
+	int pref_nid; /* preferred NUMA node */
 
 	/* Quicker fallback, avoid locks when ring is empty */
 	if (__ptr_ring_empty(r))
 		return NULL;
 
-	/* Test for safe-context, caller should provide this guarantee */
-	if (likely(in_serving_softirq())) {
-		if (likely(pool->alloc.count)) {
-			/* Fast-path */
-			page = pool->alloc.cache[--pool->alloc.count];
-			return page;
-		}
-		/* Slower-path: Alloc array empty, time to refill
-		 *
-		 * Open-coded bulk ptr_ring consumer.
-		 *
-		 * Discussion: the ring consumer lock is not really
-		 * needed due to the softirq/NAPI protection, but
-		 * later need the ability to reclaim pages on the
-		 * ring. Thus, keeping the locks.
-		 */
-		spin_lock(&r->consumer_lock);
-		while ((page = __ptr_ring_consume(r))) {
-			if (pool->alloc.count == PP_ALLOC_CACHE_REFILL)
-				break;
+	/* Softirq guarantees the CPU, and thus the NUMA node, is stable. This
+	 * assumes the CPU refilling the driver RX-ring will also run RX-NAPI.
+	 */
+#ifdef CONFIG_NUMA
+	pref_nid = (pool->p.nid == NUMA_NO_NODE) ? numa_mem_id() : pool->p.nid;
+#else
+	/* Ignore pool->p.nid setting if !CONFIG_NUMA, helps compiler */
+	pref_nid = numa_mem_id(); /* will be zero like page_to_nid() */
+#endif
+
+	/* Slower-path: Get pages from locked ring queue */
+	spin_lock(&r->consumer_lock);
+
+	/* Refill alloc array, but only if NUMA match */
+	do {
+		page = __ptr_ring_consume(r);
+		if (unlikely(!page))
+			break;
+
+		if (likely(page_to_nid(page) == pref_nid)) {
 			pool->alloc.cache[pool->alloc.count++] = page;
+		} else {
+			/* NUMA mismatch;
+			 * (1) release 1 page to page-allocator and
+			 * (2) break out and fall through to alloc_pages_node.
+			 * This limits stress on the page buddy allocator.
+			 */
+			page_pool_return_page(pool, page);
+			page = NULL;
+			break;
 		}
-		spin_unlock(&r->consumer_lock);
-		return page;
+	} while (pool->alloc.count < PP_ALLOC_CACHE_REFILL);
+
+	/* Return last page */
+	if (likely(pool->alloc.count > 0))
+		page = pool->alloc.cache[--pool->alloc.count];
+
+	spin_unlock(&r->consumer_lock);
+	return page;
+}
+
+/* fast path */
+static struct page *__page_pool_get_cached(struct page_pool *pool)
+{
+	struct page *page;
+
+	/* Caller MUST guarantee safe non-concurrent access, e.g. softirq */
+	if (likely(pool->alloc.count)) {
+		/* Fast-path */
+		page = pool->alloc.cache[--pool->alloc.count];
+	} else {
+		page = page_pool_refill_alloc_cache(pool);
 	}
 
-	/* Slow-path: Get page from locked ring queue */
-	page = ptr_ring_consume(&pool->ring);
 	return page;
+}
+
+static void page_pool_dma_sync_for_device(struct page_pool *pool,
+					  struct page *page,
+					  unsigned int dma_sync_size)
+{
+	dma_addr_t dma_addr = page_pool_get_dma_addr(page);
+
+	dma_sync_size = min(dma_sync_size, pool->p.max_len);
+	dma_sync_single_range_for_device(pool->p.dev, dma_addr,
+					 pool->p.offset, dma_sync_size,
+					 pool->p.dma_dir);
 }
 
 /* slow path */
@@ -129,26 +203,40 @@
 	 */
 
 	/* Cache was empty, do real allocation */
+#ifdef CONFIG_NUMA
 	page = alloc_pages_node(pool->p.nid, gfp, pool->p.order);
+#else
+	page = alloc_pages(gfp, pool->p.order);
+#endif
 	if (!page)
 		return NULL;
 
 	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
 		goto skip_dma_map;
 
-	/* Setup DMA mapping: use page->private for DMA-addr
+	/* Setup DMA mapping: use 'struct page' area for storing DMA-addr
+	 * since dma_addr_t can be either 32 or 64 bits and does not always fit
+	 * into page private data (e.g. a 32-bit CPU with 64-bit DMA caps)
 	 * This mapping is kept for lifetime of page, until leaving pool.
 	 */
-	dma = dma_map_page(pool->p.dev, page, 0,
-			   (PAGE_SIZE << pool->p.order),
-			   pool->p.dma_dir);
+	dma = dma_map_page_attrs(pool->p.dev, page, 0,
+				 (PAGE_SIZE << pool->p.order),
+				 pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
 	if (dma_mapping_error(pool->p.dev, dma)) {
 		put_page(page);
 		return NULL;
 	}
-	set_page_private(page, dma); /* page->private = dma; */
+	page_pool_set_dma_addr(page, dma);
+
+	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
+		page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
 
 skip_dma_map:
+	/* Track how many pages are held 'in-flight' */
+	pool->pages_state_hold_cnt++;
+
+	trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt);
+
 	/* When page just alloc'ed is should/must have refcnt 1. */
 	return page;
 }
 
@@ -171,23 +259,62 @@
 }
 EXPORT_SYMBOL(page_pool_alloc_pages);
 
-/* Cleanup page_pool state from page */
-static void __page_pool_clean_page(struct page_pool *pool,
-				   struct page *page)
-{
-	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
-		return;
+/* Calculate distance between two u32 values, valid if distance is below 2^(31)
+ * https://en.wikipedia.org/wiki/Serial_number_arithmetic#General_Solution
+ */
+#define _distance(a, b)	(s32)((a) - (b))
 
-	/* DMA unmap */
-	dma_unmap_page(pool->p.dev, page_private(page),
-		       PAGE_SIZE << pool->p.order, pool->p.dma_dir);
-	set_page_private(page, 0);
+static s32 page_pool_inflight(struct page_pool *pool)
+{
+	u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
+	u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
+	s32 inflight;
+
+	inflight = _distance(hold_cnt, release_cnt);
+
+	trace_page_pool_release(pool, inflight, hold_cnt, release_cnt);
+	WARN(inflight < 0, "Negative(%d) inflight packet-pages", inflight);
+
+	return inflight;
 }
 
-/* Return a page to the page allocator, cleaning up our state */
-static void __page_pool_return_page(struct page_pool *pool, struct page *page)
+/* Disconnects a page (from a page_pool). API users can have a need
+ * to disconnect a page from a page_pool, to allow it to be used as
+ * a regular page (that will eventually be returned to the normal
+ * page-allocator via put_page).
+ */
+void page_pool_release_page(struct page_pool *pool, struct page *page)
 {
-	__page_pool_clean_page(pool, page);
+	dma_addr_t dma;
+	int count;
+
+	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
+		/* Always account for inflight pages, even if we didn't
+		 * map them
+		 */
+		goto skip_dma_unmap;
+
+	dma = page_pool_get_dma_addr(page);
+
+	/* When page is unmapped, it cannot be returned to our pool */
+	dma_unmap_page_attrs(pool->p.dev, dma,
+			     PAGE_SIZE << pool->p.order, pool->p.dma_dir,
+			     DMA_ATTR_SKIP_CPU_SYNC);
+	page_pool_set_dma_addr(page, 0);
+skip_dma_unmap:
+	/* This may be the last page returned, releasing the pool, so
+	 * it is not safe to reference pool afterwards.
+	 */
+	count = atomic_inc_return(&pool->pages_state_release_cnt);
+	trace_page_pool_state_release(pool, page, count);
+}
+EXPORT_SYMBOL(page_pool_release_page);
+
+/* Return a page to the page allocator, cleaning up our state */
+static void page_pool_return_page(struct page_pool *pool, struct page *page)
+{
+	page_pool_release_page(pool, page);
+
 	put_page(page);
 	/* An optimization would be to call __free_pages(page, pool->p.order)
 	 * knowing page is not part of page-cache (thus avoiding a
@@ -195,8 +322,7 @@
 	 */
 }
 
-static bool __page_pool_recycle_into_ring(struct page_pool *pool,
-					  struct page *page)
+static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
 {
 	int ret;
 	/* BH protection not needed if current is serving softirq */
@@ -213,7 +339,7 @@
  *
  * Caller must provide appropriate safe context.
  */
-static bool __page_pool_recycle_direct(struct page *page,
+static bool page_pool_recycle_in_cache(struct page *page,
 				       struct page_pool *pool)
 {
 	if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE))
@@ -224,8 +350,22 @@
 	return true;
 }
 
-void __page_pool_put_page(struct page_pool *pool,
-			  struct page *page, bool allow_direct)
+/* page is NOT reusable when:
+ * 1) allocated when system is under some pressure. (page_is_pfmemalloc)
+ */
+static bool pool_page_reusable(struct page_pool *pool, struct page *page)
+{
+	return !page_is_pfmemalloc(page);
+}
+
+/* If the page refcnt == 1, this will try to recycle the page.
+ * If PP_FLAG_DMA_SYNC_DEV is set, we'll try to sync the DMA area for
+ * the configured size min(dma_sync_size, pool->p.max_len).
+ * If the page refcnt != 1, then the page will be returned to memory
+ * subsystem.
+ */
+void page_pool_put_page(struct page_pool *pool, struct page *page,
+			unsigned int dma_sync_size, bool allow_direct)
 {
 	/* This allocator is optimized for the XDP mode that uses
 	 * one-frame-per-page, but have fallbacks that act like the
@@ -233,16 +373,21 @@
 	 *
 	 * refcnt == 1 means page_pool owns page, and can recycle it.
 	 */
-	if (likely(page_ref_count(page) == 1)) {
+	if (likely(page_ref_count(page) == 1 &&
+		   pool_page_reusable(pool, page))) {
 		/* Read barrier done in page_ref_count / READ_ONCE */
+		if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
+			page_pool_dma_sync_for_device(pool, page,
+						      dma_sync_size);
+
 		if (allow_direct && in_serving_softirq())
-			if (__page_pool_recycle_direct(page, pool))
+			if (page_pool_recycle_in_cache(page, pool))
 				return;
 
-		if (!__page_pool_recycle_into_ring(pool, page)) {
+		if (!page_pool_recycle_in_ring(pool, page)) {
 			/* Cache full, fallback to free pages */
-			__page_pool_return_page(pool, page);
+			page_pool_return_page(pool, page);
 		}
 		return;
 	}
 
@@ -259,12 +404,13 @@
 	 * doing refcnt based recycle tricks, meaning another process
 	 * will be invoking put_page.
 	 */
-	__page_pool_clean_page(pool, page);
+	/* Do not replace this with page_pool_return_page() */
+	page_pool_release_page(pool, page);
 	put_page(page);
 }
-EXPORT_SYMBOL(__page_pool_put_page);
+EXPORT_SYMBOL(page_pool_put_page);
 
-static void __page_pool_empty_ring(struct page_pool *pool)
+static void page_pool_empty_ring(struct page_pool *pool)
 {
 	struct page *page;
 
@@ -275,27 +421,29 @@
 			pr_crit("%s() page_pool refcnt %d violation\n",
 				__func__, page_ref_count(page));
 
-		__page_pool_return_page(pool, page);
+		page_pool_return_page(pool, page);
 	}
 }
 
-static void __page_pool_destroy_rcu(struct rcu_head *rcu)
+static void page_pool_free(struct page_pool *pool)
 {
-	struct page_pool *pool;
+	if (pool->disconnect)
+		pool->disconnect(pool);
 
-	pool = container_of(rcu, struct page_pool, rcu);
-
-	WARN(pool->alloc.count, "API usage violation");
-
-	__page_pool_empty_ring(pool);
 	ptr_ring_cleanup(&pool->ring, NULL);
+
+	if (pool->p.flags & PP_FLAG_DMA_MAP)
+		put_device(pool->p.dev);
+
 	kfree(pool);
 }
 
-/* Cleanup and release resources */
-void page_pool_destroy(struct page_pool *pool)
+static void page_pool_empty_alloc_cache_once(struct page_pool *pool)
 {
 	struct page *page;
+
+	if (pool->destroy_cnt)
+		return;
 
 	/* Empty alloc cache, assume caller made sure this is
 	 * no-longer in use, and page_pool_alloc_pages() cannot be
@@ -303,15 +451,93 @@
 	 */
 	while (pool->alloc.count) {
 		page = pool->alloc.cache[--pool->alloc.count];
-		__page_pool_return_page(pool, page);
+		page_pool_return_page(pool, page);
 	}
+}
+
+static void page_pool_scrub(struct page_pool *pool)
+{
+	page_pool_empty_alloc_cache_once(pool);
+	pool->destroy_cnt++;
 
 	/* No more consumers should exist, but producers could still
 	 * be in-flight.
*/ - __page_pool_empty_ring(pool); + page_pool_empty_ring(pool); +} - /* An xdp_mem_allocator can still ref page_pool pointer */ - call_rcu(&pool->rcu, __page_pool_destroy_rcu); +static int page_pool_release(struct page_pool *pool) +{ + int inflight; + + page_pool_scrub(pool); + inflight = page_pool_inflight(pool); + if (!inflight) + page_pool_free(pool); + + return inflight; +} + +static void page_pool_release_retry(struct work_struct *wq) +{ + struct delayed_work *dwq = to_delayed_work(wq); + struct page_pool *pool = container_of(dwq, typeof(*pool), release_dw); + int inflight; + + inflight = page_pool_release(pool); + if (!inflight) + return; + + /* Periodic warning */ + if (time_after_eq(jiffies, pool->defer_warn)) { + int sec = (s32)((u32)jiffies - (u32)pool->defer_start) / HZ; + + pr_warn("%s() stalled pool shutdown %d inflight %d sec\n", + __func__, inflight, sec); + pool->defer_warn = jiffies + DEFER_WARN_INTERVAL; + } + + /* Still not ready to be disconnected, retry later */ + schedule_delayed_work(&pool->release_dw, DEFER_TIME); +} + +void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *)) +{ + refcount_inc(&pool->user_cnt); + pool->disconnect = disconnect; +} + +void page_pool_destroy(struct page_pool *pool) +{ + if (!pool) + return; + + if (!page_pool_put(pool)) + return; + + if (!page_pool_release(pool)) + return; + + pool->defer_start = jiffies; + pool->defer_warn = jiffies + DEFER_WARN_INTERVAL; + + INIT_DELAYED_WORK(&pool->release_dw, page_pool_release_retry); + schedule_delayed_work(&pool->release_dw, DEFER_TIME); } EXPORT_SYMBOL(page_pool_destroy); + +/* Caller must provide appropriate safe context, e.g. NAPI. */ +void page_pool_update_nid(struct page_pool *pool, int new_nid) +{ + struct page *page; + + trace_page_pool_update_nid(pool, new_nid); + pool->p.nid = new_nid; + + /* Flush pool alloc cache, as refill will check NUMA node */ + while (pool->alloc.count) { + page = pool->alloc.cache[--pool->alloc.count]; + page_pool_return_page(pool, page); + } +} +EXPORT_SYMBOL(page_pool_update_nid); -- Gitblit v1.6.2
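
Editor's note (placed after the patch signature, so "git am" ignores it): the
inflight accounting introduced above relies on serial-number arithmetic. Below
is a minimal userspace C sketch, not kernel code and with invented demo values,
showing why the signed difference of two free-running u32 counters stays
correct across wraparound as long as the true distance is below 2^31.

	#include <stdint.h>
	#include <stdio.h>

	/* Same trick as _distance() in the patch: the u32 difference,
	 * reinterpreted as signed, is the true distance while it stays
	 * below 2^31.
	 */
	static int32_t distance(uint32_t a, uint32_t b)
	{
		return (int32_t)(a - b);
	}

	int main(void)
	{
		/* Both counters wrapped past UINT32_MAX; hold ran 2 ahead */
		uint32_t hold_cnt    = UINT32_MAX - 1 + 4; /* wraps to 2 */
		uint32_t release_cnt = UINT32_MAX - 3 + 4; /* wraps to 0 */

		/* Prints "inflight = 2" despite the wraparound */
		printf("inflight = %d\n", (int)distance(hold_cnt, release_cnt));
		return 0;
	}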
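Editor's note: a sketch of how an Rx driver would be expected to use the API
after this patch, with PP_FLAG_DMA_SYNC_DEV configured at pool creation and the
new dma_sync_size argument passed on recycle. The rxq_* helpers and the numbers
are illustrative, not from this tree; the page_pool_params field names follow
the upstream <net/page_pool.h> of this era and should be checked against the
headers actually backported here.

	#include <linux/dma-direction.h>
	#include <net/page_pool.h>

	/* Illustrative pool setup: the core keeps pages DMA-mapped for their
	 * whole lifetime and syncs them for the device on recycle, bounded
	 * by max_len.
	 */
	static struct page_pool *rxq_create_pool(struct device *dev, int nid)
	{
		struct page_pool_params pp_params = {
			.flags     = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
			.order     = 0,
			.pool_size = 256,            /* ptr_ring size */
			.nid       = nid,
			.dev       = dev,
			.dma_dir   = DMA_FROM_DEVICE,
			.max_len   = PAGE_SIZE,      /* upper bound for the sync */
			.offset    = 0,              /* where the device starts writing */
		};

		return page_pool_create(&pp_params); /* ERR_PTR() on failure */
	}

	/* Illustrative recycle from NAPI poll: only the bytes the device
	 * actually wrote are synced, and allow_direct=true permits the
	 * lockless alloc cache.
	 */
	static void rxq_recycle(struct page_pool *pool, struct page *page, u32 len)
	{
		page_pool_put_page(pool, page, len, true);
	}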
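Editor's note: page_pool_destroy() above no longer frees the pool through
call_rcu(); it retries from a delayed work item until the inflight count drains
to zero, warning once per DEFER_WARN_INTERVAL. The same pattern, reduced to a
standalone sketch with invented names (struct drainer and its fields are not
from this tree):

	#include <linux/atomic.h>
	#include <linux/jiffies.h>
	#include <linux/printk.h>
	#include <linux/slab.h>
	#include <linux/workqueue.h>

	/* Invented example: an object that may only be freed once every item
	 * it handed out has come back (compare pages_state_hold/release_cnt).
	 */
	struct drainer {
		struct delayed_work release_dw;
		unsigned long defer_warn;
		atomic_t inflight;
	};

	static void drainer_release_retry(struct work_struct *wq)
	{
		struct delayed_work *dwq = to_delayed_work(wq);
		struct drainer *d = container_of(dwq, struct drainer, release_dw);

		if (!atomic_read(&d->inflight)) {
			kfree(d);	/* drained: now safe to free */
			return;
		}

		/* Warn at most once per minute while shutdown is stalled */
		if (time_after_eq(jiffies, d->defer_warn)) {
			pr_warn("%s() stalled, %d items still in flight\n",
				__func__, atomic_read(&d->inflight));
			d->defer_warn = jiffies + 60 * HZ;
		}

		/* Not drained yet: check again in a second, like DEFER_TIME */
		schedule_delayed_work(&d->release_dw, msecs_to_jiffies(1000));
	}

	/* Owner side: instead of freeing immediately, arm the retry work,
	 * exactly as page_pool_destroy() does above:
	 *   INIT_DELAYED_WORK(&d->release_dw, drainer_release_retry);
	 *   d->defer_warn = jiffies + 60 * HZ;
	 *   schedule_delayed_work(&d->release_dw, msecs_to_jiffies(1000));
	 */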