From 9370bb92b2d16684ee45cf24e879c93c509162da Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Thu, 19 Dec 2024 01:47:39 +0000
Subject: [PATCH] add wifi6 8852be driver

---
 kernel/tools/perf/bench/numa.c | 157 ++++++++++++++++++++++++++++++----------------------
 1 files changed, 91 insertions(+), 66 deletions(-)

diff --git a/kernel/tools/perf/bench/numa.c b/kernel/tools/perf/bench/numa.c
index 91c0a44..88c1130 100644
--- a/kernel/tools/perf/bench/numa.c
+++ b/kernel/tools/perf/bench/numa.c
@@ -9,9 +9,6 @@
 /* For the CLR_() macros */
 #include <pthread.h>
 
-#include "../perf.h"
-#include "../builtin.h"
-#include "../util/util.h"
 #include <subcmd/parse-options.h>
 #include "../util/cloexec.h"
 
@@ -34,6 +31,8 @@
 #include <sys/types.h>
 #include <linux/kernel.h>
 #include <linux/time64.h>
+#include <linux/numa.h>
+#include <linux/zalloc.h>
 #include <numa.h>
 #include <numaif.h>
 
@@ -138,12 +137,13 @@
 	u8			*data;
 
 	pthread_mutex_t		startup_mutex;
+	pthread_cond_t		startup_cond;
 	int			nr_tasks_started;
 
-	pthread_mutex_t		startup_done_mutex;
-
 	pthread_mutex_t		start_work_mutex;
+	pthread_cond_t		start_work_cond;
 	int			nr_tasks_working;
+	bool			start_work;
 
 	pthread_mutex_t		stop_work_mutex;
 	u64			bytes_done;
@@ -248,17 +248,22 @@
  */
 static bool node_has_cpus(int node)
 {
-	struct bitmask *cpu = numa_allocate_cpumask();
-	unsigned int i;
+	struct bitmask *cpumask = numa_allocate_cpumask();
+	bool ret = false; /* fall back to nocpus */
+	int cpu;
 
-	if (cpu && !numa_node_to_cpus(node, cpu)) {
-		for (i = 0; i < cpu->size; i++) {
-			if (numa_bitmask_isbitset(cpu, i))
-				return true;
+	BUG_ON(!cpumask);
+	if (!numa_node_to_cpus(node, cpumask)) {
+		for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
+			if (numa_bitmask_isbitset(cpumask, cpu)) {
+				ret = true;
+				break;
+			}
 		}
 	}
+	numa_free_cpumask(cpumask);
 
-	return false; /* lets fall back to nocpus safely */
+	return ret;
 }
 
 static cpu_set_t bind_to_cpu(int target_cpu)
@@ -289,30 +294,29 @@
 
 static cpu_set_t bind_to_node(int target_node)
 {
-	int cpus_per_node = g->p.nr_cpus / nr_numa_nodes();
 	cpu_set_t orig_mask, mask;
 	int cpu;
 	int ret;
-
-	BUG_ON(cpus_per_node * nr_numa_nodes() != g->p.nr_cpus);
-	BUG_ON(!cpus_per_node);
 
 	ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
 	BUG_ON(ret);
 
 	CPU_ZERO(&mask);
 
-	if (target_node == -1) {
+	if (target_node == NUMA_NO_NODE) {
 		for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
 			CPU_SET(cpu, &mask);
 	} else {
-		int cpu_start = (target_node + 0) * cpus_per_node;
-		int cpu_stop  = (target_node + 1) * cpus_per_node;
+		struct bitmask *cpumask = numa_allocate_cpumask();
 
-		BUG_ON(cpu_stop > g->p.nr_cpus);
-
-		for (cpu = cpu_start; cpu < cpu_stop; cpu++)
-			CPU_SET(cpu, &mask);
+		BUG_ON(!cpumask);
+		if (!numa_node_to_cpus(target_node, cpumask)) {
+			for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
+				if (numa_bitmask_isbitset(cpumask, cpu))
+					CPU_SET(cpu, &mask);
+			}
+		}
+		numa_free_cpumask(cpumask);
 	}
 
 	ret = sched_setaffinity(0, sizeof(mask), &mask);
@@ -343,7 +347,7 @@
 	unsigned long nodemask;
 	int ret;
 
-	if (node == -1)
+	if (node == NUMA_NO_NODE)
 		return;
 
 	BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8);
@@ -478,6 +482,18 @@
 	pthread_mutexattr_init(&attr);
 	pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
 	pthread_mutex_init(mutex, &attr);
+}
+
+/*
+ * Return a process-shared (global) condition variable:
+ */
+static void init_global_cond(pthread_cond_t *cond)
+{
+	pthread_condattr_t attr;
+
+	pthread_condattr_init(&attr);
+	pthread_condattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
+	pthread_cond_init(cond, &attr);
 }
 
 static int parse_cpu_list(const char *arg)
@@ -730,8 +746,6 @@
 		return -1;
 
 	return parse_node_list(arg);
-
-	return 0;
 }
 
 #define BIT(x) (1ul << x)
@@ -814,12 +828,12 @@
 			}
 		}
 	} else if (!g->p.data_backwards || (nr + loop) & 1) {
+		/* Process data forwards: */
 
 		d0 = data + off;
 		d  = data + off + 1;
 		d1 = data + words;
 
-		/* Process data forwards: */
 		for (;;) {
 			if (unlikely(d >= d1))
 				d = data;
@@ -837,7 +851,6 @@
 		d  = data + off - 1;
 		d1 = data + words;
 
-		/* Process data forwards: */
 		for (;;) {
 			if (unlikely(d < data))
 				d = data + words-1;
@@ -1136,15 +1149,18 @@
 	if (g->p.serialize_startup) {
 		pthread_mutex_lock(&g->startup_mutex);
 		g->nr_tasks_started++;
+		/* The last thread wakes the main process. */
+		if (g->nr_tasks_started == g->p.nr_tasks)
+			pthread_cond_signal(&g->startup_cond);
+
 		pthread_mutex_unlock(&g->startup_mutex);
 
 		/* Here we will wait for the main process to start us all at once: */
 		pthread_mutex_lock(&g->start_work_mutex);
+		g->start_work = false;
 		g->nr_tasks_working++;
-
-		/* Last one wake the main process: */
-		if (g->nr_tasks_working == g->p.nr_tasks)
-			pthread_mutex_unlock(&g->startup_done_mutex);
+		while (!g->start_work)
+			pthread_cond_wait(&g->start_work_cond, &g->start_work_mutex);
 
 		pthread_mutex_unlock(&g->start_work_mutex);
 	}
@@ -1369,7 +1385,7 @@
 		int cpu;
 
 		/* Allow all nodes by default: */
-		td->bind_node = -1;
+		td->bind_node = NUMA_NO_NODE;
 
 		/* Allow all CPUs by default: */
 		CPU_ZERO(&td->bind_cpumask);
@@ -1441,8 +1457,9 @@
 
 	/* Startup serialization: */
 	init_global_mutex(&g->start_work_mutex);
+	init_global_cond(&g->start_work_cond);
 	init_global_mutex(&g->startup_mutex);
-	init_global_mutex(&g->startup_done_mutex);
+	init_global_cond(&g->startup_cond);
 	init_global_mutex(&g->stop_work_mutex);
 
 	init_thread_data();
@@ -1502,9 +1519,6 @@
 	pids = zalloc(g->p.nr_proc * sizeof(*pids));
 	pid = -1;
 
-	/* All threads try to acquire it, this way we can wait for them to start up: */
-	pthread_mutex_lock(&g->start_work_mutex);
-
 	if (g->p.serialize_startup) {
 		tprintf(" #\n");
 		tprintf(" # Startup synchronization: ..."); fflush(stdout);
@@ -1526,22 +1540,29 @@
 		pids[i] = pid;
 	}
 
-	/* Wait for all the threads to start up: */
-	while (g->nr_tasks_started != g->p.nr_tasks)
-		usleep(USEC_PER_MSEC);
-
-	BUG_ON(g->nr_tasks_started != g->p.nr_tasks);
 
 	if (g->p.serialize_startup) {
+		bool threads_ready = false;
 		double startup_sec;
 
-		pthread_mutex_lock(&g->startup_done_mutex);
+		/*
+		 * Wait for all the threads to start up. The last thread will
+		 * signal this process.
+		 */
+		pthread_mutex_lock(&g->startup_mutex);
+		while (g->nr_tasks_started != g->p.nr_tasks)
+			pthread_cond_wait(&g->startup_cond, &g->startup_mutex);
 
-		/* This will start all threads: */
-		pthread_mutex_unlock(&g->start_work_mutex);
+		pthread_mutex_unlock(&g->startup_mutex);
 
-		/* This mutex is locked - the last started thread will wake us: */
-		pthread_mutex_lock(&g->startup_done_mutex);
+		/* Wait for all threads to be at the start_work_cond. */
+		while (!threads_ready) {
+			pthread_mutex_lock(&g->start_work_mutex);
+			threads_ready = (g->nr_tasks_working == g->p.nr_tasks);
+			pthread_mutex_unlock(&g->start_work_mutex);
+			if (!threads_ready)
+				usleep(1);
+		}
 
 		gettimeofday(&stop, NULL);
 
@@ -1555,7 +1576,11 @@
 		tprintf(" #\n");
 
 		start = stop;
-		pthread_mutex_unlock(&g->startup_done_mutex);
+		/* Start all threads running. */
+		pthread_mutex_lock(&g->start_work_mutex);
+		g->start_work = true;
+		pthread_mutex_unlock(&g->start_work_mutex);
+		pthread_cond_broadcast(&g->start_work_cond);
 	} else {
 		gettimeofday(&start, NULL);
 	}
@@ -1631,7 +1656,7 @@
 		"GB/sec,", "total-speed",	"GB/sec total speed");
 
 	if (g->p.show_details >= 2) {
-		char tname[14 + 2 * 10 + 1];
+		char tname[14 + 2 * 11 + 1];
 		struct thread_data *td;
 		for (p = 0; p < g->p.nr_proc; p++) {
 			for (t = 0; t < g->p.nr_threads; t++) {
@@ -1734,12 +1759,12 @@
  */
 static const char *tests[][MAX_ARGS] = {
    /* Basic single-stream NUMA bandwidth measurements: */
-   { "RAM-bw-local,",	  "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
+   { "RAM-bw-local,",	  "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
 			  "-C" ,   "0", "-M",   "0", OPT_BW_RAM },
    { "RAM-bw-local-NOTHP,",
 			  "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
 			  "-C" ,   "0", "-M",   "0", OPT_BW_RAM_NOTHP },
-   { "RAM-bw-remote,",	  "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
+   { "RAM-bw-remote,",	  "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
 			  "-C" ,   "0", "-M",   "1", OPT_BW_RAM },
 
    /* 2-stream NUMA bandwidth measurements: */
@@ -1756,7 +1781,7 @@
    { " 1x3-convergence,",	"mem",  "-p",  "1",  "-t",  "3", "-P",  "512", OPT_CONV },
    { " 1x4-convergence,",	"mem",  "-p",  "1",  "-t",  "4", "-P",  "512", OPT_CONV },
    { " 1x6-convergence,",	"mem",  "-p",  "1",  "-t",  "6", "-P", "1020", OPT_CONV },
-   { " 2x3-convergence,",	"mem",  "-p",  "3",  "-t",  "3", "-P", "1020", OPT_CONV },
+   { " 2x3-convergence,",	"mem",  "-p",  "2",  "-t",  "3", "-P", "1020", OPT_CONV },
    { " 3x3-convergence,",	"mem",  "-p",  "3",  "-t",  "3", "-P", "1020", OPT_CONV },
    { " 4x4-convergence,",	"mem",  "-p",  "4",  "-t",  "4", "-P",  "512", OPT_CONV },
    { " 4x4-convergence-NOTHP,",
@@ -1781,24 +1806,24 @@
 			  "mem",  "-p",  "8",  "-t",  "1", "-P", " 512", OPT_BW_NOTHP },
    { "16x1-bw-process,",	"mem",  "-p", "16",  "-t",  "1", "-P",  "256", OPT_BW },
 
-   { " 4x1-bw-thread,",	 "mem",  "-p",  "1",  "-t",  "4", "-T",  "256", OPT_BW },
-   { " 8x1-bw-thread,",	 "mem",  "-p",  "1",  "-t",  "8", "-T",  "256", OPT_BW },
-   { "16x1-bw-thread,",	 "mem",  "-p",  "1",  "-t", "16", "-T",  "128", OPT_BW },
-   { "32x1-bw-thread,",	 "mem",  "-p",  "1",  "-t", "32", "-T",   "64", OPT_BW },
+   { " 1x4-bw-thread,",	 "mem",  "-p",  "1",  "-t",  "4", "-T",  "256", OPT_BW },
+   { " 1x8-bw-thread,",	 "mem",  "-p",  "1",  "-t",  "8", "-T",  "256", OPT_BW },
+   { "1x16-bw-thread,",	 "mem",  "-p",  "1",  "-t", "16", "-T",  "128", OPT_BW },
+   { "1x32-bw-thread,",	 "mem",  "-p",  "1",  "-t", "32", "-T",   "64", OPT_BW },
 
-   { " 2x3-bw-thread,",	 "mem",  "-p",  "2",  "-t",  "3", "-P",  "512", OPT_BW },
-   { " 4x4-bw-thread,",	 "mem",  "-p",  "4",  "-t",  "4", "-P",  "512", OPT_BW },
-   { " 4x6-bw-thread,",	 "mem",  "-p",  "4",  "-t",  "6", "-P",  "512", OPT_BW },
-   { " 4x8-bw-thread,",	 "mem",  "-p",  "4",  "-t",  "8", "-P",  "512", OPT_BW },
-   { " 4x8-bw-thread-NOTHP,",
+   { " 2x3-bw-process,",	 "mem",  "-p",  "2",  "-t",  "3", "-P",  "512", OPT_BW },
+   { " 4x4-bw-process,",	 "mem",  "-p",  "4",  "-t",  "4", "-P",  "512", OPT_BW },
+   { " 4x6-bw-process,",	 "mem",  "-p",  "4",  "-t",  "6", "-P",  "512", OPT_BW },
+   { " 4x8-bw-process,",	 "mem",  "-p",  "4",  "-t",  "8", "-P",  "512", OPT_BW },
+   { " 4x8-bw-process-NOTHP,",
 			  "mem",  "-p",  "4",  "-t",  "8", "-P",  "512", OPT_BW_NOTHP },
-   { " 3x3-bw-thread,",	 "mem",  "-p",  "3",  "-t",  "3", "-P",  "512", OPT_BW },
-   { " 5x5-bw-thread,",	 "mem",  "-p",  "5",  "-t",  "5", "-P",  "512", OPT_BW },
+   { " 3x3-bw-process,",	 "mem",  "-p",  "3",  "-t",  "3", "-P",  "512", OPT_BW },
+   { " 5x5-bw-process,",	 "mem",  "-p",  "5",  "-t",  "5", "-P",  "512", OPT_BW },
 
-   { "2x16-bw-thread,",	 "mem",  "-p",  "2",  "-t", "16", "-P",  "512", OPT_BW },
-   { "1x32-bw-thread,",	 "mem",  "-p",  "1",  "-t", "32", "-P", "2048", OPT_BW },
+   { "2x16-bw-process,",	 "mem",  "-p",  "2",  "-t", "16", "-P",  "512", OPT_BW },
+   { "1x32-bw-process,",	 "mem",  "-p",  "1",  "-t", "32", "-P", "2048", OPT_BW },
 
-   { "numa02-bw,",		 "mem",  "-p",  "1",  "-t", "32", "-T",   "32", OPT_BW },
+   { "numa02-bw,",		 "mem",  "-p",  "1",  "-t", "32", "-T",   "32", OPT_BW },
    { "numa02-bw-NOTHP,",	 "mem",  "-p",  "1",  "-t", "32", "-T",   "32", OPT_BW_NOTHP },
    { "numa01-bw-thread,",	 "mem",  "-p",  "2",  "-t", "16", "-T",  "192", OPT_BW },
    { "numa01-bw-thread-NOTHP,",
-- 
Gitblit v1.6.2