hc
2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/tools/perf/bench/numa.c
....@@ -9,9 +9,6 @@
99 /* For the CLR_() macros */
1010 #include <pthread.h>
1111
12
-#include "../perf.h"
13
-#include "../builtin.h"
14
-#include "../util/util.h"
1512 #include <subcmd/parse-options.h>
1613 #include "../util/cloexec.h"
1714
....@@ -34,6 +31,8 @@
3431 #include <sys/types.h>
3532 #include <linux/kernel.h>
3633 #include <linux/time64.h>
34
+#include <linux/numa.h>
35
+#include <linux/zalloc.h>
3736
3837 #include <numa.h>
3938 #include <numaif.h>
....@@ -138,12 +137,13 @@
138137 u8 *data;
139138
140139 pthread_mutex_t startup_mutex;
140
+ pthread_cond_t startup_cond;
141141 int nr_tasks_started;
142142
143
- pthread_mutex_t startup_done_mutex;
144
-
145143 pthread_mutex_t start_work_mutex;
144
+ pthread_cond_t start_work_cond;
146145 int nr_tasks_working;
146
+ bool start_work;
147147
148148 pthread_mutex_t stop_work_mutex;
149149 u64 bytes_done;
....@@ -248,17 +248,22 @@
248248 */
249249 static bool node_has_cpus(int node)
250250 {
251
- struct bitmask *cpu = numa_allocate_cpumask();
252
- unsigned int i;
251
+ struct bitmask *cpumask = numa_allocate_cpumask();
252
+ bool ret = false; /* fall back to nocpus */
253
+ int cpu;
253254
254
- if (cpu && !numa_node_to_cpus(node, cpu)) {
255
- for (i = 0; i < cpu->size; i++) {
256
- if (numa_bitmask_isbitset(cpu, i))
257
- return true;
255
+ BUG_ON(!cpumask);
256
+ if (!numa_node_to_cpus(node, cpumask)) {
257
+ for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
258
+ if (numa_bitmask_isbitset(cpumask, cpu)) {
259
+ ret = true;
260
+ break;
261
+ }
258262 }
259263 }
264
+ numa_free_cpumask(cpumask);
260265
261
- return false; /* lets fall back to nocpus safely */
266
+ return ret;
262267 }
263268
264269 static cpu_set_t bind_to_cpu(int target_cpu)
....@@ -289,30 +294,29 @@
289294
290295 static cpu_set_t bind_to_node(int target_node)
291296 {
292
- int cpus_per_node = g->p.nr_cpus / nr_numa_nodes();
293297 cpu_set_t orig_mask, mask;
294298 int cpu;
295299 int ret;
296
-
297
- BUG_ON(cpus_per_node * nr_numa_nodes() != g->p.nr_cpus);
298
- BUG_ON(!cpus_per_node);
299300
300301 ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
301302 BUG_ON(ret);
302303
303304 CPU_ZERO(&mask);
304305
305
- if (target_node == -1) {
306
+ if (target_node == NUMA_NO_NODE) {
306307 for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
307308 CPU_SET(cpu, &mask);
308309 } else {
309
- int cpu_start = (target_node + 0) * cpus_per_node;
310
- int cpu_stop = (target_node + 1) * cpus_per_node;
310
+ struct bitmask *cpumask = numa_allocate_cpumask();
311311
312
- BUG_ON(cpu_stop > g->p.nr_cpus);
313
-
314
- for (cpu = cpu_start; cpu < cpu_stop; cpu++)
315
- CPU_SET(cpu, &mask);
312
+ BUG_ON(!cpumask);
313
+ if (!numa_node_to_cpus(target_node, cpumask)) {
314
+ for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
315
+ if (numa_bitmask_isbitset(cpumask, cpu))
316
+ CPU_SET(cpu, &mask);
317
+ }
318
+ }
319
+ numa_free_cpumask(cpumask);
316320 }
317321
318322 ret = sched_setaffinity(0, sizeof(mask), &mask);
....@@ -343,7 +347,7 @@
343347 unsigned long nodemask;
344348 int ret;
345349
346
- if (node == -1)
350
+ if (node == NUMA_NO_NODE)
347351 return;
348352
349353 BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8);
....@@ -478,6 +482,18 @@
478482 pthread_mutexattr_init(&attr);
479483 pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
480484 pthread_mutex_init(mutex, &attr);
485
+}
486
+
487
+/*
488
+ * Return a process-shared (global) condition variable:
489
+ */
490
+static void init_global_cond(pthread_cond_t *cond)
491
+{
492
+ pthread_condattr_t attr;
493
+
494
+ pthread_condattr_init(&attr);
495
+ pthread_condattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
496
+ pthread_cond_init(cond, &attr);
481497 }
482498
483499 static int parse_cpu_list(const char *arg)
....@@ -730,8 +746,6 @@
730746 return -1;
731747
732748 return parse_node_list(arg);
733
-
734
- return 0;
735749 }
736750
737751 #define BIT(x) (1ul << x)
....@@ -814,12 +828,12 @@
814828 }
815829 }
816830 } else if (!g->p.data_backwards || (nr + loop) & 1) {
831
+ /* Process data forwards: */
817832
818833 d0 = data + off;
819834 d = data + off + 1;
820835 d1 = data + words;
821836
822
- /* Process data forwards: */
823837 for (;;) {
824838 if (unlikely(d >= d1))
825839 d = data;
....@@ -837,7 +851,6 @@
837851 d = data + off - 1;
838852 d1 = data + words;
839853
840
- /* Process data forwards: */
841854 for (;;) {
842855 if (unlikely(d < data))
843856 d = data + words-1;
....@@ -1136,15 +1149,18 @@
11361149 if (g->p.serialize_startup) {
11371150 pthread_mutex_lock(&g->startup_mutex);
11381151 g->nr_tasks_started++;
1152
+ /* The last thread wakes the main process. */
1153
+ if (g->nr_tasks_started == g->p.nr_tasks)
1154
+ pthread_cond_signal(&g->startup_cond);
1155
+
11391156 pthread_mutex_unlock(&g->startup_mutex);
11401157
11411158 /* Here we will wait for the main process to start us all at once: */
11421159 pthread_mutex_lock(&g->start_work_mutex);
1160
+ g->start_work = false;
11431161 g->nr_tasks_working++;
1144
-
1145
- /* Last one wake the main process: */
1146
- if (g->nr_tasks_working == g->p.nr_tasks)
1147
- pthread_mutex_unlock(&g->startup_done_mutex);
1162
+ while (!g->start_work)
1163
+ pthread_cond_wait(&g->start_work_cond, &g->start_work_mutex);
11481164
11491165 pthread_mutex_unlock(&g->start_work_mutex);
11501166 }
....@@ -1369,7 +1385,7 @@
13691385 int cpu;
13701386
13711387 /* Allow all nodes by default: */
1372
- td->bind_node = -1;
1388
+ td->bind_node = NUMA_NO_NODE;
13731389
13741390 /* Allow all CPUs by default: */
13751391 CPU_ZERO(&td->bind_cpumask);
....@@ -1441,8 +1457,9 @@
14411457
14421458 /* Startup serialization: */
14431459 init_global_mutex(&g->start_work_mutex);
1460
+ init_global_cond(&g->start_work_cond);
14441461 init_global_mutex(&g->startup_mutex);
1445
- init_global_mutex(&g->startup_done_mutex);
1462
+ init_global_cond(&g->startup_cond);
14461463 init_global_mutex(&g->stop_work_mutex);
14471464
14481465 init_thread_data();
....@@ -1502,9 +1519,6 @@
15021519 pids = zalloc(g->p.nr_proc * sizeof(*pids));
15031520 pid = -1;
15041521
1505
- /* All threads try to acquire it, this way we can wait for them to start up: */
1506
- pthread_mutex_lock(&g->start_work_mutex);
1507
-
15081522 if (g->p.serialize_startup) {
15091523 tprintf(" #\n");
15101524 tprintf(" # Startup synchronization: ..."); fflush(stdout);
....@@ -1526,22 +1540,29 @@
15261540 pids[i] = pid;
15271541
15281542 }
1529
- /* Wait for all the threads to start up: */
1530
- while (g->nr_tasks_started != g->p.nr_tasks)
1531
- usleep(USEC_PER_MSEC);
1532
-
1533
- BUG_ON(g->nr_tasks_started != g->p.nr_tasks);
15341543
15351544 if (g->p.serialize_startup) {
1545
+ bool threads_ready = false;
15361546 double startup_sec;
15371547
1538
- pthread_mutex_lock(&g->startup_done_mutex);
1548
+ /*
1549
+ * Wait for all the threads to start up. The last thread will
1550
+ * signal this process.
1551
+ */
1552
+ pthread_mutex_lock(&g->startup_mutex);
1553
+ while (g->nr_tasks_started != g->p.nr_tasks)
1554
+ pthread_cond_wait(&g->startup_cond, &g->startup_mutex);
15391555
1540
- /* This will start all threads: */
1541
- pthread_mutex_unlock(&g->start_work_mutex);
1556
+ pthread_mutex_unlock(&g->startup_mutex);
15421557
1543
- /* This mutex is locked - the last started thread will wake us: */
1544
- pthread_mutex_lock(&g->startup_done_mutex);
1558
+ /* Wait for all threads to be at the start_work_cond. */
1559
+ while (!threads_ready) {
1560
+ pthread_mutex_lock(&g->start_work_mutex);
1561
+ threads_ready = (g->nr_tasks_working == g->p.nr_tasks);
1562
+ pthread_mutex_unlock(&g->start_work_mutex);
1563
+ if (!threads_ready)
1564
+ usleep(1);
1565
+ }
15451566
15461567 gettimeofday(&stop, NULL);
15471568
....@@ -1555,7 +1576,11 @@
15551576 tprintf(" #\n");
15561577
15571578 start = stop;
1558
- pthread_mutex_unlock(&g->startup_done_mutex);
1579
+ /* Start all threads running. */
1580
+ pthread_mutex_lock(&g->start_work_mutex);
1581
+ g->start_work = true;
1582
+ pthread_mutex_unlock(&g->start_work_mutex);
1583
+ pthread_cond_broadcast(&g->start_work_cond);
15591584 } else {
15601585 gettimeofday(&start, NULL);
15611586 }
....@@ -1631,7 +1656,7 @@
16311656 "GB/sec,", "total-speed", "GB/sec total speed");
16321657
16331658 if (g->p.show_details >= 2) {
1634
- char tname[14 + 2 * 10 + 1];
1659
+ char tname[14 + 2 * 11 + 1];
16351660 struct thread_data *td;
16361661 for (p = 0; p < g->p.nr_proc; p++) {
16371662 for (t = 0; t < g->p.nr_threads; t++) {
....@@ -1734,12 +1759,12 @@
17341759 */
17351760 static const char *tests[][MAX_ARGS] = {
17361761 /* Basic single-stream NUMA bandwidth measurements: */
1737
- { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024",
1762
+ { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024",
17381763 "-C" , "0", "-M", "0", OPT_BW_RAM },
17391764 { "RAM-bw-local-NOTHP,",
17401765 "mem", "-p", "1", "-t", "1", "-P", "1024",
17411766 "-C" , "0", "-M", "0", OPT_BW_RAM_NOTHP },
1742
- { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024",
1767
+ { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024",
17431768 "-C" , "0", "-M", "1", OPT_BW_RAM },
17441769
17451770 /* 2-stream NUMA bandwidth measurements: */
....@@ -1756,7 +1781,7 @@
17561781 { " 1x3-convergence,", "mem", "-p", "1", "-t", "3", "-P", "512", OPT_CONV },
17571782 { " 1x4-convergence,", "mem", "-p", "1", "-t", "4", "-P", "512", OPT_CONV },
17581783 { " 1x6-convergence,", "mem", "-p", "1", "-t", "6", "-P", "1020", OPT_CONV },
1759
- { " 2x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV },
1784
+ { " 2x3-convergence,", "mem", "-p", "2", "-t", "3", "-P", "1020", OPT_CONV },
17601785 { " 3x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV },
17611786 { " 4x4-convergence,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV },
17621787 { " 4x4-convergence-NOTHP,",
....@@ -1781,24 +1806,24 @@
17811806 "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW_NOTHP },
17821807 { "16x1-bw-process,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_BW },
17831808
1784
- { " 4x1-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW },
1785
- { " 8x1-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW },
1786
- { "16x1-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW },
1787
- { "32x1-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW },
1809
+ { " 1x4-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW },
1810
+ { " 1x8-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW },
1811
+ { "1x16-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW },
1812
+ { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW },
17881813
1789
- { " 2x3-bw-thread,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW },
1790
- { " 4x4-bw-thread,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW },
1791
- { " 4x6-bw-thread,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW },
1792
- { " 4x8-bw-thread,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW },
1793
- { " 4x8-bw-thread-NOTHP,",
1814
+ { " 2x3-bw-process,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW },
1815
+ { " 4x4-bw-process,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW },
1816
+ { " 4x6-bw-process,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW },
1817
+ { " 4x8-bw-process,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW },
1818
+ { " 4x8-bw-process-NOTHP,",
17941819 "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW_NOTHP },
1795
- { " 3x3-bw-thread,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW },
1796
- { " 5x5-bw-thread,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW },
1820
+ { " 3x3-bw-process,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW },
1821
+ { " 5x5-bw-process,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW },
17971822
1798
- { "2x16-bw-thread,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW },
1799
- { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW },
1823
+ { "2x16-bw-process,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW },
1824
+ { "1x32-bw-process,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW },
18001825
1801
- { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW },
1826
+ { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW },
18021827 { "numa02-bw-NOTHP,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW_NOTHP },
18031828 { "numa01-bw-thread,", "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW },
18041829 { "numa01-bw-thread-NOTHP,",