| .. | .. |
| 9 | 9 | /* For the CLR_() macros */ |
| 10 | 10 | #include <pthread.h> |
| 11 | 11 | |
| 12 | | -#include "../perf.h" |
| 13 | | -#include "../builtin.h" |
| 14 | | -#include "../util/util.h" |
| 15 | 12 | #include <subcmd/parse-options.h> |
| 16 | 13 | #include "../util/cloexec.h" |
| 17 | 14 | |
| .. | .. |
| 34 | 31 | #include <sys/types.h> |
| 35 | 32 | #include <linux/kernel.h> |
| 36 | 33 | #include <linux/time64.h> |
| | 34 | +#include <linux/numa.h> |
| | 35 | +#include <linux/zalloc.h> |
| 37 | 36 | |
| 38 | 37 | #include <numa.h> |
| 39 | 38 | #include <numaif.h> |
| .. | .. |
| 138 | 137 | u8 *data; |
| 139 | 138 | |
| 140 | 139 | pthread_mutex_t startup_mutex; |
| | 140 | + pthread_cond_t startup_cond; |
| 141 | 141 | int nr_tasks_started; |
| 142 | 142 | |
| 143 | | - pthread_mutex_t startup_done_mutex; |
| 144 | | - |
| 145 | 143 | pthread_mutex_t start_work_mutex; |
| | 144 | + pthread_cond_t start_work_cond; |
| 146 | 145 | int nr_tasks_working; |
| | 146 | + bool start_work; |
| 147 | 147 | |
| 148 | 148 | pthread_mutex_t stop_work_mutex; |
| 149 | 149 | u64 bytes_done; |
| .. | .. |
| 248 | 248 | */ |
| 249 | 249 | static bool node_has_cpus(int node) |
| 250 | 250 | { |
| 251 | | - struct bitmask *cpu = numa_allocate_cpumask(); |
| 252 | | - unsigned int i; |
| | 251 | + struct bitmask *cpumask = numa_allocate_cpumask(); |
| | 252 | + bool ret = false; /* fall back to nocpus */ |
| | 253 | + int cpu; |
| 253 | 254 | |
| 254 | | - if (cpu && !numa_node_to_cpus(node, cpu)) { |
| 255 | | - for (i = 0; i < cpu->size; i++) { |
| 256 | | - if (numa_bitmask_isbitset(cpu, i)) |
| 257 | | - return true; |
| | 255 | + BUG_ON(!cpumask); |
| | 256 | + if (!numa_node_to_cpus(node, cpumask)) { |
| | 257 | + for (cpu = 0; cpu < (int)cpumask->size; cpu++) { |
| | 258 | + if (numa_bitmask_isbitset(cpumask, cpu)) { |
| | 259 | + ret = true; |
| | 260 | + break; |
| | 261 | + } |
| 258 | 262 | } |
| 259 | 263 | } |
| | 264 | + numa_free_cpumask(cpumask); |
| 260 | 265 | |
| 261 | | - return false; /* lets fall back to nocpus safely */ |
| | 266 | + return ret; |
| 262 | 267 | } |
| 263 | 268 | |
| 264 | 269 | static cpu_set_t bind_to_cpu(int target_cpu) |
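The rewritten node_has_cpus() above follows the standard libnuma query pattern and fixes two issues visible in the removed lines: returning true from inside the loop leaked the bitmask obtained from numa_allocate_cpumask(), and an allocation failure silently degraded into "node has no CPUs". A minimal, self-contained sketch of the same pattern (sketch_node_has_cpus() and the main() driver are illustrative names, not code from perf; build with -lnuma):

```c
/* Sketch of the libnuma pattern used above: query a node's CPU mask,
 * test it, and free it again. Not part of the patch. */
#include <stdbool.h>
#include <stdio.h>
#include <numa.h>

static bool sketch_node_has_cpus(int node)
{
	struct bitmask *cpumask = numa_allocate_cpumask();
	bool ret = false;
	unsigned int cpu;

	if (!cpumask)
		return false;

	if (!numa_node_to_cpus(node, cpumask)) {	/* returns 0 on success */
		for (cpu = 0; cpu < cpumask->size; cpu++) {
			if (numa_bitmask_isbitset(cpumask, cpu)) {
				ret = true;
				break;
			}
		}
	}
	numa_free_cpumask(cpumask);	/* the old code leaked this on success */

	return ret;
}

int main(void)
{
	int node;

	if (numa_available() < 0)
		return 1;

	for (node = 0; node <= numa_max_node(); node++)
		printf("node %d: %s\n", node,
		       sketch_node_has_cpus(node) ? "has CPUs" : "memory-only");

	return 0;
}
```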
| .. | .. |
| 289 | 294 | |
| 290 | 295 | static cpu_set_t bind_to_node(int target_node) |
| 291 | 296 | { |
| 292 | | - int cpus_per_node = g->p.nr_cpus / nr_numa_nodes(); |
| 293 | 297 | cpu_set_t orig_mask, mask; |
| 294 | 298 | int cpu; |
| 295 | 299 | int ret; |
| 296 | | - |
| 297 | | - BUG_ON(cpus_per_node * nr_numa_nodes() != g->p.nr_cpus); |
| 298 | | - BUG_ON(!cpus_per_node); |
| 299 | 300 | |
| 300 | 301 | ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask); |
| 301 | 302 | BUG_ON(ret); |
| 302 | 303 | |
| 303 | 304 | CPU_ZERO(&mask); |
| 304 | 305 | |
| 305 | | - if (target_node == -1) { |
| | 306 | + if (target_node == NUMA_NO_NODE) { |
| 306 | 307 | for (cpu = 0; cpu < g->p.nr_cpus; cpu++) |
| 307 | 308 | CPU_SET(cpu, &mask); |
| 308 | 309 | } else { |
| 309 | | - int cpu_start = (target_node + 0) * cpus_per_node; |
| 310 | | - int cpu_stop = (target_node + 1) * cpus_per_node; |
| | 310 | + struct bitmask *cpumask = numa_allocate_cpumask(); |
| 311 | 311 | |
| 312 | | - BUG_ON(cpu_stop > g->p.nr_cpus); |
| 313 | | - |
| 314 | | - for (cpu = cpu_start; cpu < cpu_stop; cpu++) |
| 315 | | - CPU_SET(cpu, &mask); |
| | 312 | + BUG_ON(!cpumask); |
| | 313 | + if (!numa_node_to_cpus(target_node, cpumask)) { |
| | 314 | + for (cpu = 0; cpu < (int)cpumask->size; cpu++) { |
| | 315 | + if (numa_bitmask_isbitset(cpumask, cpu)) |
| | 316 | + CPU_SET(cpu, &mask); |
| | 317 | + } |
| | 318 | + } |
| | 319 | + numa_free_cpumask(cpumask); |
| 316 | 320 | } |
| 317 | 321 | |
| 318 | 322 | ret = sched_setaffinity(0, sizeof(mask), &mask); |
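bind_to_node() previously assumed every node owns cpus_per_node consecutive CPU IDs, an assumption that does not hold on topologies with non-contiguous CPU numbering or CPU-less (memory-only) nodes; the replacement instead asks libnuma which CPUs belong to the node. A hedged sketch of that pattern, using a hypothetical bind_self_to_node() helper rather than the benchmark's own globals:

```c
/* Sketch, not from the patch: pin the caller to all CPUs of one NUMA
 * node without assuming CPU numbers are contiguous per node. */
#define _GNU_SOURCE
#include <sched.h>
#include <numa.h>

static int bind_self_to_node(int node)
{
	struct bitmask *cpumask = numa_allocate_cpumask();
	cpu_set_t mask;
	unsigned int cpu;
	int err = -1;

	if (!cpumask)
		return -1;

	CPU_ZERO(&mask);
	if (!numa_node_to_cpus(node, cpumask)) {
		for (cpu = 0; cpu < cpumask->size; cpu++) {
			if (numa_bitmask_isbitset(cpumask, cpu))
				CPU_SET(cpu, &mask);
		}
		/* pid 0 means the calling thread; fails if the node has no CPUs */
		err = sched_setaffinity(0, sizeof(mask), &mask);
	}
	numa_free_cpumask(cpumask);

	return err;
}
```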
| .. | .. |
| 343 | 347 | unsigned long nodemask; |
| 344 | 348 | int ret; |
| 345 | 349 | |
| 346 | | - if (node == -1) |
| | 350 | + if (node == NUMA_NO_NODE) |
| 347 | 351 | return; |
| 348 | 352 | |
| 349 | 353 | BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8); |
| .. | .. |
| 478 | 482 | pthread_mutexattr_init(&attr); |
| 479 | 483 | pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); |
| 480 | 484 | pthread_mutex_init(mutex, &attr); |
| | 485 | +} |
| | 486 | + |
| | 487 | +/* |
| | 488 | + * Return a process-shared (global) condition variable: |
| | 489 | + */ |
| | 490 | +static void init_global_cond(pthread_cond_t *cond) |
| | 491 | +{ |
| | 492 | + pthread_condattr_t attr; |
| | 493 | + |
| | 494 | + pthread_condattr_init(&attr); |
| | 495 | + pthread_condattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); |
| | 496 | + pthread_cond_init(cond, &attr); |
| 481 | 497 | } |
| 482 | 498 | |
| 483 | 499 | static int parse_cpu_list(const char *arg) |
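init_global_cond() mirrors init_global_mutex(): the benchmark's worker tasks can be separate processes created with fork() and the global state lives in a shared mapping, so the condition variables must be created with PTHREAD_PROCESS_SHARED. A self-contained sketch of that pattern, assuming an anonymous MAP_SHARED mapping and a single child (illustrative code, not taken from perf; build with -pthread):

```c
/* Sketch: a PTHREAD_PROCESS_SHARED mutex/condvar pair only works across
 * processes if the objects themselves live in shared memory. */
#include <pthread.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

struct shared {
	pthread_mutex_t mutex;
	pthread_cond_t cond;
	int ready;
};

int main(void)
{
	pthread_mutexattr_t mattr;
	pthread_condattr_t cattr;
	struct shared *sh;

	sh = mmap(NULL, sizeof(*sh), PROT_READ | PROT_WRITE,
		  MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (sh == MAP_FAILED)
		return 1;

	pthread_mutexattr_init(&mattr);
	pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED);
	pthread_mutex_init(&sh->mutex, &mattr);

	pthread_condattr_init(&cattr);
	pthread_condattr_setpshared(&cattr, PTHREAD_PROCESS_SHARED);
	pthread_cond_init(&sh->cond, &cattr);

	if (fork() == 0) {			/* child: announce readiness */
		pthread_mutex_lock(&sh->mutex);
		sh->ready = 1;
		pthread_cond_signal(&sh->cond);
		pthread_mutex_unlock(&sh->mutex);
		_exit(0);
	}

	/* parent: wait for the child under the shared mutex */
	pthread_mutex_lock(&sh->mutex);
	while (!sh->ready)
		pthread_cond_wait(&sh->cond, &sh->mutex);
	pthread_mutex_unlock(&sh->mutex);

	printf("child signalled\n");
	wait(NULL);
	return 0;
}
```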
| .. | .. |
| 730 | 746 | return -1; |
| 731 | 747 | |
| 732 | 748 | return parse_node_list(arg); |
| 733 | | - |
| 734 | | - return 0; |
| 735 | 749 | } |
| 736 | 750 | |
| 737 | 751 | #define BIT(x) (1ul << x) |
| .. | .. |
| 814 | 828 | } |
| 815 | 829 | } |
| 816 | 830 | } else if (!g->p.data_backwards || (nr + loop) & 1) { |
| | 831 | + /* Process data forwards: */ |
| 817 | 832 | |
| 818 | 833 | d0 = data + off; |
| 819 | 834 | d = data + off + 1; |
| 820 | 835 | d1 = data + words; |
| 821 | 836 | |
| 822 | | - /* Process data forwards: */ |
| 823 | 837 | for (;;) { |
| 824 | 838 | if (unlikely(d >= d1)) |
| 825 | 839 | d = data; |
| .. | .. |
| 837 | 851 | d = data + off - 1; |
| 838 | 852 | d1 = data + words; |
| 839 | 853 | |
| 840 | | - /* Process data forwards: */ |
| 841 | 854 | for (;;) { |
| 842 | 855 | if (unlikely(d < data)) |
| 843 | 856 | d = data + words-1; |
| .. | .. |
| 1136 | 1149 | if (g->p.serialize_startup) { |
| 1137 | 1150 | pthread_mutex_lock(&g->startup_mutex); |
| 1138 | 1151 | g->nr_tasks_started++; |
| | 1152 | + /* The last thread wakes the main process. */ |
| | 1153 | + if (g->nr_tasks_started == g->p.nr_tasks) |
| | 1154 | + pthread_cond_signal(&g->startup_cond); |
| | 1155 | + |
| 1139 | 1156 | pthread_mutex_unlock(&g->startup_mutex); |
| 1140 | 1157 | |
| 1141 | 1158 | /* Here we will wait for the main process to start us all at once: */ |
| 1142 | 1159 | pthread_mutex_lock(&g->start_work_mutex); |
| | 1160 | + g->start_work = false; |
| 1143 | 1161 | g->nr_tasks_working++; |
| 1144 | | - |
| 1145 | | - /* Last one wake the main process: */ |
| 1146 | | - if (g->nr_tasks_working == g->p.nr_tasks) |
| 1147 | | - pthread_mutex_unlock(&g->startup_done_mutex); |
| | 1162 | + while (!g->start_work) |
| | 1163 | + pthread_cond_wait(&g->start_work_cond, &g->start_work_mutex); |
| 1148 | 1164 | |
| 1149 | 1165 | pthread_mutex_unlock(&g->start_work_mutex); |
| 1150 | 1166 | } |
| .. | .. |
| 1369 | 1385 | int cpu; |
| 1370 | 1386 | |
| 1371 | 1387 | /* Allow all nodes by default: */ |
| 1372 | | - td->bind_node = -1; |
| | 1388 | + td->bind_node = NUMA_NO_NODE; |
| 1373 | 1389 | |
| 1374 | 1390 | /* Allow all CPUs by default: */ |
| 1375 | 1391 | CPU_ZERO(&td->bind_cpumask); |
| .. | .. |
| 1441 | 1457 | |
| 1442 | 1458 | /* Startup serialization: */ |
| 1443 | 1459 | init_global_mutex(&g->start_work_mutex); |
| | 1460 | + init_global_cond(&g->start_work_cond); |
| 1444 | 1461 | init_global_mutex(&g->startup_mutex); |
| 1445 | | - init_global_mutex(&g->startup_done_mutex); |
| | 1462 | + init_global_cond(&g->startup_cond); |
| 1446 | 1463 | init_global_mutex(&g->stop_work_mutex); |
| 1447 | 1464 | |
| 1448 | 1465 | init_thread_data(); |
| .. | .. |
| 1502 | 1519 | pids = zalloc(g->p.nr_proc * sizeof(*pids)); |
| 1503 | 1520 | pid = -1; |
| 1504 | 1521 | |
| 1505 | | - /* All threads try to acquire it, this way we can wait for them to start up: */ |
| 1506 | | - pthread_mutex_lock(&g->start_work_mutex); |
| 1507 | | - |
| 1508 | 1522 | if (g->p.serialize_startup) { |
| 1509 | 1523 | tprintf(" #\n"); |
| 1510 | 1524 | tprintf(" # Startup synchronization: ..."); fflush(stdout); |
| .. | .. |
| 1526 | 1540 | pids[i] = pid; |
| 1527 | 1541 | |
| 1528 | 1542 | } |
| 1529 | | - /* Wait for all the threads to start up: */ |
| 1530 | | - while (g->nr_tasks_started != g->p.nr_tasks) |
| 1531 | | - usleep(USEC_PER_MSEC); |
| 1532 | | - |
| 1533 | | - BUG_ON(g->nr_tasks_started != g->p.nr_tasks); |
| 1534 | 1543 | |
| 1535 | 1544 | if (g->p.serialize_startup) { |
| | 1545 | + bool threads_ready = false; |
| 1536 | 1546 | double startup_sec; |
| 1537 | 1547 | |
| 1538 | | - pthread_mutex_lock(&g->startup_done_mutex); |
| | 1548 | + /* |
| | 1549 | + * Wait for all the threads to start up. The last thread will |
| | 1550 | + * signal this process. |
| | 1551 | + */ |
| | 1552 | + pthread_mutex_lock(&g->startup_mutex); |
| | 1553 | + while (g->nr_tasks_started != g->p.nr_tasks) |
| | 1554 | + pthread_cond_wait(&g->startup_cond, &g->startup_mutex); |
| 1539 | 1555 | |
| 1540 | | - /* This will start all threads: */ |
| 1541 | | - pthread_mutex_unlock(&g->start_work_mutex); |
| | 1556 | + pthread_mutex_unlock(&g->startup_mutex); |
| 1542 | 1557 | |
| 1543 | | - /* This mutex is locked - the last started thread will wake us: */ |
| 1544 | | - pthread_mutex_lock(&g->startup_done_mutex); |
| | 1558 | + /* Wait for all threads to be at the start_work_cond. */ |
| | 1559 | + while (!threads_ready) { |
| | 1560 | + pthread_mutex_lock(&g->start_work_mutex); |
| | 1561 | + threads_ready = (g->nr_tasks_working == g->p.nr_tasks); |
| | 1562 | + pthread_mutex_unlock(&g->start_work_mutex); |
| | 1563 | + if (!threads_ready) |
| | 1564 | + usleep(1); |
| | 1565 | + } |
| 1545 | 1566 | |
| 1546 | 1567 | gettimeofday(&stop, NULL); |
| 1547 | 1568 | |
| .. | .. |
| 1555 | 1576 | tprintf(" #\n"); |
| 1556 | 1577 | |
| 1557 | 1578 | start = stop; |
| 1558 | | - pthread_mutex_unlock(&g->startup_done_mutex); |
| | 1579 | + /* Start all threads running. */ |
| | 1580 | + pthread_mutex_lock(&g->start_work_mutex); |
| | 1581 | + g->start_work = true; |
| | 1582 | + pthread_mutex_unlock(&g->start_work_mutex); |
| | 1583 | + pthread_cond_broadcast(&g->start_work_cond); |
| 1559 | 1584 | } else { |
| 1560 | 1585 | gettimeofday(&start, NULL); |
| 1561 | 1586 | } |
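Taken together, the worker-side and main-side hunks replace the old trick of using locked mutexes as one-shot barriers (startup_done_mutex was locked by the main process and unlocked by the last worker, something POSIX leaves undefined for default mutexes) with two conventional condition-variable handshakes: each worker counts itself in under startup_mutex and the last one signals startup_cond, then everyone blocks on start_work_cond until the main process sets start_work and broadcasts. A reduced sketch of the same rendezvous, using plain pthreads in one process, a single mutex instead of two, and made-up names:

```c
/* Sketch, not from the patch: workers report in, the last one signals
 * the coordinator, and everyone then waits for a broadcast "go" flag. */
#include <pthread.h>
#include <stdio.h>

#define NR_WORKERS 4

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t started_cond = PTHREAD_COND_INITIALIZER;
static pthread_cond_t go_cond = PTHREAD_COND_INITIALIZER;
static int nr_started;
static int go;

static void *worker(void *arg)
{
	pthread_mutex_lock(&lock);
	/* The last worker wakes the coordinator. */
	if (++nr_started == NR_WORKERS)
		pthread_cond_signal(&started_cond);

	/* Wait for the coordinator to release everyone at once. */
	while (!go)
		pthread_cond_wait(&go_cond, &lock);
	pthread_mutex_unlock(&lock);

	printf("worker %ld running\n", (long)arg);
	return NULL;
}

int main(void)
{
	pthread_t tids[NR_WORKERS];
	long i;

	for (i = 0; i < NR_WORKERS; i++)
		pthread_create(&tids[i], NULL, worker, (void *)i);

	/* Wait until all workers have checked in... */
	pthread_mutex_lock(&lock);
	while (nr_started != NR_WORKERS)
		pthread_cond_wait(&started_cond, &lock);

	/* ...then start them all together. */
	go = 1;
	pthread_cond_broadcast(&go_cond);
	pthread_mutex_unlock(&lock);

	for (i = 0; i < NR_WORKERS; i++)
		pthread_join(tids[i], NULL);

	return 0;
}
```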
| .. | .. |
| 1631 | 1656 | "GB/sec,", "total-speed", "GB/sec total speed"); |
| 1632 | 1657 | |
| 1633 | 1658 | if (g->p.show_details >= 2) { |
| 1634 | | - char tname[14 + 2 * 10 + 1]; |
| | 1659 | + char tname[14 + 2 * 11 + 1]; |
| 1635 | 1660 | struct thread_data *td; |
| 1636 | 1661 | for (p = 0; p < g->p.nr_proc; p++) { |
| 1637 | 1662 | for (t = 0; t < g->p.nr_threads; t++) { |
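The wider tname buffer is presumably sized for a format with 14 fixed characters plus two int values: a 32-bit int can print as 11 characters ("-2147483648"), so 14 + 2 * 11 + 1 covers the worst case that compiler format-truncation checks assume, whereas the old 2 * 10 was one byte short per value.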
| .. | .. |
| 1734 | 1759 | */ |
| 1735 | 1760 | static const char *tests[][MAX_ARGS] = { |
| 1736 | 1761 | /* Basic single-stream NUMA bandwidth measurements: */ |
| 1737 | | - { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024", |
| | 1762 | + { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024", |
| 1738 | 1763 | "-C" , "0", "-M", "0", OPT_BW_RAM }, |
| 1739 | 1764 | { "RAM-bw-local-NOTHP,", |
| 1740 | 1765 | "mem", "-p", "1", "-t", "1", "-P", "1024", |
| 1741 | 1766 | "-C" , "0", "-M", "0", OPT_BW_RAM_NOTHP }, |
| 1742 | | - { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024", |
| | 1767 | + { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024", |
| 1743 | 1768 | "-C" , "0", "-M", "1", OPT_BW_RAM }, |
| 1744 | 1769 | |
| 1745 | 1770 | /* 2-stream NUMA bandwidth measurements: */ |
| .. | .. |
| 1756 | 1781 | { " 1x3-convergence,", "mem", "-p", "1", "-t", "3", "-P", "512", OPT_CONV }, |
| 1757 | 1782 | { " 1x4-convergence,", "mem", "-p", "1", "-t", "4", "-P", "512", OPT_CONV }, |
| 1758 | 1783 | { " 1x6-convergence,", "mem", "-p", "1", "-t", "6", "-P", "1020", OPT_CONV }, |
| 1759 | | - { " 2x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV }, |
| | 1784 | + { " 2x3-convergence,", "mem", "-p", "2", "-t", "3", "-P", "1020", OPT_CONV }, |
| 1760 | 1785 | { " 3x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV }, |
| 1761 | 1786 | { " 4x4-convergence,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV }, |
| 1762 | 1787 | { " 4x4-convergence-NOTHP,", |
| .. | .. |
| 1781 | 1806 | "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW_NOTHP }, |
| 1782 | 1807 | { "16x1-bw-process,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_BW }, |
| 1783 | 1808 | |
| 1784 | | - { " 4x1-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW }, |
| 1785 | | - { " 8x1-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW }, |
| 1786 | | - { "16x1-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW }, |
| 1787 | | - { "32x1-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW }, |
| | 1809 | + { " 1x4-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW }, |
| | 1810 | + { " 1x8-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW }, |
| | 1811 | + { "1x16-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW }, |
| | 1812 | + { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW }, |
| 1788 | 1813 | |
| 1789 | | - { " 2x3-bw-thread,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW }, |
| 1790 | | - { " 4x4-bw-thread,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW }, |
| 1791 | | - { " 4x6-bw-thread,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW }, |
| 1792 | | - { " 4x8-bw-thread,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW }, |
| 1793 | | - { " 4x8-bw-thread-NOTHP,", |
| | 1814 | + { " 2x3-bw-process,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW }, |
| | 1815 | + { " 4x4-bw-process,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW }, |
| | 1816 | + { " 4x6-bw-process,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW }, |
| | 1817 | + { " 4x8-bw-process,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW }, |
| | 1818 | + { " 4x8-bw-process-NOTHP,", |
| 1794 | 1819 | "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW_NOTHP }, |
| 1795 | | - { " 3x3-bw-thread,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW }, |
| 1796 | | - { " 5x5-bw-thread,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW }, |
| | 1820 | + { " 3x3-bw-process,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW }, |
| | 1821 | + { " 5x5-bw-process,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW }, |
| 1797 | 1822 | |
| 1798 | | - { "2x16-bw-thread,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW }, |
| 1799 | | - { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW }, |
| | 1823 | + { "2x16-bw-process,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW }, |
| | 1824 | + { "1x32-bw-process,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW }, |
| 1800 | 1825 | |
| 1801 | | - { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW }, |
| | 1826 | + { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW }, |
| 1802 | 1827 | { "numa02-bw-NOTHP,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW_NOTHP }, |
| 1803 | 1828 | { "numa01-bw-thread,", "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW }, |
| 1804 | 1829 | { "numa01-bw-thread-NOTHP,", |
|---|