Release 4.10 tools/perf/bench/numa.c
/*
* numa.c
*
* numa: Simulate NUMA-sensitive workload and measure their NUMA performance
*/
/* For the CLR_() macros */
#include <pthread.h>
#include "../perf.h"
#include "../builtin.h"
#include "../util/util.h"
#include <subcmd/parse-options.h>
#include "../util/cloexec.h"
#include "bench.h"
#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <assert.h>
#include <malloc.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <sys/prctl.h>
#include <sys/types.h>
#include <linux/time64.h>
#include <numa.h>
#include <numaif.h>
/*
* Regular printout to the terminal, supressed if -q is specified:
*/
#define tprintf(x...) do { if (g && g->p.show_details >= 0) printf(x); } while (0)
/*
* Debug printf:
*/
#define dprintf(x...) do { if (g && g->p.show_details >= 1) printf(x); } while (0)
struct thread_data {
int curr_cpu;
cpu_set_t bind_cpumask;
int bind_node;
u8 *process_data;
int process_nr;
int thread_nr;
int task_nr;
unsigned int loops_done;
u64 val;
u64 runtime_ns;
u64 system_time_ns;
u64 user_time_ns;
double speed_gbs;
pthread_mutex_t *process_lock;
};
/* Parameters set by options: */
struct params {
/* Startup synchronization: */
bool serialize_startup;
/* Task hierarchy: */
int nr_proc;
int nr_threads;
/* Working set sizes: */
const char *mb_global_str;
const char *mb_proc_str;
const char *mb_proc_locked_str;
const char *mb_thread_str;
double mb_global;
double mb_proc;
double mb_proc_locked;
double mb_thread;
/* Access patterns to the working set: */
bool data_reads;
bool data_writes;
bool data_backwards;
bool data_zero_memset;
bool data_rand_walk;
u32 nr_loops;
u32 nr_secs;
u32 sleep_usecs;
/* Working set initialization: */
bool init_zero;
bool init_random;
bool init_cpu0;
/* Misc options: */
int show_details;
int run_all;
int thp;
long bytes_global;
long bytes_process;
long bytes_process_locked;
long bytes_thread;
int nr_tasks;
bool show_quiet;
bool show_convergence;
bool measure_convergence;
int perturb_secs;
int nr_cpus;
int nr_nodes;
/* Affinity options -C and -N: */
char *cpu_list_str;
char *node_list_str;
};
/* Global, read-writable area, accessible to all processes and threads: */
struct global_info {
u8 *data;
pthread_mutex_t startup_mutex;
int nr_tasks_started;
pthread_mutex_t startup_done_mutex;
pthread_mutex_t start_work_mutex;
int nr_tasks_working;
pthread_mutex_t stop_work_mutex;
u64 bytes_done;
struct thread_data *threads;
/* Convergence latency measurement: */
bool all_converged;
bool stop_work;
int print_once;
struct params p;
};
static struct global_info *g = NULL;
static int parse_cpus_opt(const struct option *opt, const char *arg, int unset);
static int parse_nodes_opt(const struct option *opt, const char *arg, int unset);
struct params p0;
static const struct option options[] = {
OPT_INTEGER('p', "nr_proc" , &p0.nr_proc, "number of processes"),
OPT_INTEGER('t', "nr_threads" , &p0.nr_threads, "number of threads per process"),
OPT_STRING('G', "mb_global" , &p0.mb_global_str, "MB", "global memory (MBs)"),
OPT_STRING('P', "mb_proc" , &p0.mb_proc_str, "MB", "process memory (MBs)"),
OPT_STRING('L', "mb_proc_locked", &p0.mb_proc_locked_str,"MB", "process serialized/locked memory access (MBs), <= process_memory"),
OPT_STRING('T', "mb_thread" , &p0.mb_thread_str, "MB", "thread memory (MBs)"),
OPT_UINTEGER('l', "nr_loops" , &p0.nr_loops, "max number of loops to run (default: unlimited)"),
OPT_UINTEGER('s', "nr_secs" , &p0.nr_secs, "max number of seconds to run (default: 5 secs)"),
OPT_UINTEGER('u', "usleep" , &p0.sleep_usecs, "usecs to sleep per loop iteration"),
OPT_BOOLEAN('R', "data_reads" , &p0.data_reads, "access the data via writes (can be mixed with -W)"),
OPT_BOOLEAN('W', "data_writes" , &p0.data_writes, "access the data via writes (can be mixed with -R)"),
OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards, "access the data backwards as well"),
OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"),
OPT_BOOLEAN('r', "data_rand_walk", &p0.data_rand_walk, "access the data with random (32bit LFSR) walk"),
OPT_BOOLEAN('z', "init_zero" , &p0.init_zero, "bzero the initial allocations"),
OPT_BOOLEAN('I', "init_random" , &p0.init_random, "randomize the contents of the initial allocations"),
OPT_BOOLEAN('0', "init_cpu0" , &p0.init_cpu0, "do the initial allocations on CPU#0"),
OPT_INTEGER('x', "perturb_secs", &p0.perturb_secs, "perturb thread 0/0 every X secs, to test convergence stability"),
OPT_INCR ('d', "show_details" , &p0.show_details, "Show details"),
OPT_INCR ('a', "all" , &p0.run_all, "Run all tests in the suite"),
OPT_INTEGER('H', "thp" , &p0.thp, "MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"),
OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details"),
OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"),
OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "quiet mode"),
OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"),
/* Special option string parsing callbacks: */
OPT_CALLBACK('C', "cpus", NULL, "cpu[,cpu2,...cpuN]",
"bind the first N tasks to these specific cpus (the rest is unbound)",
parse_cpus_opt),
OPT_CALLBACK('M', "memnodes", NULL, "node[,node2,...nodeN]",
"bind the first N tasks to these specific memory nodes (the rest is unbound)",
parse_nodes_opt),
OPT_END()
};
static const char * const bench_numa_usage[] = {
"perf bench numa <options>",
NULL
};
static const char * const numa_usage[] = {
"perf bench numa mem [<options>]",
NULL
};
static cpu_set_t bind_to_cpu(int target_cpu)
{
cpu_set_t orig_mask, mask;
int ret;
ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
BUG_ON(ret);
CPU_ZERO(&mask);
if (target_cpu == -1) {
int cpu;
for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
CPU_SET(cpu, &mask);
} else {
BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
CPU_SET(target_cpu, &mask);
}
ret = sched_setaffinity(0, sizeof(mask), &mask);
BUG_ON(ret);
return orig_mask;
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
ingo molnar | ingo molnar | 129 | 100.00% | 1 | 100.00% |
| Total | 129 | 100.00% | 1 | 100.00% |
static cpu_set_t bind_to_node(int target_node)
{
int cpus_per_node = g->p.nr_cpus/g->p.nr_nodes;
cpu_set_t orig_mask, mask;
int cpu;
int ret;
BUG_ON(cpus_per_node*g->p.nr_nodes != g->p.nr_cpus);
BUG_ON(!cpus_per_node);
ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
BUG_ON(ret);
CPU_ZERO(&mask);
if (target_node == -1) {
for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
CPU_SET(cpu, &mask);
} else {
int cpu_start = (target_node + 0) * cpus_per_node;
int cpu_stop = (target_node + 1) * cpus_per_node;
BUG_ON(cpu_stop > g->p.nr_cpus);
for (cpu = cpu_start; cpu < cpu_stop; cpu++)
CPU_SET(cpu, &mask);
}
ret = sched_setaffinity(0, sizeof(mask), &mask);
BUG_ON(ret);
return orig_mask;
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
ingo molnar | ingo molnar | 198 | 100.00% | 1 | 100.00% |
| Total | 198 | 100.00% | 1 | 100.00% |
static void bind_to_cpumask(cpu_set_t mask)
{
int ret;
ret = sched_setaffinity(0, sizeof(mask), &mask);
BUG_ON(ret);
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
ingo molnar | ingo molnar | 32 | 100.00% | 1 | 100.00% |
| Total | 32 | 100.00% | 1 | 100.00% |
static void mempol_restore(void)
{
int ret;
ret = set_mempolicy(MPOL_DEFAULT, NULL, g->p.nr_nodes-1);
BUG_ON(ret);
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
ingo molnar | ingo molnar | 33 | 100.00% | 1 | 100.00% |
| Total | 33 | 100.00% | 1 | 100.00% |
static void bind_to_memnode(int node)
{
unsigned long nodemask;
int ret;
if (node == -1)
return;
BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8);
nodemask = 1L << node;
ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8);
dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret);
BUG_ON(ret);
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
ingo molnar | ingo molnar | 80 | 97.56% | 1 | 50.00% |
jakub jelen | jakub jelen | 2 | 2.44% | 1 | 50.00% |
| Total | 82 | 100.00% | 2 | 100.00% |
#define HPSIZE (2*1024*1024)
#define set_taskname(fmt...) \
do { \
char name[20]; \
\
snprintf(name, 20, fmt); \
prctl(PR_SET_NAME, name); \
} while (0)
static u8 *alloc_data(ssize_t bytes0, int map_flags,
int init_zero, int init_cpu0, int thp, int init_random)
{
cpu_set_t orig_mask;
ssize_t bytes;
u8 *buf;
int ret;
if (!bytes0)
return NULL;
/* Allocate and initialize all memory on CPU#0: */
if (init_cpu0) {
orig_mask = bind_to_node(0);
bind_to_memnode(0);
}
bytes = bytes0 + HPSIZE;
buf = (void *)mmap(0, bytes, PROT_READ|PROT_WRITE, MAP_ANON|map_flags, -1, 0);
BUG_ON(buf == (void *)-1);
if (map_flags == MAP_PRIVATE) {
if (thp > 0) {
ret = madvise(buf, bytes, MADV_HUGEPAGE);
if (ret && !g->print_once) {
g->print_once = 1;
printf("WARNING: Could not enable THP - do: 'echo madvise > /sys/kernel/mm/transparent_hugepage/enabled'\n");
}
}
if (thp < 0) {
ret = madvise(buf, bytes, MADV_NOHUGEPAGE);
if (ret && !g->print_once) {
g->print_once = 1;
printf("WARNING: Could not disable THP: run a CONFIG_TRANSPARENT_HUGEPAGE kernel?\n");
}
}
}
if (init_zero) {
bzero(buf, bytes);
} else {
/* Initialize random contents, different in each word: */
if (init_random) {
u64 *wbuf = (void *)buf;
long off = rand();
long i;
for (i = 0; i < bytes/8; i++)
wbuf[i] = i + off;
}
}
/* Align to 2MB boundary: */
buf = (void *)(((unsigned long)buf + HPSIZE-1) & ~(HPSIZE-1));
/* Restore affinity: */
if (init_cpu0) {
bind_to_cpumask(orig_mask);
mempol_restore();
}
return buf;
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
ingo molnar | ingo molnar | 311 | 100.00% | 1 | 100.00% |
| Total | 311 | 100.00% | 1 | 100.00% |
static void free_data(void *data, ssize_t bytes)
{
int ret;
if (!data)
return;
ret = munmap(data, bytes);
BUG_ON(ret);
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
ingo molnar | ingo molnar | 36 | 100.00% | 1 | 100.00% |
| Total | 36 | 100.00% | 1 | 100.00% |
/*
* Create a shared memory buffer that can be shared between processes, zeroed:
*/
static void * zalloc_shared_data(ssize_t bytes)
{
return alloc_data(bytes, MAP_SHARED, 1, g->p.init_cpu0, g->p.thp, g->p.init_random);
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
ingo molnar | ingo molnar | 38 | 100.00% | 1 | 100.00% |
| Total | 38 | 100.00% | 1 | 100.00% |
/*
* Create a shared memory buffer that can be shared between processes:
*/
static void * setup_shared_data(ssize_t bytes)
{
return alloc_data(bytes, MAP_SHARED, 0, g->p.init_cpu0, g->p.thp, g->p.init_random);
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
ingo molnar | ingo molnar | 38 | 100.00% | 1 | 100.00% |
| Total | 38 | 100.00% | 1 | 100.00% |
/*
* Allocate process-local memory - this will either be shared between
* threads of this process, or only be accessed by this thread:
*/
static void * setup_private_data(ssize_t bytes)
{
return alloc_data(bytes, MAP_PRIVATE, 0, g->p.init_cpu0, g->p.thp, g->p.init_random);
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
ingo molnar | ingo molnar | 38 | 100.00% | 1 | 100.00% |
| Total | 38 | 100.00% | 1 | 100.00% |
/*
* Return a process-shared (global) mutex:
*/
static void init_global_mutex(pthread_mutex_t *mutex)
{
pthread_mutexattr_t attr;
pthread_mutexattr_init(&attr);
pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
pthread_mutex_init(mutex, &attr);
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
ingo molnar | ingo molnar | 35 | 100.00% | 1 | 100.00% |
| Total | 35 | 100.00% | 1 | 100.00% |
static int parse_cpu_list(const char *arg)
{
p0.cpu_list_str = strdup(arg);
dprintf("got CPU list: {%s}\n", p0.cpu_list_str);
return 0;
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
ingo molnar | ingo molnar | 32 | 100.00% | 1 | 100.00% |
| Total | 32 | 100.00% | 1 | 100.00% |
static int parse_setup_cpu_list(void)
{
struct thread_data *td;
char *str0, *str;
int t;
if (!g->p.cpu_list_str)
return 0;
dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
str0 = str = strdup(g->p.cpu_list_str);
t = 0;
BUG_ON(!str);
tprintf("# binding tasks to CPUs:\n");
tprintf("# ");
while (true) {
int bind_cpu, bind_cpu_0, bind_cpu_1;
char *tok, *tok_end, *tok_step, *tok_len, *tok_mul;
int bind_len;
int step;
int mul;
tok = strsep(&str, ",");
if (!tok)
break;
tok_end = strstr(tok, "-");
dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end);
if (!tok_end) {
/* Single CPU specified: */
bind_cpu_0 = bind_cpu_1 = atol(tok);
} else {
/* CPU range specified (for example: "5-11"): */
bind_cpu_0 = atol(tok);
bind_cpu_1 = atol(tok_end + 1);
}
step = 1;
tok_step = strstr(tok, "#");
if (tok_step) {
step = atol(tok_step + 1);
BUG_ON(step <= 0 || step >= g->p.nr_cpus);
}
/*
* Mask length.
* Eg: "--cpus 8_4-16#4" means: '--cpus 8_4,12_4,16_4',
* where the _4 means the next 4 CPUs are allowed.
*/
bind_len = 1;
tok_len = strstr(tok, "_");
if (tok_len) {
bind_len = atol(tok_len + 1);
BUG_ON(bind_len <= 0 || bind_len > g->p.nr_cpus);
}
/* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */
mul = 1;
tok_mul = strstr(tok, "x");
if (tok_mul) {
mul = atol(tok_mul + 1);
BUG_ON(mul <= 0);
}
dprintf("CPUs: %d_%d-%d#%dx%d\n", bind_cpu_0, bind_len, bind_cpu_1, step, mul);
if (bind_cpu_0 >= g->p.nr_cpus || bind_cpu_1 >= g->p.nr_cpus) {
printf("\nTest not applicable, system has only %d CPUs.\n", g->p.nr_cpus);
return -1;
}
BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0);
BUG_ON(bind_cpu_0 > bind_cpu_1);
for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) {
int i;
for (i = 0; i < mul; i++) {
int cpu;
if (t >= g->p.nr_tasks) {
printf("\n# NOTE: ignoring bind CPUs starting at CPU#%d\n #", bind_cpu);
goto out;
}
td = g->threads + t;
if (t)
tprintf(",");
if (bind_len > 1) {
tprintf("%2d/%d", bind_cpu, bind_len);
} else {
tprintf("%2d", bind_cpu);
}
CPU_ZERO(&td->bind_cpumask);
for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) {
BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus);
CPU_SET(cpu, &td->bind_cpumask);
}
t++;
}
}
}
out:
tprintf("\n");
if (t < g->p.nr_tasks)
printf("# NOTE: %d tasks bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
free(str0);
return 0;
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
ingo molnar | ingo molnar | 540 | 93.91% | 1 | 50.00% |
petr holasek | petr holasek | 35 | 6.09% | 1 | 50.00% |
| Total | 575 | 100.00% | 2 | 100.00% |
static int parse_cpus_opt(const struct option *opt __maybe_unused,
const char *arg, int unset __maybe_unused)
{
if (!arg)
return -1;
return parse_cpu_list(arg);
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
ingo molnar | ingo molnar | 37 | 100.00% | 1 | 100.00% |
| Total | 37 | 100.00% | 1 | 100.00% |
static int parse_node_list(const char *arg)
{
p0.node_list_str = strdup(arg);
dprintf("got NODE list: {%s}\n", p0.node_list_str);
return 0;
}
Contributors
| Person | Tokens | Prop | Commits | CommitProp |
ingo molnar | ingo molnar | 32 | 100.00% | 1 | 100.00% |
| Total | 32 | 100.00% | 1 | 100.00% |
static int parse_setup_node_list(void)
{
struct thread_data *td;
char *str0, *str;
int t;
if (!g->p.node_list_str)
return 0;
dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
str0 = str = strdup(g->p.node_list_str);
t = 0;
BUG_ON(!str);
tprintf("# binding tasks to NODEs:\n");
tprintf("# ");
while (true) {
int bind_node, bind_node_0, bind_node_1;
char *tok, *tok_end, *tok_step, *tok_mul;
int step;
int mul;
tok = strsep(&str, ",");
if (!tok)
break;
tok_end = strstr(tok, "-");
dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end);
if (!tok_end) {
/* Single NODE specified: */
bind_node_0 = bind_node_1 = atol(tok);
} else {
/* NODE range specified (for example: "5-11"): */
bind_node_0 = atol(tok);
bind_node_1 = atol(tok_end + 1);
}
step = 1;
tok_step = strstr(tok, "#");
if (tok_step) {
step = atol(tok_step + 1);
BUG_ON(step <= 0 || step >= g->p.nr_nodes);
}
/* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */
mul = 1;
tok_mul = strstr(tok, "x");
if (tok_mul) {
mul = atol(tok_mul + 1);
BUG_ON(mul <= 0);
}
dprintf("NODEs: %d-%d #%d\n", bind_node_0, bind_node_1, step);
if (bind_node_0 >= g->p.nr_nodes || bind_node_1 >= g->p.nr_nodes) {
printf