cregit-Linux how code gets into the kernel

Release 4.10 tools/perf/bench/numa.c

Directory: tools/perf/bench
/*
 * numa.c
 *
 * numa: Simulate NUMA-sensitive workloads and measure their NUMA performance
 */

/* For the CLR_() macros */
#include <pthread.h>

#include "../perf.h"
#include "../builtin.h"
#include "../util/util.h"
#include <subcmd/parse-options.h>
#include "../util/cloexec.h"

#include "bench.h"

#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <assert.h>
#include <malloc.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <sys/prctl.h>
#include <sys/types.h>
#include <linux/time64.h>

#include <numa.h>
#include <numaif.h>

/*
 * Regular printout to the terminal, suppressed if -q is specified:
 */

/* Prints nothing while g is still NULL or when g->p.show_details is negative. */
#define tprintf(x...) do { if (g && g->p.show_details >= 0) printf(x); } while (0)

/*
 * Debug printf:
 */

/* Prints only when g is set and g->p.show_details is at least 1. */
#define dprintf(x...) do { if (g && g->p.show_details >= 1) printf(x); } while (0)


/*
 * Per-task state. global_info.threads points to an array of these;
 * parse_setup_cpu_list() indexes it by task number (0 .. nr_tasks-1).
 */
struct thread_data {
	
int			curr_cpu;
	
/* CPU set built by parse_setup_cpu_list() from the --cpus list: */
cpu_set_t		bind_cpumask;
	
int			bind_node;
	
u8			*process_data;
	
int			process_nr;
	
int			thread_nr;
	
int			task_nr;
	
unsigned int		loops_done;
	
u64			val;
	
u64			runtime_ns;
	
u64			system_time_ns;
	
u64			user_time_ns;
	
double			speed_gbs;
	
pthread_mutex_t		*process_lock;
};

/* Parameters set by options: */


/*
 * Benchmark parameters. Most fields are the storage behind an entry of
 * the options[] table (via the file-level 'p0' instance); the help
 * strings in that table describe each of them.
 */
struct params {
	/* Startup synchronization: */
	
bool			serialize_startup;

	/* Task hierarchy: */
	
int			nr_proc;
	
int			nr_threads;

	/* Working set sizes: */
	
/* Raw option strings, as given on the command line (unit: MB): */
const char		*mb_global_str;
	
const char		*mb_proc_str;
	
const char		*mb_proc_locked_str;
	
const char		*mb_thread_str;

	
double			mb_global;
	
double			mb_proc;
	
double			mb_proc_locked;
	
double			mb_thread;

	/* Access patterns to the working set: */
	
bool			data_reads;
	
bool			data_writes;
	
bool			data_backwards;
	
bool			data_zero_memset;
	
bool			data_rand_walk;
	
u32			nr_loops;
	
u32			nr_secs;
	
u32			sleep_usecs;

	/* Working set initialization: */
	
bool			init_zero;
	
bool			init_random;
	
bool			init_cpu0;

	/* Misc options: */
	
/* Verbosity level tested by tprintf() (>= 0) and dprintf() (>= 1): */
int			show_details;
	
int			run_all;
	
/* < 0: MADV_NOHUGEPAGE, > 0: MADV_HUGEPAGE (see alloc_data()): */
int			thp;

	
long			bytes_global;
	
long			bytes_process;
	
long			bytes_process_locked;
	
long			bytes_thread;

	
int			nr_tasks;
	
bool			show_quiet;

	
bool			show_convergence;
	
bool			measure_convergence;

	
int			perturb_secs;
	
int			nr_cpus;
	
int			nr_nodes;

	/* Affinity options -C and -M: */
	
/* strdup()ed copy of the -C argument, set by parse_cpu_list(): */
char			*cpu_list_str;
	
/* strdup()ed copy of the -M argument, set by parse_node_list(): */
char			*node_list_str;
};


/* Global, read-writable area, accessible to all processes and threads: */


/*
 * State reached through the file-level 'g' pointer.
 */
struct global_info {
	
u8			*data;

	
pthread_mutex_t		startup_mutex;
	
int			nr_tasks_started;

	
pthread_mutex_t		startup_done_mutex;

	
pthread_mutex_t		start_work_mutex;
	
int			nr_tasks_working;

	
pthread_mutex_t		stop_work_mutex;
	
u64			bytes_done;

	
/* Array of per-task data, indexed by task number: */
struct thread_data	*threads;

	/* Convergence latency measurement: */
	
bool			all_converged;
	
bool			stop_work;

	
/* Set by alloc_data() once a THP warning was printed, so it is not repeated: */
int			print_once;

	
/* The parameters the helpers in this file consult (g->p.nr_cpus, g->p.thp, ...): */
struct params		p;
};


/* Starts out NULL; tprintf()/dprintf() stay silent until it is set. */
static struct global_info	*g = NULL;

/* Option callbacks, referenced by the options[] table below: */
static int parse_cpus_opt(const struct option *opt, const char *arg, int unset);
static int parse_nodes_opt(const struct option *opt, const char *arg, int unset);


/* Parameter instance that the options[] table and the list parsers write into: */
struct params p0;


/*
 * Command line options of the NUMA benchmark. Every entry stores its
 * value in the file-level 'p0' parameter block, except -C/-M which are
 * handled by string-parsing callbacks.
 */
static const struct option options[] = {
	OPT_INTEGER('p', "nr_proc"	, &p0.nr_proc,		"number of processes"),
	OPT_INTEGER('t', "nr_threads"	, &p0.nr_threads,	"number of threads per process"),

	OPT_STRING('G', "mb_global"	, &p0.mb_global_str,	"MB", "global  memory (MBs)"),
	OPT_STRING('P', "mb_proc"	, &p0.mb_proc_str,	"MB", "process memory (MBs)"),
	OPT_STRING('L', "mb_proc_locked", &p0.mb_proc_locked_str,"MB", "process serialized/locked memory access (MBs), <= process_memory"),
	OPT_STRING('T', "mb_thread"	, &p0.mb_thread_str,	"MB", "thread  memory (MBs)"),

	OPT_UINTEGER('l', "nr_loops"	, &p0.nr_loops,		"max number of loops to run (default: unlimited)"),
	OPT_UINTEGER('s', "nr_secs"	, &p0.nr_secs,		"max number of seconds to run (default: 5 secs)"),
	OPT_UINTEGER('u', "usleep"	, &p0.sleep_usecs,	"usecs to sleep per loop iteration"),

	/* -R selects reads; the help text used to say "writes" (copied from -W): */
	OPT_BOOLEAN('R', "data_reads"	, &p0.data_reads,	"access the data via reads (can be mixed with -W)"),
	OPT_BOOLEAN('W', "data_writes"	, &p0.data_writes,	"access the data via writes (can be mixed with -R)"),
	OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards,	"access the data backwards as well"),
	OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"),
	OPT_BOOLEAN('r', "data_rand_walk", &p0.data_rand_walk,	"access the data with random (32bit LFSR) walk"),


	OPT_BOOLEAN('z', "init_zero"	, &p0.init_zero,	"bzero the initial allocations"),
	OPT_BOOLEAN('I', "init_random"	, &p0.init_random,	"randomize the contents of the initial allocations"),
	OPT_BOOLEAN('0', "init_cpu0"	, &p0.init_cpu0,	"do the initial allocations on CPU#0"),
	OPT_INTEGER('x', "perturb_secs", &p0.perturb_secs,	"perturb thread 0/0 every X secs, to test convergence stability"),

	OPT_INCR   ('d', "show_details"	, &p0.show_details,	"Show details"),
	OPT_INCR   ('a', "all"		, &p0.run_all,		"Run all tests in the suite"),
	OPT_INTEGER('H', "thp"		, &p0.thp,		"MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"),
	OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details"),
	OPT_BOOLEAN('m', "measure_convergence",	&p0.measure_convergence, "measure convergence latency"),
	OPT_BOOLEAN('q', "quiet"	, &p0.show_quiet,	"quiet mode"),
	OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"),

	/* Special option string parsing callbacks: */
	OPT_CALLBACK('C', "cpus", NULL, "cpu[,cpu2,...cpuN]",
			"bind the first N tasks to these specific cpus (the rest is unbound)",
			parse_cpus_opt),
	OPT_CALLBACK('M', "memnodes", NULL, "node[,node2,...nodeN]",
			"bind the first N tasks to these specific memory nodes (the rest is unbound)",
			parse_nodes_opt),
	OPT_END()
};


/* NULL-terminated list of usage lines for 'perf bench numa': */
static const char * const bench_numa_usage[] = {
	"perf bench numa <options>",
	NULL
};


/* NULL-terminated list of usage lines for 'perf bench numa mem': */
static const char * const numa_usage[] = {
	"perf bench numa mem [<options>]",
	NULL
};


/*
 * Restrict the caller (pid 0) to @target_cpu, or to every CPU in
 * 0..g->p.nr_cpus-1 when @target_cpu is -1.
 *
 * Returns the affinity mask that was in effect before the change, so
 * that it can be handed to bind_to_cpumask() later on.
 */
static cpu_set_t bind_to_cpu(int target_cpu)
{
	cpu_set_t prev_mask, new_mask;
	int err;

	err = sched_getaffinity(0, sizeof(prev_mask), &prev_mask);
	BUG_ON(err);

	CPU_ZERO(&new_mask);

	if (target_cpu != -1) {
		/* A single, valid CPU: */
		BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
		CPU_SET(target_cpu, &new_mask);
	} else {
		/* Wildcard: allow all known CPUs: */
		int i = 0;

		while (i < g->p.nr_cpus) {
			CPU_SET(i, &new_mask);
			i++;
		}
	}

	err = sched_setaffinity(0, sizeof(new_mask), &new_mask);
	BUG_ON(err);

	return prev_mask;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
ingo molnar | 129 | 100.00% | 1 | 100.00%
Total | 129 | 100.00% | 1 | 100.00%


/*
 * Restrict the caller (pid 0) to the CPUs of @target_node, or to all
 * CPUs when @target_node is -1.
 *
 * Nodes are taken to own equally sized, consecutive CPU ranges: node N
 * covers CPUs [N*cpus_per_node, (N+1)*cpus_per_node). BUG_ON() fires if
 * nr_cpus is not an exact multiple of nr_nodes.
 *
 * Returns the affinity mask that was in effect before the change.
 */
static cpu_set_t bind_to_node(int target_node)
{
	int cpus_per_node = g->p.nr_cpus/g->p.nr_nodes;
	cpu_set_t prev_mask, new_mask;
	int first_cpu, end_cpu;
	int i;
	int err;

	BUG_ON(cpus_per_node*g->p.nr_nodes != g->p.nr_cpus);
	BUG_ON(!cpus_per_node);

	err = sched_getaffinity(0, sizeof(prev_mask), &prev_mask);
	BUG_ON(err);

	CPU_ZERO(&new_mask);

	if (target_node != -1) {
		first_cpu = (target_node + 0) * cpus_per_node;
		end_cpu   = (target_node + 1) * cpus_per_node;

		BUG_ON(end_cpu > g->p.nr_cpus);
	} else {
		/* Wildcard: the range spans every CPU: */
		first_cpu = 0;
		end_cpu   = g->p.nr_cpus;
	}

	for (i = first_cpu; i < end_cpu; i++)
		CPU_SET(i, &new_mask);

	err = sched_setaffinity(0, sizeof(new_mask), &new_mask);
	BUG_ON(err);

	return prev_mask;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
ingo molnar | 198 | 100.00% | 1 | 100.00%
Total | 198 | 100.00% | 1 | 100.00%


/*
 * Apply @mask as the caller's (pid 0) CPU affinity; BUG_ON() fires if
 * sched_setaffinity() reports an error.
 */
static void bind_to_cpumask(cpu_set_t mask)
{
	int err = sched_setaffinity(0, sizeof(mask), &mask);

	BUG_ON(err);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
ingo molnar | 32 | 100.00% | 1 | 100.00%
Total | 32 | 100.00% | 1 | 100.00%


/*
 * Switch the memory policy back to MPOL_DEFAULT (no node mask is
 * passed); BUG_ON() fires if set_mempolicy() reports an error.
 */
static void mempol_restore(void)
{
	int err = set_mempolicy(MPOL_DEFAULT, NULL, g->p.nr_nodes-1);

	BUG_ON(err);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
ingo molnar | 33 | 100.00% | 1 | 100.00%
Total | 33 | 100.00% | 1 | 100.00%


/*
 * Bind memory allocations to NUMA node @node via set_mempolicy(MPOL_BIND).
 * A @node of -1 means "no binding" and leaves the policy untouched.
 *
 * The node mask is a single unsigned long, so both the configured node
 * count and @node itself must fit into its bits; BUG_ON() fires
 * otherwise, and also if set_mempolicy() fails.
 */
static void bind_to_memnode(int node)
{
	unsigned long nodemask;
	int ret;

	if (node == -1)
		return;

	BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8);
	/* Guard the shift below: a negative or too large count is undefined. */
	BUG_ON(node < 0 || node >= (int)sizeof(nodemask)*8);

	/* Unsigned shift: '1L << 63' would overflow a signed long. */
	nodemask = 1UL << node;

	ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8);
	dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret);

	BUG_ON(ret);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
ingo molnar | 80 | 97.56% | 1 | 50.00%
jakub jelen | 2 | 2.44% | 1 | 50.00%
Total | 82 | 100.00% | 2 | 100.00%

/* 2MB: extra length and alignment that alloc_data() applies to its buffers. */
#define HPSIZE (2*1024*1024)

/*
 * Format a task name (printf-style arguments) into a 20 byte buffer and
 * install it with prctl(PR_SET_NAME).
 */
#define set_taskname(fmt...)				\
do {							\
	char name[20];					\
							\
	snprintf(name, 20, fmt);			\
	prctl(PR_SET_NAME, name);			\
} while (0)
static u8 *alloc_data(ssize_t bytes0, int map_flags, int init_zero, int init_cpu0, int thp, int init_random) { cpu_set_t orig_mask; ssize_t bytes; u8 *buf; int ret; if (!bytes0) return NULL; /* Allocate and initialize all memory on CPU#0: */ if (init_cpu0) { orig_mask = bind_to_node(0); bind_to_memnode(0); } bytes = bytes0 + HPSIZE; buf = (void *)mmap(0, bytes, PROT_READ|PROT_WRITE, MAP_ANON|map_flags, -1, 0); BUG_ON(buf == (void *)-1); if (map_flags == MAP_PRIVATE) { if (thp > 0) { ret = madvise(buf, bytes, MADV_HUGEPAGE); if (ret && !g->print_once) { g->print_once = 1; printf("WARNING: Could not enable THP - do: 'echo madvise > /sys/kernel/mm/transparent_hugepage/enabled'\n"); } } if (thp < 0) { ret = madvise(buf, bytes, MADV_NOHUGEPAGE); if (ret && !g->print_once) { g->print_once = 1; printf("WARNING: Could not disable THP: run a CONFIG_TRANSPARENT_HUGEPAGE kernel?\n"); } } } if (init_zero) { bzero(buf, bytes); } else { /* Initialize random contents, different in each word: */ if (init_random) { u64 *wbuf = (void *)buf; long off = rand(); long i; for (i = 0; i < bytes/8; i++) wbuf[i] = i + off; } } /* Align to 2MB boundary: */ buf = (void *)(((unsigned long)buf + HPSIZE-1) & ~(HPSIZE-1)); /* Restore affinity: */ if (init_cpu0) { bind_to_cpumask(orig_mask); mempol_restore(); } return buf; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
ingo molnar | 311 | 100.00% | 1 | 100.00%
Total | 311 | 100.00% | 1 | 100.00%


/*
 * Unmap @bytes bytes starting at @data. A NULL @data is ignored;
 * BUG_ON() fires if munmap() reports an error.
 */
static void free_data(void *data, ssize_t bytes)
{
	if (data) {
		int err = munmap(data, bytes);

		BUG_ON(err);
	}
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
ingo molnar | 36 | 100.00% | 1 | 100.00%
Total | 36 | 100.00% | 1 | 100.00%

/*
 * Create a shared memory buffer that can be shared between processes, zeroed:
 */
static void *zalloc_shared_data(ssize_t bytes)
{
	const int zero_fill = 1;

	return alloc_data(bytes, MAP_SHARED, zero_fill,
			  g->p.init_cpu0, g->p.thp, g->p.init_random);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
ingo molnar | 38 | 100.00% | 1 | 100.00%
Total | 38 | 100.00% | 1 | 100.00%

/*
 * Create a shared memory buffer that can be shared between processes:
 */
static void *setup_shared_data(ssize_t bytes)
{
	const int zero_fill = 0;

	return alloc_data(bytes, MAP_SHARED, zero_fill,
			  g->p.init_cpu0, g->p.thp, g->p.init_random);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
ingo molnar | 38 | 100.00% | 1 | 100.00%
Total | 38 | 100.00% | 1 | 100.00%

/*
 * Allocate process-local memory - this will either be shared between
 * threads of this process, or only be accessed by this thread:
 */
static void *setup_private_data(ssize_t bytes)
{
	const int zero_fill = 0;

	return alloc_data(bytes, MAP_PRIVATE, zero_fill,
			  g->p.init_cpu0, g->p.thp, g->p.init_random);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
ingo molnar | 38 | 100.00% | 1 | 100.00%
Total | 38 | 100.00% | 1 | 100.00%

/*
 * Initialize @mutex in place as a process-shared (global) mutex.
 *
 * Nothing is returned; the caller supplies the storage. For the
 * PTHREAD_PROCESS_SHARED attribute to be useful, that storage has to be
 * visible to every process that will lock the mutex.
 */
static void init_global_mutex(pthread_mutex_t *mutex)
{
	pthread_mutexattr_t attr;

	pthread_mutexattr_init(&attr);
	pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
	pthread_mutex_init(mutex, &attr);

	/* The attribute object is no longer needed once the mutex exists: */
	pthread_mutexattr_destroy(&attr);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
ingo molnar | 35 | 100.00% | 1 | 100.00%
Total | 35 | 100.00% | 1 | 100.00%


/*
 * Remember a private copy of the --cpus argument in p0.cpu_list_str;
 * the list itself is interpreted later by parse_setup_cpu_list().
 *
 * Returns 0 on success, -1 if the copy could not be allocated.
 */
static int parse_cpu_list(const char *arg)
{
	char *copy = strdup(arg);

	/* Fail the option instead of storing (and later printing) NULL: */
	if (!copy)
		return -1;

	p0.cpu_list_str = copy;

	dprintf("got CPU list: {%s}\n", p0.cpu_list_str);

	return 0;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
ingo molnar | 32 | 100.00% | 1 | 100.00%
Total | 32 | 100.00% | 1 | 100.00%


static int parse_setup_cpu_list(void) { struct thread_data *td; char *str0, *str; int t; if (!g->p.cpu_list_str) return 0; dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks); str0 = str = strdup(g->p.cpu_list_str); t = 0; BUG_ON(!str); tprintf("# binding tasks to CPUs:\n"); tprintf("# "); while (true) { int bind_cpu, bind_cpu_0, bind_cpu_1; char *tok, *tok_end, *tok_step, *tok_len, *tok_mul; int bind_len; int step; int mul; tok = strsep(&str, ","); if (!tok) break; tok_end = strstr(tok, "-"); dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end); if (!tok_end) { /* Single CPU specified: */ bind_cpu_0 = bind_cpu_1 = atol(tok); } else { /* CPU range specified (for example: "5-11"): */ bind_cpu_0 = atol(tok); bind_cpu_1 = atol(tok_end + 1); } step = 1; tok_step = strstr(tok, "#"); if (tok_step) { step = atol(tok_step + 1); BUG_ON(step <= 0 || step >= g->p.nr_cpus); } /* * Mask length. * Eg: "--cpus 8_4-16#4" means: '--cpus 8_4,12_4,16_4', * where the _4 means the next 4 CPUs are allowed. */ bind_len = 1; tok_len = strstr(tok, "_"); if (tok_len) { bind_len = atol(tok_len + 1); BUG_ON(bind_len <= 0 || bind_len > g->p.nr_cpus); } /* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */ mul = 1; tok_mul = strstr(tok, "x"); if (tok_mul) { mul = atol(tok_mul + 1); BUG_ON(mul <= 0); } dprintf("CPUs: %d_%d-%d#%dx%d\n", bind_cpu_0, bind_len, bind_cpu_1, step, mul); if (bind_cpu_0 >= g->p.nr_cpus || bind_cpu_1 >= g->p.nr_cpus) { printf("\nTest not applicable, system has only %d CPUs.\n", g->p.nr_cpus); return -1; } BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0); BUG_ON(bind_cpu_0 > bind_cpu_1); for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) { int i; for (i = 0; i < mul; i++) { int cpu; if (t >= g->p.nr_tasks) { printf("\n# NOTE: ignoring bind CPUs starting at CPU#%d\n #", bind_cpu); goto out; } td = g->threads + t; if (t) tprintf(","); if (bind_len > 1) { tprintf("%2d/%d", bind_cpu, bind_len); } else { tprintf("%2d", bind_cpu); } 
CPU_ZERO(&td->bind_cpumask); for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) { BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus); CPU_SET(cpu, &td->bind_cpumask); } t++; } } } out: tprintf("\n"); if (t < g->p.nr_tasks) printf("# NOTE: %d tasks bound, %d tasks unbound\n", t, g->p.nr_tasks - t); free(str0); return 0; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
ingo molnar | 540 | 93.91% | 1 | 50.00%
petr holasek | 35 | 6.09% | 1 | 50.00%
Total | 575 | 100.00% | 2 | 100.00%


/*
 * Option callback for -C/--cpus: forwards the argument to
 * parse_cpu_list(). Returns -1 when no argument was supplied.
 */
static int parse_cpus_opt(const struct option *opt __maybe_unused,
			  const char *arg, int unset __maybe_unused)
{
	return arg ? parse_cpu_list(arg) : -1;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
ingo molnar | 37 | 100.00% | 1 | 100.00%
Total | 37 | 100.00% | 1 | 100.00%


/*
 * Remember a private copy of the --memnodes argument in
 * p0.node_list_str for later interpretation.
 *
 * Returns 0 on success, -1 if the copy could not be allocated.
 */
static int parse_node_list(const char *arg)
{
	char *copy = strdup(arg);

	/* Fail the option instead of storing (and later printing) NULL: */
	if (!copy)
		return -1;

	p0.node_list_str = copy;

	dprintf("got NODE list: {%s}\n", p0.node_list_str);

	return 0;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
ingo molnar | 32 | 100.00% | 1 | 100.00%
Total | 32 | 100.00% | 1 | 100.00%


static int parse_setup_node_list(void) { struct thread_data *td; char *str0, *str; int t; if (!g->p.node_list_str) return 0; dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks); str0 = str = strdup(g->p.node_list_str); t = 0; BUG_ON(!str); tprintf("# binding tasks to NODEs:\n"); tprintf("# "); while (true) { int bind_node, bind_node_0, bind_node_1; char *tok, *tok_end, *tok_step, *tok_mul; int step; int mul; tok = strsep(&str, ","); if (!tok) break; tok_end = strstr(tok, "-"); dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end); if (!tok_end) { /* Single NODE specified: */ bind_node_0 = bind_node_1 = atol(tok); } else { /* NODE range specified (for example: "5-11"): */ bind_node_0 = atol(tok); bind_node_1 = atol(tok_end + 1); } step = 1; tok_step = strstr(tok, "#"); if (tok_step) { step = atol(tok_step + 1); BUG_ON(step <= 0 || step >= g->p.nr_nodes); } /* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */ mul = 1; tok_mul = strstr(tok, "x"); if (tok_mul) { mul = atol(tok_mul + 1); BUG_ON(mul <= 0); } dprintf("NODEs: %d-%d #%d\n", bind_node_0, bind_node_1, step); if (bind_node_0 >= g->p.nr_nodes || bind_node_1 >= g->p.nr_nodes) { printf