cregit-Linux how code gets into the kernel

Release 4.7 kernel/cpuset.c

Directory: kernel
/*
 *  kernel/cpuset.c
 *
 *  Processor and Memory placement constraints for sets of tasks.
 *
 *  Copyright (C) 2003 BULL SA.
 *  Copyright (C) 2004-2007 Silicon Graphics, Inc.
 *  Copyright (C) 2006 Google, Inc
 *
 *  Portions derived from Patrick Mochel's sysfs code.
 *  sysfs is Copyright (c) 2001-3 Patrick Mochel
 *
 *  2003-10-10 Written by Simon Derr.
 *  2003-10-22 Updates by Stephen Hemminger.
 *  2004 May-July Rework by Paul Jackson.
 *  2006 Rework by Paul Menage to use generic cgroups
 *  2008 Rework of the scheduler domains and CPU hotplug handling
 *       by Max Krasnyansky
 *
 *  This file is subject to the terms and conditions of the GNU General Public
 *  License.  See the file COPYING in the main directory of the Linux
 *  distribution for more details.
 */

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpuset.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/mempolicy.h>
#include <linux/mm.h>
#include <linux/memory.h>
#include <linux/export.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/security.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/time.h>
#include <linux/time64.h>
#include <linux/backing-dev.h>
#include <linux/sort.h>

#include <asm/uaccess.h>
#include <linux/atomic.h>
#include <linux/mutex.h>
#include <linux/cgroup.h>
#include <linux/wait.h>


/*
 * Static branch key for cpusets, defined in its "false" state.
 * NOTE(review): presumably switched on once a non-root cpuset exists; the
 * users of this key are not visible in this part of the file - confirm.
 */
DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);

/*
 * State of one frequency meter.  See the "Frequency meter" comments
 * further down in this file for how the fields are used.
 */
struct fmeter {
	int cnt;		/* events counted but not yet processed */
	int val;		/* most recently computed output value */
	time64_t time;		/* clock (in seconds) when val was computed */
	spinlock_t lock;	/* guards reads and writes of the fields above */
};


/* Per-cgroup cpuset state; embeds the cgroup_subsys_state as @css. */
struct cpuset {
	struct cgroup_subsys_state css;

	unsigned long flags;		/* "unsigned long" so bitops work */

	/*
	 * On the default hierarchy:
	 *
	 * The user-configured masks can only be changed by writing to
	 * cpuset.cpus and cpuset.mems, and are not limited by the
	 * parent's masks.
	 *
	 * The effective masks are the real masks that apply to the tasks
	 * in the cpuset.  They may change when the configured masks are
	 * changed or when hotplug happens.
	 *
	 * effective_mask == configured_mask & parent's effective_mask,
	 * and if that ends up empty, the parent's mask is inherited.
	 *
	 *
	 * On the legacy hierarchy:
	 *
	 * The user-configured masks are always the same as the effective
	 * masks.
	 */

	/* user-configured CPUs and Memory Nodes allowed to tasks */
	cpumask_var_t cpus_allowed;
	nodemask_t mems_allowed;

	/* effective CPUs and Memory Nodes allowed to tasks */
	cpumask_var_t effective_cpus;
	nodemask_t effective_mems;

	/*
	 * These are the old Memory Nodes that tasks took on.
	 *
	 * - top_cpuset.old_mems_allowed is initialized to mems_allowed.
	 * - A new cpuset's old_mems_allowed is initialized when some
	 *   task is moved into it.
	 * - old_mems_allowed is used in cpuset_migrate_mm() when we change
	 *   cpuset.mems_allowed and have the tasks' nodemask updated, and
	 *   then old_mems_allowed is updated to mems_allowed.
	 */
	nodemask_t old_mems_allowed;

	struct fmeter fmeter;		/* memory_pressure filter */

	/*
	 * Tasks are being attached to this cpuset.  Used to prevent
	 * zeroing cpus/mems_allowed between ->can_attach() and ->attach().
	 */
	int attach_in_progress;

	/* partition number for rebuild_sched_domains() */
	int pn;

	/* for custom sched domain */
	int relax_domain_level;
};


/*
 * Map a cgroup_subsys_state back to the cpuset that embeds it.
 * A NULL @css yields NULL.
 */
static inline struct cpuset *css_cs(struct cgroup_subsys_state *css)
{
	if (!css)
		return NULL;

	return container_of(css, struct cpuset, css);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
paul menage | 21 | 72.41% | 1 | 50.00%
tejun heo | 8 | 27.59% | 1 | 50.00%
Total | 29 | 100.00% | 2 | 100.00%

/* Look up the cpuset a task belongs to, via the task's cpuset css. */
static inline struct cpuset *task_cs(struct task_struct *task)
{
	struct cgroup_subsys_state *css = task_css(task, cpuset_cgrp_id);

	return css_cs(css);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
paul menage | 22 | 88.00% | 1 | 25.00%
tejun heo | 3 | 12.00% | 3 | 75.00%
Total | 25 | 100.00% | 4 | 100.00%


/*
 * Return the cpuset of @cs's parent css.  css_cs() turns a NULL parent
 * into NULL, so a cpuset without a parent yields NULL.
 */
static inline struct cpuset *parent_cs(struct cpuset *cs)
{
	struct cgroup_subsys_state *parent_css = cs->css.parent;

	return css_cs(parent_css);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
tejun heo | 24 | 100.00% | 3 | 100.00%
Total | 24 | 100.00% | 3 | 100.00%

#ifdef CONFIG_NUMA
static inline bool task_has_mempolicy(struct task_struct *task) { return task->mempolicy; }

Contributors

PersonTokensPropCommitsCommitProp
david rientjesdavid rientjes17100.00%1100.00%
Total17100.00%1100.00%

#else
/* !CONFIG_NUMA: this variant never reports a mempolicy. */
static inline bool task_has_mempolicy(struct task_struct *task)
{
	return false;
}

Contributors

PersonTokensPropCommitsCommitProp
david rientjesdavid rientjes15100.00%1100.00%
Total15100.00%1100.00%

#endif

/*
 * Bits in the struct cpuset flags field.  Each enumerator is a bit
 * number, used with test_bit() by the helpers that follow.
 */
typedef enum {
	CS_ONLINE,
	CS_CPU_EXCLUSIVE,
	CS_MEM_EXCLUSIVE,
	CS_MEM_HARDWALL,
	CS_MEMORY_MIGRATE,
	CS_SCHED_LOAD_BALANCE,
	CS_SPREAD_PAGE,
	CS_SPREAD_SLAB,
} cpuset_flagbits_t;

/* convenient tests for these bits */
/* True when the CS_ONLINE bit is set in @cs->flags. */
static inline bool is_cpuset_online(const struct cpuset *cs)
{
	const unsigned long *flags = &cs->flags;

	return test_bit(CS_ONLINE, flags);
}

Contributors

PersonTokensPropCommitsCommitProp
tejun heotejun heo24100.00%1100.00%
Total24100.00%1100.00%


/* Non-zero when the CS_CPU_EXCLUSIVE bit is set in @cs->flags. */
static inline int is_cpu_exclusive(const struct cpuset *cs)
{
	const unsigned long *flags = &cs->flags;

	return test_bit(CS_CPU_EXCLUSIVE, flags);
}

Contributors

PersonTokensPropCommitsCommitProp
paul jacksonpaul jackson24100.00%1100.00%
Total24100.00%1100.00%


/* Non-zero when the CS_MEM_EXCLUSIVE bit is set in @cs->flags. */
static inline int is_mem_exclusive(const struct cpuset *cs)
{
	const unsigned long *flags = &cs->flags;

	return test_bit(CS_MEM_EXCLUSIVE, flags);
}

Contributors

PersonTokensPropCommitsCommitProp
paul jacksonpaul jackson24100.00%1100.00%
Total24100.00%1100.00%


/* Non-zero when the CS_MEM_HARDWALL bit is set in @cs->flags. */
static inline int is_mem_hardwall(const struct cpuset *cs)
{
	const unsigned long *flags = &cs->flags;

	return test_bit(CS_MEM_HARDWALL, flags);
}

Contributors

PersonTokensPropCommitsCommitProp
paul menagepaul menage24100.00%1100.00%
Total24100.00%1100.00%


/* Non-zero when the CS_SCHED_LOAD_BALANCE bit is set in @cs->flags. */
static inline int is_sched_load_balance(const struct cpuset *cs)
{
	const unsigned long *flags = &cs->flags;

	return test_bit(CS_SCHED_LOAD_BALANCE, flags);
}

Contributors

PersonTokensPropCommitsCommitProp
paul jacksonpaul jackson24100.00%1100.00%
Total24100.00%1100.00%


/* Non-zero when the CS_MEMORY_MIGRATE bit is set in @cs->flags. */
static inline int is_memory_migrate(const struct cpuset *cs)
{
	const unsigned long *flags = &cs->flags;

	return test_bit(CS_MEMORY_MIGRATE, flags);
}

Contributors

PersonTokensPropCommitsCommitProp
paul jacksonpaul jackson24100.00%1100.00%
Total24100.00%1100.00%


/* Non-zero when the CS_SPREAD_PAGE bit is set in @cs->flags. */
static inline int is_spread_page(const struct cpuset *cs)
{
	const unsigned long *flags = &cs->flags;

	return test_bit(CS_SPREAD_PAGE, flags);
}

Contributors

PersonTokensPropCommitsCommitProp
paul jacksonpaul jackson24100.00%1100.00%
Total24100.00%1100.00%


/* Non-zero when the CS_SPREAD_SLAB bit is set in @cs->flags. */
static inline int is_spread_slab(const struct cpuset *cs)
{
	const unsigned long *flags = &cs->flags;

	return test_bit(CS_SPREAD_SLAB, flags);
}

Contributors

PersonTokensPropCommitsCommitProp
paul jacksonpaul jackson24100.00%1100.00%
Total24100.00%1100.00%

static struct cpuset top_cpuset = { .flags = ((1 << CS_ONLINE) | (1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)), }; /** * cpuset_for_each_child - traverse online children of a cpuset * @child_cs: loop cursor pointing to the current child * @pos_css: used for iteration * @parent_cs: target cpuset to walk children of * * Walk @child_cs through the online children of @parent_cs. Must be used * with RCU read locked. */ #define cpuset_for_each_child(child_cs, pos_css, parent_cs) \ css_for_each_child((pos_css), &(parent_cs)->css) \ if (is_cpuset_online(((child_cs) = css_cs((pos_css))))) /** * cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants * @des_cs: loop cursor pointing to the current descendant * @pos_css: used for iteration * @root_cs: target cpuset to walk ancestor of * * Walk @des_cs through the online descendants of @root_cs. Must be used * with RCU read locked. The caller may modify @pos_css by calling * css_rightmost_descendant() to skip subtree. @root_cs is included in the * iteration and the first node to be visited. */ #define cpuset_for_each_descendant_pre(des_cs, pos_css, root_cs) \ css_for_each_descendant_pre((pos_css), &(root_cs)->css) \ if (is_cpuset_online(((des_cs) = css_cs((pos_css))))) /* * There are two global locks guarding cpuset structures - cpuset_mutex and * callback_lock. We also require taking task_lock() when dereferencing a * task's cpuset pointer. See "The task_lock() exception", at the end of this * comment. * * A task must hold both locks to modify cpusets. If a task holds * cpuset_mutex, then it blocks others wanting that mutex, ensuring that it * is the only task able to also acquire callback_lock and be able to * modify cpusets. It can perform various checks on the cpuset structure * first, knowing nothing will change. It can also allocate memory while * just holding cpuset_mutex. While it is performing these checks, various * callback routines can briefly acquire callback_lock to query cpusets. 
* Once it is ready to make the changes, it takes callback_lock, blocking * everyone else. * * Calls to the kernel memory allocator can not be made while holding * callback_lock, as that would risk double tripping on callback_lock * from one of the callbacks into the cpuset code from within * __alloc_pages(). * * If a task is only holding callback_lock, then it has read-only * access to cpusets. * * Now, the task_struct fields mems_allowed and mempolicy may be changed * by other task, we use alloc_lock in the task_struct fields to protect * them. * * The cpuset_common_file_read() handlers only hold callback_lock across * small pieces of code, such as when reading out possibly multi-word * cpumasks and nodemasks. * * Accessing a task's cpuset should be done in accordance with the * guidelines for accessing subsystem state in kernel/cgroup.c */ static DEFINE_MUTEX(cpuset_mutex); static DEFINE_SPINLOCK(callback_lock); static struct workqueue_struct *cpuset_migrate_mm_wq; /* * CPU / memory hotplug is handled asynchronously. */ static void cpuset_hotplug_workfn(struct work_struct *work); static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn); static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq); /* * This is ugly, but preserves the userspace API for existing cpuset * users. If someone tries to mount the "cpuset" filesystem, we * silently switch it to mount "cgroup" instead */
/**
 * cpuset_mount - mount handler for the "cpuset" filesystem type
 * @fs_type: filesystem type being mounted (not used here)
 * @flags: mount flags, handed on unchanged
 * @unused_dev_name: device name, handed on unchanged
 * @data: caller's mount data (ignored; a fixed option string is used)
 *
 * Forwards the request to the "cgroup" filesystem's ->mount() with a fixed
 * set of cpuset options.  Returns that call's result, or ERR_PTR(-ENODEV)
 * when no "cgroup" filesystem type can be found.
 */
static struct dentry *cpuset_mount(struct file_system_type *fs_type,
				   int flags, const char *unused_dev_name,
				   void *data)
{
	char mountopts[] =
		"cpuset,noprefix,"
		"release_agent=/sbin/cpuset_release_agent";
	struct file_system_type *cgroup_fs;
	struct dentry *root;

	cgroup_fs = get_fs_type("cgroup");
	if (!cgroup_fs)
		return ERR_PTR(-ENODEV);

	root = cgroup_fs->mount(cgroup_fs, flags, unused_dev_name, mountopts);
	/* drop the reference obtained from get_fs_type() */
	put_filesystem(cgroup_fs);

	return root;
}

Contributors

PersonTokensPropCommitsCommitProp
paul menagepaul menage6275.61%133.33%
al viroal viro1113.41%133.33%
paul jacksonpaul jackson910.98%133.33%
Total82100.00%3100.00%

static struct file_system_type cpuset_fs_type = {
	.name = "cpuset",
	.mount = cpuset_mount,
};

/*
 * Return in pmask the portion of a cpuset's effective_cpus that
 * are online.  If none are online, walk up the cpuset hierarchy
 * until we find one that does have some online cpus.
 *
 * One way or another, we guarantee to return some non-empty subset
 * of cpu_online_mask.
 *
 * Call with callback_lock or cpuset_mutex held.
 */
static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
{
	while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) {
		cs = parent_cs(cs);
		if (unlikely(!cs)) {
			/*
			 * parent_cs() returns NULL once we step past a
			 * cpuset that has no parent, i.e. even the root had
			 * no online CPU in its effective_cpus.  Hotplug is
			 * handled asynchronously in this file, so the root's
			 * mask may lag behind cpu_online_mask for a moment.
			 * Fall back to cpu_online_mask instead of
			 * dereferencing NULL.
			 */
			cpumask_copy(pmask, cpu_online_mask);
			return;
		}
	}
	cpumask_and(pmask, cs->effective_cpus, cpu_online_mask);
}

Contributors

PersonTokensPropCommitsCommitProp
paul menagepaul menage2247.83%116.67%
paul jacksonpaul jackson1328.26%116.67%
li zefanli zefan817.39%350.00%
tejun heotejun heo36.52%116.67%
Total46100.00%6100.00%

/*
 * Return in *pmask the portion of a cpuset's effective_mems that
 * are online, with memory.  If none are online with memory, walk
 * up the cpuset hierarchy until we find one that does have some
 * online mems.  The top cpuset always has some mems online.
 *
 * One way or another, we guarantee to return some non-empty subset
 * of node_states[N_MEMORY].
 *
 * Call with callback_lock or cpuset_mutex held.
 */
static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
{
	for (;;) {
		if (nodes_intersects(cs->effective_mems, node_states[N_MEMORY]))
			break;
		/* nothing usable here, try one level up */
		cs = parent_cs(cs);
	}

	nodes_and(*pmask, cs->effective_mems, node_states[N_MEMORY]);
}

Contributors

PersonTokensPropCommitsCommitProp
paul menagepaul menage2344.23%120.00%
paul jacksonpaul jackson2242.31%120.00%
tejun heotejun heo35.77%120.00%
li zefanli zefan23.85%120.00%
lai jiangshanlai jiangshan23.85%120.00%
Total52100.00%5100.00%

/*
 * Make @tsk's page-spread and slab-spread flags mirror those of @cs:
 * each task flag is set when the cpuset's flag is set, cleared otherwise.
 *
 * Call with callback_lock or cpuset_mutex held.
 */
static void cpuset_update_task_spread_flag(struct cpuset *cs,
					   struct task_struct *tsk)
{
	if (!is_spread_page(cs))
		task_clear_spread_page(tsk);
	else
		task_set_spread_page(tsk);

	if (!is_spread_slab(cs))
		task_clear_spread_slab(tsk);
	else
		task_set_spread_slab(tsk);
}

Contributors

PersonTokensPropCommitsCommitProp
miao xiemiao xie4076.92%150.00%
li zefanli zefan1223.08%150.00%
Total52100.00%2100.00%

/*
 * is_cpuset_subset(p, q) - Is cpuset p a subset of cpuset q?
 *
 * p is a subset of q when p's allowed CPUs and Memory Nodes are subsets
 * of q's, and p has an exclusive flag set only where q has it set too.
 * Returns 1 if so, 0 otherwise.  Call holding cpuset_mutex.
 */
static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
{
	if (!cpumask_subset(p->cpus_allowed, q->cpus_allowed))
		return 0;

	if (!nodes_subset(p->mems_allowed, q->mems_allowed))
		return 0;

	/* p may be exclusive only if q is */
	if (is_cpu_exclusive(p) > is_cpu_exclusive(q))
		return 0;

	return is_mem_exclusive(p) <= is_mem_exclusive(q);
}

Contributors

PersonTokensPropCommitsCommitProp
paul menagepaul menage3963.93%125.00%
paul jacksonpaul jackson2032.79%125.00%
dave hansendave hansen11.64%125.00%
li zefanli zefan11.64%125.00%
Total61100.00%4100.00%

/**
 * alloc_trial_cpuset - allocate a trial cpuset
 * @cs: the cpuset that the trial cpuset duplicates
 *
 * The structure is copied with kmemdup(), then the two cpumask_var_t
 * members get allocations of their own and are filled from @cs.
 * Returns the copy, or NULL if any allocation fails.
 */
static struct cpuset *alloc_trial_cpuset(struct cpuset *cs)
{
	struct cpuset *copy = kmemdup(cs, sizeof(*cs), GFP_KERNEL);

	if (!copy)
		return NULL;

	if (!alloc_cpumask_var(&copy->cpus_allowed, GFP_KERNEL)) {
		kfree(copy);
		return NULL;
	}

	if (!alloc_cpumask_var(&copy->effective_cpus, GFP_KERNEL)) {
		free_cpumask_var(copy->cpus_allowed);
		kfree(copy);
		return NULL;
	}

	cpumask_copy(copy->cpus_allowed, cs->cpus_allowed);
	cpumask_copy(copy->effective_cpus, cs->effective_cpus);

	return copy;
}

Contributors

PersonTokensPropCommitsCommitProp
li zefanli zefan117100.00%3100.00%
Total117100.00%3100.00%

/**
 * free_trial_cpuset - free the trial cpuset
 * @trial: the trial cpuset to be freed
 *
 * Releases both cpumasks and then the structure itself.
 */
static void free_trial_cpuset(struct cpuset *trial)
{
	free_cpumask_var(trial->cpus_allowed);
	free_cpumask_var(trial->effective_cpus);

	kfree(trial);
}

Contributors

PersonTokensPropCommitsCommitProp
li zefanli zefan30100.00%3100.00%
Total30100.00%3100.00%

/* * validate_change() - Used to validate that any proposed cpuset change * follows the structural rules for cpusets. * * If we replaced the flag and mask values of the current cpuset * (cur) with those values in the trial cpuset (trial), would * our various subset and exclusive rules still be valid? Presumes * cpuset_mutex held. * * 'cur' is the address of an actual, in-use cpuset. Operations * such as list traversal that depend on the actual address of the * cpuset in the list must use cur below, not trial. * * 'trial' is the address of bulk structure copy of cur, with * perhaps one or more of the fields cpus_allowed, mems_allowed, * or flags changed to new, trial values. * * Return 0 if valid, -errno if not. */
static int validate_change(struct cpuset *cur, struct cpuset *trial) { struct cgroup_subsys_state *css; struct cpuset *c, *par; int ret; rcu_read_lock(); /* Each of our child cpusets must be a subset of us */ ret = -EBUSY; cpuset_for_each_child(c, css, cur) if (!is_cpuset_subset(c, trial)) goto out; /* Remaining checks don't apply to root cpuset */ ret = 0; if (cur == &top_cpuset) goto out; par = parent_cs(cur); /* On legacy hiearchy, we must be a subset of our parent cpuset. */ ret = -EACCES; if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && !is_cpuset_subset(trial, par)) goto out; /* * If either I or some sibling (!= me) is exclusive, we can't * overlap */ ret = -EINVAL; cpuset_for_each_child(c, css, par) { if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && c != cur && cpumask_intersects(trial->cpus_allowed, c->cpus_allowed)) goto out; if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) && c != cur && nodes_intersects(trial->mems_allowed, c->mems_allowed)) goto out; } /* * Cpusets with tasks - existing or newly being attached - can't * be changed to have empty cpus_allowed or mems_allowed. */ ret = -ENOSPC; if ((cgroup_is_populated(cur->css.cgroup) || cur->attach_in_progress)) { if (!cpumask_empty(cur->cpus_allowed) && cpumask_empty(trial->cpus_allowed)) goto out; if (!nodes_empty(cur->mems_allowed) && nodes_empty(trial->mems_allowed)) goto out; } /* * We can't shrink if we won't have enough room for SCHED_DEADLINE * tasks. */ ret = -EBUSY; if (is_cpu_exclusive(cur) && !cpuset_cpumask_can_shrink(cur->cpus_allowed, trial->cpus_allowed)) goto out; ret = 0; out: rcu_read_unlock(); return ret; }

Contributors

PersonTokensPropCommitsCommitProp
paul jacksonpaul jackson8729.59%317.65%
tejun heotejun heo8227.89%635.29%
paul menagepaul menage6020.41%211.76%
li zefanli zefan3411.56%423.53%
juri lellijuri lelli289.52%15.88%
david howellsdavid howells31.02%15.88%
Total294100.00%17100.00%

#ifdef CONFIG_SMP
/*
 * Helper routine for generate_sched_domains().
 * Reports whether the effective_cpus masks of cpusets a and b share
 * at least one CPU.
 */
static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
{
	const struct cpumask *cpus_a = a->effective_cpus;
	const struct cpumask *cpus_b = b->effective_cpus;

	return cpumask_intersects(cpus_a, cpus_b);
}

Contributors

PersonTokensPropCommitsCommitProp
paul jacksonpaul jackson2589.29%133.33%
li zefanli zefan310.71%266.67%
Total28100.00%3100.00%


/*
 * Raise @dattr->relax_domain_level to @c's level when @c's is larger;
 * otherwise leave @dattr untouched.
 */
static void update_domain_attr(struct sched_domain_attr *dattr,
			       struct cpuset *c)
{
	if (c->relax_domain_level > dattr->relax_domain_level)
		dattr->relax_domain_level = c->relax_domain_level;
}

Contributors

PersonTokensPropCommitsCommitProp
hidetoshi setohidetoshi seto35100.00%1100.00%
Total35100.00%1100.00%


/*
 * Fold the relax_domain_level of every load-balanced cpuset in the
 * subtree rooted at @root_cs (root included) into @dattr.  A cpuset with
 * an empty cpus_allowed is skipped together with its whole subtree.
 */
static void update_domain_attr_tree(struct sched_domain_attr *dattr,
				    struct cpuset *root_cs)
{
	struct cgroup_subsys_state *pos_css;
	struct cpuset *cur;

	rcu_read_lock();
	cpuset_for_each_descendant_pre(cur, pos_css, root_cs) {
		if (!cpumask_empty(cur->cpus_allowed)) {
			if (is_sched_load_balance(cur))
				update_domain_attr(dattr, cur);
			continue;
		}

		/* @cur has no CPU at all: jump over everything below it */
		pos_css = css_rightmost_descendant(pos_css);
	}
	rcu_read_unlock();
}

Contributors

PersonTokensPropCommitsCommitProp
lai jiangshanlai jiangshan5167.11%120.00%
tejun heotejun heo2431.58%360.00%
li zefanli zefan11.32%120.00%
Total76100.00%5100.00%

/*
 * generate_sched_domains()
 *
 * This function builds a partial partition of the systems CPUs
 * A 'partial partition' is a set of non-overlapping subsets whose
 * union is a subset of that set.
 * The output of this function needs to be passed to kernel/sched/core.c
 * partition_sched_domains() routine, which will rebuild the scheduler's
 * load balancing domains (sched domains) as specified by that partial
 * partition.
 *
 * See "What is sched_load_balance" in Documentation/cgroups/cpusets.txt
 * for a background explanation of this.
 *
 * Does not return errors, on the theory that the callers of this
 * routine would rather not worry about failures to rebuild sched
 * domains when operating in the severe memory shortage situations
 * that could cause allocation failures below.
 *
 * Must be called with cpuset_mutex held.
 *
 * The three key local variables below are:
 *    q  - a linked-list queue of cpuset pointers, used to implement a
 *	   top-down scan of all cpusets.  This scan loads a pointer
 *	   to each cpuset marked is_sched_load_balance into the
 *	   array 'csa'.  For our purposes, rebuilding the schedulers
 *	   sched domains, we can ignore !is_sched_load_balance cpusets.
 *  csa  - (for CpuSet Array) Array of pointers to all the cpusets
 *	   that need to be load balanced, for convenient iterative
 *	   access by the subsequent code that finds the best partition,
 *	   i.e. the set of domains (subsets) of CPUs such that the
 *	   cpus_allowed of every cpuset marked is_sched_load_balance
 *	   is a subset of one of these domains, while there are as
 *	   many such domains as possible, each as small as possible.
 * doms  - Conversion of 'csa' to an array of cpumasks, for passing to
 *	   the kernel/sched/core.c routine partition_sched_domains() in a
 *	   convenient format, that can be easily compared to the prior
 *	   value to determine what partition elements (sched domains)
 *	   were changed (added or removed.)
 *
 * Finding the best partition (set of domains):
 *	The triple nested loops below over i, j, k scan over the
 *	load balanced cpusets (using the array of cpuset pointers in
 *	csa[]) looking for pairs of cpusets that have overlapping
 *	cpus_allowed, but which don't have the same 'pn' partition
 *	number and puts them in the same partition number.  It keeps
 *	looping on the 'restart' label until it can no longer find
 *	any such pairs.
 *
 *	The union of the cpus_allowed masks from the set of
 *	all cpusets having the same 'pn' value then form the one
 *	element of the partition (one sched domain) to be passed to
 *	partition_sched_domains().
 *
 * Returns the number of domains; *domains and *attributes receive the
 * allocated arrays (either may be NULL after an allocation failure, in
 * which case 1 is returned for a NULL *domains).
 */
static int generate_sched_domains(cpumask_var_t **domains,
			struct sched_domain_attr **attributes)
{
	struct cpuset *cp;	/* scans q */
	struct cpuset **csa;	/* array of all cpuset ptrs */
	int csn;		/* how many cpuset ptrs in csa so far */
	int i, j, k;		/* indices for partition finding loops */
	cpumask_var_t *doms;	/* resulting partition; i.e. sched domains */
	cpumask_var_t non_isolated_cpus;  /* load balanced CPUs */
	struct sched_domain_attr *dattr;  /* attributes for custom domains */
	int ndoms = 0;		/* number of sched domains in result */
	int nslot;		/* next empty doms[] struct cpumask slot */
	struct cgroup_subsys_state *pos_css;

	doms = NULL;
	dattr = NULL;
	csa = NULL;

	if (!alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL))
		goto done;
	cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);

	/* Special case for the 99% of systems with one, full, sched domain */
	if (is_sched_load_balance(&top_cpuset)) {
		ndoms = 1;
		doms = alloc_sched_domains(ndoms);
		if (!doms)
			goto done;

		dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL);
		if (dattr) {
			*dattr = SD_ATTR_INIT;
			update_domain_attr_tree(dattr, &top_cpuset);
		}
		cpumask_and(doms[0], top_cpuset.effective_cpus,
				     non_isolated_cpus);

		goto done;
	}

	/* kmalloc_array() checks the count * size multiplication for us */
	csa = kmalloc_array(nr_cpusets(), sizeof(*csa), GFP_KERNEL);
	if (!csa)
		goto done;
	csn = 0;

	rcu_read_lock();
	cpuset_for_each_descendant_pre(cp, pos_css, &top_cpuset) {
		if (cp == &top_cpuset)
			continue;
		/*
		 * Continue traversing beyond @cp iff @cp has some CPUs and
		 * isn't load balancing.  The former is obvious.  The
		 * latter: All child cpusets contain a subset of the
		 * parent's cpus, so just skip them, and then we call
		 * update_domain_attr_tree() to calc relax_domain_level of
		 * the corresponding sched domain.
		 */
		if (!cpumask_empty(cp->cpus_allowed) &&
		    !(is_sched_load_balance(cp) &&
		      cpumask_intersects(cp->cpus_allowed, non_isolated_cpus)))
			continue;

		if (is_sched_load_balance(cp))
			csa[csn++] = cp;

		/* skip @cp's subtree */
		pos_css = css_rightmost_descendant(pos_css);
	}
	rcu_read_unlock();

	for (i = 0; i < csn; i++)
		csa[i]->pn = i;
	ndoms = csn;

restart:
	/* Find the best partition (set of sched domains) */
	for (i = 0; i < csn; i++) {
		struct cpuset *a = csa[i];
		int apn = a->pn;

		for (j = 0; j < csn; j++) {
			struct cpuset *b = csa[j];
			int bpn = b->pn;

			if (apn != bpn && cpusets_overlap(a, b)) {
				for (k = 0; k < csn; k++) {
					struct cpuset *c = csa[k];

					if (c->pn == bpn)
						c->pn = apn;
				}
				ndoms--;	/* one less element */
				goto restart;
			}
		}
	}

	/*
	 * Now we know how many domains to create.
	 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
	 */
	doms = alloc_sched_domains(ndoms);
	if (!doms)
		goto done;

	/*
	 * The rest of the code, including the scheduler, can deal with
	 * dattr==NULL case. No need to abort if alloc fails.
	 */
	dattr = kmalloc_array(ndoms, sizeof(*dattr), GFP_KERNEL);

	for (nslot = 0, i = 0; i < csn; i++) {
		struct cpuset *a = csa[i];
		struct cpumask *dp;
		int apn = a->pn;

		if (apn < 0) {
			/* Skip completed partitions */
			continue;
		}

		if (nslot == ndoms) {
			static int warnings = 10;

			if (warnings) {
				pr_warn("rebuild_sched_domains confused: nslot %d, ndoms %d, csn %d, i %d, apn %d\n",
					nslot, ndoms, csn, i, apn);
				warnings--;
			}
			continue;
		}

		/*
		 * Only index doms[] once nslot is known to be in range;
		 * doms[] holds exactly ndoms entries.
		 */
		dp = doms[nslot];

		cpumask_clear(dp);
		if (dattr)
			*(dattr + nslot) = SD_ATTR_INIT;
		for (j = i; j < csn; j++) {
			struct cpuset *b = csa[j];

			if (apn == b->pn) {
				cpumask_or(dp, dp, b->effective_cpus);
				cpumask_and(dp, dp, non_isolated_cpus);
				if (dattr)
					update_domain_attr_tree(dattr + nslot, b);

				/* Done with this partition */
				b->pn = -1;
			}
		}
		nslot++;
	}
	BUG_ON(nslot != ndoms);

done:
	free_cpumask_var(non_isolated_cpus);
	kfree(csa);

	/*
	 * Fallback to the default domain if kmalloc() failed.
	 * See comments in partition_sched_domains().
	 */
	if (doms == NULL)
		ndoms = 1;

	*domains    = doms;
	*attributes = dattr;
	return ndoms;
}

Contributors

PersonTokensPropCommitsCommitProp
paul jacksonpaul jackson42359.41%14.55%
hidetoshi setohidetoshi seto7510.53%14.55%
maksim krasnyanskiymaksim krasnyanskiy588.15%14.55%
rik van rielrik van riel557.72%14.55%
tejun heotejun heo354.92%418.18%
li zefanli zefan273.79%627.27%
rusty russellrusty russell141.97%14.55%
lai jiangshanlai jiangshan101.40%29.09%
miao xiemiao xie60.84%14.55%
paul menagepaul menage30.42%14.55%
fabian frederickfabian frederick20.28%14.55%
mel gormanmel gorman20.28%14.55%
ingo molnaringo molnar20.28%14.55%
Total712100.00%22100.00%

/*
 * Rebuild scheduler domains.
 *
 * If the flag 'sched_load_balance' of any cpuset with non-empty
 * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
 * which has that flag enabled, or if any cpuset with a non-empty
 * 'cpus' is removed, then call this routine to rebuild the
 * scheduler's dynamic sched domains.
 *
 * Call with cpuset_mutex held.  Takes get_online_cpus().
 */
static void rebuild_sched_domains_locked(void)
{
	struct sched_domain_attr *attr;
	cpumask_var_t *doms;
	int ndoms;

	lockdep_assert_held(&cpuset_mutex);
	get_online_cpus();

	/*
	 * Only proceed when top_cpuset.effective_cpus matches
	 * cpu_active_mask.  A mismatch means we raced with CPU hotplug;
	 * doing nothing then avoids handing doms with an offlined cpu to
	 * partition_sched_domains(), and the hotplug work item will
	 * rebuild the sched domains anyway.
	 */
	if (cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask)) {
		/* Generate domain masks and attrs */
		ndoms = generate_sched_domains(&doms, &attr);

		/* Have scheduler rebuild the domains */
		partition_sched_domains(ndoms, doms, attr);
	}

	put_online_cpus();
}

Contributors

PersonTokensPropCommitsCommitProp
maksim krasnyanskiymaksim krasnyanskiy3143.06%19.09%
li zefanli zefan1825.00%218.18%
paul jacksonpaul jackson1013.89%218.18%
tejun heotejun heo79.72%218.18%
gautham r shenoygautham r shenoy22.78%19.09%
paul menagepaul menage22.78%19.09%
rusty russellrusty russell11.39%19.09%
hidetoshi setohidetoshi seto11.39%19.09%
Total72100.00%11100.00%

#else /* !CONFIG_SMP */
/* !CONFIG_SMP variant: nothing to rebuild, so this is an empty stub. */
static void rebuild_sched_domains_locked(void)
{
}

Contributors

PersonTokensPropCommitsCommitProp
paul menagepaul menage571.43%150.00%
tejun heotejun heo228.57%150.00%
Total7100.00%2100.00%

#endif /* CONFIG_SMP */
/*
 * Entry point for callers that do not hold cpuset_mutex: takes the mutex
 * around rebuild_sched_domains_locked().
 */
void rebuild_sched_domains(void)
{
	mutex_lock(&cpuset_mutex);

	rebuild_sched_domains_locked();

	mutex_unlock(&cpuset_mutex);
}

Contributors

PersonTokensPropCommitsCommitProp
maksim krasnyanskiymaksim krasnyanskiy1150.00%120.00%
tejun heotejun heo836.36%240.00%
paul jacksonpaul jackson313.64%240.00%
Total22100.00%5100.00%

/**
 * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
 * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
 *
 * Visit every task of @cs and set its allowed CPUs to @cs->effective_cpus.
 * As this function is called with cpuset_mutex held, cpuset membership
 * stays stable.
 */
static void update_tasks_cpumask(struct cpuset *cs)
{
	struct task_struct *task;
	struct css_task_iter it;

	css_task_iter_start(&cs->css, &it);
	for (task = css_task_iter_next(&it); task;
	     task = css_task_iter_next(&it))
		set_cpus_allowed_ptr(task, cs->effective_cpus);
	css_task_iter_end(&it);
}

Contributors

PersonTokensPropCommitsCommitProp
tejun heotejun heo4272.41%333.33%
miao xiemiao xie610.34%111.11%
li zefanli zefan610.34%333.33%
cliff wickmancliff wickman35.17%111.11%
adrian bunkadrian bunk11.72%111.11%
Total58100.00%9100.00%

/* * update_cpumasks_hier - Update effective cpumasks and tasks in the subtree * @cs: the cpuset to consider * @new_cpus: temp variable for calculating new effective_cpus * * When congifured cpumask is changed, the effective cpumasks of this cpuset * and all its descendants need to be updated. * * On legacy hierachy, effective_cpus will be the same with cpu_allowed. * * Called with cpuset_mutex held */
static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) { struct cpuset *cp; struct cgroup_subsys_state *pos_css; bool need_rebuild_sched_domains = false; rcu_read_lock(); cpuset_for_each_descendant_pre(cp, pos_css, cs) { struct cpuset *parent = parent_cs(cp); cpumask_and(new_cpus, cp->cpus_allowed, parent->effective_cpus); /* * If it becomes empty, inherit the effective mask of the * parent, which is guaranteed to have some CPUs. */ if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && cpumask_empty(new_cpus)) cpumask_copy(new_cpus, parent->effective_cpus); /* Skip the whole subtree if the cpumask remains the same. */ if (cpumask_equal(new_cpus, cp->effective_cpus)) { pos_css = css_rightmost_descendant(pos_css); continue; } if (!css_tryget_online(&cp->css)) continue; rcu_read_unlock(); spin_lock_irq(&callback_lock); cpumask_copy(cp->effective_cpus, new_cpus); spin_unlock_irq(&callback_lock); WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); update_tasks_cpumask(cp); /* * If the effective cpumask of any non-empty cpuset is changed, * we need to rebuild sched domains. */ if (!cpumask_empty(cp->cpus_allowed) && is_sched_load_balance(cp)) need_rebuild_sched_domains = true; rcu_read_lock(); css_put(&cp->css); } rcu_read_unlock(); if (need_rebuild_sched_domains) rebuild_sched_domains_locked(); }

Contributors

PersonTokensPropCommitsCommitProp
li zefanli zefan17883.18%640.00%
tejun heotejun heo157.01%426.67%
paul jacksonpaul jackson104.67%16.67%
paul menagepaul menage62.80%213.33%
vladimir davydovvladimir davydov41.87%16.67%
cliff wickmancliff wickman10.47%16.67%
Total214100.00%15100.00%

/** * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it * @cs: the cpuset to consider * @trialcs: trial cpuset * @buf: buffer of cpu numbers written to this cpuset */
static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, const char *buf) { int retval; /* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */ if (cs == &top_cpuset) return -EACCES; /* * An empty cpus_allowed is ok only if the cpuset has no tasks. * Since cpulist_parse() fails on an empty mask, we special case * that parsing. The validate_change() call ensures that cpusets * with tasks have cpus. */ if (!*buf) { cpumask_clear(trialcs->cpus_allowed); } else { retval = cpulist_parse(buf, trialcs->cpus_allowed); if (retval < 0) return retval; if (!cpumask_subset(trialcs->cpus_allowed, top_cpuset.cpus_allowed)) return -EINVAL; } /* Nothing to do if the cpus didn't change */ if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed)) return 0; retval = validate_change(cs, trialcs); if (retval < 0) return retval; spin_lock_irq(&callback_lock); cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); spin_unlock_irq(&callback_lock); /* use trialcs->cpus_allowed as a temp variable */ update_cpumasks_hier(cs, trialcs->cpus_allowed); return 0; }

Contributors

PersonTokensPropCommitsCommitProp
paul menagepaul menage7243.90%317.65%
paul jacksonpaul jackson4326.22%423.53%
li zefanli zefan2817.07%529.41%
lai jiangshanlai jiangshan137.93%15.88%
vladimir davydovvladimir davydov42.44%15.88%
cliff wickmancliff wickman21.22%15.88%
miao xiemiao xie10.61%15.88%
rusty russellrusty russell10.61%15.88%
Total164100.00%17100.00%

/*
 * Migrate memory region from one set of nodes to another.  This is
 * performed asynchronously as it can be called from process migration path
 * holding locks involved in process management.  All mm migrations are
 * performed in the queued order and can be waited for by flushing
 * cpuset_migrate_mm_wq.
 */

/* One queued migration request; freed by the work function. */
struct cpuset_migrate_mm_work {
	struct work_struct	work;	/* embedded work item */
	struct mm_struct	*mm;	/* mm to migrate; reference dropped by workfn */
	nodemask_t		from;	/* source nodes */
	nodemask_t		to;	/* destination nodes */
};

/*
 * Work function: migrate the pages of the request's mm, then release the
 * mm reference and the request itself.
 */
static void cpuset_migrate_mm_workfn(struct work_struct *work)
{
	struct cpuset_migrate_mm_work *req;
	struct mm_struct *mm;

	req = container_of(work, struct cpuset_migrate_mm_work, work);
	mm = req->mm;

	/* on a wq worker, no need to worry about %current's mems_allowed */
	do_migrate_pages(mm, &req->from, &req->to, MPOL_MF_MOVE_ALL);

	mmput(mm);
	kfree(req);
}

Contributors

PersonTokensPropCommitsCommitProp
tejun heotejun heo58100.00%1100.00%
Total58100.00%1100.00%


static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to) { struct cpuset_migrate_mm_work *mwork; mwork = kzalloc(sizeof(*mwork), GFP_KERNEL); if (mwork) { mwork->mm = mm; mwork->from = *from; mwork->to = *to; INIT_WORK(&mwork->work, cpuset_migrate_mm_workfn); queue_work(cpuset_migrate_mm_wq, &mwork->work); } else { mmput(mm); } }

Contributors

PersonTokensPropCommitsCommitProp
tejun heotejun heo5559.14%125.00%
paul menagepaul menage2324.73%125.00%
paul jacksonpaul jackson1516.13%250.00%
Total93100.00%4100.00%


/*
 * Flush cpuset_migrate_mm_wq, i.e. wait for the mm migrations queued by
 * cpuset_migrate_mm() to complete.
 */
static void cpuset_post_attach(void)
{
	flush_workqueue(cpuset_migrate_mm_wq);
}

Contributors

PersonTokensPropCommitsCommitProp
tejun heotejun heo1184.62%250.00%
miao xiemiao xie17.69%125.00%
li zefanli zefan17.69%125.00%
Total13100.00%4100.00%

/*
 * cpuset_change_task_nodemask - change task's mems_allowed and mempolicy
 * @tsk: the task to change
 * @newmems: new nodes that the task will be set
 *
 * In order to avoid seeing no nodes if the old and new nodes are disjoint,
 * we structure updates as setting all new allowed nodes, then clearing newly
 * disallowed ones.
 */
static void cpuset_change_task_nodemask(struct task_struct *tsk,
					nodemask_t *newmems)
{
	bool use_seqcount;

	/*
	 * Leave the masks alone for a task that has access to memory
	 * reserves because it has been OOM killed, and for a dying task,
	 * so that they can get memory anywhere.
	 *
	 * NOTE(review): current->flags is the caller's, not @tsk's, and
	 * test_thread_flag() presumably also tests the calling context -
	 * confirm that checking the caller rather than @tsk is intended.
	 */
	if (unlikely(test_thread_flag(TIF_MEMDIE)) ||
	    (current->flags & PF_EXITING))
		return;

	task_lock(tsk);

	/*
	 * Determine if a loop is necessary if another thread is doing
	 * read_mems_allowed_begin().  If at least one node remains unchanged
	 * and tsk does not have a mempolicy, then an empty nodemask will not
	 * be possible when mems_allowed is larger than a word.
	 */
	use_seqcount = task_has_mempolicy(tsk) ||
		       !nodes_intersects(*newmems, tsk->mems_allowed);

	if (use_seqcount) {
		local_irq_disable();
		write_seqcount_begin(&tsk->mems_allowed_seq);
	}

	/* Step 1: widen to old | new so the mask is never seen empty. */
	nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);

	/* Step 2: narrow down to exactly the new set. */
	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
	tsk->mems_allowed = *newmems;

	if (use_seqcount) {
		write_seqcount_end(&tsk->mems_allowed_seq);
		local_irq_enable();
	}

	task_unlock(tsk);
}

Contributors

PersonTokensPropCommitsCommitProp
miao xiemiao xie8257.75%330.00%
mel gormanmel gorman2517.61%220.00%
david rientjesdavid rientjes2114.79%220.00%
peter zijlstrapeter zijlstra107.04%110.00%
paul jacksonpaul jackson42.82%220.00%
Total142100.00%10100.00%

static void *cpuset_being_rebound; /** * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset. * @cs: the cpuset in which each task's mems_allowed mask needs to be changed * * Iterate through each task of @cs updating its mems_allowed to the * effective cpuset's. As this function is called with cpuset_mutex held, * cpuset membership stays stable. */
static void update_tasks_nodemask(struct cpuset *cs) { static nodemask_t newmems; /* protected by cpuset_mutex */ struct css_task_iter it; struct task_struct *task; cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ guarantee_online_mems(cs, &newmems); /* * The mpol_rebind_mm() call takes mmap_sem, which we couldn't * take while holding tasklist_lock. Forks can happen - the * mpol_dup() cpuset_being_rebound check will catch such forks, * and rebind their vma mempolicies too. Because we still hold * the global cpuset_mutex, we know that no other rebind effort * will be contending for the global variable cpuset_being_rebound. * It's ok if we rebind the same mm twice; mpol_rebind_mm() * is idempotent. Also migrate pages in each mm to new nodes. */ css_task_iter_start(&cs->css, &it); while ((task = css_task_iter_next(&it))) { struct mm_struct *mm; bool migrate; cpuset_change_task_nodemask(task, &newmems); mm = get_task_mm(task); if (!mm) continue; migrate = is_memory_migrate(cs); mpol_rebind_mm(mm, &cs->mems_allowed); if (migrate) cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems); else mmput(mm); } css_task_iter_end(&it); /* * All the tasks' nodemasks have been updated, update * cs->old_mems_allowed. */ cs->old_mems_allowed = newmems; /* We're done rebinding vmas to this cpuset's new mems_allowed. */ cpuset_being_rebound = NULL; }

Contributors

PersonTokensPropCommitsCommitProp
tejun heotejun heo6039.74%325.00%
paul menagepaul menage3925.83%18.33%
li zefanli zefan2315.23%325.00%
paul jacksonpaul jackson2315.23%325.00%
miao xiemiao xie63.97%216.67%
Total151100.00%12100.00%

/*
 * update_nodemasks_hier - Update effective nodemasks and tasks in the subtree
 * @cs: the cpuset to consider
 * @new_mems: a temp variable for calculating new effective_mems
 *
 * When the configured nodemask is changed, the effective nodemasks of this
 * cpuset and all its descendants need to be updated.
 *
 * On the legacy hierarchy, effective_mems will be the same as mems_allowed.
 *
 * Called with cpuset_mutex held
 */
static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
{
	struct cgroup_subsys_state *pos;
	struct cpuset *desc;

	rcu_read_lock();
	cpuset_for_each_descendant_pre(desc, pos, cs) {
		struct cpuset *up = parent_cs(desc);

		nodes_and(*new_mems, desc->mems_allowed, up->effective_mems);

		/*
		 * On the default hierarchy an empty intersection falls back
		 * to the parent's effective mask, which is guaranteed to
		 * have some MEMs.
		 */
		if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
		    nodes_empty(*new_mems))
			*new_mems = up->effective_mems;

		/* Nothing changed here: the whole subtree can be skipped. */
		if (nodes_equal(*new_mems, desc->effective_mems)) {
			pos = css_rightmost_descendant(pos);
			continue;
		}

		/* Pin the css so we may leave the RCU read section below. */
		if (!css_tryget_online(&desc->css))
			continue;
		rcu_read_unlock();

		spin_lock_irq(&callback_lock);
		desc->effective_mems = *new_mems;
		spin_unlock_irq(&callback_lock);

		/* Legacy hierarchy: effective mask must mirror mems_allowed. */
		WARN_ON(!(cgroup_subsys_on_dfl(cpuset_cgrp_subsys) ||
			  nodes_equal(desc->mems_allowed,
				      desc->effective_mems)));

		update_tasks_nodemask(desc);

		rcu_read_lock();
		css_put(&desc->css);
	}
	rcu_read_unlock();
}

Contributors

PersonTokensPropCommitsCommitProp
li zefanli zefan15988.33%646.15%
tejun heotejun heo158.33%430.77%
vladimir davydovvladimir davydov42.22%17.69%
paul jacksonpaul jackson10.56%17.69%
paul menagepaul menage10.56%17.69%
Total180100.00%13100.00%

/* * Handle user request to change the 'mems' memory placement * of a cpuset. Needs to validate the request, update the * cpusets mems_allowed, and for each task in the cpuset, * update mems_allowed and rebind task's mempolicy and any vma * mempolicies and if the cpuset is marked 'memory_migrate', * migrate the tasks pages to the new memory. * * Call with cpuset_mutex held. May take callback_lock during call. * Will take tasklist_lock, scan tasklist for tasks in cpuset cs, * lock each such tasks mm->mmap_sem, scan its vma's and rebind * their mempolicies to the cpusets new mems_allowed. */
static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, const char *buf) { int retval; /* * top_cpuset.mems_allowed tracks node_stats[N_MEMORY]; * it's read-only */ if (cs == &top_cpuset) { retval = -EACCES; goto done; } /* * An empty mems_allowed is ok iff there are no tasks in the cpuset. * Since nodelist_parse() fails on an empty mask, we special case * that parsing. The validate_change() call ensures that cpusets * with tasks have memory. */ if (!*buf) { nodes_clear(trialcs->mems_allowed); } else { retval = nodelist_parse(buf, trialcs->mems_allowed); if (retval < 0) goto done; if (!nodes_subset(trialcs->mems_allowed, top_cpuset.mems_allowed)) { retval = -EINVAL; goto done; } } if (nodes_equal(cs->mems_allowed, trialcs->mems_allowed)) { retval = 0; /* Too easy - nothing to do */ goto done; } retval = validate_change(cs, trialcs); if (retval < 0) goto done; spin_lock_irq(&callback_lock); cs->mems_allowed = trialcs->mems_allowed; spin_unlock_irq(&callback_lock); /* use trialcs->mems_allowed as a temp variable */ update_nodemasks_hier(cs, &trialcs->mems_allowed); done: return retval; }

Contributors

PersonTokensPropCommitsCommitProp
miao xiemiao xie15585.16%222.22%
li zefanli zefan2111.54%444.44%
vladimir davydovvladimir davydov42.20%111.11%
alban crequyalban crequy10.55%111.11%
lai jiangshanlai jiangshan10.55%111.11%
Total182100.00%9100.00%


/*
 * Return non-zero if the cpuset of the current task is the one recorded in
 * cpuset_being_rebound.  task_cs() is evaluated inside an RCU read-side
 * section.
 */
int current_cpuset_is_being_rebound(void)
{
	int rebinding;

	rcu_read_lock();
	rebinding = (task_cs(current) == cpuset_being_rebound);
	rcu_read_unlock();

	return rebinding;
}

Contributors

PersonTokensPropCommitsCommitProp
gu zhenggu zheng1450.00%133.33%
paul menagepaul menage1242.86%133.33%
paul jacksonpaul jackson27.14%133.33%
Total28100.00%3100.00%


/*
 * Set @cs's relax_domain_level to @val.
 *
 * With CONFIG_SMP, @val must lie in [-1, sched_domain_level_max), otherwise
 * -EINVAL is returned.  If the level actually changes and @cs is a
 * non-empty, load-balanced cpuset, the sched domains are rebuilt.
 * Returns 0 on success.
 */
static int update_relax_domain_level(struct cpuset *cs, s64 val)
{
#ifdef CONFIG_SMP
	if (val < -1 || val >= sched_domain_level_max)
		return -EINVAL;
#endif

	/* Unchanged level: nothing to do. */
	if (val == cs->relax_domain_level)
		return 0;

	cs->relax_domain_level = val;
	if (!cpumask_empty(cs->cpus_allowed) && is_sched_load_balance(cs))
		rebuild_sched_domains_locked();

	return 0;
}

Contributors

PersonTokensPropCommitsCommitProp
hidetoshi setohidetoshi seto3954.93%112.50%
li zefanli zefan2332.39%337.50%
paul menagepaul menage79.86%225.00%
peter zijlstrapeter zijlstra11.41%112.50%
tejun heotejun heo11.41%112.50%
Total71100.00%8100.00%

/** * update_tasks_flags - update the spread flags of tasks in the cpuset. * @cs: the cpuset in which each task's spread flags needs to be changed * * Iterate through each task of @cs updating its spread flags. As this * function is called with cpuset_mutex held, cpuset membership stays * stable. */
static void update_tasks_flags(struct cpuset *cs) { struct css_task_iter it; struct task_struct *task; css_task_iter_start(&cs->css, &it); while ((task = css_task_iter_next(&it))) cpuset_update_task_spread_flag(cs, task); css_task_iter_end(&it); }

Contributors

PersonTokensPropCommitsCommitProp
tejun heotejun heo4071.43%375.00%
miao xiemiao xie1628.57%125.00%
Total56100.00%4100.00%

/*
 * update_flag - read a 0 or a 1 in a file and update associated flag
 * bit:		the bit to update (see cpuset_flagbits_t)
 * cs:		the cpuset to update
 * turning_on:	whether the flag is being set or cleared
 *
 * The change is first applied to a trial copy of @cs and checked with
 * validate_change(); only then are the flags copied into @cs.
 * Returns -ENOMEM if the trial cpuset cannot be allocated, otherwise the
 * result of validate_change().
 *
 * Call with cpuset_mutex held.
 */
static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
		       int turning_on)
{
	struct cpuset *trial;
	int balance_changed;
	int spread_changed;
	int ret;

	trial = alloc_trial_cpuset(cs);
	if (!trial)
		return -ENOMEM;

	if (turning_on)
		set_bit(bit, &trial->flags);
	else
		clear_bit(bit, &trial->flags);

	ret = validate_change(cs, trial);
	if (ret < 0)
		goto out_free;

	/* Compare old and new state before cs->flags is overwritten. */
	balance_changed = is_sched_load_balance(cs) !=
			  is_sched_load_balance(trial);
	spread_changed = (is_spread_slab(cs) != is_spread_slab(trial)) ||
			 (is_spread_page(cs) != is_spread_page(trial));

	spin_lock_irq(&callback_lock);
	cs->flags = trial->flags;
	spin_unlock_irq(&callback_lock);

	if (!cpumask_empty(trial->cpus_allowed) && balance_changed)
		rebuild_sched_domains_locked();

	if (spread_changed)
		update_tasks_flags(cs);

out_free:
	free_trial_cpuset(trial);
	return ret;
}

Contributors

PersonTokensPropCommitsCommitProp
paul jacksonpaul jackson6233.33%323.08%
miao xiemiao xie4222.58%17.69%
paul menagepaul menage3920.97%215.38%
li zefanli zefan2915.59%215.38%
rakib mullickrakib mullick52.69%17.69%
vladimir davydovvladimir davydov42.15%17.69%
david rientjesdavid rientjes31.61%17.69%
dinakar guniguntaladinakar guniguntala10.54%17.69%
tejun heotejun heo10.54%17.69%
Total186100.00%13100.00%

/*
 * Frequency meter - How fast is some event occurring?
 *
 * These routines manage a digitally filtered, constant time based,
 * event frequency meter.  There are four routines:
 *   fmeter_init() - initialize a frequency meter.
 *   fmeter_markevent() - called each time the event happens.
 *   fmeter_getrate() - returns the recent rate of such events.
 *   fmeter_update() - internal routine used to update fmeter.
 *
 * A common data structure is passed to each of these routines,
 * which is used to keep track of the state required to manage the
 * frequency meter and its digital filter.
 *
 * The filter works on the number of events marked per unit time.
 * The filter is single-pole low-pass recursive (IIR).  The time unit
 * is 1 second.  Arithmetic is done using 32-bit integers scaled to
 * simulate 3 decimal digits of precision (multiplied by 1000).
 *
 * With an FM_COEF of 933, and a time base of 1 second, the filter
 * has a half-life of 10 seconds, meaning that if the events quit
 * happening, then the rate returned from the fmeter_getrate()
 * will be cut in half each 10 seconds, until it converges to zero.
 *
 * It is not worth doing a real infinitely recursive filter.  If more
 * than FM_MAXTICKS ticks have elapsed since the last filter event,
 * just compute FM_MAXTICKS ticks worth, by which point the level
 * will be stable.
 *
 * Limit the count of unprocessed events to FM_MAXCNT, so as to avoid
 * arithmetic overflow in the fmeter_update() routine.
 *
 * Given the simple 32 bit integer arithmetic used, this meter works
 * best for reporting rates between one per millisecond (msec) and
 * one per 32 (approx) seconds.  At constant rates faster than one
 * per msec it maxes out at values just under 1,000,000.  At constant
 * rates between one per msec, and one per second it will stabilize
 * to a value N*1000, where N is the rate of events per second.
 * At constant rates between one per second and one per 32 seconds,
 * it will be choppy, moving up on the seconds that have an event,
 * and then decaying until the next event.  At rates slower than
 * about one in 32 seconds, it decays all the way back to zero between
 * each event.
 */

#define FM_COEF 933		/* coefficient for half-life of 10 secs */
#define FM_MAXTICKS ((u32)99)	/* useless computing more ticks than this */
#define FM_MAXCNT 1000000	/* limit cnt to avoid overflow */
#define FM_SCALE 1000		/* faux fixed point scale */

/* Initialize a frequency meter: set up its lock and zero all of its state. */
static void fmeter_init(struct fmeter *fmp)
{
	spin_lock_init(&fmp->lock);
	fmp->time = 0;
	fmp->val = 0;
	fmp->cnt = 0;
}

Contributors

PersonTokensPropCommitsCommitProp
paul menagepaul menage2054.05%150.00%
paul jacksonpaul jackson1745.95%150.00%
Total37100.00%2100.00%

/*
 * Internal meter update - process cnt events and update value.
 *
 * Does nothing if no whole second has elapsed since the last update.
 * Otherwise the value is decayed once per elapsed second (at most
 * FM_MAXTICKS times), the pending event count is folded in, and the
 * count is reset.  Both callers in this file hold fmp->lock.
 */
static void fmeter_update(struct fmeter *fmp)
{
	const time64_t now = ktime_get_seconds();
	u32 elapsed = now - fmp->time;
	int val;

	if (!elapsed)
		return;

	elapsed = min(FM_MAXTICKS, elapsed);

	/* Apply one decay step per elapsed tick. */
	val = fmp->val;
	for (; elapsed > 0; elapsed--)
		val = (FM_COEF * val) / FM_SCALE;

	fmp->time = now;

	/* Fold in the events recorded since the last update. */
	fmp->val = val + ((FM_SCALE - FM_COEF) * fmp->cnt) / FM_SCALE;
	fmp->cnt = 0;
}

Contributors

PersonTokensPropCommitsCommitProp
paul menagepaul menage5758.76%112.50%
paul jacksonpaul jackson2626.80%450.00%
arnd bergmannarnd bergmann1111.34%112.50%
david rientjesdavid rientjes22.06%112.50%
christoph lameterchristoph lameter11.03%112.50%
Total97100.00%8100.00%

/*
 * Process any previous ticks, then bump cnt by one (times scale).
 * The count is capped at FM_MAXCNT.
 */
static void fmeter_markevent(struct fmeter *fmp)
{
	int bumped;

	spin_lock(&fmp->lock);
	fmeter_update(fmp);
	bumped = fmp->cnt + FM_SCALE;
	fmp->cnt = min(FM_MAXCNT, bumped);
	spin_unlock(&fmp->lock);
}

Contributors

PersonTokensPropCommitsCommitProp
paul menagepaul menage3165.96%150.00%
paul jacksonpaul jackson1634.04%150.00%
Total47100.00%2100.00%

/*
 * Process any previous ticks, then return current value.
 * The value is sampled while fmp->lock is held.
 */
static int fmeter_getrate(struct fmeter *fmp)
{
	int rate;

	spin_lock(&fmp->lock);
	fmeter_update(fmp);
	rate = fmp->val;
	spin_unlock(&fmp->lock);

	return rate;
}

Contributors

PersonTokensPropCommitsCommitProp
paul jacksonpaul jackson2352.27%150.00%
paul menagepaul menage2147.73%150.00%
Total44100.00%2100.00%

static struct cpuset *cpuset_attach_old_cs; /* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */
static int cpuset_can_attach(struct cgroup_taskset *tset) { struct cgroup_subsys_state *css; struct cpuset *cs; struct task_struct *task; int ret; /* used later by cpuset_attach() */ cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css)); cs = css_cs(css); mutex_lock(&cpuset_mutex); /* allow moving tasks into an empty cpuset if on default hierarchy */ ret = -ENOSPC; if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))) goto out_unlock; cgroup_taskset_for_each(task, css, tset) { ret = task_can_attach(task, cs->cpus_allowed); if (ret) goto out_unlock; ret = security_task_setscheduler(task); if (ret) goto out_unlock; } /* * Mark attach is in progress. This makes validate_change() fail * changes which zero cpus/mems_allowed. */ cs->attach_in_progress++; ret = 0; out_unlock: mutex_unlock(&cpuset_mutex); return ret; }

Contributors

PersonTokensPropCommitsCommitProp
tejun heotejun heo9864.47%952.94%
paul menagepaul menage149.21%15.88%
paul jacksonpaul jackson149.21%15.88%
juri lellijuri lelli95.92%15.88%
ben blumben blum74.61%211.76%
li zefanli zefan74.61%211.76%
david rientjesdavid rientjes31.97%15.88%
Total152100.00%17100.00%


static void cpuset_cancel_attach(struct cgroup_taskset *tset) { struct cgroup_subsys_state *css; struct cpuset *cs; cgroup_taskset_first(tset, &css); cs = css_cs(css); mutex_lock(&cpuset_mutex); css_cs(css)->attach_in_progress--; mutex_unlock(&cpuset_mutex); }

Contributors

PersonTokensPropCommitsCommitProp
tejun heotejun heo4580.36%457.14%
ben blumben blum1017.86%228.57%
paul jacksonpaul jackson11.79%114.29%
Total56100.00%7100.00%

/* * Protected by cpuset_mutex. cpus_attach is used only by cpuset_attach() * but we can't allocate it dynamically there. Define it global and * allocate from cpuset_init(). */ static cpumask_var_t cpus_attach;
static void cpuset_attach(struct cgroup_taskset *tset) { /* static buf protected by cpuset_mutex */ static nodemask_t cpuset_attach_nodemask_to; struct task_struct *task; struct task_struct *leader; struct cgroup_subsys_state *css; struct cpuset *cs; struct cpuset *oldcs = cpuset_attach_old_cs; cgroup_taskset_first(tset, &css); cs = css_cs(css); mutex_lock(&cpuset_mutex); /* prepare for attach */ if (cs == &top_cpuset) cpumask_copy(cpus_attach, cpu_possible_mask); else guarantee_online_cpus(cs, cpus_attach); guarantee_online_mems(cs, &cpuset_attach_nodemask_to); cgroup_taskset_for_each(task, css, tset) { /* * can_attach beforehand should guarantee that this doesn't * fail. TODO: have a better way to handle failure here */ WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach)); cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to); cpuset_update_task_spread_flag(cs, task); } /* * Change mm for all threadgroup leaders. This is expensive and may * sleep and should be moved outside migration path proper. */ cpuset_attach_nodemask_to = cs->effective_mems; cgroup_taskset_for_each_leader(leader, css, tset) { struct mm_struct *mm = get_task_mm(leader); if (mm) { mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); /* * old_mems_allowed is the same with mems_allowed * here, except if this task is being moved * automatically due to hotplug. In that case * @mems_allowed has been updated and is empty, so * @old_mems_allowed is the right nodesets that we * migrate mm from. */ if (is_memory_migrate(cs)) cpuset_migrate_mm(mm, &oldcs->old_mems_allowed, &cpuset_attach_nodemask_to); else mmput(mm); } } cs->old_mems_allowed = cpuset_attach_nodemask_to; cs->attach_in_progress--; if (!cs->attach_in_progress) wake_up(&cpuset_attach_wq); mutex_unlock(&cpuset_mutex); }

Contributors

PersonTokensPropCommitsCommitProp
tejun heotejun heo12655.02%1150.00%
paul menagepaul menage3515.28%14.55%
ben blumben blum3414.85%29.09%
li zefanli zefan219.17%731.82%
paul jacksonpaul jackson135.68%14.55%
Total229100.00%22100.00%

/* The various types of files and directories in a cpuset file system */ typedef enum { FILE_MEMORY_MIGRATE, FILE_CPULIST, FILE_MEMLIST, FILE_EFFECTIVE_CPULIST, FILE_EFFECTIVE_MEMLIST, FILE_CPU_EXCLUSIVE, FILE_MEM_EXCLUSIVE, FILE_MEM_HARDWALL, FILE_SCHED_LOAD_BALANCE, FILE_SCHED_RELAX_DOMAIN_LEVEL, FILE_MEMORY_PRESSURE_ENABLED, FILE_MEMORY_PRESSURE, FILE_SPREAD_PAGE, FILE_SPREAD_SLAB, } cpuset_filetype_t;
/*
 * cpuset_write_u64 - handle a write to one of the boolean cpuset files
 * @css: css of the cpuset being written
 * @cft: file being written; cft->private selects the flag
 * @val: value written by the user; any non-zero value means "on"
 *
 * Returns 0 on success, -ENODEV if the cpuset is no longer online,
 * -EINVAL for a file type this handler does not serve, or the error
 * returned by update_flag().
 *
 * Takes cpuset_mutex for the duration of the update.
 */
static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
			    u64 val)
{
	struct cpuset *cs = css_cs(css);
	cpuset_filetype_t type = cft->private;
	/*
	 * Collapse @val to 0/1 before handing it to update_flag(), whose
	 * turning_on parameter is an int.  Passing the raw u64 would
	 * truncate it, so a value such as 1ULL << 32 would be taken as
	 * "clear" even though it is non-zero.
	 */
	const int turning_on = !!val;
	int retval = 0;

	mutex_lock(&cpuset_mutex);
	if (!is_cpuset_online(cs)) {
		retval = -ENODEV;
		goto out_unlock;
	}

	switch (type) {
	case FILE_CPU_EXCLUSIVE:
		retval = update_flag(CS_CPU_EXCLUSIVE, cs, turning_on);
		break;
	case FILE_MEM_EXCLUSIVE:
		retval = update_flag(CS_MEM_EXCLUSIVE, cs, turning_on);
		break;
	case FILE_MEM_HARDWALL:
		retval = update_flag(CS_MEM_HARDWALL, cs, turning_on);
		break;
	case FILE_SCHED_LOAD_BALANCE:
		retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, turning_on);
		break;
	case FILE_MEMORY_MIGRATE:
		retval = update_flag(CS_MEMORY_MIGRATE, cs, turning_on);
		break;
	case FILE_MEMORY_PRESSURE_ENABLED:
		/* Global switch, not a per-cpuset flag. */
		cpuset_memory_pressure_enabled = turning_on;
		break;
	case FILE_SPREAD_PAGE:
		retval = update_flag(CS_SPREAD_PAGE, cs, turning_on);
		break;
	case FILE_SPREAD_SLAB:
		retval = update_flag(CS_SPREAD_SLAB, cs, turning_on);
		break;
	default:
		retval = -EINVAL;
		break;
	}
out_unlock:
	mutex_unlock(&cpuset_mutex);
	return retval;
}

Contributors

PersonTokensPropCommitsCommitProp
paul menagepaul menage12360.29%440.00%
paul jacksonpaul jackson3718.14%220.00%
tejun heotejun heo2612.75%220.00%
hidetoshi setohidetoshi seto104.90%110.00%
li zefanli zefan83.92%110.00%
Total204100.00%10100.00%


/*
 * cpuset_write_s64 - handle a write to a signed-value cpuset file
 * @css: css of the cpuset being written
 * @cft: file being written; cft->private selects the setting
 * @val: value written by the user
 *
 * Only FILE_SCHED_RELAX_DOMAIN_LEVEL is served; any other type yields
 * -EINVAL.  Returns -ENODEV if the cpuset is no longer online.
 * Takes cpuset_mutex for the duration of the update.
 */
static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
			    s64 val)
{
	struct cpuset *cs = css_cs(css);
	cpuset_filetype_t type = cft->private;
	int retval;

	mutex_lock(&cpuset_mutex);

	if (!is_cpuset_online(cs))
		retval = -ENODEV;
	else if (type == FILE_SCHED_RELAX_DOMAIN_LEVEL)
		retval = update_relax_domain_level(cs, val);
	else
		retval = -EINVAL;

	mutex_unlock(&cpuset_mutex);
	return retval;
}

Contributors

PersonTokensPropCommitsCommitProp
paul menagepaul menage6870.83%250.00%
tejun heotejun heo2829.17%250.00%
Total96100.00%4100.00%

/*
 * Common handling for a write to a "cpus" or "mems" file.
 *
 * Returns @nbytes on success, or a negative errno: -ENODEV if the cpuset
 * is no longer online, -ENOMEM if the trial cpuset cannot be allocated,
 * -EINVAL for an unexpected file type, or whatever update_cpumask() /
 * update_nodemask() reports.
 */
static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off)
{
	struct cpuset *cs = css_cs(of_css(of));
	struct cpuset *trial;
	int err = -ENODEV;

	buf = strstrip(buf);

	/*
	 * CPU or memory hotunplug may leave @cs w/o any execution
	 * resources, in which case the hotplug code asynchronously updates
	 * configuration and transfers all tasks to the nearest ancestor
	 * which can execute.
	 *
	 * As writes to "cpus" or "mems" may restore @cs's execution
	 * resources, wait for the previously scheduled operations before
	 * proceeding, so that we don't end up keep removing tasks added
	 * after execution capability is restored.
	 *
	 * cpuset_hotplug_work calls back into cgroup core via
	 * cgroup_transfer_tasks() and waiting for it from a cgroupfs
	 * operation like this one can lead to a deadlock through kernfs
	 * active_ref protection.  Let's break the protection.  Losing the
	 * protection is okay as we check whether @cs is online after
	 * grabbing cpuset_mutex anyway.  This only happens on the legacy
	 * hierarchies.
	 */
	css_get(&cs->css);
	kernfs_break_active_protection(of->kn);
	flush_work(&cpuset_hotplug_work);

	mutex_lock(&cpuset_mutex);
	if (!is_cpuset_online(cs))
		goto out_unlock;

	trial = alloc_trial_cpuset(cs);
	if (!trial) {
		err = -ENOMEM;
		goto out_unlock;
	}

	switch (of_cft(of)->private) {
	case FILE_MEMLIST:
		err = update_nodemask(cs, trial, buf);
		break;
	case FILE_CPULIST:
		err = update_cpumask(cs, trial, buf);
		break;
	default:
		err = -EINVAL;
		break;
	}

	free_trial_cpuset(trial);

out_unlock:
	mutex_unlock(&cpuset_mutex);
	kernfs_unbreak_active_protection(of->kn);
	css_put(&cs->css);

	/* Let any page migrations queued by the update complete. */
	flush_workqueue(cpuset_migrate_mm_wq);

	if (err)
		return err;
	return nbytes;
}

Contributors

PersonTokensPropCommitsCommitProp
tejun heotejun heo9447.00%666.67%
paul menagepaul menage6030.00%111.11%
li zefanli zefan4623.00%222.22%
Total200100.00%9100.00%

/* * These ascii lists should be read in a single call, by using a user * buffer large enough to hold the entire map. If read in smaller * chunks, there is no guarantee of atomicity. Since the display format * used, list of ranges of sequential numbers, is variable length, * and since these maps can change value dynamically, one could read * gibberish by doing partial reads while a list was changing. */
static int cpuset_common_seq_show(struct seq_file *sf, void *v) { struct cpuset *cs = css_cs(seq_css(sf)); cpuset_filetype_t type = seq_cft(sf)->private; int ret = 0; spin_lock_irq(&callback_lock); switch (type) { case FILE_CPULIST: seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed)); break; case FILE_MEMLIST: seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed)); break; case FILE_EFFECTIVE_CPULIST: seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->effective_cpus)); break; case FILE_EFFECTIVE_MEMLIST: seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->effective_mems)); break; default: ret = -EINVAL; } spin_unlock_irq(&callback_lock); return ret; }

Contributors

PersonTokensPropCommitsCommitProp
tejun heotejun heo6142.36%450.00%
paul jacksonpaul jackson3222.22%112.50%
li zefanli zefan2416.67%112.50%
paul menagepaul menage2315.97%112.50%
vladimir davydovvladimir davydov42.78%112.50%
Total144100.00%8100.00%


static u64 cpuset_read_u64(struct cgroup_subsys_state *css, struct cftype *cft) { struct cpuset *cs = css_cs(css); cpuset_filetype_t type = cft->private; switch (type) { case FILE_CPU_EXCLUSIVE: return is_cpu_exclusive(cs); case FILE_MEM_EXCLUSIVE: return is_mem_exclusive(cs); case FILE_MEM_HARDWALL: return is_mem_hardwall(cs); case FILE_SCHED_LOAD_BALANCE: return is_sched_load_balance(cs); case FILE_MEMORY_MIGRATE: return is_memory_migrate(cs); case FILE_MEMORY_PRESSURE_ENABLED: return cpuset_memory_pressure_enabled; case FILE_MEMORY_PRESSURE: return fmeter_getrate(&cs->fmeter); case FILE_SPREAD_PAGE: return is_spread_page(cs); case FILE_SPREAD_SLAB: return is_spread_slab(cs); default: BUG(); } /* Unreachable but makes gcc happy */ return 0; }

Contributors

PersonTokensPropCommitsCommitProp
paul menagepaul menage12093.75%250.00%
maksim krasnyanskiymaksim krasnyanskiy43.12%125.00%
tejun heotejun heo43.12%125.00%
Total128100.00%4100.00%


/*
 * cpuset_read_s64 - report the value behind a signed cpuset file
 * @css: css of the cpuset being read
 * @cft: file being read; cft->private selects the value
 *
 * Only FILE_SCHED_RELAX_DOMAIN_LEVEL is served, returning the cpuset's
 * relax_domain_level.  Any other file type is a BUG().
 */
static s64 cpuset_read_s64(struct cgroup_subsys_state *css, struct cftype *cft)
{
	struct cpuset *cs = css_cs(css);
	cpuset_filetype_t type = cft->private;

	if (type == FILE_SCHED_RELAX_DOMAIN_LEVEL)
		return cs->relax_domain_level;

	BUG();

	/* Unreachable but makes gcc happy */
	return 0;
}

Contributors

PersonTokensPropCommitsCommitProp
paul menagepaul menage4785.45%133.33%
maksim krasnyanskiymaksim krasnyanskiy47.27%133.33%
tejun heotejun heo47.27%133.33%
Total55100.00%3100.00%

/* * for the common functions, 'private' gives the type of file */ static struct cftype files[] = { { .name = "cpus", .seq_show = cpuset_common_seq_show, .write = cpuset_write_resmask, .max_write_len = (100U + 6 * NR_CPUS), .private = FILE_CPULIST, }, { .name = "mems", .seq_show = cpuset_common_seq_show, .write = cpuset_write_resmask, .max_write_len = (100U + 6 * MAX_NUMNODES), .private = FILE_MEMLIST, }, { .name = "effective_cpus", .seq_show = cpuset_common_seq_show, .private = FILE_EFFECTIVE_CPULIST, }, { .name = "effective_mems", .seq_show = cpuset_common_seq_show, .private = FILE_EFFECTIVE_MEMLIST, }, { .name = "cpu_exclusive", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_CPU_EXCLUSIVE, }, { .name = "mem_exclusive", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_MEM_EXCLUSIVE, }, { .name = "mem_hardwall", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_MEM_HARDWALL, }, { .name = "sched_load_balance", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_SCHED_LOAD_BALANCE, }, { .name = "sched_relax_domain_level", .read_s64 = cpuset_read_s64, .write_s64 = cpuset_write_s64, .private = FILE_SCHED_RELAX_DOMAIN_LEVEL, }, { .name = "memory_migrate", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_MEMORY_MIGRATE, }, { .name = "memory_pressure", .read_u64 = cpuset_read_u64, }, { .name = "memory_spread_page", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_SPREAD_PAGE, }, { .name = "memory_spread_slab", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_SPREAD_SLAB, }, { .name = "memory_pressure_enabled", .flags = CFTYPE_ONLY_ON_ROOT, .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_MEMORY_PRESSURE_ENABLED, }, { } /* terminate */ }; /* * cpuset_css_alloc - allocate a cpuset css * cgrp: control group that the new cpuset will be part of */
static struct cgroup_subsys_state * cpuset_css_alloc(struct cgroup_subsys_state *parent_css) { struct cpuset *cs; if (!parent_css) return &top_cpuset.css; cs = kzalloc(sizeof(*cs), GFP_KERNEL); if (!cs) return ERR_PTR(-ENOMEM); if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) goto free_cs; if (!alloc_cpumask_var(&cs->effective_cpus, GFP_KERNEL)) goto free_cpus; set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); cpumask_clear(cs->cpus_allowed); nodes_clear(cs->mems_allowed); cpumask_clear(cs->effective_cpus); nodes_clear(cs->effective_mems); fmeter_init(&cs->fmeter); cs->relax_domain_level = -1; return &cs->css; free_cpus: free_cpumask_var(cs->cpus_allowed); free_cs: kfree(cs); return ERR_PTR(-ENOMEM); }

Contributors

PersonTokensPropCommitsCommitProp
li zefanli zefan7041.67%218.18%
paul jacksonpaul jackson3923.21%218.18%
paul menagepaul menage3621.43%19.09%
tejun heotejun heo137.74%436.36%
mike travismike travis52.98%19.09%
hidetoshi setohidetoshi seto52.98%19.09%
Total168100.00%11100.00%


static int cpuset_css_online(struct cgroup_subsys_state *css) { struct cpuset *cs = css_cs(css); struct cpuset *parent = parent_cs(cs); struct cpuset *tmp_cs; struct cgroup_subsys_state *pos_css; if (!parent) return 0; mutex_lock(&cpuset_mutex); set_bit(CS_ONLINE, &cs->flags); if (is_spread_page(parent)) set_bit(CS_SPREAD_PAGE, &cs->flags); if (is_spread_slab(parent)) set_bit(CS_SPREAD_SLAB, &cs->flags); cpuset_inc(); spin_lock_irq(&callback_lock); if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) { cpumask_copy(cs->effective_cpus, parent->effective_cpus); cs->effective_mems = parent->effective_mems; } spin_unlock_irq(&callback_lock); if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) goto out_unlock; /* * Clone @parent's configuration if CGRP_CPUSET_CLONE_CHILDREN is * set. This flag handling is implemented in cgroup core for * histrical reasons - the flag may be specified during mount. * * Currently, if any sibling cpusets have exclusive cpus or mem, we * refuse to clone the configuration - thereby refusing the task to * be entered, and as a result refusing the sys_unshare() or * clone() which initiated it. If this becomes a problem for some * users who wish to allow that scenario, then this could be * changed to grant parent->cpus_allowed-sibling_cpus_exclusive * (and likewise for mems) to the new cgroup. */ rcu_read_lock(); cpuset_for_each_child(tmp_cs, pos_css, parent) { if (is_mem_exclusive(tmp_cs) || is_cpu_exclusive(tmp_cs)) { rcu_read_unlock(); goto out_unlock; } } rcu_read_unlock(); spin_lock_irq(&callback_lock); cs->mems_allowed = parent->mems_allowed; cs->effective_mems = parent->mems_allowed; cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); cpumask_copy(cs->effective_cpus, parent->cpus_allowed); spin_unlock_irq(&callback_lock); out_unlock: mutex_unlock(&cpuset_mutex); return 0; }

Contributors

PersonTokensPropCommitsCommitProp
tejun heotejun heo18571.71%950.00%
li zefanli zefan5320.54%211.11%
vladimir davydovvladimir davydov72.71%15.56%
paul menagepaul menage51.94%15.56%
paul jacksonpaul jackson31.16%211.11%
hidetoshi setohidetoshi seto20.78%15.56%
mel gormanmel gorman20.78%15.56%
dan carpenterdan carpenter10.39%15.56%
Total258100.00%18100.00%

/*
 * Take a cpuset offline.
 *
 * A cpuset that still has 'sched_load_balance' enabled is handled as if
 * the flag were being switched off, which will call
 * rebuild_sched_domains_locked().
 */

static void cpuset_css_offline(struct cgroup_subsys_state *css)
{
	struct cpuset *dying = css_cs(css);

	mutex_lock(&cpuset_mutex);

	if (is_sched_load_balance(dying))
		update_flag(CS_SCHED_LOAD_BALANCE, dying, 0);

	cpuset_dec();
	clear_bit(CS_ONLINE, &dying->flags);

	mutex_unlock(&cpuset_mutex);
}

Contributors

PersonTokensPropCommitsCommitProp
tejun heotejun heo2845.16%440.00%
paul jacksonpaul jackson1829.03%330.00%
paul menagepaul menage1422.58%220.00%
mel gormanmel gorman23.23%110.00%
Total62100.00%10100.00%


/* Release both cpumasks of a cpuset and then the cpuset itself. */
static void cpuset_css_free(struct cgroup_subsys_state *css)
{
	struct cpuset *victim = css_cs(css);

	free_cpumask_var(victim->effective_cpus);
	free_cpumask_var(victim->cpus_allowed);

	kfree(victim);
}

Contributors

PersonTokensPropCommitsCommitProp
tejun heotejun heo2050.00%228.57%
li zefanli zefan1435.00%228.57%
paul jacksonpaul jackson410.00%228.57%
paul menagepaul menage25.00%114.29%
Total40100.00%7100.00%


/*
 * Reset the configured masks of top_cpuset when the controller is bound:
 * every possible cpu/node on the default hierarchy, otherwise a copy of
 * the current effective masks.
 */
static void cpuset_bind(struct cgroup_subsys_state *root_css)
{
	mutex_lock(&cpuset_mutex);
	spin_lock_irq(&callback_lock);

	if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
		cpumask_copy(top_cpuset.cpus_allowed,
			     top_cpuset.effective_cpus);
		top_cpuset.mems_allowed = top_cpuset.effective_mems;
	} else {
		cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
		top_cpuset.mems_allowed = node_possible_map;
	}

	spin_unlock_irq(&callback_lock);
	mutex_unlock(&cpuset_mutex);
}

Contributors

PersonTokensPropCommitsCommitProp
li zefanli zefan7592.59%133.33%
vladimir davydovvladimir davydov44.94%133.33%
tejun heotejun heo22.47%133.33%
Total81100.00%3100.00%

/* Callback table and control files that hook cpusets into the cgroup core. */
struct cgroup_subsys cpuset_cgrp_subsys = {
	/* css lifetime: allocate, bring online, take offline, free */
	.css_alloc	= cpuset_css_alloc,
	.css_online	= cpuset_css_online,
	.css_offline	= cpuset_css_offline,
	.css_free	= cpuset_css_free,
	/* task attach callbacks */
	.can_attach	= cpuset_can_attach,
	.cancel_attach	= cpuset_cancel_attach,
	.attach		= cpuset_attach,
	.post_attach	= cpuset_post_attach,
	.bind		= cpuset_bind,
	/* the files[] table is registered as the legacy set of control files */
	.legacy_cftypes	= files,
	.early_init	= true,
};

/**
 * cpuset_init - initialize cpusets at system boot
 *
 * Description: Initialize top_cpuset and register the cpuset filesystem
 * type.
 *
 * Return: 0 on success, or the negative error code from
 * register_filesystem().
 **/
int __init cpuset_init(void)
{
	int ret;

	/* The masks of the top cpuset are mandatory; failing here is fatal. */
	if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL))
		BUG();
	if (!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL))
		BUG();

	/* Start with every cpu and every node allowed and effective. */
	cpumask_setall(top_cpuset.cpus_allowed);
	cpumask_setall(top_cpuset.effective_cpus);
	nodes_setall(top_cpuset.mems_allowed);
	nodes_setall(top_cpuset.effective_mems);

	fmeter_init(&top_cpuset.fmeter);
	set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
	top_cpuset.relax_domain_level = -1;

	ret = register_filesystem(&cpuset_fs_type);
	if (ret < 0)
		return ret;

	if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL))
		BUG();

	return 0;
}

Contributors

PersonTokensPropCommitsCommitProp
li zefanli zefan5239.39%323.08%
paul jacksonpaul jackson3526.52%430.77%
paul menagepaul menage1712.88%17.69%
miao xiemiao xie139.85%17.69%
hidetoshi setohidetoshi seto75.30%17.69%
mike travismike travis53.79%17.69%
yinghai luyinghai lu21.52%17.69%
dave hansendave hansen10.76%17.69%
Total132100.00%13100.00%

/*
 * When the CPU and/or memory hotplug handlers below unplug CPUs or memory
 * nodes, the cpuset hierarchy has to be walked and the CPU or node removed
 * from every cpuset.  A cpuset that loses its last CPU or node this way
 * gets its tasks moved to its next-highest non-empty parent.
 */

static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
{
	struct cpuset *dest = cs;

	/*
	 * Climb to the next-highest non-empty ancestor.  The climb always
	 * terminates: the top cpuset has online cpus, so it can't be empty.
	 */
	do {
		dest = parent_cs(dest);
	} while (cpumask_empty(dest->cpus_allowed) ||
		 nodes_empty(dest->mems_allowed));

	if (!cgroup_transfer_tasks(dest->css.cgroup, cs->css.cgroup))
		return;

	/* The transfer failed: log it together with the cgroup's name. */
	pr_err("cpuset: failed to transfer tasks out of empty cpuset ");
	pr_cont_cgroup_name(cs->css.cgroup);
	pr_cont("\n");
}

Contributors

PersonTokensPropCommitsCommitProp
tejun heotejun heo4047.06%337.50%
cliff wickmancliff wickman3541.18%112.50%
paul jacksonpaul jackson78.24%112.50%
fabian frederickfabian frederick11.18%112.50%
paul menagepaul menage11.18%112.50%
li zefanli zefan11.18%112.50%
Total85100.00%8100.00%


/*
 * Legacy-hierarchy hotplug update: the configured and the effective masks
 * of @cs both become @new_cpus / @new_mems, and a cpuset left without cpus
 * or mems has its tasks moved to an ancestor.
 *
 * Entered and left with cpuset_mutex held; the mutex is dropped around the
 * task migration.
 */
static void
hotplug_update_tasks_legacy(struct cpuset *cs,
			    struct cpumask *new_cpus, nodemask_t *new_mems,
			    bool cpus_updated, bool mems_updated)
{
	bool no_cpus, no_mems;

	spin_lock_irq(&callback_lock);
	cpumask_copy(cs->cpus_allowed, new_cpus);
	cpumask_copy(cs->effective_cpus, new_cpus);
	cs->mems_allowed = *new_mems;
	cs->effective_mems = *new_mems;
	spin_unlock_irq(&callback_lock);

	no_cpus = cpumask_empty(cs->cpus_allowed);
	no_mems = nodes_empty(cs->mems_allowed);

	/*
	 * Skip update_tasks_cpumask()/update_tasks_nodemask() for a mask
	 * that became empty, as the tasks will be migrated to an ancestor.
	 */
	if (cpus_updated && !no_cpus)
		update_tasks_cpumask(cs);
	if (mems_updated && !no_mems)
		update_tasks_nodemask(cs);

	mutex_unlock(&cpuset_mutex);

	/*
	 * Moving the tasks to the nearest ancestor with execution resources
	 * is a full cgroup operation which will also call back into cpuset,
	 * so it should be done outside any lock.
	 */
	if (no_cpus || no_mems)
		remove_tasks_in_empty_cpuset(cs);

	mutex_lock(&cpuset_mutex);
}

Contributors

PersonTokensPropCommitsCommitProp
li zefanli zefan8860.27%844.44%
srivatsa s. bhatsrivatsa s. bhat2919.86%211.11%
tejun heotejun heo106.85%211.11%
cliff wickmancliff wickman74.79%15.56%
paul jacksonpaul jackson53.42%211.11%
vladimir davydovvladimir davydov42.74%15.56%
paul menagepaul menage21.37%15.56%
miao xiemiao xie10.68%15.56%
Total146100.00%18100.00%


/*
 * Default-hierarchy hotplug update: only the effective masks of @cs are
 * rewritten.  An empty @new_cpus / @new_mems is first replaced (in the
 * caller's buffer) by the parent's effective mask.
 */
static void
hotplug_update_tasks(struct cpuset *cs,
		     struct cpumask *new_cpus, nodemask_t *new_mems,
		     bool cpus_updated, bool mems_updated)
{
	struct cpuset *parent = parent_cs(cs);

	if (cpumask_empty(new_cpus))
		cpumask_copy(new_cpus, parent->effective_cpus);
	if (nodes_empty(*new_mems))
		*new_mems = parent->effective_mems;

	spin_lock_irq(&callback_lock);
	cpumask_copy(cs->effective_cpus, new_cpus);
	cs->effective_mems = *new_mems;
	spin_unlock_irq(&callback_lock);

	if (cpus_updated)
		update_tasks_cpumask(cs);
	if (mems_updated)
		update_tasks_nodemask(cs);
}

Contributors

PersonTokensPropCommitsCommitProp
li zefanli zefan8174.31%555.56%
srivatsa s. bhatsrivatsa s. bhat1614.68%111.11%
tejun heotejun heo65.50%111.11%
vladimir davydovvladimir davydov43.67%111.11%
miao xiemiao xie21.83%111.11%
Total109100.00%9100.00%

/** * cpuset_hotplug_update_tasks - update tasks in a cpuset for hotunplug * @cs: cpuset in interest * * Compare @cs's cpu and mem masks against top_cpuset and if some have gone * offline, update @cs accordingly. If @cs ends up with no CPU or memory, * all its tasks are moved to the nearest ancestor with both resources. */
static void cpuset_hotplug_update_tasks(struct cpuset *cs) { static cpumask_t new_cpus; static nodemask_t new_mems; bool cpus_updated; bool mems_updated; retry: wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); mutex_lock(&cpuset_mutex); /* * We have raced with task attaching. We wait until attaching * is finished, so we won't attach a task to an empty cpuset. */ if (cs->attach_in_progress) { mutex_unlock(&cpuset_mutex); goto retry; } cpumask_and(&new_cpus, cs->cpus_allowed, parent_cs(cs)->effective_cpus); nodes_and(new_mems, cs->mems_allowed, parent_cs(cs)->effective_mems); cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus); mems_updated = !nodes_equal(new_mems, cs->effective_mems); if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) hotplug_update_tasks(cs, &new_cpus, &new_mems, cpus_updated, mems_updated); else hotplug_update_tasks_legacy(cs, &new_cpus, &new_mems, cpus_updated, mems_updated); mutex_unlock(&cpuset_mutex); }

Contributors

PersonTokensPropCommitsCommitProp
li zefanli zefan12978.66%333.33%
tejun heotejun heo2112.80%333.33%
srivatsa s. bhatsrivatsa s. bhat84.88%111.11%
paul jacksonpaul jackson53.05%111.11%
miao xiemiao xie10.61%111.11%
Total164100.00%9100.00%

/**
 * cpuset_hotplug_workfn - handle CPU/memory hotunplug for a cpuset
 * @work: the work item being run; not referenced in the body
 *
 * This function is called after either CPU or memory configuration has
 * changed and updates cpuset accordingly.  The top_cpuset is always
 * synchronized to cpu_active_mask and N_MEMORY, which is necessary in
 * order to make cpusets transparent (of no effect) on systems that are
 * actively using CPU hotplug but making no active use of cpusets.
 *
 * Non-root cpusets are only affected by offlining.  If any CPUs or memory
 * nodes have been taken down, cpuset_hotplug_update_tasks() is invoked on
 * all descendants.
 *
 * Note that CPU offlining during suspend is ignored.  We don't modify
 * cpusets across suspend/resume cycles at all.
 */
static void cpuset_hotplug_workfn(struct work_struct *work)
{
	/* scratch masks in static storage rather than on the stack */
	static cpumask_t new_cpus;
	static nodemask_t new_mems;
	bool cpus_updated, mems_updated;
	bool on_dfl = cgroup_subsys_on_dfl(cpuset_cgrp_subsys);

	mutex_lock(&cpuset_mutex);

	/* fetch the available cpus/mems and find out which changed how */
	cpumask_copy(&new_cpus, cpu_active_mask);
	new_mems = node_states[N_MEMORY];

	cpus_updated = !cpumask_equal(top_cpuset.effective_cpus, &new_cpus);
	mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems);

	/* synchronize cpus_allowed to cpu_active_mask */
	if (cpus_updated) {
		spin_lock_irq(&callback_lock);
		/* on the default hierarchy only the effective mask is changed */
		if (!on_dfl)
			cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
		cpumask_copy(top_cpuset.effective_cpus, &new_cpus);
		spin_unlock_irq(&callback_lock);
		/* we don't mess with cpumasks of tasks in top_cpuset */
	}

	/* synchronize mems_allowed to N_MEMORY */
	if (mems_updated) {
		spin_lock_irq(&callback_lock);
		/* on the default hierarchy only the effective mask is changed */
		if (!on_dfl)
			top_cpuset.mems_allowed = new_mems;
		top_cpuset.effective_mems = new_mems;
		spin_unlock_irq(&callback_lock);
		update_tasks_nodemask(&top_cpuset);
	}

	mutex_unlock(&cpuset_mutex);

	/* if cpus or mems changed, we need to propagate to descendants */
	if (cpus_updated || mems_updated) {
		struct cpuset *cs;
		struct cgroup_subsys_state *pos_css;

		rcu_read_lock();
		cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
			/* top_cpuset was handled above; skip cpusets we cannot pin */
			if (cs == &top_cpuset || !css_tryget_online(&cs->css))
				continue;
			/*
			 * The reference taken by css_tryget_online() is held
			 * while RCU is dropped: cpuset_hotplug_update_tasks()
			 * takes cpuset_mutex and may wait for attaches to finish.
			 */
			rcu_read_unlock();

			cpuset_hotplug_update_tasks(cs);

			rcu_read_lock();
			css_put(&cs->css);
		}
		rcu_read_unlock();
	}

	/* rebuild sched domains if cpus_allowed has changed */
	if (cpus_updated)
		rebuild_sched_domains();
}

Contributors

PersonTokensPropCommitsCommitProp
tejun heotejun heo11043.82%1038.46%
li zefanli zefan7529.88%623.08%
paul jacksonpaul jackson249.56%311.54%
maksim krasnyanskiymaksim krasnyanskiy176.77%13.85%
vladimir davydovvladimir davydov83.19%13.85%
srivatsa s. bhatsrivatsa s. bhat51.99%13.85%
miao xiemiao xie51.99%13.85%
cliff wickmancliff wickman41.59%13.85%
li zhongli zhong20.80%13.85%
lai jiangshanlai jiangshan10.40%13.85%
Total251100.00%26100.00%


/*
 * cpuset_update_active_cpus - react to a change in the set of active CPUs
 * @cpu_online: not consulted by this function
 */
void cpuset_update_active_cpus(bool cpu_online)
{
	/*
	 * This runs inside the cpu hotplug critical region, which usually
	 * nests inside cgroup synchronization.  To avoid a reverse locking
	 * order, the real hotplug processing is bounced to a work item.
	 *
	 * partition_sched_domains() still has to be done synchronously;
	 * otherwise the scheduler will get confused and put tasks to the
	 * dead CPU.  So fall back to the default single domain here and let
	 * cpuset_hotplug_workfn() rebuild it as necessary.
	 */
	partition_sched_domains(1, NULL, NULL);

	schedule_work(&cpuset_hotplug_work);
}

Contributors

PersonTokensPropCommitsCommitProp
tejun heotejun heo24100.00%2100.00%
Total24100.00%2100.00%

/*
 * Notifier callback that keeps top_cpuset.mems_allowed tracking
 * node_states[N_MEMORY]; call it anytime after node_states[N_MEMORY]
 * changes.  None of the notifier arguments are examined: the callback only
 * queues cpuset_hotplug_work and always answers NOTIFY_OK.
 * See cpuset_update_active_cpus() for CPU hotplug handling.
 */

static int cpuset_track_online_nodes(struct notifier_block *self,
				     unsigned long action, void *arg)
{
	schedule_work(&cpuset_hotplug_work);

	return NOTIFY_OK;
}

Contributors

PersonTokensPropCommitsCommitProp
miao xiemiao xie1657.14%116.67%
paul jacksonpaul jackson414.29%116.67%
al viroal viro310.71%116.67%
dmitry adamushkodmitry adamushko27.14%116.67%
tejun heotejun heo27.14%116.67%
maksim krasnyanskiymaksim krasnyanskiy13.57%116.67%
Total28100.00%6100.00%

/* Notifier block that routes memory hotplug events to cpuset_track_online_nodes(). */
static struct notifier_block cpuset_track_online_nodes_nb = {
	.notifier_call = cpuset_track_online_nodes,
	/* NOTE(review): the reason for priority 10 is not recorded here */
	.priority = 10,		/* ??! */
};

/**
 * cpuset_init_smp - initialize cpus_allowed
 *
 * Description: Finish top cpuset after cpu, node maps are initialized
 **/
void __init cpuset_init_smp(void)
{
	/* Configured and effective cpus both start from the active cpus. */
	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
	cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask);

	/* Likewise for memory, taken from the nodes that have memory. */
	top_cpuset.mems_allowed = node_states[N_MEMORY];
	top_cpuset.effective_mems = node_states[N_MEMORY];
	top_cpuset.old_mems_allowed = top_cpuset.mems_allowed;

	register_hotmemory_notifier(&cpuset_track_online_nodes_nb);

	/* The migration workqueue is mandatory; failing here is fatal. */
	cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0);
	BUG_ON(!cpuset_migrate_mm_wq);
}

Contributors

PersonTokensPropCommitsCommitProp
li zefanli zefan3041.10%330.00%
paul jacksonpaul jackson1723.29%110.00%
tejun heotejun heo1520.55%110.00%
miao xiemiao xie34.11%110.00%
andrew mortonandrew morton34.11%110.00%
christoph lameterchristoph lameter34.11%110.00%
peter zijlstrapeter zijlstra11.37%110.00%
lai jiangshanlai jiangshan11.37%110.00%
Total73100.00%10100.00%

/**
 * cpuset_cpus_allowed - return cpus_allowed mask from a task's cpuset.
 * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
 * @pmask: pointer to struct cpumask variable to receive cpus_allowed set.
 *
 * Description: Stores in @pmask the cpus allowed by the cpuset attached
 * to the specified @tsk.  Guaranteed to produce some non-empty subset of
 * cpu_online_mask, even if this means going outside the task's cpuset.
 **/

void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
{
	unsigned long irqflags;

	spin_lock_irqsave(&callback_lock, irqflags);

	rcu_read_lock();
	guarantee_online_cpus(task_cs(tsk), pmask);
	rcu_read_unlock();

	spin_unlock_irqrestore(&callback_lock, irqflags);
}

Contributors

PersonTokensPropCommitsCommitProp
paul jacksonpaul jackson2039.22%112.50%
vladimir davydovvladimir davydov1223.53%112.50%
li zefanli zefan713.73%337.50%
mike travismike travis59.80%112.50%
oleg nesterovoleg nesterov47.84%112.50%
paul menagepaul menage35.88%112.50%
Total51100.00%8100.00%


/* Reset @tsk's allowed cpus to the effective cpus of its cpuset. */
void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
{
	rcu_read_lock();
	do_set_cpus_allowed(tsk, task_cs(tsk)->effective_cpus);
	rcu_read_unlock();

	/*
	 * tsk->cpus_allowed belongs to us, so nobody can change it under
	 * us.
	 *
	 * The cpuset and its mask were however read locklessly, so this can
	 * race with cgroup_attach_task() or update_cpumask() and end up
	 * with the wrong tsk->cpus_allowed.  Both of those imply a
	 * subsequent cpuset_change_cpumask()->set_cpus_allowed_ptr(), which
	 * takes task_rq_lock().
	 *
	 * If we run after it dropped the lock we must see all changes made
	 * to the cpuset's mask.  Otherwise we may temporarily set a mask
	 * that is not right from task_cs()'s point of view; the pending
	 * set_cpus_allowed_ptr() will fix things.
	 *
	 * select_fallback_rq() will fix things up and set
	 * cpu_possible_mask if required.
	 */
}

Contributors

PersonTokensPropCommitsCommitProp
oleg nesterovoleg nesterov2275.86%125.00%
li zefanli zefan517.24%250.00%
peter zijlstrapeter zijlstra26.90%125.00%
Total29100.00%4100.00%


/* Allow the current task to use every memory node. */
void __init cpuset_init_current_mems_allowed(void)
{
	nodes_setall(current->mems_allowed);
}

Contributors

PersonTokensPropCommitsCommitProp
paul jacksonpaul jackson1173.33%133.33%
mike travismike travis320.00%133.33%
rasmus villemoesrasmus villemoes16.67%133.33%
Total15100.00%3100.00%

/**
 * cpuset_mems_allowed - return mems_allowed mask from a task's cpuset.
 * @tsk: pointer to task_struct from which to obtain cpuset->mems_allowed.
 *
 * Description: Returns, by value, the memory nodes allowed by the cpuset
 * attached to the specified @tsk.  Guaranteed to return some non-empty
 * subset of node_states[N_MEMORY], even if this means going outside the
 * task's cpuset.
 **/

nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
{
	unsigned long irqflags;
	nodemask_t allowed;

	spin_lock_irqsave(&callback_lock, irqflags);

	rcu_read_lock();
	guarantee_online_mems(task_cs(tsk), &allowed);
	rcu_read_unlock();

	spin_unlock_irqrestore(&callback_lock, irqflags);

	return allowed;
}

Contributors

PersonTokensPropCommitsCommitProp
paul jacksonpaul jackson3362.26%120.00%
vladimir davydovvladimir davydov1222.64%120.00%
li zefanli zefan59.43%240.00%
paul menagepaul menage35.66%120.00%
Total53100.00%5100.00%

/**
 * cpuset_nodemask_valid_mems_allowed - check nodemask vs. current mems_allowed
 * @nodemask: the nodemask to be checked
 *
 * Does @nodemask share at least one node with current->mems_allowed?
 */
int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
{
	return nodes_intersects(current->mems_allowed, *nodemask);
}

Contributors

PersonTokensPropCommitsCommitProp
paul jacksonpaul jackson1365.00%150.00%
mel gormanmel gorman735.00%150.00%
Total20100.00%2100.00%

/*
 * nearest_hardwall_ancestor() - Walk up from the specified cpuset and
 * return the first cpuset (the starting one included) that is
 * mem_exclusive or mem_hardwall.  Call holding callback_lock.  If none is
 * mem_exclusive or mem_hardwall (an unusual configuration), the walk ends
 * at, and returns, the root cpuset.
 */
static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
{
	for (;;) {
		struct cpuset *up;

		if (is_mem_exclusive(cs) || is_mem_hardwall(cs))
			break;

		up = parent_cs(cs);
		if (!up)
			break;

		cs = up;
	}

	return cs;
}

Contributors

PersonTokensPropCommitsCommitProp
paul jacksonpaul jackson2967.44%133.33%
paul menagepaul menage818.60%133.33%
tejun heotejun heo613.95%133.33%
Total43100.00%3100.00%

/**
 * __cpuset_node_allowed - Can we allocate on a memory node?
 * @node: is this an allowed node?
 * @gfp_mask: memory allocation flags
 *
 * The answer is yes when we're in interrupt, when @node is set in
 * current's mems_allowed, or when current has access to memory reserves
 * due to TIF_MEMDIE.  It is also yes for a request without __GFP_HARDWALL
 * when current is exiting, or when @node is set in the nearest hardwalled
 * cpuset ancestor to current's cpuset.  Otherwise, no.
 *
 * GFP_USER allocations are marked with the __GFP_HARDWALL bit, and do not
 * allow allocations outside the current task's cpuset unless the task has
 * been OOM killed and is marked TIF_MEMDIE.  GFP_KERNEL allocations are
 * not so marked, so can escape to the nearest enclosing hardwalled
 * ancestor cpuset.
 *
 * Scanning up parent cpusets requires callback_lock.  The __alloc_pages()
 * routine only calls here with the __GFP_HARDWALL bit _not_ set if it's a
 * GFP_KERNEL allocation, and all nodes in the current task's mems_allowed
 * came up empty on the first pass over the zonelist.  So only GFP_KERNEL
 * allocations, if all nodes in the cpuset are short of memory, might
 * require taking the callback_lock.
 *
 * The first call here from mm/page_alloc:get_page_from_freelist() has
 * __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, so no
 * allocation on a node outside the cpuset is allowed (unless in interrupt,
 * of course).
 *
 * The second pass through get_page_from_freelist() doesn't even call here
 * for GFP_ATOMIC calls.  For those calls, the __alloc_pages() variable
 * 'wait' is not set, and the bit ALLOC_CPUSET is not set in alloc_flags.
 * That logic and the checks below have the combined effect that:
 *	in_interrupt - any node ok (current task context irrelevant)
 *	GFP_ATOMIC   - any node ok
 *	TIF_MEMDIE   - any node ok
 *	GFP_KERNEL   - any node in enclosing hardwalled cpuset ok
 *	GFP_USER     - only nodes in current task's mems allowed ok.
 */
bool __cpuset_node_allowed(int node, gfp_t gfp_mask)
{
	struct cpuset *wall;		/* nearest hardwalled ancestor */
	bool permitted;			/* verdict of the ancestor scan */
	unsigned long irqflags;

	if (in_interrupt())
		return true;
	if (node_isset(node, current->mems_allowed))
		return true;

	/*
	 * A task that was OOM killed has access to memory reserves and may
	 * get memory anywhere.
	 */
	if (unlikely(test_thread_flag(TIF_MEMDIE)))
		return true;

	/* A hardwall request goes no further than mems_allowed. */
	if (gfp_mask & __GFP_HARDWALL)
		return false;

	/* Let a dying task have memory. */
	if (current->flags & PF_EXITING)
		return true;

	/* Not hardwall and node outside mems_allowed: scan up cpusets */
	spin_lock_irqsave(&callback_lock, irqflags);

	rcu_read_lock();
	wall = nearest_hardwall_ancestor(task_cs(current));
	permitted = node_isset(node, wall->mems_allowed);
	rcu_read_unlock();

	spin_unlock_irqrestore(&callback_lock, irqflags);

	return permitted;
}

Contributors

PersonTokensPropCommitsCommitProp
paul jacksonpaul jackson8162.31%320.00%
david rientjesdavid rientjes1511.54%213.33%
vladimir davydovvladimir davydov1310.00%213.33%
vlastimil babkavlastimil babka64.62%16.67%
bob piccobob picco53.85%16.67%
paul menagepaul menage43.08%213.33%
li zefanli zefan32.31%213.33%
linus torvaldslinus torvalds21.54%16.67%
al viroal viro10.77%16.67%
Total130100.00%15100.00%

/**
 * cpuset_mem_spread_node() - On which node to begin search for a file page
 * cpuset_slab_spread_node() - On which node to begin search for a slab page
 *
 * For a task marked PF_SPREAD_PAGE or PF_SPREAD_SLAB (as tasks in a cpuset
 * with is_spread_page or is_spread_slab set are), allocations that use
 * cpuset_mem_spread_node() to pick their starting node - certain page
 * cache or slab cache pages, such as those used for file system buffers
 * and inode caches - do not start their search for a free page on the
 * local node.  The starting node is instead spread around the task's
 * mems_allowed nodes.
 *
 * We don't have to worry about the returned node being offline because
 * "it can't happen", and even if it did, it would be ok.
 *
 * The routines calling guarantee_online_mems() are careful to only set
 * nodes in task->mems_allowed that are online, so the code below should
 * never return an offline node.  If it did, that would still be ok: this
 * routine does not return the node where the allocation must be, only the
 * node where the search should start.  The zonelist passed to
 * __alloc_pages() will include all nodes.  If the slab allocator is passed
 * an offline node, it will fall back to the local node.
 * See kmem_cache_alloc_node().
 */

/* Advance *@rotor to the next node in current->mems_allowed; return it. */
static int cpuset_spread_node(int *rotor)
{
	int next = next_node_in(*rotor, current->mems_allowed);

	*rotor = next;
	return next;
}

Contributors

PersonTokensPropCommitsCommitProp
paul jacksonpaul jackson1354.17%133.33%
jack steinerjack steiner729.17%133.33%
andrew mortonandrew morton416.67%133.33%
Total24100.00%3100.00%


/* Pick the node on which to start looking for a page-cache page. */
int cpuset_mem_spread_node(void)
{
	int *rotor = &current->cpuset_mem_spread_rotor;

	/* An unset rotor starts at a random node of mems_allowed. */
	if (*rotor == NUMA_NO_NODE)
		*rotor = node_random(&current->mems_allowed);

	return cpuset_spread_node(rotor);
}

Contributors

PersonTokensPropCommitsCommitProp
michal hockomichal hocko2055.56%150.00%
jack steinerjack steiner1644.44%150.00%
Total36100.00%2100.00%


/* Pick the node on which to start looking for a slab page. */
int cpuset_slab_spread_node(void)
{
	int *rotor = &current->cpuset_slab_spread_rotor;

	/* An unset rotor starts at a random node of mems_allowed. */
	if (*rotor == NUMA_NO_NODE)
		*rotor = node_random(&current->mems_allowed);

	return cpuset_spread_node(rotor);
}

Contributors

PersonTokensPropCommitsCommitProp
michal hockomichal hocko2055.56%150.00%
jack steinerjack steiner1644.44%150.00%
Total36100.00%2100.00%

EXPORT_SYMBOL_GPL(cpuset_mem_spread_node);

/**
 * cpuset_mems_allowed_intersects - Does @tsk1's mems_allowed intersect @tsk2's?
 * @tsk1: pointer to task_struct of some task.
 * @tsk2: pointer to task_struct of some other task.
 *
 * Description: Return true if the mems_allowed of @tsk1 and of @tsk2 have
 * at least one node in common.  The OOM killer uses this to determine if
 * the memory usage of one of the tasks might impact the memory available
 * to the other.
 **/

int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
				   const struct task_struct *tsk2)
{
	return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
}

Contributors

PersonTokensPropCommitsCommitProp
paul jacksonpaul jackson2068.97%266.67%
david rientjesdavid rientjes931.03%133.33%
Total29100.00%3100.00%

/**
 * cpuset_print_current_mems_allowed - prints current's cpuset and mems_allowed
 *
 * Description: Writes one line to the kernel log with current's name, the
 * name of its cpuset, and the cached copy of its mems_allowed.
 */
void cpuset_print_current_mems_allowed(void)
{
	struct cgroup *cs_cgroup;

	rcu_read_lock();

	cs_cgroup = task_cs(current)->css.cgroup;

	/* The line is assembled from three consecutive printk pieces. */
	pr_info("%s cpuset=", current->comm);
	pr_cont_cgroup_name(cs_cgroup);
	pr_cont(" mems_allowed=%*pbl\n",
		nodemask_pr_args(&current->mems_allowed));

	rcu_read_unlock();
}

Contributors

PersonTokensPropCommitsCommitProp
david rientjesdavid rientjes2748.21%228.57%
li zefanli zefan1425.00%228.57%
tejun heotejun heo1425.00%228.57%
fabian frederickfabian frederick11.79%114.29%
Total56100.00%7100.00%

/* * Collection of memory_pressure is suppressed unless * this flag is enabled by writing "1" to the special * cpuset file 'memory_pressure_enabled' in the root cpuset. */ int cpuset_memory_pressure_enabled __read_mostly; /** * cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims. * * Keep a running average of the rate of synchronous (direct) * page reclaim efforts initiated by tasks in each cpuset. * * This represents the rate at which some task in the cpuset * ran low on memory on all nodes it was allowed to use, and * had to enter the kernels page reclaim code in an effort to * create more free memory by tossing clean pages or swapping * or writing dirty pages. * * Display to user space in the per-cpuset read-only file * "memory_pressure". Value displayed is an integer * representing the recent rate of entry into the synchronous * (direct) page reclaim by any task attached to the cpuset. **/
void __cpuset_memory_pressure_bump(void) { rcu_read_lock(); fmeter_markevent(&task_cs(current)->fmeter); rcu_read_unlock(); }

Contributors

PersonTokensPropCommitsCommitProp
paul jacksonpaul jackson1666.67%133.33%
paul menagepaul menage416.67%133.33%
li zefanli zefan416.67%133.33%
Total24100.00%3100.00%

#ifdef CONFIG_PROC_PID_CPUSET
/*
 * proc_cpuset_show()
 *  - Print task's cpuset path into seq_file.
 *  - Used for /proc/<pid>/cpuset.
 *  - Returns 0 on success, -ENOMEM if the path buffer cannot be allocated
 *    and -ENAMETOOLONG if no path could be produced.
 *  - No need to task_lock(tsk) on this tsk->cpuset reference, as it
 *    doesn't really matter if tsk->cpuset changes after we read it,
 *    and we take cpuset_mutex, keeping cpuset_attach() from changing it
 *    anyway.
 */
int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
		     struct pid *pid, struct task_struct *tsk)
{
	struct cgroup_subsys_state *css;
	char *path_buf;
	char *path;
	int ret;

	path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!path_buf)
		return -ENOMEM;

	/* The css reference is only needed while the path is built. */
	css = task_get_css(tsk, cpuset_cgrp_id);
	path = cgroup_path_ns(css->cgroup, path_buf, PATH_MAX,
			      current->nsproxy->cgroup_ns);
	css_put(css);

	if (path) {
		seq_puts(m, path);
		seq_putc(m, '\n');
		ret = 0;
	} else {
		ret = -ENAMETOOLONG;
	}

	kfree(path_buf);
	return ret;
}

Contributors

PersonTokensPropCommitsCommitProp
paul jacksonpaul jackson7252.17%220.00%
eric w. biedermaneric w. biederman1813.04%220.00%
tejun heotejun heo1611.59%220.00%
aditya kaliaditya kali128.70%110.00%
paul menagepaul menage117.97%110.00%
li zefanli zefan96.52%220.00%
Total138100.00%10100.00%

#endif /* CONFIG_PROC_PID_CPUSET */

/*
 * Display task mems_allowed in /proc/<pid>/status file.
 *
 * Emits two lines into @m, both rendered from @task->mems_allowed:
 *   "Mems_allowed:"       using the %*pb  (bitmap) format specifier
 *   "Mems_allowed_list:"  using the %*pbl (bitmap list) format specifier
 * nodemask_pr_args() supplies the arguments consumed by each specifier.
 */
void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
{
	seq_printf(m, "Mems_allowed:\t%*pb\n",
		   nodemask_pr_args(&task->mems_allowed));
	seq_printf(m, "Mems_allowed_list:\t%*pbl\n",
		   nodemask_pr_args(&task->mems_allowed));
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
paul jackson | 18 | 40.00% | 1 | 20.00%
mike travis | 10 | 22.22% | 1 | 20.00%
tejun heo | 8 | 17.78% | 1 | 20.00%
eric w. biederman | 7 | 15.56% | 1 | 20.00%
lai jiangshan | 2 | 4.44% | 1 | 20.00%
Total | 45 | 100.00% | 5 | 100.00%


Overall Contributors

Person | Tokens | Prop | Commits | CommitProp
paul jackson | 1915 | 22.99% | 26 | 11.45%
tejun heo | 1691 | 20.30% | 51 | 22.47%
li zefan | 1616 | 19.40% | 49 | 21.59%
paul menage | 1450 | 17.41% | 9 | 3.96%
miao xie | 401 | 4.81% | 10 | 4.41%
hidetoshi seto | 196 | 2.35% | 1 | 0.44%
maksim krasnyanskiy | 128 | 1.54% | 1 | 0.44%
david rientjes | 122 | 1.46% | 10 | 4.41%
vladimir davydov | 96 | 1.15% | 2 | 0.88%
lai jiangshan | 83 | 1.00% | 5 | 2.20%
srivatsa s. bhat | 58 | 0.70% | 2 | 0.88%
rik van riel | 55 | 0.66% | 1 | 0.44%
cliff wickman | 53 | 0.64% | 2 | 0.88%
ben blum | 51 | 0.61% | 2 | 0.88%
michal hocko | 40 | 0.48% | 1 | 0.44%
jack steiner | 40 | 0.48% | 1 | 0.44%
mel gorman | 40 | 0.48% | 4 | 1.76%
juri lelli | 37 | 0.44% | 2 | 0.88%
mike travis | 28 | 0.34% | 2 | 0.88%
andrew morton | 27 | 0.32% | 2 | 0.88%
oleg nesterov | 26 | 0.31% | 2 | 0.88%
eric w. biederman | 25 | 0.30% | 3 | 1.32%
rusty russell | 17 | 0.20% | 2 | 0.88%
al viro | 17 | 0.20% | 3 | 1.32%
arnd bergmann | 16 | 0.19% | 1 | 0.44%
gu zheng | 14 | 0.17% | 1 | 0.44%
peter zijlstra | 14 | 0.17% | 4 | 1.76%
aditya kali | 12 | 0.14% | 1 | 0.44%
vlastimil babka | 10 | 0.12% | 1 | 0.44%
fabian frederick | 5 | 0.06% | 2 | 0.88%
bob picco | 5 | 0.06% | 1 | 0.44%
rakib mullick | 5 | 0.06% | 1 | 0.44%
christoph lameter | 4 | 0.05% | 1 | 0.44%
ingo molnar | 4 | 0.05% | 2 | 0.88%
david howells | 4 | 0.05% | 2 | 0.88%
david p. quigley | 3 | 0.04% | 1 | 0.44%
linus torvalds | 2 | 0.02% | 1 | 0.44%
dave hansen | 2 | 0.02% | 1 | 0.44%
gautham r shenoy | 2 | 0.02% | 1 | 0.44%
li zhong | 2 | 0.02% | 1 | 0.44%
yinghai lu | 2 | 0.02% | 1 | 0.44%
dmitry adamushko | 2 | 0.02% | 1 | 0.44%
alban crequy | 1 | 0.01% | 1 | 0.44%
zhao hongjiang | 1 | 0.01% | 1 | 0.44%
dan carpenter | 1 | 0.01% | 1 | 0.44%
rasmus villemoes | 1 | 0.01% | 1 | 0.44%
paul gortmaker | 1 | 0.01% | 1 | 0.44%
dinakar guniguntala | 1 | 0.01% | 1 | 0.44%
adrian bunk | 1 | 0.01% | 1 | 0.44%
viresh kumar | 1 | 0.01% | 1 | 0.44%
heiko carstens | 1 | 0.01% | 1 | 0.44%
arun sharma | 1 | 0.01% | 1 | 0.44%
Total | 8330 | 100.00% | 227 | 100.00%
Directory: kernel
Information contained on this website is for historical information purposes only and does not indicate or represent copyright ownership.
{% endraw %}