cregit-Linux how code gets into the kernel

Release 4.15 kernel/cgroup/cpuset.c

Directory: kernel/cgroup
/*
 *  kernel/cpuset.c
 *
 *  Processor and Memory placement constraints for sets of tasks.
 *
 *  Copyright (C) 2003 BULL SA.
 *  Copyright (C) 2004-2007 Silicon Graphics, Inc.
 *  Copyright (C) 2006 Google, Inc
 *
 *  Portions derived from Patrick Mochel's sysfs code.
 *  sysfs is Copyright (c) 2001-3 Patrick Mochel
 *
 *  2003-10-10 Written by Simon Derr.
 *  2003-10-22 Updates by Stephen Hemminger.
 *  2004 May-July Rework by Paul Jackson.
 *  2006 Rework by Paul Menage to use generic cgroups
 *  2008 Rework of the scheduler domains and CPU hotplug handling
 *       by Max Krasnyansky
 *
 *  This file is subject to the terms and conditions of the GNU General Public
 *  License.  See the file COPYING in the main directory of the Linux
 *  distribution for more details.
 */

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpuset.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/mempolicy.h>
#include <linux/mm.h>
#include <linux/memory.h>
#include <linux/export.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/seq_file.h>
#include <linux/security.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/time.h>
#include <linux/time64.h>
#include <linux/backing-dev.h>
#include <linux/sort.h>
#include <linux/oom.h>
#include <linux/sched/isolation.h>
#include <linux/uaccess.h>
#include <linux/atomic.h>
#include <linux/mutex.h>
#include <linux/cgroup.h>
#include <linux/wait.h>


DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key);

DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);

/* See "Frequency meter" comments, below. */


struct fmeter {
	
int cnt;		/* unprocessed events count */
	
int val;		/* most recent output value */
	
time64_t time;		/* clock (secs) when val computed */
	
spinlock_t lock;	/* guards read or write of above */
};


struct cpuset {
	
struct cgroup_subsys_state css;

	
unsigned long flags;		/* "unsigned long" so bitops work */

	/*
         * On default hierarchy:
         *
         * The user-configured masks can only be changed by writing to
         * cpuset.cpus and cpuset.mems, and won't be limited by the
         * parent masks.
         *
         * The effective masks is the real masks that apply to the tasks
         * in the cpuset. They may be changed if the configured masks are
         * changed or hotplug happens.
         *
         * effective_mask == configured_mask & parent's effective_mask,
         * and if it ends up empty, it will inherit the parent's mask.
         *
         *
         * On legacy hierachy:
         *
         * The user-configured masks are always the same with effective masks.
         */

	/* user-configured CPUs and Memory Nodes allow to tasks */
	
cpumask_var_t cpus_allowed;
	
nodemask_t mems_allowed;

	/* effective CPUs and Memory Nodes allow to tasks */
	
cpumask_var_t effective_cpus;
	
nodemask_t effective_mems;

	/*
         * This is old Memory Nodes tasks took on.
         *
         * - top_cpuset.old_mems_allowed is initialized to mems_allowed.
         * - A new cpuset's old_mems_allowed is initialized when some
         *   task is moved into it.
         * - old_mems_allowed is used in cpuset_migrate_mm() when we change
         *   cpuset.mems_allowed and have tasks' nodemask updated, and
         *   then old_mems_allowed is updated to mems_allowed.
         */
	
nodemask_t old_mems_allowed;

	
struct fmeter fmeter;		/* memory_pressure filter */

	/*
         * Tasks are being attached to this cpuset.  Used to prevent
         * zeroing cpus/mems_allowed between ->can_attach() and ->attach().
         */
	
int attach_in_progress;

	/* partition number for rebuild_sched_domains() */
	
int pn;

	/* for custom sched domain */
	
int relax_domain_level;
};


static inline struct cpuset *css_cs(struct cgroup_subsys_state *css) { return css ? container_of(css, struct cpuset, css) : NULL; }

Contributors

PersonTokensPropCommitsCommitProp
Paul Menage2172.41%150.00%
Tejun Heo827.59%150.00%
Total29100.00%2100.00%

/* Retrieve the cpuset for a task */
static inline struct cpuset *task_cs(struct task_struct *task) { return css_cs(task_css(task, cpuset_cgrp_id)); }

Contributors

PersonTokensPropCommitsCommitProp
Paul Menage2288.00%125.00%
Tejun Heo312.00%375.00%
Total25100.00%4100.00%


static inline struct cpuset *parent_cs(struct cpuset *cs) { return css_cs(cs->css.parent); }

Contributors

PersonTokensPropCommitsCommitProp
Tejun Heo24100.00%3100.00%
Total24100.00%3100.00%

#ifdef CONFIG_NUMA
static inline bool task_has_mempolicy(struct task_struct *task) { return task->mempolicy; }

Contributors

PersonTokensPropCommitsCommitProp
David Rientjes17100.00%1100.00%
Total17100.00%1100.00%

#else
static inline bool task_has_mempolicy(struct task_struct *task) { return false; }

Contributors

PersonTokensPropCommitsCommitProp
David Rientjes15100.00%1100.00%
Total15100.00%1100.00%

#endif /* bits in struct cpuset flags field */ typedef enum { CS_ONLINE, CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, CS_MEM_HARDWALL, CS_MEMORY_MIGRATE, CS_SCHED_LOAD_BALANCE, CS_SPREAD_PAGE, CS_SPREAD_SLAB, } cpuset_flagbits_t; /* convenient tests for these bits */
static inline bool is_cpuset_online(struct cpuset *cs) { return test_bit(CS_ONLINE, &cs->flags) && !css_is_dying(&cs->css); }

Contributors

PersonTokensPropCommitsCommitProp
Tejun Heo3196.88%266.67%
Srivatsa S. Bhat13.12%133.33%
Total32100.00%3100.00%


static inline int is_cpu_exclusive(const struct cpuset *cs) { return test_bit(CS_CPU_EXCLUSIVE, &cs->flags); }

Contributors

PersonTokensPropCommitsCommitProp
Paul Jackson24100.00%1100.00%
Total24100.00%1100.00%


static inline int is_mem_exclusive(const struct cpuset *cs) { return test_bit(CS_MEM_EXCLUSIVE, &cs->flags); }

Contributors

PersonTokensPropCommitsCommitProp
Paul Jackson24100.00%1100.00%
Total24100.00%1100.00%


static inline int is_mem_hardwall(const struct cpuset *cs) { return test_bit(CS_MEM_HARDWALL, &cs->flags); }

Contributors

PersonTokensPropCommitsCommitProp
Paul Menage24100.00%1100.00%
Total24100.00%1100.00%


static inline int is_sched_load_balance(const struct cpuset *cs) { return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); }

Contributors

PersonTokensPropCommitsCommitProp
Paul Jackson24100.00%1100.00%
Total24100.00%1100.00%


static inline int is_memory_migrate(const struct cpuset *cs) { return test_bit(CS_MEMORY_MIGRATE, &cs->flags); }

Contributors

PersonTokensPropCommitsCommitProp
Paul Jackson24100.00%1100.00%
Total24100.00%1100.00%


static inline int is_spread_page(const struct cpuset *cs) { return test_bit(CS_SPREAD_PAGE, &cs->flags); }

Contributors

PersonTokensPropCommitsCommitProp
Paul Jackson24100.00%1100.00%
Total24100.00%1100.00%


static inline int is_spread_slab(const struct cpuset *cs) { return test_bit(CS_SPREAD_SLAB, &cs->flags); }

Contributors

PersonTokensPropCommitsCommitProp
Paul Jackson24100.00%1100.00%
Total24100.00%1100.00%

static struct cpuset top_cpuset = { .flags = ((1 << CS_ONLINE) | (1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)), }; /** * cpuset_for_each_child - traverse online children of a cpuset * @child_cs: loop cursor pointing to the current child * @pos_css: used for iteration * @parent_cs: target cpuset to walk children of * * Walk @child_cs through the online children of @parent_cs. Must be used * with RCU read locked. */ #define cpuset_for_each_child(child_cs, pos_css, parent_cs) \ css_for_each_child((pos_css), &(parent_cs)->css) \ if (is_cpuset_online(((child_cs) = css_cs((pos_css))))) /** * cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants * @des_cs: loop cursor pointing to the current descendant * @pos_css: used for iteration * @root_cs: target cpuset to walk ancestor of * * Walk @des_cs through the online descendants of @root_cs. Must be used * with RCU read locked. The caller may modify @pos_css by calling * css_rightmost_descendant() to skip subtree. @root_cs is included in the * iteration and the first node to be visited. */ #define cpuset_for_each_descendant_pre(des_cs, pos_css, root_cs) \ css_for_each_descendant_pre((pos_css), &(root_cs)->css) \ if (is_cpuset_online(((des_cs) = css_cs((pos_css))))) /* * There are two global locks guarding cpuset structures - cpuset_mutex and * callback_lock. We also require taking task_lock() when dereferencing a * task's cpuset pointer. See "The task_lock() exception", at the end of this * comment. * * A task must hold both locks to modify cpusets. If a task holds * cpuset_mutex, then it blocks others wanting that mutex, ensuring that it * is the only task able to also acquire callback_lock and be able to * modify cpusets. It can perform various checks on the cpuset structure * first, knowing nothing will change. It can also allocate memory while * just holding cpuset_mutex. While it is performing these checks, various * callback routines can briefly acquire callback_lock to query cpusets. * Once it is ready to make the changes, it takes callback_lock, blocking * everyone else. * * Calls to the kernel memory allocator can not be made while holding * callback_lock, as that would risk double tripping on callback_lock * from one of the callbacks into the cpuset code from within * __alloc_pages(). * * If a task is only holding callback_lock, then it has read-only * access to cpusets. * * Now, the task_struct fields mems_allowed and mempolicy may be changed * by other task, we use alloc_lock in the task_struct fields to protect * them. * * The cpuset_common_file_read() handlers only hold callback_lock across * small pieces of code, such as when reading out possibly multi-word * cpumasks and nodemasks. * * Accessing a task's cpuset should be done in accordance with the * guidelines for accessing subsystem state in kernel/cgroup.c */ static DEFINE_MUTEX(cpuset_mutex); static DEFINE_SPINLOCK(callback_lock); static struct workqueue_struct *cpuset_migrate_mm_wq; /* * CPU / memory hotplug is handled asynchronously. */ static void cpuset_hotplug_workfn(struct work_struct *work); static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn); static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq); /* * Cgroup v2 behavior is used when on default hierarchy or the * cgroup_v2_mode flag is set. */
static inline bool is_in_v2_mode(void) { return cgroup_subsys_on_dfl(cpuset_cgrp_subsys) || (cpuset_cgrp_subsys.root->flags & CGRP_ROOT_CPUSET_V2_MODE); }

Contributors

PersonTokensPropCommitsCommitProp
Waiman Long25100.00%1100.00%
Total25100.00%1100.00%

/* * This is ugly, but preserves the userspace API for existing cpuset * users. If someone tries to mount the "cpuset" filesystem, we * silently switch it to mount "cgroup" instead */
static struct dentry *cpuset_mount(struct file_system_type *fs_type, int flags, const char *unused_dev_name, void *data) { struct file_system_type *cgroup_fs = get_fs_type("cgroup"); struct dentry *ret = ERR_PTR(-ENODEV); if (cgroup_fs) { char mountopts[] = "cpuset,noprefix," "release_agent=/sbin/cpuset_release_agent"; ret = cgroup_fs->mount(cgroup_fs, flags, unused_dev_name, mountopts); put_filesystem(cgroup_fs); } return ret; }

Contributors

PersonTokensPropCommitsCommitProp
Paul Menage6275.61%133.33%
Al Viro1113.41%133.33%
Paul Jackson910.98%133.33%
Total82100.00%3100.00%

static struct file_system_type cpuset_fs_type = { .name = "cpuset", .mount = cpuset_mount, }; /* * Return in pmask the portion of a cpusets's cpus_allowed that * are online. If none are online, walk up the cpuset hierarchy * until we find one that does have some online cpus. * * One way or another, we guarantee to return some non-empty subset * of cpu_online_mask. * * Call with callback_lock or cpuset_mutex held. */
static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask) { while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) { cs = parent_cs(cs); if (unlikely(!cs)) { /* * The top cpuset doesn't have any online cpu as a * consequence of a race between cpuset_hotplug_work * and cpu hotplug notifier. But we know the top * cpuset's effective_cpus is on its way to to be * identical to cpu_online_mask. */ cpumask_copy(pmask, cpu_online_mask); return; } } cpumask_and(pmask, cs->effective_cpus, cpu_online_mask); }

Contributors

PersonTokensPropCommitsCommitProp
Paul Jackson2232.84%114.29%
Joonwoo Park2131.34%114.29%
Paul Menage1319.40%114.29%
Li Zefan811.94%342.86%
Tejun Heo34.48%114.29%
Total67100.00%7100.00%

/* * Return in *pmask the portion of a cpusets's mems_allowed that * are online, with memory. If none are online with memory, walk * up the cpuset hierarchy until we find one that does have some * online mems. The top cpuset always has some mems online. * * One way or another, we guarantee to return some non-empty subset * of node_states[N_MEMORY]. * * Call with callback_lock or cpuset_mutex held. */
static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask) { while (!nodes_intersects(cs->effective_mems, node_states[N_MEMORY])) cs = parent_cs(cs); nodes_and(*pmask, cs->effective_mems, node_states[N_MEMORY]); }

Contributors

PersonTokensPropCommitsCommitProp
Paul Menage2446.15%120.00%
Paul Jackson2140.38%120.00%
Tejun Heo35.77%120.00%
Li Zefan23.85%120.00%
Lai Jiangshan23.85%120.00%
Total52100.00%5100.00%

/* * update task's spread flag if cpuset's page/slab spread flag is set * * Call with callback_lock or cpuset_mutex held. */
static void cpuset_update_task_spread_flag(struct cpuset *cs, struct task_struct *tsk) { if (is_spread_page(cs)) task_set_spread_page(tsk); else task_clear_spread_page(tsk); if (is_spread_slab(cs)) task_set_spread_slab(tsk); else task_clear_spread_slab(tsk); }

Contributors

PersonTokensPropCommitsCommitProp
Miao Xie4076.92%150.00%
Li Zefan1223.08%150.00%
Total52100.00%2100.00%

/* * is_cpuset_subset(p, q) - Is cpuset p a subset of cpuset q? * * One cpuset is a subset of another if all its allowed CPUs and * Memory Nodes are a subset of the other, and its exclusive flags * are only set if the other's are set. Call holding cpuset_mutex. */
static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) { return cpumask_subset(p->cpus_allowed, q->cpus_allowed) && nodes_subset(p->mems_allowed, q->mems_allowed) && is_cpu_exclusive(p) <= is_cpu_exclusive(q) && is_mem_exclusive(p) <= is_mem_exclusive(q); }

Contributors

PersonTokensPropCommitsCommitProp
Paul Menage5793.44%133.33%
Paul Jackson34.92%133.33%
Li Zefan11.64%133.33%
Total61100.00%3100.00%

/** * alloc_trial_cpuset - allocate a trial cpuset * @cs: the cpuset that the trial cpuset duplicates */
static struct cpuset *alloc_trial_cpuset(struct cpuset *cs) { struct cpuset *trial; trial = kmemdup(cs, sizeof(*cs), GFP_KERNEL); if (!trial) return NULL; if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL)) goto free_cs; if (!alloc_cpumask_var(&trial->effective_cpus, GFP_KERNEL)) goto free_cpus; cpumask_copy(trial->cpus_allowed, cs->cpus_allowed); cpumask_copy(trial->effective_cpus, cs->effective_cpus); return trial; free_cpus: free_cpumask_var(trial->cpus_allowed); free_cs: kfree(trial); return NULL; }

Contributors

PersonTokensPropCommitsCommitProp
Li Zefan117100.00%3100.00%
Total117100.00%3100.00%

/** * free_trial_cpuset - free the trial cpuset * @trial: the trial cpuset to be freed */
static void free_trial_cpuset(struct cpuset *trial) { free_cpumask_var(trial->effective_cpus); free_cpumask_var(trial->cpus_allowed); kfree(trial); }

Contributors

PersonTokensPropCommitsCommitProp
Li Zefan30100.00%3100.00%
Total30100.00%3100.00%

/* * validate_change() - Used to validate that any proposed cpuset change * follows the structural rules for cpusets. * * If we replaced the flag and mask values of the current cpuset * (cur) with those values in the trial cpuset (trial), would * our various subset and exclusive rules still be valid? Presumes * cpuset_mutex held. * * 'cur' is the address of an actual, in-use cpuset. Operations * such as list traversal that depend on the actual address of the * cpuset in the list must use cur below, not trial. * * 'trial' is the address of bulk structure copy of cur, with * perhaps one or more of the fields cpus_allowed, mems_allowed, * or flags changed to new, trial values. * * Return 0 if valid, -errno if not. */
static int validate_change(struct cpuset *cur, struct cpuset *trial) { struct cgroup_subsys_state *css; struct cpuset *c, *par; int ret; rcu_read_lock(); /* Each of our child cpusets must be a subset of us */ ret = -EBUSY; cpuset_for_each_child(c, css, cur) if (!is_cpuset_subset(c, trial)) goto out; /* Remaining checks don't apply to root cpuset */ ret = 0; if (cur == &top_cpuset) goto out; par = parent_cs(cur); /* On legacy hiearchy, we must be a subset of our parent cpuset. */ ret = -EACCES; if (!is_in_v2_mode() && !is_cpuset_subset(trial, par)) goto out; /* * If either I or some sibling (!= me) is exclusive, we can't * overlap */ ret = -EINVAL; cpuset_for_each_child(c, css, par) { if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && c != cur && cpumask_intersects(trial->cpus_allowed, c->cpus_allowed)) goto out; if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) && c != cur && nodes_intersects(trial->mems_allowed, c->mems_allowed)) goto out; } /* * Cpusets with tasks - existing or newly being attached - can't * be changed to have empty cpus_allowed or mems_allowed. */ ret = -ENOSPC; if ((cgroup_is_populated(cur->css.cgroup) || cur->attach_in_progress)) { if (!cpumask_empty(cur->cpus_allowed) && cpumask_empty(trial->cpus_allowed)) goto out; if (!nodes_empty(cur->mems_allowed) && nodes_empty(trial->mems_allowed)) goto out; } /* * We can't shrink if we won't have enough room for SCHED_DEADLINE * tasks. */ ret = -EBUSY; if (is_cpu_exclusive(cur) && !cpuset_cpumask_can_shrink(cur->cpus_allowed, trial->cpus_allowed)) goto out; ret = 0; out: rcu_read_unlock(); return ret; }

Contributors

PersonTokensPropCommitsCommitProp
Paul Jackson10234.93%317.65%
Tejun Heo8027.40%529.41%
Paul Menage4615.75%211.76%
Li Zefan3210.96%423.53%
Juri Lelli289.59%15.88%
Dave Hansen20.68%15.88%
Waiman Long20.68%15.88%
Total292100.00%17100.00%

#ifdef CONFIG_SMP /* * Helper routine for generate_sched_domains(). * Do cpusets a, b have overlapping effective cpus_allowed masks? */
static int cpusets_overlap(struct cpuset *a, struct cpuset *b) { return cpumask_intersects(a->effective_cpus, b->effective_cpus); }

Contributors

PersonTokensPropCommitsCommitProp
Paul Jackson2589.29%133.33%
Li Zefan310.71%266.67%
Total28100.00%3100.00%


static void update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c) { if (dattr->relax_domain_level < c->relax_domain_level) dattr->relax_domain_level = c->relax_domain_level; return; }

Contributors

PersonTokensPropCommitsCommitProp
Hidetoshi Seto35100.00%1100.00%
Total35100.00%1100.00%


static void update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *root_cs) { struct cpuset *cp; struct cgroup_subsys_state *pos_css; rcu_read_lock(); cpuset_for_each_descendant_pre(cp, pos_css, root_cs) { /* skip the whole subtree if @cp doesn't have any CPU */ if (cpumask_empty(cp->cpus_allowed)) { pos_css = css_rightmost_descendant(pos_css); continue; } if (is_sched_load_balance(cp)) update_domain_attr(dattr, cp); } rcu_read_unlock(); }

Contributors

PersonTokensPropCommitsCommitProp
Lai Jiangshan5167.11%120.00%
Tejun Heo2431.58%360.00%
Li Zefan11.32%120.00%
Total76100.00%5100.00%

/* Must be called with cpuset_mutex held. */
static inline int nr_cpusets(void) { /* jump label reference count + the top-level cpuset */ return static_key_count(&cpusets_enabled_key.key) + 1; }

Contributors

PersonTokensPropCommitsCommitProp
Paolo Bonzini21100.00%1100.00%
Total21100.00%1100.00%

/* * generate_sched_domains() * * This function builds a partial partition of the systems CPUs * A 'partial partition' is a set of non-overlapping subsets whose * union is a subset of that set. * The output of this function needs to be passed to kernel/sched/core.c * partition_sched_domains() routine, which will rebuild the scheduler's * load balancing domains (sched domains) as specified by that partial * partition. * * See "What is sched_load_balance" in Documentation/cgroups/cpusets.txt * for a background explanation of this. * * Does not return errors, on the theory that the callers of this * routine would rather not worry about failures to rebuild sched * domains when operating in the severe memory shortage situations * that could cause allocation failures below. * * Must be called with cpuset_mutex held. * * The three key local variables below are: * q - a linked-list queue of cpuset pointers, used to implement a * top-down scan of all cpusets. This scan loads a pointer * to each cpuset marked is_sched_load_balance into the * array 'csa'. For our purposes, rebuilding the schedulers * sched domains, we can ignore !is_sched_load_balance cpusets. * csa - (for CpuSet Array) Array of pointers to all the cpusets * that need to be load balanced, for convenient iterative * access by the subsequent code that finds the best partition, * i.e the set of domains (subsets) of CPUs such that the * cpus_allowed of every cpuset marked is_sched_load_balance * is a subset of one of these domains, while there are as * many such domains as possible, each as small as possible. * doms - Conversion of 'csa' to an array of cpumasks, for passing to * the kernel/sched/core.c routine partition_sched_domains() in a * convenient format, that can be easily compared to the prior * value to determine what partition elements (sched domains) * were changed (added or removed.) * * Finding the best partition (set of domains): * The triple nested loops below over i, j, k scan over the * load balanced cpusets (using the array of cpuset pointers in * csa[]) looking for pairs of cpusets that have overlapping * cpus_allowed, but which don't have the same 'pn' partition * number and gives them in the same partition number. It keeps * looping on the 'restart' label until it can no longer find * any such pairs. * * The union of the cpus_allowed masks from the set of * all cpusets having the same 'pn' value then form the one * element of the partition (one sched domain) to be passed to * partition_sched_domains(). */
static int generate_sched_domains(cpumask_var_t **domains, struct sched_domain_attr **attributes) { struct cpuset *cp; /* scans q */ struct cpuset **csa; /* array of all cpuset ptrs */ int csn; /* how many cpuset ptrs in csa so far */ int i, j, k; /* indices for partition finding loops */ cpumask_var_t *doms; /* resulting partition; i.e. sched domains */ struct sched_domain_attr *dattr; /* attributes for custom domains */ int ndoms = 0; /* number of sched domains in result */ int nslot; /* next empty doms[] struct cpumask slot */ struct cgroup_subsys_state *pos_css; doms = NULL; dattr = NULL; csa = NULL; /* Special case for the 99% of systems with one, full, sched domain */ if (is_sched_load_balance(&top_cpuset)) { ndoms = 1; doms = alloc_sched_domains(ndoms); if (!doms) goto done; dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL); if (dattr) { *dattr = SD_ATTR_INIT; update_domain_attr_tree(dattr, &top_cpuset); } cpumask_and(doms[0], top_cpuset.effective_cpus, housekeeping_cpumask(HK_FLAG_DOMAIN)); goto done; } csa = kmalloc(nr_cpusets() * sizeof(cp), GFP_KERNEL); if (!csa) goto done; csn = 0; rcu_read_lock(); cpuset_for_each_descendant_pre(cp, pos_css, &top_cpuset) { if (cp == &top_cpuset) continue; /* * Continue traversing beyond @cp iff @cp has some CPUs and * isn't load balancing. The former is obvious. The * latter: All child cpusets contain a subset of the * parent's cpus, so just skip them, and then we call * update_domain_attr_tree() to calc relax_domain_level of * the corresponding sched domain. */ if (!cpumask_empty(cp->cpus_allowed) && !(is_sched_load_balance(cp) && cpumask_intersects(cp->cpus_allowed, housekeeping_cpumask(HK_FLAG_DOMAIN)))) continue; if (is_sched_load_balance(cp)) csa[csn++] = cp; /* skip @cp's subtree */ pos_css = css_rightmost_descendant(pos_css); } rcu_read_unlock(); for (i = 0; i < csn; i++) csa[i]->pn = i; ndoms = csn; restart: /* Find the best partition (set of sched domains) */ for (i = 0; i < csn; i++) { struct cpuset *a = csa[i]; int apn = a->pn; for (j = 0; j < csn; j++) { struct cpuset *b = csa[j]; int bpn = b->pn; if (apn != bpn && cpusets_overlap(a, b)) { for (k = 0; k < csn; k++) { struct cpuset *c = csa[k]; if (c->pn == bpn) c->pn = apn; } ndoms--; /* one less element */ goto restart; } } } /* * Now we know how many domains to create. * Convert <csn, csa> to <ndoms, doms> and populate cpu masks. */ doms = alloc_sched_domains(ndoms); if (!doms) goto done; /* * The rest of the code, including the scheduler, can deal with * dattr==NULL case. No need to abort if alloc fails. */ dattr = kmalloc(ndoms * sizeof(struct sched_domain_attr), GFP_KERNEL); for (nslot = 0, i = 0; i < csn; i++) { struct cpuset *a = csa[i]; struct cpumask *dp; int apn = a->pn; if (apn < 0) { /* Skip completed partitions */ continue; } dp = doms[nslot]; if (nslot == ndoms) { static int warnings = 10; if (warnings) { pr_warn("rebuild_sched_domains confused: nslot %d, ndoms %d, csn %d, i %d, apn %d\n", nslot, ndoms, csn, i, apn); warnings--; } continue; } cpumask_clear(dp); if (dattr) *(dattr + nslot) = SD_ATTR_INIT; for (j = i; j < csn; j++) { struct cpuset *b = csa[j]; if (apn == b->pn) { cpumask_or(dp, dp, b->effective_cpus); cpumask_and(dp, dp, housekeeping_cpumask(HK_FLAG_DOMAIN)); if (dattr) update_domain_attr_tree(dattr + nslot, b); /* Done with this partition */ b->pn = -1; } } nslot++; } BUG_ON(nslot != ndoms); done: kfree(csa); /* * Fallback to the default domain if kmalloc() failed. * See comments in partition_sched_domains(). */ if (doms == NULL) ndoms = 1; *domains = doms; *attributes = dattr; return ndoms; }

Contributors

PersonTokensPropCommitsCommitProp
Paul Jackson42361.39%14.35%
Hidetoshi Seto7510.89%14.35%
Maksim Krasnyanskiy588.42%14.35%
Tejun Heo355.08%417.39%
Li Zefan273.92%626.09%
Rik Van Riel202.90%14.35%
Rusty Russell142.03%14.35%
Frédéric Weisbecker121.74%14.35%
Lai Jiangshan101.45%28.70%
Miao Xie60.87%14.35%
Paul Menage30.44%14.35%
Mel Gorman20.29%14.35%
Ingo Molnar20.29%14.35%
Fabian Frederick20.29%14.35%
Total689100.00%23100.00%

/* * Rebuild scheduler domains. * * If the flag 'sched_load_balance' of any cpuset with non-empty * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset * which has that flag enabled, or if any cpuset with a non-empty * 'cpus' is removed, then call this routine to rebuild the * scheduler's dynamic sched domains. * * Call with cpuset_mutex held. Takes get_online_cpus(). */
static void rebuild_sched_domains_locked(void) { struct sched_domain_attr *attr; cpumask_var_t *doms; int ndoms; lockdep_assert_held(&cpuset_mutex); get_online_cpus(); /* * We have raced with CPU hotplug. Don't do anything to avoid * passing doms with offlined cpu to partition_sched_domains(). * Anyways, hotplug work item will rebuild sched domains. */ if (!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask)) goto out; /* Generate domain masks and attrs */ ndoms = generate_sched_domains(&doms, &attr); /* Have scheduler rebuild the domains */ partition_sched_domains(ndoms, doms, attr); out: put_online_cpus(); }

Contributors

PersonTokensPropCommitsCommitProp
Maksim Krasnyanskiy3143.06%110.00%
Li Zefan1825.00%220.00%
Paul Jackson912.50%110.00%
Tejun Heo79.72%220.00%
Paul Menage34.17%110.00%
Gautham R. Shenoy22.78%110.00%
Hidetoshi Seto11.39%110.00%
Rusty Russell11.39%110.00%
Total72100.00%10100.00%

#else /* !CONFIG_SMP */
static void rebuild_sched_domains_locked(void) { }

Contributors

PersonTokensPropCommitsCommitProp
Paul Menage571.43%150.00%
Tejun Heo228.57%150.00%
Total7100.00%2100.00%

#endif /* CONFIG_SMP */
void rebuild_sched_domains(void) { mutex_lock(&cpuset_mutex); rebuild_sched_domains_locked(); mutex_unlock(&cpuset_mutex); }

Contributors

PersonTokensPropCommitsCommitProp
Maksim Krasnyanskiy1150.00%116.67%
Tejun Heo836.36%233.33%
Paul Jackson29.09%233.33%
Paul Menage14.55%116.67%
Total22100.00%6100.00%

/** * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset. * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed * * Iterate through each task of @cs updating its cpus_allowed to the * effective cpuset's. As this function is called with cpuset_mutex held, * cpuset membership stays stable. */
static void update_tasks_cpumask(struct cpuset *cs) { struct css_task_iter it; struct task_struct *task; css_task_iter_start(&cs->css, 0, &it); while ((task = css_task_iter_next(&it))) set_cpus_allowed_ptr(task, cs->effective_cpus); css_task_iter_end(&it); }

Contributors

PersonTokensPropCommitsCommitProp
Tejun Heo4473.33%440.00%
Li Zefan610.00%330.00%
Miao Xie610.00%110.00%
Cliff Wickman35.00%110.00%
Adrian Bunk11.67%110.00%
Total60100.00%10100.00%

/* * update_cpumasks_hier - Update effective cpumasks and tasks in the subtree * @cs: the cpuset to consider * @new_cpus: temp variable for calculating new effective_cpus * * When congifured cpumask is changed, the effective cpumasks of this cpuset * and all its descendants need to be updated. * * On legacy hierachy, effective_cpus will be the same with cpu_allowed. * * Called with cpuset_mutex held */
static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) { struct cpuset *cp; struct cgroup_subsys_state *pos_css; bool need_rebuild_sched_domains = false; rcu_read_lock(); cpuset_for_each_descendant_pre(cp, pos_css, cs) { struct cpuset *parent = parent_cs(cp); cpumask_and(new_cpus, cp->cpus_allowed, parent->effective_cpus); /* * If it becomes empty, inherit the effective mask of the * parent, which is guaranteed to have some CPUs. */ if (is_in_v2_mode() && cpumask_empty(new_cpus)) cpumask_copy(new_cpus, parent->effective_cpus); /* Skip the whole subtree if the cpumask remains the same. */ if (cpumask_equal(new_cpus, cp->effective_cpus)) { pos_css = css_rightmost_descendant(pos_css); continue; } if (!css_tryget_online(&cp->css)) continue; rcu_read_unlock(); spin_lock_irq(&callback_lock); cpumask_copy(cp->effective_cpus, new_cpus); spin_unlock_irq(&callback_lock); WARN_ON(!is_in_v2_mode() && !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); update_tasks_cpumask(cp); /* * If the effective cpumask of any non-empty cpuset is changed, * we need to rebuild sched domains. */ if (!cpumask_empty(cp->cpus_allowed) && is_sched_load_balance(cp)) need_rebuild_sched_domains = true; rcu_read_lock(); css_put(&cp->css); } rcu_read_unlock(); if (need_rebuild_sched_domains) rebuild_sched_domains_locked(); }

Contributors

PersonTokensPropCommitsCommitProp
Li Zefan18085.71%535.71%
Tejun Heo115.24%321.43%
Paul Jackson73.33%17.14%
Vladimir Davydov41.90%17.14%
Waiman Long41.90%17.14%
Paul Menage31.43%214.29%
Cliff Wickman10.48%17.14%
Total210100.00%14100.00%

/** * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it * @cs: the cpuset to consider * @trialcs: trial cpuset * @buf: buffer of cpu numbers written to this cpuset */
static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, const char *buf) { int retval; /* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */ if (cs == &top_cpuset) return -EACCES; /* * An empty cpus_allowed is ok only if the cpuset has no tasks. * Since cpulist_parse() fails on an empty mask, we special case * that parsing. The validate_change() call ensures that cpusets * with tasks have cpus. */ if (!*buf) { cpumask_clear(trialcs->cpus_allowed); } else { retval = cpulist_parse(buf, trialcs->cpus_allowed); if (retval < 0) return retval; if (!cpumask_subset(trialcs->cpus_allowed, top_cpuset.cpus_allowed)) return -EINVAL; } /* Nothing to do if the cpus didn't change */ if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed)) return 0; retval = validate_change(cs, trialcs); if (retval < 0) return retval; spin_lock_irq(&callback_lock); cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); spin_unlock_irq(&callback_lock); /* use trialcs->cpus_allowed as a temp variable */ update_cpumasks_hier(cs, trialcs->cpus_allowed); return 0; }

Contributors

PersonTokensPropCommitsCommitProp
Paul Menage5533.54%314.29%
Paul Jackson4326.22%523.81%
Li Zefan4125.00%628.57%
Lai Jiangshan95.49%14.76%
David P. Quigley63.66%14.76%
Vladimir Davydov42.44%14.76%
Cliff Wickman31.83%14.76%
David Howells10.61%14.76%
Miao Xie10.61%14.76%
Rusty Russell10.61%14.76%
Total164100.00%21100.00%

/* * Migrate memory region from one set of nodes to another. This is * performed asynchronously as it can be called from process migration path * holding locks involved in process management. All mm migrations are * performed in the queued order and can be waited for by flushing * cpuset_migrate_mm_wq. */ struct cpuset_migrate_mm_work { struct work_struct work; struct mm_struct *mm; nodemask_t from; nodemask_t to; };
static void cpuset_migrate_mm_workfn(struct work_struct *work) { struct cpuset_migrate_mm_work *mwork = container_of(work, struct cpuset_migrate_mm_work, work); /* on a wq worker, no need to worry about %current's mems_allowed */ do_migrate_pages(mwork->mm, &mwork->from, &mwork->to, MPOL_MF_MOVE_ALL); mmput(mwork->mm); kfree(mwork); }

Contributors

PersonTokensPropCommitsCommitProp
Tejun Heo58100.00%1100.00%
Total58100.00%1100.00%


static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to) { struct cpuset_migrate_mm_work *mwork; mwork = kzalloc(sizeof(*mwork), GFP_KERNEL); if (mwork) { mwork->mm = mm; mwork->from = *from; mwork->to = *to; INIT_WORK(&mwork->work, cpuset_migrate_mm_workfn); queue_work(cpuset_migrate_mm_wq, &mwork->work); } else { mmput(mm); } }

Contributors

PersonTokensPropCommitsCommitProp
Tejun Heo5559.14%120.00%
Paul Menage2830.11%120.00%
Paul Jackson1010.75%360.00%
Total93100.00%5100.00%


static void cpuset_post_attach(void) { flush_workqueue(cpuset_migrate_mm_wq); }

Contributors

PersonTokensPropCommitsCommitProp
Tejun Heo1184.62%250.00%
Li Zefan17.69%125.00%
Miao Xie17.69%125.00%
Total13100.00%4100.00%

/* * cpuset_change_task_nodemask - change task's mems_allowed and mempolicy * @tsk: the task to change * @newmems: new nodes that the task will be set * * We use the mems_allowed_seq seqlock to safely update both tsk->mems_allowed * and rebind an eventual tasks' mempolicy. If the task is allocating in * parallel, it might temporarily see an empty intersection, which results in * a seqlock check and retry before OOM or allocation failure. */
static void cpuset_change_task_nodemask(struct task_struct *tsk, nodemask_t *newmems) { task_lock(tsk); local_irq_disable(); write_seqcount_begin(&tsk->mems_allowed_seq); nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); mpol_rebind_task(tsk, newmems); tsk->mems_allowed = *newmems; write_seqcount_end(&tsk->mems_allowed_seq); local_irq_enable(); task_unlock(tsk); }

Contributors

PersonTokensPropCommitsCommitProp
Miao Xie5168.00%240.00%
Mel Gorman1621.33%120.00%
Peter Zijlstra68.00%120.00%
Paul Jackson22.67%120.00%
Total75100.00%5100.00%

static void *cpuset_being_rebound; /** * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset. * @cs: the cpuset in which each task's mems_allowed mask needs to be changed * * Iterate through each task of @cs updating its mems_allowed to the * effective cpuset's. As this function is called with cpuset_mutex held, * cpuset membership stays stable. */
static void update_tasks_nodemask(struct cpuset *cs) { static nodemask_t newmems; /* protected by cpuset_mutex */ struct css_task_iter it; struct task_struct *task; cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ guarantee_online_mems(cs, &newmems); /* * The mpol_rebind_mm() call takes mmap_sem, which we couldn't * take while holding tasklist_lock. Forks can happen - the * mpol_dup() cpuset_being_rebound check will catch such forks, * and rebind their vma mempolicies too. Because we still hold * the global cpuset_mutex, we know that no other rebind effort * will be contending for the global variable cpuset_being_rebound. * It's ok if we rebind the same mm twice; mpol_rebind_mm() * is idempotent. Also migrate pages in each mm to new nodes. */ css_task_iter_start(&cs->css, 0, &it); while ((task = css_task_iter_next(&it))) { struct mm_struct *mm; bool migrate; cpuset_change_task_nodemask(task, &newmems); mm = get_task_mm(task); if (!mm) continue; migrate = is_memory_migrate(cs); mpol_rebind_mm(mm, &cs->mems_allowed); if (migrate) cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems); else mmput(mm); } css_task_iter_end(&it); /* * All the tasks' nodemasks have been updated, update * cs->old_mems_allowed. */ cs->old_mems_allowed = newmems; /* We're done rebinding vmas to this cpuset's new mems_allowed. */ cpuset_being_rebound = NULL; }

Contributors

PersonTokensPropCommitsCommitProp
Tejun Heo6441.83%541.67%
Paul Menage3321.57%18.33%
Paul Jackson2918.95%18.33%
Li Zefan2113.73%325.00%
Miao Xie63.92%216.67%
Total153100.00%12100.00%

/* * update_nodemasks_hier - Update effective nodemasks and tasks in the subtree * @cs: the cpuset to consider * @new_mems: a temp variable for calculating new effective_mems * * When configured nodemask is changed, the effective nodemasks of this cpuset * and all its descendants need to be updated. * * On legacy hiearchy, effective_mems will be the same with mems_allowed. * * Called with cpuset_mutex held */
static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) { struct cpuset *cp; struct cgroup_subsys_state *pos_css; rcu_read_lock(); cpuset_for_each_descendant_pre(cp, pos_css, cs) { struct cpuset *parent = parent_cs(cp); nodes_and(*new_mems, cp->mems_allowed, parent->effective_mems); /* * If it becomes empty, inherit the effective mask of the * parent, which is guaranteed to have some MEMs. */ if (is_in_v2_mode() && nodes_empty(*new_mems)) *new_mems = parent->effective_mems; /* Skip the whole subtree if the nodemask remains the same. */ if (nodes_equal(*new_mems, cp->effective_mems)) { pos_css = css_rightmost_descendant(pos_css); continue; } if (!css_tryget_online(&cp->css)) continue; rcu_read_unlock(); spin_lock_irq(&callback_lock); cp->effective_mems = *new_mems; spin_unlock_irq(&callback_lock); WARN_ON(!is_in_v2_mode() && !nodes_equal(cp->mems_allowed, cp->effective_mems)); update_tasks_nodemask(cp); rcu_read_lock(); css_put(&cp->css); } rcu_read_unlock(); }

Contributors

PersonTokensPropCommitsCommitProp
Li Zefan15487.50%646.15%
Tejun Heo116.25%323.08%
Vladimir Davydov42.27%17.69%
Waiman Long42.27%17.69%
Paul Menage21.14%17.69%
Paul Jackson10.57%17.69%
Total176100.00%13100.00%

/* * Handle user request to change the 'mems' memory placement * of a cpuset. Needs to validate the request, update the * cpusets mems_allowed, and for each task in the cpuset, * update mems_allowed and rebind task's mempolicy and any vma * mempolicies and if the cpuset is marked 'memory_migrate', * migrate the tasks pages to the new memory. * * Call with cpuset_mutex held. May take callback_lock during call. * Will take tasklist_lock, scan tasklist for tasks in cpuset cs, * lock each such tasks mm->mmap_sem, scan its vma's and rebind * their mempolicies to the cpusets new mems_allowed. */
static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, const char *buf) { int retval; /* * top_cpuset.mems_allowed tracks node_stats[N_MEMORY]; * it's read-only */ if (cs == &top_cpuset) { retval = -EACCES; goto done; } /* * An empty mems_allowed is ok iff there are no tasks in the cpuset. * Since nodelist_parse() fails on an empty mask, we special case * that parsing. The validate_change() call ensures that cpusets * with tasks have memory. */ if (!*buf) { nodes_clear(trialcs->mems_allowed); } else { retval = nodelist_parse(buf, trialcs->mems_allowed); if (retval < 0) goto done; if (!nodes_subset(trialcs->mems_allowed, top_cpuset.mems_allowed)) { retval = -EINVAL; goto done; } } if (nodes_equal(cs->mems_allowed, trialcs->mems_allowed)) { retval = 0; /* Too easy - nothing to do */ goto done; } retval = validate_change(cs, trialcs); if (retval < 0) goto done; spin_lock_irq(&callback_lock); cs->mems_allowed = trialcs->mems_allowed; spin_unlock_irq(&callback_lock); /* use trialcs->mems_allowed as a temp variable */ update_nodemasks_hier(cs, &trialcs->mems_allowed); done: return retval; }

Contributors

PersonTokensPropCommitsCommitProp
Miao Xie15585.16%222.22%
Li Zefan2111.54%444.44%
Vladimir Davydov42.20%111.11%
Lai Jiangshan10.55%111.11%
Alban Crequy10.55%111.11%
Total182100.00%9100.00%


int current_cpuset_is_being_rebound(void) { int ret; rcu_read_lock(); ret = task_cs(current) == cpuset_being_rebound; rcu_read_unlock(); return ret; }

Contributors

PersonTokensPropCommitsCommitProp
Gu Zheng1450.00%133.33%
Paul Jackson828.57%133.33%
Paul Menage621.43%133.33%
Total28100.00%3100.00%


static int update_relax_domain_level(struct cpuset *cs, s64 val) { #ifdef CONFIG_SMP if (val < -1 || val >= sched_domain_level_max) return -EINVAL; #endif if (val != cs->relax_domain_level) { cs->relax_domain_level = val; if (!cpumask_empty(cs->cpus_allowed) && is_sched_load_balance(cs)) rebuild_sched_domains_locked(); } return 0; }

Contributors

PersonTokensPropCommitsCommitProp
Hidetoshi Seto3954.93%112.50%
Li Zefan2332.39%337.50%
Paul Menage79.86%225.00%
Peter Zijlstra11.41%112.50%
Tejun Heo11.41%112.50%
Total71100.00%8100.00%

/** * update_tasks_flags - update the spread flags of tasks in the cpuset. * @cs: the cpuset in which each task's spread flags needs to be changed * * Iterate through each task of @cs updating its spread flags. As this * function is called with cpuset_mutex held, cpuset membership stays * stable. */
static void update_tasks_flags(struct cpuset *cs) { struct css_task_iter it; struct task_struct *task; css_task_iter_start(&cs->css, 0, &it); while ((task = css_task_iter_next(&it))) cpuset_update_task_spread_flag(cs, task); css_task_iter_end(&it); }

Contributors

PersonTokensPropCommitsCommitProp
Tejun Heo4272.41%480.00%
Miao Xie1627.59%120.00%
Total58100.00%5100.00%

/* * update_flag - read a 0 or a 1 in a file and update associated flag * bit: the bit to update (see cpuset_flagbits_t) * cs: the cpuset to update * turning_on: whether the flag is being set or cleared * * Call with cpuset_mutex held. */
static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, int turning_on) { struct cpuset *trialcs; int balance_flag_changed; int spread_flag_changed; int err; trialcs = alloc_trial_cpuset(cs); if (!trialcs) return -ENOMEM; if (turning_on) set_bit(bit, &trialcs->flags); else clear_bit(bit, &trialcs->flags); err = validate_change(cs, trialcs); if (err < 0) goto out; balance_flag_changed = (is_sched_load_balance(cs) != is_sched_load_balance(trialcs)); spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs)) || (is_spread_page(cs) != is_spread_page(trialcs))); spin_lock_irq(&callback_lock); cs->flags = trialcs->flags; spin_unlock_irq(&callback_lock); if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) rebuild_sched_domains_locked(); if (spread_flag_changed) update_tasks_flags(cs); out: free_trial_cpuset(trialcs); return err; }

Contributors

PersonTokensPropCommitsCommitProp
Paul Menage5429.03%216.67%
Paul Jackson5127.42%433.33%
Miao Xie4222.58%18.33%
Li Zefan2915.59%216.67%
Rakib Mullick52.69%18.33%
Vladimir Davydov42.15%18.33%
Tejun Heo10.54%18.33%
Total186100.00%12100.00%

/* * Frequency meter - How fast is some event occurring? * * These routines manage a digitally filtered, constant time based, * event frequency meter. There are four routines: * fmeter_init() - initialize a frequency meter. * fmeter_markevent() - called each time the event happens. * fmeter_getrate() - returns the recent rate of such events. * fmeter_update() - internal routine used to update fmeter. * * A common data structure is passed to each of these routines, * which is used to keep track of the state required to manage the * frequency meter and its digital filter. * * The filter works on the number of events marked per unit time. * The filter is single-pole low-pass recursive (IIR). The time unit * is 1 second. Arithmetic is done using 32-bit integers scaled to * simulate 3 decimal digits of precision (multiplied by 1000). * * With an FM_COEF of 933, and a time base of 1 second, the filter * has a half-life of 10 seconds, meaning that if the events quit * happening, then the rate returned from the fmeter_getrate() * will be cut in half each 10 seconds, until it converges to zero. * * It is not worth doing a real infinitely recursive filter. If more * than FM_MAXTICKS ticks have elapsed since the last filter event, * just compute FM_MAXTICKS ticks worth, by which point the level * will be stable. * * Limit the count of unprocessed events to FM_MAXCNT, so as to avoid * arithmetic overflow in the fmeter_update() routine. * * Given the simple 32 bit integer arithmetic used, this meter works * best for reporting rates between one per millisecond (msec) and * one per 32 (approx) seconds. At constant rates faster than one * per msec it maxes out at values just under 1,000,000. At constant * rates between one per msec, and one per second it will stabilize * to a value N*1000, where N is the rate of events per second. * At constant rates between one per second and one per 32 seconds, * it will be choppy, moving up on the seconds that have an event, * and then decaying until the next event. At rates slower than * about one in 32 seconds, it decays all the way back to zero between * each event. */ #define FM_COEF 933 /* coefficient for half-life of 10 secs */ #define FM_MAXTICKS ((u32)99) /* useless computing more ticks than this */ #define FM_MAXCNT 1000000 /* limit cnt to avoid overflow */ #define FM_SCALE 1000 /* faux fixed point scale */ /* Initialize a frequency meter */
static void fmeter_init(struct fmeter *fmp) { fmp->cnt = 0; fmp->val = 0; fmp->time = 0; spin_lock_init(&fmp->lock); }

Contributors

PersonTokensPropCommitsCommitProp
Paul Menage2670.27%133.33%
Paul Jackson1129.73%266.67%
Total37100.00%3100.00%

/* Internal meter update - process cnt events and update value */
static void fmeter_update(struct fmeter *fmp) { time64_t now; u32 ticks; now = ktime_get_seconds(); ticks = now - fmp->time; if (ticks == 0) return; ticks = min(FM_MAXTICKS, ticks); while (ticks-- > 0) fmp->val = (FM_COEF * fmp->val) / FM_SCALE; fmp->time = now; fmp->val += ((FM_SCALE - FM_COEF) * fmp->cnt) / FM_SCALE; fmp->cnt = 0; }

Contributors

PersonTokensPropCommitsCommitProp
Paul Menage6769.07%133.33%
Paul Jackson1919.59%133.33%
Arnd Bergmann1111.34%133.33%
Total97100.00%3100.00%

/* Process any previous ticks, then bump cnt by one (times scale). */
static void fmeter_markevent(struct fmeter *fmp) { spin_lock(&fmp->lock); fmeter_update(fmp); fmp->cnt = min(FM_MAXCNT, fmp->cnt + FM_SCALE); spin_unlock(&fmp->lock); }

Contributors

PersonTokensPropCommitsCommitProp
Paul Menage2757.45%150.00%
Paul Jackson2042.55%150.00%
Total47100.00%2100.00%

/* Process any previous ticks, then return current value. */
static int fmeter_getrate(struct fmeter *fmp) { int val; spin_lock(&fmp->lock); fmeter_update(fmp); val = fmp->val; spin_unlock(&fmp->lock); return val; }

Contributors

PersonTokensPropCommitsCommitProp
Paul Menage2863.64%150.00%
Paul Jackson1636.36%150.00%
Total44100.00%2100.00%

static struct cpuset *cpuset_attach_old_cs; /* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */
static int cpuset_can_attach(struct cgroup_taskset *tset) { struct cgroup_subsys_state *css; struct cpuset *cs; struct task_struct *task; int ret; /* used later by cpuset_attach() */ cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css)); cs = css_cs(css); mutex_lock(&cpuset_mutex); /* allow moving tasks into an empty cpuset if on default hierarchy */ ret = -ENOSPC; if (!is_in_v2_mode() && (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))) goto out_unlock; cgroup_taskset_for_each(task, css, tset) { ret = task_can_attach(task, cs->cpus_allowed); if (ret) goto out_unlock; ret = security_task_setscheduler(task); if (ret) goto out_unlock; } /* * Mark attach is in progress. This makes validate_change() fail * changes which zero cpus/mems_allowed. */ cs->attach_in_progress++; ret = 0; out_unlock: mutex_unlock(&cpuset_mutex); return ret; }

Contributors

PersonTokensPropCommitsCommitProp
Tejun Heo9664.00%847.06%
Paul Menage149.33%15.88%
Paul Jackson149.33%15.88%
Juri Lelli96.00%15.88%
Ben Blum74.67%211.76%
Li Zefan53.33%211.76%
David Rientjes32.00%15.88%
Waiman Long21.33%15.88%
Total150100.00%17100.00%


static void cpuset_cancel_attach(struct cgroup_taskset *tset) { struct cgroup_subsys_state *css; struct cpuset *cs; cgroup_taskset_first(tset, &css); cs = css_cs(css); mutex_lock(&cpuset_mutex); css_cs(css)->attach_in_progress--; mutex_unlock(&cpuset_mutex); }

Contributors

PersonTokensPropCommitsCommitProp
Tejun Heo4580.36%466.67%
Ben Blum1119.64%233.33%
Total56100.00%6100.00%

/* * Protected by cpuset_mutex. cpus_attach is used only by cpuset_attach() * but we can't allocate it dynamically there. Define it global and * allocate from cpuset_init(). */ static cpumask_var_t cpus_attach;
static void cpuset_attach(struct cgroup_taskset *tset) { /* static buf protected by cpuset_mutex */ static nodemask_t cpuset_attach_nodemask_to; struct task_struct *task; struct task_struct *leader; struct cgroup_subsys_state *css; struct cpuset *cs; struct cpuset *oldcs = cpuset_attach_old_cs; cgroup_taskset_first(tset, &css); cs = css_cs(css); mutex_lock(&cpuset_mutex); /* prepare for attach */ if (cs == &top_cpuset) cpumask_copy(cpus_attach, cpu_possible_mask); else guarantee_online_cpus(cs, cpus_attach); guarantee_online_mems(cs, &cpuset_attach_nodemask_to); cgroup_taskset_for_each(task, css, tset) { /* * can_attach beforehand should guarantee that this doesn't * fail. TODO: have a better way to handle failure here */ WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach)); cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to); cpuset_update_task_spread_flag(cs, task); } /* * Change mm for all threadgroup leaders. This is expensive and may * sleep and should be moved outside migration path proper. */ cpuset_attach_nodemask_to = cs->effective_mems; cgroup_taskset_for_each_leader(leader, css, tset) { struct mm_struct *mm = get_task_mm(leader); if (mm) { mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); /* * old_mems_allowed is the same with mems_allowed * here, except if this task is being moved * automatically due to hotplug. In that case * @mems_allowed has been updated and is empty, so * @old_mems_allowed is the right nodesets that we * migrate mm from. */ if (is_memory_migrate(cs)) cpuset_migrate_mm(mm, &oldcs->old_mems_allowed, &cpuset_attach_nodemask_to); else mmput(mm); } } cs->old_mems_allowed = cpuset_attach_nodemask_to; cs->attach_in_progress--; if (!cs->attach_in_progress) wake_up(&cpuset_attach_wq); mutex_unlock(&cpuset_mutex); }

Contributors

PersonTokensPropCommitsCommitProp
Tejun Heo12655.02%1150.00%
Paul Menage3716.16%14.55%
Ben Blum3414.85%29.09%
Li Zefan219.17%731.82%
Paul Jackson114.80%14.55%
Total229100.00%22100.00%

/* The various types of files and directories in a cpuset file system */ typedef enum { FILE_MEMORY_MIGRATE, FILE_CPULIST, FILE_MEMLIST, FILE_EFFECTIVE_CPULIST, FILE_EFFECTIVE_MEMLIST, FILE_CPU_EXCLUSIVE, FILE_MEM_EXCLUSIVE, FILE_MEM_HARDWALL, FILE_SCHED_LOAD_BALANCE, FILE_SCHED_RELAX_DOMAIN_LEVEL, FILE_MEMORY_PRESSURE_ENABLED, FILE_MEMORY_PRESSURE, FILE_SPREAD_PAGE, FILE_SPREAD_SLAB, } cpuset_filetype_t;
static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, u64 val) { struct cpuset *cs = css_cs(css); cpuset_filetype_t type = cft->private; int retval = 0; mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) { retval = -ENODEV; goto out_unlock; } switch (type) { case FILE_CPU_EXCLUSIVE: retval = update_flag(CS_CPU_EXCLUSIVE, cs, val); break; case FILE_MEM_EXCLUSIVE: retval = update_flag(CS_MEM_EXCLUSIVE, cs, val); break; case FILE_MEM_HARDWALL: retval = update_flag(CS_MEM_HARDWALL, cs, val); break; case FILE_SCHED_LOAD_BALANCE: retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val); break; case FILE_MEMORY_MIGRATE: retval = update_flag(CS_MEMORY_MIGRATE, cs, val); break; case FILE_MEMORY_PRESSURE_ENABLED: cpuset_memory_pressure_enabled = !!val; break; case FILE_SPREAD_PAGE: retval = update_flag(CS_SPREAD_PAGE, cs, val); break; case FILE_SPREAD_SLAB: retval = update_flag(CS_SPREAD_SLAB, cs, val); break; default: retval = -EINVAL; break; } out_unlock: mutex_unlock(&cpuset_mutex); return retval; }

Contributors

PersonTokensPropCommitsCommitProp
Paul Menage12762.25%440.00%
Paul Jackson3316.18%220.00%
Tejun Heo2612.75%220.00%
Hidetoshi Seto104.90%110.00%
Li Zefan83.92%110.00%
Total204100.00%10100.00%


static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, s64 val) { struct cpuset *cs = css_cs(css); cpuset_filetype_t type = cft->private; int retval = -ENODEV; mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) goto out_unlock; switch (type) { case FILE_SCHED_RELAX_DOMAIN_LEVEL: retval = update_relax_domain_level(cs, val); break; default: retval = -EINVAL; break; } out_unlock: mutex_unlock(&cpuset_mutex); return retval; }

Contributors

PersonTokensPropCommitsCommitProp
Paul Menage7072.92%250.00%
Tejun Heo2627.08%250.00%
Total96100.00%4100.00%

/* * Common handling for a write to a "cpus" or "mems" file. */
static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct cpuset *cs = css_cs(of_css(of)); struct cpuset *trialcs; int retval = -ENODEV; buf = strstrip(buf); /* * CPU or memory hotunplug may leave @cs w/o any execution * resources, in which case the hotplug code asynchronously updates * configuration and transfers all tasks to the nearest ancestor * which can execute. * * As writes to "cpus" or "mems" may restore @cs's execution * resources, wait for the previously scheduled operations before * proceeding, so that we don't end up keep removing tasks added * after execution capability is restored. * * cpuset_hotplug_work calls back into cgroup core via * cgroup_transfer_tasks() and waiting for it from a cgroupfs * operation like this one can lead to a deadlock through kernfs * active_ref protection. Let's break the protection. Losing the * protection is okay as we check whether @cs is online after * grabbing cpuset_mutex anyway. This only happens on the legacy * hierarchies. */ css_get(&cs->css); kernfs_break_active_protection(of->kn); flush_work(&cpuset_hotplug_work); mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) goto out_unlock; trialcs = alloc_trial_cpuset(cs); if (!trialcs) { retval = -ENOMEM; goto out_unlock; } switch (of_cft(of)->private) { case FILE_CPULIST: retval = update_cpumask(cs, trialcs, buf); break; case FILE_MEMLIST: retval = update_nodemask(cs, trialcs, buf); break; default: retval = -EINVAL; break; } free_trial_cpuset(trialcs); out_unlock: mutex_unlock(&cpuset_mutex); kernfs_unbreak_active_protection(of->kn); css_put(&cs->css); flush_workqueue(cpuset_migrate_mm_wq); return retval ?: nbytes; }

Contributors

PersonTokensPropCommitsCommitProp
Tejun Heo9447.00%666.67%
Paul Menage6030.00%111.11%
Li Zefan4623.00%222.22%
Total200100.00%9100.00%

/* * These ascii lists should be read in a single call, by using a user * buffer large enough to hold the entire map. If read in smaller * chunks, there is no guarantee of atomicity. Since the display format * used, list of ranges of sequential numbers, is variable length, * and since these maps can change value dynamically, one could read * gibberish by doing partial reads while a list was changing. */
static int cpuset_common_seq_show(struct seq_file *sf, void *v) { struct cpuset *cs = css_cs(seq_css(sf)); cpuset_filetype_t type = seq_cft(sf)->private; int ret = 0; spin_lock_irq(&callback_lock); switch (type) { case FILE_CPULIST: seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed)); break; case FILE_MEMLIST: seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed)); break; case FILE_EFFECTIVE_CPULIST: seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->effective_cpus)); break; case FILE_EFFECTIVE_MEMLIST: seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->effective_mems)); break; default: ret = -EINVAL; } spin_unlock_irq(&callback_lock); return ret; }

Contributors

PersonTokensPropCommitsCommitProp
Tejun Heo6142.36%450.00%
Paul Jackson3322.92%112.50%
Li Zefan2416.67%112.50%
Paul Menage2215.28%112.50%
Vladimir Davydov42.78%112.50%
Total144100.00%8100.00%


static u64 cpuset_read_u64(struct cgroup_subsys_state *css, struct cftype *cft) { struct cpuset *cs = css_cs(css); cpuset_filetype_t type = cft->private; switch (type) { case FILE_CPU_EXCLUSIVE: return is_cpu_exclusive(cs); case FILE_MEM_EXCLUSIVE: return is_mem_exclusive(cs); case FILE_MEM_HARDWALL: return is_mem_hardwall(cs); case FILE_SCHED_LOAD_BALANCE: return is_sched_load_balance(cs); case FILE_MEMORY_MIGRATE: return is_memory_migrate(cs); case FILE_MEMORY_PRESSURE_ENABLED: return cpuset_memory_pressure_enabled; case FILE_MEMORY_PRESSURE: return fmeter_getrate(&cs->fmeter); case FILE_SPREAD_PAGE: return is_spread_page(cs); case FILE_SPREAD_SLAB: return is_spread_slab(cs); default: BUG(); } /* Unreachable but makes gcc happy */ return 0; }

Contributors

PersonTokensPropCommitsCommitProp
Paul Menage12093.75%250.00%
Maksim Krasnyanskiy43.12%125.00%
Tejun Heo43.12%125.00%
Total128100.00%4100.00%


static s64 cpuset_read_s64(struct cgroup_subsys_state *css, struct cftype *cft) { struct cpuset *cs = css_cs(css); cpuset_filetype_t type = cft->private; switch (type) { case FILE_SCHED_RELAX_DOMAIN_LEVEL: return cs->relax_domain_level; default: BUG(); } /* Unrechable but makes gcc happy */ return 0; }

Contributors

PersonTokensPropCommitsCommitProp
Paul Menage4785.45%133.33%
Maksim Krasnyanskiy47.27%133.33%
Tejun Heo47.27%133.33%
Total55100.00%3100.00%

/* * for the common functions, 'private' gives the type of file */ static struct cftype files[] = { { .name = "cpus", .seq_show = cpuset_common_seq_show, .write = cpuset_write_resmask, .max_write_len = (100U + 6 * NR_CPUS), .private = FILE_CPULIST, }, { .name = "mems", .seq_show = cpuset_common_seq_show, .write = cpuset_write_resmask, .max_write_len = (100U + 6 * MAX_NUMNODES), .private = FILE_MEMLIST, }, { .name = "effective_cpus", .seq_show = cpuset_common_seq_show, .private = FILE_EFFECTIVE_CPULIST, }, { .name = "effective_mems", .seq_show = cpuset_common_seq_show, .private = FILE_EFFECTIVE_MEMLIST, }, { .name = "cpu_exclusive", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_CPU_EXCLUSIVE, }, { .name = "mem_exclusive", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_MEM_EXCLUSIVE, }, { .name = "mem_hardwall", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_MEM_HARDWALL, }, { .name = "sched_load_balance", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_SCHED_LOAD_BALANCE, }, { .name = "sched_relax_domain_level", .read_s64 = cpuset_read_s64, .write_s64 = cpuset_write_s64, .private = FILE_SCHED_RELAX_DOMAIN_LEVEL, }, { .name = "memory_migrate", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_MEMORY_MIGRATE, }, { .name = "memory_pressure", .read_u64 = cpuset_read_u64, .private = FILE_MEMORY_PRESSURE, }, { .name = "memory_spread_page", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_SPREAD_PAGE, }, { .name = "memory_spread_slab", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_SPREAD_SLAB, }, { .name = "memory_pressure_enabled", .flags = CFTYPE_ONLY_ON_ROOT, .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_MEMORY_PRESSURE_ENABLED, }, { } /* terminate */ }; /* * cpuset_css_alloc - allocate a cpuset css * cgrp: control group that the new cpuset will be part of */
static struct cgroup_subsys_state * cpuset_css_alloc(struct cgroup_subsys_state *parent_css) { struct cpuset *cs; if (!parent_css) return &top_cpuset.css; cs = kzalloc(sizeof(*cs), GFP_KERNEL); if (!cs) return ERR_PTR(-ENOMEM); if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) goto free_cs; if (!alloc_cpumask_var(&cs->effective_cpus, GFP_KERNEL)) goto free_cpus; set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); cpumask_clear(cs->cpus_allowed); nodes_clear(cs->mems_allowed); cpumask_clear(cs->effective_cpus); nodes_clear(cs->effective_mems); fmeter_init(&cs->fmeter); cs->relax_domain_level = -1; return &cs->css; free_cpus: free_cpumask_var(cs->cpus_allowed); free_cs: kfree(cs); return ERR_PTR(-ENOMEM); }

Contributors

PersonTokensPropCommitsCommitProp
Li Zefan7041.67%216.67%
Paul Jackson4124.40%325.00%
Paul Menage3923.21%18.33%
Tejun Heo105.95%433.33%
Mike Travis52.98%18.33%
Hidetoshi Seto31.79%18.33%
Total168100.00%12100.00%


static int cpuset_css_online(struct cgroup_subsys_state *css) { struct cpuset *cs = css_cs(css); struct cpuset *parent = parent_cs(cs); struct cpuset *tmp_cs; struct cgroup_subsys_state *pos_css; if (!parent) return 0; mutex_lock(&cpuset_mutex); set_bit(CS_ONLINE, &cs->flags); if (is_spread_page(parent)) set_bit(CS_SPREAD_PAGE, &cs->flags); if (is_spread_slab(parent)) set_bit(CS_SPREAD_SLAB, &cs->flags); cpuset_inc(); spin_lock_irq(&callback_lock); if (is_in_v2_mode()) { cpumask_copy(cs->effective_cpus, parent->effective_cpus); cs->effective_mems = parent->effective_mems; } spin_unlock_irq(&callback_lock); if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) goto out_unlock; /* * Clone @parent's configuration if CGRP_CPUSET_CLONE_CHILDREN is * set. This flag handling is implemented in cgroup core for * histrical reasons - the flag may be specified during mount. * * Currently, if any sibling cpusets have exclusive cpus or mem, we * refuse to clone the configuration - thereby refusing the task to * be entered, and as a result refusing the sys_unshare() or * clone() which initiated it. If this becomes a problem for some * users who wish to allow that scenario, then this could be * changed to grant parent->cpus_allowed-sibling_cpus_exclusive * (and likewise for mems) to the new cgroup. */ rcu_read_lock(); cpuset_for_each_child(tmp_cs, pos_css, parent) { if (is_mem_exclusive(tmp_cs) || is_cpu_exclusive(tmp_cs)) { rcu_read_unlock(); goto out_unlock; } } rcu_read_unlock(); spin_lock_irq(&callback_lock); cs->mems_allowed = parent->mems_allowed; cs->effective_mems = parent->mems_allowed; cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); cpumask_copy(cs->effective_cpus, parent->cpus_allowed); spin_unlock_irq(&callback_lock); out_unlock: mutex_unlock(&cpuset_mutex); return 0; }

Contributors

PersonTokensPropCommitsCommitProp
Tejun Heo17467.97%847.06%
Li Zefan5119.92%211.76%
Paul Menage155.86%15.88%
Vladimir Davydov72.73%15.88%
Paul Jackson41.56%211.76%
Mel Gorman20.78%15.88%
Waiman Long20.78%15.88%
Dan Carpenter10.39%15.88%
Total256100.00%17100.00%

/* * If the cpuset being removed has its flag 'sched_load_balance' * enabled, then simulate turning sched_load_balance off, which * will call rebuild_sched_domains_locked(). */
static void cpuset_css_offline(struct cgroup_subsys_state *css) { struct cpuset *cs = css_cs(css); mutex_lock(&cpuset_mutex); if (is_sched_load_balance(cs)) update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); cpuset_dec(); clear_bit(CS_ONLINE, &cs->flags); mutex_unlock(&cpuset_mutex); }

Contributors

PersonTokensPropCommitsCommitProp
Tejun Heo2845.16%440.00%
Paul Jackson1829.03%330.00%
Paul Menage1422.58%220.00%
Mel Gorman23.23%110.00%
Total62100.00%10100.00%


static void cpuset_css_free(struct cgroup_subsys_state *css) { struct cpuset *cs = css_cs(css); free_cpumask_var(cs->effective_cpus); free_cpumask_var(cs->cpus_allowed); kfree(cs); }

Contributors

PersonTokensPropCommitsCommitProp
Tejun Heo2050.00%228.57%
Li Zefan1435.00%228.57%
Paul Jackson410.00%228.57%
Paul Menage25.00%114.29%
Total40100.00%7100.00%


static void cpuset_bind(struct cgroup_subsys_state *root_css) { mutex_lock(&cpuset_mutex); spin_lock_irq(&callback_lock); if (is_in_v2_mode()) { cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask); top_cpuset.mems_allowed = node_possible_map; } else { cpumask_copy(top_cpuset.cpus_allowed, top_cpuset.effective_cpus); top_cpuset.mems_allowed = top_cpuset.effective_mems; } spin_unlock_irq(&callback_lock); mutex_unlock(&cpuset_mutex); }

Contributors

PersonTokensPropCommitsCommitProp
Li Zefan7392.41%133.33%
Vladimir Davydov45.06%133.33%
Waiman Long22.53%133.33%
Total79100.00%3100.00%

/* * Make sure the new task conform to the current state of its parent, * which could have been changed by cpuset just after it inherits the * state from the parent and before it sits on the cgroup's task list. */
static void cpuset_fork(struct task_struct *task) { if (task_css_is_root(task, cpuset_cgrp_id)) return; set_cpus_allowed_ptr(task, &current->cpus_allowed); task->mems_allowed = current->mems_allowed; }

Contributors

PersonTokensPropCommitsCommitProp
Li Zefan3897.44%150.00%
Wei Yongjun12.56%150.00%
Total39100.00%2100.00%

struct cgroup_subsys cpuset_cgrp_subsys = { .css_alloc = cpuset_css_alloc, .css_online = cpuset_css_online, .css_offline = cpuset_css_offline, .css_free = cpuset_css_free, .can_attach = cpuset_can_attach, .cancel_attach = cpuset_cancel_attach, .attach = cpuset_attach, .post_attach = cpuset_post_attach, .bind = cpuset_bind, .fork = cpuset_fork, .legacy_cftypes = files, .early_init = true, }; /** * cpuset_init - initialize cpusets at system boot * * Description: Initialize top_cpuset and the cpuset internal file system, **/
int __init cpuset_init(void) { int err = 0; BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL)); cpumask_setall(top_cpuset.cpus_allowed); nodes_setall(top_cpuset.mems_allowed); cpumask_setall(top_cpuset.effective_cpus); nodes_setall(top_cpuset.effective_mems); fmeter_init(&top_cpuset.fmeter); set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags); top_cpuset.relax_domain_level = -1; err = register_filesystem(&cpuset_fs_type); if (err < 0) return err; BUG_ON(!alloc_cpumask_var(&cpus_attach, GFP_KERNEL)); return 0; }

Contributors

PersonTokensPropCommitsCommitProp
Li Zefan4233.33%321.43%
Paul Jackson3628.57%428.57%
Paul Menage1511.90%17.14%
Nicholas Mc Guire97.14%17.14%
Miao Xie86.35%17.14%
Hidetoshi Seto75.56%17.14%
Mike Travis53.97%17.14%
Dave Hansen21.59%17.14%
Yinghai Lu21.59%17.14%
Total126100.00%14100.00%

/* * If CPU and/or memory hotplug handlers, below, unplug any CPUs * or memory nodes, we need to walk over the cpuset hierarchy, * removing that CPU or node from all cpusets. If this removes the * last CPU or node from a cpuset, then move the tasks in the empty * cpuset to its next-highest non-empty parent. */
static void remove_tasks_in_empty_cpuset(struct cpuset *cs) { struct cpuset *parent; /* * Find its next-highest non-empty parent, (top cpuset * has online cpus, so can't be empty). */ parent = parent_cs(cs); while (cpumask_empty(parent->cpus_allowed) || nodes_empty(parent->mems_allowed)) parent = parent_cs(parent); if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) { pr_err("cpuset: failed to transfer tasks out of empty cpuset "); pr_cont_cgroup_name(cs->css.cgroup); pr_cont("\n"); } }

Contributors

PersonTokensPropCommitsCommitProp
Tejun Heo4047.06%337.50%
Cliff Wickman3440.00%112.50%
Paul Jackson78.24%112.50%
Paul Menage22.35%112.50%
Li Zefan11.18%112.50%
Fabian Frederick11.18%112.50%
Total85100.00%8100.00%


static void hotplug_update_tasks_legacy(struct cpuset *cs, struct cpumask *new_cpus, nodemask_t *new_mems, bool cpus_updated, bool mems_updated) { bool is_empty; spin_lock_irq(&callback_lock); cpumask_copy(cs->cpus_allowed, new_cpus); cpumask_copy(cs->effective_cpus, new_cpus); cs->mems_allowed = *new_mems; cs->effective_mems = *new_mems; spin_unlock_irq(&callback_lock); /* * Don't call update_tasks_cpumask() if the cpuset becomes empty, * as the tasks will be migratecd to an ancestor. */ if (cpus_updated && !cpumask_empty(cs->cpus_allowed)) update_tasks_cpumask(cs); if (mems_updated && !nodes_empty(cs->mems_allowed)) update_tasks_nodemask(cs); is_empty = cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed); mutex_unlock(&cpuset_mutex); /* * Move tasks to the nearest ancestor with execution resources, * This is full cgroup operation which will also call back into * cpuset. Should be done outside any lock. */ if (is_empty) remove_tasks_in_empty_cpuset(cs); mutex_lock(&cpuset_mutex); }

Contributors

PersonTokensPropCommitsCommitProp
Li Zefan8658.90%850.00%
Srivatsa S. Bhat3121.23%212.50%
Tejun Heo1510.27%212.50%
Cliff Wickman53.42%16.25%
Paul Jackson42.74%16.25%
Vladimir Davydov42.74%16.25%
Miao Xie10.68%16.25%
Total146100.00%16100.00%


static void hotplug_update_tasks(struct cpuset *cs, struct cpumask *new_cpus, nodemask_t *new_mems, bool cpus_updated, bool mems_updated) { if (cpumask_empty(new_cpus)) cpumask_copy(new_cpus, parent_cs(cs)->effective_cpus); if (nodes_empty(*new_mems)) *new_mems = parent_cs(cs)->effective_mems; spin_lock_irq(&callback_lock); cpumask_copy(cs->effective_cpus, new_cpus); cs->effective_mems = *new_mems; spin_unlock_irq(&callback_lock); if (cpus_updated) update_tasks_cpumask(cs); if (mems_updated) update_tasks_nodemask(cs); }

Contributors

PersonTokensPropCommitsCommitProp
Li Zefan8174.31%545.45%
Cliff Wickman98.26%19.09%
Paul Jackson76.42%218.18%
Tejun Heo65.50%19.09%
Vladimir Davydov43.67%19.09%
Miao Xie21.83%19.09%
Total109100.00%11100.00%

/** * cpuset_hotplug_update_tasks - update tasks in a cpuset for hotunplug * @cs: cpuset in interest * * Compare @cs's cpu and mem masks against top_cpuset and if some have gone * offline, update @cs accordingly. If @cs ends up with no CPU or memory, * all its tasks are moved to the nearest ancestor with both resources. */
static void cpuset_hotplug_update_tasks(struct cpuset *cs) { static cpumask_t new_cpus; static nodemask_t new_mems; bool cpus_updated; bool mems_updated; retry: wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); mutex_lock(&cpuset_mutex); /* * We have raced with task attaching. We wait until attaching * is finished, so we won't attach a task to an empty cpuset. */ if (cs->attach_in_progress) { mutex_unlock(&cpuset_mutex); goto retry; } cpumask_and(&new_cpus, cs->cpus_allowed, parent_cs(cs)->effective_cpus); nodes_and(new_mems, cs->mems_allowed, parent_cs(cs)->effective_mems); cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus); mems_updated = !nodes_equal(new_mems, cs->effective_mems); if (is_in_v2_mode()) hotplug_update_tasks(cs, &new_cpus, &new_mems, cpus_updated, mems_updated); else hotplug_update_tasks_legacy(cs, &new_cpus, &new_mems, cpus_updated, mems_updated); mutex_unlock(&cpuset_mutex); }

Contributors

PersonTokensPropCommitsCommitProp
Li Zefan12778.40%330.00%
Tejun Heo2012.35%220.00%
Paul Jackson84.94%110.00%
Paul Menage31.85%110.00%
Waiman Long21.23%110.00%
Cliff Wickman10.62%110.00%
Miao Xie10.62%110.00%
Total162100.00%10100.00%

static bool force_rebuild;
void cpuset_force_rebuild(void) { force_rebuild = true; }

Contributors

PersonTokensPropCommitsCommitProp
Peter Zijlstra11100.00%1100.00%
Total11100.00%1100.00%

/** * cpuset_hotplug_workfn - handle CPU/memory hotunplug for a cpuset * * This function is called after either CPU or memory configuration has * changed and updates cpuset accordingly. The top_cpuset is always * synchronized to cpu_active_mask and N_MEMORY, which is necessary in * order to make cpusets transparent (of no affect) on systems that are * actively using CPU hotplug but making no active use of cpusets. * * Non-root cpusets are only affected by offlining. If any CPUs or memory * nodes have been taken down, cpuset_hotplug_update_tasks() is invoked on * all descendants. * * Note that CPU offlining during suspend is ignored. We don't modify * cpusets across suspend/resume cycles at all. */
static void cpuset_hotplug_workfn(struct work_struct *work) { static cpumask_t new_cpus; static nodemask_t new_mems; bool cpus_updated, mems_updated; bool on_dfl = is_in_v2_mode(); mutex_lock(&cpuset_mutex); /* fetch the available cpus/mems and find out which changed how */ cpumask_copy(&new_cpus, cpu_active_mask); new_mems = node_states[N_MEMORY]; cpus_updated = !cpumask_equal(top_cpuset.effective_cpus, &new_cpus); mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems); /* synchronize cpus_allowed to cpu_active_mask */ if (cpus_updated) { spin_lock_irq(&callback_lock); if (!on_dfl) cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); cpumask_copy(top_cpuset.effective_cpus, &new_cpus); spin_unlock_irq(&callback_lock); /* we don't mess with cpumasks of tasks in top_cpuset */ } /* synchronize mems_allowed to N_MEMORY */ if (mems_updated) { spin_lock_irq(&callback_lock); if (!on_dfl) top_cpuset.mems_allowed = new_mems; top_cpuset.effective_mems = new_mems; spin_unlock_irq(&callback_lock); update_tasks_nodemask(&top_cpuset); } mutex_unlock(&cpuset_mutex); /* if cpus or mems changed, we need to propagate to descendants */ if (cpus_updated || mems_updated) { struct cpuset *cs; struct cgroup_subsys_state *pos_css; rcu_read_lock(); cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) { if (cs == &top_cpuset || !css_tryget_online(&cs->css)) continue; rcu_read_unlock(); cpuset_hotplug_update_tasks(cs); rcu_read_lock(); css_put(&cs->css); } rcu_read_unlock(); } /* rebuild sched domains if cpus_allowed has changed */ if (cpus_updated || force_rebuild) { force_rebuild = false; rebuild_sched_domains(); } }

Contributors

PersonTokensPropCommitsCommitProp
Tejun Heo9235.80%936.00%
Li Zefan8131.52%624.00%
Srivatsa S. Bhat2610.12%14.00%
Paul Jackson187.00%28.00%
Maksim Krasnyanskiy145.45%14.00%
Vladimir Davydov83.11%14.00%
Peter Zijlstra83.11%14.00%
Miao Xie51.95%14.00%
Li Zhong20.78%14.00%
Waiman Long20.78%14.00%
Cliff Wickman10.39%14.00%
Total257100.00%25100.00%


void cpuset_update_active_cpus(void) { /* * We're inside cpu hotplug critical region which usually nests * inside cgroup synchronization. Bounce actual hotplug processing * to a work item to avoid reverse locking order. */ schedule_work(&cpuset_hotplug_work); }

Contributors

PersonTokensPropCommitsCommitProp
Tejun Heo1285.71%250.00%
Rakib Mullick17.14%125.00%
Peter Zijlstra17.14%125.00%
Total14100.00%4100.00%


void cpuset_wait_for_hotplug(void) { flush_work(&cpuset_hotplug_work); }

Contributors

PersonTokensPropCommitsCommitProp
Peter Zijlstra13100.00%1100.00%
Total13100.00%1100.00%

/* * Keep top_cpuset.mems_allowed tracking node_states[N_MEMORY]. * Call this routine anytime after node_states[N_MEMORY] changes. * See cpuset_update_active_cpus() for CPU hotplug handling. */
static int cpuset_track_online_nodes(struct notifier_block *self, unsigned long action, void *arg) { schedule_work(&cpuset_hotplug_work); return NOTIFY_OK; }

Contributors

PersonTokensPropCommitsCommitProp
Miao Xie1657.14%116.67%
Paul Jackson414.29%116.67%
Al Viro310.71%116.67%
Dmitry Adamushko27.14%116.67%
Tejun Heo27.14%116.67%
Maksim Krasnyanskiy13.57%116.67%
Total28100.00%6100.00%

static struct notifier_block cpuset_track_online_nodes_nb = { .notifier_call = cpuset_track_online_nodes, .priority = 10, /* ??! */ }; /** * cpuset_init_smp - initialize cpus_allowed * * Description: Finish top cpuset after cpu, node maps are initialized */
void __init cpuset_init_smp(void) { cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); top_cpuset.mems_allowed = node_states[N_MEMORY]; top_cpuset.old_mems_allowed = top_cpuset.mems_allowed; cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask); top_cpuset.effective_mems = node_states[N_MEMORY]; register_hotmemory_notifier(&cpuset_track_online_nodes_nb); cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0); BUG_ON(!cpuset_migrate_mm_wq); }

Contributors

PersonTokensPropCommitsCommitProp
Li Zefan3041.10%330.00%
Paul Jackson1723.29%110.00%
Tejun Heo1520.55%110.00%
Miao Xie34.11%110.00%
Andrew Morton34.11%110.00%
Christoph Lameter34.11%110.00%
Peter Zijlstra11.37%110.00%
Lai Jiangshan11.37%110.00%
Total73100.00%10100.00%

/** * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset. * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed. * @pmask: pointer to struct cpumask variable to receive cpus_allowed set. * * Description: Returns the cpumask_var_t cpus_allowed of the cpuset * attached to the specified @tsk. Guaranteed to return some non-empty * subset of cpu_online_mask, even if this means going outside the * tasks cpuset. **/
void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) { unsigned long flags; spin_lock_irqsave(&callback_lock, flags); rcu_read_lock(); guarantee_online_cpus(task_cs(tsk), pmask); rcu_read_unlock(); spin_unlock_irqrestore(&callback_lock, flags); }

Contributors

PersonTokensPropCommitsCommitProp
Paul Jackson2039.22%112.50%
Vladimir Davydov1223.53%112.50%
Li Zefan713.73%337.50%
Mike Travis59.80%112.50%
Oleg Nesterov47.84%112.50%
Paul Menage35.88%112.50%
Total51100.00%8100.00%


void cpuset_cpus_allowed_fallback(struct task_struct *tsk) { rcu_read_lock(); do_set_cpus_allowed(tsk, task_cs(tsk)->effective_cpus); rcu_read_unlock(); /* * We own tsk->cpus_allowed, nobody can change it under us. * * But we used cs && cs->cpus_allowed lockless and thus can * race with cgroup_attach_task() or update_cpumask() and get * the wrong tsk->cpus_allowed. However, both cases imply the * subsequent cpuset_change_cpumask()->set_cpus_allowed_ptr() * which takes task_rq_lock(). * * If we are called after it dropped the lock we must see all * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary * set any mask even if it is not right from task_cs() pov, * the pending set_cpus_allowed_ptr() will fix things. * * select_fallback_rq() will fix things ups and set cpu_possible_mask * if required. */ }

Contributors

PersonTokensPropCommitsCommitProp
Oleg Nesterov2275.86%125.00%
Li Zefan517.24%250.00%
Peter Zijlstra26.90%125.00%
Total29100.00%4100.00%


void __init cpuset_init_current_mems_allowed(void) { nodes_setall(current->mems_allowed); }

Contributors

PersonTokensPropCommitsCommitProp
Paul Jackson1173.33%133.33%
Mike Travis320.00%133.33%
Rasmus Villemoes16.67%133.33%
Total15100.00%3100.00%

/** * cpuset_mems_allowed - return mems_allowed mask from a tasks cpuset. * @tsk: pointer to task_struct from which to obtain cpuset->mems_allowed. * * Description: Returns the nodemask_t mems_allowed of the cpuset * attached to the specified @tsk. Guaranteed to return some non-empty * subset of node_states[N_MEMORY], even if this means going outside the * tasks cpuset. **/
nodemask_t cpuset_mems_allowed(struct task_struct *tsk) { nodemask_t mask; unsigned long flags; spin_lock_irqsave(&callback_lock, flags); rcu_read_lock(); guarantee_online_mems(task_cs(tsk), &mask); rcu_read_unlock(); spin_unlock_irqrestore(&callback_lock, flags); return mask; }

Contributors

PersonTokensPropCommitsCommitProp
Paul Jackson3362.26%120.00%
Vladimir Davydov1222.64%120.00%
Li Zefan59.43%240.00%
Paul Menage35.66%120.00%
Total53100.00%5100.00%

/** * cpuset_nodemask_valid_mems_allowed - check nodemask vs. curremt mems_allowed * @nodemask: the nodemask to be checked * * Are any of the nodes in the nodemask allowed in current->mems_allowed? */
int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask) { return nodes_intersects(*nodemask, current->mems_allowed); }

Contributors

PersonTokensPropCommitsCommitProp
Paul Jackson1365.00%150.00%
Mel Gorman735.00%150.00%
Total20100.00%2100.00%

/* * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or * mem_hardwall ancestor to the specified cpuset. Call holding * callback_lock. If no ancestor is mem_exclusive or mem_hardwall * (an unusual configuration), then returns the root cpuset. */
static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs) { while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && parent_cs(cs)) cs = parent_cs(cs); return cs; }

Contributors

PersonTokensPropCommitsCommitProp
Paul Jackson2967.44%133.33%
Paul Menage818.60%133.33%
Tejun Heo613.95%133.33%
Total43100.00%3100.00%

/** * cpuset_node_allowed - Can we allocate on a memory node? * @node: is this an allowed node? * @gfp_mask: memory allocation flags * * If we're in interrupt, yes, we can always allocate. If @node is set in * current's mems_allowed, yes. If it's not a __GFP_HARDWALL request and this * node is set in the nearest hardwalled cpuset ancestor to current's cpuset, * yes. If current has access to memory reserves as an oom victim, yes. * Otherwise, no. * * GFP_USER allocations are marked with the __GFP_HARDWALL bit, * and do not allow allocations outside the current tasks cpuset * unless the task has been OOM killed. * GFP_KERNEL allocations are not so marked, so can escape to the * nearest enclosing hardwalled ancestor cpuset. * * Scanning up parent cpusets requires callback_lock. The * __alloc_pages() routine only calls here with __GFP_HARDWALL bit * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the * current tasks mems_allowed came up empty on the first pass over * the zonelist. So only GFP_KERNEL allocations, if all nodes in the * cpuset are short of memory, might require taking the callback_lock. * * The first call here from mm/page_alloc:get_page_from_freelist() * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, * so no allocation on a node outside the cpuset is allowed (unless * in interrupt, of course). * * The second pass through get_page_from_freelist() doesn't even call * here for GFP_ATOMIC calls. For those calls, the __alloc_pages() * variable 'wait' is not set, and the bit ALLOC_CPUSET is not set * in alloc_flags. That logic and the checks below have the combined * affect that: * in_interrupt - any node ok (current task context irrelevant) * GFP_ATOMIC - any node ok * tsk_is_oom_victim - any node ok * GFP_KERNEL - any node in enclosing hardwalled cpuset ok * GFP_USER - only nodes in current tasks mems allowed ok. */
bool __cpuset_node_allowed(int node, gfp_t gfp_mask) { struct cpuset *cs; /* current cpuset ancestors */ int allowed; /* is allocation in zone z allowed? */ unsigned long flags; if (in_interrupt()) return true; if (node_isset(node, current->mems_allowed)) return true; /* * Allow tasks that have access to memory reserves because they have * been OOM killed to get memory anywhere. */ if (unlikely(tsk_is_oom_victim(current))) return true; if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */ return false; if (current->flags & PF_EXITING) /* Let dying task have memory */ return true; /* Not hardwall and node outside mems_allowed: scan up cpusets */ spin_lock_irqsave(&callback_lock, flags); rcu_read_lock(); cs = nearest_hardwall_ancestor(task_cs(current)); allowed = node_isset(node, cs->mems_allowed); rcu_read_unlock(); spin_unlock_irqrestore(&callback_lock, flags); return allowed; }

Contributors

PersonTokensPropCommitsCommitProp
Paul Jackson8162.31%318.75%
Vladimir Davydov1310.00%212.50%
David Rientjes1310.00%212.50%
Vlastimil Babka64.62%16.25%
Bob Picco53.85%16.25%
Paul Menage43.08%212.50%
Li Zefan32.31%212.50%
Michal Hocko21.54%16.25%
Linus Torvalds21.54%16.25%
Al Viro10.77%16.25%
Total130100.00%16100.00%

/** * cpuset_mem_spread_node() - On which node to begin search for a file page * cpuset_slab_spread_node() - On which node to begin search for a slab page * * If a task is marked PF_SPREAD_PAGE or PF_SPREAD_SLAB (as for * tasks in a cpuset with is_spread_page or is_spread_slab set), * and if the memory allocation used cpuset_mem_spread_node() * to determine on which node to start looking, as it will for * certain page cache or slab cache pages such as used for file * system buffers and inode caches, then instead of starting on the * local node to look for a free page, rather spread the starting * node around the tasks mems_allowed nodes. * * We don't have to worry about the returned node being offline * because "it can't happen", and even if it did, it would be ok. * * The routines calling guarantee_online_mems() are careful to * only set nodes in task->mems_allowed that are online. So it * should not be possible for the following code to return an * offline node. But if it did, that would be ok, as this routine * is not returning the node where the allocation must be, only * the node where the search should start. The zonelist passed to * __alloc_pages() will include all nodes. If the slab allocator * is passed an offline node, it will fall back to the local node. * See kmem_cache_alloc_node(). */
static int cpuset_spread_node(int *rotor) { return *rotor = next_node_in(*rotor, current->mems_allowed); }

Contributors

PersonTokensPropCommitsCommitProp
Paul Jackson1354.17%133.33%
Jack Steiner729.17%133.33%
Andrew Morton416.67%133.33%
Total24100.00%3100.00%


int cpuset_mem_spread_node(void) { if (current->cpuset_mem_spread_rotor == NUMA_NO_NODE) current->cpuset_mem_spread_rotor = node_random(&current->mems_allowed); return cpuset_spread_node(&current->cpuset_mem_spread_rotor); }

Contributors

PersonTokensPropCommitsCommitProp
Michal Hocko2055.56%150.00%
Jack Steiner1644.44%150.00%
Total36100.00%2100.00%


int cpuset_slab_spread_node(void) { if (current->cpuset_slab_spread_rotor == NUMA_NO_NODE) current->cpuset_slab_spread_rotor = node_random(&current->mems_allowed); return cpuset_spread_node(&current->cpuset_slab_spread_rotor); }

Contributors

PersonTokensPropCommitsCommitProp
Michal Hocko2055.56%150.00%
Jack Steiner1644.44%150.00%
Total36100.00%2100.00%

EXPORT_SYMBOL_GPL(cpuset_mem_spread_node); /** * cpuset_mems_allowed_intersects - Does @tsk1's mems_allowed intersect @tsk2's? * @tsk1: pointer to task_struct of some task. * @tsk2: pointer to task_struct of some other task. * * Description: Return true if @tsk1's mems_allowed intersects the * mems_allowed of @tsk2. Used by the OOM killer to determine if * one of the task's memory usage might impact the memory available * to the other. **/
int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, const struct task_struct *tsk2) { return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed); }

Contributors

PersonTokensPropCommitsCommitProp
Paul Jackson2068.97%266.67%
David Rientjes931.03%133.33%
Total29100.00%3100.00%

/** * cpuset_print_current_mems_allowed - prints current's cpuset and mems_allowed * * Description: Prints current's name, cpuset name, and cached copy of its * mems_allowed to the kernel log. */
void cpuset_print_current_mems_allowed(void) { struct cgroup *cgrp; rcu_read_lock(); cgrp = task_cs(current)->css.cgroup; pr_info("%s cpuset=", current->comm); pr_cont_cgroup_name(cgrp); pr_cont(" mems_allowed=%*pbl\n", nodemask_pr_args(&current->mems_allowed)); rcu_read_unlock(); }

Contributors

PersonTokensPropCommitsCommitProp
David Rientjes2748.21%228.57%
Li Zefan1425.00%228.57%
Tejun Heo1425.00%228.57%
Fabian Frederick11.79%114.29%
Total56100.00%7100.00%

/* * Collection of memory_pressure is suppressed unless * this flag is enabled by writing "1" to the special * cpuset file 'memory_pressure_enabled' in the root cpuset. */ int cpuset_memory_pressure_enabled __read_mostly; /** * cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims. * * Keep a running average of the rate of synchronous (direct) * page reclaim efforts initiated by tasks in each cpuset. * * This represents the rate at which some task in the cpuset * ran low on memory on all nodes it was allowed to use, and * had to enter the kernels page reclaim code in an effort to * create more free memory by tossing clean pages or swapping * or writing dirty pages. * * Display to user space in the per-cpuset read-only file * "memory_pressure". Value displayed is an integer * representing the recent rate of entry into the synchronous * (direct) page reclaim by any task attached to the cpuset. **/
void __cpuset_memory_pressure_bump(void) { rcu_read_lock(); fmeter_markevent(&task_cs(current)->fmeter); rcu_read_unlock(); }

Contributors

PersonTokensPropCommitsCommitProp
Paul Jackson1666.67%133.33%
Li Zefan416.67%133.33%
Paul Menage416.67%133.33%
Total24100.00%3100.00%

#ifdef CONFIG_PROC_PID_CPUSET /* * proc_cpuset_show() * - Print tasks cpuset path into seq_file. * - Used for /proc/<pid>/cpuset. * - No need to task_lock(tsk) on this tsk->cpuset reference, as it * doesn't really matter if tsk->cpuset changes after we read it, * and we take cpuset_mutex, keeping cpuset_attach() from changing it * anyway. */
int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk) { char *buf; struct cgroup_subsys_state *css; int retval; retval = -ENOMEM; buf = kmalloc(PATH_MAX, GFP_KERNEL); if (!buf) goto out; css = task_get_css(tsk, cpuset_cgrp_id); retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX, current->nsproxy->cgroup_ns); css_put(css); if (retval >= PATH_MAX) retval = -ENAMETOOLONG; if (retval < 0) goto out_free; seq_puts(m, buf); seq_putc(m, '\n'); retval = 0; out_free: kfree(buf); out: return retval; }

Contributors

PersonTokensPropCommitsCommitProp
Paul Jackson7250.70%216.67%
Tejun Heo2215.49%433.33%
Eric W. Biedermann1611.27%216.67%
Aditya Kali128.45%18.33%
Paul Menage117.75%18.33%
Li Zefan96.34%216.67%
Total142100.00%12100.00%

#endif /* CONFIG_PROC_PID_CPUSET */ /* Display task mems_allowed in /proc/<pid>/status file. */
void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task) { seq_printf(m, "Mems_allowed:\t%*pb\n", nodemask_pr_args(&task->mems_allowed)); seq_printf(m, "Mems_allowed_list:\t%*pbl\n", nodemask_pr_args(&task->mems_allowed)); }

Contributors

PersonTokensPropCommitsCommitProp
Paul Jackson1840.00%120.00%
Mike Travis1022.22%120.00%
Tejun Heo817.78%120.00%
Eric W. Biedermann715.56%120.00%
Lai Jiangshan24.44%120.00%
Total45100.00%5100.00%


Overall Contributors

PersonTokensPropCommitsCommitProp
Paul Jackson188722.48%229.24%
Tejun Heo165419.70%5422.69%
Li Zefan165219.68%5121.43%
Paul Menage148517.69%93.78%
Miao Xie3644.34%93.78%
Hidetoshi Seto1922.29%10.42%
Maksim Krasnyanskiy1251.49%10.42%
Vladimir Davydov951.13%20.84%
David Rientjes931.11%72.94%
Lai Jiangshan780.93%52.10%
Cliff Wickman580.69%20.84%
Srivatsa S. Bhat580.69%20.84%
Ben Blum520.62%20.84%
Waiman Long510.61%20.84%
Peter Zijlstra470.56%62.52%
Michal Hocko460.55%20.84%
Jack Steiner400.48%10.42%
Juri Lelli370.44%20.84%
Mel Gorman310.37%31.26%
Mike Travis280.33%20.84%
Andrew Morton270.32%20.84%
Oleg Nesterov260.31%20.84%
Eric W. Biedermann230.27%31.26%
Joonwoo Park220.26%10.42%
Paolo Bonzini220.26%10.42%
Rik Van Riel200.24%10.42%
Rusty Russell170.20%20.84%
Al Viro170.20%31.26%
Arnd Bergmann160.19%10.42%
Frédéric Weisbecker150.18%10.42%
Gu Zheng140.17%10.42%
Aditya Kali120.14%10.42%
Vlastimil Babka110.13%20.84%
Ingo Molnar100.12%41.68%
David P. Quigley90.11%10.42%
Nicholas Mc Guire90.11%10.42%
Rakib Mullick60.07%20.84%
Bob Picco50.06%10.42%
Dima Zavin50.06%10.42%
Fabian Frederick50.06%20.84%
Dave Hansen40.05%10.42%
Linus Torvalds30.04%20.84%
Christoph Lameter30.04%10.42%
Gautham R. Shenoy20.02%10.42%
Dmitry Adamushko20.02%10.42%
Yinghai Lu20.02%10.42%
David Howells20.02%20.84%
Li Zhong20.02%10.42%
Arun Sharma10.01%10.42%
Alban Crequy10.01%10.42%
Rasmus Villemoes10.01%10.42%
Viresh Kumar10.01%10.42%
Wei Yongjun10.01%10.42%
Adrian Bunk10.01%10.42%
Heiko Carstens10.01%10.42%
Zhao Hongjiang10.01%10.42%
Paul Gortmaker10.01%10.42%
Dan Carpenter10.01%10.42%
Total8394100.00%238100.00%
Directory: kernel/cgroup
Information contained on this website is for historical information purposes only and does not indicate or represent copyright ownership.
Created with cregit.