Release 4.15 kernel/workqueue.c
/*
* kernel/workqueue.c - generic async execution with shared worker pool
*
* Copyright (C) 2002 Ingo Molnar
*
* Derived from the taskqueue/keventd code by:
* David Woodhouse <dwmw2@infradead.org>
* Andrew Morton
* Kai Petzke <wpp@marie.physik.tu-berlin.de>
* Theodore Ts'o <tytso@mit.edu>
*
* Made to use alloc_percpu by Christoph Lameter.
*
* Copyright (C) 2010 SUSE Linux Products GmbH
* Copyright (C) 2010 Tejun Heo <tj@kernel.org>
*
* This is the generic async execution mechanism. Work items are
* executed in process context. The worker pool is shared and
* automatically managed. There are two worker pools for each CPU (one for
* normal work items and the other for high priority ones) and some extra
* pools for workqueues which are not bound to any specific CPU - the
* number of these backing pools is dynamic.
*
* Please read Documentation/core-api/workqueue.rst for details.
*/
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
#include <linux/hardirq.h>
#include <linux/mempolicy.h>
#include <linux/freezer.h>
#include <linux/debug_locks.h>
#include <linux/lockdep.h>
#include <linux/idr.h>
#include <linux/jhash.h>
#include <linux/hashtable.h>
#include <linux/rculist.h>
#include <linux/nodemask.h>
#include <linux/moduleparam.h>
#include <linux/uaccess.h>
#include <linux/sched/isolation.h>
#include <linux/nmi.h>
#include "workqueue_internal.h"
/*
 * Pool flags, worker flags and tuning constants used by the workqueue
 * implementation in this file.
 */
enum {
/*
 * worker_pool flags
 *
 * A bound pool is either associated or disassociated with its CPU.
 * While associated (!DISASSOCIATED), all workers are bound to the
 * CPU and none has %WORKER_UNBOUND set and concurrency management
 * is in effect.
 *
 * While DISASSOCIATED, the cpu may be offline and all workers have
 * %WORKER_UNBOUND set and concurrency management disabled, and may
 * be executing on any CPU. The pool behaves as an unbound one.
 *
 * Note that DISASSOCIATED should be flipped only while holding
 * attach_mutex to avoid changing binding state while
 * worker_attach_to_pool() is in progress.
 */
POOL_MANAGER_ACTIVE = 1 << 0, /* being managed */
POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */
/* worker flags */
WORKER_DIE = 1 << 1, /* die die die */
WORKER_IDLE = 1 << 2, /* is idle */
WORKER_PREP = 1 << 3, /* preparing to run works */
WORKER_CPU_INTENSIVE = 1 << 6, /* cpu intensive */
WORKER_UNBOUND = 1 << 7, /* worker is unbound */
WORKER_REBOUND = 1 << 8, /* worker was rebound */
/*
 * A worker with any of these flags set is skipped by the nr_running
 * accounting in wq_worker_waking_up() and wq_worker_sleeping().
 */
WORKER_NOT_RUNNING = WORKER_PREP | WORKER_CPU_INTENSIVE |
WORKER_UNBOUND | WORKER_REBOUND,
NR_STD_WORKER_POOLS = 2, /* # standard pools per cpu */
UNBOUND_POOL_HASH_ORDER = 6, /* hashed by pool->attrs */
BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */
MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */
IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */
MAYDAY_INITIAL_TIMEOUT = HZ / 100 >= 2 ? HZ / 100 : 2,
/* call for help after 10ms (min two ticks) */
MAYDAY_INTERVAL = HZ / 10, /* and then every 100ms */
CREATE_COOLDOWN = HZ, /* time to breathe after fail */
/*
 * Rescue workers are used only on emergencies and shared by
 * all cpus. Give MIN_NICE.
 */
RESCUER_NICE_LEVEL = MIN_NICE,
HIGHPRI_NICE_LEVEL = MIN_NICE,
WQ_NAME_LEN = 24, /* size of workqueue_struct->name[] */
};
/*
* Structure fields follow one of the following exclusion rules.
*
* I: Modifiable by initialization/destruction paths and read-only for
* everyone else.
*
* P: Preemption protected. Disabling preemption is enough and should
* only be modified and accessed from the local cpu.
*
* L: pool->lock protected. Access with pool->lock held.
*
* X: During normal operation, modification requires pool->lock and should
* be done only from local cpu. Either disabling preemption on local
* cpu or grabbing pool->lock is enough for read access. If
* POOL_DISASSOCIATED is set, it's identical to L.
*
* A: pool->attach_mutex protected.
*
* PL: wq_pool_mutex protected.
*
* PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads.
*
* PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads.
*
* PWR: wq_pool_mutex and wq->mutex protected for writes. Either or
* sched-RCU for reads.
*
* WQ: wq->mutex protected.
*
* WR: wq->mutex protected for writes. Sched-RCU protected for reads.
*
* MD: wq_mayday_lock protected.
*/
/* struct worker is defined in workqueue_internal.h */
/*
 * A pool of workers together with the list of work items they serve.
 * The letter codes in the field comments (I:, L:, X:, A:, PL:) refer to
 * the exclusion rules listed in the comment preceding this struct.
 */
struct worker_pool {
spinlock_t lock; /* the pool lock */
int cpu; /* I: the associated cpu */
int node; /* I: the associated node ID */
int id; /* I: pool ID */
unsigned int flags; /* X: flags */
unsigned long watchdog_ts; /* L: watchdog timestamp */
struct list_head worklist; /* L: list of pending works */
int nr_workers; /* L: total number of workers */
/* nr_idle includes the ones off idle_list for rebinding */
int nr_idle; /* L: currently idle ones */
struct list_head idle_list; /* X: list of idle workers */
struct timer_list idle_timer; /* L: worker idle timeout */
struct timer_list mayday_timer; /* L: SOS timer for workers */
/* a worker is either on busy_hash or idle_list, or the manager */
DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
/* L: hash of busy workers */
/*
 * see manage_workers() for details on the two manager mutexes
 * NOTE(review): only attach_mutex is declared in this struct, so the
 * "two manager mutexes" wording may be stale - confirm against
 * manage_workers().
 */
struct worker *manager; /* L: purely informational */
struct mutex attach_mutex; /* attach/detach exclusion */
struct list_head workers; /* A: attached workers */
struct completion *detach_completion; /* all workers detached */
struct ida worker_ida; /* worker IDs for task name */
struct workqueue_attrs *attrs; /* I: worker attributes */
struct hlist_node hash_node; /* PL: unbound_pool_hash node */
int refcnt; /* PL: refcnt for unbound pools */
/*
 * The current concurrency level. As it's likely to be accessed
 * from other CPUs during try_to_wake_up(), put it in a separate
 * cacheline.
 */
atomic_t nr_running ____cacheline_aligned_in_smp;
/*
 * Destruction of pool is sched-RCU protected to allow dereferences
 * from get_work_pool().
 */
struct rcu_head rcu;
} ____cacheline_aligned_in_smp;
/*
* The per-pool workqueue. While queued, the lower WORK_STRUCT_FLAG_BITS
* of work_struct->data are used for flags and the remaining high bits
* point to the pwq; thus, pwqs need to be aligned at two's power of the
* number of flag bits.
*/
/*
 * Links one workqueue (@wq) to one worker_pool (@pool) and carries the
 * per-link accounting (colors, in-flight and active counts).
 */
struct pool_workqueue {
struct worker_pool *pool; /* I: the associated pool */
struct workqueue_struct *wq; /* I: the owning workqueue */
int work_color; /* L: current color */
int flush_color; /* L: flushing color */
int refcnt; /* L: reference count */
int nr_in_flight[WORK_NR_COLORS];
/* L: nr of in_flight works */
int nr_active; /* L: nr of active works */
int max_active; /* L: max active works */
struct list_head delayed_works; /* L: delayed works */
struct list_head pwqs_node; /* WR: node on wq->pwqs */
struct list_head mayday_node; /* MD: node on wq->maydays */
/*
 * Release of unbound pwq is punted to system_wq. See put_pwq()
 * and pwq_unbound_release_workfn() for details. pool_workqueue
 * itself is also sched-RCU protected so that the first pwq can be
 * determined without grabbing wq->mutex.
 */
struct work_struct unbound_release_work;
struct rcu_head rcu;
/*
 * The alignment keeps the low WORK_STRUCT_FLAG_BITS of every pwq
 * address zero, so those bits of work_struct->data can hold flags
 * while the remaining bits hold the pwq pointer.
 */
} __aligned(1 << WORK_STRUCT_FLAG_BITS);
/*
* Structure used to wait for workqueue flush.
*/
/*
 * One waiter of a workqueue flush: list linkage, the color it waits for
 * and the completion it blocks on.
 */
struct wq_flusher {
struct list_head list; /* WQ: list of flushers */
int flush_color; /* WQ: flush color waiting for */
struct completion done; /* flush completion */
};
/*
 * Forward declaration only; workqueue_struct stores a pointer to it when
 * CONFIG_SYSFS is set.
 */
struct wq_device;
/*
* The externally visible workqueue. It relays the issued work items to
* the appropriate worker_pool through its pool_workqueues.
*/
/*
 * Field comments use the exclusion-rule letter codes (WR:, PR:, WQ:, MD:,
 * I:, PW:, PWR:) listed earlier in this file.
 */
struct workqueue_struct {
struct list_head pwqs; /* WR: all pwqs of this wq */
struct list_head list; /* PR: list of all workqueues */
struct mutex mutex; /* protects this wq */
int work_color; /* WQ: current work color */
int flush_color; /* WQ: current flush color */
atomic_t nr_pwqs_to_flush; /* flush in progress */
struct wq_flusher *first_flusher; /* WQ: first flusher */
struct list_head flusher_queue; /* WQ: flush waiters */
struct list_head flusher_overflow; /* WQ: flush overflow list */
struct list_head maydays; /* MD: pwqs requesting rescue */
struct worker *rescuer; /* I: rescue worker */
int nr_drainers; /* WQ: drain in progress */
int saved_max_active; /* WQ: saved pwq max_active */
struct workqueue_attrs *unbound_attrs; /* PW: only for unbound wqs */
struct pool_workqueue *dfl_pwq; /* PW: only for unbound wqs */
#ifdef CONFIG_SYSFS
struct wq_device *wq_dev; /* I: for sysfs interface */
#endif
#ifdef CONFIG_LOCKDEP
struct lockdep_map lockdep_map;
#endif
char name[WQ_NAME_LEN]; /* I: workqueue name */
/*
 * Destruction of workqueue_struct is sched-RCU protected to allow
 * walking the workqueues list without grabbing wq_pool_mutex.
 * This is used to dump all workqueues from sysrq.
 */
struct rcu_head rcu;
/* hot fields used during command issue, aligned to cacheline */
unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */
struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */
/* flexible array member: must stay the last field of the struct */
struct pool_workqueue __rcu *numa_pwq_tbl[]; /* PWR: unbound pwqs indexed by node */
};
/*
 * File-scope state of the workqueue subsystem. PL:/PR:/I: prefixes refer
 * to the exclusion rules listed earlier in this file.
 */
static struct kmem_cache *pwq_cache;
/* possible CPUs of each node */
static cpumask_var_t *wq_numa_possible_cpumask;
/* backing variable of the "disable_numa" module parameter (mode 0444) */
static bool wq_disable_numa;
module_param_named(disable_numa, wq_disable_numa, bool, 0444);
/* see the comment above the definition of WQ_POWER_EFFICIENT */
static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
module_param_named(power_efficient, wq_power_efficient, bool, 0444);
/* can kworkers be created yet? */
static bool wq_online;
/* unbound NUMA affinity enabled */
static bool wq_numa_enabled;
/* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */
static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */
static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
static LIST_HEAD(workqueues); /* PR: list of all workqueues */
/* PL: have wqs started freezing? */
static bool workqueue_freezing;
/* PL: allowable cpus for unbound wqs and work items */
static cpumask_var_t wq_unbound_cpumask;
/* CPU where unbound work was last round robin scheduled from this CPU */
static DEFINE_PER_CPU(int, wq_rr_cpu_last);
/*
 * Local execution of unbound work items is no longer guaranteed. The
 * following always forces round-robin CPU selection on unbound work items
 * to uncover usages which depend on it.
 */
#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
static bool wq_debug_force_rr_cpu = true;
#else
static bool wq_debug_force_rr_cpu = false;
#endif
module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
/* the per-cpu worker pools */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_worker_pools);
static DEFINE_IDR(worker_pool_idr); /* PR: idr of all pools */
/* PL: hash of all unbound pools keyed by pool->attrs */
static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);
/* I: attributes used when instantiating standard unbound pools on demand */
static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];
/* I: attributes used when instantiating ordered pools on demand */
static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];
/* the system-wide workqueues exported to the rest of the kernel */
struct workqueue_struct *system_wq __read_mostly;
EXPORT_SYMBOL(system_wq);
struct workqueue_struct *system_highpri_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_highpri_wq);
struct workqueue_struct *system_long_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_long_wq);
struct workqueue_struct *system_unbound_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_unbound_wq);
struct workqueue_struct *system_freezable_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_wq);
struct workqueue_struct *system_power_efficient_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_power_efficient_wq);
struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
/* forward declarations of functions defined later in this file */
static int worker_thread(void *__worker);
static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
/*
 * NOTE(review): CREATE_TRACE_POINTS is defined right before the trace
 * header is included - presumably so that this file instantiates the
 * workqueue tracepoints; confirm against the tracepoint documentation.
 */
#define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h>
/* Lockdep check: complain unless sched-RCU or wq_pool_mutex is held. */
#define assert_rcu_or_pool_mutex()					\
	RCU_LOCKDEP_WARN(!(rcu_read_lock_sched_held() ||		\
			   lockdep_is_held(&wq_pool_mutex)),		\
			 "sched RCU or wq_pool_mutex should be held")
/*
 * Lockdep check: complain unless sched-RCU or @wq->mutex is held.
 * @wq is parenthesized so that any pointer-valued expression can be
 * passed as the argument.
 */
#define assert_rcu_or_wq_mutex(wq)					\
	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() &&			\
			 !lockdep_is_held(&(wq)->mutex),		\
			 "sched RCU or wq->mutex should be held")
/*
 * Lockdep check: complain unless sched-RCU, @wq->mutex or wq_pool_mutex
 * is held. @wq is parenthesized so that any pointer-valued expression
 * can be passed as the argument.
 */
#define assert_rcu_or_wq_mutex_or_pool_mutex(wq)			\
	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() &&			\
			 !lockdep_is_held(&(wq)->mutex) &&		\
			 !lockdep_is_held(&wq_pool_mutex),		\
			 "sched RCU, wq->mutex or wq_pool_mutex should be held")
/* Walk the NR_STD_WORKER_POOLS standard pools of @cpu, in array order. */
#define for_each_cpu_worker_pool(pool, cpu)				\
	for ((pool) = per_cpu(cpu_worker_pools, cpu);			\
	     (pool) < per_cpu(cpu_worker_pools, cpu) + NR_STD_WORKER_POOLS; \
	     (pool)++)
/**
* for_each_pool - iterate through all worker_pools in the system
* @pool: iteration cursor
* @pi: integer used for iteration
*
* This must be called either with wq_pool_mutex held or sched RCU read
* locked. If the pool needs to be used beyond the locking in effect, the
* caller is responsible for guaranteeing that the pool stays online.
*
* The if/else clause exists only for the lockdep assertion and can be
* ignored.
*/
/*
 * Iterates worker_pool_idr. The statement expression always yields false,
 * so the empty if-branch is never taken and the caller's loop body runs as
 * the else-branch.
 */
#define for_each_pool(pool, pi) \
idr_for_each_entry(&worker_pool_idr, pool, pi) \
if (({ assert_rcu_or_pool_mutex(); false; })) { } \
else
/**
* for_each_pool_worker - iterate through all workers of a worker_pool
* @worker: iteration cursor
* @pool: worker_pool to iterate workers of
*
* This must be called with @pool->attach_mutex.
*
* The if/else clause exists only for the lockdep assertion and can be
* ignored.
*/
/*
 * Walks @pool->workers. @pool is parenthesized in both places it is used
 * so that any pointer-valued expression can be passed. The statement
 * expression always yields false, so the caller's loop body runs as the
 * else-branch.
 */
#define for_each_pool_worker(worker, pool)				\
	list_for_each_entry((worker), &(pool)->workers, node)		\
		if (({ lockdep_assert_held(&(pool)->attach_mutex); false; })) { } \
		else
/**
* for_each_pwq - iterate through all pool_workqueues of the specified workqueue
* @pwq: iteration cursor
* @wq: the target workqueue
*
* This must be called either with wq->mutex held or sched RCU read locked.
* If the pwq needs to be used beyond the locking in effect, the caller is
* responsible for guaranteeing that the pwq stays online.
*
* The if/else clause exists only for the lockdep assertion and can be
* ignored.
*/
/*
 * Walks wq->pwqs with the RCU list iterator. The statement expression
 * always yields false, so the caller's loop body runs as the else-branch.
 */
#define for_each_pwq(pwq, wq) \
list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node) \
if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \
else
#ifdef CONFIG_DEBUG_OBJECTS_WORK
/* tentative definition; the initialized descriptor follows the callbacks */
static struct debug_obj_descr work_debug_descr;

/* .debug_hint callback: hand back the work item's ->func pointer. */
static void *work_debug_hint(void *addr)
{
	struct work_struct *work = addr;

	return work->func;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Stanislaw Gruszka | 23 | 100.00% | 1 | 100.00% |
Total | 23 | 100.00% | 1 | 100.00% |
/* .is_static_object callback: true when WORK_STRUCT_STATIC_BIT is set. */
static bool work_is_static_object(void *addr)
{
	return test_bit(WORK_STRUCT_STATIC_BIT,
			work_data_bits((struct work_struct *)addr));
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Thomas Gleixner | 20 | 71.43% | 1 | 33.33% |
Changbin Du | 8 | 28.57% | 2 | 66.67% |
Total | 28 | 100.00% | 3 | 100.00% |
/*
* fixup_init is called when:
* - an active object is initialized
*/
static bool work_fixup_init(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	/* only an object that is still active needs repairing */
	if (state != ODEBUG_STATE_ACTIVE)
		return false;

	/* cancel the item first, then redo the initialization */
	cancel_work_sync(work);
	debug_object_init(work, &work_debug_descr);
	return true;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Thomas Gleixner | 43 | 86.00% | 1 | 33.33% |
Changbin Du | 7 | 14.00% | 2 | 66.67% |
Total | 50 | 100.00% | 3 | 100.00% |
/*
* fixup_free is called when:
* - an active object is freed
*/
static bool work_fixup_free(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	/* only an object that is still active needs repairing */
	if (state != ODEBUG_STATE_ACTIVE)
		return false;

	/* cancel the item first, then report it as freed */
	cancel_work_sync(work);
	debug_object_free(work, &work_debug_descr);
	return true;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Thomas Gleixner | 47 | 94.00% | 1 | 50.00% |
Changbin Du | 3 | 6.00% | 1 | 50.00% |
Total | 50 | 100.00% | 2 | 100.00% |
/*
 * Descriptor handed to the debug_object_*() calls in this file; it wires
 * up the work_struct specific callbacks defined in this file.
 */
static struct debug_obj_descr work_debug_descr = {
.name = "work_struct",
.debug_hint = work_debug_hint,
.is_static_object = work_is_static_object,
.fixup_init = work_fixup_init,
.fixup_free = work_fixup_free,
};
/* Report @work to debugobjects as activated (CONFIG_DEBUG_OBJECTS_WORK). */
static inline void debug_work_activate(struct work_struct *work)
{
debug_object_activate(work, &work_debug_descr);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Thomas Gleixner | 20 | 100.00% | 1 | 100.00% |
Total | 20 | 100.00% | 1 | 100.00% |
/* Report @work to debugobjects as deactivated (CONFIG_DEBUG_OBJECTS_WORK). */
static inline void debug_work_deactivate(struct work_struct *work)
{
debug_object_deactivate(work, &work_debug_descr);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Thomas Gleixner | 20 | 100.00% | 1 | 100.00% |
Total | 20 | 100.00% | 1 | 100.00% |
/*
 * Register @work with debugobjects, using the on-stack variant of the
 * init call when @onstack is non-zero.
 */
void __init_work(struct work_struct *work, int onstack)
{
	if (!onstack) {
		debug_object_init(work, &work_debug_descr);
		return;
	}

	debug_object_init_on_stack(work, &work_debug_descr);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Thomas Gleixner | 34 | 100.00% | 1 | 100.00% |
Total | 34 | 100.00% | 1 | 100.00% |
EXPORT_SYMBOL_GPL(__init_work);
/* Tell debugobjects that @work is being freed. */
void destroy_work_on_stack(struct work_struct *work)
{
debug_object_free(work, &work_debug_descr);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Thomas Gleixner | 18 | 100.00% | 1 | 100.00% |
Total | 18 | 100.00% | 1 | 100.00% |
EXPORT_SYMBOL_GPL(destroy_work_on_stack);
/*
 * Tear down the debug state of a delayed work item: first its embedded
 * timer, then the embedded work_struct.
 */
void destroy_delayed_work_on_stack(struct delayed_work *work)
{
destroy_timer_on_stack(&work->timer);
debug_object_free(&work->work, &work_debug_descr);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Thomas Gleixner | 29 | 100.00% | 1 | 100.00% |
Total | 29 | 100.00% | 1 | 100.00% |
EXPORT_SYMBOL_GPL(destroy_delayed_work_on_stack);
#else
/* No-op stub used when CONFIG_DEBUG_OBJECTS_WORK is not set. */
static inline void debug_work_activate(struct work_struct *work) { }
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Thomas Gleixner | 11 | 100.00% | 1 | 100.00% |
Total | 11 | 100.00% | 1 | 100.00% |
/* No-op stub used when CONFIG_DEBUG_OBJECTS_WORK is not set. */
static inline void debug_work_deactivate(struct work_struct *work) { }
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Thomas Gleixner | 11 | 100.00% | 1 | 100.00% |
Total | 11 | 100.00% | 1 | 100.00% |
#endif
/**
* worker_pool_assign_id - allocate ID and assign it to @pool
* @pool: the pool pointer of interest
*
* Returns 0 if ID in [0, WORK_OFFQ_POOL_NONE) is allocated and assigned
* successfully, -errno on failure.
*/
static int worker_pool_assign_id(struct worker_pool *pool)
{
	int id;

	lockdep_assert_held(&wq_pool_mutex);

	/* IDs are handed out from the range [0, WORK_OFFQ_POOL_NONE) */
	id = idr_alloc(&worker_pool_idr, pool, 0, WORK_OFFQ_POOL_NONE,
		       GFP_KERNEL);
	if (id < 0)
		return id;	/* pass the idr_alloc() error through */

	pool->id = id;
	return 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 54 | 96.43% | 6 | 75.00% |
Li Bin | 1 | 1.79% | 1 | 12.50% |
Lai Jiangshan | 1 | 1.79% | 1 | 12.50% |
Total | 56 | 100.00% | 8 | 100.00% |
/**
* unbound_pwq_by_node - return the unbound pool_workqueue for the given node
* @wq: the target workqueue
* @node: the node ID
*
* This must be called with any of wq_pool_mutex, wq->mutex or sched RCU
* read locked.
* If the pwq needs to be used beyond the locking in effect, the caller is
* responsible for guaranteeing that the pwq stays online.
*
* Return: The unbound pool_workqueue for @node.
*/
static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
						  int node)
{
	assert_rcu_or_wq_mutex_or_pool_mutex(wq);

	/* the common case: a real node ID indexes the per-node table */
	if (likely(node != NUMA_NO_NODE))
		return rcu_dereference_raw(wq->numa_pwq_tbl[node]);

	/*
	 * XXX: @node can be NUMA_NO_NODE if CPU goes offline while a
	 * delayed item is pending. The plan is to keep CPU -> NODE
	 * mapping valid and stable across CPU on/offlines. Once that
	 * happens, this workaround can be removed.
	 */
	return wq->dfl_pwq;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 46 | 97.87% | 2 | 66.67% |
Lai Jiangshan | 1 | 2.13% | 1 | 33.33% |
Total | 47 | 100.00% | 3 | 100.00% |
/* Shift @color into the color bit-field position of work_struct->data. */
static unsigned int work_color_to_flags(int color)
{
return color << WORK_STRUCT_COLOR_SHIFT;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 15 | 100.00% | 1 | 100.00% |
Total | 15 | 100.00% | 1 | 100.00% |
/* Extract the color bit-field from @work's data word. */
static int get_work_color(struct work_struct *work)
{
	unsigned long data = *work_data_bits(work);
	unsigned long mask = (1 << WORK_STRUCT_COLOR_BITS) - 1;

	return (data >> WORK_STRUCT_COLOR_SHIFT) & mask;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 32 | 100.00% | 1 | 100.00% |
Total | 32 | 100.00% | 1 | 100.00% |
/* Advance @color by one, wrapping around modulo WORK_NR_COLORS. */
static int work_next_color(int color)
{
	int next = color + 1;

	return next % WORK_NR_COLORS;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 18 | 100.00% | 1 | 100.00% |
Total | 18 | 100.00% | 1 | 100.00% |
/*
* While queued, %WORK_STRUCT_PWQ is set and non flag bits of a work's data
* contain the pointer to the queued pwq. Once execution starts, the flag
* is cleared and the high bits contain OFFQ flags and pool ID.
*
* set_work_pwq(), set_work_pool_and_clear_pending(), mark_work_canceling()
* and clear_work_data() can be used to set the pwq, pool or clear
* work->data. These functions should only be called while the work is
* owned - ie. while the PENDING bit is set.
*
* get_work_pool() and get_work_pwq() can be used to obtain the pool or pwq
* corresponding to a work. Pool is available once the work has been
* queued anywhere after initialization until it is sync canceled. pwq is
* available only while the work item is queued.
*
* %WORK_OFFQ_CANCELING is used to mark a work item which is being
* canceled. While being canceled, a work item may have its PENDING set
* but stay off timer and worklist for arbitrarily long and nobody should
* try to steal the PENDING bit.
*/
/*
 * Store @data | @flags into work->data, carrying over whatever
 * work_static() reports for @work. Warns once if @work is not pending.
 */
static inline void set_work_data(struct work_struct *work, unsigned long data,
				 unsigned long flags)
{
	unsigned long val;

	WARN_ON_ONCE(!work_pending(work));

	val = data | flags | work_static(work);
	atomic_long_set(&work->data, val);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 26 | 56.52% | 3 | 60.00% |
David Howells | 20 | 43.48% | 2 | 40.00% |
Total | 46 | 100.00% | 5 | 100.00% |
/* Point @work at @pwq with PENDING, PWQ and @extra_flags set. */
static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq,
			 unsigned long extra_flags)
{
	unsigned long flags = WORK_STRUCT_PENDING | WORK_STRUCT_PWQ |
			      extra_flags;

	set_work_data(work, (unsigned long)pwq, flags);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 33 | 89.19% | 4 | 66.67% |
David Howells | 3 | 8.11% | 1 | 16.67% |
Linus Torvalds | 1 | 2.70% | 1 | 16.67% |
Total | 37 | 100.00% | 6 | 100.00% |
/* Record @pool_id in @work's data word while leaving PENDING set. */
static void set_work_pool_and_keep_pending(struct work_struct *work,
					   int pool_id)
{
	unsigned long data = (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT;

	set_work_data(work, data, WORK_STRUCT_PENDING);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Lai Jiangshan | 29 | 100.00% | 1 | 100.00% |
Total | 29 | 100.00% | 1 | 100.00% |
/*
 * Record @pool_id in @work's data word and drop the PENDING flag (0 is
 * passed as the flags argument). The store is bracketed by a write barrier
 * before and a full barrier after; see the comments inside.
 */
static void set_work_pool_and_clear_pending(struct work_struct *work,
int pool_id)
{
/*
 * The following wmb is paired with the implied mb in
 * test_and_set_bit(PENDING) and ensures all updates to @work made
 * here are visible to and precede any updates by the next PENDING
 * owner.
 */
smp_wmb();
set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
/*
 * The following mb guarantees that previous clear of a PENDING bit
 * will not be reordered with any speculative LOADS or STORES from
 * work->current_func, which is executed afterwards. This possible
 * reordering can lead to a missed execution on attempt to queue
 * the same @work. E.g. consider this case:
 *
 *   CPU#0                         CPU#1
 *   ----------------------------  --------------------------------
 *
 * 1  STORE event_indicated
 * 2  queue_work_on() {
 * 3    test_and_set_bit(PENDING)
 * 4  }                             set_..._and_clear_pending() {
 * 5                                  set_work_data() # clear bit
 * 6                                  smp_mb()
 * 7                                work->current_func() {
 * 8                                  LOAD event_indicated
 *                                  }
 *
 * Without an explicit full barrier speculative LOAD on line 8 can
 * be executed before CPU#0 does STORE on line 1. If that happens,
 * CPU#0 observes the PENDING bit is still set and new execution of
 * a @work is not queued in a hope, that CPU#1 will eventually
 * finish the queued @work. Meanwhile CPU#1 does not see
 * event_indicated is set, because speculative LOAD was executed
 * before actual STORE.
 */
smp_mb();
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 21 | 56.76% | 5 | 71.43% |
Oleg Nesterov | 12 | 32.43% | 1 | 14.29% |
Roman Peniaev | 4 | 10.81% | 1 | 14.29% |
Total | 37 | 100.00% | 7 | 100.00% |
/*
 * Reset @work's data word to WORK_STRUCT_NO_POOL, passing 0 as the flags
 * argument, after a write barrier. Unlike
 * set_work_pool_and_clear_pending(), no smp_mb() follows the store here.
 */
static void clear_work_data(struct work_struct *work)
{
smp_wmb(); /* see set_work_pool_and_clear_pending() */
set_work_data(work, WORK_STRUCT_NO_POOL, 0);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 21 | 87.50% | 4 | 80.00% |
Oleg Nesterov | 3 | 12.50% | 1 | 20.00% |
Total | 24 | 100.00% | 5 | 100.00% |
/*
 * Return the pwq encoded in @work's data word, or NULL when the
 * WORK_STRUCT_PWQ flag is not set.
 */
static struct pool_workqueue *get_work_pwq(struct work_struct *work)
{
	unsigned long data = atomic_long_read(&work->data);

	if (!(data & WORK_STRUCT_PWQ))
		return NULL;

	/* strip the flag bits to recover the pointer */
	return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 33 | 71.74% | 3 | 60.00% |
David Howells | 12 | 26.09% | 1 | 20.00% |
Oleg Nesterov | 1 | 2.17% | 1 | 20.00% |
Total | 46 | 100.00% | 5 | 100.00% |
/**
* get_work_pool - return the worker_pool a given work was associated with
* @work: the work item of interest
*
* Pools are created and destroyed under wq_pool_mutex, and allow read
* access under sched-RCU read lock. As such, this function should be
* called under wq_pool_mutex or with preemption disabled.
*
* All fields of the returned pool are accessible as long as the above
* mentioned locking is in effect. If the returned pool needs to be used
* beyond the critical section, the caller is responsible for ensuring the
* returned pool is and stays online.
*
* Return: The worker_pool @work was last associated with. %NULL if none.
*/
static struct worker_pool *get_work_pool(struct work_struct *work)
{
unsigned long data = atomic_long_read(&work->data);
int pool_id;
assert_rcu_or_pool_mutex();
if (data & WORK_STRUCT_PWQ)
return ((struct pool_workqueue *)
(data & WORK_STRUCT_WQ_DATA_MASK))->pool;
pool_id = data >> WORK_OFFQ_POOL_SHIFT;
if (pool_id == WORK_OFFQ_POOL_NONE)
return NULL;
return idr_find(&worker_pool_idr, pool_id);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 70 | 90.91% | 6 | 66.67% |
David Howells | 4 | 5.19% | 1 | 11.11% |
Linus Torvalds | 2 | 2.60% | 1 | 11.11% |
Lai Jiangshan | 1 | 1.30% | 1 | 11.11% |
Total | 77 | 100.00% | 9 | 100.00% |
/**
* get_work_pool_id - return the worker pool ID a given work is associated with
* @work: the work item of interest
*
* Return: The worker_pool ID @work was last associated with.
* %WORK_OFFQ_POOL_NONE if none.
*/
static int get_work_pool_id(struct work_struct *work)
{
unsigned long data = atomic_long_read(&work->data);
if (data & WORK_STRUCT_PWQ)
return ((struct pool_workqueue *)
(data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;
return data >> WORK_OFFQ_POOL_SHIFT;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Lai Jiangshan | 29 | 55.77% | 1 | 25.00% |
Tejun Heo | 23 | 44.23% | 3 | 75.00% |
Total | 52 | 100.00% | 4 | 100.00% |
/*
 * Rewrite @work's data word as: current pool ID, WORK_OFFQ_CANCELING,
 * and PENDING.
 */
static void mark_work_canceling(struct work_struct *work)
{
	unsigned long data = (unsigned long)get_work_pool_id(work) <<
			     WORK_OFFQ_POOL_SHIFT;

	set_work_data(work, data | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 35 | 100.00% | 2 | 100.00% |
Total | 35 | 100.00% | 2 | 100.00% |
static bool work_is_canceling(struct work_struct *work)
{
unsigned long data = atomic_long_read(&work->data);
return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 37 | 100.00% | 2 | 100.00% |
Total | 37 | 100.00% | 2 | 100.00% |
/*
* Policy functions. These define the policies on how the global worker
* pools are managed. Unless noted otherwise, these functions assume that
* they're being called with pool->lock held.
*/
/* True when @pool's nr_running count reads zero. */
static bool __need_more_worker(struct worker_pool *pool)
{
	return atomic_read(&pool->nr_running) == 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 21 | 100.00% | 5 | 100.00% |
Total | 21 | 100.00% | 5 | 100.00% |
/*
* Need to wake up a worker? Called from anything but currently
* running workers.
*
* Note that, because unbound workers never contribute to nr_running, this
* function will always return %true for unbound pools as long as the
* worklist isn't empty.
*/
static bool need_more_worker(struct worker_pool *pool)
{
return !list_empty(&pool->worklist) && __need_more_worker(pool);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 26 | 100.00% | 5 | 100.00% |
Total | 26 | 100.00% | 5 | 100.00% |
/* Can I start working? Called from busy but !running workers. */
static bool may_start_working(struct worker_pool *pool)
{
	/* true as long as nr_idle is non-zero */
	return pool->nr_idle != 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 16 | 100.00% | 4 | 100.00% |
Total | 16 | 100.00% | 4 | 100.00% |
/* Do I need to keep working? Called from currently running workers. */
static bool keep_working(struct worker_pool *pool)
{
return !list_empty(&pool->worklist) &&
atomic_read(&pool->nr_running) <= 1;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 31 | 100.00% | 5 | 100.00% |
Total | 31 | 100.00% | 5 | 100.00% |
/* Do we need a new worker? Called from manager. */
static bool need_to_create_worker(struct worker_pool *pool)
{
return need_more_worker(pool) && !may_start_working(pool);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 23 | 100.00% | 3 | 100.00% |
Total | 23 | 100.00% | 3 | 100.00% |
/* Do we have too many workers and should some go away? */
static bool too_many_workers(struct worker_pool *pool)
{
bool managing = pool->flags & POOL_MANAGER_ACTIVE;
int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
int nr_busy = pool->nr_workers - nr_idle;
return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 54 | 100.00% | 5 | 100.00% |
Total | 54 | 100.00% | 5 | 100.00% |
/*
* Wake up functions.
*/
/* Return the first idle worker. Safe with preemption disabled */
static struct worker *first_idle_worker(struct worker_pool *pool)
{
	/* head of idle_list, or NULL when the list is empty */
	if (likely(!list_empty(&pool->idle_list)))
		return list_first_entry(&pool->idle_list, struct worker,
					entry);

	return NULL;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 42 | 97.67% | 4 | 80.00% |
Lai Jiangshan | 1 | 2.33% | 1 | 20.00% |
Total | 43 | 100.00% | 5 | 100.00% |
/**
* wake_up_worker - wake up an idle worker
* @pool: worker pool to wake worker from
*
* Wake up the first idle worker of @pool.
*
* CONTEXT:
* spin_lock_irq(pool->lock).
*/
static void wake_up_worker(struct worker_pool *pool)
{
	struct worker *idle = first_idle_worker(pool);

	/* nothing to do when the pool has no idle worker */
	if (unlikely(!idle))
		return;

	wake_up_process(idle->task);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 34 | 97.14% | 3 | 75.00% |
Lai Jiangshan | 1 | 2.86% | 1 | 25.00% |
Total | 35 | 100.00% | 4 | 100.00% |
/**
* wq_worker_waking_up - a worker is waking up
* @task: task waking up
* @cpu: CPU @task is waking up to
*
* This function is called during try_to_wake_up() when a worker is
* being awoken.
*
* CONTEXT:
* spin_lock_irq(rq->lock)
*/
void wq_worker_waking_up(struct task_struct *task, int cpu)
{
	struct worker *worker = kthread_data(task);

	/* workers flagged NOT_RUNNING are left out of nr_running */
	if (worker->flags & WORKER_NOT_RUNNING)
		return;

	WARN_ON_ONCE(worker->pool->cpu != cpu);
	atomic_inc(&worker->pool->nr_running);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 31 | 54.39% | 4 | 44.44% |
JoonSoo Kim | 13 | 22.81% | 1 | 11.11% |
Ingo Molnar | 9 | 15.79% | 1 | 11.11% |
Oleg Nesterov | 3 | 5.26% | 2 | 22.22% |
Andrew Morton | 1 | 1.75% | 1 | 11.11% |
Total | 57 | 100.00% | 9 | 100.00% |
/**
* wq_worker_sleeping - a worker is going to sleep
* @task: task going to sleep
*
* This function is called during schedule() when a busy worker is
* going to sleep. Worker on the same cpu can be woken up by
* returning pointer to its task.
*
* The worker's pool nr_running is decremented. If that drops it to zero
* while the pool's worklist is not empty, the first idle worker of the
* pool is selected as the one to wake up.
*
* CONTEXT:
* spin_lock_irq(rq->lock)
*
* Return:
* Task of the idle worker of the same pool to wake up, %NULL if none.
* %NULL is also returned when the worker has any %WORKER_NOT_RUNNING
* flag set or when the pool's cpu is not the local cpu.
*/
struct task_struct *wq_worker_sleeping(struct task_struct *task)
{
struct worker *worker = kthread_data(task), *to_wakeup = NULL;
struct worker_pool *pool;
/*
* Rescuers, which may not have all the fields set up like normal
* workers, also reach here, let's not access anything before
* checking NOT_RUNNING.
*/
if (worker->flags & WORKER_NOT_RUNNING)
return NULL;
/* only now is it safe to look at worker->pool */
pool = worker->pool;
/* this can only happen on the local cpu; warn once and bail otherwise */
if (WARN_ON_ONCE(pool->cpu != raw_smp_processor_id()))
return NULL;
/*
* The counterpart of the following dec_and_test, implied mb,
* worklist not empty test sequence is in insert_work().
* Please read comment there.
*
* NOT_RUNNING is clear. This means that we're bound to and
* running on the local cpu w/ rq lock held and preemption
* disabled, which in turn means that none else could be
* manipulating idle_list, so dereferencing idle_list without pool
* lock is safe.
*/
if (atomic_dec_and_test(&pool->nr_running) &&
!list_empty(&pool->worklist))
to_wakeup = first_idle_worker(pool);
/* first_idle_worker() may itself return NULL when idle_list is empty */
return to_wakeup ? to_wakeup->task : NULL;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 95 | 93.14% | 8 | 72.73% |
Lai Jiangshan | 5 | 4.90% | 2 | 18.18% |
Alexander Gordeev | 2 | 1.96% | 1 | 9.09% |
Total | 102 | 100.00% | 11 | 100.00% |
/**
* worker_set_flags - set worker flags and adjust nr_running accordingly
* @worker: self
* @flags: flags to set
*
* Set @flags in @worker->flags and adjust nr_running accordingly.
*
* CONTEXT:
* spin_lock_irq(pool->lock)
*/
static inline void worker_set_flags(struct worker *worker, unsigned int flags)
{
struct worker_pool *pool = worker->pool;
WARN_ON_ONCE(worker->task != current);
/* If transitioning into NOT_RUNNING, adjust nr_running. */
if ((flags & WORKER_NOT_RUNNING) &&
!(worker->flags & WORKER_NOT_RUNNING)) {
atomic_dec(&pool->nr_running);
}
worker->flags |= flags;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 59 | 86.76% | 7 | 77.78% |
Oleg Nesterov | 8 | 11.76% | 1 | 11.11% |
Lai Jiangshan | 1 | 1.47% | 1 | 11.11% |
Total | 68 | 100.00% | 9 | 100.00% |
/**
 * worker_clr_flags - clear worker flags and adjust nr_running accordingly
 * @worker: self
 * @flags: flags to clear
 *
 * Remove @flags from @worker->flags.  When this clears the last
 * remaining %WORKER_NOT_RUNNING bit, the pool's nr_running is
 * incremented.
 *
 * CONTEXT:
 * spin_lock_irq(pool->lock)
 */
static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
{
	struct worker_pool *pool = worker->pool;
	unsigned int before = worker->flags;

	WARN_ON_ONCE(worker->task != current);

	worker->flags &= ~flags;

	/*
	 * WORKER_NOT_RUNNING is a mask of several flags rather than a
	 * single bit, so clearing some of them does not necessarily make
	 * the worker running again.  nr_running is only bumped when a
	 * NOT_RUNNING bit was requested to be cleared, at least one was
	 * set beforehand, and none is left afterwards.
	 */
	if (!(flags & WORKER_NOT_RUNNING))
		return;
	if (!(before & WORKER_NOT_RUNNING))
		return;
	if (worker->flags & WORKER_NOT_RUNNING)
		return;

	atomic_inc(&pool->nr_running);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 83 | 100.00% | 6 | 100.00% |
Total | 83 | 100.00% | 6 | 100.00% |
/**
* find_worker_executing_work - find worker which is executing a work
* @pool: pool of interest
* @work: work to find worker for
*
* Find a worker which is executing @work on @pool by searching
* @pool->busy_hash which is keyed by the address of @work. For a worker
* to match, its current execution should match the address of @work and
* its work function. This is to avoid unwanted dependency between
* unrelated work executions through a work item being recycled while still
* being executed.
*
* This is a bit tricky. A work item may be freed once its execution
* starts and nothing prevents the freed area from being recycled for
* another work item. If the same work item address ends up being reused
* before the original execution finishes, workqueue will identify the
* recycled work item as currently executing and make it wait until the
* current execution finishes, introducing an unwanted dependency.
*
* This function checks the work item address and work function to avoid
* false positives. Note that this isn't complete as one may construct a
* work function which can introduce dependency onto itself through a
* recycled work item. Well, if somebody wants to shoot oneself in the
* foot that badly, there's only so much we can do, and if such deadlock
* actually occurs, it should be easy to locate the culprit work function.
*
* CONTEXT:
* spin_lock_irq(pool->lock).
*
* Return:
* Pointer to worker which is executing @work if found, %NULL
* otherwise.
*/
static struct worker *find_worker_executing_work(struct worker_pool *pool,
struct work_struct *work)
{
struct worker *worker;
hash_for_each_possible(pool->busy_hash, worker, hentry,
(unsigned long)work)
if (worker->current_work == work &&
worker->current_func == work->func)
return worker;
return NULL;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 50 | 90.91% | 3 | 75.00% |
Sasha Levin | 5 | 9.09% | 1 | 25.00% |
Total | 55 | 100.00% | 4 | 100.00% |
/**
* move_linked_works - move linked works to a list
* @work: start of series of works to be scheduled
* @head: target list to append @work to
* @nextp: out parameter for nested worklist walking
*
* Schedule linked works starting from @work to @head. Work series to
* be scheduled starts at @work and includes any consecutive work with
* WORK_STRUCT_LINKED set in its predecessor.
*
* If @nextp is not NULL, it's updated to point to the next work of
* the last scheduled work. This allows move_linked_works() to be
* nested inside outer list_for_each_entry_safe().
*
* CONTEXT:
* spin_lock_irq(pool->lock).
*/
static void move_linked_works(struct work_struct *work, struct list_head *head,
struct work_struct **nextp)
{
struct work_struct *n;
/*
* Linked worklist will always end before the end of the list,
* use NULL for list head.
*/
list_for_each_entry_safe_from(work, n, NULL, entry) {
list_move_tail(&work->entry, head);
if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))