Release 4.15 kernel/events/core.c
/*
* Performance events core code:
*
* Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
* Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*
* For licensing details see kernel-base/COPYING
*/
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/idr.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/hash.h>
#include <linux/tick.h>
#include <linux/sysfs.h>
#include <linux/dcache.h>
#include <linux/percpu.h>
#include <linux/ptrace.h>
#include <linux/reboot.h>
#include <linux/vmstat.h>
#include <linux/device.h>
#include <linux/export.h>
#include <linux/vmalloc.h>
#include <linux/hardirq.h>
#include <linux/rculist.h>
#include <linux/uaccess.h>
#include <linux/syscalls.h>
#include <linux/anon_inodes.h>
#include <linux/kernel_stat.h>
#include <linux/cgroup.h>
#include <linux/perf_event.h>
#include <linux/trace_events.h>
#include <linux/hw_breakpoint.h>
#include <linux/mm_types.h>
#include <linux/module.h>
#include <linux/mman.h>
#include <linux/compat.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/namei.h>
#include <linux/parser.h>
#include <linux/sched/clock.h>
#include <linux/sched/mm.h>
#include <linux/proc_ns.h>
#include <linux/mount.h>
#include "internal.h"
#include <asm/irq_regs.h>
typedef int (*remote_function_f)(void *);
struct remote_function_call {
        struct task_struct *p;
        remote_function_f func;
        void *info;
        int ret;
};
static void remote_function(void *data)
{
        struct remote_function_call *tfc = data;
        struct task_struct *p = tfc->p;

        if (p) {
                /* -EAGAIN */
                if (task_cpu(p) != smp_processor_id())
                        return;

                /*
                 * Now that we're on the right CPU with IRQs disabled, we can
                 * test if we hit the right task without races.
                 */
                tfc->ret = -ESRCH; /* No such (running) process */
                if (p != current)
                        return;
        }

        tfc->ret = tfc->func(tfc->info);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Peter Zijlstra | 73 | 100.00% | 2 | 100.00% |
Total | 73 | 100.00% | 2 | 100.00% |
/**
 * task_function_call - call a function on the cpu on which a task runs
 * @p: the task to evaluate
 * @func: the function to be called
 * @info: the function call argument
 *
 * Calls the function @func when the task is currently running. This might
 * be on the current CPU, which just calls the function directly.
 *
 * returns: @func return value, or
 *          -ESRCH  - when the process isn't running
 *          -EAGAIN - when the process moved away
 */
static int
task_function_call(struct task_struct *p, remote_function_f func, void *info)
{
        struct remote_function_call data = {
                .p = p,
                .func = func,
                .info = info,
                .ret = -EAGAIN,
        };
        int ret;

        do {
                ret = smp_call_function_single(task_cpu(p), remote_function, &data, 1);
                if (!ret)
                        ret = data.ret;
        } while (ret == -EAGAIN);

        return ret;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Peter Zijlstra | 90 | 100.00% | 3 | 100.00% |
Total | 90 | 100.00% | 3 | 100.00% |
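As an illustration of the calling convention (a hypothetical sketch, not code from this file): the callback runs on @p's CPU with IRQs disabled, and the -EAGAIN retry is handled inside task_function_call() itself, so a caller only sees the callback's return value or -ESRCH. The helper names below are invented.

/* Hypothetical usage sketch; __probe_task_nice and probe_task_nice are
 * invented names, not part of this file. */
static int __probe_task_nice(void *info)
{
        int *nice = info;

        /* Runs on the target task's CPU; remote_function() has already
         * checked that the task is current here. */
        *nice = task_nice(current);
        return 0;
}

static int probe_task_nice(struct task_struct *p, int *nice)
{
        /* 0 on success, -ESRCH if @p is no longer running. */
        return task_function_call(p, __probe_task_nice, nice);
}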
/**
 * cpu_function_call - call a function on the cpu
 * @cpu: target cpu to queue this function on
 * @func: the function to be called
 * @info: the function call argument
 *
 * Calls the function @func on the remote cpu.
 *
 * returns: @func return value or -ENXIO when the cpu is offline
 */
static int cpu_function_call(int cpu, remote_function_f func, void *info)
{
        struct remote_function_call data = {
                .p = NULL,
                .func = func,
                .info = info,
                .ret = -ENXIO, /* No such CPU */
        };

        smp_call_function_single(cpu, remote_function, &data, 1);

        return data.ret;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Peter Zijlstra | 62 | 100.00% | 2 | 100.00% |
Total | 62 | 100.00% | 2 | 100.00% |
static inline struct perf_cpu_context *
__get_cpu_context(struct perf_event_context *ctx)
{
        return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Peter Zijlstra | 24 | 100.00% | 1 | 100.00% |
Total | 24 | 100.00% | 1 | 100.00% |
static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
                          struct perf_event_context *ctx)
{
        raw_spin_lock(&cpuctx->ctx.lock);
        if (ctx)
                raw_spin_lock(&ctx->lock);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Peter Zijlstra | 38 | 100.00% | 2 | 100.00% |
Total | 38 | 100.00% | 2 | 100.00% |
static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
                            struct perf_event_context *ctx)
{
        if (ctx)
                raw_spin_unlock(&ctx->lock);
        raw_spin_unlock(&cpuctx->ctx.lock);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Peter Zijlstra | 38 | 100.00% | 1 | 100.00% |
Total | 38 | 100.00% | 1 | 100.00% |
#define TASK_TOMBSTONE ((void *)-1L)

static bool is_kernel_event(struct perf_event *event)
{
        return READ_ONCE(event->owner) == TASK_TOMBSTONE;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Peter Zijlstra | 21 | 100.00% | 3 | 100.00% |
Total | 21 | 100.00% | 3 | 100.00% |
/*
* On task ctx scheduling...
*
* When !ctx->nr_events a task context will not be scheduled. This means
* we can disable the scheduler hooks (for performance) without leaving
* pending task ctx state.
*
* This however results in two special cases:
*
* - removing the last event from a task ctx; this is relatively straight
* forward and is done in __perf_remove_from_context.
*
* - adding the first event to a task ctx; this is tricky because we cannot
* rely on ctx->is_active and therefore cannot use event_function_call().
* See perf_install_in_context().
*
* If ctx->nr_events, then ctx->is_active and cpuctx->task_ctx are set.
*/
typedef void (*event_f)(struct perf_event *, struct perf_cpu_context *,
                        struct perf_event_context *, void *);

struct event_function_struct {
        struct perf_event *event;
        event_f func;
        void *data;
};
static int event_function(void *info)
{
        struct event_function_struct *efs = info;
        struct perf_event *event = efs->event;
        struct perf_event_context *ctx = event->ctx;
        struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
        struct perf_event_context *task_ctx = cpuctx->task_ctx;
        int ret = 0;

        lockdep_assert_irqs_disabled();

        perf_ctx_lock(cpuctx, task_ctx);
        /*
         * Since we do the IPI call without holding ctx->lock things can have
         * changed, double check we hit the task we set out to hit.
         */
        if (ctx->task) {
                if (ctx->task != current) {
                        ret = -ESRCH;
                        goto unlock;
                }

                /*
                 * We only use event_function_call() on established contexts,
                 * and event_function() is only ever called when active (or
                 * rather, we'll have bailed in task_function_call() or the
                 * above ctx->task != current test), therefore we must have
                 * ctx->is_active here.
                 */
                WARN_ON_ONCE(!ctx->is_active);
                /*
                 * And since we have ctx->is_active, cpuctx->task_ctx must
                 * match.
                 */
                WARN_ON_ONCE(task_ctx != ctx);
        } else {
                WARN_ON_ONCE(&cpuctx->ctx != ctx);
        }

        efs->func(event, cpuctx, ctx, efs->data);
unlock:
        perf_ctx_unlock(cpuctx, task_ctx);

        return ret;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Peter Zijlstra | 152 | 99.35% | 4 | 80.00% |
Frédéric Weisbecker | 1 | 0.65% | 1 | 20.00% |
Total | 153 | 100.00% | 5 | 100.00% |
static void event_function_call(struct perf_event *event, event_f func, void *data)
{
        struct perf_event_context *ctx = event->ctx;
        struct task_struct *task = READ_ONCE(ctx->task); /* verified in event_function */
        struct event_function_struct efs = {
                .event = event,
                .func = func,
                .data = data,
        };

        if (!event->parent) {
                /*
                 * If this is a !child event, we must hold ctx::mutex to
                 * stabilize the event->ctx relation. See
                 * perf_event_ctx_lock().
                 */
                lockdep_assert_held(&ctx->mutex);
        }

        if (!task) {
                cpu_function_call(event->cpu, event_function, &efs);
                return;
        }

        if (task == TASK_TOMBSTONE)
                return;

again:
        if (!task_function_call(task, event_function, &efs))
                return;

        raw_spin_lock_irq(&ctx->lock);
        /*
         * Reload the task pointer, it might have been changed by
         * a concurrent perf_event_context_sched_out().
         */
        task = ctx->task;
        if (task == TASK_TOMBSTONE) {
                raw_spin_unlock_irq(&ctx->lock);
                return;
        }
        if (ctx->is_active) {
                raw_spin_unlock_irq(&ctx->lock);
                goto again;
        }
        func(event, NULL, ctx, data);
        raw_spin_unlock_irq(&ctx->lock);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Peter Zijlstra | 192 | 100.00% | 5 | 100.00% |
Total | 192 | 100.00% | 5 | 100.00% |
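To make the mechanism concrete, here is a hedged sketch (invented names, not code from this file) of how an event_f modifier is typically shaped: the callback runs with ctx->lock held, either via IPI through event_function() or directly on an inactive context, in which case cpuctx is NULL.

/* Hypothetical usage sketch; __perf_event_touch and perf_event_touch are
 * invented names, not part of this file. */
static void __perf_event_touch(struct perf_event *event,
                               struct perf_cpu_context *cpuctx,
                               struct perf_event_context *ctx,
                               void *data)
{
        u64 *stamp = data;

        /* ctx->lock is held; cpuctx is NULL on the inactive-context path. */
        *stamp = perf_event_time(event);
}

static u64 perf_event_touch(struct perf_event *event)
{
        u64 stamp = 0;

        /* event_function_call() finds the right CPU/task and retries
         * races against context scheduling. */
        event_function_call(event, __perf_event_touch, &stamp);
        return stamp;
}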
/*
* Similar to event_function_call() + event_function(), but hard assumes IRQs
* are already disabled and we're on the right CPU.
*/
static void event_function_local(struct perf_event *event, event_f func, void *data)
{
        struct perf_event_context *ctx = event->ctx;
        struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
        struct task_struct *task = READ_ONCE(ctx->task);
        struct perf_event_context *task_ctx = NULL;

        lockdep_assert_irqs_disabled();

        if (task) {
                if (task == TASK_TOMBSTONE)
                        return;

                task_ctx = ctx;
        }

        perf_ctx_lock(cpuctx, task_ctx);

        task = ctx->task;
        if (task == TASK_TOMBSTONE)
                goto unlock;

        if (task) {
                /*
                 * We must be either inactive or active and the right task,
                 * otherwise we're screwed, since we cannot IPI to somewhere
                 * else.
                 */
                if (ctx->is_active) {
                        if (WARN_ON_ONCE(task != current))
                                goto unlock;

                        if (WARN_ON_ONCE(cpuctx->task_ctx != ctx))
                                goto unlock;
                }
        } else {
                WARN_ON_ONCE(&cpuctx->ctx != ctx);
        }

        func(event, cpuctx, ctx, data);
unlock:
        perf_ctx_unlock(cpuctx, task_ctx);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Peter Zijlstra | 171 | 99.42% | 1 | 50.00% |
Frédéric Weisbecker | 1 | 0.58% | 1 | 50.00% |
Total | 172 | 100.00% | 2 | 100.00% |
#define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
                       PERF_FLAG_FD_OUTPUT  |\
                       PERF_FLAG_PID_CGROUP |\
                       PERF_FLAG_FD_CLOEXEC)
/*
* branch priv levels that need permission checks
*/
#define PERF_SAMPLE_BRANCH_PERM_PLM \
        (PERF_SAMPLE_BRANCH_KERNEL |\
         PERF_SAMPLE_BRANCH_HV)
enum event_type_t {
        EVENT_FLEXIBLE = 0x1,
        EVENT_PINNED = 0x2,
        EVENT_TIME = 0x4,
        /* see ctx_resched() for details */
        EVENT_CPU = 0x8,
        EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
};
/*
* perf_sched_events : >0 events exist
* perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
*/
static void perf_sched_delayed(struct work_struct *work);
DEFINE_STATIC_KEY_FALSE(perf_sched_events);
static DECLARE_DELAYED_WORK(perf_sched_work, perf_sched_delayed);
static DEFINE_MUTEX(perf_sched_mutex);
static atomic_t perf_sched_count;
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static DEFINE_PER_CPU(int, perf_sched_cb_usages);
static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
static atomic_t nr_namespaces_events __read_mostly;
static atomic_t nr_task_events __read_mostly;
static atomic_t nr_freq_events __read_mostly;
static atomic_t nr_switch_events __read_mostly;
static LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock);
static struct srcu_struct pmus_srcu;
static cpumask_var_t perf_online_mask;
/*
* perf event paranoia level:
* -1 - not paranoid at all
* 0 - disallow raw tracepoint access for unpriv
* 1 - disallow cpu events for unpriv
* 2 - disallow kernel profiling for unpriv
*/
int sysctl_perf_event_paranoid __read_mostly = 2;
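From userspace this level is visible in /proc/sys/kernel/perf_event_paranoid; a minimal userspace sketch (not kernel code) for checking it before attempting an unprivileged perf_event_open():

/* Userspace sketch, not kernel code: read the current paranoia level. */
#include <stdio.h>

int perf_paranoid_level(void)
{
        FILE *f = fopen("/proc/sys/kernel/perf_event_paranoid", "r");
        int level = 2; /* assume the restrictive default on error */

        if (f) {
                if (fscanf(f, "%d", &level) != 1)
                        level = 2;
                fclose(f);
        }
        return level; /* e.g. < 1 is needed for unprivileged cpu-wide events */
}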
/* Minimum for 512 kiB + 1 user control page */
int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */
/*
* max perf event sample rate
*/
#define DEFAULT_MAX_SAMPLE_RATE 100000
#define DEFAULT_SAMPLE_PERIOD_NS (NSEC_PER_SEC / DEFAULT_MAX_SAMPLE_RATE)
#define DEFAULT_CPU_TIME_MAX_PERCENT 25
int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE;
static int max_samples_per_tick __read_mostly = DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ);
static int perf_sample_period_ns __read_mostly = DEFAULT_SAMPLE_PERIOD_NS;
static int perf_sample_allowed_ns __read_mostly =
        DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100;
static void update_perf_cpu_limits(void)
{
        u64 tmp = perf_sample_period_ns;

        tmp *= sysctl_perf_cpu_time_max_percent;
        tmp = div_u64(tmp, 100);
        if (!tmp)
                tmp = 1;

        WRITE_ONCE(perf_sample_allowed_ns, tmp);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Dave Hansen | 25 | 59.52% | 1 | 25.00% |
Peter Zijlstra | 16 | 38.10% | 2 | 50.00% |
Geliang Tang | 1 | 2.38% | 1 | 25.00% |
Total | 42 | 100.00% | 4 | 100.00% |
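Plugging in the defaults: 100000 samples/sec gives a 10000 ns sample period, and 25% of that leaves 2500 ns of allowed sample time per sample. A standalone C sketch of the same arithmetic (not kernel code):

/* Standalone demo of the limit arithmetic, not kernel code. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t sample_rate = 100000;                    /* DEFAULT_MAX_SAMPLE_RATE */
        uint64_t period_ns = 1000000000ULL / sample_rate; /* 10000 ns */
        uint64_t max_percent = 25;                        /* DEFAULT_CPU_TIME_MAX_PERCENT */
        uint64_t allowed_ns = period_ns * max_percent / 100;

        if (!allowed_ns)
                allowed_ns = 1; /* same floor as update_perf_cpu_limits() */

        printf("period=%llu ns, allowed=%llu ns\n",
               (unsigned long long)period_ns,
               (unsigned long long)allowed_ns); /* 10000 ns, 2500 ns */
        return 0;
}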
static int perf_rotate_context(struct perf_cpu_context *cpuctx);
int perf_proc_update_handler(struct ctl_table *table, int write,
                             void __user *buffer, size_t *lenp,
                             loff_t *ppos)
{
        int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

        if (ret || !write)
                return ret;

        /*
         * If throttling is disabled don't allow the write:
         */
        if (sysctl_perf_cpu_time_max_percent == 100 ||
            sysctl_perf_cpu_time_max_percent == 0)
                return -EINVAL;

        max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ);
        perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
        update_perf_cpu_limits();

        return 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Peter Zijlstra | 63 | 71.59% | 1 | 25.00% |
Kan Liang | 15 | 17.05% | 1 | 25.00% |
Dave Hansen | 9 | 10.23% | 1 | 25.00% |
Knut Petersen | 1 | 1.14% | 1 | 25.00% |
Total | 88 | 100.00% | 4 | 100.00% |
int sysctl_perf_cpu_time_max_percent __read_mostly = DEFAULT_CPU_TIME_MAX_PERCENT;
int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
                                      void __user *buffer, size_t *lenp,
                                      loff_t *ppos)
{
        int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

        if (ret || !write)
                return ret;

        if (sysctl_perf_cpu_time_max_percent == 100 ||
            sysctl_perf_cpu_time_max_percent == 0) {
                printk(KERN_WARNING
                       "perf: Dynamic interrupt throttling disabled, can hang your system!\n");
                WRITE_ONCE(perf_sample_allowed_ns, 0);
        } else {
                update_perf_cpu_limits();
        }

        return 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Dave Hansen | 57 | 66.28% | 1 | 25.00% |
Peter Zijlstra | 28 | 32.56% | 2 | 50.00% |
Tan Xiaojun | 1 | 1.16% | 1 | 25.00% |
Total | 86 | 100.00% | 4 | 100.00% |
/*
* perf samples are done in some very critical code paths (NMIs).
* If they take too much CPU time, the system can lock up and not
* get any real work done. This will drop the sample rate when
* we detect that events are taking too long.
*/
#define NR_ACCUMULATED_SAMPLES 128
static DEFINE_PER_CPU(u64, running_sample_length);
static u64 __report_avg;
static u64 __report_allowed;
static void perf_duration_warn(struct irq_work *w)
{
        printk_ratelimited(KERN_INFO
                "perf: interrupt took too long (%lld > %lld), lowering "
                "kernel.perf_event_max_sample_rate to %d\n",
                __report_avg, __report_allowed,
                sysctl_perf_event_sample_rate);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Peter Zijlstra | 19 | 79.17% | 2 | 50.00% |
Dave Hansen | 4 | 16.67% | 1 | 25.00% |
David Ahern | 1 | 4.17% | 1 | 25.00% |
Total | 24 | 100.00% | 4 | 100.00% |
static DEFINE_IRQ_WORK(perf_duration_work, perf_duration_warn);
void perf_sample_event_took(u64 sample_len_ns)
{
        u64 max_len = READ_ONCE(perf_sample_allowed_ns);
        u64 running_len;
        u64 avg_len;
        u32 max;

        if (max_len == 0)
                return;

        /* Decay the counter by 1 average sample. */
        running_len = __this_cpu_read(running_sample_length);
        running_len -= running_len/NR_ACCUMULATED_SAMPLES;
        running_len += sample_len_ns;
        __this_cpu_write(running_sample_length, running_len);

        /*
         * Note: this will be biased artificially low until we have
         * seen NR_ACCUMULATED_SAMPLES. Doing it this way keeps us
         * from having to maintain a count.
         */
        avg_len = running_len/NR_ACCUMULATED_SAMPLES;
        if (avg_len <= max_len)
                return;

        __report_avg = avg_len;
        __report_allowed = max_len;

        /*
         * Compute a throttle threshold 25% below the current duration.
         */
        avg_len += avg_len / 4;
        max = (TICK_NSEC / 100) * sysctl_perf_cpu_time_max_percent;
        if (avg_len < max)
                max /= (u32)avg_len;
        else
                max = 1;

        WRITE_ONCE(perf_sample_allowed_ns, avg_len);
        WRITE_ONCE(max_samples_per_tick, max);
        sysctl_perf_event_sample_rate = max * HZ;
        perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;

        if (!irq_work_queue(&perf_duration_work)) {
                early_printk("perf: interrupt took too long (%lld > %lld), lowering "
                             "kernel.perf_event_max_sample_rate to %d\n",
                             __report_avg, __report_allowed,
                             sysctl_perf_event_sample_rate);
        }
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Peter Zijlstra | 107 | 65.64% | 4 | 66.67% |
Dave Hansen | 52 | 31.90% | 1 | 16.67% |
Christoph Lameter | 4 | 2.45% | 1 | 16.67% |
Total | 163 | 100.00% | 6 | 100.00% |
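The running_sample_length update is a cheap exponential moving average: each sample replaces 1/NR_ACCUMULATED_SAMPLES of the accumulator, so running_len/128 converges on the mean sample length without maintaining a count. A standalone demo of the decay (not kernel code):

/* Standalone demo of the NR_ACCUMULATED_SAMPLES decay, not kernel code. */
#include <stdio.h>
#include <stdint.h>

#define NR_ACCUMULATED_SAMPLES 128

int main(void)
{
        uint64_t running_len = 0;
        int i;

        /* Feed a constant 3000 ns sample; the average converges on 3000. */
        for (i = 0; i < 1024; i++) {
                running_len -= running_len / NR_ACCUMULATED_SAMPLES;
                running_len += 3000;
        }

        printf("avg = %llu ns\n",
               (unsigned long long)(running_len / NR_ACCUMULATED_SAMPLES));
        /* Prints just under 3000: biased low, exactly as the comment in
         * perf_sample_event_took() warns. */
        return 0;
}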
static atomic64_t perf_event_id;
static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
                              enum event_type_t event_type);

static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
                             enum event_type_t event_type,
                             struct task_struct *task);

static void update_context_time(struct perf_event_context *ctx);
static u64 perf_event_time(struct perf_event *event);

void __weak perf_event_print_debug(void) { }
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Paul Mackerras | 6 | 85.71% | 1 | 50.00% |
Ingo Molnar | 1 | 14.29% | 1 | 50.00% |
Total | 7 | 100.00% | 2 | 100.00% |
extern __weak const char *perf_pmu_name(void)
{
        return "pmu";
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Thomas Gleixner | 7 | 50.00% | 1 | 25.00% |
Matt Fleming | 4 | 28.57% | 1 | 25.00% |
Ingo Molnar | 3 | 21.43% | 2 | 50.00% |
Total | 14 | 100.00% | 4 | 100.00% |
static inline u64 perf_clock(void)
{
        return local_clock();
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Stéphane Eranian | 13 | 100.00% | 1 | 100.00% |
Total | 13 | 100.00% | 1 | 100.00% |
static inline u64 perf_event_clock(struct perf_event *event)
{
        return event->clock();
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Peter Zijlstra | 18 | 100.00% | 1 | 100.00% |
Total | 18 | 100.00% | 1 | 100.00% |
/*
 * State based event timekeeping...
 *
 * The basic idea is to use event->state to determine which (if any) time
 * fields to increment with the current delta. This means we only need to
 * update timestamps when we change state or when they are explicitly requested
 * (read).
 *
 * Event groups make things a little more complicated, but not terribly so. The
 * rules for a group are that if the group leader is OFF the entire group is
 * OFF, irrespective of what the group member states are. This results in
 * __perf_effective_state().
 *
 * A further ramification is that when a group leader flips between OFF and
 * !OFF, we need to update all group member times.
 *
 *
 * NOTE: perf_event_time() is based on the (cgroup) context time, and thus we
 * need to make sure the relevant context time is updated before we try and
 * update our timestamps.
 */
static __always_inline enum perf_event_state
__perf_effective_state(struct perf_event *event)
{
        struct perf_event *leader = event->group_leader;

        if (leader->state <= PERF_EVENT_STATE_OFF)
                return leader->state;

        return event->state;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Peter Zijlstra | 40 | 100.00% | 1 | 100.00% |
Total | 40 | 100.00% | 1 | 100.00% |
static __always_inline void
__perf_update_times(struct perf_event *event, u64 now, u64 *enabled, u64 *running)
{
        enum perf_event_state state = __perf_effective_state(event);
        u64 delta = now - event->tstamp;

        *enabled = event->total_time_enabled;
        if (state >= PERF_EVENT_STATE_INACTIVE)
                *enabled += delta;

        *running = event->total_time_running;
        if (state >= PERF_EVENT_STATE_ACTIVE)
                *running += delta;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Peter Zijlstra | 77 | 100.00% | 1 | 100.00% |
Total | 77 | 100.00% | 1 | 100.00% |
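A standalone sketch of the rule (toy types, not kernel code): the delta since event->tstamp counts toward enabled time from INACTIVE upward and toward running time only when ACTIVE.

/* Toy model of the state-based accounting, not kernel code; states mirror
 * PERF_EVENT_STATE_{OFF,INACTIVE,ACTIVE}. */
#include <stdio.h>
#include <stdint.h>

enum toy_state { TOY_OFF = -1, TOY_INACTIVE = 0, TOY_ACTIVE = 1 };

struct toy_event {
        enum toy_state state;
        uint64_t tstamp;
        uint64_t total_enabled;
        uint64_t total_running;
};

static void toy_update_time(struct toy_event *e, uint64_t now)
{
        uint64_t delta = now - e->tstamp;

        if (e->state >= TOY_INACTIVE)
                e->total_enabled += delta;
        if (e->state >= TOY_ACTIVE)
                e->total_running += delta;
        e->tstamp = now; /* timestamps only advance on state change/read */
}

int main(void)
{
        struct toy_event e = { .state = TOY_INACTIVE };

        toy_update_time(&e, 100);  /* 100 ns enabled, 0 running */
        e.state = TOY_ACTIVE;      /* the kernel updates time before flipping state */
        toy_update_time(&e, 250);  /* +150 ns enabled and running */

        printf("enabled=%llu running=%llu\n",
               (unsigned long long)e.total_enabled,
               (unsigned long long)e.total_running); /* 250 / 150 */
        return 0;
}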
static void perf_event_update_time(struct perf_event *event)
{
        u64 now = perf_event_time(event);

        __perf_update_times(event, now, &event->total_time_enabled,
                                        &event->total_time_running);
        event->tstamp = now;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Peter Zijlstra | 42 | 100.00% | 1 | 100.00% |
Total | 42 | 100.00% | 1 | 100.00% |
static void perf_event_update_sibling_time(struct perf_event *leader)
{
        struct perf_event *sibling;

        list_for_each_entry(sibling, &leader->sibling_list, group_entry)
                perf_event_update_time(sibling);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Peter Zijlstra | 29 | 100.00% | 1 | 100.00% |
Total | 29 | 100.00% | 1 | 100.00% |
static void
perf_event_set_state(struct perf_event *event, enum perf_event_state state)
{
        if (event->state == state)
                return;

        perf_event_update_time(event);
        /*
         * If a group leader gets enabled/disabled all its siblings
         * are affected too.
         */
        if ((event->state < 0) ^ (state < 0))
                perf_event_update_sibling_time(event);

        WRITE_ONCE(event->state, state);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Peter Zijlstra | 60 | 100.00% | 1 | 100.00% |
Total | 60 | 100.00% | 1 | 100.00% |
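The (event->state < 0) ^ (state < 0) test implements the group rule from the timekeeping comment above: states below zero (OFF and worse) freeze the entire group, so sibling times only need refreshing when the new state lands on the other side of that boundary. A standalone illustration (not kernel code):

/* Standalone illustration of the sign-flip test, not kernel code. */
#include <stdio.h>

static int crosses_off_boundary(int old_state, int new_state)
{
        return (old_state < 0) ^ (new_state < 0);
}

int main(void)
{
        /* OFF(-1) -> INACTIVE(0): flips, so sibling times need updating. */
        printf("%d\n", crosses_off_boundary(-1, 0)); /* 1 */
        /* INACTIVE(0) -> ACTIVE(1): same side, leader update suffices. */
        printf("%d\n", crosses_off_boundary(0, 1));  /* 0 */
        return 0;
}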
#ifdef CONFIG_CGROUP_PERF
static inline bool
perf_cgroup_match(struct perf_event *event)
{
        struct perf_event_context *ctx = event->ctx;
        struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);

        /* @event doesn't care about cgroup */
        if (!event->cgrp)
                return true;

        /* wants specific cgroup scope but @cpuctx isn't associated with any */
        if (!cpuctx->cgrp)
                return false;

        /*
         * Cgroup scoping is recursive. An event enabled for a cgroup is
         * also enabled for all its descendant cgroups. If @cpuctx's
         * cgroup is a descendant of @event's (the test covers identity
         * case), it's a match.
         */
        return cgroup_is_descendant(cpuctx->cgrp->css.cgroup,
                                    event->cgrp->css.cgroup);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 33 | 44.59% | 1 | 16.67% |
Peter Zijlstra | 22 | 29.73% | 4 | 66.67% |
Stéphane Eranian | 19 | 25.68% | 1 | 16.67% |
Total | 74 | 100.00% | 6 | 100.00% |
static inline void perf_detach_cgroup(struct perf_event *event)
{
        css_put(&event->cgrp->css);
        event->cgrp = NULL;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Stéphane Eranian | 14 | 50.00% | 1 | 25.00% |
Paul Mackerras | 13 | 46.43% | 2 | 50.00% |
Li Zefan | 1 | 3.57% | 1 | 25.00% |
Total | 28 | 100.00% | 4 | 100.00% |
static inline int is_cgroup_event(struct perf_event *event)
{
        return event->cgrp != NULL;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Stéphane Eranian | 10 | 52.63% | 1 | 50.00% |
Peter Zijlstra | 9 | 47.37% | 1 | 50.00% |
Total | 19 | 100.00% | 2 | 100.00% |
static inline u64 perf_cgroup_event_time(struct perf_event *event)
{
        struct perf_cgroup_info *t;

        t = per_cpu_ptr(event->cgrp->info, event->cpu);
        return t->time;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Stéphane Eranian | 20 | 54.05% | 1 | 50.00% |
Arnaldo Carvalho de Melo | 17 | 45.95% | 1 | 50.00% |
Total | 37 | 100.00% | 2 | 100.00% |
static inline void __update_cgrp_time(struct perf_cgroup *cgrp)
{
        struct perf_cgroup_info *info;
        u64 now;

        now = perf_clock();

        info = this_cpu_ptr(cgrp->info);

        info->time += now - info->timestamp;
        info->timestamp = now;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Stéphane Eranian | 44 | 88.00% | 1 | 50.00% |
Arnaldo Carvalho de Melo | 6 | 12.00% | 1 | 50.00% |
Total | 50 | 100.00% | 2 | 100.00% |
static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
{
        struct perf_cgroup *cgrp_out = cpuctx->cgrp;

        if (cgrp_out)
                __update_cgrp_time(cgrp_out);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Stéphane Eranian | 22 | 73.33% | 1 | 50.00% |
Arnaldo Carvalho de Melo | 8 | 26.67% | 1 | 50.00% |
Total | 30 | 100.00% | 2 | 100.00% |
static inline void update_cgrp_time_from_event(struct perf_event *event)
{
        struct perf_cgroup *cgrp;

        /*
         * ensure we access cgroup data only when needed and
         * when we know the cgroup is pinned (css_get)
         */
        if (!is_cgroup_event(event))
                return;

        cgrp = perf_cgroup_from_task(current, event->ctx);
        /*
         * Do not update time when cgroup is not active
         */
        if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
                __update_cgrp_time(event->cgrp);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Stéphane Eranian | 36 | 55.38% | 3 | 50.00% |
Peter Zijlstra | 13 | 20.00% | 1 | 16.67% |
leilei.lin | 12 | 18.46% | 1 | 16.67% |
Ingo Molnar | 4 | 6.15% | 1 | 16.67% |
Total | 65 | 100.00% | 6 | 100.00% |
static inline void
perf_cgroup_set_timestamp(struct task_struct *task,
                          struct perf_event_context *ctx)
{
        struct perf_cgroup *cgrp;
        struct perf_cgroup_info *info;

        /*
         * ctx->lock held by caller
         * ensure we do not access cgroup data
         * unless we have the cgroup pinned (css_get)
         */
        if (!task || !ctx->nr_cgroups)
                return;

        cgrp = perf_cgroup_from_task(task, ctx);
        info = this_cpu_ptr(cgrp->info);
        info->timestamp = ctx->timestamp;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Stéphane Eranian | 39 | 60.00% | 3 | 60.00% |
Paul Mackerras | 21 | 32.31% | 1 | 20.00% |
Peter Zijlstra | 5 | 7.69% | 1 | 20.00% |
Total | 65 | 100.00% | 5 | 100.00% |
static DEFINE_PER_CPU(struct list_head, cgrp_cpuctx_list);
#define PERF_CGROUP_SWOUT 0x1 /* cgroup switch out every event */
#define PERF_CGROUP_SWIN  0x2 /* cgroup switch in events based on task */
/*
* reschedule events based on the cgroup constraint of task.
*
* mode SWOUT : schedule out everything
* mode SWIN : schedule in based on cgroup for next
*/
static void perf_cgroup_switch(struct task_struct *task, int mode)
{
        struct perf_cpu_context *cpuctx;
        struct list_head *list;
        unsigned long flags;

        /*
         * Disable interrupts and preemption to avoid this CPU's
         * cgrp_cpuctx_entry to change under us.
         */
        local_irq_save(flags);

        list = this_cpu_ptr(&cgrp_cpuctx_list);
        list_for_each_entry(cpuctx, list, cgrp_cpuctx_entry) {
                WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0);

                perf_ctx_lock(cpuctx, cpuctx->task_ctx);
                perf_pmu_disable(cpuctx->ctx.pmu);

                if (mode & PERF_CGROUP_SWOUT) {
                        cpu_ctx_sched_out(cpuctx, EVENT_ALL);
                        /*
                         * must not be done before ctxswout due
                         * to event_filter_match() in event_sched_out()
                         */
                        cpuctx->cgrp = NULL;
                }

                if (mode & PERF_CGROUP_SWIN) {
                        WARN_ON_ONCE(cpuctx->cgrp);
                        /*
                         * set cgrp before ctxsw in to allow
                         * event_filter_match() to not have to pass
                         * task around
                         * we pass the cpuctx->ctx to perf_cgroup_from_task()
                         * because cgroup events are only per-cpu
                         */
                        cpuctx->cgrp = perf_cgroup_from_task(task,
                                                             &cpuctx->ctx);
                        cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
                }
                perf_pmu_enable(cpuctx->ctx.pmu);
                perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
        }

        local_irq_restore(flags);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Stéphane Eranian | 85 | 51.52% | 3 | 37.50% |
Peter Zijlstra | 31 | 18.79% | 2 | 25.00% |
Paul Mackerras | 27 | 16.36% | 1 | 12.50% |
David Carrillo-Cisneros | 21 | 12.73% | 1 | 12.50% |
Geliang Tang | 1 | 0.61% | 1 | 12.50% |
Total | 165 | 100.00% | 8 | 100.00% |
static inline void perf_cgroup_sched_out(struct task_struct *task,
                                         struct task_struct *next)
{
        struct perf_cgroup *cgrp1;
        struct perf_cgroup *cgrp2 = NULL;

        rcu_read_lock();
        /*
         * we come here when we know perf_cgroup_events > 0
         * we do not need to pass the ctx here because we know
         * we are holding the rcu lock
         */
        cgrp1 = perf_cgroup_from_task(task, NULL);
        cgrp2 = perf_cgroup_from_task(next, NULL);

        /*
         * only schedule out current cgroup events if we know
         * that we are switching to a different cgroup. Otherwise,
         * do not touch the cgroup events.
         */
        if (cgrp1 != cgrp2)
                perf_cgroup_switch(task, PERF_CGROUP_SWOUT);

        rcu_read_unlock();
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Stéphane Eranian | 56 | 82.35% | 4 | 80.00% |
Paul Mackerras | 12 | 17.65% | 1 | 20.00% |
Total | 68 | 100.00% | 5 | 100.00% |
static inline void perf_cgroup_sched_in(struct task_struct *