cregit-Linux how code gets into the kernel

Release 4.17 samples/bpf/cpustat_kern.c

Directory: samples/bpf
// SPDX-License-Identifier: GPL-2.0

#include <linux/version.h>
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"

/*
 * The CPU number, cstate number and pstate number are based
 * on 96boards Hikey with octa CA53 CPUs.
 *
 * Every CPU has three idle states for cstate:
 *   WFI, CPU_OFF, CLUSTER_OFF
 *
 * Every CPU has 5 operating points:
 *   208MHz, 432MHz, 729MHz, 960MHz, 1200MHz
 *
 * This code is based on these assumptions; other platforms
 * need to adjust these definitions.
 */

/* Number of CPUs tracked; every map in this file is sized as a multiple of it. */
#define MAX_CPU			8

/* Number of operating-point slots reserved per CPU in pstate_duration. */
#define MAX_PSTATE_ENTRIES	5

/* Number of idle-state slots reserved per CPU in cstate_duration. */
#define MAX_CSTATE_ENTRIES	3


/*
 * Operating-point table searched by find_cpu_pstate_idx(); the position of
 * a frequency in this table is its pstate index.  The values line up with
 * the 208MHz .. 1200MHz points listed in the comment above, i.e. they are
 * expressed in kHz.
 */
static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 };

/*
 * The my_map structure records the cstate and pstate index and
 * timestamp (Idx, Ts); when a new event arrives we update it with
 * the new state index and timestamp (Idx`, Ts`).
 *
 * Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time
 * interval for the previous state: Duration(Idx) = Ts` - Ts.
 *
 * Every CPU has one array as below for recording the state index and
 * timestamp, recorded separately for cstate and pstate:
 *
 * +--------------------------+
 * | cstate timestamp         |
 * +--------------------------+
 * | cstate index             |
 * +--------------------------+
 * | pstate timestamp         |
 * +--------------------------+
 * | pstate index             |
 * +--------------------------+
 */

/*
 * Slot offsets inside one CPU's record in my_map; the key for a slot is
 * cpu * MAP_OFF_NUM + MAP_OFF_<slot>.
 */

/* Timestamp (ns) of the last cpu_idle event seen on this CPU. */
#define MAP_OFF_CSTATE_TIME	0

/* Last cpu_idle state value seen on this CPU. */
#define MAP_OFF_CSTATE_IDX	1

/* Timestamp (ns) marking the start of the current pstate interval. */
#define MAP_OFF_PSTATE_TIME	2

/* Last cpu_frequency state value (a frequency, not a table index). */
#define MAP_OFF_PSTATE_IDX	3

/* Number of slots in one CPU's record. */
#define MAP_OFF_NUM		4

/*
 * Per-CPU bookkeeping array: each CPU owns MAP_OFF_NUM consecutive u64
 * slots, addressed as cpu * MAP_OFF_NUM + MAP_OFF_*.
 */
struct bpf_map_def SEC("maps") my_map = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(u64),
	.max_entries = MAX_CPU * MAP_OFF_NUM,
};

/*
 * cstate_duration accumulates the time (in ns, from bpf_ktime_get_ns()
 * deltas) spent in every idle state per CPU.  The key is
 * cpu * MAX_CSTATE_ENTRIES + idle state index.
 */
struct bpf_map_def SEC("maps") cstate_duration = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(u64),
	.max_entries = MAX_CPU * MAX_CSTATE_ENTRIES,
};

/*
 * pstate_duration accumulates the time (in ns, from bpf_ktime_get_ns()
 * deltas) spent at every operating point per CPU.  The key is
 * cpu * MAX_PSTATE_ENTRIES + index of the frequency in cpu_opps[].
 */
struct bpf_map_def SEC("maps") pstate_duration = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(u64),
	.max_entries = MAX_CPU * MAX_PSTATE_ENTRIES,
};

/*
 * The trace events for cpu_idle and cpu_frequency are taken from:
 * /sys/kernel/debug/tracing/events/power/cpu_idle/format
 * /sys/kernel/debug/tracing/events/power/cpu_frequency/format
 *
 * These two events have the same format, so define one common structure.
 */

/* Context layout shared by the cpu_idle and cpu_frequency handlers. */
struct cpu_args {
	/*
	 * Bytes ahead of the event fields; never read in this file.
	 * NOTE(review): presumably the common tracepoint header, confirm
	 * against the format files named above.
	 */
u64 pad;
	/*
	 * cpu_idle: idle state being entered, or (u32)-1 on idle exit.
	 * cpu_frequency: the new frequency, matched against cpu_opps[].
	 */
u32 state;
	/* CPU the event refers to; used to derive every map key. */
u32 cpu_id;
};

/*
 * Translate a frequency into its position in cpu_opps[].
 *
 * Returns the index of the first matching entry.  When nothing matches,
 * the number of entries in cpu_opps[] is returned, which for the table in
 * this file equals MAX_PSTATE_ENTRIES and is treated as failure by callers.
 */
static u32 find_cpu_pstate_idx(u32 frequency)
{
	const u32 nr_opps = sizeof(cpu_opps) / sizeof(cpu_opps[0]);
	u32 idx;

	for (idx = 0; idx < nr_opps; idx++) {
		if (cpu_opps[idx] == frequency) {
			break;
		}
	}

	/* idx == nr_opps here means the search ran off the end of the table */
	return idx;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Leo Yan | 50 | 100.00% | 1 | 100.00%
Total | 50 | 100.00% | 1 | 100.00%

/*
 * Handler for the power:cpu_idle tracepoint.
 *
 * When ctx->state is not (u32)-1 the CPU is entering that idle state: the
 * time elapsed since the pstate timestamp is added to the current operating
 * point in pstate_duration.  When ctx->state is (u32)-1 the CPU is leaving
 * idle: the time elapsed since the cstate timestamp is added to the idle
 * state it is leaving in cstate_duration.  In both cases the pstate
 * timestamp is restarted if it has already been initialised.
 *
 * Events for CPUs outside 0 .. MAX_CPU - 1 are ignored.  Always returns 0.
 */
SEC("tracepoint/power/cpu_idle")
int bpf_prog1(struct cpu_args *ctx)
{
	u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta;
	u32 key, cpu, pstate_idx;
	u64 *val;

	/* Valid CPU ids are 0 .. MAX_CPU - 1; MAX_CPU itself has no slots. */
	if (ctx->cpu_id >= MAX_CPU)
		return 0;

	cpu = ctx->cpu_id;

	key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME;
	cts = bpf_map_lookup_elem(&my_map, &key);
	if (!cts)
		return 0;

	key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
	cstate = bpf_map_lookup_elem(&my_map, &key);
	if (!cstate)
		return 0;

	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
	pts = bpf_map_lookup_elem(&my_map, &key);
	if (!pts)
		return 0;

	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
	pstate = bpf_map_lookup_elem(&my_map, &key);
	if (!pstate)
		return 0;

	prev_state = *cstate;
	*cstate = ctx->state;

	/* First event on this CPU: only start the clock, nothing to account. */
	if (!*cts) {
		*cts = bpf_ktime_get_ns();
		return 0;
	}

	cur_ts = bpf_ktime_get_ns();
	delta = cur_ts - *cts;
	*cts = cur_ts;

	/*
	 * When state doesn't equal to (u32)-1, the cpu will enter
	 * one idle state; for this case we need to record interval
	 * for the pstate.
	 *
	 *                 OPP2
	 *            +---------------------+
	 *     OPP1   |                     |
	 *   ---------+                     |
	 *                                  |  Idle state
	 *                                  +---------------
	 *
	 *            |<- pstate duration ->|
	 *            ^                     ^
	 *           pts                  cur_ts
	 */
	if (ctx->state != (u32)-1) {

		/* record pstate after have first cpu_frequency event */
		if (!*pts)
			return 0;

		delta = cur_ts - *pts;

		pstate_idx = find_cpu_pstate_idx(*pstate);
		if (pstate_idx >= MAX_PSTATE_ENTRIES)
			return 0;

		key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
		val = bpf_map_lookup_elem(&pstate_duration, &key);
		if (val)
			__sync_fetch_and_add((long *)val, delta);

	/*
	 * When state equal to (u32)-1, the cpu just exits from one
	 * specific idle state; for this case we need to record
	 * interval for the cstate.
	 *
	 *               OPP2
	 *   -----------+
	 *              |                          OPP1
	 *              |                     +-----------
	 *              |     Idle state      |
	 *              +---------------------+
	 *
	 *              |<- cstate duration ->|
	 *              ^                     ^
	 *             cts                  cur_ts
	 */
	} else {
		/*
		 * Only account a known idle state.  An unchecked value
		 * (e.g. (u32)-1 left by a previous exit event) would wrap
		 * the u32 key and land in another CPU's slots.
		 */
		if (prev_state < MAX_CSTATE_ENTRIES) {
			key = cpu * MAX_CSTATE_ENTRIES + prev_state;
			val = bpf_map_lookup_elem(&cstate_duration, &key);
			if (val)
				__sync_fetch_and_add((long *)val, delta);
		}
	}

	/* Update timestamp for pstate as new start time */
	if (*pts)
		*pts = cur_ts;

	return 0;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Leo Yan | 346 | 100.00% | 1 | 100.00%
Total | 346 | 100.00% | 1 | 100.00%

/*
 * Handler for the power:cpu_frequency tracepoint.
 *
 * Stores the new frequency and restarts the pstate timestamp.  If the CPU
 * is not idle (its recorded cstate is (u32)-1), the time elapsed since the
 * previous pstate timestamp is added to the slot of the frequency the CPU
 * was running at before this event.
 *
 * Events for CPUs outside 0 .. MAX_CPU - 1 are ignored.  Always returns 0.
 */
SEC("tracepoint/power/cpu_frequency")
int bpf_prog2(struct cpu_args *ctx)
{
	u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta;
	u32 key, cpu, pstate_idx;
	u64 *val;

	/* Same guard as the cpu_idle handler: no map slots beyond MAX_CPU - 1. */
	if (ctx->cpu_id >= MAX_CPU)
		return 0;

	cpu = ctx->cpu_id;

	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
	pts = bpf_map_lookup_elem(&my_map, &key);
	if (!pts)
		return 0;

	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
	pstate = bpf_map_lookup_elem(&my_map, &key);
	if (!pstate)
		return 0;

	key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
	cstate = bpf_map_lookup_elem(&my_map, &key);
	if (!cstate)
		return 0;

	/* Remember the frequency that was active during the elapsed interval. */
	prev_state = *pstate;
	*pstate = ctx->state;

	/* First frequency event on this CPU: only start the clock. */
	if (!*pts) {
		*pts = bpf_ktime_get_ns();
		return 0;
	}

	cur_ts = bpf_ktime_get_ns();
	delta = cur_ts - *pts;
	*pts = cur_ts;

	/* When CPU is in idle, bail out to skip pstate statistics */
	if (*cstate != (u32)(-1))
		return 0;

	/*
	 * The cpu changes to another different OPP (in below diagram
	 * change frequency from OPP3 to OPP1), need recording interval
	 * for previous frequency OPP3 and update timestamp as start
	 * time for new frequency OPP1.
	 *
	 *                 OPP3
	 *            +---------------------+
	 *     OPP2   |                     |
	 *   ---------+                     |
	 *                                  |    OPP1
	 *                                  +---------------
	 *
	 *            |<- pstate duration ->|
	 *            ^                     ^
	 *           pts                  cur_ts
	 */
	/* The interval belongs to the previous frequency, not the new one. */
	pstate_idx = find_cpu_pstate_idx(prev_state);
	if (pstate_idx >= MAX_PSTATE_ENTRIES)
		return 0;

	key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
	val = bpf_map_lookup_elem(&pstate_duration, &key);
	if (val)
		__sync_fetch_and_add((long *)val, delta);

	return 0;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Leo Yan | 242 | 100.00% | 1 | 100.00%
Total | 242 | 100.00% | 1 | 100.00%

/*
 * License string and kernel version code, tagged with SEC("license") and
 * SEC("version").  NOTE(review): presumably read by the sample's loader
 * when the object is loaded; confirm against the loader.
 */
char _license[] SEC("license") = "GPL"; u32 _version SEC("version") = LINUX_VERSION_CODE;

Overall Contributors

Person | Tokens | Prop | Commits | CommitProp
Leo Yan | 861 | 100.00% | 1 | 100.00%
Total | 861 | 100.00% | 1 | 100.00%
Directory: samples/bpf
Information contained on this website is for historical information purposes only and does not indicate or represent copyright ownership.
Created with cregit.