// SPDX-License-Identifier: GPL-2.0-only
/*
 * Zhaoxin PMU; like Intel Architectural PerfMon-v2
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/nmi.h>

#include <asm/cpufeature.h>
#include <asm/hardirq.h>
#include <asm/apic.h>

#include "../perf_event.h"

/*
 * Zhaoxin PerfMon, used on zxc and later.
 */
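/*
 * Config values below encode the event select in bits 0-7 and the unit
 * mask in bits 8-15, matching the "event"/"umask" format attributes
 * defined further down (e.g. 0x0515 = event 0x15, umask 0x05).  Branch
 * and stalled-cycle events are model specific and are filled in by
 * zhaoxin_pmu_init().
 */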
static u64 zx_pmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = {

	[PERF_COUNT_HW_CPU_CYCLES]        = 0x0082,
	[PERF_COUNT_HW_INSTRUCTIONS]      = 0x00c0,
	[PERF_COUNT_HW_CACHE_REFERENCES]  = 0x0515,
	[PERF_COUNT_HW_CACHE_MISSES]      = 0x051a,
	[PERF_COUNT_HW_BUS_CYCLES]        = 0x0083,
};

static struct event_constraint zxc_event_constraints[] __read_mostly = {

	FIXED_EVENT_CONSTRAINT(0x0082, 1), /* unhalted core clock cycles */
	EVENT_CONSTRAINT_END
};

static struct event_constraint zxd_event_constraints[] __read_mostly = {

	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* retired instructions */
	FIXED_EVENT_CONSTRAINT(0x0082, 1), /* unhalted core clock cycles */
	FIXED_EVENT_CONSTRAINT(0x0083, 2), /* unhalted bus clock cycles */
	EVENT_CONSTRAINT_END
};

static __initconst const u64 zxd_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0042,
		[C(RESULT_MISS)] = 0x0538,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = 0x0043,
		[C(RESULT_MISS)] = 0x0562,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0300,
		[C(RESULT_MISS)] = 0x0301,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = 0x030a,
		[C(RESULT_MISS)] = 0x030b,
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0042,
		[C(RESULT_MISS)] = 0x052c,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = 0x0043,
		[C(RESULT_MISS)] = 0x0530,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = 0x0564,
		[C(RESULT_MISS)] = 0x0565,
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x00c0,
		[C(RESULT_MISS)] = 0x0534,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0700,
		[C(RESULT_MISS)] = 0x0709,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
[C(NODE)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
};

static __initconst const u64 zxe_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0568,
		[C(RESULT_MISS)] = 0x054b,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = 0x0669,
		[C(RESULT_MISS)] = 0x0562,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0300,
		[C(RESULT_MISS)] = 0x0301,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = 0x030a,
		[C(RESULT_MISS)] = 0x030b,
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0,
		[C(RESULT_MISS)] = 0x0,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = 0x0,
		[C(RESULT_MISS)] = 0x0,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = 0x0,
		[C(RESULT_MISS)] = 0x0,
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0568,
		[C(RESULT_MISS)] = 0x052c,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = 0x0669,
		[C(RESULT_MISS)] = 0x0530,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = 0x0564,
		[C(RESULT_MISS)] = 0x0565,
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x00c0,
		[C(RESULT_MISS)] = 0x0534,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0028,
		[C(RESULT_MISS)] = 0x0029,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
[C(NODE)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
};

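/*
 * All counters are started/stopped collectively through
 * MSR_CORE_PERF_GLOBAL_CTRL, as in Intel architectural perfmon v2;
 * x86_pmu.intel_ctrl caches the bitmask of implemented counters.
 */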
static void zhaoxin_pmu_disable_all(void)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
}

static void zhaoxin_pmu_enable_all(int added)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
}

static inline u64 zhaoxin_pmu_get_status(void)
{
	u64 status;

	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);

	return status;
}

static inline void zhaoxin_pmu_ack_status(u64 ack)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}

static inline void zxc_pmu_ack_status(u64 ack)
{
	/*
	 * ZXC needs global control enabled in order to clear status bits.
	 */
	zhaoxin_pmu_enable_all(0);
	zhaoxin_pmu_ack_status(ack);
	zhaoxin_pmu_disable_all();
}

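/*
 * Each fixed counter owns a 4-bit control field in
 * MSR_ARCH_PERFMON_FIXED_CTR_CTRL; clearing that field stops the counter.
 */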
static void zhaoxin_pmu_disable_fixed(struct hw_perf_event *hwc)
{
	int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
	u64 ctrl_val, mask;

	mask = 0xfULL << (idx * 4);

	rdmsrl(hwc->config_base, ctrl_val);
	ctrl_val &= ~mask;
	wrmsrl(hwc->config_base, ctrl_val);
}

static void zhaoxin_pmu_disable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
		zhaoxin_pmu_disable_fixed(hwc);
		return;
	}

	x86_pmu_disable_event(event);
}

static void zhaoxin_pmu_enable_fixed(struct hw_perf_event *hwc)
{
	int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
	u64 ctrl_val, bits, mask;

	/*
	 * Enable IRQ generation (0x8),
	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
	 * if requested:
	 */
	bits = 0x8ULL;
	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
		bits |= 0x2;
	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
		bits |= 0x1;

	bits <<= (idx * 4);
	mask = 0xfULL << (idx * 4);

	rdmsrl(hwc->config_base, ctrl_val);
	ctrl_val &= ~mask;
	ctrl_val |= bits;
	wrmsrl(hwc->config_base, ctrl_val);
}

static void zhaoxin_pmu_enable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
		zhaoxin_pmu_enable_fixed(hwc);
		return;
	}

	__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
}

/*
 * This handler is triggered by the local APIC, so the APIC IRQ handling
 * rules apply:
 */
static int zhaoxin_pmu_handle_irq(struct pt_regs *regs)
{
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	int handled = 0;
	u64 status;
	int bit;

	cpuc = this_cpu_ptr(&cpu_hw_events);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	zhaoxin_pmu_disable_all();
	status = zhaoxin_pmu_get_status();
	if (!status)
		goto done;

again:
	if (x86_pmu.enabled_ack)
		zxc_pmu_ack_status(status);
	else
		zhaoxin_pmu_ack_status(status);

	inc_irq_stat(apic_perf_irqs);

	/*
	 * The CondChgd bit (bit 63) does not indicate a counter overflow.
	 * Ignore it and just clear the bit.
	 */
	if (__test_and_clear_bit(63, (unsigned long *)&status)) {
		if (!status)
			goto done;
	}

	for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
		struct perf_event *event = cpuc->events[bit];

		handled++;

		if (!test_bit(bit, cpuc->active_mask))
			continue;

		x86_perf_event_update(event);
		perf_sample_data_init(&data, 0, event->hw.last_period);

		if (!x86_perf_event_set_period(event))
			continue;

		if (perf_event_overflow(event, &data, regs))
			x86_pmu_stop(event, 0);
	}

	/*
	 * Repeat if there is more work to be done:
	 */
	status = zhaoxin_pmu_get_status();
	if (status)
		goto again;

done:
	zhaoxin_pmu_enable_all(0);
	return handled;
}

static u64 zhaoxin_pmu_event_map(int hw_event)
{
	return zx_pmon_event_map[hw_event];
}

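/*
 * Pick the constraint that pins an event to its fixed counter, if one
 * exists; otherwise the event may be scheduled on any GP counter.
 */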
static struct event_constraint *
zhaoxin_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			struct perf_event *event)
{
	struct event_constraint *c;

	if (x86_pmu.event_constraints) {
		for_each_event_constraint(c, x86_pmu.event_constraints) {
			if ((event->hw.config & c->cmask) == c->code)
				return c;
		}
	}

	return &unconstrained;
}

PMU_FORMAT_ATTR(event,	"config:0-7");
PMU_FORMAT_ATTR(umask,	"config:8-15");
PMU_FORMAT_ATTR(edge,	"config:18");
PMU_FORMAT_ATTR(inv,	"config:23");
PMU_FORMAT_ATTR(cmask,	"config:24-31");

static struct attribute *zx_arch_formats_attr[] = {
	&format_attr_event.attr,
	&format_attr_umask.attr,
	&format_attr_edge.attr,
	&format_attr_inv.attr,
	&format_attr_cmask.attr,
	NULL,
};
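
/*
 * With these format attributes, raw events can be requested from user space,
 * e.g. (assuming the usual "cpu" PMU name used by the x86 core PMU):
 *
 *   perf stat -e cpu/event=0x82,umask=0x00/ -- <workload>
 */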

static ssize_t zhaoxin_event_sysfs_show(char *page, u64 config)
{
	u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);

	return x86_event_sysfs_show(page, config, event);
}

static const struct x86_pmu zhaoxin_pmu __initconst = {
	.name			= "zhaoxin",
	.handle_irq		= zhaoxin_pmu_handle_irq,
	.disable_all		= zhaoxin_pmu_disable_all,
	.enable_all		= zhaoxin_pmu_enable_all,
	.enable			= zhaoxin_pmu_enable_event,
	.disable		= zhaoxin_pmu_disable_event,
	.hw_config		= x86_pmu_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
	.event_map		= zhaoxin_pmu_event_map,
	.max_events		= ARRAY_SIZE(zx_pmon_event_map),
	.apic			= 1,
	/*
	 * For zxd/zxe, reads and writes of the PMCx MSRs are 48 bits wide.
	 */
	.max_period		= (1ULL << 47) - 1,
	.get_event_constraints	= zhaoxin_get_event_constraints,

	.format_attrs		= zx_arch_formats_attr,
	.events_sysfs_show	= zhaoxin_event_sysfs_show,
};

static const struct { int id; char *name; } zx_arch_events_map[] __initconst = {
	{ PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
	{ PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
	{ PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
	{ PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
	{ PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
	{ PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
	{ PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
};

static __init void zhaoxin_arch_events_quirk(void)
{
	int bit;

	/* Disable events that CPUID (leaf 0xa, EBX) reports as not available. */
	for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(zx_arch_events_map)) {
		zx_pmon_event_map[zx_arch_events_map[bit].id] = 0;
		pr_warn("CPUID marked event: \'%s\' unavailable\n",
			zx_arch_events_map[bit].name);
	}
}

__init int zhaoxin_pmu_init(void)
{
	union cpuid10_edx edx;
	union cpuid10_eax eax;
	union cpuid10_ebx ebx;
	struct event_constraint *c;
	unsigned int unused;
	int version;

	pr_info("Welcome to zhaoxin pmu!\n");

	/*
	 * Check whether the Architectural PerfMon supports
	 * hw_event or not.
	 */
	cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);

	if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT - 1)
		return -ENODEV;

	version = eax.split.version_id;
	if (version != 2)
		return -ENODEV;

	x86_pmu = zhaoxin_pmu;
	pr_info("Version check passed!\n");

	x86_pmu.version			= version;
	x86_pmu.num_counters		= eax.split.num_counters;
	x86_pmu.cntval_bits		= eax.split.bit_width;
	x86_pmu.cntval_mask		= (1ULL << eax.split.bit_width) - 1;
	x86_pmu.events_maskl		= ebx.full;
	x86_pmu.events_mask_len		= eax.split.mask_length;

	x86_pmu.num_counters_fixed = edx.split.num_counters_fixed;
	x86_add_quirk(zhaoxin_arch_events_quirk);

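	/*
	 * Family 6 (models 0x0f/0x19) are ZXC parts; family 7 covers ZXD
	 * (model 0x1b) and ZXE (model 0x3b).
	 */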
	switch (boot_cpu_data.x86) {
	case 0x06:
		if (boot_cpu_data.x86_model == 0x0f || boot_cpu_data.x86_model == 0x19) {

			x86_pmu.max_period = x86_pmu.cntval_mask >> 1;

			/* Clearing status works only if the global control is enabled on zxc. */
			x86_pmu.enabled_ack = 1;

			x86_pmu.event_constraints = zxc_event_constraints;
			zx_pmon_event_map[PERF_COUNT_HW_INSTRUCTIONS] = 0;
			zx_pmon_event_map[PERF_COUNT_HW_CACHE_REFERENCES] = 0;
			zx_pmon_event_map[PERF_COUNT_HW_CACHE_MISSES] = 0;
			zx_pmon_event_map[PERF_COUNT_HW_BUS_CYCLES] = 0;

			pr_cont("ZXC events, ");
			break;
		}
		return -ENODEV;

	case 0x07:
		zx_pmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
			X86_CONFIG(.event = 0x01, .umask = 0x01, .inv = 0x01, .cmask = 0x01);

		zx_pmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
			X86_CONFIG(.event = 0x0f, .umask = 0x04, .inv = 0, .cmask = 0);

		switch (boot_cpu_data.x86_model) {
		case 0x1b:
			memcpy(hw_cache_event_ids, zxd_hw_cache_event_ids,
			       sizeof(hw_cache_event_ids));

			x86_pmu.event_constraints = zxd_event_constraints;

			zx_pmon_event_map[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0700;
			zx_pmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x0709;

			pr_cont("ZXD events, ");
			break;
		case 0x3b:
			memcpy(hw_cache_event_ids, zxe_hw_cache_event_ids,
			       sizeof(hw_cache_event_ids));

			x86_pmu.event_constraints = zxd_event_constraints;

			zx_pmon_event_map[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0028;
			zx_pmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x0029;

			pr_cont("ZXE events, ");
			break;
		default:
			return -ENODEV;
		}
		break;

	default:
		return -ENODEV;
	}

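	/*
	 * Build the global-enable mask: one bit per GP counter in the low
	 * bits, plus one bit per fixed counter starting at INTEL_PMC_IDX_FIXED.
	 */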
	x86_pmu.intel_ctrl = (1 << (x86_pmu.num_counters)) - 1;
	x86_pmu.intel_ctrl |= ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;

	if (x86_pmu.event_constraints) {
		for_each_event_constraint(c, x86_pmu.event_constraints) {
			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
			c->weight += x86_pmu.num_counters;
		}
	}

	return 0;
}