Contributors: 17
Author Tokens Token Proportion Commits Commit Proportion
Ravi Bangoria 768 56.89% 1 3.45%
Fenghua Yu 271 20.07% 4 13.79%
Peter Zijlstra 146 10.81% 2 6.90%
Tony Luck 61 4.52% 5 17.24%
Linus Torvalds (pre-git) 18 1.33% 2 6.90%
Chenyi Qiang 15 1.11% 1 3.45%
Thomas Gleixner 13 0.96% 1 3.45%
David Woodhouse 13 0.96% 2 6.90%
Sean Christopherson 11 0.81% 1 3.45%
Andrew Morton 8 0.59% 1 3.45%
Dave Jones 6 0.44% 1 3.45%
Joe Perches 6 0.44% 1 3.45%
Xiaoyao Li 5 0.37% 2 6.90%
Paul E. McKenney 4 0.30% 2 6.90%
Tejun Heo 2 0.15% 1 3.45%
Reinette Chatre 2 0.15% 1 3.45%
Greg Kroah-Hartman 1 0.07% 1 3.45%
Total 1350 29

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406
// SPDX-License-Identifier: GPL-2.0

#define pr_fmt(fmt) "x86/split lock detection: " fmt

#include <linux/semaphore.h>
#include <linux/workqueue.h>
#include <linux/delay.h>
#include <linux/cpuhotplug.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <asm/cpu_device_id.h>
#include <asm/cmdline.h>
#include <asm/traps.h>
#include <asm/cpu.h>

/*
 * Operating modes of split lock / bus lock detection.  The mode is chosen
 * with the "split_lock_detect=" boot parameter (see sld_options[]).
 */
enum split_lock_detect_state {
	sld_off = 0,	/* detection disabled */
	sld_warn,	/* warn about the offending user task and let it continue */
	sld_fatal,	/* SIGBUS for the offending user task */
	sld_ratelimit,	/* throttle bus locks to the configured rate */
};

/*
 * Default to sld_off because most systems do not support split lock detection.
 * sld_state_setup() will switch this to sld_warn on systems that support
 * split lock/bus lock detect, unless there is a command line override.
 */
static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
/*
 * MSR_TEST_CTRL value sampled by __split_lock_setup() right after the split
 * lock detect bit was cleared.  sld_update_msr() uses it as the base value
 * for every later write.
 */
static u64 msr_test_ctrl_cache __ro_after_init;

/*
 * With a name like MSR_TEST_CTRL it should go without saying, but don't touch
 * MSR_TEST_CTRL unless the CPU is known to support split lock detection (see
 * split_lock_setup()).  Writing it on CPUs that do not support SLD can cause
 * fireworks, even when writing '0'.
 */
static bool cpu_model_supports_sld __ro_after_init;

/*
 * Values understood by the "split_lock_detect=" boot parameter and the state
 * each of them selects.  sld_state_setup() walks the table in order and
 * checks every entry with match_option().  "ratelimit:" is a prefix; the
 * rate follows the colon.
 */
static const struct {
	const char			*option;
	enum split_lock_detect_state	state;
} sld_options[] __initconst = {
	{ "off",	sld_off   },
	{ "warn",	sld_warn  },
	{ "fatal",	sld_fatal },
	{ "ratelimit:", sld_ratelimit },
};

/*
 * Bus lock rate limit.  Set up by match_option() when "ratelimit:N" is given
 * and enforced in handle_bus_lock().
 */
static struct ratelimit_state bld_ratelimit;

/*
 * Non-zero: in warn mode, make tasks that take split locks miserable (see
 * split_lock_warn()).  Enabled by default.
 */
static unsigned int sysctl_sld_mitigate = 1;
/*
 * Taken by split_lock_warn() when the mitigation is on and released by
 * __split_lock_reenable_unlock(), so that detection is disabled on behalf of
 * one task at a time.
 */
static DEFINE_SEMAPHORE(buslock_sem, 1);

#ifdef CONFIG_PROC_SYSCTL
/*
 * sysctl table with the single "split_lock_mitigate" entry.  It is backed by
 * sysctl_sld_mitigate and bounded to the values SYSCTL_ZERO..SYSCTL_ONE.
 */
static struct ctl_table sld_sysctls[] = {
	{
		.procname       = "split_lock_mitigate",
		.data           = &sysctl_sld_mitigate,
		.maxlen         = sizeof(unsigned int),
		.mode           = 0644,
		.proc_handler	= proc_douintvec_minmax,
		.extra1         = SYSCTL_ZERO,
		.extra2         = SYSCTL_ONE,
	},
};

/*
 * Register sld_sysctls under the "kernel" sysctl directory.  Runs as a late
 * initcall and always returns 0.
 */
static int __init sld_mitigate_sysctl_init(void)
{
	register_sysctl_init("kernel", sld_sysctls);
	return 0;
}

late_initcall(sld_mitigate_sysctl_init);
#endif

/*
 * Check whether the "split_lock_detect=" value @arg selects the option @opt.
 *
 * @arg:    value string taken from the command line
 * @arglen: length of the value as reported by cmdline_find_option()
 * @opt:    option string from sld_options[]
 *
 * A plain option such as "warn" has to match @arg exactly.  An option that
 * ends in ':' ("ratelimit:") requires a number in the range 1..1000 after
 * the colon.  On success bld_ratelimit is initialized with an interval of HZ
 * and that number as burst.
 *
 * Return: true if @arg selects @opt, false otherwise.
 */
static inline bool match_option(const char *arg, int arglen, const char *opt)
{
	int len = strlen(opt), ratelimit;

	if (strncmp(arg, opt, len))
		return false;

	if (len && opt[len - 1] == ':') {
		/*
		 * Min ratelimit is 1 bus lock/sec.
		 * Max ratelimit is 1000 bus locks/sec.
		 *
		 * A missing or out of range number is a mismatch.  Falling
		 * back to the length comparison here would accept a bare
		 * "ratelimit:" and select sld_ratelimit while bld_ratelimit
		 * was never initialized.
		 */
		if (sscanf(arg, "ratelimit:%d", &ratelimit) != 1 ||
		    ratelimit <= 0 || ratelimit > 1000)
			return false;

		ratelimit_state_init(&bld_ratelimit, HZ, ratelimit);
		ratelimit_set_flags(&bld_ratelimit, RATELIMIT_MSG_ON_RELEASE);
		return true;
	}

	return len == arglen;
}

/*
 * Set or clear the split lock detect bit in MSR_TEST_CTRL and confirm that
 * the new value reads back.
 *
 * @on: true to set MSR_TEST_CTRL_SPLIT_LOCK_DETECT, false to clear it
 *
 * Return: true if the MSR could be read and written and then reads back with
 * the requested value, false otherwise.
 */
static bool split_lock_verify_msr(bool on)
{
	u64 wanted, readback;

	if (rdmsrl_safe(MSR_TEST_CTRL, &wanted))
		return false;

	wanted = on ? (wanted | MSR_TEST_CTRL_SPLIT_LOCK_DETECT)
		    : (wanted & ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT);

	if (wrmsrl_safe(MSR_TEST_CTRL, wanted))
		return false;

	/* The write did not fail; check that the value really stuck. */
	rdmsrl(MSR_TEST_CTRL, readback);

	return readback == wanted;
}

/*
 * Pick the detection mode at boot.
 *
 * Does nothing when the CPU has neither split lock nor bus lock detection.
 * Otherwise the mode is sld_warn unless "split_lock_detect=" on the kernel
 * command line names one of the entries in sld_options[].  A value that
 * matches no entry leaves the default in place.
 */
static void __init sld_state_setup(void)
{
	enum split_lock_detect_state chosen = sld_warn;
	char value[20];
	int value_len, idx;

	if (!(boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ||
	      boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)))
		return;

	value_len = cmdline_find_option(boot_command_line, "split_lock_detect",
					value, sizeof(value));
	if (value_len >= 0) {
		/* First matching table entry wins. */
		for (idx = 0; idx < ARRAY_SIZE(sld_options); idx++) {
			if (!match_option(value, value_len,
					  sld_options[idx].option))
				continue;

			chosen = sld_options[idx].state;
			break;
		}
	}

	sld_state = chosen;
}

/*
 * Probe MSR_TEST_CTRL on the boot CPU.
 *
 * The split lock detect bit must be clearable and settable.  The value read
 * with the bit cleared is kept in msr_test_ctrl_cache and written back once
 * the probe has passed.  Only then is X86_FEATURE_SPLIT_LOCK_DETECT forced
 * on.  On failure a message is logged and the feature stays off.
 */
static void __init __split_lock_setup(void)
{
	if (!split_lock_verify_msr(false))
		goto msr_failed;

	/* Sample the MSR while the detect bit is known to be clear. */
	rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);

	if (!split_lock_verify_msr(true))
		goto msr_failed;

	/* Probe passed: put the cached value back and advertise the feature. */
	wrmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
	setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT);
	return;

msr_failed:
	pr_info("MSR access failed: Disabled\n");
}

/*
 * Write MSR_TEST_CTRL from the cached boot value, with the split lock detect
 * bit set when @on is true and left as cached otherwise.
 *
 * MSR_TEST_CTRL is per core, yet it is handled here as if it were per CPU.
 * No lock is taken: the other thread could undo the setting right after the
 * lock was dropped, so locking would not help.
 */
static void sld_update_msr(bool on)
{
	u64 detect_bit = on ? MSR_TEST_CTRL_SPLIT_LOCK_DETECT : 0;

	wrmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache | detect_bit);
}

void split_lock_init(void)
{
	/*
	 * #DB for bus lock handles ratelimit and #AC for split lock is
	 * disabled.
	 */
	if (sld_state == sld_ratelimit) {
		split_lock_verify_msr(false);
		return;
	}

	if (cpu_model_supports_sld)
		split_lock_verify_msr(sld_state != sld_off);
}

/*
 * Delayed work used when the mitigation is enabled: set the split lock
 * detect bit again and release buslock_sem, which split_lock_warn() took
 * before scheduling this work.
 */
static void __split_lock_reenable_unlock(struct work_struct *work)
{
	sld_update_msr(true);
	up(&buslock_sem);
}

static DECLARE_DELAYED_WORK(sl_reenable_unlock, __split_lock_reenable_unlock);

static void __split_lock_reenable(struct work_struct *work)
{
	sld_update_msr(true);
}
static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable);

/*
 * If a CPU goes offline with pending delayed work to re-enable split lock
 * detection then the delayed work will be executed on some other CPU. That
 * handles releasing the buslock_sem, but because it executes on a
 * different CPU probably won't re-enable split lock detection. This is a
 * problem on HT systems since the sibling CPU on the same core may then be
 * left running with split lock detection disabled.
 *
 * Unconditionally re-enable detection here.
 */
static int splitlock_cpu_offline(unsigned int cpu)
{
	sld_update_msr(true);

	return 0;
}

static void split_lock_warn(unsigned long ip)
{
	struct delayed_work *work;
	int cpu;

	if (!current->reported_split_lock)
		pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
				    current->comm, current->pid, ip);
	current->reported_split_lock = 1;

	if (sysctl_sld_mitigate) {
		/*
		 * misery factor #1:
		 * sleep 10ms before trying to execute split lock.
		 */
		if (msleep_interruptible(10) > 0)
			return;
		/*
		 * Misery factor #2:
		 * only allow one buslocked disabled core at a time.
		 */
		if (down_interruptible(&buslock_sem) == -EINTR)
			return;
		work = &sl_reenable_unlock;
	} else {
		work = &sl_reenable;
	}

	cpu = get_cpu();
	schedule_delayed_work_on(cpu, work, 2);

	/* Disable split lock detection on this CPU to make progress */
	sld_update_msr(false);
	put_cpu();
}

/*
 * Handle a split lock #AC reported on behalf of a guest.
 *
 * @ip: address of the instruction that trapped
 *
 * In warn mode the trap goes through split_lock_warn() and true is returned.
 * In every other mode the event is logged once, SIGBUS is sent to the
 * current task, and false is returned.
 */
bool handle_guest_split_lock(unsigned long ip)
{
	const char *kind;

	if (sld_state == sld_warn) {
		split_lock_warn(ip);
		return true;
	}

	/* Outside of fatal mode the trap was not expected at all. */
	kind = (sld_state == sld_fatal) ? "fatal" : "bogus";
	pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n",
		     current->comm, current->pid, kind, ip);

	current->thread.trap_nr = X86_TRAP_AC;
	current->thread.error_code = 0;
	force_sig_fault(SIGBUS, BUS_ADRALN, NULL);

	return false;
}
EXPORT_SYMBOL_GPL(handle_guest_split_lock);

/*
 * Program the bus lock detect bit in MSR_IA32_DEBUGCTLMSR.
 *
 * Does nothing without X86_FEATURE_BUS_LOCK_DETECT.  The bit is cleared when
 * detection is off, and also in warn/fatal mode when split lock #AC is
 * available to handle those modes.  In all remaining cases the bit is set.
 */
void bus_lock_init(void)
{
	bool ac_covers_mode;
	u64 debugctl;

	if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
		return;

	/*
	 * Warn and fatal are handled by #AC for split lock if #AC for
	 * split lock is supported.
	 */
	ac_covers_mode = boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
			 (sld_state == sld_warn || sld_state == sld_fatal);

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);

	if (sld_state == sld_off || ac_covers_mode)
		debugctl &= ~DEBUGCTLMSR_BUS_LOCK_DETECT;
	else
		debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT;

	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}

/*
 * Handle a user-space #AC that may have been caused by a split lock.
 *
 * @regs:       register state at the time of the trap
 * @error_code: not used here
 *
 * Return: false when X86_EFLAGS_AC is set in @regs->flags or the mode is
 * sld_fatal; split_lock_warn() is not called in either case.  Otherwise the
 * trap goes through split_lock_warn() and true is returned.
 */
bool handle_user_split_lock(struct pt_regs *regs, long error_code)
{
	if (regs->flags & X86_EFLAGS_AC)
		return false;

	if (sld_state == sld_fatal)
		return false;

	split_lock_warn(regs->ip);

	return true;
}

/*
 * Act on a bus lock #DB according to sld_state.
 *
 * @regs: register state at the time of the trap
 *
 * off:       nothing.
 * fatal:     SIGBUS for the current task.
 * ratelimit: sleep in 20ms steps until bld_ratelimit lets the task through,
 *            then warn.
 * warn:      rate limited warning.
 */
void handle_bus_lock(struct pt_regs *regs)
{
	switch (sld_state) {
	case sld_fatal:
		force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
		break;
	case sld_off:
		break;
	case sld_ratelimit:
		/* Enforce no more than bld_ratelimit bus locks/sec. */
		while (!__ratelimit(&bld_ratelimit))
			msleep(20);
		/* Throttling is done; report the bus lock like warn mode does. */
		fallthrough;
	case sld_warn:
		pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n",
				    current->comm, current->pid, regs->ip);
		break;
	}
}

/*
 * CPU models that are known to have the per-core split-lock detection
 * feature even though they do not enumerate IA32_CORE_CAPABILITIES.
 *
 * split_lock_setup() consults this table before it looks at
 * MSR_IA32_CORE_CAPS.
 */
static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
	X86_MATCH_VFM(INTEL_ICELAKE_X,	0),
	X86_MATCH_VFM(INTEL_ICELAKE_L,	0),
	X86_MATCH_VFM(INTEL_ICELAKE_D,	0),
	{}
};

/*
 * Find out whether the CPU supports split lock detection and, if so, probe
 * the MSR.
 *
 * @c: CPU to check for X86_FEATURE_CORE_CAPABILITIES
 *
 * Nothing is done when running under a hypervisor.  Support is assumed for
 * the models in split_lock_cpu_ids[] and for CPUs that report
 * MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT in MSR_IA32_CORE_CAPS.  For those
 * CPUs cpu_model_supports_sld is set and __split_lock_setup() is run.
 */
static void __init split_lock_setup(struct cpuinfo_x86 *c)
{
	u64 ia32_core_caps;
	bool supported;

	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return;

	/* Check for CPUs that have support but do not enumerate it: */
	supported = x86_match_cpu(split_lock_cpu_ids) != NULL;

	if (!supported) {
		if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES))
			return;

		/*
		 * Not all bits in MSR_IA32_CORE_CAPS are architectural, but
		 * MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT is.  All CPUs that set
		 * it have split lock detection.
		 */
		rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps);
		supported = !!(ia32_core_caps &
			       MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT);
	}

	/* CPU is not in the model list and does not have the MSR bit: */
	if (!supported)
		return;

	cpu_model_supports_sld = true;
	__split_lock_setup();
}

/*
 * Log the detection mode that is in effect.  Called from sld_setup().
 *
 * Nothing is printed when the CPU has neither bus lock nor split lock
 * detection.  In warn mode with split lock #AC available this also registers
 * splitlock_cpu_offline() with the CPU hotplug core; a failure to do so is
 * only logged.
 */
static void sld_state_show(void)
{
	if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) &&
	    !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
		return;

	switch (sld_state) {
	case sld_off:
		pr_info("disabled\n");
		break;
	case sld_warn:
		if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
			pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n");
			if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
					      "x86/splitlock", NULL, splitlock_cpu_offline) < 0)
				pr_warn("No splitlock CPU offline handler\n");
		} else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
			pr_info("#DB: warning on user-space bus_locks\n");
		}
		break;
	case sld_fatal:
		if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
			pr_info("#AC: crashing the kernel on kernel split_locks and sending SIGBUS on user-space split_locks\n");
		} else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
			/*
			 * Split lock #AC is known to be absent on this
			 * branch, so there is no " from non-WB" variant of
			 * the message to choose.
			 */
			pr_info("#DB: sending SIGBUS on user-space bus_locks\n");
		}
		break;
	case sld_ratelimit:
		if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
			pr_info("#DB: setting system wide bus lock rate limit to %u/sec\n", bld_ratelimit.burst);
		break;
	}
}

/*
 * Boot-time entry point for split lock / bus lock detection.
 *
 * @c: CPU description handed on to split_lock_setup()
 *
 * The call order matters: split_lock_setup() may force
 * X86_FEATURE_SPLIT_LOCK_DETECT on, and both sld_state_setup() and
 * sld_state_show() test that feature bit.
 */
void __init sld_setup(struct cpuinfo_x86 *c)
{
	split_lock_setup(c);
	sld_state_setup();
	sld_state_show();
}