Contributors: 9
Author Tokens Token Proportion Commits Commit Proportion
Sean Christopherson 489 43.54% 3 15.00%
Paolo Bonzini 388 34.55% 4 20.00%
Yosry Ahmed 139 12.38% 5 25.00%
David Matlack 95 8.46% 2 10.00%
Andrew Jones 6 0.53% 2 10.00%
Vipin Sharma 3 0.27% 1 5.00%
Andy Shevchenko 1 0.09% 1 5.00%
Colton Lewis 1 0.09% 1 5.00%
Wainer dos Santos Moschetta 1 0.09% 1 5.00%
Total 1123 20


// SPDX-License-Identifier: GPL-2.0
/*
 * KVM dirty page logging test
 *
 * Copyright (C) 2018, Red Hat, Inc.
 */
#include <stdio.h>
#include <stdlib.h>
#include <linux/bitmap.h>
#include <linux/bitops.h>

#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
#include "svm_util.h"
#include "vmx.h"

/* The memory slot index to track dirty pages */
#define TEST_MEM_SLOT_INDEX		1

/*
 * Allocate four pages total.  Two pages are used to verify that KVM marks
 * the accessed page/GFN as dirty, but not the "other" page.  Times two
 * so that each "normal" page can be accessed from L2 via an aliased L2 GVA+GPA
 * (when TDP is enabled), to verify KVM marks _L1's_ page/GFN as dirty (to
 * detect failures, L2 => L1 GPAs can't be identity mapped in the TDP page
 * tables, as marking L2's GPA dirty would get a false pass if L1 == L2).
 */
#define TEST_MEM_PAGES			4

/*
 * Base guest address of the test memslot, and the base of the upper part of
 * that range, which L2 uses as its aliased addresses when TDP is enabled.
 */
#define TEST_MEM_BASE			0xc0000000
#define TEST_MEM_ALIAS_BASE		0xc0002000

/* Address of the idx'th page counted from @base. */
#define TEST_GUEST_ADDR(base, idx)	((base) + (idx) * PAGE_SIZE)

/*
 * The test memory is identity mapped in the guest page tables, so the GVA and
 * the GPA of a given test page are the same value.
 */
#define TEST_GVA(idx)			TEST_GUEST_ADDR(TEST_MEM_BASE, idx)
#define TEST_GPA(idx)			TEST_GUEST_ADDR(TEST_MEM_BASE, idx)

/* L2 GPA that is mapped onto TEST_GPA(idx) in the TDP page tables. */
#define TEST_ALIAS_GPA(idx)		TEST_GUEST_ADDR(TEST_MEM_ALIAS_BASE, idx)

/* Host virtual address backing the idx'th test page. */
#define TEST_HVA(vm, idx)		addr_gpa2hva(vm, TEST_GPA(idx))

/* Number of unsigned long entries in the stack L1 hands to L2. */
#define L2_GUEST_STACK_SIZE 64

/*
 * Use the page offset bits to communicate the access+fault type.  The host
 * masks these bits off to recover the address that was accessed:
 *
 *  - READ_FAULT/WRITE_FAULT: the access is expected to leave the page dirty
 *    (and, for WRITE_FAULT, to have stored '1' into the page).
 *  - NO_FAULT: no page is expected to be dirty.
 */
#define TEST_SYNC_READ_FAULT		BIT(0)
#define TEST_SYNC_WRITE_FAULT		BIT(1)
#define TEST_SYNC_NO_FAULT		BIT(2)

/*
 * L2 payload.  Touches the two test pages that start at @base and, after every
 * single access, reports the page address to the host with the expected fault
 * type folded into the page offset bits.  The order of the accesses matters,
 * the host checks the dirty log after each sync.
 */
static void l2_guest_code(vm_vaddr_t base)
{
	u64 *first = (u64 *)TEST_GUEST_ADDR(base, 0);
	u64 *second = (u64 *)TEST_GUEST_ADDR(base, 1);

	/* First page: read, then write, then read again. */
	READ_ONCE(*first);
	GUEST_SYNC((vm_vaddr_t)first | TEST_SYNC_READ_FAULT);

	WRITE_ONCE(*first, 1);
	GUEST_SYNC((vm_vaddr_t)first | TEST_SYNC_WRITE_FAULT);

	READ_ONCE(*first);
	GUEST_SYNC((vm_vaddr_t)first | TEST_SYNC_NO_FAULT);

	/* Second page: two back-to-back writes, then a read. */
	WRITE_ONCE(*second, 1);
	GUEST_SYNC((vm_vaddr_t)second | TEST_SYNC_WRITE_FAULT);

	WRITE_ONCE(*second, 1);
	GUEST_SYNC((vm_vaddr_t)second | TEST_SYNC_WRITE_FAULT);

	READ_ONCE(*second);
	GUEST_SYNC((vm_vaddr_t)second | TEST_SYNC_NO_FAULT);

	/* Hand control back to L1 for good; L2 is never resumed. */
	vmcall();
}

static void l2_guest_code_tdp_enabled(void)
{
	/*
	 * Use the aliased virtual addresses when running with TDP to verify
	 * that KVM correctly handles the case where a page is dirtied via a
	 * different GPA than would be used by L1.
	 */
	l2_guest_code(TEST_MEM_ALIAS_BASE);
}

static void l2_guest_code_tdp_disabled(void)
{
	/*
	 * Use the "normal" virtual addresses when running without TDP enabled,
	 * in which case L2 will use the same page tables as L1, and thus needs
	 * to use the same virtual addresses that are mapped into L1.
	 */
	l2_guest_code(TEST_MEM_BASE);
}

/*
 * L1 guest code for VMX: set up the VMCS, launch L2 with the payload that
 * matches the TDP configuration, and verify L2 exited via VMCALL.
 */
void l1_vmx_code(struct vmx_pages *vmx)
{
	unsigned long l2_stack[L2_GUEST_STACK_SIZE];
	void *l2_entry;

	GUEST_ASSERT(vmx->vmcs_gpa);
	GUEST_ASSERT(prepare_for_vmx_operation(vmx));
	GUEST_ASSERT(load_vmcs(vmx));

	/* A non-zero EPTP means L2 runs with TDP, i.e. on the aliased pages. */
	l2_entry = vmx->eptp_gpa ? l2_guest_code_tdp_enabled :
				   l2_guest_code_tdp_disabled;

	/* The stack pointer handed to L2 is one past the end of the array. */
	prepare_vmcs(vmx, l2_entry, l2_stack + L2_GUEST_STACK_SIZE);

	/* Let the host confirm nothing is dirty before L2 runs ... */
	GUEST_SYNC(TEST_SYNC_NO_FAULT);
	GUEST_ASSERT(!vmlaunch());

	/* ... and again once L2 has handed control back. */
	GUEST_SYNC(TEST_SYNC_NO_FAULT);
	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL);
	GUEST_DONE();
}

/*
 * L1 guest code for SVM: set up the VMCB, run L2 with the payload that matches
 * the TDP configuration, and verify L2 exited via VMMCALL.
 */
static void l1_svm_code(struct svm_test_data *svm)
{
	unsigned long l2_stack[L2_GUEST_STACK_SIZE];
	void *l2_entry;

	/* A non-zero nested CR3 means L2 runs with TDP, i.e. on the aliased pages. */
	l2_entry = svm->ncr3_gpa ? l2_guest_code_tdp_enabled :
				   l2_guest_code_tdp_disabled;

	/* The stack pointer handed to L2 is one past the end of the array. */
	generic_svm_setup(svm, l2_entry, l2_stack + L2_GUEST_STACK_SIZE);

	/* Let the host confirm nothing is dirty before L2 runs ... */
	GUEST_SYNC(TEST_SYNC_NO_FAULT);
	run_guest(svm->vmcb, svm->vmcb_gpa);

	/* ... and again once L2 has handed control back. */
	GUEST_SYNC(TEST_SYNC_NO_FAULT);
	GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL);
	GUEST_DONE();
}

/*
 * L1 entry point.  @data is the nested-virtualization state allocated by the
 * host; dispatch to the vendor specific flow based on the CPU's features.
 */
static void l1_guest_code(void *data)
{
	if (!this_cpu_has(X86_FEATURE_VMX)) {
		l1_svm_code(data);
		return;
	}

	l1_vmx_code(data);
}

/*
 * Host-side handler for a guest sync.  @arg is the page-aligned address the
 * guest accessed (or '0' if it accessed nothing) with the TEST_SYNC_* flags in
 * the page offset bits.  Fetches the dirty log for the test memslot into @bmap
 * and asserts that the contents and the dirty state of every test page are
 * what the reported access implies.
 */
static void test_handle_ucall_sync(struct kvm_vm *vm, u64 arg,
				   unsigned long *bmap)
{
	const bool expect_write = arg & TEST_SYNC_WRITE_FAULT;
	const bool expect_clean = arg & TEST_SYNC_NO_FAULT;
	vm_vaddr_t gva = arg & ~(PAGE_SIZE - 1);
	vm_vaddr_t base;
	int page_nr, i;

	/*
	 * Translate the reported address into the page number of the backing
	 * physical page, which is also the _L1_ page number.  The dirty bitmap
	 * _must_ be indexed by L1 GPA, never by L2 GPA, so an address in the
	 * aliased range (used by L2 when TDP is enabled) resolves to the very
	 * same page number as its non-aliased counterpart.
	 *
	 * A gva of '0' means nothing was accessed and the sync only exists to
	 * verify all pages are clean; page '0' is used as a placeholder.
	 */
	base = gva >= TEST_MEM_ALIAS_BASE ? TEST_MEM_ALIAS_BASE : TEST_MEM_BASE;
	page_nr = gva ? ((gva - base) >> PAGE_SHIFT) : 0;

	TEST_ASSERT(page_nr == 0 || page_nr == 1,
		    "Test bug, unexpected frame number '%u' for arg = %lx", page_nr, arg);
	TEST_ASSERT(gva || (arg & TEST_SYNC_NO_FAULT),
		    "Test bug, gva must be valid if a fault is expected");

	kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);

	/*
	 * Walk every test page, not just the target, so that a write or a
	 * dirty bit landing on the wrong physical page is caught too.
	 *
	 * Any expected fault, read or write, should leave the target page
	 * dirty: the Dirty bit is set in the gPTE, so KVM should install a
	 * writable SPTE even for a read fault, *and* must mark the GFN dirty
	 * when it does.
	 */
	for (i = 0; i < TEST_MEM_PAGES; i++) {
		const bool is_target = (i == page_nr);

		/* Only a written target page may differ from the fill pattern. */
		if (!is_target || !expect_write) {
			TEST_ASSERT(*(u64 *)TEST_HVA(vm, i) == 0xaaaaaaaaaaaaaaaaULL,
				    "Page %u incorrectly written by guest", i);
		} else {
			TEST_ASSERT(*(u64 *)TEST_HVA(vm, i) == 1,
				    "Page %u incorrectly not written by guest", i);
		}

		/* Only a faulted-on target page may show up in the dirty log. */
		if (!is_target || expect_clean) {
			TEST_ASSERT(!test_bit(i, bmap),
				    "Page %u incorrectly reported dirty", i);
		} else {
			TEST_ASSERT(test_bit(i, bmap),
				    "Page %u incorrectly reported clean on %s fault",
				    i, arg & TEST_SYNC_READ_FAULT ? "read" : "write");
		}
	}
}

/*
 * Run one pass of the nested dirty logging test.
 *
 * Creates a VM with a single vCPU running l1_guest_code(), adds a dirty-logged
 * memslot for the test pages, and then runs the vCPU until the guest signals
 * completion, validating the test pages and the dirty log on every guest sync.
 *
 * @nested_tdp: run L2 with TDP enabled, in which case L2 accesses the test
 *		pages through aliased GPAs; otherwise L2 uses the same
 *		addresses as L1.
 *
 * All resources created here (the dirty bitmap and the VM) are released before
 * returning, as main() may invoke this function more than once.
 */
static void test_dirty_log(bool nested_tdp)
{
	vm_vaddr_t nested_gva = 0;
	unsigned long *bmap;
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;
	struct ucall uc;
	bool done = false;

	pr_info("Nested TDP: %s\n", nested_tdp ? "enabled" : "disabled");

	/* Create VM */
	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
	if (nested_tdp)
		vm_enable_tdp(vm);

	/* Allocate the vendor specific nested state and pass it to L1. */
	if (kvm_cpu_has(X86_FEATURE_VMX))
		vcpu_alloc_vmx(vm, &nested_gva);
	else
		vcpu_alloc_svm(vm, &nested_gva);

	vcpu_args_set(vcpu, 1, nested_gva);

	/* Add an extra memory slot for testing dirty logging */
	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
				    TEST_MEM_BASE,
				    TEST_MEM_SLOT_INDEX,
				    TEST_MEM_PAGES,
				    KVM_MEM_LOG_DIRTY_PAGES);

	/*
	 * Add an identity map for GVA range [0xc0000000, 0xc0004000).  This
	 * affects both L1 and L2.  However...
	 */
	virt_map(vm, TEST_MEM_BASE, TEST_MEM_BASE, TEST_MEM_PAGES);

	/*
	 * ... pages in the L2 GPA address range [0xc0002000, 0xc0004000) will
	 * map to [0xc0000000, 0xc0002000) when TDP is enabled (for L2).
	 *
	 * When TDP is disabled, the L2 guest code will still access the same L1
	 * GPAs as the TDP enabled case.
	 *
	 * Set the Dirty bit in the PTEs used by L2 so that KVM will create
	 * writable SPTEs when handling read faults (if the Dirty bit isn't
	 * set, KVM must intercept the next write to emulate the Dirty bit
	 * update).
	 */
	if (nested_tdp) {
		tdp_identity_map_default_memslots(vm);
		tdp_map(vm, TEST_ALIAS_GPA(0), TEST_GPA(0), PAGE_SIZE);
		tdp_map(vm, TEST_ALIAS_GPA(1), TEST_GPA(1), PAGE_SIZE);

		*tdp_get_pte(vm, TEST_ALIAS_GPA(0)) |= PTE_DIRTY_MASK(&vm->stage2_mmu);
		*tdp_get_pte(vm, TEST_ALIAS_GPA(1)) |= PTE_DIRTY_MASK(&vm->stage2_mmu);
	} else {
		*vm_get_pte(vm, TEST_GVA(0)) |= PTE_DIRTY_MASK(&vm->mmu);
		*vm_get_pte(vm, TEST_GVA(1)) |= PTE_DIRTY_MASK(&vm->mmu);
	}

	bmap = bitmap_zalloc(TEST_MEM_PAGES);
	TEST_ASSERT(bmap, "Failed to allocate dirty bitmap");

	while (!done) {
		/*
		 * Reset all test pages to a known pattern before every guest
		 * run so that the sync handler can tell which page, if any,
		 * the guest wrote.
		 */
		memset(TEST_HVA(vm, 0), 0xaa, TEST_MEM_PAGES * PAGE_SIZE);

		vcpu_run(vcpu);
		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);

		switch (get_ucall(vcpu, &uc)) {
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			/* NOT REACHED */
		case UCALL_SYNC:
			test_handle_ucall_sync(vm, uc.args[1], bmap);
			break;
		case UCALL_DONE:
			done = true;
			break;
		default:
			TEST_FAIL("Unknown ucall %lu", uc.cmd);
		}
	}

	/*
	 * Release everything that was created above; this function runs once
	 * per TDP configuration and must not leak the bitmap or the VM.
	 */
	free(bmap);
	kvm_vm_free(vm);
}

/*
 * Requires nested virtualization support (VMX or SVM).  The test always runs
 * with TDP disabled for L2, and runs a second time with TDP enabled for L2
 * when the CPU supports TDP.
 */
int main(int argc, char *argv[])
{
	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || kvm_cpu_has(X86_FEATURE_SVM));

	/* Baseline pass: L2 without TDP. */
	test_dirty_log(false);

	if (!kvm_cpu_has_tdp())
		return 0;

	/* Second pass: L2 with TDP, i.e. through the aliased GPAs. */
	test_dirty_log(true);

	return 0;
}