Contributors: 17
Author Tokens Token Proportion Commits Commit Proportion
David Hildenbrand 358 19.94% 8 24.24%
Andrey Vagin 348 19.39% 1 3.03%
Zi Yan 245 13.65% 1 3.03%
Peter Xu 209 11.64% 5 15.15%
Mike Kravetz 175 9.75% 2 6.06%
Konstantin Khlebnikov 154 8.58% 1 3.03%
Zach O'Keefe 122 6.80% 2 6.06%
Muhammad Usama Anjum 90 5.01% 2 6.06%
Mina Almasry 40 2.23% 1 3.03%
Mike Rapoport 11 0.61% 2 6.06%
Pedro Demarchi Gomes 9 0.50% 1 3.03%
Aneesh Kumar K.V 9 0.50% 1 3.03%
Kirill A. Shutemov 8 0.45% 2 6.06%
Zhansaya Bagdauletkyzy 6 0.33% 1 3.03%
Nadav Amit 5 0.28% 1 3.03%
Breno Leitão 4 0.22% 1 3.03%
Andrea Arcangeli 2 0.11% 1 3.03%
Total 1795 33


// SPDX-License-Identifier: GPL-2.0
#include <string.h>
#include <fcntl.h>
#include <dirent.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>
#include <linux/fs.h>
#include <sys/syscall.h>
#include <unistd.h>
#include "../kselftest.h"
#include "vm_util.h"

#define PMD_SIZE_FILE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
#define SMAP_FILE_PATH "/proc/self/smaps"
#define MAX_LINE_LENGTH 500

unsigned int __page_size;
unsigned int __page_shift;

uint64_t pagemap_get_entry(int fd, char *start)
{
	const unsigned long pfn = (unsigned long)start / getpagesize();
	uint64_t entry;
	int ret;

	ret = pread(fd, &entry, sizeof(entry), pfn * sizeof(entry));
	if (ret != sizeof(entry))
		ksft_exit_fail_msg("reading pagemap failed\n");
	return entry;
}

static uint64_t __pagemap_scan_get_categories(int fd, char *start, struct page_region *r)
{
	struct pm_scan_arg arg;

	arg.start = (uintptr_t)start;
	arg.end = (uintptr_t)(start + psize());
	arg.vec = (uintptr_t)r;
	arg.vec_len = 1;
	arg.flags = 0;
	arg.size = sizeof(struct pm_scan_arg);
	arg.max_pages = 0;
	arg.category_inverted = 0;
	arg.category_mask = 0;
	arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE |
				  PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO |
				  PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY;
	arg.return_mask = arg.category_anyof_mask;

	return ioctl(fd, PAGEMAP_SCAN, &arg);
}

static uint64_t pagemap_scan_get_categories(int fd, char *start)
{
	struct page_region r;
	long ret;

	ret = __pagemap_scan_get_categories(fd, start, &r);
	if (ret < 0)
		ksft_exit_fail_msg("PAGEMAP_SCAN failed: %s\n", strerror(errno));
	if (ret == 0)
		return 0;
	return r.categories;
}

/* `start` is any valid address. */
static bool pagemap_scan_supported(int fd, char *start)
{
	static int supported = -1;
	int ret;

	if (supported != -1)
		return supported;

	/* Provide an invalid address in order to trigger EFAULT. */
	ret = __pagemap_scan_get_categories(fd, start, (struct page_region *) ~0UL);
	if (ret == 0)
		ksft_exit_fail_msg("PAGEMAP_SCAN succeeded unexpectedly\n");

	supported = errno == EFAULT;

	return supported;
}

static bool page_entry_is(int fd, char *start, char *desc,
			  uint64_t pagemap_flags, uint64_t pagescan_flags)
{
	bool m = pagemap_get_entry(fd, start) & pagemap_flags;

	if (pagemap_scan_supported(fd, start)) {
		bool s = pagemap_scan_get_categories(fd, start) & pagescan_flags;

		if (m == s)
			return m;

		ksft_exit_fail_msg(
			"read and ioctl return unmatched results for %s: %d %d", desc, m, s);
	}
	return m;
}

bool pagemap_is_softdirty(int fd, char *start)
{
	return page_entry_is(fd, start, "soft-dirty",
				PM_SOFT_DIRTY, PAGE_IS_SOFT_DIRTY);
}

bool pagemap_is_swapped(int fd, char *start)
{
	return page_entry_is(fd, start, "swap", PM_SWAP, PAGE_IS_SWAPPED);
}

bool pagemap_is_populated(int fd, char *start)
{
	return page_entry_is(fd, start, "populated",
				PM_PRESENT | PM_SWAP,
				PAGE_IS_PRESENT | PAGE_IS_SWAPPED);
}

unsigned long pagemap_get_pfn(int fd, char *start)
{
	uint64_t entry = pagemap_get_entry(fd, start);

	/* If present (63th bit), PFN is at bit 0 -- 54. */
	if (entry & PM_PRESENT)
		return entry & 0x007fffffffffffffull;
	return -1ul;
}

void clear_softdirty(void)
{
	int ret;
	const char *ctrl = "4";
	int fd = open("/proc/self/clear_refs", O_WRONLY);

	if (fd < 0)
		ksft_exit_fail_msg("opening clear_refs failed\n");
	ret = write(fd, ctrl, strlen(ctrl));
	close(fd);
	if (ret != strlen(ctrl))
		ksft_exit_fail_msg("writing clear_refs failed\n");
}

bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len)
{
	while (fgets(buf, len, fp)) {
		if (!strncmp(buf, pattern, strlen(pattern)))
			return true;
	}
	return false;
}

uint64_t read_pmd_pagesize(void)
{
	int fd;
	char buf[20];
	ssize_t num_read;

	fd = open(PMD_SIZE_FILE_PATH, O_RDONLY);
	if (fd == -1)
		return 0;

	num_read = read(fd, buf, 19);
	if (num_read < 1) {
		close(fd);
		return 0;
	}
	buf[num_read] = '\0';
	close(fd);

	return strtoul(buf, NULL, 10);
}

bool __check_huge(void *addr, char *pattern, int nr_hpages,
		  uint64_t hpage_size)
{
	uint64_t thp = -1;
	int ret;
	FILE *fp;
	char buffer[MAX_LINE_LENGTH];
	char addr_pattern[MAX_LINE_LENGTH];

	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
		       (unsigned long) addr);
	if (ret >= MAX_LINE_LENGTH)
		ksft_exit_fail_msg("%s: Pattern is too long\n", __func__);

	fp = fopen(SMAP_FILE_PATH, "r");
	if (!fp)
		ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, SMAP_FILE_PATH);

	if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer)))
		goto err_out;

	/*
	 * Fetch the pattern in the same block and check the number of
	 * hugepages.
	 */
	if (!check_for_pattern(fp, pattern, buffer, sizeof(buffer)))
		goto err_out;

	snprintf(addr_pattern, MAX_LINE_LENGTH, "%s%%9ld kB", pattern);

	if (sscanf(buffer, addr_pattern, &thp) != 1)
		ksft_exit_fail_msg("Reading smap error\n");

err_out:
	fclose(fp);
	return thp == (nr_hpages * (hpage_size >> 10));
}

bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size)
{
	return __check_huge(addr, "AnonHugePages: ", nr_hpages, hpage_size);
}

bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size)
{
	return __check_huge(addr, "FilePmdMapped:", nr_hpages, hpage_size);
}

bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size)
{
	return __check_huge(addr, "ShmemPmdMapped:", nr_hpages, hpage_size);
}

int64_t allocate_transhuge(void *ptr, int pagemap_fd)
{
	uint64_t ent[2];

	/* drop pmd */
	if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE,
		 MAP_FIXED | MAP_ANONYMOUS |
		 MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr)
		ksft_exit_fail_msg("mmap transhuge\n");

	if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE))
		ksft_exit_fail_msg("MADV_HUGEPAGE\n");

	/* allocate transparent huge page */
	*(volatile void **)ptr = ptr;

	if (pread(pagemap_fd, ent, sizeof(ent),
		  (uintptr_t)ptr >> (pshift() - 3)) != sizeof(ent))
		ksft_exit_fail_msg("read pagemap\n");

	if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) &&
	    PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) &&
	    !(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - pshift())) - 1)))
		return PAGEMAP_PFN(ent[0]);

	return -1;
}

unsigned long default_huge_page_size(void)
{
	unsigned long hps = 0;
	char *line = NULL;
	size_t linelen = 0;
	FILE *f = fopen("/proc/meminfo", "r");

	if (!f)
		return 0;
	while (getline(&line, &linelen, f) > 0) {
		if (sscanf(line, "Hugepagesize:       %lu kB", &hps) == 1) {
			hps <<= 10;
			break;
		}
	}

	free(line);
	fclose(f);
	return hps;
}

int detect_hugetlb_page_sizes(size_t sizes[], int max)
{
	DIR *dir = opendir("/sys/kernel/mm/hugepages/");
	int count = 0;

	if (!dir)
		return 0;

	while (count < max) {
		struct dirent *entry = readdir(dir);
		size_t kb;

		if (!entry)
			break;
		if (entry->d_type != DT_DIR)
			continue;
		if (sscanf(entry->d_name, "hugepages-%zukB", &kb) != 1)
			continue;
		sizes[count++] = kb * 1024;
		ksft_print_msg("[INFO] detected hugetlb page size: %zu KiB\n",
			       kb);
	}
	closedir(dir);
	return count;
}

/* If `ioctls' non-NULL, the allowed ioctls will be returned into the var */
int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
			      bool miss, bool wp, bool minor, uint64_t *ioctls)
{
	struct uffdio_register uffdio_register = { 0 };
	uint64_t mode = 0;
	int ret = 0;

	if (miss)
		mode |= UFFDIO_REGISTER_MODE_MISSING;
	if (wp)
		mode |= UFFDIO_REGISTER_MODE_WP;
	if (minor)
		mode |= UFFDIO_REGISTER_MODE_MINOR;

	uffdio_register.range.start = (unsigned long)addr;
	uffdio_register.range.len = len;
	uffdio_register.mode = mode;

	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1)
		ret = -errno;
	else if (ioctls)
		*ioctls = uffdio_register.ioctls;

	return ret;
}

int uffd_register(int uffd, void *addr, uint64_t len,
		  bool miss, bool wp, bool minor)
{
	return uffd_register_with_ioctls(uffd, addr, len,
					 miss, wp, minor, NULL);
}

int uffd_unregister(int uffd, void *addr, uint64_t len)
{
	struct uffdio_range range = { .start = (uintptr_t)addr, .len = len };
	int ret = 0;

	if (ioctl(uffd, UFFDIO_UNREGISTER, &range) == -1)
		ret = -errno;

	return ret;
}

unsigned long get_free_hugepages(void)
{
	unsigned long fhp = 0;
	char *line = NULL;
	size_t linelen = 0;
	FILE *f = fopen("/proc/meminfo", "r");

	if (!f)
		return fhp;
	while (getline(&line, &linelen, f) > 0) {
		if (sscanf(line, "HugePages_Free:      %lu", &fhp) == 1)
			break;
	}

	free(line);
	fclose(f);
	return fhp;
}