Contributors: 2
Author Tokens Token Proportion Commits Commit Proportion
Domenico Cerasuolo 1643 79.03% 4 57.14%
Nhat Pham 436 20.97% 3 42.86%
Total 2079 7


// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE

#include <linux/limits.h>
#include <unistd.h>
#include <stdio.h>
#include <signal.h>
#include <sys/sysinfo.h>
#include <string.h>
#include <sys/wait.h>
#include <sys/mman.h>

#include "../kselftest.h"
#include "cgroup_util.h"

/*
 * Read one decimal integer from the beginning of the file at @path and
 * store it in @value.
 *
 * Returns 0 on success, -1 if the file cannot be opened or does not start
 * with a number. @value is not written on failure.
 */
static int read_int(const char *path, size_t *value)
{
	FILE *file;
	int ret = 0;

	file = fopen(path, "r");
	if (!file)
		return -1;
	/* %zu is the conversion for size_t; %ld would require a long *. */
	if (fscanf(file, "%zu", value) != 1)
		ret = -1;
	fclose(file);
	return ret;
}

/*
 * Write @value to /proc/sys/vm/min_free_kbytes.
 *
 * Returns the number of characters formatted (> 0) on success, or a negative
 * value if the file cannot be opened, formatting fails, or the buffered data
 * cannot be flushed when the stream is closed.
 */
static int set_min_free_kb(size_t value)
{
	FILE *file;
	int ret;

	file = fopen("/proc/sys/vm/min_free_kbytes", "w");
	if (!file)
		return -1;
	/* %zu is the conversion matching size_t. */
	ret = fprintf(file, "%zu\n", value);
	/*
	 * stdio buffers the output, so a failing write() is only reported
	 * once fclose() flushes the stream.
	 */
	if (fclose(file) != 0 && ret >= 0)
		ret = -1;
	return ret;
}

/*
 * Load the number found in /proc/sys/vm/min_free_kbytes into @value.
 * The result of read_int() is handed back unchanged.
 */
static int read_min_free_kb(size_t *value)
{
	int err = read_int("/proc/sys/vm/min_free_kbytes", value);

	return err;
}

/*
 * Load the number found in /sys/kernel/debug/zswap/stored_pages into @value.
 * The result of read_int() is handed back unchanged.
 */
static int get_zswap_stored_pages(size_t *value)
{
	int err = read_int("/sys/kernel/debug/zswap/stored_pages", value);

	return err;
}

/*
 * Query the "zswpwb" key of @cg's memory.stat through cg_read_key_long()
 * and hand the result back converted to int.
 */
static int get_cg_wb_count(const char *cg)
{
	int wb_count = cg_read_key_long(cg, "memory.stat", "zswpwb");

	return wb_count;
}

/*
 * Query the "zswpout " key of @cgroup's memory.stat through
 * cg_read_key_long() and hand the result back as a long.
 */
static long get_zswpout(const char *cgroup)
{
	long zswpout = cg_read_key_long(cgroup, "memory.stat", "zswpout ");

	return zswpout;
}

/*
 * Callback for cg_run(): allocate a buffer, write one byte every 4095 bytes,
 * then read each of those bytes back.
 *
 * @cgroup: unused.
 * @arg:    allocation size in bytes, passed as an integer cast to a pointer.
 *
 * Returns 0 when every byte reads back as written, -1 if the allocation
 * fails or a byte differs.
 */
static int allocate_and_read_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);
	int ret = 0;

	if (!mem)
		return -1;
	/* Index with size_t: an int would overflow for sizes above INT_MAX. */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';

	/* Go through the allocated memory to (z)swap in and out pages */
	for (size_t i = 0; i < size; i += 4095) {
		if (mem[i] != 'a')
			ret = -1;
	}

	free(mem);
	return ret;
}

/*
 * Callback for cg_run(): allocate a buffer and write one byte every 4095
 * bytes, then release it.
 *
 * @cgroup: unused.
 * @arg:    allocation size in bytes, passed as an integer cast to a pointer.
 *
 * Returns 0 on success, -1 if the allocation fails.
 */
static int allocate_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);

	if (!mem)
		return -1;
	/* Index with size_t: an int would overflow for sizes above INT_MAX. */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';
	free(mem);
	return 0;
}

/*
 * Create the cgroup @name below @root and write "1M" to its memory.max.
 *
 * Returns the string obtained from cg_name() on success; callers in this
 * file release it with free(). Returns NULL when the name cannot be built,
 * the cgroup cannot be created, or the limit cannot be written (in which
 * case the freshly created cgroup is destroyed again).
 */
static char *setup_test_group_1M(const char *root, const char *name)
{
	char *cg = cg_name(root, name);

	if (!cg)
		return NULL;

	if (!cg_create(cg)) {
		if (!cg_write(cg, "memory.max", "1M"))
			return cg;
		/* Limit not applied: remove the cgroup created above. */
		cg_destroy(cg);
	}

	free(cg);
	return NULL;
}

/*
 * Sanity check: after a workload larger than memory.max has run inside the
 * cgroup, its zswpout counter must have grown.
 *
 * Returns KSFT_PASS when the counter increased, KSFT_FAIL otherwise.
 */
static int test_zswap_usage(const char *root)
{
	char *cg = cg_name(root, "no_shrink_test");
	int ret = KSFT_FAIL;
	long before, after;

	if (!cg || cg_create(cg) || cg_write(cg, "memory.max", "1M"))
		goto out;

	before = get_zswpout(cg);
	if (before < 0) {
		ksft_print_msg("Failed to get zswpout\n");
		goto out;
	}

	/* 4M of allocations against a 1M limit pushes memory into zswap */
	if (cg_run(cg, allocate_bytes, (void *)MB(4)))
		goto out;

	/* The counter has to be strictly larger than before the workload */
	after = get_zswpout(cg);
	if (after > before)
		ret = KSFT_PASS;
	else
		ksft_print_msg("zswpout does not increase after test program\n");

out:
	cg_destroy(cg);
	free(cg);
	return ret;
}

/*
 * With memory.zswap.max set to 0 the cgroup must still swap, but none of its
 * pages may be stored in zswap.
 *
 * Returns KSFT_PASS when memory.swap.peak reached at least 24MB and zswpout
 * is 0, KSFT_FAIL otherwise.
 */
static int test_swapin_nozswap(const char *root)
{
	char *cg = cg_name(root, "no_zswap_test");
	long peak, zswpout_count;
	int ret = KSFT_FAIL;

	if (!cg || cg_create(cg))
		goto out;
	if (cg_write(cg, "memory.max", "8M") ||
	    cg_write(cg, "memory.zswap.max", "0"))
		goto out;

	/* 32M written and read back under an 8M limit forces swapin */
	if (cg_run(cg, allocate_and_read_bytes, (void *)MB(32)))
		goto out;

	/* Swap must have been used ... */
	peak = cg_read_long(cg, "memory.swap.peak");
	if (peak < 0) {
		ksft_print_msg("failed to get cgroup's swap_peak\n");
		goto out;
	}
	if (peak < MB(24)) {
		ksft_print_msg("at least 24MB of memory should be swapped out\n");
		goto out;
	}

	/* ... while nothing may have gone through zswap */
	zswpout_count = get_zswpout(cg);
	if (zswpout_count < 0)
		ksft_print_msg("failed to get zswpout\n");
	else if (zswpout_count > 0)
		ksft_print_msg("zswapout > 0 when memory.zswap.max = 0\n");
	else
		ret = KSFT_PASS;

out:
	cg_destroy(cg);
	free(cg);
	return ret;
}

/*
 * Exercise the (z)swapin path: with memory.zswap.max set to "max", reading
 * back 32M under an 8M limit must load pages from zswap.
 *
 * Returns KSFT_PASS when the zswpin counter covers at least 24MB worth of
 * pages, KSFT_FAIL otherwise.
 */
static int test_zswapin(const char *root)
{
	char *cg = cg_name(root, "zswapin_test");
	int ret = KSFT_FAIL;
	long pages_in;

	if (!cg || cg_create(cg))
		goto out;
	if (cg_write(cg, "memory.max", "8M") ||
	    cg_write(cg, "memory.zswap.max", "max"))
		goto out;

	/* Write and read back more than memory.max to trigger (z)swap in */
	if (cg_run(cg, allocate_and_read_bytes, (void *)MB(32)))
		goto out;

	pages_in = cg_read_key_long(cg, "memory.stat", "zswpin ");
	if (pages_in < 0)
		ksft_print_msg("failed to get zswpin\n");
	else if (pages_in < MB(24) / PAGE_SIZE)
		ksft_print_msg("at least 24MB should be brought back from zswap\n");
	else
		ret = KSFT_PASS;

out:
	cg_destroy(cg);
	free(cg);
	return ret;
}

/*
 * When trying to store a memcg page in zswap, if the memcg hits its memory
 * limit in zswap, writeback should affect only the zswapped pages of that
 * memcg.
 *
 * Two cgroups with memory.max = 1M are created: wb_group, whose
 * memory.zswap.max is set to 10K, and control_group. The calling process
 * joins control_group and touches 10MB so that control_group has memory in
 * zswap, then a 10MB workload is run in wb_group.
 *
 * Returns KSFT_PASS when wb_group reports a non-zero "zswpwb" count while
 * control_group reports zero, KSFT_FAIL otherwise.
 */
static int test_no_invasive_cgroup_shrink(const char *root)
{
	int ret = KSFT_FAIL;
	size_t control_allocation_size = MB(10);
	/* Everything starts out NULL so the cleanup path is safe from any goto. */
	char *control_allocation = NULL, *wb_group = NULL, *control_group = NULL;

	wb_group = setup_test_group_1M(root, "per_memcg_wb_test1");
	if (!wb_group)
		return KSFT_FAIL;
	if (cg_write(wb_group, "memory.zswap.max", "10K"))
		goto out;
	control_group = setup_test_group_1M(root, "per_memcg_wb_test2");
	if (!control_group)
		goto out;

	/* Push some control_group memory into zswap */
	if (cg_enter_current(control_group))
		goto out;
	control_allocation = malloc(control_allocation_size);
	if (!control_allocation)
		goto out;
	for (size_t i = 0; i < control_allocation_size; i += 4095)
		control_allocation[i] = 'a';
	if (cg_read_key_long(control_group, "memory.stat", "zswapped") < 1)
		goto out;

	/* Allocate 10x memory.max to push wb_group memory into zswap and trigger wb */
	if (cg_run(wb_group, allocate_bytes, (void *)MB(10)))
		goto out;

	/* Verify that only zswapped memory from wb_group has been written back */
	if (get_cg_wb_count(wb_group) > 0 && get_cg_wb_count(control_group) == 0)
		ret = KSFT_PASS;
out:
	/* Move back to the root cgroup before the test cgroups are destroyed. */
	cg_enter_current(root);
	if (control_group) {
		cg_destroy(control_group);
		free(control_group);
	}
	cg_destroy(wb_group);
	free(wb_group);
	free(control_allocation);
	return ret;
}

/*
 * Data exchanged between test_no_kmem_bypass() and no_kmem_bypass_child().
 * The parent places it in a MAP_SHARED | MAP_ANONYMOUS mapping.
 */
struct no_kmem_bypass_child_args {
	/* Number of bytes the child should allocate and touch. */
	size_t target_alloc_bytes;
	/*
	 * Cleared by the parent before the child is started and set to true
	 * by the child once its allocation attempt is over; the parent polls
	 * it.
	 */
	size_t child_allocated;
};

/*
 * Child body for test_no_kmem_bypass(): allocate values->target_alloc_bytes,
 * write one byte every 4095 bytes, flag completion through
 * values->child_allocated and then block in pause().
 *
 * @cgroup: unused.
 * @arg:    pointer to a struct no_kmem_bypass_child_args.
 *
 * Returns -1 if the allocation fails (completion is flagged in that case
 * too), 0 if pause() returns.
 */
static int no_kmem_bypass_child(const char *cgroup, void *arg)
{
	struct no_kmem_bypass_child_args *shared = arg;
	char *buf = malloc(shared->target_alloc_bytes);
	long off;

	if (buf) {
		for (off = 0; off < shared->target_alloc_bytes; off += 4095)
			buf[off] = 'a';
	}

	/* Flag completion whether or not the allocation succeeded. */
	shared->child_allocated = true;
	if (!buf)
		return -1;

	pause();
	free(buf);
	return 0;
}

/*
 * When pages owned by a memcg are pushed to zswap by kswapd, they should be
 * charged to that cgroup. This wasn't the case before commit
 * cd08d80ecdac("mm: correctly charge compressed memory to its memcg").
 *
 * The test first allocates memory in a memcg, then raises min_free_kbytes to
 * a very high value so that the allocation falls below low wm, then makes
 * another allocation to trigger kswapd that should push the memcg-owned pages
 * to zswap and verifies that the zswap pages are correctly charged.
 *
 * To be run on a VM with at most 4G of memory.
 *
 * Returns KSFT_SKIP when sysinfo reports more than 5000000000 of total RAM,
 * KSFT_PASS when zswap holds more than the threshold and less than a quarter
 * of the stored bytes is missing from the cgroup's "zswapped" counter, and
 * KSFT_FAIL in every other case.
 *
 * NOTE(review): sys_info.totalram is used as a byte count, which assumes
 * sys_info.mem_unit == 1 - confirm for the targeted architectures.
 */
static int test_no_kmem_bypass(const char *root)
{
	size_t min_free_kb_high, min_free_kb_low, min_free_kb_original;
	struct no_kmem_bypass_child_args *values;
	size_t trigger_allocation_size;
	int wait_child_iteration = 0;
	size_t stored_pages_threshold;
	struct sysinfo sys_info;
	int ret = KSFT_FAIL;
	int child_status;
	/* NULL so that cleanup is safe when we bail out before cg_name(). */
	char *test_group = NULL;
	pid_t child_pid;

	/* Read sys info and compute test values accordingly */
	if (sysinfo(&sys_info) != 0)
		return KSFT_FAIL;
	if (sys_info.totalram > 5000000000)
		return KSFT_SKIP;
	/* Shared mapping so that the forked child can report back. */
	values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ |
			PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (values == MAP_FAILED)
		return KSFT_FAIL;
	if (read_min_free_kb(&min_free_kb_original))
		goto out_unmap;
	min_free_kb_high = sys_info.totalram / 2000;
	min_free_kb_low = sys_info.totalram / 500000;
	values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) +
		sys_info.totalram * 5 / 100;
	stored_pages_threshold = sys_info.totalram / 5 / 4096;
	trigger_allocation_size = sys_info.totalram / 20;

	/* Set up test memcg */
	if (cg_write(root, "cgroup.subtree_control", "+memory"))
		goto out;
	test_group = cg_name(root, "kmem_bypass_test");
	if (!test_group)
		goto out;

	/* Spawn memcg child and wait for it to allocate */
	set_min_free_kb(min_free_kb_low);
	if (cg_create(test_group))
		goto out;
	values->child_allocated = false;
	child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values);
	if (child_pid < 0)
		goto out;
	/* Poll for up to 10000 * 1ms; the test carries on after a timeout. */
	while (!values->child_allocated && wait_child_iteration++ < 10000)
		usleep(1000);

	/* Try to wakeup kswapd and let it push child memory to zswap */
	set_min_free_kb(min_free_kb_high);
	for (int i = 0; i < 20; i++) {
		size_t stored_pages;
		char *trigger_allocation = malloc(trigger_allocation_size);

		if (!trigger_allocation)
			break;
		for (size_t off = 0; off < trigger_allocation_size; off += 4095)
			trigger_allocation[off] = 'b';
		usleep(100000);
		free(trigger_allocation);
		if (get_zswap_stored_pages(&stored_pages))
			break;
		/* If memory was pushed to zswap, verify it belongs to memcg */
		if (stored_pages > stored_pages_threshold) {
			long zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped ");
			size_t stored_bytes = stored_pages * 4096;
			size_t unaccounted;

			/* A failed read leaves ret at KSFT_FAIL. */
			if (zswapped < 0)
				break;
			/*
			 * Byte counts may exceed INT_MAX, so stay in size_t
			 * and clamp instead of letting the difference wrap.
			 */
			unaccounted = stored_bytes > (size_t)zswapped ?
				stored_bytes - (size_t)zswapped : 0;
			ret = unaccounted < stored_bytes / 4 ? KSFT_PASS : KSFT_FAIL;
			break;
		}
	}

	kill(child_pid, SIGTERM);
	waitpid(child_pid, &child_status, 0);
out:
	set_min_free_kb(min_free_kb_original);
	if (test_group) {
		cg_destroy(test_group);
		free(test_group);
	}
out_unmap:
	munmap(values, sizeof(*values));
	return ret;
}

/*
 * Table of test cases executed in order by main(). Each entry pairs a test
 * function with its own identifier as the reported name.
 */
#define ZSWAP_TEST(func) { .fn = func, .name = #func }
struct zswap_test {
	int (*fn)(const char *root);	/* returns a KSFT_* result code */
	const char *name;		/* label printed with the result */
} tests[] = {
	ZSWAP_TEST(test_zswap_usage),
	ZSWAP_TEST(test_swapin_nozswap),
	ZSWAP_TEST(test_zswapin),
	ZSWAP_TEST(test_no_kmem_bypass),
	ZSWAP_TEST(test_no_invasive_cgroup_shrink),
};
#undef ZSWAP_TEST

/*
 * Tell whether /sys/module/zswap exists, i.e. whether access() with F_OK
 * succeeds on that path.
 */
static bool zswap_configured(void)
{
	if (access("/sys/module/zswap", F_OK) != 0)
		return false;
	return true;
}

/*
 * Entry point. Exits through ksft_exit_skip() when the cgroup v2 root cannot
 * be found, /sys/module/zswap is missing, or the memory controller checks
 * below fail; otherwise runs every entry of tests[] and reports each result.
 *
 * Returns EXIT_FAILURE if any test reported something other than KSFT_PASS
 * or KSFT_SKIP, EXIT_SUCCESS otherwise.
 */
int main(int argc, char **argv)
{
	char root[PATH_MAX];
	int status = EXIT_SUCCESS;
	int idx;

	if (cg_find_unified_root(root, sizeof(root)))
		ksft_exit_skip("cgroup v2 isn't mounted\n");

	if (!zswap_configured())
		ksft_exit_skip("zswap isn't configured\n");

	/*
	 * Check that memory controller is available:
	 * memory is listed in cgroup.controllers
	 */
	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
		ksft_exit_skip("memory controller isn't available\n");

	/*
	 * Only write "+memory" when the subtree_control lookup returned
	 * non-zero.
	 */
	if (cg_read_strstr(root, "cgroup.subtree_control", "memory") &&
	    cg_write(root, "cgroup.subtree_control", "+memory"))
		ksft_exit_skip("Failed to set memory controller\n");

	for (idx = 0; idx < ARRAY_SIZE(tests); idx++) {
		int result = tests[idx].fn(root);

		if (result == KSFT_PASS) {
			ksft_test_result_pass("%s\n", tests[idx].name);
		} else if (result == KSFT_SKIP) {
			ksft_test_result_skip("%s\n", tests[idx].name);
		} else {
			status = EXIT_FAILURE;
			ksft_test_result_fail("%s\n", tests[idx].name);
		}
	}

	return status;
}