Contributors: 1
Author Tokens Token Proportion Commits Commit Proportion
Christian Brauner 3839 100.00% 1 100.00%
Total 3839 1


// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/capability.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <linux/nsfs.h>
#include "../kselftest_harness.h"
#include "../filesystems/utils.h"
#include "wrappers.h"

/*
 * Test credential changes and their impact on namespace active references.
 */

/*
 * Test setuid() in a user namespace properly swaps active references.
 * Create a user namespace with multiple UIDs mapped, then setuid() between them.
 * Verify that the user namespace remains active throughout.
 */
TEST(setuid_preserves_active_refs)
{
	pid_t pid;
	int status;
	__u64 userns_id;
	/* Ask listns() for user namespaces only; every other field is zero. */
	struct ns_id_req req = {
		.size = sizeof(req),
		.spare = 0,
		.ns_id = 0,
		.ns_type = CLONE_NEWUSER,
		.spare2 = 0,
		.user_ns_id = 0,
	};
	__u64 ns_ids[256];
	ssize_t ret;
	int i;
	bool found = false;
	int pipefd[2];	/* child -> parent: namespace ID */
	int syncfd[2];	/* parent -> child: EOF lets the child exit */

	ASSERT_EQ(pipe(pipefd), 0);
	ASSERT_EQ(pipe(syncfd), 0);

	pid = fork();
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		/*
		 * Child process. It reports failure through its exit status;
		 * exit() closes any descriptor not closed explicitly.
		 */
		int fd, userns_fd;
		__u64 child_userns_id;
		uid_t orig_uid = getuid();
		int setuid_count;
		char c;

		close(pipefd[0]);
		/* Drop our copy of the write end so the parent's close yields EOF */
		close(syncfd[1]);

		/* Create new user namespace with multiple UIDs mapped (0-9) */
		userns_fd = get_userns_fd(0, orig_uid, 10);
		if (userns_fd < 0) {
			close(pipefd[1]);
			exit(1);
		}

		if (setns(userns_fd, CLONE_NEWUSER) < 0) {
			close(userns_fd);
			close(pipefd[1]);
			exit(1);
		}
		close(userns_fd);

		/* Get user namespace ID */
		fd = open("/proc/self/ns/user", O_RDONLY);
		if (fd < 0) {
			close(pipefd[1]);
			exit(1);
		}

		if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
			close(fd);
			close(pipefd[1]);
			exit(1);
		}
		close(fd);

		/* Send namespace ID to parent; a short write is a failure */
		if (write(pipefd[1], &child_userns_id, sizeof(child_userns_id)) !=
		    (ssize_t)sizeof(child_userns_id)) {
			close(pipefd[1]);
			exit(1);
		}

		/*
		 * Perform multiple setuid() calls.
		 * Each setuid() triggers commit_creds() which should properly
		 * swap active references via switch_cred_namespaces().
		 * EPERM is tolerated: once the UID has changed, later calls
		 * may no longer be permitted.
		 */
		for (setuid_count = 0; setuid_count < 50; setuid_count++) {
			uid_t target_uid = (setuid_count % 10);
			if (setuid(target_uid) < 0) {
				if (errno != EPERM) {
					close(pipefd[1]);
					exit(1);
				}
			}
		}

		close(pipefd[1]);

		/*
		 * Stay alive until the parent closes its end of the sync pipe,
		 * so the parent's "namespace is active" check cannot race with
		 * this process exiting.
		 */
		while (read(syncfd[0], &c, 1) < 0 && errno == EINTR)
			;
		close(syncfd[0]);
		exit(0);
	}

	/* Parent process */
	close(pipefd[1]);
	close(syncfd[0]);

	/* A short read means the child failed during setup: skip the test */
	if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
		close(pipefd[0]);
		close(syncfd[1]);
		kill(pid, SIGKILL);
		waitpid(pid, NULL, 0);
		SKIP(return, "Failed to get namespace ID from child");
	}
	close(pipefd[0]);

	TH_LOG("Child user namespace ID: %llu", (unsigned long long)userns_id);

	/* Verify namespace is active while child is running */
	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
	if (ret < 0) {
		/* Save errno before kill()/waitpid() get a chance to change it */
		int saved_errno = errno;

		close(syncfd[1]);
		kill(pid, SIGKILL);
		waitpid(pid, NULL, 0);
		if (saved_errno == ENOSYS)
			SKIP(return, "listns() not supported");
		ASSERT_GE(ret, 0);
	}

	for (i = 0; i < ret; i++) {
		if (ns_ids[i] == userns_id) {
			found = true;
			break;
		}
	}

	/*
	 * Release and reap the child before asserting, so a failed
	 * assertion does not leave it behind.
	 */
	close(syncfd[1]);
	ASSERT_EQ(waitpid(pid, &status, 0), pid);
	ASSERT_TRUE(found);
	ASSERT_TRUE(WIFEXITED(status));
	ASSERT_EQ(WEXITSTATUS(status), 0);

	/* Verify namespace becomes inactive after child exits */
	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
	ASSERT_GE(ret, 0);

	found = false;
	for (i = 0; i < ret; i++) {
		if (ns_ids[i] == userns_id) {
			found = true;
			break;
		}
	}

	ASSERT_FALSE(found);
	TH_LOG("setuid() correctly preserved active references (no leak)");
}

/*
 * Test setgid() in a user namespace properly handles active references.
 */
TEST(setgid_preserves_active_refs)
{
	pid_t pid;
	int status;
	__u64 userns_id;
	/* Ask listns() for user namespaces only; every other field is zero. */
	struct ns_id_req req = {
		.size = sizeof(req),
		.spare = 0,
		.ns_id = 0,
		.ns_type = CLONE_NEWUSER,
		.spare2 = 0,
		.user_ns_id = 0,
	};
	__u64 ns_ids[256];
	ssize_t ret;
	int i;
	bool found = false;
	int pipefd[2];	/* child -> parent: namespace ID */

	ASSERT_EQ(pipe(pipefd), 0);

	pid = fork();
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		/* Child process. It reports failure through its exit status. */
		int fd, userns_fd;
		__u64 child_userns_id;
		uid_t orig_uid = getuid();
		int setgid_count;

		close(pipefd[0]);

		/* Create new user namespace with multiple GIDs mapped */
		userns_fd = get_userns_fd(0, orig_uid, 10);
		if (userns_fd < 0) {
			close(pipefd[1]);
			exit(1);
		}

		if (setns(userns_fd, CLONE_NEWUSER) < 0) {
			close(userns_fd);
			close(pipefd[1]);
			exit(1);
		}
		close(userns_fd);

		/* Get user namespace ID */
		fd = open("/proc/self/ns/user", O_RDONLY);
		if (fd < 0) {
			close(pipefd[1]);
			exit(1);
		}

		if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
			close(fd);
			close(pipefd[1]);
			exit(1);
		}
		close(fd);

		/* Send namespace ID to parent; a short write is a failure */
		if (write(pipefd[1], &child_userns_id, sizeof(child_userns_id)) !=
		    (ssize_t)sizeof(child_userns_id)) {
			close(pipefd[1]);
			exit(1);
		}

		/*
		 * Perform multiple setgid() calls, cycling through GIDs 0-9.
		 * EPERM is tolerated; any other error fails the child.
		 */
		for (setgid_count = 0; setgid_count < 50; setgid_count++) {
			gid_t target_gid = (setgid_count % 10);
			if (setgid(target_gid) < 0) {
				if (errno != EPERM) {
					close(pipefd[1]);
					exit(1);
				}
			}
		}

		close(pipefd[1]);
		exit(0);
	}

	/* Parent process */
	close(pipefd[1]);

	/* A short read means the child failed during setup: skip the test */
	if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
		close(pipefd[0]);
		kill(pid, SIGKILL);
		waitpid(pid, NULL, 0);
		SKIP(return, "Failed to get namespace ID from child");
	}
	close(pipefd[0]);

	/* Only inspect status once waitpid() is known to have filled it in */
	ASSERT_EQ(waitpid(pid, &status, 0), pid);
	ASSERT_TRUE(WIFEXITED(status));
	ASSERT_EQ(WEXITSTATUS(status), 0);

	/* Verify namespace becomes inactive */
	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
	if (ret < 0) {
		if (errno == ENOSYS)
			SKIP(return, "listns() not supported");
		ASSERT_GE(ret, 0);
	}

	/* The child's namespace must no longer be listed */
	for (i = 0; i < ret; i++) {
		if (ns_ids[i] == userns_id) {
			found = true;
			break;
		}
	}

	ASSERT_FALSE(found);
	TH_LOG("setgid() correctly preserved active references (no leak)");
}

/*
 * Test setresuid() which changes real, effective, and saved UIDs.
 * This should properly swap active references via commit_creds().
 */
TEST(setresuid_preserves_active_refs)
{
	pid_t pid;
	int status;
	__u64 userns_id;
	/* Ask listns() for user namespaces only; every other field is zero. */
	struct ns_id_req req = {
		.size = sizeof(req),
		.spare = 0,
		.ns_id = 0,
		.ns_type = CLONE_NEWUSER,
		.spare2 = 0,
		.user_ns_id = 0,
	};
	__u64 ns_ids[256];
	ssize_t ret;
	int i;
	bool found = false;
	int pipefd[2];	/* child -> parent: namespace ID */

	ASSERT_EQ(pipe(pipefd), 0);

	pid = fork();
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		/* Child process. It reports failure through its exit status. */
		int fd, userns_fd;
		__u64 child_userns_id;
		uid_t orig_uid = getuid();
		int setres_count;

		close(pipefd[0]);

		/* Create new user namespace */
		userns_fd = get_userns_fd(0, orig_uid, 10);
		if (userns_fd < 0) {
			close(pipefd[1]);
			exit(1);
		}

		if (setns(userns_fd, CLONE_NEWUSER) < 0) {
			close(userns_fd);
			close(pipefd[1]);
			exit(1);
		}
		close(userns_fd);

		/* Get user namespace ID */
		fd = open("/proc/self/ns/user", O_RDONLY);
		if (fd < 0) {
			close(pipefd[1]);
			exit(1);
		}

		if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
			close(fd);
			close(pipefd[1]);
			exit(1);
		}
		close(fd);

		/* Send namespace ID to parent; a short write is a failure */
		if (write(pipefd[1], &child_userns_id, sizeof(child_userns_id)) !=
		    (ssize_t)sizeof(child_userns_id)) {
			close(pipefd[1]);
			exit(1);
		}

		/*
		 * Perform multiple setresuid() calls, rotating the real,
		 * effective and saved UIDs through 0-4. EPERM is tolerated;
		 * any other error fails the child.
		 */
		for (setres_count = 0; setres_count < 30; setres_count++) {
			uid_t uid1 = (setres_count % 5);
			uid_t uid2 = ((setres_count + 1) % 5);
			uid_t uid3 = ((setres_count + 2) % 5);

			if (setresuid(uid1, uid2, uid3) < 0) {
				if (errno != EPERM) {
					close(pipefd[1]);
					exit(1);
				}
			}
		}

		close(pipefd[1]);
		exit(0);
	}

	/* Parent process */
	close(pipefd[1]);

	/* A short read means the child failed during setup: skip the test */
	if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
		close(pipefd[0]);
		kill(pid, SIGKILL);
		waitpid(pid, NULL, 0);
		SKIP(return, "Failed to get namespace ID from child");
	}
	close(pipefd[0]);

	/* Only inspect status once waitpid() is known to have filled it in */
	ASSERT_EQ(waitpid(pid, &status, 0), pid);
	ASSERT_TRUE(WIFEXITED(status));
	ASSERT_EQ(WEXITSTATUS(status), 0);

	/* Verify namespace becomes inactive */
	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
	if (ret < 0) {
		if (errno == ENOSYS)
			SKIP(return, "listns() not supported");
		ASSERT_GE(ret, 0);
	}

	/* The child's namespace must no longer be listed */
	for (i = 0; i < ret; i++) {
		if (ns_ids[i] == userns_id) {
			found = true;
			break;
		}
	}

	ASSERT_FALSE(found);
	TH_LOG("setresuid() correctly preserved active references (no leak)");
}

/*
 * Test credential changes across multiple user namespaces.
 * Create nested user namespaces and verify active reference tracking.
 */
TEST(cred_change_nested_userns)
{
	pid_t pid;
	int status;
	__u64 parent_userns_id, child_userns_id;
	/* Ask listns() for user namespaces only; every other field is zero. */
	struct ns_id_req req = {
		.size = sizeof(req),
		.spare = 0,
		.ns_id = 0,
		.ns_type = CLONE_NEWUSER,
		.spare2 = 0,
		.user_ns_id = 0,
	};
	__u64 ns_ids[256];
	ssize_t ret;
	int i;
	bool found_parent = false, found_child = false;
	int pipefd[2];	/* child -> parent: the two namespace IDs */
	int syncfd[2];	/* parent -> child: EOF lets the child exit */

	ASSERT_EQ(pipe(pipefd), 0);
	ASSERT_EQ(pipe(syncfd), 0);

	pid = fork();
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		/*
		 * Child process. It reports failure through its exit status;
		 * exit() closes any descriptor not closed explicitly.
		 */
		int fd, userns_fd;
		__u64 parent_id, child_id;
		uid_t orig_uid = getuid();
		char c;

		close(pipefd[0]);
		/* Drop our copy of the write end so the parent's close yields EOF */
		close(syncfd[1]);

		/* Create first user namespace */
		userns_fd = get_userns_fd(0, orig_uid, 1);
		if (userns_fd < 0) {
			close(pipefd[1]);
			exit(1);
		}

		if (setns(userns_fd, CLONE_NEWUSER) < 0) {
			close(userns_fd);
			close(pipefd[1]);
			exit(1);
		}
		close(userns_fd);

		/* Get first namespace ID */
		fd = open("/proc/self/ns/user", O_RDONLY);
		if (fd < 0) {
			close(pipefd[1]);
			exit(1);
		}

		if (ioctl(fd, NS_GET_ID, &parent_id) < 0) {
			close(fd);
			close(pipefd[1]);
			exit(1);
		}
		close(fd);

		/* Create nested user namespace */
		userns_fd = get_userns_fd(0, 0, 1);
		if (userns_fd < 0) {
			close(pipefd[1]);
			exit(1);
		}

		if (setns(userns_fd, CLONE_NEWUSER) < 0) {
			close(userns_fd);
			close(pipefd[1]);
			exit(1);
		}
		close(userns_fd);

		/* Get nested namespace ID */
		fd = open("/proc/self/ns/user", O_RDONLY);
		if (fd < 0) {
			close(pipefd[1]);
			exit(1);
		}

		if (ioctl(fd, NS_GET_ID, &child_id) < 0) {
			close(fd);
			close(pipefd[1]);
			exit(1);
		}
		close(fd);

		/* Send both IDs to parent; a short write is a failure */
		if (write(pipefd[1], &parent_id, sizeof(parent_id)) !=
		    (ssize_t)sizeof(parent_id)) {
			close(pipefd[1]);
			exit(1);
		}
		if (write(pipefd[1], &child_id, sizeof(child_id)) !=
		    (ssize_t)sizeof(child_id)) {
			close(pipefd[1]);
			exit(1);
		}

		/*
		 * Perform some credential changes in nested namespace.
		 * Best effort: results are deliberately not checked.
		 */
		setuid(0);
		setgid(0);

		close(pipefd[1]);

		/*
		 * Stay alive until the parent closes its end of the sync pipe,
		 * so the parent's "both namespaces are active" check cannot
		 * race with this process exiting.
		 */
		while (read(syncfd[0], &c, 1) < 0 && errno == EINTR)
			;
		close(syncfd[0]);
		exit(0);
	}

	/* Parent process */
	close(pipefd[1]);
	close(syncfd[0]);

	/* Read both namespace IDs; a short read means child setup failed */
	if (read(pipefd[0], &parent_userns_id, sizeof(parent_userns_id)) != sizeof(parent_userns_id)) {
		close(pipefd[0]);
		close(syncfd[1]);
		kill(pid, SIGKILL);
		waitpid(pid, NULL, 0);
		SKIP(return, "Failed to get parent namespace ID");
	}

	if (read(pipefd[0], &child_userns_id, sizeof(child_userns_id)) != sizeof(child_userns_id)) {
		close(pipefd[0]);
		close(syncfd[1]);
		kill(pid, SIGKILL);
		waitpid(pid, NULL, 0);
		SKIP(return, "Failed to get child namespace ID");
	}
	close(pipefd[0]);

	TH_LOG("Parent userns: %llu, Child userns: %llu",
	       (unsigned long long)parent_userns_id,
	       (unsigned long long)child_userns_id);

	/* Verify both namespaces are active */
	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
	if (ret < 0) {
		/* Save errno before kill()/waitpid() get a chance to change it */
		int saved_errno = errno;

		close(syncfd[1]);
		kill(pid, SIGKILL);
		waitpid(pid, NULL, 0);
		if (saved_errno == ENOSYS)
			SKIP(return, "listns() not supported");
		ASSERT_GE(ret, 0);
	}

	for (i = 0; i < ret; i++) {
		if (ns_ids[i] == parent_userns_id)
			found_parent = true;
		if (ns_ids[i] == child_userns_id)
			found_child = true;
	}

	/*
	 * Release and reap the child before asserting, so a failed
	 * assertion does not leave it behind.
	 */
	close(syncfd[1]);
	ASSERT_EQ(waitpid(pid, &status, 0), pid);
	ASSERT_TRUE(found_parent);
	ASSERT_TRUE(found_child);
	ASSERT_TRUE(WIFEXITED(status));
	ASSERT_EQ(WEXITSTATUS(status), 0);

	/* Verify both namespaces become inactive */
	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
	ASSERT_GE(ret, 0);

	found_parent = false;
	found_child = false;
	for (i = 0; i < ret; i++) {
		if (ns_ids[i] == parent_userns_id)
			found_parent = true;
		if (ns_ids[i] == child_userns_id)
			found_child = true;
	}

	ASSERT_FALSE(found_parent);
	ASSERT_FALSE(found_child);
	TH_LOG("Nested user namespace credential changes preserved active refs (no leak)");
}

/*
 * Test rapid credential changes don't cause refcount imbalances.
 * This stress-tests the switch_cred_namespaces() logic.
 */
TEST(rapid_cred_changes_no_leak)
{
	pid_t pid;
	int status;
	__u64 userns_id;
	/* Ask listns() for user namespaces only; every other field is zero. */
	struct ns_id_req req = {
		.size = sizeof(req),
		.spare = 0,
		.ns_id = 0,
		.ns_type = CLONE_NEWUSER,
		.spare2 = 0,
		.user_ns_id = 0,
	};
	__u64 ns_ids[256];
	ssize_t ret;
	int i;
	bool found = false;
	int pipefd[2];	/* child -> parent: namespace ID */

	ASSERT_EQ(pipe(pipefd), 0);

	pid = fork();
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		/* Child process. It reports failure through its exit status. */
		int fd, userns_fd;
		__u64 child_userns_id;
		uid_t orig_uid = getuid();
		int change_count;

		close(pipefd[0]);

		/* Create new user namespace with wider range of UIDs/GIDs */
		userns_fd = get_userns_fd(0, orig_uid, 100);
		if (userns_fd < 0) {
			close(pipefd[1]);
			exit(1);
		}

		if (setns(userns_fd, CLONE_NEWUSER) < 0) {
			close(userns_fd);
			close(pipefd[1]);
			exit(1);
		}
		close(userns_fd);

		/* Get user namespace ID */
		fd = open("/proc/self/ns/user", O_RDONLY);
		if (fd < 0) {
			close(pipefd[1]);
			exit(1);
		}

		if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
			close(fd);
			close(pipefd[1]);
			exit(1);
		}
		close(fd);

		/* Send namespace ID to parent; a short write is a failure */
		if (write(pipefd[1], &child_userns_id, sizeof(child_userns_id)) !=
		    (ssize_t)sizeof(child_userns_id)) {
			close(pipefd[1]);
			exit(1);
		}

		/*
		 * Perform many rapid credential changes.
		 * Mix setuid, setgid, setreuid, setregid, setresuid, setresgid.
		 * Results are deliberately ignored: individual calls may be
		 * refused once privileges change, and only the reference
		 * accounting afterwards is under test.
		 */
		for (change_count = 0; change_count < 200; change_count++) {
			switch (change_count % 6) {
			case 0:
				setuid(change_count % 50);
				break;
			case 1:
				setgid(change_count % 50);
				break;
			case 2:
				setreuid(change_count % 50, (change_count + 1) % 50);
				break;
			case 3:
				setregid(change_count % 50, (change_count + 1) % 50);
				break;
			case 4:
				setresuid(change_count % 50, (change_count + 1) % 50, (change_count + 2) % 50);
				break;
			case 5:
				setresgid(change_count % 50, (change_count + 1) % 50, (change_count + 2) % 50);
				break;
			}
		}

		close(pipefd[1]);
		exit(0);
	}

	/* Parent process */
	close(pipefd[1]);

	/* A short read means the child failed during setup: skip the test */
	if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
		close(pipefd[0]);
		kill(pid, SIGKILL);
		waitpid(pid, NULL, 0);
		SKIP(return, "Failed to get namespace ID from child");
	}
	close(pipefd[0]);

	TH_LOG("Testing with user namespace ID: %llu", (unsigned long long)userns_id);

	/* Only inspect status once waitpid() is known to have filled it in */
	ASSERT_EQ(waitpid(pid, &status, 0), pid);
	ASSERT_TRUE(WIFEXITED(status));
	ASSERT_EQ(WEXITSTATUS(status), 0);

	/* Verify namespace becomes inactive (no leaked active refs) */
	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
	if (ret < 0) {
		if (errno == ENOSYS)
			SKIP(return, "listns() not supported");
		ASSERT_GE(ret, 0);
	}

	/* The child's namespace must no longer be listed */
	for (i = 0; i < ret; i++) {
		if (ns_ids[i] == userns_id) {
			found = true;
			break;
		}
	}

	ASSERT_FALSE(found);
	TH_LOG("200 rapid credential changes completed with no active ref leak");
}

/*
 * Test setfsuid/setfsgid which change filesystem UID/GID.
 * These also trigger credential changes but may have different code paths.
 */
TEST(setfsuid_preserves_active_refs)
{
	pid_t pid;
	int status;
	__u64 userns_id;
	/* Ask listns() for user namespaces only; every other field is zero. */
	struct ns_id_req req = {
		.size = sizeof(req),
		.spare = 0,
		.ns_id = 0,
		.ns_type = CLONE_NEWUSER,
		.spare2 = 0,
		.user_ns_id = 0,
	};
	__u64 ns_ids[256];
	ssize_t ret;
	int i;
	bool found = false;
	int pipefd[2];	/* child -> parent: namespace ID */

	ASSERT_EQ(pipe(pipefd), 0);

	pid = fork();
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		/* Child process. It reports failure through its exit status. */
		int fd, userns_fd;
		__u64 child_userns_id;
		uid_t orig_uid = getuid();
		int change_count;

		close(pipefd[0]);

		/* Create new user namespace */
		userns_fd = get_userns_fd(0, orig_uid, 10);
		if (userns_fd < 0) {
			close(pipefd[1]);
			exit(1);
		}

		if (setns(userns_fd, CLONE_NEWUSER) < 0) {
			close(userns_fd);
			close(pipefd[1]);
			exit(1);
		}
		close(userns_fd);

		/* Get user namespace ID */
		fd = open("/proc/self/ns/user", O_RDONLY);
		if (fd < 0) {
			close(pipefd[1]);
			exit(1);
		}

		if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
			close(fd);
			close(pipefd[1]);
			exit(1);
		}
		close(fd);

		/* Send namespace ID to parent; a short write is a failure */
		if (write(pipefd[1], &child_userns_id, sizeof(child_userns_id)) !=
		    (ssize_t)sizeof(child_userns_id)) {
			close(pipefd[1]);
			exit(1);
		}

		/*
		 * Perform multiple setfsuid/setfsgid calls, cycling through
		 * IDs 0-9. Return values are deliberately not inspected.
		 */
		for (change_count = 0; change_count < 50; change_count++) {
			setfsuid(change_count % 10);
			setfsgid(change_count % 10);
		}

		close(pipefd[1]);
		exit(0);
	}

	/* Parent process */
	close(pipefd[1]);

	/* A short read means the child failed during setup: skip the test */
	if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
		close(pipefd[0]);
		kill(pid, SIGKILL);
		waitpid(pid, NULL, 0);
		SKIP(return, "Failed to get namespace ID from child");
	}
	close(pipefd[0]);

	/* Only inspect status once waitpid() is known to have filled it in */
	ASSERT_EQ(waitpid(pid, &status, 0), pid);
	ASSERT_TRUE(WIFEXITED(status));
	ASSERT_EQ(WEXITSTATUS(status), 0);

	/* Verify namespace becomes inactive */
	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
	if (ret < 0) {
		if (errno == ENOSYS)
			SKIP(return, "listns() not supported");
		ASSERT_GE(ret, 0);
	}

	/* The child's namespace must no longer be listed */
	for (i = 0; i < ret; i++) {
		if (ns_ids[i] == userns_id) {
			found = true;
			break;
		}
	}

	ASSERT_FALSE(found);
	TH_LOG("setfsuid/setfsgid correctly preserved active references (no leak)");
}

TEST_HARNESS_MAIN