Contributors: 3
Author Tokens Token Proportion Commits Commit Proportion
Mickaël Salaün 2088 69.81% 6 42.86%
Konstantin Meskhidze 895 29.92% 6 42.86%
Günther Noack 8 0.27% 2 14.29%
Total 2991 14


// SPDX-License-Identifier: GPL-2.0-only
/*
 * Landlock LSM - Ruleset management
 *
 * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
 * Copyright © 2018-2020 ANSSI
 */

#include <linux/bits.h>
#include <linux/bug.h>
#include <linux/compiler_types.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/lockdep.h>
#include <linux/overflow.h>
#include <linux/rbtree.h>
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

#include "limits.h"
#include "object.h"
#include "ruleset.h"

/*
 * Allocates a zeroed ruleset with room for @num_layers access masks and sets
 * up its reference counter, lock and (empty) rule trees.
 *
 * Returns the new ruleset with a usage count of one, or ERR_PTR(-ENOMEM).
 */
static struct landlock_ruleset *create_ruleset(const u32 num_layers)
{
	struct landlock_ruleset *new_ruleset = kzalloc(
		struct_size(new_ruleset, access_masks, num_layers),
		GFP_KERNEL_ACCOUNT);

	if (!new_ruleset)
		return ERR_PTR(-ENOMEM);

	/*
	 * Thanks to the zeroed allocation, the hierarchy pointer, the rule
	 * counter and every access_masks[] entry already have their initial
	 * value (NULL or 0).
	 */
	new_ruleset->num_layers = num_layers;
	new_ruleset->root_inode = RB_ROOT;
#if IS_ENABLED(CONFIG_INET)
	new_ruleset->root_net_port = RB_ROOT;
#endif /* IS_ENABLED(CONFIG_INET) */

	mutex_init(&new_ruleset->lock);
	refcount_set(&new_ruleset->usage, 1);
	return new_ruleset;
}

/*
 * Creates a single-layer ruleset handling @fs_access_mask and
 * @net_access_mask.
 *
 * Returns the new ruleset, ERR_PTR(-ENOMSG) when both masks are empty, or the
 * error reported by create_ruleset().
 */
struct landlock_ruleset *
landlock_create_ruleset(const access_mask_t fs_access_mask,
			const access_mask_t net_access_mask)
{
	struct landlock_ruleset *ruleset;

	/* A ruleset that handles nothing is useless: tell the caller. */
	if (!(fs_access_mask || net_access_mask))
		return ERR_PTR(-ENOMSG);

	ruleset = create_ruleset(1);
	if (!IS_ERR(ruleset)) {
		/* Only the first (and unique) layer gets populated. */
		if (fs_access_mask)
			landlock_add_fs_access_mask(ruleset, fs_access_mask,
						    0);
		if (net_access_mask)
			landlock_add_net_access_mask(ruleset, net_access_mask,
						     0);
	}
	return ruleset;
}

static void build_check_rule(void)
{
	const struct landlock_rule rule = {
		.num_layers = ~0,
	};

	BUILD_BUG_ON(rule.num_layers < LANDLOCK_MAX_NUM_LAYERS);
}

/*
 * Tells whether keys of type @key_type are landlock_object pointers (true for
 * inode keys) rather than raw data (false for network ports).  Warns once and
 * returns false for an unknown key type.
 */
static bool is_object_pointer(const enum landlock_key_type key_type)
{
	if (key_type == LANDLOCK_KEY_INODE)
		return true;

#if IS_ENABLED(CONFIG_INET)
	if (key_type == LANDLOCK_KEY_NET_PORT)
		return false;
#endif /* IS_ENABLED(CONFIG_INET) */

	/* Unknown key type. */
	WARN_ON_ONCE(1);
	return false;
}

/*
 * Allocates a rule for @id whose layer stack is a copy of the @num_layers
 * entries of @layers, followed by a copy of @new_layer when it is not NULL.
 * For object-backed keys, a reference is taken on the object.
 *
 * Returns the new rule (not linked in any tree), ERR_PTR(-E2BIG) if stacking
 * @new_layer would exceed LANDLOCK_MAX_NUM_LAYERS, or ERR_PTR(-ENOMEM).
 */
static struct landlock_rule *
create_rule(const struct landlock_id id,
	    const struct landlock_layer (*const layers)[], const u32 num_layers,
	    const struct landlock_layer *const new_layer)
{
	struct landlock_rule *rule;
	u32 total_layers = num_layers;

	build_check_rule();
	if (new_layer) {
		/* landlock_merge_ruleset() is expected to have checked this. */
		if (WARN_ON_ONCE(num_layers >= LANDLOCK_MAX_NUM_LAYERS))
			return ERR_PTR(-E2BIG);
		total_layers++;
	}

	rule = kzalloc(struct_size(rule, layers, total_layers),
		       GFP_KERNEL_ACCOUNT);
	if (!rule)
		return ERR_PTR(-ENOMEM);

	RB_CLEAR_NODE(&rule->node);
	rule->key = id.key;
	rule->num_layers = total_layers;

	/* Duplicates the original layer stack... */
	memcpy(rule->layers, layers, flex_array_size(rule, layers, num_layers));
	/* ...and pushes a copy of @new_layer on top of it, if any. */
	if (new_layer)
		rule->layers[total_layers - 1] = *new_layer;

	if (is_object_pointer(id.type)) {
		/* insert_rule() should have caught a missing object. */
		WARN_ON_ONCE(!id.key.object);
		landlock_get_object(id.key.object);
	}
	return rule;
}

/*
 * Returns the red-black tree root of @ruleset storing rules of type
 * @key_type, or ERR_PTR(-EINVAL) (with a one-time warning) for an unknown key
 * type.
 */
static struct rb_root *get_root(struct landlock_ruleset *const ruleset,
				const enum landlock_key_type key_type)
{
	if (key_type == LANDLOCK_KEY_INODE)
		return &ruleset->root_inode;

#if IS_ENABLED(CONFIG_INET)
	if (key_type == LANDLOCK_KEY_NET_PORT)
		return &ruleset->root_net_port;
#endif /* IS_ENABLED(CONFIG_INET) */

	/* Unknown key type. */
	WARN_ON_ONCE(1);
	return ERR_PTR(-EINVAL);
}

/*
 * Frees @rule (NULL is accepted) and, for object-backed key types, drops the
 * reference held on its object.  May sleep.
 */
static void free_rule(struct landlock_rule *const rule,
		      const enum landlock_key_type key_type)
{
	might_sleep();
	if (rule) {
		if (is_object_pointer(key_type))
			landlock_put_object(rule->key.object);
		kfree(rule);
	}
}

static void build_check_ruleset(void)
{
	const struct landlock_ruleset ruleset = {
		.num_rules = ~0,
		.num_layers = ~0,
	};
	typeof(ruleset.access_masks[0]) access_masks = ~0;

	BUILD_BUG_ON(ruleset.num_rules < LANDLOCK_MAX_NUM_RULES);
	BUILD_BUG_ON(ruleset.num_layers < LANDLOCK_MAX_NUM_LAYERS);
	BUILD_BUG_ON(access_masks <
		     ((LANDLOCK_MASK_ACCESS_FS << LANDLOCK_SHIFT_ACCESS_FS) |
		      (LANDLOCK_MASK_ACCESS_NET << LANDLOCK_SHIFT_ACCESS_NET)));
}

/**
 * insert_rule - Create and insert a rule in a ruleset
 *
 * @ruleset: The ruleset to be updated.
 * @id: The ID to build the new rule with.  The underlying kernel object, if
 *      any, must be held by the caller.
 * @layers: One or multiple layers to be copied into the new rule.
 * @num_layers: The number of @layers entries.
 *
 * When user space requests to add a new rule to a ruleset, @layers only
 * contains one entry and this entry is not assigned to any level.  In this
 * case, the new rule will extend @ruleset, similarly to a boolean OR between
 * access rights.
 *
 * When merging a ruleset in a domain, or copying a domain, @layers will be
 * added to @ruleset as new constraints, similarly to a boolean AND between
 * access rights.
 *
 * The caller must hold @ruleset->lock.  This function may sleep.
 *
 * Return: 0 on success, -ENOENT if @layers or a required key object is
 * missing, -EINVAL on an unknown key type or an inconsistent layer stack,
 * -E2BIG if @ruleset already holds %LANDLOCK_MAX_NUM_RULES rules, or the error
 * returned by create_rule() (-E2BIG or -ENOMEM).
 */
static int insert_rule(struct landlock_ruleset *const ruleset,
		       const struct landlock_id id,
		       const struct landlock_layer (*const layers)[],
		       const size_t num_layers)
{
	struct rb_node **walker_node;
	struct rb_node *parent_node = NULL;
	struct landlock_rule *new_rule;
	struct rb_root *root;

	might_sleep();
	lockdep_assert_held(&ruleset->lock);
	if (WARN_ON_ONCE(!layers))
		return -ENOENT;

	/* Object-backed keys (e.g. inodes) must reference an object. */
	if (is_object_pointer(id.type) && WARN_ON_ONCE(!id.key.object))
		return -ENOENT;

	/* Selects the tree matching the key type. */
	root = get_root(ruleset, id.type);
	if (IS_ERR(root))
		return PTR_ERR(root);

	/*
	 * Walks down the tree, ordered by key data, looking for a rule with
	 * the same key.  On exit without a match, @walker_node points to the
	 * empty link where the new rule belongs and @parent_node to its
	 * parent.
	 */
	walker_node = &root->rb_node;
	while (*walker_node) {
		struct landlock_rule *const this =
			rb_entry(*walker_node, struct landlock_rule, node);

		if (this->key.data != id.key.data) {
			parent_node = *walker_node;
			if (this->key.data < id.key.data)
				walker_node = &((*walker_node)->rb_right);
			else
				walker_node = &((*walker_node)->rb_left);
			continue;
		}

		/* Only a single-level layer should match an existing rule. */
		if (WARN_ON_ONCE(num_layers != 1))
			return -EINVAL;

		/* If there is a matching rule, updates it. */
		if ((*layers)[0].level == 0) {
			/*
			 * Extends access rights when the request comes from
			 * landlock_add_rule(2), i.e. @ruleset is not a domain.
			 */
			if (WARN_ON_ONCE(this->num_layers != 1))
				return -EINVAL;
			if (WARN_ON_ONCE(this->layers[0].level != 0))
				return -EINVAL;
			this->layers[0].access |= (*layers)[0].access;
			return 0;
		}

		/* A leveled layer can only be stacked on a leveled rule. */
		if (WARN_ON_ONCE(this->layers[0].level == 0))
			return -EINVAL;

		/*
		 * Intersects access rights when it is a merge between a
		 * ruleset and a domain.
		 */
		new_rule = create_rule(id, &this->layers, this->num_layers,
				       &(*layers)[0]);
		if (IS_ERR(new_rule))
			return PTR_ERR(new_rule);
		/*
		 * Swaps the old rule for its extended copy in place, then
		 * frees the old one.  The number of rules is unchanged.
		 */
		rb_replace_node(&this->node, &new_rule->node, root);
		free_rule(this, id.type);
		return 0;
	}

	/* There is no match for @id. */
	build_check_ruleset();
	if (ruleset->num_rules >= LANDLOCK_MAX_NUM_RULES)
		return -E2BIG;
	/* Copies @layers as is: no extra layer is stacked on a new rule. */
	new_rule = create_rule(id, layers, num_layers, NULL);
	if (IS_ERR(new_rule))
		return PTR_ERR(new_rule);
	/* Links the new rule at the position found above and rebalances. */
	rb_link_node(&new_rule->node, parent_node, walker_node);
	rb_insert_color(&new_rule->node, root);
	ruleset->num_rules++;
	return 0;
}

static void build_check_layer(void)
{
	const struct landlock_layer layer = {
		.level = ~0,
		.access = ~0,
	};

	BUILD_BUG_ON(layer.level < LANDLOCK_MAX_NUM_LAYERS);
	BUILD_BUG_ON(layer.access < LANDLOCK_MASK_ACCESS_FS);
}

/*
 * Adds the @access rights for @id to @ruleset, extending the rule already
 * tied to @id if there is one.
 *
 * @ruleset must be locked by the caller.
 *
 * Returns the result of insert_rule().
 */
int landlock_insert_rule(struct landlock_ruleset *const ruleset,
			 const struct landlock_id id,
			 const access_mask_t access)
{
	/* A zero level makes insert_rule() extend @ruleset. */
	struct landlock_layer layers[1] = {
		[0] = {
			.level = 0,
			.access = access,
		},
	};

	build_check_layer();
	return insert_rule(ruleset, id, &layers, ARRAY_SIZE(layers));
}

/* Takes a reference on @hierarchy; a NULL @hierarchy is ignored. */
static void get_hierarchy(struct landlock_hierarchy *const hierarchy)
{
	if (!hierarchy)
		return;
	refcount_inc(&hierarchy->usage);
}

/*
 * Drops a reference on @hierarchy.  Each node whose last reference is dropped
 * gets freed, and the reference it held on its parent is dropped in turn,
 * walking up the chain until a node is still in use or the top is reached.
 */
static void put_hierarchy(struct landlock_hierarchy *hierarchy)
{
	while (hierarchy) {
		struct landlock_hierarchy *const released = hierarchy;

		/* Still referenced elsewhere: stops here. */
		if (!refcount_dec_and_test(&released->usage))
			break;

		/* Saves the parent before the node goes away. */
		hierarchy = released->parent;
		kfree(released);
	}
}

/*
 * Inserts every rule of the @key_type tree of @src into @dst, as a layer whose
 * level is the current number of layers of @dst.
 *
 * Both @dst and @src must be locked by the caller.  May sleep.
 *
 * Returns 0 on success, -EINVAL if the key type is unknown or if a rule of
 * @src is not a single level-zero layer, or the error from insert_rule().
 */
static int merge_tree(struct landlock_ruleset *const dst,
		      struct landlock_ruleset *const src,
		      const enum landlock_key_type key_type)
{
	struct landlock_rule *walker_rule, *next_rule;
	struct rb_root *src_root;

	might_sleep();
	lockdep_assert_held(&dst->lock);
	lockdep_assert_held(&src->lock);

	src_root = get_root(src, key_type);
	if (IS_ERR(src_root))
		return PTR_ERR(src_root);

	/* Visits each rule of the @src tree. */
	rbtree_postorder_for_each_entry_safe(walker_rule, next_rule, src_root,
					     node) {
		const struct landlock_id id = {
			.type = key_type,
			.key = walker_rule->key,
		};
		struct landlock_layer layers[1];
		int err;

		/* Rules of a (non-domain) ruleset have one unleveled layer. */
		if (WARN_ON_ONCE(walker_rule->num_layers != 1))
			return -EINVAL;

		if (WARN_ON_ONCE(walker_rule->layers[0].level != 0))
			return -EINVAL;

		/* Same access rights, now tied to the top layer of @dst. */
		layers[0] = (struct landlock_layer){
			.level = dst->num_layers,
			.access = walker_rule->layers[0].access,
		};

		err = insert_rule(dst, id, &layers, ARRAY_SIZE(layers));
		if (err)
			return err;
	}
	return 0;
}

/*
 * Stacks the single layer of @src on top of the domain @dst: stores the access
 * masks of @src in the last layer slot of @dst, then merges the rule trees.
 *
 * Takes the lock of @dst, then the lock of @src.  May sleep.
 *
 * Returns 0 on success (or when @src is NULL), -EINVAL if @dst is not a
 * domain or if the layer counts are inconsistent, or the error from
 * merge_tree().
 */
static int merge_ruleset(struct landlock_ruleset *const dst,
			 struct landlock_ruleset *const src)
{
	int err;

	might_sleep();
	/* landlock_merge_ruleset() is expected to have checked this. */
	if (WARN_ON_ONCE(!src))
		return 0;
	/* Merging is only meaningful into a domain. */
	if (WARN_ON_ONCE(!dst || !dst->hierarchy))
		return -EINVAL;

	/* @dst goes first because we are its only owner. */
	mutex_lock(&dst->lock);
	mutex_lock_nested(&src->lock, SINGLE_DEPTH_NESTING);

	if (WARN_ON_ONCE(src->num_layers != 1 || dst->num_layers < 1)) {
		err = -EINVAL;
		goto out_unlock;
	}
	/* The new layer takes the last slot of the stack. */
	dst->access_masks[dst->num_layers - 1] = src->access_masks[0];

	/* Inode rules first... */
	err = merge_tree(dst, src, LANDLOCK_KEY_INODE);

#if IS_ENABLED(CONFIG_INET)
	/* ...then network port rules, unless something already failed. */
	if (!err)
		err = merge_tree(dst, src, LANDLOCK_KEY_NET_PORT);
#endif /* IS_ENABLED(CONFIG_INET) */

out_unlock:
	mutex_unlock(&src->lock);
	mutex_unlock(&dst->lock);
	return err;
}

/*
 * Copies every rule of the @key_type tree of @parent, with its whole layer
 * stack, into @child.
 *
 * Both @parent and @child must be locked by the caller.  May sleep.
 *
 * Returns 0 on success, -EINVAL (from get_root()) for an unknown key type, or
 * the error from insert_rule().
 */
static int inherit_tree(struct landlock_ruleset *const parent,
			struct landlock_ruleset *const child,
			const enum landlock_key_type key_type)
{
	struct landlock_rule *rule, *tmp;
	struct rb_root *parent_root;

	might_sleep();
	lockdep_assert_held(&parent->lock);
	lockdep_assert_held(&child->lock);

	parent_root = get_root(parent, key_type);
	if (IS_ERR(parent_root))
		return PTR_ERR(parent_root);

	/* Duplicates each rule of the selected @parent tree. */
	rbtree_postorder_for_each_entry_safe(rule, tmp, parent_root, node) {
		const struct landlock_id id = {
			.type = key_type,
			.key = rule->key,
		};
		const int err = insert_rule(child, id, &rule->layers,
					    rule->num_layers);

		if (err)
			return err;
	}
	return 0;
}

/*
 * Makes @child a descendant of the domain @parent: copies its rule trees and
 * its access masks, and links the hierarchy of @child to the one of @parent
 * (on which a reference is taken).  Does nothing if @parent is NULL.
 *
 * Takes the lock of @child, then the lock of @parent.  May sleep.
 *
 * Returns 0 on success, the error from inherit_tree(), or -EINVAL if @child
 * has no room for an extra layer or if @parent has no hierarchy.
 */
static int inherit_ruleset(struct landlock_ruleset *const parent,
			   struct landlock_ruleset *const child)
{
	int err;

	might_sleep();
	if (!parent)
		return 0;

	/* @child goes first because we are its only owner. */
	mutex_lock(&child->lock);
	mutex_lock_nested(&parent->lock, SINGLE_DEPTH_NESTING);

	/* Inode rules first... */
	err = inherit_tree(parent, child, LANDLOCK_KEY_INODE);

#if IS_ENABLED(CONFIG_INET)
	/* ...then network port rules, unless something already failed. */
	if (!err)
		err = inherit_tree(parent, child, LANDLOCK_KEY_NET_PORT);
#endif /* IS_ENABLED(CONFIG_INET) */

	if (err)
		goto out_unlock;

	/* Both remaining failure cases are consistency errors. */
	err = -EINVAL;
	if (WARN_ON_ONCE(child->num_layers <= parent->num_layers))
		goto out_unlock;

	/* The slot past the copied masks stays free for the new layer. */
	memcpy(child->access_masks, parent->access_masks,
	       flex_array_size(parent, access_masks, parent->num_layers));

	if (WARN_ON_ONCE(!parent->hierarchy))
		goto out_unlock;

	get_hierarchy(parent->hierarchy);
	child->hierarchy->parent = parent->hierarchy;
	err = 0;

out_unlock:
	mutex_unlock(&parent->lock);
	mutex_unlock(&child->lock);
	return err;
}

static void free_ruleset(struct landlock_ruleset *const ruleset)
{
	struct landlock_rule *freeme, *next;

	might_sleep();
	rbtree_postorder_for_each_entry_safe(freeme, next, &ruleset->root_inode,
					     node)
		free_rule(freeme, LANDLOCK_KEY_INODE);

#if IS_ENABLED(CONFIG_INET)
	rbtree_postorder_for_each_entry_safe(freeme, next,
					     &ruleset->root_net_port, node)
		free_rule(freeme, LANDLOCK_KEY_NET_PORT);
#endif /* IS_ENABLED(CONFIG_INET) */

	put_hierarchy(ruleset->hierarchy);
	kfree(ruleset);
}

/*
 * Drops a reference on @ruleset (NULL is accepted) and frees it when this was
 * the last one.  May sleep.
 */
void landlock_put_ruleset(struct landlock_ruleset *const ruleset)
{
	might_sleep();
	if (!ruleset)
		return;
	if (refcount_dec_and_test(&ruleset->usage))
		free_ruleset(ruleset);
}

/* Work callback: frees the ruleset embedding @work as its work_free field. */
static void free_ruleset_work(struct work_struct *const work)
{
	free_ruleset(container_of(work, struct landlock_ruleset, work_free));
}

/*
 * Drops a reference on @ruleset (NULL is accepted).  When this was the last
 * one, the ruleset is not freed here: the release is handed over to a work
 * item running free_ruleset_work().
 */
void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset)
{
	if (!ruleset)
		return;
	if (!refcount_dec_and_test(&ruleset->usage))
		return;

	INIT_WORK(&ruleset->work_free, free_ruleset_work);
	schedule_work(&ruleset->work_free);
}

/**
 * landlock_merge_ruleset - Merge a ruleset with a domain
 *
 * @parent: Parent domain, or NULL if there is none.
 * @ruleset: New ruleset to be merged.  It must not be NULL nor be @parent.
 *
 * Builds a new domain with one more layer than @parent (or a single layer
 * when @parent is NULL), makes it inherit the rules and access masks of
 * @parent, and stacks @ruleset as its last layer.  May sleep.
 *
 * Returns the new domain on success.  Otherwise, returns an ERR_PTR():
 * -EINVAL on invalid arguments, -E2BIG if @parent already has
 * %LANDLOCK_MAX_NUM_LAYERS layers, -ENOMEM on allocation failure, or the
 * error reported while inheriting from @parent or merging @ruleset.
 */
struct landlock_ruleset *
landlock_merge_ruleset(struct landlock_ruleset *const parent,
		       struct landlock_ruleset *const ruleset)
{
	struct landlock_ruleset *new_dom;
	u32 num_layers = 1;
	int err;

	might_sleep();
	if (WARN_ON_ONCE(!ruleset || parent == ruleset))
		return ERR_PTR(-EINVAL);

	/* One layer for @ruleset, on top of the layers of @parent if any. */
	if (parent) {
		if (parent->num_layers >= LANDLOCK_MAX_NUM_LAYERS)
			return ERR_PTR(-E2BIG);
		num_layers += parent->num_layers;
	}

	/* Step 1: a fresh domain with its own hierarchy node... */
	new_dom = create_ruleset(num_layers);
	if (IS_ERR(new_dom))
		return new_dom;

	new_dom->hierarchy =
		kzalloc(sizeof(*new_dom->hierarchy), GFP_KERNEL_ACCOUNT);
	if (!new_dom->hierarchy) {
		err = -ENOMEM;
		goto out_put_dom;
	}
	refcount_set(&new_dom->hierarchy->usage, 1);

	/* Step 2: ...which descends from @parent... */
	err = inherit_ruleset(parent, new_dom);
	/* Step 3: ...and is further restricted by @ruleset. */
	if (!err)
		err = merge_ruleset(new_dom, ruleset);
	if (!err)
		return new_dom;

out_put_dom:
	landlock_put_ruleset(new_dom);
	return ERR_PTR(err);
}

/*
 * Looks up the rule of @ruleset identified by @id.
 *
 * Returns the matching rule, or NULL if there is none or if the key type is
 * unknown.  The returned rule has the same lifetime as @ruleset.
 */
const struct landlock_rule *
landlock_find_rule(const struct landlock_ruleset *const ruleset,
		   const struct landlock_id id)
{
	const struct rb_root *root;
	const struct rb_node *cursor;

	/* get_root() takes a non-const ruleset; the tree is only read here. */
	root = get_root((struct landlock_ruleset *)ruleset, id.type);
	if (IS_ERR(root))
		return NULL;

	/* Plain binary search on the key data. */
	for (cursor = root->rb_node; cursor;) {
		struct landlock_rule *candidate =
			rb_entry(cursor, struct landlock_rule, node);

		if (candidate->key.data == id.key.data)
			return candidate;

		cursor = (candidate->key.data < id.key.data) ?
				 cursor->rb_right :
				 cursor->rb_left;
	}
	return NULL;
}

/*
 * Clears, in @layer_masks, the layer bits for which @rule grants the requested
 * accesses.
 *
 * @layer_masks is read and may be updated according to the access request and
 * the matching rule.
 * @masks_array_size must be equal to ARRAY_SIZE(*layer_masks).
 *
 * Returns true if the request is allowed (i.e. relevant layer masks for the
 * request are empty), which is always the case for an empty @access_request
 * or a NULL @layer_masks.  Returns false otherwise, including when @rule is
 * NULL.
 */
bool landlock_unmask_layers(const struct landlock_rule *const rule,
			    const access_mask_t access_request,
			    layer_mask_t (*const layer_masks)[],
			    const size_t masks_array_size)
{
	/* for_each_set_bit() walks an unsigned long bitmap. */
	const unsigned long requested = access_request;
	size_t idx;

	if (!access_request || !layer_masks)
		return true;
	if (!rule)
		return false;

	/*
	 * An access is granted if, for each policy layer, at least one rule
	 * encountered on the pathwalk grants the requested access,
	 * regardless of its position in the layer stack.  We must then check
	 * the remaining layers for each inode, from the first added layer to
	 * the last one.  When there are multiple requested accesses, for each
	 * policy layer, the full set of requested accesses may not be granted
	 * by only one rule, but by the union (binary OR) of multiple rules.
	 * E.g. /a/b <execute> + /a <read> => /a/b <execute + read>
	 */
	for (idx = 0; idx < rule->num_layers; idx++) {
		const struct landlock_layer *const layer = &rule->layers[idx];
		const layer_mask_t layer_bit = BIT_ULL(layer->level - 1);
		bool still_denied = false;
		unsigned long bit;

		/*
		 * Drops this layer from the mask of each requested access it
		 * grants, and notes whether any layer remains for any of the
		 * requested accesses.
		 */
		for_each_set_bit(bit, &requested, masks_array_size) {
			layer_mask_t *const mask = &(*layer_masks)[bit];

			if (layer->access & BIT_ULL(bit))
				*mask &= ~layer_bit;
			if (*mask)
				still_denied = true;
		}
		if (!still_denied)
			return true;
	}
	return false;
}

/*
 * Function type of the per-key-type access mask getters: takes a ruleset and
 * a layer level, and returns an access mask.  landlock_init_layer_masks()
 * stores either landlock_get_fs_access_mask or landlock_get_net_access_mask
 * in a pointer of this type.
 */
typedef access_mask_t
get_access_mask_t(const struct landlock_ruleset *const ruleset,
		  const u16 layer_level);

/**
 * landlock_init_layer_masks - Initialize layer masks from an access request
 *
 * Resets @layer_masks, then, for each access right in @access_request, sets
 * the bit of every layer of @domain that handles this access right.
 *
 * @domain: The domain that defines the current restrictions.
 * @access_request: The requested access rights to check.
 * @layer_masks: It must contain %LANDLOCK_NUM_ACCESS_FS or
 * %LANDLOCK_NUM_ACCESS_NET elements according to @key_type.
 * @key_type: The key type to switch between access masks of different types.
 *
 * Returns: An access mask where each access right bit is set which is handled
 * in any of the active layers in @domain.  Returns 0, leaving @layer_masks
 * untouched, if @key_type is unknown.
 */
access_mask_t
landlock_init_layer_masks(const struct landlock_ruleset *const domain,
			  const access_mask_t access_request,
			  layer_mask_t (*const layer_masks)[],
			  const enum landlock_key_type key_type)
{
	/* for_each_set_bit() walks an unsigned long bitmap. */
	const unsigned long requested = access_request;
	get_access_mask_t *mask_getter;
	access_mask_t handled = 0;
	size_t nr_access, level;

	/* Picks the accessor and the mask count matching the key type. */
	switch (key_type) {
	case LANDLOCK_KEY_INODE:
		nr_access = LANDLOCK_NUM_ACCESS_FS;
		mask_getter = landlock_get_fs_access_mask;
		break;

#if IS_ENABLED(CONFIG_INET)
	case LANDLOCK_KEY_NET_PORT:
		nr_access = LANDLOCK_NUM_ACCESS_NET;
		mask_getter = landlock_get_net_access_mask;
		break;
#endif /* IS_ENABLED(CONFIG_INET) */

	default:
		WARN_ON_ONCE(1);
		return 0;
	}

	memset(layer_masks, 0,
	       array_size(sizeof((*layer_masks)[0]), nr_access));

	/* An empty access request can happen because of O_WRONLY | O_RDWR. */
	if (!access_request)
		return 0;

	/* Records, per requested access right, the layers handling it. */
	for (level = 0; level < domain->num_layers; level++) {
		const access_mask_t layer_handled = mask_getter(domain, level);
		const layer_mask_t level_bit = BIT_ULL(level);
		unsigned long bit;

		for_each_set_bit(bit, &requested, nr_access) {
			if (!(layer_handled & BIT_ULL(bit)))
				continue;
			(*layer_masks)[bit] |= level_bit;
			handled |= BIT_ULL(bit);
		}
	}
	return handled;
}