cregit-Linux how code gets into the kernel

Release 4.11 kernel/cgroup/rdma.c

Directory: kernel/cgroup
/*
 * RDMA resource limiting controller for cgroups.
 *
 * Used to allow a cgroup hierarchy to stop processes from consuming
 * additional RDMA resources after a certain limit is reached.
 *
 * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com>
 *
 * This file is subject to the terms and conditions of version 2 of the GNU
 * General Public License. See the file COPYING in the main directory of the
 * Linux distribution for more details.
 */

#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/cgroup.h>
#include <linux/parser.h>
#include <linux/cgroup_rdma.h>


#define RDMACG_MAX_STR "max"

/*
 * Protects list of resource pools maintained on per cgroup basis
 * and rdma device list.
 */
static DEFINE_MUTEX(rdmacg_mutex);
static LIST_HEAD(rdmacg_devices);


enum rdmacg_file_type {
	
RDMACG_RESOURCE_TYPE_MAX,
	
RDMACG_RESOURCE_TYPE_STAT,
};

/*
 * resource table definition as to be seen by the user.
 * Need to add entries to it when more resources are
 * added/defined at IB verb/core layer.
 */

static char const *rdmacg_resource_names[] = {
	[RDMACG_RESOURCE_HCA_HANDLE]	= "hca_handle",
	[RDMACG_RESOURCE_HCA_OBJECT]	= "hca_object",
};

/* resource tracker for each resource of rdma cgroup */

struct rdmacg_resource {
	
int max;
	
int usage;
};

/*
 * resource pool object which represents per cgroup, per device
 * resources. There are multiple instances of this object per cgroup,
 * therefore it cannot be embedded within rdma_cgroup structure. It
 * is maintained as list.
 */

struct rdmacg_resource_pool {
	
struct rdmacg_device	*device;
	
struct rdmacg_resource	resources[RDMACG_RESOURCE_MAX];

	
struct list_head	cg_node;
	
struct list_head	dev_node;

	/* count active user tasks of this pool */
	
u64			usage_sum;
	/* total number counts which are set to max */
	
int			num_max_cnt;
};


static struct rdma_cgroup *css_rdmacg(struct cgroup_subsys_state *css) { return container_of(css, struct rdma_cgroup, css); }

Contributors

PersonTokensPropCommitsCommitProp
Parav Pandit24100.00%1100.00%
Total24100.00%1100.00%


static struct rdma_cgroup *parent_rdmacg(struct rdma_cgroup *cg) { return css_rdmacg(cg->css.parent); }

Contributors

PersonTokensPropCommitsCommitProp
Parav Pandit23100.00%1100.00%
Total23100.00%1100.00%


static inline struct rdma_cgroup *get_current_rdmacg(void) { return css_rdmacg(task_get_css(current, rdma_cgrp_id)); }

Contributors

PersonTokensPropCommitsCommitProp
Parav Pandit22100.00%1100.00%
Total22100.00%1100.00%


static void set_resource_limit(struct rdmacg_resource_pool *rpool, int index, int new_max) { if (new_max == S32_MAX) { if (rpool->resources[index].max != S32_MAX) rpool->num_max_cnt++; } else { if (rpool->resources[index].max == S32_MAX) rpool->num_max_cnt--; } rpool->resources[index].max = new_max; }

Contributors

PersonTokensPropCommitsCommitProp
Parav Pandit75100.00%1100.00%
Total75100.00%1100.00%


static void set_all_resource_max_limit(struct rdmacg_resource_pool *rpool) { int i; for (i = 0; i < RDMACG_RESOURCE_MAX; i++) set_resource_limit(rpool, i, S32_MAX); }

Contributors

PersonTokensPropCommitsCommitProp
Parav Pandit36100.00%1100.00%
Total36100.00%1100.00%


static void free_cg_rpool_locked(struct rdmacg_resource_pool *rpool) { lockdep_assert_held(&rdmacg_mutex); list_del(&rpool->cg_node); list_del(&rpool->dev_node); kfree(rpool); }

Contributors

PersonTokensPropCommitsCommitProp
Parav Pandit38100.00%1100.00%
Total38100.00%1100.00%


static struct rdmacg_resource_pool * find_cg_rpool_locked(struct rdma_cgroup *cg, struct rdmacg_device *device) { struct rdmacg_resource_pool *pool; lockdep_assert_held(&rdmacg_mutex); list_for_each_entry(pool, &cg->rpools, cg_node) if (pool->device == device) return pool; return NULL; }

Contributors

PersonTokensPropCommitsCommitProp
Parav Pandit51100.00%1100.00%
Total51100.00%1100.00%


static struct rdmacg_resource_pool * get_cg_rpool_locked(struct rdma_cgroup *cg, struct rdmacg_device *device) { struct rdmacg_resource_pool *rpool; rpool = find_cg_rpool_locked(cg, device); if (rpool) return rpool; rpool = kzalloc(sizeof(*rpool), GFP_KERNEL); if (!rpool) return ERR_PTR(-ENOMEM); rpool->device = device; set_all_resource_max_limit(rpool); INIT_LIST_HEAD(&rpool->cg_node); INIT_LIST_HEAD(&rpool->dev_node); list_add_tail(&rpool->cg_node, &cg->rpools); list_add_tail(&rpool->dev_node, &device->rpools); return rpool; }

Contributors

PersonTokensPropCommitsCommitProp
Parav Pandit120100.00%1100.00%
Total120100.00%1100.00%

/** * uncharge_cg_locked - uncharge resource for rdma cgroup * @cg: pointer to cg to uncharge and all parents in hierarchy * @device: pointer to rdmacg device * @index: index of the resource to uncharge in cg (resource pool) * * It also frees the resource pool which was created as part of * charging operation when there are no resources attached to * resource pool. */
static void uncharge_cg_locked(struct rdma_cgroup *cg, struct rdmacg_device *device, enum rdmacg_resource_type index) { struct rdmacg_resource_pool *rpool; rpool = find_cg_rpool_locked(cg, device); /* * rpool cannot be null at this stage. Let kernel operate in case * if there a bug in IB stack or rdma controller, instead of crashing * the system. */ if (unlikely(!rpool)) { pr_warn("Invalid device %p or rdma cgroup %p\n", cg, device); return; } rpool->resources[index].usage--; /* * A negative count (or overflow) is invalid, * it indicates a bug in the rdma controller. */ WARN_ON_ONCE(rpool->resources[index].usage < 0); rpool->usage_sum--; if (rpool->usage_sum == 0 && rpool->num_max_cnt == RDMACG_RESOURCE_MAX) { /* * No user of the rpool and all entries are set to max, so * safe to delete this rpool. */ free_cg_rpool_locked(rpool); } }

Contributors

PersonTokensPropCommitsCommitProp
Parav Pandit107100.00%1100.00%
Total107100.00%1100.00%

/** * rdmacg_uncharge_hierarchy - hierarchically uncharge rdma resource count * @device: pointer to rdmacg device * @stop_cg: while traversing hirerchy, when meet with stop_cg cgroup * stop uncharging * @index: index of the resource to uncharge in cg in given resource pool */
static void rdmacg_uncharge_hierarchy(struct rdma_cgroup *cg, struct rdmacg_device *device, struct rdma_cgroup *stop_cg, enum rdmacg_resource_type index) { struct rdma_cgroup *p; mutex_lock(&rdmacg_mutex); for (p = cg; p != stop_cg; p = parent_rdmacg(p)) uncharge_cg_locked(p, device, index); mutex_unlock(&rdmacg_mutex); css_put(&cg->css); }

Contributors

PersonTokensPropCommitsCommitProp
Parav Pandit76100.00%1100.00%
Total76100.00%1100.00%

/** * rdmacg_uncharge - hierarchically uncharge rdma resource count * @device: pointer to rdmacg device * @index: index of the resource to uncharge in cgroup in given resource pool */
void rdmacg_uncharge(struct rdma_cgroup *cg, struct rdmacg_device *device, enum rdmacg_resource_type index) { if (index >= RDMACG_RESOURCE_MAX) return; rdmacg_uncharge_hierarchy(cg, device, NULL, index); }

Contributors

PersonTokensPropCommitsCommitProp
Parav Pandit37100.00%1100.00%
Total37100.00%1100.00%

EXPORT_SYMBOL(rdmacg_uncharge); /** * rdmacg_try_charge - hierarchically try to charge the rdma resource * @rdmacg: pointer to rdma cgroup which will own this resource * @device: pointer to rdmacg device * @index: index of the resource to charge in cgroup (resource pool) * * This function follows charging resource in hierarchical way. * It will fail if the charge would cause the new value to exceed the * hierarchical limit. * Returns 0 if the charge succeded, otherwise -EAGAIN, -ENOMEM or -EINVAL. * Returns pointer to rdmacg for this resource when charging is successful. * * Charger needs to account resources on two criteria. * (a) per cgroup & (b) per device resource usage. * Per cgroup resource usage ensures that tasks of cgroup doesn't cross * the configured limits. Per device provides granular configuration * in multi device usage. It allocates resource pool in the hierarchy * for each parent it come across for first resource. Later on resource * pool will be available. Therefore it will be much faster thereon * to charge/uncharge. */
int rdmacg_try_charge(struct rdma_cgroup **rdmacg, struct rdmacg_device *device, enum rdmacg_resource_type index) { struct rdma_cgroup *cg, *p; struct rdmacg_resource_pool *rpool; s64 new; int ret = 0; if (index >= RDMACG_RESOURCE_MAX) return -EINVAL; /* * hold on to css, as cgroup can be removed but resource * accounting happens on css. */ cg = get_current_rdmacg(); mutex_lock(&rdmacg_mutex); for (p = cg; p; p = parent_rdmacg(p)) { rpool = get_cg_rpool_locked(p, device); if (IS_ERR(rpool)) { ret = PTR_ERR(rpool); goto err; } else { new = rpool->resources[index].usage + 1; if (new > rpool->resources[index].max) { ret = -EAGAIN; goto err; } else { rpool->resources[index].usage = new; rpool->usage_sum++; } } } mutex_unlock(&rdmacg_mutex); *rdmacg = cg; return 0; err: mutex_unlock(&rdmacg_mutex); rdmacg_uncharge_hierarchy(cg, device, p, index); return ret; }

Contributors

PersonTokensPropCommitsCommitProp
Parav Pandit202100.00%1100.00%
Total202100.00%1100.00%

EXPORT_SYMBOL(rdmacg_try_charge); /** * rdmacg_register_device - register rdmacg device to rdma controller. * @device: pointer to rdmacg device whose resources need to be accounted. * * If IB stack wish a device to participate in rdma cgroup resource * tracking, it must invoke this API to register with rdma cgroup before * any user space application can start using the RDMA resources. * Returns 0 on success or EINVAL when table length given is beyond * supported size. */
int rdmacg_register_device(struct rdmacg_device *device) { INIT_LIST_HEAD(&device->dev_node); INIT_LIST_HEAD(&device->rpools); mutex_lock(&rdmacg_mutex); list_add_tail(&device->dev_node, &rdmacg_devices); mutex_unlock(&rdmacg_mutex); return 0; }

Contributors

PersonTokensPropCommitsCommitProp
Parav Pandit52100.00%1100.00%
Total52100.00%1100.00%

EXPORT_SYMBOL(rdmacg_register_device); /** * rdmacg_unregister_device - unregister rdmacg device from rdma controller. * @device: pointer to rdmacg device which was previously registered with rdma * controller using rdmacg_register_device(). * * IB stack must invoke this after all the resources of the IB device * are destroyed and after ensuring that no more resources will be created * when this API is invoked. */
void rdmacg_unregister_device(struct rdmacg_device *device) { struct rdmacg_resource_pool *rpool, *tmp; /* * Synchronize with any active resource settings, * usage query happening via configfs. */ mutex_lock(&rdmacg_mutex); list_del_init(&device->dev_node); /* * Now that this device is off the cgroup list, its safe to free * all the rpool resources. */ list_for_each_entry_safe(rpool, tmp, &device->rpools, dev_node) free_cg_rpool_locked(rpool); mutex_unlock(&rdmacg_mutex); }

Contributors

PersonTokensPropCommitsCommitProp
Parav Pandit55100.00%1100.00%
Total55100.00%1100.00%

EXPORT_SYMBOL(rdmacg_unregister_device);
static int parse_resource(char *c, int *intval) { substring_t argstr; const char **table = &rdmacg_resource_names[0]; char *name, *value = c; size_t len; int ret, i = 0; name = strsep(&value, "="); if (!name || !value) return -EINVAL; len = strlen(value); for (i = 0; i < RDMACG_RESOURCE_MAX; i++) { if (strcmp(table[i], name)) continue; argstr.from = value; argstr.to = value + len; ret = match_int(&argstr, intval); if (ret >= 0) { if (*intval < 0) break; return i; } if (strncmp(value, RDMACG_MAX_STR, len) == 0) { *intval = S32_MAX; return i; } break; } return -EINVAL; }

Contributors

PersonTokensPropCommitsCommitProp
Parav Pandit176100.00%1100.00%
Total176100.00%1100.00%


static int rdmacg_parse_limits(char *options, int *new_limits, unsigned long *enables) { char *c; int err = -EINVAL; /* parse resource options */ while ((c = strsep(&options, " ")) != NULL) { int index, intval; index = parse_resource(c, &intval); if (index < 0) goto err; new_limits[index] = intval; *enables |= BIT(index); } return 0; err: return err; }

Contributors

PersonTokensPropCommitsCommitProp
Parav Pandit95100.00%1100.00%
Total95100.00%1100.00%


static struct rdmacg_device *rdmacg_get_device_locked(const char *name) { struct rdmacg_device *device; lockdep_assert_held(&rdmacg_mutex); list_for_each_entry(device, &rdmacg_devices, dev_node) if (!strcmp(name, device->name)) return device; return NULL; }

Contributors

PersonTokensPropCommitsCommitProp
Parav Pandit50100.00%1100.00%
Total50100.00%1100.00%


static ssize_t rdmacg_resource_set_max(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct rdma_cgroup *cg = css_rdmacg(of_css(of)); const char *dev_name; struct rdmacg_resource_pool *rpool; struct rdmacg_device *device; char *options = strstrip(buf); int *new_limits; unsigned long enables = 0; int i = 0, ret = 0; /* extract the device name first */ dev_name = strsep(&options, " "); if (!dev_name) { ret = -EINVAL; goto err; } new_limits = kcalloc(RDMACG_RESOURCE_MAX, sizeof(int), GFP_KERNEL); if (!new_limits) { ret = -ENOMEM; goto err; } ret = rdmacg_parse_limits(options, new_limits, &enables); if (ret) goto parse_err; /* acquire lock to synchronize with hot plug devices */ mutex_lock(&rdmacg_mutex); device = rdmacg_get_device_locked(dev_name); if (!device) { ret = -ENODEV; goto dev_err; } rpool = get_cg_rpool_locked(cg, device); if (IS_ERR(rpool)) { ret = PTR_ERR(rpool); goto dev_err; } /* now set the new limits of the rpool */ for_each_set_bit(i, &enables, RDMACG_RESOURCE_MAX) set_resource_limit(rpool, i, new_limits[i]); if (rpool->usage_sum == 0 && rpool->num_max_cnt == RDMACG_RESOURCE_MAX) { /* * No user of the rpool and all entries are set to max, so * safe to delete this rpool. */ free_cg_rpool_locked(rpool); } dev_err: mutex_unlock(&rdmacg_mutex); parse_err: kfree(new_limits); err: return ret ?: nbytes; }

Contributors

PersonTokensPropCommitsCommitProp
Parav Pandit274100.00%1100.00%
Total274100.00%1100.00%


static void print_rpool_values(struct seq_file *sf, struct rdmacg_resource_pool *rpool) { enum rdmacg_file_type sf_type; int i; u32 value; sf_type = seq_cft(sf)->private; for (i = 0; i < RDMACG_RESOURCE_MAX; i++) { seq_puts(sf, rdmacg_resource_names[i]); seq_putc(sf, '='); if (sf_type == RDMACG_RESOURCE_TYPE_MAX) { if (rpool) value = rpool->resources[i].max; else value = S32_MAX; } else { if (rpool) value = rpool->resources[i].usage; else value = 0; } if (value == S32_MAX) seq_puts(sf, RDMACG_MAX_STR); else seq_printf(sf, "%d", value); seq_putc(sf, ' '); } }

Contributors

PersonTokensPropCommitsCommitProp
Parav Pandit148100.00%2100.00%
Total148100.00%2100.00%


static int rdmacg_resource_read(struct seq_file *sf, void *v) { struct rdmacg_device *device; struct rdmacg_resource_pool *rpool; struct rdma_cgroup *cg = css_rdmacg(seq_css(sf)); mutex_lock(&rdmacg_mutex); list_for_each_entry(device, &rdmacg_devices, dev_node) { seq_printf(sf, "%s ", device->name); rpool = find_cg_rpool_locked(cg, device); print_rpool_values(sf, rpool); seq_putc(sf, '\n'); } mutex_unlock(&rdmacg_mutex); return 0; }

Contributors

PersonTokensPropCommitsCommitProp
Parav Pandit97100.00%1100.00%
Total97100.00%1100.00%

static struct cftype rdmacg_files[] = { { .name = "max", .write = rdmacg_resource_set_max, .seq_show = rdmacg_resource_read, .private = RDMACG_RESOURCE_TYPE_MAX, .flags = CFTYPE_NOT_ON_ROOT, }, { .name = "current", .seq_show = rdmacg_resource_read, .private = RDMACG_RESOURCE_TYPE_STAT, .flags = CFTYPE_NOT_ON_ROOT, }, { } /* terminate */ };
static struct cgroup_subsys_state * rdmacg_css_alloc(struct cgroup_subsys_state *parent) { struct rdma_cgroup *cg; cg = kzalloc(sizeof(*cg), GFP_KERNEL); if (!cg) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&cg->rpools); return &cg->css; }

Contributors

PersonTokensPropCommitsCommitProp
Parav Pandit57100.00%1100.00%
Total57100.00%1100.00%


static void rdmacg_css_free(struct cgroup_subsys_state *css) { struct rdma_cgroup *cg = css_rdmacg(css); kfree(cg); }

Contributors

PersonTokensPropCommitsCommitProp
Parav Pandit26100.00%1100.00%
Total26100.00%1100.00%

/** * rdmacg_css_offline - cgroup css_offline callback * @css: css of interest * * This function is called when @css is about to go away and responsible * for shooting down all rdmacg associated with @css. As part of that it * marks all the resource pool entries to max value, so that when resources are * uncharged, associated resource pool can be freed as well. */
static void rdmacg_css_offline(struct cgroup_subsys_state *css) { struct rdma_cgroup *cg = css_rdmacg(css); struct rdmacg_resource_pool *rpool; mutex_lock(&rdmacg_mutex); list_for_each_entry(rpool, &cg->rpools, cg_node) set_all_resource_max_limit(rpool); mutex_unlock(&rdmacg_mutex); }

Contributors

PersonTokensPropCommitsCommitProp
Parav Pandit51100.00%1100.00%
Total51100.00%1100.00%

struct cgroup_subsys rdma_cgrp_subsys = { .css_alloc = rdmacg_css_alloc, .css_free = rdmacg_css_free, .css_offline = rdmacg_css_offline, .legacy_cftypes = rdmacg_files, .dfl_cftypes = rdmacg_files, };

Overall Contributors

PersonTokensPropCommitsCommitProp
Parav Pandit2122100.00%2100.00%
Total2122100.00%2100.00%
Directory: kernel/cgroup
Information contained on this website is for historical information purposes only and does not indicate or represent copyright ownership.
Created with cregit.