Release 4.15 kernel/pid_namespace.c
/*
* Pid namespaces
*
* Authors:
* (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc.
* (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM
* Many thanks to Oleg Nesterov for comments and help
*
*/
#include <linux/pid.h>
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
#include <linux/syscalls.h>
#include <linux/cred.h>
#include <linux/err.h>
#include <linux/acct.h>
#include <linux/slab.h>
#include <linux/proc_ns.h>
#include <linux/reboot.h>
#include <linux/export.h>
#include <linux/sched/task.h>
#include <linux/sched/signal.h>
#include <linux/idr.h>
/*
 * One list entry per distinct number of ids: remembers the kmem cache
 * that create_pid_cachep() created for pids carrying that many ids.
 */
struct pid_cache {
/* number of ids carried by pids from this cache; used as the lookup key */
int nr_ids;
/* name handed to kmem_cache_create(), formatted as "pid_<nr_ids>" */
char name[16];
/* the cache itself */
struct kmem_cache *cachep;
/* link in pid_caches_lh */
struct list_head list;
};
/* Every pid cache created so far; walked and extended under pid_caches_mutex. */
static LIST_HEAD(pid_caches_lh);
static DEFINE_MUTEX(pid_caches_mutex);
/* Cache for 'struct pid_namespace' objects, created in pid_namespaces_init(). */
static struct kmem_cache *pid_ns_cachep;
/*
* creates the kmem cache to allocate pids from.
* @nr_ids: the number of numerical ids this pid will have to carry
*/
static struct kmem_cache *create_pid_cachep(int nr_ids)
{
struct pid_cache *pcache;
struct kmem_cache *cachep;
mutex_lock(&pid_caches_mutex);
list_for_each_entry(pcache, &pid_caches_lh, list)
if (pcache->nr_ids == nr_ids)
goto out;
pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL);
if (pcache == NULL)
goto err_alloc;
snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids);
cachep = kmem_cache_create(pcache->name,
sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid),
0, SLAB_HWCACHE_ALIGN, NULL);
if (cachep == NULL)
goto err_cachep;
pcache->nr_ids = nr_ids;
pcache->cachep = cachep;
list_add(&pcache->list, &pid_caches_lh);
out:
mutex_unlock(&pid_caches_mutex);
return pcache->cachep;
err_cachep:
kfree(pcache);
err_alloc:
mutex_unlock(&pid_caches_mutex);
return NULL;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Pavel Emelyanov | 182 | 100.00% | 1 | 100.00% |
Total | 182 | 100.00% | 1 | 100.00% |
/*
 * Work callback installed on pid_namespace::proc_work by
 * create_pid_namespace(): recovers the owning namespace from the embedded
 * work item and calls pid_ns_release_proc() on it.
 */
static void proc_cleanup_work(struct work_struct *work)
{
	pid_ns_release_proc(container_of(work, struct pid_namespace,
					 proc_work));
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Eric W. Biederman | 31 | 100.00% | 1 | 100.00% |
Total | 31 | 100.00% | 1 | 100.00% |
/*
 * MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid'.
 * create_pid_namespace() fails with -ENOSPC rather than create a
 * namespace whose level would exceed this value.
 */
#define MAX_PID_NS_LEVEL 32
/*
 * Charge one pid namespace (UCOUNT_PID_NAMESPACES) to the current
 * effective uid in user namespace @ns.
 *
 * Returns whatever inc_ucount() returns; create_pid_namespace() treats a
 * NULL result as failure.
 */
static struct ucounts *inc_pid_namespaces(struct user_namespace *ns)
{
	struct ucounts *charged;

	charged = inc_ucount(ns, current_euid(), UCOUNT_PID_NAMESPACES);
	return charged;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Eric W. Biederman | 24 | 100.00% | 1 | 100.00% |
Total | 24 | 100.00% | 1 | 100.00% |
/*
 * Undo inc_pid_namespaces(): drop one UCOUNT_PID_NAMESPACES charge from
 * @ucounts, the value inc_pid_namespaces() handed back.
 */
static void dec_pid_namespaces(struct ucounts *ucounts)
{
dec_ucount(ucounts, UCOUNT_PID_NAMESPACES);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Eric W. Biederman | 18 | 100.00% | 1 | 100.00% |
Total | 18 | 100.00% | 1 | 100.00% |
/*
 * Allocate and initialise a new pid namespace one level below
 * @parent_pid_ns, owned by @user_ns.
 *
 * On success the new namespace holds a reference on both @parent_pid_ns
 * and @user_ns, plus one UCOUNT_PID_NAMESPACES charge.
 *
 * Returns the namespace, or an ERR_PTR():
 *   -EINVAL  in_userns(parent_pid_ns->user_ns, user_ns) is false
 *   -ENOSPC  nesting deeper than MAX_PID_NS_LEVEL, or the ucount charge
 *            was refused
 *   -ENOMEM  the namespace or its pid cache could not be allocated
 *   or whatever error ns_alloc_inum() reported.
 */
static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns,
	struct pid_namespace *parent_pid_ns)
{
	unsigned int level = parent_pid_ns->level + 1;
	struct pid_namespace *ns;
	struct ucounts *ucounts;
	int err;

	/* Nothing is held yet, so these failures can return directly. */
	if (!in_userns(parent_pid_ns->user_ns, user_ns))
		return ERR_PTR(-EINVAL);

	if (level > MAX_PID_NS_LEVEL)
		return ERR_PTR(-ENOSPC);

	ucounts = inc_pid_namespaces(user_ns);
	if (!ucounts)
		return ERR_PTR(-ENOSPC);

	ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
	if (!ns) {
		err = -ENOMEM;
		goto out_dec;
	}

	idr_init(&ns->idr);

	/* The pid cache is requested for level + 1 ids. */
	ns->pid_cachep = create_pid_cachep(level + 1);
	if (!ns->pid_cachep) {
		err = -ENOMEM;
		goto out_free_idr;
	}

	err = ns_alloc_inum(&ns->ns);
	if (err)
		goto out_free_idr;

	ns->ns.ops = &pidns_operations;
	kref_init(&ns->kref);
	ns->level = level;
	ns->parent = get_pid_ns(parent_pid_ns);
	ns->user_ns = get_user_ns(user_ns);
	ns->ucounts = ucounts;
	ns->pid_allocated = PIDNS_ADDING;
	INIT_WORK(&ns->proc_work, proc_cleanup_work);

	return ns;

out_free_idr:
	idr_destroy(&ns->idr);
	kmem_cache_free(pid_ns_cachep, ns);
out_dec:
	dec_pid_namespaces(ucounts);
	return ERR_PTR(err);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Eric W. Biederman | 103 | 39.92% | 8 | 47.06% |
Pavel Emelyanov | 94 | 36.43% | 2 | 11.76% |
Alexey Dobriyan | 23 | 8.91% | 1 | 5.88% |
Andrey Vagin | 15 | 5.81% | 1 | 5.88% |
Gargi Sharma | 12 | 4.65% | 2 | 11.76% |
Al Viro | 11 | 4.26% | 3 | 17.65% |
Total | 258 | 100.00% | 17 | 100.00% |
/*
 * RCU callback queued by destroy_pid_namespace(): gives back the ucount
 * charge and the user namespace reference taken in create_pid_namespace(),
 * then frees the namespace object itself.
 */
static void delayed_free_pidns(struct rcu_head *p)
{
	struct pid_namespace *pid_ns;

	pid_ns = container_of(p, struct pid_namespace, rcu);

	dec_pid_namespaces(pid_ns->ucounts);
	put_user_ns(pid_ns->user_ns);

	kmem_cache_free(pid_ns_cachep, pid_ns);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Andrey Vagin | 25 | 53.19% | 1 | 50.00% |
Al Viro | 22 | 46.81% | 1 | 50.00% |
Total | 47 | 100.00% | 2 | 100.00% |
/*
 * Tear down @ns: release its inode number and its idr, then hand the
 * structure to call_rcu() so that delayed_free_pidns() does the final
 * free. Called from free_pid_ns() when the last kref is dropped.
 */
static void destroy_pid_namespace(struct pid_namespace *ns)
{
ns_free_inum(&ns->ns);
idr_destroy(&ns->idr);
/* The memory is freed later, from the RCU callback. */
call_rcu(&ns->rcu, delayed_free_pidns);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Pavel Emelyanov | 20 | 54.05% | 1 | 16.67% |
Al Viro | 9 | 24.32% | 3 | 50.00% |
Eric W. Biederman | 5 | 13.51% | 1 | 16.67% |
Gargi Sharma | 3 | 8.11% | 1 | 16.67% |
Total | 37 | 100.00% | 6 | 100.00% |
/*
 * Pick the pid namespace for a new nsproxy.
 * @flags:   clone flags; only CLONE_NEWPID is examined here
 * @user_ns: user namespace that will own a newly created pid namespace
 * @old_ns:  the pid namespace currently in use
 *
 * Without CLONE_NEWPID this just takes another reference on @old_ns.
 * With it, a child namespace of @old_ns is created, but only if @old_ns
 * is the caller's active pid namespace; otherwise ERR_PTR(-EINVAL) is
 * returned. Errors from create_pid_namespace() are passed through.
 */
struct pid_namespace *copy_pid_ns(unsigned long flags,
	struct user_namespace *user_ns, struct pid_namespace *old_ns)
{
	if (flags & CLONE_NEWPID) {
		if (task_active_pid_ns(current) == old_ns)
			return create_pid_namespace(user_ns, old_ns);

		return ERR_PTR(-EINVAL);
	}

	return get_pid_ns(old_ns);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Pavel Emelyanov | 33 | 55.00% | 1 | 25.00% |
Eric W. Biederman | 21 | 35.00% | 2 | 50.00% |
Alexey Dobriyan | 6 | 10.00% | 1 | 25.00% |
Total | 60 | 100.00% | 4 | 100.00% |
/*
 * kref release callback used by put_pid_ns(): maps the embedded kref back
 * to its pid namespace and destroys that namespace.
 */
static void free_pid_ns(struct kref *kref)
{
	destroy_pid_namespace(container_of(kref, struct pid_namespace, kref));
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Pavel Emelyanov | 26 | 78.79% | 1 | 50.00% |
Cyrill V. Gorcunov | 7 | 21.21% | 1 | 50.00% |
Total | 33 | 100.00% | 2 | 100.00% |
/*
 * Drop a reference on @ns.
 *
 * If that was the last reference the namespace is destroyed, and the
 * reference it held on its parent is dropped in turn, walking up the
 * hierarchy until a namespace survives or init_pid_ns is reached.
 * init_pid_ns itself is never put.
 */
void put_pid_ns(struct pid_namespace *ns)
{
	struct pid_namespace *up;

	for (; ns != &init_pid_ns; ns = up) {
		/* Read the parent first: kref_put() may release @ns. */
		up = ns->parent;
		if (!kref_put(&ns->kref, free_pid_ns))
			return;
	}
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Cyrill V. Gorcunov | 35 | 72.92% | 1 | 50.00% |
Pavel Emelyanov | 13 | 27.08% | 1 | 50.00% |
Total | 48 | 100.00% | 2 | 100.00% |
EXPORT_SYMBOL_GPL(put_pid_ns);
/*
 * Shut down @pid_ns from the current task: stop further pid allocation
 * in it, SIGKILL the tasks found in its idr, wait for the children of the
 * current task, then sleep until only init_pids pids remain allocated in
 * the namespace.
 *
 * NOTE(review): the comments below imply the caller is the namespace's
 * init on its exit path - the call site is not in this file, confirm there.
 */
void zap_pid_ns_processes(struct pid_namespace *pid_ns)
{
int nr;
int rc;
struct task_struct *task, *me = current;
/*
 * Number of allocated pids to wait down to: 1 when the current task is
 * its thread group leader, 2 otherwise.
 */
int init_pids = thread_group_leader(me) ? 1 : 2;
struct pid *pid;
/* Don't allow any more processes into the pid namespace */
disable_pid_allocation(pid_ns);
/*
 * Ignore SIGCHLD causing any terminated children to autoreap.
 * This speeds up the namespace shutdown, plus see the comment
 * below.
 */
spin_lock_irq(&me->sighand->siglock);
me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN;
spin_unlock_irq(&me->sighand->siglock);
/*
 * The last thread in the cgroup-init thread group is terminating.
 * Find remaining pid_ts in the namespace, signal and wait for them
 * to exit.
 *
 * Note: This signals every thread in the namespace - even those that
 * belong to the same thread group. To avoid this, we would have
 * to walk the entire tasklist looking for processes in this
 * namespace, but that could be unnecessarily expensive if the
 * pid namespace has just a few processes. Or we need to
 * maintain a tasklist for each pid namespace.
 *
 */
rcu_read_lock();
read_lock(&tasklist_lock);
/* The walk starts at id 2 - presumably to skip pid 1, the init itself. */
nr = 2;
idr_for_each_entry_continue(&pid_ns->idr, pid, nr) {
task = pid_task(pid, PIDTYPE_PID);
/* Skip ids with no task attached and tasks already being killed. */
if (task && !__fatal_signal_pending(task))
send_sig_info(SIGKILL, SEND_SIG_FORCED, task);
}
read_unlock(&tasklist_lock);
rcu_read_unlock();
/*
 * Reap the EXIT_ZOMBIE children we had before we ignored SIGCHLD.
 * sys_wait4() will also block until our children traced from the
 * parent namespace are detached and become EXIT_DEAD.
 */
do {
clear_thread_flag(TIF_SIGPENDING);
rc = sys_wait4(-1, NULL, __WALL, NULL);
} while (rc != -ECHILD);
/*
 * sys_wait4() above can't reap the EXIT_DEAD children but we do not
 * really care, we could reparent them to the global init. We could
 * exit and reap ->child_reaper even if it is not the last thread in
 * this pid_ns, free_pid(pid_allocated == 0) calls proc_cleanup_work(),
 * pid_ns can not go away until proc_kill_sb() drops the reference.
 *
 * But this ns can also have other tasks injected by setns()+fork().
 * Again, ignoring the user visible semantics we do not really need
 * to wait until they are all reaped, but they can be reparented to
 * us and thus we need to ensure that pid->child_reaper stays valid
 * until they all go away. See free_pid()->wake_up_process().
 *
 * We rely on ignored SIGCHLD, an injected zombie must be autoreaped
 * if reparented.
 */
for (;;) {
/* Set the sleeping state before testing the condition. */
set_current_state(TASK_INTERRUPTIBLE);
if (pid_ns->pid_allocated == init_pids)
break;
schedule();
}
__set_current_state(TASK_RUNNING);
/* A value stored by reboot_pid_ns() becomes the group exit code. */
if (pid_ns->reboot)
current->signal->group_exit_code = pid_ns->reboot;
acct_exit_ns(pid_ns);
return;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Eric W. Biederman | 88 | 38.10% | 6 | 40.00% |
Pavel Emelyanov | 77 | 33.33% | 2 | 13.33% |
Gargi Sharma | 21 | 9.09% | 2 | 13.33% |
Sukadev Bhattiprolu | 18 | 7.79% | 1 | 6.67% |
Daniel Lezcano | 16 | 6.93% | 1 | 6.67% |
Oleg Nesterov | 11 | 4.76% | 3 | 20.00% |
Total | 231 | 100.00% | 15 | 100.00% |
#ifdef CONFIG_CHECKPOINT_RESTORE
/*
 * sysctl handler for "ns_last_pid" in the caller's active pid namespace.
 *
 * The value exposed is the namespace's idr cursor minus one; a successful
 * write stores the written value plus one back as the cursor. Writing
 * requires CAP_SYS_ADMIN in the namespace's owning user namespace,
 * otherwise -EPERM is returned. Any other result comes from
 * proc_dointvec_minmax().
 */
static int pid_ns_ctl_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct pid_namespace *pid_ns = task_active_pid_ns(current);
	struct ctl_table shadow;
	int last_pid;
	int ret;

	if (write && !ns_capable(pid_ns->user_ns, CAP_SYS_ADMIN))
		return -EPERM;

	/*
	 * Writing directly to the ns' allocation cursor is OK, since it is
	 * volatile in a living namespace anyway and code writing to it
	 * should synchronize its usage with external means.
	 *
	 * Work on a private copy of the table so the integer parser reads
	 * and writes a local variable instead of shared data.
	 */
	last_pid = idr_get_cursor(&pid_ns->idr) - 1;
	shadow = *table;
	shadow.data = &last_pid;

	ret = proc_dointvec_minmax(&shadow, write, buffer, lenp, ppos);
	if (ret || !write)
		return ret;

	idr_set_cursor(&pid_ns->idr, last_pid + 1);
	return 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Pavel Emelyanov | 67 | 53.60% | 1 | 25.00% |
Gargi Sharma | 42 | 33.60% | 1 | 25.00% |
Eric W. Biederman | 15 | 12.00% | 1 | 25.00% |
Andrey Vagin | 1 | 0.80% | 1 | 25.00% |
Total | 125 | 100.00% | 4 | 100.00% |
/* Defined outside this file; used below as the upper bound (extra2). */
extern int pid_max;
/* Lower bound (extra1) for ns_last_pid. */
static int zero = 0;
/*
 * sysctl table with the single "ns_last_pid" entry, served by
 * pid_ns_ctl_handler(). No .data is set here: the handler points a
 * private copy of the entry at a local variable on every call.
 * NOTE(review): extra1/extra2 are presumably the min/max enforced by
 * proc_dointvec_minmax() - confirm against the sysctl documentation.
 */
static struct ctl_table pid_ns_ctl_table[] = {
{
.procname = "ns_last_pid",
.maxlen = sizeof(int),
.mode = 0666, /* permissions are checked in the handler */
.proc_handler = pid_ns_ctl_handler,
.extra1 = &zero,
.extra2 = &pid_max,
},
{ }
};
/* Path under which pid_namespaces_init() registers the table: "kernel". */
static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };
#endif /* CONFIG_CHECKPOINT_RESTORE */
/*
 * Handle a reboot request issued inside a pid namespace.
 * @pid_ns: the namespace the request applies to
 * @cmd:    one of the LINUX_REBOOT_CMD_* values
 *
 * For init_pid_ns this does nothing and returns 0. Otherwise the command
 * is recorded in pid_ns->reboot as a signal number (SIGHUP for the restart
 * commands, SIGINT for power-off and halt), the namespace's child reaper
 * is sent SIGKILL and the calling task exits. Any other command returns
 * -EINVAL and nothing is killed.
 *
 * zap_pid_ns_processes() later copies pid_ns->reboot into the exiting
 * task's group exit code.
 */
int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
{
	if (pid_ns == &init_pid_ns)
		return 0;

	if (cmd == LINUX_REBOOT_CMD_RESTART2 ||
	    cmd == LINUX_REBOOT_CMD_RESTART)
		pid_ns->reboot = SIGHUP;
	else if (cmd == LINUX_REBOOT_CMD_POWER_OFF ||
		 cmd == LINUX_REBOOT_CMD_HALT)
		pid_ns->reboot = SIGINT;
	else
		return -EINVAL;

	read_lock(&tasklist_lock);
	force_sig(SIGKILL, pid_ns->child_reaper);
	read_unlock(&tasklist_lock);

	do_exit(0);

	/* Not reached */
	return 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Daniel Lezcano | 90 | 100.00% | 1 | 100.00% |
Total | 90 | 100.00% | 1 | 100.00% |
/* Map an embedded ns_common back to the pid_namespace that contains it. */
static inline struct pid_namespace *to_pid_ns(struct ns_common *ns)
{
	struct pid_namespace *pid_ns;

	pid_ns = container_of(ns, struct pid_namespace, ns);
	return pid_ns;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Al Viro | 25 | 100.00% | 1 | 100.00% |
Total | 25 | 100.00% | 1 | 100.00% |
/*
 * proc_ns ->get() for "pid": take a reference on @task's active pid
 * namespace and return its ns_common, or NULL if the task has no active
 * pid namespace. The lookup and the reference are done inside an RCU
 * read-side section.
 */
static struct ns_common *pidns_get(struct task_struct *task)
{
	struct ns_common *found = NULL;
	struct pid_namespace *active;

	rcu_read_lock();
	active = task_active_pid_ns(task);
	if (active) {
		get_pid_ns(active);
		found = &active->ns;
	}
	rcu_read_unlock();

	return found;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Eric W. Biederman | 33 | 66.00% | 1 | 25.00% |
Al Viro | 9 | 18.00% | 2 | 50.00% |
Oleg Nesterov | 8 | 16.00% | 1 | 25.00% |
Total | 50 | 100.00% | 4 | 100.00% |
/*
 * proc_ns ->get() for "pid_for_children": take a reference on the pid
 * namespace @task's children will be created in.
 *
 * Returns NULL if @task has no nsproxy, or if that namespace has no
 * child_reaper; in the second case the reference just taken is dropped
 * again.
 */
static struct ns_common *pidns_for_children_get(struct task_struct *task)
{
	struct pid_namespace *pid_ns = NULL;

	task_lock(task);
	if (task->nsproxy) {
		pid_ns = task->nsproxy->pid_ns_for_children;
		get_pid_ns(pid_ns);
	}
	task_unlock(task);

	if (!pid_ns)
		return NULL;

	/* child_reaper is examined with tasklist_lock held for reading. */
	read_lock(&tasklist_lock);
	if (!pid_ns->child_reaper) {
		put_pid_ns(pid_ns);
		pid_ns = NULL;
	}
	read_unlock(&tasklist_lock);

	if (!pid_ns)
		return NULL;

	return &pid_ns->ns;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Kirill Tkhai | 97 | 100.00% | 1 | 100.00% |
Total | 97 | 100.00% | 1 | 100.00% |
/* proc_ns ->put(): drop a reference on the pid namespace behind @ns. */
static void pidns_put(struct ns_common *ns)
{
	struct pid_namespace *pid_ns = to_pid_ns(ns);

	put_pid_ns(pid_ns);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Eric W. Biederman | 14 | 73.68% | 1 | 33.33% |
Al Viro | 5 | 26.32% | 2 | 66.67% |
Total | 19 | 100.00% | 3 | 100.00% |
/*
 * proc_ns ->install(): make the pid namespace behind @ns the namespace
 * that children created through @nsproxy will live in.
 *
 * Returns 0 on success, -EPERM if the caller lacks CAP_SYS_ADMIN in either
 * the target's owning user namespace or its own user namespace, and
 * -EINVAL if the target is neither the caller's active pid namespace nor
 * a descendant of it.
 */
static int pidns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
	struct pid_namespace *active = task_active_pid_ns(current);
	struct pid_namespace *target = to_pid_ns(ns);
	struct pid_namespace *walk;

	if (!ns_capable(target->user_ns, CAP_SYS_ADMIN))
		return -EPERM;
	if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
		return -EPERM;

	/*
	 * Only allow entering the current active pid namespace
	 * or a child of the current active pid namespace.
	 *
	 * This is required for fork to return a usable pid value and
	 * this maintains the property that processes and their
	 * children can not escape their current pid namespace.
	 */
	if (target->level < active->level)
		return -EINVAL;

	/* Climb from the target up to the caller's level. */
	for (walk = target; walk->level > active->level; walk = walk->parent)
		;

	if (walk != active)
		return -EINVAL;

	put_pid_ns(nsproxy->pid_ns_for_children);
	nsproxy->pid_ns_for_children = get_pid_ns(target);
	return 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Eric W. Biederman | 121 | 94.53% | 3 | 50.00% |
Al Viro | 5 | 3.91% | 2 | 33.33% |
Andrew Lutomirski | 2 | 1.56% | 1 | 16.67% |
Total | 128 | 100.00% | 6 | 100.00% |
/*
 * proc_ns ->get_parent(): return, with a reference taken, the parent of
 * the pid namespace behind @ns.
 *
 * The parent is only handed out when it is the caller's active pid
 * namespace or a descendant of it. If the walk up the ancestry runs out
 * without meeting the active namespace, ERR_PTR(-EPERM) is returned.
 */
static struct ns_common *pidns_get_parent(struct ns_common *ns)
{
	struct pid_namespace *active = task_active_pid_ns(current);
	struct pid_namespace *parent = to_pid_ns(ns)->parent;
	struct pid_namespace *walk = parent;

	/* See if the parent is in the current namespace */
	while (walk && walk != active)
		walk = walk->parent;

	if (!walk)
		return ERR_PTR(-EPERM);

	return &get_pid_ns(parent)->ns;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Andrey Vagin | 84 | 100.00% | 1 | 100.00% |
Total | 84 | 100.00% | 1 | 100.00% |
static struct user_namespace *pidns_owner(struct ns_common *ns)
{
return to_pid_ns(ns)->user_ns;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Andrey Vagin | 21 | 100.00% | 1 | 100.00% |
Total | 21 | 100.00% | 1 | 100.00% |
/*
 * proc_ns operations for the "pid" namespace entry; .get resolves a
 * task's active pid namespace. Also installed as ns.ops on every
 * namespace made by create_pid_namespace().
 */
const struct proc_ns_operations pidns_operations = {
.name = "pid",
.type = CLONE_NEWPID,
.get = pidns_get,
.put = pidns_put,
.install = pidns_install,
.owner = pidns_owner,
.get_parent = pidns_get_parent,
};
/*
 * proc_ns operations for the "pid_for_children" entry. Identical to
 * pidns_operations except that .get resolves the namespace the task's
 * children will be created in (nsproxy->pid_ns_for_children) and
 * .real_ns_name is set to "pid".
 */
const struct proc_ns_operations pidns_for_children_operations = {
.name = "pid_for_children",
.real_ns_name = "pid",
.type = CLONE_NEWPID,
.get = pidns_for_children_get,
.put = pidns_put,
.install = pidns_install,
.owner = pidns_owner,
.get_parent = pidns_get_parent,
};
/*
 * Boot-time setup: create the kmem cache that 'struct pid_namespace'
 * objects are allocated from and, with CONFIG_CHECKPOINT_RESTORE, register
 * the "ns_last_pid" sysctl under "kernel". Always returns 0.
 */
static __init int pid_namespaces_init(void)
{
/* Created with SLAB_PANIC, so no NULL check follows. */
pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
#ifdef CONFIG_CHECKPOINT_RESTORE
/* The return value of the registration is not checked. */
register_sysctl_paths(kern_path, pid_ns_ctl_table);
#endif
return 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Pavel Emelyanov | 28 | 84.85% | 2 | 66.67% |
Cyrill V. Gorcunov | 5 | 15.15% | 1 | 33.33% |
Total | 33 | 100.00% | 3 | 100.00% |
__initcall(pid_namespaces_init);
Overall Contributors
Person | Tokens | Prop | Commits | CommitProp |
Pavel Emelyanov | 656 | 34.33% | 4 | 7.69% |
Eric W. Biederman | 517 | 27.05% | 18 | 34.62% |
Andrey Vagin | 183 | 9.58% | 5 | 9.62% |
Kirill Tkhai | 144 | 7.54% | 1 | 1.92% |
Daniel Lezcano | 109 | 5.70% | 1 | 1.92% |
Al Viro | 86 | 4.50% | 6 | 11.54% |
Gargi Sharma | 81 | 4.24% | 2 | 3.85% |
Cyrill V. Gorcunov | 54 | 2.83% | 2 | 3.85% |
Alexey Dobriyan | 29 | 1.52% | 2 | 3.85% |
Oleg Nesterov | 19 | 0.99% | 4 | 7.69% |
Sukadev Bhattiprolu | 18 | 0.94% | 1 | 1.92% |
Ingo Molnar | 9 | 0.47% | 3 | 5.77% |
Tejun Heo | 3 | 0.16% | 1 | 1.92% |
Andrew Lutomirski | 2 | 0.10% | 1 | 1.92% |
David Howells | 1 | 0.05% | 1 | 1.92% |
Total | 1911 | 100.00% | 52 | 100.00% |
Information contained on this website is for historical information purposes only and does not indicate or represent copyright ownership.