cregit-Linux: how code gets into the kernel

Release 4.15: kernel/fork.c

/*
 *  linux/kernel/fork.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 *  'fork.c' contains the help-routines for the 'fork' system call
 * (see also entry.S and others).
 * Fork is rather simple, once you get the hang of it, but the memory
 * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
 */

#include <linux/slab.h>
#include <linux/sched/autogroup.h>
#include <linux/sched/mm.h>
#include <linux/sched/coredump.h>
#include <linux/sched/user.h>
#include <linux/sched/numa_balancing.h>
#include <linux/sched/stat.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/cputime.h>
#include <linux/rtmutex.h>
#include <linux/init.h>
#include <linux/unistd.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/completion.h>
#include <linux/personality.h>
#include <linux/mempolicy.h>
#include <linux/sem.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/iocontext.h>
#include <linux/key.h>
#include <linux/binfmts.h>
#include <linux/mman.h>
#include <linux/mmu_notifier.h>
#include <linux/hmm.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/vmacache.h>
#include <linux/nsproxy.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/cgroup.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/seccomp.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/jiffies.h>
#include <linux/futex.h>
#include <linux/compat.h>
#include <linux/kthread.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/rcupdate.h>
#include <linux/ptrace.h>
#include <linux/mount.h>
#include <linux/audit.h>
#include <linux/memcontrol.h>
#include <linux/ftrace.h>
#include <linux/proc_fs.h>
#include <linux/profile.h>
#include <linux/rmap.h>
#include <linux/ksm.h>
#include <linux/acct.h>
#include <linux/userfaultfd_k.h>
#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
#include <linux/freezer.h>
#include <linux/delayacct.h>
#include <linux/taskstats_kern.h>
#include <linux/random.h>
#include <linux/tty.h>
#include <linux/blkdev.h>
#include <linux/fs_struct.h>
#include <linux/magic.h>
#include <linux/perf_event.h>
#include <linux/posix-timers.h>
#include <linux/user-return-notifier.h>
#include <linux/oom.h>
#include <linux/khugepaged.h>
#include <linux/signalfd.h>
#include <linux/uprobes.h>
#include <linux/aio.h>
#include <linux/compiler.h>
#include <linux/sysctl.h>
#include <linux/kcov.h>
#include <linux/livepatch.h>
#include <linux/thread_info.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

#include <trace/events/sched.h>


#define CREATE_TRACE_POINTS
#include <trace/events/task.h>

/*
 * Minimum number of threads to boot the kernel
 */

#define MIN_THREADS 20

/*
 * Maximum number of threads
 */

#define MAX_THREADS FUTEX_TID_MASK
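
Illustrative aside (not part of fork.c): MAX_THREADS is tied to FUTEX_TID_MASK
because PI/robust futexes store the lock owner's TID in the low bits of the
futex word, so every thread ID must fit under that mask. A minimal userspace
sketch of the resulting ceiling, assuming the UAPI <linux/futex.h> header is
available:

#include <linux/futex.h>
#include <stdio.h>

int main(void)
{
        /* FUTEX_TID_MASK is 0x3fffffff, i.e. a cap of roughly 1.07 billion threads */
        printf("MAX_THREADS = %#x (%u)\n", FUTEX_TID_MASK, FUTEX_TID_MASK);
        return 0;
}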

/*
 * Protected counters by write_lock_irq(&tasklist_lock)
 */

unsigned long total_forks;	/* Handle normal Linux uptimes. */

int nr_threads;			/* The idle threads do not count.. */

int max_threads;		/* tunable limit on nr_threads */
DEFINE_PER_CPU(unsigned long, process_counts) = 0;

__cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */

#ifdef CONFIG_PROVE_RCU

int lockdep_tasklist_lock_is_held(void)
{
        return lockdep_is_held(&tasklist_lock);
}

Contributors

Person                    Tokens  Prop     Commits  CommitProp
Paul E. McKenney          14      100.00%  1        100.00%
Total                     14      100.00%  1        100.00%

EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held);
#endif /* #ifdef CONFIG_PROVE_RCU */
int nr_processes(void)
{
        int cpu;
        int total = 0;

        for_each_possible_cpu(cpu)
                total += per_cpu(process_counts, cpu);

        return total;
}

Contributors

Person                    Tokens  Prop     Commits  CommitProp
Andrew Morton             30      96.77%   2        66.67%
Ian Campbell              1       3.23%    1        33.33%
Total                     31      100.00%  3        100.00%


void __weak arch_release_task_struct(struct task_struct *tsk)
{
}

Contributors

Person                    Tokens  Prop     Commits  CommitProp
Akinobu Mita              10      100.00%  1        100.00%
Total                     10      100.00%  1        100.00%

#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
static struct kmem_cache *task_struct_cachep;

static inline struct task_struct *alloc_task_struct_node(int node)
{
        return kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node);
}

Contributors

Person                    Tokens  Prop     Commits  CommitProp
Thomas Gleixner           18      81.82%   1        50.00%
Eric Dumazet              4       18.18%   1        50.00%
Total                     22      100.00%  2        100.00%


static inline void free_task_struct(struct task_struct *tsk)
{
        kmem_cache_free(task_struct_cachep, tsk);
}

Contributors

Person                    Tokens  Prop     Commits  CommitProp
Thomas Gleixner           15      78.95%   1        33.33%
David Mosberger-Tang      3       15.79%   1        33.33%
Christoph Lameter         1       5.26%    1        33.33%
Total                     19      100.00%  3        100.00%

#endif
void __weak arch_release_thread_stack(unsigned long *stack)
{
}

Contributors

Person                    Tokens  Prop     Commits  CommitProp
Thomas Gleixner           6       60.00%   1        50.00%
Linus Torvalds            4       40.00%   1        50.00%
Total                     10      100.00%  2        100.00%

#ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR

/*
 * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
 * kmemcache based allocator.
 */
# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)

#ifdef CONFIG_VMAP_STACK
/*
 * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB
 * flush.  Try to minimize the number of calls by caching stacks.
 */
#define NR_CACHED_STACKS 2
static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);
static int free_vm_stack_cache(unsigned int cpu)
{
        struct vm_struct **cached_vm_stacks = per_cpu_ptr(cached_stacks, cpu);
        int i;

        for (i = 0; i < NR_CACHED_STACKS; i++) {
                struct vm_struct *vm_stack = cached_vm_stacks[i];

                if (!vm_stack)
                        continue;

                vfree(vm_stack->addr);
                cached_vm_stacks[i] = NULL;
        }

        return 0;
}

Contributors

Person                    Tokens  Prop     Commits  CommitProp
Hoeun Ryu                 74      100.00%  1        100.00%
Total                     74      100.00%  1        100.00%

#endif
static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
{
#ifdef CONFIG_VMAP_STACK
        void *stack;
        int i;

        for (i = 0; i < NR_CACHED_STACKS; i++) {
                struct vm_struct *s;

                s = this_cpu_xchg(cached_stacks[i], NULL);

                if (!s)
                        continue;

#ifdef CONFIG_DEBUG_KMEMLEAK
                /* Clear stale pointers from reused stack. */
                memset(s->addr, 0, THREAD_SIZE);
#endif
                tsk->stack_vm_area = s;
                return s->addr;
        }

        stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN,
                                     VMALLOC_START, VMALLOC_END,
                                     THREADINFO_GFP,
                                     PAGE_KERNEL,
                                     0, node, __builtin_return_address(0));

        /*
         * We can't call find_vm_area() in interrupt context, and
         * free_thread_stack() can be called in interrupt context,
         * so cache the vm_struct.
         */
        if (stack)
                tsk->stack_vm_area = find_vm_area(stack);
        return stack;
#else
        struct page *page = alloc_pages_node(node, THREADINFO_GFP,
                                             THREAD_SIZE_ORDER);

        return page ? page_address(page) : NULL;
#endif
}

Contributors

Person                    Tokens  Prop     Commits  CommitProp
Andrew Lutomirski         98      60.12%   2        20.00%
Eric Dumazet              20      12.27%   1        10.00%
Konstantin Khlebnikov     17      10.43%   1        10.00%
FUJITA Tomonori           15      9.20%    1        10.00%
Christoph Lameter         7       4.29%    1        10.00%
Linus Torvalds            3       1.84%    1        10.00%
Vladimir Davydov          2       1.23%    2        20.00%
Mark Rutland              1       0.61%    1        10.00%
Total                     163     100.00%  10       100.00%


static inline void free_thread_stack(struct task_struct *tsk)
{
#ifdef CONFIG_VMAP_STACK
        if (task_stack_vm_area(tsk)) {
                int i;

                for (i = 0; i < NR_CACHED_STACKS; i++) {
                        if (this_cpu_cmpxchg(cached_stacks[i],
                                        NULL, tsk->stack_vm_area) != NULL)
                                continue;

                        return;
                }

                vfree_atomic(tsk->stack);
                return;
        }
#endif

        __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
}

Contributors

Person                    Tokens  Prop     Commits  CommitProp
Andrew Lutomirski         59      70.24%   3        37.50%
FUJITA Tomonori           12      14.29%   1        12.50%
Christoph Lameter         7       8.33%    1        12.50%
Vladimir Davydov          3       3.57%    1        12.50%
Linus Torvalds            2       2.38%    1        12.50%
Andrey Ryabinin           1       1.19%    1        12.50%
Total                     84      100.00%  8        100.00%
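
Illustrative aside (not part of fork.c): alloc_thread_stack_node() and
free_thread_stack() above implement a tiny lock-free, per-CPU stack cache.
The allocator pops a cached stack with an atomic exchange, the free path
pushes into an empty slot with a compare-and-swap, and both fall back to
vmalloc()/vfree() on a miss or a full cache. A standalone userspace sketch
of the same two-slot protocol, using GCC builtins as stand-ins for
this_cpu_xchg()/this_cpu_cmpxchg():

#include <stddef.h>

#define NR_CACHED_SLOTS 2

/* in the kernel version there is one such array per CPU */
static void *cached_slots[NR_CACHED_SLOTS];

static void *cache_pop(void)
{
        for (int i = 0; i < NR_CACHED_SLOTS; i++) {
                /* atomically take whatever is in the slot, leaving NULL behind */
                void *s = __atomic_exchange_n(&cached_slots[i], NULL, __ATOMIC_RELAXED);
                if (s)
                        return s;
        }
        return NULL;    /* cache miss: caller allocates a fresh stack */
}

static int cache_push(void *stack)
{
        for (int i = 0; i < NR_CACHED_SLOTS; i++) {
                void *expected = NULL;

                /* only install into a slot that is still empty */
                if (__atomic_compare_exchange_n(&cached_slots[i], &expected, stack,
                                                0, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
                        return 1;
        }
        return 0;       /* cache full: caller must really free the stack */
}

int main(void)
{
        static char stack_a[64], stack_b[64], stack_c[64];

        cache_push(stack_a);                    /* cached */
        cache_push(stack_b);                    /* cached */
        int full = !cache_push(stack_c);        /* cache full, would be freed for real */
        void *reused = cache_pop();             /* gets one of the cached stacks back */

        return (full && reused) ? 0 : 1;
}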

# else
static struct kmem_cache *thread_stack_cache;

static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
                                              int node)
{
        return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
}

Contributors

Person                    Tokens  Prop     Commits  CommitProp
Thomas Gleixner           22      84.62%   1        33.33%
Linus Torvalds            2       7.69%    1        33.33%
Michael Ellerman          2       7.69%    1        33.33%
Total                     26      100.00%  3        100.00%


static void free_thread_stack(struct task_struct *tsk)
{
        kmem_cache_free(thread_stack_cache, tsk->stack);
}

Contributors

Person                    Tokens  Prop     Commits  CommitProp
Thomas Gleixner           12      60.00%   1        25.00%
Andrew Lutomirski         5       25.00%   1        25.00%
Linus Torvalds            2       10.00%   1        25.00%
Michael Ellerman          1       5.00%    1        25.00%
Total                     20      100.00%  4        100.00%


void thread_stack_cache_init(void)
{
        thread_stack_cache = kmem_cache_create("thread_stack", THREAD_SIZE,
                                               THREAD_SIZE, 0, NULL);
        BUG_ON(thread_stack_cache == NULL);
}

Contributors

Person                    Tokens  Prop     Commits  CommitProp
Thomas Gleixner           25      86.21%   1        50.00%
Linus Torvalds            4       13.79%   1        50.00%
Total                     29      100.00%  2        100.00%

# endif
#endif

/* SLAB cache for signal_struct structures (tsk->signal) */
static struct kmem_cache *signal_cachep;

/* SLAB cache for sighand_struct structures (tsk->sighand) */
struct kmem_cache *sighand_cachep;

/* SLAB cache for files_struct structures (tsk->files) */
struct kmem_cache *files_cachep;

/* SLAB cache for fs_struct structures (tsk->fs) */
struct kmem_cache *fs_cachep;

/* SLAB cache for vm_area_struct structures */
struct kmem_cache *vm_area_cachep;

/* SLAB cache for mm_struct structures (tsk->mm) */
static struct kmem_cache *mm_cachep;
static void account_kernel_stack(struct task_struct *tsk, int account)
{
        void *stack = task_stack_page(tsk);
        struct vm_struct *vm = task_stack_vm_area(tsk);

        BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);

        if (vm) {
                int i;

                BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);

                for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
                        mod_zone_page_state(page_zone(vm->pages[i]),
                                            NR_KERNEL_STACK_KB,
                                            PAGE_SIZE / 1024 * account);
                }

                /* All stack pages belong to the same memcg. */
                mod_memcg_page_state(vm->pages[0], MEMCG_KERNEL_STACK_KB,
                                     account * (THREAD_SIZE / 1024));
        } else {
                /*
                 * All stack pages are in the same zone and belong to the
                 * same memcg.
                 */
                struct page *first_page = virt_to_page(stack);

                mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
                                    THREAD_SIZE / 1024 * account);

                mod_memcg_page_state(first_page, MEMCG_KERNEL_STACK_KB,
                                     account * (THREAD_SIZE / 1024));
        }
}

Contributors

Person                    Tokens  Prop     Commits  CommitProp
Andrew Lutomirski         143     83.63%   3        50.00%
Motohiro Kosaki           25      14.62%   1        16.67%
Johannes Weiner           2       1.17%    1        16.67%
Linus Torvalds            1       0.58%    1        16.67%
Total                     171     100.00%  6        100.00%


static void release_task_stack(struct task_struct *tsk)
{
        if (WARN_ON(tsk->state != TASK_DEAD))
                return;  /* Better to leak the stack than to free prematurely */

        account_kernel_stack(tsk, -1);
        arch_release_thread_stack(tsk->stack);
        free_thread_stack(tsk);
        tsk->stack = NULL;
#ifdef CONFIG_VMAP_STACK
        tsk->stack_vm_area = NULL;
#endif
}

Contributors

Person                    Tokens  Prop     Commits  CommitProp
Andrew Lutomirski         33      54.10%   2        33.33%
Ingo Molnar               12      19.67%   1        16.67%
Motohiro Kosaki           8       13.11%   1        16.67%
Akinobu Mita              6       9.84%    1        16.67%
Linus Torvalds            2       3.28%    1        16.67%
Total                     61      100.00%  6        100.00%

#ifdef CONFIG_THREAD_INFO_IN_TASK
void put_task_stack(struct task_struct *tsk)
{
        if (atomic_dec_and_test(&tsk->stack_refcount))
                release_task_stack(tsk);
}

Contributors

Person                    Tokens  Prop     Commits  CommitProp
Andrew Lutomirski         25      100.00%  1        100.00%
Total                     25      100.00%  1        100.00%

#endif
void free_task(struct task_struct *tsk)
{
#ifndef CONFIG_THREAD_INFO_IN_TASK
        /*
         * The task is finally done with both the stack and thread_info,
         * so free both.
         */
        release_task_stack(tsk);
#else
        /*
         * If the task had a separate stack allocation, it should be gone
         * by now.
         */
        WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0);
#endif
        rt_mutex_debug_task_free(tsk);
        ftrace_graph_exit_task(tsk);
        put_seccomp_filter(tsk);
        arch_release_task_struct(tsk);
        if (tsk->flags & PF_KTHREAD)
                free_kthread_struct(tsk);
        free_task_struct(tsk);
}

Contributors

Person                    Tokens  Prop     Commits  CommitProp
Andrew Lutomirski         36      48.00%   1        11.11%
Oleg Nesterov             13      17.33%   1        11.11%
Ingo Molnar               10      13.33%   2        22.22%
Frédéric Weisbecker       5       6.67%    2        22.22%
Akinobu Mita              5       6.67%    1        11.11%
Will Drewry               5       6.67%    1        11.11%
David Mosberger-Tang      1       1.33%    1        11.11%
Total                     75      100.00%  9        100.00%

EXPORT_SYMBOL(free_task);
static inline void free_signal_struct(struct signal_struct *sig)
{
        taskstats_tgid_free(sig);
        sched_autogroup_exit(sig);
        /*
         * __mmdrop is not safe to call from softirq context on x86 due to
         * pgd_dtor so postpone it to the async context
         */
        if (sig->oom_mm)
                mmdrop_async(sig->oom_mm);
        kmem_cache_free(signal_cachep, sig);
}

Contributors

Person                    Tokens  Prop     Commits  CommitProp
Oleg Nesterov             24      55.81%   2        40.00%
Michal Hocko              14      32.56%   2        40.00%
Mike Galbraith            5       11.63%   1        20.00%
Total                     43      100.00%  5        100.00%


static inline void put_signal_struct(struct signal_struct *sig)
{
        if (atomic_dec_and_test(&sig->sigcnt))
                free_signal_struct(sig);
}

Contributors

Person                    Tokens  Prop     Commits  CommitProp
Oleg Nesterov             27      100.00%  1        100.00%
Total                     27      100.00%  1        100.00%


void __put_task_struct(struct task_struct *tsk)
{
        WARN_ON(!tsk->exit_state);
        WARN_ON(atomic_read(&tsk->usage));
        WARN_ON(tsk == current);

        cgroup_free(tsk);
        task_numa_free(tsk);
        security_task_free(tsk);
        exit_creds(tsk);
        delayacct_tsk_free(tsk);
        put_signal_struct(tsk->signal);

        if (!profile_handoff_task(tsk))
                free_task(tsk);
}

Contributors

Person                    Tokens  Prop     Commits  CommitProp
Linus Torvalds            35      43.21%   1        7.69%
John Levon                8       9.88%    1        7.69%
Oleg Nesterov             7       8.64%    1        7.69%
Shailabh Nagar            5       6.17%    1        7.69%
Tejun Heo                 5       6.17%    1        7.69%
Kees Cook                 5       6.17%    1        7.69%
Andrew Morton             5       6.17%    2        15.38%
Mike Galbraith            5       6.17%    1        7.69%
Christoph Hellwig         3       3.70%    1        7.69%
David Howells             1       1.23%    1        7.69%
David Mosberger-Tang      1       1.23%    1        7.69%
Ingo Molnar               1       1.23%    1        7.69%
Total                     81      100.00%  13       100.00%

EXPORT_SYMBOL_GPL(__put_task_struct);
void __init __weak arch_task_cache_init(void) { }

Contributors

Person                    Tokens  Prop     Commits  CommitProp
Thomas Gleixner           7       87.50%   1        50.00%
Suresh B. Siddha          1       12.50%   1        50.00%
Total                     8       100.00%  2        100.00%

/*
 * set_max_threads
 */
static void set_max_threads(unsigned int max_threads_suggested)
{
        u64 threads;

        /*
         * The number of threads shall be limited such that the thread
         * structures may only consume a small part of the available memory.
         */
        if (fls64(totalram_pages) + fls64(PAGE_SIZE) > 64)
                threads = MAX_THREADS;
        else
                threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE,
                                    (u64) THREAD_SIZE * 8UL);

        if (threads > max_threads_suggested)
                threads = max_threads_suggested;

        max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS);
}

Contributors

Person                    Tokens  Prop     Commits  CommitProp
Heinrich Schuchardt       78      100.00%  3        100.00%
Total                     78      100.00%  3        100.00%
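
Worked example (assumed numbers, not taken from this file): the sizing rule in
set_max_threads() allows kernel stacks for max_threads tasks to consume at most
one eighth of RAM. A minimal userspace sketch of that arithmetic:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        /* assumed example values: 8 GiB of RAM, 4 KiB pages, 16 KiB kernel stacks */
        uint64_t totalram_pages = 2097152;
        uint64_t page_size = 4096;
        uint64_t thread_size = 16384;

        /* same rule as set_max_threads(): stacks may use at most 1/8 of memory */
        uint64_t threads = totalram_pages * page_size / (thread_size * 8);

        /* prints 65536; the kernel then clamps this between MIN_THREADS and MAX_THREADS */
        printf("max_threads candidate = %llu\n", (unsigned long long)threads);
        return 0;
}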

#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
/* Initialized by the architecture: */
int arch_task_struct_size __read_mostly;
#endif
void __init fork_init(void)
{
        int i;
#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
#ifndef ARCH_MIN_TASKALIGN
#define ARCH_MIN_TASKALIGN 0
#endif
        int align = max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN);

        /* create a slab on which task_structs can be allocated */
        task_struct_cachep = kmem_cache_create("task_struct",
                        arch_task_struct_size, align,
                        SLAB_PANIC|SLAB_ACCOUNT, NULL);
#endif

        /* do the arch specific task caches init */
        arch_task_cache_init();

        set_max_threads(MAX_THREADS);

        init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
        init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
        init_task.signal->rlim[RLIMIT_SIGPENDING] =
                init_task.signal->rlim[RLIMIT_NPROC];

        for (i = 0; i < UCOUNT_COUNTS; i++) {
                init_user_ns.ucount_max[i] = max_threads/2;
        }

#ifdef CONFIG_VMAP_STACK
        cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache",
                          NULL, free_vm_stack_cache);
#endif

        lockdep_init_task(&init_task);
}

Contributors

Person                    Tokens  Prop     Commits  CommitProp
Linus Torvalds (pre-git)  34      21.25%   5        19.23%
Eric W. Biedermann        28      17.50%   3        11.54%
Roland McGrath            22      13.75%   2        7.69%
Hoeun Ryu                 16      10.00%   1        3.85%
Peter Zijlstra            14      8.75%    2        7.69%
David Howells             13      8.12%    1        3.85%
Andrew Morton             9       5.62%    2        7.69%
Byungchul Park            6       3.75%    1        3.85%
Heinrich Schuchardt       5       3.12%    2        7.69%
Suresh B. Siddha          4       2.50%    1        3.85%
David Mosberger-Tang      4       2.50%    1        3.85%
Vegard Nossum             1       0.62%    1        3.85%
Thomas Gleixner           1       0.62%    1        3.85%
Alan Cox                  1       0.62%    1        3.85%
Ingo Molnar               1       0.62%    1        3.85%
Vladimir Davydov          1       0.62%    1        3.85%
Total                     160     100.00%  26       100.00%


int __weak arch_dup_task_struct(struct task_struct *dst,
                                struct task_struct *src)
{
        *dst = *src;
        return 0;
}

Contributors

Person                    Tokens  Prop     Commits  CommitProp
Suresh B. Siddha          17      68.00%   1        50.00%
Gideon Israel Dsouza      8       32.00%   1        50.00%
Total                     25      100.00%  2        100.00%


void set_task_stack_end_magic(struct task_struct *tsk)
{
        unsigned long *stackend;

        stackend = end_of_stack(tsk);
        *stackend = STACK_END_MAGIC;	/* for overflow detection */
}

Contributors

Person                    Tokens  Prop     Commits  CommitProp
Aaron Tomlin              28      100.00%  1        100.00%
Total                     28      100.00%  1        100.00%


static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
{
        struct task_struct *tsk;
        unsigned long *stack;
        struct vm_struct *stack_vm_area;
        int err;

        if (node == NUMA_NO_NODE)
                node = tsk_fork_get_node(orig);
        tsk = alloc_task_struct_node(node);
        if (!tsk)
                return NULL;

        stack = alloc_thread_stack_node(tsk, node);
        if (!stack)
                goto free_tsk;

        stack_vm_area = task_stack_vm_area(tsk);

        err = arch_dup_task_struct(tsk, orig);

        /*
         * arch_dup_task_struct() clobbers the stack-related fields.  Make
         * sure they're properly initialized before using any stack-related
         * functions again.
         */
        tsk->stack = stack;
#ifdef CONFIG_VMAP_STACK
        tsk->stack_vm_area = stack_vm_area;
#endif
#ifdef CONFIG_THREAD_INFO_IN_TASK
        atomic_set(&tsk->stack_refcount, 1);
#endif

        if (err)
                goto free_stack;

#ifdef CONFIG_SECCOMP
        /*
         * We must handle setting up seccomp filters once we're under
         * the sighand lock in case orig has changed between now and
         * then. Until then, filter must be NULL to avoid messing up
         * the usage counts on the error path calling free_task.
         */
        tsk->seccomp.filter = NULL;
#endif

        setup_thread_stack(tsk, orig);
        clear_user_return_notifier(tsk);
        clear_tsk_need_resched(tsk);
        set_task_stack_end_magic(tsk);

#ifdef CONFIG_CC_STACKPROTECTOR
        tsk->stack_canary = get_random_canary();
#endif

        /*
         * One for us, one for whoever does the "release_task()" (usually
         * parent)
         */
        atomic_set(&tsk->usage, 2);
#ifdef CONFIG_BLK_DEV_IO_TRACE
        tsk->btrace_seq = 0;
#endif
        tsk->splice_pipe = NULL;
        tsk->task_frag.page = NULL;
        tsk->wake_q.next = NULL;

        account_kernel_stack(tsk, 1);

        kcov_task_init(tsk);

#ifdef CONFIG_FAULT_INJECTION
        tsk->fail_nth = 0;
#endif

        return tsk;

free_stack:
        free_thread_stack(tsk);
free_tsk:
        free_task_struct(tsk);
        return NULL;
}

Contributors

Person                    Tokens  Prop     Commits  CommitProp
David Howells             52      19.26%   1        3.03%
Andrew Lutomirski         47      17.41%   2        6.06%
Eric Dumazet              19      7.04%    4        12.12%
Suresh B. Siddha          18      6.67%    1        3.03%
Dmitriy Vyukov            16      5.93%    2        6.06%
Kees Cook                 14      5.19%    1        3.03%
Andi Kleen                13      4.81%    1        3.03%
Jens Axboe                12      4.44%    2        6.06%
Arjan van de Ven          11      4.07%    1        3.03%
Linus Torvalds            10      3.70%    2        6.06%
Sebastian Andrzej Siewior 8       2.96%    1        3.03%
Andrew Morton             6       2.22%    1        3.03%
Motohiro Kosaki           6       2.22%    1        3.03%
Mike Galbraith            5       1.85%    1        3.03%
Akinobu Mita              5       1.85%    1        3.03%
Alexey Dobriyan           5       1.85%    1        3.03%
Al Viro                   5       1.85%    1        3.03%
Avi Kivity                5       1.85%    1        3.03%
Eric Sandeen              4       1.48%    1        3.03%
Ingo Molnar               2       0.74%    1        3.03%
David Mosberger-Tang      2       0.74%    1        3.03%
Peter Zijlstra            1       0.37%    1        3.03%
Aaron Tomlin              1       0.37%    1        3.03%
Rik Van Riel              1       0.37%    1        3.03%
Stephen Rothwell          1       0.37%    1        3.03%
Daniel Rebelo de Oliveira 1       0.37%    1        3.03%
Total                     270     100.00%  33       100.00%

#ifdef CONFIG_MMU
static __latent_entropy int dup_mmap(struct mm_struct *mm,
                                     struct mm_struct *oldmm)
{
        struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
        struct rb_node **rb_link, *rb_parent;
        int retval;
        unsigned long charge;
        LIST_HEAD(uf);

        uprobe_start_dup_mmap();
        if (down_write_killable(&oldmm->mmap_sem)) {
                retval = -EINTR;
                goto fail_uprobe_end;
        }
        flush_cache_dup_mm(oldmm);
        uprobe_dup_mmap(oldmm, mm);
        /*
         * Not linked in yet - no deadlock potential:
         */
        down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);

        /* No ordering required: file already has been exposed. */
        RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));

        mm->total_vm = oldmm->total_vm;
        mm->data_vm = oldmm->data_vm;
        mm->exec_vm = oldmm->exec_vm;
        mm->stack_vm = oldmm->stack_vm;

        rb_link = &mm->mm_rb.rb_node;
        rb_parent = NULL;
        pprev = &mm->mmap;
        retval = ksm_fork(mm, oldmm);
        if (retval)
                goto out;
        retval = khugepaged_fork(mm, oldmm);
        if (retval)
                goto out;

        prev = NULL;
        for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
                struct file *file;

                if (mpnt->vm_flags & VM_DONTCOPY) {
                        vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt));
                        continue;
                }
                charge = 0;
                if (mpnt->vm_flags & VM_ACCOUNT) {
                        unsigned long len = vma_pages(mpnt);

                        if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
                                goto fail_nomem;
                        charge = len;
                }
                tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
                if (!tmp)
                        goto fail_nomem;
                *tmp = *mpnt;
                INIT_LIST_HEAD(&tmp->anon_vma_chain);
                retval = vma_dup_policy(mpnt, tmp);
                if (retval)
                        goto fail_nomem_policy;
                tmp->vm_mm = mm;
                retval = dup_userfaultfd(tmp, &uf);
                if (retval)
                        goto fail_nomem_anon_vma_fork;
                if (tmp->vm_flags & VM_WIPEONFORK) {
                        /* VM_WIPEONFORK gets a clean slate in the child. */
                        tmp->anon_vma = NULL;
                        if (anon_vma_prepare(tmp))
                                goto fail_nomem_anon_vma_fork;
                } else if (anon_vma_fork(tmp, mpnt))
                        goto fail_nomem_anon_vma_fork;
                tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
                tmp->vm_next = tmp->vm_prev = NULL;
                file = tmp->vm_file;
                if (file) {
                        struct inode *inode = file_inode(file);
                        struct address_space *mapping = file->f_mapping;

                        get_file(file);
                        if (tmp->vm_flags & VM_DENYWRITE)
                                atomic_dec(&inode->i_writecount);
                        i_mmap_lock_write(mapping);
                        if (tmp->vm_flags & VM_SHARED)
                                atomic_inc(&mapping->i_mmap_writable);
                        flush_dcache_mmap_lock(mapping);
                        /* insert tmp into the share list, just after mpnt */
                        vma_interval_tree_insert_after(tmp, mpnt,
                                        &mapping->i_mmap);
                        flush_dcache_mmap_unlock(mapping);
                        i_mmap_unlock_write(mapping);
                }

                /*
                 * Clear hugetlb-related page reserves for children. This only
                 * affects MAP_PRIVATE mappings. Faults generated by the child
                 * are not guaranteed to succeed, even if read-only
                 */
                if (is_vm_hugetlb_page(tmp))
                        reset_vma_resv_huge_pages(tmp);

                /*
                 * Link in the new vma and copy the page table entries.
                 */
                *pprev = tmp;
                pprev = &tmp->vm_next;
                tmp->vm_prev = prev;
                prev = tmp;

                __vma_link_rb(mm, tmp, rb_link, rb_parent);
                rb_link = &tmp->vm_rb.rb_right;
                rb_parent = &tmp->vm_rb;

                mm->map_count++;
                if (!(tmp->vm_flags & VM_WIPEONFORK))
                        retval = copy_page_range(mm, oldmm, mpnt);

                if (tmp->vm_ops && tmp->vm_ops->open)
                        tmp->vm_ops->open(tmp);

                if (retval)
                        goto out;
        }
        /* a new mm has just been created */
        retval = arch_dup_mmap(oldmm, mm);
out:
        up_write(&mm->mmap_sem);
        flush_tlb_mm(oldmm);
        up_write(&oldmm->mmap_sem);
        dup_userfaultfd_complete(&uf);
fail_uprobe_end:
        uprobe_end_dup_mmap();
        return retval;
fail_nomem_anon_vma_fork:
        mpol_put(vma_policy(tmp)