cregit-Linux: how code gets into the kernel
Release 4.11: fs/userfaultfd.c
/*
 *  fs/userfaultfd.c
 *
 *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
 *  Copyright (C) 2008-2009 Red Hat, Inc.
 *  Copyright (C) 2015  Red Hat, Inc.
 *
 *  This work is licensed under the terms of the GNU GPL, version 2. See
 *  the COPYING file in the top-level directory.
 *
 *  Some part derived from fs/eventfd.c (anon inode setup) and
 *  mm/ksm.c (mm hashing).
 */

#include <linux/list.h>
#include <linux/hashtable.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/mm.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/file.h>
#include <linux/bug.h>
#include <linux/anon_inodes.h>
#include <linux/syscalls.h>
#include <linux/userfaultfd_k.h>
#include <linux/mempolicy.h>
#include <linux/ioctl.h>
#include <linux/security.h>
#include <linux/hugetlb.h>


static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;


enum userfaultfd_state {
	UFFD_STATE_WAIT_API,
	UFFD_STATE_RUNNING,
};

/*
 * Start with fault_pending_wqh and fault_wqh so they're more likely
 * to be in the same cacheline.
 */
struct userfaultfd_ctx {
	/* waitqueue head for the pending (i.e. not read) userfaults */
	wait_queue_head_t fault_pending_wqh;
	/* waitqueue head for the userfaults */
	wait_queue_head_t fault_wqh;
	/* waitqueue head for the pseudo fd to wakeup poll/read */
	wait_queue_head_t fd_wqh;
	/* waitqueue head for events */
	wait_queue_head_t event_wqh;
	/* a refile sequence protected by fault_pending_wqh lock */
	struct seqcount refile_seq;
	/* pseudo fd refcounting */
	atomic_t refcount;
	/* userfaultfd syscall flags */
	unsigned int flags;
	/* features requested from the userspace */
	unsigned int features;
	/* state machine */
	enum userfaultfd_state state;
	/* released */
	bool released;
	/* mm with one or more vmas attached to this userfaultfd_ctx */
	struct mm_struct *mm;
};


struct userfaultfd_fork_ctx {
	struct userfaultfd_ctx *orig;
	struct userfaultfd_ctx *new;
	struct list_head list;
};


struct userfaultfd_unmap_ctx {
	struct userfaultfd_ctx *ctx;
	unsigned long start;
	unsigned long end;
	struct list_head list;
};


struct userfaultfd_wait_queue {
	struct uffd_msg msg;
	wait_queue_t wq;
	struct userfaultfd_ctx *ctx;
	bool waken;
};


struct userfaultfd_wake_range {
	unsigned long start;
	unsigned long len;
};
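These wait queues and ranges are driven from the other side of the pseudo fd by ordinary userspace calls. As a minimal illustrative sketch (not part of this kernel file), a monitor typically creates the fd and registers a range as below; it assumes the standard <linux/userfaultfd.h> uapi, and uffd_register_region() is a hypothetical helper name.

/* Illustrative userspace sketch, not part of fs/userfaultfd.c. */
#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

static int uffd_register_region(void *area, size_t len)
{
	struct uffdio_api api = { .api = UFFD_API, .features = 0 };
	struct uffdio_register reg;
	int uffd;

	/* creates the pseudo fd backed by a userfaultfd_ctx (fd_wqh and co.) */
	uffd = syscall(__NR_userfaultfd, O_CLOEXEC);
	if (uffd < 0)
		return -1;

	/* UFFDIO_API moves the context from UFFD_STATE_WAIT_API to RUNNING */
	if (ioctl(uffd, UFFDIO_API, &api) < 0)
		return -1;

	reg.range.start = (unsigned long) area;
	reg.range.len = len;
	reg.mode = UFFDIO_REGISTER_MODE_MISSING;
	if (ioctl(uffd, UFFDIO_REGISTER, &reg) < 0)
		return -1;

	return uffd;
}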


static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode,
				     int wake_flags, void *key)
{
	struct userfaultfd_wake_range *range = key;
	int ret;
	struct userfaultfd_wait_queue *uwq;
	unsigned long start, len;

	uwq = container_of(wq, struct userfaultfd_wait_queue, wq);
	ret = 0;
	/* len == 0 means wake all */
	start = range->start;
	len = range->len;
	if (len && (start > uwq->msg.arg.pagefault.address ||
		    start + len <= uwq->msg.arg.pagefault.address))
		goto out;
	WRITE_ONCE(uwq->waken, true);
	/*
	 * The implicit smp_mb__before_spinlock in try_to_wake_up()
	 * renders uwq->waken visible to other CPUs before the task is
	 * waken.
	 */
	ret = wake_up_state(wq->private, mode);
	if (ret)
		/*
		 * Wake only once, autoremove behavior.
		 *
		 * After the effect of list_del_init is visible to the
		 * other CPUs, the waitqueue may disappear from under
		 * us, see the !list_empty_careful() in
		 * handle_userfault(). try_to_wake_up() has an
		 * implicit smp_mb__before_spinlock, and the
		 * wq->private is read before calling the extern
		 * function "wake_up_state" (which in turn calls
		 * try_to_wake_up). While the spin_lock;spin_unlock;
		 * wouldn't be enough, the smp_mb__before_spinlock is
		 * enough to avoid an explicit smp_mb() here.
		 */
		list_del_init(&wq->task_list);
out:
	return ret;
}

Contributors

Person               Tokens     Prop  Commits  CommitProp
Andrea Arcangeli        144  100.00%        3     100.00%
Total                   144  100.00%        3     100.00%

/**
 * userfaultfd_ctx_get - Acquires a reference to the internal userfaultfd
 * context.
 * @ctx: [in] Pointer to the userfaultfd context.
 */
static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx)
{
	if (!atomic_inc_not_zero(&ctx->refcount))
		BUG();
}

Contributors

Person               Tokens     Prop  Commits  CommitProp
Andrea Arcangeli         25  100.00%        1     100.00%
Total                    25  100.00%        1     100.00%

/**
 * userfaultfd_ctx_put - Releases a reference to the internal userfaultfd
 * context.
 * @ctx: [in] Pointer to userfaultfd context.
 *
 * The userfaultfd context reference must have been previously acquired either
 * with userfaultfd_ctx_get() or userfaultfd_ctx_fdget().
 */
static void userfaultfd_ctx_put(struct userfaultfd_ctx *ctx)
{
	if (atomic_dec_and_test(&ctx->refcount)) {
		VM_BUG_ON(spin_is_locked(&ctx->fault_pending_wqh.lock));
		VM_BUG_ON(waitqueue_active(&ctx->fault_pending_wqh));
		VM_BUG_ON(spin_is_locked(&ctx->fault_wqh.lock));
		VM_BUG_ON(waitqueue_active(&ctx->fault_wqh));
		VM_BUG_ON(spin_is_locked(&ctx->event_wqh.lock));
		VM_BUG_ON(waitqueue_active(&ctx->event_wqh));
		VM_BUG_ON(spin_is_locked(&ctx->fd_wqh.lock));
		VM_BUG_ON(waitqueue_active(&ctx->fd_wqh));
		mmdrop(ctx->mm);
		kmem_cache_free(userfaultfd_ctx_cachep, ctx);
	}
}

Contributors

Person               Tokens     Prop  Commits  CommitProp
Andrea Arcangeli        108   81.20%        2      50.00%
Pavel Emelyanov          24   18.05%        1      25.00%
Oleg Nesterov             1    0.75%        1      25.00%
Total                   133  100.00%        4     100.00%


static inline void msg_init(struct uffd_msg *msg)
{
	BUILD_BUG_ON(sizeof(struct uffd_msg) != 32);
	/*
	 * Must use memset to zero out the paddings or kernel data is
	 * leaked to userland.
	 */
	memset(msg, 0, sizeof(struct uffd_msg));
}

Contributors

Person               Tokens     Prop  Commits  CommitProp
Andrea Arcangeli         37  100.00%        2     100.00%
Total                    37  100.00%        2     100.00%


static inline struct uffd_msg userfault_msg(unsigned long address,
					    unsigned int flags,
					    unsigned long reason)
{
	struct uffd_msg msg;

	msg_init(&msg);
	msg.event = UFFD_EVENT_PAGEFAULT;
	msg.arg.pagefault.address = address;
	if (flags & FAULT_FLAG_WRITE)
		/*
		 * If UFFD_FEATURE_PAGEFAULT_FLAG_WP was set in the
		 * uffdio_api.features and UFFD_PAGEFAULT_FLAG_WRITE
		 * was not set in a UFFD_EVENT_PAGEFAULT, it means it
		 * was a read fault, otherwise if set it means it's
		 * a write fault.
		 */
		msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_WRITE;
	if (reason & VM_UFFD_WP)
		/*
		 * If UFFD_FEATURE_PAGEFAULT_FLAG_WP was set in the
		 * uffdio_api.features and UFFD_PAGEFAULT_FLAG_WP was
		 * not set in a UFFD_EVENT_PAGEFAULT, it means it was
		 * a missing fault, otherwise if set it means it's a
		 * write protect fault.
		 */
		msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_WP;
	return msg;
}

Contributors

Person               Tokens     Prop  Commits  CommitProp
Andrea Arcangeli         83  100.00%        3     100.00%
Total                    83  100.00%        3     100.00%
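The uffd_msg built by userfault_msg() above is what a monitor ultimately receives through read(2). As a hedged illustration (not part of this file), a monitor thread typically consumes it roughly as follows, assuming the fd is in blocking mode and that zeroed_src points to a page-aligned source buffer of page_size bytes; service_one_fault() is a hypothetical helper.

/* Illustrative userspace consumer of the uffd_msg built above. */
#include <linux/userfaultfd.h>
#include <sys/ioctl.h>
#include <unistd.h>

static int service_one_fault(int uffd, void *zeroed_src, unsigned long page_size)
{
	struct uffd_msg msg;
	struct uffdio_copy copy;

	/* returns once fault_pending_wqh has a pending userfault queued */
	if (read(uffd, &msg, sizeof(msg)) != sizeof(msg))
		return -1;
	if (msg.event != UFFD_EVENT_PAGEFAULT)
		return 0;	/* some other event type, not handled here */

	/* msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE tells write
	 * faults from read faults; this sketch resolves both the same way */

	/* resolve the fault; the kernel then wakes the blocked faulter */
	copy.dst = msg.arg.pagefault.address & ~(page_size - 1);
	copy.src = (unsigned long) zeroed_src;
	copy.len = page_size;
	copy.mode = 0;
	copy.copy = 0;
	return ioctl(uffd, UFFDIO_COPY, &copy);
}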

#ifdef CONFIG_HUGETLB_PAGE
/*
 * Same functionality as userfaultfd_must_wait below with modifications for
 * hugepmd ranges.
 */
static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
					      unsigned long address,
					      unsigned long flags,
					      unsigned long reason)
{
	struct mm_struct *mm = ctx->mm;
	pte_t *pte;
	bool ret = true;

	VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));

	pte = huge_pte_offset(mm, address);
	if (!pte)
		goto out;

	ret = false;

	/*
	 * Lockless access: we're in a wait_event so it's ok if it
	 * changes under us.
	 */
	if (huge_pte_none(*pte))
		ret = true;
	if (!huge_pte_write(*pte) && (reason & VM_UFFD_WP))
		ret = true;
out:
	return ret;
}

Contributors

Person               Tokens     Prop  Commits  CommitProp
Mike Kravetz            112  100.00%        1     100.00%
Total                   112  100.00%        1     100.00%

#else
static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
					      unsigned long address,
					      unsigned long flags,
					      unsigned long reason)
{
	return false;	/* should never get here */
}

Contributors

Person               Tokens     Prop  Commits  CommitProp
Mike Kravetz             28  100.00%        1     100.00%
Total                    28  100.00%        1     100.00%

#endif /* CONFIG_HUGETLB_PAGE */

/*
 * Verify the pagetables are still not ok after having registered into
 * the fault_pending_wqh to avoid userland having to UFFDIO_WAKE any
 * userfault that has already been resolved, if userfaultfd_read and
 * UFFDIO_COPY|ZEROPAGE are being run simultaneously on two different
 * threads.
 */
static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
					 unsigned long address,
					 unsigned long flags,
					 unsigned long reason)
{
	struct mm_struct *mm = ctx->mm;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd, _pmd;
	pte_t *pte;
	bool ret = true;

	VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));

	pgd = pgd_offset(mm, address);
	if (!pgd_present(*pgd))
		goto out;
	p4d = p4d_offset(pgd, address);
	if (!p4d_present(*p4d))
		goto out;
	pud = pud_offset(p4d, address);
	if (!pud_present(*pud))
		goto out;
	pmd = pmd_offset(pud, address);
	/*
	 * READ_ONCE must function as a barrier with narrower scope
	 * and it must be equivalent to:
	 *	_pmd = *pmd; barrier();
	 *
	 * This is to deal with the instability (as in
	 * pmd_trans_unstable) of the pmd.
	 */
	_pmd = READ_ONCE(*pmd);
	if (!pmd_present(_pmd))
		goto out;

	ret = false;
	if (pmd_trans_huge(_pmd))
		goto out;

	/*
	 * the pmd is stable (as in !pmd_trans_unstable) so we can re-read it
	 * and use the standard pte_offset_map() instead of parsing _pmd.
	 */
	pte = pte_offset_map(pmd, address);
	/*
	 * Lockless access: we're in a wait_event so it's ok if it
	 * changes under us.
	 */
	if (pte_none(*pte))
		ret = true;
	pte_unmap(pte);

out:
	return ret;
}

Contributors

Person               Tokens     Prop  Commits  CommitProp
Andrea Arcangeli        185   87.68%        1      50.00%
Kirill A. Shutemov       26   12.32%        1      50.00%
Total                   211  100.00%        2     100.00%

/*
 * The locking rules involved in returning VM_FAULT_RETRY depending on
 * FAULT_FLAG_ALLOW_RETRY, FAULT_FLAG_RETRY_NOWAIT and
 * FAULT_FLAG_KILLABLE are not straightforward. The "Caution"
 * recommendation in __lock_page_or_retry is not an understatement.
 *
 * If FAULT_FLAG_ALLOW_RETRY is set, the mmap_sem must be released
 * before returning VM_FAULT_RETRY only if FAULT_FLAG_RETRY_NOWAIT is
 * not set.
 *
 * If FAULT_FLAG_ALLOW_RETRY is set but FAULT_FLAG_KILLABLE is not
 * set, VM_FAULT_RETRY can still be returned if and only if there are
 * fatal_signal_pending()s, and the mmap_sem must be released before
 * returning it.
 */
int handle_userfault(struct vm_fault *vmf, unsigned long reason)
{
	struct mm_struct *mm = vmf->vma->vm_mm;
	struct userfaultfd_ctx *ctx;
	struct userfaultfd_wait_queue uwq;
	int ret;
	bool must_wait, return_to_userland;
	long blocking_state;

	BUG_ON(!rwsem_is_locked(&mm->mmap_sem));

	ret = VM_FAULT_SIGBUS;
	ctx = vmf->vma->vm_userfaultfd_ctx.ctx;
	if (!ctx)
		goto out;

	BUG_ON(ctx->mm != mm);

	VM_BUG_ON(reason & ~(VM_UFFD_MISSING|VM_UFFD_WP));
	VM_BUG_ON(!(reason & VM_UFFD_MISSING) ^ !!(reason & VM_UFFD_WP));

	/*
	 * If it's already released don't get it. This avoids to loop
	 * in __get_user_pages if userfaultfd_release waits on the
	 * caller of handle_userfault to release the mmap_sem.
	 */
	if (unlikely(ACCESS_ONCE(ctx->released)))
		goto out;

	/*
	 * We don't do userfault handling for the final child pid update.
	 */
	if (current->flags & PF_EXITING)
		goto out;

	/*
	 * Check that we can return VM_FAULT_RETRY.
	 *
	 * NOTE: it should become possible to return VM_FAULT_RETRY
	 * even if FAULT_FLAG_TRIED is set without leading to gup()
	 * -EBUSY failures, if the userfaultfd is to be extended for
	 * VM_UFFD_WP tracking and we intend to arm the userfault
	 * without first stopping userland access to the memory. For
	 * VM_UFFD_MISSING userfaults this is enough for now.
	 */
	if (unlikely(!(vmf->flags & FAULT_FLAG_ALLOW_RETRY))) {
		/*
		 * Validate the invariant that nowait must allow retry
		 * to be sure not to return SIGBUS erroneously on
		 * nowait invocations.
		 */
		BUG_ON(vmf->flags & FAULT_FLAG_RETRY_NOWAIT);
#ifdef CONFIG_DEBUG_VM
		if (printk_ratelimit()) {
			printk(KERN_WARNING
			       "FAULT_FLAG_ALLOW_RETRY missing %x\n",
			       vmf->flags);
			dump_stack();
		}
#endif
		goto out;
	}

	/*
	 * Handle nowait, not much to do other than tell it to retry
	 * and wait.
	 */
	ret = VM_FAULT_RETRY;
	if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
		goto out;

	/* take the reference before dropping the mmap_sem */
	userfaultfd_ctx_get(ctx);

	init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function);
	uwq.wq.private = current;
	uwq.msg = userfault_msg(vmf->address, vmf->flags, reason);
	uwq.ctx = ctx;
	uwq.waken = false;

	return_to_userland =
		(vmf->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
		(FAULT_FLAG_USER|FAULT_FLAG_KILLABLE);
	blocking_state = return_to_userland ? TASK_INTERRUPTIBLE :
			 TASK_KILLABLE;

	spin_lock(&ctx->fault_pending_wqh.lock);
	/*
	 * After the __add_wait_queue the uwq is visible to userland
	 * through poll/read().
	 */
	__add_wait_queue(&ctx->fault_pending_wqh, &uwq.wq);
	/*
	 * The smp_mb() after __set_current_state prevents the reads
	 * following the spin_unlock to happen before the list_add in
	 * __add_wait_queue.
	 */
	set_current_state(blocking_state);
	spin_unlock(&ctx->fault_pending_wqh.lock);

	if (!is_vm_hugetlb_page(vmf->vma))
		must_wait = userfaultfd_must_wait(ctx, vmf->address,
						  vmf->flags, reason);
	else
		must_wait = userfaultfd_huge_must_wait(ctx, vmf->address,
						       vmf->flags, reason);
	up_read(&mm->mmap_sem);

	if (likely(must_wait && !ACCESS_ONCE(ctx->released) &&
		   (return_to_userland ? !signal_pending(current) :
		    !fatal_signal_pending(current)))) {
		wake_up_poll(&ctx->fd_wqh, POLLIN);
		schedule();
		ret |= VM_FAULT_MAJOR;

		/*
		 * False wakeups can originate even from rwsem before
		 * up_read() however userfaults will wait either for a
		 * targeted wakeup on the specific uwq waitqueue from
		 * wake_userfault() or for signals or for uffd
		 * release.
		 */
		while (!READ_ONCE(uwq.waken)) {
			/*
			 * This needs the full smp_store_mb()
			 * guarantee as the state write must be
			 * visible to other CPUs before reading
			 * uwq.waken from other CPUs.
			 */
			set_current_state(blocking_state);
			if (READ_ONCE(uwq.waken) ||
			    READ_ONCE(ctx->released) ||
			    (return_to_userland ? signal_pending(current) :
			     fatal_signal_pending(current)))
				break;
			schedule();
		}
	}

	__set_current_state(TASK_RUNNING);

	if (return_to_userland) {
		if (signal_pending(current) &&
		    !fatal_signal_pending(current)) {
			/*
			 * If we got a SIGSTOP or SIGCONT and this is
			 * a normal userland page fault, just let
			 * userland return so the signal will be
			 * handled and gdb debugging works. The page
			 * fault code immediately after we return from
			 * this function is going to release the
			 * mmap_sem and it's not depending on it
			 * (unlike gup would if we were not to return
			 * VM_FAULT_RETRY).
			 *
			 * If a fatal signal is pending we still take
			 * the streamlined VM_FAULT_RETRY failure path
			 * and there's no need to retake the mmap_sem
			 * in such case.
			 */
			down_read(&mm->mmap_sem);
			ret = VM_FAULT_NOPAGE;
		}
	}

	/*
	 * Here we race with the list_del; list_add in
	 * userfaultfd_ctx_read(), however because we don't ever run
	 * list_del_init() to refile across the two lists, the prev
	 * and next pointers will never point to self. list_add also
	 * would never let any of the two pointers to point to
	 * self. So list_empty_careful won't risk to see both pointers
	 * pointing to self at any time during the list refile. The
	 * only case where list_del_init() is called is the full
	 * removal in the wake function and there we don't re-list_add
	 * and it's fine not to block on the spinlock. The uwq on this
	 * kernel stack can be released after the list_del_init.
	 */
	if (!list_empty_careful(&uwq.wq.task_list)) {
		spin_lock(&ctx->fault_pending_wqh.lock);
		/*
		 * No need of list_del_init(), the uwq on the stack
		 * will be freed shortly anyway.
		 */
		list_del(&uwq.wq.task_list);
		spin_unlock(&ctx->fault_pending_wqh.lock);
	}

	/*
	 * ctx may go away after this if the userfault pseudo fd is
	 * already released.
	 */
	userfaultfd_ctx_put(ctx);

out:
	return ret;
}

Contributors

Person               Tokens     Prop  Commits  CommitProp
Andrea Arcangeli        527   89.17%        8      66.67%
Mike Kravetz             28    4.74%        1       8.33%
Jan Kara                 13    2.20%        1       8.33%
Linus Torvalds           12    2.03%        1       8.33%
Kirill A. Shutemov       11    1.86%        1       8.33%
Total                   591  100.00%       12     100.00%
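A thread parked in handle_userfault() above is released either by the implicit wakeup of UFFDIO_COPY/UFFDIO_ZEROPAGE or by an explicit UFFDIO_WAKE from the monitor. As a hedged sketch (not part of this file), an explicit wake looks roughly like this; wake_range() is a hypothetical helper, and it assumes the faults were earlier resolved with UFFDIO_COPY_MODE_DONTWAKE set.

/* Illustrative userspace sketch, not part of fs/userfaultfd.c. */
#include <linux/userfaultfd.h>
#include <sys/ioctl.h>

static int wake_range(int uffd, unsigned long start, unsigned long len)
{
	struct uffdio_range range = {
		.start = start,	/* page-aligned start of the resolved range */
		.len = len,	/* parked uwq entries inside this range are woken */
	};

	/* ends up in userfaultfd_wake_function() via the fault_*_wqh wakeups */
	return ioctl(uffd, UFFDIO_WAKE, &range);
}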


static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
					      struct userfaultfd_wait_queue *ewq)
{
	if (WARN_ON_ONCE(current->flags & PF_EXITING))
		goto out;

	ewq->ctx = ctx;
	init_waitqueue_entry(&ewq->wq, current);

	spin_lock(&ctx->event_wqh.lock);
	/*
	 * After the __add_wait_queue the uwq is visible to userland
	 * through poll/read().
	 */
	__add_wait_queue(&ctx->event_wqh, &ewq->wq);
	for (;;) {
		set_current_state(TASK_KILLABLE);
		if (ewq->msg.event == 0)
			break;
		if (ACCESS_ONCE(ctx->released) ||
		    fatal_signal_pending(current)) {
			__remove_wait_queue(&ctx->event_wqh, &ewq->wq);
			if (ewq->msg.event == UFFD_EVENT_FORK) {
				struct userfaultfd_ctx *new;

				new = (struct userfaultfd_ctx *)
					(unsigned long)
					ewq->msg.arg.reserved.reserved1;

				userfaultfd_ctx_put(new);
			}
			break;
		}

		spin_unlock(&ctx->event_wqh.lock);

		wake_up_poll(&ctx->fd_wqh, POLLIN);
		schedule();

		spin_lock(&ctx->event_wqh.lock);
	}
	__set_current_state(TASK_RUNNING);
	spin_unlock(&ctx->event_wqh.lock);

	/*
	 * ctx may go away after this if the userfault pseudo fd is
	 * already released.
	 */
out:
	userfaultfd_ctx_put(ctx);
}

Contributors

Person               Tokens     Prop  Commits  CommitProp
Pavel Emelyanov         162   72.97%        1      25.00%
Mike Rapoport            43   19.37%        1      25.00%
Andrea Arcangeli         17    7.66%        2      50.00%
Total                   222  100.00%        4     100.00%


static void userfaultfd_event_complete(struct userfaultfd_ctx *ctx,
				       struct userfaultfd_wait_queue *ewq)
{
	ewq->msg.event = 0;
	wake_up_locked(&ctx->event_wqh);
	__remove_wait_queue(&ctx->event_wqh, &ewq->wq);
}

Contributors

Person               Tokens     Prop  Commits  CommitProp
Pavel Emelyanov          45  100.00%        1     100.00%
Total                    45  100.00%        1     100.00%


int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
{
	struct userfaultfd_ctx *ctx = NULL, *octx;
	struct userfaultfd_fork_ctx *fctx;

	octx = vma->vm_userfaultfd_ctx.ctx;
	if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) {
		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
		vma->vm_flags &= ~(VM_UFFD_WP | VM_UFFD_MISSING);
		return 0;
	}

	list_for_each_entry(fctx, fcs, list)
		if (fctx->orig == octx) {
			ctx = fctx->new;
			break;
		}

	if (!ctx) {
		fctx = kmalloc(sizeof(*fctx), GFP_KERNEL);
		if (!fctx)
			return -ENOMEM;

		ctx = kmem_cache_alloc(userfaultfd_ctx_cachep, GFP_KERNEL);
		if (!ctx) {
			kfree(fctx);
			return -ENOMEM;
		}

		atomic_set(&ctx->refcount, 1);
		ctx->flags = octx->flags;
		ctx->state = UFFD_STATE_RUNNING;
		ctx->features = octx->features;
		ctx->released = false;
		ctx->mm = vma->vm_mm;
		atomic_inc(&ctx->mm->mm_count);

		userfaultfd_ctx_get(octx);
		fctx->orig = octx;
		fctx->new = ctx;
		list_add_tail(&fctx->list, fcs);
	}

	vma->vm_userfaultfd_ctx.ctx = ctx;
	return 0;
}

Contributors

Person               Tokens     Prop  Commits  CommitProp
Pavel Emelyanov         178   72.06%        1      25.00%
Andrea Arcangeli         65   26.32%        1      25.00%
Oleg Nesterov             3    1.21%        1      25.00%
Mike Rapoport             1    0.40%        1      25.00%
Total                   247  100.00%        4     100.00%


static void dup_fctx(struct userfaultfd_fork_ctx *fctx)
{
	struct userfaultfd_ctx *ctx = fctx->orig;
	struct userfaultfd_wait_queue ewq;

	msg_init(&ewq.msg);

	ewq.msg.event = UFFD_EVENT_FORK;
	ewq.msg.arg.reserved.reserved1 = (unsigned long)fctx->new;

	userfaultfd_event_wait_completion(ctx, &ewq);
}

Contributors

Person               Tokens     Prop  Commits  CommitProp
Pavel Emelyanov          64   96.97%        1      50.00%
Andrea Arcangeli          2    3.03%        1      50.00%
Total                    66  100.00%        2     100.00%


void dup_userfaultfd_complete(struct list_head *fcs)
{
	struct userfaultfd_fork_ctx *fctx, *n;

	list_for_each_entry_safe(fctx, n, fcs, list) {
		dup_fctx(fctx);
		list_del(&fctx->list);
		kfree(fctx);
	}
}

Contributors

Person               Tokens     Prop  Commits  CommitProp
Pavel Emelyanov          48  100.00%        1     100.00%
Total                    48  100.00%        1     100.00%
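When UFFD_FEATURE_EVENT_FORK is in use, the UFFD_EVENT_FORK queued by dup_fctx() above reaches the monitor as an ordinary uffd_msg; the ctx pointer stashed in msg.arg.reserved.reserved1 is converted into a new file descriptor by the read path later in this file, so userspace simply picks the fd up from msg.arg.fork.ufd. A hedged illustrative sketch (not part of this file); handle_fork_event() is a hypothetical helper.

/* Illustrative userspace sketch, not part of fs/userfaultfd.c. */
#include <linux/userfaultfd.h>

/* returns the child's userfaultfd carried by a UFFD_EVENT_FORK message,
 * or -1 if the message is of some other type */
static int handle_fork_event(const struct uffd_msg *msg)
{
	if (msg->event != UFFD_EVENT_FORK)
		return -1;

	/* the new fd monitors the child's copy of the registered ranges */
	return (int) msg->arg.fork.ufd;
}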


void mremap_userfaultfd_prep(struct vm_area_struct *vma,
			     struct vm_userfaultfd_ctx *vm_ctx)
{
	struct userfaultfd_ctx *ctx;

	ctx = vma->vm_userfaultfd_ctx.ctx;
	if (ctx && (ctx->features & UFFD_FEATURE_EVENT_REMAP)) {
		vm_ctx->ctx = ctx;
		userfaultfd_ctx_get(ctx);
	}
}

Contributors

Person               Tokens     Prop  Commits  CommitProp
Pavel Emelyanov          53  100.00%        1     100.00%
Total                    53  100.00%        1     100.00%


void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx,
				 unsigned long from, unsigned long to,
				 unsigned long len)
{
	struct userfaultfd_ctx *ctx = vm_ctx->ctx;
	struct userfaultfd_wait_queue ewq;

	if (!ctx)
		return;

	if (to & ~PAGE_MASK) {
		userfaultfd_ctx_put(ctx);
		return;
	}

	msg_init(&ewq.msg);

	ewq.msg.event = UFFD_EVENT_REMAP;
	ewq.msg.arg.remap.from = from;
	ewq.msg.arg.remap.to = to;
	ewq.msg.arg.remap.len = len;

	userfaultfd_event_wait_completion(ctx, &ewq);
}

Contributors

Person               Tokens     Prop  Commits  CommitProp
Pavel Emelyanov         114   98.28%        1      50.00%
Andrea Arcangeli          2    1.72%        1      50.00%
Total                   116  100.00%        2     100.00%


bool userfaultfd_remove(struct vm_area_struct *vma,
			unsigned long start, unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	struct userfaultfd_ctx *ctx;
	struct userfaultfd_wait_queue ewq;

	ctx = vma->vm_userfaultfd_ctx.ctx;
	if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE))
		return true;

	userfaultfd_ctx_get(ctx);
	up_read(&mm->mmap_sem);

	msg_init(&ewq.msg);

	ewq.msg.event = UFFD_EVENT_REMOVE;
	ewq.msg.arg.remove.start = start;
	ewq.msg.arg.remove.end = end;

	userfaultfd_event_wait_completion(ctx, &ewq);

	return false;
}

Contributors

Person               Tokens     Prop  Commits  CommitProp
Pavel Emelyanov         113   90.40%        1      33.33%
Andrea Arcangeli          7    5.60%        1      33.33%
Mike Rapoport             5    4.00%        1      33.33%
Total                   125  100.00%        3     100.00%


static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps,
			  unsigned long start, unsigned long end)
{
	struct userfaultfd_unmap_ctx *unmap_ctx;

	list_for_each_entry(unmap_ctx, unmaps, list)
		if (unmap_ctx->ctx == ctx && unmap_ctx->start == start &&
		    unmap_ctx->end == end)
			return true;

	return false;
}

Contributors

Person               Tokens     Prop  Commits  CommitProp
Mike Rapoport            63  100.00%        1     100.00%
Total                    63  100.00%        1     100.00%


int userfaultfd_unmap_prep(struct vm_area_struct *vma,
			   unsigned long start, unsigned long end,
			   struct list_head *unmaps)
{
	for ( ; vma && vma->vm_start < end; vma = vma->vm_next) {
		struct userfaultfd_unmap_ctx *unmap_ctx;
		struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;

		if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) ||
		    has_unmap_ctx(ctx, unmaps, start, end))
			continue;

		unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL);
		if (!unmap_ctx)
			return -ENOMEM;

		userfaultfd_ctx_get(ctx);
		unmap_ctx->ctx = ctx;
		unmap_ctx->start = start;
		unmap_ctx->end = end;
		list_add_tail(&unmap_ctx->list, unmaps);
	}

	return 0;
}

Contributors

Person               Tokens     Prop  Commits  CommitProp
Mike Rapoport           142  100.00%        1     100.00%
Total                   142  100.00%        1     100.00%


void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf)
{
	struct userfaultfd_unmap_ctx *ctx, *n;
	struct userfaultfd_wait_queue ewq;

	list_for_each_entry_safe(ctx, n, uf, list) {
		msg_init(&ewq.msg);

		ewq.msg.event = UFFD_EVENT_UNMAP;
		ewq.msg.arg.remove.start = ctx->start;
		ewq.msg.arg.remove.end = ctx->end;

		userfaultfd_event_wait_completion(ctx->ctx, &ewq);

		list_del(&ctx->list);
		kfree(ctx);
	}
}

Contributors

Person               Tokens     Prop  Commits  CommitProp
Mike Rapoport           106  100.00%        1     100.00%
Total                   106  100.00%        1     100.00%
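As the code above shows, UFFD_EVENT_UNMAP reuses the same remove payload as UFFD_EVENT_REMOVE. A hedged illustrative sketch (not part of this file) of a monitor dropping its own bookkeeping for the reported range; handle_range_gone() and the forget_range() mentioned in the comment are hypothetical names.

/* Illustrative userspace sketch, not part of fs/userfaultfd.c. */
#include <linux/userfaultfd.h>

static void handle_range_gone(const struct uffd_msg *msg)
{
	unsigned long long start, end;

	if (msg->event != UFFD_EVENT_REMOVE && msg->event != UFFD_EVENT_UNMAP)
		return;

	/* both event types carry the affected range in the remove payload */
	start = msg->arg.remove.start;
	end = msg->arg.remove.end;

	/* a real monitor would call something like forget_range(start, end)
	 * here to discard pages it was still planning to feed into the range */
	(void) start;
	(void) end;
}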


static int userfaultfd_release(struct inode *inode, struct file *file)
{
	struct userfaultfd_ctx *ctx = file->private_data;
	struct mm_struct *mm = ctx->mm;
	struct vm_area_struct *vma, *prev;
	/* len == 0 means wake all */
	struct userfaultfd_wake_range range = {