Release 4.11 fs/userfaultfd.c
/*
* fs/userfaultfd.c
*
* Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org>
* Copyright (C) 2008-2009 Red Hat, Inc.
* Copyright (C) 2015 Red Hat, Inc.
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*
* Some part derived from fs/eventfd.c (anon inode setup) and
* mm/ksm.c (mm hashing).
*/
#include <linux/list.h>
#include <linux/hashtable.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/mm.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/file.h>
#include <linux/bug.h>
#include <linux/anon_inodes.h>
#include <linux/syscalls.h>
#include <linux/userfaultfd_k.h>
#include <linux/mempolicy.h>
#include <linux/ioctl.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
/*
* State machine value kept in userfaultfd_ctx.state.
*/
enum userfaultfd_state {
/* NOTE(review): presumably the state before the UFFDIO_API handshake has been done - confirm against the ioctl code */
UFFD_STATE_WAIT_API,
/* state assigned to the contexts created by dup_userfaultfd() */
UFFD_STATE_RUNNING,
};
/*
* Start with fault_pending_wqh and fault_wqh so they're more likely
* to be in the same cacheline.
*/
struct userfaultfd_ctx {
/* waitqueue head for the pending (i.e. not read) userfaults */
wait_queue_head_t fault_pending_wqh;
/* waitqueue head for the userfaults */
wait_queue_head_t fault_wqh;
/* waitqueue head for the pseudo fd to wakeup poll/read */
wait_queue_head_t fd_wqh;
/* waitqueue head for events */
wait_queue_head_t event_wqh;
/* a refile sequence protected by fault_pending_wqh lock */
struct seqcount refile_seq;
/*
* pseudo fd refcounting, see userfaultfd_ctx_get() and
* userfaultfd_ctx_put()
*/
atomic_t refcount;
/* userfaultfd syscall flags */
unsigned int flags;
/* features requested from the userspace */
unsigned int features;
/* state machine */
enum userfaultfd_state state;
/*
* released: read without locks by handle_userfault() and by
* userfaultfd_event_wait_completion() to stop waiting
*/
bool released;
/*
* mm with one or more vmas attached to this userfaultfd_ctx; the
* reference is dropped with mmdrop() in userfaultfd_ctx_put()
*/
struct mm_struct *mm;
};
/*
* Bookkeeping entry built by dup_userfaultfd() and consumed by
* dup_userfaultfd_complete(): it pairs the context a vma carried on
* entry with the context allocated to replace it.
*/
struct userfaultfd_fork_ctx {
/* context the vma pointed to when dup_userfaultfd() was called */
struct userfaultfd_ctx *orig;
/* context dup_userfaultfd() allocated and installed on the vma */
struct userfaultfd_ctx *new;
/* link in the list handed to dup_userfaultfd() */
struct list_head list;
};
/*
* Pending UFFD_EVENT_UNMAP notification, queued by
* userfaultfd_unmap_prep() and delivered by
* userfaultfd_unmap_complete().
*/
struct userfaultfd_unmap_ctx {
/* context to notify; a reference is taken when the entry is queued */
struct userfaultfd_ctx *ctx;
/* start of the range, as passed to userfaultfd_unmap_prep() */
unsigned long start;
/* end of the range, as passed to userfaultfd_unmap_prep() */
unsigned long end;
/* link in the list handed to userfaultfd_unmap_prep() */
struct list_head list;
};
/*
* Per-waiter record: handle_userfault() and the event helpers allocate
* it on their kernel stack and queue it on one of the context
* waitqueues.
*/
struct userfaultfd_wait_queue {
/* message describing the page fault or event */
struct uffd_msg msg;
/* entry linked into fault_pending_wqh or event_wqh */
wait_queue_t wq;
/* context the waiter was queued on */
struct userfaultfd_ctx *ctx;
/*
* set by userfaultfd_wake_function(), polled by handle_userfault()
* to tell a targeted wakeup from a false one
*/
bool waken;
};
/*
* Wake key passed to userfaultfd_wake_function(): selects the waiters
* whose faulting address lies in [start, start + len).
*/
struct userfaultfd_wake_range {
/* first address of the range */
unsigned long start;
/* length of the range; len == 0 means wake all */
unsigned long len;
};
/*
 * Wake callback installed by handle_userfault() on its wait queue entry.
 *
 * @key points to a struct userfaultfd_wake_range.  The waiter is woken
 * when the range length is zero (wake everybody) or when the faulting
 * address stored in its message lies inside [start, start + len).
 *
 * Returns the value of wake_up_state(), or 0 if the waiter is outside
 * the requested range.
 */
static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode,
				     int wake_flags, void *key)
{
	struct userfaultfd_wake_range *range = key;
	struct userfaultfd_wait_queue *waiter;
	unsigned long first, span;
	int woken;

	waiter = container_of(wq, struct userfaultfd_wait_queue, wq);

	first = range->start;
	span = range->len;

	/* span == 0 means wake all, otherwise skip waiters out of range */
	if (span &&
	    !(first <= waiter->msg.arg.pagefault.address &&
	      waiter->msg.arg.pagefault.address < first + span))
		return 0;

	WRITE_ONCE(waiter->waken, true);
	/*
	 * try_to_wake_up() contains an implicit smp_mb__before_spinlock,
	 * which makes the waken store above visible to the other CPUs
	 * before the task is woken.
	 */
	woken = wake_up_state(wq->private, mode);
	if (woken) {
		/*
		 * Autoremove behavior: a waiter is woken only once.
		 *
		 * As soon as the list_del_init() below is visible to the
		 * other CPUs the waitqueue entry may vanish from under
		 * us, see the !list_empty_careful() check in
		 * handle_userfault(). wq->private has been read before
		 * the call to the extern function wake_up_state() (which
		 * ends up in try_to_wake_up()), and the implicit
		 * smp_mb__before_spinlock in there is what makes an
		 * explicit smp_mb() unnecessary here; a plain
		 * spin_lock;spin_unlock; would not be enough.
		 */
		list_del_init(&wq->task_list);
	}

	return woken;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Andrea Arcangeli | 144 | 100.00% | 3 | 100.00% |
Total | 144 | 100.00% | 3 | 100.00% |
/**
 * userfaultfd_ctx_get - Take one more reference on a userfaultfd context.
 * @ctx: [in] Pointer to the userfaultfd context.
 *
 * The reference count must still be above zero when this is called;
 * trying to revive a context whose count already reached zero is a
 * BUG().
 */
static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx)
{
	int took_ref = atomic_inc_not_zero(&ctx->refcount);

	if (!took_ref)
		BUG();
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Andrea Arcangeli | 25 | 100.00% | 1 | 100.00% |
Total | 25 | 100.00% | 1 | 100.00% |
/**
 * userfaultfd_ctx_put - Drop one reference on a userfaultfd context.
 * @ctx: [in] Pointer to userfaultfd context.
 *
 * The reference being dropped must have been obtained earlier, either
 * with userfaultfd_ctx_get() or userfaultfd_ctx_fdget(). Dropping the
 * last reference releases the mm reference held by the context and
 * returns the context to its slab cache.
 */
static void userfaultfd_ctx_put(struct userfaultfd_ctx *ctx)
{
	if (!atomic_dec_and_test(&ctx->refcount))
		return;

	/*
	 * Last reference gone: none of the waitqueues may still be
	 * locked or have waiters queued on it.
	 */
	VM_BUG_ON(spin_is_locked(&ctx->fault_pending_wqh.lock));
	VM_BUG_ON(waitqueue_active(&ctx->fault_pending_wqh));
	VM_BUG_ON(spin_is_locked(&ctx->fault_wqh.lock));
	VM_BUG_ON(waitqueue_active(&ctx->fault_wqh));
	VM_BUG_ON(spin_is_locked(&ctx->event_wqh.lock));
	VM_BUG_ON(waitqueue_active(&ctx->event_wqh));
	VM_BUG_ON(spin_is_locked(&ctx->fd_wqh.lock));
	VM_BUG_ON(waitqueue_active(&ctx->fd_wqh));

	mmdrop(ctx->mm);
	kmem_cache_free(userfaultfd_ctx_cachep, ctx);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Andrea Arcangeli | 108 | 81.20% | 2 | 50.00% |
Pavel Emelyanov | 24 | 18.05% | 1 | 25.00% |
Oleg Nesterov | 1 | 0.75% | 1 | 25.00% |
Total | 133 | 100.00% | 4 | 100.00% |
/*
 * Reset a uffd_msg to all zeroes before its fields are filled in.
 */
static inline void msg_init(struct uffd_msg *msg)
{
	/* the message size is pinned to 32 bytes at build time */
	BUILD_BUG_ON(sizeof(*msg) != 32);

	/*
	 * A field-by-field initialization would leave the paddings
	 * untouched and leak kernel data to userland, so the whole
	 * structure has to be cleared with memset.
	 */
	memset(msg, 0, sizeof(*msg));
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Andrea Arcangeli | 37 | 100.00% | 2 | 100.00% |
Total | 37 | 100.00% | 2 | 100.00% |
static inline struct uffd_msg userfault_msg(unsigned long address,
unsigned int flags,
unsigned long reason)
{
struct uffd_msg msg;
msg_init(&msg);
msg.event = UFFD_EVENT_PAGEFAULT;
msg.arg.pagefault.address = address;
if (flags & FAULT_FLAG_WRITE)
/*
* If UFFD_FEATURE_PAGEFAULT_FLAG_WP was set in the
* uffdio_api.features and UFFD_PAGEFAULT_FLAG_WRITE
* was not set in a UFFD_EVENT_PAGEFAULT, it means it
* was a read fault, otherwise if set it means it's
* a write fault.
*/
msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_WRITE;
if (reason & VM_UFFD_WP)
/*
* If UFFD_FEATURE_PAGEFAULT_FLAG_WP was set in the
* uffdio_api.features and UFFD_PAGEFAULT_FLAG_WP was
* not set in a UFFD_EVENT_PAGEFAULT, it means it was
* a missing fault, otherwise if set it means it's a
* write protect fault.
*/
msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_WP;
return msg;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Andrea Arcangeli | 83 | 100.00% | 3 | 100.00% |
Total | 83 | 100.00% | 3 | 100.00% |
#ifdef CONFIG_HUGETLB_PAGE
/*
 * Hugetlb counterpart of userfaultfd_must_wait(): same purpose, but the
 * page table entry is looked up with huge_pte_offset().
 *
 * Returns true when no huge pte exists for @address, when the huge pte
 * is none, or when it is not writable and @reason includes VM_UFFD_WP.
 * @flags is not used. The mmap_sem of ctx->mm must be held.
 */
static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
					      unsigned long address,
					      unsigned long flags,
					      unsigned long reason)
{
	struct mm_struct *mm = ctx->mm;
	pte_t *hpte;

	VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));

	hpte = huge_pte_offset(mm, address);
	if (!hpte)
		return true;

	/*
	 * The entry is read without locks: we are in a wait_event, so
	 * it is fine if it changes under us.
	 */
	if (huge_pte_none(*hpte))
		return true;

	return !huge_pte_write(*hpte) && (reason & VM_UFFD_WP);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Mike Kravetz | 112 | 100.00% | 1 | 100.00% |
Total | 112 | 100.00% | 1 | 100.00% |
#else
/*
* Stub used when CONFIG_HUGETLB_PAGE is not set: it keeps the call in
* handle_userfault() compilable and always reports that there is no
* need to wait.
*/
static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
unsigned long address,
unsigned long flags,
unsigned long reason)
{
return false; /* should never get here */
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Mike Kravetz | 28 | 100.00% | 1 | 100.00% |
Total | 28 | 100.00% | 1 | 100.00% |
#endif /* CONFIG_HUGETLB_PAGE */
/*
 * Re-check the page tables after the fault has been registered into
 * the fault_pending_wqh. If the userfault has been resolved in the
 * meantime there is no need to wait, which spares userland from having
 * to UFFDIO_WAKE a userfault that is already gone when userfaultfd_read
 * and UFFDIO_COPY|ZEROPAGE run simultaneously on two different threads.
 *
 * Returns true if the fault still has to wait: some page table level
 * is not present, or the pte is none. Returns false for a transparent
 * huge pmd or a populated pte. @flags and @reason are not used. The
 * mmap_sem of ctx->mm must be held.
 */
static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
					 unsigned long address,
					 unsigned long flags,
					 unsigned long reason)
{
	struct mm_struct *mm = ctx->mm;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd, pmd_snapshot;
	pte_t *pte;
	bool still_missing;

	VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));

	pgd = pgd_offset(mm, address);
	if (!pgd_present(*pgd))
		return true;

	p4d = p4d_offset(pgd, address);
	if (!p4d_present(*p4d))
		return true;

	pud = pud_offset(p4d, address);
	if (!pud_present(*pud))
		return true;

	pmd = pmd_offset(pud, address);
	/*
	 * The pmd is unstable (as in pmd_trans_unstable), so it is
	 * sampled exactly once. READ_ONCE must act as a barrier with
	 * narrower scope, equivalent to:
	 *     pmd_snapshot = *pmd; barrier();
	 */
	pmd_snapshot = READ_ONCE(*pmd);
	if (!pmd_present(pmd_snapshot))
		return true;

	if (pmd_trans_huge(pmd_snapshot))
		return false;

	/*
	 * From here on the pmd is stable (as in !pmd_trans_unstable):
	 * it can be re-read and the standard pte_offset_map() can be
	 * used instead of parsing the snapshot.
	 */
	pte = pte_offset_map(pmd, address);
	/*
	 * The pte is read without locks: we are in a wait_event, so it
	 * is fine if it changes under us.
	 */
	still_missing = pte_none(*pte);
	pte_unmap(pte);

	return still_missing;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Andrea Arcangeli | 185 | 87.68% | 1 | 50.00% |
Kirill A. Shutemov | 26 | 12.32% | 1 | 50.00% |
Total | 211 | 100.00% | 2 | 100.00% |
/*
* handle_userfault - hand a page fault over to the userfaultfd of its vma
* and block the faulting task until it is resolved.
* @vmf: the fault being handled; vmf->vma carries the userfaultfd context.
* @reason: exactly one of VM_UFFD_MISSING and VM_UFFD_WP.
*
* Must be called with the mmap_sem of the vma's mm held.
*
* Returns VM_FAULT_SIGBUS if the fault cannot be handed to userland: no
* context on the vma, context already released, task exiting, or
* FAULT_FLAG_ALLOW_RETRY not set. Returns VM_FAULT_RETRY right away,
* without queueing anything, for FAULT_FLAG_RETRY_NOWAIT. Otherwise the
* fault is queued on fault_pending_wqh, the mmap_sem is released and the
* result is VM_FAULT_RETRY (plus VM_FAULT_MAJOR if the task went to
* sleep), or VM_FAULT_NOPAGE with the mmap_sem taken again if a non
* fatal signal is pending on a killable fault coming from user mode.
*
* The locking rules involved in returning VM_FAULT_RETRY depending on
* FAULT_FLAG_ALLOW_RETRY, FAULT_FLAG_RETRY_NOWAIT and
* FAULT_FLAG_KILLABLE are not straightforward. The "Caution"
* recommendation in __lock_page_or_retry is not an understatement.
*
* If FAULT_FLAG_ALLOW_RETRY is set, the mmap_sem must be released
* before returning VM_FAULT_RETRY only if FAULT_FLAG_RETRY_NOWAIT is
* not set.
*
* If FAULT_FLAG_ALLOW_RETRY is set but FAULT_FLAG_KILLABLE is not
* set, VM_FAULT_RETRY can still be returned if and only if there are
* fatal_signal_pending()s, and the mmap_sem must be released before
* returning it.
*/
int handle_userfault(struct vm_fault *vmf, unsigned long reason)
{
struct mm_struct *mm = vmf->vma->vm_mm;
struct userfaultfd_ctx *ctx;
struct userfaultfd_wait_queue uwq;
int ret;
bool must_wait, return_to_userland;
long blocking_state;
BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
/* default result for every early exit below */
ret = VM_FAULT_SIGBUS;
ctx = vmf->vma->vm_userfaultfd_ctx.ctx;
if (!ctx)
goto out;
BUG_ON(ctx->mm != mm);
/* only MISSING and WP are valid reasons, and exactly one of them */
VM_BUG_ON(reason & ~(VM_UFFD_MISSING|VM_UFFD_WP));
VM_BUG_ON(!(reason & VM_UFFD_MISSING) ^ !!(reason & VM_UFFD_WP));
/*
* If it's already released don't get it. This avoids to loop
* in __get_user_pages if userfaultfd_release waits on the
* caller of handle_userfault to release the mmap_sem.
*/
if (unlikely(ACCESS_ONCE(ctx->released)))
goto out;
/*
* We don't do userfault handling for the final child pid update.
*/
if (current->flags & PF_EXITING)
goto out;
/*
* Check that we can return VM_FAULT_RETRY.
*
* NOTE: it should become possible to return VM_FAULT_RETRY
* even if FAULT_FLAG_TRIED is set without leading to gup()
* -EBUSY failures, if the userfaultfd is to be extended for
* VM_UFFD_WP tracking and we intend to arm the userfault
* without first stopping userland access to the memory. For
* VM_UFFD_MISSING userfaults this is enough for now.
*/
if (unlikely(!(vmf->flags & FAULT_FLAG_ALLOW_RETRY))) {
/*
* Validate the invariant that nowait must allow retry
* to be sure not to return SIGBUS erroneously on
* nowait invocations.
*/
BUG_ON(vmf->flags & FAULT_FLAG_RETRY_NOWAIT);
#ifdef CONFIG_DEBUG_VM
if (printk_ratelimit()) {
printk(KERN_WARNING
"FAULT_FLAG_ALLOW_RETRY missing %x\n",
vmf->flags);
dump_stack();
}
#endif
goto out;
}
/*
* Handle nowait, not much to do other than tell it to retry
* and wait.
*/
ret = VM_FAULT_RETRY;
if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
goto out;
/* take the reference before dropping the mmap_sem */
userfaultfd_ctx_get(ctx);
/* the waiter lives on this kernel stack for the whole wait */
init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function);
uwq.wq.private = current;
uwq.msg = userfault_msg(vmf->address, vmf->flags, reason);
uwq.ctx = ctx;
uwq.waken = false;
/*
* Sleep interruptibly only if the fault is both FAULT_FLAG_USER
* and FAULT_FLAG_KILLABLE, otherwise only fatal signals may
* interrupt the wait.
*/
return_to_userland =
(vmf->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
(FAULT_FLAG_USER|FAULT_FLAG_KILLABLE);
blocking_state = return_to_userland ? TASK_INTERRUPTIBLE :
TASK_KILLABLE;
spin_lock(&ctx->fault_pending_wqh.lock);
/*
* After the __add_wait_queue the uwq is visible to userland
* through poll/read().
*/
__add_wait_queue(&ctx->fault_pending_wqh, &uwq.wq);
/*
* The smp_mb() after __set_current_state prevents the reads
* following the spin_unlock to happen before the list_add in
* __add_wait_queue.
*/
set_current_state(blocking_state);
spin_unlock(&ctx->fault_pending_wqh.lock);
/*
* Now that the waiter is queued, look at the page tables again:
* the fault may have been resolved already.
*/
if (!is_vm_hugetlb_page(vmf->vma))
must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
reason);
else
must_wait = userfaultfd_huge_must_wait(ctx, vmf->address,
vmf->flags, reason);
up_read(&mm->mmap_sem);
if (likely(must_wait && !ACCESS_ONCE(ctx->released) &&
(return_to_userland ? !signal_pending(current) :
!fatal_signal_pending(current)))) {
/* let poll/read on the pseudo fd know there is a new userfault */
wake_up_poll(&ctx->fd_wqh, POLLIN);
schedule();
ret |= VM_FAULT_MAJOR;
/*
* False wakeups can originate even from rwsem before
* up_read() however userfaults will wait either for a
* targeted wakeup on the specific uwq waitqueue from
* wake_userfault() or for signals or for uffd
* release.
*/
while (!READ_ONCE(uwq.waken)) {
/*
* This needs the full smp_store_mb()
* guarantee as the state write must be
* visible to other CPUs before reading
* uwq.waken from other CPUs.
*/
set_current_state(blocking_state);
if (READ_ONCE(uwq.waken) ||
READ_ONCE(ctx->released) ||
(return_to_userland ? signal_pending(current) :
fatal_signal_pending(current)))
break;
schedule();
}
}
__set_current_state(TASK_RUNNING);
if (return_to_userland) {
if (signal_pending(current) &&
!fatal_signal_pending(current)) {
/*
* If we got a SIGSTOP or SIGCONT and this is
* a normal userland page fault, just let
* userland return so the signal will be
* handled and gdb debugging works. The page
* fault code immediately after we return from
* this function is going to release the
* mmap_sem and it's not depending on it
* (unlike gup would if we were not to return
* VM_FAULT_RETRY).
*
* If a fatal signal is pending we still take
* the streamlined VM_FAULT_RETRY failure path
* and there's no need to retake the mmap_sem
* in such case.
*/
down_read(&mm->mmap_sem);
ret = VM_FAULT_NOPAGE;
}
}
/*
* Here we race with the list_del; list_add in
* userfaultfd_ctx_read(), however because we don't ever run
* list_del_init() to refile across the two lists, the prev
* and next pointers will never point to self. list_add also
* would never let any of the two pointers to point to
* self. So list_empty_careful won't risk to see both pointers
* pointing to self at any time during the list refile. The
* only case where list_del_init() is called is the full
* removal in the wake function and there we don't re-list_add
* and it's fine not to block on the spinlock. The uwq on this
* kernel stack can be released after the list_del_init.
*/
if (!list_empty_careful(&uwq.wq.task_list)) {
spin_lock(&ctx->fault_pending_wqh.lock);
/*
* No need of list_del_init(), the uwq on the stack
* will be freed shortly anyway.
*/
list_del(&uwq.wq.task_list);
spin_unlock(&ctx->fault_pending_wqh.lock);
}
/*
* ctx may go away after this if the userfault pseudo fd is
* already released.
*/
userfaultfd_ctx_put(ctx);
out:
return ret;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Andrea Arcangeli | 527 | 89.17% | 8 | 66.67% |
Mike Kravetz | 28 | 4.74% | 1 | 8.33% |
Jan Kara | 13 | 2.20% | 1 | 8.33% |
Linus Torvalds | 12 | 2.03% | 1 | 8.33% |
Kirill A. Shutemov | 11 | 1.86% | 1 | 8.33% |
Total | 591 | 100.00% | 12 | 100.00% |
static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
struct userfaultfd_wait_queue *ewq)
{
if (WARN_ON_ONCE(current->flags & PF_EXITING))
goto out;
ewq->ctx = ctx;
init_waitqueue_entry(&ewq->wq, current);
spin_lock(&ctx->event_wqh.lock);
/*
* After the __add_wait_queue the uwq is visible to userland
* through poll/read().
*/
__add_wait_queue(&ctx->event_wqh, &ewq->wq);
for (;;) {
set_current_state(TASK_KILLABLE);
if (ewq->msg.event == 0)
break;
if (ACCESS_ONCE(ctx->released) ||
fatal_signal_pending(current)) {
__remove_wait_queue(&ctx->event_wqh, &ewq->wq);
if (ewq->msg.event == UFFD_EVENT_FORK) {
struct userfaultfd_ctx *new;
new = (struct userfaultfd_ctx *)
(unsigned long)
ewq->msg.arg.reserved.reserved1;
userfaultfd_ctx_put(new);
}
break;
}
spin_unlock(&ctx->event_wqh.lock);
wake_up_poll(&ctx->fd_wqh, POLLIN);
schedule();
spin_lock(&ctx->event_wqh.lock);
}
__set_current_state(TASK_RUNNING);
spin_unlock(&ctx->event_wqh.lock);
/*
* ctx may go away after this if the userfault pseudo fd is
* already released.
*/
out:
userfaultfd_ctx_put(ctx);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Pavel Emelyanov | 162 | 72.97% | 1 | 25.00% |
Mike Rapoport | 43 | 19.37% | 1 | 25.00% |
Andrea Arcangeli | 17 | 7.66% | 2 | 50.00% |
Total | 222 | 100.00% | 4 | 100.00% |
static void userfaultfd_event_complete(struct userfaultfd_ctx *ctx,
struct userfaultfd_wait_queue *ewq)
{
ewq->msg.event = 0;
wake_up_locked(&ctx->event_wqh);
__remove_wait_queue(&ctx->event_wqh, &ewq->wq);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Pavel Emelyanov | 45 | 100.00% | 1 | 100.00% |
Total | 45 | 100.00% | 1 | 100.00% |
int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
{
struct userfaultfd_ctx *ctx = NULL, *octx;
struct userfaultfd_fork_ctx *fctx;
octx = vma->vm_userfaultfd_ctx.ctx;
if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) {
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
vma->vm_flags &= ~(VM_UFFD_WP | VM_UFFD_MISSING);
return 0;
}
list_for_each_entry(fctx, fcs, list)
if (fctx->orig == octx) {
ctx = fctx->new;
break;
}
if (!ctx) {
fctx = kmalloc(sizeof(*fctx), GFP_KERNEL);
if (!fctx)
return -ENOMEM;
ctx = kmem_cache_alloc(userfaultfd_ctx_cachep, GFP_KERNEL);
if (!ctx) {
kfree(fctx);
return -ENOMEM;
}
atomic_set(&ctx->refcount, 1);
ctx->flags = octx->flags;
ctx->state = UFFD_STATE_RUNNING;
ctx->features = octx->features;
ctx->released = false;
ctx->mm = vma->vm_mm;
atomic_inc(&ctx->mm->mm_count);
userfaultfd_ctx_get(octx);
fctx->orig = octx;
fctx->new = ctx;
list_add_tail(&fctx->list, fcs);
}
vma->vm_userfaultfd_ctx.ctx = ctx;
return 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Pavel Emelyanov | 178 | 72.06% | 1 | 25.00% |
Andrea Arcangeli | 65 | 26.32% | 1 | 25.00% |
Oleg Nesterov | 3 | 1.21% | 1 | 25.00% |
Mike Rapoport | 1 | 0.40% | 1 | 25.00% |
Total | 247 | 100.00% | 4 | 100.00% |
static void dup_fctx(struct userfaultfd_fork_ctx *fctx)
{
struct userfaultfd_ctx *ctx = fctx->orig;
struct userfaultfd_wait_queue ewq;
msg_init(&ewq.msg);
ewq.msg.event = UFFD_EVENT_FORK;
ewq.msg.arg.reserved.reserved1 = (unsigned long)fctx->new;
userfaultfd_event_wait_completion(ctx, &ewq);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Pavel Emelyanov | 64 | 96.97% | 1 | 50.00% |
Andrea Arcangeli | 2 | 3.03% | 1 | 50.00% |
Total | 66 | 100.00% | 2 | 100.00% |
void dup_userfaultfd_complete(struct list_head *fcs)
{
struct userfaultfd_fork_ctx *fctx, *n;
list_for_each_entry_safe(fctx, n, fcs, list) {
dup_fctx(fctx);
list_del(&fctx->list);
kfree(fctx);
}
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Pavel Emelyanov | 48 | 100.00% | 1 | 100.00% |
Total | 48 | 100.00% | 1 | 100.00% |
/*
 * If @vma is attached to a userfaultfd context that requested
 * UFFD_FEATURE_EVENT_REMAP, take a reference on that context and store
 * it in @vm_ctx for mremap_userfaultfd_complete(). Otherwise @vm_ctx
 * is left untouched.
 */
void mremap_userfaultfd_prep(struct vm_area_struct *vma,
			     struct vm_userfaultfd_ctx *vm_ctx)
{
	struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;

	if (!ctx)
		return;
	if (!(ctx->features & UFFD_FEATURE_EVENT_REMAP))
		return;

	userfaultfd_ctx_get(ctx);
	vm_ctx->ctx = ctx;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Pavel Emelyanov | 53 | 100.00% | 1 | 100.00% |
Total | 53 | 100.00% | 1 | 100.00% |
void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx,
unsigned long from, unsigned long to,
unsigned long len)
{
struct userfaultfd_ctx *ctx = vm_ctx->ctx;
struct userfaultfd_wait_queue ewq;
if (!ctx)
return;
if (to & ~PAGE_MASK) {
userfaultfd_ctx_put(ctx);
return;
}
msg_init(&ewq.msg);
ewq.msg.event = UFFD_EVENT_REMAP;
ewq.msg.arg.remap.from = from;
ewq.msg.arg.remap.to = to;
ewq.msg.arg.remap.len = len;
userfaultfd_event_wait_completion(ctx, &ewq);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Pavel Emelyanov | 114 | 98.28% | 1 | 50.00% |
Andrea Arcangeli | 2 | 1.72% | 1 | 50.00% |
Total | 116 | 100.00% | 2 | 100.00% |
bool userfaultfd_remove(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
struct mm_struct *mm = vma->vm_mm;
struct userfaultfd_ctx *ctx;
struct userfaultfd_wait_queue ewq;
ctx = vma->vm_userfaultfd_ctx.ctx;
if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE))
return true;
userfaultfd_ctx_get(ctx);
up_read(&mm->mmap_sem);
msg_init(&ewq.msg);
ewq.msg.event = UFFD_EVENT_REMOVE;
ewq.msg.arg.remove.start = start;
ewq.msg.arg.remove.end = end;
userfaultfd_event_wait_completion(ctx, &ewq);
return false;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Pavel Emelyanov | 113 | 90.40% | 1 | 33.33% |
Andrea Arcangeli | 7 | 5.60% | 1 | 33.33% |
Mike Rapoport | 5 | 4.00% | 1 | 33.33% |
Total | 125 | 100.00% | 3 | 100.00% |
static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps,
unsigned long start, unsigned long end)
{
struct userfaultfd_unmap_ctx *unmap_ctx;
list_for_each_entry(unmap_ctx, unmaps, list)
if (unmap_ctx->ctx == ctx && unmap_ctx->start == start &&
unmap_ctx->end == end)
return true;
return false;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Mike Rapoport | 63 | 100.00% | 1 | 100.00% |
Total | 63 | 100.00% | 1 | 100.00% |
/*
 * Walk the vmas from @vma up to @end and queue on @unmaps one pending
 * unmap notification for each distinct userfaultfd context that
 * requested UFFD_FEATURE_EVENT_UNMAP. Every queued entry records the
 * full [@start, @end) range and holds a reference on its context.
 *
 * Returns 0 on success, -ENOMEM if an entry could not be allocated
 * (entries queued before the failure stay on @unmaps).
 */
int userfaultfd_unmap_prep(struct vm_area_struct *vma,
			   unsigned long start, unsigned long end,
			   struct list_head *unmaps)
{
	for ( ; vma && vma->vm_start < end; vma = vma->vm_next) {
		struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
		struct userfaultfd_unmap_ctx *pending;

		if (!ctx)
			continue;
		if (!(ctx->features & UFFD_FEATURE_EVENT_UNMAP))
			continue;
		/* one notification per context and range is enough */
		if (has_unmap_ctx(ctx, unmaps, start, end))
			continue;

		pending = kzalloc(sizeof(*pending), GFP_KERNEL);
		if (!pending)
			return -ENOMEM;

		userfaultfd_ctx_get(ctx);
		pending->ctx = ctx;
		pending->start = start;
		pending->end = end;
		list_add_tail(&pending->list, unmaps);
	}

	return 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Mike Rapoport | 142 | 100.00% | 1 | 100.00% |
Total | 142 | 100.00% | 1 | 100.00% |
void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf)
{
struct userfaultfd_unmap_ctx *ctx, *n;
struct userfaultfd_wait_queue ewq;
list_for_each_entry_safe(ctx, n, uf, list) {
msg_init(&ewq.msg);
ewq.msg.event = UFFD_EVENT_UNMAP;
ewq.msg.arg.remove.start = ctx->start;
ewq.msg.arg.remove.end = ctx->end;
userfaultfd_event_wait_completion(ctx->ctx, &ewq);
list_del(&ctx->list);
kfree(ctx);
}
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Mike Rapoport | 106 | 100.00% | 1 | 100.00% |
Total | 106 | 100.00% | 1 | 100.00% |
static int userfaultfd_release(struct inode *inode, struct file *file)
{
struct userfaultfd_ctx *ctx = file->private_data;
struct mm_struct *mm = ctx->mm;
struct vm_area_struct *vma, *prev;
/* len == 0 means wake all */
struct userfaultfd_wake_range range = {