/*
 *  mm/mprotect.c
 *
 *  (C) Copyright 1994 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 *
 *  Address space accounting code
 *  (C) Copyright 2002 Red Hat Inc, All Rights Reserved
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/mempolicy.h>
#include <linux/personality.h>
#include <linux/syscalls.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/perf_event.h>
#include <linux/ksm.h>
#include <linux/pkeys.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

#include "internal.h"

/*
 * For a prot_numa update we only hold mmap_sem for read so there is a
 * potential race with faulting where a pmd was temporarily none. This
 * function checks for a transhuge pmd under the appropriate lock. It
 * returns a pte if it was successfully locked or NULL if it raced with
 * a transhuge insertion.
 */

/*
 * Map and lock the pte table covering @addr beneath @pmd.
 *
 * @vma:       vma whose mm owns the page tables
 * @pmd:       pmd entry to look under
 * @addr:      virtual address selecting the pte
 * @prot_numa: non-zero for a prot_numa walk, which additionally takes the
 *             pmd lock and re-checks the pmd before mapping
 * @ptl:       out: the pte lock that was taken on success
 *
 * Returns the mapped pte with *@ptl held, or NULL when a prot_numa walk
 * finds the pmd transparent-huge or none (nothing is left locked then).
 */
static pte_t *lock_pte_protection(struct vm_area_struct *vma, pmd_t *pmd,
				  unsigned long addr, int prot_numa,
				  spinlock_t **ptl)
{
	spinlock_t *pmd_ptl;
	pte_t *mapped;

	if (prot_numa) {
		/* Re-validate the pmd under its own lock before going deeper. */
		pmd_ptl = pmd_lock(vma->vm_mm, pmd);
		if (unlikely(pmd_trans_huge(*pmd) || pmd_none(*pmd)))
			mapped = NULL;
		else
			mapped = pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl);
		spin_unlock(pmd_ptl);

		return mapped;
	}

	/* !prot_numa is protected by mmap_sem held for write */
	return pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl);
}


mel gormanmel gorman117100.00%1100.00%

/*
 * change_pte_range - apply @newprot to the ptes mapping [@addr, @end)
 * under @pmd.
 *
 * @vma:               vma being changed; supplies the mm and vm_flags
 * @pmd:               pmd entry whose pte table is walked
 * @addr, @end:        virtual range, stepped one PAGE_SIZE at a time
 * @newprot:           protection passed to pte_modify() for present ptes
 * @dirty_accountable: when set, already-dirty ptes may be made writable
 *                     again straight away (see the soft-dirty check below)
 * @prot_numa:         non-zero for a prot_numa walk; selects the locking
 *                     done by lock_pte_protection() and the skip rules below
 *
 * Returns the number of entries rewritten, or 0 when
 * lock_pte_protection() could not lock a pte table.
 */
static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable, int prot_numa)
{
	struct mm_struct *mm = vma->vm_mm;
	pte_t *pte, oldpte;
	spinlock_t *ptl;
	unsigned long pages = 0;

	pte = lock_pte_protection(vma, pmd, addr, prot_numa, &ptl);
	if (!pte)
		return 0;

	arch_enter_lazy_mmu_mode();
	do {
		oldpte = *pte;
		if (pte_present(oldpte)) {
			pte_t ptent;
			/* Sampled before the pte is modified below. */
			bool preserve_write = prot_numa && pte_write(oldpte);

			/*
			 * Avoid trapping faults against the zero or KSM
			 * pages. See similar comment in change_huge_pmd.
			 *
			 * Note: "continue" in a do/while jumps to the loop
			 * condition, so pte and addr are still advanced.
			 */
			if (prot_numa) {
				struct page *page;

				page = vm_normal_page(vma, addr, oldpte);
				if (!page || PageKsm(page))
					continue;

				/* Avoid TLB flush if possible */
				if (pte_protnone(oldpte))
					continue;
			}

			/* start/commit pair brackets the pte rewrite */
			ptent = ptep_modify_prot_start(mm, addr, pte);
			ptent = pte_modify(ptent, newprot);
			if (preserve_write)
				ptent = pte_mkwrite(ptent);

			/* Avoid taking write faults for known dirty pages */
			if (dirty_accountable && pte_dirty(ptent) &&
					(pte_soft_dirty(ptent) ||
					 !(vma->vm_flags & VM_SOFTDIRTY))) {
				ptent = pte_mkwrite(ptent);
			}
			ptep_modify_prot_commit(mm, addr, pte, ptent);
			pages++;
		} else if (IS_ENABLED(CONFIG_MIGRATION)) {
			/* Non-present pte: decode it as a swap-style entry. */
			swp_entry_t entry = pte_to_swp_entry(oldpte);

			if (is_write_migration_entry(entry)) {
				pte_t newpte;
				/*
				 * A protection check is difficult so
				 * just be safe and disable write
				 */
				make_migration_entry_read(&entry);
				newpte = swp_entry_to_pte(entry);
				/* carry the soft-dirty bit over to the new entry */
				if (pte_swp_soft_dirty(oldpte))
					newpte = pte_swp_mksoft_dirty(newpte);
				set_pte_at(mm, addr, pte, newpte);

				pages++;
			}
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
	arch_leave_lazy_mmu_mode();
	/* The loop leaves pte one past the last entry visited; step back. */
	pte_unmap_unlock(pte - 1, ptl);

	return pages;
}


mel gormanmel gorman9728.61%728.00%
christoph lameterchristoph lameter4713.86%14.00%
aneesh kumaraneesh kumar277.96%14.00%
hugh dickinshugh dickins267.67%28.00%
cyrill gorcunovcyrill gorcunov257.37%14.00%
peter zijlstrapeter zijlstra236.78%28.00%
peter feinerpeter feiner164.72%14.00%
ingo molnaringo molnar61.77%14.00%
zachary amsdenzachary amsden61.77%14.00%
david s. millerdavid s. miller61.77%14.00%
konstantin khlebnikovkonstantin khlebnikov41.18%14.00%
rohit sethrohit seth30.88%14.00%
jeremy fitzhardingejeremy fitzhardinge10.29%14.00%

/*
 * change_pmd_range - walk the pmd entries under @pud covering
 * [@addr, @end) and apply @newprot.
 *
 * Huge/devmap pmds that span a whole HPAGE_PMD_SIZE step are handed to
 * change_huge_pmd(); partially covered ones are split first and then
 * processed pte by pte through change_pte_range().
 *
 * mmu_notifier_invalidate_range_start() is called lazily, the first time
 * a pmd that is not skipped is met, and is paired with
 * mmu_notifier_invalidate_range_end() after the loop.
 *
 * Returns the accumulated count of updated pages (HPAGE_PMD_NR for each
 * fully updated huge pmd plus whatever change_pte_range() reports).
 */
static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
		pud_t *pud, unsigned long addr, unsigned long end,
		pgprot_t newprot, int dirty_accountable, int prot_numa)
{
	pmd_t *pmd;
	struct mm_struct *mm = vma->vm_mm;
	unsigned long next;
	unsigned long pages = 0;
	unsigned long nr_huge_updates = 0;
	/* 0 doubles as "notifier not started yet" */
	unsigned long mni_start = 0;

	pmd = pmd_offset(pud, addr);
	do {
		unsigned long this_pages;

		next = pmd_addr_end(addr, end);
		/* Skip none/bad pmds unless they are huge or devmap. */
		if (!pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)
				&& pmd_none_or_clear_bad(pmd))
			continue;

		/* invoke the mmu notifier if the pmd is populated */
		if (!mni_start) {
			mni_start = addr;
			mmu_notifier_invalidate_range_start(mm, mni_start, end);
		}

		if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
			if (next - addr != HPAGE_PMD_SIZE) {
				/* Range covers only part of the huge pmd. */
				split_huge_pmd(vma, pmd, addr);
				if (pmd_trans_unstable(pmd))
					continue;
			} else {
				int nr_ptes = change_huge_pmd(vma, pmd, addr,
						newprot, prot_numa);

				if (nr_ptes) {
					/* only a full HPAGE_PMD_NR result is counted */
					if (nr_ptes == HPAGE_PMD_NR) {
						pages += HPAGE_PMD_NR;
						nr_huge_updates++;
					}

					/* huge pmd was handled */
					continue;
				}
			}
			/* fall through, the trans huge pmd just split */
		}
		this_pages = change_pte_range(vma, pmd, addr, next, newprot,
				 dirty_accountable, prot_numa);
		pages += this_pages;
	} while (pmd++, addr = next, addr != end);

	/* Only end the notifier range if it was started above. */
	if (mni_start)
		mmu_notifier_invalidate_range_end(mm, mni_start, end);

	if (nr_huge_updates)
		count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates);
	return pages;
}


rik van rielrik van riel6522.65%28.70%
mel gormanmel gorman6020.91%730.43%
johannes weinerjohannes weiner3411.85%28.70%
andi kleenandi kleen279.41%14.35%
hugh dickinshugh dickins238.01%14.35%
peter zijlstrapeter zijlstra206.97%28.70%
kirill a. shutemovkirill a. shutemov134.53%313.04%
dan williamsdan williams134.53%14.35%
david s. millerdavid s. miller41.39%14.35%
andrea arcangeliandrea arcangeli41.39%14.35%
naoya horiguchinaoya horiguchi10.35%14.35%

/*
 * Walk every pud entry under @pgd that covers [@addr, @end) and hand each
 * usable one to change_pmd_range(). Entries for which
 * pud_none_or_clear_bad() returns true are skipped.
 *
 * Returns the sum of the page counts reported by change_pmd_range().
 */
static inline unsigned long change_pud_range(struct vm_area_struct *vma,
		pgd_t *pgd, unsigned long addr, unsigned long end,
		pgprot_t newprot, int dirty_accountable, int prot_numa)
{
	unsigned long nr_updated = 0;
	unsigned long boundary;
	pud_t *pudp;

	pudp = pud_offset(pgd, addr);
	for (;;) {
		/* End of the part of the range served by this pud entry. */
		boundary = pud_addr_end(addr, end);

		if (!pud_none_or_clear_bad(pudp))
			nr_updated += change_pmd_range(vma, pudp, addr,
						       boundary, newprot,
						       dirty_accountable,
						       prot_numa);

		/* Step to the next entry; stop once the range is consumed. */
		pudp++;
		addr = boundary;
		if (addr == end)
			break;
	}

	return nr_updated;
}


hugh dickinshugh dickins3127.43%218.18%
andi kleenandi kleen2925.66%19.09%
peter zijlstrapeter zijlstra1815.93%218.18%
mel gormanmel gorman54.42%19.09%
david s. millerdavid s. miller43.54%19.09%
johannes weinerjohannes weiner32.65%19.09%

/*
 * Apply @newprot to the page tables of @vma over [@addr, @end).
 *
 * The walk starts at the pgd level and descends via change_pud_range().
 * The cache range is flushed up front; the mm is marked with
 * set_tlb_flush_pending() for the duration of the walk and the mark is
 * cleared after the (conditional) TLB flush.
 *
 * @addr must be strictly below @end (enforced with BUG_ON).
 *
 * Returns the number of pages reported as updated by the lower levels.
 */
static unsigned long change_protection_range(struct vm_area_struct *vma,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable, int prot_numa)
{
	struct mm_struct *mm = vma->vm_mm;
	const unsigned long range_start = addr;
	unsigned long nr_updated = 0;
	unsigned long boundary;
	pgd_t *pgdp;

	BUG_ON(addr >= end);

	pgdp = pgd_offset(mm, addr);
	flush_cache_range(vma, addr, end);
	set_tlb_flush_pending(mm);

	do {
		boundary = pgd_addr_end(addr, end);

		/* Descend only into pgd entries that are populated and sane. */
		if (!pgd_none_or_clear_bad(pgdp))
			nr_updated += change_pud_range(vma, pgdp, addr,
						       boundary, newprot,
						       dirty_accountable,
						       prot_numa);

		pgdp++;
		addr = boundary;
	} while (addr != end);

	/* Only flush the TLB if we actually modified any entries: */
	if (nr_updated)
		flush_tlb_range(vma, range_start, end);
	clear_tlb_flush_pending(mm);

	return nr_updated;
}


hugh dickinshugh dickins4628.22%212.50%
andi kleenandi kleen2716.56%16.25%
peter zijlstrapeter zijlstra2012.27%212.50%
rik van rielrik van riel106.13%16.25%
kanoj sarcarkanoj sarcar84.91%16.25%
linus torvaldslinus torvalds63.68%16.25%
mel gormanmel gorman53.07%16.25%
ingo molnaringo molnar53.07%16.25%
johannes weinerjohannes weiner10.61%16.25%
david s. millerdavid s. miller10.61%16.25%

/*
 * Change the protection of [@start, @end) in @vma to @newprot.
 *
 * hugetlb vmas are delegated to hugetlb_change_protection(); everything
 * else goes through the regular page table walk in
 * change_protection_range(). @dirty_accountable and @prot_numa are only
 * forwarded on the non-hugetlb path.
 *
 * Returns the page count reported by whichever helper handled the range.
 */
unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
		       unsigned long end, pgprot_t newprot,
		       int dirty_accountable, int prot_numa)
{
	if (is_vm_hugetlb_page(vma))
		return hugetlb_change_protection(vma, start, end, newprot);

	return change_protection_range(vma, start, end, newprot,
				       dirty_accountable, prot_numa);
}


peter zijlstrapeter zijlstra6791.78%133.33%
mel gormanmel gorman56.85%133.33%

/*
 * mprotect_fixup - give the [@start, @end) part of @vma the flags @newflags.
 *
 * @vma:      vma containing the range
 * @pprev:    in: candidate predecessor passed to vma_merge();
 *            out: the vma that now covers the range (the merge result, or
 *            @vma itself)
 * @start:    start of the range
 * @end:      end of the range (exclusive)
 * @newflags: complete new vm_flags value for the range
 *
 * Steps: charge commit accounting if a private mapping becomes writable,
 * try to merge with the neighbours, otherwise split @vma at @start/@end as
 * needed, then install the flags and rewrite the page tables through
 * change_protection().
 *
 * Returns 0 on success (including the no-op case newflags == oldflags),
 * -ENOMEM if the may_expand_vm() or security_vm_enough_memory_mm() checks
 * refuse, or the error returned by split_vma(). On a split failure the
 * memory charged here is released again with vm_unacct_memory().
 */
int
mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
	unsigned long start, unsigned long end, unsigned long newflags)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long oldflags = vma->vm_flags;
	long nrpages = (end - start) >> PAGE_SHIFT;
	unsigned long charged = 0;
	pgoff_t pgoff;
	int error;
	int dirty_accountable = 0;

	/* Nothing to change: just report @vma as the covering vma. */
	if (newflags == oldflags) {
		*pprev = vma;
		return 0;
	}

	/*
	 * If we make a private mapping writable we increase our commit;
	 * but (without finer accounting) cannot reduce our commit if we
	 * make it unwritable again. hugetlb mapping were accounted for
	 * even if read-only so there is no need to account for them here
	 */
	if (newflags & VM_WRITE) {
		/* Check space limits when area turns into data. */
		if (!may_expand_vm(mm, newflags, nrpages) &&
				may_expand_vm(mm, oldflags, nrpages))
			return -ENOMEM;
		if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB|
						VM_SHARED|VM_NORESERVE))) {
			charged = nrpages;
			if (security_vm_enough_memory_mm(mm, charged))
				return -ENOMEM;
			newflags |= VM_ACCOUNT;
		}
	}

	/*
	 * First try to merge with previous and/or next vma.
	 */
	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
	*pprev = vma_merge(mm, *pprev, start, end, newflags,
			   vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
			   vma->vm_userfaultfd_ctx);
	if (*pprev) {
		/* Merged: continue with the merged vma. */
		vma = *pprev;
		goto success;
	}

	*pprev = vma;

	/* No merge: carve [start, end) out of @vma where it sticks out. */
	if (start != vma->vm_start) {
		error = split_vma(mm, vma, start, 1);
		if (error)
			goto fail;
	}

	if (end != vma->vm_end) {
		error = split_vma(mm, vma, end, 0);
		if (error)
			goto fail;
	}

success:
	/*
	 * vm_flags and vm_page_prot are protected by the mmap_sem
	 * held in write mode.
	 */
	vma->vm_flags = newflags;
	dirty_accountable = vma_wants_writenotify(vma);
	vma_set_page_prot(vma);

	/* prot_numa argument is 0: this is a regular protection change. */
	change_protection(vma, start, end, vma->vm_page_prot,
			  dirty_accountable, 0);

	/*
	 * Private VM_LOCKED VMA becoming writable: trigger COW to avoid major
	 * fault on access.
	 */
	if ((oldflags & (VM_WRITE | VM_SHARED | VM_LOCKED)) == VM_LOCKED &&
			(newflags & VM_WRITE)) {
		populate_vma_page_range(vma, start, end, NULL);
	}

	/* Move the page count from the old flags' statistics to the new. */
	vm_stat_account(mm, oldflags, -nrpages);
	vm_stat_account(mm, newflags, nrpages);
	perf_event_mmap(vma);
	return 0;

fail:
	/* charged is 0 unless the accounting above added to the commit. */
	vm_unacct_memory(charged);
	return error;
}


andrew mortonandrew morton17542.48%620.00%
hugh dickinshugh dickins399.47%26.67%
kirill a. shutemovkirill a. shutemov368.74%13.33%
linus torvaldslinus torvalds338.01%13.33%
konstantin khlebnikovkonstantin khlebnikov266.31%13.33%
peter zijlstrapeter zijlstra194.61%26.67%
andrea arcangeliandrea arcangeli71.70%26.67%
pekka j enbergpekka j enberg51.21%13.33%
chris wrightchris wright51.21%13.33%
mel gormanmel gorman40.97%26.67%
david howellsdavid howells40.97%13.33%
al viroal viro30.73%13.33%
william lee irwin iiiwilliam lee irwin iii30.73%13.33%
venkatesh pallipadivenkatesh pallipadi30.73%13.33%
peter feinerpeter feiner30.73%13.33%
andy whitcroftandy whitcroft20.49%13.33%

/*
 * mprotect system call: change the protection of [@start, @start + @len).
 *
 * @start: start address; must be page aligned
 * @len:   length in bytes; rounded up with PAGE_ALIGN(), 0 is a no-op
 * @prot:  PROT_* bits, optionally with one of PROT_GROWSDOWN/PROT_GROWSUP
 *
 * Returns 0 on success, otherwise:
 *   -EINVAL  both grow flags set, @start unaligned, arch_validate_prot()
 *            rejects @prot, or a grow flag is used on a vma lacking the
 *            matching VM_GROWSDOWN/VM_GROWSUP flag
 *   -ENOMEM  the range wraps, or is not fully covered by contiguous vmas
 *   -EINTR   down_write_killable() on mmap_sem was interrupted
 *   -EACCES  a requested permission is not allowed by the vma's VM_MAY* bits
 *   or whatever security_file_mprotect() / mprotect_fixup() return.
 *
 * Changes already applied to earlier vmas are not rolled back when a later
 * vma fails.
 */
SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
		unsigned long, prot)
{
	unsigned long nstart, end, tmp, reqprot;
	struct vm_area_struct *vma, *prev;
	int error = -EINVAL;
	const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
	/* READ_IMPLIES_EXEC personality and the caller asked for PROT_READ */
	const bool rier = (current->personality & READ_IMPLIES_EXEC) &&
				(prot & PROT_READ);

	prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
	if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
		return -EINVAL;

	if (start & ~PAGE_MASK)
		return -EINVAL;
	if (!len)
		return 0;
	len = PAGE_ALIGN(len);
	end = start + len;
	/* Catches address wrap-around of start + len. */
	if (end <= start)
		return -ENOMEM;
	if (!arch_validate_prot(prot))
		return -EINVAL;

	/* Remember the caller's request; prot may be widened per vma below. */
	reqprot = prot;

	if (down_write_killable(&current->mm->mmap_sem))
		return -EINTR;

	vma = find_vma(current->mm, start);
	error = -ENOMEM;
	if (!vma)
		goto out;
	prev = vma->vm_prev;
	if (unlikely(grows & PROT_GROWSDOWN)) {
		if (vma->vm_start >= end)
			goto out;
		/* Extend the range down to the start of the vma. */
		start = vma->vm_start;
		error = -EINVAL;
		if (!(vma->vm_flags & VM_GROWSDOWN))
			goto out;
	} else {
		/* @start must lie inside the vma that find_vma() returned. */
		if (vma->vm_start > start)
			goto out;
		if (unlikely(grows & PROT_GROWSUP)) {
			/* Extend the range up to the end of the vma. */
			end = vma->vm_end;
			error = -EINVAL;
			if (!(vma->vm_flags & VM_GROWSUP))
				goto out;
		}
	}
	if (start > vma->vm_start)
		prev = vma;

	for (nstart = start ; ; ) {
		unsigned long newflags;
		int pkey = arch_override_mprotect_pkey(vma, prot, -1);

		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */

		/* Does the application expect PROT_READ to imply PROT_EXEC */
		if (rier && (vma->vm_flags & VM_MAYEXEC))
			prot |= PROT_EXEC;

		/* New access bits, keeping all other bits of the vma's flags. */
		newflags = calc_vm_prot_bits(prot, pkey);
		newflags |= (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));

		/* newflags >> 4 shift VM_MAY% in place of VM_% */
		if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
			error = -EACCES;
			goto out;
		}

		error = security_file_mprotect(vma, reqprot, prot);
		if (error)
			goto out;

		/* Clamp this step to the end of the requested range. */
		tmp = vma->vm_end;
		if (tmp > end)
			tmp = end;
		error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
		if (error)
			goto out;
		nstart = tmp;

		/* prev was updated by mprotect_fixup(); skip past its end. */
		if (nstart < prev->vm_end)
			nstart = prev->vm_end;
		/* Done: error is 0 here from the successful fixup above. */
		if (nstart >= end)
			goto out;

		/* The next vma must start exactly at nstart, i.e. no hole. */
		vma = prev->vm_next;
		if (!vma || vma->vm_start != nstart) {
			error = -ENOMEM;
			goto out;
		}
		/* Drop any PROT_EXEC added above before handling the next vma. */
		prot = reqprot;
	}
out:
	up_write(&current->mm->mmap_sem);
	return error;
}

