cregit-Linux how code gets into the kernel

Release 4.16 mm/madvise.c

Directory: mm
// SPDX-License-Identifier: GPL-2.0
/*
 *      linux/mm/madvise.c
 *
 * Copyright (C) 1999  Linus Torvalds
 * Copyright (C) 2002  Christoph Hellwig
 */

#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/mempolicy.h>
#include <linux/page-isolation.h>
#include <linux/userfaultfd_k.h>
#include <linux/hugetlb.h>
#include <linux/falloc.h>
#include <linux/sched.h>
#include <linux/ksm.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/shmem_fs.h>
#include <linux/mmu_notifier.h>

#include <asm/tlb.h>

#include "internal.h"

/*
 * Any behaviour which results in changes to the vma->vm_flags needs to
 * take mmap_sem for writing. Others, which simply traverse vmas, need
 * to only take it for reading.
 */

/*
 * Tell the caller whether @behavior needs the write-mode lock.
 *
 * Returns 0 for MADV_REMOVE, MADV_WILLNEED, MADV_DONTNEED and MADV_FREE,
 * and 1 for every other value (including ones this file does not know).
 */
static int madvise_need_mmap_write(int behavior)
{
	/* The explicit list of exceptions that do not need write mode. */
	if (behavior == MADV_REMOVE || behavior == MADV_WILLNEED ||
	    behavior == MADV_DONTNEED || behavior == MADV_FREE)
		return 0;

	/* be safe, default to 1. list exceptions explicitly */
	return 1;
}

Contributors

PersonTokensPropCommitsCommitProp
Nicholas Piggin3291.43%150.00%
MinChan Kim38.57%150.00%
Total35100.00%2100.00%

/*
 * We can potentially split a vm area into separate
 * areas, each area with its own behavior.
 */
/*
 * Apply a flag-changing madvise() request to [start, end) inside @vma.
 *
 * @vma:      the area the range lies in
 * @prev:     in: predecessor passed on to vma_merge(); out: set to the
 *            merged vma when a merge happened, otherwise to @vma.  It is
 *            left untouched when the switch below bails out with an error.
 * @start:    first address of the range
 * @end:      end address of the range
 * @behavior: the MADV_* request; values without a case below leave the
 *            flags unchanged
 *
 * Returns 0 on success, -EINVAL when the request is refused for this vma,
 * -ENOMEM when mm->map_count has reached sysctl_max_map_count, and
 * -EAGAIN when ksm_madvise(), hugepage_madvise() or __split_vma() report
 * -ENOMEM.  Any other helper error is passed through unchanged.
 */
static long madvise_behavior(struct vm_area_struct *vma,
		     struct vm_area_struct **prev,
		     unsigned long start, unsigned long end, int behavior)
{
	struct mm_struct *mm = vma->vm_mm;
	int error = 0;
	pgoff_t pgoff;
	/* Work on a copy; vma->vm_flags is only written at "success". */
	unsigned long new_flags = vma->vm_flags;

	switch (behavior) {
	case MADV_NORMAL:
		/* Drop both read-pattern hints. */
		new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ;
		break;
	case MADV_SEQUENTIAL:
		/* The two read-pattern hints are mutually exclusive. */
		new_flags = (new_flags & ~VM_RAND_READ) | VM_SEQ_READ;
		break;
	case MADV_RANDOM:
		new_flags = (new_flags & ~VM_SEQ_READ) | VM_RAND_READ;
		break;
	case MADV_DONTFORK:
		new_flags |= VM_DONTCOPY;
		break;
	case MADV_DOFORK:
		/* VM_DONTCOPY may not be cleared on a VM_IO vma. */
		if (vma->vm_flags & VM_IO) {
			error = -EINVAL;
			goto out;
		}
		new_flags &= ~VM_DONTCOPY;
		break;
	case MADV_WIPEONFORK:
		/* MADV_WIPEONFORK is only supported on anonymous memory. */
		if (vma->vm_file || vma->vm_flags & VM_SHARED) {
			error = -EINVAL;
			goto out;
		}
		new_flags |= VM_WIPEONFORK;
		break;
	case MADV_KEEPONFORK:
		new_flags &= ~VM_WIPEONFORK;
		break;
	case MADV_DONTDUMP:
		new_flags |= VM_DONTDUMP;
		break;
	case MADV_DODUMP:
		/* VM_DONTDUMP may not be cleared when any VM_SPECIAL bit is set. */
		if (new_flags & VM_SPECIAL) {
			error = -EINVAL;
			goto out;
		}
		new_flags &= ~VM_DONTDUMP;
		break;
	case MADV_MERGEABLE:
	case MADV_UNMERGEABLE:
		/* ksm_madvise() updates new_flags through the pointer. */
		error = ksm_madvise(vma, start, end, behavior, &new_flags);
		if (error) {
			/*
			 * madvise() returns EAGAIN if kernel resources, such as
			 * slab, are temporarily unavailable.
			 */
			if (error == -ENOMEM)
				error = -EAGAIN;
			goto out;
		}
		break;
	case MADV_HUGEPAGE:
	case MADV_NOHUGEPAGE:
		/* hugepage_madvise() updates new_flags through the pointer. */
		error = hugepage_madvise(vma, &new_flags, behavior);
		if (error) {
			/*
			 * madvise() returns EAGAIN if kernel resources, such as
			 * slab, are temporarily unavailable.
			 */
			if (error == -ENOMEM)
				error = -EAGAIN;
			goto out;
		}
		break;
	}

	/* Nothing changed: no merge or split needed, just report this vma. */
	if (new_flags == vma->vm_flags) {
		*prev = vma;
		goto out;
	}

	/* vma->vm_pgoff advanced by the pages between vm_start and start. */
	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
	*prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
			  vma->vm_file, pgoff, vma_policy(vma),
			  vma->vm_userfaultfd_ctx);
	if (*prev) {
		/* Merged: the flags are stored on the vma returned by vma_merge(). */
		vma = *prev;
		goto success;
	}

	/* No merge happened; continue with the original vma. */
	*prev = vma;

	/* The range starts inside the vma: split at start. */
	if (start != vma->vm_start) {
		if (unlikely(mm->map_count >= sysctl_max_map_count)) {
			error = -ENOMEM;
			goto out;
		}
		/*
		 * NOTE(review): the last argument presumably selects which
		 * side of the split "vma" keeps - confirm against __split_vma().
		 */
		error = __split_vma(mm, vma, start, 1);
		if (error) {
			/*
			 * madvise() returns EAGAIN if kernel resources, such as
			 * slab, are temporarily unavailable.
			 */
			if (error == -ENOMEM)
				error = -EAGAIN;
			goto out;
		}
	}

	/* The range ends inside the vma: split at end. */
	if (end != vma->vm_end) {
		if (unlikely(mm->map_count >= sysctl_max_map_count)) {
			error = -ENOMEM;
			goto out;
		}
		error = __split_vma(mm, vma, end, 0);
		if (error) {
			/*
			 * madvise() returns EAGAIN if kernel resources, such as
			 * slab, are temporarily unavailable.
			 */
			if (error == -ENOMEM)
				error = -EAGAIN;
			goto out;
		}
	}

success:
	/*
	 * vm_flags is protected by the mmap_sem held in write mode.
	 */
	vma->vm_flags = new_flags;
out:
	return error;
}

Contributors

PersonTokensPropCommitsCommitProp
Prasanna Meda12523.36%211.11%
David Rientjes10419.44%15.56%
Andrew Morton9317.38%316.67%
Hugh Dickins6411.96%422.22%
Michael S. Tsirkin468.60%15.56%
Rik Van Riel407.48%15.56%
Andrea Arcangeli305.61%422.22%
Konstantin Khlebnikov183.36%15.56%
Jason Baron152.80%15.56%
Total535100.00%18100.00%

#ifdef CONFIG_SWAP
/*
 * pmd_entry callback for the swap-in walk: for every page-sized step in
 * [start, end) read the pte and, when it holds a real swap entry, ask
 * read_swap_cache_async() for the page.  walk->private carries the vma.
 * Always returns 0.
 */
static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
	unsigned long end, struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->private;
	unsigned long addr;
	pte_t *table;

	if (pmd_none_or_trans_huge_or_clear_bad(pmd))
		return 0;

	for (addr = start; addr != end; addr += PAGE_SIZE) {
		spinlock_t *ptl;
		pte_t snapshot;
		swp_entry_t entry;
		struct page *page;

		/*
		 * The table is mapped and locked only long enough to copy
		 * one pte; the lock is not held across the swap-in call.
		 */
		table = pte_offset_map_lock(vma->vm_mm, pmd, start, &ptl);
		snapshot = table[(addr - start) / PAGE_SIZE];
		pte_unmap_unlock(table, ptl);

		/* Only non-present, non-empty ptes can hold a swap entry. */
		if (pte_present(snapshot) || pte_none(snapshot))
			continue;

		entry = pte_to_swp_entry(snapshot);
		if (unlikely(non_swap_entry(entry)))
			continue;

		page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE,
					     vma, addr, false);
		/* Drop the reference returned by the call, if any. */
		if (page)
			put_page(page);
	}

	return 0;
}

Contributors

PersonTokensPropCommitsCommitProp
Shaohua Li17899.44%266.67%
Kirill A. Shutemov10.56%133.33%
Total179100.00%3100.00%


static void force_swapin_readahead(struct vm_area_struct *vma, unsigned long start, unsigned long end) { struct mm_walk walk = { .mm = vma->vm_mm, .pmd_entry = swapin_walk_pmd_entry, .private = vma, }; walk_page_range(start, end, &walk); lru_add_drain(); /* Push any new pages onto the LRU now */ }

Contributors

PersonTokensPropCommitsCommitProp
Shaohua Li56100.00%1100.00%
Total56100.00%1100.00%


static void force_shm_swapin_readahead(struct vm_area_struct *vma, unsigned long start, unsigned long end, struct address_space *mapping) { pgoff_t index; struct page *page; swp_entry_t swap; for (; start < end; start += PAGE_SIZE) { index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; page = find_get_entry(mapping, index); if (!radix_tree_exceptional_entry(page)) { if (page) put_page(page); continue; } swap = radix_to_swp_entry(page); page = read_swap_cache_async(swap, GFP_HIGHUSER_MOVABLE, NULL, 0, false); if (page) put_page(page); } lru_add_drain(); /* Push any new pages onto the LRU now */ }

Contributors

PersonTokensPropCommitsCommitProp
Shaohua Li12797.69%250.00%
Kirill A. Shutemov21.54%125.00%
Johannes Weiner10.77%125.00%
Total130100.00%4100.00%

#endif		/* CONFIG_SWAP */

/*
 * Schedule all required I/O operations.  Do not wait for completion.
 */
static long madvise_willneed(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end) { struct file *file = vma->vm_file; *prev = vma; #ifdef CONFIG_SWAP if (!file) { force_swapin_readahead(vma, start, end); return 0; } if (shmem_mapping(file->f_mapping)) { force_shm_swapin_readahead(vma, start, end, file->f_mapping); return 0; } #else if (!file) return -EBADF; #endif if (IS_DAX(file_inode(file))) { /* no bad return value, but ignore advice */ return 0; } start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; if (end > vma->vm_end) end = vma->vm_end; end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; force_page_cache_readahead(file->f_mapping, file, start, end - start); return 0; }

Contributors

PersonTokensPropCommitsCommitProp
Andrew Morton9551.35%433.33%
Shaohua Li3518.92%18.33%
Christoph Hellwig189.73%18.33%
Suzuki K. Poulose94.86%18.33%
Carsten Otte84.32%18.33%
Matthew Wilcox63.24%18.33%
Prasanna Meda63.24%18.33%
Chen Jie52.70%18.33%
David Howells31.62%18.33%
Total185100.00%12100.00%


/*
 * pmd_entry callback for the MADV_FREE walk.
 *
 * @pmd:  pmd covering [addr, end)
 * @addr: first address to process
 * @end:  end address
 * @walk: walk state; walk->private is the mmu_gather, walk->vma the vma
 *
 * Swap entries found in the range are freed and their ptes cleared.
 * Present pages are marked lazy-free after their pte has been made old
 * and clean.  A transparent huge pmd is passed to madvise_free_huge_pmd();
 * a THP mapped by ptes is split first, and processing of the range stops
 * early if it cannot be split or locked.  Always returns 0.
 */
static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
				unsigned long end, struct mm_walk *walk)

{
	struct mmu_gather *tlb = walk->private;
	struct mm_struct *mm = tlb->mm;
	struct vm_area_struct *vma = walk->vma;
	spinlock_t *ptl;
	pte_t *orig_pte, *pte, ptent;
	struct page *page;
	/* Counts freed swap entries downwards; added to MM_SWAPENTS at "out". */
	int nr_swap = 0;
	unsigned long next;

	next = pmd_addr_end(addr, end);
	/* Huge pmd: if the helper reports it handled the range, we are done. */
	if (pmd_trans_huge(*pmd))
		if (madvise_free_huge_pmd(tlb, vma, pmd, addr, next))
			goto next;

	if (pmd_trans_unstable(pmd))
		return 0;

	tlb_remove_check_page_size_change(tlb, PAGE_SIZE);
	/* orig_pte remembers the mapping for the final pte_unmap_unlock(). */
	orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	flush_tlb_batched_pending(mm);
	arch_enter_lazy_mmu_mode();
	for (; addr != end; pte++, addr += PAGE_SIZE) {
		ptent = *pte;

		if (pte_none(ptent))
			continue;
		/*
		 * If the pte has swp_entry, just clear page table to
		 * prevent swap-in which is more expensive rather than
		 * (page allocation + zeroing).
		 */
		if (!pte_present(ptent)) {
			swp_entry_t entry;

			entry = pte_to_swp_entry(ptent);
			if (non_swap_entry(entry))
				continue;
			nr_swap--;
			free_swap_and_cache(entry);
			pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
			continue;
		}

		page = _vm_normal_page(vma, addr, ptent, true);
		if (!page)
			continue;

		/*
		 * If pmd isn't transhuge but the page is THP and
		 * is owned by only this process, split it and
		 * deactivate all pages.
		 */
		if (PageTransCompound(page)) {
			if (page_mapcount(page) != 1)
				goto out;
			/* Pin the page so it survives dropping the pte lock. */
			get_page(page);
			if (!trylock_page(page)) {
				put_page(page);
				goto out;
			}
			/* The pte lock is dropped around split_huge_page(). */
			pte_unmap_unlock(orig_pte, ptl);
			if (split_huge_page(page)) {
				unlock_page(page);
				put_page(page);
				/* Retake the lock so "out" can release it. */
				pte_offset_map_lock(mm, pmd, addr, &ptl);
				goto out;
			}
			unlock_page(page);
			put_page(page);
			/*
			 * NOTE(review): only "pte" is refreshed here; orig_pte
			 * still holds the pointer from the first mapping and is
			 * what the final pte_unmap_unlock() receives - confirm
			 * this is intended.
			 */
			pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
			/*
			 * Step back one entry so the loop increment lands on
			 * the same address again and re-examines the now-split
			 * page.
			 */
			pte--;
			addr -= PAGE_SIZE;
			continue;
		}

		VM_BUG_ON_PAGE(PageTransCompound(page), page);

		if (PageSwapCache(page) || PageDirty(page)) {
			/* Best effort: skip pages whose lock is contended. */
			if (!trylock_page(page))
				continue;
			/*
			 * If page is shared with others, we couldn't clear
			 * PG_dirty of the page.
			 */
			if (page_mapcount(page) != 1) {
				unlock_page(page);
				continue;
			}

			if (PageSwapCache(page) && !try_to_free_swap(page)) {
				unlock_page(page);
				continue;
			}

			ClearPageDirty(page);
			unlock_page(page);
		}

		if (pte_young(ptent) || pte_dirty(ptent)) {
			/*
			 * Some of architecture(ex, PPC) don't update TLB
			 * with set_pte_at and tlb_remove_tlb_entry so for
			 * the portability, remap the pte with old|clean
			 * after pte clearing.
			 */
			ptent = ptep_get_and_clear_full(mm, addr, pte,
							tlb->fullmm);

			ptent = pte_mkold(ptent);
			ptent = pte_mkclean(ptent);
			set_pte_at(mm, addr, pte, ptent);
			tlb_remove_tlb_entry(tlb, pte, addr);
		}
		mark_page_lazyfree(page);
	}
out:
	/* nr_swap is zero or negative: lowers MM_SWAPENTS by the entries freed. */
	if (nr_swap) {
		if (current->mm == mm)
			sync_mm_rss(mm);

		add_mm_counter(mm, MM_SWAPENTS, nr_swap);
	}
	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(orig_pte, ptl);
	cond_resched();
next:
	return 0;
}

Contributors

PersonTokensPropCommitsCommitProp
MinChan Kim50488.58%320.00%
Andrew Morton213.69%213.33%
Badari Pulavarty81.41%16.67%
Aneesh Kumar K.V71.23%16.67%
Mel Gorman50.88%16.67%
Shaohua Li50.88%16.67%
Andrew Lutomirski50.88%16.67%
Hugh Dickins40.70%16.67%
Prasanna Meda30.53%16.67%
Jérôme Glisse30.53%16.67%
Eric Biggers20.35%16.67%
Nicholas Piggin20.35%16.67%
Total569100.00%15100.00%


static void madvise_free_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end) { struct mm_walk free_walk = { .pmd_entry = madvise_free_pte_range, .mm = vma->vm_mm, .private = tlb, }; tlb_start_vma(tlb, vma); walk_page_range(addr, end, &free_walk); tlb_end_vma(tlb, vma); }

Contributors

PersonTokensPropCommitsCommitProp
MinChan Kim71100.00%1100.00%
Total71100.00%1100.00%


/*
 * MADV_FREE on the part of [start_addr, end_addr) that lies inside @vma.
 *
 * Returns -EINVAL when @vma is not anonymous or when the clamped range
 * is empty, 0 otherwise.
 */
static int madvise_free_single_vma(struct vm_area_struct *vma,
			unsigned long start_addr, unsigned long end_addr)
{
	struct mm_struct *mm = vma->vm_mm;
	struct mmu_gather tlb;
	unsigned long lo, hi;

	/* MADV_FREE works for only anon vma at the moment */
	if (!vma_is_anonymous(vma))
		return -EINVAL;

	/* Clamp the requested range to the vma. */
	lo = vma->vm_start > start_addr ? vma->vm_start : start_addr;
	if (lo >= vma->vm_end)
		return -EINVAL;

	hi = vma->vm_end < end_addr ? vma->vm_end : end_addr;
	if (hi <= vma->vm_start)
		return -EINVAL;

	lru_add_drain();
	tlb_gather_mmu(&tlb, mm, lo, hi);
	update_hiwater_rss(mm);

	/* The page walk runs between the notifier start/end pair. */
	mmu_notifier_invalidate_range_start(mm, lo, hi);
	madvise_free_page_range(&tlb, vma, lo, hi);
	mmu_notifier_invalidate_range_end(mm, lo, hi);

	tlb_finish_mmu(&tlb, lo, hi);
	return 0;
}

Contributors

PersonTokensPropCommitsCommitProp
MinChan Kim160100.00%1100.00%
Total160100.00%1100.00%

/*
 * Application no longer needs these pages.  If the pages are dirty,
 * it's OK to just throw them away.  The app will be more careful about
 * data it wants to keep.  Be sure to free swap resources too.  The
 * zap_page_range call sets things up for shrink_active_list to actually free
 * these pages later if no one else has touched them in the meantime,
 * although we could add these pages to a global reuse list for
 * shrink_active_list to pick up before reclaiming other pages.
 *
 * NB: This interface discards data rather than pushes it out to swap,
 * as some implementations do.  This has performance implications for
 * applications like large transactional databases which want to discard
 * pages in anonymous maps after committing to backing store the data
 * that was kept in them.  There is no reason to write this data out to
 * the swap area if the application is discarding it.
 *
 * An interface that causes the system to free clean pages and flush
 * dirty pages is already available as msync(MS_INVALIDATE).
 */
/* Zap [start, end) of @vma via zap_page_range(); always returns 0. */
static long madvise_dontneed_single_vma(struct vm_area_struct *vma,
					unsigned long start, unsigned long end)
{
	unsigned long size = end - start;

	zap_page_range(vma, start, size);
	return 0;
}

Contributors

PersonTokensPropCommitsCommitProp
MinChan Kim2678.79%150.00%
Mike Rapoport721.21%150.00%
Total33100.00%2100.00%


static long madvise_dontneed_free(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, int behavior) { *prev = vma; if (!can_madv_dontneed_vma(vma)) return -EINVAL; if (!userfaultfd_remove(vma, start, end)) { *prev = NULL; /* mmap_sem has been dropped, prev is stale */ down_read(&current->mm->mmap_sem); vma = find_vma(current->mm, start); if (!vma) return -ENOMEM; if (start < vma->vm_start) { /* * This "vma" under revalidation is the one * with the lowest vma->vm_start where start * is also < vma->vm_end. If start < * vma->vm_start it means an hole materialized * in the user address space within the * virtual range passed to MADV_DONTNEED * or MADV_FREE. */ return -ENOMEM; } if (!can_madv_dontneed_vma(vma)) return -EINVAL; if (end > vma->vm_end) { /* * Don't fail if end > vma->vm_end. If the old * vma was splitted while the mmap_sem was * released the effect of the concurrent * operation may not cause madvise() to * have an undefined result. There may be an * adjacent next vma that we'll walk * next. userfaultfd_remove() will generate an * UFFD_EVENT_REMOVE repetition on the * end-vma->vm_end range, but the manager can * handle a repetition fine. */ end = vma->vm_end; } VM_WARN_ON(start >= end); } if (behavior == MADV_DONTNEED) return madvise_dontneed_single_vma(vma, start, end); else if (behavior == MADV_FREE) return madvise_free_single_vma(vma, start, end); else return -EINVAL; }

Contributors

PersonTokensPropCommitsCommitProp
Andrea Arcangeli9250.00%228.57%
MinChan Kim4625.00%114.29%
Mike Rapoport3519.02%228.57%
Pavel Emelyanov73.80%114.29%
Kirill A. Shutemov42.17%114.29%
Total184100.00%7100.00%

/*
 * Application wants to free up the pages and associated backing store.
 * This is effectively punching a hole into the middle of a file.
 */
static long madvise_remove(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end) { loff_t offset; int error; struct file *f; *prev = NULL; /* tell sys_madvise we drop mmap_sem */ if (vma->vm_flags & VM_LOCKED) return -EINVAL; f = vma->vm_file; if (!f || !f->f_mapping || !f->f_mapping->host) { return -EINVAL; } if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE)) return -EACCES; offset = (loff_t)(start - vma->vm_start) + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); /* * Filesystem's fallocate may need to take i_mutex. We need to * explicitly grab a reference because the vma (and hence the * vma's reference to the file) can go away as soon as we drop * mmap_sem. */ get_file(f); if (userfaultfd_remove(vma, start, end)) { /* mmap_sem was not released by userfaultfd_remove() */ up_read(&current->mm->mmap_sem); } error = vfs_fallocate(f, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, end - start); fput(f); down_read(&current->mm->mmap_sem); return error; }

Contributors

PersonTokensPropCommitsCommitProp
MinChan Kim11156.63%111.11%
Hugh Dickins3015.31%222.22%
Badari Pulavarty2613.27%111.11%
Andrew Lutomirski126.12%111.11%
Mike Rapoport84.08%111.11%
Andrea Arcangeli63.06%111.11%
Nicholas Piggin21.02%111.11%
Anna Schumaker10.51%111.11%
Total196100.00%9100.00%

#ifdef CONFIG_MEMORY_FAILURE
/*
 * Error injection support for memory error handling.
 */
static int madvise_inject_error(int behavior, unsigned long start, unsigned long end) { struct page *page; struct zone *zone; unsigned int order; if (!capable(CAP_SYS_ADMIN)) return -EPERM; for (; start < end; start += PAGE_SIZE << order) { int ret; ret = get_user_pages_fast(start, 1, 0, &page); if (ret != 1) return ret; /* * When soft offlining hugepages, after migrating the page * we dissolve it, therefore in the second loop "page" will * no longer be a compound page, and order will be 0. */ order = compound_order(compound_head(page)); if (PageHWPoison(page)) { put_page(page); continue; } if (behavior == MADV_SOFT_OFFLINE) { pr_info("Soft offlining pfn %#lx at process virtual address %#lx\n", page_to_pfn(page), start); ret = soft_offline_page(page, MF_COUNT_INCREASED); if (ret) return ret; continue; } pr_info("Injecting memory failure for pfn %#lx at process virtual address %#lx\n", page_to_pfn(page), start); ret = memory_failure(page_to_pfn(page), MF_COUNT_INCREASED); if (ret) return ret; } /* Ensure that all poisoned pages are removed from per-cpu lists */ for_each_populated_zone(zone) drain_all_pages(zone); return 0; }

Contributors

PersonTokensPropCommitsCommitProp
Andi Kleen10957.07%428.57%
Wanpeng Li2312.04%428.57%
Alexandru Moise168.38%17.14%
Mel Gorman157.85%17.14%
Anshuman Khandual136.81%17.14%
Naoya Horiguchi94.71%17.14%
Andrew Morton52.62%17.14%
Tony Luck10.52%17.14%
Total191100.00%14100.00%

#endif
/*
 * Dispatch one vma-sized piece of a madvise() request to its handler and
 * return that handler's result.  Anything that is not MADV_REMOVE,
 * MADV_WILLNEED, MADV_FREE or MADV_DONTNEED goes to madvise_behavior().
 */
static long
madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
		unsigned long start, unsigned long end, int behavior)
{
	if (behavior == MADV_REMOVE)
		return madvise_remove(vma, prev, start, end);

	if (behavior == MADV_WILLNEED)
		return madvise_willneed(vma, prev, start, end);

	if (behavior == MADV_FREE || behavior == MADV_DONTNEED)
		return madvise_dontneed_free(vma, prev, start, end, behavior);

	return madvise_behavior(vma, prev, start, end, behavior);
}

Contributors

PersonTokensPropCommitsCommitProp
Andrew Morton5353.54%114.29%
Hugh Dickins1717.17%114.29%
Prasanna Meda1212.12%114.29%
Badari Pulavarty99.09%114.29%
MinChan Kim33.03%114.29%
Mike Rapoport33.03%114.29%
Nicholas Piggin22.02%114.29%
Total99100.00%7100.00%


/*
 * Return true when @behavior is a request this build accepts.  The KSM,
 * transparent-hugepage and memory-failure requests are only accepted
 * when the matching CONFIG_ option is enabled.
 */
static bool madvise_behavior_valid(int behavior)
{
	/* Requests that are accepted regardless of configuration. */
	if (behavior == MADV_NORMAL || behavior == MADV_RANDOM ||
	    behavior == MADV_SEQUENTIAL || behavior == MADV_WILLNEED ||
	    behavior == MADV_DONTNEED || behavior == MADV_FREE ||
	    behavior == MADV_REMOVE || behavior == MADV_DONTFORK ||
	    behavior == MADV_DOFORK || behavior == MADV_DONTDUMP ||
	    behavior == MADV_DODUMP || behavior == MADV_WIPEONFORK ||
	    behavior == MADV_KEEPONFORK)
		return true;

#ifdef CONFIG_KSM
	if (behavior == MADV_MERGEABLE || behavior == MADV_UNMERGEABLE)
		return true;
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	if (behavior == MADV_HUGEPAGE || behavior == MADV_NOHUGEPAGE)
		return true;
#endif

#ifdef CONFIG_MEMORY_FAILURE
	if (behavior == MADV_SOFT_OFFLINE || behavior == MADV_HWPOISON)
		return true;
#endif

	return false;
}

Contributors

PersonTokensPropCommitsCommitProp
Nicholas Piggin4345.74%111.11%
Andrea Arcangeli1111.70%222.22%
Anshuman Khandual1111.70%111.11%
Hugh Dickins1111.70%111.11%
Rik Van Riel66.38%111.11%
Jason Baron66.38%111.11%
Nicholas Krause33.19%111.11%
MinChan Kim33.19%111.11%
Total94100.00%9100.00%

/*
 * The madvise(2) system call.
 *
 * Applications can use madvise() to advise the kernel how it should
 * handle paging I/O in this VM area.  The idea is to help the kernel
 * use appropriate read-ahead and caching techniques.  The information
 * provided is advisory only, and can be safely disregarded by the
 * kernel without affecting the correct operation of the application.
 *
 * behavior values:
 *  MADV_NORMAL - the default behavior is to read clusters.  This
 *		results in some read-ahead and read-behind.
 *  MADV_RANDOM - the system should read the minimum amount of data
 *		on any access, since it is unlikely that the appli-
 *		cation will need more than what it asks for.
 *  MADV_SEQUENTIAL - pages in the given range will probably be accessed
 *		once, so they can be aggressively read ahead, and
 *		can be freed soon after they are accessed.
 *  MADV_WILLNEED - the application is notifying the system to read
 *		some pages ahead.
 *  MADV_DONTNEED - the application is finished with the given range,
 *		so the kernel can free resources associated with it.
 *  MADV_FREE - the application marks pages in the given range as lazy free,
 *		where actual purges are postponed until memory pressure happens.
 *  MADV_REMOVE - the application wants to free up the given range of
 *		pages and associated backing store.
 *  MADV_DONTFORK - omit this area from child's address space when forking:
 *		typically, to avoid COWing pages pinned by get_user_pages().
 *  MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking.
 *  MADV_WIPEONFORK - present the child process with zero-filled memory in this
 *              range after a fork.
 *  MADV_KEEPONFORK - undo the effect of MADV_WIPEONFORK
 *  MADV_HWPOISON - trigger memory error handler as if the given memory range
 *		were corrupted by unrecoverable hardware memory failure.
 *  MADV_SOFT_OFFLINE - try to soft-offline the given range of memory.
 *  MADV_MERGEABLE - the application recommends that KSM try to merge pages in
 *		this area with pages of identical content from other such areas.
 *  MADV_UNMERGEABLE- cancel MADV_MERGEABLE: no longer merge pages with others.
 *  MADV_HUGEPAGE - the application wants to back the given range by transparent
 *		huge pages in the future. Existing pages might be coalesced and
 *		new pages might be allocated as THP.
 *  MADV_NOHUGEPAGE - mark the given range as not worth being backed by
 *		transparent huge pages so the existing pages will not be
 *		coalesced into THP and new pages will not be allocated as THP.
 *  MADV_DONTDUMP - the application wants to prevent pages in the given range
 *		from being included in its core dump.
 *  MADV_DODUMP - cancel MADV_DONTDUMP: no longer exclude from core dump.
 *
 * return values:
 *  zero    - success
 *  -EINVAL - start + len < 0, start is not page-aligned,
 *		"behavior" is not a valid value, or application
 *		is attempting to release locked or shared pages,
 *		or the specified address range includes file, Huge TLB,
 *		MAP_SHARED or VMPFNMAP range.
 *  -ENOMEM - addresses in the specified range are not currently
 *		mapped, or are outside the AS of the process.
 *  -EIO    - an I/O error occurred while paging in data.
 *  -EBADF  - map exists, but area maps something that isn't a file.
 *  -EAGAIN - a kernel resource was temporarily unavailable.
 *  -EINTR  - taking mmap_sem for writing was interrupted.
 *  -EPERM  - MADV_HWPOISON or MADV_SOFT_OFFLINE without CAP_SYS_ADMIN.
 *  -EACCES - MADV_REMOVE on a mapping that is not both shared and writable.
 */
SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
{
	unsigned long end, tmp;
	struct vm_area_struct *vma, *prev;
	/* Remembers that a hole was skipped; reported once the walk is done. */
	int unmapped_error = 0;
	int error = -EINVAL;
	int write;
	size_t len;
	struct blk_plug plug;

	if (!madvise_behavior_valid(behavior))
		return error;

	/* start must be page aligned. */
	if (start & ~PAGE_MASK)
		return error;
	/* Round the length up to a whole number of pages. */
	len = (len_in + ~PAGE_MASK) & PAGE_MASK;

	/* Check to see whether len was rounded up from small -ve to zero */
	if (len_in && !len)
		return error;

	end = start + len;
	/* Reject ranges that wrap around the address space. */
	if (end < start)
		return error;

	/* An empty range is a successful no-op. */
	error = 0;
	if (end == start)
		return error;

#ifdef CONFIG_MEMORY_FAILURE
	/* Error injection is handled before mmap_sem is taken here. */
	if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE)
		return madvise_inject_error(behavior, start, start + len_in);
#endif

	/* Pick write or read mode for mmap_sem depending on the request. */
	write = madvise_need_mmap_write(behavior);
	if (write) {
		if (down_write_killable(&current->mm->mmap_sem))
			return -EINTR;
	} else {
		down_read(&current->mm->mmap_sem);
	}

	/*
	 * If the interval [start,end) covers some unmapped address
	 * ranges, just ignore them, but return -ENOMEM at the end.
	 * - different from the way of handling in mlock etc.
	 */
	vma = find_vma_prev(current->mm, start, &prev);
	/* start lies inside vma: use vma itself as prev. */
	if (vma && start > vma->vm_start)
		prev = vma;

	blk_start_plug(&plug);
	for (;;) {
		/* Still start < end. */
		error = -ENOMEM;
		if (!vma)
			goto out;

		/* Here start < (end|vma->vm_end). */
		if (start < vma->vm_start) {
			/* Skip the hole but remember it for the final result. */
			unmapped_error = -ENOMEM;
			start = vma->vm_start;
			if (start >= end)
				goto out;
		}

		/* Here vma->vm_start <= start < (end|vma->vm_end) */
		tmp = vma->vm_end;
		if (end < tmp)
			tmp = end;

		/* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
		error = madvise_vma(vma, &prev, start, tmp, behavior);
		if (error)
			goto out;
		start = tmp;
		/* prev may now end beyond tmp; continue after it. */
		if (prev && start < prev->vm_end)
			start = prev->vm_end;
		error = unmapped_error;
		if (start >= end)
			goto out;
		if (prev)
			vma = prev->vm_next;
		else	/* madvise_remove dropped mmap_sem */
			vma = find_vma(current->mm, start);
	}
out:
	blk_finish_plug(&plug);
	/* Release mmap_sem in the mode it was taken. */
	if (write)
		up_write(&current->mm->mmap_sem);
	else
		up_read(&current->mm->mmap_sem);

	return error;
}

Overall Contributors

PersonTokensPropCommitsCommitProp
MinChan Kim93429.10%33.61%
Andrew Morton45614.21%910.84%
Shaohua Li43313.49%33.61%
Prasanna Meda2086.48%22.41%
Hugh Dickins1675.20%89.64%
Andrea Arcangeli1394.33%67.23%
Andi Kleen1183.68%44.82%
Nicholas Piggin1073.33%33.61%
David Rientjes1043.24%11.20%
Mike Rapoport541.68%33.61%
Anshuman Khandual511.59%22.41%
Rasmus Villemoes471.46%11.20%
Michael S. Tsirkin461.43%11.20%
Rik Van Riel461.43%11.20%
Badari Pulavarty431.34%11.20%
Jason Baron250.78%22.41%
Wanpeng Li230.72%44.82%
Mel Gorman200.62%22.41%
Andrew Lutomirski200.62%11.20%
Konstantin Khlebnikov180.56%11.20%
Christoph Hellwig180.56%11.20%
Heiko Carstens160.50%11.20%
Alexandru Moise160.50%11.20%
Michal Hocko120.37%11.20%
Pavel Emelyanov100.31%11.20%
Kirill A. Shutemov100.31%22.41%
Suzuki K. Poulose90.28%11.20%
Naoya Horiguchi90.28%11.20%
Carsten Otte80.25%11.20%
Aneesh Kumar K.V70.22%11.20%
Matthew Wilcox60.19%11.20%
Chen Jie50.16%11.20%
David Howells30.09%11.20%
Tejun Heo30.09%11.20%
Jérôme Glisse30.09%11.20%
Nicholas Krause30.09%11.20%
Alexey Dobriyan30.09%11.20%
Arnd Bergmann30.09%11.20%
Eric Biggers20.06%11.20%
Greg Kroah-Hartman10.03%11.20%
Anna Schumaker10.03%11.20%
Johannes Weiner10.03%11.20%
Tony Luck10.03%11.20%
Yang Shi10.03%11.20%
Total3210100.00%83100.00%
Directory: mm
Information contained on this website is for historical information purposes only and does not indicate or represent copyright ownership.
Created with cregit.